Once a time i had faced a thing,Which makes me to do a User Defined function which will check xml is well-formed. Some time we are saving the XML as VARCHAR in table. May be that time we don't knew whether it is well formed xml or not.But when select the records as xml we have to cast it to XML type that time it will raise error.
Consider there are 10000 records in table due to one record whole select process is interrupt. How to find that particular record. There is no pre-defined function which will test varchar type is in well formed xml.
Based on that we can select only well formed data as XML.
Let we take a scenario
1. Create a table
2. Insert some values with well formed xml and not
3. then finally we want the data as xml in output.so we are going to select with cast as xml.
Step 1:
create table samplexml
(
id int identity(1,1),
fromname varchar(40),
xmldata nvarchar(2000)
)
Step 2:
Insert into samplexml(fromname,xmldata)
select 'samsung' ,'<Products><Model><Name>AC123</Name></Model><Model><Name>WE345</Name></Model></Products>'
Insert into samplexml(fromname,xmldata)
select 'samsung' ,'<Products><Model><Name>AF234</Name></Model><Model><Name>WD329<Name></Model></Products>'
Insert into samplexml(fromname,xmldata)
select 'samsung' ,'<Products><Model><Name
sub="ss">DF556</Name></Model><Model><Name>WD098</Name></Model></Products>'
Insert into samplexml(fromname,xmldata)
select 'samsung' ,'<Products><Model><Name
sub="ss>DF456</Name></Model><Model><Name>gd028</Name></Model></Products>'
In the Above code second value is not well formed. Model tag is not closed and fourth insert have attribute not closed well
Step 3:
When selecting the records from the Table as Xml "will Throw an Error "
Msg 9436, Level 16, State 1, Line 1
XML parsing: line 1, character 75, end tag does not match start tag
Select id , fromname,
cast(xmldata as xml) "ProductXml"
From samplexml
Now from the above we cant able to select the single records due to error in casting.If there is 10000 Records how we can find the particular row which is not in correct format (or) we have to select the records which can be able to cast instead of interrupt.
To Overcome this, i have implemented a user defined function which will tell whether particualr varchar data is in well-formed xml or not , If it well formed then function will return 1 or if not then-1
How to find the particular record is not a well formed xml using function ?
SELECT ID,FROMNAME, DBO.ISXMLFORMAT(XMLDATA) FROM SAMPLEXML
ISXMLFORMAT is the user defined function which will get the data as input and return the 1 for well formed xml (syntax check), -1 for not well formed xml
When run the above select statement it will show the result which data are well formed xml.To select the Well-formed xml and then cast as xml using following query.
SELECT ID,FROMNAME, CAST(XMLDATA AS XML) FROM SAMPLEXML
WHERE DBO.ISXMLFORMAT(XMLDATA) =1
Output :
CREATE FUNCTION ISXMLFORMAT(@DATA
NVARCHAR(MAX))
RETURNS INT
AS
BEGIN
DECLARE @S NVARCHAR(MAX)
DECLARE @CHAR CHAR(1)
DECLARE
@opentag char(1)
DECLARE
@endtag char(1)
DECLARE @ATT NVARCHAR(1) = 'S'
DECLARE @WORD NVARCHAR(MAX) = ''
DECLARE @OFLAG VARCHAR(1) = 'N'
DECLARE @EFLAG VARCHAR(1) = 'N'
DECLARE @TCLOSE VARCHAR(MAX) =''
DECLARE @LENGTH INT
DECLARE @MIN INT = 1;
DECLARE @COM NVARCHAR(MAX)
DECLARE @key NVARCHAR(100)=''
DECLARE @EQUAL NVARCHAR(1) ='N'
DECLARE @OQUOTE NVARCHAR(1)='N'
DECLARE @EQUOTE NVARCHAR(1)='N'
DECLARE @OSQUOTE NVARCHAR(1)='N'
DECLARE @ESQUOTE NVARCHAR(1)='N'
DECLARE @KEYEND NVARCHAR(1)='N'
DECLARE
@STLETTER CHAR(1)
DECLARE @TAB TABLE
(
ID INT
IDENTITY(1,1),
COL VARCHAR(100)
)
SELECT @S = @DATA;
SELECT @LENGTH = LEN(@S);
SELECT @STLETTER = SUBSTRING(@S,1,1);
IF @STLETTER <> '<' AND @STLETTER <> '' AND @STLETTER <> ' '
RETURN -1;
IF SUBSTRING(@S,1,5) = '
BEGIN
SELECT @MIN =
40;
END
WHILE @MIN<=@LENGTH
BEGIN
SELECT @CHAR =
SUBSTRING(@S,@MIN,1);
IF @char = '<' AND SUBSTRING(@S,@MIN+1,1) <> ' '
BEGIN
SELECT @MIN=@MIN+1;
WHILE SUBSTRING(@s,@min,1) <> '>'
BEGIN
IF SUBSTRING(@S,@MIN,1) = '/'
BEGIN
IF SUBSTRING(@S,@MIN,1) = '/' AND SUBSTRING(@S,@MIN+1,1) = '> '
BEGIN
SELECT @MIN = @MIN +1;
SELECT @WORD=NULL;
END
ELSE
BEGIN
SELECT @EFLAG = 'S'
SELECT @COM=COL
FROM @TAB
WHERE ID = (SELECT MAX(ID) FROM @TAB)
SELECT @MIN=@MIN+1;
END
END
ELSE
BEGIN
BEGIN
IF SUBSTRING(@S,@MIN,1) = ' ' and SUBSTRING(@S,@MIN+1,2) <> '/>'
BEGIN
IF @WORD IS not NULL
BEGIN
INSERT INTO
@TAB(COL) SELECT @WORD;
SELECT @WORD=
null;
END
SELECT @min =
@min +1;
WHILE SUBSTRING(@s,@min,1) <> '>'
BEGIN
IF SUBSTRING(@S,@MIN,1) <> '=' AND SUBSTRING(@S,@MIN,1) <> '/' AND SUBSTRING(@S,@MIN+1,1) <> '>'
BEGIN
IF SUBSTRING(@S,@MIN,1) = '"' AND @OQUOTE <> 'S' AND @equal='S' AND @KEY <> ''
BEGIN
SELECT @OQUOTE =
'S'
END
ELSE IF
SUBSTRING(@S,@MIN,1) = '"' AND
@OQUOTE =
'S' AND @equal='S' AND @KEY <> ''
BEGIN
SELECT @equote =
'S'
END
ELSE IF
SUBSTRING(@S,@MIN,1) = '''' AND @osquote <> 'S' AND @equal='S' AND @KEY <> ''
BEGIN
SELECT @osquote =
'S'
END
ELSE IF
SUBSTRING(@S,@MIN,1) = '''' AND @osquote = 'S' AND @equal='S' AND @KEY <> ''
BEGIN
SELECT @esquote =
'S'
END
ELSE IF
@equal<>'S'
BEGIN
SELECT @KEY=@KEY+SUBSTRING(@S,@MIN,1);
END
IF @equal='S' AND @OQUOTE ='S' AND @equote= 'S'
BEGIN
SELECT @KEY='',@equal='N',@OQUOTE ='N',@equote= 'N',@keyend='S'
END
IF @equal='S' AND @OSQUOTE ='S' AND @ESQUOTE= 'S'
BEGIN
SELECT @KEY='',@equal='N',@OsQUOTE ='N',@esquote= 'N',@keyend='S'
END
END
ELSE
BEGIN
IF (SUBSTRING(@S,@MIN,1) = '"' OR SUBSTRING(@S,@MIN,1) = '''') AND SUBSTRING(@S,@MIN+1,1) = '>'
BEGIN
IF SUBSTRING(@S,@MIN,1) = '"' AND @OQUOTE = 'S' AND @equal='S' AND @KEY <> ''
BEGIN
SELECT @equote =
'S'
END
IF SUBSTRING(@S,@MIN,1) = '''' AND @osquote = 'S' AND @equal='S' AND @KEY <> ''
BEGIN
SELECT @esquote =
'S'
END
IF @equal='S' AND @OQUOTE ='S' AND @equote= 'S'
BEGIN
SELECT @KEY='',@equal='N',@OQUOTE ='N',@equote= 'N',@keyend='S'
END
IF @equal='S' AND @OSQUOTE ='S' AND @ESQUOTE= 'S'
BEGIN
SELECT @KEY='',@equal='N',@OsQUOTE ='N',@esquote= 'N',@keyend='S'
END
END
IF (SUBSTRING(@S,@MIN,1) = ' ' OR SUBSTRING(@S,@MIN,1) = '/') and SUBSTRING(@S,@MIN,2) = '/>' AND @keyend='S'
BEGIN
DELETE FROM
@TAB WHERE ID = (SELECT MAX(ID) FROM @TAB)
SELECT @KEYEND ='N'
END
IF @KEY <> ''
SELECT @equal='S'
END
SELECT @min =
@min +1;
END
SELECT @min =
@min -1;
--set the attr status.
IF @KEY<>'' OR @equal<>'N'OR @OsQUOTE <>'N'OR @esquote<>'N'
SELECT @ATT =
'N'
END
ELSE
BEGIN
SELECT @WORD =
@WORD + SUBSTRING(@S,@MIN,1);
END
END
SELECT @MIN=@MIN+1;
END
END
IF @COM = @WORD and @EFLAG = 'S'
BEGIN
DELETE FROM
@TAB WHERE ID = (SELECT MAX(ID) FROM @TAB)
SELECT @EFLAG =
'N'
END
ELSE
BEGIN
IF @WORD IS not NULL
INSERT INTO
@TAB(COL) SELECT @WORD;
END
SELECT @WORD='';
SELECT @TCLOSE='';
END
-- ELSE
-- BEGIN
----
Return -1
-- END
SELECT @MIN=@MIN+1;
IF SUBSTRING(@S,@MIN,1) = '"' OR SUBSTRING(@S,@MIN,1) = '>' OR SUBSTRING(@S,@MIN,1) = '&' OR SUBSTRING(@S,@MIN,1) = ''''
BEGIN
Return -1
END
END
IF (SELECT COUNT(*) FROM @TAB) > 0 OR @ATT='N'
BEGIN
Return -1;
END
ELSE
BEGIN
Return 1;
END
Return -1;
END
From the above function we can validate the syntax of the varchar type is in xml format.