Monday, 15 July 2013

Solution for Check VARCHAR(n) for Well Formed XML before cast or convert in Sql Server using User Defined Function

















               Once a time i had faced a thing,Which makes me to do a User Defined function which will check xml is well-formed. Some time we are saving  the XML as VARCHAR in table. May be that time we don't knew whether it is well formed xml or not.But when select the records as xml we have to cast it to XML type that time it will raise error.

Consider there are 10000 records in table due to one record whole select process is interrupt. How to find that particular record. There is no pre-defined function which will test varchar type is in well formed xml.
Based on that we can select only well formed data as XML.


Let we take a scenario

1. Create a table 
2. Insert some values with well formed xml and not
3. then finally we want the data as xml in output.so we are going to select with cast as xml.

Step 1: 
create table samplexml
(
id int identity(1,1),
fromname varchar(40),
xmldata nvarchar(2000)
)

Step 2:
Insert into samplexml(fromname,xmldata)
select 'samsung' ,'<Products><Model><Name>AC123</Name></Model><Model><Name>WE345</Name></Model></Products>'

Insert into samplexml(fromname,xmldata)
select 'samsung' ,'<Products><Model><Name>AF234</Name></Model><Model><Name>WD329<Name></Model></Products>'

Insert into samplexml(fromname,xmldata)
select 'samsung' ,'<Products><Model><Name sub="ss">DF556</Name></Model><Model><Name>WD098</Name></Model></Products>'

Insert into samplexml(fromname,xmldata)
select 'samsung' ,'<Products><Model><Name sub="ss>DF456</Name></Model><Model><Name>gd028</Name></Model></Products>'

In the Above code second value is not well formed. Model tag is not closed and fourth insert have attribute not closed well 

Step 3: 

When selecting the records from the Table as Xml "will Throw an Error "
Msg 9436, Level 16, State 1, Line 1

XML parsing: line 1, character 75, end tag does not match start tag


Select id , fromname, cast(xmldata as xml) "ProductXml" From samplexml

Now from the above we cant able to select the single records due to error in casting.If there is 10000 Records how we can find the particular row which is not in correct format (or) we have to select the records which can be able to cast instead of interrupt.

To Overcome this, i have implemented a user defined function which will tell whether particualr varchar data is in well-formed xml or not , If it well formed then function will return 1 or if not then-1 

How to find the particular record is not a well formed xml using function ?

SELECT ID,FROMNAME, DBO.ISXMLFORMAT(XMLDATA) FROM SAMPLEXML 

ISXMLFORMAT is the user defined function which will get the data as input and return the 1 for well formed xml (syntax check), -1 for not well formed xml

















When run the above select statement it will show the result which data are well formed xml.To select the Well-formed xml and then cast as xml using following query.

SELECT ID,FROMNAME, CAST(XMLDATA AS XML) FROM SAMPLEXML 
WHERE DBO.ISXMLFORMAT(XMLDATA) =1 

Output :














CREATE FUNCTION ISXMLFORMAT(@DATA NVARCHAR(MAX))
RETURNS INT
AS
BEGIN
DECLARE @S              NVARCHAR(MAX)
DECLARE @CHAR           CHAR(1)
DECLARE @opentag    char(1)
DECLARE @endtag     char(1)
DECLARE @ATT            NVARCHAR(1) = 'S'
DECLARE @WORD           NVARCHAR(MAX) = ''
DECLARE @OFLAG          VARCHAR(1) = 'N'
DECLARE @EFLAG          VARCHAR(1) = 'N'
DECLARE @TCLOSE   VARCHAR(MAX) =''
DECLARE @LENGTH   INT
DECLARE @MIN            INT = 1;
DECLARE @COM            NVARCHAR(MAX)
DECLARE @key            NVARCHAR(100)=''
DECLARE @EQUAL          NVARCHAR(1) ='N'
DECLARE @OQUOTE   NVARCHAR(1)='N'
DECLARE @EQUOTE         NVARCHAR(1)='N'
DECLARE @OSQUOTE  NVARCHAR(1)='N'
DECLARE @ESQUOTE  NVARCHAR(1)='N'
DECLARE @KEYEND         NVARCHAR(1)='N'
DECLARE @STLETTER   CHAR(1)

DECLARE @TAB            TABLE
(
ID INT IDENTITY(1,1),
COL VARCHAR(100)
)

SELECT @S = @DATA;
SELECT @LENGTH = LEN(@S);
SELECT @STLETTER = SUBSTRING(@S,1,1);

IF @STLETTER <> '<' AND  @STLETTER <> '' AND  @STLETTER <> ' '
RETURN  -1;

IF SUBSTRING(@S,1,5) = '
BEGIN
  SELECT @MIN = 40;
END

WHILE @MIN<=@LENGTH
BEGIN

SELECT @CHAR = SUBSTRING(@S,@MIN,1);
IF @char = '<' AND SUBSTRING(@S,@MIN+1,1) <> ' '
BEGIN
SELECT @MIN=@MIN+1;

WHILE SUBSTRING(@s,@min,1) <> '>'
BEGIN
   IF SUBSTRING(@S,@MIN,1) = '/'
BEGIN

IF SUBSTRING(@S,@MIN,1) = '/' AND  SUBSTRING(@S,@MIN+1,1) = '> '
BEGIN
   SELECT @MIN = @MIN +1;
   SELECT @WORD=NULL;
END


ELSE

BEGIN

  SELECT @EFLAG = 'S'

  SELECT    @COM=COL
  FROM      @TAB
  WHERE     ID = (SELECT MAX(ID) FROM @TAB)
  SELECT @MIN=@MIN+1;
END
END
ELSE
BEGIN
BEGIN
IF SUBSTRING(@S,@MIN,1) = ' ' and SUBSTRING(@S,@MIN+1,2) <> '/>'
BEGIN
IF @WORD IS not NULL
BEGIN

INSERT INTO @TAB(COL) SELECT @WORD;
SELECT @WORD= null;
END

SELECT @min = @min +1;
WHILE SUBSTRING(@s,@min,1) <> '>'
BEGIN

IF SUBSTRING(@S,@MIN,1) <> '=' AND  SUBSTRING(@S,@MIN,1) <> '/' AND SUBSTRING(@S,@MIN+1,1) <> '>'
BEGIN
IF SUBSTRING(@S,@MIN,1) = '"' AND @OQUOTE <> 'S' AND @equal='S' AND @KEY <> ''
BEGIN
SELECT @OQUOTE = 'S'
END
ELSE IF SUBSTRING(@S,@MIN,1) = '"' AND @OQUOTE  = 'S' AND @equal='S' AND @KEY <> ''
BEGIN
SELECT @equote = 'S'
END
ELSE IF SUBSTRING(@S,@MIN,1) = '''' AND @osquote <> 'S' AND @equal='S' AND @KEY <> ''
BEGIN
SELECT @osquote = 'S'
END
ELSE IF SUBSTRING(@S,@MIN,1) = '''' AND @osquote  = 'S' AND @equal='S' AND @KEY <> ''
BEGIN
SELECT @esquote = 'S'
END
ELSE IF @equal<>'S'
BEGIN
SELECT @KEY=@KEY+SUBSTRING(@S,@MIN,1);
END

IF @equal='S' AND @OQUOTE ='S' AND @equote= 'S'
BEGIN
SELECT @KEY='',@equal='N',@OQUOTE ='N',@equote= 'N',@keyend='S'
END

IF @equal='S' AND @OSQUOTE ='S' AND @ESQUOTE= 'S'
BEGIN
SELECT @KEY='',@equal='N',@OsQUOTE ='N',@esquote= 'N',@keyend='S'
END
END
ELSE
BEGIN

IF (SUBSTRING(@S,@MIN,1) = '"' OR SUBSTRING(@S,@MIN,1) = '''') AND SUBSTRING(@S,@MIN+1,1) = '>'
BEGIN
IF SUBSTRING(@S,@MIN,1) = '"' AND @OQUOTE  = 'S' AND @equal='S' AND @KEY <> ''
BEGIN
SELECT @equote = 'S'
END

IF SUBSTRING(@S,@MIN,1) = '''' AND @osquote  = 'S' AND @equal='S' AND @KEY <> ''
BEGIN
SELECT @esquote = 'S'
END

IF @equal='S' AND @OQUOTE ='S' AND @equote= 'S'
BEGIN
SELECT @KEY='',@equal='N',@OQUOTE ='N',@equote= 'N',@keyend='S'
END

IF @equal='S' AND @OSQUOTE ='S' AND @ESQUOTE= 'S'
BEGIN
SELECT @KEY='',@equal='N',@OsQUOTE ='N',@esquote= 'N',@keyend='S'
END

END

IF (SUBSTRING(@S,@MIN,1) = ' ' OR  SUBSTRING(@S,@MIN,1) = '/') and SUBSTRING(@S,@MIN,2) = '/>' AND @keyend='S'
BEGIN
DELETE FROM @TAB WHERE ID = (SELECT MAX(ID) FROM @TAB)
SELECT @KEYEND ='N'
END
IF  @KEY <> ''
SELECT @equal='S'
END
SELECT @min = @min +1;
END

SELECT @min = @min -1;
--set the attr status.
IF @KEY<>'' OR  @equal<>'N'OR @OsQUOTE <>'N'OR @esquote<>'N'
SELECT @ATT = 'N'

END
ELSE
BEGIN
SELECT @WORD = @WORD + SUBSTRING(@S,@MIN,1);
END
END
SELECT @MIN=@MIN+1;
END
END

IF @COM = @WORD and @EFLAG = 'S'
BEGIN
DELETE FROM @TAB WHERE ID = (SELECT MAX(ID) FROM @TAB)
SELECT @EFLAG = 'N'
END
ELSE
BEGIN
IF @WORD IS not NULL
INSERT INTO @TAB(COL) SELECT @WORD;
END

SELECT @WORD='';
SELECT @TCLOSE='';
END
--  ELSE
--  BEGIN
----      Return -1
--  END
SELECT @MIN=@MIN+1;

IF SUBSTRING(@S,@MIN,1) = '"' OR SUBSTRING(@S,@MIN,1) = '>' OR SUBSTRING(@S,@MIN,1) = '&'  OR SUBSTRING(@S,@MIN,1) = ''''
BEGIN
Return -1
END
END
IF (SELECT COUNT(*) FROM @TAB) > 0 OR @ATT='N'
BEGIN
Return -1;
END
ELSE
BEGIN
Return 1;
END
Return -1;

END


From the above function we can validate the syntax of the varchar type is in xml format.