Compare 2 rows using tsql code

  • I haven't used this code in several years, I think it will get you what you want. I see others have already provided answers though.  It does loop through the columns, so may not be very fast.
    SELECT column_name, data_type,
        CAST(CASE   WHEN data_type = 'datetime' THEN ''
                    WHEN data_type = 'int'      THEN ''
                    WHEN data_type = 'decimal'   THEN CONCAT('(', CAST(NUMERIC_PRECISION_RADIX AS VARCHAR(10)), ', ', CAST(NUMERIC_SCALE AS VARCHAR(10)), ')')
                    ELSE CONCAT('(', CAST(CHARACTER_MAXIMUM_LENGTH AS VARCHAR(10)), ')')
        END AS VARCHAR(10)) AS data_length
    INTO #columns
    FROM INFORMATION_SCHEMA.COLUMNS
    WHERE table_name = 'table name here'
        AND column_name NOT IN ('Rec_Type', 'record_key')
    ;

    /* I had a table that I had created records of 'before' and after.
        You could use two different tables as long as they have the same colums.
    */
    SELECT *
    INTO #Before
    FROM dbo.table name here
    WHERE Rec_Type = 'Before'
    ;

    SELECT *
    INTO #After
    FROM dbo.table name here
    WHERE Rec_Type = 'After'
    ;

    CREATE TABLE #column_diff
    (
        Rec_Type      VARCHAR(6),
        record_key      INT,
        COLUMN_NAME      VARCHAR(100),
        Column_Value   VARCHAR(1000)
    )
    ;

    CREATE TABLE #diff
    (
        record_key      INT
    )
    ;

    DECLARE @column_count AS INT;
    DECLARE @column AS VARCHAR(100);
    DECLARE @data_type AS VARCHAR(100);
    DECLARE @data_length AS VARCHAR(100);

    SET @column_count = (SELECT COUNT(*) FROM #columns);

    LOOPHERE:

    SET @column = (SELECT TOP 1 COLUMN_NAME FROM #columns);
    SET @data_type = (SELECT data_type FROM #columns WHERE COLUMN_NAME = @column);
    SET @data_length = (SELECT data_length FROM #columns WHERE COLUMN_NAME = @column);

    /* in my case the record_key was all I needed to join the two tables. */
    INSERT INTO #diff
    EXEC ('SELECT l.record_key
            FROM #Before l
                INNER JOIN #After r ON
                    l.record_key = r.record_key
            WHERE CAST(ISNULL(l.' + @column + ', 0) AS ' + @data_type + @data_length + ')
             <> CAST(ISNULL(r.' + @column + ', 0) AS ' + @data_type + @data_length + ')')
    ;

    IF (SELECT COUNT(*) FROM #diff) > 0
        BEGIN
            INSERT INTO #column_diff
            EXEC ('SELECT l.Rec_Type, l.record_key, NULL, l. ' + @column +
                '   FROM dbo.table name here l
                        INNER JOIN #diff d ON
                            l.record_key = d.record_key')
            UPDATE #column_diff
            SET COLUMN_NAME = @column
            WHERE COLUMN_NAME IS NULL
        END

    DELETE
    FROM #columns
    WHERE COLUMN_NAME = @column
    ;

    SET @column_count = (SELECT COUNT(*) FROM #columns);

    TRUNCATE TABLE #diff;

    IF @column_count > 0 GOTO LOOPHERE

    SELECT *
    FROM #column_diff
    ORDER BY record_key, COLUMN_NAME, Rec_Type DESC
    ;

    -------------------------------------------------------------
    we travel not to escape life but for life not to escape us
    Don't fear failure, fear regret.

  • Hi below86,

    Thanks for reply. But quite frankly I wasn't able to understand. Can you please demonstrate with an example?
    When I try to execute the code I get all sorts of errors here and there? Does it give the column data differences or column datatype differences??

  • Here is what I set up as data:
    CREATE TABLE dbo.source_table
    (
        RecordId                        INT IDENTITY(1, 1),
        First_Name                        CHAR(50),
        Last_Name                        CHAR(50),
        Date_Of_Birth                    DATE,
        Age                                INT,
        Process_Date                    DATETIME,
        Yearly_Salary                    DECIMAL(13, 2)
    )
    ;

    INSERT INTO dbo.source_table
        (First_Name, Last_Name, Date_Of_Birth, Age, Process_Date, Yearly_Salary)
    SELECT 'Joe', 'Blow', '01/01/1970', 47, GETDATE(), 100000
    UNION ALL
    SELECT 'Jane', 'Doe', '04/01/1975', 47, GETDATE(), 105000
    UNION ALL
    SELECT 'Sammy', 'Hagr', '10/13/1947', 70, GETDATE(), 1000000
    UNION ALL
    SELECT 'Steve', 'Irwin', '06/01/1968', 49, GETDATE(), 102000
    ;

    SELECT *
    INTO dbo.changed_table
    FROM dbo.source_table
    ;

    UPDATE dbo.changed_table
    SET Yearly_Salary = Yearly_Salary * 1.05
    WHERE Date_Of_Birth > '12/31/1969'
    ;

    UPDATE dbo.changed_table
    SET Last_Name = 'Hagar'
    WHERE Last_Name = 'Hagr'
    ;


    I then modified the code to look like this:
    SELECT column_name, data_type,
        CAST(CASE   WHEN data_type = 'datetime' THEN ''
                    WHEN data_type = 'int'      THEN ''
                    WHEN data_type = 'decimal'   THEN CONCAT('(', CAST(NUMERIC_PRECISION_RADIX AS VARCHAR(10)), ', ', CAST(NUMERIC_SCALE AS VARCHAR(10)), ')')
                    ELSE CONCAT('(', CAST(CHARACTER_MAXIMUM_LENGTH AS VARCHAR(10)), ')')
        END AS VARCHAR(10)) AS data_length
    INTO #columns
    FROM INFORMATION_SCHEMA.COLUMNS
    WHERE table_name = 'source_table'
        AND column_name NOT IN ('RecordId')
    ;

    SELECT *
    INTO #Before
    FROM dbo.source_table
    ;

    SELECT *
    INTO #After
    FROM dbo.changed_table
    ;

    CREATE TABLE #column_diff
    (
        RecordId      INT,
        COLUMN_NAME      VARCHAR(100),
        Column_Value   VARCHAR(1000)
    )
    ;

    CREATE TABLE #diff
    (
        RecordId      INT
    )
    ;

    DECLARE @column_count AS INT;
    DECLARE @column AS VARCHAR(100);
    DECLARE @data_type AS VARCHAR(100);
    DECLARE @data_length AS VARCHAR(100);

    SET @column_count = (SELECT COUNT(*) FROM #columns);

    LOOPHERE:

    SET @column = (SELECT TOP 1 COLUMN_NAME FROM #columns);
    SET @data_type = (SELECT data_type FROM #columns WHERE COLUMN_NAME = @column);
    SET @data_length = (SELECT data_length FROM #columns WHERE COLUMN_NAME = @column);

    INSERT INTO #diff
    EXEC ('SELECT l.RecordId
            FROM #Before l
                INNER JOIN #After r ON
                    l.RecordId = r.RecordId
            WHERE CAST(ISNULL(l.' + @column + ', 0) AS ' + @data_type + @data_length + ')
             <> CAST(ISNULL(r.' + @column + ', 0) AS ' + @data_type + @data_length + ')')
    ;

    IF (SELECT COUNT(*) FROM #diff) > 0
        BEGIN
            INSERT INTO #column_diff
            EXEC ('SELECT l.RecordId, NULL, l. ' + @column +
                '   FROM dbo.source_table l
                        INNER JOIN #diff d ON
                            l.RecordId = d.RecordId')
            UPDATE #column_diff
            SET COLUMN_NAME = @column
            WHERE COLUMN_NAME IS NULL
        END

    DELETE
    FROM #columns
    WHERE COLUMN_NAME = @column
    ;

    SET @column_count = (SELECT COUNT(*) FROM #columns);

    TRUNCATE TABLE #diff;

    IF @column_count > 0 GOTO LOOPHERE

    SELECT *
    FROM #column_diff
    ORDER BY RecordId, COLUMN_NAME DESC
    ;

    I did get one error 'Incorrect syntax near ')'.'  but it seems to have ran fine.
    Here is the results returned:
    RecordId    COLUMN_NAME    Column_Value
    1           Yearly_Salary    100000.00
    2           Yearly_Salary    105000.00
    3           Last_Name        Hagr               
                 
    So it shows you the column name and what that value looked like on your before table.

    -------------------------------------------------------------
    we travel not to escape life but for life not to escape us
    Don't fear failure, fear regret.

  • below86 - Thursday, October 26, 2017 8:26 AM

    Here is what I set up as data:
    CREATE TABLE dbo.source_table
    (
        RecordId                        INT IDENTITY(1, 1),
        First_Name                        CHAR(50),
        Last_Name                        CHAR(50),
        Date_Of_Birth                    DATE,
        Age                                INT,
        Process_Date                    DATETIME,
        Yearly_Salary                    DECIMAL(13, 2)
    )
    ;

    INSERT INTO dbo.source_table
        (First_Name, Last_Name, Date_Of_Birth, Age, Process_Date, Yearly_Salary)
    SELECT 'Joe', 'Blow', '01/01/1970', 47, GETDATE(), 100000
    UNION ALL
    SELECT 'Jane', 'Doe', '04/01/1975', 47, GETDATE(), 105000
    UNION ALL
    SELECT 'Sammy', 'Hagr', '10/13/1947', 70, GETDATE(), 1000000
    UNION ALL
    SELECT 'Steve', 'Irwin', '06/01/1968', 49, GETDATE(), 102000
    ;

    SELECT *
    INTO dbo.changed_table
    FROM dbo.source_table
    ;

    UPDATE dbo.changed_table
    SET Yearly_Salary = Yearly_Salary * 1.05
    WHERE Date_Of_Birth > '12/31/1969'
    ;

    UPDATE dbo.changed_table
    SET Last_Name = 'Hagar'
    WHERE Last_Name = 'Hagr'
    ;


    I then modified the code to look like this:
    SELECT column_name, data_type,
        CAST(CASE   WHEN data_type = 'datetime' THEN ''
                    WHEN data_type = 'int'      THEN ''
                    WHEN data_type = 'decimal'   THEN CONCAT('(', CAST(NUMERIC_PRECISION_RADIX AS VARCHAR(10)), ', ', CAST(NUMERIC_SCALE AS VARCHAR(10)), ')')
                    ELSE CONCAT('(', CAST(CHARACTER_MAXIMUM_LENGTH AS VARCHAR(10)), ')')
        END AS VARCHAR(10)) AS data_length
    INTO #columns
    FROM INFORMATION_SCHEMA.COLUMNS
    WHERE table_name = 'source_table'
        AND column_name NOT IN ('RecordId')
    ;

    SELECT *
    INTO #Before
    FROM dbo.source_table
    ;

    SELECT *
    INTO #After
    FROM dbo.changed_table
    ;

    CREATE TABLE #column_diff
    (
        RecordId      INT,
        COLUMN_NAME      VARCHAR(100),
        Column_Value   VARCHAR(1000)
    )
    ;

    CREATE TABLE #diff
    (
        RecordId      INT
    )
    ;

    DECLARE @column_count AS INT;
    DECLARE @column AS VARCHAR(100);
    DECLARE @data_type AS VARCHAR(100);
    DECLARE @data_length AS VARCHAR(100);

    SET @column_count = (SELECT COUNT(*) FROM #columns);

    LOOPHERE:

    SET @column = (SELECT TOP 1 COLUMN_NAME FROM #columns);
    SET @data_type = (SELECT data_type FROM #columns WHERE COLUMN_NAME = @column);
    SET @data_length = (SELECT data_length FROM #columns WHERE COLUMN_NAME = @column);

    INSERT INTO #diff
    EXEC ('SELECT l.RecordId
            FROM #Before l
                INNER JOIN #After r ON
                    l.RecordId = r.RecordId
            WHERE CAST(ISNULL(l.' + @column + ', 0) AS ' + @data_type + @data_length + ')
             <> CAST(ISNULL(r.' + @column + ', 0) AS ' + @data_type + @data_length + ')')
    ;

    IF (SELECT COUNT(*) FROM #diff) > 0
        BEGIN
            INSERT INTO #column_diff
            EXEC ('SELECT l.RecordId, NULL, l. ' + @column +
                '   FROM dbo.source_table l
                        INNER JOIN #diff d ON
                            l.RecordId = d.RecordId')
            UPDATE #column_diff
            SET COLUMN_NAME = @column
            WHERE COLUMN_NAME IS NULL
        END

    DELETE
    FROM #columns
    WHERE COLUMN_NAME = @column
    ;

    SET @column_count = (SELECT COUNT(*) FROM #columns);

    TRUNCATE TABLE #diff;

    IF @column_count > 0 GOTO LOOPHERE

    SELECT *
    FROM #column_diff
    ORDER BY RecordId, COLUMN_NAME DESC
    ;

    I did get one error 'Incorrect syntax near ')'.'  but it seems to have ran fine.
    Here is the results returned:
    RecordId    COLUMN_NAME    Column_Value
    1           Yearly_Salary    100000.00
    2           Yearly_Salary    105000.00
    3           Last_Name        Hagr               
                 
    So it shows you the column name and what that value looked like on your before table.

    Nice thought!

Viewing 4 posts - 16 through 18 (of 18 total)

You must be logged in to reply to this topic. Login to reply