Group By Date loses records but Date required for join

  • Hi,

    I have two tables Staging.ClaCases and StagingViews.ClaQuestionView. The join is as follows and this works:

    FROM Staging.ClaCases cc

    LEFT JOIN StagingViews.ClaQuestionView cqv

    ON cqv.ClaCaseID = it.ClaCaseID

    WHERE (it.ClaCaseID = cqv.ClaCaseID AND it.QuestionClassID = cqv.QuestionClassID

    AND cqv.ClaQuestionHistoryID = (SELECT max(cqv2.ClaQuestionHistoryID)

    FROM StagingViews.ClaQuestionView cqv2

    WHERE cqv.ClaCaseID = cqv2.ClaCaseID

    AND cqv.QuestionID = cqv2.QuestionID

    AND cqv.QuestionClassID = cqv2.QuestionClassID)

    AND cqv.Question = 'is this for sos?'

    Basically I start off by performing a count on a certain condition ie How many times the QuestionID 202 has been asked per NameID from StagingViews.ClaQuestionView tables and I join this Staging.ClaCases (This join needs to be done).

    Once I perform this count I join to another table #InitialTable_25 and add a column NoOfSOSCalls.

    My problem is I need to add condition: where CTE.QuestionDate < it.discoverdate

    Now obtaining CTE.QuestionDate is a problem. When I use this in the Group By clause in the 'FROM CTE' I lose results.

    Here is my code to date, will probably be easier to understand if you have a look. I have also provided some sample Data so that you can understand what my objective is.

    WITH CTE AS (

    SELECT z.NameID, z.NoOfSOSCalls--, z.DiscoverDate

    FROM

    (SELECT

    cc.NameID

    --,QuestionDate

    ,COUNT(*) as NoOfSOSCalls

    ,MAX(ISNULL(CASE WHEN cqv.QuestionID = 202 THEN cqv.Answer ELSE NULL END,0)) AS Reported_SOS

    FROM Staging.ClaCases cc

    LEFT JOIN StagingViews.ClaQuestionView cqv

    ON cqv.ClaCaseID = cc.ClaCaseID

    WHERE (cc.ClaCaseID = cqv.ClaCaseID AND cc.QuestionClassID = cqv.QuestionClassID

    AND cqv.ClaQuestionHistoryID = (SELECT max(cqv2.ClaQuestionHistoryID)

    FROM StagingViews.ClaQuestionView cqv2

    WHERE cqv.ClaCaseID = cqv2.ClaCaseID

    AND cqv.QuestionID = cqv2.QuestionID

    AND cqv.QuestionClassID = cqv2.QuestionClassID)

    AND cqv.Question = 'is this for sos?'

    )

    Group By

    cc.NameID

    --,QuestionDate

    ) z

    )

    Select it.*, NoOfSOSCalls

    INTO #InitialTable_25

    FROM #InitialTable_24 it

    LEFT JOIN CTE

    ON it.NameID = CTE.NameID

    --where CTE.QuestionDate < it.discoverdate

    HERE is the Sample Data and my objective. Note #InitialTable_24 is exactly the same as #InitialTable_25 less the NoOfSOSCalls Column

    DROP TABLE #ClaQuestionView

    CREATE TABLE #ClaQuestionView (NameID INT,ClaCaseID int,Question varchar(max), Answer varchar(max),QuestionID INT,QuestionClassID INT,ClaQuestionHistoryID int, QuestionDate date )

    INSERT INTO #ClaQuestionView VALUES (1234,5339,'is this for sos?','Y',202,120,801,'2008-11-21')

    INSERT INTO #ClaQuestionView VALUES (1234,40285,'is this for sos?','Y',202,120,801,'2009-05-14')

    INSERT INTO #ClaQuestionView VALUES (1234,19110,'is this for sos?','Y',202,120,801,'2010-01-11')

    INSERT INTO #ClaQuestionView VALUES (5356,19630,'is this for sos?','Y',202,120,901,'2011-05-19')

    INSERT INTO #ClaQuestionView VALUES (3421,53489,'is this for sos?','Y',202,120,901,'2008-07-07')

    INSERT INTO #ClaQuestionView VALUES (4533,22245,'is this for sos?','Y',202,120,901,'2008-11-21')

    INSERT INTO #ClaQuestionView VALUES (4533,634,'is this for sos?','Y',202,120,500,'2009-09-09')

    INSERT INTO #ClaQuestionView VALUES (5674,3435,'is this for sos?','Y',202,120,500,'2010-11-24')

    INSERT INTO #ClaQuestionView VALUES (5674,1455,'is this for sos?','Y',202,120,500,'2008-12-22')

    INSERT INTO #ClaQuestionView VALUES (5674,97521,'is this for sos?','Y',202,120,722,'2011-05-21')

    INSERT INTO #ClaQuestionView VALUES (5674,2355,'is this for sos?','Y',202,120,722,'2008-06-30')

    INSERT INTO #ClaQuestionView VALUES (8565,75632,'is this for sos?','Y',202,120,722,'2009-03-21')

    select * from #ClaQuestionView

    DROP TABLE ClaCases

    CREATE TABLE ClaCases (NameID INT,ClaCaseID int,Description varchar(max), DiscoverDate date )

    INSERT INTO ClaCases VALUES (1234,5339,'sos','2008-11-21')

    INSERT INTO ClaCases VALUES (1234,3455,'Accident','2009-01-13')

    INSERT INTO ClaCases VALUES (1234,40285,'sos','2009-05-14')

    INSERT INTO ClaCases VALUES (1234,19110,'sos','2010-01-11')

    INSERT INTO ClaCases VALUES (1234,32134,'Accident','2010-11-11')

    INSERT INTO ClaCases VALUES (5356,19630,'sos','2011-05-19')

    INSERT INTO ClaCases VALUES (5356,35352,'Accident','2011-11-09')

    INSERT INTO ClaCases VALUES (3421,53489,'Accident','2008-05-15')

    INSERT INTO ClaCases VALUES (3421,563249,'sos','2008-07-07')

    INSERT INTO ClaCases VALUES (4533,22245,'sos','2008-11-21')

    INSERT INTO ClaCases VALUES (4533,63487,'Accident','2009-09-09')

    INSERT INTO ClaCases VALUES (5674,3435,'sos','2010-11-24')

    INSERT INTO ClaCases VALUES (5674,6346,'Accident','2011-01-13')

    INSERT INTO ClaCases VALUES (5674,2355,'sos','2008-06-30')

    INSERT INTO ClaCases VALUES (5674,97521,'Accident','2008-07-21')

    INSERT INTO ClaCases VALUES (5674,1455,'sos','2008-12-22')

    INSERT INTO ClaCases VALUES (5674,75632,'sos','2009-03-21')

    INSERT INTO ClaCases VALUES (5674,35567,'sos','2011-05-21')

    Select * from ClaCases

    DROP TABLE #InitialTable_25

    CREATE TABLE #InitialTable_25 (NameID INT,ClaCaseID int,Description varchar(max), Discoverdate date, NoOfSOSCalls int)

    INSERT INTO #InitialTable_25 VALUES (1234,3455,'Accident','2009-01-13',1)

    INSERT INTO #InitialTable_25 VALUES (1234,32134,'Accident','2010-11-11',3)

    INSERT INTO #InitialTable_25 VALUES (5356,35352,'Accident','2011-11-09',1)

    INSERT INTO #InitialTable_25 VALUES (3421,53489,'Accident','2008-05-15',0)

    INSERT INTO #InitialTable_25 VALUES (4533,63487,'Accident','2009-09-09',1)

    INSERT INTO #InitialTable_25 VALUES (5674,6346,'Accident','2011-01-13',1)

    INSERT INTO #InitialTable_25 VALUES (5674,97521,'Accident','2008-07-21',2)

    Select * from #InitialTable_25

    Thanks in Advance!!!

  • Hello

    The first query in your post shows a LEFT JOIN, but there are references to columns from it in the WHERE clause, resulting in an INNER join.

    Written slightly differently, it looks like this:

    SELECT *

    FROM (

    SELECT *,

    MAX_ClaQuestionHistoryID = MAX(cqv.ClaQuestionHistoryID) OVER(PARTITION BY cqv.ClaCaseID, cqv.QuestionID, cqv.QuestionClassID)

    FROM Staging.ClaCases cc

    INNER JOIN StagingViews.ClaQuestionView cqv

    ON cqv.ClaCaseID = it.ClaCaseID

    --AND it.ClaCaseID = cqv.ClaCaseID

    AND it.QuestionClassID = cqv.QuestionClassID

    AND cqv.Question = 'is this for sos?'

    ) d

    WHERE d.ClaQuestionHistoryID = d.MAX_ClaQuestionHistoryID

    Can you check that this yields the same results as your query? Also, can you change the column wildcard to column names? Cheers.


    [font="Arial"]Low-hanging fruit picker and defender of the moggies[/font]

    For better assistance in answering your questions, please read this[/url].


    Understanding and using APPLY, (I)[/url] and (II)[/url] Paul White[/url]

    Hidden RBAR: Triangular Joins[/url] / The "Numbers" or "Tally" Table: What it is and how it replaces a loop[/url] Jeff Moden[/url]

  • mic.con87 (12/22/2011)


    ...

    My problem is I need to add condition: where CTE.QuestionDate < it.discoverdate

    Now obtaining CTE.QuestionDate is a problem. When I use this in the Group By clause in the 'FROM CTE' I lose results...

    This is almost certainly because the row containing MAX(cqv.ClaQuestionHistoryID) has a QuestionDate which is past the it.discoverdate - do you want the row which has max ClaQuestionHistoryID and QuestionDate which less than it.discoverdate?

    You easily could join #InitialTable_24 to Staging.ClaCases - provided that, by design, neither of them contain dupes on NameID. It's a little more complicated if they do - you'd have to join to a rollup of #InitialTable_24 on NameID.


    [font="Arial"]Low-hanging fruit picker and defender of the moggies[/font]

    For better assistance in answering your questions, please read this[/url].


    Understanding and using APPLY, (I)[/url] and (II)[/url] Paul White[/url]

    Hidden RBAR: Triangular Joins[/url] / The "Numbers" or "Tally" Table: What it is and how it replaces a loop[/url] Jeff Moden[/url]

  • Your query only yields results when you specify ClaQuestionHistoryID in the select ie

    SELECT *

    FROM (

    SELECT ClaQuestionHistoryID,

    MAX_ClaQuestionHistoryID = MAX(cqv.ClaQuestionHistoryID) OVER(PARTITION BY cqv.ClaCaseID, cqv.QuestionID, cqv.QuestionClassID)

    FROM Staging.ClaCases it

    INNER JOIN StagingViews.ClaQuestionView cqv

    ON cqv.ClaCaseID = it.ClaCaseID

    --AND it.ClaCaseID = cqv.ClaCaseID

    AND it.QuestionClassID = cqv.QuestionClassID

    AND cqv.Question = 'is this for sos?'

    ) d

    WHERE d.ClaQuestionHistoryID = d.MAX_ClaQuestionHistoryID

    when using your code i receive the error: 'The column 'ClaCaseID' was specified multiple times for 'd'.'

    I'm not sure what you mean by "Also, can you change the column wildcard to column names?"

    I require the column with max ClaQuestionHistoryID and QuestionDate which less than it.discoverdate. There may be 2 ClaCaseID that are identical but the one with the latest ClaQuestionHistoryID will return the 'Answer' I require.

  • mic.con87 (12/22/2011)


    ...when using your code i receive the error: 'The column 'ClaCaseID' was specified multiple times for 'd'.'

    I'm not sure what you mean by "Also, can you change the column wildcard to column names?"

    -- explicitly naming the columns in the output so that you don't get an error 'The column 'columnname' was specified multiple times for 'd'.'

    I require the column with max ClaQuestionHistoryID and QuestionDate which less than it.discoverdate. There may be 2 ClaCaseID that are identical but the one with the latest ClaQuestionHistoryID will return the 'Answer' I require.

    Try this:

    SELECT

    it.*,

    c.NoOfSOSCalls

    INTO #InitialTable_25

    FROM #InitialTable_24 it

    LEFT JOIN (

    SELECT

    d.NameID,

    NoOfSOSCalls = COUNT(*)--,

    --Reported_SOS = MAX(ISNULL(CASE WHEN d.QuestionID = 202 THEN d.Answer ELSE NULL END,0))

    FROM (

    SELECT

    cc.NameID,

    cqv.QuestionID,

    cqv.Answer,

    cqv.ClaQuestionHistoryID,

    MAX_ClaQuestionHistoryID = MAX(cqv.ClaQuestionHistoryID) OVER(PARTITION BY cqv.ClaCaseID, cqv.QuestionID, cqv.QuestionClassID)

    FROM Staging.ClaCases cc

    INNER JOIN #InitialTable_24 it ON it.NameID = cc.NameID

    INNER JOIN StagingViews.ClaQuestionView cqv

    ON cqv.ClaCaseID = it.ClaCaseID

    AND it.QuestionClassID = cqv.QuestionClassID

    AND cqv.Question = 'is this for sos?'

    WHERE cc.QuestionDate < it.discoverdate

    ) d

    WHERE d.ClaQuestionHistoryID = d.MAX_ClaQuestionHistoryID

    GROUP BY d.NameID

    ) c

    ON it.NameID = c.NameID


    [font="Arial"]Low-hanging fruit picker and defender of the moggies[/font]

    For better assistance in answering your questions, please read this[/url].


    Understanding and using APPLY, (I)[/url] and (II)[/url] Paul White[/url]

    Hidden RBAR: Triangular Joins[/url] / The "Numbers" or "Tally" Table: What it is and how it replaces a loop[/url] Jeff Moden[/url]

  • The code runs however the entire NoOFSOSCalls remains NULL.

    I had to make one change to your code. QuestionDate is from 'cqv' and not from 'it' so I changed the alias...I have a feeling it needs a minor tweak. Any suggestions?

  • Sure. Focus on the inner SELECT:

    SELECT

    cc.NameID,

    cqv.QuestionID,

    cqv.Answer,

    cqv.ClaQuestionHistoryID,

    MAX_ClaQuestionHistoryID = MAX(cqv.ClaQuestionHistoryID) OVER(PARTITION BY cqv.ClaCaseID, cqv.QuestionID, cqv.QuestionClassID)

    FROM Staging.ClaCases cc

    INNER JOIN #InitialTable_24 it ON it.NameID = cc.NameID

    INNER JOIN StagingViews.ClaQuestionView cqv

    ON cqv.ClaCaseID = it.ClaCaseID

    AND it.QuestionClassID = cqv.QuestionClassID

    AND cqv.Question = 'is this for sos?'

    WHERE cqv.QuestionDate < it.discoverdate

    If you don't get any results from this, then comment out the date filter (WHERE cqv.QuestionDate < it.discoverdate) and put both columns in the output.


    [font="Arial"]Low-hanging fruit picker and defender of the moggies[/font]

    For better assistance in answering your questions, please read this[/url].


    Understanding and using APPLY, (I)[/url] and (II)[/url] Paul White[/url]

    Hidden RBAR: Triangular Joins[/url] / The "Numbers" or "Tally" Table: What it is and how it replaces a loop[/url] Jeff Moden[/url]

  • No results are returned even if I do comment out the where clause...

  • mic.con87 (12/22/2011)


    No results are returned even if I do comment out the where clause...

    Okaaaay...comment out the join to the temp table, it's the only change left. Also, please post the script you are using - just in case there's something lost between here and there.


    [font="Arial"]Low-hanging fruit picker and defender of the moggies[/font]

    For better assistance in answering your questions, please read this[/url].


    Understanding and using APPLY, (I)[/url] and (II)[/url] Paul White[/url]

    Hidden RBAR: Triangular Joins[/url] / The "Numbers" or "Tally" Table: What it is and how it replaces a loop[/url] Jeff Moden[/url]

  • I had a look at your code and I found something that ensures no results will be returned ie AND it.QuestionClassID = cqv.QuestionClassID

    it.QuestionClassID will never be the same as cqv.QuestionClassID since all the ClaCaseID's in #InitialTable_24 are based on other QuestionClassID's and NEVER on 120.

    cqv.QuestionClassID = 120....this corresponds to the QuestionID = 202 which is also Question 'is this for sos?'

    I just realized there is another column which may help simplify the solution.

    On Staging.ClaCases there is also a column QuestionClassID = 120 and these are the counts I require. I would need to perform a count(*) of all these QuestionClassID that are less than the discoverydate of the ClaCaseID's in #InitialTable_24.

    The query to do the count will be:

    Select COUNT(*), nameid

    from Staging.ClaCases

    where QuestionClassID = 120

    group by nameid

    and the join will be

    select * from #InitialTable_25 it

    left join Staging.ClaCases cc

    ON cc.ClaCaseID = it.ClaCaseID

    Now the only issue is doing the count from Staging.ClaCases

    that are less than the clacaseid's in the #InitialTable_25.

    Any suggestions?

  • mic.con87 (12/22/2011)


    I had a look at your code and I found something that ensures no results will be returned ie AND it.QuestionClassID = cqv.QuestionClassID

    it.QuestionClassID will never be the same as cqv.QuestionClassID since all the ClaCaseID's in #InitialTable_24 are based on other QuestionClassID's and NEVER on 120.

    ...

    My mistake - I copied the first query in your first post, where the table aliases are incorrect. So the query should look like this:

    SELECT

    cc.NameID,

    --cqv.QuestionID,

    --cqv.Answer,

    cqv.ClaQuestionHistoryID,

    MAX_ClaQuestionHistoryID = MAX(cqv.ClaQuestionHistoryID) OVER(PARTITION BY cqv.ClaCaseID, cqv.QuestionID, cqv.QuestionClassID)

    FROM Staging.ClaCases cc

    INNER JOIN #InitialTable_24 it ON it.NameID = cc.NameID

    INNER JOIN StagingViews.ClaQuestionView cqv

    ON cqv.ClaCaseID = cc.ClaCaseID

    AND cqv.QuestionClassID = cc.QuestionClassID

    AND cqv.Question = 'is this for sos?'

    WHERE cc.QuestionDate < it.discoverdate


    [font="Arial"]Low-hanging fruit picker and defender of the moggies[/font]

    For better assistance in answering your questions, please read this[/url].


    Understanding and using APPLY, (I)[/url] and (II)[/url] Paul White[/url]

    Hidden RBAR: Triangular Joins[/url] / The "Numbers" or "Tally" Table: What it is and how it replaces a loop[/url] Jeff Moden[/url]

  • The query returns results however they are incorrect. I had to change the alias cc.QuestionDate to cqv.QuestionDate...The count doesn't seem to take into account cqv.DiscoverDate < it.discoverdate...

  • mic.con87 (12/22/2011)


    The query returns results however they are incorrect. I had to change the alias cc.QuestionDate to cqv.QuestionDate...The count doesn't seem to take into account cqv.DiscoverDate < it.discoverdate...

    They will be - the filter on MAX isn't in there yet.

    One thing at a time. Check that the date filter (cqv.DiscoverDate < it.discoverdate) is respected:

    SELECT

    cc.NameID,

    cqv.QuestionDate,

    it.discoverdate,

    cqv.ClaQuestionHistoryID,

    MAX_ClaQuestionHistoryID = MAX(cqv.ClaQuestionHistoryID) OVER(PARTITION BY cqv.ClaCaseID, cqv.QuestionID, cqv.QuestionClassID)

    FROM Staging.ClaCases cc

    INNER JOIN #InitialTable_24 it ON it.NameID = cc.NameID

    INNER JOIN StagingViews.ClaQuestionView cqv

    ON cqv.ClaCaseID = cc.ClaCaseID

    AND cqv.QuestionClassID = cc.QuestionClassID

    AND cqv.Question = 'is this for sos?'

    WHERE cqv.QuestionDate < it.discoverdate


    [font="Arial"]Low-hanging fruit picker and defender of the moggies[/font]

    For better assistance in answering your questions, please read this[/url].


    Understanding and using APPLY, (I)[/url] and (II)[/url] Paul White[/url]

    Hidden RBAR: Triangular Joins[/url] / The "Numbers" or "Tally" Table: What it is and how it replaces a loop[/url] Jeff Moden[/url]

  • So far so good, cqv.QuestionDate < it.discoverdate holds for all cases 🙂

  • Good - then apply the next level:

    SELECT

    d.NameID,

    NoOfSOSCalls = COUNT(*)--,

    --Reported_SOS = MAX(ISNULL(CASE WHEN d.QuestionID = 202 THEN d.Answer ELSE NULL END,0))

    FROM (

    SELECT

    cc.NameID,

    --cqv.QuestionDate,

    --it.discoverdate,

    cqv.ClaQuestionHistoryID,

    MAX_ClaQuestionHistoryID = MAX(cqv.ClaQuestionHistoryID) OVER(PARTITION BY cqv.ClaCaseID, cqv.QuestionID, cqv.QuestionClassID)

    FROM Staging.ClaCases cc

    INNER JOIN #InitialTable_24 it ON it.NameID = cc.NameID

    INNER JOIN StagingViews.ClaQuestionView cqv

    ON cqv.ClaCaseID = cc.ClaCaseID

    AND cqv.QuestionClassID = cc.QuestionClassID

    AND cqv.Question = 'is this for sos?'

    WHERE cqv.QuestionDate < it.discoverdate

    ) d

    WHERE d.ClaQuestionHistoryID = d.MAX_ClaQuestionHistoryID

    GROUP BY d.NameID


    [font="Arial"]Low-hanging fruit picker and defender of the moggies[/font]

    For better assistance in answering your questions, please read this[/url].


    Understanding and using APPLY, (I)[/url] and (II)[/url] Paul White[/url]

    Hidden RBAR: Triangular Joins[/url] / The "Numbers" or "Tally" Table: What it is and how it replaces a loop[/url] Jeff Moden[/url]

Viewing 15 posts - 1 through 15 (of 28 total)

You must be logged in to reply to this topic. Login to reply