help with query to compress the adjacent records into one

Question

Post reply

help with query to compress the adjacent records into one

Viewing 6 posts - 16 through 20 (of 20 total)

You must be logged in to reply to this topic. Login to reply

Dave Ballantyne SSC-Dedicated Points: 33556 More actions · Answer 1

Heres a quick example over 20 thousand rows , yes i know i said at least a million , but im rushing to go home

Drop table #dateList



go

Create table #dateList

(

UserId integer,

DateCol smalldatetime

)

go

create unique clustered index idxdateList on #dateList(userid,DateCol)

go

with cteRown

as

(

select top(10000) row_number() over (order by (select null)) as Rown

 from  syscolumns a cross join syscolumns b

)

insert into  #dateList

Select 1,dateadd(dd,rown,getdate())

from   cteRown

where  rown%7 <> 0

go

with cteRown

as

(

select top(10000) row_number() over (order by (select null)) as Rown

 from  syscolumns a cross join syscolumns b

)

insert into  #dateList

Select 2,dateadd(dd,rown,getdate())

from   cteRown

where  rown%6 <> 0

go

with cteList

as

(

Select userid,

       datecol,

       datecol + row_number() over (partition by userid order by datecol desc) as Grouping

from #dateList

)

select userid,min(datecol),max(datecol)

  from cteList

 group by userid,grouping

order by 1,2

Clear Sky SQL
My Blog[/url]

Paul White SSC Guru Points: 150457 More actions · Answer 2

I suppose expanding the ranges could be expensive if the ranges are large.

The original posted question looks like a simple 'data island' problem to me, so we should be able to handle it without expansion.

A 'data island' is just a range where there is a gap in the data before and after the island.

One approach is just to look for the start and stop dates which occur at the beginning and end of each island.

Assuming that there are no duplicated start or stop dates in the data, and assuming there are no overlapping ranges, the following code is pretty efficient:



CREATE TABLE #Example

    (

    company         VARCHAR(10) NOT NULL,

    employee_id     INTEGER NOT NULL,

    id              INTEGER NOT NULL,

    course_id       INTEGER NOT NULL,

    start_date      DATETIME NOT NULL,

    stop_date       DATETIME NOT NULL,

    );

INSERT  #Example (company, employee_id, id, course_id, start_date, stop_date) VALUES ('TTT', 1, 01, 11, '5 September 2006','27 August 2007');

INSERT  #Example (company, employee_id, id, course_id, start_date, stop_date) VALUES ('TTT', 1, 01, 11, '28 August 2007', '4 March 2008');

INSERT  #Example (company, employee_id, id, course_id, start_date, stop_date) VALUES ('TTT', 1, 01, 11, '5 March 2008', '20 April 2008');

INSERT  #Example (company, employee_id, id, course_id, start_date, stop_date) VALUES ('TTT', 1, 01, 11, '21 April 2008', '23 April 2008');

INSERT  #Example (company, employee_id, id, course_id, start_date, stop_date) VALUES ('TTT', 1, 02, 11, '24 April 2008', '20 September 2009');

INSERT  #Example (company, employee_id, id, course_id, start_date, stop_date) VALUES ('TTT', 1, 02, 11, '1 January 2010', '31 December 9999');

WITH    Grouped

AS      (

        -- Identify the groups in the input rows with a sequential number

        -- In this example, groups are identified by the combination of

        -- company, employee_id, id, and course_id

        SELECT  E.*,

                grp = DENSE_RANK() OVER (ORDER BY company, employee_id, id, course_id)

        FROM    #Example E

        ),

        Starts

AS      (

        -- Find start dates for each data island

        SELECT  E1.*,

                rn = ROW_NUMBER() OVER (PARTITION BY E1.grp ORDER BY E1.start_date ASC)

        FROM    Grouped E1

        WHERE   NOT EXISTS

                (

                SELECT  *

                FROM    Grouped E2

                WHERE   E2.stop_date = E1.start_date - 1

                AND     E2.grp = E1.grp

                )

        ),

        Stops

AS      (

        -- Find stop dates for each data island

        SELECT  E1.*,

                rn = ROW_NUMBER() OVER (PARTITION BY E1.grp ORDER BY E1.start_date ASC)

        FROM    Grouped E1

        WHERE   NOT EXISTS

                (

                SELECT  *

                FROM    Grouped E2

                WHERE   E2.start_date - 1 = E1.stop_date

                AND     E2.grp = E1.grp

                )

        )

SELECT  R.grp,          -- Group ID

        R.rn,           -- Island ID within each group

        R.start_date,   -- The start of the island

        P.stop_date,    -- The end of the island

        R.company,      --

        R.employee_id,  -- Remaining rows are just group details

        R.id,           --

        R.course_id     --

FROM    Starts AS R

JOIN    Stops AS P

        -- Join the islands together, for each group

        ON  P.rn = R.rn

        AND P.grp = R.grp;

DROP TABLE #Example;