Is INTERSECTS the best method to accomplish this?

Question

Post reply

Is INTERSECTS the best method to accomplish this?

Viewing 3 posts - 16 through 17 (of 17 total)

You must be logged in to reply to this topic. Login to reply

mpdillon Hall of Fame Points: 3484 More actions · Answer 1

mpdillon

Hall of Fame

Points: 3484

July 9, 2012 at 6:53 am

#1510308

Thanks everyone for your assistance. That should do it for me.

pat

Gullimeel Hall of Fame Points: 3761 More actions · Answer 2

Do not use the method posted by me.That was wrong..Below one is correct..

Try all 3 methods for your query and see which one is the best.If there is clustred index on orderno then instersect might outperform other methods.Also, check how much data is there for A and B and C independently.



select count(ordno) from

(

select OrdNo,

sum(distinct(case when ItemNo = 'A' then 1 when ItemNo = 'B' then 2 when ItemNo = 'C' then 4 end)) as status

from OrdLine

where ItemNo IN ('A', 'B', 'C')

group by OrdNo

) dta

where

1=1

--and status = 3 -- a,b but not c

--and status = 5--does have a,c but not b

--and status = 6--does have b,c but not a

and status = 7 --all 3

--and status = 0 --none of them

--and status = 1 -- just A

--and status = 2 -- just b

--and status = 4 -- just C

GulliMeel

Finding top n Worst Performing queries[/url]
Improve the performance of Merge Join(special case)
How to Post Performance Problem -Gail Shaw[/url]

Gullimeel Hall of Fame Points: 3761 More actions · Answer 3

--see the results yourself..Intersect will perform better then other two and if proper indexes will be there the diff will be huge.

Intersect perform badly in Dwain's test because he has uniform data and 1 million rows for each a,b,c.Thus other methods were better as intersect did 3 joins of 1 million rows.

Other methods simply process 3 million rows..

However, in real world all the orders wont have all 3 or maybe in lots of these 3 wont appear at all. In that case our method will still process the whole table's data but Intersect will use the join based on number of rows for a,b,c and those wont be 1 million.They could be 1 million in worst case but these could be possibility that there are very few c or b or a .Then intersect will perform badly.

Between my method and dwain's method.Dwain's method should work little bit faster as I am consuming extra CPU by using a case statement.

Conclusion: Try all 3 methods for your requirement and see which one fits better.But I am sure intersect one should performa better as i expect that you will have index on ordno and not all ordno will have all 3 itemnos.



set nocount on

go

drop table OrdLine

go

CREATE TABLE OrdLine(	[OrdNo] [int] not null,	[ItemNo] [varchar](50) NULL) ON [PRIMARY]

;WITH Tally (n) AS (

	SELECT TOP 100000 ROW_NUMBER() OVER (ORDER BY (SELECT NULL))

	FROM sys.all_columns c1 CROSS JOIN sys.all_columns c2)

INSERT INTO OrdLine

SELECT OrdNo=n, CHAR(65 + abs(CHECKSUM(newid()))%26 )

FROM Tally

CROSS APPLY (SELECT 1 union select 2 union select 3 union select 4 union select 5 union select 6 union select 7 union select 8 union select 9 union select 10) a(ItemNo)

go

select 'no of rows for a,b,c'

select itemno,COUNT(*) cnt from OrdLine where ItemNo in ('A','B','C') group by ItemNo

order by 1,2

go

PRINT '--- OP''s query.No index at all'

SET STATISTICS TIME ON

select count(OrdNo)

from

(

select ordno

from

(

Select OrdNo From OrdLine

where ItemNo = 'A'

Intersect

Select OrdNo From OrdLine

where ItemNo = 'B'

) dtb

intersect

Select OrdNo From OrdLine

where ItemNo = 'C'

) dta

SET STATISTICS TIME OFF

PRINT '--- Dwain''s query..No index at all'

SET STATISTICS TIME ON

select COUNT(ordno)

from(

Select OrdNo

From OrdLine

where ItemNo IN ('A', 'B', 'C')

GROUP BY OrdNo

HAVING COUNT(distinct ItemNo) = 3

) dta

SET STATISTICS TIME OFF

PRINT '--- Gullimeel''s query..No index at all'

SET STATISTICS TIME ON

--change the id with ordno and cd with product code

select count(ordno) from

(

select OrdNo,

sum(distinct(case when ItemNo = 'A' then 1 when ItemNo = 'B' then 2 when ItemNo = 'C' then 4 else 0 end)) as status

from OrdLine

where ItemNo IN ('A', 'B', 'C')

group by OrdNo

) dta

where

1=1

and status = 7 --all 3

SET STATISTICS TIME OFF

go

create clustered index idx_ordline on Ordline([OrdNo])

go

PRINT '--- OP''s query.Clustered index on Ordno.'

SET STATISTICS TIME ON

select count(OrdNo)

from

(

select ordno

from

(

Select OrdNo From OrdLine

where ItemNo = 'A'

Intersect

Select OrdNo From OrdLine

where ItemNo = 'B'

) dtb

intersect

Select OrdNo From OrdLine

where ItemNo = 'C'

) dta

SET STATISTICS TIME OFF

PRINT '--- Dwain''s query.Clustered index on Ordno.'

SET STATISTICS TIME ON

select COUNT(ordno)

from(

Select OrdNo

From OrdLine

where ItemNo IN ('A', 'B', 'C')

GROUP BY OrdNo

HAVING COUNT(distinct ItemNo) = 3

) dta

SET STATISTICS TIME OFF

PRINT '--- Gullimeel''s query.Clustered index on Ordno.'

SET STATISTICS TIME ON

--change the id with ordno and cd with product code

select count(ordno) from

(

select OrdNo,

sum(distinct(case when ItemNo = 'A' then 1 when ItemNo = 'B' then 2 when ItemNo = 'C' then 4 else 0 end)) as status

from OrdLine

where ItemNo IN ('A', 'B', 'C')

group by OrdNo

) dta

where

1=1

and status = 7 --all 3

SET STATISTICS TIME OFF

go

create nonclustered index idx_itemno on Ordline(itemno)

go

PRINT '--- OP''s query.Clustered index on Ordno and non clustered index on itemno and ordno.'

SET STATISTICS TIME ON

select count(OrdNo)

from

(

select ordno

from

(

Select OrdNo From OrdLine

where ItemNo = 'A'

Intersect

Select OrdNo From OrdLine

where ItemNo = 'B'

) dtb

intersect

Select OrdNo From OrdLine

where ItemNo = 'C'

) dta

SET STATISTICS TIME OFF

PRINT '--- Dwain''s query.Clustered index on Ordno and non clustered index on itemno and ordno.'

SET STATISTICS TIME ON

select COUNT(ordno)

from(

Select OrdNo

From OrdLine

where ItemNo IN ('A', 'B', 'C')

GROUP BY OrdNo

HAVING COUNT(distinct ItemNo) = 3

) dta

SET STATISTICS TIME OFF

PRINT '--- Gullimeel''s query.Clustered index on Ordno and non clustered index on itemno and ordno.'

SET STATISTICS TIME ON

--change the id with ordno and cd with product code

select count(ordno) from

(

select OrdNo,

sum(distinct(case when ItemNo = 'A' then 1 when ItemNo = 'B' then 2 when ItemNo = 'C' then 4 else 0 end)) as status

from OrdLine

where ItemNo IN ('A', 'B', 'C')

group by OrdNo

) dta

where

1=1

and status = 7 --all 3

SET STATISTICS TIME OFF

go

GulliMeel

Finding top n Worst Performing queries[/url]
Improve the performance of Merge Join(special case)
How to Post Performance Problem -Gail Shaw[/url]