April 26, 2013 at 5:19 am
Here is sample code below:
Both queries will retun same result.
But I want to know which one better to use?
CREATE TABLE #temp1 (ID int, Name varchar(10))
INSERT INTO #temp1 VALUES (1,'A'),(2,'B'),(3,'C')
CREATE TABLE #temp2 (ID int, Name varchar(10))
INSERT INTO #temp2 VALUES (1,'A'),(2,'B'),(4,'D')
SELECT t.ID, t.Name FROM #temp1 t
LEFT JOIN #temp2 t1 ON t.ID = t1.ID
WHERE t1.ID IS NULL
SELECT t.ID, t.Name FROM #temp1 t
EXCEPT
SELECT t1.id, t1.name FROM #temp2 t1
_______________________________________________________________
To get quick answer follow this link:
http://www.sqlservercentral.com/articles/Best+Practices/61537/
April 26, 2013 at 5:47 am
It depends. You will have to test both the queries and check the execution plans.
Sometimes, performance changes when there is change in the Volume of data.
Hence, I would advise you to test your queries in a test environment which has sufficient data and decide which would be better.
How to post data/code on a forum to get the best help - Jeff Moden
http://www.sqlservercentral.com/articles/Best+Practices/61537/
April 26, 2013 at 5:54 am
Is there any reason you can't just test it?
USE tempdb;
SET NOCOUNT ON;
IF object_id('#temp1') IS NOT NULL
BEGIN
DROP TABLE #temp1;
END;
--500,000 Random rows of data
SELECT TOP 1000000 IDENTITY(INT,1,1) AS ID,
(ABS(CHECKSUM(NEWID())) % 25) + 64 AS Name
INTO #temp1
FROM master.dbo.syscolumns sc1, master.dbo.syscolumns sc2, master.dbo.syscolumns sc3;
IF object_id('#temp2') IS NOT NULL
BEGIN
DROP TABLE #temp2;
END;
--500,000 Random rows of data
SELECT TOP 1000000 IDENTITY(INT,1,1) AS ID,
(ABS(CHECKSUM(NEWID())) % 25) + 64 AS Name
INTO #temp2
FROM master.dbo.syscolumns sc1, master.dbo.syscolumns sc2, master.dbo.syscolumns sc3;
CREATE CLUSTERED INDEX cl_ID_temp1 ON #temp1 (ID);
CREATE CLUSTERED INDEX cl_ID_temp2 ON #temp2 (ID);
DECLARE @Loop CHAR(1) = '0', @HOLDER INT, @Duration CHAR(12), @StartTime DATETIME;
WHILE @Loop <= 5
BEGIN
RAISERROR('Loop: %s',0,1,@Loop) WITH NOWAIT;
RAISERROR('============',0,1) WITH NOWAIT;
DBCC FREEPROCCACHE WITH NO_INFOMSGS;
DBCC DROPCLEANBUFFERS WITH NO_INFOMSGS;
SELECT @StartTime = GETDATE();
SELECT @HOLDER = ID
FROM (SELECT t1.ID, t1.Name
FROM #temp1 t1
LEFT OUTER JOIN #temp2 t2 ON t1.ID = t2.ID
WHERE t2.ID IS NULL
)A;
SELECT @Duration = CONVERT(CHAR(12),GETDATE()-@StartTime,114);
RAISERROR('LEFT OUTER JOIN / ID IS NULL Duration: %s',0,1,@Duration) WITH NOWAIT;
DBCC FREEPROCCACHE WITH NO_INFOMSGS;
DBCC DROPCLEANBUFFERS WITH NO_INFOMSGS;
SELECT @StartTime = GETDATE();
SELECT @HOLDER = ID
FROM (SELECT t1.ID, t1.Name
FROM #temp1 t1
EXCEPT
SELECT t2.ID, t2.Name
FROM #temp2 t2
)A;
SELECT @Duration = CONVERT(CHAR(12),GETDATE()-@StartTime,114);
RAISERROR('EXCEPT Duration: %s',0,1,@Duration) WITH NOWAIT;
SET @Loop = @Loop + 1;
END
Loop: 0
============
LEFT OUTER JOIN / ID IS NULL Duration: 00:00:00:740
EXCEPT Duration: 00:00:03:800
Loop: 1
============
LEFT OUTER JOIN / ID IS NULL Duration: 00:00:00:677
EXCEPT Duration: 00:00:00:997
Loop: 2
============
LEFT OUTER JOIN / ID IS NULL Duration: 00:00:00:710
EXCEPT Duration: 00:00:00:940
Loop: 3
============
LEFT OUTER JOIN / ID IS NULL Duration: 00:00:00:723
EXCEPT Duration: 00:00:00:987
Loop: 4
============
LEFT OUTER JOIN / ID IS NULL Duration: 00:00:00:683
EXCEPT Duration: 00:00:01:030
Loop: 5
============
LEFT OUTER JOIN / ID IS NULL Duration: 00:00:00:730
EXCEPT Duration: 00:00:01:017
That would suggest LEFT OUTER JOIN for the sample tables I knocked up.
If I make it more likely to get a hit: -
USE tempdb;
SET NOCOUNT ON;
IF object_id('#temp1') IS NOT NULL
BEGIN
DROP TABLE #temp1;
END;
--500,000 Random rows of data
SELECT TOP 1000000 IDENTITY(INT,1,1) AS ID,
(ABS(CHECKSUM(NEWID())) % 2) + 64 AS Name
INTO #temp1
FROM master.dbo.syscolumns sc1, master.dbo.syscolumns sc2, master.dbo.syscolumns sc3;
IF object_id('#temp2') IS NOT NULL
BEGIN
DROP TABLE #temp2;
END;
--500,000 Random rows of data
SELECT TOP 1000000 IDENTITY(INT,1,1) AS ID,
(ABS(CHECKSUM(NEWID())) % 2) + 64 AS Name
INTO #temp2
FROM master.dbo.syscolumns sc1, master.dbo.syscolumns sc2, master.dbo.syscolumns sc3;
CREATE CLUSTERED INDEX cl_ID_temp1 ON #temp1 (ID);
CREATE CLUSTERED INDEX cl_ID_temp2 ON #temp2 (ID);
DECLARE @Loop CHAR(1) = '0', @HOLDER INT, @Duration CHAR(12), @StartTime DATETIME;
WHILE @Loop <= 5
BEGIN
RAISERROR('Loop: %s',0,1,@Loop) WITH NOWAIT;
RAISERROR('============',0,1) WITH NOWAIT;
DBCC FREEPROCCACHE WITH NO_INFOMSGS;
DBCC DROPCLEANBUFFERS WITH NO_INFOMSGS;
SELECT @StartTime = GETDATE();
SELECT @HOLDER = ID
FROM (SELECT t1.ID, t1.Name
FROM #temp1 t1
LEFT OUTER JOIN #temp2 t2 ON t1.ID = t2.ID
WHERE t2.ID IS NULL
)A;
SELECT @Duration = CONVERT(CHAR(12),GETDATE()-@StartTime,114);
RAISERROR('LEFT OUTER JOIN / ID IS NULL Duration: %s',0,1,@Duration) WITH NOWAIT;
DBCC FREEPROCCACHE WITH NO_INFOMSGS;
DBCC DROPCLEANBUFFERS WITH NO_INFOMSGS;
SELECT @StartTime = GETDATE();
SELECT @HOLDER = ID
FROM (SELECT t1.ID, t1.Name
FROM #temp1 t1
EXCEPT
SELECT t2.ID, t2.Name
FROM #temp2 t2
)A;
SELECT @Duration = CONVERT(CHAR(12),GETDATE()-@StartTime,114);
RAISERROR('EXCEPT Duration: %s',0,1,@Duration) WITH NOWAIT;
SET @Loop = @Loop + 1;
END
Loop: 0
============
LEFT OUTER JOIN / ID IS NULL Duration: 00:00:00:680
EXCEPT Duration: 00:00:03:947
Loop: 1
============
LEFT OUTER JOIN / ID IS NULL Duration: 00:00:00:717
EXCEPT Duration: 00:00:00:893
Loop: 2
============
LEFT OUTER JOIN / ID IS NULL Duration: 00:00:00:680
EXCEPT Duration: 00:00:00:890
Loop: 3
============
LEFT OUTER JOIN / ID IS NULL Duration: 00:00:00:700
EXCEPT Duration: 00:00:00:903
Loop: 4
============
LEFT OUTER JOIN / ID IS NULL Duration: 00:00:00:693
EXCEPT Duration: 00:00:01:053
Loop: 5
============
LEFT OUTER JOIN / ID IS NULL Duration: 00:00:00:653
EXCEPT Duration: 00:00:00:943
Similar sort of results.
Adding a non-clustered index: -
USE tempdb;
SET NOCOUNT ON;
IF object_id('#temp1') IS NOT NULL
BEGIN
DROP TABLE #temp1;
END;
--500,000 Random rows of data
SELECT TOP 1000000 IDENTITY(INT,1,1) AS ID,
(ABS(CHECKSUM(NEWID())) % 2) + 64 AS Name
INTO #temp1
FROM master.dbo.syscolumns sc1, master.dbo.syscolumns sc2, master.dbo.syscolumns sc3;
IF object_id('#temp2') IS NOT NULL
BEGIN
DROP TABLE #temp2;
END;
--500,000 Random rows of data
SELECT TOP 1000000 IDENTITY(INT,1,1) AS ID,
(ABS(CHECKSUM(NEWID())) % 2) + 64 AS Name
INTO #temp2
FROM master.dbo.syscolumns sc1, master.dbo.syscolumns sc2, master.dbo.syscolumns sc3;
CREATE CLUSTERED INDEX cl_ID_temp1 ON #temp1 (ID);
CREATE CLUSTERED INDEX cl_ID_temp2 ON #temp2 (ID);
CREATE NONCLUSTERED INDEX nc_Name_temp1 ON #temp1 (Name);
CREATE NONCLUSTERED INDEX nc_Name_temp2 ON #temp2 (Name);
DECLARE @Loop CHAR(1) = '0', @HOLDER INT, @Duration CHAR(12), @StartTime DATETIME;
WHILE @Loop <= 5
BEGIN
RAISERROR('Loop: %s',0,1,@Loop) WITH NOWAIT;
RAISERROR('============',0,1) WITH NOWAIT;
DBCC FREEPROCCACHE WITH NO_INFOMSGS;
DBCC DROPCLEANBUFFERS WITH NO_INFOMSGS;
SELECT @StartTime = GETDATE();
SELECT @HOLDER = ID
FROM (SELECT t1.ID, t1.Name
FROM #temp1 t1
LEFT OUTER JOIN #temp2 t2 ON t1.ID = t2.ID
WHERE t2.ID IS NULL
)A;
SELECT @Duration = CONVERT(CHAR(12),GETDATE()-@StartTime,114);
RAISERROR('LEFT OUTER JOIN / ID IS NULL Duration: %s',0,1,@Duration) WITH NOWAIT;
DBCC FREEPROCCACHE WITH NO_INFOMSGS;
DBCC DROPCLEANBUFFERS WITH NO_INFOMSGS;
SELECT @StartTime = GETDATE();
SELECT @HOLDER = ID
FROM (SELECT t1.ID, t1.Name
FROM #temp1 t1
EXCEPT
SELECT t2.ID, t2.Name
FROM #temp2 t2
)A;
SELECT @Duration = CONVERT(CHAR(12),GETDATE()-@StartTime,114);
RAISERROR('EXCEPT Duration: %s',0,1,@Duration) WITH NOWAIT;
SET @Loop = @Loop + 1;
END
Loop: 0
============
LEFT OUTER JOIN / ID IS NULL Duration: 00:00:00:633
EXCEPT Duration: 00:00:01:220
Loop: 1
============
LEFT OUTER JOIN / ID IS NULL Duration: 00:00:00:750
EXCEPT Duration: 00:00:01:100
Loop: 2
============
LEFT OUTER JOIN / ID IS NULL Duration: 00:00:00:720
EXCEPT Duration: 00:00:01:407
Loop: 3
============
LEFT OUTER JOIN / ID IS NULL Duration: 00:00:00:677
EXCEPT Duration: 00:00:01:140
Loop: 4
============
LEFT OUTER JOIN / ID IS NULL Duration: 00:00:00:883
EXCEPT Duration: 00:00:01:357
Loop: 5
============
LEFT OUTER JOIN / ID IS NULL Duration: 00:00:00:907
EXCEPT Duration: 00:00:01:123
Closer, but still have left outer join in the lead.
Swap the execution order: -
USE tempdb;
SET NOCOUNT ON;
IF object_id('#temp1') IS NOT NULL
BEGIN
DROP TABLE #temp1;
END;
--500,000 Random rows of data
SELECT TOP 1000000 IDENTITY(INT,1,1) AS ID,
(ABS(CHECKSUM(NEWID())) % 2) + 64 AS Name
INTO #temp1
FROM master.dbo.syscolumns sc1, master.dbo.syscolumns sc2, master.dbo.syscolumns sc3;
IF object_id('#temp2') IS NOT NULL
BEGIN
DROP TABLE #temp2;
END;
--500,000 Random rows of data
SELECT TOP 1000000 IDENTITY(INT,1,1) AS ID,
(ABS(CHECKSUM(NEWID())) % 2) + 64 AS Name
INTO #temp2
FROM master.dbo.syscolumns sc1, master.dbo.syscolumns sc2, master.dbo.syscolumns sc3;
CREATE CLUSTERED INDEX cl_ID_temp1 ON #temp1 (ID);
CREATE CLUSTERED INDEX cl_ID_temp2 ON #temp2 (ID);
CREATE NONCLUSTERED INDEX nc_Name_temp1 ON #temp1 (Name);
CREATE NONCLUSTERED INDEX nc_Name_temp2 ON #temp2 (Name);
DECLARE @Loop CHAR(1) = '0', @HOLDER INT, @Duration CHAR(12), @StartTime DATETIME;
WHILE @Loop <= 5
BEGIN
RAISERROR('Loop: %s',0,1,@Loop) WITH NOWAIT;
RAISERROR('============',0,1) WITH NOWAIT;
DBCC FREEPROCCACHE WITH NO_INFOMSGS;
DBCC DROPCLEANBUFFERS WITH NO_INFOMSGS;
SELECT @StartTime = GETDATE();
SELECT @HOLDER = ID
FROM (SELECT t1.ID, t1.Name
FROM #temp1 t1
EXCEPT
SELECT t2.ID, t2.Name
FROM #temp2 t2
)A;
SELECT @Duration = CONVERT(CHAR(12),GETDATE()-@StartTime,114);
RAISERROR('EXCEPT Duration: %s',0,1,@Duration) WITH NOWAIT;
DBCC FREEPROCCACHE WITH NO_INFOMSGS;
DBCC DROPCLEANBUFFERS WITH NO_INFOMSGS;
SELECT @StartTime = GETDATE();
SELECT @HOLDER = ID
FROM (SELECT t1.ID, t1.Name
FROM #temp1 t1
LEFT OUTER JOIN #temp2 t2 ON t1.ID = t2.ID
WHERE t2.ID IS NULL
)A;
SELECT @Duration = CONVERT(CHAR(12),GETDATE()-@StartTime,114);
RAISERROR('LEFT OUTER JOIN / ID IS NULL Duration: %s',0,1,@Duration) WITH NOWAIT;
SET @Loop = @Loop + 1;
END
Loop: 0
============
EXCEPT Duration: 00:00:01:310
LEFT OUTER JOIN / ID IS NULL Duration: 00:00:00:630
Loop: 1
============
EXCEPT Duration: 00:00:01:137
LEFT OUTER JOIN / ID IS NULL Duration: 00:00:00:590
Loop: 2
============
EXCEPT Duration: 00:00:01:160
LEFT OUTER JOIN / ID IS NULL Duration: 00:00:00:590
Loop: 3
============
EXCEPT Duration: 00:00:01:140
LEFT OUTER JOIN / ID IS NULL Duration: 00:00:00:567
Loop: 4
============
EXCEPT Duration: 00:00:01:160
LEFT OUTER JOIN / ID IS NULL Duration: 00:00:00:600
Loop: 5
============
EXCEPT Duration: 00:00:01:177
LEFT OUTER JOIN / ID IS NULL Duration: 00:00:00:607
So, we've learnt that if my sample data is a good approximation of your actual data, then the left outer join / null method is slightly faster on my hardware.
April 26, 2013 at 6:07 am
Thanks a lot cadavre for such a excellent explanation 🙂
_______________________________________________________________
To get quick answer follow this link:
http://www.sqlservercentral.com/articles/Best+Practices/61537/
April 26, 2013 at 6:13 am
Testing in a test environment is key. You just can't know which one will be best without testing them.
When I test, I like to run the number of rows up to a level I expect in a production environment. This prevents me from being lulled into a false sense of security that "my query works great with 5 rows, so it would work with 5K rows or with 5M rows." I ran the number of rows up to over 120,000 and got the best performance out of this one:
SELECT t1.id, t1.Name
FROM #temp1 t1
WHERE NOT EXISTS (SELECT 1
FROM #temp2 t2
WHERE t2.id = t1.id
AND t2.name = t1.name);
One thing to note is that the result sets won't always be the same. If you have duplicate rows in the tables, the JOIN returns one row for each duplicate and the EXCEPT returns only one row. The NOT EXISTS returns one row for each duplicate but with better performance, which you can reduce to one row by adding a DISTINCT.
I know the ID field is usually a primary key, but I wasn't sure without a key defined in your DML. Oh yeah, keys will drastically impact the performance of your queries, so be sure to test with expected keys in place.
Viewing 5 posts - 1 through 4 (of 4 total)
You must be logged in to reply to this topic. Login to reply