-- Code and scripts from Chapter 3: Query Execution -- Let’s compare the various ways of viewing query plans. -- As an example, consider the following query: DECLARE @Country nvarchar(15) SET @Country = N'USA' SELECT O.[CustomerId], MAX(O.[Freight]) AS MaxFreight FROM [Customers] C JOIN [Orders] O ON C.[CustomerId] = O.[CustomerId] WHERE C.[Country] = @Country GROUP BY O.[CustomerId] OPTION (OPTIMIZE FOR (@Country = N'UK')); -- Let’s begin by looking at an example of a scan. Consider -- the following query: SELECT [OrderId] FROM [Orders] WHERE [RequiredDate] = '1998-03-26'; -- Now let’s look at an example of an index seek. Suppose we -- have a similar query, but this time the predicate is on -- the OrderDate column on which we do have an index: SELECT [OrderId] FROM [Orders] WHERE [OrderDate] = '1998-02-26'; -- For example, consider the following query: SELECT [OrderId], [CustomerId] FROM [Orders] WHERE [OrderDate] = '1998-02-26'; -- For example, consider this query: SELECT O.[OrderId] FROM [Customers] C JOIN [Orders] O ON C.[CustomerId] = O.[CustomerId] WHERE C.[City] = N'London'; -- For example, consider the following query, which returns -- the number of employees who were hired after each -- other employee: SELECT E1.[EmployeeId], COUNT(*) FROM [Employees] E1 JOIN [Employees] E2 ON E1.[HireDate] < E2.[HireDate] GROUP BY E1.[EmployeeId]; -- Now consider the following identical query which has been -- rewritten to use a CROSS APPLY: SELECT E1.[EmployeeId], ECnt.Cnt FROM [Employees] E1 CROSS APPLY ( SELECT COUNT(*) Cnt FROM [Employees] E2 WHERE E1.[HireDate] < E2.[HireDate] ) ECnt; -- We’ll make the rather unrealistic assumption that two -- customers with the same name are indeed the same customer. CREATE TABLE [Customer1] ([CustomerId] int PRIMARY KEY, [Name] nvarchar(30)); CREATE TABLE [Customer2] ([CustomerId] int PRIMARY KEY, [Name] nvarchar(30)); SELECT C1.[Name], C1.[CustomerId], C2.[CustomerId] FROM [Customer1] C1 FULL OUTER JOIN [Customer2] C2 ON C1.[Name] = C2.[Name]; -- The following script creates a simple partition function -- and scheme that defines four partitions, creates a -- partitioned table using this scheme, and then selects -- rows from the table: CREATE PARTITION FUNCTION [PtnFn] (int) AS RANGE FOR VALUES (1, 10, 100); CREATE PARTITION SCHEME [PtnSch] AS PARTITION [PtnFn] ALL TO ([PRIMARY]); CREATE TABLE [PtnTable] ([PK] int PRIMARY KEY, [Data] int) ON [PtnSch]([PK]); SELECT [PK], [Data] FROM [PtnTable]; -- For example, the following query simply joins the -- Orders and Customers tables: SELECT O.[OrderId], C.[CustomerId], C.[ContactName] FROM [Orders] O JOIN [Customers] C ON O.[CustomerId] = C.[CustomerId]; -- The following query returns a list of orders that shipped -- to cities different from the city that we have on file for -- the customer who placed the order: SELECT O.[OrderId], C.[CustomerId], C.[ContactName] FROM [Orders] O JOIN [Customers] C ON O.[CustomerId] = C.[CustomerId] AND O.[ShipCity] <> C.[City] ORDER BY C.[CustomerId]; -- The following query is nearly identical to the earlier merge -- join example except that we select one additional column, -- the OrderDate column, from the Orders table: SELECT O.[OrderId], O.[OrderDate], C.[CustomerId], C.[ContactName] FROM [Orders] O JOIN [Customers] C ON O.[CustomerId] = C.[CustomerId]; -- Let’s begin by considering a trivial example: SELECT COUNT(*) FROM [Orders]; -- A single-stream aggregate operator can calculate multiple -- scalar aggregates at the same time: SELECT MIN([OrderDate]), MAX([OrderDate]) FROM [Orders]; -- Some aggregates such as AVG are actually calculated from -- two other aggregates such as SUM and COUNT: SELECT AVG([Freight]) FROM [Orders]; -- Although SUM does not need to be computed per se, it still -- needs the count: SELECT SUM([Freight]) FROM [Orders]; --Consider this query to compute the number of distinct cities -- to which we’ve shipped orders: SELECT COUNT(DISTINCT [ShipCity]) FROM [Orders]; -- For example, this query gets the same plan as the -- above MIN/MAX query without the DISTINCT keyword: SELECT MIN(DISTINCT [OrderDate]), MAX(DISTINCT [OrderDate]) FROM [Orders]; -- For example, the following query is identical to the simple -- COUNT(*) query with which we began this discussion: SELECT COUNT(DISTINCT [OrderId]) FROM [Orders]; -- Multiple Distinct... Consider this query: SELECT COUNT(DISTINCT [ShipAddress]), COUNT(DISTINCT [ShipCity]) FROM [Orders]; -- Consider the following query that counts the number of -- orders shipped to each address: SELECT [ShipAddress], [ShipCity], COUNT(*) FROM [Orders] GROUP BY [ShipAddress], [ShipCity]; -- For example, suppose that we extended the above query with an -- ORDER BY clause that sorts the results on the GROUP BY keys: SELECT [ShipAddress], [ShipCity], COUNT(*) FROM [Orders] GROUP BY [ShipAddress], [ShipCity] ORDER BY [ShipAddress], [ShipCity]; -- For instance, consider the following query which counts -- orders by customer (instead of shipping address): SELECT [CustomerId], COUNT(*) FROM [Orders] GROUP BY [CustomerId]; -- For example: SELECT DISTINCT [CustomerId] FROM [Orders]; -- Can also be written as: SELECT [CustomerId] FROM [Orders] GROUP BY [CustomerId]; -- For example, suppose we would like to find the number of -- distinct customers served by each employee: SELECT [EmployeeId], COUNT(DISTINCT [CustomerId]) FROM [Orders] GROUP BY [EmployeeId]; -- For example, let’s temporarily create an index on the -- EmployeeId and CustomerId columns of the Orders table: CREATE INDEX [EmployeeCustomer] ON [Orders] (EmployeeId, CustomerId); -- Let’s drop the temporary index before we continue: DROP INDEX [Orders].[EmployeeCustomer]; -- Suppose that we wish to find the total number of orders -- taken by each employee, as well as the total number of -- distinct customers served by each employee: SELECT [EmployeeId], COUNT(*), COUNT(DISTINCT [CustomerId]) FROM [Orders] GROUP BY [EmployeeId]; -- Now consider the following essentially identical query, -- which groups the Orders table on the ShipCountry column: SELECT [ShipCountry], COUNT(*) FROM [Orders] GROUP BY [ShipCountry]; -- However, suppose we explicitly request a sort using an -- ORDER BY clause in the query: SELECT [ShipCountry], COUNT(*) FROM [Orders] GROUP BY [ShipCountry] ORDER BY [ShipCountry]; -- However, if we repeat the above query against the BigOrders -- table, we still get the same 21 groups: SELECT [ShipCountry], COUNT(*) FROM [BigOrders] GROUP BY [ShipCountry] ORDER BY [ShipCountry]; -- For example, suppose we just want a list of distinct countries -- to which we’ve shipped orders: SELECT DISTINCT [ShipCountry] FROM [Orders]; -- Our Northwind2 database includes a HugeOrders table, which -- includes the same data from the original Orders table repeated -- 25 times. SELECT [ShipCountry], COUNT(DISTINCT [EmployeeId]), COUNT(DISTINCT [CustomerId]) FROM [HugeOrders] GROUP BY [ShipCountry]; -- For example, here is a simple UNION ALL query that returns a -- list of all employees and customers: SELECT [FirstName] + N' ' + [LastName], [City], [Country] FROM [Employees] UNION ALL SELECT [ContactName], [City], [Country] FROM [Customers]; -- Now let’s consider the following similar query, which outputs -- a list of all cities and countries in which we have employees -- and/or customers: SELECT [City], [Country] FROM [Employees] UNION SELECT [City], [Country] FROM [Customers]; -- For example, consider the following query, which combines -- data from the Orders and BigOrders tables to generate a -- list of all countries to which we have shipped orders: SELECT [ShipCountry] FROM [Orders] UNION SELECT [ShipCountry] FROM [BigOrders]; -- Here is the script to create the new table and indexes along -- with the test query: SELECT [EmployeeId], [FirstName] + N' ' + [LastName] AS [ContactName], [City], [Country] INTO [NewEmployees] FROM [Employees]; ALTER TABLE [NewEmployees] ADD CONSTRAINT [PK_NewEmployees] PRIMARY KEY ([EmployeeId]); CREATE INDEX [ContactName] ON [NewEmployees]([ContactName]); CREATE INDEX [ContactName] ON [Customers]([ContactName]); SELECT [ContactName] FROM [NewEmployees] UNION ALL SELECT [ContactName] FROM [Customers] ORDER BY [ContactName]; -- In other words, we want to eliminate duplicates, but we are -- not interested in whether the results are sorted. SELECT [ContactName] FROM [NewEmployees] UNION SELECT [ContactName] FROM [Customers]; --Before continuing, let’s drop the extra table and indexes -- that we created just for this example: DROP TABLE [NewEmployees]; DROP INDEX [Customers].[ContactName]; -- To see an example of a hash union, we need to create a large -- table with big rows but many duplicates. The following script -- creates two tables. The first table, BigTable, has 100,000 -- rows, and each row includes a char(1000) column, but all of -- the rows have the same value for the Dups column. The second -- table, SmallTable, has a uniqueness constraint to guarantee -- that there are no duplicates. CREATE TABLE [BigTable] ([PK] int PRIMARY KEY, [Dups] int, [Pad] char(1000)); CREATE TABLE [SmallTable] ([PK] int PRIMARY KEY, [NoDups] int UNIQUE, [Pad] char(1000)); SET NOCOUNT ON; DECLARE @i int; SET @i = 0; BEGIN TRAN WHILE @i < 100000 BEGIN INSERT [BigTable] VALUES (@i, 0, NULL); SET @i = @i + 1; IF @i % 1000 = 0 BEGIN COMMIT TRAN; BEGIN TRAN; END END COMMIT TRAN; SELECT [Dups], [Pad] FROM [BigTable] UNION SELECT [NoDups], [Pad] FROM [SmallTable]; -- Consider the following query with a simple IN list predicate: SELECT [OrderId] FROM [Orders] WHERE [ShipPostalCode] IN (N'05022', N'99362'); -- Now consider the following identical query, which uses -- variables in place of constants: DECLARE @SPC1 nvarchar(20), @SPC2 nvarchar(20); SELECT @SPC1 = N'05022', @SPC2 = N'99362'; SELECT [OrderId] FROM [Orders] WHERE [ShipPostalCode] IN (@SPC1, @SPC2); -- To answer this question, let’s consider a slightly more -- complex query: DECLARE @OD1 datetime, @OD2 datetime; SELECT @OD1 = '1998-01-01', @OD2 ='1998-01-04'; SELECT [OrderId] FROM [Orders] WHERE [OrderDate] BETWEEN @OD1 AND DATEADD(day, 6, @OD1) OR [OrderDate] BETWEEN @OD2 AND DATEADD(day, 6, @OD2); -- Next, let’s consider a slightly different query that OR’s -- predicates on two different columns: SELECT [OrderId] FROM [Orders] WHERE [OrderDate] BETWEEN '1998-01-01' AND '1998-01-07' OR [ShippedDate] BETWEEN '1998-01-01' AND '1998-01-07'; -- The optimizer effectively rewrote the query as a union: SELECT [OrderId] FROM [Orders] WHERE [OrderDate] BETWEEN '1998-01-01' AND '1998-01-07' UNION SELECT [OrderId] FROM [Orders] WHERE [ShippedDate] BETWEEN '1998-01-01' AND '1998-01-07'; -- Next, let’s consider the following nearly identical query: SELECT [OrderId] FROM [Orders] WHERE [OrderDate] = '1998-01-01' OR [ShippedDate] = '1998-01-01'; -- For example, consider the following query, which is identical -- to the above query but selects the OrderDate and ShippedDate -- columns in addition to the OrderId column: SELECT [OrderId], [OrderDate], [ShippedDate] FROM [BigOrders] WHERE [OrderDate] = '1998-01-01' OR [ShippedDate] = '1998-01-01'; -- SQL Server can also use multiple indexes to execute queries -- with AND’ed predicates. For example, consider the following -- query: SELECT [OrderId] FROM [Orders] WHERE [OrderDate] = '1998-02-26' AND [ShippedDate] = '1998-03-04'; -- The optimizer has effectively rewritten this query as a join -- (although the explicit rewrite does not get the same plan): SELECT O1.[OrderId] FROM [Orders] O1 JOIN [Orders] O2 ON O1.[OrderId] = O2.[OrderId] WHERE O1.[OrderDate] = '1998-02-26' AND O2.[ShippedDate] = '1998-03-04'; --Now consider the following query which searches for orders -- that match a range of dates: SELECT [OrderId] FROM [BigOrders] WHERE [OrderDate] BETWEEN '1998-02-01' AND '1998-02-04' AND [ShippedDate] BETWEEN '1998-02-09' AND '1998-02-12'; -- For example, the following query, which selects the OrderDate -- and ShippedDate columns in addition to the OrderId column, -- uses the same plan as the similar example that selected just -- the OrderId column: SELECT [OrderId], [OrderDate], [ShippedDate] FROM [Orders] WHERE [OrderDate] = '1998-02-26' AND [ShippedDate] = '1998-03-04'; -- The following query returns a list of orders where the freight -- charge exceeds the average freight charge for all orders: SELECT O1.[OrderId], O1.[Freight] FROM [Orders] O1 WHERE O1.[Freight] > ( SELECT AVG(O2.[Freight]) FROM [Orders] O2 ); -- This time we want to find those orders placed by a specific -- customer that we’ve selected by name: SELECT O.[OrderId] FROM [Orders] O WHERE O.[CustomerId] = ( SELECT C.[CustomerId] FROM [Customers] C WHERE C.[ContactName] = N'Maria Anders'; ) -- For example, as long as we are not concerned that there might -- be two customers with the same name, the above query can be -- written as a simple join: SELECT O.[OrderId] FROM [Orders] O JOIN [Customers] C ON O.[CustomerId] = C.[CustomerId] WHERE C.[ContactName] = N'Maria Anders'; -- While writing this query as a simple join is the best option, -- let’s see what happens if we create a unique index on the -- ContactName column: CREATE UNIQUE INDEX [ContactName] ON [Customers] ([ContactName]); SELECT O.[OrderId] FROM [Orders] O WHERE O.[CustomerId] = ( SELECT C.[CustomerId] FROM [Customers] C WHERE C.[ContactName] = N'Maria Anders' ); DROP INDEX [Customers].[ContactName]; -- The following query is similar to the first subquery we tried, -- but this time it returns those orders in which the freight -- charge exceeds the average freight charge for all previously -- placed orders: SELECT O1.[OrderId] FROM [Orders] O1 WHERE O1.[Freight] > ( SELECT AVG(O2.[Freight]) FROM [Orders] O2 WHERE O2.[OrderDate] < O1.[OrderDate] ); -- Suppose that we wish to find those orders for which the -- freight charge exceeds the average freight charge for -- all orders placed by the same customer: SELECT O1.[OrderId], O1.[Freight] FROM [Orders] O1 WHERE O1.[Freight] > ( SELECT AVG(O2.[Freight]) FROM [Orders] O2 WHERE O2.[CustomerId] = O1.[CustomerId] ); -- Finally, suppose that we want to compute the order with the -- maximum freight charge placed by each customer: SELECT O1.[OrderId], O1.[Freight] FROM [Orders] O1 WHERE O1.[Freight] = ( SELECT MAX(O2.[Freight]) FROM [Orders] O2 WHERE O2.[CustomerId] = O1.[CustomerId] ); -- For example, the following query uses a noncorrelated subquery -- to return orders placed by customers who live in London: SELECT O.[OrderId] FROM [Orders] O WHERE O.[CustomerId] IN ( SELECT C.[CustomerId] FROM [Customers] C WHERE C.[City] = N'London' ); -- We can easily write the same query using a correlated subquery: SELECT O.[OrderId] FROM [Orders] O WHERE EXISTS ( SELECT * FROM [Customers] C WHERE C.[CustomerId] = O.[CustomerId] AND C.[City] = N'London' ); -- For a more complex example of subquery decorrelation, consider -- the following query, which outputs a list of orders along -- with the freight charge for each order and the average freight -- charge for all orders by the same customer: SELECT O1.[OrderId], O1.[Freight], ( SELECT AVG(O2.[Freight]) FROM [Orders] O2 WHERE O2.[CustomerId] = O1.[CustomerId] ) Avg_Freight FROM [Orders] O1; -- SQL Server uses some slightly more exotic join functionality, -- which we have not seen yet, to evaluate CASE expressions with -- subqueries. To see how these plans work, we’ll use the -- following script to set up an artificial scenario: CREATE TABLE [MainTable] ([PK] int PRIMARY KEY, [Col1] int, [Col2] int, [Col3] int); CREATE TABLE [WhenTable] ([PK] int PRIMARY KEY, [Data] int); CREATE TABLE [ThenTable] ([PK] int PRIMARY KEY, [Data] int); CREATE TABLE [ElseTable] ([PK] int PRIMARY KEY, [Data] int); INSERT [MainTable] VALUES (1, 11, 101, 1001); INSERT [MainTable] VALUES (2, 12, 102, 1002); INSERT [WhenTable] VALUES (11, NULL); INSERT [ThenTable] VALUES (101, 901); INSERT [ElseTable] VALUES (102, 902); SELECT M.[PK], CASE WHEN EXISTS (SELECT * FROM [WhenTable] W WHERE W.[PK] = M.[Col1]) THEN (SELECT T.[Data] FROM [ThenTable] T WHERE T.[PK] = M.[Col2]) ELSE (SELECT E.[Data] FROM [ElseTable] E WHERE E.[PK] = M.[Col3]) END AS Case_Expr FROM [MainTable] M; DROP TABLE [MainTable], [WhenTable], [ThenTable], [ElseTable]; -- To get parallel plans, we need fairly big tables; if the -- tables are too small, the optimizer concludes that a serial -- plan is perfectly adequate. The following script creates -- two tables. Each table has 250,000 rows and, thanks to the -- fixed-length char(200) column, well over 6,500 pages. CREATE TABLE [HugeTable1] ( [Key] int, [Data] int, [Pad] char(200), CONSTRAINT [PK1] PRIMARY KEY ([Key]) ); SET NOCOUNT ON; DECLARE @i int; BEGIN TRAN; SET @i = 0; WHILE @i < 250000; BEGIN INSERT [HugeTable1] VALUES(@i, @i, NULL); SET @i = @i + 1; IF @i % 1000 = 0 BEGIN COMMIT TRAN; BEGIN TRAN; END END COMMIT TRAN; SELECT [Key], [Data], [Pad] INTO [HugeTable2] FROM [HugeTable1]; ALTER TABLE [HugeTable2] ADD CONSTRAINT [PK2] PRIMARY KEY ([Key]); -- Now let’s try the simplest possible query: SELECT [Key], [Data] FROM [HugeTable1]; -- Now suppose we add a fairly selective predicate to the query: SELECT [Key], [Data] FROM [HugeTable1] WHERE [Data] < 1000; -- Now observe what happens if we add an ORDER BY clause to the -- query: SELECT [Key], [Data] FROM [HugeTable1] WHERE [Data] < 1000 ORDER BY [Key]; -- The OPTION (MAXDOP 1) query hint forces SQL Server to execute -- this query in serial. We will discuss this and other hints -- in more detail in Chapters 4 and 5. SELECT MIN(T1.[Key] + T2.[Key]) FROM [HugeTable1] T1 CROSS JOIN [HugeTable2] T2 OPTION (MAXDOP 1); -- Here is a simple example of a parallel nested loops join: SELECT T1.[Key], T1.[Data], T2.[Data] FROM [HugeTable1] T1 JOIN [HugeTable2] T2 ON T1.[Key] = T2.[Key] WHERE T1.[Data] < 100; -- (Recall that a round-robin exchange sends each subsequent -- packet of rows to the next consumer thread in a fixed -- sequence.) Here is one such example: SELECT T1_Top.[Key], T1_Top.[Data], T2.[Data] FROM ( SELECT TOP 100 T1.[Key], T1.[Data] FROM [HugeTable1] T1 ORDER BY T1.[Data] ) T1_Top, [HugeTable2] T2 WHERE T1_Top.[Key] = T2.[Key]; -- For example, consider this query: SELECT T1.[Key], T1.[Data], T2.[Key] FROM [HugeTable1] T1 JOIN [HugeTable2] T2 ON T1.[Data] = T2.[Data] WHERE T1.[Key] = 0; -- For example, consider the following query: SELECT T1.[Key], T1.[Data], T2.[Key] FROM [HugeTable1] T1 JOIN [HugeTable2] T2 ON T1.[Key] = T2.[Data] ORDER BY T1.[Key]; -- To see an example of a parallel hash join, consider the -- following simple query: SELECT T1.[Key], T1.[Data], T2.[Key] FROM [HugeTable1] T1 JOIN [HugeTable2] T2 ON T1.[Data] = T2.[Data]; -- For example, the following query includes a very selective -- predicate: SELECT T1.[Key], T1.[Data], T2.[Key] FROM [HugeTable1] T1 JOIN [HugeTable2] T2 ON T1.[Data] = T2.[Data] WHERE T1.[Key] < 100; -- Bitmap Filtering ... Next, suppose we have a moderately -- selective predicate on the build input to a hash join: SELECT T1.[Key], T1.[Data], T2.[Key] FROM [HugeTable1] T1 JOIN [HugeTable2] T2 ON T1.[Data] = T2.[Data] WHERE T1.[Key] < 10000; -- This statement is guaranteed to violate the foreign key -- constraint on the ShipVia column and will fail. UPDATE [Orders] SET [ShipVia] = 4 WHERE [ShipCity] = N'London';