-- Code and scripts from Chapter 4: -- Troubleshooting Query Performance -- The first query below has a SARG involving the CustomerID -- column in the BigOrders table in the Northwind2 database. -- SQL Server is able to use the statistical information to -- estimate that 25 rows will meet the condition in the WHERE -- clause. The optimizer will choose to use the index on -- CustomerID. USE Northwind2 SELECT * FROM BigOrders WHERE CustomerID = 'Ocean'; -- In the next query, because of the presence of the function -- UPPER, there is no SARG and the index on CustomerID is not -- even considered. The optimizer has no real choice other -- than to scan the entire table using a Clustered Index Scan. USE Northwind2 SELECT * FROM BigOrders WHERE UPPER(CustomerID) = 'OCEAN'; -- For example, suppose that the Employees table has an index on -- last name, first name, and date of hire. The following query -- is covered by this index: SELECT LastName, HireDate FROM Employees WHERE FirstName = 'Sven'; -- Thus, the following is also a covered query: SELECT EmployeeID, FirstName, LastName FROM Employees WHERE LastName LIKE 'B%'; -- We can use DBCC SHOW_STATISTICS to display the statistics -- information for this index: DBCC SHOW_STATISTICS('Orders', 'cust_date_indx'); -- As an example, let’s use the Person.Contacts table in the -- AdventureWorks database and build a nonclustered index on -- the Lastname and FirstName columns as shown: USE AdventureWorks; CREATE INDEX NameIndex on Person.Contact(LastName, FirstName); -- For example, the following query will return 1,255 rows and -- the optimizer will choose a plan using a clustered index scan: SELECT * FROM Person.Contact WHERE FirstName like 'K%'; -- However, searching for a less common value, as in the -- following query, yields a different plan. SELECT * FROM Person.Contact WHERE FirstName like 'Y%'; -- In this case, we have a clustered index on EmployeeID, which -- is a unique value. USE Northwind2; SELECT FirstName, LastName, EmployeeID FROM Employees WHERE LastName BETWEEN 'Smith' AND 'Snow' AND FirstName BETWEEN 'Daniel' and 'David'; -- You can see the main differences if you take the same query -- and data and write an OR query instead of an AND: SELECT FirstName, LastName, EmployeeID FROM Employees WHERE LastName BETWEEN 'Smith' AND 'Snow' OR FirstName BETWEEN 'Daniel' AND 'David'; -- The second situation in which there are no usable statistics -- is when the values in a SARG are variables. Consider this -- example: DECLARE @name varchar(30); SET @name = 'Zelda'; SELECT FirstName, LastName, EmployeeID FROM Employees WHERE LastName > @name; -- We can create a similar situation here by making a copy of the -- orders table in the Northwind2 database and adding a new -- column to it with a default value of 2,000-byte character -- field. SELECT * INTO neworders FROM Orders; GO ALTER TABLE neworders ADD full_details char(2000) NOT NULL DEFAULT 'full details'; -- We can gather some information about this table with the -- following command: EXEC sp_spaceused neworders, true -- The results will tell us that there are 830 rows in the table, -- 3,272 KB for data, which equates to 409 data pages. That -- doesn’t seem completely unreasonable, so we can try selecting -- all the rows after enabling STATISTICS IO: SET STATISTICS IO ON; GO SELECT * FROM neworders; -- We can use the Dynamic Management Function -- sys.dm_db_index_physical_stats to observe how many forwarded -- rows exist in the neworders table, using the following query: SELECT forwarded_record_count FROM sys.dm_db_index_physical_stats (db_id('Northwind2'), object_id('neworders'), null, null, 'detailed'); -- If for some reason we don’t want to keep the clustered index, -- we can drop it. But the act of creating the clustered index -- will reorganize the table. SET STATISTICS IO OFF; GO CREATE CLUSTERED INDEX orderID_cl ON neworders(orderID); GO DROP INDEX neworders.orderID_cl; GO EXEC sp_spaceused neworders, true; SET STATISTICS IO ON; GO SELECT ROWS = count(*) FROM neworders; -- You can see this behavior by first examining the Customers -- table in the Northwind2 database with the following command: EXEC sp_helpstats Customers; -- The first time you run this after creating the Northwind2 -- database, you should not get any statistics information -- returned. However, if you query the Customers table on an -- unindexed column and then examine the statistics again, you -- should see different results. SELECT * FROM Customers WHERE ContactName = 'Hanna Moos'; GO EXEC sp_helpstats Customers; -- Since SQL Server also creates statistics for every index you -- build, to see a complete list of the statistics on your table -- you would also need to enumerate your indexes. You could do -- that using either of the following statements: EXEC sp_helpindex Customers; -- Or SELECT * FROM sys.indexes WHERE object_id = OBJECT_ID('Customers'); -- As an alternative, you can define a computed column for the -- expression on which you want to filter, using a command like -- the following: ALTER TABLE [Order Details] ADD TotalSale AS UnitPrice * Quantity; -- You create multicolumn statistics by using the following -- statement: CREATE STATISTICS LocationStatistics ON Locations(City, State); -- To refresh all statistics on the Orders table with a full -- scan, use the following command: UPDATE STATISTICS Orders WITH FULLSCAN; -- So we’ll create a computed column using CHECKSUM and index that: USE pubs; GO DROP INDEX titles.titleind; GO ALTER TABLE titles ADD hash_title AS CHECKSUM(title); GO CREATE INDEX hash_index ON titles(hash_title); GO -- First, let’s try just searching for a particular title: SELECT * FROM titles WHERE title = 'Cooking with Computers: Surreptitious Balance Sheets'; -- If you look at the query plan for this SELECT, you’ll see that -- a clustered index scan is done, which means the whole table -- must be searched. Instead, let’s also query on the checksum -- value in the hash_title column: SELECT * FROM titles WHERE title = 'Cooking with Computers: Surreptitious Balance Sheets' AND hash_title = CHECKSUM('Cooking with Computers: Surreptitious Balance Sheets'); -- Let’s look at an example. Consider the following query: SELECT [OrderId], [CustomerId], [OrderDate] FROM [Orders] ORDER BY [OrderDate]; -- Now let’s add a FAST 1 hint to the same query: SELECT [OrderId], [CustomerId], [OrderDate] FROM [Orders] ORDER BY [OrderDate] OPTION (FAST 1); -- Now suppose we change the FAST 1 hint to a FAST 100 hint: SELECT [OrderId], [CustomerId], [OrderDate] FROM [Orders] ORDER BY [OrderDate] OPTION (FAST 100); -- Consider the following query to look up orders shipped to -- a specific postal code: SELECT [OrderId], [OrderDate] FROM [Orders] WHERE [ShipPostalCode] = N'05022'; -- Now let’s observe what happens if we use a variable instead -- of a constant: DECLARE @ShipCode nvarchar(20); SET @ShipCode = N'05022'; SELECT [OrderId], [OrderDate] FROM [Orders] WHERE [ShipPostalCode] = @ShipCode; -- Now let’s see what happens when we add an OPTIMIZE FOR hint -- to the query: DECLARE @ShipCode nvarchar(20); SET @ShipCode = N'05022'; SELECT [OrderId], [OrderDate] FROM [Orders] WHERE [ShipPostalCode] = @ShipCode OPTION (OPTIMIZE FOR (@ShipCode = N'05022')); -- To see how we can use query-level join hints, consider the -- following query, which looks for orders placed by customers -- living in London: SELECT O.[OrderId] FROM [Customers] C JOIN [Orders] O ON C.[CustomerId] = O.[CustomerId] WHERE C.[City] = N'London'; -- The query optimizer chooses a nested loops join for this -- query.... If we want to force a merge join, we simply need -- to add a MERGE JOIN hint to the query as follows: SELECT O.[OrderId] FROM [Customers] C JOIN [Orders] O ON C.[CustomerId] = O.[CustomerId] WHERE C.[City] = N'London' OPTION (MERGE JOIN); -- We can easily force a hash aggregate with the HASH GROUP hint: SELECT [CustomerId], MAX([OrderDate]) FROM [Orders] GROUP BY [CustomerId] OPTION (HASH GROUP); -- To see an example of how we can use this hint, consider the -- following query, which returns a list of customers who have -- had orders shipped to London or who live in London: SELECT [CustomerId] FROM [Orders] WHERE [ShipCity] = N'London' UNION SELECT [CustomerId] FROM [Customers] WHERE [City] = N'London'; -- Now observe what happens if we add a MERGE UNION hint: SELECT [CustomerId] FROM [Orders] WHERE [ShipCity] = N'London' UNION SELECT [CustomerId] FROM [Customers] WHERE [City] = N'London' OPTION (MERGE UNION); -- For example, the following two queries are semantically -- equivalent, but using the FORCE ORDER and HASH JOIN hints -- and moving the ON clause that links Employees and Orders -- changes the join order. The join appearing in bold is -- processed first. SELECT O.[OrderId] FROM [Customers] C JOIN [Orders] O JOIN [Employees] E ON O.[EmployeeId] = E.[EmployeeId] ON C.[CustomerId] = O.[CustomerId] WHERE C.[City] = N'London' AND E.[City] = N'London' OPTION (FORCE ORDER, HASH JOIN); SELECT O.[OrderId] FROM [Customers] C JOIN [Orders] O ON C.[CustomerId] = O.[CustomerId] JOIN [Employees] E ON O.[EmployeeId] = E.[EmployeeId] WHERE C.[City] = N'London' AND E.[City] = N'London' OPTION (FORCE ORDER, HASH JOIN); -- Let’s look at a simple example of how we can use the -- FORCE ORDER hint to change the join order. For this example, -- consider the following query which joins three tables: SELECT O.[OrderId] FROM [Employees] E JOIN [Orders] O ON O.[EmployeeId] = E.[EmployeeId] JOIN [Customers] C ON O.[CustomerId] = C.[CustomerId] WHERE E.[LastName] = N'Peacock' AND C.[City] = N'London'; -- Now observe how the join order changes if we include the -- FORCE ORDER hint: SELECT O.[OrderId] FROM [Employees] E JOIN [Orders] O ON O.[EmployeeId] = E.[EmployeeId] JOIN [Customers] C ON O.[CustomerId] = C.[CustomerId] WHERE E.[LastName] = N'Peacock' AND C.[City] = N'London' OPTION (FORCE ORDER); -- We can force this plan either by grouping the joins using -- parenthesis or by using a subquery. The ANSI join alternative -- is generally a bit simpler and may appear more natural: SELECT O.[OrderId] FROM [Employees] E JOIN ( [Customers] C JOIN [Orders] O ON O.[CustomerId] = C.[CustomerId] ) ON O.[EmployeeId] = E.[EmployeeId] WHERE C.[City] = N'London' AND E.[LastName] = N'Peacock' OPTION (FORCE ORDER); -- Here is the same query rewritten with a subquery: SELECT CO.[OrderId] FROM [Employees] E, ( SELECT O.[OrderId], O.[EmployeeId] FROM [Customers] C, [Orders] O WHERE O.[CustomerId] = C.[CustomerId] AND C.[City] = N'London' ) CO WHERE CO.[EmployeeId] = E.[EmployeeId] AND E.[LastName] = N'Peacock' OPTION (FORCE ORDER); -- Let’s look at an example of how we can use the FORCE ORDER -- hint to control the placement of an aggregation operator. -- Consider the following query, which counts the number of -- orders placed by customers from the United States: SELECT O.[CustomerId], COUNT(*) FROM [Customers] C JOIN [Orders] O ON C.[CustomerId] = O.[CustomerId] WHERE C.[Country] = N'USA' GROUP BY O.[CustomerId]; -- Suppose that we would rather perform the join and filtering -- before we compute the aggregation. We can do that with the -- FORCE ORDER hint: SELECT O.[CustomerId], COUNT(*) FROM [Customers] C JOIN [Orders] O ON C.[CustomerId] = O.[CustomerId] WHERE C.[Country] = N'USA' GROUP BY O.[CustomerId] OPTION (FORCE ORDER); -- We might use an INDEX hint to force the optimizer to choose a -- covering index (or the heap), even if it means performing a -- full table scan. For example, consider the following query: SELECT [OrderId], [CustomerId] FROM [Orders] WHERE [ShipPostalCode] = N'99362'; -- Using an index hint, we can force the optimizer to choose a -- clustered index scan instead of a bookmark lookup (although -- in this case we are probably better off with the original plan): SELECT [OrderId], [CustomerId] FROM [Orders] WITH (INDEX(1)) WHERE [ShipPostalCode] = N'99362'; -- Consider the same query from the prior example, but let’s -- use a variable for the lookup value. DECLARE @ShipCode nvarchar(20); SET @ShipCode = N'99362'; SELECT [OrderId], [CustomerId] FROM [Orders] WHERE [ShipPostalCode] = @ShipCode; -- If we know that all or virtually all executions of this -- statement will use a value for @ShipCode that retrieves -- relatively few rows, we might want to force the plan that -- uses a nonclustered index seek with a bookmark lookup: DECLARE @ShipCode nvarchar(20); SET @ShipCode = N'99362'; SELECT [OrderId], [CustomerId] FROM [Orders] WITH (INDEX([ShipPostalCode])) WHERE [ShipPostalCode] = @ShipCode; -- For instance, consider the following query: SELECT [OrderId], [CustomerId] FROM [Orders] WHERE [OrderDate] = '1998-02-26' AND [ShipPostalCode] = N'99362'; -- While this plan is quite reasonable, we could choose to force -- an index intersection by including two indexes in the hint: SELECT [OrderId], [CustomerId] FROM [Orders] WITH (INDEX([OrderDate], [ShipPostalCode])) WHERE [OrderDate] = '1998-02-26' AND [ShipPostalCode] = N'99362'; -- However, if we have at least one unique index (clustered or -- nonclustered) on the table, we can rewrite a query to force -- a union plan that closely mimics an index union. For example, -- consider the following query (which by default uses a full -- clustered index scan): SELECT [OrderId], [CustomerId] FROM [Orders] WHERE [OrderDate] = '1998-02-26' OR [ShipPostalCode] = N'83720'; -- We can rewrite this query as a union: SELECT [OrderId], [CustomerId] FROM [Orders] WITH (INDEX([OrderDate])) WHERE [OrderDate] = '1998-02-26' UNION SELECT [OrderId], [CustomerId] FROM [Orders] WITH (INDEX([ShipPostalCode])) WHERE [ShipPostalCode] = N'83720'; -- Suppose that we also want to ensure that the join between the -- Customers and Orders tables continues to use a nested loops -- join. We can achieve this result with join hints as follows: SELECT O.[OrderId] FROM [Employees] E INNER MERGE JOIN ( [Customers] C INNER LOOP JOIN [Orders] O ON O.[CustomerId] = C.[CustomerId] ) ON O.[EmployeeId] = E.[EmployeeId] WHERE C.[City] = N'London' AND E.[LastName] = N'Peacock'; -- Begin by collecting the XML plan for the same query that we -- used for the ANSI-style join hint example: SET SHOWPLAN_XML ON; GO SELECT O.[OrderId] FROM [Employees] E INNER MERGE JOIN ( [Customers] C INNER LOOP JOIN [Orders] O ON O.[CustomerId] = C.[CustomerId] ) ON O.[EmployeeId] = E.[EmployeeId] WHERE C.[City] = N'London' AND E.[LastName] = N'Peacock'; GO SET SHOWPLAN_XML OFF; GO -- Notice that this query yields the same plan as the query with -- the join hints. SELECT O.[OrderId] FROM [Employees] E JOIN ( [Customers] C JOIN [Orders] O ON O.[CustomerId] = C.[CustomerId] ) ON O.[EmployeeId] = E.[EmployeeId] WHERE C.[City] = N'London' AND E.[LastName] = N'Peacock' OPTION (USE PLAN N' ... ');