Geek Sync | Why Did My Clever Index Change Backfire?

You've got a great indexing strategy to speed up your SQL Server! You deploy the change and then... it all goes wrong.

Join IDERA and Kendra Little on Wednesday, March 21 at 11 AM CT as she explains why index changes may slow down queries, cause queries to fail, cause unexpected outages, or just not work like they did when you tested them. You’ll leave the session with a set of practical steps to ensure you sidestep common indexing blunders and have confidence that your clever index change will speed up your queries instead of slowing them down. This is a demo-packed session you will not want to miss.

You can view Kendra's slides here. This Geek Sync will be recorded and the recording will be available in our Resource Center.

00-populate-BabbyNames.sql
/*****************************************************************************
Copyright (c) 2018 SQL Workbooks LLC
Terms of Use: https://sqlworkbooks.com/terms-of-service/
Contact: help@sqlworkbooks.com

Setup:
    Download BabbyNames.bak.zip (41 MB zipped database backup)
    https://github.com/LitKnd/BabbyNames/releases/tag/v1.1

Then review and run the script below on a SQL Server 2017 dedicated test instance
    Developer Edition recommended (Enterprise and Evaluation Editions will work too)

The script
    Restores the database (edit the file locations for your instance)
    Expands and modifies the data
        8GB data files (multiple files in a couple of filegroups)
        2GB log file
    Duration on my test instance (4 vCPUs, SSDs): ~5 minutes
    
This requires some tempdb space to do sorts, etc. I ran with 4x 1GB tempdb files.

*****************************************************************************/

SET XACT_ABORT, NOCOUNT, ANSI_NULLS, QUOTED_IDENTIFIER, ANSI_PADDING ON;
GO

EXEC sp_configure 'show advanced options', 1;
GO
RECONFIGURE
GO

EXEC sp_configure 'max degree of parallelism', 4;
GO

EXEC sp_configure 'cost threshold for parallelism', 100
GO

EXEC sp_configure 'max server memory (MB)', 7000;
GO

RECONFIGURE
GO


/****************************************************
Restore small BabbyNames database
****************************************************/
use master;
GO

IF DB_ID('BabbyNames') IS NOT NULL 
BEGIN
    IF (SELECT state_desc FROM sys.databases WHERE name='BabbyNames') = 'ONLINE'
    ALTER DATABASE BabbyNames
        SET SINGLE_USER
        WITH ROLLBACK IMMEDIATE;
END
GO

RESTORE DATABASE BabbyNames
    FROM DISK=N'S:\MSSQL\Backup\BabbyNames.bak'
    WITH
        MOVE 'BabbyNames' TO 'S:\MSSQL\DATA\BabbyNames.mdf',
        MOVE 'BabbyNames_log' TO 'S:\MSSQL\DATA\BabbyNames_log.ldf',
        REPLACE,
        RECOVERY;
GO

ALTER DATABASE BabbyNames SET RECOVERY SIMPLE;
GO


ALTER DATABASE BabbyNames
    ADD FILEGROUP fg_FirstNameByBirthDate;
GO


ALTER DATABASE BabbyNames ADD FILE 
(   NAME = fg_FirstNameByBirthDate_f1,
    FILENAME = 'S:\MSSQL\Data\BabbyNames_fg_FirstNameByBirthDate_f1.ndf',
    SIZE = 2GB,
    FILEGROWTH = 512MB
) TO FILEGROUP fg_FirstNameByBirthDate;
GO

ALTER DATABASE BabbyNames ADD FILE 
(   NAME = fg_FirstNameByBirthDate_f2,
    FILENAME = 'S:\MSSQL\Data\BabbyNames_fg_FirstNameByBirthDate_f2.ndf',
    SIZE = 2GB,
    FILEGROWTH = 512MB
) TO FILEGROUP fg_FirstNameByBirthDate;
GO

ALTER DATABASE BabbyNames ADD FILE 
(   NAME = fg_FirstNameByBirthDate_f3,
    FILENAME = 'S:\MSSQL\Data\BabbyNames_fg_FirstNameByBirthDate_f3.ndf',
    SIZE = 2GB,
    FILEGROWTH = 512MB
) TO FILEGROUP fg_FirstNameByBirthDate;
GO

ALTER DATABASE BabbyNames ADD FILE 
(   NAME = fg_FirstNameByBirthDate_f4,
    FILENAME = 'S:\MSSQL\Data\BabbyNames_fg_FirstNameByBirthDate_f4.ndf',
    SIZE = 2GB,
    FILEGROWTH = 512MB
) TO FILEGROUP fg_FirstNameByBirthDate;
GO

/* Make fg_FirstNameByBirthDate the default filegroup */
ALTER DATABASE BabbyNames MODIFY FILEGROUP fg_FirstNameByBirthDate DEFAULT;  
GO  

ALTER DATABASE BabbyNames MODIFY FILE (NAME='BabbyNames_log', SIZE=2GB, FILEGROWTH=512MB);
GO

/* just in case you want to play around with Hekaton... */
ALTER DATABASE BabbyNames ADD FILEGROUP MemoryOptimizedData CONTAINS MEMORY_OPTIMIZED_DATA;
GO

ALTER DATABASE BabbyNames 
    ADD FILE( NAME = 'MemoryOptimizedData' , FILENAME = 'S:\MSSQL\Data\BabbyNames_MemoryOptimizedData') 
    TO FILEGROUP MemoryOptimizedData;  
GO  


/****************************************************
Configure database and expand data
****************************************************/

SET STATISTICS IO, TIME OFF;
GO
SET XACT_ABORT, NOCOUNT ON;
GO
SET ANSI_NULLS, QUOTED_IDENTIFIER, ANSI_PADDING, ANSI_WARNINGS, ARITHABORT, CONCAT_NULL_YIELDS_NULL ON;
GO
SET NUMERIC_ROUNDABORT OFF;
GO


ALTER DATABASE BabbyNames SET QUERY_STORE = ON
GO
ALTER DATABASE BabbyNames SET QUERY_STORE 
    (OPERATION_MODE = READ_WRITE, 
    CLEANUP_POLICY = (STALE_QUERY_THRESHOLD_DAYS = 365), MAX_STORAGE_SIZE_MB = 1024
    )
GO


ALTER DATABASE BabbyNames SET COMPATIBILITY_LEVEL=140;
GO

ALTER DATABASE BabbyNames SET TARGET_RECOVERY_TIME = 60 SECONDS;
GO

USE BabbyNames;
GO

EXEC evt.logme N'Restored small BabbyNames database';
GO

ALTER DATABASE SCOPED CONFIGURATION SET QUERY_OPTIMIZER_HOTFIXES = OFF;
GO

ALTER DATABASE SCOPED CONFIGURATION SET LEGACY_CARDINALITY_ESTIMATION = OFF;
GO



/******************************************************/
/* ref.Numbers                                        */
/******************************************************/

/* Create ref.Numbers. This is a helper "numbers" table just to help us in the next step.*/
IF SCHEMA_ID('ref') IS NULL
BEGIN
    EXEC evt.logme N'Create schema ref.';

    EXEC ('CREATE SCHEMA ref AUTHORIZATION dbo');
END
GO

EXEC evt.logme N'Create ref.Numbers.';
GO
IF OBJECT_ID('ref.Numbers','U') IS NOT NULL
BEGIN
    EXEC evt.logme N'Table ref.Numbers already exists, dropping.';

    DROP TABLE ref.Numbers;
END
GO

CREATE TABLE ref.Numbers (
    Num INT NOT NULL,
) on fg_FirstNameByBirthDate;
GO

EXEC evt.logme N'Load ref.Numbers.';
GO
INSERT ref.Numbers
    (Num)
SELECT TOP 10000000
    ROW_NUMBER() OVER (ORDER BY fn1.ReportYear)
FROM agg.FirstNameByYear AS fn1
CROSS JOIN agg.FirstNameByYear AS fn2;
GO

EXEC evt.logme N'Index and key ref.Numbers.';
GO

CREATE CLUSTERED COLUMNSTORE INDEX ccx_ref_Numbers on ref.Numbers;
GO

ALTER TABLE ref.Numbers
    ADD CONSTRAINT pk_refNumbers_Num
        PRIMARY KEY NONCLUSTERED (Num)
        ON fg_FirstNameByBirthDate;
GO


/******************************************************/
/* Helper index                                       */
/******************************************************/

EXEC evt.logme N'Create nccx_halp ON agg.FirstNameByYear.';
GO

CREATE NONCLUSTERED COLUMNSTORE INDEX nccx_halp on agg.FirstNameByYear 
    (FirstNameId, ReportYear, NameCount, Gender)
    ON fg_FirstNameByBirthDate
GO



/******************************************************/
/* Create and load dbo.FirstNameByBirthDate           */
/******************************************************/

EXEC evt.logme N'Load dbo.FirstNameByBirthDateStage';
GO

/* This select into gets parallel insert plan, 20170906.
We set the default filegroup above so this will go into fg_FirstNameByBirthDate,
even though we can't explicitly direct it to do so until sql server 2017. */
SELECT
    ISNULL(CAST(ROW_NUMBER() OVER (ORDER BY (SELECT 'boop')) AS BIGINT),0) AS FirstNameByBirthDateId,
    DATEADD(mi,n.Num * 5.1,CAST('1/1/' + CAST(ReportYear AS CHAR(4)) AS datetime2(0))) as FakeBirthDateStamp,
    fn.StateCode,
    fn.FirstNameId,
    Gender,
    CAST(NULL AS TINYINT) as Flag1,
    CAST(NULL AS CHAR(1)) as Flag2
INTO dbo.FirstNameByBirthDateStage  ON fg_FirstNameByBirthDate /* This added in SQL Server 2017*/
FROM agg.FirstNameByYearState AS fn
CROSS APPLY (select Num from ref.Numbers where Num <= fn.NameCount) AS n
WHERE fn.ReportYear >= 1970 /* Limit size of dataset here */

    OPTION (RECOMPILE);
GO

EXEC evt.logme N'Add BirthYear computed column to  dbo.FirstNameByBirthDateStage';
GO

ALTER TABLE dbo.FirstNameByBirthDateStage
    ADD BirthYear as YEAR(FakeBirthDateStamp);
GO

EXEC evt.logme N'Create dbo.FirstNameByBirthDate, which has an identity property';
GO



CREATE TABLE dbo.FirstNameByBirthDate (
    FirstNameByBirthDateId BIGINT IDENTITY(1,1),
    FakeBirthDateStamp DATETIME2(0),
    StateCode CHAR(2) NOT NULL,
    FirstNameId INT NOT NULL,
    Gender CHAR(1) NOT NULL,
    Flag1 TINYINT NULL,
    Flag2 CHAR(1) NULL,
    BirthYear AS YEAR(FakeBirthDateStamp)
) ON fg_FirstNameByBirthDate
GO



EXEC evt.logme N'Switch data from dbo.FirstNameByBirthDateStage to dbo.FirstNameByBirthDate';
GO

ALTER TABLE dbo.FirstNameByBirthDateStage SWITCH TO dbo.FirstNameByBirthDate;
GO


EXEC evt.logme N'dbcc checkident reseed for dbo.FirstNameByBirthDateStage';
GO

DBCC CHECKIDENT ('dbo.FirstNameByBirthDate', RESEED);
GO


EXEC evt.logme N'DROP TABLE FirstNameByBirthDateStage';
GO
DROP TABLE FirstNameByBirthDateStage;
GO

EXEC evt.logme N'Create clustered PK on dbo.FirstNameByBirthDate';
GO

ALTER TABLE dbo.FirstNameByBirthDate
    ADD CONSTRAINT pk_FirstNameByBirthDate_FirstNameByBirthDateId
        PRIMARY KEY CLUSTERED (FirstNameByBirthDateId)
	WITH (SORT_IN_TEMPDB = ON, 
        DATA_COMPRESSION = ROW)
    ON fg_FirstNameByBirthDate;
GO

EXEC evt.logme N'Clean up nccx_halp ON agg.FirstNameByYear.';
GO
DROP INDEX IF EXISTS nccx_halp ON agg.FirstNameByYear;
GO


/******************************************************/
/* Foreign key constraints                            */
/******************************************************/

/* Special! Skipping this one for indexed view demo */
--EXEC evt.logme N'Create FK FK_FirstNameByBirthDate_FirstNameId';
--GO

--ALTER TABLE dbo.FirstNameByBirthDate WITH CHECK  
--ADD CONSTRAINT FK_FirstNameByBirthDate_FirstNameId FOREIGN KEY (FirstNameId) 
--    REFERENCES ref.FirstName (FirstNameId);
--GO



EXEC evt.logme N'Create FK FK_FirstNameByBirthDate_StateCode';
GO

ALTER TABLE dbo.FirstNameByBirthDate WITH CHECK  
ADD CONSTRAINT FK_FirstNameByBirthDate_StateCode FOREIGN KEY (StateCode) 
    REFERENCES ref.State (StateCode);
GO



/******************************************************/
/*  Demo setup                                        */
/******************************************************/


/* Sepcial! Flag for filtered index demo */
UPDATE dbo.FirstNameByBirthDate
    SET Flag1 = 1
WHERE FirstNameId IN (31015, 61451)
GO
UPDATE dbo.FirstNameByBirthDate
    SET Flag1 = 2
WHERE FirstNameId IN (32831, 5730)
GO
UPDATE dbo.FirstNameByBirthDate
    SET Flag1 = 3
WHERE FirstNameId IN (91787)
GO



/******************************************************/
/*  Backup                                            */
/******************************************************/

EXEC evt.logme N'Run a full backup';
GO


BACKUP DATABASE BabbyNames
    TO DISK=N'S:\MSSQL\Backup\BabbyNames_1of4.bak',
    DISK=N'S:\MSSQL\Backup\BabbyNames_2of4.bak',
    DISK=N'S:\MSSQL\Backup\BabbyNames_3of4.bak',
    DISK=N'S:\MSSQL\Backup\BabbyNames_4of4.bak'
    WITH INIT, COMPRESSION, STATS=5;
GO


/* Restore command (for reference) */
--use master;
--GO

--IF DB_ID('BabbyNames') IS NOT NULL
--BEGIN
--    ALTER DATABASE BabbyNames
--        SET SINGLE_USER
--        WITH ROLLBACK IMMEDIATE;
--END
--GO

--RESTORE DATABASE BabbyNames
--    FROM DISK=N'S:\MSSQL\Backup\BabbyNames_1of4.bak',
--    DISK=N'S:\MSSQL\Backup\BabbyNames_2of4.bak',
--    DISK=N'S:\MSSQL\Backup\BabbyNames_3of4.bak',
--    DISK=N'S:\MSSQL\Backup\BabbyNames_4of4.bak'
--    WITH
--        MOVE 'BabbyNames' TO 'S:\MSSQL\Data\BabbyNames.mdf',
--        MOVE 'BabbyNames_log' TO 'S:\MSSQL\Data\BabbyNames_log.ldf',
--        REPLACE,
--        RECOVERY;
--GO



/******************************************************/
/* All done                                           */
/******************************************************/

EXEC evt.logme N'BEEP BOOP WE ARE DONE';
GO

01-restore.sql
/*****************************************************************************
Copyright (c) 2017 SQL Workbooks LLC
Terms of Use: https://sqlworkbooks.com/terms-of-service/
Contact: help@sqlworkbooks.com

The script restores the database created and configured in 00-populate-BabbyNames.sql

If you've just run 00-populate-BabbyNames.sql, you don't have to run this.

If you want to reset your environment without rerunning 00-populate-BabbyNames.sql, 
    modify the file locations here to restore the database.    
*****************************************************************************/


SET XACT_ABORT, NOCOUNT, ANSI_NULLS, QUOTED_IDENTIFIER, ANSI_PADDING ON;
GO

EXEC sp_configure 'show advanced options', 1;
GO
RECONFIGURE
GO

EXEC sp_configure 'max degree of parallelism', 4;
GO

EXEC sp_configure 'cost threshold for parallelism', 100
GO

EXEC sp_configure 'max server memory (MB)', 7000;
GO

RECONFIGURE
GO

/****************************************************
Restore database
****************************************************/
use master;
GO

IF DB_ID('BabbyNames') IS NOT NULL
BEGIN
    IF (SELECT state_desc FROM sys.databases WHERE name='BabbyNames') = 'ONLINE'
    ALTER DATABASE BabbyNames
        SET SINGLE_USER
        WITH ROLLBACK IMMEDIATE;
END
GO

/* What can I say, I like to carefully lay out my files.*/
RESTORE DATABASE BabbyNames
    FROM DISK=N'S:\MSSQL\Backup\BabbyNames_1of4.bak',
    DISK=N'S:\MSSQL\Backup\BabbyNames_2of4.bak',
    DISK=N'S:\MSSQL\Backup\BabbyNames_3of4.bak',
    DISK=N'S:\MSSQL\Backup\BabbyNames_4of4.bak'
    WITH
        MOVE 'BabbyNames' TO 'S:\MSSQL\Data\BabbyNames.mdf',
		MOVE 'fg_FirstNameByBirthDate_f1' TO 'S:\MSSQL\Data\BabbyNames_fg_FirstNameByBirthDate_f1.ndf',
		MOVE 'fg_FirstNameByBirthDate_f2' TO 'S:\MSSQL\Data\BabbyNames_fg_FirstNameByBirthDate_f2.ndf',
		MOVE 'fg_FirstNameByBirthDate_f3' TO 'S:\MSSQL\Data\BabbyNames_fg_FirstNameByBirthDate_f3.ndf',
		MOVE 'fg_FirstNameByBirthDate_f4' TO 'S:\MSSQL\Data\BabbyNames_fg_FirstNameByBirthDate_f4.ndf',
        MOVE 'BabbyNames_log' TO 'S:\MSSQL\Data\BabbyNames_log.ldf',
		MOVE 'MemoryOptimizedData' TO 'S:\MSSQL\Data\BabbyNames_MemoryOptimizedData',
        REPLACE,
        RECOVERY;
GO

02-filtered-index.sql
/*****************************************************************************
Copyright (c) 2017 SQL Workbooks LLC
Terms of Use: https://sqlworkbooks.com/terms-of-service/
Contact: help@sqlworkbooks.com
*****************************************************************************/

RAISERROR ('Did you mean to run the whole thing?', 20, 1) WITH LOG;
GO




/* To start out, we did some testing, safely away from production */
USE BabbyNames;
GO
CREATE OR ALTER PROC dbo.ReviewFlags
    @Flag1 TINYINT = 1
AS
    BEGIN TRY
        IF ISNULL(@Flag1,0) NOT IN (1, 2, 3, 4, 5)
            RAISERROR ('@Flag1 must be a number between 1 and 5', 11, 1);

        SELECT 
            fnbd.FirstNameByBirthDateId, 
            fnbd.FakeBirthDateStamp, 
            fn.FirstName,
            fnbd.StateCode,
            fnbd.Gender,
            fnbd.BirthYear
        FROM dbo.FirstNameByBirthDate as fnbd
        JOIN ref.FirstName as fn on
            fnbd.FirstNameId = fn.FirstNameId
        WHERE fnbd.Flag1 = @Flag1;
    END TRY
    BEGIN CATCH
        THROW;
    END CATCH
GO

/* We're using this table in our demo today.
It has one row per baby born in the USA since 1970.
CX is ~3.3GB, ~146.5 million rows.
*/
exec sp_help 'dbo.FirstNameByBirthDate';
GO



/* Baseline: no nonclustered index */
/* Look at the actual plan, then the stats IO & time */
SET STATISTICS IO, TIME ON;
GO
EXEC dbo.ReviewFlags @Flag1 = 2;
GO
SET STATISTICS IO, TIME OFF;
GO

--Table 'FirstNameByBirthDate'. Scan count 5, logical reads 430026, physical reads 0, read-ahead reads 420153, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
--Table 'FirstName'. Scan count 5, logical reads 1399, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
--Table 'Workfile'. Scan count 0, logical reads 0, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
--Table 'Worktable'. Scan count 0, logical reads 0, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.

-- SQL Server Execution Times:
--   CPU time = 14375 ms,  elapsed time = 5259 ms.





/* Create a filtered index */
CREATE INDEX ix_FirstNameByBirthDate_Flag1_INCLUDES_FILTER
    on dbo.FirstNameByBirthDate
    (Flag1)
    INCLUDE (FirstNameByBirthDateId, FakeBirthDateStamp, FirstNameId, StateCode, Gender)
    WHERE  (Flag1 IN (1,2,3,4,5) );
GO



/* How big is this index? (MB and rows) */
SELECT
	si.index_id,
	si.name as index_name,
	si.fill_factor,
	si.is_primary_key,
	ps.reserved_page_count * 8./1024. as reserved_MB,
	ps.lob_reserved_page_count * 8./1024. lob_reserved_MB,
	ps.row_count
FROM sys.dm_db_partition_stats ps
JOIN sys.indexes si on ps.object_id=si.object_id and ps.index_id=si.index_id
JOIN sys.objects so on si.object_id=so.object_id
JOIN sys.schemas sc on so.schema_id=sc.schema_id
WHERE sc.name='dbo' and
    so.name='FirstNameByBirthDate';
GO


/* Will ReviewFlags use it? 
Look at the actual plan.
What does the warning mean?*/
EXEC dbo.ReviewFlags @Flag1 = 2;
GO


/* 
We have a parameterization problem...

Our query uses WHERE fnbd.Flag = @Flag1
Our index says WHERE Flag IN (1,2,3,4,5)

What if SQL Server caches a plan for @Flag1 = 2
Then tries to reuse the plan for @Flag1=7?

*/


/* 
There are a few ways to fix this. 
They aren't all created equal! We could use a RECOMPILE hint, 
but think of that as lighting a $dollar$ on fire every time it runs.

We could change the procedure...
*/


CREATE OR ALTER PROC dbo.ReviewFlags_Revised
    @Flag1 TINYINT = 1
AS
    BEGIN TRY
        IF ISNULL(@Flag1,0) NOT IN (1, 2, 3, 4, 5)
            RAISERROR ('@Flag1 must be a number between 1 and 5', 11, 1);

        SELECT 
            fnbd.FirstNameByBirthDateId, 
            fnbd.FakeBirthDateStamp, 
            fn.FirstName,
            fnbd.StateCode,
            fnbd.Gender,
            fnbd.BirthYear
        FROM dbo.FirstNameByBirthDate as fnbd
        JOIN ref.FirstName as fn on
            fnbd.FirstNameId = fn.FirstNameId
        WHERE fnbd.Flag1 = @Flag1
        and fnbd.Flag1 IN (1, 2, 3, 4, 5); /* <-- added */
    END TRY
    BEGIN CATCH
        THROW;
    END CATCH
GO

SET STATISTICS TIME, IO ON;
GO
EXEC dbo.ReviewFlags_Revised @Flag1 = 2;
GO
SET STATISTICS TIME, IO OFF;
GO



/************************************************************************
What if we introduce flag 6? 

We'll have to change the index AND queries that list the flags explicitly
In our case, there's a simpler solution
We change the filter!
************************************************************************/


/* Just a reminder of what our procedure looks like.
Notice that it does not say "IS NOT NULL" anywhere.
*/
CREATE OR ALTER PROC dbo.ReviewFlags
    @Flag1 TINYINT = 1
AS
    BEGIN TRY
        IF ISNULL(@Flag1,0) NOT IN (1, 2, 3, 4, 5)
            RAISERROR ('@Flag1 must be a number between 1 and 5', 11, 1);

        SELECT 
            fnbd.FirstNameByBirthDateId, 
            fnbd.FakeBirthDateStamp, 
            fn.FirstName,
            fnbd.StateCode,
            fnbd.Gender,
            fnbd.BirthYear
        FROM dbo.FirstNameByBirthDate as fnbd
        JOIN ref.FirstName as fn on
            fnbd.FirstNameId = fn.FirstNameId
        WHERE fnbd.Flag1 = @Flag1;
    END TRY
    BEGIN CATCH
        THROW;
    END CATCH
GO

DROP INDEX IF EXISTS ix_FirstNameByBirthDate_Flag1_INCLUDES_FILTER
    on dbo.FirstNameByBirthDate;
GO

CREATE INDEX ix_FirstNameByBirthDate_Flag1_INCLUDES_FILTER
    on dbo.FirstNameByBirthDate
    (Flag1)
    INCLUDE (FirstNameByBirthDateId, FakeBirthDateStamp, FirstNameId, StateCode, Gender)
    --WHERE  (Flag1 IN (1,2,3,4,5) )
    WHERE  (Flag1 IS NOT NULL );
GO


/* Does it match? */
SET STATISTICS IO, TIME ON;
GO
EXEC dbo.ReviewFlags @Flag1 = 2;
GO
SET STATISTICS IO, TIME OFF;
GO







/************************************************************************
Whew, we got the index to be used, and took advantage of 
SQL Server being smart about matching "is not null" to equality predicates
************************************************************************/
/* We're ready to deploy to production
This will be great
RIGHT?�




What could we have missed?�

*/














/* It turns out that inserts from our application run with
some different session settings than we realized.
Here's a simulation.... */
SET ANSI_NULLS OFF;
GO
BEGIN TRAN

INSERT dbo.FirstNameByBirthDate (FakeBirthDateStamp, StateCode, FirstNameId, Gender, Flag1)
SELECT SYSDATETIME(), 'OR', 2, 'F', NULL;


ROLLBACK
GO
SET ANSI_NULLS ON;
GO


/* To get inserts going, we need to drop the index or change the application
to use different SET options when it connects (probably not a quick change) */
DROP INDEX ix_FirstNameByBirthDate_Flag1_INCLUDES_FILTER
    on dbo.FirstNameByBirthDate;
GO

/* Now re-test inserts */



/* Ouch, it would have been better to discover that BEFORE deploying to production. */



/* Reset set options for this session */
SET ANSI_NULLS, QUOTED_IDENTIFIER, ANSI_PADDING, ANSI_WARNINGS, ARITHABORT, CONCAT_NULL_YIELDS_NULL ON;
GO
SET NUMERIC_ROUNDABORT OFF;
GO


/* Cleanup */
DROP TABLE IF EXISTS dbo.FirstNameByBirthDateNEWBIE;
GO






/***********************************************************************************/
/* Reference Queries: Additional queries looking for trouble with SET OPTIONS      */
/* These aren't foolproof, always test outside of prod first                       */
/* Reference: https://technet.microsoft.com/en-us/library/ms175088(v=sql.105).aspx */
/***********************************************************************************/

/* Non-system tables ANSI_NULLS OFF */
SELECT 
	sc.name as schema_name,
	t.name as table_name
FROM sys.tables AS t
JOIN sys.schemas AS sc on t.schema_id = sc.schema_id
WHERE
	t.is_ms_shipped=0
	AND t.uses_ansi_nulls = 0
ORDER BY sc.name, t.name
GO


/* Non-system tables with char, varchar, binary, and varbinary columns with ANSI_PADDING OFF */
SELECT 
	sc.name as schema_name,
	t.name as table_name, 
	c.name as column_name
FROM sys.tables AS t
JOIN sys.schemas AS sc on t.schema_id = sc.schema_id
JOIN sys.columns AS c on c.object_id=t.object_id
JOIN sys.types AS ty on c.system_type_id=ty.system_type_id
WHERE
	t.is_ms_shipped=0
	AND c.is_ansi_padded=0
	and ty.name IN ('char', 'varchar', 'binary', 'varbinary')
ORDER BY sc.name, t.name, c.name
GO


/* ANSI_WARNINGS --  */
/* This property doesn't stick around with the table, but if a
session has it turned OFF, that session will be unable to modify filtered indexes,
indexed computed columns, or indexed views.
This includes not being able to insert/update/delete! */

/* Current user sessions with ANSI_WARNINGS off */
SELECT *
FROM sys.dm_exec_sessions
WHERE ansi_warnings=0
	and is_user_process=1;
GO


/* Current user sessions with ARITHABORT OFF  */
SELECT *
FROM sys.dm_exec_sessions
WHERE arithabort = 0
	and is_user_process=1;
GO

/* Current user sessions with CONCAT_NULL_YIELDS_NULL OFF  */
SELECT *
FROM sys.dm_exec_sessions
WHERE concat_null_yields_null = 0
	and is_user_process=1;
GO

/* Databases with NUMERIC_ROUNDABORT ON.  */
/* This is a very old, rarely changed property. */
/* The only place in the metadata you can see it besides your own session is sys.databases.*/
SELECT *
FROM sys.databases
WHERE is_numeric_roundabort_on=1;
GO

/* Non-system objects with QUOTED_IDENTIFIER OFF  */
SELECT 
	name, 
	create_date
FROM sys.objects AS so
WHERE 
	OBJECTPROPERTYEX(object_id, 'ExecIsQuotedIdentOn') = 0
	AND type <> 'S'
GO


03-indexed-view.sql
/*****************************************************************************
Copyright (c) 2017 SQL Workbooks LLC
Terms of Use: https://sqlworkbooks.com/terms-of-service/
Contact: help@sqlworkbooks.com
*****************************************************************************/


RAISERROR(N'Did you mean to run the whole thing?',20,1) WITH LOG;
GO

USE BabbyNames;
GO

/* We need to support some new queries that aggregate name count by year. 
This takes ~28 seconds to run */
SET STATISTICS TIME, IO ON;
GO
SELECT TOP 100
	fn.FirstName, 
	YEAR(fnby.FakeBirthDateStamp) as BirthYear,
	COUNT(*) AS NamedThatYear
FROM dbo.FirstNameByBirthDate AS fnby
JOIN ref.FirstName AS fn on fnby.FirstNameId=fn.FirstNameId
GROUP BY 
	fn.FirstName,
	YEAR(fnby.FakeBirthDateStamp)
ORDER BY COUNT(*) DESC
GO
SET STATISTICS TIME, IO OFF;
GO


--Table 'FirstName'. Scan count 5, logical reads 1399, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
--Table 'FirstNameByBirthDate'. Scan count 5, logical reads 429889, physical reads 8519, read-ahead reads 147988, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
--Table 'Worktable'. Scan count 0, logical reads 0, physical reads 0, read-ahead reads 1246, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
--Table 'Workfile'. Scan count 0, logical reads 0, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.

--(1 row affected)

-- SQL Server Execution Times:
--   CPU time = 104860 ms,  elapsed time = 30851 ms.





/* SCHEMABINDING is required to index this after it is created */
CREATE OR ALTER VIEW dbo.FirstNameCountByYear
WITH SCHEMABINDING
AS
SELECT
	fn.FirstName, 
	YEAR(fnby.FakeBirthDateStamp) as BirthYear,
	COUNT_BIG(*) AS NamedThatYear
FROM dbo.FirstNameByBirthDate AS fnby
JOIN ref.FirstName AS fn on fnby.FirstNameId=fn.FirstNameId
GROUP BY 
	fn.FirstName,
	YEAR(fnby.FakeBirthDateStamp)
GO

/* This materializes the index and takes ~30 seconds 
Unique CX must be created first */
CREATE UNIQUE CLUSTERED INDEX cx_FirstNameCountByYear_FirstName_BirthYear
	ON dbo.FirstNameCountByYear (FirstName, BirthYear);
GO




/* How big is this index? (MB and rows) */
SELECT
	si.index_id,
	si.name as index_name,
	si.fill_factor,
	si.is_primary_key,
	ps.reserved_page_count * 8./1024. as reserved_MB,
	ps.lob_reserved_page_count * 8./1024. lob_reserved_MB,
	ps.row_count
FROM sys.dm_db_partition_stats ps
JOIN sys.indexes si on ps.object_id=si.object_id and ps.index_id=si.index_id
JOIN sys.objects so on si.object_id=so.object_id
JOIN sys.schemas sc on so.schema_id=sc.schema_id
WHERE sc.name='dbo' and
    so.name='FirstNameCountByYear';
GO




/* Does this query use it? 
Note that the query doesn't refer to the view by name -
I haven't changed the query at all.*/
SET STATISTICS TIME, IO ON;
GO
SELECT TOP 100
	fn.FirstName, 
	YEAR(fnby.FakeBirthDateStamp) as BirthYear,
	COUNT(*) AS NamedThatYear
FROM dbo.FirstNameByBirthDate AS fnby
JOIN ref.FirstName AS fn on fnby.FirstNameId=fn.FirstNameId
GROUP BY 
	fn.FirstName,
	YEAR(fnby.FakeBirthDateStamp)
ORDER BY COUNT(*) DESC
GO
SET STATISTICS TIME, IO ON;
GO

/* Auto-matching the query to the view is an $Enterprise$ feature */

--(100 rows affected)
--Table 'FirstNameCountByYear'. Scan count 1, logical reads 1336, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.

--(1 row affected)









/* What if we add a WHERE clause to the query? 
Look at the estimated plan*/
SELECT TOP 100
	fn.FirstName, 
	YEAR(fnby.FakeBirthDateStamp) as BirthYear,
	COUNT(*) AS NamedThatYear
FROM dbo.FirstNameByBirthDate AS fnby
JOIN ref.FirstName AS fn on fnby.FirstNameId=fn.FirstNameId
WHERE 
    YEAR(fnby.FakeBirthDateStamp) = 2015 /* <-- Added */
GROUP BY 
	fn.FirstName,
	YEAR(fnby.FakeBirthDateStamp)
ORDER BY COUNT(*) DESC
GO

/* Rewrite the query to use the indexed view directly with the WHERE clause */
/* Look at the estimated plan */
SELECT TOP 100
	FirstName, 
    NamedThatYear
FROM dbo.FirstNameCountByYear
WHERE 
    BirthYear = 2015
ORDER BY NamedThatYear DESC;
GO


/* Add WITH (NOEXPAND). 
This is required in Standard Edition and often needed in Enterprise/Developer */
SET STATISTICS TIME, IO ON;
GO

SELECT TOP 100
	FirstName, 
    NamedThatYear
FROM dbo.FirstNameCountByYear WITH (NOEXPAND)
WHERE 
    BirthYear = 2015
ORDER BY NamedThatYear DESC
GO
SET STATISTICS TIME, IO OFF;
GO



/*
Let�s deploy to production
Pat yourself on the back and go for a cup of coffee.






This will be great
RIGHT?�
What could we have missed?
*/















/* We get a report that every time new names are reported, things are getting weirdly slow.
Run this with actual plans*/
SET STATISTICS TIME, IO ON;
GO
BEGIN TRAN
	INSERT ref.FirstName(FirstName, FirstReportYear, LastReportYear, TotalNameCount)
	VALUES ('Misteroo', 2021, 2021, 1)

ROLLBACK
SET STATISTICS TIME, IO OFF;
GO














/* 
Every time we insert a row into ref.FirstName...

SQL Server checks if it needs to�update the indexed view

It follows the joins in the view definition to see:
    Are related rows in dbo.FirstNameByBirthDate?
    If so, I�m going to need to update the view!

There�s no nonclustered index to make that check fast
So it�s scanning the clustered index
	

*/


/* We COULD create an index... */
CREATE INDEX ix_HelpMyIndexedView
	ON dbo.FirstNameByBirthDate (FirstNameId) INCLUDE (FakeBirthDateStamp);
GO


/* But what if we create a foreign key? 
You don't HAVE to specify WITH CHECK when adding, 
but doing it all the time you want it checked is a good practice*/
ALTER TABLE dbo.FirstNameByBirthDate 
    WITH CHECK
ADD CONSTRAINT FK_FirstNameByBirthDate_FirstNameId FOREIGN KEY (FirstNameId) 
    REFERENCES ref.FirstName (FirstNameId);
GO

/* In this case, the foreign key defines:
	ref.FirstName.FirstNameId is the parent of FirstNameIds in dbo.FirstNameByBirthDate

	If the key is checked, SQL Server knows there are no orphans in dbo.FirstNameByBirthDate

	So a new FirstNameId in ref.FirstName can't have any matching rows in dbo.FirstNameByBirthDate yet

*/



/* How are inserts now? */
/* Run the query. Then look at the plan */
SET STATISTICS IO, TIME ON;
GO
BEGIN TRAN
	INSERT ref.FirstName(FirstName, FirstReportYear, LastReportYear, TotalNameCount)
	VALUES ('Misteroo', 2021, 2021, 1)

ROLLBACK
SET STATISTICS IO, TIME OFF;
GO



/* Extra credit: The foreign key only helps in the case of inserts.
If we update FirstName in ref.FirstName, it is going to have to update the indexed view.
It will need a nonclustered index to speed up this query.

View the plan for this before creating a supporting nonclustered index. 
You can either design your own, or scroll up to see the one I suggest.
Create the index and verify that SQL Server then uses it.

ROLLBACK is used here to make this re-runnable.
*/
BEGIN TRAN

    UPDATE ref.FirstName
    SET FirstName = 'Kendarr'
    WHERE FirstName = 'Kendar'

ROLLBACK



/* Cleanup */
DROP INDEX IF EXISTS ix_HelpMyIndexedView ON dbo.FirstNameByBirthDate;
GO
DROP INDEX IF EXISTS cx_FirstNameCountByYear_FirstName_BirthYear ON dbo.FirstNameCountByYear;
GO
DROP VIEW  IF EXISTS dbo.FirstNameCountByYear;
GO


04-index-cleanup.sql
/*****************************************************************************
Copyright (c) 2017 SQL Workbooks LLC
Terms of Use: https://sqlworkbooks.com/terms-of-service/
Contact: help@sqlworkbooks.com
*****************************************************************************/

RAISERROR ('Did you mean to run the whole thing?', 20, 1) WITH LOG;
GO

USE BabbyNames;
GO


/* Setup for this demo */

CREATE INDEX ix_ref_FirstName_FirstNameId_INCLUDES on ref.FirstName (FirstNameId) 
    INCLUDE (FirstName);
GO
CREATE INDEX ix_agg_FirstNameByYear_ReportYear_FirstNameId on agg.FirstNameByYear (ReportYear, FirstNameId)
GO
CREATE INDEX ix_agg_FirstNameByYearState_ReportYear_FirstNameId on agg.FirstNameByYearState (ReportYear, FirstNameId )
GO



/* We are doing some cleanup, and we hit a duplicate key situation... */

exec sp_helpindex 'ref.FirstName';
GO

/* We check the index 
It has an included column of FirstName
But the clustered index of the table has all the columns, too
It's really fast to find any FirstName with the clustered index
 */
SET STATISTICS TIME, IO ON;
GO

    SELECT FirstName
    FROM ref.FirstName WITH (INDEX(1))
    WHERE FirstNameId = 10

    SELECT FirstName
    FROM ref.FirstName WITH (INDEX([ix_ref_FirstName_FirstNameId_INCLUDES]))
    WHERE FirstNameId = 10

SET STATISTICS TIME, IO OFF;
GO

/*
We know that unneeded indexes waste resources: disk, memory (when used), 
index mainteance time/perf, checkdb time / perf, backup time / space, 
plus there's overhead for data modifications


We know that if any index hints reference the index by name, they will start failing.
We check the codebase for those, there aren't any hints for this index.

So we clean up the duplicate NC.
*/





DROP INDEX ix_ref_FirstName_FirstNameId_INCLUDES on ref.FirstName;
GO







/* 

What could go wrong?







*/












/* 
When we change indexes, we may inadvertantly cause a deadlock scenario.
*/



/* Imagine that we have a procedure like this-- 
except it'd be smart enough to handle when zero rows are modified, error handling, etc. */
CREATE OR ALTER PROCEDURE dbo.UpdateExistingNameData
    @ReportYear INT,
    @FirstName VARCHAR(255),
    @Gender CHAR(1),
    @NameCount INT,
    @StateCode CHAR(2)
AS
    DECLARE @output table (FirstNameId INT);
    DECLARE @FirstNameId INT;

BEGIN TRAN
    UPDATE ref.FirstName
    SET TotalNameCount += @NameCount
        OUTPUT inserted.FirstNameId INTO @output
    WHERE FirstName = @FirstName;

    SELECT @FirstNameId = FirstNameId from @output;

    -- When you get to this step in the debugger, pause and run the query below in a second SSMS app 
    UPDATE agg.FirstNameByYearState
        SET NameCount += @NameCount
    WHERE
        ReportYear = @ReportYear
        AND FirstNameId = @FirstNameId
        AND Gender = @Gender 
        AND StateCode = @StateCode;
COMMIT
GO



--Step into this procedure with the TSQL Debugger
EXEC dbo.UpdateExistingNameData
    @ReportYear = 2015,
    @FirstName = 'Kendra',
    @Gender = 'F',
    @NameCount = 5,
    @StateCode = 'OR'
GO




--Start up a second SSMS. Run this when you hit the note to do so in the debugger
USE BabbyNames;
GO
BEGIN TRAN

    SELECT fn.FirstName, COUNT(*)
    FROM agg.FirstNameByYearState as fnbys WITH (HOLDLOCK)
    JOIN ref.FirstName as fn on fnbys.FirstNameId=fn.FirstNameId
    WHERE fnbys.ReportYear = 2014
    GROUP by fn.FirstName

    SELECT fn.FirstName, COUNT(*)
    FROM agg.FirstNameByYear as fnby WITH (HOLDLOCK)
    JOIN ref.FirstName as fn on fnby.FirstNameId=fn.FirstNameId
    WHERE fnby.ReportYear = 2014
    GROUP by fn.FirstName

COMMIT





--Add the index back, then run through the sequence again
--Don't forget to close the old debugger session window if you left it open!
CREATE INDEX ix_ref_FirstName_FirstNameId_INCLUDES on ref.FirstName (FirstNameId) 
    INCLUDE (FirstName);
GO