Walk through of Microsoft SQL Server Change Data Capture

Chris Skorlinski
Microsoft SQL Server Escalation Services

This sample script walks through some of the Microsoft Change Data Captures functions.  Copy and past the script into SQL Server Management Studio and execute each command.  Sample output is included in the script. This demo is based on the [AdventureWorksLT2008] database

For an Overview of Change Data Capture, you can download a PDF slidedeck from a presentation I gave at SQLSaturday.

USE [AdventureWorksLT2008]
GO

-- Create Sample table used for CDC Demo
CREATE TABLE [SalesLT].[CustomerCDCDemo](
    [CustomerID] [int] IDENTITY(1,1) NOT NULL,
    [FirstName] [dbo].[Name] NOT NULL,
    [LastName] [dbo].[Name] NOT NULL,
    [Phone] [dbo].[Phone] NULL,
    [CreditLimit] Money NULL,
    [ModifiedDate] [datetime] NOT NULL,
CONSTRAINT [PK_CustomerCDCDemo] PRIMARY KEY CLUSTERED 
(
    [CustomerID] ASC
) ON [PRIMARY]
) ON [PRIMARY]
GO


ALTER TABLE [SalesLT].[CustomerCDCDemo] ADD  CONSTRAINT [DF_CustomerCDCDemo_ModifiedDate]  DEFAULT (getdate()) FOR [ModifiedDate]
GO

-- Load Sample table with data
SET Identity_Insert AdventureWorksLT2008.[SalesLT].[CustomerCDCDemo] ON
GO

INSERT [SalesLT].[CustomerCDCDemo](
[CustomerID],[FirstName],[LastName],[Phone],[CreditLimit],ModifiedDate)
SELECT [CustomerID],[FirstName],[LastName],[Phone],1000,ModifiedDate
FROM SalesLT.Customer
GO

--(847 row(s) affected)

use AdventureWorksLT2008
go

--
-- Turn on Change Data Capture at the database level
--
exec sys.sp_cdc_enable_db


--
-- Turn on CDC for the sample [CustomerCDCDemo] table
--    created in the Schema Setup script

sys.sp_cdc_enable_table
    @source_schema = N'SalesLT'
    ,@source_name = N'CustomerCDCDemo'
    ,@role_name = N'cdc_admin'
    ,@capture_instance = N'CustomerCredit'    -->>Name used in CDC functions
    ,@supports_net_changes = 1    
    ,@index_name = NULL
        -->>Captured only required columns
    ,@captured_column_list = 'CustomerID,FirstName,LastName,Phone,CreditLimit,ModifiedDate'
        -->>Store "captured" data on different disk
    ,@filegroup_name = NULL                    

-- 2 Jobs are automatically created.

--    Job 'cdc.AdventureWorksLT2008_capture' started successfully.
--    Job 'cdc.AdventureWorksLT2008_cleanup' started successfully.

--Next explore System Tables created by CDC
SELECT * FROM cdc.change_tables
SELECT * FROM cdc.captured_columns
SELECT * FROM cdc.index_columns
SELECT * FROM cdc.ddl_history
SELECT * FROM cdc.CustomerCredit_CT
SELECT * FROM cdc.lsn_time_mapping



-- >> DEMO using Change Data Capture functrions
--    cdc_get_all and cdc_get_net changes

use AdventureWorksLT2008
go

--===================================================
-- Perform changes to SalesLT.[CustomerCDCDemo] table
-- Run each statement one at a time
--===================================================
insert into SalesLT.[CustomerCDCDemo] 
    ([FirstName],[LastName],[Phone],[CreditLimit])
values
    (N'Chris','Skorlinski','803-555-1212',1000)    
Go

-- Update Name
update SalesLT.[CustomerCDCDemo] 
set [FirstName] = N'Christopher'    
where [Phone] = '803-555-1212'
Go

-- Update Credit limit
update SalesLT.[CustomerCDCDemo] 
set CreditLimit = 3000        -- from 1000 to 3000
where [Phone] = '803-555-1212'

--Show new record with corrected name 
--    and increased Creditlimit
select [CustomerID],[FirstName],[LastName],[Phone],[CreditLimit]
from SalesLT.[CustomerCDCDemo] 
where [Phone] = '803-555-1212'
go

CustomerID  FirstName                           LastName                            Phone                     CreditLimit
----------- ----------------------------------- ----------------------------------- ------------------------- ---------------------
30119       Christopher                         Skorlinski                          803-555-1212              3000.00

(1 row(s) affected)    


/*===================================================
    Use CDC Functions to retrieve history of changes.
    Can return "current" values or both "current" and "previous"

         cdc.fn_cdc_get_ALL_changes (@begin_lsn, @end_lsn)
         cdc.fn_cdc_get_NET_changes (@begin_lsn, @end_lsn)
         
    @begin_lsn = the start_lsn value from the cdc.change_tables system table. 
    @end_lsn = the max lsn value in the cdc.change_tables system table. 
--===================================================
*/
declare @begin_lsn binary(10), @end_lsn binary(10)

set @begin_lsn = sys.fn_cdc_get_min_lsn('CustomerCredit')
set @end_lsn = sys.fn_cdc_get_max_lsn()

select [__$start_lsn],[__$operation],[__$update_mask]
      ,[FirstName],[CreditLimit]
from cdc.fn_cdc_get_all_changes_CustomerCredit(
    @begin_lsn, @end_lsn, 'all')


__$start_lsn           __$operation __$update_mask                        FirstName                           CreditLimit
---------------------- ------------ ------------------------------------- ----------------------------------- ---------------------
0x00000037000003000004 2            0x3F                                  Chris                               1000.00
0x00000037000003290004 4            0x02                                  Christopher                         1000.00
0x00000037000003340004 4            0x10                                  Christopher                         3000.00

(3 row(s) affected)


/*
Row 1 shows the insert; __$operation=2
Row 2 shows the update of the FirstName; __$operation=4
Row 3 shows the update of the CreditLimit;  __$operation=4 
*/    

/*
>>If we had 100 columns, how would we tell which was the NEW data?

CDC provides helper function to use __$update_mask 
    to identify which column contains the NEW data
    
        0x02 = 00010 = 2nd column = FirstName
        0x10 = 10000 = 5th column = CreditLimit
*/
    
--===================================================
-->>What if we needed to see previous values?
--===================================================    
declare @begin_lsn binary(10), @end_lsn binary(10)

set @begin_lsn = sys.fn_cdc_get_min_lsn('CustomerCredit')
set @end_lsn = sys.fn_cdc_get_max_lsn()
    
select [__$start_lsn],[__$operation],[__$update_mask]
      ,[FirstName],[CreditLimit]
      
from cdc.fn_cdc_get_all_changes_CustomerCredit(
    @begin_lsn, @end_lsn, 'all update old')      -- see previous values
    
    
__$start_lsn           __$operation __$update_mask                        FirstName                           CreditLimit
---------------------- ------------ ------------------------------------- ----------------------------------- ---------------------
0x00000037000003000004 2            0x3F                                  Chris                               1000.00
0x00000037000003290004 3            0x02                                  Chris                               1000.00
0x00000037000003290004 4            0x02                                  Christopher                         1000.00
0x00000037000003340004 3            0x10                                  Christopher                         1000.00
0x00000037000003340004 4            0x10                                  Christopher                         3000.00

(5 row(s) affected)
    

/*
Row 1 shows the insert.                    __$operation=2
Row 2 shows the Previous FirstName.        __$operation=3
Row 3 shows the Updated FirstName.        __$operation=4
Row 4 shows the Previous CreditLimit.    __$operation=3
Row 5 shows the Updated CreditLimit.    __$operation=4
*/

/*===================================================
-->>Row changed multiple time, 
    but I only want to see final "combined" changes.

cdc.fn_cdc_get_net_changes_
 
Special feature, more overhead, must be enabled when
the table is configured for CDC tracking.
 
sys.sp_cdc_enable_table
    ,@supports_net_changes = 1    

--===================================================*/
declare @begin_lsn binary(10), @end_lsn binary(10)

set @begin_lsn = sys.fn_cdc_get_min_lsn('CustomerCredit')
set @end_lsn = sys.fn_cdc_get_max_lsn()

select [__$start_lsn],[__$operation],[__$update_mask]
      ,[FirstName],[CreditLimit]
      
from cdc.fn_cdc_get_net_changes_CustomerCredit(
    @begin_lsn, @end_lsn, 'all')


__$start_lsn           __$operation __$update_mask                        FirstName                           CreditLimit
---------------------- ------------ ------------------------------------- ----------------------------------- ---------------------
0x00000037000003340004 2            NULL                                  Christopher                         3000.00

(1 row(s) affected)


/*===================================================
How can CDC show me what rows were DELETED?

    __$operation = 1
=====================================================*/

-- delete new Customer

delete SalesLT.[CustomerCDCDemo] 
where [Phone] = '803-555-1212'
    

-->>fn_cdc_get_all_changes
declare @begin_lsn binary(10), @end_lsn binary(10)

set @begin_lsn = sys.fn_cdc_get_min_lsn('CustomerCredit')
set @end_lsn = sys.fn_cdc_get_max_lsn()

select [__$start_lsn],[__$operation],[__$update_mask]
      ,[FirstName],[CreditLimit]
      
from cdc.fn_cdc_get_all_changes_CustomerCredit(
    @begin_lsn, @end_lsn, 'all update old')      -- see previous values

__$start_lsn           __$operation __$update_mask                        FirstName                           CreditLimit
---------------------- ------------ ------------------------------------- ----------------------------------- ---------------------
...
0x00000038000000390005 1            0x3F                                  Christopher                         3000.00

-->>>NOTE:  Save the $start_lsn value, <<<<<
--        we'll use that later in the script

--        example: 0x00000038000000390005


/*===================================================
After DELETE, what happened to NET_changed output?
=====================================================*/
declare @begin_lsn binary(10), @end_lsn binary(10)

set @begin_lsn = sys.fn_cdc_get_min_lsn('CustomerCredit')
set @end_lsn = sys.fn_cdc_get_max_lsn()

select [__$start_lsn],[__$operation],[__$update_mask]
      ,[FirstName],[LastName],[Phone],[CreditLimit]
from cdc.fn_cdc_get_net_changes_CustomerCredit(
    @begin_lsn, @end_lsn, 'all')


(0 row(s) affected)

-- The NET effect after the Insert, Update, Delete, is 0 change (no rows)

/*===================================================
How does CDC Tracking Tables which make this happend

    [CustomerCredit_CT] -- name supplied when table was enabled.
    [lsn_time_mapping]-- list transactions and time_stamp for collections 

=====================================================*/

--[CustomerCredit_CT] populated by background "_capture" job 
SELECT [__$start_lsn]
      ,[__$seqval]
      ,[__$operation]
      ,[__$update_mask]
      ,[FirstName]
      ,[CreditLimit]
  FROM [cdc].[CustomerCredit_CT]
GO

__$start_lsn           __$seqval              __$operation __$update_mask                        FirstName                           CreditLimit
---------------------- ---------------------- ------------ ------------------------------------- ----------------------------------- ---------------------
0x00000037000003000004 0x00000037000003000003 2            0x3F                                  Chris                               1000.00
0x00000037000003290004 0x00000037000003290002 3            0x02                                  Chris                               1000.00
0x00000037000003290004 0x00000037000003290002 4            0x02                                  Christopher                         1000.00
0x00000037000003340004 0x00000037000003340002 3            0x10                                  Christopher                         1000.00
0x00000037000003340004 0x00000037000003340002 4            0x10                                  Christopher                         3000.00
0x00000038000000390005 0x00000038000000390002 1            0x3F                                  Christopher                         3000.00

(6 row(s) affected)



--One row for each batch of transactions captured
SELECT [start_lsn]            -- 0x0000003600000A820005 lsn last capture
      ,[tran_begin_time]
      ,[tran_end_time]
      ,[tran_id]
      ,[tran_begin_lsn]
  FROM [cdc].[lsn_time_mapping]
  

start_lsn              tran_begin_time         tran_end_time           tran_id                tran_begin_lsn
---------------------- ----------------------- ----------------------- ---------------------- ----------------------
0x0000003500001ECC004D 2010-09-30 15:39:07.543 2010-09-30 15:39:07.543 0x00                   0x00000000000000000000
  <<insert transaction>>>
0x00000037000003000004 2010-09-30 15:41:09.480 2010-09-30 15:41:09.480 0x0000000013EF         0x00000037000003000002
  <<2 update transactions>>
0x00000037000003290004 2010-09-30 15:41:18.553 2010-09-30 15:41:18.553 0x0000000013F9         0x00000037000003290001
0x00000037000003340004 2010-09-30 15:41:47.767 2010-09-30 15:41:47.767 0x000000001401         0x00000037000003340001
<<<pause in activity, no data captured>>>
0x00000038000000230001 2010-09-30 15:46:50.000 2010-09-30 15:46:50.000 0x00                   0x00000000000000000000
<<<delete transaction>>>
0x00000038000000390005 2010-09-30 15:48:03.493 2010-09-30 15:48:03.493 0x000000001455         0x00000038000000390001

(6 row(s) affected)


--===================================================
--Run Multiple INSERT/UPDATEs
--===================================================
insert into SalesLT.[CustomerCDCDemo] 
    ([FirstName],[LastName],[Phone],[CreditLimit])
values
    (N'Erica','Skorlinski','704-555-1212',1000)    

update SalesLT.[CustomerCDCDemo] 
set CreditLimit = CreditLimit + 1500
where [CustomerID] <10

-- 2nd update
update SalesLT.[CustomerCDCDemo] 
set CreditLimit = CreditLimit + 50
where [CustomerID] <10

update SalesLT.[CustomerCDCDemo] 
set CreditLimit = CreditLimit + 3000
where LastName like 'S%'

DELETE SalesLT.[CustomerCDCDemo] 
where LastName like 'L%'


--===================================================
--Retrieve changed data
--===================================================
declare @begin_lsn binary(10), @end_lsn binary(10)

set @begin_lsn = sys.fn_cdc_get_min_lsn('CustomerCredit')
set @end_lsn = sys.fn_cdc_get_max_lsn()

select [__$start_lsn],[__$operation],[__$update_mask]
      ,[FirstName],[LastName],[CreditLimit]
      
from cdc.fn_cdc_get_all_changes_CustomerCredit(
    @begin_lsn, @end_lsn, 'all')

__$start_lsn           __$operation __$update_mask                        FirstName                           LastName                            CreditLimit
---------------------- ------------ ------------------------------------- ----------------------------------- ----------------------------------- ---------------------
0x00000037000003000004 2            0x3F                                  Chris                               Skorlinski                          1000.00
0x00000037000003290004 4            0x02                                  Christopher                         Skorlinski                          1000.00
0x00000037000003340004 4            0x10                                  Christopher                         Skorlinski                          3000.00
0x00000038000000390005 1            0x3F                                  Christopher                         Skorlinski                          3000.00
0x00000038000000B90004 2            0x3F                                  Erica                               Skorlinski                          1000.00
0x00000038000000BA0010 4            0x10                                  Orlando                             Gee                                 2500.00
...
0x00000038000000EB0040 1            0x3F                                  Robert                              Lyeba                               1000.00
0x00000038000000EB0040 1            0x3F                                  Robert                              Lyon                                1000.00
(123 row(s) affected)


/*===================================================

We still see data for DELETED     "'Chris','Skorlinski','803-555-1212'"; why?      
    
Answer: We're pulling ALL changes for since CDC was enabled, not just recently changed data.
How do we only retrive the NEWly change data?

Answer: We'll need to start searching all data
        modified after the first CDC capture.
        We do this my increasing the MIN LSN (or starting position)
        to the MAX LSN (ending position) of the previous CDC capture
        
        MAX LSN = 0x00000038000000390005

        New @begin_lsn = Previous @end_lsn
         
Tip: Use the previous @end_lsn as the "seed" for the
     fn_cdc_increment_lsn function to find the next "valid" @begin_lsn.         

=====================================================*/

declare @begin_lsn binary(10), @end_lsn binary(10)

set @begin_lsn = sys.fn_cdc_increment_lsn(0x00000038000000390005) --lsn saved 1st run
set @end_lsn = sys.fn_cdc_get_max_lsn()

select [__$start_lsn],[__$operation],[__$update_mask]
      ,[FirstName],[CreditLimit]
      
from cdc.fn_cdc_get_all_changes_CustomerCredit(
    @begin_lsn, @end_lsn, 'all')
GO


__$start_lsn           __$operation __$update_mask                        FirstName                           CreditLimit
---------------------- ------------ ------------------------------------- ----------------------------------- ---------------------
0x00000038000000B90004 2            0x3F                                  Erica                               1000.00
0x00000038000000BA0010 4            0x10                                  Orlando                             2500.00
...
0x00000038000000EB0040 1            0x3F                                  Judy                                1000.00
0x00000038000000EB0040 1            0x3F                                  Robert                              1000.00
0x00000038000000EB0040 1            0x3F                                  Robert                              1000.00

(119 row(s) affected)


-->Show just NET changes since last run
declare @begin_lsn binary(10), @end_lsn binary(10)

set @begin_lsn = sys.fn_cdc_increment_lsn(0x0000003600000A820005)
set @end_lsn = sys.fn_cdc_get_max_lsn()

select [__$start_lsn],[__$operation],[__$update_mask]
      ,[FirstName],[LastName],[Phone],[CreditLimit]
from cdc.fn_cdc_get_net_changes_CustomerCredit(
    @begin_lsn, @end_lsn, 'all')


--8 rows were changed twice, only seeing the NET effect.
(111 row(s) affected)


/*===================================================
What are different techniques to remember the @end_lsn
in order to use it as the @begin_lsn on the next run?

-- Track the history of processing CDC by capture instance table
--    http://msdn.microsoft.com/en-us/library/bb895315.aspx

create table SalesLT.tblCDCRunHistory_log (
    cdc_capture_log_id    int identity not null
,    capture_instance    nvarchar(50) not null    
,    begin_time            datetime not null    
,    begin_lsn            binary(10) not null
,    end_lsn                binary(10) not null
,    end_time            datetime null
,    status_code            int not null default 0
)

=====================================================*/


--Next explore System Tables created by CDC
SELECT * FROM cdc.change_tables
SELECT * FROM cdc.captured_columns
SELECT * FROM cdc.index_columns
SELECT * FROM cdc.ddl_history
SELECT * FROM cdc.CustomerCredit_CT
SELECT * FROM cdc.lsn_time_mapping


--
-- To turn off Change Data Capture
--
exec sys.sp_cdc_disable_table 
  @source_schema = 'SalesLT' , 
  @source_name = 'CustomerCDCDemo',
  @capture_instance = 'CustomerCredit'
Go  

Drop Table  SalesLT.CustomerCDCDemo
go

exec sys.sp_cdc_disable_db
go


Command(s) completed successfully.