Example: Index fragmentation with insert/updates, measuring it and fixing it

Example: Index fragmentation with insert/updates, measuring it and fixing it

Rate This
  • Comments 1

This blog shows a very simple example to illustrate data fragmentation, measuring it, seeing behind the scene data and steps to address it. I recommend reading the overall series on index fragementation at http://blogs.msdn.com/sqlserverstorageengine/archive/tags/Index+Fragmentation+Series/default.aspx

 

 

-- TSQL Script

create database indextest

go

 

 

use indextest

go

 

 

-- create the index after loading the data

drop table t_ci

go

 

create table t_ci (c1 int, c2 char (100), c3 int, c4 varchar(1000))

go

 

 

-- load the data

declare @i int

select @i = 0

while (@i < 1000)

begin

insert into t_ci values (@i, 'hello', @i+10000, replicate ('a', 100))

set @i = @i + 1

end

 

-- find fragmentation

select avg_fragmentation_in_percent, avg_fragment_size_in_pages, fragment_count, avg_page_space_used_in_percent

from sys.dm_db_index_physical_stats (DB_ID(), object_id('t_ci'), NULL, NULL, 'DETAILED')

 

OUTPUT

-- main cause of fragmentation is that initial page allocation is using mixed extent

avg_fragmentation_in_percent avg_fragment_size_in_pages fragment_count       avg_page_space_used_in_percent

---------------------------- -------------------------- -------------------- ------------------------------

40                           7.25                       4                    94.127625401532

 

(1 row(s) affected)

 

--find extent allocations. Note, first 8 pages are allocated from mixed extents

-- in the output below, I removed some columns that are not relevant here.

dbcc extentinfo (0, 't_ci', 0)

 

OUTPUT

file_id     page_id     pg_alloc    ext_size    object_id   index_id    partition_number

----------- ----------- ----------- ----------- ----------- ----------- ----------------

1           153         1           1           2137058649  0           1               

1           155         1           1           2137058649  0           1               

1           156         1           1           2137058649  0           1               

1           157         1           1           2137058649  0           1               

1           158         1           1           2137058649  0           1               

1           159         1           1           2137058649  0           1               

1           180         1           1           2137058649  0           1               

1           181         1           1           2137058649  0           1               

1           208         8           8           2137058649  0           1               

1           216         8           8           2137058649  0           1               

1           224         5           8           2137058649  0           1               

 

-- this shows all the pages alllocated to t_ci

-- I looked at the pages and I found total of 30 pages with 4 fragements. So the average size is 7.5

dbcc ind ('indextest', 't_ci', 0)

 

 

-- create the clustered index

create clustered index ci on t_ci(c1)

go

 

-- measure the fragementation

select avg_fragmentation_in_percent, avg_fragment_size_in_pages, fragment_count, avg_page_space_used_in_percent

from sys.dm_db_index_physical_stats (DB_ID(), object_id('t_ci'), NULL, NULL, 'DETAILED')

 

 

OUTPUT

-- you don't see any fragementation because SQL Server allocates uniform extents.

-- In the output below, I removed the row for non-leaf index pages

avg_fragmentation_in_percent avg_fragment_size_in_pages fragment_count       avg_page_space_used_in_percent

---------------------------- -------------------------- -------------------- ------------------------------

0                            28                         1                    98.3726958240672

 

-- this shows all the pages alllocated to t_ci

-- I looked at the pages and I found that logical/physical order was perfactly alinged.

-- also, all pages were allocated from uniform extents except for PFS page.

dbcc ind ('indextest', 't_ci', 1)

 

-- here is the output of page allocations

dbcc extentinfo (0, 't_ci', 1)

file_id     page_id     pg_alloc    ext_size    object_id   index_id    partition_number     

----------- ----------- ----------- ----------- ----------- ----------- ----------------

1           183         1           1           2137058649  1           1                  

1           232         8           8           2137058649  1           1                  

1           240         8           8           2137058649  1           1                   

1           248         8           8           2137058649  1           1                  

 

 

 

-- create the index and load the data. This is different from the previous example

-- as here we create the index on the empty table and then load the data.

 

drop table t_ci

go

 

create table t_ci (c1 int, c2 char (100), c3 int, c4 varchar(1000))

go

 

-- create the clustered index

create clustered index ci on t_ci(c1)

go

 

-- load the data

declare @i int

select @i = 0

while (@i < 1000)

begin

insert into t_ci values (@i, 'hello', @i+10000, replicate ('a', 100))

set @i = @i + 1

end

 

 

-- measure the fragementation

select avg_fragmentation_in_percent, avg_fragment_size_in_pages, fragment_count, avg_page_space_used_in_percent

from sys.dm_db_index_physical_stats (DB_ID(), object_id('t_ci'), NULL, NULL, 'DETAILED')

 

-- output: again, you see the fragementaion becuase first 8 page allcoations come from mixed page extents

-- This is because when you are loading into empty table, SQL Server cannot predict its size. So

-- it does mixed page allocations. This is different when you create the indec

-- after loading the data because at that time it knows the size of the index.

-- Also note that the average used percentage in pages was close to 100%. This means that

-- the pages were not split in half. This is one of the optimizations SQL Server does when

-- loading ordered data.

avg_fragmentation_in_percent avg_fragment_size_in_pages fragment_count       avg_page_space_used_in_percent

---------------------------- -------------------------- -------------------- ------------------------------

14.2857142857143             5.6                        5                    98.3726958240672

 

 

--find extent allocations. Note, first 8 pages are allocated from mixed extents

dbcc extentinfo (0, 't_ci', 1)

file_id     page_id     pg_alloc    ext_size    object_id   index_id    partition_number      

----------- ----------- ----------- ----------- ----------- ----------- ----------------

1           180         1           1           5575058     1           1                  

1           182         1           1           5575058     1           1                  

1           183         1           1           5575058     1           1                  

1           142         1           1           5575058     1           1                  

1           143         1           1           5575058     1           1                  

1           153         1           1           5575058     1           1                  

1           154         1           1           5575058     1           1                  

1           155         1           1           5575058     1           1                  

1           192         8           8           5575058     1           1                   

1           200         8           8           5575058     1           1                  

1           208         5           8           5575058     1           1                  

 

drop index ci on t_ci

-- create the clustered index to start with unfragmented data

create clustered index ci on t_ci(c1)

go

 

-- update all rows such that each row 900 bytes. This will cause page splits thereby

-- lead to fragmentation

update t_ci set c4 = replicate ('b', 1000)

 

-- measure the fragementation

select avg_fragmentation_in_percent, avg_fragment_size_in_pages, fragment_count, avg_page_space_used_in_percent

from sys.dm_db_index_physical_stats (DB_ID(), object_id('t_ci'), NULL, NULL, 'DETAILED')

 

-- Severe fragementation because of page splits. Also note that the space usage in each page is around 50%

-- This affects the performance in two ways (1) you will need to read more pages to get to the

-- same number of rows as pages are 1/2 full. (2) Since there are 117 fragements, it will cause

-- more physical IOs if we are scanning for a range of keys.

--

-- Just to emphasize, the fragmentation happens because of DML operations. So if there is very little

-- DML activity on a table, it may not get fragmented.  Other important point to understand

-- is that fragementaion ONLY impacts range queries but NOT singleton selects.So again if

-- you determine that most access thru an index are singleton SELECT, there is no performance benefit

-- in defragmenting it except of course you will be able to space by compacting the data on the page.

--

avg_fragmentation_in_percent avg_fragment_size_in_pages fragment_count       avg_page_space_used_in_percent

---------------------------- -------------------------- -------------------- ------------------------------

40.4332129963899             2.40869565217391           115                  50.0636520879664

 

--find extent allocations. Note, that the pages are allocated from mixed extents only. This

-- is because the table is already bigger than the 8 page threshold

dbcc extentinfo (0, 't_ci', 1)

file_id     page_id     pg_alloc    ext_size    object_id   index_id    partition_number      

----------- ----------- ----------- ----------- ----------- ----------- ----------------

1           183         1           1           5575058     1           1                

1           440         8           8           5575058     1           1                 

1           448         8           8           5575058     1           1                

1           456         8           8           5575058     1           1                

1           464         8           8           5575058     1           1                

1           472         8           8           5575058     1           1                

1           480         8           8           5575058     1           1                

1           488         8           8           5575058     1           1                

1           496         8           8           5575058     1           1                

1           504         8           8           5575058     1           1                

1           512         8           8           5575058     1           1                

1           520         8           8           5575058     1           1                

1           528         8           8           5575058     1           1                

1           536         8           8           5575058     1           1                

1           544         8           8           5575058     1           1                

1           552         8           8           5575058     1           1                 

1           560         8           8           5575058     1           1                

1           568         8           8           5575058     1           1                

1           576         7           8           5575058     1           1                

 

 

-- you can use the following command to show all the pages alllocated to t_ci and their links.

dbcc ind ('indextest', 't_ci', 1)

 

-- Now do an index defrag.

alter index ci on t_ci reorganize

 

-- measure the fragementation

select avg_fragmentation_in_percent, avg_fragment_size_in_pages, fragment_count, avg_page_space_used_in_percent

from sys.dm_db_index_physical_stats (DB_ID(), object_id('t_ci'), NULL, NULL, 'DETAILED')

 

-- note that the fragmentation was reduced significantly and the space usage on pages is around 100%. In other

-- words, the defragementation makes the physical order same as the logical order and compacts the pages.

-- The reason you see the fragementation is that the 'Reorganize' command does not allocate new extents

-- as index rebuild command. It works with the already allocated pages and moves the data around and then

-- deallocates pages not needed. If you choose to 'rebuild' the index, then the fragmentation will be completely

-- removed but you must question if it is worth it? Index Reorganize takes very little space (one extra page) and

-- breaks the work into smaller transactions. So you minimze large log chain and space overhead.On the flip side,

-- it is single threaded and fully logged operation.

 

avg_fragmentation_in_percent avg_fragment_size_in_pages fragment_count       avg_page_space_used_in_percent

---------------------------- -------------------------- -------------------- ------------------------------

3.4965034965035              10.2142857142857           14                   98.3726958240672

Leave a Comment
  • Please add 3 and 5 and type the answer here:
  • Post