Microsoft SQL Server Front End Blog

The writers of this blog are a part of the Microsoft SQL Server Manageability team. We will be blogging about our tools, scripts, webcasts, and miscellaneous tips that will help you get the most out of SQL Agent, Data collector and other tools

Data Collector's sp_purge_data may run for hours if there are many orphaned plans to purge

Data Collector's sp_purge_data may run for hours if there are many orphaned plans to purge

  • Comments 2
 
 
Here is the fix for data collector purge job taking too long to complete. Please let us know if this fix solves your problem. Please send us your feedback to  sethu.srinivasan@microsoft.com. I have also attached the query as attachment to this blog entry
 
 
 
-- run this batch in Data collector's management warehouse database
-- use MDW
--
-- This stored proc removes orphaned notable_query_plan data from the warehouse
--
IF (NOT OBJECT_ID(N'core.sp_purge_orphaned_notable_query_plan', 'P') IS NULL)
BEGIN
RAISERROR('Dropping procedure [core].[sp_purge_orphaned_notable_query_plan] ...', 0, 1) WITH NOWAIT;
DROP PROCEDURE [core].[sp_purge_orphaned_notable_query_plan]
END
GO

RAISERROR('Creating procedure [core].[sp_purge_orphaned_notable_query_plan] ...', 0, 1) WITH NOWAIT;
GO
CREATE PROCEDURE [core].[sp_purge_orphaned_notable_query_plan]
@duration smallint = NULL,
@end_time datetime = NULL,
@delete_batch_size int = 500
AS
BEGIN
PRINT 'Begin purging orphaned records in snapshots.notable_query_plan Current UTC Time:' + CONVERT(VARCHAR, GETUTCDATE())

DECLARE @stop_purge int

-- Delete orphaned rows from snapshots.notable_query_plan. Query plans are not deleted by the generic purge
-- process that deletes other data (above) because query plan rows are not tied to a particular snapshot ID.
-- Purging query plans table as a special case, by looking for plans that
-- are no longer referenced by any of the rows in the snapshots.query_stats table. We need to delete these
-- rows in small chunks, since deleting many GB in a single delete statement would cause lock escalation and
-- an explosion in the size of the transaction log (individual query plans can be 10-50MB).
DECLARE @rows_affected int;
-- set expected rows affected as delete batch size
SET @rows_affected = @delete_batch_size;

-- select set of orphaned query plans to be deleted into a temp table
SELECT qp.[sql_handle],
qp.plan_handle,
qp.plan_generation_num,
qp.statement_start_offset,
qp.statement_end_offset,
qp.creation_time
INTO #tmp_notable_query_plan
FROM snapshots.notable_query_plan AS qp
WHERE NOT EXISTS (
SELECT snapshot_id
FROM snapshots.query_stats AS qs
WHERE qs.[sql_handle] = qp.[sql_handle] AND qs.plan_handle = qp.plan_handle
AND qs.plan_generation_num = qp.plan_generation_num
AND qs.statement_start_offset = qp.statement_start_offset
AND qs.statement_end_offset = qp.statement_end_offset
AND qs.creation_time = qp.creation_time)

WHILE (@rows_affected = @delete_batch_size)
BEGIN
-- Deleting TOP N orphaned rows in query plan table by joining info from temp table variable
-- This is done to speed up delete query.
DELETE TOP (@delete_batch_size) snapshots.notable_query_plan
FROM snapshots.notable_query_plan AS qp , #tmp_notable_query_plan AS tmp
WHERE tmp.[sql_handle] = qp.[sql_handle]
AND tmp.plan_handle = qp.plan_handle
AND tmp.plan_generation_num = qp.plan_generation_num
AND tmp.statement_start_offset = qp.statement_start_offset
AND tmp.statement_end_offset = qp.statement_end_offset
AND tmp.creation_time = qp.creation_time

SET @rows_affected = @@ROWCOUNT;
IF(@rows_affected > 0)
BEGIN
RAISERROR ('Deleted %d orphaned rows from snapshots.notable_query_plan', 0, 1, @rows_affected) WITH NOWAIT;
END

-- Check if the execution of the stored proc exceeded the @duration specified
IF (@duration IS NOT NULL)
BEGIN
IF (GETUTCDATE()>=@end_time)
BEGIN
PRINT 'Stopping purge. More than ' + CONVERT(VARCHAR, @duration) + ' minutes passed since the start of operation.';
BREAK
END
END

-- Check if somebody wanted to stop the purge operation
SELECT @stop_purge = COUNT(stop_purge) FROM [core].[purge_info_internal]
IF (@stop_purge > 0)
BEGIN
PRINT 'Stopping purge. Detected a user request to stop purge.';
BREAK
END
END;

PRINT 'End purging orphaned records in snapshots.notable_query_plan Current UTC Time:' + CONVERT(VARCHAR, GETUTCDATE())
END

GO

--
-- This stored proc removes orphaned notable_query_text data from the warehouse
--
IF (NOT OBJECT_ID(N'core.sp_purge_orphaned_notable_query_text', 'P') IS NULL)
BEGIN
RAISERROR('Dropping procedure [core].[sp_purge_orphaned_notable_query_text] ...', 0, 1) WITH NOWAIT;
DROP PROCEDURE [core].[sp_purge_orphaned_notable_query_text]
END
GO

RAISERROR('Creating procedure [core].[sp_purge_orphaned_notable_query_text] ...', 0, 1) WITH NOWAIT;
GO
CREATE PROCEDURE [core].[sp_purge_orphaned_notable_query_text]
@duration smallint = NULL,
@end_time datetime = NULL,
@delete_batch_size int = 500
AS
BEGIN
PRINT 'Begin purging orphaned records in snapshots.notable_query_text Current UTC Time:' + CONVERT(VARCHAR, GETUTCDATE())

DECLARE @stop_purge int

-- Delete orphaned rows from snapshots.notable_query_text. Query texts are not deleted by the generic purge
-- process that deletes other data (above) because query text rows are not tied to a particular snapshot ID.
-- Purging query text table as a special case, by looking for plans that
-- are no longer referenced by any of the rows in the snapshots.query_stats table. We need to delete these
-- rows in small chunks, since deleting many GB in a single delete statement would cause lock escalation and
-- an explosion in the size of the transaction log (individual query plans can be 10-50MB).
DECLARE @rows_affected int;
-- set expected rows affected as delete batch size
SET @rows_affected = @delete_batch_size;

SELECT qt.[sql_handle]
INTO #tmp_notable_query_text
FROM snapshots.notable_query_text AS qt
WHERE NOT EXISTS (
SELECT snapshot_id
FROM snapshots.query_stats AS qs
WHERE qs.[sql_handle] = qt.[sql_handle])

WHILE (@rows_affected = @delete_batch_size)
BEGIN
-- Deleting TOP N orphaned rows in query text table by joining info from temp table
-- This is done to speed up delete query.
DELETE TOP (@delete_batch_size) snapshots.notable_query_text
FROM snapshots.notable_query_text AS qt, #tmp_notable_query_text AS tmp
WHERE tmp.[sql_handle] = qt.[sql_handle]

SET @rows_affected = @@ROWCOUNT;
IF(@rows_affected > 0)
BEGIN
RAISERROR ('Deleted %d orphaned rows from snapshots.notable_query_text', 0, 1, @rows_affected) WITH NOWAIT;
END

-- Check if the execution of the stored proc exceeded the @duration specified
IF (@duration IS NOT NULL)
BEGIN
IF (GETUTCDATE()>=@end_time)
BEGIN
PRINT 'Stopping purge. More than ' + CONVERT(VARCHAR, @duration) + ' minutes passed since the start of operation.';
BREAK
END
END

-- Check if somebody wanted to stop the purge operation
SELECT @stop_purge = COUNT(stop_purge) FROM [core].[purge_info_internal]
IF (@stop_purge > 0)
BEGIN
PRINT 'Stopping purge. Detected a user request to stop purge.';
BREAK
END
END;

PRINT 'End purging orphaned records in snapshots.notable_query_text Current UTC Time:' + CONVERT(VARCHAR, GETUTCDATE())

END

GO

--
-- This stored proc removes data from the warehouse that reached its expiration date
--
IF (NOT OBJECT_ID(N'core.sp_purge_data', 'P') IS NULL)
BEGIN
RAISERROR('Dropping procedure [core].[sp_purge_data] ...', 0, 1) WITH NOWAIT;
DROP PROCEDURE [core].[sp_purge_data]
END
GO

RAISERROR('Creating procedure [core].[sp_purge_data] ...', 0, 1) WITH NOWAIT;
GO
CREATE PROCEDURE [core].[sp_purge_data]
@retention_days smallint = NULL,
@instance_name sysname = NULL,
@collection_set_uid uniqueidentifier = NULL,
@duration smallint = NULL,
@delete_batch_size int = 500
AS
BEGIN
-- Security check (role membership)
IF (NOT (ISNULL(IS_MEMBER(N'mdw_admin'), 0) = 1) AND NOT (ISNULL(IS_SRVROLEMEMBER(N'sysadmin'), 0) = 1))
BEGIN
RAISERROR(14677, 16, -1, 'mdw_admin')
RETURN(1) -- Failure
END

-- Validate parameters
IF ((@retention_days IS NOT NULL) AND (@retention_days < 0))
BEGIN
RAISERROR(14200, -1, -1, '@retention_days')
RETURN(1) -- Failure
END

IF ((@duration IS NOT NULL) AND (@duration < 0))
BEGIN
RAISERROR(14200, -1, -1, '@duration')
RETURN(1) -- Failure
END

-- This table will contain a record if somebody requests purge to stop
-- If user requested us to purge data - we reset the content of it - and proceed with purge
-- If somebody in a different session wants purge operations to stop he adds a record
-- that we will discover while purge in progress
--
-- We dont clear this flag when we exit since multiple purge operations with differnet
-- filters may proceed, and we want all of them to stop.
DELETE FROM [core].[purge_info_internal]

SET @instance_name = NULLIF(LTRIM(RTRIM(@instance_name)), N'')

-- Calculate the time when the operation should stop (NULL otherwise)
DECLARE @end_time datetime
IF (@duration IS NOT NULL)
BEGIN
SET @end_time = DATEADD(minute, @duration, GETUTCDATE())
END

-- Declare table that will be used to find what are the valid
-- candidate snapshots that could be selected for purge
DECLARE @purge_candidates table
(
snapshot_id int NOT NULL,
snapshot_time datetime NOT NULL,
instance_name sysname NOT NULL,
collection_set_uid uniqueidentifier NOT NULL
)

-- Find candidates that match the retention_days criteria (if specified)
IF (@retention_days IS NULL)
BEGIN
-- User did not specified a value for @retention_days, therfore we
-- will use the default expiration day as marked in the source info
INSERT INTO @purge_candidates
SELECT s.snapshot_id, s.snapshot_time, s.instance_name, s.collection_set_uid
FROM core.snapshots s
WHERE (GETUTCDATE() >= s.valid_through)
END
ELSE
BEGIN
-- User specified a value for @retention_days, we will use this overriden value
-- when deciding what means old enough to qualify for purge this overrides
-- the days_until_expiration value specified in the source_info_internal table
INSERT INTO @purge_candidates
SELECT s.snapshot_id, s.snapshot_time, s.instance_name, s.collection_set_uid
FROM core.snapshots s
WHERE GETUTCDATE() >= DATEADD(DAY, @retention_days, s.snapshot_time)
END

-- Determine which is the oldest snapshot, from the list of candidates
DECLARE oldest_snapshot_cursor CURSOR FORWARD_ONLY READ_ONLY FOR
SELECT p.snapshot_id, p.instance_name, p.collection_set_uid
FROM @purge_candidates p
WHERE
((@instance_name IS NULL) or (p.instance_name = @instance_name)) AND
((@collection_set_uid IS NULL) or (p.collection_set_uid = @collection_set_uid))
ORDER BY p.snapshot_time ASC

OPEN oldest_snapshot_cursor

DECLARE @stop_purge int
DECLARE @oldest_snapshot_id int
DECLARE @oldest_instance_name sysname
DECLARE @oldest_collection_set_uid uniqueidentifier

FETCH NEXT FROM oldest_snapshot_cursor
INTO @oldest_snapshot_id, @oldest_instance_name, @oldest_collection_set_uid

-- As long as there are snapshots that matched the time criteria
WHILE @@FETCH_STATUS = 0
BEGIN

-- Filter out records that do not match the other filter crieria
IF ((@instance_name IS NULL) or (@oldest_instance_name = @instance_name))
BEGIN

-- There was no filter specified for instance_name or the instance matches the filter
IF ((@collection_set_uid IS NULL) or (@oldest_collection_set_uid = @collection_set_uid))
BEGIN

-- There was no filter specified for the collection_set_uid or the collection_set_uid matches the filter
BEGIN TRANSACTION tran_sp_purge_data

-- Purge data associated with this snapshot. Note: deleting this snapshot
-- triggers cascade delete in all warehouse tables based on the foreign key
-- relationship to snapshots table

-- Cascade cleanup of all data related referencing oldest snapshot
DELETE core.snapshots_internal
FROM core.snapshots_internal s
WHERE s.snapshot_id = @oldest_snapshot_id

COMMIT TRANSACTION tran_sp_purge_data

PRINT 'Snapshot #' + CONVERT(VARCHAR, @oldest_snapshot_id) + ' purged.';
END

END

-- Check if the execution of the stored proc exceeded the @duration specified
IF (@duration IS NOT NULL)
BEGIN
IF (GETUTCDATE()>=@end_time)
BEGIN
PRINT 'Stopping purge. More than ' + CONVERT(VARCHAR, @duration) + ' minutes passed since the start of operation.';
BREAK
END
END

-- Check if somebody wanted to stop the purge operation
SELECT @stop_purge = COUNT(stop_purge) FROM [core].[purge_info_internal]
IF (@stop_purge > 0)
BEGIN
PRINT 'Stopping purge. Detected a user request to stop purge.';
BREAK
END

-- Move to next oldest snapshot
FETCH NEXT FROM oldest_snapshot_cursor
INTO @oldest_snapshot_id, @oldest_instance_name, @oldest_collection_set_uid

END

CLOSE oldest_snapshot_cursor
DEALLOCATE oldest_snapshot_cursor

-- delete orphaned query plans
EXEC [core].[sp_purge_orphaned_notable_query_plan] @duration = @duration, @end_time = @end_time, @delete_batch_size = @delete_batch_size

-- delete orphaned query text
EXEC [core].[sp_purge_orphaned_notable_query_text] @duration = @duration, @end_time = @end_time, @delete_batch_size = @delete_batch_size

END
GO


This posting is provided "AS IS" with no warranties, and confers no rights. 
Use of included script samples are subject to the terms specified at
http://www.microsoft.com/info/cpyright.htm
Attachment: sp_purge_data_fix.txt
Leave a Comment
  • Please add 3 and 8 and type the answer here:
  • Post
  • Thanks for including the txt file, it appears that if you copy the text from the webpage, ther are no {CR}'s nor {LF}'s.  It copies as a single giant row.

  • What about the table snapshots.querystats. This is linked to or the removal of plans and text where these do not exist. But data never seems to be removed from this table.

    Regards,

    Tim Gordon-Jones

    timgj@pdbsolutions.co.uk

Page 1 of 1 (2 items)