Sunday, February 25, 2007

Stored Procedures - Removing Duplicates

The Following is a Simple Stored Procedure that flags a records as duplicate in a table where duplicate exists

a.Checks for the valid column and tablename.
b. Creates an Srn (identity column).
c. Creates a column dupe
d.Flags Dupes wherever duplicate records found

The Code for the Stored Procedure is as follows

GO
/****** Object: StoredProcedure [dbo].[remove_duplicates] ******/
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
ALTER procedure [dbo].[remove_duplicates]
@colname varchar(200)
,@tablename varchar(100)

as

set nocount on

declare @sql varchar(1000)
declare @no_duplicates int

--Check for the Table and Column Name
set @sql=''
if exists
(
select * from syscolumns where name like +''+@colname+'' and id = object_id(+''+@tablename+'')
)
begin

--Generate an SRN Number
if exists(select * from syscolumns where id = object_id(+''+@tablename+'') and name like 'Vic_srn')
begin

set @sql=''
set @sql = @sql + 'alter table '+@tablename+ ' drop column Vic_srn'
exec(@sql)

end


set @sql=''
set @sql = @sql+'alter table '+@tablename+ ' add Vic_srn numeric(6) identity(1,1)'
exec(@sql)

--Add a column for Dupes

if exists(select * from syscolumns where id = object_id(+''+@tablename+'') and name like 'dupes')
begin

set @sql=''
set @sql = @sql + 'alter table '+@tablename+ ' drop column dupes'
exec(@sql)

end

set @sql=''
set @sql=@sql+' alter table '+@tablename+ ' add dupes varchar(10)'
exec(@sql)


--Update the Column Dupes

set @sql=''
set @sql=@sql+'update a '+char(13)
+'set a.dupes = ''yes'''+char(13)
+'from '+char(13)
+'vic_orderdetails a '+char(13)
+'join '+char(13)
+'vic_orderdetails b '+char(13)
+'on '+char(13)
+'a.orderid = b.orderid '+char(13)
+'where '+char(13)
+'a.vic_srn > b.vic_srn '+char(13)
exec(@sql)

if object_id(@tablename) is not null
begin
exec('select count(*) as ''Number of Duplicates'' from '+@tablename+' where dupes is not null')
end

Print 'The Number of Duplicates are '+convert(varchar(100),@no_duplicates)

end


else

Print 'Invalid Table and Column Name'