Apr
13
2010

using LINQ to detect and remove duplicate files

Here's a nifty way to find and delete the duplicate files that result from extracting all the icon resources embedded in files on your hard drive :)

using System;
using System.IO;
using System.Linq;
using System.Security.Cryptography;
using System.Text;

namespace DupeFinder
{
    /// <summary>
    /// Console utility that removes duplicate <c>*.ico</c> files from a
    /// directory. Files are considered duplicates when the SHA-1 digests of
    /// their contents are equal.
    /// </summary>
    internal class Program
    {
        /// <summary>
        /// Hashes every <c>*.ico</c> file in <c>d:\icons</c>, groups the files by
        /// content hash, and deletes all but the first file of each group.
        /// Waits for a key press before exiting.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        private static void Main(string[] args)
        {
            Directory.GetFiles(@"d:\icons", "*.ico")
                .Select(f => new { FileName = f, FileHash = ComputeFileHash(f) })
                // Group the file names by hash; groups and their members keep
                // the order in which Directory.GetFiles returned them.
                .GroupBy(f => f.FileHash, f => f.FileName)
                // The first file of each group is kept; the rest are duplicates.
                .SelectMany(g => g.Skip(1))
                // Materialize the list so that all hashing is finished before
                // the first file is deleted.
                .ToList()
                .ForEach(File.Delete);

            Console.ReadKey();
        }

        /// <summary>
        /// Computes the SHA-1 digest of a file's contents.
        /// </summary>
        /// <param name="path">Path of the file to hash.</param>
        /// <returns>
        /// The digest as a hexadecimal string in the format produced by
        /// <see cref="BitConverter.ToString(byte[])"/> (e.g. "DA-39-A3-...").
        /// </returns>
        private static string ComputeFileHash(string path)
        {
            // Both the hash algorithm and the stream are disposed before
            // returning, so no read handle is left open on a file that may be
            // deleted afterwards.
            using (SHA1 sha1 = SHA1.Create())
            using (FileStream stream = File.OpenRead(path))
            {
                // Hex-encode the digest. Decoding raw hash bytes as UTF-8 text
                // is lossy (invalid byte sequences are replaced), which could
                // make different digests compare equal.
                return BitConverter.ToString(sha1.ComputeHash(stream));
            }
        }
    }
}

Month List