DEV Community

Antidisestablishmentarianism
Antidisestablishmentarianism

Posted on • Updated on

 

Sorting very large files in C#.

There are many examples of sorting large files and here is yet another.

This program has many options, including the ability to generate a sample file to sort. Statistics are displayed while the file is being processed.

using Microsoft.VisualBasic.FileIO;
using Microsoft.Win32;
using System.Collections.Concurrent;
using System.Diagnostics;
using System.Security.Cryptography;
using System.Security.Principal;

/// <summary>
/// Console utility that scans a directory tree for files with identical content,
/// reports them, and optionally sends the redundant copies to the recycle bin.
/// When started without arguments it installs itself and registers an Explorer
/// context-menu entry for directories.
/// </summary>
class FindDuplicateFiles
{
    // Every file discovered under the scanned directories.
    private static List<FileInfo> files = new();

    // Groups of hashed files sharing the same MD5 value (two or more members each).
    private static List<IGrouping<string, KeyValuePair<FileInfo, string>>> duplicates = new();

    // MD5 hex digest of every file that shares its length with at least one other file.
    private static readonly ConcurrentDictionary<FileInfo, string> hashlist = new();

    // Paths of zero-byte files, and messages of errors hit while hashing or deleting.
    private static readonly ConcurrentBag<string> zerolist = new(), errorlist = new();

    // Number of files that belong to a duplicate group.
    private static int count = 0;

    // Bytes occupied by the redundant copies (every group member except the first).
    private static double size = 0;

    /// <summary>
    /// Collects all files below <paramref name="directories"/>, hashes those whose
    /// length matches another file, and fills <c>duplicates</c>, <c>count</c>,
    /// <c>size</c>, <c>zerolist</c> and <c>errorlist</c>.
    /// </summary>
    /// <param name="directories">Root directories to scan; non-existent ones are skipped.</param>
    private static void GetDuplicates(DirectoryInfo[] directories)
    {
        EnumerationOptions recursive = new() { RecurseSubdirectories = true };

        foreach (DirectoryInfo directory in directories)
            if (directory.Exists)
                files.AddRange(directory.GetFiles("*", recursive));

        // Report every empty file, even when it is the only one. Previously a lone
        // zero-byte file was dropped by the "more than one file of this size" filter.
        foreach (FileInfo file in files)
            if (file.Length == 0)
                zerolist.Add(file.FullName);

        // Only files sharing their length with another file can be duplicates,
        // so only those are worth hashing.
        List<FileInfo> candidates = files
            .Where(f => f.Length > 0)
            .GroupBy(f => f.Length)
            .Where(g => g.Count() > 1)
            .SelectMany(g => g)
            .ToList();

        Parallel.ForEach(candidates, file =>
        {
            try
            {
                using FileStream fs = new(file.FullName, FileMode.Open, FileAccess.Read, FileShare.Read, 4096, FileOptions.SequentialScan);
                using MD5 myhash = MD5.Create();
                hashlist.TryAdd(file, Convert.ToHexString(myhash.ComputeHash(fs)));
            }
            catch (Exception ex)
            {
                // Best effort: an unreadable file is recorded and skipped, not fatal.
                errorlist.Add(ex.Message);
            }
        });

        duplicates = hashlist.GroupBy(x => x.Value).Where(x => x.Count() > 1).ToList();

        // Assigned (not accumulated) so the totals always describe the current list.
        count = duplicates.Sum(d => d.Count());
        size = duplicates.Sum(d => d.Skip(1).Sum(e => (double)e.Key.Length));
    }

    /// <summary>
    /// Registers an Explorer context-menu verb under <c>HKEY_CLASSES_ROOT\{objecttype}\shell</c>.
    /// For <c>"Directory"</c> the verb is also registered under <c>Directory\Background\shell</c>.
    /// An existing verb with the same key name is replaced.
    /// </summary>
    /// <param name="objecttype">Registry class name, e.g. <c>Directory</c>.</param>
    /// <param name="contextmenuentry">Text shown in the context menu.</param>
    /// <param name="location">Directory that contains the executable.</param>
    /// <param name="processname">File name of the executable.</param>
    /// <param name="keyname">Name of the registry key created for the verb.</param>
    /// <param name="parameters">Text appended after the executable path; the default closes the quote and passes <c>"%V"</c>.</param>
    private static void CreateContextMenuEntry(string objecttype, string contextmenuentry, string location, string? processname, string keyname, string? parameters = "\" \"%V\"")
    {
        // A trailing backslash on location would otherwise yield a doubled separator.
        string command = "\"" + location.TrimEnd('\\') + "\\" + processname + parameters;

        WriteShellVerb(objecttype + "\\shell", keyname, contextmenuentry, command);

        //background of directory folder
        if (objecttype == "Directory")
            WriteShellVerb(objecttype + "\\Background\\shell", keyname, contextmenuentry, command);
    }

    /// <summary>Replaces one verb key (and its <c>command</c> subkey) below the given shell key.</summary>
    private static void WriteShellVerb(string shellPath, string keyname, string contextmenuentry, string command)
    {
        string verbPath = shellPath + "\\" + keyname;

        // Remove any previous registration; a missing key is not an error.
        Registry.ClassesRoot.DeleteSubKeyTree(verbPath, false);

        // RegistryKey wraps a native handle, so dispose it deterministically.
        using RegistryKey verbKey = Registry.ClassesRoot.CreateSubKey(verbPath);
        verbKey.SetValue("", contextmenuentry);

        using RegistryKey commandKey = verbKey.CreateSubKey("command");
        commandKey.SetValue("", command);
    }

    /// <summary>
    /// Interactive installer: relaunches elevated when needed, copies the program's
    /// files to a directory chosen by the user and registers the context-menu entry.
    /// Every path through this method ends with <see cref="Environment.Exit(int)"/>.
    /// </summary>
    private static void Install()
    {
        try
        {
            //check if we are currently running as administrator
            if (!new WindowsPrincipal(WindowsIdentity.GetCurrent()).IsInRole(WindowsBuiltInRole.Administrator))
            {
                Console.WriteLine("This program requires administrative privileges to install.\nPress ENTER to start with elevated privileges or ESC to exit.");

                ConsoleKeyInfo key;

                do
                {
                    key = Console.ReadKey(true);

                    if (key.Key == ConsoleKey.Enter)
                    {
                        //start new process as administrator. Environment.ProcessPath is the path of what we are currently running.
                        Process.Start(new ProcessStartInfo { FileName = Environment.ProcessPath, UseShellExecute = true, Verb = "runas" });
                        Environment.Exit(0);
                    }

                } while (key.Key != ConsoleKey.Escape);

                Environment.Exit(0);
            }
        }
        //if user selects "no" from administrator request.
        catch
        {
            Console.WriteLine("\nAdministrative rights are required for installing this application.\nPress any key to exit.");
            Console.ReadKey(true);
            Environment.Exit(0);
        }

        string? destdir = null;

        // Keep asking until something usable is entered; an empty line used to fall
        // through and crash Directory.CreateDirectory("").
        while (string.IsNullOrWhiteSpace(destdir))
        {
            Console.WriteLine("Enter directory to Install to, e.g. c:\\dupe.\n");
            string? line = Console.ReadLine();

            if (line is null)
            {
                // Standard input is closed; prompting again would loop forever.
                Console.WriteLine("\nNo destination directory supplied, install cancelled.");
                Environment.Exit(1);
                return;
            }

            destdir = line.Trim();
        }

        // Absolute path without a trailing separator; leading backslashes (UNC paths) are kept.
        destdir = Path.TrimEndingDirectorySeparator(Path.GetFullPath(destdir));

        Directory.CreateDirectory(destdir);

        // Copy from the application's own folder: the current working directory is
        // not guaranteed to be where the program lives (e.g. after the elevated relaunch).
        foreach (FileInfo item in new DirectoryInfo(AppContext.BaseDirectory).GetFiles())
        {
            string target = Path.Combine(destdir, item.Name);

            // Installing into the folder we are running from: nothing to copy.
            if (string.Equals(Path.GetFullPath(target), item.FullName, StringComparison.OrdinalIgnoreCase))
                continue;

            File.Copy(item.FullName, target, true);
        }

        Console.WriteLine("\nFiles copied to destination directory.");

        CreateContextMenuEntry("Directory", "Find duplicate files", destdir, Path.GetFileName(Environment.ProcessPath), "Dupe");

        Console.WriteLine("Context menu entry added, install complete.\nPress any key to exit.");
        Console.ReadKey(true);
        Environment.Exit(0);
    }

    /// <summary>
    /// Executes one menu command: C clears the console, E lists errors, Z lists
    /// zero-byte files, L lists duplicate groups, D recycles all but the first
    /// file of every duplicate group. Any other character is ignored.
    /// </summary>
    /// <param name="ans">Key character typed by the user (either case).</param>
    private static void ParseAnswer(char ans)
    {
        switch (ans)
        {
            case 'c' or 'C':
                Console.Clear();
                break;

            case 'e' or 'E':
                if (errorlist.IsEmpty)
                    Console.WriteLine("There were no errors detected.\n");
                else
                {
                    Console.WriteLine("Errors...");

                    foreach (string item in errorlist)
                        Console.WriteLine($"{item}\n");
                }
                break;

            case 'z' or 'Z':
                if (zerolist.IsEmpty)
                    Console.WriteLine("There are no zero byte files.\n");
                else
                {
                    Console.WriteLine("Zero byte files:\n");

                    foreach (string item in zerolist)
                        Console.WriteLine($"{item}");
                }
                break;

            case 'l' or 'L':
                foreach (var item in duplicates)
                {
                    Console.WriteLine($"\nThe following {item.Count()} files are identical:");

                    foreach (var file in item)
                        Console.WriteLine(file.Key.FullName);
                }
                break;

            case 'd' or 'D':
                Console.WriteLine("Deleting files...\n");

                foreach (var item in duplicates)
                    foreach (var entry in item.Skip(1)) // the first file of each group is kept
                    {
                        string path = entry.Key.FullName;

                        // Already removed (e.g. D pressed twice): DeleteFile would throw.
                        if (!File.Exists(path))
                            continue;

                        try
                        {
                            FileSystem.DeleteFile(path, UIOption.OnlyErrorDialogs, RecycleOption.SendToRecycleBin);
                        }
                        catch (Exception ex)
                        {
                            // One locked or protected file must not abort the whole run;
                            // the message can be reviewed with the E command.
                            errorlist.Add(ex.Message);
                        }
                    }

                Console.WriteLine("File deletion completed.\n");
                break;
        }
    }


    /// <summary>
    /// Entry point. Without arguments the installer runs; otherwise <c>args[0]</c>
    /// is scanned for duplicates and an interactive menu is shown.
    /// </summary>
    /// <param name="args">Optional: the directory to scan as first element.</param>
    public static void Main(string[] args)
    {
        Console.Clear();

        if (args.Length == 0)
        {
            Install();
            return; // Install() ends the process; this guards the args[0] access below regardless.
        }

        Stopwatch watch = Stopwatch.StartNew();
        DirectoryInfo[] dirs = { new DirectoryInfo(args[0]) };
        char ans = ' ';
        char[] answer = { 'l', 'L', 'd', 'D', 'e', 'E', 'c', 'C', 'x', 'X', 'z', 'Z' };

        Console.WriteLine("Processing files...");

        GetDuplicates(dirs);

        // n2 (not n0) so the two decimals of the MB figure are actually printed.
        Console.WriteLine($"\n{count} total files in duplicates list, {duplicates.Count} files have duplicates.\n{count - duplicates.Count} files can be deleted saving {size / 1048576:n2} MB of space.");
        Console.WriteLine($"{hashlist.Count} files hashed in {watch.ElapsedMilliseconds / 1000} seconds.");
        Console.WriteLine($"{zerolist.Count} zero byte files, {errorlist.Count} errors");

        if (duplicates.Count > 0)
            while (ans != 'x' && ans != 'X')
            {
                Console.WriteLine("\n{L}ist duplicate files   {D}elete all duplicates   {C}lear console   {E}rrors   [Z]ero byte files   E[X]it program\n");

                // Ignore every key that is not one of the menu commands.
                do ans = Console.ReadKey(true).KeyChar;
                while (!answer.Contains(ans));

                ParseAnswer(ans);
            }
        else
        {
            Console.WriteLine("No duplicates found.\nPress any key to continue.");
            Console.ReadKey(true);
        }
    }
}
Enter fullscreen mode Exit fullscreen mode

Top comments (0)

Timeless DEV post...

Git Concepts I Wish I Knew Years Ago

The most used technology by developers is not Javascript.

It's not Python or HTML.

It hardly even gets mentioned in interviews or listed as a pre-requisite for jobs.

I'm talking about Git and version control of course.

One does not simply learn git