There are many examples of sorting large files and here is yet another.
This program has many options, including the ability to generate a sample file to sort. Statistics are provided as the file is being processed.
using Microsoft.VisualBasic.FileIO;
using Microsoft.Win32;
using System.Collections.Concurrent;
using System.Diagnostics;
using System.Security.Cryptography;
using System.Security.Principal;
/// <summary>
/// Console utility that finds byte-identical files below a directory and can list them or
/// send the redundant copies to the recycle bin.
/// Started without arguments it installs itself: it copies the program files to a folder chosen
/// by the user and registers an Explorer context-menu entry for directories (Windows only).
/// Started with a directory path as the first argument it scans that directory.
/// </summary>
class FindDuplicateFiles
{
    /// <summary>Number of bytes in one megabyte (2^20), used when reporting reclaimable space.</summary>
    private const double BytesPerMegabyte = 1048576;

    /// <summary>Buffer size, in bytes, of the file streams opened for hashing.</summary>
    private const int HashBufferSize = 4096;

    /// <summary>Key characters accepted by the interactive menu.</summary>
    private const string MenuKeys = "lLdDeEcCxXzZ";

    /// <summary>Every file found below the scanned directories.</summary>
    private static readonly List<FileInfo> files = new();

    /// <summary>Groups of two or more files that share the same content hash.</summary>
    private static List<IGrouping<string, KeyValuePair<FileInfo, string>>> duplicates = new();

    /// <summary>Hex-encoded MD5 hash of every file that shares its length with at least one other file.</summary>
    private static readonly ConcurrentDictionary<FileInfo, string> hashlist = new();

    /// <summary>Full paths of empty files, and messages of the errors hit while scanning or deleting.</summary>
    private static readonly ConcurrentBag<string> zerolist = new(), errorlist = new();

    /// <summary>Total number of files that belong to a duplicate group (originals included).</summary>
    private static int count = 0;

    /// <summary>Bytes that would be freed by keeping only the first file of every duplicate group.</summary>
    private static long size = 0;

    /// <summary>
    /// Scans <paramref name="directories"/> recursively and fills <c>duplicates</c>, <c>zerolist</c>,
    /// <c>errorlist</c>, <c>count</c> and <c>size</c>. Results of a previous call are discarded.
    /// </summary>
    /// <param name="directories">Directories to scan; entries that do not exist are skipped.</param>
    private static void GetDuplicates(DirectoryInfo[] directories)
    {
        // Start from a clean slate so that a second call does not add to the previous totals.
        files.Clear();
        hashlist.Clear();
        zerolist.Clear();
        errorlist.Clear();
        duplicates = new();
        count = 0;
        size = 0;

        // NOTE: apart from RecurseSubdirectories the EnumerationOptions defaults apply
        // (see the EnumerationOptions documentation for what they skip).
        EnumerationOptions options = new() { RecurseSubdirectories = true };
        foreach (DirectoryInfo directory in directories)
        {
            if (!directory.Exists)
                continue;
            try
            {
                files.AddRange(directory.EnumerateFiles("*", options));
            }
            catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
            {
                errorlist.Add($"{directory.FullName}: {ex.Message}");
            }
        }

        // Every empty file is reported, including a single one; empty files are never hashed.
        foreach (FileInfo empty in files.Where(f => f.Length == 0))
            zerolist.Add(empty.FullName);

        // Only files that share their length with another file can be duplicates, so only those are hashed.
        IEnumerable<FileInfo> candidates = files
            .Where(f => f.Length > 0)
            .GroupBy(f => f.Length)
            .Where(g => g.Count() > 1)
            .SelectMany(g => g);

        Parallel.ForEach(candidates, file =>
        {
            try
            {
                using FileStream fs = new(file.FullName, FileMode.Open, FileAccess.Read, FileShare.Read, HashBufferSize, FileOptions.SequentialScan);
                using MD5 md5 = MD5.Create();
                hashlist.TryAdd(file, Convert.ToHexString(md5.ComputeHash(fs)));
            }
            catch (Exception ex)
            {
                // Best effort: an unreadable file is recorded (with its path) and the scan goes on.
                errorlist.Add($"{file.FullName}: {ex.Message}");
            }
        });

        // Sorting by path before grouping makes the first (kept) file of each group deterministic;
        // GroupBy keeps the elements of a group in source order.
        duplicates = hashlist
            .OrderBy(p => p.Key.FullName, StringComparer.Ordinal)
            .GroupBy(p => p.Value)
            .Where(g => g.Count() > 1)
            .ToList();

        foreach (var group in duplicates)
        {
            count += group.Count();
            size += group.Skip(1).Sum(p => p.Key.Length); // the first file of a group is the one that is kept
        }
    }

    /// <summary>
    /// Registers an Explorer context-menu verb below <c>HKEY_CLASSES_ROOT\{objecttype}\shell</c>.
    /// For <c>"Directory"</c> the verb is also registered for the folder background.
    /// An existing verb with the same key name is replaced.
    /// </summary>
    /// <param name="objecttype">Class key to attach the verb to, e.g. <c>Directory</c>.</param>
    /// <param name="contextmenuentry">Text shown in the context menu.</param>
    /// <param name="location">Directory that contains the executable.</param>
    /// <param name="processname">File name of the executable.</param>
    /// <param name="keyname">Registry key name of the verb.</param>
    /// <param name="parameters">Text appended after the executable path; the default closes the quoted path and passes <c>"%V"</c>.</param>
    private static void CreateContextMenuEntry(string objecttype, string contextmenuentry, string location, string? processname, string keyname, string? parameters = "\" \"%V\"")
    {
        string command = "\"" + location + "\\" + processname + parameters;
        WriteShellVerb(objecttype + "\\shell", keyname, contextmenuentry, command);
        //background of directory folder
        if (objecttype == "Directory")
            WriteShellVerb(objecttype + "\\Background\\shell", keyname, contextmenuentry, command);
    }

    /// <summary>Creates (or replaces) the verb <paramref name="keyname"/> and its <c>command</c> subkey below <paramref name="shellpath"/>.</summary>
    private static void WriteShellVerb(string shellpath, string keyname, string contextmenuentry, string command)
    {
        string verbpath = shellpath + "\\" + keyname;
        Registry.ClassesRoot.DeleteSubKeyTree(verbpath, throwOnMissingSubKey: false);
        using RegistryKey verb = Registry.ClassesRoot.CreateSubKey(verbpath);
        verb.SetValue("", contextmenuentry);
        using RegistryKey commandkey = verb.CreateSubKey("command");
        commandkey.SetValue("", command);
    }

    /// <summary>
    /// Interactive installer: makes sure the process is elevated (relaunching itself through UAC if
    /// necessary), copies the program files to a directory entered by the user and registers the
    /// context-menu entry. Terminates the process when it is done.
    /// </summary>
    private static void Install()
    {
        try
        {
            //check if we are currently running as administrator
            if (!new WindowsPrincipal(WindowsIdentity.GetCurrent()).IsInRole(WindowsBuiltInRole.Administrator))
            {
                Console.WriteLine("This program requires administrative privileges to install.\nPress ENTER to start with elevated privileges or ESC to exit.");
                ConsoleKey pressed;
                do
                {
                    pressed = Console.ReadKey(true).Key;
                    if (pressed == ConsoleKey.Enter)
                    {
                        //start new process as administrator. Environment.ProcessPath is the path of what we are currently running.
                        Process.Start(new ProcessStartInfo { FileName = Environment.ProcessPath, UseShellExecute = true, Verb = "runas" });
                        Environment.Exit(0);
                    }
                } while (pressed != ConsoleKey.Escape);
                Environment.Exit(0);
            }
        }
        //if user selects "no" from administrator request.
        catch (Exception)
        {
            Console.WriteLine("\nAdministrative rights are required for installing this application.\nPress any key to exit.");
            Console.ReadKey(true);
            Environment.Exit(0);
        }

        string? destdir = null;
        while (string.IsNullOrWhiteSpace(destdir))
        {
            Console.WriteLine("Enter directory to Install to, e.g. c:\\dupe.\n");
            string? input = Console.ReadLine();
            if (input is null)
                Environment.Exit(0); // standard input was closed, nobody is left to answer the prompt
            destdir = input?.Trim();
        }

        try
        {
            destdir = Path.TrimEndingDirectorySeparator(Path.GetFullPath(destdir));
            // Copy from the folder the program lives in. The current working directory is not a
            // reliable substitute: an elevated relaunch typically starts in a different directory.
            string sourcedir = Path.TrimEndingDirectorySeparator(Path.GetFullPath(AppContext.BaseDirectory));
            if (string.Equals(sourcedir, destdir, StringComparison.OrdinalIgnoreCase))
            {
                // Copying a file onto itself would fail, and there is nothing to do anyway.
                Console.WriteLine("\nProgram is already located in the destination directory, no files copied.");
            }
            else
            {
                Directory.CreateDirectory(destdir);
                foreach (FileInfo item in new DirectoryInfo(sourcedir).GetFiles())
                    File.Copy(item.FullName, Path.Combine(destdir, item.Name), true);
                Console.WriteLine("\nFiles copied to destination directory.");
            }
            CreateContextMenuEntry("Directory", "Find duplicate files", destdir, Path.GetFileName(Environment.ProcessPath), "Dupe");
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException or ArgumentException or NotSupportedException or System.Security.SecurityException)
        {
            // Show the reason instead of letting the console window vanish with an unhandled exception.
            Console.WriteLine($"\nInstallation failed: {ex.Message}\nPress any key to exit.");
            Console.ReadKey(true);
            Environment.Exit(1);
        }

        Console.WriteLine("Context menu entry added, install complete.\nPress any key to exit.");
        Console.ReadKey(true);
        Environment.Exit(0);
    }

    /// <summary>
    /// Executes one menu command. Recognised (case-insensitive): C clear console, E show errors,
    /// Z show zero byte files, L list duplicates, D delete duplicates. Other characters are ignored.
    /// </summary>
    /// <param name="ans">Key character entered by the user.</param>
    private static void ParseAnswer(char ans)
    {
        switch (char.ToLowerInvariant(ans))
        {
            case 'c':
                Console.Clear();
                break;

            case 'e':
                if (errorlist.IsEmpty)
                    Console.WriteLine("There were no errors detected.\n");
                else
                {
                    Console.WriteLine("Errors...");
                    foreach (string item in errorlist)
                        Console.WriteLine($"{item}\n");
                }
                break;

            case 'z':
                if (zerolist.IsEmpty)
                    Console.WriteLine("There are no zero byte files.\n");
                else
                {
                    Console.WriteLine("Zero byte files:\n");
                    foreach (string item in zerolist)
                        Console.WriteLine($"{item}");
                }
                break;

            case 'l':
                if (duplicates.Count == 0)
                    Console.WriteLine("There are no duplicate files to list.\n");
                foreach (var group in duplicates)
                {
                    Console.WriteLine($"\nThe following {group.Count()} files are identical:");
                    foreach (var file in group)
                        Console.WriteLine(file.Key.FullName);
                }
                break;

            case 'd':
                DeleteDuplicates();
                break;
        }
    }

    /// <summary>
    /// Sends every file except the first still-existing one of each duplicate group to the recycle bin.
    /// Failures are added to <c>errorlist</c> instead of aborting, and <c>duplicates</c> is refreshed
    /// afterwards so that it only contains groups that still have redundant files on disk.
    /// </summary>
    private static void DeleteDuplicates()
    {
        Console.WriteLine("Deleting files...\n");
        int failures = 0;
        foreach (var group in duplicates)
        {
            // Files that vanished since the scan are ignored, so one existing copy always survives.
            var existing = group.Where(p => File.Exists(p.Key.FullName)).ToList();
            foreach (var redundant in existing.Skip(1))
            {
                try
                {
                    FileSystem.DeleteFile(redundant.Key.FullName, UIOption.OnlyErrorDialogs, RecycleOption.SendToRecycleBin); //File.Delete(redundant.Key.FullName);
                }
                catch (Exception ex)
                {
                    failures++;
                    errorlist.Add($"{redundant.Key.FullName}: {ex.Message}");
                }
            }
        }

        // Drop what is gone; repeating the command must not touch files that were already removed.
        duplicates = duplicates
            .SelectMany(g => g)
            .Where(p => File.Exists(p.Key.FullName))
            .GroupBy(p => p.Value)
            .Where(g => g.Count() > 1)
            .ToList();

        Console.WriteLine(failures == 0
            ? "File deletion completed.\n"
            : $"File deletion completed, {failures} file(s) could not be deleted (press E for details).\n");
    }

    /// <summary>
    /// Entry point. Without arguments the installer runs; otherwise <c>args[0]</c> is the directory
    /// to scan, after which an interactive menu is shown.
    /// </summary>
    /// <param name="args">Command line arguments; only the first one is used.</param>
    public static void Main(string[] args)
    {
        Console.Clear(); //Console.SetWindowSize(200,200);
        if (args.Length == 0)
        {
            Install();
            return; // Install ends the process itself; never fall through to args[0]
        }

        DirectoryInfo target = new(args[0]);
        if (!target.Exists)
        {
            Console.WriteLine($"Directory not found: {target.FullName}\nPress any key to exit.");
            Console.ReadKey(true);
            return;
        }

        Console.WriteLine("Processing files...");
        Stopwatch watch = Stopwatch.StartNew();
        GetDuplicates(new[] { target });
        watch.Stop();

        Console.WriteLine($"\n{count} total files in duplicates list, {duplicates.Count} files have duplicates.\n{count - duplicates.Count} files can be deleted saving {size / BytesPerMegabyte:n2} MB of space.");
        Console.WriteLine($"{hashlist.Count} files hashed in {watch.Elapsed.TotalSeconds:n2} seconds.");
        Console.WriteLine($"{zerolist.Count} zero byte files, {errorlist.Count} errors");

        if (duplicates.Count == 0)
        {
            Console.WriteLine("No duplicates found.");
            if (errorlist.IsEmpty && zerolist.IsEmpty)
            {
                Console.WriteLine("Press any key to continue.");
                Console.ReadKey(true);
                return;
            }
            // Errors or zero byte files were recorded: keep the menu available so they can be viewed.
        }

        char ans;
        do
        {
            Console.WriteLine("\n{L}ist duplicate files {D}elete all duplicates {C}lear console {E}rrors [Z]ero byte files E[X]it program\n");
            do ans = Console.ReadKey(true).KeyChar;
            while (!MenuKeys.Contains(ans));
            ParseAnswer(ans);
        } while (ans != 'x' && ans != 'X');
    }
}
Top comments (0)