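Paste the code below into a Python IDE or Visual Studio Code, save it with a .py extension, and run it. (Edit the os.path.expanduser entries to point at the directories you want to search.) Note that the script permanently deletes the duplicate files it finds, so back up anything important first.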
Python:
import os
import hashlib
import tkinter as tk
from tkinter import messagebox
def get_file_hash(file_path, chunk_size=65536):
    """Return the SHA-256 hex digest of the file at ``file_path``.

    The file is opened in binary mode and read ``chunk_size`` bytes at a
    time, so large files are never held in memory in one piece.

    Args:
        file_path: Path of the file to hash.
        chunk_size: Number of bytes to read per iteration; must be >= 1.

    Returns:
        The digest as a lowercase hexadecimal string.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
        OSError: If the file cannot be opened or read.
    """
    if chunk_size < 1:
        # read(0) returns b"" immediately and read(-1) slurps the whole file;
        # neither is what a caller asking for chunked hashing wants.
        raise ValueError("chunk_size must be a positive integer")
    hash_sha256 = hashlib.sha256()
    with open(file_path, 'rb') as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            hash_sha256.update(chunk)
    return hash_sha256.hexdigest()


def _record_by_content(groups, key, file_path):
    """File ``file_path`` under ``key + (digest,)``; return 1 if it repeats earlier content.

    Files that cannot be read are reported and left out of ``groups``.
    """
    try:
        digest = get_file_hash(file_path)
    except OSError as e:
        print(f"Error accessing file '{file_path}': {e}")
        return 0
    same_content = groups.setdefault(key + (digest,), [])
    same_content.append(file_path)
    return 1 if len(same_content) > 1 else 0


def find_duplicate_files(directories, popup_text_var):
    """Find files that share a name, a size and identical content.

    Every directory in ``directories`` is walked recursively.  Files are
    first bucketed by ``(filename, size)``; only when a bucket receives a
    second file are its members hashed with ``get_file_hash``, so files with
    a unique name/size pair are never read.

    Args:
        directories: Iterable of directory paths to scan.
        popup_text_var: Object with a ``set(str)`` method (for example a
            ``tk.StringVar``); it receives a progress message after each
            file whose size could be read.

    Returns:
        A tuple ``(total_scanned_files, duplicates)``.  ``duplicates`` maps
        ``(filename, size, sha256_hexdigest)`` to a list of two or more
        paths, in the order they were encountered.
    """
    pending = {}  # (filename, size) -> only path seen so far, None once hashed
    groups = {}  # (filename, size, digest) -> paths with that exact content
    seen_paths = set()
    total_scanned_files = 0
    duplicate_count = 0

    for directory in directories:
        for root, _dirs, files in os.walk(directory, topdown=True):
            for filename in files:
                file_path = os.path.join(root, filename)

                # Overlapping scan roots (or links) can surface one physical
                # file several times; treating it as its own duplicate would
                # get the only copy deleted later on.
                real_path = os.path.realpath(file_path)
                if real_path in seen_paths:
                    continue
                seen_paths.add(real_path)

                total_scanned_files += 1
                try:
                    file_size = os.path.getsize(file_path)
                except OSError as e:
                    print(f"Error accessing file '{file_path}': {e}")
                    continue

                key = (filename, file_size)
                if key not in pending:
                    # First file with this name and size: hashing is deferred
                    # until a second candidate actually shows up.
                    pending[key] = file_path
                else:
                    earlier = pending[key]
                    if earlier is not None:
                        _record_by_content(groups, key, earlier)
                        pending[key] = None
                    duplicate_count += _record_by_content(groups, key, file_path)

                # Update progress and display in popup window
                popup_text_var.set(
                    f"Scanned files: {total_scanned_files}\nDuplicates found: {duplicate_count}"
                )

    # Keep only the content groups that really contain more than one file.
    duplicates = {key: paths for key, paths in groups.items() if len(paths) > 1}
    return total_scanned_files, duplicates
def delete_duplicate_files(duplicate_files):
    """Delete every file in each duplicate group except the first one.

    Args:
        duplicate_files: Mapping whose values are lists of paths considered
            duplicates of one another.  The keys are not used.

    Returns:
        A tuple ``(total_deleted_files, total_deleted_size)`` with the number
        of files removed and the sum of their sizes in bytes.  Files that
        could not be removed are reported on stdout and not counted.
    """
    total_deleted_files = 0
    total_deleted_size = 0
    for paths in duplicate_files.values():
        if not paths:
            continue
        # The first entry is the copy that is kept.
        kept_path = os.path.realpath(paths[0])
        for file_path in paths[1:]:
            # A repeated entry (or a link) that resolves to the kept file is
            # not a second copy; removing it would destroy the original.
            if os.path.realpath(file_path) == kept_path:
                continue
            try:
                file_size = os.path.getsize(file_path)
                os.remove(file_path)
            except OSError as e:
                print(f"Error deleting file '{file_path}': {e}")
            else:
                total_deleted_files += 1
                total_deleted_size += file_size
                print(f"Deleted '{file_path}'")
    return total_deleted_files, total_deleted_size
def _format_size(size):
    """Render a byte count with two decimals in the largest unit that keeps it below 1024."""
    size_units = ['bytes', 'KB', 'MB', 'GB', 'TB']
    unit_index = 0
    while size >= 1024 and unit_index < len(size_units) - 1:
        size /= 1024
        unit_index += 1
    return f"{size:.2f} {size_units[unit_index]}"


def show_popup(directories=None):
    """Scan for duplicate files, optionally delete them, and show the results.

    A small window displays the running progress of the scan.  When
    duplicates are found the user is asked to confirm before anything is
    deleted; the window then shows the final totals and stays open until it
    is closed.

    Args:
        directories: Directories to scan.  Defaults to the current user's
            Desktop, Downloads, Documents, Pictures, Videos and Music
            folders.
    """
    # Kept as a module-level name because the original exposed it that way.
    global popup_text

    if directories is None:
        directories = [
            os.path.expanduser('~/Desktop'),
            os.path.expanduser('~/Downloads'),
            os.path.expanduser('~/Documents'),
            os.path.expanduser('~/Pictures'),
            os.path.expanduser('~/Videos'),
            os.path.expanduser('~/Music'),
        ]

    root = tk.Tk()
    root.title("Scanning Progress")
    popup_text = tk.StringVar()
    label = tk.Label(root, textvariable=popup_text, padx=75, pady=20)
    label.pack()

    # The scan runs before mainloop() starts, so nothing would be drawn
    # unless pending redraws are flushed by hand: show the window once, then
    # repaint every time the progress text changes.
    popup_text.trace_add("write", lambda *_args: root.update_idletasks())
    root.update()

    # Start the scan
    total_scanned_files, duplicate_files = find_duplicate_files(directories, popup_text)

    # Deleting is irreversible, so ask before touching anything.
    redundant_copies = sum(len(paths) - 1 for paths in duplicate_files.values())
    total_deleted_files = 0
    total_deleted_size = 0
    if redundant_copies and messagebox.askyesno(
        "Delete duplicates?",
        f"Found {redundant_copies} duplicate file(s).\n"
        "Permanently delete them, keeping one copy of each?",
        parent=root,
    ):
        total_deleted_files, total_deleted_size = delete_duplicate_files(duplicate_files)

    # Display final results
    size_str = _format_size(total_deleted_size)
    popup_text.set(
        f"Scanned files: {total_scanned_files}\n"
        f"Duplicates found: {redundant_copies}\n"
        f"Duplicates deleted: {total_deleted_files}\n"
        f"Total space freed: {size_str}"
    )
    root.mainloop()
# Run the scan-and-clean-up GUI only when executed as a script, not on import.
if __name__ == "__main__":
    show_popup()