Duplicate Finder Script: Difference between revisions

From Federal Burro of Information
Jump to navigation · Jump to search
(Created page with " finder.py <pre> #!/usr/bin/env python3 import os import hashlib def get_file_checksum(filename): """ Returns the SHA-256 hash of the contents of a file """ sha256 = hashlib.sha256() with open(filename, 'rb') as f: while True: data = f.read(1024) if not data: print("x", end = '') break sha256.update(data) print("!", end = '') return sha256.hexdigest() def fi...")
 
No edit summary
 
Line 1: Line 1:


finder.py
finder.py
courtesy chatgpt


<pre>
<pre>

Latest revision as of 23:58, 9 June 2023

finder.py

Courtesy of ChatGPT.

#!/usr/bin/env python3
import hashlib
import os
import sys

def get_file_checksum(filename):
    """
    Returns the SHA-256 hex digest of the contents of a file.

    The file is opened in binary mode and consumed in 1024-byte reads.
    As a progress indicator, one "!" is written to stdout (without a
    newline) for every chunk hashed, followed by a single "x" once the
    end of the file is reached.
    """
    digest = hashlib.sha256()
    with open(filename, 'rb') as stream:
        # Two-argument iter() keeps calling read() until it returns b'' (EOF).
        for block in iter(lambda: stream.read(1024), b''):
            digest.update(block)
            print("!", end = '')
        print("x", end = '')
    return digest.hexdigest()

def find_duplicate_files(rootdir):
    """
    Finds duplicate files in a directory tree rooted at rootdir

    Every file name yielded by os.walk(rootdir) is hashed with
    get_file_checksum. The first path seen for a given checksum is
    remembered; each later path with the same checksum is reported as a
    duplicate of that first path.

    Files that cannot be opened or read (e.g. broken symlinks, permission
    errors, files removed while the scan is running) are skipped with a
    warning on stderr instead of aborting the whole scan.

    Returns a list of (duplicate_path, first_seen_path) tuples, in the
    order the duplicates were encountered.
    """
    checksums = {}
    duplicates = []
    for dirpath, _dirnames, filenames in os.walk(rootdir):
        for filename in filenames:
            filepath = os.path.join(dirpath, filename)
            try:
                checksum = get_file_checksum(filepath)
            except OSError as err:
                # One unreadable entry must not discard the results gathered
                # so far; os.walk likewise ignores unreadable directories
                # by default.
                print(f'skipping {filepath}: {err}', file=sys.stderr)
                continue
            if checksum in checksums:
                duplicates.append((filepath, checksums[checksum]))
            else:
                checksums[checksum] = filepath
    return duplicates

if __name__ == '__main__':
    # Scan the directory given as the first command-line argument; fall back
    # to the original hard-coded location when no argument is supplied.
    rootdir = sys.argv[1] if len(sys.argv) > 1 else '/home/david/david/'
    for duplicate, original in find_duplicate_files(rootdir):
        print(f'{duplicate} is a duplicate of {original}')