Duplicate Finder Script: Difference between revisions
From Federal Burro of Information
Jump to navigation · Jump to search
(Created page with " finder.py <pre> #!/usr/bin/env python3 import os import hashlib def get_file_checksum(filename): """ Returns the SHA-256 hash of the contents of a file """ sha256 = hashlib.sha256() with open(filename, 'rb') as f: while True: data = f.read(1024) if not data: print("x", end = '') break sha256.update(data) print("!", end = '') return sha256.hexdigest() def fi...") |
No edit summary |
||
Line 1: | Line 1: | ||
finder.py | finder.py | ||
courtesy chatgpt | |||
<pre> | <pre> |
Latest revision as of 23:58, 9 June 2023
finder.py
Courtesy of ChatGPT.
#!/usr/bin/env python3
"""Report files with identical contents under a directory tree.

Usage: finder.py [ROOTDIR]

ROOTDIR defaults to DEFAULT_ROOT when no argument is given.  One line is
printed to stdout for every file whose contents match an earlier file;
warnings about files that could not be read go to stderr.
"""
import collections
import hashlib
import os
import sys

# Directory scanned when the script is run without arguments.
DEFAULT_ROOT = '/home/david/david/'

# Bytes read per call while hashing.
DEFAULT_CHUNK_SIZE = 64 * 1024


def get_file_checksum(filename, chunk_size=DEFAULT_CHUNK_SIZE):
    """Return the SHA-256 hex digest of the contents of a file.

    The file is read in chunks of ``chunk_size`` bytes, so it is never held
    in memory as a whole.  Nothing is printed: the function only computes
    and returns the digest.

    Args:
        filename: Path of the file to hash.
        chunk_size: Number of bytes to read per call; must be at least 1.

    Returns:
        The digest as a lowercase hexadecimal string.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
        OSError: If the file cannot be opened or read.
    """
    if chunk_size < 1:
        raise ValueError('chunk_size must be at least 1')
    sha256 = hashlib.sha256()
    with open(filename, 'rb') as f:
        # iter() with a sentinel stops at the first empty read (end of file).
        for data in iter(lambda: f.read(chunk_size), b''):
            sha256.update(data)
    return sha256.hexdigest()


def find_duplicate_files(rootdir):
    """Find files with identical contents in the tree rooted at rootdir.

    Directories and file names are visited in sorted order, so the result
    is the same from run to run.  The first file seen with a given content
    is treated as the original; every later file with the same SHA-256
    digest is reported as its duplicate.

    Entries that are not regular files (for example dangling symlinks or
    FIFOs) are ignored.  Files that cannot be read are skipped with a
    warning on stderr instead of aborting the scan.

    Args:
        rootdir: Directory to scan recursively.

    Returns:
        A list of ``(duplicate_path, original_path)`` tuples in traversal
        order.  The list is empty when there are no duplicates or when
        rootdir does not exist.
    """
    # Pass 1: record every regular file with its size, in traversal order.
    candidates = []
    size_counts = collections.Counter()
    for dirpath, dirnames, filenames in os.walk(rootdir):
        # Sorting dirnames in place makes os.walk descend in sorted order.
        dirnames.sort()
        for filename in sorted(filenames):
            filepath = os.path.join(dirpath, filename)
            if not os.path.isfile(filepath):
                continue
            try:
                size = os.path.getsize(filepath)
            except OSError as err:
                print(f'skipping {filepath}: {err}', file=sys.stderr)
                continue
            candidates.append((filepath, size))
            size_counts[size] += 1

    # Pass 2: hash only files that share their size with another file; a
    # file with a unique size cannot have identical contents to any other.
    checksums = {}
    duplicates = []
    for filepath, size in candidates:
        if size_counts[size] < 2:
            continue
        try:
            checksum = get_file_checksum(filepath)
        except OSError as err:
            print(f'skipping {filepath}: {err}', file=sys.stderr)
            continue
        if checksum in checksums:
            duplicates.append((filepath, checksums[checksum]))
        else:
            checksums[checksum] = filepath
    return duplicates


def main(argv=None):
    """Run the duplicate report and return a process exit status.

    Args:
        argv: Command-line arguments without the program name; defaults to
            ``sys.argv[1:]``.  The first argument, if present, is the
            directory to scan; otherwise DEFAULT_ROOT is used.

    Returns:
        0 on success, 2 if the directory to scan does not exist.
    """
    if argv is None:
        argv = sys.argv[1:]
    rootdir = argv[0] if argv else DEFAULT_ROOT
    if not os.path.isdir(rootdir):
        print(f'{rootdir} is not a directory', file=sys.stderr)
        return 2
    for dup in find_duplicate_files(rootdir):
        print(f'{dup[0]} is a duplicate of {dup[1]}')
    return 0


if __name__ == '__main__':
    sys.exit(main())