#!/usr/bin/python
import os
import sys
def getUniqueKey(fp):
    """
    Build an integer key from a file's size and last-modification time.

    Two files receive the same key only when both their size and their
    modification time (truncated to whole seconds) are equal.  The values are
    packed into separate bit ranges rather than added together, because a
    plain sum makes unrelated files collide (e.g. size 10 / mtime 1000 and
    size 20 / mtime 990 both sum to 1010).

    :param fp: path of the file to inspect
    :return: int with the modification time in the high bits and the size in
             the low 64 bits
    :raises OSError: if the file cannot be stat'ed (missing, broken symlink,
                     permission denied)
    """
    # A single stat() call yields both values from the same snapshot.
    info = os.stat(fp)
    # The size is placed in the low 64 bits (assumes sizes below 2**64) so the
    # shifted timestamp can never overlap with it.
    return (int(info.st_mtime) << 64) | info.st_size
def findDupcateFiles(folder_name):
    """
    Find groups of probable duplicate files below a folder.

    Walks ``folder_name`` and all of its sub-directories, groups every file by
    the key returned from ``getUniqueKey`` and keeps only the groups that
    contain more than one path.

    :param folder_name: root directory of the scan
    :return: list of lists; each inner list holds the paths of the files that
             share one key (empty list when nothing is duplicated)
    """
    paths_by_key = {}
    for current_dir, _sub_dirs, file_names in os.walk(folder_name):
        for file_name in file_names:
            f_path = os.path.join(current_dir, file_name)
            try:
                key = getUniqueKey(f_path)
            except OSError:
                # Broken symlink, file removed during the scan, or no
                # permission: skip this entry instead of aborting the walk.
                continue
            paths_by_key.setdefault(key, []).append(f_path)
    return [paths for paths in paths_by_key.values() if len(paths) > 1]
def printDupcates(duplicates):
    """
    Print the result of a duplicate search to standard output.

    Every group is printed as a header line, a separator, one path per line
    and a closing separator.  When there are no groups, a single
    "not found" line is printed instead.

    :param duplicates: sequence of groups, each group being a sequence of
                       file paths (as returned by ``findDupcateFiles``)
    :return: None
    """
    # Guard clause: nothing to report.
    if not duplicates:
        print("Duplicated files: not found.")
        return

    for group in duplicates:
        print('Duplicates files: (by filse size and last date-time changes):')
        print('-----------')
        for path in group:
            print(path)
        print('-----------')
if __name__ == '__main__':
    # Command-line entry point: expects the folder to scan as first argument.
    if len(sys.argv) > 1:
        target = sys.argv[1]
        # os.walk() silently yields nothing for a missing path, which would
        # otherwise be reported as "Duplicated files: not found.".
        if not os.path.isdir(target):
            sys.exit("Error: '" + target + "' is not a directory")
        dups = findDupcateFiles(target)
        printDupcates(dups)
    else:
        # sys.exit() with a string writes it to stderr and exits with status 1,
        # so misuse is visible to calling scripts.
        sys.exit("Usage: python " + __file__ + " folder_name")