-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
175 lines (146 loc) · 6.61 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
"""
------------
File Scanner
------------
by bathtaters
"""
# Help text for the command-line interface. main() hands this string to
# get_ui() together with sys.argv and DEFAULT_CSV.
# The $MAIN and $CSV_PATH tokens look like placeholders that get_ui() fills
# in before printing -- TODO confirm against file_compare.base.compUI.
# NOTE(review): the image-size variance option near the end of the scan/keep
# options is listed as -vs, the same flag as the file-size variance. One of
# the two is probably wrong; confirm against the option parser before editing.
usage = """
File Scanner (v2.1.4) by bathtaters
Find and manage duplicate/similar files.
Usage: python $MAIN [-m:mode] [-option:value] [csv path] paths...
Modes:
-m:scan Scan each path for files, save results to CSV.
-m:keep Update CSV, auto-flagging which files to keep.
-m:reset Clear all 'keep' flags in CSV.
-m:move Move all unflagged files in CSV to first non-CSV path.
-m:recover Return all unflagged files in CSV from first non-CSV path to original location.
-m:clean Cleans up CSV: remove paths that don't exist and groups that are all flagged as 'keep.'
-m:view Open files in CSV with default app, one group at a time (Requires command line interaction).
-m:delete Delete all unflagged files permanently and update CSV.
-m:rmstr Get delete command as string (Prints to stdout).
Options for scan & keep modes:
-x:$ File extensions to exclusively scan, $ is an unspaced, comma-separated list (ex. jpg,jpeg,mp4).
-i:$ Filenames to ignore/skip, $ is an unspaced, comma-separated list (ex. .DS_Store,Thumbs.db).
-g:$ Set the file details to group by, $ is an unspaced, comma-separated list of values:
• name Match on same start of filename
• size Match on file size (within variance range)
• img_hash Match on perceptual hash of still (i.e. visual similarity)
• av_hash Match on perceptual hash of video or audio (i.e. visual/audible similarity)
• av_streams Match by stream layouts (i.e. video/audio size/duration/codec)
• av_dur Match on video/audio duration
-- (Default groups above this line) --
• all Matches every file, placing them all under the same group
• ctime Match on file creation time
• mtime Match on file modified time
• img_type Match on image encoding (ex. JPEG,MPO,HEIF,etc)
• img_size Match on image dimensions (WxH)
• img_frames Match on framecount (For animated images, otherwise 1)
• av_media Match on media type (video or audio)
• av_container Match on audio/video container name
• av_bitrate Match on total bitrate of media (bits/second)
-t:# Match threshold for perceptual hash comparison (As a percentage, with 100 being an exact match).
-p:# Size (precision) of perceptual hashes (As a power of 2, higher values take longer but are more precise).
-vs:# Variance range (# is +/- value in bytes) of file size within group.
-vt:# Variance range (# is +/- value in ms) of created/modified times within group.
-vb:# Variance range (# is +/- value in bytes) of video bitrate within group.
-vd:# Variance range (# is +/- value in seconds) of video duration within group.
-vs:# Variance range (# is +/- value in pixels) of image size (w x h) within group.
-r:$ Path (from paths) to exclusively use when removing files (All files not under this path will be marked to keep).
Other options:
-fc:# For m:clean, remove files with less than this many matching stats.
-fv:# For m:view, filter out groups with less than this many matches.
-v If present, prints every match found (NOTE: All feedback is printed to stderr).
-h Shows you this help text.
If no CSV provided, uses '$CSV_PATH'
"""
### --- BASE OPTIONS --- ###
# Import Plugins
from file_compare.plugins.image import ImagePlugin
from file_compare.plugins.av import AVPlugin
# Default CSV file. main() hands this path to get_ui(); per the usage text it
# is the CSV used when none is given on the command line.
DEFAULT_CSV = "./results.csv"
# Default options.
# main() overlays the options parsed from the command line on top of this dict
# (options.update) and then unpacks the result as keyword arguments to
# ComparisonController, so every key here is passed to that constructor.
options = {
# Default file extensions (In order of preference), None will search all
"exts": None,
# Default fields to create groups with (None will use defaults from plugins)
"group_by": None, # e.g. ('name','size','img_hash','av_hash','av_streams','av_dur') -- the default groups named in the usage text
# Default variance (+/- bytes) for size stat within groups
"size_var": 0,
# Default variance (+/- ms) for create/modify times stat within groups
"time_var": 0,
# Default shortest filename length that will use the alternative matcher.
"min_name": 3,
# For m:clean, remove files with less than this many matching stats.
"clean_filter": 0,
# For m:view, filter out groups with less than this many matches.
"view_filter": 0,
# Ignore these files (matched by filename, per the -i option in the usage text)
"ignore": ('.DS_Store','Thumbs.db'),
# Path to log file used to backup/recover interrupted scans
# NOTE(review): the leading "~" is presumably expanded by the consumer -- confirm.
"logpath": "~/.filescan.log",
# If True, prints each duplicate found to stderr
"verbose": False,
# Set to a list of paths, will force AutoKeeper to only remove files under these paths
"rm_paths": None,
### PLUGINS ###
# List ComparisonPlugins to use here (In order of least specific to most)
"plugins": [ImagePlugin, AVPlugin],
# Video containers in order of preference, None will ignore
"vid_containers": None,
# Image codecs in order of preference, None will ignore
"img_codecs": None,
# Threshold for perceptual hash comparisons (a percentage per the usage text, 100 being an exact match)
"threshold": 95,
# Size for perceptual hash comparisons (described in the usage text as a power of 2)
"precision": 16,
# Default video bitrate variance (+/- bytes) for bitrate stat within groups
# NOTE(review): the usage text describes av_bitrate in bits/second, so the
# "bytes" unit stated here may be inaccurate -- confirm against the AV plugin.
"bitrate_var": 100,
# Default video length variance (+/- seconds) for duration stat within groups
"duration_var": 1,
# Default picture dimension variance (+/- pixels) for w*h stat within groups
"dimension_var": 100,
}
### --- CLI RUNNER --- ###
import sys
from file_compare.base.compUI import get_ui
from file_compare.module import ComparisonController
def main():
    """Run the File Scanner command-line interface.

    Parses ``sys.argv`` through ``get_ui``, overlays the parsed options on the
    module-level ``options`` dict, builds a ``ComparisonController`` from the
    merged options and runs the one operation selected by the mode.

    Every mode except ``scan`` loads the CSV before acting; only some modes
    write it back afterwards. An unrecognised mode exits with status 1.
    """
    mode, cli_opts, csv_path = get_ui(sys.argv, usage, DEFAULT_CSV)
    options.update(cli_opts)
    controller = ComparisonController(**options)

    # "scan" is the only mode that does not load an existing CSV first.
    if mode == "scan":
        controller.scan()
        controller.save_csv(csv_path)
        return

    # mode -> (operation run after the CSV is loaded, save the CSV afterwards?)
    # Lambdas keep attribute lookup lazy, so only the chosen operation is touched.
    operations = {
        "keep": (lambda: controller.auto_keep(), True),
        "reset": (lambda: controller.reset_keep(False), True),
        "move": (lambda: controller.move(), False),
        "recover": (lambda: controller.recover(), False),
        "clean": (
            lambda: controller.clean(
                clean_solo=True, clean_kept=True, use_filter=True
            ),
            True,
        ),
        "view": (lambda: controller.view_all(), False),
        "delete": (lambda: controller.delete(), True),
        # The delete command string goes to stdout; nothing is written back.
        "rmstr": (lambda: print(controller.delete_str()), False),
    }

    if mode not in operations:
        sys.exit(1)

    run_operation, save_after = operations[mode]
    controller.load_csv(csv_path)
    run_operation()
    if save_after:
        controller.save_csv(csv_path)


if __name__ == "__main__":
    main()