-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy patharchiveInfo.py
executable file
·53 lines (43 loc) · 1.55 KB
/
archiveInfo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
#!/usr/bin/env python
"""
archiveInfo.py: recursively extract archives and list found [timestamp](\.[\d])+\.dat(\.[gz|tar\.gz|tar.bz2|tgz|tbz])+
"""
import sys, os, io, getopt
import re
import tarfile
# Names that start with one or more digits and contain ".dat" anywhere after
# them (optionally followed by further suffixes such as ".tar.gz").
# Raw strings keep the regex escapes (\d, \.) away from Python's own string
# escape processing, which warns about unknown escapes on recent versions.
dataFile = re.compile(r'^\d+.*\.dat(\.)?.*$')
# Names ending in one of the tarball suffixes: .tar.gz, .tgz, .tar.bz2, .tbz.
archiveFile = re.compile(r'^.*\.(tar\.gz|tgz|tar\.bz2|tbz)$')
def walkFs(path):
    """Scan a directory tree and list the data files inside matching tarballs.

    Every regular file below ``path`` whose base name matches ``dataFile``
    is read completely into memory and passed to ``walkArchive``, which
    prints the entries it finds.

    Args:
        path: Directory to scan recursively with ``os.walk``.

    Returns:
        None. Failures while reading or processing a file are not raised;
        a ``1:Error opening <file>`` line is written to stderr instead and
        the scan continues with the next file.
    """
    for root, _dirs, files in os.walk(path):
        for name in files:
            n = os.path.join(root, name)
            # os.walk lists every non-directory entry under "files"; skip
            # anything that is not a regular file or does not look like data.
            if not os.path.isfile(n) or not dataFile.match(name):
                continue
            try:
                # The context manager closes the handle as soon as the
                # content is buffered, instead of leaking one per file.
                with open(n, 'rb') as tarball:
                    payload = io.BytesIO(tarball.read())
                walkArchive(root, name, payload)
            except Exception:
                # Best effort: one unreadable or malformed file must not
                # abort the scan of the remaining tree.
                sys.stderr.write("1:Error opening %s\n" % n)
def walkArchive(root, fn, fh):
    """Print the data files stored in a tarball, descending into nested tarballs.

    For every member whose name matches ``dataFile`` one line
    ``<root>/<fn> <member name>`` is printed. Members whose name matches
    ``archiveFile`` instead are extracted into memory and walked
    recursively, with ``<root>/<fn>`` as their new root.

    Args:
        root: Path of the container (directory or outer archive) that holds
            this archive; used only to build the printed prefix.
        fn: Name of this archive inside ``root``.
        fh: Binary file object holding the archive data (the callers in this
            file pass an ``io.BytesIO``).

    Returns:
        None.

    Raises:
        tarfile.TarError: If ``fh`` or a nested archive cannot be read as a
            tar archive. Errors are not caught here, so a broken nested
            archive ends the walk of the enclosing archive as well.
    """
    root = os.path.join(root, fn)
    # The context manager releases the TarFile (and any decompressor it
    # created) even when a nested archive raises.
    with tarfile.open(fileobj=fh) as t:
        for f in t.getmembers():
            # NOTE(review): the patterns are applied to the full member name,
            # so entries stored under a directory prefix are not matched by
            # the digit-anchored dataFile pattern - confirm this is intended.
            if dataFile.match(f.name):
                # Single parenthesised argument: prints the same line under
                # Python 2 (print statement) and Python 3 (print function).
                print(root + " " + f.name)
            elif archiveFile.match(f.name):
                nested = t.extractfile(f)
                if nested is None:
                    # Not a regular file or link (e.g. a directory whose
                    # name ends in .tgz): there is nothing to read.
                    continue
                try:
                    payload = io.BytesIO(nested.read())
                finally:
                    nested.close()
                walkArchive(root, f.name, payload)
def main():
    """Command-line entry point: scan the single directory given as argument.

    The command line is parsed with ``getopt`` using an empty option string,
    so no options are accepted. Exactly one positional argument, the
    directory to scan, must remain.

    Raises:
        getopt.GetoptError: If an option is supplied or the number of
            positional arguments is not exactly one.
    """
    _opts, positional = getopt.getopt(sys.argv[1:], "")
    if len(positional) == 1:
        walkFs(positional[0])
        return
    raise getopt.GetoptError('Need exactly one directory path', None)


# Run the scan only when executed as a script, not when imported.
if __name__ == '__main__':
    main()