Skip to content

Commit ae44b60

Browse files
committed
Initial working version of pttree; walk_nodes is a bit slow
1 parent bcea5b4 commit ae44b60

File tree

3 files changed

+329
-1
lines changed

3 files changed

+329
-1
lines changed

setup.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -670,6 +670,7 @@ def get_cython_extfiles(extnames):
670670
'ptdump = tables.scripts.ptdump:main',
671671
'ptrepack = tables.scripts.ptrepack:main',
672672
'pt2to3 = tables.scripts.pt2to3:main',
673+
'pttree = tables.scripts.pttree:main',
673674
],
674675
}
675676

@@ -688,7 +689,7 @@ def get_cython_extfiles(extnames):
688689
'tables.tests', 'tables.nodes.tests',
689690
]
690691
setuptools_kwargs['scripts'] = [
691-
'utils/ptdump', 'utils/ptrepack', 'utils/pt2to3']
692+
'utils/ptdump', 'utils/ptrepack', 'utils/pt2to3', 'utils/pttree']
692693
# Copy additional data for packages that need it.
693694
setuptools_kwargs['package_data'] = {
694695
'tables.tests': ['*.h5', '*.mat'],

tables/scripts/pttree.py

+324
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,324 @@
1+
# -*- coding: utf-8 -*-
2+
3+
########################################################################
4+
#
5+
# License: BSD
6+
# Created: November 8, 2014
7+
# Author: Alistair Muldal - [email protected]
8+
#
9+
# $Id$
10+
#
11+
########################################################################
12+
13+
"""This utility prints the contents of an HDF5 file as a tree.
14+
15+
Run it with the -h flag for help on usage.
16+
17+
"""
18+
19+
import tables
20+
import numpy as np
21+
import os
22+
import argparse
23+
24+
def _get_parser():
25+
parser = argparse.ArgumentParser(
26+
description='''
27+
`pttree` is designed to give a quick overview of the contents of a
28+
PyTables HDF5 file by printing a depth-indented list of nodes, similar
29+
to the output of the Unix `tree` utility for viewing directory
30+
structures. It can also display the size, shape and compression states
31+
of individual nodes, as well as summary information for the whole file.
32+
For a more verbose output including metadata, see `ptdump`.
33+
''')
34+
35+
36+
parser.add_argument(
37+
'-L', '--max-level', type=int, dest='max_depth',
38+
help='maximum display depth of tree (-1 = no limit)',
39+
)
40+
parser.add_argument(
41+
'--print-size', action='store_true', dest='print_size',
42+
help='print size of each node',
43+
)
44+
parser.add_argument(
45+
'--no-print-size', action='store_false', dest='print_size',
46+
)
47+
parser.add_argument(
48+
'--print-shape', action='store_true', dest='print_shape',
49+
help='print shape of each node',
50+
)
51+
parser.add_argument(
52+
'--no-print-shape', action='store_false', dest='print_shape',
53+
)
54+
parser.add_argument(
55+
'--print-compression', action='store_true', dest='print_compression',
56+
help='print compression library(level) for each compressed node',
57+
)
58+
parser.add_argument(
59+
'--no-print-compression', action='store_false',
60+
dest='print_compression',
61+
)
62+
63+
parser.add_argument('src', metavar='filename[:nodepath]',
64+
help='path to the root of the tree structure')
65+
66+
parser.set_defaults(max_depth=-1, print_size=True, print_shape=False,
67+
print_compression=False)
68+
69+
return parser
70+
71+
72+
def main():
    """Command-line entry point: parse the arguments and print the tree.

    The positional argument has the form ``filename[:nodepath]``. When no
    node path is given (``filename`` or ``filename:``) the root group ``/``
    is used. All remaining parsed options are forwarded as keyword arguments
    to `get_tree_str`, and the resulting string is printed to stdout.

    """
    parser = _get_parser()
    args = parser.parse_args()

    # Work on a copy so the parsed namespace itself is left untouched.
    options = dict(vars(args))
    src = options.pop('src')

    if os.path.isfile(src):
        # The whole argument names an existing file, so any colon in it is
        # part of the path (e.g. a Windows drive letter), not a separator.
        filename, nodename = src, '/'
    else:
        # Split on the LAST colon only, so colons earlier in the file path
        # do not produce more than two parts.
        filename, sep, nodename = src.rpartition(':')
        if not sep:
            # no colon at all: the argument is just a filename
            filename, nodename = src, '/'
        elif nodename == '':
            # case where src == "filename:" instead of "filename:/"
            nodename = '/'

    with tables.open_file(filename, 'r') as f:
        tree_str = get_tree_str(f, nodename, **options)

    # Single-argument call form works as both a Python 2 print statement and
    # a Python 3 print function.
    print(tree_str)
92+
93+
def get_tree_str(f, where='/', max_depth=-1, print_class=True,
                 print_size=True, print_shape=False, print_compression=False,
                 print_total=True):
    """Return a printable ASCII tree of the nodes at and below `where`.

    Parameters
    ----------
    f
        Open file object; must provide ``get_node``, ``walk_nodes`` and a
        ``filename`` attribute.
    where
        Node (or path) used as the root of the displayed tree; passed to
        ``f.get_node``.
    max_depth
        Maximum depth, relative to `where`, that is displayed. A negative
        value (or None) means no limit. Groups sitting exactly at the limit
        are not expanded; they are labelled with the number of sized items
        found below them and, if `print_size` is set, their total size.
    print_class
        Append the node's class name to its displayed name.
    print_size
        Label arrays/tables with ``size=(in memory/on disk/ratio)``.
    print_shape
        Label arrays/tables with their shape.
    print_compression
        Label arrays/tables with ``complib(complevel)`` or ``None``.
    print_total
        Append a summary (item count, total sizes, mean ratio, file size).

    Returns
    -------
    str
        The formatted tree, framed by dashed rules.

    Notes
    -----
    A size ratio is reported as ``nan`` when the in-memory size is zero
    (empty leaves, groups without leaves, empty files).

    """
    root_node = f.get_node(where)
    root_node._g_check_open()
    start_depth = root_node._v_depth

    if max_depth is None or max_depth < 0:
        # infinity compares greater than any real depth
        max_depth = float('inf')

    tree_nodes = {}
    total_items = 0
    total_in_mem = 0
    total_on_disk = 0

    for node in f.walk_nodes(root_node):

        depth = node._v_depth - start_depth
        if depth > max_depth:
            # walk_nodes offers no way to stop descending at a given depth,
            # so nodes that are too deep are simply skipped here
            continue

        pathname = node._v_pathname
        name = node._v_name
        if print_class:
            name += " (%s)" % node.__class__.__name__
        labels = []

        if depth == max_depth and isinstance(node, tables.group.Group):
            # Collapsed branch: summarise everything below this group.
            n_items, in_mem, on_disk = get_branch_size(f, node)
            if print_size:
                sizestr = ', total size=(%s/%s/%.2f)' % (
                    b2h(in_mem), b2h(on_disk), _size_ratio(on_disk, in_mem))
            else:
                sizestr = ''
            labels.append('... %i items%s' % (n_items, sizestr))

            total_items += n_items
            total_on_disk += on_disk
            total_in_mem += in_mem

        elif isinstance(node, tables.link.Link):
            labels.append('target=%s' % node.target)

        elif isinstance(node, (tables.array.Array, tables.table.Table)):
            on_disk = node.size_on_disk
            in_mem = node.size_in_memory
            if print_size:
                labels.append('size=(%s/%s/%.2f)' % (
                    b2h(in_mem), b2h(on_disk), _size_ratio(on_disk, in_mem)))
            if print_shape:
                # The shape is a tuple: wrap it so that '%' formats it as a
                # single value instead of unpacking it as the argument list.
                labels.append('shape=%s' % (node.shape,))
            if print_compression:
                lib = node.filters.complib
                level = node.filters.complevel
                if level:
                    compstr = '%s(%i)' % (lib, level)
                else:
                    compstr = 'None'
                labels.append('compression=%s' % compstr)

            total_items += 1
            total_on_disk += on_disk
            total_in_mem += in_mem

        new_tree_node = PrettyTree(name, labels=labels)
        tree_nodes[pathname] = new_tree_node

        # The file root is never attached to a parent (otherwise we get
        # infinite recursions); other nodes are attached only if their
        # parent is part of the displayed tree.
        if pathname != '/':
            parent_tree_node = tree_nodes.get(node._v_parent._v_pathname)
            if parent_tree_node is not None:
                parent_tree_node.add_child(new_tree_node)

    rule = '-' * 60
    parts = ['\n', rule, '\n' * 2,
             str(tree_nodes[root_node._v_pathname]), '\n' * 2]

    if print_total:
        avg_ratio = _size_ratio(total_on_disk, total_in_mem)
        fsize = os.stat(f.filename).st_size

        parts.append(rule + '\n')
        parts.append('Total stored items:     %i\n' % total_items)
        parts.append('Total data size:        %s in memory, %s on disk\n' % (
            b2h(total_in_mem), b2h(total_on_disk)))
        parts.append('Mean compression ratio: %.2f\n' % avg_ratio)
        parts.append('HDF5 file size:         %s\n' % b2h(fsize))
        parts.append(rule + '\n')

    return ''.join(parts)


def _size_ratio(on_disk, in_mem):
    """Return on_disk / in_mem as a float, or NaN when in_mem is zero."""
    if not in_mem:
        return float('nan')
    return float(on_disk) / in_mem
198+
199+
200+
class PrettyTree(object):
201+
"""
202+
203+
A pretty ASCII representation of a recursive tree structure. Each node can
204+
have multiple labels, given as a list of strings.
205+
206+
Example:
207+
--------
208+
209+
A = PrettyTree('A', labels=['wow'])
210+
B = PrettyTree('B', labels=['such tree'])
211+
C = PrettyTree('C', children=[A, B])
212+
D = PrettyTree('D', labels=['so recursive'])
213+
root = PrettyTree('root', labels=['many nodes'], children=[C, D])
214+
print root
215+
216+
Credit to Andrew Cooke's blog:
217+
<http://www.acooke.org/cute/ASCIIDispl0.html>
218+
219+
"""
220+
221+
def __init__(self, name, children=None, labels=None):
222+
223+
# NB: do NOT assign default list/dict arguments in the function
224+
# declaration itself - these objects are shared between ALL instances
225+
# of PrettyTree, and by assigning to them it's easy to get into
226+
# infinite recursions, e.g. when 'self in self.children == True'
227+
if children is None:
228+
children = []
229+
if labels is None:
230+
labels = []
231+
232+
self.name = name
233+
self.children = children
234+
self.labels = labels
235+
236+
def add_child(self, child):
237+
# some basic checks to help to avoid infinite recursion
238+
assert child is not self
239+
assert child not in self.children
240+
assert self not in child.children
241+
self.children.append(child)
242+
243+
def tree_lines(self):
244+
yield self.name
245+
for label in self.labels:
246+
yield ' ' + label
247+
last = self.children[-1] if self.children else None
248+
for child in self.children:
249+
prefix = '`--' if child is last else '+--'
250+
for line in child.tree_lines():
251+
yield prefix + line
252+
prefix = ' ' if child is last else '| '
253+
254+
def __str__(self):
255+
return "\n".join(self.tree_lines())
256+
257+
def __repr__(self):
258+
return '<%s at %s>' % (self.__class__.__name__, hex(id(self)))
259+
260+
261+
def b2h(nbytes, use_si_units=False):
    """Format a byte count as a human-readable string with one decimal.

    The largest unit for which the scaled value is at least 1 is used.
    Powers of 1024 (TiB, GiB, MiB, KiB) are used by default; pass
    ``use_si_units=True`` for powers of 1000 (TB, GB, MB, kB). Values
    below one byte fall through to the plain ``B`` unit.

    """
    if use_si_units:
        units = (('TB', 1E12), ('GB', 1E9), ('MB', 1E6), ('kB', 1E3),
                 ('B', 1))
    else:
        units = (('TiB', 2 ** 40), ('GiB', 2 ** 30), ('MiB', 2 ** 20),
                 ('KiB', 2 ** 10), ('B', 1))

    size = float(nbytes)

    # units are ordered from largest to smallest; take the first that fits
    for suffix, factor in units:
        if size / factor >= 1:
            return "%.1f%s" % (size / factor, suffix)

    # nothing fitted: report in the smallest unit
    suffix, factor = units[-1]
    return "%.1f%s" % (size / factor, suffix)
276+
277+
278+
def get_branch_size(f, where):
    """Sum the sizes of all sized nodes found by ``f.walk_nodes(where)``.

    Links are ignored, and so is any node that does not expose both
    ``size_in_memory`` and ``size_on_disk``.

    Returns a tuple ``(n_items, bytes_in_memory, bytes_on_disk)``; the two
    byte totals are accumulated as floats.

    """
    n_counted = 0
    mem_bytes = 0.
    disk_bytes = 0.

    for item in f.walk_nodes(where):

        # don't dereference links, or we'll count the same arrays multiple
        # times
        if isinstance(item, tables.link.Link):
            continue

        try:
            sizes = (item.size_in_memory, item.size_on_disk)
        except AttributeError:
            # node without size attributes: nothing to add
            continue

        mem_bytes += sizes[0]
        disk_bytes += sizes[1]
        n_counted += 1

    return n_counted, mem_bytes, disk_bytes
300+
301+
302+
def make_test_file(prefix='/tmp'):
    """Create ``test_pttree.hdf5`` under `prefix` and return the open file.

    The file contains ``/group1`` with two subgroups, each holding a
    bzip2-compressed array of zeros and one of random values, plus
    ``/group2`` holding a soft link and a hard link to
    ``/group1/group1a/zeros128b``. The file is opened in write mode and is
    NOT closed here; closing it is left to the caller.

    """
    path = os.path.join(prefix, 'test_pttree.hdf5')
    h5 = tables.open_file(path, 'w')

    group1 = h5.create_group('/', 'group1')
    subgroups = [h5.create_group(group1, child_name)
                 for child_name in ('group1a', 'group1b')]

    bzip2 = tables.Filters(complevel=5, complib='bzip2')

    for parent in subgroups:
        h5.create_carray(parent, 'zeros128b',
                         obj=np.zeros(32, dtype=np.float64), filters=bzip2)
        h5.create_carray(parent, 'random128b',
                         obj=np.random.rand(32), filters=bzip2)

    group2 = h5.create_group('/', 'group2')

    # both links point at the same array
    target = '/group1/group1a/zeros128b'
    h5.create_soft_link(group2, 'softlink_g1_z128', target)
    h5.create_hard_link(group2, 'hardlink_g1a_z128', target)

    return h5

utils/pttree

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
#!/usr/bin/env python
2+
# Launcher script: imports the `main` entry point of tables.scripts.pttree.
from tables.scripts.pttree import main
3+
# Called unconditionally when this file is executed (no __main__ guard).
main()

0 commit comments

Comments
 (0)