1
+ # -*- coding: utf-8 -*-
2
+
3
+ ########################################################################
4
+ #
5
+ # License: BSD
6
+ # Created: November 8, 2014
7
+ # Author: Alistair Muldal - [email protected]
8
+ #
9
+ # $Id$
10
+ #
11
+ ########################################################################
12
+
13
+ """This utility prints the contents of an HDF5 file as a tree.
14
+
15
+ Pass the flag -h to this for help on usage.
16
+
17
+ """
18
+
19
+ import tables
20
+ import numpy as np
21
+ import os
22
+ import argparse
23
+
24
+ def _get_parser ():
25
+ parser = argparse .ArgumentParser (
26
+ description = '''
27
+ `pttree` is designed to give a quick overview of the contents of a
28
+ PyTables HDF5 file by printing a depth-indented list of nodes, similar
29
+ to the output of the Unix `tree` utility for viewing directory
30
+ structures. It can also display the size, shape and compression states
31
+ of individual nodes, as well as summary information for the whole file.
32
+ For a more verbose output including metadata, see `ptdump`.
33
+ ''' )
34
+
35
+
36
+ parser .add_argument (
37
+ '-L' , '--max-level' , type = int , dest = 'max_depth' ,
38
+ help = 'maximum display depth of tree (-1 = no limit)' ,
39
+ )
40
+ parser .add_argument (
41
+ '--print-size' , action = 'store_true' , dest = 'print_size' ,
42
+ help = 'print size of each node' ,
43
+ )
44
+ parser .add_argument (
45
+ '--no-print-size' , action = 'store_false' , dest = 'print_size' ,
46
+ )
47
+ parser .add_argument (
48
+ '--print-shape' , action = 'store_true' , dest = 'print_shape' ,
49
+ help = 'print shape of each node' ,
50
+ )
51
+ parser .add_argument (
52
+ '--no-print-shape' , action = 'store_false' , dest = 'print_shape' ,
53
+ )
54
+ parser .add_argument (
55
+ '--print-compression' , action = 'store_true' , dest = 'print_compression' ,
56
+ help = 'print compression library(level) for each compressed node' ,
57
+ )
58
+ parser .add_argument (
59
+ '--no-print-compression' , action = 'store_false' ,
60
+ dest = 'print_compression' ,
61
+ )
62
+
63
+ parser .add_argument ('src' , metavar = 'filename[:nodepath]' ,
64
+ help = 'path to the root of the tree structure' )
65
+
66
+ parser .set_defaults (max_depth = - 1 , print_size = True , print_shape = False ,
67
+ print_compression = False )
68
+
69
+ return parser
70
+
71
+
72
def main():
    """Command-line entry point: print the tree for ``filename[:nodepath]``.

    Parses the command line, splits the positional ``src`` argument into a
    file name and a node path, opens the file read-only and prints the
    string produced by ``get_tree_str``. All remaining parsed options are
    forwarded to ``get_tree_str`` as keyword arguments.
    """
    parser = _get_parser()
    # Work on a copy so the parsed namespace itself is left untouched.
    options = dict(vars(parser.parse_args()))

    # Split on the LAST colon only: a file name that itself contains a
    # colon must not make the unpacking below fail.
    src = options.pop('src')
    filename, sep, nodename = src.rpartition(':')
    if not sep:
        # no node path given at all
        filename, nodename = src, "/"
    elif nodename == "":
        # case where src == "filename:" instead of "filename:/"
        nodename = "/"

    with tables.open_file(filename, 'r') as f:
        tree_str = get_tree_str(f, nodename, **options)
        # Parenthesised single-argument form behaves the same on Python 2
        # and Python 3.
        print(tree_str)
92
+
93
def _size_ratio(on_disk, in_mem):
    """Return ``on_disk / in_mem`` as a float, or NaN if ``in_mem`` is 0."""
    if not in_mem:
        return float('nan')
    return float(on_disk) / in_mem


def get_tree_str(f, where='/', max_depth=-1, print_class=True,
                 print_size=True, print_shape=False, print_compression=False,
                 print_total=True):
    """Return a printable tree of the nodes below `where` in file `f`.

    Parameters
    ----------
    f : open file object
        Must provide ``get_node``, ``walk_nodes`` and ``filename`` (``main``
        passes the result of ``tables.open_file``).
    where : str
        Path of the node used as the root of the displayed tree.
    max_depth : int
        Maximum depth, relative to `where`, that is displayed. A negative
        value means no limit. Groups sitting exactly at the limit are shown
        with a one-line summary of everything below them.
    print_class : bool
        Append the node's class name to its displayed name.
    print_size : bool
        Add a ``size=(in memory/on disk/ratio)`` label to arrays and tables,
        and a total size to summarised groups.
    print_shape : bool
        Add a ``shape=...`` label to arrays and tables.
    print_compression : bool
        Add a ``compression=lib(level)`` label to arrays and tables
        (``None`` when the compression level is 0).
    print_total : bool
        Append a summary block with item count, total sizes, mean
        compression ratio and the size of the file on disk.

    Returns
    -------
    str
        The formatted tree, optionally followed by the summary block.
        Ratios are reported as ``nan`` when the in-memory size is zero.
    """
    root_node = f.get_node(where)
    root_node._g_check_open()

    start_depth = root_node._v_depth

    # maps node pathname -> PrettyTree, so children can find their parent
    tree_nodes = {}

    total_in_mem = 0
    total_on_disk = 0
    total_items = 0

    if max_depth < 0:
        # no limit: infinity compares greater than any real depth
        max_depth = float('inf')

    for node in f.walk_nodes(root_node):

        depth = node._v_depth - start_depth
        if depth > max_depth:
            # walk_nodes cannot be stopped at a particular depth, so nodes
            # that are too deep are simply skipped
            continue

        pathname = node._v_pathname
        parent_pathname = node._v_parent._v_pathname
        name = node._v_name
        if print_class:
            name += " (%s)" % node.__class__.__name__
        labels = []

        if depth == max_depth and isinstance(node, tables.group.Group):

            # the children of this group will not be displayed, so summarise
            # the size of the whole branch instead
            n_items, in_mem, on_disk = get_branch_size(f, node)
            if print_size:
                sizestr = ', total size=(%s/%s/%.2f)' % (
                    b2h(in_mem), b2h(on_disk), _size_ratio(on_disk, in_mem))
            else:
                sizestr = ''
            labels.append('... %i items%s' % (n_items, sizestr))

            total_items += n_items
            total_on_disk += on_disk
            total_in_mem += in_mem

        elif isinstance(node, tables.link.Link):
            labels.append('target=%s' % node.target)

        elif isinstance(node, (tables.array.Array, tables.table.Table)):

            on_disk = node.size_on_disk
            in_mem = node.size_in_memory
            if print_size:
                labels.append('size=(%s/%s/%.2f)' % (
                    b2h(in_mem), b2h(on_disk), _size_ratio(on_disk, in_mem)))
            if print_shape:
                # wrap in a 1-tuple: a bare tuple on the right of % would be
                # unpacked as the format arguments
                labels.append('shape=%s' % (node.shape,))
            if print_compression:
                lib = node.filters.complib
                level = node.filters.complevel
                if level:
                    compstr = '%s(%i)' % (lib, level)
                else:
                    compstr = 'None'
                labels.append('compression=%s' % compstr)

            total_items += 1
            total_on_disk += on_disk
            total_in_mem += in_mem

        new_tree_node = PrettyTree(name, labels=labels)
        tree_nodes[pathname] = new_tree_node

        # exclude root node (otherwise we get infinite recursions)
        if pathname != '/' and parent_pathname in tree_nodes:
            tree_nodes[parent_pathname].add_child(new_tree_node)

    divider = '-' * 60

    out_str = '\n' + divider + '\n' * 2
    out_str += str(tree_nodes[root_node._v_pathname]) + '\n' * 2

    if print_total:
        avg_ratio = _size_ratio(total_on_disk, total_in_mem)
        fsize = os.stat(f.filename).st_size

        out_str += divider + '\n'
        out_str += 'Total stored items:     %i\n' % total_items
        out_str += 'Total data size:        %s in memory, %s on disk\n' % (
            b2h(total_in_mem), b2h(total_on_disk))
        out_str += 'Mean compression ratio: %.2f\n' % avg_ratio
        out_str += 'HDF5 file size:         %s\n' % b2h(fsize)
        out_str += divider + '\n'

    return out_str
198
+
199
+
200
class PrettyTree(object):
    """

    A pretty ASCII representation of a recursive tree structure. Each node can
    have multiple labels, given as a list of strings.

    Every tree level is three columns wide: the first line of a child is
    prefixed with '+--' (or '`--' for the last child), its remaining lines
    with '|  ' (or three spaces for the last child), and labels are indented
    by three spaces below the node name.

    Example:
    --------

        A = PrettyTree('A', labels=['wow'])
        B = PrettyTree('B', labels=['such tree'])
        C = PrettyTree('C', children=[A, B])
        D = PrettyTree('D', labels=['so recursive'])
        root = PrettyTree('root', labels=['many nodes'], children=[C, D])
        print(root)

    Credit to Andrew Cooke's blog:
    <http://www.acooke.org/cute/ASCIIDispl0.html>

    """

    def __init__(self, name, children=None, labels=None):
        """Create a node called `name` with optional children and labels.

        `children` is an iterable of PrettyTree instances and `labels` an
        iterable of strings; both default to empty.
        """
        # NB: do NOT assign default list/dict arguments in the function
        # declaration itself - these objects are shared between ALL instances
        # of PrettyTree, and by assigning to them it's easy to get into
        # infinite recursions, e.g. when 'self in self.children == True'.
        # For the same reason the caller's lists are copied, not aliased.
        self.name = name
        self.children = [] if children is None else list(children)
        self.labels = [] if labels is None else list(labels)

    def add_child(self, child):
        """Append `child` (a PrettyTree) to this node's children."""
        # some basic checks to help to avoid infinite recursion
        assert child is not self
        assert child not in self.children
        assert self not in child.children
        self.children.append(child)

    def tree_lines(self):
        """Yield the lines of this subtree, one string per output line."""
        yield self.name
        for label in self.labels:
            yield '   ' + label
        last = self.children[-1] if self.children else None
        for child in self.children:
            is_last = child is last
            # The branch marker goes on the child's first line only; every
            # following line gets a continuation prefix of the SAME width so
            # deeper levels stay vertically aligned.
            prefix = '`--' if is_last else '+--'
            for line in child.tree_lines():
                yield prefix + line
                prefix = '   ' if is_last else '|  '

    def __str__(self):
        return "\n".join(self.tree_lines())

    def __repr__(self):
        return '<%s at %s>' % (self.__class__.__name__, hex(id(self)))
259
+
260
+
261
def b2h(nbytes, use_si_units=False):
    """Format a byte count as a human-readable string with one decimal.

    Parameters
    ----------
    nbytes : number
        Size in bytes.
    use_si_units : bool
        If true, use decimal multiples (kB, MB, GB, TB); otherwise use
        binary multiples (KiB, MiB, GiB, TiB).

    Returns
    -------
    str
        The size scaled to the largest unit in which it is at least 1,
        e.g. ``'1.5KiB'``. Values below one byte are reported in bytes.
    """
    if use_si_units:
        units = (('TB', 1E12), ('GB', 1E9), ('MB', 1E6), ('kB', 1E3),
                 ('B', 1))
    else:
        units = (('TiB', 2 ** 40), ('GiB', 2 ** 30), ('MiB', 2 ** 20),
                 ('KiB', 2 ** 10), ('B', 1))

    size = float(nbytes)

    # try the units from largest to smallest and stop at the first that fits
    for unit, factor in units:
        amount = size / factor
        if amount >= 1:
            return "%.1f%s" % (amount, unit)

    # nothing fitted (zero or negative input): fall back to the smallest unit
    unit, factor = units[-1]
    return "%.1f%s" % (size / factor, unit)
276
+
277
+
278
def get_branch_size(f, where):
    """Add up the sizes of every sized node found by walking from `where`.

    Parameters
    ----------
    f : open file object
        Must provide ``walk_nodes``.
    where : node or path
        Starting point handed to ``f.walk_nodes``.

    Returns
    -------
    tuple
        ``(n_items, bytes_in_memory, bytes_on_disk)``; the two byte totals
        are accumulated as floats. Links, and nodes that lack the size
        attributes, are not counted.
    """
    n_items = 0
    mem_bytes = 0.
    disk_bytes = 0.

    for node in f.walk_nodes(where):

        # don't dereference links, or we'll count the same arrays multiple
        # times
        if isinstance(node, tables.link.Link):
            continue

        try:
            sizes = (node.size_in_memory, node.size_on_disk)
        except AttributeError:
            # this node carries no size information
            continue

        mem_bytes += sizes[0]
        disk_bytes += sizes[1]
        n_items += 1

    return n_items, mem_bytes, disk_bytes
300
+
301
+
302
def make_test_file(prefix='/tmp'):
    """Create a small HDF5 file for trying out `pttree` and return it open.

    The file is written to ``<prefix>/test_pttree.hdf5`` and contains:

    * ``/group1/group1a`` and ``/group1/group1b``, each holding two
      bzip2-compressed (level 5) arrays of 32 float64 values: ``zeros128b``
      (all zeros) and ``random128b`` (random values);
    * ``/group2`` with a soft link and a hard link, both pointing at
      ``/group1/group1a/zeros128b``.

    Parameters
    ----------
    prefix : str
        Directory in which the file is created.

    Returns
    -------
    The file handle, still open in write mode; the caller must close it.
    If populating the file fails, the handle is closed before the exception
    propagates.
    """
    f = tables.open_file(os.path.join(prefix, 'test_pttree.hdf5'), 'w')

    try:
        g1 = f.create_group('/', 'group1')
        g1a = f.create_group(g1, 'group1a')
        g1b = f.create_group(g1, 'group1b')

        filters = tables.Filters(complevel=5, complib='bzip2')

        for gg in g1a, g1b:
            f.create_carray(gg, 'zeros128b',
                            obj=np.zeros(32, dtype=np.float64),
                            filters=filters)
            f.create_carray(gg, 'random128b', obj=np.random.rand(32),
                            filters=filters)

        g2 = f.create_group('/', 'group2')

        f.create_soft_link(g2, 'softlink_g1_z128',
                           '/group1/group1a/zeros128b')
        f.create_hard_link(g2, 'hardlink_g1a_z128',
                           '/group1/group1a/zeros128b')
    except Exception:
        # don't leak the open handle when the file is only half-built
        f.close()
        raise

    return f
0 commit comments