3
3
import os
4
4
import sys
5
5
import requests
6
- from configparser import ConfigParser
7
- # from distutils.sysconfig import get_python_lib
8
-
9
- def checkrecordfiles (recordname , filedirectory ):
10
- """Check a local directory along with the database cache directory specified in
11
- 'config.ini' for all necessary files required to read a WFDB record.
12
- Calls pbdownload.dlrecordfiles to download any missing files into the database
13
- cache directory. Returns the base record name if all files were present, or a
14
- full path record name specifying where the downloaded files are to be read,
15
- and a list of files downloaded.
16
-
17
- *If you wish to directly download files for a record, it highly recommended to call
18
- 'dlrecordfiles' directly. This is a helper function for rdsamp which
19
- tries to parse the 'recordname' input to deduce whether it contains a local directory,
20
- physiobank database, or both. Its usage format is different and more complex than
21
- that of 'dlrecordfiles'.
22
-
23
- Usage: readrecordname, downloadedfiles = checkrecordfiles(recordname, filedirectory)
24
-
25
- Input arguments:
26
- - recordname (required): The name of the WFDB record to be read
27
- (without any file extensions). Can be prepended with a local directory, or a
28
- physiobank subdirectory (or both if the relative local directory exists and
29
- takes the same name as the physiobank subdirectory). eg: recordname=mitdb/100
30
- - filedirectory (required): The local directory to check for the files required to
31
- read the record before checking the database cache directory. If the 'recordname'
32
- argument is prepended with a directory, this function will assume that it is a
33
- local directory and prepend that to this 'filedirectory' argument and check the
34
- resulting directory instead.
35
-
36
- Output arguments:
37
- - readrecordname: The record name prepended with the path the files are to be read from.
38
- - downloadedfiles: The list of files downloaded from PhysioBank.
39
- """
40
-
41
- # Base directory to store downloaded physiobank files
42
- config = loadconfig ('wfdb.config' )
43
- dbcachedir = config .get ('pbdownload' ,'dbcachedir' )
44
- basedir , baserecname = os .path .split (recordname )
45
-
46
- # At this point we do not know whether basedir is a local directory, a
47
- # physiobank directory, or both.
48
-
49
- if not basedir : # if basedir is not defined, then there is no physiobank
50
- # database specified. If files are missing we cannot download them anyway.
51
- return recordname , []
52
-
53
- # If this is reached, basedir is defined. Check if there is a directory
54
- # called 'basedir'. If it exists, check it for files.
55
- if os .path .isdir (basedir ):
56
- # It is possible that basedir is also a physiobank database. Therefore
57
- # if any files are missing, ,try to download files assuming basedir is
58
- # the physiobank database directory. If it turns out that basedir is
59
- # not a pb database, an error will be triggered. The record would not
60
- # be readable without the missing file(s) anyway.
61
-
62
- downloaddir = os .path .join (dbcachedir , basedir )
63
-
64
- # The basedir directory is missing the header file.
65
- if not os .path .isfile (os .path .join (basedir , baserecname + ".hea" )):
66
- # If invalid pb database, function would exit.
67
- dledfiles = dlrecordfiles (recordname , downloaddir )
68
- # Files downloaded, confirmed valid pb database.
69
- return os .path .join (downloaddir , baserecname ), dledfiles
70
-
71
- # Header is present in basedir
72
- fields = readheader (recordname )
73
-
74
- if fields [
75
- "nseg" ] == 1 : # Single segment. Check for all the required dat files
76
- for f in fields ["filename" ]:
77
- # Missing a dat file. Download in db cache dir.
78
- if not os .path .isfile (os .path .join (basedir , f )):
79
- dledfiles = dlrecordfiles (recordname , downloaddir )
80
- return os .path .join (downloaddir , baserecname ), dledfiles
81
- else : # Multi segment. Check for all segment headers and their dat files
82
- for segment in fields ["filename" ]:
83
- if segment != '~' :
84
- if not os .path .isfile (
85
- os .path .join (
86
- basedir ,
87
- segment +
88
- ".hea" )): # Missing a segment header
89
- dledfiles = dlrecordfiles (recordname , downloaddir )
90
- return os .path .join (
91
- downloaddir , baserecname ), dledfiles
92
- segfields = readheader (os .path .join (basedir , segment ))
93
- for f in segfields ["filename" ]:
94
- if f != '~' :
95
- if not os .path .isfile (
96
- os .path .join (
97
- basedir ,
98
- f )): # Missing a segment's dat file
99
- dledfiles = dlrecordfiles (
100
- recordname , downloaddir )
101
- return os .path .join (
102
- downloaddir , baserecname ), dledfiles
103
-
104
- # All files were already present in the 'basedir' directory.
105
- return recordname , []
106
-
107
- else : # there is no 'basedir' directory in your relative path. Therefore basedir must be a
108
- # physiobank database directory. check the current working directory for files.
109
- # If any are missing, check the cache directory for files and download missing
110
- # files from physiobank.
111
-
112
- pbdir = basedir # physiobank directory
113
- downloaddir = os .path .join (dbcachedir , pbdir )
114
-
115
- if not os .path .isfile (baserecname + ".hea" ):
116
- dledfiles = dlrecordfiles (recordname , downloaddir )
117
- return os .path .join (downloaddir , baserecname ), dledfiles
118
-
119
- # Header is present in current working dir.
120
- fields = readheader (baserecname )
121
-
122
- if fields [
123
- "nseg" ] == 1 : # Single segment. Check for all the required dat files
124
- for f in fields ["filename" ]:
125
- # Missing a dat file. Download in db cache dir.
126
- if not os .path .isfile (f ):
127
- dledfiles = dlrecordfiles (recordname , downloaddir )
128
- return os .path .join (downloaddir , baserecname ), dledfiles
129
- else : # Multi segment. Check for all segment headers and their dat files
130
- for segment in fields ["filename" ]:
131
- if segment != '~' :
132
- if not os .path .isfile (
133
- os .path .join (
134
- targetdir ,
135
- segment +
136
- ".hea" )): # Missing a segment header
137
- dledfiles = dlrecordfiles (recordname , downloaddir )
138
- return os .path .join (
139
- downloaddir , baserecname ), dledfiles
140
- segfields = readheader (os .path .join (targetdir , segment ))
141
- for f in segfields ["filename" ]:
142
- if f != '~' :
143
- if not os .path .isfile (
144
- os .path .join (
145
- targetdir ,
146
- f )): # Missing a segment's dat file
147
- dledfiles = dlrecordfiles (
148
- recordname , downloaddir )
149
- return os .path .join (
150
- downloaddir , baserecname ), dledfiles
151
-
152
- # All files are present in current directory. Return base record name
153
- # and no dled files.
154
- return baserecname , []
155
-
156
6
157
7
def dlrecordfiles (pbrecname , targetdir ):
158
8
"""Check a specified local directory for all necessary files required to read a Physiobank
@@ -163,7 +13,7 @@ def dlrecordfiles(pbrecname, targetdir):
163
13
164
14
Input arguments:
165
15
- pbrecname (required): The name of the MIT format Physiobank record to be read, prepended
166
- with the Physiobank subdirectory the file is contain in (without any file extensions).
16
+ with the Physiobank subdirectory the file is contained in (without any file extensions).
167
17
eg. pbrecname=prcp/12726 to download files http://physionet.org/physiobank/database/prcp/12726.hea
168
18
and 12726.dat
169
19
- targetdir (required): The local directory to check for files required to read the record,
@@ -849,19 +699,6 @@ def processwfdbbytes(fp, fmt, siglen, nsig, sampsperframe, floorsamp=0):
849
699
"initvalue" ,
850
700
"signame" ]
851
701
852
- def loadconfig (fn ):
853
- """
854
- Search for a configuration file. Load the first version found.
855
- """
856
- config = ConfigParser ()
857
- for loc in [os .curdir ,os .path .expanduser ("~" ),os .path .dirname (__file__ )]:
858
- configfn = os .path .join (loc ,fn )
859
- if os .path .isfile (configfn ):
860
- with open (configfn ) as source :
861
- config .readfp (source )
862
- break
863
- return config
864
-
865
702
866
703
def processsegment (fields , dirname , baserecordname , sampfrom , sampto , channels , physical ):
867
704
if (len (set (fields ["filename" ])) ==
@@ -1137,27 +974,66 @@ def expandfields(segmentfields, segnum, startseg, readsegs, channels, returninds
1137
974
# Keep fields['nsig'] as the number of returned channels from the segments.
1138
975
return segmentfields
1139
976
977
+
978
+ def checkrecordfiles (recordname , pbdl , dldir , keepfiles ):
979
+ """Figure out the directory in which to process record files and download missing
980
+ files if specified. *If you wish to directly download files for a record, call
981
+ 'dlrecordfiles'. This is a helper function for rdsamp.
982
+
983
+ Input arguments:
984
+ - recordname: name of the record
985
+ - pbdl: flag specifying whether a physiobank record should be downloaded
986
+ - dldir: directory in which to download physiobank files
987
+ - keepfiles: flag specifying whether to keep downloaded files
1140
988
989
+ Output arguments:
990
+ - dirname: the directory name from where the data files will be read
991
+ - baserecordname: the base name of the WFDB record without any file paths
992
+ - filestoremove: a list of downloaded files that are to be removed
993
+ """
994
+
995
+ filestoremove = []
996
+
997
+ # Download physiobank files if specified
998
+ if pbdl == 1 :
999
+ dledfiles = dlrecordfiles (recordname , dldir )
1000
+ if keepfiles == 0 :
1001
+ filestoremove = dledfiles
1002
+ # The directory to read the files from is the downloaded directory
1003
+ dirname = dldir
1004
+ (_ , baserecordname )= os .path .split (recordname )
1005
+ else :
1006
+ dirname , baserecordname = os .path .split (recordname )
1007
+
1008
+ return dirname , baserecordname , filestoremove
1009
+
1010
+
1011
+
1141
1012
def rdsamp (
1142
1013
recordname ,
1143
1014
sampfrom = 0 ,
1144
1015
sampto = [],
1145
1016
channels = [],
1146
1017
physical = 1 ,
1147
- stacksegments = 1 ):
1018
+ stacksegments = 1 ,
1019
+ pbdl = 0 ,
1020
+ dldir = os .getcwd (),
1021
+ keepfiles = 0 ):
1148
1022
"""Read a WFDB record and return the signal as a numpy array and the metadata as a dictionary.
1149
1023
1150
1024
Usage:
1151
1025
sig, fields = rdsamp(recordname, sampfrom, sampto, channels, physical, stacksegments, pbdl, dldir, keepfiles)
1152
1026
1153
1027
Input arguments:
1154
- - recordname (required): The name of the WFDB record to be read (without any file extensions).
1028
+ - recordname (required): The name of the WFDB record to be read (without any file extensions). If the argument contains any path delimiter characters, the argument will be interpreted as PATH/baserecord and the data files will be searched for in the local path. If the pbdl flag is set to 1, recordname will be interpreted as a physiobank record name including the database subdirectory.
1155
1029
- sampfrom (default=0): The starting sample number to read for each channel.
1156
1030
- sampto (default=length of entire signal): The final sample number to read for each channel.
1157
1031
- channels (default=all channels): Indices specifying the channel to be returned.
1158
1032
- physical (default=1): Flag that specifies whether to return signals in physical (1) or digital (0) units.
1159
1033
- stacksegments (default=1): Flag used only for multi-segment files. Specifies whether to return the signal as a single stacked/concatenated numpy array (1) or as a list of one numpy array for each segment (0).
1160
-
1034
+ - pbdl (default=0): If this argument is set, the function will assume that the user is trying to download a physiobank file. Therefore the 'recordname' argument will be interpreted as a physiobank record name including the database subdirectory, rather than a local directory.
1035
+ - dldir (default=os.getcwd()): The directory to download physiobank files to.
1036
+ - keepfiles (default=0): Flag specifying whether to keep physiobank files newly downloaded through the function call.
1161
1037
1162
1038
Output variables:
1163
1039
- sig: An nxm numpy array where n is the signal length and m is the number of channels.
@@ -1173,31 +1049,27 @@ def rdsamp(
1173
1049
: The last list element will be a list of dictionaries of metadata for each segment.
1174
1050
For empty segments, the dictionary will be replaced by a single string: 'Empty Segment'
1175
1051
"""
1176
-
1177
- filestoremove = []
1178
- config = loadconfig ('wfdb.config' )
1179
-
1180
- if int (config .get ('pbdownload' ,'getpbfiles' )) == 1 : # Flag specifying whether to allow downloading from physiobank
1181
- recordname , dledfiles = checkrecordfiles (recordname , os .getcwd ())
1182
- if int (config .get ('pbdownload' ,'keepdledfiles' )) == 0 : # Flag specifying whether to keep downloaded physiobank files
1183
- filestoremove = dledfiles
1184
-
1185
- fields = readheader (recordname ) # Get the info from the header file
1186
-
1187
- if fields ["nsig" ] == 0 :
1188
- sys .exit ("This record has no signals. Use rdann to read annotations" )
1052
+
1189
1053
if sampfrom < 0 :
1190
1054
sys .exit ("sampfrom must be non-negative" )
1191
- dirname , baserecordname = os .path .split (recordname )
1055
+ if channels and min (channels ) < 0 :
1056
+ sys .exit ("input channels must be non-negative" )
1057
+
1058
+ dirname , baserecordname , filestoremove = checkrecordfiles (recordname , pbdl , dldir , keepfiles )
1059
+
1060
+ fields = readheader (os .path .join (dirname , baserecordname ))
1192
1061
1062
+ if fields ["nsig" ] == 0 :
1063
+ sys .exit ("This record has no signals. Use rdann to read annotations" )
1193
1064
1194
- if fields ["nseg" ] == 1 : # single segment file
1065
+ # Begin processing the data files.
1066
+
1067
+ # Single segment file
1068
+ if fields ["nseg" ] == 1 :
1195
1069
sig , fields = processsegment (fields , dirname , baserecordname , sampfrom , sampto , channels , physical )
1196
1070
1197
- # Multi-segment file. Preprocess and recursively call rdsamp on single
1198
- # segments.
1071
+ # Multi-segment file. Preprocess and recursively call rdsamp on segments
1199
1072
else :
1200
-
1201
1073
# Determine if the record is fixed or variable layout.
1202
1074
# startseg is the first signal segment, 1 or 0.
1203
1075
startseg , layoutfields = fixedorvariable (fields , dirname )
@@ -1278,8 +1150,7 @@ def rdsamp(
1278
1150
else : # Fixed layout format.
1279
1151
fields = [fields , segmentfields ]
1280
1152
1281
- if filestoremove :
1282
- for fr in filestoremove :
1153
+ for fr in filestoremove :
1283
1154
os .remove (fr )
1284
1155
1285
1156
return (sig , fields )
0 commit comments