Skip to content

Commit d835e95

Browse files
author
Chen Xie
committed
removed config file. rdsamp downloads physiobank records when flags are set
1 parent 84c5c10 commit d835e95

File tree

5 files changed

+63
-245
lines changed

5 files changed

+63
-245
lines changed

MANIFEST.in

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
# Include the license file
22
include LICENSE
3-
include wfdb.config
43

54
# Include the data files
65
# recursive-include data *

devtests.ipynb

Lines changed: 3 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -809,46 +809,10 @@
809809
},
810810
"outputs": [],
811811
"source": [
812-
"sig, fields=wfdb.rdsamp('mitdb/100')"
813-
]
814-
},
815-
{
816-
"cell_type": "code",
817-
"execution_count": null,
818-
"metadata": {
819-
"collapsed": false
820-
},
821-
"outputs": [],
822-
"source": [
823-
"from configparser import ConfigParser\n",
824-
"config = loadconfig('/home/cx1111/PhysionetProjects/wfdb-python/wfdb/wfdb.config')\n",
825-
"\n"
826-
]
827-
},
828-
{
829-
"cell_type": "code",
830-
"execution_count": null,
831-
"metadata": {
832-
"collapsed": false
833-
},
834-
"outputs": [],
835-
"source": [
836-
"import os\n",
837-
"a=os.path.join('/usr/local/database/', 'mitdb')\n",
838-
"print(a)"
839-
]
840-
},
841-
{
842-
"cell_type": "code",
843-
"execution_count": null,
844-
"metadata": {
845-
"collapsed": false
846-
},
847-
"outputs": [],
848-
"source": [
849-
"dbcachedir='/usr/local/database'\n",
812+
"import wfdb\n",
813+
"sig, fields=wfdb.rdsamp('macecgdb/test01_00s', pbdl=1, dldir='/home/cx1111/Downloads/wfdbrecords/macecgdb', keepfiles=1)\n",
850814
"\n",
851-
"print(dbcachedir)"
815+
"print(sig)"
852816
]
853817
},
854818
{

setup.py

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,6 @@
88
from codecs import open
99
from os import path
1010

11-
xx=find_packages(exclude=['contrib', 'docs', 'tests'])
12-
print('look at this!:', xx)
13-
1411
here = path.abspath(path.dirname(__file__))
1512

1613
# Get the long description from the README file
@@ -58,7 +55,6 @@
5855
'nose>=1.3.7',
5956
'numpy>=1.11.0',
6057
'matplotlib>=1.5.1',
61-
'configparser>=3.5.0',
6258
'requests>=2.10.0'
6359
],
6460

@@ -74,9 +70,8 @@
7470
# If there are data files included in your packages that need to be
7571
# installed, specify them here. If using Python 2.6 or less, then these
7672
# have to be included in MANIFEST.in as well.
77-
#package_dir={'wfdb': ''},
78-
package_data={'wfdb': ['wfdb.config'],
79-
},
73+
# package_data={'wfdb': ['wfdb.config'],
74+
# },
8075

8176
# Although 'package_data' is the preferred approach, in some case you may
8277
# need to place data files outside of your packages. See:

wfdb/_rdsamp.py

Lines changed: 58 additions & 187 deletions
Original file line numberDiff line numberDiff line change
@@ -3,156 +3,6 @@
33
import os
44
import sys
55
import requests
6-
from configparser import ConfigParser
7-
# from distutils.sysconfig import get_python_lib
8-
9-
def checkrecordfiles(recordname, filedirectory):
10-
"""Check a local directory along with the database cache directory specified in
11-
'config.ini' for all necessary files required to read a WFDB record.
12-
Calls pbdownload.dlrecordfiles to download any missing files into the database
13-
cache directory. Returns the base record name if all files were present, or a
14-
full path record name specifying where the downloaded files are to be read,
15-
and a list of files downloaded.
16-
17-
*If you wish to directly download files for a record, it is highly recommended to call
18-
'dlrecordfiles' directly. This is a helper function for rdsamp which
19-
tries to parse the 'recordname' input to deduce whether it contains a local directory,
20-
physiobank database, or both. Its usage format is different and more complex than
21-
that of 'dlrecordfiles'.
22-
23-
Usage: readrecordname, downloadedfiles = checkrecordfiles(recordname, filedirectory)
24-
25-
Input arguments:
26-
- recordname (required): The name of the WFDB record to be read
27-
(without any file extensions). Can be prepended with a local directory, or a
28-
physiobank subdirectory (or both if the relative local directory exists and
29-
takes the same name as the physiobank subdirectory). eg: recordname=mitdb/100
30-
- filedirectory (required): The local directory to check for the files required to
31-
read the record before checking the database cache directory. If the 'recordname'
32-
argument is prepended with a directory, this function will assume that it is a
33-
local directory and prepend that to this 'filedirectory' argument and check the
34-
resulting directory instead.
35-
36-
Output arguments:
37-
- readrecordname: The record name prepended with the path the files are to be read from.
38-
- downloadedfiles: The list of files downloaded from PhysioBank.
39-
"""
40-
41-
# Base directory to store downloaded physiobank files
42-
config = loadconfig('wfdb.config')
43-
dbcachedir = config.get('pbdownload','dbcachedir')
44-
basedir, baserecname = os.path.split(recordname)
45-
46-
# At this point we do not know whether basedir is a local directory, a
47-
# physiobank directory, or both.
48-
49-
if not basedir: # if basedir is not defined, then there is no physiobank
50-
# database specified. If files are missing we cannot download them anyway.
51-
return recordname, []
52-
53-
# If this is reached, basedir is defined. Check if there is a directory
54-
# called 'basedir'. If it exists, check it for files.
55-
if os.path.isdir(basedir):
56-
# It is possible that basedir is also a physiobank database. Therefore
57-
# if any files are missing, try to download files assuming basedir is
58-
# the physiobank database directory. If it turns out that basedir is
59-
# not a pb database, an error will be triggered. The record would not
60-
# be readable without the missing file(s) anyway.
61-
62-
downloaddir = os.path.join(dbcachedir, basedir)
63-
64-
# The basedir directory is missing the header file.
65-
if not os.path.isfile(os.path.join(basedir, baserecname + ".hea")):
66-
# If invalid pb database, function would exit.
67-
dledfiles = dlrecordfiles(recordname, downloaddir)
68-
# Files downloaded, confirmed valid pb database.
69-
return os.path.join(downloaddir, baserecname), dledfiles
70-
71-
# Header is present in basedir
72-
fields = readheader(recordname)
73-
74-
if fields[
75-
"nseg"] == 1: # Single segment. Check for all the required dat files
76-
for f in fields["filename"]:
77-
# Missing a dat file. Download in db cache dir.
78-
if not os.path.isfile(os.path.join(basedir, f)):
79-
dledfiles = dlrecordfiles(recordname, downloaddir)
80-
return os.path.join(downloaddir, baserecname), dledfiles
81-
else: # Multi segment. Check for all segment headers and their dat files
82-
for segment in fields["filename"]:
83-
if segment != '~':
84-
if not os.path.isfile(
85-
os.path.join(
86-
basedir,
87-
segment +
88-
".hea")): # Missing a segment header
89-
dledfiles = dlrecordfiles(recordname, downloaddir)
90-
return os.path.join(
91-
downloaddir, baserecname), dledfiles
92-
segfields = readheader(os.path.join(basedir, segment))
93-
for f in segfields["filename"]:
94-
if f != '~':
95-
if not os.path.isfile(
96-
os.path.join(
97-
basedir,
98-
f)): # Missing a segment's dat file
99-
dledfiles = dlrecordfiles(
100-
recordname, downloaddir)
101-
return os.path.join(
102-
downloaddir, baserecname), dledfiles
103-
104-
# All files were already present in the 'basedir' directory.
105-
return recordname, []
106-
107-
else: # there is no 'basedir' directory in your relative path. Therefore basedir must be a
108-
# physiobank database directory. check the current working directory for files.
109-
# If any are missing, check the cache directory for files and download missing
110-
# files from physiobank.
111-
112-
pbdir = basedir # physiobank directory
113-
downloaddir = os.path.join(dbcachedir, pbdir)
114-
115-
if not os.path.isfile(baserecname + ".hea"):
116-
dledfiles = dlrecordfiles(recordname, downloaddir)
117-
return os.path.join(downloaddir, baserecname), dledfiles
118-
119-
# Header is present in current working dir.
120-
fields = readheader(baserecname)
121-
122-
if fields[
123-
"nseg"] == 1: # Single segment. Check for all the required dat files
124-
for f in fields["filename"]:
125-
# Missing a dat file. Download in db cache dir.
126-
if not os.path.isfile(f):
127-
dledfiles = dlrecordfiles(recordname, downloaddir)
128-
return os.path.join(downloaddir, baserecname), dledfiles
129-
else: # Multi segment. Check for all segment headers and their dat files
130-
for segment in fields["filename"]:
131-
if segment != '~':
132-
if not os.path.isfile(
133-
os.path.join(
134-
targetdir,
135-
segment +
136-
".hea")): # Missing a segment header
137-
dledfiles = dlrecordfiles(recordname, downloaddir)
138-
return os.path.join(
139-
downloaddir, baserecname), dledfiles
140-
segfields = readheader(os.path.join(targetdir, segment))
141-
for f in segfields["filename"]:
142-
if f != '~':
143-
if not os.path.isfile(
144-
os.path.join(
145-
targetdir,
146-
f)): # Missing a segment's dat file
147-
dledfiles = dlrecordfiles(
148-
recordname, downloaddir)
149-
return os.path.join(
150-
downloaddir, baserecname), dledfiles
151-
152-
# All files are present in current directory. Return base record name
153-
# and no dled files.
154-
return baserecname, []
155-
1566

1577
def dlrecordfiles(pbrecname, targetdir):
1588
"""Check a specified local directory for all necessary files required to read a Physiobank
@@ -163,7 +13,7 @@ def dlrecordfiles(pbrecname, targetdir):
16313
16414
Input arguments:
16515
- pbrecname (required): The name of the MIT format Physiobank record to be read, prepended
166-
with the Physiobank subdirectory the file is contain in (without any file extensions).
16+
with the Physiobank subdirectory the file is contained in (without any file extensions).
16717
eg. pbrecname=prcp/12726 to download files http://physionet.org/physiobank/database/prcp/12726.hea
16818
and 12726.dat
16919
- targetdir (required): The local directory to check for files required to read the record,
@@ -849,19 +699,6 @@ def processwfdbbytes(fp, fmt, siglen, nsig, sampsperframe, floorsamp=0):
849699
"initvalue",
850700
"signame"]
851701

852-
def loadconfig(fn):
853-
"""
854-
Search for a configuration file. Load the first version found.
855-
"""
856-
config = ConfigParser()
857-
for loc in [os.curdir,os.path.expanduser("~"),os.path.dirname(__file__)]:
858-
configfn = os.path.join(loc,fn)
859-
if os.path.isfile(configfn):
860-
with open(configfn) as source:
861-
config.readfp(source)
862-
break
863-
return config
864-
865702

866703
def processsegment(fields, dirname, baserecordname, sampfrom, sampto, channels, physical):
867704
if (len(set(fields["filename"])) ==
@@ -1137,27 +974,66 @@ def expandfields(segmentfields, segnum, startseg, readsegs, channels, returninds
1137974
# Keep fields['nsig'] as the number of returned channels from the segments.
1138975
return segmentfields
1139976

977+
978+
def checkrecordfiles(recordname, pbdl, dldir, keepfiles):
979+
"""Figure out the directory in which to process record files and download missing
980+
files if specified. *If you wish to directly download files for a record, call
981+
'dlrecordfiles'. This is a helper function for rdsamp.
982+
983+
Input arguments:
984+
- recordname: name of the record
985+
- pbdl: flag specifying whether a physiobank record should be downloaded
986+
- dldir: directory in which to download physiobank files
987+
- keepfiles: flag specifying whether to keep downloaded files
1140988
989+
Output arguments:
990+
- dirname: the directory name from where the data files will be read
991+
- baserecordname: the base name of the WFDB record without any file paths
992+
- filestoremove: a list of downloaded files that are to be removed
993+
"""
994+
995+
filestoremove=[]
996+
997+
# Download physiobank files if specified
998+
if pbdl == 1:
999+
dledfiles = dlrecordfiles(recordname, dldir)
1000+
if keepfiles==0:
1001+
filestoremove = dledfiles
1002+
# The directory to read the files from is the downloaded directory
1003+
dirname = dldir
1004+
(_, baserecordname)= os.path.split(recordname)
1005+
else:
1006+
dirname, baserecordname = os.path.split(recordname)
1007+
1008+
return dirname, baserecordname, filestoremove
1009+
1010+
1011+
11411012
def rdsamp(
11421013
recordname,
11431014
sampfrom=0,
11441015
sampto=[],
11451016
channels=[],
11461017
physical=1,
1147-
stacksegments=1):
1018+
stacksegments=1,
1019+
pbdl=0,
1020+
dldir=os.getcwd(),
1021+
keepfiles=0):
11481022
"""Read a WFDB record and return the signal as a numpy array and the metadata as a dictionary.
11491023
11501024
Usage:
11511025
sig, fields = rdsamp(recordname, sampfrom, sampto, channels, physical, stacksegments)
11521026
11531027
Input arguments:
1154-
- recordname (required): The name of the WFDB record to be read (without any file extensions).
1028+
- recordname (required): The name of the WFDB record to be read (without any file extensions). If the argument contains any path delimiter characters, the argument will be interpreted as PATH/baserecord and the data files will be searched for in the local path. If the pbdl flag is set to 1, recordname will be interpreted as a physiobank record name including the database subdirectory.
11551029
- sampfrom (default=0): The starting sample number to read for each channel.
11561030
- sampto (default=length of entire signal): The final sample number to read for each channel.
11571031
- channels (default=all channels): Indices specifying the channel to be returned.
11581032
- physical (default=1): Flag that specifies whether to return signals in physical (1) or digital (0) units.
11591033
- stacksegments (default=1): Flag used only for multi-segment files. Specifies whether to return the signal as a single stacked/concatenated numpy array (1) or as a list of one numpy array for each segment (0).
1160-
1034+
- pbdl (default=0): If this argument is set, the function will assume that the user is trying to download a physiobank file. Therefore the 'recordname' argument will be interpreted as a physiobank record name including the database subdirectory, rather than a local directory.
1035+
- dldir (default=os.getcwd()): The directory to download physiobank files to.
1036+
- keepfiles (default=0): Flag specifying whether to keep physiobank files newly downloaded through the function call.
11611037
11621038
Output variables:
11631039
- sig: An nxm numpy array where n is the signal length and m is the number of channels.
@@ -1173,31 +1049,27 @@ def rdsamp(
11731049
: The last list element will be a list of dictionaries of metadata for each segment.
11741050
For empty segments, the dictionary will be replaced by a single string: 'Empty Segment'
11751051
"""
1176-
1177-
filestoremove = []
1178-
config = loadconfig('wfdb.config')
1179-
1180-
if int(config.get('pbdownload','getpbfiles')) == 1: # Flag specifying whether to allow downloading from physiobank
1181-
recordname, dledfiles = checkrecordfiles(recordname, os.getcwd())
1182-
if int(config.get('pbdownload','keepdledfiles')) == 0: # Flag specifying whether to keep downloaded physiobank files
1183-
filestoremove = dledfiles
1184-
1185-
fields = readheader(recordname) # Get the info from the header file
1186-
1187-
if fields["nsig"] == 0:
1188-
sys.exit("This record has no signals. Use rdann to read annotations")
1052+
11891053
if sampfrom < 0:
11901054
sys.exit("sampfrom must be non-negative")
1191-
dirname, baserecordname = os.path.split(recordname)
1055+
if channels and min(channels) < 0:
1056+
sys.exit("input channels must be non-negative")
1057+
1058+
dirname, baserecordname, filestoremove = checkrecordfiles(recordname, pbdl, dldir, keepfiles)
1059+
1060+
fields = readheader(os.path.join(dirname, baserecordname))
11921061

1062+
if fields["nsig"] == 0:
1063+
sys.exit("This record has no signals. Use rdann to read annotations")
11931064

1194-
if fields["nseg"] == 1: # single segment file
1065+
# Begin processing the data files.
1066+
1067+
# Single segment file
1068+
if fields["nseg"] == 1:
11951069
sig, fields = processsegment(fields, dirname, baserecordname, sampfrom, sampto, channels, physical)
11961070

1197-
# Multi-segment file. Preprocess and recursively call rdsamp on single
1198-
# segments.
1071+
# Multi-segment file. Preprocess and recursively call rdsamp on segments
11991072
else:
1200-
12011073
# Determine if the record is fixed or variable layout.
12021074
# startseg is the first signal segment, 1 or 0.
12031075
startseg, layoutfields = fixedorvariable(fields, dirname)
@@ -1278,8 +1150,7 @@ def rdsamp(
12781150
else: # Fixed layout format.
12791151
fields = [fields, segmentfields]
12801152

1281-
if filestoremove:
1282-
for fr in filestoremove:
1153+
for fr in filestoremove:
12831154
os.remove(fr)
12841155

12851156
return (sig, fields)

0 commit comments

Comments
 (0)