Skip to content

Commit 2d197da

Browse files
committed
removed setup.py
2 parents 4944556 + 8052986 commit 2d197da

17 files changed

Lines changed: 1650 additions & 972 deletions

Pipfile

Lines changed: 0 additions & 36 deletions
This file was deleted.

Pipfile.lock

Lines changed: 0 additions & 540 deletions
This file was deleted.

README.md

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,18 @@
11
# dataverse-reports
22

3-
A python3-based tool to generate and email statistical reports from [Dataverse](https://dataverse.org/) using the native API and database queries.
4-
5-
As with [Miniverse](https://github.com/IQSS/miniverse), the reports require access to the Dataverse database.
3+
A Python tool to generate and email statistical reports from [Dataverse](https://dataverse.org/) using the native API and database queries.
64

75
## Requirements
86

9-
- Python 3.6+
7+
- Python 3.12+
108
- Dataverse 5.1+
119

12-
## Python 3 Virtual Environment Setup
10+
## Python Virtual Environment Setup
1311

1412
```bash
1513
python3 -m venv venv
1614
source venv/bin/activate
17-
pip install pipenv
18-
pipenv install
15+
pip install -r requirements.txt
1916
```
2017

2118
## Configuration

config/application.yml.sample

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,25 @@
1-
dataverse_api_host: ''
2-
dataverse_api_key: ''
3-
dataverse_db_host: ''
4-
dataverse_db_name: ''
5-
dataverse_db_username: ''
6-
dataverse_db_password: ''
1+
dataverse_name: 'My data repository'
2+
api:
3+
host: 'http://localhost:8080'
4+
token: 'sample_key'
5+
timeout: 60
6+
database:
7+
host: 'localhost'
8+
port: 5432
9+
name: 'dataverse'
10+
username: 'db_user'
11+
password: 'db_password'
12+
smtp:
13+
host: 'localhost'
14+
auth: ''
15+
port: 25
16+
username: 'username'
17+
password: 'password'
718
include_dataset_metrics: false
819
work_dir: '/tmp'
920
log_path: 'logs'
1021
log_file: 'dataverse-reports.log'
1122
log_level: 'INFO'
12-
smtp_host: 'localhost'
13-
smtp_auth: ''
14-
smtp_port: 25
15-
smtp_username: 'username'
16-
smtp_password: 'password'
1723
from_email: ''
1824
admin_emails:
1925
- email1

lib/database.py

Lines changed: 0 additions & 37 deletions
This file was deleted.
Lines changed: 94 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,27 @@
1-
import requests
1+
"""Class for communicating with the Dataverse APIs"""
2+
3+
from xml.etree import ElementTree
24
import logging
35

6+
import requests
47
from requests.auth import HTTPBasicAuth
5-
from xml.etree import ElementTree
68

7-
class DataverseApi(object):
8-
def __init__(self, host=None, token=None):
9-
if host[len(host)-1] != '/':
10-
self.host = host + '/'
9+
class DataverseApi:
10+
"""Class for communicating with the Dataverse APIs"""
11+
12+
def __init__(self, config=None):
13+
if config is None:
14+
return None
15+
16+
self.config = config
17+
18+
if config['host'][len(config['host'])-1] != '/':
19+
self.host = config['host'] + '/'
1120
else:
12-
self.host = host
21+
self.host = config['host']
1322

14-
self.token = token
23+
self.token = config['token']
24+
self.timeout = config['timeout']
1525
self.version = 'v1'
1626

1727
self.logger = logging.getLogger('dataverse-reports')
@@ -21,94 +31,112 @@ def __init__(self, host=None, token=None):
2131
self.headers = {'X-Dataverse-key': self.token}
2232

2333
def test_connection(self):
34+
"""Test connection to Dataverse API"""
35+
2436
url = self.host + 'api/info/version/'
2537
self.logger.debug("Testing API connection: %s.", url)
26-
response = requests.get(url)
38+
response = requests.get(url, timeout=self.timeout)
2739
if response.status_code == 200:
2840
return True
29-
else:
30-
return False
41+
42+
return False
3143

3244
def construct_url(self, command):
45+
"""Create URL"""
46+
3347
new_url = self.host + '-H "X-Dataverse-key: ' + self.token + '"' + command
3448
return new_url
3549

36-
def search(self, term='*', type='dataverse', options={}):
37-
if type is not None:
50+
def search(self, term='*', search_type='dataverse', options=None):
51+
"""Search Dataverse API"""
52+
53+
if search_type is not None:
3854
url = self.host + 'api/' + self.version + '/search?q=' + term + '&type=' + type
3955
else:
4056
url = self.host + 'api/' + self.version + '/search?q=' + term
4157

4258
self.logger.debug("Searching Dataverse: %s.", url)
43-
response = requests.get(url)
59+
response = requests.get(url, timeout=self.timeout)
4460
self.logger.debug("Return status: %s", str(response.status_code))
4561
return response
4662

4763
def get_dataverse(self, identifier=''):
64+
"""Retrieve dataverse from the API"""
65+
4866
if identifier is None:
4967
self.logger.error("Must specify identifer.")
50-
return
68+
return None
5169

5270
url = self.host + 'api/' + self.version + '/dataverses/' + str(identifier)
5371
self.logger.debug("Retrieving dataverse: %s.", url)
54-
response = requests.get(url, headers=self.headers)
72+
response = requests.get(url, timeout=self.timeout, headers=self.headers)
5573
self.logger.debug("Return status: %s.", str(response.status_code))
5674
return response
5775

5876
def get_dataverse_contents(self, identifier=''):
77+
"""Retrieve dataverse contents from API"""
78+
5979
if identifier is None:
6080
self.logger.error("Must specify identifer.")
61-
return
81+
return None
6282

6383
url = self.host + 'api/' + self.version + '/dataverses/' + str(identifier) + '/contents'
6484
self.logger.debug("Retrieving dataverse contents: %s", url)
65-
response = requests.get(url, headers=self.headers)
85+
response = requests.get(url, timeout=self.timeout, headers=self.headers)
6686
self.logger.debug("Return status: %s", str(response.status_code))
6787

6888
response_json = response.json()
6989
return response_json['data']
7090

71-
def get_dataverse_size(self, identifier='', includeCached=False):
91+
def get_dataverse_size(self, identifier='', include_cached=False):
92+
"""Get size of dataverse"""
93+
7294
if identifier is None:
7395
self.logger.error("Must specify identifer.")
74-
return
96+
return None
7597

7698
url = self.host + 'api/' + self.version + '/dataverses/' + str(identifier) + '/storagesize'
77-
if includeCached is True:
99+
if include_cached is True:
78100
url += '?includeCache=true'
79101
self.logger.debug("Retrieving dataverse storage size: %s", url)
80-
response = requests.get(url, headers=self.headers)
102+
response = requests.get(url, timeout=self.timeout, headers=self.headers)
81103
self.logger.debug("Return status: %s", str(response.status_code))
82104
return response
83105

84106
def sword_get_dataverse(self, alias=''):
107+
"""Retrieve SWORD dataverse"""
108+
85109
if alias is None:
86110
self.logger.error("Must specify an alias.")
87-
return
111+
return None
88112

89113
url = self.host + '/dvn/api/data-deposit/' + self.version + '/swordv2/collection/dataverse/' + alias
90114
self.logger.debug("Retrieving SWORD dataverse: %s", url)
91-
response = requests.get(url, auth=HTTPBasicAuth(self.token, ''))
115+
response = requests.get(url, timeout=self.timeout, auth=HTTPBasicAuth(self.token, ''))
92116
self.logger.debug("Return status: %s", str(response.status_code))
93117

94118
tree = ElementTree.fromstring(response.content)
95119
return tree
96120

97121
def get_dataset(self, identifier=''):
122+
"""Retrieve dataset from API"""
123+
98124
if identifier is None:
99125
self.logger.error("Must specify an identifer.")
100-
return
126+
return None
101127

102128
url = self.host + 'api/' + self.version + '/datasets/' + str(identifier)
103129
self.logger.debug("Retrieving dataset: %s", url)
104-
response = requests.get(url, headers=self.headers)
130+
response = requests.get(url, timeout=self.timeout, headers=self.headers)
105131
self.logger.debug("Return status: %s", str(response.status_code))
106132
return response
107133

108134
def get_dataset_metric(self, identifier='', option='', doi='', date=None):
135+
"""Retrieve metric of dataset"""
136+
109137
if identifier is None or option is None or doi is None:
110138
self.logger.error("Must specify an identifer, option and DOI.")
111-
return
139+
return None
112140

113141
# Include date parameter if specified
114142
if date is not None:
@@ -117,40 +145,62 @@ def get_dataset_metric(self, identifier='', option='', doi='', date=None):
117145
url = self.host + 'api/' + self.version + '/datasets/' + str(identifier) + '/makeDataCount/' + str(option) + '?persistentId=' + doi
118146

119147
self.logger.debug("Retrieving dataset_metric: %s", url)
120-
response = requests.get(url, headers=self.headers)
148+
response = requests.get(url, timeout=self.timeout, headers=self.headers)
121149
self.logger.debug("Return status: %s", str(response.status_code))
122150
return response
123151

152+
def get_dataset_download_count(self, identifier=''):
153+
"""Retrieve dataset download count from API"""
154+
155+
if identifier is None:
156+
self.logger.error("Must specify an identifer.")
157+
return None
158+
159+
url = self.host + 'api/' + self.version + '/datasets/' + str(identifier) + '/download/count?includeMDC=true'
160+
self.logger.debug("Retrieving dataset download count: %s", url)
161+
response = requests.get(url, timeout=self.timeout, headers=self.headers)
162+
self.logger.debug("Return status: %s", str(response.status_code))
163+
return response
164+
124165
def get_admin_list_users(self, page=1):
166+
"""Get list of users from the admin API"""
167+
125168
url = self.host + 'api/' + self.version + '/admin/list-users/?selectedPage=' + str(page)
126169
self.logger.debug("Retrieving users list: %s", url)
127-
response = requests.get(url, headers=self.headers)
170+
response = requests.get(url, timeout=self.timeout, headers=self.headers)
128171
self.logger.debug("Return status: %s", str(response.status_code))
129172
return response.json()
130173

131-
def construct_parameters(self, params={}):
132-
parameters = ''
174+
def construct_parameters(self, params=None):
175+
"""Construct parameters for URL"""
176+
177+
parameters_string = ''
133178
first = True
134179

135-
for key, value in dict.items():
136-
if first:
137-
parameters += key + '=' + value
138-
first = False
139-
else:
140-
parameters += '&' + key + '=' + value
180+
if params is not None:
181+
for key, value in params.items():
182+
if first:
183+
parameters_string += key + '=' + value
184+
first = False
185+
else:
186+
parameters_string += '&' + key + '=' + value
187+
188+
return parameters_string
141189

142-
return parameters
190+
def make_call(self, http_type='GET', url=''):
191+
"""Make call to Dataverse API"""
143192

144-
def make_call(self, type='GET', url=''):
145-
if type == 'GET':
146-
r = requests.get(url, headers=self.headers)
147-
elif type == 'POST':
148-
r = requests.put(url, headers=self.headers)
193+
if http_type == 'GET':
194+
r = requests.get(url, timeout=self.timeout, headers=self.headers)
195+
elif http_type == 'POST':
196+
r = requests.put(url, timeout=self.timeout, headers=self.headers)
149197
else:
150-
r = requests.get(url, headers=self.headers)
198+
r = requests.get(url, timeout=self.timeout, headers=self.headers)
151199

152200
return r.json
153201

154202
def set_token(self, new_token=''):
203+
"""Set Dataverse API token"""
204+
155205
if new_token:
156206
self.token = new_token

0 commit comments

Comments
 (0)