Skip to content

Commit 8185341

Browse files
authored
Merge pull request #32 from scrapinghub/sc786
Pytest and vcrpy to improve sh.hubstorage tests
2 parents fe134f0 + 0d19b04 commit 8185341

15 files changed

+1733
-1515
lines changed

requirements-test.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
-r requirements-pypy.txt
22

33
mock
4+
vcrpy==1.10.3
45
pytest
56
pytest-cov
6-
responses==0.5.0
7+
responses==0.5.0

tests/conftest.py

+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# -*- coding: utf-8 -*-
2+
3+
4+
def pytest_addoption(parser):
    """Register the command-line flags that control vcr cassette usage.

    Both flags are plain booleans (``store_true``) that default to False.
    """
    cassette_flags = (
        ("--update-cassettes",
         "test with real services rewriting existing vcr cassettes"),
        ("--ignore-cassettes",
         "test with real services skipping existing vcr cassettes"),
    )
    for flag, description in cassette_flags:
        parser.addoption(
            flag, action="store_true", default=False, help=description)

tests/hubstorage/conftest.py

+185
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,185 @@
1+
import os
2+
import zlib
3+
import base64
4+
import pickle
5+
6+
import vcr
7+
import pytest
8+
import shutil
9+
import requests
10+
from requests import HTTPError
11+
12+
from scrapinghub import HubstorageClient
13+
from scrapinghub.hubstorage.utils import urlpathjoin
14+
15+
16+
# Identifiers of the remote objects the hubstorage tests operate on.
TEST_PROJECT_ID = "2222222"
TEST_SPIDER_NAME = 'hs-test-spider'
TEST_FRONTIER_NAME = 'test'
TEST_FRONTIER_SLOT = 'site.com'
TEST_BOTGROUP = 'python-hubstorage-test'
TEST_COLLECTION_NAME = "test_collection_123"

# Credentials and endpoint can be overridden through the environment;
# the fallbacks are a 32-character dummy key and the default storage host.
TEST_AUTH = os.environ.get('HS_AUTH', 'f' * 32)
TEST_ENDPOINT = os.environ.get(
    'HS_ENDPOINT', 'http://storage.vm.scrapinghub.com')

# vcrpy creates the cassettes automatically under VCR_CASSETES_DIR
VCR_CASSETES_DIR = 'tests/hubstorage/cassetes'
27+
28+
29+
class VCRGzipSerializer(object):
    """Custom compressed serializer for VCR.py.

    A cassette dict is pickled (protocol 2), compressed with zlib and then
    base64-encoded, so the stored cassette is plain ASCII text.

    SECURITY NOTE: ``deserialize`` unpickles its input, and unpickling can
    execute arbitrary code. Only load cassette files that come from a
    trusted source (e.g. the ones recorded for this test suite).
    """

    def serialize(self, cassette_dict):
        """Return ``cassette_dict`` encoded as a base64 text string."""
        # receives a dict, must return a string
        # there can be binary data inside some of the requests,
        # so it's impossible to use json for serialization to string
        compressed = zlib.compress(pickle.dumps(cassette_dict, protocol=2))
        return base64.b64encode(compressed).decode('utf8')

    def deserialize(self, cassette_string):
        """Return the cassette dict encoded in ``cassette_string``.

        Accepts the text produced by ``serialize``; the same payload given
        as bytes is accepted as well.
        """
        # receives a string, must return a dict
        if not isinstance(cassette_string, bytes):
            cassette_string = cassette_string.encode('utf8')
        decoded = base64.b64decode(cassette_string)
        return pickle.loads(zlib.decompress(decoded))
43+
44+
45+
# Shared VCR instance for the hubstorage tests: cassettes live under
# VCR_CASSETES_DIR, use the 'once' record mode and are stored with the
# custom 'gz' serializer registered right below.
my_vcr = vcr.VCR(
    cassette_library_dir=VCR_CASSETES_DIR,
    record_mode='once',
    serializer='gz',
)
my_vcr.register_serializer('gz', VCRGzipSerializer())
48+
49+
50+
def pytest_configure(config):
    """Adjust cassette handling according to the command-line flags.

    * ``--update-cassettes``: delete the cassette directory so that every
      cassette gets recorded from scratch.
    * ``--ignore-cassettes`` (only consulted when the flag above is not
      set): reconfigure the shared ``my_vcr`` so cassettes are not used.
    """
    if config.option.update_cassettes:
        # there's vcr `all` mode to update cassettes but it doesn't delete
        # or clear existing records, so its size will always only grow
        if os.path.exists(VCR_CASSETES_DIR):
            shutil.rmtree(VCR_CASSETES_DIR)
    elif config.option.ignore_cassettes:
        # simple hack to just ignore vcr cassettes:
        # - all record_mode means recording new interactions + no replay
        # - before_record returning None means skipping all the requests
        # Only attributes of the module-level object are set here (the name
        # itself is never rebound), so no `global` statement is required.
        my_vcr.record_mode = 'all'
        my_vcr.before_record_request = lambda request: None
63+
64+
65+
def is_using_real_services(request):
    """Tell whether either cassette flag was given for this run.

    Truthy when ``--update-cassettes`` or ``--ignore-cassettes`` is set.
    """
    options = request.config.option
    return options.update_cassettes or options.ignore_cassettes
68+
69+
70+
@pytest.fixture(scope='session')
def hsclient():
    """Session-wide HubstorageClient built from TEST_AUTH / TEST_ENDPOINT."""
    client = HubstorageClient(auth=TEST_AUTH, endpoint=TEST_ENDPOINT)
    return client
73+
74+
75+
@pytest.fixture(scope='session')
def hsproject(hsclient):
    """Session-wide handle of the test project (TEST_PROJECT_ID)."""
    project = hsclient.get_project(TEST_PROJECT_ID)
    return project
78+
79+
80+
# NOTE(review): the vcr decorator is applied on top of the pytest fixture
# decorator, i.e. to whatever `pytest.fixture` returns - confirm this order
# still behaves as intended with the pytest version in use.
@my_vcr.use_cassette()
@pytest.fixture(scope='session')
def hsspiderid(hsproject):
    """Session-wide id of the TEST_SPIDER_NAME spider, as a string.

    The id lookup is called with ``create=1`` (presumably so the spider is
    created when it does not exist yet - verify against the ids API).
    """
    spider_id = hsproject.ids.spider(TEST_SPIDER_NAME, create=1)
    return str(spider_id)
84+
85+
86+
@pytest.fixture(scope='session')
def hscollection(hsproject, request):
    """Yield the shared test collection store.

    When the run talks to real services the collection is emptied first.
    """
    store = get_test_collection(hsproject)
    talking_to_real_services = is_using_real_services(request)
    if talking_to_real_services:
        clean_collection(store)
    yield store
92+
93+
94+
@pytest.fixture(autouse=True, scope='session')
def setup_session(hsclient, hsproject, hscollection, request):
    """Prepare the test project once per session and close the client after.

    With real services the test botgroup is set and all jobs are removed
    before any test runs; the client is closed once the session is over.
    """
    if is_using_real_services(request):
        for prepare in (set_testbotgroup, remove_all_jobs):
            prepare(hsproject)
    yield
    hsclient.close()
101+
102+
103+
@pytest.fixture(autouse=True)
def setup_vcrpy(request, hsproject):
    """Run every test inside its own vcr cassette.

    With real services all jobs are removed before the test starts.
    """
    # Cassettes are named "<test module>/<test function>.gz"; without an
    # explicit name vcr would use this fixture's name (setup_vcrpy) for all
    # tests. The alternative is decorating each test with vcr separately.
    test_function = request.function
    module_name = test_function.__module__.rsplit('.', 1)[-1]
    cassette_name = '{}/{}.gz'.format(module_name, test_function.__name__)
    if is_using_real_services(request):
        remove_all_jobs(hsproject)
    with my_vcr.use_cassette(cassette_name):
        yield
116+
117+
118+
# ----------------------------------------------------------------------------
119+
120+
121+
def start_job(hsproject, **startparams):
    """Start a job through the project's JobQ and return its job object.

    ``startparams`` are forwarded to ``hsproject.jobq.start``. Returns
    ``None`` when JobQ hands back nothing to start.
    """
    jobdata = hsproject.jobq.start(**startparams)
    if not jobdata:
        return None
    # 'key' is taken out of the job data; the rest is passed on as metadata
    key = jobdata.pop('key')
    credentials = (key, jobdata['auth'])
    return hsproject.get_job(key, jobauth=credentials, metadata=jobdata)
127+
128+
129+
# Clean environment section
130+
131+
132+
def remove_all_jobs(hsproject):
    """Reset the project: drop its settings and remove every queued job.

    All settings keys except 'botgroups' are deleted and saved, then each
    job listed in the pending/running/finished queues is removed.
    """
    removable = [
        name for name in hsproject.settings.keys() if name != 'botgroups'
    ]
    for name in removable:
        del hsproject.settings[name]
    hsproject.settings.save()

    # Cleanup JobQ: run 2 times to ensure we covered all jobs
    for _ in range(2):
        for queuename in ('pending', 'running', 'finished'):
            queue_info = hsproject.jobq.summary(queuename)
            for entry in queue_info['summary']:
                _remove_job(hsproject, entry['key'])
143+
144+
145+
def _remove_job(hsproject, jobkey):
    """Finish and delete one job, in JobQ and in the jobs storage.

    ``jobkey`` is expected to look like "<project id>/<rest>". The project
    part must be TEST_PROJECT_ID; this is verified *before* anything is
    modified, so a foreign key can never be partially removed.

    Raises:
        AssertionError: if ``jobkey`` does not belong to the test project.
    """
    project_id, _, job_path = jobkey.partition('/')
    # Exact comparison: a plain prefix check would also accept a longer
    # project id that merely starts with TEST_PROJECT_ID.
    assert project_id == TEST_PROJECT_ID, jobkey
    hsproject.jobq.finish(jobkey)
    hsproject.jobq.delete(jobkey)
    # delete job (the jobs API gets the key without the project prefix)
    hsproject.jobs.apidelete(job_path)
151+
152+
# Collection helpers section
153+
154+
155+
def get_test_collection(project):
    """Return the TEST_COLLECTION_NAME store of ``project``."""
    collections = project.collections
    return collections.new_store(TEST_COLLECTION_NAME)
157+
158+
159+
def clean_collection(collection):
    """Delete every item of ``collection``, tolerating a missing collection.

    Raises:
        HTTPError: for any HTTP failure other than a 404 response.
    """
    try:
        stored_keys = (item['_key'] for item in collection.iter_values())
        for key in stored_keys:
            collection.delete(key)
    except HTTPError as exc:
        # if collection doesn't exist yet service responds 404
        collection_missing = exc.response.status_code == 404
        if not collection_missing:
            raise
167+
168+
169+
# Botgroups helpers section
170+
171+
172+
def set_testbotgroup(hsproject):
    """Assign TEST_BOTGROUP to the project and register it in JobQ."""
    botgroup_settings = {'botgroups': [TEST_BOTGROUP]}
    hsproject.settings.apipost(jl=botgroup_settings)
    # Additional step to populate JobQ's botgroups table
    max_running_url = urlpathjoin(
        TEST_ENDPOINT, 'botgroups', TEST_BOTGROUP, 'max_running')
    requests.post(max_running_url, auth=hsproject.auth, data='null')
    hsproject.settings.expire()
178+
179+
180+
def unset_testbotgroup(hsproject):
    """Drop the project's botgroups setting and delete TEST_BOTGROUP in JobQ."""
    project_settings = hsproject.settings
    project_settings.apidelete('botgroups')
    project_settings.expire()
    # Additional step to delete botgroups in JobQ
    botgroup_url = urlpathjoin(TEST_ENDPOINT, 'botgroups', TEST_BOTGROUP)
    requests.delete(botgroup_url, auth=hsproject.auth)

tests/hubstorage/hstestcase.py

-102
This file was deleted.

tests/hubstorage/test_activity.py

+22-23
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,33 @@
11
"""
22
Test Activity
33
"""
4-
from .hstestcase import HSTestCase
54
from six.moves import range
65

76

8-
class ActivityTest(HSTestCase):
7+
def test_post_and_reverse_get(hsproject):
8+
# make some sample data
9+
orig_data = [{u'foo': 42, u'counter': i} for i in range(20)]
10+
data1 = orig_data[:10]
11+
data2 = orig_data[10:]
912

10-
def test_post_and_reverse_get(self):
11-
# make some sample data
12-
orig_data = [{u'foo': 42, u'counter': i} for i in range(20)]
13-
data1 = orig_data[:10]
14-
data2 = orig_data[10:]
13+
# put ordered data in 2 separate posts
14+
hsproject.activity.post(data1)
15+
hsproject.activity.post(data2)
1516

16-
# put ordered data in 2 separate posts
17-
self.project.activity.post(data1)
18-
self.project.activity.post(data2)
17+
# read them back in reverse chronological order
18+
result = list(hsproject.activity.list(count=20))
19+
assert len(result) == 20
20+
assert orig_data[::-1] == result
1921

20-
# read them back in reverse chronological order
21-
result = list(self.project.activity.list(count=20))
22-
self.assertEqual(len(result), 20)
23-
self.assertEqual(orig_data[::-1], result)
2422

25-
def test_filters(self):
26-
self.project.activity.post({'c': i} for i in range(10))
27-
r = list(self.project.activity.list(filter='["c", ">", [5]]', count=2))
28-
self.assertEqual(r, [{'c': 9}, {'c': 8}])
23+
def test_filters(hsproject):
    """Listing with a ``c > 5`` filter and ``count=2`` yields 9 and then 8."""
    hsproject.activity.post({'c': i} for i in range(10))
    filtered = hsproject.activity.list(filter='["c", ">", [5]]', count=2)
    expected = [{'c': 9}, {'c': 8}]
    assert list(filtered) == expected
2927

30-
def test_timestamp(self):
31-
self.project.activity.add({'foo': 'bar'}, baz='qux')
32-
entry = next(self.project.activity.list(count=1, meta='_ts'))
33-
self.assertTrue(entry.pop('_ts', None))
34-
self.assertEqual(entry, {'foo': 'bar', 'baz': 'qux'})
28+
29+
def test_timestamp(hsproject):
    """An added entry comes back with a truthy ``_ts`` when it is requested."""
    hsproject.activity.add({'foo': 'bar'}, baz='qux')
    latest = next(hsproject.activity.list(count=1, meta='_ts'))
    timestamp = latest.pop('_ts', None)
    assert timestamp
    # with the timestamp removed only the submitted fields remain
    assert latest == {'foo': 'bar', 'baz': 'qux'}

0 commit comments

Comments
 (0)