
Commit 8319db9

Authored Jun 21, 2019
Merge pull request #112 from victor-torres/VCRGzipSerializer
Refactoring VcrGzipSerializer to normalize new cassettes
2 parents: df6be83 + a057f1e, commit 8319db9

21 files changed (+130, -85 lines)

.gitignore

+3

@@ -11,3 +11,6 @@ dist
 
 # documentation
 docs/_build
+
+.DS_Store
+pytestdebug.log

tests/client/conftest.py

+11 -33

@@ -1,7 +1,4 @@
 import os
-import zlib
-import base64
-import pickle
 
 import vcr
 import pytest
@@ -12,42 +9,23 @@
 from scrapinghub.hubstorage.serialization import MSGPACK_AVAILABLE
 
 from ..conftest import request_accept_header_matcher
-
-
-TEST_PROJECT_ID = "2222222"
-TEST_SPIDER_NAME = 'hs-test-spider'
-TEST_FRONTIER_SLOT = 'site.com'
-TEST_BOTGROUP = 'python-hubstorage-test'
-TEST_COLLECTION_NAME = "test_collection_123"
-TEST_ADMIN_AUTH = os.getenv('AUTH', 'f' * 32)
-TEST_USER_AUTH = os.getenv('USER_AUTH', 'e' * 32)
-TEST_DASH_ENDPOINT = os.getenv('DASH_ENDPOINT', 'http://33.33.33.51:8080/api/')
-TEST_HS_ENDPOINT = os.getenv('HS_ENDPOINT',
-                             'http://storage.vm.scrapinghub.com')
+from ..conftest import VCRGzipSerializer
+from ..conftest import (
+    TEST_SPIDER_NAME,
+    TEST_FRONTIER_SLOT,
+    TEST_COLLECTION_NAME,
+    TEST_ENDPOINT,
+    TEST_PROJECT_ID,
+    TEST_ADMIN_AUTH,
+    TEST_DASH_ENDPOINT,
+)
 
 # use some fixed timestamp to represent current time
 TEST_TS = 1476803148638
 
 # vcrpy creates the cassetes automatically under VCR_CASSETES_DIR
 VCR_CASSETES_DIR = 'tests/client/cassetes'
 
-
-class VCRGzipSerializer(object):
-    """Custom ZIP serializer for VCR.py."""
-
-    def serialize(self, cassette_dict):
-        # receives a dict, must return a string
-        # there can be binary data inside some of the requests,
-        # so it's impossible to use json for serialization to string
-        compressed = zlib.compress(pickle.dumps(cassette_dict, protocol=2))
-        return base64.b64encode(compressed).decode('utf8')
-
-    def deserialize(self, cassette_string):
-        # receives a string, must return a dict
-        decoded = base64.b64decode(cassette_string.encode('utf8'))
-        return pickle.loads(zlib.decompress(decoded))
-
-
 my_vcr = vcr.VCR(cassette_library_dir=VCR_CASSETES_DIR, record_mode='once')
 my_vcr.register_serializer('gz', VCRGzipSerializer())
 my_vcr.register_matcher('accept_header', request_accept_header_matcher)
@@ -79,7 +57,7 @@ def is_using_real_services(request):
 @pytest.fixture(scope='session')
 def client():
     return ScrapinghubClient(auth=TEST_ADMIN_AUTH,
-                             endpoint=TEST_HS_ENDPOINT,
+                             endpoint=TEST_ENDPOINT,
                              dash_endpoint=TEST_DASH_ENDPOINT)
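With VCRGzipSerializer and the TEST_* settings now imported from the shared tests/conftest.py, this conftest only keeps its own my_vcr instance. As a rough illustration of how the registered 'gz' serializer is meant to be used, here is a minimal sketch assuming vcrpy's standard use_cassette API; the test name, cassette name and test body are made up and are not part of this commit:

    # hypothetical test module under tests/client/
    from .conftest import my_vcr


    def test_example(client):
        # interactions are replayed from (or, on first run, recorded to)
        # tests/client/cassetes/test_example.gz, which VCRGzipSerializer stores
        # as a pickled, zlib-compressed, base64-encoded cassette dict
        with my_vcr.use_cassette('test_example.gz', serializer='gz'):
            assert client.projects.list() is not None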

tests/client/test_activity.py

+1 -1

@@ -2,7 +2,7 @@
 
 import pytest
 
-from .conftest import TEST_PROJECT_ID
+from ..conftest import TEST_PROJECT_ID
 
 
 def _add_test_activity(project):

tests/client/test_client.py

+1 -1

@@ -6,7 +6,7 @@
 from scrapinghub.client.jobs import Job
 from scrapinghub.client.projects import Projects, Project
 
-from .conftest import TEST_PROJECT_ID
+from ..conftest import TEST_PROJECT_ID
 
 
 # ScrapinghubClient class tests

tests/client/test_collections.py

+1 -1

@@ -7,7 +7,7 @@
 from scrapinghub.client.exceptions import NotFound
 from scrapinghub.client.exceptions import ValueTooLarge
 
-from .conftest import TEST_COLLECTION_NAME
+from ..conftest import TEST_COLLECTION_NAME
 
 
 def _mkitem():

tests/client/test_frontiers.py

+1 -1

@@ -5,7 +5,7 @@
 from six import string_types
 
 from scrapinghub.client.frontiers import Frontiers, Frontier, FrontierSlot
-from .conftest import TEST_FRONTIER_SLOT
+from ..conftest import TEST_FRONTIER_SLOT
 
 
 def _add_test_requests_to_frontier(frontier):

tests/client/test_job.py

+2 -2

@@ -9,8 +9,8 @@
 from scrapinghub.client.requests import Requests
 from scrapinghub.client.samples import Samples
 
-from .conftest import TEST_PROJECT_ID
-from .conftest import TEST_SPIDER_NAME
+from ..conftest import TEST_PROJECT_ID
+from ..conftest import TEST_SPIDER_NAME
 
 
 def test_job_base(client, spider):

tests/client/test_projects.py

+2 -2

@@ -17,8 +17,8 @@
 
 from scrapinghub.hubstorage.utils import apipoll
 
-from .conftest import TEST_PROJECT_ID, TEST_SPIDER_NAME
-from .conftest import TEST_USER_AUTH, TEST_DASH_ENDPOINT
+from ..conftest import TEST_PROJECT_ID, TEST_SPIDER_NAME
+from ..conftest import TEST_USER_AUTH, TEST_DASH_ENDPOINT
 from .utils import validate_default_meta
 
 
tests/client/test_spiders.py

+1 -1

@@ -12,7 +12,7 @@
 from scrapinghub.client.spiders import Spider
 from scrapinghub.client.utils import JobKey
 
-from .conftest import TEST_PROJECT_ID, TEST_SPIDER_NAME
+from ..conftest import TEST_PROJECT_ID, TEST_SPIDER_NAME
 from .utils import validate_default_meta
 
 

tests/client/utils.py

+2 -2

@@ -1,5 +1,5 @@
-from .conftest import TEST_PROJECT_ID, TEST_SPIDER_NAME
-from .conftest import TEST_DASH_ENDPOINT
+from ..conftest import TEST_PROJECT_ID, TEST_SPIDER_NAME
+from ..conftest import TEST_DASH_ENDPOINT
 
 
 def validate_default_meta(meta, state='pending', units=1,

tests/conftest.py

+84

@@ -1,8 +1,92 @@
 # -*- coding: utf-8 -*-
+import base64
+import os
+import pickle
 import pytest
 import re
+import zlib
 
 from scrapinghub.hubstorage.serialization import MSGPACK_AVAILABLE
+from scrapinghub import HubstorageClient
+from scrapinghub.legacy import Connection
+
+
+DEFAULT_PROJECT_ID = "2222222"
+DEFAULT_ENDPOINT = 'http://storage.vm.scrapinghub.com'
+DEFAULT_DASH_ENDPOINT = 'http://33.33.33.51:8080/api/'
+DEFAULT_ADMIN_AUTH = 'f' * 32
+DEFAULT_USER_AUTH = 'e' * 32
+
+
+TEST_PROJECT_ID = os.getenv('HS_PROJECT_ID', DEFAULT_PROJECT_ID)
+TEST_SPIDER_NAME = 'hs-test-spider'
+TEST_FRONTIER_SLOT = 'site.com'
+TEST_BOTGROUP = 'python-hubstorage-test'
+TEST_COLLECTION_NAME = "test_collection_123"
+TEST_AUTH = os.getenv('HS_AUTH', DEFAULT_ADMIN_AUTH)
+TEST_ENDPOINT = os.getenv('HS_ENDPOINT', DEFAULT_ENDPOINT)
+TEST_COLLECTION_NAME = "test_collection_123"
+TEST_ADMIN_AUTH = os.getenv('AUTH', DEFAULT_ADMIN_AUTH)
+TEST_USER_AUTH = os.getenv('USER_AUTH', DEFAULT_USER_AUTH)
+TEST_DASH_ENDPOINT = os.getenv('DASH_ENDPOINT', DEFAULT_DASH_ENDPOINT)
+
+
+class VCRGzipSerializer(object):
+    """Custom ZIP serializer for VCR.py."""
+
+    def serialize(self, cassette_dict):
+        # receives a dict, must return a string
+        # there can be binary data inside some of the requests,
+        # so it's impossible to use json for serialization to string
+        cassette_dict = normalize_cassette(cassette_dict)
+        compressed = zlib.compress(pickle.dumps(cassette_dict, protocol=2))
+        return base64.b64encode(compressed).decode('utf8')
+
+    def deserialize(self, cassette_string):
+        # receives a string, must return a dict
+        decoded = base64.b64decode(cassette_string.encode('utf8'))
+        return pickle.loads(zlib.decompress(decoded))
+
+
+def normalize_endpoint(uri, endpoint, default_endpoint):
+    return uri.replace(endpoint.rstrip('/'), default_endpoint.rstrip('/'))
+
+
+def normalize_cassette(cassette_dict):
+    """
+    This function normalizes the cassette dict trying to make sure
+    we are always making API requests with the same variables:
+    - project id
+    - endpoint
+    - authentication header
+    """
+    interactions = []
+    for interaction in cassette_dict['interactions']:
+        uri = interaction['request']['uri']
+        uri = uri.replace(TEST_PROJECT_ID, DEFAULT_PROJECT_ID)
+
+        hs_endpoint = TEST_ENDPOINT or HubstorageClient.DEFAULT_ENDPOINT
+        uri = normalize_endpoint(uri, hs_endpoint, DEFAULT_ENDPOINT)
+
+        dash_endpoint = TEST_DASH_ENDPOINT or Connection.DEFAULT_ENDPOINT
+        uri = normalize_endpoint(uri, dash_endpoint, DEFAULT_DASH_ENDPOINT)
+
+        interaction['request']['uri'] = uri
+
+        if 'Authorization' in interaction['request']['headers']:
+            del interaction['request']['headers']['Authorization']
+            interaction['request']['headers']['Authorization'] = (
+                'Basic {}'.format(
+                    base64.b64encode(
+                        '{}:'.format(DEFAULT_ADMIN_AUTH).encode('utf-8')
+                    ).decode('utf-8')
+                )
+            )
+
+        interactions.append(interaction)
+
+    cassette_dict['interactions'] = interactions
+    return cassette_dict
 
 
 def pytest_addoption(parser):
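The new normalize_cassette step is what the commit title refers to: before a cassette is pickled and compressed, every recorded interaction is rewritten to use the DEFAULT_* project id, endpoints and auth header, so cassettes recorded against different accounts or Scrapinghub instances come out identical. A minimal sketch of the effect follows; the recorded interaction is invented, and it assumes the tests package is importable from the repository root (the test modules' own `from ..conftest import ...` lines already rely on that):

    from tests.conftest import normalize_cassette

    cassette = {
        'interactions': [{
            'request': {
                'uri': 'http://storage.vm.scrapinghub.com/jobs/2222222/1/2',
                'headers': {'Authorization': 'Basic c2VjcmV0Og=='},
            },
        }],
    }

    normalized = normalize_cassette(cassette)
    request = normalized['interactions'][0]['request']
    # with no HS_* / AUTH env vars set, the URI is already canonical; when the
    # suite runs against a custom project id or endpoint, those parts of the
    # URI are rewritten to the DEFAULT_* values as well
    print(request['uri'])
    # the recorded credentials are always replaced by a header derived
    # from DEFAULT_ADMIN_AUTH, so no real auth token ends up in a cassette
    print(request['headers']['Authorization'])

Because serialize() calls normalize_cassette() before compressing, only newly recorded cassettes are normalized on disk; deserialize() is unchanged, so existing cassettes keep loading as before.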

tests/hubstorage/conftest.py

+9 -29

@@ -1,7 +1,4 @@
 import os
-import zlib
-import base64
-import pickle
 
 import vcr
 import pytest
@@ -14,36 +11,19 @@
 from scrapinghub.hubstorage.serialization import MSGPACK_AVAILABLE
 
 from ..conftest import request_accept_header_matcher
-
-
-TEST_PROJECT_ID = "2222222"
-TEST_SPIDER_NAME = 'hs-test-spider'
-TEST_FRONTIER_SLOT = 'site.com'
-TEST_BOTGROUP = 'python-hubstorage-test'
-TEST_COLLECTION_NAME = "test_collection_123"
-TEST_AUTH = os.getenv('HS_AUTH', 'f' * 32)
-TEST_ENDPOINT = os.getenv('HS_ENDPOINT', 'http://storage.vm.scrapinghub.com')
+from ..conftest import VCRGzipSerializer
+from ..conftest import (
+    TEST_PROJECT_ID,
+    TEST_ENDPOINT,
+    TEST_AUTH,
+    TEST_BOTGROUP,
+    TEST_COLLECTION_NAME,
+    TEST_SPIDER_NAME,
+)
 
 # vcrpy creates the cassetes automatically under VCR_CASSETES_DIR
 VCR_CASSETES_DIR = 'tests/hubstorage/cassetes'
 
-
-class VCRGzipSerializer(object):
-    """Custom ZIP serializer for VCR.py."""
-
-    def serialize(self, cassette_dict):
-        # receives a dict, must return a string
-        # there can be binary data inside some of the requests,
-        # so it's impossible to use json for serialization to string
-        compressed = zlib.compress(pickle.dumps(cassette_dict, protocol=2))
-        return base64.b64encode(compressed).decode('utf8')
-
-    def deserialize(self, cassette_string):
-        # receives a string, must return a dict
-        decoded = base64.b64decode(cassette_string.encode('utf8'))
-        return pickle.loads(zlib.decompress(decoded))
-
-
 my_vcr = vcr.VCR(cassette_library_dir=VCR_CASSETES_DIR, record_mode='once')
 my_vcr.register_serializer('gz', VCRGzipSerializer())
 my_vcr.register_matcher('accept_header', request_accept_header_matcher)

tests/hubstorage/test_batchuploader.py

+1 -1

@@ -7,7 +7,7 @@
 from collections import defaultdict
 
 from scrapinghub.hubstorage import ValueTooLarge
-from .conftest import TEST_SPIDER_NAME, TEST_AUTH
+from ..conftest import TEST_SPIDER_NAME, TEST_AUTH
 from .conftest import start_job
 
 

tests/hubstorage/test_client.py

+2 -2

@@ -4,8 +4,8 @@
 from scrapinghub import HubstorageClient
 from scrapinghub.hubstorage.utils import apipoll
 
-from .conftest import TEST_AUTH, TEST_ENDPOINT
-from .conftest import TEST_PROJECT_ID, TEST_SPIDER_NAME
+from ..conftest import TEST_AUTH, TEST_ENDPOINT
+from ..conftest import TEST_PROJECT_ID, TEST_SPIDER_NAME
 from .conftest import start_job
 
 

tests/hubstorage/test_collections.py

+1 -1

@@ -8,7 +8,7 @@
 from scrapinghub import HubstorageClient
 from six.moves import range
 
-from .conftest import TEST_COLLECTION_NAME
+from ..conftest import TEST_COLLECTION_NAME
 from .testutil import failing_downloader
 
 

tests/hubstorage/test_frontier.py

+1 -1

@@ -3,7 +3,7 @@
 """
 import pytest
 
-from .conftest import TEST_FRONTIER_SLOT
+from ..conftest import TEST_FRONTIER_SLOT
 
 
 @pytest.fixture(autouse=True)

tests/hubstorage/test_jobq.py

+1 -1

@@ -9,7 +9,7 @@
 from scrapinghub.hubstorage.jobq import DuplicateJobError
 from scrapinghub.hubstorage.utils import apipoll
 
-from .conftest import TEST_PROJECT_ID, TEST_SPIDER_NAME
+from ..conftest import TEST_PROJECT_ID, TEST_SPIDER_NAME
 from .conftest import hsspiderid
 
 

tests/hubstorage/test_jobsmeta.py

+1 -1

@@ -3,7 +3,7 @@
 
 System tests for operations on stored job metadata
 """
-from .conftest import TEST_SPIDER_NAME
+from ..conftest import TEST_SPIDER_NAME
 from .conftest import start_job
 
 
tests/hubstorage/test_project.py

+1 -1

@@ -9,7 +9,7 @@
 
 from scrapinghub import HubstorageClient
 
-from .conftest import TEST_PROJECT_ID, TEST_SPIDER_NAME
+from ..conftest import TEST_PROJECT_ID, TEST_SPIDER_NAME
 from .conftest import hsspiderid
 from .conftest import start_job
 from .conftest import set_testbotgroup, unset_testbotgroup

tests/hubstorage/test_retry.py

+2 -2

@@ -11,8 +11,8 @@
 from scrapinghub import HubstorageClient
 from six.moves.http_client import BadStatusLine
 
-from .conftest import TEST_AUTH, TEST_ENDPOINT
-from .conftest import TEST_PROJECT_ID, TEST_SPIDER_NAME
+from ..conftest import TEST_AUTH, TEST_ENDPOINT
+from ..conftest import TEST_PROJECT_ID, TEST_SPIDER_NAME
 
 
 GET = responses.GET

tests/hubstorage/test_system.py

+2 -2

@@ -7,8 +7,8 @@
 from scrapinghub import HubstorageClient
 from scrapinghub.hubstorage.utils import millitime
 
-from .conftest import TEST_ENDPOINT, TEST_SPIDER_NAME
-from .conftest import TEST_PROJECT_ID, TEST_AUTH
+from ..conftest import TEST_ENDPOINT, TEST_SPIDER_NAME
+from ..conftest import TEST_PROJECT_ID, TEST_AUTH
 from .conftest import start_job
 
 