Skip to content

Commit 4d07423

Browse files
authored
Merge pull request #62 from homeylab/http_input_feature
Http input feature
2 parents 41ff93e + c6757bc commit 4d07423

File tree

11 files changed

+145
-97
lines changed

11 files changed

+145
-97
lines changed

bookstack_file_exporter/archiver/archiver.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from bookstack_file_exporter.archiver.minio_archiver import MinioArchiver
1010
from bookstack_file_exporter.config_helper.remote import StorageProviderConfig
1111
from bookstack_file_exporter.config_helper.config_helper import ConfigNode
12+
from bookstack_file_exporter.common.util import HttpHelper
1213

1314
log = logging.getLogger(__name__)
1415

@@ -22,17 +23,18 @@ class Archiver:
2223
2324
Args:
2425
:config: <ConfigNode> = Configuration with user inputs and general options.
26+
:http_client: <HttpHelper> = http helper functions with config from user inputs
2527
2628
Returns:
2729
Archiver instance with attributes that are accessible
2830
for use in handling bookstack exports and remote uploads.
2931
"""
30-
def __init__(self, config: ConfigNode):
32+
def __init__(self, config: ConfigNode, http_client: HttpHelper):
3133
self.config = config
3234
# for convenience
3335
self.base_dir = config.base_dir_name
3436
self.archive_dir = self._generate_root_folder(self.base_dir)
35-
self._page_archiver = PageArchiver(self.archive_dir, self.config)
37+
self._page_archiver = PageArchiver(self.archive_dir, self.config, http_client)
3638
self._remote_exports = {'minio': self._archive_minio, 's3': self._archive_s3}
3739

3840
def create_export_dir(self):

bookstack_file_exporter/archiver/asset_archiver.py

Lines changed: 10 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
# pylint: disable=import-error
66
from requests import Response
77

8-
from bookstack_file_exporter.common import util as common_util
8+
from bookstack_file_exporter.common.util import HttpHelper
99

1010
log = logging.getLogger(__name__)
1111

@@ -112,47 +112,38 @@ class AssetArchiver:
112112
113113
Args:
114114
:urls: <Dict[str, str]> = api urls for images and attachments
115-
:headers: <Dict[str, str]> = http headers for api requests
116-
:verify_ssl: <bool> = verify ssl for api requests
115+
:http_client: <HttpHelper> = http helper functions with config from user inputs
117116
118117
Returns:
119118
AssetArchiver instance for use in archiving images and attachments for a page
120119
"""
121-
def __init__(self, urls: Dict[str, str], headers: Dict[str, str],
122-
verify_ssl: bool):
120+
def __init__(self, urls: Dict[str, str], http_client: HttpHelper):
123121
self.api_urls = urls
124-
self.verify_ssl = verify_ssl
125-
self._headers = headers
126122
self._asset_map = {
127123
'images': self._create_image_map,
128124
'attachments': self._create_attachment_map
129125
}
126+
self.http_client = http_client
130127

131128
def get_asset_nodes(self, asset_type: str) -> Dict[str, ImageNode | AttachmentNode]:
132129
"""Get image or attachment helpers for a page"""
133-
asset_response: Response = common_util.http_get_request(
134-
self.api_urls[asset_type],
135-
self._headers,
136-
self.verify_ssl)
130+
asset_response: Response = self.http_client.http_get_request(
131+
self.api_urls[asset_type])
137132
asset_json = asset_response.json()['data']
138133
return self._asset_map[asset_type](asset_json)
139134

140135
def get_asset_data(self, asset_type: str,
141136
meta_data: Union[AttachmentNode, ImageNode]) -> Dict[str, str | bool | int | dict]:
142137
"""Get asset data based on type"""
143138
data_url = f"{self.api_urls[asset_type]}/{meta_data.id_}"
144-
asset_data_response: Response = common_util.http_get_request(
145-
data_url,
146-
self._headers,
147-
self.verify_ssl)
139+
asset_data_response: Response = self.http_client.http_get_request(
140+
data_url)
148141
return asset_data_response.json()
149142

150143
def get_asset_bytes(self, asset_type: str, url: str) -> bytes:
151144
"""Get raw asset data"""
152-
asset_response: Response = common_util.http_get_request(
153-
url,
154-
self._headers,
155-
self.verify_ssl)
145+
asset_response: Response = self.http_client.http_get_request(
146+
url)
156147
match asset_type:
157148
case "images":
158149
asset_data = asset_response.content

bookstack_file_exporter/archiver/page_archiver.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from bookstack_file_exporter.archiver import util as archiver_util
77
from bookstack_file_exporter.archiver.asset_archiver import AssetArchiver, ImageNode, AttachmentNode
88
from bookstack_file_exporter.config_helper.config_helper import ConfigNode
9+
from bookstack_file_exporter.common.util import HttpHelper
910

1011
log = logging.getLogger(__name__)
1112

@@ -33,26 +34,26 @@ class PageArchiver:
3334
3435
Args:
3536
:archive_dir: <str> = directory where data will be put into.
36-
3737
:config: <ConfigNode> = Configuration with user inputs and general options.
38+
:http_client: <HttpHelper> = http helper functions with config from user inputs
3839
3940
Returns:
4041
:PageArchiver: instance with methods to help collect page content from a Bookstack instance.
4142
"""
42-
def __init__(self, archive_dir: str, config: ConfigNode) -> None:
43+
def __init__(self, archive_dir: str, config: ConfigNode, http_client: HttpHelper) -> None:
4344
self.asset_config = config.user_inputs.assets
4445
self.export_formats = config.user_inputs.formats
4546
self.api_urls = config.urls
46-
self._headers = config.headers
4747
# full path, bookstack-<timestamp>, and .tgz extension
4848
self.archive_file = f"{archive_dir}{_FILE_EXTENSION_MAP['tgz']}"
4949
# name of intermediate tar file before gzip
5050
self.tar_file = f"{archive_dir}{_FILE_EXTENSION_MAP['tar']}"
5151
# name of the base folder to use within the tgz archive (internal tar layout)
5252
self.archive_base_path = archive_dir.split("/")[-1]
5353
self.modify_md: bool = self._check_md_modify()
54-
self.asset_archiver = AssetArchiver(self.api_urls, self._headers,
55-
self.verify_ssl)
54+
self.asset_archiver = AssetArchiver(self.api_urls,
55+
http_client)
56+
self.http_client = http_client
5657

5758
def _check_md_modify(self) -> bool:
5859
# check to ensure they have asset_config defined, could be None
@@ -107,8 +108,8 @@ def _archive_page(self, page: Node, export_format: str, data: bytes):
107108

108109
def _get_page_data(self, page_id: int, export_format: str) -> bytes:
109110
url = f"{self.api_urls['pages']}/{page_id}/{_EXPORT_API_PATH}/{export_format}"
110-
return archiver_util.get_byte_response(url=url, headers=self._headers,
111-
verify_ssl=self.verify_ssl)
111+
return archiver_util.get_byte_response(url=url,
112+
http_client=self.http_client)
112113

113114
def _archive_page_meta(self, page_path: str, meta_data: Dict[str, Union[str, int]]):
114115
meta_file_name = f"{self.archive_base_path}/{page_path}{_FILE_EXTENSION_MAP['meta']}"

bookstack_file_exporter/archiver/util.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,13 @@
99
import glob
1010
from pathlib import Path
1111

12-
from bookstack_file_exporter.common import util
12+
from bookstack_file_exporter.common.util import HttpHelper
1313

1414
log = logging.getLogger(__name__)
1515

16-
def get_byte_response(url: str, headers: Dict[str, str], verify_ssl: bool) -> bytes:
16+
def get_byte_response(url: str, http_client: HttpHelper) -> bytes:
1717
"""get byte response from http request"""
18-
response = util.http_get_request(url=url, headers=headers, verify_ssl=verify_ssl)
18+
response = http_client.http_get_request(url=url)
1919
return response.content
2020

2121
# append to a tar file instead of creating files locally and then tar'ing after
Lines changed: 63 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,44 +1,72 @@
11
import logging
22
from typing import Dict
3+
import urllib3
34
# pylint: disable=import-error
45
import requests
56
# pylint: disable=import-error
67
from requests.adapters import HTTPAdapter, Retry
78

9+
from bookstack_file_exporter.config_helper.models import HttpConfig
10+
811
log = logging.getLogger(__name__)
912

10-
def http_get_request(url: str, headers: Dict[str, str],
11-
verify_ssl: bool, timeout: int = 30) -> requests.Response:
12-
"""make http requests and return response object"""
13-
url_prefix = should_verify(url)
14-
try:
15-
with requests.Session() as session:
16-
# {backoff factor} * (2 ** ({number of previous retries}))
17-
# {raise_on_status} if status falls in status_forcelist range
18-
# and retries have been exhausted.
19-
# {status_force_list} 413, 429, 503 defaults are overwritten with additional ones
20-
retries = Retry(total=5,
21-
backoff_factor=0.5,
22-
raise_on_status=True,
23-
status_forcelist=[413, 429, 500, 502, 503, 504])
24-
session.mount(url_prefix, HTTPAdapter(max_retries=retries))
25-
response = session.get(url, headers=headers, verify=verify_ssl, timeout=timeout)
26-
except Exception as req_err:
27-
log.error("Failed to make request for %s", url)
28-
raise req_err
29-
try:
30-
#raise_for_status() throws an exception on codes 400-599
31-
response.raise_for_status()
32-
except requests.exceptions.HTTPError as e:
33-
# this means it either exceeded 50X retries in `http_get_request` handler
34-
# or it returned a 40X which is not expected
35-
log.error("Bookstack request failed with status code: %d on url: %s",
36-
response.status_code, url)
37-
raise e
38-
return response
39-
40-
def should_verify(url: str) -> str:
41-
"""check if http or https"""
42-
if url.startswith("https"):
43-
return "https://"
44-
return "http://"
13+
# suppress urllib3 warnings for the whole process at import time, regardless of config (intended for the TLS warnings seen when verify_ssl=false)
14+
urllib3.disable_warnings()
15+
16+
class HttpHelper:
17+
"""
18+
HttpHelper provides an http request helper with config stored and retries built in
19+
20+
Args:
21+
:headers: <Dict[str, str]> = all headers to use for http requests
22+
:config: <HttpConfig> = Configuration with user inputs for http requests
23+
24+
Returns:
25+
:HttpHelper: instance with methods to help with http requests.
26+
"""
27+
def __init__(self, headers: Dict[str, str],
28+
config: HttpConfig):
29+
self.backoff_factor = config.backoff_factor
30+
self.retry_codes = config.retry_codes
31+
self.retry_count = config.retry_count
32+
self.http_timeout = config.timeout
33+
self.verify_ssl = config.verify_ssl
34+
self._headers = headers
35+
36+
# more details on options: https://urllib3.readthedocs.io/en/stable/reference/urllib3.util.html
37+
def http_get_request(self, url: str) -> requests.Response:
38+
"""make http requests and return response object"""
39+
url_prefix = self.should_verify(url)
40+
try:
41+
with requests.Session() as session:
42+
# {backoff factor} * (2 ** ({number of previous retries}))
43+
# {raise_on_status} if status falls in status_forcelist range
44+
# and retries have been exhausted.
45+
# {status_forcelist} the 413, 429, 503 defaults are overwritten with the user-configured retry_codes
46+
retries = Retry(total=self.retry_count,
47+
backoff_factor=self.backoff_factor,
48+
raise_on_status=True,
49+
status_forcelist=self.retry_codes)
50+
session.mount(url_prefix, HTTPAdapter(max_retries=retries))
51+
response = session.get(url, headers=self._headers, verify=self.verify_ssl,
52+
timeout=self.http_timeout)
53+
except Exception as req_err:
54+
log.error("Failed to make request for %s", url)
55+
raise req_err
56+
try:
57+
#raise_for_status() throws an exception on codes 400-599
58+
response.raise_for_status()
59+
except requests.exceptions.HTTPError as e:
60+
# this means it either exceeded 50X retries in `http_get_request` handler
61+
# or it returned a 40X which is not expected
62+
log.error("Bookstack request failed with status code: %d on url: %s",
63+
response.status_code, url)
64+
raise e
65+
return response
66+
67+
@staticmethod
68+
def should_verify(url: str) -> str:
69+
"""check if http or https"""
70+
if url.startswith("https"):
71+
return "https://"
72+
return "http://"

bookstack_file_exporter/config_helper/config_helper.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -105,8 +105,8 @@ def _generate_remote_config(self) -> Dict[str, StorageProviderConfig]:
105105
def _generate_headers(self) -> Dict[str, str]:
106106
headers = {}
107107
# add additional_headers provided by user
108-
if self.user_inputs.additional_headers:
109-
for key, value in self.user_inputs.additional_headers.items():
108+
if self.user_inputs.http_config.additional_headers:
109+
for key, value in self.user_inputs.http_config.additional_headers.items():
110110
headers[key] = value
111111

112112
# add default headers

bookstack_file_exporter/config_helper/models.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,17 +26,26 @@ class Assets(BaseModel):
2626
export_attachments: Optional[bool] = False
2727
modify_markdown: Optional[bool] = False
2828
export_meta: Optional[bool] = False
29-
verify_ssl: Optional[bool] = True
29+
# verify_ssl is no longer an asset option; it is now defined on HttpConfig
30+
31+
class HttpConfig(BaseModel):
32+
"""YAML schema for user provided http settings"""
33+
verify_ssl: Optional[bool] = False
34+
timeout: Optional[int] = 30
35+
backoff_factor: Optional[float] = 2.5
36+
retry_codes: Optional[List[int]] = [413, 429, 500, 502, 503, 504]
37+
retry_count: Optional[int] = 5
38+
additional_headers: Optional[Dict[str, str]] = {}
3039

3140
# pylint: disable=too-few-public-methods
3241
class UserInput(BaseModel):
3342
"""YAML schema for user provided configuration file"""
3443
host: str
35-
additional_headers: Optional[Dict[str, str]] = None
3644
credentials: Optional[BookstackAccess] = None
3745
formats: List[Literal["markdown", "html", "pdf", "plaintext"]]
3846
output_path: Optional[str] = None
3947
assets: Optional[Assets] = Assets()
4048
minio: Optional[ObjectStorageConfig] = None
4149
keep_last: Optional[int] = None
42-
run_interval: Optional[int] = 0
50+
run_interval: Optional[int] = 0
51+
http_config: Optional[HttpConfig] = HttpConfig()

bookstack_file_exporter/exporter/exporter.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from requests import Response
66

77
from bookstack_file_exporter.exporter.node import Node
8-
from bookstack_file_exporter.common import util
8+
from bookstack_file_exporter.common.util import HttpHelper
99

1010
log = logging.getLogger(__name__)
1111

@@ -19,10 +19,9 @@ class NodeExporter():
1919
Returns:
2020
NodeExporter instance to handle building shelf/book/chapter/page relations.
2121
"""
22-
def __init__(self, api_urls: Dict[str, str], headers: Dict[str,str], verify_ssl: bool):
22+
def __init__(self, api_urls: Dict[str, str], http_client: HttpHelper):
2323
self.api_urls = api_urls
24-
self.headers = headers
25-
self.verify_ssl = verify_ssl
24+
self.http_client = http_client
2625

2726
def get_all_shelves(self) -> Dict[int, Node]:
2827
"""
@@ -38,8 +37,7 @@ def get_all_shelves(self) -> Dict[int, Node]:
3837

3938
def _get_json_response(self, url: str) -> List[Dict[str, Union[str,int]]]:
4039
"""get http response data in json format"""
41-
response: Response = util.http_get_request(url=url, headers=self.headers,
42-
verify_ssl=self.verify_ssl)
40+
response: Response = self.http_client.http_get_request(url=url)
4341
return response.json()
4442

4543
def _get_all_ids(self, url: str) -> List[int]:

0 commit comments

Comments
 (0)