"""Package entry point used when the package is run as ``python -m scrapyrt``.

All of the work is delegated to :func:`scrapyrt.cmdline.execute`.
"""
from scrapyrt.cmdline import execute

if __name__ == "__main__":
    # Guarded so that merely importing this module never triggers execute().
    execute()
def find_module_filepath(module_name, submodule_name=None, none_when_no_spec=False):
    """
    Find a module's file location without importing the module itself.

    Only ``find_spec`` is used, so `module_name` is not executed. Note that for
    a dotted `module_name` ``find_spec`` still imports the parent packages.

    :param module_name: Module name, e.g., "cdd.tests"
    :type module_name: ```str```

    :param submodule_name: Submodule name, e.g., "test_pure_utils". When `None`
        (e.g., the caller split an undotted name and has no submodule part) the
        location of `module_name` itself is returned.
    :type submodule_name: ```Optional[str]```

    :param none_when_no_spec: When no spec / no file location can be found
        return `None`. If `False` raises `AssertionError` then.
    :type none_when_no_spec: ```bool```

    :return: Path of the submodule (``<submodule>/__init__.py`` is preferred
        over ``<submodule>.py``); falls back to the origin of `module_name`
        when the submodule does not exist on disk.
    :rtype: ```Optional[str]```

    :raises AssertionError: if `module_name` is `None`, or if nothing can be
        located and `none_when_no_spec` is false.
    """
    # Raised explicitly (not via ``assert``) so the check survives ``python -O``;
    # the exception type is kept for backward compatibility.
    if module_name is None:
        raise AssertionError("module_name must not be None")

    try:
        module_spec = find_spec(module_name)
    except ModuleNotFoundError:
        # find_spec imports the parent of a dotted name; a missing parent is
        # just another flavour of "no spec".
        module_spec = None
    if module_spec is None:
        if none_when_no_spec:
            return None
        raise AssertionError("spec not found for {}".format(module_name))

    module_origin = module_spec.origin
    if module_spec.has_location and module_origin:
        search_dirs = [path.dirname(module_origin)]
    else:
        # Namespace packages have no origin, and built-in/frozen modules have
        # an origin that is not a filesystem path; rely on the package search
        # path instead of probing relative to the current directory.
        search_dirs = list(module_spec.submodule_search_locations or ())

    if submodule_name is not None:
        init_filename = "__init__{}py".format(path.extsep)
        module_filename = "{}{}py".format(submodule_name, path.extsep)
        for search_dir in search_dirs:
            for candidate in (
                path.join(search_dir, submodule_name, init_filename),
                path.join(search_dir, module_filename),
            ):
                if path.exists(candidate):
                    return candidate

    if module_origin is None:
        # Nothing on disk and no origin to fall back on (namespace package).
        if none_when_no_spec:
            return None
        raise AssertionError(
            "no file location found for {}".format(module_name)
        )
    return module_origin