diff --git a/rocrate_validator/cli/commands/validate.py b/rocrate_validator/cli/commands/validate.py index 46f4ce3a..e2a11446 100644 --- a/rocrate_validator/cli/commands/validate.py +++ b/rocrate_validator/cli/commands/validate.py @@ -24,7 +24,7 @@ from rich.rule import Rule import rocrate_validator.log as logging -from rocrate_validator import services +from rocrate_validator import constants, services from rocrate_validator.cli.commands.errors import handle_error from rocrate_validator.cli.main import cli from rocrate_validator.cli.ui.text.validate import ValidationCommandView @@ -203,6 +203,29 @@ def validate_uri(ctx, param, value): show_default=True, help="Width of the output line", ) +@click.option( + '--cache-max-age', + type=click.INT, + default=constants.DEFAULT_HTTP_CACHE_MAX_AGE, + show_default=True, + help="Maximum age of the HTTP cache in seconds", +) +@click.option( + '--cache-path', + type=click.Path(), + default=None, + show_default=True, + help="Path to the HTTP cache directory", +) +@click.option( + '-nc', + '--no-cache', + is_flag=True, + help="Disable the HTTP cache", + default=False, + show_default=True, + hidden=True +) @click.pass_context def validate(ctx, profiles_path: Path = DEFAULT_PROFILES_PATH, @@ -221,7 +244,10 @@ def validate(ctx, verbose: bool = False, output_format: str = "text", output_file: Optional[Path] = None, - output_line_width: Optional[int] = None): + output_line_width: Optional[int] = None, + cache_max_age: int = constants.DEFAULT_HTTP_CACHE_MAX_AGE, + cache_path: Optional[Path] = None, + no_cache: bool = False): """ [magenta]rocrate-validator:[/magenta] Validate a RO-Crate against a profile """ @@ -245,6 +271,11 @@ def validate(ctx, logger.debug("fail_fast: %s", fail_fast) logger.debug("no fail fast: %s", not fail_fast) + # Cache settings + logger.debug("cache_max_age: %s", cache_max_age) + logger.debug("cache_path: %s", os.path.abspath(cache_path) if cache_path else None) + logger.debug("no_cache: %s", no_cache) + if rocrate_uri: logger.debug("rocrate_path: %s", os.path.abspath(rocrate_uri)) @@ -280,7 +311,9 @@ def validate(ctx, "rocrate_relative_root_path": relative_root_path, "abort_on_first": fail_fast, "skip_checks": skip_checks_list, - "metadata_only": metadata_only + "metadata_only": metadata_only, + "cache_max_age": cache_max_age if not no_cache else -1, + "cache_path": cache_path } # Print the application header diff --git a/rocrate_validator/constants.py b/rocrate_validator/constants.py index a3f36972..fa09c338 100644 --- a/rocrate_validator/constants.py +++ b/rocrate_validator/constants.py @@ -87,5 +87,5 @@ JSON_OUTPUT_FORMAT_VERSION = "0.2" # Http Cache Settings -DEFAULT_HTTP_CACHE_TIMEOUT = 60 +DEFAULT_HTTP_CACHE_MAX_AGE = 300 # in seconds DEFAULT_HTTP_CACHE_PATH_PREFIX = '/tmp/rocrate_validator_cache' diff --git a/rocrate_validator/models.py b/rocrate_validator/models.py index f853a8f9..cf245a80 100644 --- a/rocrate_validator/models.py +++ b/rocrate_validator/models.py @@ -33,7 +33,8 @@ import rocrate_validator.log as logging from rocrate_validator import __version__ -from rocrate_validator.constants import (DEFAULT_ONTOLOGY_FILE, +from rocrate_validator.constants import (DEFAULT_HTTP_CACHE_MAX_AGE, + DEFAULT_ONTOLOGY_FILE, DEFAULT_PROFILE_IDENTIFIER, DEFAULT_PROFILE_README_FILE, IGNORED_PROFILE_DIRECTORIES, @@ -2349,11 +2350,21 @@ class ValidationSettings: metadata_dict: dict = None #: Verbose output verbose: bool = False + #: Cache max age in seconds + cache_max_age: Optional[int] = DEFAULT_HTTP_CACHE_MAX_AGE + #: Cache path + cache_path: Optional[Path] = None def __post_init__(self): # if requirement_severity is a str, convert to Severity if isinstance(self.requirement_severity, str): self.requirement_severity = Severity[self.requirement_severity] + # initialize the HTTP cache + from rocrate_validator.utils import HttpRequester + # HttpRequester.initialize_cache(cache_path=self.cache_path, cache_max_age=self.cache_max_age) + HttpRequester.initialize_cache(cache_path=self.cache_path, cache_max_age=self.cache_max_age) + logger.debug("HTTP cache initialized at %s with max age %s seconds", + self.cache_path, self.cache_max_age) def to_dict(self): """ diff --git a/rocrate_validator/utils.py b/rocrate_validator/utils.py index 099783fe..28c1cdf4 100644 --- a/rocrate_validator/utils.py +++ b/rocrate_validator/utils.py @@ -389,8 +389,9 @@ class HttpRequester: _instance = None _lock = threading.Lock() - def __new__(cls): + def __new__(cls, *args, **kwargs) -> HttpRequester: if cls._instance is None: + logger.debug(f"Creating instance of {cls.__name__} with args: {args}, kwargs: {kwargs}") with cls._lock: if cls._instance is None: logger.debug(f"Creating instance of {cls.__name__}") @@ -399,7 +400,10 @@ def __new__(cls): logger.debug(f"Instance created: {cls._instance.__class__.__name__}") return cls._instance - def __init__(self): + def __init__(self, + cache_max_age: int = constants.DEFAULT_HTTP_CACHE_MAX_AGE, + cache_path: Optional[str] = None): + logger.debug(f"Initializing instance of {self.__class__.__name__} {self}") # check if the instance is already initialized if not hasattr(self, "_initialized"): # check if the instance is already initialized @@ -407,14 +411,23 @@ def __init__(self): if not getattr(self, "_initialized", False): # set the initialized flag self._initialized = False + # store the parameters + try: + logger.debug(f"Setting cache_max_age to {cache_max_age}") + self.cache_max_age = int(cache_max_age) + except ValueError: + raise TypeError("cache_max_age must be an integer") + self.cache_path_prefix = cache_path + # flag to indicate if the cache is permanent or temporary + self.permanent_cache = cache_path is not None # initialize the session - self.__initialize_session__() + self.__initialize_session__(cache_max_age, cache_path) # set the initialized flag self._initialized = True else: logger.debug(f"Instance of {self} already initialized") - def __initialize_session__(self): + def __initialize_session__(self, cache_max_age: int, cache_path: Optional[str] = None): # initialize the session self.session = None logger.debug(f"Initializing instance of {self.__class__.__name__}") @@ -422,17 +435,24 @@ def __initialize_session__(self): # check if requests_cache is installed # and set up the cached session try: - if constants.DEFAULT_HTTP_CACHE_TIMEOUT > 0: + if cache_max_age >= 0: from requests_cache import CachedSession - # Generate a random path for the cache - # to avoid conflicts with other instances - random_suffix = ''.join(random.choices(string.ascii_letters + string.digits, k=8)) + # If cache_path is not provided, use the default path prefix + if not cache_path: + # Generate a random path for the cache + # to avoid conflicts with other instances + random_suffix = ''.join(random.choices(string.ascii_letters + string.digits, k=8)) + cache_path = constants.DEFAULT_HTTP_CACHE_PATH_PREFIX + f"_{random_suffix}" + logger.debug(f"Using default cache path: {cache_path}") + else: + logger.debug(f"Using provided cache path: {cache_path}") + self.permanent_cache = True # Initialize the session with a cache self.session = CachedSession( # Cache name with random suffix - cache_name=f"{constants.DEFAULT_HTTP_CACHE_PATH_PREFIX}_{random_suffix}", - expire_after=constants.DEFAULT_HTTP_CACHE_TIMEOUT, # Cache expiration time in seconds + cache_name=cache_path, + expire_after=cache_max_age, # Cache expiration time in seconds backend='sqlite', # Use SQLite backend allowable_methods=('GET',), # Cache GET allowable_codes=(200, 302, 404) # Cache responses with these status codes @@ -441,15 +461,23 @@ def __initialize_session__(self): logger.warning("requests_cache is not installed. Using requests instead.") except Exception as e: logger.error("Error initializing requests_cache: %s", e) - logger.warning("Using requests instead of requests_cache") - # if requests_cache is not installed or an error occurred, use requests - # instead of requests_cache + + # if requests_cache is not installed or an error occurred, + # use requests instead of requests_cache # and create a new session if not self.session: - logger.debug("Using requests instead of requests_cache") + logger.debug("Cache disabled: using requests instead of requests_cache") self.session = requests.Session() def __del__(self): + """ + Destructor to clean up the cache file used by CachedSession. + """ + logger.debug(f"Deleting instance of {self.__class__.__name__}") + if hasattr(self, "permanent_cache") and not self.permanent_cache: + self.cleanup() + + def cleanup(self): """ Destructor to clean up the cache file used by CachedSession. """ @@ -475,6 +503,18 @@ def __getattr__(self, name): return getattr(self.session, name.lower()) raise AttributeError(f"'{self.__class__.__name__}' object has no attribute '{name}'") + @classmethod + def initialize_cache(cls, + cache_max_age: int = constants.DEFAULT_HTTP_CACHE_MAX_AGE, + cache_path: Optional[str] = None) -> HttpRequester: + """ + Initialize the HttpRequester singleton with cache settings. + + :param max_age: The maximum age of the cache in seconds. + :param cache_path: The path to the cache directory. + """ + return cls(cache_max_age=cache_max_age, cache_path=cache_path) + class URI: