Skip to content
39 changes: 36 additions & 3 deletions rocrate_validator/cli/commands/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from rich.rule import Rule

import rocrate_validator.log as logging
from rocrate_validator import services
from rocrate_validator import constants, services
from rocrate_validator.cli.commands.errors import handle_error
from rocrate_validator.cli.main import cli
from rocrate_validator.cli.ui.text.validate import ValidationCommandView
Expand Down Expand Up @@ -203,6 +203,29 @@ def validate_uri(ctx, param, value):
show_default=True,
help="Width of the output line",
)
@click.option(
'--cache-max-age',
type=click.INT,
default=constants.DEFAULT_HTTP_CACHE_MAX_AGE,
show_default=True,
help="Maximum age of the HTTP cache in seconds",
)
Comment on lines +206 to +212
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a way to set this so that the cache never expires? Set it to -1?
(I see a -1 in the code, but it should be documented if it's allowed here.)

@click.option(
'--cache-path',
type=click.Path(),
default=None,
show_default=True,
help="Path to the HTTP cache directory",
)
@click.option(
'-nc',
'--no-cache',
is_flag=True,
help="Disable the HTTP cache",
default=False,
show_default=True,
hidden=True
)
@click.pass_context
def validate(ctx,
profiles_path: Path = DEFAULT_PROFILES_PATH,
Expand All @@ -221,7 +244,10 @@ def validate(ctx,
verbose: bool = False,
output_format: str = "text",
output_file: Optional[Path] = None,
output_line_width: Optional[int] = None):
output_line_width: Optional[int] = None,
cache_max_age: int = constants.DEFAULT_HTTP_CACHE_MAX_AGE,
cache_path: Optional[Path] = None,
no_cache: bool = False):
"""
[magenta]rocrate-validator:[/magenta] Validate a RO-Crate against a profile
"""
Expand All @@ -245,6 +271,11 @@ def validate(ctx,
logger.debug("fail_fast: %s", fail_fast)
logger.debug("no fail fast: %s", not fail_fast)

# Cache settings
logger.debug("cache_max_age: %s", cache_max_age)
logger.debug("cache_path: %s", os.path.abspath(cache_path) if cache_path else None)
logger.debug("no_cache: %s", no_cache)

if rocrate_uri:
logger.debug("rocrate_path: %s", os.path.abspath(rocrate_uri))

Expand Down Expand Up @@ -280,7 +311,9 @@ def validate(ctx,
"rocrate_relative_root_path": relative_root_path,
"abort_on_first": fail_fast,
"skip_checks": skip_checks_list,
"metadata_only": metadata_only
"metadata_only": metadata_only,
"cache_max_age": cache_max_age if not no_cache else -1,
"cache_path": cache_path
}

# Print the application header
Expand Down
2 changes: 1 addition & 1 deletion rocrate_validator/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,5 +87,5 @@
JSON_OUTPUT_FORMAT_VERSION = "0.2"

# Http Cache Settings
DEFAULT_HTTP_CACHE_TIMEOUT = 60
DEFAULT_HTTP_CACHE_MAX_AGE = 300 # in seconds
DEFAULT_HTTP_CACHE_PATH_PREFIX = '/tmp/rocrate_validator_cache'
13 changes: 12 additions & 1 deletion rocrate_validator/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@

import rocrate_validator.log as logging
from rocrate_validator import __version__
from rocrate_validator.constants import (DEFAULT_ONTOLOGY_FILE,
from rocrate_validator.constants import (DEFAULT_HTTP_CACHE_MAX_AGE,
DEFAULT_ONTOLOGY_FILE,
DEFAULT_PROFILE_IDENTIFIER,
DEFAULT_PROFILE_README_FILE,
IGNORED_PROFILE_DIRECTORIES,
Expand Down Expand Up @@ -2349,11 +2350,21 @@ class ValidationSettings:
metadata_dict: dict = None
#: Verbose output
verbose: bool = False
#: Cache max age in seconds
cache_max_age: Optional[int] = DEFAULT_HTTP_CACHE_MAX_AGE
#: Cache path
cache_path: Optional[Path] = None

def __post_init__(self):
# Post-init hook: normalizes requirement_severity and sets up the shared
# HTTP cache from this settings object's cache_path / cache_max_age.
# if requirement_severity is a str, convert to Severity
if isinstance(self.requirement_severity, str):
self.requirement_severity = Severity[self.requirement_severity]
# initialize the HTTP cache
# NOTE(review): imported locally rather than at module level, presumably to
# avoid a circular import between models and utils; confirm.
from rocrate_validator.utils import HttpRequester
# A negative cache_max_age disables caching (the CLI passes -1 for --no-cache).
# HttpRequester keeps a single shared instance, so these settings only take
# effect if that instance has not been initialized yet.
HttpRequester.initialize_cache(cache_path=self.cache_path, cache_max_age=self.cache_max_age)
logger.debug("HTTP cache initialized at %s with max age %s seconds",
self.cache_path, self.cache_max_age)

def to_dict(self):
"""
Expand Down
68 changes: 54 additions & 14 deletions rocrate_validator/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -389,8 +389,9 @@ class HttpRequester:
_instance = None
_lock = threading.Lock()

def __new__(cls):
def __new__(cls, *args, **kwargs) -> HttpRequester:
if cls._instance is None:
logger.debug(f"Creating instance of {cls.__name__} with args: {args}, kwargs: {kwargs}")
with cls._lock:
if cls._instance is None:
logger.debug(f"Creating instance of {cls.__name__}")
Expand All @@ -399,40 +400,59 @@ def __new__(cls):
logger.debug(f"Instance created: {cls._instance.__class__.__name__}")
return cls._instance

def __init__(self):
def __init__(self,
cache_max_age: int = constants.DEFAULT_HTTP_CACHE_MAX_AGE,
cache_path: Optional[str] = None):
logger.debug(f"Initializing instance of {self.__class__.__name__} {self}")
# check if the instance is already initialized
if not hasattr(self, "_initialized"):
# check if the instance is already initialized
with self._lock:
if not getattr(self, "_initialized", False):
# set the initialized flag
self._initialized = False
# store the parameters
try:
logger.debug(f"Setting cache_max_age to {cache_max_age}")
self.cache_max_age = int(cache_max_age)
except ValueError:
raise TypeError("cache_max_age must be an integer")
self.cache_path_prefix = cache_path
# flag to indicate if the cache is permanent or temporary
self.permanent_cache = cache_path is not None
# initialize the session
self.__initialize_session__()
self.__initialize_session__(cache_max_age, cache_path)
# set the initialized flag
self._initialized = True
else:
logger.debug(f"Instance of {self} already initialized")

def __initialize_session__(self):
def __initialize_session__(self, cache_max_age: int, cache_path: Optional[str] = None):
# initialize the session
self.session = None
logger.debug(f"Initializing instance of {self.__class__.__name__}")
assert not self._initialized, "Session already initialized"
# check if requests_cache is installed
# and set up the cached session
try:
if constants.DEFAULT_HTTP_CACHE_TIMEOUT > 0:
if cache_max_age >= 0:
from requests_cache import CachedSession

# Generate a random path for the cache
# to avoid conflicts with other instances
random_suffix = ''.join(random.choices(string.ascii_letters + string.digits, k=8))
# If cache_path is not provided, use the default path prefix
if not cache_path:
# Generate a random path for the cache
# to avoid conflicts with other instances
random_suffix = ''.join(random.choices(string.ascii_letters + string.digits, k=8))
cache_path = constants.DEFAULT_HTTP_CACHE_PATH_PREFIX + f"_{random_suffix}"
logger.debug(f"Using default cache path: {cache_path}")
else:
logger.debug(f"Using provided cache path: {cache_path}")
self.permanent_cache = True
# Initialize the session with a cache
self.session = CachedSession(
# Cache name with random suffix
cache_name=f"{constants.DEFAULT_HTTP_CACHE_PATH_PREFIX}_{random_suffix}",
expire_after=constants.DEFAULT_HTTP_CACHE_TIMEOUT, # Cache expiration time in seconds
cache_name=cache_path,
expire_after=cache_max_age, # Cache expiration time in seconds
backend='sqlite', # Use SQLite backend
allowable_methods=('GET',), # Cache GET
allowable_codes=(200, 302, 404) # Cache responses with these status codes
Expand All @@ -441,15 +461,23 @@ def __initialize_session__(self):
logger.warning("requests_cache is not installed. Using requests instead.")
except Exception as e:
logger.error("Error initializing requests_cache: %s", e)
logger.warning("Using requests instead of requests_cache")
# if requests_cache is not installed or an error occurred, use requests
# instead of requests_cache

# if requests_cache is not installed or an error occurred,
# use requests instead of requests_cache
# and create a new session
if not self.session:
logger.debug("Using requests instead of requests_cache")
logger.debug("Cache disabled: using requests instead of requests_cache")
self.session = requests.Session()

def __del__(self):
"""
Destructor to clean up the cache file used by CachedSession.
"""
logger.debug(f"Deleting instance of {self.__class__.__name__}")
if hasattr(self, "permanent_cache") and not self.permanent_cache:
self.cleanup()

def cleanup(self):
"""
Clean up the cache file used by CachedSession.
"""
Expand All @@ -475,6 +503,18 @@ def __getattr__(self, name):
return getattr(self.session, name.lower())
raise AttributeError(f"'{self.__class__.__name__}' object has no attribute '{name}'")

@classmethod
def initialize_cache(cls,
cache_max_age: int = constants.DEFAULT_HTTP_CACHE_MAX_AGE,
cache_path: Optional[str] = None) -> HttpRequester:
"""
Initialize the HttpRequester singleton with cache settings.

Thin wrapper around the constructor. The class keeps a single shared
instance, so the settings passed here are applied only if that instance
has not been initialized yet; otherwise the existing instance is returned
as it is.

:param cache_max_age: The maximum age of cached responses in seconds.
    A negative value disables the cache and a plain ``requests`` session
    is used instead.
:param cache_path: The path of the cache, passed to the cached session as
    its cache name. When omitted, a randomly suffixed path under the
    default prefix is generated.
:return: The shared HttpRequester instance.
"""
return cls(cache_max_age=cache_max_age, cache_path=cache_path)


class URI:

Expand Down