diff --git a/.env.example b/.env.example
new file mode 100644
index 0000000..532313e
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,13 @@
+# CoinGecko API Configuration
+
+# Base URL for CoinGecko API (optional, defaults to official endpoint)
+COINGECKO_API_BASE_URL=https://api.coingecko.com/api/v3
+
+# API Key for CoinGecko (required; the config validator rejects keys shorter than 10 characters)
+COINGECKO_API_KEY=your_api_key_here
+
+# Request timeout in seconds (optional, defaults to 30; must be > 0 and <= 120)
+COINGECKO_REQUEST_TIMEOUT=30
+
+# Rate limit requests per minute (optional, defaults to 50; must be > 0 and <= 100)
+COINGECKO_RATE_LIMIT=50
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index f61b850..4e4bb0e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,48 +1,13 @@
-.venv
+__pycache__/
+*.py[cod]
+*$py.class
 .env
-__pycache__
-.pytest_cache
-.pypirc
-*.db
-test
-test_state.json
-task_flow.egg-info
-example_repo
-signature.js
-git-filter-repo
-task/orca/
-**/dist/
-# yarn.lock
-package-lock.json
-node_modules
-build
-migrate.sh
-*/dev.js
-executables/*
-namespace/*
-config/*
-.env.local
-taskStateInfoKeypair.json
-localKOIIDB.db
-metadata.json
-.npmrc
-*.pem
-.vscode
-.cursor
-data/chunks
-data/process
-test_state.csv
-todos-example.csv
-
-
-# Ignore auto-generated repository directories
-repos/
-
-
-# Ignore Data
-data/*
-
-
-venv
-
-**/venv/
+.venv
+venv/
+.pytest_cache/
+dist/
+build/
+*.egg-info/
+.coverage
+htmlcov/
+.DS_Store
\ No newline at end of file
diff --git a/src/__init__.py b/src/__init__.py
new file mode 100644
index 0000000..b34f7cb
--- /dev/null
+++ b/src/__init__.py
@@ -0,0 +1 @@
+# Make src a Python package
\ No newline at end of file
diff --git a/src/config_validator.py b/src/config_validator.py
new file mode 100644
index 0000000..b70506f
--- /dev/null
+++ b/src/config_validator.py
@@ -0,0 +1,156 @@
+from typing import Dict, Any, Optional
+import os
+import re
+
+class ConfigValidationError(Exception):
+    """Custom exception for configuration validation errors."""
+    pass
+
+class CoinGeckoConfigValidator:
+    """Validates CoinGecko API configuration settings."""
+
+    @staticmethod
+    def validate_config(config: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        Validate and sanitize the configuration dictionary.
+
+        Args:
+            config (Dict[str, Any]): Configuration dictionary to validate
+
+        Returns:
+            Dict[str, Any]: Validated copy; REQUEST_TIMEOUT and RATE_LIMIT are always set
+
+        Raises:
+            ConfigValidationError: If configuration is invalid
+        """
+        # Validate or set default values
+        config = config.copy()  # Prevent modifying original dict
+
+        # Validate presence of required keys
+        required_keys = ['API_BASE_URL', 'API_KEY']
+        for key in required_keys:
+            if key not in config or config[key] is None or str(config[key]).strip() == '':
+                raise ConfigValidationError(f"Missing or invalid required configuration key: {key}")
+
+        # Validate API base URL format
+        CoinGeckoConfigValidator._validate_url(config['API_BASE_URL'])
+
+        # Validate API key
+        CoinGeckoConfigValidator._validate_api_key(config['API_KEY'])
+
+        # Validate optional timeout
+        if 'REQUEST_TIMEOUT' in config:
+            config['REQUEST_TIMEOUT'] = CoinGeckoConfigValidator._validate_timeout(config['REQUEST_TIMEOUT'])
+        else:
+            config['REQUEST_TIMEOUT'] = 30  # Default timeout
+
+        # Validate rate limit settings
+        if 'RATE_LIMIT' in config:
+            config['RATE_LIMIT'] = CoinGeckoConfigValidator._validate_rate_limit(config['RATE_LIMIT'])
+        else:
+            config['RATE_LIMIT'] = 50  # Default rate limit
+
+        return config
+
+    @staticmethod
+    def _validate_url(url: str) -> None:
+        """
+        Validate URL format.
+
+        Args:
+            url (str): URL to validate
+
+        Raises:
+            ConfigValidationError: If URL is invalid
+        """
+        url_pattern = re.compile(
+            r'^https?://'  # http:// or https://
+            r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+[A-Z]{2,6}\.?|'  # domain
+            r'localhost|'  # localhost
+            r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})'  # IP
+            r'(?::\d+)?'  # optional port
+            r'(?:/?|[/?]\S+)$', re.IGNORECASE)
+
+        if not url_pattern.match(url):
+            raise ConfigValidationError(f"Invalid URL format: {url}")
+
+    @staticmethod
+    def _validate_api_key(api_key: str) -> None:
+        """
+        Validate API key format.
+
+        Args:
+            api_key (str): API key to validate
+
+        Raises:
+            ConfigValidationError: If API key is invalid
+        """
+        # Remove whitespace and validate minimum length
+        sanitized_key = str(api_key).strip()
+        if len(sanitized_key) < 10:
+            raise ConfigValidationError("Invalid API key: Key is too short")
+
+    @staticmethod
+    def _validate_timeout(timeout: Any) -> float:
+        """
+        Validate request timeout.
+
+        Args:
+            timeout (Any): Timeout value to validate
+
+        Returns:
+            float: Validated timeout value
+
+        Raises:
+            ConfigValidationError: If timeout is invalid
+        """
+        try:
+            timeout_float = float(timeout)
+            if not (0 < timeout_float <= 120):  # Also rejects NaN, which fails every comparison
+                raise ConfigValidationError(f"Invalid timeout value: {timeout}")
+            return timeout_float
+        except (TypeError, ValueError):
+            raise ConfigValidationError(f"Invalid timeout value: must be a numeric value, got {type(timeout)}")
+
+    @staticmethod
+    def _validate_rate_limit(rate_limit: Any) -> float:
+        """
+        Validate rate limit settings.
+
+        Args:
+            rate_limit (Any): Rate limit configuration to validate
+
+        Returns:
+            float: Validated rate limit value
+
+        Raises:
+            ConfigValidationError: If rate limit is invalid
+        """
+        try:
+            limit_float = float(rate_limit)
+            if not (0 < limit_float <= 100):  # Also rejects NaN, which fails every comparison
+                raise ConfigValidationError(f"Invalid rate limit value: {rate_limit}")
+            return limit_float
+        except (TypeError, ValueError):
+            raise ConfigValidationError(f"Invalid rate limit value: must be a numeric value, got {type(rate_limit)}")
+
+    @classmethod
+    def load_from_env(cls) -> Dict[str, Any]:
+        """
+        Load and validate configuration from environment variables.
+
+        Returns:
+            Dict[str, Any]: Validated configuration from environment
+        """
+        config = {
+            'API_BASE_URL': os.getenv('COINGECKO_API_BASE_URL', 'https://api.coingecko.com/api/v3'),
+            'API_KEY': os.getenv('COINGECKO_API_KEY', ''),
+            'REQUEST_TIMEOUT': os.getenv('COINGECKO_REQUEST_TIMEOUT', 30),
+            'RATE_LIMIT': os.getenv('COINGECKO_RATE_LIMIT', 50)
+        }
+
+        # Numeric settings are passed through unconverted on purpose:
+        # validate_config() converts them and reports bad values as
+        # ConfigValidationError instead of leaking a bare ValueError.
+
+        return cls.validate_config(config)
\ No newline at end of file
diff --git a/src/historical_price_transformer.py b/src/historical_price_transformer.py
new file mode 100644
index 0000000..09a095e
--- /dev/null
+++ b/src/historical_price_transformer.py
@@ -0,0 +1,119 @@
+from typing import Dict, List, Union, Optional
+from datetime import datetime, timezone
+import logging
+
+class HistoricalPriceTransformer:
+    """
+    Transforms and validates historical price data from CoinGecko API.
+
+    Handles data validation, cleaning, and transformation of historical
+    cryptocurrency price data.
+    """
+
+    @staticmethod
+    def validate_historical_data(data: List[List[Union[int, float]]]) -> bool:
+        """
+        Validate the structure and content of historical price data.
+
+        Args:
+            data (List[List[Union[int, float]]]): Raw [timestamp_ms, price] pairs
+
+        Returns:
+            bool: True if data is valid, False otherwise
+        """
+        if not isinstance(data, list):
+            logging.error("Historical data must be a list")
+            return False
+
+        # Check each data point
+        for point in data:
+            if not isinstance(point, list) or len(point) != 2:
+                logging.error(f"Invalid data point format: {point}")
+                return False
+
+            timestamp, price = point
+
+            # Validate timestamp (out-of-range values raise OverflowError/OSError)
+            try:
+                datetime.fromtimestamp(timestamp / 1000, tz=timezone.utc)  # Milliseconds -> seconds
+            except (TypeError, ValueError, OverflowError, OSError):
+                logging.error(f"Invalid timestamp: {timestamp}")
+                return False
+
+            # Validate price
+            if not isinstance(price, (int, float)) or not price >= 0:  # "not >=" also rejects NaN
+                logging.error(f"Invalid price: {price}")
+                return False
+
+        return True
+
+    @staticmethod
+    def transform_historical_data(data: List[List[Union[int, float]]]) -> List[Dict[str, Union[int, float]]]:
+        """
+        Transform historical price data into a more usable format.
+
+        Args:
+            data (List[List[Union[int, float]]]): Raw historical price data
+
+        Returns:
+            List[Dict[str, Union[int, float]]]: Transformed data; 'datetime' is a naive ISO-8601 string in UTC
+
+        Raises:
+            ValueError: If input data is invalid
+        """
+        if not HistoricalPriceTransformer.validate_historical_data(data):
+            raise ValueError("Invalid historical price data")
+
+        transformed_data = []
+        for timestamp, price in data:
+            # Convert in UTC (not host-local time) so output is machine-independent; tzinfo dropped to stay naive
+            utc_dt = datetime.fromtimestamp(timestamp / 1000, tz=timezone.utc)
+            transformed_data.append({
+                'timestamp': timestamp,
+                'datetime': utc_dt.replace(tzinfo=None).isoformat(),
+                'price': price
+            })
+
+        return transformed_data
+
+    @staticmethod
+    def filter_historical_data(
+        data: List[Dict[str, Union[int, float]]],
+        start_date: Optional[datetime] = None,
+        end_date: Optional[datetime] = None,
+        min_price: Optional[float] = None,
+        max_price: Optional[float] = None
+    ) -> List[Dict[str, Union[int, float]]]:
+        """
+        Filter historical price data based on optional criteria.
+
+        Args:
+            data (List[Dict[str, Union[int, float]]]): Transformed historical price data
+            start_date (Optional[datetime]): Minimum date for filtering (exclusive; naive, compared as UTC)
+            end_date (Optional[datetime]): Maximum date for filtering (exclusive; naive, compared as UTC)
+            min_price (Optional[float]): Minimum price for filtering (inclusive)
+            max_price (Optional[float]): Maximum price for filtering (inclusive)
+
+        Returns:
+            List[Dict[str, Union[int, float]]]: Filtered historical price data
+        """
+        def meets_filter_criteria(point):
+            point_datetime = datetime.fromisoformat(point['datetime'])
+
+            # Check date range (strict inequality)
+            if start_date and point_datetime <= start_date:
+                return False
+
+            if end_date and point_datetime >= end_date:
+                return False
+
+            # Check price range
+            if min_price is not None and point['price'] < min_price:
+                return False
+
+            if max_price is not None and point['price'] > max_price:
+                return False
+
+            return True
+
+        return [point for point in data if meets_filter_criteria(point)]
\ No newline at end of file
diff --git a/tests/test_config_validator.py b/tests/test_config_validator.py
new file mode 100644
index 0000000..4ca2a54
--- /dev/null
+++ b/tests/test_config_validator.py
@@ -0,0 +1,113 @@
+import os
+import pytest
+from src.config_validator import CoinGeckoConfigValidator, ConfigValidationError
+
+def test_valid_config_validation():
+    """Test successful configuration validation."""
+    valid_config = {
+        'API_BASE_URL': 'https://api.coingecko.com/api/v3',
+        'API_KEY': 'test_api_key_123456',
+        'REQUEST_TIMEOUT': 30,
+        'RATE_LIMIT': 50
+    }
+
+    validated_config = CoinGeckoConfigValidator.validate_config(valid_config)
+    assert validated_config == valid_config
+
+def test_missing_required_keys():
+    """Test validation fails with missing required keys."""
+    invalid_configs = [
+        {},
+        {'API_BASE_URL': 'https://api.example.com'},
+        {'API_KEY': 'test_key'}
+    ]
+
+    for config in invalid_configs:
+        with pytest.raises(ConfigValidationError, match="Missing or invalid required configuration key"):
+            CoinGeckoConfigValidator.validate_config(config)
+
+def test_invalid_url_format():
+    """Test URL validation."""
+    invalid_urls = [
+        'not_a_url',
+        'http://',
+        'https://invalid url.com',
+        'ftp://example.com'
+    ]
+
+    for url in invalid_urls:
+        with pytest.raises(ConfigValidationError, match="Invalid URL format"):
+            CoinGeckoConfigValidator.validate_config({
+                'API_BASE_URL': url,
+                'API_KEY': 'test_key_valid_length'
+            })
+
+def test_api_key_validation():
+    """Test API key validation."""
+    invalid_keys = [
+        None,
+        '',
+        ' ',
+        'short'
+    ]
+
+    for key in invalid_keys:
+        with pytest.raises(ConfigValidationError) as exc_info:
+            CoinGeckoConfigValidator.validate_config({
+                'API_BASE_URL': 'https://api.coingecko.com/api/v3',
+                'API_KEY': key
+            })
+
+        # Check that the error message matches the expected validation
+        error_msg = str(exc_info.value)
+        assert "Missing or invalid required configuration key" in error_msg or "Invalid API key: Key is too short" in error_msg
+
+def test_timeout_validation():
+    """Test timeout value validation."""
+    invalid_timeouts = [
+        -1,
+        0,
+        '0',
+        130,
+        'not_a_number'
+    ]
+
+    for timeout in invalid_timeouts:
+        with pytest.raises(ConfigValidationError, match="Invalid timeout value"):
+            CoinGeckoConfigValidator.validate_config({
+                'API_BASE_URL': 'https://api.coingecko.com/api/v3',
+                'API_KEY': 'test_key_valid_length',
+                'REQUEST_TIMEOUT': timeout
+            })
+
+def test_rate_limit_validation():
+    """Test rate limit validation."""
+    invalid_rate_limits = [
+        -1,
+        0,
+        '0',
+        150,
+        'not_a_number'
+    ]
+
+    for rate_limit in invalid_rate_limits:
+        with pytest.raises(ConfigValidationError, match="Invalid rate limit value"):
+            CoinGeckoConfigValidator.validate_config({
+                'API_BASE_URL': 'https://api.coingecko.com/api/v3',
+                'API_KEY': 'test_key_valid_length',
+                'RATE_LIMIT': rate_limit
+            })
+
+def test_env_config_loading(monkeypatch):
+    """Test loading configuration from environment variables."""
+    monkeypatch.setenv('COINGECKO_API_BASE_URL', 'https://custom-api.coingecko.com')
+    monkeypatch.setenv('COINGECKO_API_KEY', 'test_env_key_12345')
+    monkeypatch.setenv('COINGECKO_REQUEST_TIMEOUT', '45')
+    monkeypatch.setenv('COINGECKO_RATE_LIMIT', '75')
+
+    config = CoinGeckoConfigValidator.load_from_env()
+
+    assert config['API_BASE_URL'] == 'https://custom-api.coingecko.com'
+    assert config['API_KEY'] == 'test_env_key_12345'
+    assert config['REQUEST_TIMEOUT'] == 45
+    assert config['RATE_LIMIT'] == 75
\ No newline at end of file
diff --git a/tests/test_historical_price_transformer.py b/tests/test_historical_price_transformer.py
new file mode 100644
index 0000000..8f52bf1
--- /dev/null
+++ b/tests/test_historical_price_transformer.py
@@ -0,0 +1,68 @@
+import unittest
+from datetime import datetime, timedelta
+from src.historical_price_transformer import HistoricalPriceTransformer
+
+class TestHistoricalPriceTransformer(unittest.TestCase):
+    def setUp(self):
+        # Sample historical price data (millisecond timestamps at 00:00 UTC)
+        self.sample_data = [
+            [1609459200000, 29000.50],  # Jan 1, 2021
+            [1612137600000, 33000.75],  # Feb 1, 2021
+            [1614556800000, 48000.25],  # Mar 1, 2021
+            [1617235200000, 58000.00]  # Apr 1, 2021
+        ]
+
+    def test_validate_historical_data_valid(self):
+        """Test validation of valid historical price data"""
+        self.assertTrue(HistoricalPriceTransformer.validate_historical_data(self.sample_data))
+
+    def test_validate_historical_data_invalid(self):
+        """Test validation of invalid historical price data"""
+        invalid_data = [
+            [None, 29000.50],  # Invalid timestamp
+            ['2021-01-01', 33000.75],  # Wrong timestamp type
+            [1612137600000, -100]  # Negative price
+        ]
+        self.assertFalse(HistoricalPriceTransformer.validate_historical_data(invalid_data))
+
+    def test_transform_historical_data(self):
+        """Test transformation of historical price data"""
+        transformed_data = HistoricalPriceTransformer.transform_historical_data(self.sample_data)
+
+        self.assertEqual(len(transformed_data), len(self.sample_data))
+
+        for point, original_point in zip(transformed_data, self.sample_data):
+            self.assertIn('timestamp', point)
+            self.assertIn('datetime', point)
+            self.assertIn('price', point)
+
+            self.assertEqual(point['timestamp'], original_point[0])
+            self.assertEqual(point['price'], original_point[1])
+
+            # Check datetime is correctly formatted
+            self.assertIsNotNone(datetime.fromisoformat(point['datetime']))
+
+    def test_filter_historical_data(self):
+        """Test filtering of historical price data"""
+        transformed_data = HistoricalPriceTransformer.transform_historical_data(self.sample_data)
+
+        # Filter by date range (strict filtering)
+        start_date = datetime(2021, 2, 1)
+        end_date = datetime(2021, 4, 1)
+        filtered_data = HistoricalPriceTransformer.filter_historical_data(
+            transformed_data,
+            start_date=start_date,
+            end_date=end_date
+        )
+        self.assertEqual(len(filtered_data), 1)  # Only Mar 1st point is strictly between Feb and Apr
+
+        # Filter by price range
+        price_filtered_data = HistoricalPriceTransformer.filter_historical_data(
+            transformed_data,
+            min_price=40000,
+            max_price=60000
+        )
+        self.assertEqual(len(price_filtered_data), 2)  # Two points in this price range
+
+if __name__ == '__main__':
+    unittest.main()
\ No newline at end of file
diff --git a/tests~HEAD b/tests~HEAD
new file mode 100644
index 0000000..ac2f00a
--- /dev/null
+++ b/tests~HEAD
@@ -0,0 +1 @@
+mkdir -p tests
\ No newline at end of file