Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# CoinGecko API Configuration

# Base URL for CoinGecko API (optional, defaults to official endpoint)
COINGECKO_API_BASE_URL=https://api.coingecko.com/api/v3

# API Key for CoinGecko (required: the config validator rejects a missing or empty key)
COINGECKO_API_KEY=your_api_key_here

# Request timeout in seconds (optional, defaults to 30)
COINGECKO_REQUEST_TIMEOUT=30

# Rate limit requests per minute (optional, defaults to 50)
COINGECKO_RATE_LIMIT=50
59 changes: 12 additions & 47 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,48 +1,13 @@
.venv
__pycache__/
*.py[cod]
*$py.class
.env
__pycache__
.pytest_cache
.pypirc
*.db
test
test_state.json
task_flow.egg-info
example_repo
signature.js
git-filter-repo
task/orca/
**/dist/
# yarn.lock
package-lock.json
node_modules
build
migrate.sh
*/dev.js
executables/*
namespace/*
config/*
.env.local
taskStateInfoKeypair.json
localKOIIDB.db
metadata.json
.npmrc
*.pem
.vscode
.cursor
data/chunks
data/process
test_state.csv
todos-example.csv


# Ignore auto-generated repository directories
repos/


# Ignore Data
data/*


venv

**/venv/
.venv
venv/
.pytest_cache/
dist/
build/
*.egg-info/
.coverage
htmlcov/
.DS_Store
1 change: 1 addition & 0 deletions src/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Make src a Python package
156 changes: 156 additions & 0 deletions src/config_validator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
from typing import Dict, Any, Optional
import os
import re

class ConfigValidationError(Exception):
    """Raised when a configuration value is missing or fails validation."""

class CoinGeckoConfigValidator:
"""Validates CoinGecko API configuration settings."""

@staticmethod
def validate_config(config: Dict[str, Any]) -> Dict[str, Any]:
"""
Validate and sanitize the configuration dictionary.

Args:
config (Dict[str, Any]): Configuration dictionary to validate

Returns:
Dict[str, Any]: Validated and sanitized configuration

Raises:
ConfigValidationError: If configuration is invalid
"""
# Validate or set default values
config = config.copy() # Prevent modifying original dict

# Validate presence of required keys
required_keys = ['API_BASE_URL', 'API_KEY']
for key in required_keys:
if key not in config or config[key] is None or str(config[key]).strip() == '':
raise ConfigValidationError(f"Missing or invalid required configuration key: {key}")

# Validate API base URL format
CoinGeckoConfigValidator._validate_url(config['API_BASE_URL'])

# Validate API key
CoinGeckoConfigValidator._validate_api_key(config['API_KEY'])

# Validate optional timeout
if 'REQUEST_TIMEOUT' in config:
config['REQUEST_TIMEOUT'] = CoinGeckoConfigValidator._validate_timeout(config['REQUEST_TIMEOUT'])
else:
config['REQUEST_TIMEOUT'] = 30 # Default timeout

# Validate rate limit settings
if 'RATE_LIMIT' in config:
config['RATE_LIMIT'] = CoinGeckoConfigValidator._validate_rate_limit(config['RATE_LIMIT'])
else:
config['RATE_LIMIT'] = 50 # Default rate limit

return config

@staticmethod
def _validate_url(url: str) -> None:
"""
Validate URL format.

Args:
url (str): URL to validate

Raises:
ConfigValidationError: If URL is invalid
"""
url_pattern = re.compile(
r'^https?://' # http:// or https://
r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+[A-Z]{2,6}\.?|' # domain
r'localhost|' # localhost
r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})' # IP
r'(?::\d+)?' # optional port
r'(?:/?|[/?]\S+)$', re.IGNORECASE)

if not url_pattern.match(url):
raise ConfigValidationError(f"Invalid URL format: {url}")

@staticmethod
def _validate_api_key(api_key: str) -> None:
"""
Validate API key format.

Args:
api_key (str): API key to validate

Raises:
ConfigValidationError: If API key is invalid
"""
# Remove whitespace and validate minimum length
sanitized_key = str(api_key).strip()
if len(sanitized_key) < 10:
raise ConfigValidationError("Invalid API key: Key is too short")

@staticmethod
def _validate_timeout(timeout: Any) -> float:
"""
Validate request timeout.

Args:
timeout (Any): Timeout value to validate

Returns:
float: Validated timeout value

Raises:
ConfigValidationError: If timeout is invalid
"""
try:
timeout_float = float(timeout)
if timeout_float <= 0 or timeout_float > 120: # Reasonable timeout range
raise ConfigValidationError(f"Invalid timeout value: {timeout}")
return timeout_float
except (TypeError, ValueError):
raise ConfigValidationError(f"Invalid timeout value: must be a numeric value, got {type(timeout)}")
Comment on lines +107 to +113
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Improve exception chaining for better debugging.

The exception handling should preserve the original exception context for better debugging.

         try:
             timeout_float = float(timeout)
             if timeout_float <= 0 or timeout_float > 120:  # Reasonable timeout range
                 raise ConfigValidationError(f"Invalid timeout value: {timeout}")
             return timeout_float
-        except (TypeError, ValueError):
-            raise ConfigValidationError(f"Invalid timeout value: must be a numeric value, got {type(timeout)}")
+        except (TypeError, ValueError) as exc:
+            raise ConfigValidationError(f"Invalid timeout value: must be a numeric value, got {type(timeout)}") from exc
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
try:
timeout_float = float(timeout)
if timeout_float <= 0 or timeout_float > 120: # Reasonable timeout range
raise ConfigValidationError(f"Invalid timeout value: {timeout}")
return timeout_float
except (TypeError, ValueError):
raise ConfigValidationError(f"Invalid timeout value: must be a numeric value, got {type(timeout)}")
try:
timeout_float = float(timeout)
if timeout_float <= 0 or timeout_float > 120: # Reasonable timeout range
raise ConfigValidationError(f"Invalid timeout value: {timeout}")
return timeout_float
except (TypeError, ValueError) as exc:
raise ConfigValidationError(f"Invalid timeout value: must be a numeric value, got {type(timeout)}") from exc
🧰 Tools
🪛 Ruff (0.11.9)

113-113: Within an except clause, raise exceptions with raise ... from err or raise ... from None to distinguish them from errors in exception handling

(B904)

🪛 Pylint (3.3.7)

[convention] 113-113: Line too long (111/100)

(C0301)


[warning] 113-113: Consider explicitly re-raising using 'except (TypeError, ValueError) as exc' and 'raise ConfigValidationError(f'Invalid timeout value: must be a numeric value, got {type(timeout)}') from exc'

(W0707)

🤖 Prompt for AI Agents
In src/config_validator.py around lines 107 to 113, the exception handling for
converting timeout to float currently raises a new ConfigValidationError without
preserving the original exception context. Modify the except block to use "raise
... from e" syntax to chain the original exception, where "e" is the caught
exception, so that the traceback includes the original error for better
debugging.


@staticmethod
def _validate_rate_limit(rate_limit: Any) -> float:
"""
Validate rate limit settings.

Args:
rate_limit (Any): Rate limit configuration to validate

Returns:
float: Validated rate limit value

Raises:
ConfigValidationError: If rate limit is invalid
"""
try:
limit_float = float(rate_limit)
if limit_float <= 0 or limit_float > 100: # Reasonable rate limit range
raise ConfigValidationError(f"Invalid rate limit value: {rate_limit}")
return limit_float
except (TypeError, ValueError):
raise ConfigValidationError(f"Invalid rate limit value: must be a numeric value, got {type(rate_limit)}")
Comment on lines +129 to +135
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Improve exception chaining for better debugging.

Similar to timeout validation, preserve the original exception context.

         try:
             limit_float = float(rate_limit)
             if limit_float <= 0 or limit_float > 100:  # Reasonable rate limit range
                 raise ConfigValidationError(f"Invalid rate limit value: {rate_limit}")
             return limit_float
-        except (TypeError, ValueError):
-            raise ConfigValidationError(f"Invalid rate limit value: must be a numeric value, got {type(rate_limit)}")
+        except (TypeError, ValueError) as exc:
+            raise ConfigValidationError(f"Invalid rate limit value: must be a numeric value, got {type(rate_limit)}") from exc
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
try:
limit_float = float(rate_limit)
if limit_float <= 0 or limit_float > 100: # Reasonable rate limit range
raise ConfigValidationError(f"Invalid rate limit value: {rate_limit}")
return limit_float
except (TypeError, ValueError):
raise ConfigValidationError(f"Invalid rate limit value: must be a numeric value, got {type(rate_limit)}")
try:
limit_float = float(rate_limit)
if limit_float <= 0 or limit_float > 100: # Reasonable rate limit range
raise ConfigValidationError(f"Invalid rate limit value: {rate_limit}")
return limit_float
except (TypeError, ValueError) as exc:
raise ConfigValidationError(f"Invalid rate limit value: must be a numeric value, got {type(rate_limit)}") from exc
🧰 Tools
🪛 Ruff (0.11.9)

135-135: Within an except clause, raise exceptions with raise ... from err or raise ... from None to distinguish them from errors in exception handling

(B904)

🪛 Pylint (3.3.7)

[convention] 135-135: Line too long (117/100)

(C0301)


[warning] 135-135: Consider explicitly re-raising using 'except (TypeError, ValueError) as exc' and 'raise ConfigValidationError(f'Invalid rate limit value: must be a numeric value, got {type(rate_limit)}') from exc'

(W0707)

🤖 Prompt for AI Agents
In src/config_validator.py around lines 129 to 135, the exception handling for
invalid rate limit values currently raises a new ConfigValidationError without
preserving the original exception context. Modify the except block to use "raise
... from e" syntax to chain the original exception, capturing the original error
for better debugging.


@classmethod
def load_from_env(cls) -> Dict[str, Any]:
"""
Load configuration from environment variables.

Returns:
Dict[str, Any]: Validated configuration from environment
"""
config = {
'API_BASE_URL': os.getenv('COINGECKO_API_BASE_URL', 'https://api.coingecko.com/api/v3'),
'API_KEY': os.getenv('COINGECKO_API_KEY', ''),
'REQUEST_TIMEOUT': os.getenv('COINGECKO_REQUEST_TIMEOUT', 30),
'RATE_LIMIT': os.getenv('COINGECKO_RATE_LIMIT', 50)
Comment on lines +148 to +149
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Fix type inconsistency with os.getenv defaults.

The default values should be strings since os.getenv expects string defaults, not integers.

         config = {
             'API_BASE_URL': os.getenv('COINGECKO_API_BASE_URL', 'https://api.coingecko.com/api/v3'),
             'API_KEY': os.getenv('COINGECKO_API_KEY', ''),
-            'REQUEST_TIMEOUT': os.getenv('COINGECKO_REQUEST_TIMEOUT', 30),
-            'RATE_LIMIT': os.getenv('COINGECKO_RATE_LIMIT', 50)
+            'REQUEST_TIMEOUT': os.getenv('COINGECKO_REQUEST_TIMEOUT', '30'),
+            'RATE_LIMIT': os.getenv('COINGECKO_RATE_LIMIT', '50')
         }
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
'REQUEST_TIMEOUT': os.getenv('COINGECKO_REQUEST_TIMEOUT', 30),
'RATE_LIMIT': os.getenv('COINGECKO_RATE_LIMIT', 50)
config = {
'API_BASE_URL': os.getenv('COINGECKO_API_BASE_URL', 'https://api.coingecko.com/api/v3'),
'API_KEY': os.getenv('COINGECKO_API_KEY', ''),
'REQUEST_TIMEOUT': os.getenv('COINGECKO_REQUEST_TIMEOUT', '30'),
'RATE_LIMIT': os.getenv('COINGECKO_RATE_LIMIT', '50')
}
🧰 Tools
🪛 Pylint (3.3.7)

[warning] 148-148: os.getenv default type is builtins.int. Expected str or None.

(W1508)


[warning] 149-149: os.getenv default type is builtins.int. Expected str or None.

(W1508)

🤖 Prompt for AI Agents
In src/config_validator.py around lines 148 to 149, the default values passed to
os.getenv are integers, but os.getenv expects string defaults. Change the
default values from integers (30 and 50) to their string equivalents ('30' and
'50') to maintain type consistency.

}

# Convert numeric environment variables
config['REQUEST_TIMEOUT'] = float(config['REQUEST_TIMEOUT'])
config['RATE_LIMIT'] = float(config['RATE_LIMIT'])

return cls.validate_config(config)
119 changes: 119 additions & 0 deletions src/historical_price_transformer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
from typing import Dict, List, Union, Optional
from datetime import datetime
import logging

class HistoricalPriceTransformer:
    """
    Transforms and validates historical price data from CoinGecko API.

    Raw data is a list of ``[timestamp_ms, price]`` pairs. Timestamps are
    interpreted as milliseconds since the epoch and rendered with
    ``datetime.fromtimestamp`` without a timezone argument, so the
    resulting datetimes are naive values in the machine's local time.
    """

    @staticmethod
    def validate_historical_data(data: List[List[Union[int, float]]]) -> bool:
        """
        Validate the structure and content of historical price data.

        Every failure is logged with ``logging.error`` and reported by
        returning False; this method does not raise for malformed input.

        Args:
            data (List[List[Union[int, float]]]): Raw historical price data

        Returns:
            bool: True if every point is a two-item list holding a
            convertible millisecond timestamp and a non-negative numeric
            price; False otherwise
        """
        if not isinstance(data, list):
            logging.error("Historical data must be a list")
            return False

        for point in data:
            if not isinstance(point, list) or len(point) != 2:
                logging.error("Invalid data point format: %s", point)
                return False

            timestamp, price = point

            # Validate timestamp. Out-of-range values raise OverflowError or
            # OSError (not just ValueError), and a huge int overflows in the
            # division itself, so all of these must count as "invalid".
            try:
                datetime.fromtimestamp(timestamp / 1000)  # Convert milliseconds to seconds
            except (TypeError, ValueError, OverflowError, OSError):
                logging.error("Invalid timestamp: %s", timestamp)
                return False

            # Validate price. "not price >= 0" rather than "price < 0" so
            # that NaN, which fails every comparison, is rejected as well.
            if not isinstance(price, (int, float)) or not price >= 0:
                logging.error("Invalid price: %s", price)
                return False

        return True

    @staticmethod
    def transform_historical_data(
        data: List[List[Union[int, float]]]
    ) -> List[Dict[str, Union[int, float, str]]]:
        """
        Transform historical price data into a more usable format.

        Args:
            data (List[List[Union[int, float]]]): Raw historical price data

        Returns:
            List[Dict[str, Union[int, float, str]]]: One dict per point with
            keys 'timestamp' (the original millisecond value), 'datetime'
            (ISO-8601 string of the naive local datetime) and 'price'

        Raises:
            ValueError: If input data is invalid
        """
        if not HistoricalPriceTransformer.validate_historical_data(data):
            raise ValueError("Invalid historical price data")

        return [
            {
                'timestamp': timestamp,
                'datetime': datetime.fromtimestamp(timestamp / 1000).isoformat(),
                'price': price,
            }
            for timestamp, price in data
        ]

    @staticmethod
    def filter_historical_data(
        data: List[Dict[str, Union[int, float, str]]],
        start_date: Optional[datetime] = None,
        end_date: Optional[datetime] = None,
        min_price: Optional[float] = None,
        max_price: Optional[float] = None
    ) -> List[Dict[str, Union[int, float, str]]]:
        """
        Filter historical price data based on optional criteria.

        Date bounds are exclusive, price bounds are inclusive. Each point's
        'datetime' string is parsed with ``datetime.fromisoformat`` and
        compared directly against the bounds.

        Args:
            data (List[Dict[str, Union[int, float, str]]]): Transformed
                historical price data
            start_date (Optional[datetime]): Minimum date for filtering (exclusive)
            end_date (Optional[datetime]): Maximum date for filtering (exclusive)
            min_price (Optional[float]): Minimum price for filtering (inclusive)
            max_price (Optional[float]): Maximum price for filtering (inclusive)

        Returns:
            List[Dict[str, Union[int, float, str]]]: Points that satisfy
            every supplied criterion, in their original order
        """
        def meets_filter_criteria(point):
            point_datetime = datetime.fromisoformat(point['datetime'])

            # Check date range (strict inequality)
            if start_date is not None and point_datetime <= start_date:
                return False
            if end_date is not None and point_datetime >= end_date:
                return False

            # Check price range
            if min_price is not None and point['price'] < min_price:
                return False
            if max_price is not None and point['price'] > max_price:
                return False

            return True

        return [point for point in data if meets_filter_criteria(point)]
Loading