Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 9 additions & 3 deletions raganything/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -872,10 +872,16 @@ def _read_output_files(
absolute_img_path = (
images_base_dir / img_path
).resolve()

# Security check: ensure the image path is within the base directory
resolved_base = images_base_dir.resolve()
if not absolute_img_path.is_relative_to(resolved_base):
cls.logger.warning(
f"Potential path traversal detected in {field_name}: {img_path}. Skipping."
)
continue

item[field_name] = str(absolute_img_path)
cls.logger.debug(
f"Updated {field_name}: {img_path} -> {item[field_name]}"
)

except Exception as e:
cls.logger.warning(f"Could not read JSON file {json_file}: {e}")
Expand Down
28 changes: 25 additions & 3 deletions raganything/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -568,12 +568,34 @@ def replace_image_path(match):
return match.group(0) # Keep original

# Use utility function to validate image file
self.logger.debug(f"Calling validate_image_file for: {image_path}")
is_valid = validate_image_file(image_path)
self.logger.debug(f"Validation result for {image_path}: {is_valid}")

# Security check: only allow images from the workspace or output directories
# to prevent indirect prompt injection from reading arbitrary system files.
if is_valid:
abs_image_path = Path(image_path).resolve()
# Check if it's in the current working directory or subdirectories
try:
is_in_cwd = abs_image_path.is_relative_to(Path.cwd())
except ValueError:
is_in_cwd = False

# If a config is available, check against working_dir and parser_output_dir
is_in_safe_dir = is_in_cwd
if hasattr(self, "config") and self.config:
try:
is_in_working = abs_image_path.is_relative_to(Path(self.config.working_dir).resolve())
is_in_output = abs_image_path.is_relative_to(Path(self.config.parser_output_dir).resolve())
is_in_safe_dir = is_in_safe_dir or is_in_working or is_in_output
except Exception:
pass

if not is_in_safe_dir:
self.logger.warning(f"Blocking image path outside safe directories: {image_path}")
is_valid = False

if not is_valid:
self.logger.warning(f"Image validation failed for: {image_path}")
self.logger.warning(f"Image validation failed or path unsafe for: {image_path}")
return match.group(0) # Keep original if validation fails

try:
Expand Down
6 changes: 5 additions & 1 deletion raganything/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,10 +93,14 @@ def validate_image_file(image_path: str, max_size_mb: int = 50) -> bool:
logger.debug(f"Resolved path object: {path}")
logger.debug(f"Path exists check: {path.exists()}")

# Check if file exists
# Check if file exists and is not a symlink (for security)
if not path.exists():
logger.warning(f"Image file not found: {image_path}")
return False

if path.is_symlink():
logger.warning(f"Blocking symlink for security: {image_path}")
return False

# Check file extension
image_extensions = [
Expand Down