Skip to content
Open
Show file tree
Hide file tree
Changes from 22 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
e75a4c2
background task & logging
aspiringLich Feb 28, 2026
74db110
Merge branch 'develop' into feature/39-box-integration
aspiringLich Feb 28, 2026
5305599
upload folders to box
aspiringLich Feb 28, 2026
06e29ee
cleanup & mark chunked uploads as unimplemented
aspiringLich Feb 28, 2026
591a7b3
file caching (not working)
aspiringLich Mar 2, 2026
457183b
tweaks
aspiringLich Mar 9, 2026
b7c573d
Merge branch 'main' into feature/39-box-integration
aspiringLich Mar 9, 2026
5ed8da5
review pass
aspiringLich Mar 9, 2026
c6ceb54
review pass 2
aspiringLich Mar 9, 2026
88edf16
fix error when deleting files
aspiringLich Mar 9, 2026
4b647db
update database
aspiringLich Mar 9, 2026
8a24124
update image download api
aspiringLich Mar 9, 2026
9837057
Merge branch 'develop' into feature/39-box-integration
aspiringLich Mar 9, 2026
fc90e63
update README & fix merge conflict
aspiringLich Mar 9, 2026
3721ad9
update boxsdk
aspiringLich Apr 16, 2026
543557f
Merge branch 'develop' into feature/39-box-integration
aspiringLich Apr 16, 2026
08cbd29
storage backend
aspiringLich Apr 16, 2026
0002f9b
nits
aspiringLich Apr 16, 2026
6b5d9f3
move box upload logic to storage.py
aspiringLich Apr 16, 2026
f7e266f
nit (BOX_DEVELOPER_TOKEN overrides BOX_JWT)
aspiringLich Apr 16, 2026
b7f104d
box storage tests
aspiringLich Apr 16, 2026
62c5d02
oauth and docker compose examples
aspiringLich Apr 16, 2026
0c8c54c
cleaner storage api
aspiringLich Apr 23, 2026
f4e67e9
oauth working mostly!
aspiringLich Apr 23, 2026
ea0c92b
update record upload / media upload with new logic
aspiringLich Apr 23, 2026
c835dbb
polish storage logic
aspiringLich Apr 23, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions bfd9000_web/.env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Django Settings
Comment thread
aspiringLich marked this conversation as resolved.
SECRET_KEY=your-secret-key-here
DEBUG=True
DJANGO_ALLOWED_HOSTS=localhost,127.0.0.1
Comment thread
aspiringLich marked this conversation as resolved.

# Box.com Integration
# Use exactly one authentication method; precedence: DEVELOPER_TOKEN > JWT > OAUTH.
#
# Option A — Developer token (local development only, expires after 1 hour):
BOX_DEVELOPER_TOKEN=your-box-developer-token-here
#
# Option B — JWT service account (CI / production, requires a Box app with JWT enabled):
BOX_JWT_CONFIG_FILE=/path/to/box-jwt-config.json
#
# Option C — OAuth 2.0 client credentials (production; generate in the Box Developer Console):
BOX_OAUTH_CLIENT_ID=your-box-oauth-client-id
BOX_OAUTH_CLIENT_SECRET=your-box-oauth-client-secret
#
BOX_FOLDER_ID=your-box-folder-id-here

# Scanner Configuration
SCANNER_API_BASE=http://localhost:5000
SCANNER_DEVICE_ID=scanner-001

# BFD9020 AI Service
BFD9020_BASE_URL=https://wingate.case.edu/bfd9020

# CORS Configuration
CORS_ALLOWED_ORIGINS=http://localhost:5173,http://127.0.0.1:5173
93 changes: 90 additions & 3 deletions bfd9000_web/BFD9000/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,20 +10,52 @@
https://docs.djangoproject.com/en/5.2/ref/settings/
"""

import logging
import os
from pathlib import Path
import sys

from dotenv import load_dotenv

# Build paths inside the project like this: BASE_DIR / 'subdir'.
BASE_DIR = Path(__file__).resolve().parent.parent

# Load environment variables from .env file
load_dotenv()


def _read_secret(name: str, default: str | None = None) -> str | None:
"""Return the value of secret *name*, or *default* if not found.

Resolution order:
1. Environment variable ``name``
2. Docker Compose secret file ``/run/secrets/<name>``
3. *default*
"""
value = os.environ.get(name)
if value is not None:
return value
secret_path = Path(f"/run/secrets/{name}")
if secret_path.is_file():
return secret_path.read_text(encoding="utf-8").strip() or None
return default


# Quick-start development settings - unsuitable for production
# See https://docs.djangoproject.com/en/5.2/howto/deployment/checklist/


# SECURITY WARNING: keep the secret key used in production secret!
SECRET_KEY = os.environ.get(
SECRET_KEY = _read_secret(
'SECRET_KEY', 'django-insecure-+6m#s88j*)qb+a%2s%cw31e2k04um&*a-fk!jgcpl3849(w4sm')

# Box.com Configuration
BOX_DEVELOPER_TOKEN = _read_secret('BOX_DEVELOPER_TOKEN')
BOX_JWT_CONFIG_FILE = _read_secret('BOX_JWT_CONFIG_FILE')
BOX_FOLDER_ID = _read_secret('BOX_FOLDER_ID')
BOX_OAUTH_CLIENT_ID = _read_secret('BOX_OAUTH_CLIENT_ID')
BOX_OAUTH_CLIENT_SECRET = _read_secret('BOX_OAUTH_CLIENT_SECRET')

# SECURITY WARNING: don't run with debug turned on in production!
DEBUG = os.environ.get('DEBUG', 'True') == 'True'
APP_VERSION_FILE = BASE_DIR / 'VERSION'
Expand Down Expand Up @@ -242,8 +274,63 @@ def _prefix_url(path):

SCANNER_API_BASE = os.environ.get('SCANNER_API_BASE', 'http://localhost:5000')
SCANNER_DEVICE_ID = os.environ.get('SCANNER_DEVICE_ID', 'scanner-001')
BFD9020_BASE_URL = os.environ.get(
'BFD9020_BASE_URL', 'https://wingate.case.edu/bfd9020')
BFD9020_BASE_URL = os.environ.get('BFD9020_BASE_URL', 'https://wingate.case.edu/bfd9020')

# Logging Configuration
class PrettyFormatter(logging.Formatter):
    """Format log records for the console, adding ANSI color on a terminal.

    When ``sys.stderr`` is a TTY, each standard level uses its own colored
    layout from ``FORMATS``. Otherwise (redirected output, or a level number
    that has no entry in ``FORMATS``) the uncolored ``PLAIN_FORMAT`` is used,
    so every record always carries its timestamp, level and logger name.
    """

    GRAY = "\x1b[90m"
    YELLOW = "\x1b[33;21m"
    RED = "\x1b[31;21m"
    BOLD_RED = "\x1b[31;1m"
    RESET = "\x1b[0m"

    # Layout used when color is off or the record's level has no colored layout.
    PLAIN_FORMAT = '%(asctime)s %(levelname)-5s %(name)s: %(message)s'

    # Define a different format for each level
    FORMATS = {
        logging.DEBUG: f"{GRAY}%(asctime)s {GRAY}DEBUG {GRAY}%(name)s: {RESET}%(message)s",
        logging.INFO: f"{GRAY}%(asctime)s {RESET}INFO {GRAY}%(name)s: {RESET}%(message)s",
        logging.WARNING: f"{GRAY}%(asctime)s {YELLOW}WARN {GRAY}%(name)s: {RESET}%(message)s",
        logging.ERROR: f"{GRAY}%(asctime)s {RED}ERROR {GRAY}%(name)s: {RESET}%(message)s",
        logging.CRITICAL: f"{GRAY}%(asctime)s {BOLD_RED}CRIT {GRAY}%(name)s: {RESET}%(message)s",
    }

    # Formatter instances keyed by format string, built on first use so that a
    # new Formatter is not constructed for every single log record.
    _formatters: dict[str, logging.Formatter] = {}

    def format(self, record):
        """Return *record* rendered with the layout matching its level.

        The TTY check is repeated on every call because ``sys.stderr`` can be
        replaced or redirected while the process is running.
        """
        use_color = hasattr(sys.stderr, 'isatty') and sys.stderr.isatty()
        if use_color:
            # Custom/non-standard levels have no colored layout; fall back to
            # the plain one instead of logging.Formatter's bare "%(message)s".
            log_fmt = self.FORMATS.get(record.levelno, self.PLAIN_FORMAT)
        else:
            log_fmt = self.PLAIN_FORMAT

        formatter = self._formatters.get(log_fmt)
        if formatter is None:
            formatter = self._formatters[log_fmt] = logging.Formatter(log_fmt)
        return formatter.format(record)

# Django logging configuration (dictConfig schema, version 1).
# Everything is written to a single console StreamHandler that renders records
# with PrettyFormatter.
LOGGING = {
    'version': 1,
    # Keep loggers that were created before this configuration is applied.
    'disable_existing_loggers': False,
    'formatters': {
        'pretty': {
            # '()' tells dictConfig to instantiate this class as the formatter.
            '()': PrettyFormatter,
        },
    },
    'handlers': {
        'console': {
            'class': 'logging.StreamHandler',
            'formatter': 'pretty',
        },
    },
    # Default for every logger that is not configured explicitly below.
    'root': {
        'handlers': ['console'],
        'level': 'INFO',
    },
    'loggers': {
        # These loggers attach the console handler themselves, so propagation
        # is disabled to avoid emitting each record a second time via root.
        'django': {
            'handlers': ['console'],
            'level': 'INFO',
            'propagate': False,
        },
        # The archive app logs at DEBUG, one level more verbose than root.
        'archive': {
            'handlers': ['console'],
            'level': 'DEBUG',
            'propagate': False,
        },
    },
}

# Thumbnail generation policy (staging and UI previews)
THUMBNAIL_MAX_WIDTH = int(os.environ.get('THUMBNAIL_MAX_WIDTH', '300'))
Expand Down
15 changes: 10 additions & 5 deletions bfd9000_web/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,17 @@ Alternatively just install django with python.

## Running the Django Application

1. Make sure to apply any database migrations:
1. Make sure to apply any database migrations and import data into the database:

```bash
cd bfd9000_web
python manage.py migrate

# import data into database
cd docs/collections_data
python ../../manage.py import_subjects bolton
python ../../manage.py import_valuesets
cd ../../
```

2. ONLY IF YOU ARE DEVELOPING THE FRONTEND, install DaisyUI and run tailwindcss in a separate terminal window.
Expand All @@ -41,9 +47,8 @@ Alternatively just install django with python.

```bash
# add yourself as a user
python bfd9000_web/manage.py createsuperuser

python bfd9000_web/manage.py runserver
python manage.py createsuperuser
python manage.py runserver
```

4. Open your web browser and go to `http://127.0.0.1:9000` to view the application.
Expand Down Expand Up @@ -88,7 +93,7 @@ Or use the provided compose file:

```bash
# Copy the example env file and edit as needed
cp bfd9000_web/dot-env.example bfd9000_web/.env
cp bfd9000_web/.env.example bfd9000_web/.env

docker compose -f bfd9000_web/docker-compose.yml up
```
Expand Down
30 changes: 30 additions & 0 deletions bfd9000_web/archive/apps.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,39 @@
"""Django app configuration for the archive app."""

import logging
import os
import sys
import threading

from django.apps import AppConfig

logger = logging.getLogger(__name__)


class ArchiveConfig(AppConfig):
    """Configure default settings for the archive app."""
    default_auto_field = "django.db.models.BigAutoField"  # pyright: ignore[reportAssignmentType]
    name = "archive"

    def ready(self):
        """Initialize the archive app and start background tasks.

        The media upload worker is started only when the process is serving
        requests:

        * ``runserver`` with the autoreloader: only in the reloader's child
          process (``RUN_MAIN == 'true'``), so the worker is not started twice.
        * ``runserver --noreload``: there is a single process and ``RUN_MAIN``
          is never set, so the worker is started directly.
        * ``gunicorn``: detected from the executable name in ``sys.argv[0]``.

        Any other invocation (e.g. other management commands) only logs that
        the worker was not started.
        """
        if 'runserver' in sys.argv:
            # Guard against running twice in development (autoreloader issue)
            if '--noreload' in sys.argv or self._is_main_process():
                self._start_background_task()
        # are we in production (gunicorn)?
        elif os.path.basename(sys.argv[0]) == 'gunicorn':
            self._start_background_task()
        else:
            logger.info("Background tasks not started: conditions have not been met (runserver, gunicorn)")

    def _is_main_process(self):
        """Check if this is the main process (development only).

        Returns ``True`` when the ``RUN_MAIN`` environment variable equals
        ``'true'``.
        """
        return os.environ.get('RUN_MAIN') == 'true'

    def _start_background_task(self):
        """Start the background media upload thread.

        The import is done here rather than at module level so that it only
        happens once the app registry is ready. The thread is a daemon, so it
        does not keep the process alive on shutdown.
        """
        from archive.media_upload import media_upload_worker

        thread = threading.Thread(
            target=media_upload_worker,
            name="media-upload-worker",
            daemon=True,
        )
        thread.start()
103 changes: 103 additions & 0 deletions bfd9000_web/archive/media_upload.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
"""Background worker for uploading local media files to Box storage."""

import logging
import time
from pathlib import Path
from typing import List

from django.conf import settings

from archive.models import DigitalRecord
from .storage import BoxStorageBackend

logger = logging.getLogger(__name__)


def media_upload_worker():
    """Run forever, periodically uploading pending local media files to Box.

    The worker refuses to start (logs an error and returns) unless Box is
    configured: at least one authentication method must be present, plus
    ``BOX_FOLDER_ID``. Accepted authentication settings are
    ``BOX_DEVELOPER_TOKEN``, ``BOX_JWT_CONFIG_FILE``, or the pair
    ``BOX_OAUTH_CLIENT_ID`` + ``BOX_OAUTH_CLIENT_SECRET``.

    Once started it waits 5 seconds, then calls ``process_media_files()`` in
    an endless loop, sleeping 60 seconds after every pass. Exceptions raised
    by a pass are logged with a traceback and do not stop the loop.
    """
    developer_token = getattr(settings, "BOX_DEVELOPER_TOKEN", None)
    jwt_config_file = getattr(settings, "BOX_JWT_CONFIG_FILE", None)
    oauth_client_id = getattr(settings, "BOX_OAUTH_CLIENT_ID", None)
    oauth_client_secret = getattr(settings, "BOX_OAUTH_CLIENT_SECRET", None)
    folder_id = getattr(settings, "BOX_FOLDER_ID", None)

    # OAuth client credentials are a supported way to authenticate against
    # Box, so an OAuth-only deployment must be allowed to start the worker;
    # otherwise uploaded files would stay on local disk indefinitely.
    has_oauth = bool(oauth_client_id and oauth_client_secret)
    if not (developer_token or jwt_config_file or has_oauth):
        logger.error(
            "worker cannot start: no Box credentials configured (set "
            "BOX_DEVELOPER_TOKEN, BOX_JWT_CONFIG_FILE, or both "
            "BOX_OAUTH_CLIENT_ID and BOX_OAUTH_CLIENT_SECRET)"
        )
        return
    if not folder_id:
        logger.error("worker cannot start: BOX_FOLDER_ID is not set")
        return

    # Short initial delay before the first pass.
    # NOTE(review): presumably gives Django time to finish starting — confirm.
    time.sleep(5)

    while True:
        try:
            files_processed = process_media_files()
            if files_processed > 0:
                logger.info("Processed %d media file(s)", files_processed)
        except Exception as exc:
            # Top-level boundary of the worker thread: log and keep running.
            logger.error("Error in media upload worker: %s", exc, exc_info=True)
        time.sleep(60)


def process_media_files() -> int:
    """Upload all pending files from the local media/uploads directory to Box.

    Walks ``MEDIA_ROOT/uploads`` recursively, hands every file with an
    archivable extension to ``handle_media_file()``, and afterwards deletes
    the local copy of each file that was uploaded successfully, pruning
    directories that become empty.

    Returns:
        The number of files that were uploaded successfully. ``0`` when the
        uploads directory does not exist.
    """
    media_root = Path(settings.MEDIA_ROOT).joinpath("uploads")

    if not media_root.exists():
        return 0

    # Extensions of source files that get archived. ".tif" and ".stl" are
    # included alongside the image formats so that such records are not left
    # on local disk forever.
    archivable_extensions = {
        ".jpg", ".jpeg", ".png", ".gif", ".bmp", ".tif", ".tiff", ".webp", ".stl",
    }

    files_processed: List[Path] = []

    for file_path in media_root.rglob("*"):
        if not file_path.is_file():
            continue
        if file_path.suffix.lower() not in archivable_extensions:
            continue
        if handle_media_file(file_path):
            files_processed.append(file_path)

    # Delete only after the walk has finished so the tree is not modified
    # while rglob() is still iterating over it.
    for path in files_processed:
        try:
            path.unlink(missing_ok=True)
        except OSError as exc:
            # One undeletable file must not prevent clean-up of the others.
            logger.error("Could not delete local file %s: %s", path, exc)
            continue
        logger.debug("Deleted local file: %s", path)
        prune_empty_directory(path.parent)

    return len(files_processed)


def handle_media_file(file_path: Path) -> bool:
    """Upload *file_path* to Box and update the matching ``DigitalRecord`` link.

    The file's path relative to ``MEDIA_ROOT`` must match the ``source_file``
    of exactly one ``DigitalRecord``; otherwise nothing is uploaded. After a
    successful upload that record's ``source_file`` is replaced with the value
    returned by ``BoxStorageBackend().upload()``.

    Returns ``True`` on success, ``False`` on any error (logged; worker continues).
    """
    try:
        logger.debug("Handling media file: %s", file_path)
        relative_path = file_path.relative_to(settings.MEDIA_ROOT)

        # Django stores file field names with forward slashes on every
        # platform, so compare against the POSIX form rather than str(),
        # which would use backslashes on Windows and never match.
        qs = DigitalRecord.objects.filter(source_file=relative_path.as_posix())
        count = qs.count()
        if count != 1:
            logger.error(
                "Expected 1 record for %s, found %d; skipping DB update", relative_path, count
            )
            return False

        with open(file_path, "rb") as f:
            link = BoxStorageBackend().upload(f, str(relative_path))

        # NOTE(review): from here on source_file holds the Box link instead of
        # a local media path. Code that treats the field as a local file
        # (e.g. reading source_file.url or source_file.size in serializers)
        # must handle Box-backed values — confirm those call sites.
        qs.update(source_file=link)
        return True
    except Exception as exc:
        # Deliberate best-effort: a failure on one file is logged and reported
        # to the caller as False so the remaining files are still processed.
        logger.error("Error handling file %s: %s", file_path, exc, exc_info=True)
        return False


def prune_empty_directory(directory: Path):
    """Remove *directory* if empty, then continue with its parent.

    Pruning walks upwards and stops at the first directory that is not empty,
    does not exist, or cannot be removed. ``MEDIA_ROOT`` itself is never
    removed, and a *directory* that does not lie below ``MEDIA_ROOT`` is left
    untouched, so pruning can never climb out of the media tree.
    """
    media_root = Path(settings.MEDIA_ROOT)

    current = directory
    # "media_root in current.parents" is False both for MEDIA_ROOT itself and
    # for any path outside of it, which bounds the upward walk.
    while media_root in current.parents and current.exists():
        try:
            if any(current.iterdir()):
                return
            current.rmdir()
        except OSError as exc:
            # E.g. a file appeared between the emptiness check and rmdir();
            # leave the directory in place instead of failing the caller.
            logger.debug("Could not prune directory %s: %s", current, exc)
            return
        current = current.parent
Loading