diff --git a/README.md b/README.md index deba3fa..c100713 100644 --- a/README.md +++ b/README.md @@ -83,6 +83,8 @@ The previous code snippet demonstrates generating a course from only a title, bu [`CourseSettings`][okcourse.CourseSettings] lets you configure the number of lectures, number of subtopics in each lecture, and which AI models to use for generating the course content (lecture text, cover image, and audio file). +If you run `okcourse` in an environment without write access to the filesystem, set `in_memory_output` on `CourseSettings` to `True`. When enabled, generated images and audio are stored in memory on the `Course` object rather than written to disk. + ## Run an example app To see the library in action, try generating a course by running an [example app](/okcourse/examples/). diff --git a/src/okcourse/__init__.py b/src/okcourse/__init__.py index 355a0de..77db207 100644 --- a/src/okcourse/__init__.py +++ b/src/okcourse/__init__.py @@ -13,6 +13,8 @@ import logging from .generators import CourseGenerator, OpenAIAsyncGenerator +from .memory_backends import FileSystemBackend, InMemoryBackend, StorageBackend +from .inmemory_zip import InMemoryCoursePack from .models import ( Course, CourseGenerationInfo, @@ -33,6 +35,10 @@ "CoursePromptSet", "CourseSettings", "OpenAIAsyncGenerator", + "StorageBackend", + "FileSystemBackend", + "InMemoryBackend", + "InMemoryCoursePack", ] # Avoid "No handler found" warnings diff --git a/src/okcourse/generators/openai/async_openai.py b/src/okcourse/generators/openai/async_openai.py index c2f2ccb..28c8313 100644 --- a/src/okcourse/generators/openai/async_openai.py +++ b/src/okcourse/generators/openai/async_openai.py @@ -233,15 +233,18 @@ async def generate_image(self, course: Course) -> Course: if image.revised_prompt: self.log.warning(f"Image prompt was revised by model - prompt used was: {image.revised_prompt}") - course.generation_info.image_file_path = course.settings.output_directory / Path( - 
sanitize_filename(course.title) - ).with_suffix(".png") - course.generation_info.image_file_path.parent.mkdir(parents=True, exist_ok=True) - self.log.info(f"Saving image to {course.generation_info.image_file_path}") - course.generation_info.image_file_path.write_bytes(image_bytes) + if course.settings.in_memory_output: + course.generation_info.image_bytes = image_bytes + else: + course.generation_info.image_file_path = course.settings.output_directory / Path( + sanitize_filename(course.title) + ).with_suffix(".png") + course.generation_info.image_file_path.parent.mkdir(parents=True, exist_ok=True) + self.log.info(f"Saving image to {course.generation_info.image_file_path}") + course.generation_info.image_file_path.write_bytes(image_bytes) - # Save the course JSON now that we have the image path - course.generation_info.image_file_path.with_suffix(".json").write_text(course.model_dump_json(indent=2)) + # Save the course JSON now that we have the image path + course.generation_info.image_file_path.with_suffix(".json").write_text(course.model_dump_json(indent=2)) return course @@ -330,21 +333,36 @@ async def generate_audio(self, course: Course) -> Course: audio_chunks = [task.result()[1] for task in sorted(speech_tasks, key=lambda t: t.result()[0])] # If the user generated an image for the course, embed it - if course.generation_info.image_file_path and course.generation_info.image_file_path.exists(): - composer_tag = ( - f"{course.settings.text_model_lecture} & " - f"{course.settings.tts_model} & " - f"{course.settings.image_model}" - ) - cover_tag = io.BytesIO(course.generation_info.image_file_path.read_bytes()) + if course.settings.in_memory_output: + if course.generation_info.image_bytes: + composer_tag = ( + f"{course.settings.text_model_lecture} & " + f"{course.settings.tts_model} & " + f"{course.settings.image_model}" + ) + cover_tag = io.BytesIO(course.generation_info.image_bytes) + else: + composer_tag = f"{course.settings.text_model_lecture} & 
{course.settings.tts_model}" + cover_tag = None else: - composer_tag = f"{course.settings.text_model_lecture} & {course.settings.tts_model}" - cover_tag = None + if course.generation_info.image_file_path and course.generation_info.image_file_path.exists(): + composer_tag = ( + f"{course.settings.text_model_lecture} & " + f"{course.settings.tts_model} & " + f"{course.settings.image_model}" + ) + cover_tag = io.BytesIO(course.generation_info.image_file_path.read_bytes()) + else: + composer_tag = f"{course.settings.text_model_lecture} & {course.settings.tts_model}" + cover_tag = None - course.generation_info.audio_file_path = course.settings.output_directory / Path( - sanitize_filename(course.title) - ).with_suffix(".mp3") - course.generation_info.audio_file_path.parent.mkdir(parents=True, exist_ok=True) + if course.settings.in_memory_output: + course.generation_info.audio_file_path = None + else: + course.generation_info.audio_file_path = course.settings.output_directory / Path( + sanitize_filename(course.title) + ).with_suffix(".mp3") + course.generation_info.audio_file_path.parent.mkdir(parents=True, exist_ok=True) version_string = get_top_level_version("okcourse") tags: dict[str, str] = { @@ -365,11 +383,15 @@ async def generate_audio(self, course: Course) -> Course: album_art_mime="image/png", ) - self.log.info(f"Saving audio to {course.generation_info.audio_file_path}") - course.generation_info.audio_file_path.write_bytes(combined_mp3.getvalue()) + if course.settings.in_memory_output: + course.generation_info.audio_bytes = combined_mp3.getvalue() + else: + self.log.info(f"Saving audio to {course.generation_info.audio_file_path}") + course.generation_info.audio_file_path.write_bytes(combined_mp3.getvalue()) # Save the course JSON now that we have the audio path - course.generation_info.audio_file_path.with_suffix(".json").write_text(course.model_dump_json(indent=2)) + if not course.settings.in_memory_output and course.generation_info.audio_file_path: + 
course.generation_info.audio_file_path.with_suffix(".json").write_text(course.model_dump_json(indent=2))
 
         return course
 
diff --git a/src/okcourse/inmemory_zip.py b/src/okcourse/inmemory_zip.py
new file mode 100644
index 0000000..f9f00b0
--- /dev/null
+++ b/src/okcourse/inmemory_zip.py
@@ -0,0 +1,46 @@
+"""Utility for bundling course artifacts into an in-memory ZIP archive."""
+
+from __future__ import annotations
+
+import io
+from dataclasses import dataclass
+from typing import TYPE_CHECKING
+from zipfile import ZIP_DEFLATED, ZipFile
+
+if TYPE_CHECKING:
+    from .models import Course
+
+# The image and audio are written as their own archive members, so keep them out of course.json.
+_BINARY_FIELDS = {"generation_info": {"image_bytes", "audio_bytes"}}
+
+
+@dataclass
+class InMemoryCoursePack:
+    """Container for all course artifacts stored as a ZIP file in memory."""
+
+    zip_bytes: bytes
+
+    @classmethod
+    def from_course(cls, course: Course) -> InMemoryCoursePack:
+        """Create a ZIP of the course JSON, cover image, and audio, held entirely in memory.
+
+        ``course.json`` is always written; ``cover.png`` and ``audio.mp3`` are added only when the
+        matching bytes are set on ``course.generation_info``.
+
+        Args:
+            course: The course whose artifacts should be bundled.
+
+        Returns:
+            A pack whose ``zip_bytes`` holds the finished archive.
+        """
+        info = course.generation_info
+        buffer = io.BytesIO()
+        with ZipFile(buffer, "w", ZIP_DEFLATED) as zf:
+            # Pydantic encodes `bytes` as UTF-8 text in JSON by default, which raw PNG/MP3 data is not,
+            # so the binary fields are excluded here and stored as separate members below.
+            zf.writestr("course.json", course.model_dump_json(indent=2, exclude=_BINARY_FIELDS))
+            if info.image_bytes:
+                zf.writestr("cover.png", info.image_bytes)
+            if info.audio_bytes:
+                zf.writestr("audio.mp3", info.audio_bytes)
+        return cls(zip_bytes=buffer.getvalue())
diff --git a/src/okcourse/memory_backends.py b/src/okcourse/memory_backends.py
new file mode 100644
index 0000000..afc2138
--- /dev/null
+++ b/src/okcourse/memory_backends.py
@@ -0,0 +1,51 @@
+"""Prototype storage backends for holding generated course artifacts in memory."""
+
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from pathlib import Path
+
+
+class StorageBackend(ABC):
+    """Abstract interface for storing generated artifacts."""
+
+    @abstractmethod
+    def save_bytes(self, path: Path, data: bytes) -> None:
+        """Store ``data`` under ``path``."""
+
+    @abstractmethod
+    def load_bytes(self, path: Path) -> bytes | None:
+        """Return the bytes stored under ``path``, or ``None`` if nothing is stored there."""
+
+
+class FileSystemBackend(StorageBackend):
+    """Store artifacts on disk using the normal filesystem."""
+
+    def save_bytes(self, path: Path, data: bytes) -> None:
+        """Write ``data`` to ``path``, creating any missing parent directories."""
+        path.parent.mkdir(parents=True, exist_ok=True)
+        path.write_bytes(data)
+
+    def load_bytes(self, path: Path) -> bytes | None:
+        """Return the contents of ``path``, or ``None`` if no such file exists."""
+        # Read and handle the miss instead of checking exists() first, which can race with a delete.
+        try:
+            return path.read_bytes()
+        except (FileNotFoundError, NotADirectoryError):
+            return None
+
+
+class InMemoryBackend(StorageBackend):
+    """Keep artifacts only in memory."""
+
+    def __init__(self) -> None:
+        # Maps the path an artifact was saved under to its contents.
+        self._storage: dict[Path, bytes] = {}
+
+    def save_bytes(self, path: Path, data: bytes) -> None:
+        """Remember ``data`` under ``path``, replacing any earlier value."""
+        self._storage[path] = data
+
+    def load_bytes(self, path: Path) -> bytes | None:
+        """Return the bytes saved under ``path``, or ``None`` if nothing was saved there."""
+        return self._storage.get(path)
diff --git a/src/okcourse/models.py b/src/okcourse/models.py
index 51527d8..1a9d2e1 100644
--- a/src/okcourse/models.py
+++ b/src/okcourse/models.py
@@ -159,6 +159,12 @@ class CourseSettings(BaseModel):
             "``output_directory``."
         ),
     )
+    in_memory_output: bool = Field(
+        False,
+        description=(
+            "Store generated artifacts in memory instead of writing them to the filesystem."
+        ),
+    )
 
 
 class CourseGenerationInfo(BaseModel):
@@ -231,6 +237,14 @@ class CourseGenerationInfo(BaseModel):
         None, description="The path to the audio file generated from the course content."
) image_file_path: Path | None = Field(None, description="The path to the cover image generated for the course.") + image_bytes: bytes | None = Field( + None, + description="The generated cover image bytes when `in_memory_output` is used.", + ) + audio_bytes: bytes | None = Field( + None, + description="The generated MP3 bytes when `in_memory_output` is used.", + ) class Course(BaseModel): diff --git a/tests/test_in_memory_settings.py b/tests/test_in_memory_settings.py new file mode 100644 index 0000000..2b29423 --- /dev/null +++ b/tests/test_in_memory_settings.py @@ -0,0 +1,18 @@ +from okcourse.models import CourseSettings, CourseGenerationInfo + + +def test_in_memory_settings_default(): + settings = CourseSettings() + assert settings.in_memory_output is False + + +def test_in_memory_settings_true(): + settings = CourseSettings(in_memory_output=True) + assert settings.in_memory_output is True + + +def test_generation_info_memory_fields(): + info = CourseGenerationInfo() + assert info.image_bytes is None + assert info.audio_bytes is None +