Skip to content
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,8 @@ The previous code snippet demonstrates generating a course from only a title, bu

[`CourseSettings`][okcourse.CourseSettings] lets you configure the number of lectures, number of subtopics in each lecture, and which AI models to use for generating the course content (lecture text, cover image, and audio file).

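If you want to run `okcourse` in an environment without write access to the filesystem, set `in_memory_output` to `True` on `CourseSettings`. When it is enabled, the generated cover image and audio are not written to disk; their bytes are kept in memory on the course's `generation_info` (in its `image_bytes` and `audio_bytes` fields), and the course JSON file is not written either.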

## Run an example app

To see the library in action, try generating a course by running an [example app](/okcourse/examples/).
Expand Down
6 changes: 6 additions & 0 deletions src/okcourse/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
import logging

from .generators import CourseGenerator, OpenAIAsyncGenerator
from .memory_backends import FileSystemBackend, InMemoryBackend, StorageBackend
from .inmemory_zip import InMemoryCoursePack
from .models import (
Course,
CourseGenerationInfo,
Expand All @@ -33,6 +35,10 @@
"CoursePromptSet",
"CourseSettings",
"OpenAIAsyncGenerator",
"StorageBackend",
"FileSystemBackend",
"InMemoryBackend",
"InMemoryCoursePack",
]

# Avoid "No handler found" warnings
Expand Down
70 changes: 46 additions & 24 deletions src/okcourse/generators/openai/async_openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,15 +233,18 @@ async def generate_image(self, course: Course) -> Course:
if image.revised_prompt:
self.log.warning(f"Image prompt was revised by model - prompt used was: {image.revised_prompt}")

course.generation_info.image_file_path = course.settings.output_directory / Path(
sanitize_filename(course.title)
).with_suffix(".png")
course.generation_info.image_file_path.parent.mkdir(parents=True, exist_ok=True)
self.log.info(f"Saving image to {course.generation_info.image_file_path}")
course.generation_info.image_file_path.write_bytes(image_bytes)
if course.settings.in_memory_output:
course.generation_info.image_bytes = image_bytes
else:
course.generation_info.image_file_path = course.settings.output_directory / Path(
sanitize_filename(course.title)
).with_suffix(".png")
course.generation_info.image_file_path.parent.mkdir(parents=True, exist_ok=True)
self.log.info(f"Saving image to {course.generation_info.image_file_path}")
course.generation_info.image_file_path.write_bytes(image_bytes)

# Save the course JSON now that we have the image path
course.generation_info.image_file_path.with_suffix(".json").write_text(course.model_dump_json(indent=2))
# Save the course JSON now that we have the image path
course.generation_info.image_file_path.with_suffix(".json").write_text(course.model_dump_json(indent=2))

return course

Expand Down Expand Up @@ -330,21 +333,36 @@ async def generate_audio(self, course: Course) -> Course:
audio_chunks = [task.result()[1] for task in sorted(speech_tasks, key=lambda t: t.result()[0])]

# If the user generated an image for the course, embed it
if course.generation_info.image_file_path and course.generation_info.image_file_path.exists():
composer_tag = (
f"{course.settings.text_model_lecture} & "
f"{course.settings.tts_model} & "
f"{course.settings.image_model}"
)
cover_tag = io.BytesIO(course.generation_info.image_file_path.read_bytes())
if course.settings.in_memory_output:
if course.generation_info.image_bytes:
composer_tag = (
f"{course.settings.text_model_lecture} & "
f"{course.settings.tts_model} & "
f"{course.settings.image_model}"
)
cover_tag = io.BytesIO(course.generation_info.image_bytes)
else:
composer_tag = f"{course.settings.text_model_lecture} & {course.settings.tts_model}"
cover_tag = None
else:
composer_tag = f"{course.settings.text_model_lecture} & {course.settings.tts_model}"
cover_tag = None
if course.generation_info.image_file_path and course.generation_info.image_file_path.exists():
composer_tag = (
f"{course.settings.text_model_lecture} & "
f"{course.settings.tts_model} & "
f"{course.settings.image_model}"
)
cover_tag = io.BytesIO(course.generation_info.image_file_path.read_bytes())
else:
composer_tag = f"{course.settings.text_model_lecture} & {course.settings.tts_model}"
cover_tag = None

course.generation_info.audio_file_path = course.settings.output_directory / Path(
sanitize_filename(course.title)
).with_suffix(".mp3")
course.generation_info.audio_file_path.parent.mkdir(parents=True, exist_ok=True)
if course.settings.in_memory_output:
course.generation_info.audio_file_path = None
else:
course.generation_info.audio_file_path = course.settings.output_directory / Path(
sanitize_filename(course.title)
).with_suffix(".mp3")
course.generation_info.audio_file_path.parent.mkdir(parents=True, exist_ok=True)

version_string = get_top_level_version("okcourse")
tags: dict[str, str] = {
Expand All @@ -365,11 +383,15 @@ async def generate_audio(self, course: Course) -> Course:
album_art_mime="image/png",
)

self.log.info(f"Saving audio to {course.generation_info.audio_file_path}")
course.generation_info.audio_file_path.write_bytes(combined_mp3.getvalue())
if course.settings.in_memory_output:
course.generation_info.audio_bytes = combined_mp3.getvalue()
else:
self.log.info(f"Saving audio to {course.generation_info.audio_file_path}")
course.generation_info.audio_file_path.write_bytes(combined_mp3.getvalue())

# Save the course JSON now that we have the audio path
course.generation_info.audio_file_path.with_suffix(".json").write_text(course.model_dump_json(indent=2))
if not course.settings.in_memory_output and course.generation_info.audio_file_path:
course.generation_info.audio_file_path.with_suffix(".json").write_text(course.model_dump_json(indent=2))

return course

Expand Down
29 changes: 29 additions & 0 deletions src/okcourse/inmemory_zip.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
"""Utility for bundling course artifacts into an in-memory ZIP archive."""

from __future__ import annotations

import io
from zipfile import ZipFile, ZIP_DEFLATED
from dataclasses import dataclass
from typing import Optional


@dataclass
class InMemoryCoursePack:
    """Container for all course artifacts stored as a ZIP file in memory.

    Attributes:
        zip_bytes: The complete ZIP archive as raw bytes.
    """

    zip_bytes: bytes

    @classmethod
    def from_course(cls, course: "Course") -> "InMemoryCoursePack":
        """Create a ZIP of the course JSON, cover image, and audio in memory.

        The archive always contains ``course.json``. ``cover.png`` and
        ``audio.mp3`` are added only when the corresponding bytes are present
        (and non-empty) on ``course.generation_info``.

        Args:
            course: The course whose artifacts should be bundled.

        Returns:
            A new pack whose ``zip_bytes`` holds the DEFLATE-compressed archive.
        """
        info = course.generation_info

        # The binary artifacts get their own archive members, so keep them out
        # of the JSON: raw PNG/MP3 bytes are not valid UTF-8 text and would
        # otherwise be duplicated (or fail to serialize) inside course.json.
        course_json = course.model_dump_json(
            indent=2,
            exclude={"generation_info": {"image_bytes", "audio_bytes"}},
        )

        buffer = io.BytesIO()
        with ZipFile(buffer, "w", ZIP_DEFLATED) as zf:
            zf.writestr("course.json", course_json)
            if info.image_bytes:
                zf.writestr("cover.png", info.image_bytes)
            if info.audio_bytes:
                zf.writestr("audio.mp3", info.audio_bytes)

        # getvalue() returns the entire buffer regardless of stream position,
        # so no seek is needed before reading it out.
        return cls(zip_bytes=buffer.getvalue())

46 changes: 46 additions & 0 deletions src/okcourse/memory_backends.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
from __future__ import annotations

"""Prototype storage backends for holding generated course artifacts in memory."""

from abc import ABC, abstractmethod
from pathlib import Path
from typing import Dict


class StorageBackend(ABC):
"""Abstract interface for storing generated artifacts."""

@abstractmethod
def save_bytes(self, path: Path, data: bytes) -> None:
pass

@abstractmethod
def load_bytes(self, path: Path) -> bytes | None:
pass


class FileSystemBackend(StorageBackend):
    """Store artifacts on disk using the normal filesystem."""

    def save_bytes(self, path: Path, data: bytes) -> None:
        """Write ``data`` to ``path``, creating missing parent directories first."""
        parent_dir = path.parent
        parent_dir.mkdir(parents=True, exist_ok=True)
        path.write_bytes(data)

    def load_bytes(self, path: Path) -> bytes | None:
        """Return the contents of ``path``, or ``None`` when it does not exist."""
        return path.read_bytes() if path.exists() else None


class InMemoryBackend(StorageBackend):
    """Keep artifacts only in memory."""

    def __init__(self) -> None:
        """Start with an empty path-to-bytes mapping."""
        self._storage: Dict[Path, bytes] = {}

    def save_bytes(self, path: Path, data: bytes) -> None:
        """Remember ``data`` under ``path``, replacing any earlier entry."""
        self._storage.update({path: data})

    def load_bytes(self, path: Path) -> bytes | None:
        """Return the bytes saved under ``path``, or ``None`` if nothing was saved."""
        try:
            return self._storage[path]
        except KeyError:
            return None

14 changes: 14 additions & 0 deletions src/okcourse/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,12 @@ class CourseSettings(BaseModel):
"``output_directory``."
),
)
in_memory_output: bool = Field(
False,
description=(
"Store generated artifacts in memory instead of writing them to the filesystem."
),
)


class CourseGenerationInfo(BaseModel):
Expand Down Expand Up @@ -231,6 +237,14 @@ class CourseGenerationInfo(BaseModel):
None, description="The path to the audio file generated from the course content."
)
image_file_path: Path | None = Field(None, description="The path to the cover image generated for the course.")
image_bytes: bytes | None = Field(
None,
description="The generated cover image bytes when `in_memory_output` is used.",
)
audio_bytes: bytes | None = Field(
None,
description="The generated MP3 bytes when `in_memory_output` is used.",
)


class Course(BaseModel):
Expand Down
18 changes: 18 additions & 0 deletions tests/test_in_memory_settings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from okcourse.models import CourseSettings, CourseGenerationInfo


def test_in_memory_settings_default():
    """``in_memory_output`` is off unless explicitly enabled."""
    assert CourseSettings().in_memory_output is False


def test_in_memory_settings_true():
    """Passing ``in_memory_output=True`` is reflected on the settings object."""
    enabled = CourseSettings(in_memory_output=True).in_memory_output
    assert enabled is True


def test_generation_info_memory_fields():
    """Both in-memory artifact fields default to ``None``."""
    generation_info = CourseGenerationInfo()
    for field_name in ("image_bytes", "audio_bytes"):
        assert getattr(generation_info, field_name) is None