Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
b0910cd
refactor: Add verbose name and plural to all models
unna97 Aug 14, 2024
6c35ae7
refactor: Add file hash to AudioFile model
unna97 Aug 14, 2024
53b80d5
refactor: Add test files and settings for unit testing
unna97 Aug 14, 2024
86f0c32
refactor: Add file sha hash calculation to AudioFile model and utils.…
unna97 Aug 15, 2024
3eca111
Clean up the unnecesary modules
unna97 Aug 19, 2024
69b1a67
refactor: Use pytest module and setup fixtures for tests
unna97 Aug 20, 2024
477cabc
Add the subtitles test fixture
unna97 Aug 21, 2024
3ab82d4
refactor: Refactor timeboundlabel models and add tests for it
unna97 Aug 23, 2024
0bc0dc4
Add notes and remove unecessary imports in models & run black
unna97 Aug 24, 2024
d28b10e
refactor: Improve test for abstract timeboundlabel model & add exampl…
unna97 Aug 24, 2024
d5b3b1d
Fix db_entrypoint.sh path & remove unnecessary mounts
unna97 Aug 25, 2024
ddaa549
refactor: Update image source URL in index.html template
unna97 Aug 27, 2024
7ef132f
refactor: Add subtitle model and tests for parsing and creating subti…
unna97 Aug 27, 2024
4791986
Run black on test_models.py
unna97 Aug 28, 2024
15c15dd
- Refactor UploadAudio & Subtitle View
unna97 Aug 28, 2024
fcdfff4
refactor: Update audio file form validation and handling
unna97 Aug 30, 2024
4573f3d
Add tests for urls
unna97 Sep 7, 2024
f30a878
- Run black
unna97 Nov 21, 2024
fd04016
Add tests for some views
unna97 Nov 21, 2024
91ff641
Correct a reassign
unna97 Nov 21, 2024
31c2ca3
Remove a marked failing test
unna97 Nov 21, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions api/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ class Meta:
"audio_file",
"start_time",
"end_time",
"annotation",
"content",
"timestamp",
)

Expand All @@ -29,6 +29,6 @@ def create(self, validated_data):
def update(self, instance, validated_data):
instance.start_time = validated_data.get("start_time", instance.start_time)
instance.end_time = validated_data.get("end_time", instance.end_time)
instance.annotation = validated_data.get("annotation", instance.annotation)
instance.content = validated_data.get("content", instance.content)
instance.save()
return instance
3 changes: 1 addition & 2 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ services:
interval: 10s
timeout: 5s
retries: 5
entrypoint: ["./db-entrypoint.sh"]
entrypoint: ["/db-entrypoint.sh"]

migrate:
build:
Expand All @@ -53,7 +53,6 @@ services:
command: conda run --no-capture-output -n comedy-project-docker python manage.py migrate
volumes:
- .:/audio-annonation
- D:/anaconda-envs:/envs
depends_on:
db:
condition: service_healthy
Expand Down
126 changes: 3 additions & 123 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,124 +1,4 @@
[tool.poetry]
name = "audio-annonation"
version = "0.1.0"
description = ""
authors = ["Unnati Patel", "Christopher Keim"]
readme = "README.md"
packages = [
{ include = "api" },
{ include = "waveform_audio" },
]

[tool.poetry.dependencies]
python = ">=3.11, <3.12"

# Web
requests = "^2.31.0"
urllib3 = "^2.0.2"
django = "^4.2.1"
djangorestframework = "^3.14.0"
crispy-tailwind = "^0.5.0"
whitenoise = "^6.6.0"
asgiref = "^3.6.0"
certifi = "^2023.5.7"
tzdata = "^2023.3"
python-dotenv = "^1.0.0"

# Data Science
pandas = "^1.5.0"
numpy = "^1.24.3"
scipy = "^1.10.1"
scikit-learn = "^1.2.2"
matplotlib = "^3.6.0"
sqlparse = "^0.4.4"
audioread = "^3.0.0"
soundfile = "^0.12.1"
pooch = "^1.6.0"
soxr = "^0.3.5"
librosa = "^0.10.0.post2"

# MLOps
threadpoolctl = "^3.1.0"
numba = "^0.57.0"
llvmlite = "^0.40.0"
cffi = "^1.15.1"



[tool.poetry.group.dev]
optional = true
[tool.poetry.group.dev.dependencies]

# DevOps
black = "^22.3.0"
pytest = "^7.4.0"
pytest-cov = "^4.1.0"
ruff = "^0.0.285"

# PostgresSQL psycopg2 pre-compiled binary
psycopg2-binary = "^2.9.9"


[tool.poetry.group.prod]
optional = true
[tool.poetry.group.prod.dependencies]

# PostgresSQL psycopg2 source build
psycopg2 = "^2.9.9"


[tool.ruff]
# Enable pycodestyle (`E`) and Pyflakes (`F`) codes by default.
select = ["E", "F"]
ignore = []

# Allow autofix for all enabled rules (when `--fix`) is provided.
fixable = ["A", "B", "C", "D", "E", "F", "G", "I", "N", "Q", "S", "T", "W", "ANN", "ARG", "BLE", "COM", "DJ", "DTZ", "EM", "ERA", "EXE", "FBT", "ICN", "INP", "ISC", "NPY", "PD", "PGH", "PIE", "PL", "PT", "PTH", "PYI", "RET", "RSE", "RUF", "SIM", "SLF", "TCH", "TID", "TRY", "UP", "YTT"]
unfixable = []

# Exclude a variety of commonly ignored directories.
exclude = [
".bzr",
".direnv",
".eggs",
".git",
".git-rewrite",
".hg",
".mypy_cache",
".nox",
".pants.d",
".pytype",
".ruff_cache",
".svn",
".tox",
".venv",
"__pypackages__",
"_build",
"buck-out",
"build",
"dist",
"node_modules",
"venv",
"tests",
]

# Same as Black.
line-length = 88

# Allow unused variables when underscore-prefixed.
dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"

# Assume Python 3.9
target-version = "py39"

[tool.ruff.mccabe]
# Unlike Flake8, default to a complexity level of 10.
max-complexity = 10

[tool.pytest.ini_options]
# Configurations for pytest with coverage
addopts = "-vv --cov"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
DJANGO_SETTINGS_MODULE = "waveform_audio.settings"
# -- recommended but optional:
python_files = ["test_*.py", "*_test.py", "testing/python/*.py"]
50 changes: 44 additions & 6 deletions waveform_audio/forms.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
# import crispy_forms.helper as crispy_helper
from django import forms
from .models import AudioFile
from django.utils.translation import gettext_lazy as _

from waveform_audio.models import AudioFile, Subtitle
from crispy_forms.helper import FormHelper
from crispy_forms.layout import Submit # Layout, Row, Column
from crispy_forms.layout import Submit
from waveform_audio import utils
import mimetypes


# url: https://stackoverflow.com/questions/24783275/django-form-with-choices-but-also-with-freetext-option?noredirect=1&lq=1
Expand Down Expand Up @@ -31,19 +35,30 @@ class Meta:
model = AudioFile
fields = ["file"]
labels = {
"audio_file": "Select a file",
"file": "Select a file",
}
help_texts = {
"audio_file": "insert an audio file",
"file": "insert an audio file",
}
widgets = {
"audio_file": forms.FileInput(attrs={"accept": "audio/*"}),
"file": forms.FileInput(attrs={"accept": "audio/wav,audio/mp3,audio/mp4"}),
}

def clean_file(self):
    """Validate that the uploaded file has an accepted audio type.

    The MIME type is guessed from the file *name* via ``mimetypes``; the
    file content itself is not inspected here.

    Returns:
        The value stored under ``"file"`` in ``cleaned_data`` (returned
        unchanged, including when it is empty/``None``).

    Raises:
        forms.ValidationError: with code ``invalid_file_type`` when the
            guessed MIME type is unknown or not in the accepted set.
    """
    audio_file = self.cleaned_data.get("file")
    if not audio_file:
        return audio_file

    # ``mimetypes`` commonly reports ``.wav`` as "audio/x-wav" (some
    # platforms/registries use "audio/wave" or "audio/vnd.wave") rather
    # than "audio/wav", so all WAV aliases are accepted alongside the
    # original entries; otherwise valid WAV uploads would be rejected.
    valid_mime_types = {
        "audio/mpeg",
        "audio/mp3",
        "audio/wav",
        "audio/x-wav",
        "audio/wave",
        "audio/vnd.wave",
    }

    # guess_type() returns (type, encoding); only the type is needed.
    # The encoding is deliberately not bound to ``_`` because that name is
    # the translation function in this module.
    mime_type = mimetypes.guess_type(audio_file.name)[0]
    if mime_type not in valid_mime_types:  # also covers mime_type is None
        raise forms.ValidationError(
            _("Invalid audio file type"), code="invalid_file_type"
        )
    return audio_file


class SubtitleFileForm(forms.Form):
subtitle_file = forms.FileField(
label="Select a file",
label="Select a subtitle file",
help_text="insert a subtitle file",
widget=forms.FileInput(attrs={"accept": ".srt"}),
)
Expand All @@ -54,3 +69,26 @@ def helper(self):
helper.form_method = "POST"
helper.inputs.append(Submit("submit", "Submit"))
return helper

def clean_subtitle_file(self):
    """Parse the uploaded subtitle file as part of field validation.

    When a file is present it is handed to ``utils.process_subtitle_file``
    and the result is kept on ``self.subtitle_texts`` for later use.

    Returns:
        The value stored under ``"subtitle_file"`` in ``cleaned_data``.

    Raises:
        forms.ValidationError: if processing the file raises any exception;
            the original error text is included in the message.
    """
    uploaded = self.cleaned_data.get("subtitle_file")
    if not uploaded:
        # Nothing to parse; hand the empty value back unchanged.
        return uploaded

    try:
        parsed = utils.process_subtitle_file(uploaded)
    except Exception as e:
        # Any parsing failure is reported to the user as a form error.
        raise forms.ValidationError(f"Error processing subtitle file: {str(e)}")
    self.subtitle_texts = parsed
    return uploaded

def save(self, audio_file_instance):
    """Create one ``Subtitle`` row per parsed subtitle entry.

    Args:
        audio_file_instance: object assigned to each new subtitle's
            ``audio_file`` field.

    Returns:
        A list of the created ``Subtitle`` objects, in the order of
        ``self.subtitle_texts``; an empty list when ``subtitle_texts`` has
        not been set on this form.
    """
    if not hasattr(self, "subtitle_texts"):
        return []

    created = []
    # Each entry is indexed by "start_time", "end_time" and "content".
    for entry in self.subtitle_texts:
        row = Subtitle.objects.create(
            audio_file=audio_file_instance,
            start_time=entry["start_time"],
            end_time=entry["end_time"],
            content=entry["content"],
        )
        created.append(row)
    return created
57 changes: 41 additions & 16 deletions waveform_audio/migrations/0001_initial.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Generated by Django 4.2.1 on 2023-06-30 06:06
# Generated by Django 5.1 on 2024-08-24 16:53

from django.db import migrations, models
import django.db.models.deletion
from django.db import migrations, models


class Migration(migrations.Migration):
Expand All @@ -15,10 +15,13 @@ class Migration(migrations.Migration):
name="AudioFile",
fields=[
("id", models.AutoField(primary_key=True, serialize=False)),
("file", models.FileField(upload_to="audio/")),
("file", models.FileField(unique=True, upload_to="audio/")),
("file_hash", models.CharField(unique=True)),
("timestamp", models.DateTimeField(auto_now_add=True)),
],
options={
"verbose_name": "Audio File",
"verbose_name_plural": "Audio Files",
"db_table": "audio_file",
"ordering": ["timestamp"],
},
Expand All @@ -29,34 +32,56 @@ class Migration(migrations.Migration):
("id", models.AutoField(primary_key=True, serialize=False)),
("start_time", models.TimeField()),
("end_time", models.TimeField()),
("timestamp", models.DateTimeField(auto_now_add=True)),
(
"annotation",
"content",
models.CharField(
choices=[
("speech", "Speech"),
("music", "Music"),
("noise", "Noise"),
("laughter", "Laughter"),
("other", "Other"),
("unknown", "Unknown"),
],
default="unknown",
max_length=10,
help_text="Annotation label of the audio segment",
max_length=255,
),
),
("timestamp", models.DateTimeField(auto_now_add=True)),
(
"audio_file",
models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE,
related_name="annotations",
to="waveform_audio.audiofile",
),
),
],
options={
"verbose_name": "Audio Annotation",
"verbose_name_plural": "Audio Annotations",
"db_table": "audio_annotation",
"ordering": ["start_time", "end_time"],
"abstract": False,
"unique_together": {
("audio_file", "start_time", "end_time", "content")
},
},
),
migrations.CreateModel(
name="Subtitle",
fields=[
("id", models.AutoField(primary_key=True, serialize=False)),
("start_time", models.TimeField()),
("end_time", models.TimeField()),
("timestamp", models.DateTimeField(auto_now_add=True)),
("content", models.TextField(help_text="Subtitle content")),
(
"audio_file",
models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE,
to="waveform_audio.audiofile",
),
),
],
options={
"verbose_name": "Subtitle",
"verbose_name_plural": "Subtitles",
"db_table": "subtitle",
"ordering": ["start_time", "end_time"],
"abstract": False,
"unique_together": {("audio_file", "start_time", "end_time")},
},
),
]
36 changes: 0 additions & 36 deletions waveform_audio/migrations/0002_subtitle.py

This file was deleted.

This file was deleted.

Loading