Skip to content

Commit 5d99144

Browse files
committed
Enhance audio extraction function, supporting audio extraction from video files and adding relevant parameters
1 parent 6324979 commit 5d99144

File tree

2 files changed

+185
-15
lines changed

2 files changed

+185
-15
lines changed

README.md

Lines changed: 38 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,9 @@ Build with the `linux/arm64`, `linux/amd64`, `linux/arm/v7` architectures.
1212

1313
## Features
1414

15-
- **Video Processing**: Extract metadata, capture screenshots, convert formats, and adjust resolution
15+
- **Video Processing**: Extract metadata, capture screenshots, convert formats, adjust resolution, and extract audio
1616
- **Audio Processing**: Extract metadata and convert between audio formats
17+
- **Audio Extraction**: Extract audio tracks from video files with format and quality control (video files only)
1718
- **Format Conversion**: Convert videos between popular formats (MP4, AVI, MOV, MKV, WebM) and audio between formats (MP3, WAV, FLAC, AAC, OGG, M4A, Opus)
1819
- **Resolution Control**: Convert videos to specific resolutions (720p, 1080p, 4K, custom dimensions)
1920
- **Batch Processing**: Combine multiple operations in a single API call
@@ -293,7 +294,10 @@ X-API-Key: your_secret_key_here
293294
"screenshot_count": 5,
294295
"convert_format": "mp4",
295296
"convert_quality": "medium",
296-
"convert_resolution": "720p"
297+
"convert_resolution": "720p",
298+
"extract_audio": true,
299+
"audio_format": "mp3",
300+
"audio_quality": "high"
297301
}
298302
```
299303

@@ -349,6 +353,12 @@ curl -X POST http://localhost:8080/process \
349353
"url": "/download/screenshot_abc123_24.jpg"
350354
}
351355
],
356+
"extracted_audio": {
357+
"filename": "extracted_audio_ghi789.mp3",
358+
"file_size": 3145728,
359+
"format": "mp3",
360+
"url": "/download/extracted_audio_ghi789.mp3"
361+
},
352362
"conversion": {
353363
"filename": "converted_def456.mp4",
354364
"file_size": 12582912,
@@ -548,12 +558,15 @@ Downloads the processed file (screenshot or converted video).
548558
|-----------|------|-------------|---------|
549559
| `media_url` | string | URL to media file (video or audio) | - |
550560
| `extract_info` | boolean | Extract media metadata | true |
551-
| `take_screenshots` | boolean | Capture screenshots | false |
561+
| `take_screenshots` | boolean | Capture screenshots (video only) | false |
552562
| `screenshot_timestamps` | array | Specific timestamps for screenshots (seconds) | - |
553563
| `screenshot_count` | integer | Number of evenly spaced screenshots | - |
554564
| `convert_format` | string | Target format (video: mp4, avi, mov, mkv, webm; audio: mp3, wav, flac, aac, ogg, m4a, opus) | - |
555565
| `convert_quality` | string | Conversion quality (low, medium, high) | medium |
556566
| `convert_resolution` | string | Target resolution (720p, 1080p, 1920x1080, etc.) | original |
567+
| `extract_audio` | boolean | Extract audio from video (video only) | false |
568+
| `audio_format` | string | Audio format for extraction (mp3, wav, flac, aac, ogg, m4a, opus); required when `extract_audio` is true, otherwise extraction is skipped with a warning | - |
569+
| `audio_quality` | string | Audio quality (low, medium, high) | medium |
557570

558571
### File Upload
559572

@@ -811,6 +824,19 @@ curl -X POST http://localhost:8080/process \
811824
}'
812825
```
813826

827+
### Extract Audio from Video
828+
```bash
829+
curl -X POST http://localhost:8080/process \
830+
-H "Content-Type: application/json" \
831+
-H "X-API-Key: your_secret_key_here" \
832+
-d '{
833+
"media_url": "https://sample-videos.com/zip/10/mp4/SampleVideo_1280x720_1mb.mp4",
834+
"extract_audio": true,
835+
"audio_format": "mp3",
836+
"audio_quality": "high"
837+
}'
838+
```
839+
814840
### Combined Processing
815841
```bash
816842
curl -X POST http://localhost:8080/process \
@@ -821,6 +847,9 @@ curl -X POST http://localhost:8080/process \
821847
"extract_info": true,
822848
"take_screenshots": true,
823849
"screenshot_timestamps": [5, 15, 25],
850+
"extract_audio": true,
851+
"audio_format": "mp3",
852+
"audio_quality": "high",
824853
"convert_format": "webm",
825854
"convert_quality": "medium",
826855
"convert_resolution": "720p"
@@ -880,6 +909,12 @@ curl -X POST http://localhost:8080/process \
880909
- Check resolution limits (max: 7680x4320)
881910
- For aspect ratio preservation, use single dimension: `720`
882911

912+
9. **Audio Extraction Issues**
913+
- Audio extraction only works with video files
914+
- Ensure the video file contains an audio track
915+
- Some audio formats may require specific FFmpeg codecs (libmp3lame for MP3, libopus for Opus, etc.)
916+
- Check that the FFmpeg installation includes the required audio codecs
917+
883918
### Logs
884919

885920
View container logs:

app.py

Lines changed: 147 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,12 @@
1111

1212
import os
1313
import json
14-
import re
1514
import uuid
1615
import subprocess
1716
import time
1817
import threading
1918
import logging
2019
import logging.handlers
21-
from datetime import datetime
2220
from urllib.parse import urlparse
2321
import requests
2422
from flask import Flask, request, jsonify, send_file
@@ -72,7 +70,8 @@ def setup_logging():
7270
root_logger.addHandler(file_handler)
7371

7472
# Suppress Flask and Werkzeug logs in production
75-
if os.getenv("FLASK_DEBUG", "false").lower() not in ("true", "1", "yes", "on"):
73+
debug_mode = os.getenv("FLASK_DEBUG", "false").lower()
74+
if debug_mode not in ("true", "1", "yes", "on"):
7675
logging.getLogger("werkzeug").setLevel(logging.WARNING)
7776
logging.getLogger("gunicorn").setLevel(logging.WARNING)
7877

@@ -146,21 +145,27 @@ def log_startup_info():
146145
logger.info(f"Gunicorn worker class: {os.getenv('GUNICORN_WORKER_CLASS', 'sync')}")
147146
logger.info(f"Gunicorn timeout: {os.getenv('GUNICORN_TIMEOUT', '120')}s")
148147
logger.info(f"Gunicorn max requests: {os.getenv('GUNICORN_MAX_REQUESTS', '1000')}")
149-
logger.info(f"Gunicorn max requests jitter: {os.getenv('GUNICORN_MAX_REQUESTS_JITTER', '100')}")
150-
logger.info(f"Gunicorn bind: {os.getenv('GUNICORN_BIND', '0.0.0.0:8080')}")
151-
logger.info(f"Gunicorn workers: {os.getenv('GUNICORN_WORKERS', '4')}")
148+
max_requests_jitter = os.getenv('GUNICORN_MAX_REQUESTS_JITTER', '100')
149+
logger.info(f"Gunicorn max requests jitter: {max_requests_jitter}")
150+
gunicorn_bind = os.getenv('GUNICORN_BIND', '0.0.0.0:8080')
151+
logger.info(f"Gunicorn bind: {gunicorn_bind}")
152+
gunicorn_workers = os.getenv('GUNICORN_WORKERS', '4')
153+
logger.info(f"Gunicorn workers: {gunicorn_workers}")
152154
# Log configuration
153155
logger.info("Configuration loaded:")
154156
logger.info(f" TEMP_DIR: {TEMP_DIR}")
155-
logger.info(f" MAX_FILE_SIZE: {MAX_FILE_SIZE} bytes ({MAX_FILE_SIZE/1024/1024:.1f} MB)")
157+
max_file_size_mb = MAX_FILE_SIZE / 1024 / 1024
158+
logger.info(f" MAX_FILE_SIZE: {MAX_FILE_SIZE} bytes ({max_file_size_mb:.1f} MB)")
156159
logger.info(f" FILE_RETENTION_HOURS: {FILE_RETENTION_HOURS}")
157160
logger.info(f" CLEANUP_INTERVAL_MINUTES: {CLEANUP_INTERVAL_MINUTES}")
158161
logger.info(f" ALLOWED_VIDEO_EXTENSIONS: {ALLOWED_VIDEO_EXTENSIONS}")
159162
logger.info(f" ALLOWED_AUDIO_EXTENSIONS: {ALLOWED_AUDIO_EXTENSIONS}")
160163
logger.info(f" SUPPORTED_VIDEO_OUTPUT_FORMATS: {SUPPORTED_VIDEO_OUTPUT_FORMATS}")
161164
logger.info(f" SUPPORTED_AUDIO_OUTPUT_FORMATS: {SUPPORTED_AUDIO_OUTPUT_FORMATS}")
162-
logger.info(f" API_KEYS configured: {len(API_KEYS) > 0}")
163-
logger.info(f" BASE_URL: {BASE_URL or 'Not set'}")
165+
api_keys_configured = len(API_KEYS) > 0
166+
logger.info(f" API_KEYS configured: {api_keys_configured}")
167+
base_url_status = BASE_URL or 'Not set'
168+
logger.info(f" BASE_URL: {base_url_status}")
164169

165170
log_startup_info()
166171

@@ -620,6 +625,104 @@ def convert_format(self, output_format, quality="medium", resolution=None):
620625
logger.error(f"Video format conversion failed for {self.video_path}: {str(e)}")
621626
raise Exception(f"Format conversion failed: {str(e)}")
622627

628+
def extract_audio(self, output_format, quality="medium"):
    """Extract the audio track of the video into a standalone audio file.

    Runs ffmpeg on ``self.video_path`` with the video stream disabled and
    writes the result to a uniquely named file inside ``TEMP_DIR``.

    Args:
        output_format: Target audio format; must be listed in
            ``SUPPORTED_AUDIO_OUTPUT_FORMATS``.
        quality: ``"low"``, ``"medium"`` or ``"high"``. Unrecognised values
            fall back to ``"medium"`` (a warning is logged). Ignored for
            the lossless formats ``wav`` and ``flac``.

    Returns:
        dict with the keys ``filename``, ``file_path``, ``file_size``
        (bytes), ``format`` and ``url`` (as built by
        ``create_download_url``).

    Raises:
        Exception: If the format is unsupported, ffmpeg exits with a
            non-zero status, or the output file cannot be inspected. The
            message is prefixed once with "Audio extraction failed: " and
            the original error is chained as ``__cause__``.
    """
    # Encoder used for each container/format. m4a is an AAC stream in an
    # MP4 container, so it shares the AAC encoder.
    codecs = {
        "mp3": "libmp3lame",
        "aac": "aac",
        "m4a": "aac",
        "ogg": "libvorbis",
        "opus": "libopus",
        "flac": "flac",
        "wav": "pcm_s16le",
    }

    aac_levels = {
        "low": ["-b:a", "128k"],
        "medium": ["-b:a", "192k"],
        "high": ["-b:a", "256k"],
    }
    # Per-format quality ladders. m4a reuses the AAC ladder so that the
    # requested quality is honoured instead of always encoding at 192k.
    quality_settings = {
        "mp3": {
            "low": ["-b:a", "128k"],
            "medium": ["-b:a", "192k"],
            "high": ["-b:a", "320k"],
        },
        "aac": aac_levels,
        "m4a": aac_levels,
        "ogg": {
            "low": ["-q:a", "3"],
            "medium": ["-q:a", "6"],
            "high": ["-q:a", "9"],
        },
        "opus": {
            "low": ["-b:a", "96k"],
            "medium": ["-b:a", "128k"],
            "high": ["-b:a", "192k"],
        },
    }
    # A target bitrate is meaningless for PCM and FLAC output.
    lossless_formats = ("wav", "flac")

    output_path = None
    try:
        logger.info(f"Extracting audio from video to {output_format} format with {quality} quality")

        if output_format not in SUPPORTED_AUDIO_OUTPUT_FORMATS:
            raise ValueError(
                f"Unsupported audio output format. Supported: "
                f"{SUPPORTED_AUDIO_OUTPUT_FORMATS}"
            )

        output_filename = f"extracted_audio_{uuid.uuid4().hex}.{output_format}"
        output_path = os.path.join(TEMP_DIR, output_filename)

        cmd = [
            "ffmpeg",
            "-i", self.video_path,
            "-vn",  # No video
            # Formats without a dedicated encoder entry keep the historic
            # MP3 fallback.
            "-acodec", codecs.get(output_format, "libmp3lame"),
        ]

        levels = quality_settings.get(output_format)
        if levels is not None:
            if quality not in levels:
                logger.warning(
                    f"Unknown audio quality '{quality}', falling back to medium"
                )
            cmd.extend(levels.get(quality, levels["medium"]))
        elif output_format not in lossless_formats:
            # Default settings for other formats
            cmd.extend(["-b:a", "192k"])

        # -y: overwrite without prompting (ffmpeg must never wait on stdin)
        cmd.extend(["-y", output_path])

        logger.debug(f"Running ffmpeg audio extraction command: {' '.join(cmd)}")
        # errors="replace": ffmpeg may echo non-UTF-8 metadata on stderr,
        # which must not turn into a UnicodeDecodeError here.
        result = subprocess.run(
            cmd, capture_output=True, text=True, errors="replace"
        )
        if result.returncode != 0:
            # The handler below adds the "Audio extraction failed" prefix
            # and logs the error exactly once.
            raise RuntimeError(result.stderr)

        # Get output file info
        file_size = os.path.getsize(output_path)
        logger.info(f"Audio extraction completed: {output_filename} ({file_size} bytes)")

        return {
            "filename": output_filename,
            "file_path": output_path,
            "file_size": file_size,
            "format": output_format,
            "url": create_download_url(output_filename),
        }

    except Exception as e:
        logger.error(f"Audio extraction failed for {self.video_path}: {str(e)}")
        # Best-effort removal of a partial output file so failed runs do not
        # leave unusable files behind in TEMP_DIR.
        if output_path and os.path.exists(output_path):
            try:
                os.remove(output_path)
            except OSError as cleanup_error:
                logger.warning(
                    f"Could not remove partial output {output_path}: {cleanup_error}"
                )
        raise Exception(f"Audio extraction failed: {str(e)}") from e
725+
623726
def _parse_resolution(self, resolution):
624727
"""Parse and validate resolution parameter"""
625728
if not resolution:
@@ -1269,10 +1372,15 @@ def process_media():
12691372
convert_format = data.get("convert_format")
12701373
convert_quality = data.get("convert_quality", "medium")
12711374
convert_resolution = data.get("convert_resolution")
1375+
extract_audio = _parse_bool(data.get("extract_audio", False))
1376+
audio_format = data.get("audio_format")
1377+
audio_quality = data.get("audio_quality", "medium")
12721378

12731379
logger.info(f"Request {request_id}: Processing options - "
12741380
f"extract_info={extract_info}, take_screenshots={take_screenshots}, "
1275-
f"convert_format={convert_format}, quality={convert_quality}")
1381+
f"convert_format={convert_format}, quality={convert_quality}, "
1382+
f"extract_audio={extract_audio}, audio_format={audio_format}, "
1383+
f"audio_quality={audio_quality}")
12761384

12771385
# Get media file
12781386
media_path = None
@@ -1319,6 +1427,31 @@ def process_media():
13191427
logger.warning(f"Request {request_id}: Screenshots requested for audio file")
13201428
result["warning"] = "Screenshots not supported for audio files"
13211429

1430+
# Extract audio (only for video)
1431+
if extract_audio and media_type == "video":
1432+
if not audio_format:
1433+
logger.warning(f"Request {request_id}: Audio extraction requested "
1434+
f"but no audio_format specified")
1435+
result["warning"] = ("Audio extraction requested but no "
1436+
"audio_format specified")
1437+
else:
1438+
logger.info(f"Request {request_id}: Extracting audio to "
1439+
f"{audio_format} format")
1440+
try:
1441+
audio_result = processor.extract_audio(audio_format, audio_quality)
1442+
result["extracted_audio"] = audio_result
1443+
# Keep extracted audio file for download
1444+
output_files.append(audio_result["file_path"])
1445+
except Exception as e:
1446+
logger.error(f"Request {request_id}: Audio extraction failed - "
1447+
f"{str(e)}")
1448+
result["audio_extraction_error"] = str(e)
1449+
elif extract_audio and media_type == "audio":
1450+
logger.warning(f"Request {request_id}: Audio extraction requested "
1451+
f"for audio file")
1452+
result["warning"] = ("Audio extraction not supported for audio files "
1453+
"(already audio)")
1454+
13221455
# Convert format
13231456
if convert_format:
13241457
logger.info(f"Request {request_id}: Converting format to {convert_format}")
@@ -1338,7 +1471,8 @@ def process_media():
13381471
SUPPORTED_VIDEO_OUTPUT_FORMATS if media_type == "video"
13391472
else SUPPORTED_AUDIO_OUTPUT_FORMATS
13401473
)
1341-
logger.error(f"Request {request_id}: Unsupported format '{convert_format}' for {media_type}")
1474+
logger.error(f"Request {request_id}: Unsupported format "
1475+
f"'{convert_format}' for {media_type}")
13421476
raise ValueError(
13431477
f"Unsupported format '{convert_format}' for {media_type}. "
13441478
f"Supported formats: {supported_formats}"
@@ -1352,7 +1486,8 @@ def process_media():
13521486
cleanup_temp_files(*input_files)
13531487

13541488
response_time = (time.time() - start_time) * 1000
1355-
logger.info(f"Request {request_id}: Processing completed successfully in {response_time:.1f}ms")
1489+
logger.info(f"Request {request_id}: Processing completed successfully "
1490+
f"in {response_time:.1f}ms")
13561491
log_response_info(request_id, 200, response_time, result)
13571492

13581493
return create_response(

0 commit comments

Comments
 (0)