generate_spectrogram.py (forked from Mazenmarwan023/Shazam-app)
import os
import json
import hashlib

import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt


class SpectrogramGenerator:
    def __init__(self, data_path):
        self.data_path = data_path
        self.output_path = os.path.join('/Users/macbook/Documents/DSP_tasks/Task_5', "fingerprints")
        self.spectrogram_path = os.path.join('/Users/macbook/Documents/DSP_tasks/Task_5', "spectrograms")
        os.makedirs(self.output_path, exist_ok=True)
        os.makedirs(self.spectrogram_path, exist_ok=True)
    def generate_spectrogram(self, audio_path):
        y, sr = librosa.load(audio_path, sr=None)
        S = librosa.feature.melspectrogram(y=y, sr=sr)
        S_DB = librosa.power_to_db(S, ref=np.max)
        return S_DB
    def extract_features(self, spectrogram):
        """
        Extract summarized audio features from a spectrogram.

        :param spectrogram: Mel spectrogram in dB scale.
        :return: A dictionary of summarized (mean) features.
        """
        features = {}
        try:
            # Convert the dB spectrogram back to an amplitude spectrogram
            amplitude_spectrogram = librosa.db_to_amplitude(spectrogram)

            # Spectral features.
            # Note: these helpers nominally expect a linear-frequency STFT
            # magnitude; here they are applied to the mel spectrogram as-is.
            features['spectral_centroid_mean'] = float(np.mean(librosa.feature.spectral_centroid(S=amplitude_spectrogram)))
            features['spectral_bandwidth_mean'] = float(np.mean(librosa.feature.spectral_bandwidth(S=amplitude_spectrogram)))
            features['spectral_contrast_mean'] = float(np.mean(librosa.feature.spectral_contrast(S=amplitude_spectrogram)))
            features['spectral_rolloff_mean'] = float(np.mean(librosa.feature.spectral_rolloff(S=amplitude_spectrogram)))

            # Tonal features
            chroma = librosa.feature.chroma_stft(S=amplitude_spectrogram)
            tonnetz = librosa.feature.tonnetz(chroma=chroma)
            features['tonnetz_mean'] = float(np.mean(tonnetz))

            # Temporal features.
            # Note: the zero-crossing rate is computed over the spectrogram
            # frames rather than the raw waveform.
            zero_crossings = librosa.feature.zero_crossing_rate(amplitude_spectrogram)
            features['zero_crossing_rate_mean'] = float(np.mean(zero_crossings))

            # MFCCs (librosa accepts a dB-scaled mel spectrogram via S=)
            mfcc = librosa.feature.mfcc(S=spectrogram, n_mfcc=13)
            for i in range(mfcc.shape[0]):
                features[f'mfcc_{i}_mean'] = float(np.mean(mfcc[i, :]))
        except Exception as e:
            print(f"Error extracting features: {e}")
        return features
    def perceptual_hash(self, features):
        # Hash the sorted feature dict so identical features yield the same digest
        features_str = json.dumps(features, sort_keys=True)
        hash_object = hashlib.sha256(features_str.encode('utf-8'))
        return {
            "features": features,
            "phash": hash_object.hexdigest()
        }
    def save_fingerprint(self, fingerprint, filename):
        with open(filename, 'w') as file:
            json.dump(fingerprint, file, indent=4)
    def process_files(self):
        for team in range(1, 21):
            if team in (2, 4, 12):
                continue
            team_folder = os.path.join(self.data_path, f'Team_{team}')
            for file in os.listdir(team_folder):
                if file.endswith('.wav') or file.endswith('.mp3'):
                    file_path = os.path.join(team_folder, file)
                    S_DB = self.generate_spectrogram(file_path)

                    # Save the spectrogram image in the "spectrograms" folder.
                    # Note: the display assumes a 22.05 kHz sample rate even
                    # though files are loaded at their native rate.
                    plt.figure(figsize=(10, 4))
                    librosa.display.specshow(S_DB, sr=22050, x_axis='time', y_axis='mel')
                    plt.colorbar(format='%+2.0f dB')
                    plt.title(file)
                    plt.tight_layout()
                    spectrogram_filename = os.path.join(self.spectrogram_path, f"{file}.png")
                    plt.savefig(spectrogram_filename)
                    plt.close()

                    # Fingerprint: summarized features plus perceptual hash, saved as JSON
                    features = self.extract_features(S_DB)
                    fingerprint = self.perceptual_hash(features)
                    fingerprint_filename = os.path.join(self.output_path, f"{file}.json")
                    self.save_fingerprint(fingerprint, fingerprint_filename)
                    print(f"Processed {file}")