-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathspotlight.py
134 lines (103 loc) · 4.77 KB
/
spotlight.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import numpy as np
import os
import pandas as pd
# Split manifests: space-delimited text files; only their first column is used.
d_train_path = r"C:\Users\wout.decrop\environments\Imagine\imagine_environment\CLAP_audio\data/train.txt"
d_valid_path = r"C:\Users\wout.decrop\environments\Imagine\imagine_environment\CLAP_audio\data/val.txt"
d_test_path = r"C:\Users\wout.decrop\environments\Imagine\imagine_environment\CLAP_audio\data/test.txt"

# Directory that every first-column entry of a manifest is joined onto.
wavs_folder = r'\\fs\SHARED\onderzoek\6. Marine Observation Center\Projects\IMAGINE\UC6\plots\plots_per_station_4_updated_metadata_extra_filter-window-4'

# Training split: manifest rows -> array of full paths.
split = np.genfromtxt(d_train_path, delimiter=' ', dtype='str')
d_train_loc = np.array(
    [os.path.join(wavs_folder, entry) for entry in split[:, 0]]
)

# Validation split.
split = np.genfromtxt(d_valid_path, delimiter=' ', dtype='str')
d_valid_loc = np.array(
    [os.path.join(wavs_folder, entry) for entry in split[:, 0]]
)

# Test split (``split`` is left holding the test manifest afterwards).
split = np.genfromtxt(d_test_path, delimiter=' ', dtype='str')
d_test_loc = np.array(
    [os.path.join(wavs_folder, entry) for entry in split[:, 0]]
)
def categorize_speed(speed):
    """Return the label of the speed band that ``speed`` falls in.

    Bands are half-open on the right: ``0 <= speed < 8`` maps to ``'0-8'``
    and ``8 <= speed < 14`` maps to ``'8-14'``. Every other value maps to
    ``'14+'``.

    Args:
        speed: Numeric speed value (units are not stated in this file).

    Returns:
        One of ``'0-8'``, ``'8-14'`` or ``'14+'``.
    """
    if 0 <= speed < 8:
        return '0-8'
    if 8 <= speed < 14:
        # The label must name the bounds actually tested (8 inclusive to
        # 14 exclusive) so that it does not overlap the '0-8' band.
        return '8-14'
    # NOTE(review): negative and NaN speeds fail both range checks and are
    # therefore also reported as '14+' - confirm whether that is intended.
    return '14+'
def categorize_distance(distance):
    """Return the one-kilometre band label for a distance.

    Values from 0 up to and including 10000 are mapped to ``'0-1 km'``
    through ``'9-10 km'``; each band includes its lower bound and excludes
    its upper bound, except the last one, which also includes 10000.
    Anything else (negative, above 10000, or NaN) maps to ``'10+ km'``.

    Args:
        distance: Numeric distance; the thresholds are multiples of 1000.

    Returns:
        The band label as a string.
    """
    band_labels = (
        '0-1 km', '1-2 km', '2-3 km', '3-4 km', '4-5 km',
        '5-6 km', '6-7 km', '7-8 km', '8-9 km', '9-10 km',
    )

    # A chained comparison involving NaN is false, so NaN is rejected here
    # together with every out-of-range value.
    if not 0 <= distance <= 10000:
        return '10+ km'

    # Exactly 10000 would index one past the table; it belongs to the
    # final band, so clamp to the last label.
    band = int(distance // 1000)
    return band_labels[min(band, len(band_labels) - 1)]
def process_filenames(d_train):
    """Build a metadata table by parsing underscore-separated file names.

    Each name is split on ``'_'`` and fields are read by position counted
    from the end: ``[-1]`` is the distance (the text before ``'.wav'``),
    ``[-3]`` is the speed (with ``'-'`` standing in for the decimal point),
    ``[-4]`` is the activity and ``[-5]`` is the ship type. Rows whose speed
    is not ``<= 30`` are dropped.

    Args:
        d_train: Iterable of file names or paths (strings).

    Returns:
        A ``pandas.DataFrame`` with the columns ``filename``, ``distance``,
        ``speed``, ``activity``, ``ship_type``, ``distance_category``,
        ``speed_category`` and ``label``.

    Raises:
        IndexError: If a name has too few ``'_'``-separated fields.
        ValueError: If the distance or speed field is not a valid number.
    """
    df = pd.DataFrame({'filename': d_train})

    # Numeric fields are parsed first because the speed drives the filter.
    df['distance'] = df['filename'].apply(
        lambda name: float(name.split('_')[-1].split('.wav')[0])
    )
    df['speed'] = df['filename'].apply(
        lambda name: float(name.split('_')[-3].replace('-', '.'))
    )

    # Take an explicit copy of the filtered rows: assigning new columns to
    # a boolean-indexed slice triggers pandas' SettingWithCopyWarning and
    # leaves it unclear whether the writes reach an independent frame.
    df = df[df['speed'] <= 30].copy()

    df['activity'] = df['filename'].apply(lambda name: name.split('_')[-4])
    df['ship_type'] = df['filename'].apply(lambda name: name.split('_')[-5])

    df['distance_category'] = df['distance'].apply(categorize_distance)
    df['speed_category'] = df['speed'].apply(categorize_speed)

    # NOTE(review): the label uses the constant words 'ship' and 'activity'
    # instead of the parsed ship type and activity, and the parsed
    # 'activity' column is overwritten with that constant. This is
    # presumably a deliberate simplification of the label text - confirm.
    df['label'] = 'ship'
    df['activity'] = 'activity'
    df['label'] = (
        df['label']
        + ' at distance ' + df['distance_category']
        + ' with speed ' + df['speed_category']
        + ' is ' + df['activity']
    )
    return df
# Parse each split's path array into its metadata table (train, valid, test).
d_train, d_valid, d_test = map(
    process_filenames,
    (d_train_loc, d_valid_loc, d_test_loc),
)
from transformers import ClapFeatureExtractor
import torchaudio
import torch
import numpy as np
import torch.nn.functional as F_general
def extract_clap_embeddings(wav_path, *, desired_fs=48000, max_duration=10,
                            channel=0, feature_extractor=None):
    """Load an audio file and run it through a CLAP feature extractor.

    The audio is resampled to ``desired_fs`` when its native rate differs,
    reduced to one channel, and trimmed or zero-padded to exactly
    ``max_duration * desired_fs`` samples before being passed to the
    extractor.

    NOTE(review): despite the function name, no CLAP model is invoked here.
    The return value is whatever the feature extractor produces - presumably
    model input features rather than embeddings; confirm against the
    ``transformers`` documentation.

    Args:
        wav_path: Path of the audio file to load.
        desired_fs: Sample rate (Hz) the waveform is resampled to.
        max_duration: Length in seconds the waveform is trimmed/padded to.
        channel: Index of the channel to keep.
        feature_extractor: Optional extractor instance to reuse across
            calls. When ``None`` a default ``ClapFeatureExtractor`` is
            created for this call.

    Returns:
        The output of the feature extractor call, requested with
        ``return_tensors="pt"``.
    """
    if feature_extractor is None:
        feature_extractor = ClapFeatureExtractor()

    # load() already reports the file's sample rate, so a separate
    # torchaudio.info() call (a second open of the file) is unnecessary.
    waveform, fs = torchaudio.load(wav_path)
    if fs != desired_fs:
        resample = torchaudio.transforms.Resample(fs, desired_fs)
        waveform = resample(waveform)

    # Assumes the (channels, samples) layout that torchaudio.load returns
    # by default: keep one channel and at most max_samples samples.
    max_samples = int(max_duration * desired_fs)
    waveform = waveform[channel, :max_samples]

    # Right-pad short clips with zeros so every clip has the same length.
    missing = max_samples - waveform.shape[0]
    if missing > 0:
        waveform = F_general.pad(waveform, (0, missing))

    return feature_extractor(
        waveform,
        return_tensors="pt",
        sampling_rate=desired_fs,
        padding=True,
    )
# Example usage. These statements sit at module level, so they execute every
# time the file is run or imported: one hard-coded example file is processed
# and the extractor output is printed.
wav_path = '/srv/CLAP/examples/Grafton_15810_2022-03-15_09-02-23_420-983156_Cargo_underway-using-engine_15-9_2022-03-15-09-09-23_2175.wav'
clap_embeddings = extract_clap_embeddings(wav_path)
print(clap_embeddings)