-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Wout Decrop
committed
Apr 11, 2024
1 parent
f84f0a5
commit 74c8d7f
Showing
77 changed files
with
178,168 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
roi/BioLingual/model/* | ||
roi/BioLingual/output/* | ||
roi/BioLingual/processor/* | ||
roi/BioLingual/test_model/* |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
{ | ||
"cells": [], | ||
"metadata": {}, | ||
"nbformat": 4, | ||
"nbformat_minor": 5 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,172 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"id": "51ae2204-dbcd-4377-b612-f156ee6ca3bb", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import os\n", | ||
"import random\n", | ||
"from tqdm import tqdm\n", | ||
"# from pydub import AudioSegment\n", | ||
"\n", | ||
"\n", | ||
"absence_boats_folder = '/storage/Imagine_UC6_new/DATA/data_per_station_10_updated_metadata_extra_filter'\n", | ||
"# data_set_folder = '/storage/Imagine_UC6/data_new_ais/dataset_files'\n", | ||
"data_set_folder = '/srv/CLAP/data'\n", | ||
"train_txt_file=os.path.join(data_set_folder,'train.txt')\n", | ||
"test_txt_file =os.path.join(data_set_folder,'test.txt')\n", | ||
"val_txt_file=os.path.join(data_set_folder,'val.txt')\n", | ||
"class_txt_file=os.path.join(data_set_folder,'classes.txt')" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 5, | ||
"id": "63a2e796-4ea2-48f7-beef-e3a50969ae64", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"def calculator(filename):\n", | ||
" parts = filename.split('_')\n", | ||
" last_part = parts[-1].split('.')[0] # Remove the \".wav\" extension\n", | ||
" number = int(last_part)\n", | ||
" \n", | ||
" if number > 10000:\n", | ||
" number = 10000\n", | ||
" \n", | ||
" result = (10000 - number) / 10000\n", | ||
" return round(result,3)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 6, | ||
"id": "0dd122b6-8f79-4834-982b-520517d13753", | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stderr", | ||
"output_type": "stream", | ||
"text": [ | ||
"\n", | ||
"Processing files: 0it [00:00, ?it/s]\u001b[A\n", | ||
"\n", | ||
"Processing files: 100%|██████████| 732/732 [00:00<00:00, 42832.46it/s]\n", | ||
"\n", | ||
"Processing files: 100%|██████████| 5049/5049 [00:00<00:00, 52564.40it/s]\n", | ||
"\n", | ||
"Processing files: 100%|██████████| 726/726 [00:00<00:00, 44000.65it/s]\n", | ||
"\n", | ||
"Processing files: 100%|██████████| 288/288 [00:00<00:00, 34850.68it/s]\n", | ||
"\n", | ||
"Processing files: 100%|██████████| 293/293 [00:00<00:00, 32945.45it/s]\n", | ||
"\n", | ||
"Processing files: 0%| | 0/5646 [00:00<?, ?it/s]\u001b[A\n", | ||
"Processing files: 100%|██████████| 5646/5646 [00:00<00:00, 47574.64it/s]\u001b[A\n", | ||
"\n", | ||
"Processing files: 100%|██████████| 725/725 [00:00<00:00, 41833.98it/s]\n", | ||
"\n", | ||
"Processing files: 100%|██████████| 288/288 [00:00<00:00, 34493.42it/s]\n", | ||
"\n", | ||
"Processing files: 100%|██████████| 1613/1613 [00:00<00:00, 40990.82it/s]\n", | ||
"\n", | ||
"Processing files: 100%|██████████| 144/144 [00:00<00:00, 47681.36it/s]\n", | ||
"\n", | ||
"Processing files: 100%|██████████| 531/531 [00:00<00:00, 37788.45it/s]\n", | ||
"Writing training file: 100%|██████████| 13155/13155 [00:00<00:00, 232695.54it/s]\n", | ||
"Writing testing file: 100%|██████████| 1457/1457 [00:00<00:00, 208662.58it/s]\n", | ||
"Writing validation file: 100%|██████████| 1451/1451 [00:00<00:00, 206639.11it/s]\n", | ||
"Writing classes file: 100%|██████████| 11/11 [00:00<00:00, 121734.42it/s]\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"file_paths = []\n", | ||
"for root, dirs, files in os.walk(absence_boats_folder):\n", | ||
" for file in tqdm(files, desc=\"Processing files\",position=1, leave=True):\n", | ||
" file_path = os.path.join(root, file)\n", | ||
" relative_path = os.path.relpath(file_path,absence_boats_folder)\n", | ||
" relative_path=relative_path.replace(\" \", \"_\")\n", | ||
" try:\n", | ||
" # AudioSegment.from_file(file_path) \n", | ||
" torchaudio.load(file_path)\n", | ||
" file_paths.append(relative_path)\n", | ||
" except:\n", | ||
" print(\"skipping\")\n", | ||
" pass\n", | ||
"\n", | ||
"# List the immediate subfolders of absence_boats_folder; their names become the class labels\n", | ||
"folder_names = next(os.walk(absence_boats_folder))[1]\n", | ||
"\n", | ||
"# Assign numbers based on the location of each folder in the list\n", | ||
"folder_numbers = {folder_names[i]: i for i in range(len(folder_names)) if folder_names[i] != \".ipynb_checkpoints\"}\n", | ||
"\n", | ||
"folder_numbers = {}\n", | ||
"index_counter = 0\n", | ||
"\n", | ||
"for i in range(len(folder_names)):\n", | ||
" if folder_names[i] != \".ipynb_checkpoints\":\n", | ||
" folder_numbers[folder_names[i]] = index_counter\n", | ||
" index_counter += 1\n", | ||
"\n", | ||
"# Split the boat files into training, testing, and validation sets\n", | ||
"random.shuffle(file_paths)\n", | ||
"# num_samples = len(file_paths)\n", | ||
"# train_cutoff = int(num_samples * train_ratio)\n", | ||
"# test_cutoff = train_cutoff + int(num_samples * test_ratio)\n", | ||
"\n", | ||
"train_files = [file for file in file_paths if any(subfolder in file for subfolder in ['train'])]\n", | ||
"test_files = [file for file in file_paths if any(subfolder in file for subfolder in ['test'])]\n", | ||
"val_files = [file for file in file_paths if any(subfolder in file for subfolder in ['val'])]\n", | ||
"\n", | ||
"# Create the training text file\n", | ||
"with open(train_txt_file, 'w') as f_train:\n", | ||
" for file in tqdm(train_files, desc=\"Writing training file\"):\n", | ||
" file = file.replace('\\\\', '/')\n", | ||
" f_train.write(file + ' ' + str(calculator(file)) + '\\n')\n", | ||
"\n", | ||
"# Create the testing text file\n", | ||
"with open(test_txt_file, 'w') as f_test:\n", | ||
" for file in tqdm(test_files, desc=\"Writing testing file\"):\n", | ||
" file = file.replace('\\\\', '/')\n", | ||
" f_test.write(file + ' ' + str(calculator(file)) + '\\n')\n", | ||
"\n", | ||
"# Create the validation text file\n", | ||
"with open(val_txt_file, 'w') as f_val:\n", | ||
" for file in tqdm(val_files, desc=\"Writing validation file\"):\n", | ||
" file = file.replace('\\\\', '/')\n", | ||
" f_val.write(file + ' ' + str(calculator(file)) + '\\n')\n", | ||
"\n", | ||
"# Create the classes text file\n", | ||
"with open(class_txt_file, 'w') as f_class:\n", | ||
" for label in tqdm(folder_numbers, desc=\"Writing classes file\"):\n", | ||
" f_class.write(str(label) + '\\n')" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3 (ipykernel)", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.10.13" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 5 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
{ | ||
"cells": [], | ||
"metadata": {}, | ||
"nbformat": 4, | ||
"nbformat_minor": 5 | ||
} |
Oops, something went wrong.