Skip to content

Commit

Permalink
CLAP backup
Browse files Browse the repository at this point in the history
  • Loading branch information
Wout Decrop committed Apr 11, 2024
1 parent f84f0a5 commit 74c8d7f
Show file tree
Hide file tree
Showing 77 changed files with 178,168 additions and 0 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
roi/BioLingual/model/*
roi/BioLingual/output/*
roi/BioLingual/processor/*
roi/BioLingual/test_model/*
6 changes: 6 additions & 0 deletions .ipynb_checkpoints/GPU-checkpoint.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"cells": [],
"metadata": {},
"nbformat": 4,
"nbformat_minor": 5
}
172 changes: 172 additions & 0 deletions .ipynb_checkpoints/Untitled-checkpoint.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"id": "51ae2204-dbcd-4377-b612-f156ee6ca3bb",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import random\n",
"\n",
"import torchaudio\n",
"from tqdm import tqdm\n",
"# from pydub import AudioSegment\n",
"\n",
"\n",
"# Root of the audio tree that gets indexed by the listing cell below.\n",
"absence_boats_folder = '/storage/Imagine_UC6_new/DATA/data_per_station_10_updated_metadata_extra_filter'\n",
"\n",
"# Folder that receives the generated listing files.\n",
"# (alternative location kept for reference: '/storage/Imagine_UC6/data_new_ais/dataset_files')\n",
"data_set_folder = '/srv/CLAP/data'\n",
"\n",
"# One listing file per split, plus the class list, all inside data_set_folder.\n",
"train_txt_file, test_txt_file, val_txt_file, class_txt_file = (\n",
"    os.path.join(data_set_folder, listing_name)\n",
"    for listing_name in ('train.txt', 'test.txt', 'val.txt', 'classes.txt')\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "63a2e796-4ea2-48f7-beef-e3a50969ae64",
"metadata": {},
"outputs": [],
"source": [
"def calculator(filename, max_number=10000):\n",
"    \"\"\"Turn the integer suffix of a file name into a score that falls as the number grows.\n",
"\n",
"    The last '_'-separated token of `filename`, cut at its first '.', is parsed as an\n",
"    integer, capped at `max_number`, and mapped linearly so that 0 gives 1.0 and any\n",
"    value of `max_number` or more gives 0.0.\n",
"\n",
"    Args:\n",
"        filename: File name or relative path ending in `_NNN.ext`, NNN being an integer.\n",
"        max_number: Cap applied to the parsed integer; must be positive.\n",
"\n",
"    Returns:\n",
"        The score rounded to three decimals.\n",
"\n",
"    Raises:\n",
"        ValueError: If the suffix is not an integer or `max_number` is not positive.\n",
"    \"\"\"\n",
"    if max_number <= 0:\n",
"        raise ValueError(f'max_number must be positive, got {max_number!r}')\n",
"\n",
"    suffix = filename.split('_')[-1].split('.')[0]  # 'a_b_2500.wav' -> '2500'\n",
"    try:\n",
"        number = int(suffix)\n",
"    except ValueError as exc:\n",
"        # Same exception type as before, but the message now names the offending file.\n",
"        raise ValueError(\n",
"            f'expected an integer suffix in {filename!r}, found {suffix!r}'\n",
"        ) from exc\n",
"\n",
"    number = min(number, max_number)\n",
"    return round((max_number - number) / max_number, 3)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "0dd122b6-8f79-4834-982b-520517d13753",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Processing files: 0it [00:00, ?it/s]\u001b[A\n",
"\n",
"Processing files: 100%|██████████| 732/732 [00:00<00:00, 42832.46it/s]\n",
"\n",
"Processing files: 100%|██████████| 5049/5049 [00:00<00:00, 52564.40it/s]\n",
"\n",
"Processing files: 100%|██████████| 726/726 [00:00<00:00, 44000.65it/s]\n",
"\n",
"Processing files: 100%|██████████| 288/288 [00:00<00:00, 34850.68it/s]\n",
"\n",
"Processing files: 100%|██████████| 293/293 [00:00<00:00, 32945.45it/s]\n",
"\n",
"Processing files: 0%| | 0/5646 [00:00<?, ?it/s]\u001b[A\n",
"Processing files: 100%|██████████| 5646/5646 [00:00<00:00, 47574.64it/s]\u001b[A\n",
"\n",
"Processing files: 100%|██████████| 725/725 [00:00<00:00, 41833.98it/s]\n",
"\n",
"Processing files: 100%|██████████| 288/288 [00:00<00:00, 34493.42it/s]\n",
"\n",
"Processing files: 100%|██████████| 1613/1613 [00:00<00:00, 40990.82it/s]\n",
"\n",
"Processing files: 100%|██████████| 144/144 [00:00<00:00, 47681.36it/s]\n",
"\n",
"Processing files: 100%|██████████| 531/531 [00:00<00:00, 37788.45it/s]\n",
"Writing training file: 100%|██████████| 13155/13155 [00:00<00:00, 232695.54it/s]\n",
"Writing testing file: 100%|██████████| 1457/1457 [00:00<00:00, 208662.58it/s]\n",
"Writing validation file: 100%|██████████| 1451/1451 [00:00<00:00, 206639.11it/s]\n",
"Writing classes file: 100%|██████████| 11/11 [00:00<00:00, 121734.42it/s]\n"
]
}
],
"source": [
"# Build the train/test/val listing files and the class list from the audio tree.\n",
"SHUFFLE_SEED = 42  # fixed so the listing order is the same on every run\n",
"CHECKPOINT_DIR = '.ipynb_checkpoints'\n",
"SPLIT_KEYS = ('train', 'test', 'val')\n",
"\n",
"\n",
"def _write_split_file(txt_path, rel_paths, desc):\n",
"    \"\"\"Write one 'relative/path score' line per entry of rel_paths to txt_path.\"\"\"\n",
"    with open(txt_path, 'w') as f_out:\n",
"        for rel_path in tqdm(rel_paths, desc=desc):\n",
"            rel_path = rel_path.replace('\\\\', '/')  # forward slashes on every platform\n",
"            f_out.write(rel_path + ' ' + str(calculator(rel_path)) + '\\n')\n",
"\n",
"\n",
"# Look the loader up outside the try block: a missing torchaudio import then\n",
"# raises NameError here instead of every file being reported as unreadable.\n",
"load_audio = torchaudio.load\n",
"\n",
"file_paths = []\n",
"skipped_paths = []\n",
"for root, dirs, files in os.walk(absence_boats_folder):\n",
"    # In-place pruning keeps os.walk from descending into checkpoint folders.\n",
"    dirs[:] = [d for d in dirs if d != CHECKPOINT_DIR]\n",
"    for file in tqdm(files, desc='Processing files', position=1, leave=True):\n",
"        file_path = os.path.join(root, file)\n",
"        relative_path = os.path.relpath(file_path, absence_boats_folder)\n",
"        # NOTE(review): only the listed path gets underscores, the file on disk\n",
"        # keeps its spaces -- confirm the consumer of these lists handles that.\n",
"        relative_path = relative_path.replace(' ', '_')\n",
"        try:\n",
"            # AudioSegment.from_file(file_path)\n",
"            load_audio(file_path)  # result discarded; only checks that the file loads\n",
"        except Exception as exc:  # not a bare except: Ctrl-C must still interrupt\n",
"            skipped_paths.append(file_path)\n",
"            tqdm.write(f'skipping {file_path}: {exc}')\n",
"        else:\n",
"            file_paths.append(relative_path)\n",
"\n",
"# Top-level folders, numbered consecutively with checkpoint folders left out.\n",
"# NOTE(review): the order comes from os.walk, which does not sort entries, so\n",
"# classes.txt may be ordered differently on another machine.\n",
"folder_names = next(os.walk(absence_boats_folder))[1]\n",
"folder_numbers = {\n",
"    name: index\n",
"    for index, name in enumerate(n for n in folder_names if n != CHECKPOINT_DIR)\n",
"}\n",
"\n",
"# Sort first, then shuffle with a private seeded generator: reproducible order\n",
"# without touching the global random state.\n",
"file_paths.sort()\n",
"random.Random(SHUFFLE_SEED).shuffle(file_paths)\n",
"\n",
"# A file belongs to a split when the split name occurs anywhere in its relative path.\n",
"split_files = {key: [path for path in file_paths if key in path] for key in SPLIT_KEYS}\n",
"train_files = split_files['train']\n",
"test_files = split_files['test']\n",
"val_files = split_files['val']\n",
"\n",
"# Substring matching can place one file in several splits (or in none); surface it\n",
"# instead of silently writing overlapping listings.\n",
"hits_per_file = [sum(key in path for key in SPLIT_KEYS) for path in file_paths]\n",
"n_multi = sum(hits > 1 for hits in hits_per_file)\n",
"n_none = sum(hits == 0 for hits in hits_per_file)\n",
"if n_multi or n_none:\n",
"    print(f'WARNING: {n_multi} file(s) match more than one split, {n_none} match none')\n",
"\n",
"# Create the training, testing and validation text files\n",
"_write_split_file(train_txt_file, train_files, 'Writing training file')\n",
"_write_split_file(test_txt_file, test_files, 'Writing testing file')\n",
"_write_split_file(val_txt_file, val_files, 'Writing validation file')\n",
"\n",
"# Create the classes text file\n",
"with open(class_txt_file, 'w') as f_class:\n",
"    for label in tqdm(folder_numbers, desc='Writing classes file'):\n",
"        f_class.write(str(label) + '\\n')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
6 changes: 6 additions & 0 deletions .ipynb_checkpoints/captioning-checkpoint.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"cells": [],
"metadata": {},
"nbformat": 4,
"nbformat_minor": 5
}
Loading

0 comments on commit 74c8d7f

Please sign in to comment.