CLAP backup

lifewatch · Apr 11, 2024 · 74c8d7f · 74c8d7f
1 parent f84f0a5
commit 74c8d7f
Show file tree

Hide file tree

Showing 77 changed files with 178,168 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,4 @@
+roi/BioLingual/model/*
+roi/BioLingual/output/*
+roi/BioLingual/processor/*
+roi/BioLingual/test_model/*
diff --git a/.ipynb_checkpoints/GPU-checkpoint.ipynb b/.ipynb_checkpoints/GPU-checkpoint.ipynb
@@ -0,0 +1,6 @@
+{
+ "cells": [],
+ "metadata": {},
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/.ipynb_checkpoints/Untitled-checkpoint.ipynb b/.ipynb_checkpoints/Untitled-checkpoint.ipynb
@@ -0,0 +1,172 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "51ae2204-dbcd-4377-b612-f156ee6ca3bb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import random\n",
+    "\n",
+    "import torchaudio\n",
+    "from tqdm import tqdm\n",
+    "# from pydub import AudioSegment\n",
+    "\n",
+    "\n",
+    "# Root folder that is walked for audio files; its immediate sub-folders give the class list.\n",
+    "absence_boats_folder = '/storage/Imagine_UC6_new/DATA/data_per_station_10_updated_metadata_extra_filter'\n",
+    "\n",
+    "# Folder that receives the generated manifest files.\n",
+    "# Earlier location: '/storage/Imagine_UC6/data_new_ais/dataset_files'\n",
+    "data_set_folder = '/srv/CLAP/data'\n",
+    "\n",
+    "train_txt_file, test_txt_file, val_txt_file, class_txt_file = (\n",
+    "    os.path.join(data_set_folder, manifest_name)\n",
+    "    for manifest_name in ('train.txt', 'test.txt', 'val.txt', 'classes.txt')\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "63a2e796-4ea2-48f7-beef-e3a50969ae64",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def calculator(filename, max_value=10000):\n",
+    "    \"\"\"Map the trailing integer of a file name to a score between 0 and 1.\n",
+    "\n",
+    "    The name is split on '_'; the last piece, up to its first '.', is parsed\n",
+    "    as an integer. Values above max_value are capped, then the value is mapped\n",
+    "    linearly so that 0 gives 1.0 and max_value (or more) gives 0.0.\n",
+    "\n",
+    "    NOTE(review): the number presumably encodes a distance to the nearest\n",
+    "    vessel - confirm its meaning and unit against the file-naming code.\n",
+    "\n",
+    "    Args:\n",
+    "        filename: File name or path ending in '_<integer>.<extension>'.\n",
+    "        max_value: Cap and normalisation constant (default 10000).\n",
+    "\n",
+    "    Returns:\n",
+    "        The score rounded to three decimals.\n",
+    "\n",
+    "    Raises:\n",
+    "        ValueError: If the trailing piece of the name is not an integer.\n",
+    "    \"\"\"\n",
+    "    # Everything after the last '_' and before the first '.' (drops \".wav\").\n",
+    "    suffix = filename.split('_')[-1].split('.')[0]\n",
+    "    try:\n",
+    "        number = int(suffix)\n",
+    "    except ValueError:\n",
+    "        raise ValueError(f'cannot read a trailing integer from file name {filename!r}') from None\n",
+    "\n",
+    "    number = min(number, max_value)\n",
+    "    return round((max_value - number) / max_value, 3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "0dd122b6-8f79-4834-982b-520517d13753",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "Processing files: 0it [00:00, ?it/s]\u001b[A\n",
+      "\n",
+      "Processing files: 100%|██████████| 732/732 [00:00<00:00, 42832.46it/s]\n",
+      "\n",
+      "Processing files: 100%|██████████| 5049/5049 [00:00<00:00, 52564.40it/s]\n",
+      "\n",
+      "Processing files: 100%|██████████| 726/726 [00:00<00:00, 44000.65it/s]\n",
+      "\n",
+      "Processing files: 100%|██████████| 288/288 [00:00<00:00, 34850.68it/s]\n",
+      "\n",
+      "Processing files: 100%|██████████| 293/293 [00:00<00:00, 32945.45it/s]\n",
+      "\n",
+      "Processing files:   0%|          | 0/5646 [00:00<?, ?it/s]\u001b[A\n",
+      "Processing files: 100%|██████████| 5646/5646 [00:00<00:00, 47574.64it/s]\u001b[A\n",
+      "\n",
+      "Processing files: 100%|██████████| 725/725 [00:00<00:00, 41833.98it/s]\n",
+      "\n",
+      "Processing files: 100%|██████████| 288/288 [00:00<00:00, 34493.42it/s]\n",
+      "\n",
+      "Processing files: 100%|██████████| 1613/1613 [00:00<00:00, 40990.82it/s]\n",
+      "\n",
+      "Processing files: 100%|██████████| 144/144 [00:00<00:00, 47681.36it/s]\n",
+      "\n",
+      "Processing files: 100%|██████████| 531/531 [00:00<00:00, 37788.45it/s]\n",
+      "Writing training file: 100%|██████████| 13155/13155 [00:00<00:00, 232695.54it/s]\n",
+      "Writing testing file: 100%|██████████| 1457/1457 [00:00<00:00, 208662.58it/s]\n",
+      "Writing validation file: 100%|██████████| 1451/1451 [00:00<00:00, 206639.11it/s]\n",
+      "Writing classes file: 100%|██████████| 11/11 [00:00<00:00, 121734.42it/s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "def write_manifest(txt_path, rel_paths, desc):\n",
+    "    \"\"\"Write one '<relative path> <score>' line per entry of rel_paths to txt_path.\"\"\"\n",
+    "    with open(txt_path, 'w') as f_out:\n",
+    "        for rel_path in tqdm(rel_paths, desc=desc):\n",
+    "            # Manifest entries use forward slashes even if the paths were collected on Windows.\n",
+    "            rel_path = rel_path.replace('\\\\', '/')\n",
+    "            f_out.write(rel_path + ' ' + str(calculator(rel_path)) + '\\n')\n",
+    "\n",
+    "\n",
+    "# Collect every file under the dataset root that torchaudio can decode.\n",
+    "file_paths = []\n",
+    "for root, dirs, files in os.walk(absence_boats_folder):\n",
+    "    for file in tqdm(files, desc=\"Processing files\", position=1, leave=True):\n",
+    "        file_path = os.path.join(root, file)\n",
+    "        relative_path = os.path.relpath(file_path, absence_boats_folder)\n",
+    "        # NOTE(review): spaces are replaced only in the manifest entry, not on disk -\n",
+    "        # confirm that whatever reads the manifest applies the same mapping.\n",
+    "        relative_path = relative_path.replace(\" \", \"_\")\n",
+    "        try:\n",
+    "            torchaudio.load(file_path)\n",
+    "        except Exception as exc:\n",
+    "            # Best effort: an undecodable file is left out, but say which one and why.\n",
+    "            # A bare except would also hide Ctrl-C and a missing torchaudio import.\n",
+    "            print(f\"skipping {file_path}: {exc!r}\")\n",
+    "        else:\n",
+    "            file_paths.append(relative_path)\n",
+    "\n",
+    "# Class labels are the immediate sub-folders of the dataset root, numbered in\n",
+    "# listing order with the Jupyter checkpoint folder left out.\n",
+    "folder_names = next(os.walk(absence_boats_folder))[1]\n",
+    "class_names = [name for name in folder_names if name != \".ipynb_checkpoints\"]\n",
+    "folder_numbers = {name: index for index, name in enumerate(class_names)}\n",
+    "\n",
+    "# Split the files into training, testing, and validation sets.\n",
+    "random.shuffle(file_paths)\n",
+    "\n",
+    "# Membership is a substring test on the whole relative path, so a path that\n",
+    "# contains more than one keyword is written to more than one manifest.\n",
+    "split_keywords = ('train', 'test', 'val')\n",
+    "train_files = [path for path in file_paths if 'train' in path]\n",
+    "test_files = [path for path in file_paths if 'test' in path]\n",
+    "val_files = [path for path in file_paths if 'val' in path]\n",
+    "\n",
+    "ambiguous_paths = [\n",
+    "    path for path in file_paths\n",
+    "    if sum(keyword in path for keyword in split_keywords) > 1\n",
+    "]\n",
+    "if ambiguous_paths:\n",
+    "    print(f\"warning: {len(ambiguous_paths)} paths match more than one split keyword, e.g. {ambiguous_paths[0]}\")\n",
+    "\n",
+    "write_manifest(train_txt_file, train_files, \"Writing training file\")\n",
+    "write_manifest(test_txt_file, test_files, \"Writing testing file\")\n",
+    "write_manifest(val_txt_file, val_files, \"Writing validation file\")\n",
+    "\n",
+    "# Create the classes text file: one class name per line, in index order.\n",
+    "with open(class_txt_file, 'w') as f_class:\n",
+    "    for label in tqdm(folder_numbers, desc=\"Writing classes file\"):\n",
+    "        f_class.write(str(label) + '\\n')"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/.ipynb_checkpoints/captioning-checkpoint.ipynb b/.ipynb_checkpoints/captioning-checkpoint.ipynb
@@ -0,0 +1,6 @@
+{
+ "cells": [],
+ "metadata": {},
+ "nbformat": 4,
+ "nbformat_minor": 5
+}