Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
Expand Down Expand Up @@ -177,6 +177,18 @@
"from tensorflow.keras.utils import plot_model"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Helper function to create a directory if it does not already exist\n",
"def ensure_dir(path):\n",
" if not os.path.exists(path):\n",
" os.makedirs(path)"
]
},
{
"cell_type": "markdown",
"metadata": {
Expand Down Expand Up @@ -284,15 +296,13 @@
"\n",
"pdf_path1 = \"./Padilla - Nobleza virtuosa_testExtract.pdf\" # Path to the PDF file\n",
"unproc_images_folder_1 = \"./preprocessing/imgsUnProcessed1\" # Output folder to save the images\n",
"if not os.path.exists(unproc_images_folder_1):\n",
" os.makedirs(unproc_images_folder_1)\n",
"ensure_dir(unproc_images_folder_1)\n",
"pdf_to_images(pdf_path1, unproc_images_folder_1)\n",
"\n",
"\n",
"pdf_path2 = \"./Padilla - 2 Noble perfecto_Extract.pdf\" # Path to the PDF file\n",
"unproc_images_folder_2 = \"./preprocessing/imgsUnProcessed2\" # Output folder to save the images\n",
"if not os.path.exists(unproc_images_folder_2):\n",
" os.makedirs(unproc_images_folder_2)\n",
"ensure_dir(unproc_images_folder_2)\n",
"pdf_to_images(pdf_path2, unproc_images_folder_2)"
]
},
Expand Down Expand Up @@ -339,15 +349,13 @@
"\n",
"unproc_images_folder_1 = \"./preprocessing/imgsUnProcessed1\"\n",
"proc_images_folder_1 = \"./preprocessing/imgsForAllPages1\"\n",
"if not os.path.exists(proc_images_folder_1):\n",
" os.makedirs(proc_images_folder_1)\n",
"ensure_dir(proc_images_folder_1)\n",
"process_images(unproc_images_folder_1, proc_images_folder_1)\n",
"\n",
"\n",
"unproc_images_folder_2 = \"./preprocessing/imgsUnProcessed2\"\n",
"proc_images_folder_2 = \"./preprocessing/imgsForAllPages2\"\n",
"if not os.path.exists(proc_images_folder_2):\n",
" os.makedirs(proc_images_folder_2)\n",
"ensure_dir(proc_images_folder_2)\n",
"process_images(unproc_images_folder_2, proc_images_folder_2)\n",
"\n",
"print(\"Image processing complete!\")"
Expand Down Expand Up @@ -459,14 +467,12 @@
"\n",
"bound_box_applied1 = './preprocessing/BoundBoxApplied1/'\n",
"bound_box_sorted1 = \"./preprocessing/BoundBoxSorted1\"\n",
"if not os.path.exists(bound_box_sorted1):\n",
" os.makedirs(bound_box_sorted1)\n",
"ensure_dir(bound_box_sorted1)\n",
"sort_bounding_boxes(bound_box_applied1, bound_box_sorted1)\n",
"\n",
"bound_box_applied2 = './preprocessing/BoundBoxApplied2/'\n",
"bound_box_sorted2 = \"./preprocessing/BoundBoxSorted2\"\n",
"if not os.path.exists(bound_box_sorted2):\n",
" os.makedirs(bound_box_sorted2)\n",
"ensure_dir(bound_box_sorted2)\n",
"sort_bounding_boxes(bound_box_applied2, bound_box_sorted2)"
]
},
Expand Down Expand Up @@ -546,17 +552,15 @@
"bound_box_sorted1 = './preprocessing/BoundBoxSorted1'\n",
"proc_grnd_truth1 = \"./preprocessing/textSplitted1\"\n",
"TEST_SIZE=6\n",
"if not os.path.exists(proc_grnd_truth1):\n",
" os.makedirs(proc_grnd_truth1)\n",
"ensure_dir(proc_grnd_truth1)\n",
"process_textfiles(grnd_truth1, bound_box_sorted1, proc_grnd_truth1, TEST_SIZE)\n",
"\n",
"\n",
"grnd_truth2 = \"./preprocessing/all_text2.txt\" \n",
"bound_box_sorted2 = './preprocessing/BoundBoxSorted2'\n",
"proc_grnd_truth2 = \"./preprocessing/textSplitted2\"\n",
"TEST_SIZE=0\n",
"if not os.path.exists(proc_grnd_truth2):\n",
" os.makedirs(proc_grnd_truth2)\n",
"ensure_dir(proc_grnd_truth2)\n",
"process_textfiles(grnd_truth2, bound_box_sorted2, proc_grnd_truth2, TEST_SIZE)\n",
"print(\"Text splitting complete!\")"
]
Expand Down Expand Up @@ -598,24 +602,22 @@
"proc_images_folder_1 = './preprocessing/imgsForAllPages1'\n",
"bound_box_sorted1 = './preprocessing/BoundBoxSorted1'\n",
"proc_grnd_truth1 = './preprocessing/textSplitted1'\n",
"training_data1 = './traning_data1'\n",
"training_data1 = './training_data1'\n",
"test_size=6\n",
"train_size = count_files_in_folder(proc_images_folder_1, ['.png', '.jpeg', '.jpg'])- test_size\n",
"print(\"Training pages \" + str(train_size))\n",
"if not os.path.exists(training_data1):\n",
" os.makedirs(training_data1)\n",
"ensure_dir(training_data1)\n",
"apply_extraction_to_folder_for_train(proc_images_folder_1, bound_box_sorted1, proc_grnd_truth1, training_data1, train_size)\n",
"\n",
"\n",
"proc_images_folder_2 = './preprocessing/imgsForAllPages2'\n",
"bound_box_sorted2 = './preprocessing/BoundBoxSorted2'\n",
"proc_grnd_truth2 = './preprocessing/textSplitted2'\n",
"training_data2 = './traning_data2'\n",
"training_data2 = './training_data2'\n",
"test_size = 0\n",
"train_size = count_files_in_folder(proc_images_folder_1, ['.png', '.jpeg', '.jpg'])- test_size\n",
"train_size = count_files_in_folder(proc_images_folder_2, ['.png', '.jpeg', '.jpg'])- test_size\n",
"print(\"Training pages \" + str(train_size))\n",
"if not os.path.exists(training_data2):\n",
" os.makedirs(training_data2)\n",
"ensure_dir(training_data2)\n",
"apply_extraction_to_folder_for_train(proc_images_folder_2, bound_box_sorted2, proc_grnd_truth2, training_data2, train_size) # better to send no. of pages given in transcription"
]
}
Expand Down
4 changes: 2 additions & 2 deletions RenAIssance_CRNN_OCR_Shashank_Shekhar_Singh/Readme.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Historical Text Recognition using CRNN Model

This project aims to address the challenge of text recognition from `historical Spanish printed sources` dating back to the `seventeenth century`, a domain where existing Optical Character Recognition (OCR) tools often fail due to the complexity and variability of the texts. Leveraging hybrid end-to-end models based on a combination of CNN and RNN architectures, namely `CNN-RNN`, our research seeks to develop advanced machine learning techniques capable of accurately transcribing non-standard printed text. This project is a part of the `RenAIssance project`, a large project under the HumanAI organization. I am `Shashank Shekhar Singh`, a third year student from `IIT BHU, India` and have been developing this project as a part of the `Google Summer of Code program' 2024`.
This project aims to address the challenge of text recognition from `historical Spanish printed sources` dating back to the `seventeenth century`, a domain where existing Optical Character Recognition (OCR) tools often fail due to the complexity and variability of the texts. Leveraging hybrid end-to-end models based on a combination of CNN and RNN architectures, namely `CNN-RNN`, our research seeks to develop advanced machine learning techniques capable of accurately transcribing non-standard printed text. This project is a part of the `RenAIssance project`, a large project under the HumanAI organization. I am `Shashank Shekhar Singh`, a third-year student from `IIT BHU, India` and have been developing this project as a part of the `Google Summer of Code program 2024`.

<p align="center">
<img src="images/humanai_logo.jpg" alt="HumanAI" style="height: 100px; margin-right: 20px;"/>
Expand Down Expand Up @@ -108,4 +108,4 @@ This project is licensed under the MIT License. See the [LICENSE](LICENSE) file
- [Google Summer of Code 2024 Project](https://summerofcode.withgoogle.com/programs/2024/projects/lg7vQeMM)
- [HumanAI Foundation](https://humanai.foundation/)

Feel free to fork the repository and submit pull requests. For major changes, please open an issue to discuss your ideas first. Contributions are always welcomed!
Feel free to fork the repository and submit pull requests. For major changes, please open an issue to discuss your ideas first. Contributions are always welcome!
18 changes: 18 additions & 0 deletions RenAIssance_CRNN_OCR_Shashank_Shekhar_Singh/environment.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
name: renaissance-crnn-ocr
channels:
- conda-forge
- defaults
dependencies:
- python>=3.9
- numpy>=1.23.0
- pandas>=2.0.0
- matplotlib>=3.7.0
- requests>=2.28.0
- tqdm>=4.65.0
- pip
- pip:
- tensorflow>=2.12.0
- Pillow>=9.0.0
- opencv-python>=4.7.0
- PyMuPDF>=1.22.0 # provides the `fitz` module
- python-docx>=0.8.11 # provides the `docx` module
22 changes: 22 additions & 0 deletions RenAIssance_CRNN_OCR_Shashank_Shekhar_Singh/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# RenAIssance_CRNN_OCR_Shashank_Shekhar_Singh - Python Dependencies
# Python >= 3.9 recommended

# ── Deep Learning ─────────────────────────────────────────────────────────────
tensorflow>=2.12.0

# ── Image Processing ──────────────────────────────────────────────────────────
Pillow>=9.0.0
opencv-python>=4.7.0
PyMuPDF>=1.22.0 # provides the `fitz` module

# ── Data Handling ─────────────────────────────────────────────────────────────
numpy>=1.23.0
pandas>=2.0.0
python-docx>=0.8.11 # provides the `docx` module

# ── Visualisation ─────────────────────────────────────────────────────────────
matplotlib>=3.7.0

# ── Utilities ─────────────────────────────────────────────────────────────────
requests>=2.28.0
tqdm>=4.65.0
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,8 @@ class ContrastiveLearningDataset(Dataset):
def __init__(self, img_dir, crop_height_ratio=0.2, img_size=(64, 384)):
super().__init__()
self.img_size = img_size
assert os.path.isdir(img_dir)
if not os.path.isdir(img_dir):
raise FileNotFoundError(f"Image directory not found at '{img_dir}'")
self.filepaths = [
os.path.join(img_dir, filename) for filename in os.listdir(img_dir)
if os.path.isfile(os.path.join(img_dir, filename))
Expand Down Expand Up @@ -88,10 +89,13 @@ def __len__(self):
return len(self.filepaths)

def __getitem__(self, idx):
img_path = self.filepaths[idx]
if not os.path.exists(img_path):
raise FileNotFoundError(f"Image file not found at '{img_path}'")
try:
img = Image.open(self.filepaths[idx])
img = Image.open(img_path)
except IOError:
return "cannot identify image file '%s'", self.filepaths[idx]
raise IOError(f"Cannot identify or open image file '{img_path}'")
original = self.original_transform(img)
augmented = self.augmented_transform(img)
return {"original": original, "augmented": augmented}
Expand All @@ -100,7 +104,16 @@ def __getitem__(self, idx):
class DecoderDataset(Dataset):
def __init__(self, csv_file, img_dir, token_dict, img_size=(64, 384), max_length=20, transform=None):
self.img_dir = img_dir
self.annotations = pd.read_csv(csv_file)
if not os.path.exists(csv_file):
raise FileNotFoundError(f"CSV file not found at '{csv_file}'")
try:
self.annotations = pd.read_csv(csv_file)
except Exception as e:
raise ValueError(f"Failed to read CSV file '{csv_file}': {e}")
if self.annotations.empty:
raise ValueError(f"CSV file '{csv_file}' is empty")
if not os.path.isdir(img_dir):
raise FileNotFoundError(f"Image directory not found at '{img_dir}'")
self.token_dict = token_dict
self.max_length = max_length
self.transform = transforms.Compose([
Expand All @@ -120,7 +133,13 @@ def __len__(self):

def __getitem__(self, index):
img_name = self.annotations.iloc[index, 1]
image = Image.open(os.path.join(self.img_dir, img_name)) # Use PIL to read the image
img_path = os.path.join(self.img_dir, img_name)
if not os.path.exists(img_path):
raise FileNotFoundError(f"Image file not found at '{img_path}'")
try:
image = Image.open(img_path)
except IOError:
raise IOError(f"Cannot identify or open image file '{img_path}'")
image = self.transform(image) # Image is in CHW format now

label = self.annotations.iloc[index, 0]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,14 @@


def cosine_similarity(x, y):
    """Compute pairwise cosine similarities between two batched feature tensors.

    Both ``x`` and ``y`` are expected to be 3-D tensors of shape
    (batch, seq_len, feature_dim); the result has shape
    (batch, seq_len_x, seq_len_y), where entry (b, i, j) is the cosine
    similarity between ``x[b, i]`` and ``y[b, j]``.

    Raises:
        ValueError: if the batch sizes or feature dimensions of ``x`` and
            ``y`` do not agree (both are required for the batched mat-mul).
    """
    # Validate the two dimensions that torch.bmm needs to line up.
    batch_x, batch_y = x.shape[0], y.shape[0]
    if batch_x != batch_y:
        raise ValueError(
            f"Batch size mismatch: x has {batch_x}, y has {batch_y}"
        )
    feat_x, feat_y = x.shape[2], y.shape[2]
    if feat_x != feat_y:
        raise ValueError(
            f"Feature dimension mismatch: x has {feat_x}, y has {feat_y}"
        )
    # Unit-normalize along the feature axis so each dot product below is
    # exactly a cosine similarity.
    x_unit = normalize(x, dim=2)
    y_unit = normalize(y, dim=2)
    return torch.bmm(x_unit, y_unit.transpose(1, 2))
Expand Down
Loading