humanai-foundation · agentksimha · Jan 27, 2026 · Mar 13, 2026 · Mar 13, 2026 · Mar 13, 2026
diff --git a/RenAIssance_CRNN_OCR_Shashank_Shekhar_Singh/Dataset_Generation.ipynb b/RenAIssance_CRNN_OCR_Shashank_Shekhar_Singh/Dataset_Generation.ipynb
@@ -9,7 +9,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "metadata": {
     "colab": {
      "base_uri": "https://localhost:8080/",
@@ -177,6 +177,18 @@
     "from tensorflow.keras.utils import plot_model"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Create the directory (and any missing parents); a no-op if it already exists\n",
+    "def ensure_dir(path):\n",
+    "    # exist_ok=True avoids the check-then-create race; raises FileExistsError if path is a regular file\n",
+    "    os.makedirs(path, exist_ok=True)"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {
@@ -284,15 +296,13 @@
     "\n",
     "pdf_path1 = \"./Padilla - Nobleza virtuosa_testExtract.pdf\"  # Path to the PDF file\n",
     "unproc_images_folder_1 = \"./preprocessing/imgsUnProcessed1\"  # Output folder to save the images\n",
-    "if not os.path.exists(unproc_images_folder_1):\n",
-    "    os.makedirs(unproc_images_folder_1)\n",
+    "ensure_dir(unproc_images_folder_1)\n",
     "pdf_to_images(pdf_path1, unproc_images_folder_1)\n",
     "\n",
     "\n",
     "pdf_path2 = \"./Padilla - 2 Noble perfecto_Extract.pdf\"  # Path to the PDF file\n",
     "unproc_images_folder_2 = \"./preprocessing/imgsUnProcessed2\"  # Output folder to save the images\n",
-    "if not os.path.exists(unproc_images_folder_2):\n",
-    "    os.makedirs(unproc_images_folder_2)\n",
+    "ensure_dir(unproc_images_folder_2)\n",
     "pdf_to_images(pdf_path2, unproc_images_folder_2)"
    ]
   },
@@ -339,15 +349,13 @@
     "\n",
     "unproc_images_folder_1 = \"./preprocessing/imgsUnProcessed1\"\n",
     "proc_images_folder_1 = \"./preprocessing/imgsForAllPages1\"\n",
-    "if not os.path.exists(proc_images_folder_1):\n",
-    "    os.makedirs(proc_images_folder_1)\n",
+    "ensure_dir(proc_images_folder_1)\n",
     "process_images(unproc_images_folder_1, proc_images_folder_1)\n",
     "\n",
     "\n",
     "unproc_images_folder_2 = \"./preprocessing/imgsUnProcessed2\"\n",
     "proc_images_folder_2 = \"./preprocessing/imgsForAllPages2\"\n",
-    "if not os.path.exists(proc_images_folder_2):\n",
-    "    os.makedirs(proc_images_folder_2)\n",
+    "ensure_dir(proc_images_folder_2)\n",
     "process_images(unproc_images_folder_2, proc_images_folder_2)\n",
     "\n",
     "print(\"Image processing complete!\")"
@@ -459,14 +467,12 @@
     "\n",
     "bound_box_applied1 = './preprocessing/BoundBoxApplied1/'\n",
     "bound_box_sorted1 = \"./preprocessing/BoundBoxSorted1\"\n",
-    "if not os.path.exists(bound_box_sorted1):\n",
-    "    os.makedirs(bound_box_sorted1)\n",
+    "ensure_dir(bound_box_sorted1)\n",
     "sort_bounding_boxes(bound_box_applied1, bound_box_sorted1)\n",
     "\n",
     "bound_box_applied2 = './preprocessing/BoundBoxApplied2/'\n",
     "bound_box_sorted2 = \"./preprocessing/BoundBoxSorted2\"\n",
-    "if not os.path.exists(bound_box_sorted2):\n",
-    "    os.makedirs(bound_box_sorted2)\n",
+    "ensure_dir(bound_box_sorted2)\n",
     "sort_bounding_boxes(bound_box_applied2, bound_box_sorted2)"
    ]
   },
@@ -546,17 +552,15 @@
     "bound_box_sorted1 = './preprocessing/BoundBoxSorted1'\n",
     "proc_grnd_truth1 = \"./preprocessing/textSplitted1\"\n",
     "TEST_SIZE=6\n",
-    "if not os.path.exists(proc_grnd_truth1):\n",
-    "    os.makedirs(proc_grnd_truth1)\n",
+    "ensure_dir(proc_grnd_truth1)\n",
     "process_textfiles(grnd_truth1, bound_box_sorted1, proc_grnd_truth1, TEST_SIZE)\n",
     "\n",
     "\n",
     "grnd_truth2 = \"./preprocessing/all_text2.txt\" \n",
     "bound_box_sorted2 = './preprocessing/BoundBoxSorted2'\n",
     "proc_grnd_truth2 = \"./preprocessing/textSplitted2\"\n",
     "TEST_SIZE=0\n",
-    "if not os.path.exists(proc_grnd_truth2):\n",
-    "    os.makedirs(proc_grnd_truth2)\n",
+    "ensure_dir(proc_grnd_truth2)\n",
     "process_textfiles(grnd_truth2, bound_box_sorted2, proc_grnd_truth2, TEST_SIZE)\n",
     "print(\"Text splitting complete!\")"
    ]
@@ -598,24 +602,22 @@
     "proc_images_folder_1 = './preprocessing/imgsForAllPages1'\n",
     "bound_box_sorted1 = './preprocessing/BoundBoxSorted1'\n",
     "proc_grnd_truth1 = './preprocessing/textSplitted1'\n",
-    "training_data1 = './traning_data1'\n",
+    "training_data1 = './training_data1'\n",
     "test_size=6\n",
     "train_size = count_files_in_folder(proc_images_folder_1, ['.png', '.jpeg', '.jpg'])- test_size\n",
     "print(\"Training pages \" +  str(train_size))\n",
-    "if not os.path.exists(training_data1):\n",
-    "    os.makedirs(training_data1)\n",
+    "ensure_dir(training_data1)\n",
     "apply_extraction_to_folder_for_train(proc_images_folder_1, bound_box_sorted1, proc_grnd_truth1, training_data1, train_size)\n",
     "\n",
     "\n",
     "proc_images_folder_2 = './preprocessing/imgsForAllPages2'\n",
     "bound_box_sorted2 = './preprocessing/BoundBoxSorted2'\n",
     "proc_grnd_truth2 = './preprocessing/textSplitted2'\n",
-    "training_data2 = './traning_data2'\n",
+    "training_data2 = './training_data2'\n",
     "test_size = 0\n",
-    "train_size = count_files_in_folder(proc_images_folder_1, ['.png', '.jpeg', '.jpg'])- test_size\n",
+    "train_size = count_files_in_folder(proc_images_folder_2, ['.png', '.jpeg', '.jpg'])- test_size\n",
     "print(\"Training pages \" +  str(train_size))\n",
-    "if not os.path.exists(training_data2):\n",
-    "    os.makedirs(training_data2)\n",
+    "ensure_dir(training_data2)\n",
     "apply_extraction_to_folder_for_train(proc_images_folder_2, bound_box_sorted2, proc_grnd_truth2, training_data2, train_size) # better to send no. of pages given in transcription"
    ]
   }

diff --git a/RenAIssance_CRNN_OCR_Shashank_Shekhar_Singh/Readme.md b/RenAIssance_CRNN_OCR_Shashank_Shekhar_Singh/Readme.md
@@ -1,6 +1,6 @@
 # Historical Text Recognition using CRNN Model
 
-This project aims to address the challenge of text recognition from `historical Spanish printed sources` dating back to the `seventeenth century`, a domain where existing Optical Character Recognition (OCR) tools often fail due to the complexity and variability of the texts. Leveraging hybrid end-to-end models based on a combination of CNN and RNN architectures, namely `CNN-RNN`, our research seeks to develop advanced machine learning techniques capable of accurately transcribing non-standard printed text. This project is a part of the `RenAIssance project`, a large project under the HumanAI organization. I am `Shashank Shekhar Singh`, a third year student from `IIT BHU, India` and have been developing this project as a part of the `Google Summer of Code program' 2024`.
+This project aims to address the challenge of text recognition from `historical Spanish printed sources` dating back to the `seventeenth century`, a domain where existing Optical Character Recognition (OCR) tools often fail due to the complexity and variability of the texts. Leveraging hybrid end-to-end models based on a combination of CNN and RNN architectures, namely `CNN-RNN`, our research seeks to develop advanced machine learning techniques capable of accurately transcribing non-standard printed text. This project is a part of the `RenAIssance project`, a large project under the HumanAI organization. I am `Shashank Shekhar Singh`, a third-year student from `IIT BHU, India` and have been developing this project as a part of the `Google Summer of Code program 2024`.
 
 <p align="center">
   <img src="images/humanai_logo.jpg" alt="HumanAI" style="height: 100px; margin-right: 20px;"/>
@@ -108,4 +108,4 @@ This project is licensed under the MIT License. See the [LICENSE](LICENSE) file
 - [Google Summer of Code 2024 Project](https://summerofcode.withgoogle.com/programs/2024/projects/lg7vQeMM)
 - [HumanAI Foundation](https://humanai.foundation/)
 
-Feel free to fork the repository and submit pull requests. For major changes, please open an issue to discuss your ideas first. Contributions are always welcomed!
+Feel free to fork the repository and submit pull requests. For major changes, please open an issue to discuss your ideas first. Contributions are always welcome!
diff --git a/RenAIssance_CRNN_OCR_Shashank_Shekhar_Singh/environment.yaml b/RenAIssance_CRNN_OCR_Shashank_Shekhar_Singh/environment.yaml
@@ -0,0 +1,18 @@
+name: renaissance-crnn-ocr
+channels:
+  - conda-forge
+  - defaults
+dependencies:
+  - python>=3.9
+  - numpy>=1.23.0
+  - pandas>=2.0.0
+  - matplotlib>=3.7.0
+  - requests>=2.28.0
+  - tqdm>=4.65.0
+  - pip
+  - pip:
+      - tensorflow>=2.12.0
+      - Pillow>=9.0.0
+      - opencv-python>=4.7.0
+      - PyMuPDF>=1.22.0       # provides the `fitz` module
+      - python-docx>=0.8.11   # provides the `docx` module
diff --git a/RenAIssance_CRNN_OCR_Shashank_Shekhar_Singh/requirements.txt b/RenAIssance_CRNN_OCR_Shashank_Shekhar_Singh/requirements.txt
@@ -0,0 +1,22 @@
+# RenAIssance_CRNN_OCR_Shashank_Shekhar_Singh - Python Dependencies
+# Python >= 3.9 recommended
+
+# ── Deep Learning ─────────────────────────────────────────────────────────────
+tensorflow>=2.12.0
+
+# ── Image Processing ──────────────────────────────────────────────────────────
+Pillow>=9.0.0
+opencv-python>=4.7.0
+PyMuPDF>=1.22.0        # provides the `fitz` module
+
+# ── Data Handling ─────────────────────────────────────────────────────────────
+numpy>=1.23.0
+pandas>=2.0.0
+python-docx>=0.8.11    # provides the `docx` module
+
+# ── Visualisation ─────────────────────────────────────────────────────────────
+matplotlib>=3.7.0
+
+# ── Utilities ─────────────────────────────────────────────────────────────────
+requests>=2.28.0
+tqdm>=4.65.0
diff --git a/RenAIssance_SelfSupervisedLearning_OCR_YukinoriYamamoto/Decoder.py b/RenAIssance_SelfSupervisedLearning_OCR_YukinoriYamamoto/Decoder.py
@@ -27,18 +27,20 @@ def __init__(self, hidden_size, output_size, dropout=0.1):
         super(LSTMAttnDecoder, self).__init__()
         self.hidden_size = hidden_size
         self.output_size = output_size
-        self.dropout = dropout
+        self.dropout_rate = dropout
 
         self.embedding = nn.Embedding(output_size, hidden_size)
         # embedding: (output_size, hidden_size)
-        self.dropout = nn.Dropout(self.dropout)
+        self.dropout = nn.Dropout(self.dropout_rate)
         self.attention = Attention(hidden_size)
         self.lstm = nn.LSTM(hidden_size * 2, hidden_size, num_layers=2, bidirectional=True, batch_first=True)
         self.out = nn.Linear(hidden_size * 2, output_size)
         # out: (4*hidden_size, output_size)
 
     def forward(self, input_step, last_hidden, encoder_outputs):
         # input_step: (batch_size, 1)
+        if (input_step < 0).any() or (input_step >= self.output_size).any():
+            raise ValueError(f"input_step contains indices out of range [0, {self.output_size})")
         embedded = self.embedding(input_step)
         # embedded: (batch_size, 1, hidden_size)
         embedded = self.dropout(embedded)
@@ -57,4 +59,4 @@ def forward(self, input_step, last_hidden, encoder_outputs):
         # output: (batch_size, hidden_size) if seq_length=1
         output = self.out(torch.cat((output, context), 1))
         # output: (batch_size, output_size)
-        return output, hidden
+        return output, hidden
diff --git a/RenAIssance_SelfSupervisedLearning_OCR_YukinoriYamamoto/ResNet.py b/RenAIssance_SelfSupervisedLearning_OCR_YukinoriYamamoto/ResNet.py
@@ -1,5 +1,6 @@
 from torch import nn
 
+
 class BasicBlock(nn.Module):
     expansion = 1
 
@@ -36,7 +37,7 @@ def forward(self, x):
 
 
 class ResNet18(nn.Module):
-    def __init__(self, num_classes=1000):
+    def __init__(self):
         super(ResNet18, self).__init__()
         self.in_channels = 64
 
@@ -53,34 +54,29 @@ def __init__(self, num_classes=1000):
 
     def _make_layer(self, block, out_channels, num_blocks, stride):
         strides = [stride] + [1] * (num_blocks - 1)
-        layers = nn.ModuleList()
+        layers = []
 
         for stride in strides:
             layers.append(block(self.in_channels, out_channels, stride))
             self.in_channels = out_channels * block.expansion
 
-        return layers
+        return nn.Sequential(*layers)
 
     def forward(self, x):
         x = self.conv1(x)
         x = self.bn1(x)
         x = self.relu(x)
         x = self.maxpool(x)
-        for layer in self.layer1:
-            x = layer(x)
-        for layer in self.layer2:
-            x = layer(x)
-        for layer in self.layer3:
-            x = layer(x)
-        for layer in self.layer4:
-            x = layer(x)
-
+        x = self.layer1(x)
+        x = self.layer2(x)
+        x = self.layer3(x)
+        x = self.layer4(x)
         x = self.avgpool(x)
         return x
 
 
 class ResNet34(nn.Module):
-    def __init__(self, num_classes=1000):
+    def __init__(self):
         super(ResNet34, self).__init__()
         self.in_channels = 64
 
@@ -97,34 +93,30 @@ def __init__(self, num_classes=1000):
 
     def _make_layer(self, block, out_channels, num_blocks, stride):
         strides = [stride] + [1] * (num_blocks - 1)
-        layers = nn.ModuleList()
+        layers = []
 
         for stride in strides:
             layers.append(block(self.in_channels, out_channels, stride))
             self.in_channels = out_channels * block.expansion
 
-        return layers
+        return nn.Sequential(*layers)
 
     def forward(self, x):
         x = self.conv1(x)
         x = self.bn1(x)
         x = self.relu(x)
         x = self.maxpool(x)
-        for layer in self.layer1:
-            x = layer(x)
-        for layer in self.layer2:
-            x = layer(x)
-        for layer in self.layer3:
-            x = layer(x)
-        for layer in self.layer4:
-            x = layer(x)
-
+        x = self.layer1(x)
+        x = self.layer2(x)
+        x = self.layer3(x)
+        x = self.layer4(x)
         x = self.avgpool(x)
         return x
 
 
 class Bottleneck(nn.Module):
     expansion = 4
+
     def __init__(self, in_channels, out_channels, stride=(1, 1)):
         super(Bottleneck, self).__init__()
         self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False)
@@ -137,6 +129,7 @@ def __init__(self, in_channels, out_channels, stride=(1, 1)):
         self.stride = stride
         self.shortcut_conv = nn.Conv2d(in_channels, out_channels * self.expansion, kernel_size=1, stride=stride, bias=False)
         self.shortcut_bn = nn.BatchNorm2d(out_channels * self.expansion)
+
     def forward(self, x):
         identity = x
         out = self.conv1(x)
@@ -154,6 +147,7 @@ def forward(self, x):
         out = self.relu(out)
         return out
 
+
 class ResNet50(nn.Module):
     def __init__(self):
         super(ResNet50, self).__init__()
@@ -168,31 +162,27 @@ def __init__(self):
         self.layer4 = self._make_layer(Bottleneck, 512, 3, stride=(2, 1))
         self.last_conv = nn.Conv2d(2048, 512, kernel_size=1, stride=1, bias=False)
         self.avgpool = nn.AvgPool2d(kernel_size=(2, 1), stride=(2, 1))
-    
+
     def _make_layer(self, block, out_channels, num_blocks, stride):
         strides = [stride] + [1] * (num_blocks - 1)
-        layers = nn.ModuleList()
+        layers = []
         for stride in strides:
             layers.append(block(self.in_channels, out_channels, stride))
             self.in_channels = out_channels * block.expansion
-        return layers
-    
+        return nn.Sequential(*layers)
+
     def forward(self, x):
         x = self.conv1(x)
         x = self.bn1(x)
         x = self.relu(x)
         x = self.maxpool(x)
-        for layer in self.layer1:
-            x = layer(x)
+        x = self.layer1(x)
         # print("layer 1", x.shape)
-        for layer in self.layer2:
-            x = layer(x)
+        x = self.layer2(x)
         # print("layer 2", x.shape)
-        for layer in self.layer3:
-            x = layer(x)
+        x = self.layer3(x)
         # print("layer 3", x.shape)
-        for layer in self.layer4:
-            x = layer(x)
+        x = self.layer4(x)
         # print("layer 4", x.shape)
         x = self.last_conv(x)
         x = self.avgpool(x)