Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
Expand Down Expand Up @@ -177,6 +177,18 @@
"from tensorflow.keras.utils import plot_model"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Helper function to create a directory if it does not already exist\n",
"def ensure_dir(path):\n",
" if not os.path.exists(path):\n",
" os.makedirs(path)"
]
},
{
"cell_type": "markdown",
"metadata": {
Expand Down Expand Up @@ -284,15 +296,13 @@
"\n",
"pdf_path1 = \"./Padilla - Nobleza virtuosa_testExtract.pdf\" # Path to the PDF file\n",
"unproc_images_folder_1 = \"./preprocessing/imgsUnProcessed1\" # Output folder to save the images\n",
"if not os.path.exists(unproc_images_folder_1):\n",
" os.makedirs(unproc_images_folder_1)\n",
"ensure_dir(unproc_images_folder_1)\n",
"pdf_to_images(pdf_path1, unproc_images_folder_1)\n",
"\n",
"\n",
"pdf_path2 = \"./Padilla - 2 Noble perfecto_Extract.pdf\" # Path to the PDF file\n",
"unproc_images_folder_2 = \"./preprocessing/imgsUnProcessed2\" # Output folder to save the images\n",
"if not os.path.exists(unproc_images_folder_2):\n",
" os.makedirs(unproc_images_folder_2)\n",
"ensure_dir(unproc_images_folder_2)\n",
"pdf_to_images(pdf_path2, unproc_images_folder_2)"
]
},
Expand Down Expand Up @@ -339,15 +349,13 @@
"\n",
"unproc_images_folder_1 = \"./preprocessing/imgsUnProcessed1\"\n",
"proc_images_folder_1 = \"./preprocessing/imgsForAllPages1\"\n",
"if not os.path.exists(proc_images_folder_1):\n",
" os.makedirs(proc_images_folder_1)\n",
"ensure_dir(proc_images_folder_1)\n",
"process_images(unproc_images_folder_1, proc_images_folder_1)\n",
"\n",
"\n",
"unproc_images_folder_2 = \"./preprocessing/imgsUnProcessed2\"\n",
"proc_images_folder_2 = \"./preprocessing/imgsForAllPages2\"\n",
"if not os.path.exists(proc_images_folder_2):\n",
" os.makedirs(proc_images_folder_2)\n",
"ensure_dir(proc_images_folder_2)\n",
"process_images(unproc_images_folder_2, proc_images_folder_2)\n",
"\n",
"print(\"Image processing complete!\")"
Expand Down Expand Up @@ -459,14 +467,12 @@
"\n",
"bound_box_applied1 = './preprocessing/BoundBoxApplied1/'\n",
"bound_box_sorted1 = \"./preprocessing/BoundBoxSorted1\"\n",
"if not os.path.exists(bound_box_sorted1):\n",
" os.makedirs(bound_box_sorted1)\n",
"ensure_dir(bound_box_sorted1)\n",
"sort_bounding_boxes(bound_box_applied1, bound_box_sorted1)\n",
"\n",
"bound_box_applied2 = './preprocessing/BoundBoxApplied2/'\n",
"bound_box_sorted2 = \"./preprocessing/BoundBoxSorted2\"\n",
"if not os.path.exists(bound_box_sorted2):\n",
" os.makedirs(bound_box_sorted2)\n",
"ensure_dir(bound_box_sorted2)\n",
"sort_bounding_boxes(bound_box_applied2, bound_box_sorted2)"
]
},
Expand Down Expand Up @@ -546,17 +552,15 @@
"bound_box_sorted1 = './preprocessing/BoundBoxSorted1'\n",
"proc_grnd_truth1 = \"./preprocessing/textSplitted1\"\n",
"TEST_SIZE=6\n",
"if not os.path.exists(proc_grnd_truth1):\n",
" os.makedirs(proc_grnd_truth1)\n",
"ensure_dir(proc_grnd_truth1)\n",
"process_textfiles(grnd_truth1, bound_box_sorted1, proc_grnd_truth1, TEST_SIZE)\n",
"\n",
"\n",
"grnd_truth2 = \"./preprocessing/all_text2.txt\" \n",
"bound_box_sorted2 = './preprocessing/BoundBoxSorted2'\n",
"proc_grnd_truth2 = \"./preprocessing/textSplitted2\"\n",
"TEST_SIZE=0\n",
"if not os.path.exists(proc_grnd_truth2):\n",
" os.makedirs(proc_grnd_truth2)\n",
"ensure_dir(proc_grnd_truth2)\n",
"process_textfiles(grnd_truth2, bound_box_sorted2, proc_grnd_truth2, TEST_SIZE)\n",
"print(\"Text splitting complete!\")"
]
Expand Down Expand Up @@ -598,24 +602,22 @@
"proc_images_folder_1 = './preprocessing/imgsForAllPages1'\n",
"bound_box_sorted1 = './preprocessing/BoundBoxSorted1'\n",
"proc_grnd_truth1 = './preprocessing/textSplitted1'\n",
"training_data1 = './traning_data1'\n",
"training_data1 = './training_data1'\n",
"test_size=6\n",
"train_size = count_files_in_folder(proc_images_folder_1, ['.png', '.jpeg', '.jpg'])- test_size\n",
"print(\"Training pages \" + str(train_size))\n",
"if not os.path.exists(training_data1):\n",
" os.makedirs(training_data1)\n",
"ensure_dir(training_data1)\n",
"apply_extraction_to_folder_for_train(proc_images_folder_1, bound_box_sorted1, proc_grnd_truth1, training_data1, train_size)\n",
"\n",
"\n",
"proc_images_folder_2 = './preprocessing/imgsForAllPages2'\n",
"bound_box_sorted2 = './preprocessing/BoundBoxSorted2'\n",
"proc_grnd_truth2 = './preprocessing/textSplitted2'\n",
"training_data2 = './traning_data2'\n",
"training_data2 = './training_data2'\n",
"test_size = 0\n",
"train_size = count_files_in_folder(proc_images_folder_1, ['.png', '.jpeg', '.jpg'])- test_size\n",
"train_size = count_files_in_folder(proc_images_folder_2, ['.png', '.jpeg', '.jpg'])- test_size\n",
"print(\"Training pages \" + str(train_size))\n",
"if not os.path.exists(training_data2):\n",
" os.makedirs(training_data2)\n",
"ensure_dir(training_data2)\n",
"apply_extraction_to_folder_for_train(proc_images_folder_2, bound_box_sorted2, proc_grnd_truth2, training_data2, train_size) # better to send no. of pages given in transcription"
]
}
Expand Down
4 changes: 2 additions & 2 deletions RenAIssance_CRNN_OCR_Shashank_Shekhar_Singh/Readme.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Historical Text Recognition using CRNN Model

This project aims to address the challenge of text recognition from `historical Spanish printed sources` dating back to the `seventeenth century`, a domain where existing Optical Character Recognition (OCR) tools often fail due to the complexity and variability of the texts. Leveraging hybrid end-to-end models based on a combination of CNN and RNN architectures, namely `CNN-RNN`, our research seeks to develop advanced machine learning techniques capable of accurately transcribing non-standard printed text. This project is a part of the `RenAIssance project`, a large project under the HumanAI organization. I am `Shashank Shekhar Singh`, a third year student from `IIT BHU, India` and have been developing this project as a part of the `Google Summer of Code program' 2024`.
This project aims to address the challenge of text recognition from `historical Spanish printed sources` dating back to the `seventeenth century`, a domain where existing Optical Character Recognition (OCR) tools often fail due to the complexity and variability of the texts. Leveraging hybrid end-to-end models based on a combination of CNN and RNN architectures, namely `CNN-RNN`, our research seeks to develop advanced machine learning techniques capable of accurately transcribing non-standard printed text. This project is a part of the `RenAIssance project`, a large project under the HumanAI organization. I am `Shashank Shekhar Singh`, a third year student from `IIT BHU, India` and have been developing this project as a part of the `Google Summer of Code program 2024`.

<p align="center">
<img src="images/humanai_logo.jpg" alt="HumanAI" style="height: 100px; margin-right: 20px;"/>
Expand Down Expand Up @@ -108,4 +108,4 @@ This project is licensed under the MIT License. See the [LICENSE](LICENSE) file
- [Google Summer of Code 2024 Project](https://summerofcode.withgoogle.com/programs/2024/projects/lg7vQeMM)
- [HumanAI Foundation](https://humanai.foundation/)

Feel free to fork the repository and submit pull requests. For major changes, please open an issue to discuss your ideas first. Contributions are always welcomed!
Feel free to fork the repository and submit pull requests. For major changes, please open an issue to discuss your ideas first. Contributions are always welcome!
18 changes: 18 additions & 0 deletions RenAIssance_CRNN_OCR_Shashank_Shekhar_Singh/environment.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
name: renaissance-crnn-ocr
channels:
- conda-forge
- defaults
dependencies:
- python>=3.9
- numpy>=1.23.0
- pandas>=2.0.0
- matplotlib>=3.7.0
- requests>=2.28.0
- tqdm>=4.65.0
- pip
- pip:
- tensorflow>=2.12.0
- Pillow>=9.0.0
- opencv-python>=4.7.0
- PyMuPDF>=1.22.0 # provides the `fitz` module
- python-docx>=0.8.11 # provides the `docx` module
22 changes: 22 additions & 0 deletions RenAIssance_CRNN_OCR_Shashank_Shekhar_Singh/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# RenAIssance_CRNN_OCR_Shashank_Shekhar_Singh - Python Dependencies
# Python >= 3.9 recommended

# ── Deep Learning ─────────────────────────────────────────────────────────────
tensorflow>=2.12.0

# ── Image Processing ──────────────────────────────────────────────────────────
Pillow>=9.0.0
opencv-python>=4.7.0
PyMuPDF>=1.22.0 # provides the `fitz` module

# ── Data Handling ─────────────────────────────────────────────────────────────
numpy>=1.23.0
pandas>=2.0.0
python-docx>=0.8.11 # provides the `docx` module

# ── Visualisation ─────────────────────────────────────────────────────────────
matplotlib>=3.7.0

# ── Utilities ─────────────────────────────────────────────────────────────────
requests>=2.28.0
tqdm>=4.65.0
106 changes: 80 additions & 26 deletions RenAIssance_CRNN_OCR_Shashank_Shekhar_Singh/utility/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,24 +20,46 @@ def count_files_in_folder(folder_path, extensions_list):

return file_count

def pdf_to_images(pdf_path, output_folder):
    """Render every page of a PDF into PNG images in *output_folder*.

    Pages are written as ``page_1.png``, ``page_2.png``, ... (1-based).
    All failures are reported via ``print()``; the function always
    returns ``None``.

    Args:
        pdf_path: Path to the source PDF file.
        output_folder: Directory for the PNG files; created if missing.
    """
    # Validate the input file before touching PyMuPDF.
    if not os.path.isfile(pdf_path):
        print(f"Error: PDF file '{pdf_path}' does not exist.")
        return

    # Create the output folder on demand.
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
        print(f"Created output folder: {output_folder}")

    try:
        # `fitz` is the import name of the PyMuPDF package.
        pdf_document = fitz.open(pdf_path)
    except Exception as e:
        print(f"Error opening PDF: {e}")
        return

    # Guard against an empty document.
    if len(pdf_document) == 0:
        print("Error: PDF has no pages.")
        pdf_document.close()
        return

    # Render and save each page; a failure on one page does not abort the rest.
    for page_number in range(len(pdf_document)):
        try:
            page = pdf_document.load_page(page_number)
            pixmap = page.get_pixmap()

            image_path = os.path.join(output_folder, f'page_{page_number + 1}.png')
            pixmap.save(image_path)
        except Exception as e:
            print(f"Error processing page {page_number + 1}: {e}")

    pdf_document.close()
    print("Conversion completed successfully.")

def split_and_save_image(image_path, output_folder, last_image_number):
# Read the image
Expand Down Expand Up @@ -170,9 +192,24 @@ def read_nth_line(file_path, n):
return None

def count_lines_in_file(file_path):
    """Count the number of lines in a text file.

    Args:
        file_path: Path to the file to inspect.

    Returns:
        The line count as an ``int``, or ``None`` when the file is
        missing or unreadable (an error message is printed in that case).
    """
    # Check if file exists
    if not os.path.isfile(file_path):
        print(f"Error: File '{file_path}' does not exist.")
        return None

    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            # Stream the file instead of readlines() to keep memory flat.
            return sum(1 for _ in file)

    except PermissionError:
        print(f"Error: Permission denied for file '{file_path}'.")
    except UnicodeDecodeError:
        print(f"Error: Encoding issue while reading '{file_path}'. Try a different encoding.")
    except Exception as e:
        print(f"Unexpected error: {e}")

    return None

def process_textfiles(textfile, sorted_BoundBox_folder, output_folder, TEST_SIZE):
# Initialize textfile counter, starting from 7 because of some garbage information in the beginning
Expand Down Expand Up @@ -375,17 +412,34 @@ def rotation_aug(training_data):
# Save the rotated image to the output folder
rotated_img.save(os.path.join(training_data, new_filename))

def add_gaussian_noise(image, mean=0, std=25, mode="normal"):
    """Add Gaussian noise to a uint8 image.

    Args:
        image: numpy array of dtype uint8 (any shape).
        mean: Mean of the Gaussian noise distribution.
        std: Standard deviation of the noise.
        mode: "normal" -> signed noise (darkening and brightening);
              "black"  -> only non-positive noise (darkening);
              "white"  -> only non-negative noise (brightening).

    Returns:
        A new uint8 array of the same shape, clipped to the valid
        [0, 255] pixel range.

    Raises:
        ValueError: If *mode* is not one of the three supported values.
    """
    # Work in int16 so adding negative noise cannot wrap around uint8.
    image_int = image.astype(np.int16)

    # Generate noise
    noise = np.random.normal(mean, std, image.shape)

    # Apply mode constraints
    if mode == "black":
        noise = np.clip(noise, -255, 0)
    elif mode == "white":
        noise = np.clip(noise, 0, 255)
    elif mode != "normal":
        raise ValueError("mode must be 'normal', 'black', or 'white'")

    # Add noise
    noisy_image = image_int + noise

    # Clip back to valid pixel range
    noisy_image = np.clip(noisy_image, 0, 255).astype(np.uint8)

    return noisy_image

def gaussian_noise_aug(training_data):
Expand All @@ -394,7 +448,7 @@ def gaussian_noise_aug(training_data):
if filename.endswith(('.png', '.jpg', '.jpeg', '.bmp', '.tiff')):
img_path = os.path.join(training_data, filename)
img = cv2.imread(img_path)
noisy_img = add_black_gaussian_noise(img)
noisy_img = add_gaussian_noise(img,mode='black')
new_filename = f"{os.path.splitext(filename)[0]}_gauss{os.path.splitext(filename)[1]}"
output_path = os.path.join(training_data, new_filename)
cv2.imwrite(output_path, noisy_img)
Original file line number Diff line number Diff line change
Expand Up @@ -27,18 +27,20 @@ def __init__(self, hidden_size, output_size, dropout=0.1):
super(LSTMAttnDecoder, self).__init__()
self.hidden_size = hidden_size
self.output_size = output_size
self.dropout = dropout
self.dropout_rate = dropout

self.embedding = nn.Embedding(output_size, hidden_size)
# embedding: (output_size, hidden_size)
self.dropout = nn.Dropout(self.dropout)
self.dropout = nn.Dropout(self.dropout_rate)
self.attention = Attention(hidden_size)
self.lstm = nn.LSTM(hidden_size * 2, hidden_size, num_layers=2, bidirectional=True, batch_first=True)
self.out = nn.Linear(hidden_size * 2, output_size)
# out: (4*hidden_size, output_size)

def forward(self, input_step, last_hidden, encoder_outputs):
# input_step: (batch_size, 1)
if (input_step < 0).any() or (input_step >= self.output_size).any():
raise ValueError(f"input_step contains indices out of range [0, {self.output_size})")
embedded = self.embedding(input_step)
# embedded: (batch_size, 1, hidden_size)
embedded = self.dropout(embedded)
Expand All @@ -57,4 +59,4 @@ def forward(self, input_step, last_hidden, encoder_outputs):
# output: (batch_size, hidden_size) if seq_length=1
output = self.out(torch.cat((output, context), 1))
# output: (batch_size, output_size)
return output, hidden
return output, hidden
Loading