Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions .github/workflows/test_crnn.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# CI workflow: runs the CRNN OCR-1 unit tests on every push and pull request
# targeting main, once per Python version in the matrix.
name: CRNN OCR-1 Unit Tests

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]

jobs:
  test:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.9", "3.10"]

    steps:
      - uses: actions/checkout@v3

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          # --index-url replaces PyPI for the whole command, so packages that
          # the PyTorch wheel index does not mirror (e.g. pytest) may fail to
          # resolve. Install pytest from PyPI and only torch from the CPU index.
          pip install pytest
          pip install torch --index-url https://download.pytorch.org/whl/cpu
          pip install google-generativeai

      - name: Run CRNN OCR-1 tests
        run: |
          pytest tests/test_crnn_ocr1.py -v
53 changes: 51 additions & 2 deletions RenAIssance_CRNN_OCR_Shashank_Shekhar_Singh/Readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,56 @@ the RNN (Recurrent neural networks).
For a detailed walkthrough of the project's development, challenges, and solutions, read the complete blog post [here](https://medium.com/@shashankshekharsingh1205/my-journey-with-humanai-in-the-google-summer-of-code24-program-part-2-bb42abce3495).

## Datasets and Models
- The `Padilla - Nobleza virtuosa_testExtract.pdf` can be downloaded from [here](https://github.com/Shashankss1205/RenAIssance/blob/main/RenAIssance_CRNN_OCR_Shashank_Shekhar_Singh/data/Padilla_Nobleza_virtuosa_testExtract.pdf)
- The `Padilla - Nobleza virtuosa_testExtract.pdf` can be downloaded from [here](https://github.com/Shashankss1205/RenAIssance/blob/main/RenAIssance_CRNN_OCR_Shashank_Shekhar_Singh/data/Padilla_Nobleza_virtuosa_testExtract.pdf)

## Setup

Install all dependencies before running the notebooks:
```bash
pip install -r requirements.txt
```

## Requirements
- Python 3.10+
- PyTorch 2.0+
- CUDA GPU recommended (Google Colab or Kaggle)

## How to Run
1. Clone this repository
2. Install dependencies: `pip install -r requirements.txt`
3. Open `Model.ipynb` in Jupyter or Google Colab
4. Run all cells in order

- The `Padilla - 1 Nobleza virtuosa_testTranscription.docx` can be downloaded from [here](https://github.com/Shashankss1205/RenAIssance/blob/main/RenAIssance_CRNN_OCR_Shashank_Shekhar_Singh/data/Padilla_Nobleza_virtuosa_testTranscription.docx)
- The ocr model used can be directly generated by running the python notebook or can be downloaded from [here](https://github.com/Shashankss1205/RenAIssance/blob/main/RenAIssance_CRNN_OCR_Shashank_Shekhar_Singh/Model/ocr_model.h5)

Expand All @@ -108,4 +157,4 @@ This project is licensed under the MIT License. See the [LICENSE](LICENSE) file
- [Google Summer of Code 2024 Project](https://summerofcode.withgoogle.com/programs/2024/projects/lg7vQeMM)
- [HumanAI Foundation](https://humanai.foundation/)

Feel free to fork the repository and submit pull requests. For major changes, please open an issue to discuss your ideas first. Contributions are always welcomed!
Feel free to fork the repository and submit pull requests. For major changes, please open an issue to discuss your ideas first. Contributions are always welcomed!
23 changes: 14 additions & 9 deletions RenAIssance_SelfSupervisedLearning_OCR_YukinoriYamamoto/ResNet.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from torch import nn


class BasicBlock(nn.Module):
expansion = 1

Expand Down Expand Up @@ -36,7 +37,7 @@ def forward(self, x):


class ResNet18(nn.Module):
def __init__(self, num_classes=1000):
def __init__(self, num_classes=3): # ✅ FIX 1: changed 1000 → 3
super(ResNet18, self).__init__()
self.in_channels = 64

Expand All @@ -50,6 +51,7 @@ def __init__(self, num_classes=1000):
self.layer4 = self._make_layer(BasicBlock, 512, 2, stride=1)

self.avgpool = nn.AdaptiveAvgPool2d((1, 32))
self.fc = nn.Linear(512 * 32, num_classes) # ✅ FIX 2: added fc layer

def _make_layer(self, block, out_channels, num_blocks, stride):
strides = [stride] + [1] * (num_blocks - 1)
Expand All @@ -76,11 +78,13 @@ def forward(self, x):
x = layer(x)

x = self.avgpool(x)
x = x.view(x.size(0), -1) # ✅ FIX 3: flatten before fc
x = self.fc(x) # ✅ FIX 4: apply classification head
return x


class ResNet34(nn.Module):
def __init__(self, num_classes=1000):
def __init__(self, num_classes=3): # ✅ FIX 5: changed 1000 → 3
super(ResNet34, self).__init__()
self.in_channels = 64

Expand All @@ -94,6 +98,7 @@ def __init__(self, num_classes=1000):
self.layer4 = self._make_layer(BasicBlock, 512, 3, stride=1)

self.avgpool = nn.AdaptiveAvgPool2d((1, 44))
self.fc = nn.Linear(512 * 44, num_classes) # ✅ FIX 6: added fc layer

def _make_layer(self, block, out_channels, num_blocks, stride):
strides = [stride] + [1] * (num_blocks - 1)
Expand All @@ -120,9 +125,12 @@ def forward(self, x):
x = layer(x)

x = self.avgpool(x)
x = x.view(x.size(0), -1) # ✅ FIX 7: flatten before fc
x = self.fc(x) # ✅ FIX 8: apply classification head
return x


# Bottleneck residual block (expansion 4), used by the ResNet50 backbone below.
class Bottleneck(nn.Module):
expansion = 4
def __init__(self, in_channels, out_channels, stride=(1, 1)):
Expand All @@ -137,6 +145,7 @@ def __init__(self, in_channels, out_channels, stride=(1, 1)):
self.stride = stride
self.shortcut_conv = nn.Conv2d(in_channels, out_channels * self.expansion, kernel_size=1, stride=stride, bias=False)
self.shortcut_bn = nn.BatchNorm2d(out_channels * self.expansion)

def forward(self, x):
identity = x
out = self.conv1(x)
Expand All @@ -154,6 +163,7 @@ def forward(self, x):
out = self.relu(out)
return out


class ResNet50(nn.Module):
def __init__(self):
super(ResNet50, self).__init__()
Expand All @@ -168,33 +178,28 @@ def __init__(self):
self.layer4 = self._make_layer(Bottleneck, 512, 3, stride=(2, 1))
self.last_conv = nn.Conv2d(2048, 512, kernel_size=1, stride=1, bias=False)
self.avgpool = nn.AvgPool2d(kernel_size=(2, 1), stride=(2, 1))

def _make_layer(self, block, out_channels, num_blocks, stride):
strides = [stride] + [1] * (num_blocks - 1)
layers = nn.ModuleList()
for stride in strides:
layers.append(block(self.in_channels, out_channels, stride))
self.in_channels = out_channels * block.expansion
return layers

def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
for layer in self.layer1:
x = layer(x)
# print("layer 1", x.shape)
for layer in self.layer2:
x = layer(x)
# print("layer 2", x.shape)
for layer in self.layer3:
x = layer(x)
# print("layer 3", x.shape)
for layer in self.layer4:
x = layer(x)
# print("layer 4", x.shape)
x = self.last_conv(x)
x = self.avgpool(x)
# print("avgpool", x.shape)
return x
Loading