The first revision

habedi · Mar 1, 2025 · 0004fdb · 0004fdb
1 parent 58fe05d
commit 0004fdb
Show file tree

Hide file tree

Showing 11 changed files with 121 additions and 140 deletions.
diff --git a/.editorconfig b/.editorconfig
@@ -6,7 +6,7 @@ root = true
 # Python specific settings, complying with PEP 8 style guide
 [*.py]
 indent_size = 4
-max_line_length = 80
+max_line_length = 100
 
 # Markdown files
 [*.md]
@@ -21,7 +21,7 @@ indent_size = 2
 indent_size = 2
 
 # YAML files
-[*.yml]
+[*.{yaml,yml}]
 indent_size = 2
 
 # JSON files

diff --git a/.gitattributes b/.gitattributes
@@ -6,3 +6,6 @@
 *.gif filter=lfs diff=lfs merge=lfs -text
 *.csv filter=lfs diff=lfs merge=lfs -text
 *.parquet filter=lfs diff=lfs merge=lfs -text
+
+# Exclude files from language stats (GitHub Linguist)
+*.ipynb linguist-vendored
diff --git a/.github/workflows/publish_to_pypi.yml → .github/workflows/publish.yml b/.github/workflows/publish_to_pypi.yml → .github/workflows/publish.yml
@@ -2,10 +2,12 @@ name: Publish to PyPI
 
 on:
   workflow_dispatch: # Enable manual runs
+  push:
+    tags: [ 'v*' ] # Run when a version tag (v*) is pushed; tags are a filter of the push event
 
 jobs:
 
-  # Run tests before publishing
+  # Run the tests before publishing to PyPI
   call_tests:
     uses: ./.github/workflows/tests.yml
 
@@ -30,10 +32,6 @@ jobs:
         run: |
           poetry install
 
-      #      - name: Update Version
-      #        run: |
-      #          poetry version patch # Use 'minor' or 'major' for minor or major version bumps
-
       - name: Build and Publish Package
         run: |
           poetry config pypi-token.pypi ${{ secrets.PYPI_API_TOKEN }}

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -1,9 +1,6 @@
 name: Tests
 
 on:
-  #  push:
-  #    branches:
-  #      - main
   workflow_dispatch: # Enable manual runs
   workflow_call: # Make this workflow available to be called by other workflows
 
@@ -13,7 +10,7 @@ jobs:
 
     strategy:
       matrix:
-        python-version: [ "3.10", "3.11", "3.12" ]
+        python-version: [ "3.10", "3.11", "3.12", "3.13" ]
 
     steps:
       - name: Checkout Repository
@@ -37,11 +34,11 @@ jobs:
         env:
           OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
         run: |
-          poetry run pytest tests/ --cov --doctest-modules --junitxml=junit/test-results-${{ matrix.python-version }}.xml
+          poetry run pytest tests/ --cov --doctest-modules --cov-report=xml
         continue-on-error: false
 
-      - name: Upload Test Results
-        uses: actions/upload-artifact@v4
+      - name: Upload Coverage Reports to Codecov
+        uses: codecov/codecov-action@v5
         with:
-          name: pytest-results-${{ matrix.python-version }}
-          path: junit/test-results-${{ matrix.python-version }}.xml
+          token: ${{ secrets.CODECOV_TOKEN }}
+
diff --git a/README.md b/README.md
@@ -1,41 +1,42 @@
-# Easy Letters
+## Easy Letters
 
-[![PyPI version](https://badge.fury.io/py/easy-letters.svg)](https://badge.fury.io/py/easy-letters)
-[![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
-[![python version](https://img.shields.io/badge/Python-%3E=3.10-blue)](https://github.com/habedi/easy-letters)
-[![pip downloads](https://img.shields.io/pypi/dm/easy-letters.svg)](https://pypi.org/project/easy-letters/)
 [![Tests](https://github.com/habedi/easy-letters/actions/workflows/tests.yml/badge.svg)](https://github.com/habedi/easy-letters/actions/workflows/tests.yml)
 [![CodeFactor](https://www.codefactor.io/repository/github/habedi/easy-letters/badge)](https://www.codefactor.io/repository/github/habedi/easy-letters)
+[![python version](https://img.shields.io/badge/Python-%3E=3.10-blue)](https://github.com/habedi/easy-letters)
+[![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
+[![PyPI version](https://badge.fury.io/py/easy-letters.svg)](https://badge.fury.io/py/easy-letters)
+[![pip downloads](https://img.shields.io/pypi/dm/easy-letters.svg)](https://pypi.org/project/easy-letters/)
 
-Easy Letters is a Python package that helps job seekers write application letters. A simple retrieval
-augmented generation (RAG) pipeline is used to generate the letters. The user can then edit the draft letter to suit
-their needs.
+Easy Letters is a Python library that can help job seekers write application letters.
+Currently, it provides the basic building blocks for creating a simple retrieval augmented generation (RAG) pipeline
+to generate application letter drafts.
+The user can then edit the draft letter to suit their needs.
 
-See the `notebooks/README.md` file for how easy letters works.
+See the [notebooks/README.md](notebooks/README.md) file for how it works.
 
-## 🔧 Installation
+### 🔧 Installation
 
 You can install Easy Letters using pip:
 
 ```bash
 pip install easy-letters
 ```
 
-## 🚀 Getting Started
+### 🚀 Getting Started
 
-### API Key Setup
+#### API Key Setup
 
-At the moment, Easy Letters gets the API key for supported services from the environment variables.
+Easy Letters gets the API key for supported services (like OpenAI) from the environment variables.
 So you need to set the following environment variables to be able to use Easy Letters:
 
 - `OPENAI_API_KEY`: The OpenAI API key (required)
 
-### Sample Notebooks
+#### Sample Notebooks
 
-You can find Jupyter notebooks with example code in the `notebooks` directory.
+You can find Jupyter notebooks with example code in the [notebooks](notebooks/) directory.
 The notebooks demonstrate how to use Easy Letters to generate application letter drafts.
 
-### Supported Models
+#### Supported Models
 
 Easy Letters currently supports the following models:
 
@@ -48,12 +49,12 @@ Easy Letters currently supports the following models:
 | Text Embedding 3 (Small Variant) | Text Embedding  |
 | Text Embedding 3 (Large Variant) | Text Embedding  |
 
-### Installing from Source
+#### Installing from Source
 
 You can also install Easy Letters from the source code in this repository. The main benefit of this approach is that
 you might find it easier to run the sample notebooks and modify the code as you wish this way.
 
-After cloning this repository, you can navigate to the `easy-letters` directory and install the
+After cloning this repository, you can navigate to the directory where you cloned the repository and install the
 dependencies using [Poetry](https://python-poetry.org/):
 
 ```bash
@@ -63,15 +64,15 @@ git clone https://github.com/habedi/easy-letters.git && cd easy-letters
 poetry install --with dev
 ```
 
-### Running the Unit Tests with Coverage
+#### Running Tests with Coverage
 
 You can run the unit tests with coverage using the following command:
 
 ```bash
 poetry run pytest tests/ --cov=easy_letters
 ```
 
-## 📝 TODO
+### 📝 TODO
 
 - [ ] Add support for Anthropic models and API
 - [ ] Add support for locally served models via Ollama
diff --git a/easy_letters/similarity_search.py b/easy_letters/similarity_search.py
@@ -67,6 +67,6 @@ def find_similar(self, embedding, collection_name="letters", top_k=5,
         Returns:
             list: A list of search results with similar documents.
         """
-        return self.client.search(collection_name=collection_name,
-                                  query_vector=embedding,
-                                  limit=top_k, score_threshold=min_similarity)
+        return self.client.query_points(collection_name=collection_name,
+                                        query=embedding,
+                                        limit=top_k, score_threshold=min_similarity)
diff --git a/notebooks/demo_openai.ipynb b/notebooks/demo_openai.ipynb
@@ -33,15 +33,14 @@
    "source": [
     "import io\n",
     "import os\n",
-    "import tiktoken\n",
+    "from pathlib import Path\n",
     "\n",
     "import pandas as pd\n",
+    "import tiktoken\n",
+    "from IPython.display import display, Markdown\n",
     "\n",
-    "from easy_letters import OpenAIConnector, Ranker\n",
     "from easy_letters import LanguageModels, EmbeddingModels\n",
-    "\n",
-    "from IPython.display import display, Markdown\n",
-    "from pathlib import Path"
+    "from easy_letters import OpenAIConnector, Ranker"
    ],
    "outputs": [],
    "execution_count": 1
@@ -69,7 +68,7 @@
    "cell_type": "code",
    "source": [
     "DATA_DIR = Path(\"../tests/test_data\")\n",
-    "LETTERS_DIR = DATA_DIR/ \"sample_letters\"\n",
+    "LETTERS_DIR = DATA_DIR / \"sample_letters\"\n",
     "SAMPLE_JOB_AD = DATA_DIR / \"sample_ads/description_6.text\"\n",
     "\n",
     "OUTPUT_DIR = Path(\"./output\")\n",
@@ -105,7 +104,7 @@
    "source": [
     "class DocumentLoader:\n",
     "    \"\"\"A class to load documents from files.\"\"\"\n",
-    "    \n",
+    "\n",
     "    @staticmethod\n",
     "    def _read_txt(path: Path) -> str:\n",
     "        with io.open(path, 'r', encoding='utf-8') as f:\n",
@@ -115,7 +114,7 @@
     "        \"\"\"Load all the documents in a directory with a specific extension into a DataFrame.\"\"\"\n",
     "        documents = []\n",
     "        documents_ids = []\n",
-    "        \n",
+    "\n",
     "        ext = '.' + ext.lower().lstrip('.')\n",
     "        for file in path.glob(f'*{ext}'):\n",
     "            if ext in ('.txt', '.text'):\n",
@@ -333,7 +332,7 @@
    "cell_type": "code",
    "source": [
     "letters_with_embeddings_df = application_letters_df.copy()\n",
-    "letters_with_embeddings_df['embedding'] = connector.embed(documents=application_letters_df['text'], \n",
+    "letters_with_embeddings_df['embedding'] = connector.embed(documents=application_letters_df['text'],\n",
     "                                                          model=EmbeddingModels.OPENAPI_EMS)"
    ],
    "metadata": {
@@ -519,7 +518,7 @@
    },
    "cell_type": "code",
    "source": [
-    "sample_job_ad_embedded = connector.embed(documents=[sample_job_ad], \n",
+    "sample_job_ad_embedded = connector.embed(documents=[sample_job_ad],\n",
     "                                         model=EmbeddingModels.OPENAPI_EMS)"
    ],
    "id": "2d61db3285bf245c",
@@ -560,7 +559,7 @@
    },
    "cell_type": "code",
    "source": [
-    "most_relevant_letters =  ranker.find_similar(sample_job_ad_embedded[0], top_k=3, min_similarity=0.1)\n",
+    "most_relevant_letters = ranker.find_similar(sample_job_ad_embedded[0], top_k=3, min_similarity=0.1)\n",
     "\n",
     "most_relevant_letters"
    ],
@@ -591,7 +590,7 @@
    "cell_type": "code",
    "source": [
     "for letter in most_relevant_letters:\n",
-    "    print(\"=\"*80)\n",
+    "    print(\"=\" * 80)\n",
     "    print(f\"Letter ID: {letter.id}\")\n",
     "    print(f\"Similarity: {letter.score:.2f}\")\n",
     "    print(f\"Letter Text:\\n{letter.payload['text']}\")\n",
@@ -657,9 +656,9 @@
     "prompt_part_1 = \"I'm applying for a job with this description:\\n\\n\"\n",
     "prompt_part_2 = \"#START OF JOB AD\\n\\n\" + sample_job_ad + \"\\n\\n#END OF JOB AD\"\n",
     "prompt_part_3 = \"\\n\\nI need to submit a application letter with my CV. Here is a few examples of my previous application letters:\\n\\n\"\n",
-    "prompt_part_4 = \"\\n\\n\".join([('#START OF EXAMPLE APPLICATION LETTER\\n\\n'+\n",
+    "prompt_part_4 = \"\\n\\n\".join([('#START OF EXAMPLE APPLICATION LETTER\\n\\n' +\n",
     "                              t.payload['text'] + '\\n\\n#END OF EXAMPLE APPLICATION LETTER')\n",
-    "                              for t in most_relevant_letters])\n",
+    "                             for t in most_relevant_letters])\n",
     "# Extra information for the prompt\n",
     "prompt_part_5 = (\"\\n\\nWrite a new application letter that is tailored to the job description above. \"\n",
     "                 \"Be concise and to the point. The letter should be no longer than 500 words. \"\n",
@@ -716,10 +715,10 @@
    },
    "cell_type": "code",
    "source": [
-    "draft_letter = connector.chat(prompt=prompt, \n",
-    "                          model=LLM,\n",
-    "                          temperature=0.1,\n",
-    "                          max_tokens=512)"
+    "draft_letter = connector.chat(prompt=prompt,\n",
+    "                              model=LLM,\n",
+    "                              temperature=0.1,\n",
+    "                              max_tokens=512)"
    ],
    "id": "fdcaa76ede6692a7",
    "outputs": [],

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,34 +1,44 @@
 [tool.poetry]
 name = "easy-letters"
 version = "0.1.8"
-description = "A Python package for generating draft application letters using generative AI"
+description = "A Python library for generating draft application letters using generative AI"
 authors = ["Hassan Abedi <[email protected]>"]
+maintainers = ["Hassan Abedi <[email protected]>"]
 readme = "README.md"
 packages = [{ include = "easy_letters", from = "." }]
-include = ["README.md", "LICENSE"]
+include = ["README.md"]
 license = "MIT"
 repository = "https://github.com/habedi/easy-letters"
 
 [tool.poetry.dependencies]
 python = "^3.10"
 openai = "^1.16.1"
 qdrant-client = "^1.8.2"
+numpy = "^2.2.3"
 
 [tool.poetry.group.dev.dependencies]
-jupyter = "^1.0.0"
+poetry-dynamic-versioning = "^1.4.0"
 pytest = "^8.2.2"
-black = ">=24.4.2,<26.0.0"
+pytest-mock = "^3.14.0"
 pytest-cov = ">=5,<7"
-poetry-dynamic-versioning = "^1.4.0"
-tiktoken = ">=0.7,<0.10"
 pandas = "^2.2.2"
-pytest-mock = "^3.14.0"
-
-[build-system]
-requires = ["poetry-core"]
-build-backend = "poetry.core.masonry.api"
+jupyter = "^1.0.0"
+tiktoken = ">=0.7,<0.10"
+ruff = "^0.9.9"
 
 [tool.poetry-dynamic-versioning]
 enable = true
 vcs = "git"
 versioning = "semver"  # Semantic Versioning
+
+#[build-system]
+#requires = ["poetry-core"]
+#build-backend = "poetry.core.masonry.api"
+
+#[build-system]
+#requires = ["pdm-backend"]
+#build-backend = "pdm.backend"
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
diff --git a/tests/shared.py b/tests/shared.py
@@ -0,0 +1,39 @@
+import numpy as np
+
+from easy_letters import Ranker
+
+# Sample documents paired index-by-index with their 3-dimensional embeddings
+documents_with_embeddings = {
+    'text': ["Document 1", "Document 2"],
+    'embedding': [np.array([0.1, 0.2, 0.3]), np.array([0.4, 0.5, 0.6])]
+}
+
+# Query embedding for similarity searches (equal to the embedding of "Document 1")
+embedding_to_search = np.array([0.1, 0.2, 0.3])
+
+# The expected response; each score is the cosine similarity to embedding_to_search
+search_response = [
+    {"id": 0, "score": 1.0, "payload": {"text": "Document 1"}},
+    {"id": 1, "score": 0.9746, "payload": {"text": "Document 2"}}
+]
+
+
+def test_make_collection():
+    """
+    Test the make_collection method of the Ranker class.
+    This test checks if the collection is created successfully with the correct
+     parameters.
+    """
+    # Arrange: a default-constructed Ranker and the name of the collection to create
+    ranker = Ranker()
+    collection_name = "test_collection"
+
+    # Act: build the collection from the two sample documents
+    ranker.make_collection(documents_with_embeddings, collection_name)
+
+    # Assert: the collection exists, holds both documents, and uses 3-d Cosine vectors
+    coll = ranker.client.get_collection(collection_name)
+    assert coll is not None
+    assert coll.points_count == 2
+    assert coll.config.params.vectors.size == 3
+    assert coll.config.params.vectors.distance == "Cosine"