diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index ee1b6ca..2eea9ea 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -21,18 +21,28 @@ jobs: with: python-version: "3.12" - - name: Install dependencies + - name: Install uv run: | - python -m pip install --upgrade pip - pip install -r requirements.txt - pip install pytest + curl -LsSf https://astral.sh/uv/install.sh | sh + echo "$HOME/.local/bin" >> $GITHUB_PATH + + - name: Cache uv environment + uses: actions/cache@v3 + with: + path: .venv + key: ${{ runner.os }}-uv-${{ hashFiles('pyproject.toml') }} + restore-keys: | + ${{ runner.os }}-uv- + + - name: Sync dependencies + run: uv sync - name: Create .env file for testing run: | echo "API_KEY=${{ secrets.SYNTHEX_API_KEY }}" > .env - name: Run tests - run: pytest + run: uv run pytest publish: name: Build and Publish to PyPI diff --git a/CHANGELOG.md b/CHANGELOG.md index 6d2703b..4574901 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,14 @@ +## Release v0.4.1 - September 4, 2025 + +### Added + +- Added a `job_id` field to the response of the `generate_data` method. + +### Modified + +- Updated the `status` method in `JobsAPI` to accept a `job_id` parameter, allowing users to check the status of any job by its ID. +- Changed the `generate_data` method to initiate data fetching in a separate thread, enabling asynchronous job processing. + ## Release v0.4.0 - September 3, 2025 -Firt release. \ No newline at end of file +First release. \ No newline at end of file diff --git a/README.md b/README.md index 7928618..e38468e 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@
diff --git a/requirements.txt b/requirements.txt index 408ceba..95e69b4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,47 +1,21 @@ -aiohappyeyeballs==2.6.1 -aiohttp==3.12.4 -aiosignal==1.3.2 annotated-types==0.7.0 -attrs==25.3.0 -build==1.2.2.post1 -certifi==2025.1.31 -charset-normalizer==3.4.1 -dill==0.3.8 -dnspython==2.7.0 -email_validator==2.2.0 -filelock==3.18.0 -frozenlist==1.6.0 -fsspec==2025.3.0 -hf-xet==1.1.2 -huggingface-hub==0.32.2 +anyio==4.10.0 +certifi==2025.8.3 +charset-normalizer==3.4.3 +h11==0.16.0 +httpcore==1.0.9 +httpx==0.28.1 idna==3.10 -iniconfig==2.1.0 -multidict==6.4.4 -multiprocess==0.70.16 -numpy==2.2.6 -packaging==24.2 -pandas==2.2.3 platformdirs==4.3.8 -pluggy==1.5.0 -propcache==0.3.1 -pyarrow==20.0.0 -pydantic==2.11.2 -pydantic-settings==2.9.1 -pydantic_core==2.33.1 -pyproject_hooks==1.2.0 -pytest==8.3.5 -pytest-mock==3.14.1 -python-dateutil==2.9.0.post0 -python-dotenv==1.1.0 -pytz==2025.2 -PyYAML==6.0.2 -requests==2.32.3 -responses==0.25.7 -six==1.17.0 -tqdm==4.67.1 -typing-inspection==0.4.0 -typing_extensions==4.13.1 -tzdata==2025.2 -urllib3==2.3.0 -xxhash==3.5.0 -yarl==1.20.0 +pydantic==2.11.7 +pydantic-core==2.33.2 +pydantic-settings==2.10.1 +python-dotenv==1.1.1 +pyyaml==6.0.2 +requests==2.32.4 +responses==0.25.8 +sniffio==1.3.1 +-e . +typing-extensions==4.14.1 +typing-inspection==0.4.1 +urllib3==2.5.0 diff --git a/synthex/jobs_api.py b/synthex/jobs_api.py index b67becc..d2521e4 100644 --- a/synthex/jobs_api.py +++ b/synthex/jobs_api.py @@ -1,11 +1,12 @@ from .api_client import APIClient -from typing import Any, List, Optional +from typing import Any, List import csv import os import time +import threading import logging -from .models import ListJobsResponseModel, JobOutputType, JobOutputSchemaDefinition, ActionResult, \
 +from .models import ListJobsResponseModel, JobOutputType, JobOutputSchemaDefinition, GenerateDataResponse, \
 JobStatusResponseModel, SuccessResponse from 
.endpoints import LIST_JOBS_ENDPOINT, CREATE_JOB_WITH_SAMPLES_ENDPOINT from .decorators import auto_validate_methods @@ -16,7 +17,6 @@ logger = logging.getLogger(__name__) - @auto_validate_methods class JobsAPI: """ @@ -25,8 +25,6 @@ class JobsAPI: def __init__(self, client: APIClient): self._client: APIClient = client - # TODO: add support for multiple concurrent jobs; this should require launching each job in a separate thread. - self._current_job_id: Optional[str] = None def list(self, limit: int = 10, offset: int = 0) -> ListJobsResponseModel: """ @@ -107,9 +105,6 @@ def _create_job( response = self._client.post(f"{CREATE_JOB_WITH_SAMPLES_ENDPOINT}", data=data) - # Store id of the current job. - self._current_job_id = response.data - if response.data is None: raise ValidationError("Response data is None, expected a valid job ID.") return response.data @@ -144,7 +139,6 @@ def _get_data_and_write_to_file(self, job_id: str, output_path: str, output_type # Keep fetching data as long as the status code is 206, meaning that there is more data; # When the status code changes to 200, that means there is no more data to fetch. keep_polling = True - # TODO: a timeout is needed here: if a job has not received any data within x seconds, error out while keep_polling: # Poll every JOB_DATA_POLLING_INTERVAL seconds time.sleep(config.JOB_DATA_POLLING_INTERVAL) @@ -167,7 +161,7 @@ def _get_data_and_write_to_file(self, job_id: str, output_path: str, output_type writer.writeheader() # Append each dict as a row. writer.writerows(data) - + logger.info(f"{len(data)} datapoints written for job {job_id}") logger.info(f"All datapoints for job {job_id} have been written") @@ -182,7 +176,7 @@ def generate_data( output_path: str, number_of_samples: int, output_type: JobOutputType = "csv", - ) -> ActionResult: + ) -> GenerateDataResponse: """ Generates data based on the provided schema definition, examples, and requirements. 
Args: @@ -197,7 +191,7 @@ def generate_data( - "csv": Saves the data to a CSV file. output_path (str): The file path where the generated data should be saved. Returns: - ActionResult: An object indicating that the job was started successfully. + GenerateDataResponse: An object indicating that the job was started successfully. """ # Sanitize the output path @@ -214,25 +208,25 @@ def generate_data( # Create the output directory if it doesn't exist os.makedirs(os.path.dirname(output_path), exist_ok=True) - # Fetch data and write to file - self._get_data_and_write_to_file(job_id, output_path, output_type) + # Start fetching data in a separate thread + th = threading.Thread(target=self._get_data_and_write_to_file, args=(job_id, output_path, output_type)) + th.start() - return ActionResult( + return GenerateDataResponse( success=True, - message=f"Job started successfully. Output will be saved to '{output_path}' upon completion.", + message=f"Job was started successfully. Output will be saved to '{output_path}'.", + job_id=job_id ) - def status(self) -> JobStatusResponseModel: + def status(self, job_id: str) -> JobStatusResponseModel: """ - Check the status of the job that is currently running. + Check the status of a job. + Args: + job_id (str): The ID of the job to check the status of. Returns: JobStatusResponseModel: A model containing the status of the job. """ - - # No job has been started yet. 
- if not self._current_job_id: - raise ValidationError("No job is currently running.") - - response = self._client.get(GET_JOB_STATUS_ENDPOINT(self._current_job_id)) - + + response = self._client.get(GET_JOB_STATUS_ENDPOINT(job_id)) + return JobStatusResponseModel.model_validate(response.data) diff --git a/synthex/models/responses.py b/synthex/models/responses.py index ccbbfe7..541716d 100644 --- a/synthex/models/responses.py +++ b/synthex/models/responses.py @@ -13,6 +13,7 @@ class SuccessResponse(BaseModel, Generic[T]): data: Optional[T] = None -class ActionResult(BaseModel): +class GenerateDataResponse(BaseModel): success: bool - message: Optional[str] = None \ No newline at end of file + message: str + job_id: str \ No newline at end of file diff --git a/tests/unit/jobs/test_create_job.py b/tests/unit/jobs/test_create_job.py index a2bd534..25c2779 100644 --- a/tests/unit/jobs/test_create_job.py +++ b/tests/unit/jobs/test_create_job.py @@ -46,9 +46,7 @@ def test_create_job_success(synthex: Synthex, generate_data_params: dict[Any, An number_of_samples=generate_data_params["number_of_samples"], ) - assert job_id == "abc123" - assert synthex.jobs._current_job_id == "abc123" # type: ignore - + assert job_id == "abc123" @pytest.mark.unit @responses.activate diff --git a/tests/unit/jobs/test_status.py b/tests/unit/jobs/test_status.py index 1d8d76a..6a170ff 100644 --- a/tests/unit/jobs/test_status.py +++ b/tests/unit/jobs/test_status.py @@ -39,10 +39,7 @@ def test_status_success( status_code=200 ) - # Artificially set the current job ID to "job_id" for testing purposes. 
- short_lived_synthex.jobs._current_job_id = job_id # type: ignore - - status = short_lived_synthex.jobs.status() + status = short_lived_synthex.jobs.status(job_id) assert isinstance(status, JobStatusResponseModel), "Status should be of type JobStatusResponseModel" assert status.status == test_status, f"Status should be {test_status}" assert status.progress == test_progress, f"Progress should be {test_progress}" @@ -50,16 +47,16 @@ def test_status_success( @responses.activate @pytest.mark.unit -def test_status_no_running_job_failure( +def test_status_no_job_id_failure( short_lived_synthex: Synthex, ): """ This test verifies that a `ValidationError` is raised when attempting to - check the status of jobs through `JobsAPI.status` while no job is currently running. If the exception + check the status of jobs through `JobsAPI.status` without passing a job ID. If the exception is not raised, the test will fail with an appropriate error message. Args: synthex (Synthex): An instance of the `Synthex` class to be tested. """ with pytest.raises(ValidationError): - short_lived_synthex.jobs.status() \ No newline at end of file + short_lived_synthex.jobs.status() # type: ignore \ No newline at end of file