43 changes: 43 additions & 0 deletions .github/workflows/style.yml
@@ -0,0 +1,43 @@
name: Style check

on:
  push:
    branches:
      - main
      - master

  pull_request:
    branches:
      - main
      - master

jobs:
  flake8_py3:
    permissions: write-all
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v2

      - uses: actions/setup-python@v5
        with:
          python-version: "3.10"

      - name: Install flake8 and plugins
        run: |
          pip install flake8 flake8-docstrings flake8-annotations

      - name: Configure Flake8
        run: |
          echo "[flake8]" > .flake8
          echo "extend-ignore = E402" >> .flake8
          echo "exclude = .github,autoop/tests" >> .flake8
          # ignore ANN101, ANN102, ANN002, ANN003, D100, and everything
          # that starts with D2 or D4
          echo "ignore = ANN101,ANN102,D100,D2,D4,ANN002,ANN003" >> .flake8

      - name: Run flake8
        uses: suo/flake8-github-action@releases/v1
        with:
          checkName: "flake8_py3"
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
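For reference, the echo lines in the Configure Flake8 step assemble a `.flake8` file equivalent to:

```ini
[flake8]
extend-ignore = E402
exclude = .github,autoop/tests
ignore = ANN101,ANN102,D100,D2,D4,ANN002,ANN003
```

Note that `ignore` replaces flake8's default select list, while `extend-ignore` adds to it; having both in one config is redundant here, since `E402` could simply be appended to the `ignore` line.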
1 change: 0 additions & 1 deletion .gitignore
@@ -2,4 +2,3 @@
*vscode
*__pycache__
*__MACOSX
*assets*
Empty file removed .gitignore:Zone.Identifier
Empty file.
Empty file removed INSTRUCTIONS.md:Zone.Identifier
Empty file.
48 changes: 24 additions & 24 deletions README.md
@@ -153,30 +153,30 @@ If you did not implement the feature

| Requirement | Type (FN/NF) | Implemented by | Implementation Completed (add X if done) | Comment |
|-------------------------------------- |--------------|----------------------|--------------------------|---------|
| Up-to-date requirements.txt | NF | | | |
| `ML/detect-features` | FN | | | |
| `ML/artifact` | NF | | | |
| `ML/feature` | NF | | | |
| `ML/metric` | NF | | | |
| `ML/metric/extensions` | FN | | | |
| `ML/model` | NF | | | |
| `ML/model/extensions` | FN | | | |
| `ML/pipeline/evaluation` | FN | | | |
| `ST/page/datasets` | NF | | | |
| `ST/datasets/management/create` | FN | | | |
| `ST/datasets/management/save` | FN | | | |
| `ST/page/modelling` | NF | | | |
| `ST/modelling/datasets/list` | FN | | | |
| `ST/modelling/datasets/features` | FN | | | |
| `ST/modelling/models` | FN | | | |
| `ST/modelling/pipeline/split` | FN | | | |
| `ST/modelling/pipeline/metrics` | FN | | | |
| `ST/modelling/pipeline/summary` | FN | | | |
| `ST/modelling/pipeline/train` | FN | | | |
| `ST/modelling/pipeline/save` | FN | | | |
| `ST/page/deployment` | FN | | | |
| `ST/deployment/load` | FN | | | |
| `ST/deployment/predict` | FN | | | |
| Up-to-date requirements.txt | NF | Ana | X | |
| `ML/detect-features` | FN | Ana | X | |
| `ML/artifact` | NF | Catarina | X | |
| `ML/feature` | NF | Catarina | X | |
| `ML/metric` | NF | Catarina | X | |
| `ML/metric/extensions` | FN | both | X | |
| `ML/model` | NF | Catarina | X | |
| `ML/model/extensions` | FN | both | X | |
| `ML/pipeline/evaluation` | FN | Catarina | X | |
| `ST/page/datasets` | NF | both | X | |
| `ST/datasets/management/create` | FN | Catarina | X | |
| `ST/datasets/management/save` | FN | Catarina | X | |
| `ST/page/modelling` | NF | Ana | X | |
| `ST/modelling/datasets/list` | FN | Ana | X | |
| `ST/modelling/datasets/features` | FN | Catarina | X | |
| `ST/modelling/models` | FN | Catarina | X | |
| `ST/modelling/pipeline/split` | FN | Catarina | X | |
| `ST/modelling/pipeline/metrics` | FN | both | X | |
| `ST/modelling/pipeline/summary` | FN | Ana | X | |
| `ST/modelling/pipeline/train` | FN | Catarina | X | |
| `ST/modelling/pipeline/save` | FN | Catarina | X | |
| `ST/page/deployment` | FN | Ana | X | |
| `ST/deployment/load` | FN | Catarina | X | |
| `ST/deployment/predict` | FN | Catarina | X | |

If you add extra features, please indicate them below:
| Requirement | Type (FN/NF) | Implemented by | Implementation Completed (add X if done) | Comment |
Empty file removed README.md:Zone.Identifier
Empty file.
3 changes: 1 addition & 2 deletions app/Welcome.py
@@ -1,9 +1,8 @@
from autoop.core.ml.artifact import Artifact
import streamlit as st

st.set_page_config(
    page_title="Hello",
    page_icon="👋",
)
st.sidebar.success("Select a page above.")
st.markdown(open("README.md").read())
st.markdown(open("README.md").read())
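The `open("README.md").read()` call above never closes its file handle. A minimal sketch of the same read via `pathlib`, which opens and closes the file for you (the temporary directory is a stand-in so the example is self-contained; in the app the path would simply be `"README.md"`):

```python
import tempfile
from pathlib import Path

# Create a stand-in README so the sketch runs anywhere.
with tempfile.TemporaryDirectory() as d:
    readme = Path(d) / "README.md"
    readme.write_text("# Demo README\n", encoding="utf-8")

    # read_text() opens, reads, and closes the file in one call,
    # unlike a bare open(...).read() which leaks the handle.
    text = readme.read_text(encoding="utf-8")
    print(text)
```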
Empty file removed app/Welcome.py:Zone.Identifier
Empty file.
134 changes: 108 additions & 26 deletions app/core/system.py
@@ -1,22 +1,46 @@
from autoop.core.storage import LocalStorage
from autoop.core.database import Database
from autoop.core.ml.dataset import Dataset
from autoop.core.ml.artifact import Artifact
from autoop.core.storage import Storage
from typing import List
from typing import List, Optional


class ArtifactRegistry():
    def __init__(self,
                 database: Database,
                 storage: Storage):
class ArtifactRegistry:
    """
    Manages the registration, retrieval, and deletion of artifacts in the
    AutoML system.

    Methods:
        register: Registers a new artifact in the system.
        list: Lists all artifacts, optionally filtered by type.
        get: Retrieves a specific artifact by its ID.
        delete: Deletes an artifact from the system.
    """

    def __init__(
        self, database: Database, storage: Storage
    ) -> None:
        """
        Initializes the ArtifactRegistry.

        Args:
            database (Database): The database instance for metadata storage.
            storage (Storage): The storage instance for artifact data storage.
        """
        self._database = database
        self._storage = storage

    def register(self, artifact: Artifact):
        # save the artifact in the storage
    def register(self, artifact: Artifact) -> None:
        """
        Registers a new artifact by saving its data and metadata.

        Args:
            artifact (Artifact): The artifact to register.

        Returns:
            None
        """
        self._storage.save(artifact.data, artifact.asset_path)
        # save the metadata in the database
        entry = {
            "name": artifact.name,
            "version": artifact.version,
@@ -25,9 +49,18 @@ def register(self, artifact: Artifact):
            "metadata": artifact.metadata,
            "type": artifact.type,
        }
        self._database.set(f"artifacts", artifact.id, entry)

    def list(self, type: str=None) -> List[Artifact]:
        self._database.set("artifacts", artifact.id, entry)

    def list(self, type: Optional[str] = None) -> List[Artifact]:
        """
        Lists all artifacts, optionally filtered by type.

        Args:
            type (Optional[str]): The type of artifact to filter by.

        Returns:
            List[Artifact]: A list of matching artifacts.
        """
        entries = self._database.list("artifacts")
        artifacts = []
        for id, data in entries:
@@ -44,8 +77,17 @@ def list(self, type: str=None) -> List[Artifact]:
            )
            artifacts.append(artifact)
        return artifacts

    def get(self, artifact_id: str) -> Artifact:
        """
        Retrieves an artifact by its ID.

        Args:
            artifact_id (str): The unique ID of the artifact.

        Returns:
            Artifact: The retrieved artifact.
        """
        data = self._database.get("artifacts", artifact_id)
        return Artifact(
            name=data["name"],
@@ -56,33 +98,73 @@ def get(self, artifact_id: str) -> Artifact:
            data=self._storage.load(data["asset_path"]),
            type=data["type"],
        )

    def delete(self, artifact_id: str):

    def delete(self, artifact_id: str) -> None:
        """
        Deletes an artifact by its ID.

        Args:
            artifact_id (str): The unique ID of the artifact to delete.

        Returns:
            None
        """
        data = self._database.get("artifacts", artifact_id)
        self._storage.delete(data["asset_path"])
        self._database.delete("artifacts", artifact_id)


class AutoMLSystem:
    _instance = None
    """
    Singleton class representing the AutoML system.

    Manages the artifact registry, storage, and database for machine learning
    operations.

    Attributes:
        _storage (LocalStorage): The local storage instance.
        _database (Database): The database instance.
        _registry (ArtifactRegistry): The artifact registry instance.
    """

    def __init__(self, storage: LocalStorage, database: Database):
    _instance: Optional["AutoMLSystem"] = None

    def __init__(
        self, storage: LocalStorage, database: Database
    ) -> None:
        """
        Initializes the AutoMLSystem.

        Args:
            storage (LocalStorage): The local storage instance.
            database (Database): The database instance.
        """
        self._storage = storage
        self._database = database
        self._registry = ArtifactRegistry(database, storage)

    @staticmethod
    def get_instance():
    def get_instance() -> "AutoMLSystem":
        """
        Retrieves the singleton instance of the AutoMLSystem.

        Returns:
            AutoMLSystem: The singleton instance.
        """
        if AutoMLSystem._instance is None:
            AutoMLSystem._instance = AutoMLSystem(
                LocalStorage("./assets/objects"),
                Database(
                    LocalStorage("./assets/dbo")
                )
                LocalStorage("./assets/objects"),
                Database(LocalStorage("./assets/dbo"))
            )
        AutoMLSystem._instance._database.refresh()
        return AutoMLSystem._instance

    @property
    def registry(self):
        return self._registry
    def registry(self) -> ArtifactRegistry:
        """
        Accesses the artifact registry.

        Returns:
            ArtifactRegistry: The artifact registry instance.
        """
        return self._registry
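The `get_instance` method added above is a classic lazy singleton: the instance is created on first access and every later call returns the same object. A minimal self-contained sketch of the same idiom (`Registry` and `System` here are illustrative stand-ins, not the project's classes):

```python
from typing import Dict, Optional


class Registry:
    """Toy stand-in for the artifact registry."""

    def __init__(self) -> None:
        self.items: Dict[str, bytes] = {}


class System:
    """Lazy singleton: constructed on first get_instance() call only."""

    _instance: Optional["System"] = None

    def __init__(self) -> None:
        self.registry = Registry()

    @staticmethod
    def get_instance() -> "System":
        if System._instance is None:
            System._instance = System()
        return System._instance


a = System.get_instance()
b = System.get_instance()
print(a is b)  # True: both names refer to the one shared instance
```

One design note: because `AutoMLSystem.get_instance()` also calls `_database.refresh()` on every access, each lookup re-reads metadata from storage, trading a little speed for freshness.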
Empty file removed app/core/system.py:Zone.Identifier
Empty file.
9 changes: 0 additions & 9 deletions app/pages/0_✅_Instructions.py

This file was deleted.

Empty file.
67 changes: 64 additions & 3 deletions app/pages/1_📊_Datasets.py
@@ -1,11 +1,72 @@
import os
import streamlit as st
import pandas as pd

from app.core.system import AutoMLSystem
from autoop.core.ml.dataset import Dataset

automl = AutoMLSystem.get_instance()

datasets = automl.registry.list(type="dataset")

# your code here
def handle_file_upload() -> None:
    """
    Handles the file upload process for CSV files. Users can upload a CSV
    file, view its contents, and save it as a dataset with a specified name
    and version.
    """
    uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
    if uploaded_file is not None:
        try:
            data = pd.read_csv(uploaded_file, on_bad_lines="skip")
            st.write(data)

            if data.isnull().any().any():
                st.error("The dataset contains columns with null values "
                         "and is not appropriate for reading.")
                return

            dataset_name = st.text_input(
                "Enter dataset name:", value="MyDataset"
            )
            version = st.text_input("Enter version:", value="1.0.0")

            asset_base_dir = "datasets"
            os.makedirs(asset_base_dir, exist_ok=True)
            asset_path = f"{asset_base_dir}/{dataset_name}_v{version}.csv"

            if st.button("Save Dataset"):
                if dataset_name and asset_path and version:
                    dataset = Dataset.from_dataframe(
                        data=data,
                        name=dataset_name,
                        asset_path=asset_path,
                        version=version
                    )
                    automl.registry.register(dataset)
                    st.success(
                        f"Dataset '{dataset_name}' saved successfully!"
                    )
                else:
                    st.error(
                        "Please enter all required fields: dataset name, "
                        "asset path, and version."
                    )
        except pd.errors.ParserError as e:
            st.error(f"Error parsing CSV file: {e}")
        except Exception as e:
            st.error(f"An unexpected error occurred: {e}")


def display_existing_datasets() -> None:
    """
    Displays a list of datasets currently registered in the AutoML system,
    including their names and versions.
    """
    st.subheader("Existing Datasets")
    datasets = automl.registry.list(type="dataset")
    for ds in datasets:
        st.write(f"Dataset Name: {ds._name}, Version: {ds._version}")


st.title("Dataset Management")
handle_file_upload()
display_existing_datasets()
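`handle_file_upload` above derives the save location as `<base>/<name>_v<version>.csv`. A self-contained sketch of that naming rule, including the "all required fields" check from the page's else branch (`build_asset_path` is our illustrative helper, not part of the app):

```python
def build_asset_path(name: str, version: str,
                     base_dir: str = "datasets") -> str:
    """Mirror the page's scheme: <base_dir>/<name>_v<version>.csv.

    Raises ValueError when either field is empty, matching the
    'Please enter all required fields' error path above.
    """
    if not name or not version:
        raise ValueError("dataset name and version are both required")
    return f"{base_dir}/{name}_v{version}.csv"


print(build_asset_path("MyDataset", "1.0.0"))  # datasets/MyDataset_v1.0.0.csv
```

Because the version is embedded in the file name, saving the same dataset under a bumped version writes a new asset instead of overwriting the old one.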