diff --git a/.github/workflows/style.yml b/.github/workflows/style.yml new file mode 100644 index 000000000..38c505899 --- /dev/null +++ b/.github/workflows/style.yml @@ -0,0 +1,43 @@ +name: Style check + +on: + push: + branches: + - main + - master + + pull_request: + branches: + - main + - master + +jobs: + flake8_py3: + permissions: write-all + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v2 + + - uses: actions/setup-python@v5 + with: + python-version: "3.10" + + - name: Install flake8 and plugins + run: | + pip install flake8 flake8-docstrings flake8-annotations + + - name: Configure Flake8 + run: | + echo "[flake8]" > .flake8 + echo "extend-ignore = E402" >> .flake8 + echo "exclude = .github,autoop/tests" >> .flake8 + # exclude A101, A102, D100 and everything that starts with D2 and D4 + echo "ignore = ANN101,ANN102,D100,D2,D4,ANN002,ANN003" >> .flake8 + + - name: Run flake8 + uses: suo/flake8-github-action@releases/v1 + with: + checkName: "flake8_py3" + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} \ No newline at end of file diff --git a/.gitignore b/.gitignore index 1832124c8..02561b9cc 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,3 @@ *vscode *__pycache__ *__MACOSX -*assets* \ No newline at end of file diff --git a/.gitignore:Zone.Identifier b/.gitignore:Zone.Identifier deleted file mode 100644 index e69de29bb..000000000 diff --git a/INSTRUCTIONS.md:Zone.Identifier b/INSTRUCTIONS.md:Zone.Identifier deleted file mode 100644 index e69de29bb..000000000 diff --git a/README.md b/README.md index 5c3444ba5..6c501221b 100644 --- a/README.md +++ b/README.md @@ -153,30 +153,30 @@ If you did not implement the feature | Requirement | Type (FN/NF) | Implemented by | Implementation Completed (add X if done) | Comment | |-------------------------------------- |--------------|----------------------|--------------------------|---------| -| Up-to-date requirements.txt | NF | | | | -| `ML/detect-features` | FN | | | | -| 
`ML/artifact` | NF | | | | -| `ML/feature` | NF | | | | -| `ML/metric` | NF | | | | -| `ML/metric/extensions` | FN | | | | -| `ML/model` | NF | | | | -| `ML/model/extensions` | FN | | | | -| `ML/pipeline/evaluation` | FN | | | | -| `ST/page/datasets` | NF | | | | -| `ST/datasets/management/create` | FN | | | | -| `ST/datasets/management/save` | FN | | | | -| `ST/page/modelling` | NF | | | | -| `ST/modelling/datasets/list` | FN | | | | -| `ST/modelling/datasets/features` | FN | | | | -| `ST/modelling/models` | FN | | | | -| `ST/modelling/pipeline/split` | FN | | | | -| `ST/modelling/pipeline/metrics` | FN | | | | -| `ST/modelling/pipeline/summary` | FN | | | | -| `ST/modelling/pipeline/train` | FN | | | | -| `ST/modelling/pipeline/save` | FN | | | | -| `ST/page/deployment` | FN | | | | -| `ST/deployment/load` | FN | | | | -| `ST/deployment/predict` | FN | | | | +| Up-to-date requirements.txt | NF | Ana | X | | +| `ML/detect-features` | FN | Ana | X | | +| `ML/artifact` | NF | Catarina | X | | +| `ML/feature` | NF | Catarina | X | | +| `ML/metric` | NF | Catarina | X | | +| `ML/metric/extensions` | FN | both | X | | +| `ML/model` | NF | Catarina | X | | +| `ML/model/extensions` | FN | both | X | | +| `ML/pipeline/evaluation` | FN | Catarina | X | | +| `ST/page/datasets` | NF | both | X | | +| `ST/datasets/management/create` | FN | Catarina | X | | +| `ST/datasets/management/save` | FN | Catarina | X | | +| `ST/page/modelling` | NF | Ana | X | | +| `ST/modelling/datasets/list` | FN | Ana | X | | +| `ST/modelling/datasets/features` | FN | Catarina | X | | +| `ST/modelling/models` | FN | Catarina | X | | +| `ST/modelling/pipeline/split` | FN | Catarina | X | | +| `ST/modelling/pipeline/metrics` | FN | both | X | | +| `ST/modelling/pipeline/summary` | FN | Ana | X | | +| `ST/modelling/pipeline/train` | FN | Catarina | X | | +| `ST/modelling/pipeline/save` | FN | Catarina | X | | +| `ST/page/deployment` | FN | Ana | X | | +| `ST/deployment/load` | FN | Catarina | X | | +| 
`ST/deployment/predict` | FN | Catarina | X | | If you add extra features, please indicate them below: | Requirement | Type (FN/NF) | Implemented by | Implementation Completed (add X if done) | Comment | diff --git a/README.md:Zone.Identifier b/README.md:Zone.Identifier deleted file mode 100644 index e69de29bb..000000000 diff --git a/app/Welcome.py b/app/Welcome.py index 697979f53..5c08b210f 100644 --- a/app/Welcome.py +++ b/app/Welcome.py @@ -1,4 +1,3 @@ -from autoop.core.ml.artifact import Artifact import streamlit as st st.set_page_config( @@ -6,4 +5,4 @@ page_icon="👋", ) st.sidebar.success("Select a page above.") -st.markdown(open("README.md").read()) \ No newline at end of file +st.markdown(open("README.md").read()) diff --git a/app/Welcome.py:Zone.Identifier b/app/Welcome.py:Zone.Identifier deleted file mode 100644 index e69de29bb..000000000 diff --git a/app/core/system.py b/app/core/system.py index 3a00cf610..73b9ae94b 100644 --- a/app/core/system.py +++ b/app/core/system.py @@ -1,22 +1,46 @@ from autoop.core.storage import LocalStorage from autoop.core.database import Database -from autoop.core.ml.dataset import Dataset from autoop.core.ml.artifact import Artifact from autoop.core.storage import Storage -from typing import List +from typing import List, Optional -class ArtifactRegistry(): - def __init__(self, - database: Database, - storage: Storage): +class ArtifactRegistry: + """ + Manages the registration, retrieval, and deletion of artifacts in the + AutoML system. + + Methods: + register: Registers a new artifact in the system. + list: Lists all artifacts, optionally filtered by type. + get: Retrieves a specific artifact by its ID. + delete: Deletes an artifact from the system. + """ + + def __init__( + self, database: Database, storage: Storage + ) -> None: + """ + Initializes the ArtifactRegistry. + + Args: + database (Database): The database instance for metadata storage. + storage (Storage): The storage instance for artifact data storage. 
+ """ self._database = database self._storage = storage - def register(self, artifact: Artifact): - # save the artifact in the storage + def register(self, artifact: Artifact) -> None: + """ + Registers a new artifact by saving its data and metadata. + + Args: + artifact (Artifact): The artifact to register. + + Returns: + None + """ self._storage.save(artifact.data, artifact.asset_path) - # save the metadata in the database entry = { "name": artifact.name, "version": artifact.version, @@ -25,9 +49,18 @@ def register(self, artifact: Artifact): "metadata": artifact.metadata, "type": artifact.type, } - self._database.set(f"artifacts", artifact.id, entry) - - def list(self, type: str=None) -> List[Artifact]: + self._database.set("artifacts", artifact.id, entry) + + def list(self, type: Optional[str] = None) -> List[Artifact]: + """ + Lists all artifacts, optionally filtered by type. + + Args: + type (Optional[str]): The type of artifact to filter by. + + Returns: + List[Artifact]: A list of matching artifacts. + """ entries = self._database.list("artifacts") artifacts = [] for id, data in entries: @@ -44,8 +77,17 @@ def list(self, type: str=None) -> List[Artifact]: ) artifacts.append(artifact) return artifacts - + def get(self, artifact_id: str) -> Artifact: + """ + Retrieves an artifact by its ID. + + Args: + artifact_id (str): The unique ID of the artifact. + + Returns: + Artifact: The retrieved artifact. + """ data = self._database.get("artifacts", artifact_id) return Artifact( name=data["name"], @@ -56,33 +98,73 @@ def get(self, artifact_id: str) -> Artifact: data=self._storage.load(data["asset_path"]), type=data["type"], ) - - def delete(self, artifact_id: str): + + def delete(self, artifact_id: str) -> None: + """ + Deletes an artifact by its ID. + + Args: + artifact_id (str): The unique ID of the artifact to delete. 
+ + Returns: + None + """ data = self._database.get("artifacts", artifact_id) self._storage.delete(data["asset_path"]) self._database.delete("artifacts", artifact_id) - + class AutoMLSystem: - _instance = None + """ + Singleton class representing the AutoML system. + + Manages the artifact registry, storage, and database for machine learning + operations. + + Attributes: + _storage (LocalStorage): The local storage instance. + _database (Database): The database instance. + _registry (ArtifactRegistry): The artifact registry instance. + """ - def __init__(self, storage: LocalStorage, database: Database): + _instance: Optional["AutoMLSystem"] = None + + def __init__( + self, storage: LocalStorage, database: Database + ) -> None: + """ + Initializes the AutoMLSystem. + + Args: + storage (LocalStorage): The local storage instance. + database (Database): The database instance. + """ self._storage = storage self._database = database self._registry = ArtifactRegistry(database, storage) @staticmethod - def get_instance(): + def get_instance() -> "AutoMLSystem": + """ + Retrieves the singleton instance of the AutoMLSystem. + + Returns: + AutoMLSystem: The singleton instance. + """ if AutoMLSystem._instance is None: AutoMLSystem._instance = AutoMLSystem( - LocalStorage("./assets/objects"), - Database( - LocalStorage("./assets/dbo") - ) + LocalStorage("./assets/objects"), + Database(LocalStorage("./assets/dbo")) ) AutoMLSystem._instance._database.refresh() return AutoMLSystem._instance - + @property - def registry(self): - return self._registry \ No newline at end of file + def registry(self) -> ArtifactRegistry: + """ + Accesses the artifact registry. + + Returns: + ArtifactRegistry: The artifact registry instance. 
def handle_file_upload() -> None:
    """
    Handles the upload of a CSV file and its registration as a dataset.

    The uploaded file is parsed with pandas and displayed. Files containing
    null values are rejected. Otherwise the user enters a dataset name and
    a version, and on "Save Dataset" the data is wrapped in a ``Dataset``
    and registered through ``automl.registry``.

    Returns:
        None
    """
    uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
    if uploaded_file is None:
        return

    try:
        data = pd.read_csv(uploaded_file, on_bad_lines="skip")
        st.write(data)

        if data.isnull().any().any():
            st.error("The dataset contains columns with null values "
                     "and is not appropriate for reading.")
            return

        # Surrounding whitespace would otherwise end up in the file name.
        dataset_name = st.text_input(
            "Enter dataset name:", value="MyDataset"
        ).strip()
        version = st.text_input("Enter version:", value="1.0.0").strip()

        if not st.button("Save Dataset"):
            return

        if not (dataset_name and version):
            st.error(
                "Please enter all required fields: dataset name, "
                "asset path, and version."
            )
            return

        # Both values become part of a file path, so a separator would let
        # the user write outside the "datasets" folder.
        separators = {"/", "\\", os.sep}
        if any(
            sep in field
            for field in (dataset_name, version)
            for sep in separators
        ):
            st.error(
                "Dataset name and version must not contain path separators."
            )
            return

        # NOTE: no os.makedirs here. The asset path is handed to the
        # registry's storage (rooted at ./assets/objects in AutoMLSystem),
        # so creating "datasets" in the working directory only left a
        # stray empty folder behind.
        asset_base_dir = "datasets"
        asset_path = f"{asset_base_dir}/{dataset_name}_v{version}.csv"

        dataset = Dataset.from_dataframe(
            data=data,
            name=dataset_name,
            asset_path=asset_path,
            version=version
        )
        automl.registry.register(dataset)
        st.success(
            f"Dataset '{dataset_name}' saved successfully!"
        )
    except pd.errors.ParserError as e:
        st.error(f"Error parsing CSV file: {e}")
    except Exception as e:
        # UI boundary: report the problem instead of crashing the page.
        st.error(f"An unexpected error occurred: {e}")
+ """ + st.subheader("Existing Datasets") + datasets = automl.registry.list(type="dataset") + for ds in datasets: + st.write(f"Dataset Name: {ds._name}, Version: {ds._version}") + + +st.title("Dataset Management") +handle_file_upload() +display_existing_datasets() diff --git "a/app/pages/1_\360\237\223\212_Datasets.py:Zone.Identifier" "b/app/pages/1_\360\237\223\212_Datasets.py:Zone.Identifier" deleted file mode 100644 index e69de29bb..000000000 diff --git "a/app/pages/2_\342\232\231_Modelling.py" "b/app/pages/2_\342\232\231_Modelling.py" index d3f61c596..c6fd0800d 100644 --- "a/app/pages/2_\342\232\231_Modelling.py" +++ "b/app/pages/2_\342\232\231_Modelling.py" @@ -1,21 +1,372 @@ import streamlit as st import pandas as pd +import io +import pickle from app.core.system import AutoMLSystem +from autoop.core.ml.feature import Feature +from autoop.core.ml.pipeline import Pipeline +from autoop.core.ml.artifact import Artifact from autoop.core.ml.dataset import Dataset - +from autoop.functional.feature import detect_feature_types +from autoop.core.ml.model.classification.classification_models import ( + get_classification_models, +) +from autoop.core.ml.model.regression.regression_models import ( + get_regression_models, +) +from autoop.core.ml.metric import ( + get_metric, + get_classification_metrics, + get_regression_metrics, +) st.set_page_config(page_title="Modelling", page_icon="📈") -def write_helper_text(text: str): - st.write(f"

{text}

", unsafe_allow_html=True) -st.write("# âš™ Modelling") -write_helper_text("In this section, you can design a machine learning pipeline to train a model on a dataset.") +def write_helper_text(text: str) -> None: + """ + Writes helper text in a styled markdown format. + + Args: + text (str): The helper text to display. + """ + st.markdown(f'

{text}

def select_features(
    features: list[Feature],
) -> tuple[list[Feature], "Feature | None"]:
    """
    Allows the user to select input features and a target feature from the
    list of features.

    Features chosen as inputs are not offered as target candidates.

    Args:
        features (list[Feature]): The list of available features.

    Returns:
        tuple[list[Feature], Feature | None]: The selected input features
        and the target feature. The target is ``None`` when no candidate
        is left to choose from (for example when every feature was picked
        as an input).
    """
    feature_names = [feature.name for feature in features]

    input_features_names = st.multiselect(
        "Select input features", feature_names
    )
    chosen_names = set(input_features_names)

    input_features = [
        feature for feature in features if feature.name in chosen_names
    ]
    available_target_features = [
        feature for feature in features if feature.name not in chosen_names
    ]

    target_feature_name = st.selectbox(
        "Select target feature",
        [feature.name for feature in available_target_features],
        key="target_feature",
    )

    # The selectbox yields None when it has no options; a bare next() would
    # then raise StopIteration and crash the page, so fall back to None.
    target_feature = next(
        (
            feature for feature in available_target_features
            if feature.name == target_feature_name
        ),
        None,
    )

    return input_features, target_feature
def select_metrics(task_type: str) -> tuple[list, list[str]]:
    """
    Lets the user pick evaluation metrics that fit the task type.

    Args:
        task_type (str): The task type. "classification" offers the
            classification metrics; any other value offers the regression
            metrics.

    Returns:
        tuple[list, list[str]]: The metric objects resolved through
        ``get_metric`` and the names chosen in the multiselect.
    """
    metric_source = (
        get_classification_metrics
        if task_type == "classification"
        else get_regression_metrics
    )
    metric_labels = [candidate.__str__() for candidate in metric_source()]

    selected_metrics_names = st.multiselect("Select metrics", metric_labels)

    selected_metrics = []
    for metric_name in selected_metrics_names:
        selected_metrics.append(get_metric(metric_name))

    chosen_labels = [chosen.__str__() for chosen in selected_metrics]
    st.write(f"Selected metrics: {chosen_labels}")
    return selected_metrics, selected_metrics_names
def to_artifact(pipeline: Pipeline, name: str, version: str) -> Artifact:
    """
    Packages a pipeline as an ``Artifact`` of type "pipeline".

    The artifact data is a pickled dictionary holding the pipeline's
    components; the metadata carries a lightweight description of them.

    Args:
        pipeline (Pipeline): The pipeline to convert.
        name (str): The name of the artifact.
        version (str): The version of the artifact.

    Returns:
        Artifact: The artifact built from the pipeline.
    """
    # Full snapshot of the pipeline, serialized first.
    snapshot = {
        "name": name,
        "version": version,
        "input_features": pipeline._input_features,
        "target_feature": pipeline._target_feature,
        "split": pipeline._split,
        "model": pipeline._model,
        "metrics": pipeline._metrics,
        "dataset": pipeline._dataset,
    }
    serialized = pickle.dumps(snapshot)

    # Summary stored alongside the pickled payload.
    summary = {
        "input_features": [
            feature.name for feature in pipeline._input_features
        ],
        "target_feature": pipeline._target_feature.name,
        "split": pipeline._split,
        "model_type": pipeline._model.type,
        "metrics": [metric.__str__() for metric in pipeline._metrics],
        "dataset_name": pipeline._dataset._name,
    }

    return Artifact(
        name=name,
        asset_path=f"{name}_v{version}.pkl",
        version=version,
        data=serialized,
        metadata=summary,
        type="pipeline",
        tags=["pipeline", "ml", "automl"],
    )
+ """ + st.subheader("Save Pipeline") + pipeline_name = st.text_input("Enter pipeline name") + pipeline_version = st.text_input("Enter pipeline version") + + if st.button("Save Pipeline"): + if pipeline_name and pipeline_version: + artifact = pipeline.to_artifact( + name=pipeline_name, version=pipeline_version + ) + artifact.save1("assets/objects/pipelines") + st.success( + f"Pipeline '{pipeline_name}' (version {pipeline_version}) " + "saved successfully!" + ) + else: + st.error( + "Please provide both a name and a version for the pipeline." + ) + + +st.title("⚙ Modelling") +write_helper_text( + "This is a Machine Learning pipeline that trains a model on a dataset." +) + +automl, datasets = initialize_automl() +selected_dataset = select_dataset(datasets) + +if selected_dataset: + features = detect_feature_types(selected_dataset) + input_features, target_feature = select_features(features) + + if input_features and target_feature: + task_type = determine_task_type(features, target_feature) + st.subheader(f"Detected task type: {task_type}") -automl = AutoMLSystem.get_instance() + model_class = select_model(task_type) + model = model_class() -datasets = automl.registry.list(type="dataset") + selected_metrics, selected_metrics_names = ( + select_metrics(task_type) + ) + split_ratio = select_split_ratio() -# your code here + pipeline = create_pipeline( + selected_dataset, + input_features, + target_feature, + model, + selected_metrics, + split_ratio, + ) + # display a summary of the pipeline + st.subheader("Pipeline Summary") + st.markdown("### Dataset") + st.write(f"**Name:** {selected_dataset._name}") + st.markdown("### Features") + st.write( + "**Input Features:** " + f"{', '.join([feature.name for feature in input_features])}" + ) + st.write(f"**Target Feature:** {target_feature.name}") + st.markdown("### Model") + st.write(f"**Model:** {model.__class__.__name__}") + st.markdown("### Metrics") + st.write(f"**Metrics:** {', '.join(selected_metrics_names)}") + 
def load_pipelines(directory: str) -> list[str]:
    """
    Lists the saved pipeline files found in the specified directory.

    Args:
        directory (str): The directory to load pipelines from.

    Returns:
        list[str]: The paths of all ``.pkl`` files directly inside
        ``directory``, sorted by name. Empty when the directory does not
        exist, which is the case before any pipeline has been saved.
    """
    # os.listdir raises FileNotFoundError on a missing directory; treat
    # that the same as "no pipelines saved yet".
    if not os.path.isdir(directory):
        return []

    candidates = (
        os.path.join(directory, filename)
        for filename in os.listdir(directory)
        if filename.endswith(".pkl")
    )
    # Keep regular files only and sort, since os.listdir order is arbitrary.
    return sorted(path for path in candidates if os.path.isfile(path))
def predict_with_pipeline(pipeline_data: dict, prediction_data: pd.DataFrame
                          ) -> pd.DataFrame:
    """
    Performs predictions using the loaded pipeline.

    Args:
        pipeline_data (dict): The loaded pipeline data. Its "model" entry
            must provide a ``predict`` method.
        prediction_data (pd.DataFrame): The input data for predictions.
            It is not modified.

    Returns:
        pd.DataFrame: A copy of the input data with an added "Prediction"
        column. For two-dimensional model output only the first column is
        used.
    """
    model = pipeline_data["model"]
    predictions = model.predict(prediction_data)

    if isinstance(predictions, pd.DataFrame):
        predictions = predictions.iloc[:, 0]
    elif getattr(predictions, "ndim", 1) == 2:
        # getattr keeps plain lists (no ndim/shape attribute) working.
        predictions = predictions[:, 0]

    # Assign by position: a Series would otherwise be aligned on its index
    # and produce NaN whenever the input frame has a non-default index.
    if isinstance(predictions, pd.Series):
        predictions = predictions.to_numpy()

    # Work on an explicit copy so the caller's frame never gains the
    # "Prediction" column.
    results = prediction_data.copy()
    results["Prediction"] = predictions

    return results
input_data[target_feature_name] if target_present else None + ) + + if target_present: + st.success( + "The uploaded file contains all required features, " + "including the target feature." + ) + else: + st.warning( + "The target feature is missing. Predictions will " + "still proceed." + ) + + predictions = predict_with_pipeline( + pipeline_data, prediction_data + ) + st.write("### Prediction Results") + st.dataframe(predictions) + + csv_data = predictions.to_csv(index=False) + st.download_button( + label="Download Predictions as CSV", + data=csv_data, + file_name="predictions.csv", + mime="text/csv", + ) + else: + st.error( + "The uploaded file is missing the " + "following input features: " + f"{missing_input_features}" + ) +else: + st.write("No saved pipelines found.") diff --git a/assets/dbo/artifacts/0 b/assets/dbo/artifacts/0 new file mode 100644 index 000000000..b4d3304f5 --- /dev/null +++ b/assets/dbo/artifacts/0 @@ -0,0 +1 @@ +{"name": "iris_csv", "version": "1.0.0", "asset_path": "datasets/iris_csv_v1.0.0.csv", "tags": [], "metadata": {}, "type": "dataset"} \ No newline at end of file diff --git a/assets/dbo/artifacts/15 b/assets/dbo/artifacts/15 new file mode 100644 index 000000000..6c3ad45ea --- /dev/null +++ b/assets/dbo/artifacts/15 @@ -0,0 +1 @@ +{"name": "fish_csv", "version": "1.0.0", "asset_path": "datasets/fish_csv_v1.0.0.csv", "tags": [], "metadata": {}, "type": "dataset"} \ No newline at end of file diff --git a/assets/dbo/artifacts/6 b/assets/dbo/artifacts/6 new file mode 100644 index 000000000..e08dd538d --- /dev/null +++ b/assets/dbo/artifacts/6 @@ -0,0 +1 @@ +{"name": "real_estate_csv", "version": "1.0.0", "asset_path": "datasets/real_estate_csv_v1.0.0.csv", "tags": [], "metadata": {}, "type": "dataset"} \ No newline at end of file diff --git a/assets/objects/datasets/fish_csv_v1.0.0.csv b/assets/objects/datasets/fish_csv_v1.0.0.csv new file mode 100644 index 000000000..5b7e51104 --- /dev/null +++ 
b/assets/objects/datasets/fish_csv_v1.0.0.csv @@ -0,0 +1,160 @@ +Species,Weight,Length1,Length2,Length3,Height,Width +Bream,242.0,23.2,25.4,30.0,11.52,4.02 +Bream,290.0,24.0,26.3,31.2,12.48,4.3056 +Bream,340.0,23.9,26.5,31.1,12.3778,4.6961 +Bream,363.0,26.3,29.0,33.5,12.73,4.4555 +Bream,430.0,26.5,29.0,34.0,12.444,5.134 +Bream,450.0,26.8,29.7,34.7,13.6024,4.9274 +Bream,500.0,26.8,29.7,34.5,14.1795,5.2785 +Bream,390.0,27.6,30.0,35.0,12.67,4.69 +Bream,450.0,27.6,30.0,35.1,14.0049,4.8438 +Bream,500.0,28.5,30.7,36.2,14.2266,4.9594 +Bream,475.0,28.4,31.0,36.2,14.2628,5.1042 +Bream,500.0,28.7,31.0,36.2,14.3714,4.8146 +Bream,500.0,29.1,31.5,36.4,13.7592,4.368 +Bream,340.0,29.5,32.0,37.3,13.9129,5.0728 +Bream,600.0,29.4,32.0,37.2,14.9544,5.1708 +Bream,600.0,29.4,32.0,37.2,15.438,5.58 +Bream,700.0,30.4,33.0,38.3,14.8604,5.2854 +Bream,700.0,30.4,33.0,38.5,14.938,5.1975 +Bream,610.0,30.9,33.5,38.6,15.633,5.1338 +Bream,650.0,31.0,33.5,38.7,14.4738,5.7276 +Bream,575.0,31.3,34.0,39.5,15.1285,5.5695 +Bream,685.0,31.4,34.0,39.2,15.9936,5.3704 +Bream,620.0,31.5,34.5,39.7,15.5227,5.2801 +Bream,680.0,31.8,35.0,40.6,15.4686,6.1306 +Bream,700.0,31.9,35.0,40.5,16.2405,5.589 +Bream,725.0,31.8,35.0,40.9,16.36,6.0532 +Bream,720.0,32.0,35.0,40.6,16.3618,6.09 +Bream,714.0,32.7,36.0,41.5,16.517,5.8515 +Bream,850.0,32.8,36.0,41.6,16.8896,6.1984 +Bream,1000.0,33.5,37.0,42.6,18.957,6.603 +Bream,920.0,35.0,38.5,44.1,18.0369,6.3063 +Bream,955.0,35.0,38.5,44.0,18.084,6.292 +Bream,925.0,36.2,39.5,45.3,18.7542,6.7497 +Bream,975.0,37.4,41.0,45.9,18.6354,6.7473 +Bream,950.0,38.0,41.0,46.5,17.6235,6.3705 +Roach,40.0,12.9,14.1,16.2,4.1472,2.268 +Roach,69.0,16.5,18.2,20.3,5.2983,2.8217 +Roach,78.0,17.5,18.8,21.2,5.5756,2.9044 +Roach,87.0,18.2,19.8,22.2,5.6166,3.1746 +Roach,120.0,18.6,20.0,22.2,6.216,3.5742 +Roach,0.0,19.0,20.5,22.8,6.4752,3.3516 +Roach,110.0,19.1,20.8,23.1,6.1677,3.3957 +Roach,120.0,19.4,21.0,23.7,6.1146,3.2943 +Roach,150.0,20.4,22.0,24.7,5.8045,3.7544 
+Roach,145.0,20.5,22.0,24.3,6.6339,3.5478 +Roach,160.0,20.5,22.5,25.3,7.0334,3.8203 +Roach,140.0,21.0,22.5,25.0,6.55,3.325 +Roach,160.0,21.1,22.5,25.0,6.4,3.8 +Roach,169.0,22.0,24.0,27.2,7.5344,3.8352 +Roach,161.0,22.0,23.4,26.7,6.9153,3.6312 +Roach,200.0,22.1,23.5,26.8,7.3968,4.1272 +Roach,180.0,23.6,25.2,27.9,7.0866,3.906 +Roach,290.0,24.0,26.0,29.2,8.8768,4.4968 +Roach,272.0,25.0,27.0,30.6,8.568,4.7736 +Roach,390.0,29.5,31.7,35.0,9.485,5.355 +Whitefish,270.0,23.6,26.0,28.7,8.3804,4.2476 +Whitefish,270.0,24.1,26.5,29.3,8.1454,4.2485 +Whitefish,306.0,25.6,28.0,30.8,8.778,4.6816 +Whitefish,540.0,28.5,31.0,34.0,10.744,6.562 +Whitefish,800.0,33.7,36.4,39.6,11.7612,6.5736 +Whitefish,1000.0,37.3,40.0,43.5,12.354,6.525 +Parkki,55.0,13.5,14.7,16.5,6.8475,2.3265 +Parkki,60.0,14.3,15.5,17.4,6.5772,2.3142 +Parkki,90.0,16.3,17.7,19.8,7.4052,2.673 +Parkki,120.0,17.5,19.0,21.3,8.3922,2.9181 +Parkki,150.0,18.4,20.0,22.4,8.8928,3.2928 +Parkki,140.0,19.0,20.7,23.2,8.5376,3.2944 +Parkki,170.0,19.0,20.7,23.2,9.396,3.4104 +Parkki,145.0,19.8,21.5,24.1,9.7364,3.1571 +Parkki,200.0,21.2,23.0,25.8,10.3458,3.6636 +Parkki,273.0,23.0,25.0,28.0,11.088,4.144 +Parkki,300.0,24.0,26.0,29.0,11.368,4.234 +Perch,5.9,7.5,8.4,8.8,2.112,1.408 +Perch,32.0,12.5,13.7,14.7,3.528,1.9992 +Perch,40.0,13.8,15.0,16.0,3.824,2.432 +Perch,51.5,15.0,16.2,17.2,4.5924,2.6316 +Perch,70.0,15.7,17.4,18.5,4.588,2.9415 +Perch,100.0,16.2,18.0,19.2,5.2224,3.3216 +Perch,78.0,16.8,18.7,19.4,5.1992,3.1234 +Perch,80.0,17.2,19.0,20.2,5.6358,3.0502 +Perch,85.0,17.8,19.6,20.8,5.1376,3.0368 +Perch,85.0,18.2,20.0,21.0,5.082,2.772 +Perch,110.0,19.0,21.0,22.5,5.6925,3.555 +Perch,115.0,19.0,21.0,22.5,5.9175,3.3075 +Perch,125.0,19.0,21.0,22.5,5.6925,3.6675 +Perch,130.0,19.3,21.3,22.8,6.384,3.534 +Perch,120.0,20.0,22.0,23.5,6.11,3.4075 +Perch,120.0,20.0,22.0,23.5,5.64,3.525 +Perch,130.0,20.0,22.0,23.5,6.11,3.525 +Perch,135.0,20.0,22.0,23.5,5.875,3.525 +Perch,110.0,20.0,22.0,23.5,5.5225,3.995 +Perch,130.0,20.5,22.5,24.0,5.856,3.624 
+Perch,150.0,20.5,22.5,24.0,6.792,3.624 +Perch,145.0,20.7,22.7,24.2,5.9532,3.63 +Perch,150.0,21.0,23.0,24.5,5.2185,3.626 +Perch,170.0,21.5,23.5,25.0,6.275,3.725 +Perch,225.0,22.0,24.0,25.5,7.293,3.723 +Perch,145.0,22.0,24.0,25.5,6.375,3.825 +Perch,188.0,22.6,24.6,26.2,6.7334,4.1658 +Perch,180.0,23.0,25.0,26.5,6.4395,3.6835 +Perch,197.0,23.5,25.6,27.0,6.561,4.239 +Perch,218.0,25.0,26.5,28.0,7.168,4.144 +Perch,300.0,25.2,27.3,28.7,8.323,5.1373 +Perch,260.0,25.4,27.5,28.9,7.1672,4.335 +Perch,265.0,25.4,27.5,28.9,7.0516,4.335 +Perch,250.0,25.4,27.5,28.9,7.2828,4.5662 +Perch,250.0,25.9,28.0,29.4,7.8204,4.2042 +Perch,300.0,26.9,28.7,30.1,7.5852,4.6354 +Perch,320.0,27.8,30.0,31.6,7.6156,4.7716 +Perch,514.0,30.5,32.8,34.0,10.03,6.018 +Perch,556.0,32.0,34.5,36.5,10.2565,6.3875 +Perch,840.0,32.5,35.0,37.3,11.4884,7.7957 +Perch,685.0,34.0,36.5,39.0,10.881,6.864 +Perch,700.0,34.0,36.0,38.3,10.6091,6.7408 +Perch,700.0,34.5,37.0,39.4,10.835,6.2646 +Perch,690.0,34.6,37.0,39.3,10.5717,6.3666 +Perch,900.0,36.5,39.0,41.4,11.1366,7.4934 +Perch,650.0,36.5,39.0,41.4,11.1366,6.003 +Perch,820.0,36.6,39.0,41.3,12.4313,7.3514 +Perch,850.0,36.9,40.0,42.3,11.9286,7.1064 +Perch,900.0,37.0,40.0,42.5,11.73,7.225 +Perch,1015.0,37.0,40.0,42.4,12.3808,7.4624 +Perch,820.0,37.1,40.0,42.5,11.135,6.63 +Perch,1100.0,39.0,42.0,44.6,12.8002,6.8684 +Perch,1000.0,39.8,43.0,45.2,11.9328,7.2772 +Perch,1100.0,40.1,43.0,45.5,12.5125,7.4165 +Perch,1000.0,40.2,43.5,46.0,12.604,8.142 +Perch,1000.0,41.1,44.0,46.6,12.4888,7.5958 +Pike,200.0,30.0,32.3,34.8,5.568,3.3756 +Pike,300.0,31.7,34.0,37.8,5.7078,4.158 +Pike,300.0,32.7,35.0,38.8,5.9364,4.3844 +Pike,300.0,34.8,37.3,39.8,6.2884,4.0198 +Pike,430.0,35.5,38.0,40.5,7.29,4.5765 +Pike,345.0,36.0,38.5,41.0,6.396,3.977 +Pike,456.0,40.0,42.5,45.5,7.28,4.3225 +Pike,510.0,40.0,42.5,45.5,6.825,4.459 +Pike,540.0,40.1,43.0,45.8,7.786,5.1296 +Pike,500.0,42.0,45.0,48.0,6.96,4.896 +Pike,567.0,43.2,46.0,48.7,7.792,4.87 +Pike,770.0,44.8,48.0,51.2,7.68,5.376 
+Pike,950.0,48.3,51.7,55.1,8.9262,6.1712 +Pike,1250.0,52.0,56.0,59.7,10.6863,6.9849 +Pike,1600.0,56.0,60.0,64.0,9.6,6.144 +Pike,1550.0,56.0,60.0,64.0,9.6,6.144 +Pike,1650.0,59.0,63.4,68.0,10.812,7.48 +Smelt,6.7,9.3,9.8,10.8,1.7388,1.0476 +Smelt,7.5,10.0,10.5,11.6,1.972,1.16 +Smelt,7.0,10.1,10.6,11.6,1.7284,1.1484 +Smelt,9.7,10.4,11.0,12.0,2.196,1.38 +Smelt,9.8,10.7,11.2,12.4,2.0832,1.2772 +Smelt,8.7,10.8,11.3,12.6,1.9782,1.2852 +Smelt,10.0,11.3,11.8,13.1,2.2139,1.2838 +Smelt,9.9,11.3,11.8,13.1,2.2139,1.1659 +Smelt,9.8,11.4,12.0,13.2,2.2044,1.1484 +Smelt,12.2,11.5,12.2,13.4,2.0904,1.3936 +Smelt,13.4,11.7,12.4,13.5,2.43,1.269 +Smelt,12.2,12.1,13.0,13.8,2.277,1.2558 +Smelt,19.7,13.2,14.3,15.2,2.8728,2.0672 +Smelt,19.9,13.8,15.0,16.2,2.9322,1.8792 diff --git a/assets/objects/datasets/iris_csv_v1.0.0.csv b/assets/objects/datasets/iris_csv_v1.0.0.csv new file mode 100644 index 000000000..1bf42f254 --- /dev/null +++ b/assets/objects/datasets/iris_csv_v1.0.0.csv @@ -0,0 +1,151 @@ +Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species +1,5.1,3.5,1.4,0.2,Iris-setosa +2,4.9,3.0,1.4,0.2,Iris-setosa +3,4.7,3.2,1.3,0.2,Iris-setosa +4,4.6,3.1,1.5,0.2,Iris-setosa +5,5.0,3.6,1.4,0.2,Iris-setosa +6,5.4,3.9,1.7,0.4,Iris-setosa +7,4.6,3.4,1.4,0.3,Iris-setosa +8,5.0,3.4,1.5,0.2,Iris-setosa +9,4.4,2.9,1.4,0.2,Iris-setosa +10,4.9,3.1,1.5,0.1,Iris-setosa +11,5.4,3.7,1.5,0.2,Iris-setosa +12,4.8,3.4,1.6,0.2,Iris-setosa +13,4.8,3.0,1.4,0.1,Iris-setosa +14,4.3,3.0,1.1,0.1,Iris-setosa +15,5.8,4.0,1.2,0.2,Iris-setosa +16,5.7,4.4,1.5,0.4,Iris-setosa +17,5.4,3.9,1.3,0.4,Iris-setosa +18,5.1,3.5,1.4,0.3,Iris-setosa +19,5.7,3.8,1.7,0.3,Iris-setosa +20,5.1,3.8,1.5,0.3,Iris-setosa +21,5.4,3.4,1.7,0.2,Iris-setosa +22,5.1,3.7,1.5,0.4,Iris-setosa +23,4.6,3.6,1.0,0.2,Iris-setosa +24,5.1,3.3,1.7,0.5,Iris-setosa +25,4.8,3.4,1.9,0.2,Iris-setosa +26,5.0,3.0,1.6,0.2,Iris-setosa +27,5.0,3.4,1.6,0.4,Iris-setosa +28,5.2,3.5,1.5,0.2,Iris-setosa +29,5.2,3.4,1.4,0.2,Iris-setosa 
+30,4.7,3.2,1.6,0.2,Iris-setosa +31,4.8,3.1,1.6,0.2,Iris-setosa +32,5.4,3.4,1.5,0.4,Iris-setosa +33,5.2,4.1,1.5,0.1,Iris-setosa +34,5.5,4.2,1.4,0.2,Iris-setosa +35,4.9,3.1,1.5,0.1,Iris-setosa +36,5.0,3.2,1.2,0.2,Iris-setosa +37,5.5,3.5,1.3,0.2,Iris-setosa +38,4.9,3.1,1.5,0.1,Iris-setosa +39,4.4,3.0,1.3,0.2,Iris-setosa +40,5.1,3.4,1.5,0.2,Iris-setosa +41,5.0,3.5,1.3,0.3,Iris-setosa +42,4.5,2.3,1.3,0.3,Iris-setosa +43,4.4,3.2,1.3,0.2,Iris-setosa +44,5.0,3.5,1.6,0.6,Iris-setosa +45,5.1,3.8,1.9,0.4,Iris-setosa +46,4.8,3.0,1.4,0.3,Iris-setosa +47,5.1,3.8,1.6,0.2,Iris-setosa +48,4.6,3.2,1.4,0.2,Iris-setosa +49,5.3,3.7,1.5,0.2,Iris-setosa +50,5.0,3.3,1.4,0.2,Iris-setosa +51,7.0,3.2,4.7,1.4,Iris-versicolor +52,6.4,3.2,4.5,1.5,Iris-versicolor +53,6.9,3.1,4.9,1.5,Iris-versicolor +54,5.5,2.3,4.0,1.3,Iris-versicolor +55,6.5,2.8,4.6,1.5,Iris-versicolor +56,5.7,2.8,4.5,1.3,Iris-versicolor +57,6.3,3.3,4.7,1.6,Iris-versicolor +58,4.9,2.4,3.3,1.0,Iris-versicolor +59,6.6,2.9,4.6,1.3,Iris-versicolor +60,5.2,2.7,3.9,1.4,Iris-versicolor +61,5.0,2.0,3.5,1.0,Iris-versicolor +62,5.9,3.0,4.2,1.5,Iris-versicolor +63,6.0,2.2,4.0,1.0,Iris-versicolor +64,6.1,2.9,4.7,1.4,Iris-versicolor +65,5.6,2.9,3.6,1.3,Iris-versicolor +66,6.7,3.1,4.4,1.4,Iris-versicolor +67,5.6,3.0,4.5,1.5,Iris-versicolor +68,5.8,2.7,4.1,1.0,Iris-versicolor +69,6.2,2.2,4.5,1.5,Iris-versicolor +70,5.6,2.5,3.9,1.1,Iris-versicolor +71,5.9,3.2,4.8,1.8,Iris-versicolor +72,6.1,2.8,4.0,1.3,Iris-versicolor +73,6.3,2.5,4.9,1.5,Iris-versicolor +74,6.1,2.8,4.7,1.2,Iris-versicolor +75,6.4,2.9,4.3,1.3,Iris-versicolor +76,6.6,3.0,4.4,1.4,Iris-versicolor +77,6.8,2.8,4.8,1.4,Iris-versicolor +78,6.7,3.0,5.0,1.7,Iris-versicolor +79,6.0,2.9,4.5,1.5,Iris-versicolor +80,5.7,2.6,3.5,1.0,Iris-versicolor +81,5.5,2.4,3.8,1.1,Iris-versicolor +82,5.5,2.4,3.7,1.0,Iris-versicolor +83,5.8,2.7,3.9,1.2,Iris-versicolor +84,6.0,2.7,5.1,1.6,Iris-versicolor +85,5.4,3.0,4.5,1.5,Iris-versicolor +86,6.0,3.4,4.5,1.6,Iris-versicolor 
+87,6.7,3.1,4.7,1.5,Iris-versicolor +88,6.3,2.3,4.4,1.3,Iris-versicolor +89,5.6,3.0,4.1,1.3,Iris-versicolor +90,5.5,2.5,4.0,1.3,Iris-versicolor +91,5.5,2.6,4.4,1.2,Iris-versicolor +92,6.1,3.0,4.6,1.4,Iris-versicolor +93,5.8,2.6,4.0,1.2,Iris-versicolor +94,5.0,2.3,3.3,1.0,Iris-versicolor +95,5.6,2.7,4.2,1.3,Iris-versicolor +96,5.7,3.0,4.2,1.2,Iris-versicolor +97,5.7,2.9,4.2,1.3,Iris-versicolor +98,6.2,2.9,4.3,1.3,Iris-versicolor +99,5.1,2.5,3.0,1.1,Iris-versicolor +100,5.7,2.8,4.1,1.3,Iris-versicolor +101,6.3,3.3,6.0,2.5,Iris-virginica +102,5.8,2.7,5.1,1.9,Iris-virginica +103,7.1,3.0,5.9,2.1,Iris-virginica +104,6.3,2.9,5.6,1.8,Iris-virginica +105,6.5,3.0,5.8,2.2,Iris-virginica +106,7.6,3.0,6.6,2.1,Iris-virginica +107,4.9,2.5,4.5,1.7,Iris-virginica +108,7.3,2.9,6.3,1.8,Iris-virginica +109,6.7,2.5,5.8,1.8,Iris-virginica +110,7.2,3.6,6.1,2.5,Iris-virginica +111,6.5,3.2,5.1,2.0,Iris-virginica +112,6.4,2.7,5.3,1.9,Iris-virginica +113,6.8,3.0,5.5,2.1,Iris-virginica +114,5.7,2.5,5.0,2.0,Iris-virginica +115,5.8,2.8,5.1,2.4,Iris-virginica +116,6.4,3.2,5.3,2.3,Iris-virginica +117,6.5,3.0,5.5,1.8,Iris-virginica +118,7.7,3.8,6.7,2.2,Iris-virginica +119,7.7,2.6,6.9,2.3,Iris-virginica +120,6.0,2.2,5.0,1.5,Iris-virginica +121,6.9,3.2,5.7,2.3,Iris-virginica +122,5.6,2.8,4.9,2.0,Iris-virginica +123,7.7,2.8,6.7,2.0,Iris-virginica +124,6.3,2.7,4.9,1.8,Iris-virginica +125,6.7,3.3,5.7,2.1,Iris-virginica +126,7.2,3.2,6.0,1.8,Iris-virginica +127,6.2,2.8,4.8,1.8,Iris-virginica +128,6.1,3.0,4.9,1.8,Iris-virginica +129,6.4,2.8,5.6,2.1,Iris-virginica +130,7.2,3.0,5.8,1.6,Iris-virginica +131,7.4,2.8,6.1,1.9,Iris-virginica +132,7.9,3.8,6.4,2.0,Iris-virginica +133,6.4,2.8,5.6,2.2,Iris-virginica +134,6.3,2.8,5.1,1.5,Iris-virginica +135,6.1,2.6,5.6,1.4,Iris-virginica +136,7.7,3.0,6.1,2.3,Iris-virginica +137,6.3,3.4,5.6,2.4,Iris-virginica +138,6.4,3.1,5.5,1.8,Iris-virginica +139,6.0,3.0,4.8,1.8,Iris-virginica +140,6.9,3.1,5.4,2.1,Iris-virginica +141,6.7,3.1,5.6,2.4,Iris-virginica 
+142,6.9,3.1,5.1,2.3,Iris-virginica +143,5.8,2.7,5.1,1.9,Iris-virginica +144,6.8,3.2,5.9,2.3,Iris-virginica +145,6.7,3.3,5.7,2.5,Iris-virginica +146,6.7,3.0,5.2,2.3,Iris-virginica +147,6.3,2.5,5.0,1.9,Iris-virginica +148,6.5,3.0,5.2,2.0,Iris-virginica +149,6.2,3.4,5.4,2.3,Iris-virginica +150,5.9,3.0,5.1,1.8,Iris-virginica diff --git a/assets/objects/datasets/real_estate_csv_v1.0.0.csv b/assets/objects/datasets/real_estate_csv_v1.0.0.csv new file mode 100644 index 000000000..f0251337e --- /dev/null +++ b/assets/objects/datasets/real_estate_csv_v1.0.0.csv @@ -0,0 +1,415 @@ +No,X1 transaction date,X2 house age,X3 distance to the nearest MRT station,X4 number of convenience stores,X5 latitude,X6 longitude,Y house price of unit area +1,2012.917,32.0,84.87882,10,24.98298,121.54024,37.9 +2,2012.917,19.5,306.5947,9,24.98034,121.53951,42.2 +3,2013.583,13.3,561.9845,5,24.98746,121.54391,47.3 +4,2013.5,13.3,561.9845,5,24.98746,121.54391,54.8 +5,2012.833,5.0,390.5684,5,24.97937,121.54245,43.1 +6,2012.667,7.1,2175.03,3,24.96305,121.51254,32.1 +7,2012.667,34.5,623.4731,7,24.97933,121.53642,40.3 +8,2013.417,20.3,287.6025,6,24.98042,121.54228,46.7 +9,2013.5,31.7,5512.038,1,24.95095,121.48458,18.8 +10,2013.417,17.9,1783.18,3,24.96731,121.51486,22.1 +11,2013.083,34.8,405.2134,1,24.97349,121.53372,41.4 +12,2013.333,6.3,90.45606,9,24.97433,121.5431,58.1 +13,2012.917,13.0,492.2313,5,24.96515,121.53737,39.3 +14,2012.667,20.4,2469.645,4,24.96108,121.51046,23.8 +15,2013.5,13.2,1164.838,4,24.99156,121.53406,34.3 +16,2013.583,35.7,579.2083,2,24.9824,121.54619,50.5 +17,2013.25,0.0,292.9978,6,24.97744,121.54458,70.1 +18,2012.75,17.7,350.8515,1,24.97544,121.53119,37.4 +19,2013.417,16.9,368.1363,8,24.9675,121.54451,42.3 +20,2012.667,1.5,23.38284,7,24.96772,121.54102,47.7 +21,2013.417,4.5,2275.877,3,24.96314,121.51151,29.3 +22,2013.417,10.5,279.1726,7,24.97528,121.54541,51.6 +23,2012.917,14.7,1360.139,1,24.95204,121.54842,24.6 +24,2013.083,10.1,279.1726,7,24.97528,121.54541,47.9 
+25,2013.0,39.6,480.6977,4,24.97353,121.53885,38.8 +26,2013.083,29.3,1487.868,2,24.97542,121.51726,27.0 +27,2012.667,3.1,383.8624,5,24.98085,121.54391,56.2 +28,2013.25,10.4,276.449,5,24.95593,121.53913,33.6 +29,2013.5,19.2,557.478,4,24.97419,121.53797,47.0 +30,2013.083,7.1,451.2438,5,24.97563,121.54694,57.1 +31,2013.5,25.9,4519.69,0,24.94826,121.49587,22.1 +32,2012.75,29.6,769.4034,7,24.98281,121.53408,25.0 +33,2012.75,37.9,488.5727,1,24.97349,121.53451,34.2 +34,2013.25,16.5,323.655,6,24.97841,121.54281,49.3 +35,2012.75,15.4,205.367,7,24.98419,121.54243,55.1 +36,2013.5,13.9,4079.418,0,25.01459,121.51816,27.3 +37,2012.917,14.7,1935.009,2,24.96386,121.51458,22.9 +38,2013.167,12.0,1360.139,1,24.95204,121.54842,25.3 +39,2012.667,3.1,577.9615,6,24.97201,121.54722,47.7 +40,2013.167,16.2,289.3248,5,24.98203,121.54348,46.2 +41,2013.0,13.6,4082.015,0,24.94155,121.50381,15.9 +42,2013.5,16.8,4066.587,0,24.94297,121.50342,18.2 +43,2013.417,36.1,519.4617,5,24.96305,121.53758,34.7 +44,2012.75,34.4,512.7871,6,24.98748,121.54301,34.1 +45,2013.583,2.7,533.4762,4,24.97445,121.54765,53.9 +46,2013.083,36.6,488.8193,8,24.97015,121.54494,38.3 +47,2013.417,21.7,463.9623,9,24.9703,121.54458,42.0 +48,2013.583,35.9,640.7391,3,24.97563,121.53715,61.5 +49,2013.417,24.2,4605.749,0,24.94684,121.49578,13.4 +50,2012.667,29.4,4510.359,1,24.94925,121.49542,13.2 +51,2013.417,21.7,512.5487,4,24.974,121.53842,44.2 +52,2013.083,31.3,1758.406,1,24.95402,121.55282,20.7 +53,2013.583,32.1,1438.579,3,24.97419,121.5175,27.0 +54,2013.083,13.3,492.2313,5,24.96515,121.53737,38.9 +55,2013.083,16.1,289.3248,5,24.98203,121.54348,51.7 +56,2012.833,31.7,1160.632,0,24.94968,121.53009,13.7 +57,2013.417,33.6,371.2495,8,24.97254,121.54059,41.9 +58,2012.917,3.5,56.47425,7,24.95744,121.53711,53.5 +59,2013.5,30.3,4510.359,1,24.94925,121.49542,22.6 +60,2013.083,13.3,336.0532,5,24.95776,121.53438,42.4 +61,2013.417,11.0,1931.207,2,24.96365,121.51471,21.3 +62,2013.5,5.3,259.6607,6,24.97585,121.54516,63.2 
+63,2012.917,17.2,2175.877,3,24.96303,121.51254,27.7 +64,2013.583,2.6,533.4762,4,24.97445,121.54765,55.0 +65,2013.333,17.5,995.7554,0,24.96305,121.54915,25.3 +66,2013.417,40.1,123.7429,8,24.97635,121.54329,44.3 +67,2013.0,1.0,193.5845,6,24.96571,121.54089,50.7 +68,2013.5,8.5,104.8101,5,24.96674,121.54067,56.8 +69,2013.417,30.4,464.223,6,24.97964,121.53805,36.2 +70,2012.833,12.5,561.9845,5,24.98746,121.54391,42.0 +71,2013.583,6.6,90.45606,9,24.97433,121.5431,59.0 +72,2013.083,35.5,640.7391,3,24.97563,121.53715,40.8 +73,2013.583,32.5,424.5442,8,24.97587,121.53913,36.3 +74,2013.167,13.8,4082.015,0,24.94155,121.50381,20.0 +75,2012.917,6.8,379.5575,10,24.98343,121.53762,54.4 +76,2013.5,12.3,1360.139,1,24.95204,121.54842,29.5 +77,2013.583,35.9,616.4004,3,24.97723,121.53767,36.8 +78,2012.833,20.5,2185.128,3,24.96322,121.51237,25.6 +79,2012.917,38.2,552.4371,2,24.97598,121.53381,29.8 +80,2013.0,18.0,1414.837,1,24.95182,121.54887,26.5 +81,2013.5,11.8,533.4762,4,24.97445,121.54765,40.3 +82,2013.0,30.8,377.7956,6,24.96427,121.53964,36.8 +83,2013.083,13.2,150.9347,7,24.96725,121.54252,48.1 +84,2012.917,25.3,2707.392,3,24.96056,121.50831,17.7 +85,2013.083,15.1,383.2805,7,24.96735,121.54464,43.7 +86,2012.75,0.0,338.9679,9,24.96853,121.54413,50.8 +87,2012.833,1.8,1455.798,1,24.9512,121.549,27.0 +88,2013.583,16.9,4066.587,0,24.94297,121.50342,18.3 +89,2012.917,8.9,1406.43,0,24.98573,121.52758,48.0 +90,2013.5,23.0,3947.945,0,24.94783,121.50243,25.3 +91,2012.833,0.0,274.0144,1,24.9748,121.53059,45.4 +92,2013.25,9.1,1402.016,0,24.98569,121.5276,43.2 +93,2012.917,20.6,2469.645,4,24.96108,121.51046,21.8 +94,2012.917,31.9,1146.329,0,24.9492,121.53076,16.1 +95,2012.917,40.9,167.5989,5,24.9663,121.54026,41.0 +96,2012.917,8.0,104.8101,5,24.96674,121.54067,51.8 +97,2013.417,6.4,90.45606,9,24.97433,121.5431,59.5 +98,2013.083,28.4,617.4424,3,24.97746,121.53299,34.6 +99,2013.417,16.4,289.3248,5,24.98203,121.54348,51.0 +100,2013.417,6.4,90.45606,9,24.97433,121.5431,62.2 
+101,2013.5,17.5,964.7496,4,24.98872,121.53411,38.2 +102,2012.833,12.7,170.1289,1,24.97371,121.52984,32.9 +103,2013.083,1.1,193.5845,6,24.96571,121.54089,54.4 +104,2012.75,0.0,208.3905,6,24.95618,121.53844,45.7 +105,2012.667,32.7,392.4459,6,24.96398,121.5425,30.5 +106,2012.833,0.0,292.9978,6,24.97744,121.54458,71.0 +107,2013.083,17.2,189.5181,8,24.97707,121.54308,47.1 +108,2013.333,12.2,1360.139,1,24.95204,121.54842,26.6 +109,2013.417,31.4,592.5006,2,24.9726,121.53561,34.1 +110,2013.583,4.0,2147.376,3,24.96299,121.51284,28.4 +111,2013.083,8.1,104.8101,5,24.96674,121.54067,51.6 +112,2013.583,33.3,196.6172,7,24.97701,121.54224,39.4 +113,2013.417,9.9,2102.427,3,24.96044,121.51462,23.1 +114,2013.333,14.8,393.2606,6,24.96172,121.53812,7.6 +115,2012.667,30.6,143.8383,8,24.98155,121.54142,53.3 +116,2013.083,20.6,737.9161,2,24.98092,121.54739,46.4 +117,2013.0,30.9,6396.283,1,24.94375,121.47883,12.2 +118,2013.0,13.6,4197.349,0,24.93885,121.50383,13.0 +119,2013.5,25.3,1583.722,3,24.96622,121.51709,30.6 +120,2013.5,16.6,289.3248,5,24.98203,121.54348,59.6 +121,2013.167,13.3,492.2313,5,24.96515,121.53737,31.3 +122,2013.5,13.6,492.2313,5,24.96515,121.53737,48.0 +123,2013.25,31.5,414.9476,4,24.98199,121.54464,32.5 +124,2013.417,0.0,185.4296,0,24.9711,121.5317,45.5 +125,2012.917,9.9,279.1726,7,24.97528,121.54541,57.4 +126,2013.167,1.1,193.5845,6,24.96571,121.54089,48.6 +127,2013.083,38.6,804.6897,4,24.97838,121.53477,62.9 +128,2013.25,3.8,383.8624,5,24.98085,121.54391,55.0 +129,2013.083,41.3,124.9912,6,24.96674,121.54039,60.7 +130,2013.417,38.5,216.8329,7,24.98086,121.54162,41.0 +131,2013.25,29.6,535.527,8,24.98092,121.53653,37.5 +132,2013.5,4.0,2147.376,3,24.96299,121.51284,30.7 +133,2013.167,26.6,482.7581,5,24.97433,121.53863,37.5 +134,2012.833,18.0,373.3937,8,24.9866,121.54082,39.5 +135,2012.667,33.4,186.9686,6,24.96604,121.54211,42.2 +136,2012.917,18.9,1009.235,0,24.96357,121.54951,20.8 +137,2012.75,11.4,390.5684,5,24.97937,121.54245,46.8 
+138,2013.5,13.6,319.0708,6,24.96495,121.54277,47.4 +139,2013.167,10.0,942.4664,0,24.97843,121.52406,43.5 +140,2012.667,12.9,492.2313,5,24.96515,121.53737,42.5 +141,2013.25,16.2,289.3248,5,24.98203,121.54348,51.4 +142,2013.333,5.1,1559.827,3,24.97213,121.51627,28.9 +143,2013.417,19.8,640.6071,5,24.97017,121.54647,37.5 +144,2013.5,13.6,492.2313,5,24.96515,121.53737,40.1 +145,2013.083,11.9,1360.139,1,24.95204,121.54842,28.4 +146,2012.917,2.1,451.2438,5,24.97563,121.54694,45.5 +147,2012.75,0.0,185.4296,0,24.9711,121.5317,52.2 +148,2012.75,3.2,489.8821,8,24.97017,121.54494,43.2 +149,2013.5,16.4,3780.59,0,24.93293,121.51203,45.1 +150,2012.667,34.9,179.4538,8,24.97349,121.54245,39.7 +151,2013.25,35.8,170.7311,7,24.96719,121.54269,48.5 +152,2013.5,4.9,387.7721,9,24.98118,121.53788,44.7 +153,2013.333,12.0,1360.139,1,24.95204,121.54842,28.9 +154,2013.25,6.5,376.1709,6,24.95418,121.53713,40.9 +155,2013.5,16.9,4066.587,0,24.94297,121.50342,20.7 +156,2013.167,13.8,4082.015,0,24.94155,121.50381,15.6 +157,2013.583,30.7,1264.73,0,24.94883,121.52954,18.3 +158,2013.25,16.1,815.9314,4,24.97886,121.53464,35.6 +159,2013.0,11.6,390.5684,5,24.97937,121.54245,39.4 +160,2012.667,15.5,815.9314,4,24.97886,121.53464,37.4 +161,2012.917,3.5,49.66105,8,24.95836,121.53756,57.8 +162,2013.417,19.2,616.4004,3,24.97723,121.53767,39.6 +163,2012.75,16.0,4066.587,0,24.94297,121.50342,11.6 +164,2013.5,8.5,104.8101,5,24.96674,121.54067,55.5 +165,2012.833,0.0,185.4296,0,24.9711,121.5317,55.2 +166,2012.917,13.7,1236.564,1,24.97694,121.55391,30.6 +167,2013.417,0.0,292.9978,6,24.97744,121.54458,73.6 +168,2013.417,28.2,330.0854,8,24.97408,121.54011,43.4 +169,2013.083,27.6,515.1122,5,24.96299,121.5432,37.4 +170,2013.417,8.4,1962.628,1,24.95468,121.55481,23.5 +171,2013.333,24.0,4527.687,0,24.94741,121.49628,14.4 +172,2013.083,3.6,383.8624,5,24.98085,121.54391,58.8 +173,2013.583,6.6,90.45606,9,24.97433,121.5431,58.1 +174,2013.083,41.3,401.8807,4,24.98326,121.5446,35.1 
+175,2013.417,4.3,432.0385,7,24.9805,121.53778,45.2 +176,2013.083,30.2,472.1745,3,24.97005,121.53758,36.5 +177,2012.833,13.9,4573.779,0,24.94867,121.49507,19.2 +178,2013.083,33.0,181.0766,9,24.97697,121.54262,42.0 +179,2013.5,13.1,1144.436,4,24.99176,121.53456,36.7 +180,2013.083,14.0,438.8513,1,24.97493,121.5273,42.6 +181,2012.667,26.9,4449.27,0,24.94898,121.49621,15.5 +182,2013.167,11.6,201.8939,8,24.98489,121.54121,55.9 +183,2013.5,13.5,2147.376,3,24.96299,121.51284,23.6 +184,2013.5,17.0,4082.015,0,24.94155,121.50381,18.8 +185,2012.75,14.1,2615.465,0,24.95495,121.56174,21.8 +186,2012.75,31.4,1447.286,3,24.97285,121.5173,21.5 +187,2013.167,20.9,2185.128,3,24.96322,121.51237,25.7 +188,2013.0,8.9,3078.176,0,24.95464,121.56627,22.0 +189,2012.917,34.8,190.0392,8,24.97707,121.54312,44.3 +190,2012.917,16.3,4066.587,0,24.94297,121.50342,20.5 +191,2013.5,35.3,616.5735,8,24.97945,121.53642,42.3 +192,2013.167,13.2,750.0704,2,24.97371,121.54951,37.8 +193,2013.167,43.8,57.58945,7,24.9675,121.54069,42.7 +194,2013.417,9.7,421.479,5,24.98246,121.54477,49.3 +195,2013.5,15.2,3771.895,0,24.93363,121.51158,29.3 +196,2013.333,15.2,461.1016,5,24.95425,121.5399,34.6 +197,2013.0,22.8,707.9067,2,24.981,121.54713,36.6 +198,2013.25,34.4,126.7286,8,24.96881,121.54089,48.2 +199,2013.083,34.0,157.6052,7,24.96628,121.54196,39.1 +200,2013.417,18.2,451.6419,8,24.96945,121.5449,31.6 +201,2013.417,17.4,995.7554,0,24.96305,121.54915,25.5 +202,2013.417,13.1,561.9845,5,24.98746,121.54391,45.9 +203,2012.917,38.3,642.6985,3,24.97559,121.53713,31.5 +204,2012.667,15.6,289.3248,5,24.98203,121.54348,46.1 +205,2013.0,18.0,1414.837,1,24.95182,121.54887,26.6 +206,2013.083,12.8,1449.722,3,24.97289,121.51728,21.4 +207,2013.25,22.2,379.5575,10,24.98343,121.53762,44.0 +208,2013.083,38.5,665.0636,3,24.97503,121.53692,34.2 +209,2012.75,11.5,1360.139,1,24.95204,121.54842,26.2 +210,2012.833,34.8,175.6294,8,24.97347,121.54271,40.9 +211,2013.5,5.2,390.5684,5,24.97937,121.54245,52.2 
+212,2013.083,0.0,274.0144,1,24.9748,121.53059,43.5 +213,2013.333,17.6,1805.665,2,24.98672,121.52091,31.1 +214,2013.083,6.2,90.45606,9,24.97433,121.5431,58.0 +215,2013.583,18.1,1783.18,3,24.96731,121.51486,20.9 +216,2013.333,19.2,383.7129,8,24.972,121.54477,48.1 +217,2013.25,37.8,590.9292,1,24.97153,121.53559,39.7 +218,2012.917,28.0,372.6242,6,24.97838,121.54119,40.8 +219,2013.417,13.6,492.2313,5,24.96515,121.53737,43.8 +220,2012.75,29.3,529.7771,8,24.98102,121.53655,40.2 +221,2013.333,37.2,186.5101,9,24.97703,121.54265,78.3 +222,2013.333,9.0,1402.016,0,24.98569,121.5276,38.5 +223,2013.583,30.6,431.1114,10,24.98123,121.53743,48.5 +224,2013.25,9.1,1402.016,0,24.98569,121.5276,42.3 +225,2013.333,34.5,324.9419,6,24.97814,121.5417,46.0 +226,2013.25,1.1,193.5845,6,24.96571,121.54089,49.0 +227,2013.0,16.5,4082.015,0,24.94155,121.50381,12.8 +228,2012.917,32.4,265.0609,8,24.98059,121.53986,40.2 +229,2013.417,11.9,3171.329,0,25.00115,121.51776,46.6 +230,2013.583,31.0,1156.412,0,24.9489,121.53095,19.0 +231,2013.5,4.0,2147.376,3,24.96299,121.51284,33.4 +232,2012.833,16.2,4074.736,0,24.94235,121.50357,14.7 +233,2012.917,27.1,4412.765,1,24.95032,121.49587,17.4 +234,2013.333,39.7,333.3679,9,24.98016,121.53932,32.4 +235,2013.25,8.0,2216.612,4,24.96007,121.51361,23.9 +236,2012.75,12.9,250.631,7,24.96606,121.54297,39.3 +237,2013.167,3.6,373.8389,10,24.98322,121.53765,61.9 +238,2013.167,13.0,732.8528,0,24.97668,121.52518,39.0 +239,2013.083,12.8,732.8528,0,24.97668,121.52518,40.6 +240,2013.5,18.1,837.7233,0,24.96334,121.54767,29.7 +241,2013.083,11.0,1712.632,2,24.96412,121.5167,28.8 +242,2013.5,13.7,250.631,7,24.96606,121.54297,41.4 +243,2012.833,2.0,2077.39,3,24.96357,121.51329,33.4 +244,2013.417,32.8,204.1705,8,24.98236,121.53923,48.2 +245,2013.083,4.8,1559.827,3,24.97213,121.51627,21.7 +246,2013.417,7.5,639.6198,5,24.97258,121.54814,40.8 +247,2013.417,16.4,389.8219,6,24.96412,121.54273,40.6 +248,2013.333,21.7,1055.067,0,24.96211,121.54928,23.1 
+249,2013.0,19.0,1009.235,0,24.96357,121.54951,22.3 +250,2012.833,18.0,6306.153,1,24.95743,121.47516,15.0 +251,2013.167,39.2,424.7132,7,24.97429,121.53917,30.0 +252,2012.917,31.7,1159.454,0,24.9496,121.53018,13.8 +253,2012.833,5.9,90.45606,9,24.97433,121.5431,52.7 +254,2012.667,30.4,1735.595,2,24.96464,121.51623,25.9 +255,2012.667,1.1,329.9747,5,24.98254,121.54395,51.8 +256,2013.417,31.5,5512.038,1,24.95095,121.48458,17.4 +257,2012.667,14.6,339.2289,1,24.97519,121.53151,26.5 +258,2013.25,17.3,444.1334,1,24.97501,121.5273,43.9 +259,2013.417,0.0,292.9978,6,24.97744,121.54458,63.3 +260,2013.083,17.7,837.7233,0,24.96334,121.54767,28.8 +261,2013.25,17.0,1485.097,4,24.97073,121.517,30.7 +262,2013.167,16.2,2288.011,3,24.95885,121.51359,24.4 +263,2012.917,15.9,289.3248,5,24.98203,121.54348,53.0 +264,2013.417,3.9,2147.376,3,24.96299,121.51284,31.7 +265,2013.167,32.6,493.657,7,24.96968,121.54522,40.6 +266,2012.833,15.7,815.9314,4,24.97886,121.53464,38.1 +267,2013.25,17.8,1783.18,3,24.96731,121.51486,23.7 +268,2012.833,34.7,482.7581,5,24.97433,121.53863,41.1 +269,2013.417,17.2,390.5684,5,24.97937,121.54245,40.1 +270,2013.0,17.6,837.7233,0,24.96334,121.54767,23.0 +271,2013.333,10.8,252.5822,1,24.9746,121.53046,117.5 +272,2012.917,17.7,451.6419,8,24.96945,121.5449,26.5 +273,2012.75,13.0,492.2313,5,24.96515,121.53737,40.5 +274,2013.417,13.2,170.1289,1,24.97371,121.52984,29.3 +275,2013.167,27.5,394.0173,7,24.97305,121.53994,41.0 +276,2012.667,1.5,23.38284,7,24.96772,121.54102,49.7 +277,2013.0,19.1,461.1016,5,24.95425,121.5399,34.0 +278,2013.417,21.2,2185.128,3,24.96322,121.51237,27.7 +279,2012.75,0.0,208.3905,6,24.95618,121.53844,44.0 +280,2013.417,2.6,1554.25,3,24.97026,121.51642,31.1 +281,2013.25,2.3,184.3302,6,24.96581,121.54086,45.4 +282,2013.333,4.7,387.7721,9,24.98118,121.53788,44.8 +283,2012.917,2.0,1455.798,1,24.9512,121.549,25.6 +284,2013.417,33.5,1978.671,2,24.98674,121.51844,23.5 +285,2012.917,15.0,383.2805,7,24.96735,121.54464,34.4 
+286,2013.167,30.1,718.2937,3,24.97509,121.53644,55.3 +287,2012.917,5.9,90.45606,9,24.97433,121.5431,56.3 +288,2013.0,19.2,461.1016,5,24.95425,121.5399,32.9 +289,2013.583,16.6,323.6912,6,24.97841,121.5428,51.0 +290,2013.333,13.9,289.3248,5,24.98203,121.54348,44.5 +291,2013.083,37.7,490.3446,0,24.97217,121.53471,37.0 +292,2012.833,3.4,56.47425,7,24.95744,121.53711,54.4 +293,2013.083,17.5,395.6747,5,24.95674,121.534,24.5 +294,2012.667,12.6,383.2805,7,24.96735,121.54464,42.5 +295,2013.5,26.4,335.5273,6,24.9796,121.5414,38.1 +296,2013.167,18.2,2179.59,3,24.96299,121.51252,21.8 +297,2012.75,12.5,1144.436,4,24.99176,121.53456,34.1 +298,2012.833,34.9,567.0349,4,24.97003,121.5458,28.5 +299,2013.333,16.7,4082.015,0,24.94155,121.50381,16.7 +300,2013.167,33.2,121.7262,10,24.98178,121.54059,46.1 +301,2013.083,2.5,156.2442,4,24.96696,121.53992,36.9 +302,2012.75,38.0,461.7848,0,24.97229,121.53445,35.7 +303,2013.5,16.5,2288.011,3,24.95885,121.51359,23.2 +304,2013.5,38.3,439.7105,0,24.97161,121.53423,38.4 +305,2013.417,20.0,1626.083,3,24.96622,121.51668,29.4 +306,2013.083,16.2,289.3248,5,24.98203,121.54348,55.0 +307,2013.5,14.4,169.9803,1,24.97369,121.52979,50.2 +308,2012.833,10.3,3079.89,0,24.9546,121.56627,24.7 +309,2013.417,16.4,289.3248,5,24.98203,121.54348,53.0 +310,2013.25,30.3,1264.73,0,24.94883,121.52954,19.1 +311,2013.583,16.4,1643.499,2,24.95394,121.55174,24.7 +312,2013.167,21.3,537.7971,4,24.97425,121.53814,42.2 +313,2013.583,35.4,318.5292,9,24.97071,121.54069,78.0 +314,2013.333,8.3,104.8101,5,24.96674,121.54067,42.8 +315,2013.25,3.7,577.9615,6,24.97201,121.54722,41.6 +316,2013.083,15.6,1756.411,2,24.9832,121.51812,27.3 +317,2013.25,13.3,250.631,7,24.96606,121.54297,42.0 +318,2012.75,15.6,752.7669,2,24.97795,121.53451,37.5 +319,2013.333,7.1,379.5575,10,24.98343,121.53762,49.8 +320,2013.25,34.6,272.6783,5,24.95562,121.53872,26.9 +321,2012.75,13.5,4197.349,0,24.93885,121.50383,18.6 +322,2012.917,16.9,964.7496,4,24.98872,121.53411,37.7 
+323,2013.0,12.9,187.4823,1,24.97388,121.52981,33.1 +324,2013.417,28.6,197.1338,6,24.97631,121.54436,42.5 +325,2012.667,12.4,1712.632,2,24.96412,121.5167,31.3 +326,2013.083,36.6,488.8193,8,24.97015,121.54494,38.1 +327,2013.5,4.1,56.47425,7,24.95744,121.53711,62.1 +328,2013.417,3.5,757.3377,3,24.97538,121.54971,36.7 +329,2012.833,15.9,1497.713,3,24.97003,121.51696,23.6 +330,2013.0,13.6,4197.349,0,24.93885,121.50383,19.2 +331,2013.083,32.0,1156.777,0,24.94935,121.53046,12.8 +332,2013.333,25.6,4519.69,0,24.94826,121.49587,15.6 +333,2013.167,39.8,617.7134,2,24.97577,121.53475,39.6 +334,2012.75,7.8,104.8101,5,24.96674,121.54067,38.4 +335,2012.917,30.0,1013.341,5,24.99006,121.5346,22.8 +336,2013.583,27.3,337.6016,6,24.96431,121.54063,36.5 +337,2012.833,5.1,1867.233,2,24.98407,121.51748,35.6 +338,2012.833,31.3,600.8604,5,24.96871,121.54651,30.9 +339,2012.917,31.5,258.186,9,24.96867,121.54331,36.3 +340,2013.333,1.7,329.9747,5,24.98254,121.54395,50.4 +341,2013.333,33.6,270.8895,0,24.97281,121.53265,42.9 +342,2013.0,13.0,750.0704,2,24.97371,121.54951,37.0 +343,2012.667,5.7,90.45606,9,24.97433,121.5431,53.5 +344,2013.0,33.5,563.2854,8,24.98223,121.53597,46.6 +345,2013.5,34.6,3085.17,0,24.998,121.5155,41.2 +346,2012.667,0.0,185.4296,0,24.9711,121.5317,37.9 +347,2013.417,13.2,1712.632,2,24.96412,121.5167,30.8 +348,2013.583,17.4,6488.021,1,24.95719,121.47353,11.2 +349,2012.833,4.6,259.6607,6,24.97585,121.54516,53.7 +350,2012.75,7.8,104.8101,5,24.96674,121.54067,47.0 +351,2013.0,13.2,492.2313,5,24.96515,121.53737,42.3 +352,2012.833,4.0,2180.245,3,24.96324,121.51241,28.6 +353,2012.833,18.4,2674.961,3,24.96143,121.50827,25.7 +354,2013.5,4.1,2147.376,3,24.96299,121.51284,31.3 +355,2013.417,12.2,1360.139,1,24.95204,121.54842,30.1 +356,2013.25,3.8,383.8624,5,24.98085,121.54391,60.7 +357,2012.833,10.3,211.4473,1,24.97417,121.52999,45.3 +358,2013.417,0.0,338.9679,9,24.96853,121.54413,44.9 +359,2013.167,1.1,193.5845,6,24.96571,121.54089,45.1 
+360,2013.5,5.6,2408.993,0,24.95505,121.55964,24.7 +361,2012.667,32.9,87.30222,10,24.983,121.54022,47.1 +362,2013.083,41.4,281.205,8,24.97345,121.54093,63.3 +363,2013.417,17.1,967.4,4,24.98872,121.53408,40.0 +364,2013.5,32.3,109.9455,10,24.98182,121.54086,48.0 +365,2013.417,35.3,614.1394,7,24.97913,121.53666,33.1 +366,2012.917,17.3,2261.432,4,24.96182,121.51222,29.5 +367,2012.75,14.2,1801.544,1,24.95153,121.55254,24.8 +368,2012.833,15.0,1828.319,2,24.96464,121.51531,20.9 +369,2013.417,18.2,350.8515,1,24.97544,121.53119,43.1 +370,2012.667,20.2,2185.128,3,24.96322,121.51237,22.8 +371,2012.75,15.9,289.3248,5,24.98203,121.54348,42.1 +372,2013.5,4.1,312.8963,5,24.95591,121.53956,51.7 +373,2013.0,33.9,157.6052,7,24.96628,121.54196,41.5 +374,2013.083,0.0,274.0144,1,24.9748,121.53059,52.2 +375,2013.25,5.4,390.5684,5,24.97937,121.54245,49.5 +376,2013.25,21.7,1157.988,0,24.96165,121.55011,23.8 +377,2013.417,14.7,1717.193,2,24.96447,121.51649,30.5 +378,2013.333,3.9,49.66105,8,24.95836,121.53756,56.8 +379,2013.333,37.3,587.8877,8,24.97077,121.54634,37.4 +380,2013.333,0.0,292.9978,6,24.97744,121.54458,69.7 +381,2013.333,14.1,289.3248,5,24.98203,121.54348,53.3 +382,2013.417,8.0,132.5469,9,24.98298,121.53981,47.3 +383,2013.0,16.3,3529.564,0,24.93207,121.51597,29.3 +384,2012.667,29.1,506.1144,4,24.97845,121.53889,40.3 +385,2012.75,16.1,4066.587,0,24.94297,121.50342,12.9 +386,2013.0,18.3,82.88643,10,24.983,121.54026,46.6 +387,2012.833,0.0,185.4296,0,24.9711,121.5317,55.3 +388,2013.25,16.2,2103.555,3,24.96042,121.51462,25.6 +389,2013.5,10.4,2251.938,4,24.95957,121.51353,27.3 +390,2013.25,40.9,122.3619,8,24.96756,121.5423,67.7 +391,2013.5,32.8,377.8302,9,24.97151,121.5435,38.6 +392,2013.583,6.2,1939.749,1,24.95155,121.55387,31.3 +393,2013.083,42.7,443.802,6,24.97927,121.53874,35.3 +394,2013.0,16.9,967.4,4,24.98872,121.53408,40.3 +395,2013.5,32.6,4136.271,1,24.95544,121.4963,24.7 +396,2012.917,21.2,512.5487,4,24.974,121.53842,42.5 +397,2012.667,37.1,918.6357,1,24.97198,121.55063,31.9 
+398,2013.417,13.1,1164.838,4,24.99156,121.53406,32.2 +399,2013.417,14.7,1717.193,2,24.96447,121.51649,23.0 +400,2012.917,12.7,170.1289,1,24.97371,121.52984,37.3 +401,2013.25,26.8,482.7581,5,24.97433,121.53863,35.5 +402,2013.083,7.6,2175.03,3,24.96305,121.51254,27.7 +403,2012.833,12.7,187.4823,1,24.97388,121.52981,28.5 +404,2012.667,30.9,161.942,9,24.98353,121.53966,39.7 +405,2013.333,16.4,289.3248,5,24.98203,121.54348,41.2 +406,2012.667,23.0,130.9945,6,24.95663,121.53765,37.2 +407,2013.167,1.9,372.1386,7,24.97293,121.54026,40.5 +408,2013.0,5.2,2408.993,0,24.95505,121.55964,22.3 +409,2013.417,18.5,2175.744,3,24.9633,121.51243,28.1 +410,2013.0,13.7,4082.015,0,24.94155,121.50381,15.4 +411,2012.667,5.6,90.45606,9,24.97433,121.5431,50.0 +412,2013.25,18.8,390.9696,7,24.97923,121.53986,40.6 +413,2013.0,8.1,104.8101,5,24.96674,121.54067,52.5 +414,2013.5,6.5,90.45606,9,24.97433,121.5431,63.9 diff --git a/autoop/core/database.py b/autoop/core/database.py index fd34fbde1..060912a35 100644 --- a/autoop/core/database.py +++ b/autoop/core/database.py @@ -1,12 +1,26 @@ - import json -from typing import Dict, Tuple, List, Union +from typing import Tuple, List, Union from autoop.core.storage import Storage + class Database(): + """ + A simple database abstraction layer that uses a `Storage` object + for persistence. + """ + + def __init__(self, storage: Storage) -> None: + """ + Initializes the Database instance. + + Args: + storage (Storage): The storage backend to be used for the database. - def __init__(self, storage: Storage): + Attributes: + _storage (Storage): The storage backend instance. + _data (dict): A dictionary to hold the data loaded from storage. 
+ """ self._storage = storage self._data = {} self._load() @@ -35,13 +49,14 @@ def get(self, collection: str, id: str) -> Union[dict, None]: collection (str): The collection to get the data from id (str): The id of the data Returns: - Union[dict, None]: The data that was stored, or None if it doesn't exist + Union[dict, None]: The data that was stored, + or None if it doesn't exist """ if not self._data.get(collection, None): return None return self._data[collection].get(id, None) - - def delete(self, collection: str, id: str): + + def delete(self, collection: str, id: str) -> None: """Delete a key from the database Args: collection (str): The collection to delete the data from @@ -60,23 +75,26 @@ def list(self, collection: str) -> List[Tuple[str, dict]]: Args: collection (str): The collection to list the data from Returns: - List[Tuple[str, dict]]: A list of tuples containing the id and data for each item in the collection + List[Tuple[str, dict]]: A list of tuples containing the + id and data for each item in the collection """ if not self._data.get(collection, None): return [] return [(id, data) for id, data in self._data[collection].items()] - def refresh(self): + def refresh(self) -> None: """Refresh the database by loading the data from storage""" self._load() - def _persist(self): + def _persist(self) -> None: """Persist the data to storage""" for collection, data in self._data.items(): if not data: continue for id, item in data.items(): - self._storage.save(json.dumps(item).encode(), f"{collection}/{id}") + self._storage.save( + json.dumps(item).encode(), f"{collection}/{id}" + ) # for things that were deleted, we need to remove them from the storage keys = self._storage.list("") @@ -84,8 +102,8 @@ def _persist(self): collection, id = key.split("/")[-2:] if not self._data.get(collection, id): self._storage.delete(f"{collection}/{id}") - - def _load(self): + + def _load(self) -> None: """Load the data from storage""" self._data = {} for key in 
self._storage.list(""): @@ -95,4 +113,3 @@ def _load(self): if collection not in self._data: self._data[collection] = {} self._data[collection][id] = json.loads(data.decode()) - diff --git a/autoop/core/database.py:Zone.Identifier b/autoop/core/database.py:Zone.Identifier deleted file mode 100644 index e69de29bb..000000000 diff --git a/autoop/core/ml/artifact.py b/autoop/core/ml/artifact.py index 7b6ded6be..0bbe9f8ed 100644 --- a/autoop/core/ml/artifact.py +++ b/autoop/core/ml/artifact.py @@ -1,5 +1,160 @@ -from pydantic import BaseModel, Field import base64 +import itertools +import os -class Artifact(BaseModel): - pass \ No newline at end of file +from abc import ABC + + +class Artifact(ABC): + """Abstract base class representing an asset that can be stored and + contains information about this specific asset.""" + + _id_iter = itertools.count() + _base_path = "assets/objects/" + + def __init__( + self, + name: str, + asset_path: str, + version: str, + data: bytes = None, + metadata: dict = None, + type: str = "", + tags: list = None + ) -> None: + """ + Initializes an Artifact instance. + + Args: + name (str): The name of the asset. + asset_path (str): The path where the asset is stored. + version (str): The version of the asset. + data (bytes, optional): The binary data of the asset. + Defaults to None. + metadata (dict, optional): A dictionary containing additional + metadata about the asset. + Defaults to an empty dictionary if None. + type (str, optional): The type of the asset. + Defaults to an empty string. + tags (list, optional): A list of tags describing the asset. + Defaults to an empty list if None. 
+ """ + self._name = name + self._asset_path = asset_path + self._version = version + self._data = data + self._metadata = metadata if metadata is not None else {} + self._type = type + self._tags = tags if tags is not None else [] + self._id = str(next(Artifact._id_iter)) + + @property + def name(self) -> str: + """Gets the name of the artifact.""" + return self._name + + @property + def asset_path(self) -> str: + """Gets the asset path of the artifact.""" + return self._asset_path + + @property + def version(self) -> str: + """Gets the version of the artifact.""" + return self._version + + @property + def data(self) -> bytes: + """Gets the binary data of the artifact.""" + return self._data + + @property + def metadata(self) -> dict: + """Gets the metadata of the artifact.""" + return self._metadata + + @property + def type(self) -> str: + """Gets the type of the artifact.""" + return self._type + + @property + def tags(self) -> list: + """Gets the tags associated with the artifact.""" + return self._tags + + @property + def id(self) -> str: + """Gets the unique ID of the artifact.""" + return self._id + + def save1(self, directory: str) -> None: + """ + Saves the artifact data to a specified directory. If the directory does + not exist, it creates it. + + Args: + directory (str): The directory where the data will be saved. + """ + print(f"The dir path is path is {directory}") + + if not os.path.exists(directory): + os.makedirs(directory) + with open(f"{directory + '/' + self._asset_path}", 'wb') as file: + file.write(self._data) + + def read(self) -> bytes: + """ + Reads and returns the binary data of the artifact. If the artifact file + does not exist, it creates a new file in the "exports" directory. + + Returns: + bytes: The binary data of the artifact. + + Raises: + FileNotFoundError: If the file at `asset_path` could not be found + after attempting to read it. 
+ """ + if not os.path.exists(self._base_path + self._asset_path): + self.save1(self._base_path) + try: + with open(self._base_path + self._asset_path, "rb") as file: + return file.read() + except FileNotFoundError: + raise FileNotFoundError( + f"The file at path {self._base_path + self._asset_path} " + f"could not be found." + ) + + def get_id(self) -> str: + """ + Generates a unique ID for the artifact based on its asset path and + version. + + Returns: + str: A unique ID string derived from base64 encoding of the asset + path and version. + """ + encoded_path = ( + base64.urlsafe_b64encode(self._asset_path.encode()).decode() + ) + return f"{encoded_path}:{self._version}" + + def get_metadata(self) -> dict: + """ + Retrieves the metadata of the artifact. + + Returns: + dict: A dictionary containing the metadata. + """ + return self._metadata + + def update_metadata(self, key: str, value: any) -> None: + """ + Updates the metadata of the artifact with a new key-value pair. + + Args: + key (str): The key to update or add. + value (any): The value associated with the key. + """ + self._metadata[key] = value diff --git a/autoop/core/ml/artifact.py:Zone.Identifier b/autoop/core/ml/artifact.py:Zone.Identifier deleted file mode 100644 index e69de29bb..000000000 diff --git a/autoop/core/ml/dataset.py b/autoop/core/ml/dataset.py index 2ab739821..35c697b7c 100644 --- a/autoop/core/ml/dataset.py +++ b/autoop/core/ml/dataset.py @@ -1,28 +1,72 @@ from autoop.core.ml.artifact import Artifact -from abc import ABC, abstractmethod import pandas as pd import io + class Dataset(Artifact): + """ + A class representing a dataset, inheriting from the Artifact base class. + This class specifically handles datasets and provides additional + functionality for reading and saving data in pandas DataFrame format. + """ + + def __init__(self, *args, **kwargs) -> None: + """ + Initializes a Dataset instance, setting its type to "dataset". 
- def __init__(self, *args, **kwargs): + Args: + *args: Positional arguments passed to the parent class. + **kwargs: Keyword arguments passed to the parent class. + """ super().__init__(type="dataset", *args, **kwargs) @staticmethod - def from_dataframe(data: pd.DataFrame, name: str, asset_path: str, version: str="1.0.0"): + def from_dataframe( + data: pd.DataFrame, + name: str, + asset_path: str, + version: str = "1.0.0" + ) -> 'Dataset': + """ + Creates a Dataset instance from a pandas DataFrame. + + Args: + data (pd.DataFrame): The DataFrame to be saved as a dataset. + name (str): The name of the dataset. + asset_path (str): The path where the dataset will be stored. + version (str, optional): The version of the dataset. Defaults to + "1.0.0". + + Returns: + Dataset: A Dataset instance containing the serialized DataFrame. + """ return Dataset( name=name, asset_path=asset_path, data=data.to_csv(index=False).encode(), version=version, ) - + def read(self) -> pd.DataFrame: + """ + Reads the dataset and returns it as a pandas DataFrame. + + Returns: + pd.DataFrame: The dataset as a pandas DataFrame. + """ bytes = super().read() csv = bytes.decode() return pd.read_csv(io.StringIO(csv)) - + def save(self, data: pd.DataFrame) -> bytes: + """ + Saves the given pandas DataFrame as a dataset. + + Args: + data (pd.DataFrame): The DataFrame to be saved. + + Returns: + bytes: The serialized DataFrame in bytes format. + """ bytes = data.to_csv(index=False).encode() - return super().save(bytes) - \ No newline at end of file + return super().save1(bytes) diff --git a/autoop/core/ml/dataset.py:Zone.Identifier b/autoop/core/ml/dataset.py:Zone.Identifier deleted file mode 100644 index e69de29bb..000000000 diff --git a/autoop/core/ml/feature.py b/autoop/core/ml/feature.py index c1dc59688..9e4280325 100644 --- a/autoop/core/ml/feature.py +++ b/autoop/core/ml/feature.py @@ -1,12 +1,42 @@ +class Feature: + """ + Represents a feature in a dataset. 
+ """ -from pydantic import BaseModel, Field -from typing import Literal -import numpy as np + def __init__(self, name: str, type: str) -> None: + """ + Initializes a Feature instance. -from autoop.core.ml.dataset import Dataset + Args: + name (str): The name of the feature + type (str): The type of the feature + """ + self.name = name + self.type = type -class Feature(BaseModel): - # attributes here + def __repr__(self) -> str: + """ + Returns a string representation of the Feature instance. - def __str__(self): - raise NotImplementedError("To be implemented.") \ No newline at end of file + Returns: + str: A string describing the Feature instance. + """ + return f"Feature(name={self.name}, type={self.type})" + + def is_numerical(self) -> bool: + """ + Checks if the feature is numerical. + + Returns: + bool: True if the feature is numerical, otherwise False. + """ + return self.type == "numerical" + + def is_categorical(self) -> bool: + """ + Checks if the feature is categorical. + + Returns: + bool: True if the feature is categorical, otherwise False. + """ + return self.type == "categorical" diff --git a/autoop/core/ml/feature.py:Zone.Identifier b/autoop/core/ml/feature.py:Zone.Identifier deleted file mode 100644 index e69de29bb..000000000 diff --git a/autoop/core/ml/metric.py b/autoop/core/ml/metric.py index 78429503e..ecc962bb9 100644 --- a/autoop/core/ml/metric.py +++ b/autoop/core/ml/metric.py @@ -1,25 +1,339 @@ from abc import ABC, abstractmethod -from typing import Any import numpy as np -METRICS = [ - "mean_squared_error", - "accuracy", -] # add the names (in strings) of the metrics you implement -def get_metric(name: str): - # Factory function to get a metric by name. - # Return a metric instance given its str name. - raise NotImplementedError("To be implemented.") +def get_metric(name: str) -> "Metric": + """ + Factory function to get a metric by name. + + Args: + name (str): The name of the metric to retrieve. 
+ + Returns: + Metric: An instance of a metric class corresponding to the name. + + Raises: + ValueError: If the metric name is not recognized. + """ + if name == "Mean Squared Error": + return MeanSquaredError() + elif name == "Accuracy": + return Accuracy() + elif name == "Balanced Accuracy": + return BalancedAccuracy() + elif name == "Macro Precision": + return MacroPrecision() + elif name == "R² Score": + return R2Score() + elif name == "Mean Absolute Error": + return MeanAbsoluteError() + else: + raise ValueError(f"Unknown metric: {name}") + + +class Metric(ABC): + """ + Base class for all metrics. + Metrics take ground truth and predictions + as input and return a real number. + """ + + @abstractmethod + def __call__( + self, ground_truth: np.ndarray, prediction: np.ndarray + ) -> float: + """ + Calculate the metric value given the ground truth and predictions. + + Args: + ground_truth (np.ndarray): The true values. + prediction (np.ndarray): The predicted values. + + Returns: + float: The computed metric value. + """ + pass + + @abstractmethod + def __str__(self) -> str: + """ + Return a string representation of the metric. + + Returns: + str: The string representation of the metric. + """ + pass + + def evaluate( + self, ground_truth: np.ndarray, prediction: np.ndarray + ) -> float: + """ + Evaluate the metric by calculating the value and printing a summary. + + Args: + ground_truth (np.ndarray): The true values. + prediction (np.ndarray): The predicted values. + + Returns: + float: The computed metric value. + """ + result = self.__call__(ground_truth, prediction) + print(f"Evaluating {self.__class__.__name__}: {result:.4f}") + return result + + +class MeanSquaredError(Metric): + """ + Implementation of Mean Squared Error (MSE) metric. + """ + + def __call__( + self, ground_truth: np.ndarray, prediction: np.ndarray + ) -> float: + """ + Calculate the Mean Squared Error between ground truth and predictions. 
+ + Args: + ground_truth (np.ndarray): The true values. + prediction (np.ndarray): The predicted values. + + Returns: + float: The computed Mean Squared Error. + """ + error = np.mean((ground_truth - prediction) ** 2) + return error + + def __str__(self) -> str: + """ + Returns a string representation of the metric. + + Returns: + str: The name of the metric, "Mean Squared Error". + """ + return "Mean Squared Error" + + +class Accuracy(Metric): + """ + Implementation of Accuracy metric for multi-class classification tasks. + """ + + def __call__( + self, ground_truth: np.ndarray, prediction: np.ndarray + ) -> float: + """ + Calculate the accuracy by comparing ground truth and predictions. + + Args: + ground_truth (np.ndarray): The true values. + prediction (np.ndarray): The predicted values. + + Returns: + float: The computed accuracy value. + """ + correct_predictions = np.sum(ground_truth == prediction) + accuracy = correct_predictions / len(ground_truth) + return accuracy + + def __str__(self) -> str: + """ + Returns a string representation of the metric. + + Returns: + str: The name of the metric, "Accuracy". + """ + return "Accuracy" + + +class BalancedAccuracy(Metric): + """ + Implementation of Balanced Accuracy metric for classification tasks. + """ + + def __call__( + self, ground_truth: np.ndarray, prediction: np.ndarray + ) -> float: + """ + Calculate the Balanced Accuracy by averaging the recall for each class. + + Args: + ground_truth (np.ndarray): The true values. + prediction (np.ndarray): The predicted values. -class Metric(...): - """Base class for all metrics. + Returns: + float: The computed Balanced Accuracy value. 
+ """ + unique_classes = np.unique(ground_truth) + recalls = [] + + for cls in unique_classes: + true_positive = np.sum( + (prediction == cls) & (ground_truth == cls) + ) + false_negative = np.sum( + (prediction != cls) & (ground_truth == cls) + ) + if true_positive + false_negative == 0: + recalls.append(0.0) + else: + recalls.append( + true_positive / (true_positive + false_negative) + ) + + balanced_accuracy = np.mean(recalls) + return balanced_accuracy + + def __str__(self) -> str: + """ + Returns a string representation of the metric. + + Returns: + str: The name of the metric, "Balanced Accuracy". + """ + return "Balanced Accuracy" + + +class MacroPrecision(Metric): """ - # your code here - # remember: metrics take ground truth and prediction as input and return a real number + Implementation of Macro Precision metric for classification tasks. + """ + + def __call__( + self, ground_truth: np.ndarray, prediction: np.ndarray + ) -> float: + """ + Calculate the Macro Precision. + + Args: + ground_truth (np.ndarray): The true values. + prediction (np.ndarray): The predicted values. + + Returns: + float: The computed Macro Precision. + """ + classes = np.unique(ground_truth) + precision_values = [ + self._calculate_precision_for_class( + ground_truth, prediction, label + ) + for label in classes + ] + return np.mean(precision_values) + + def _calculate_precision_for_class( + self, ground_truth: np.ndarray, prediction: np.ndarray, label: str + ) -> float: + """ + Helper function to calculate precision for a specific class. + + Args: + ground_truth (np.ndarray): The true values. + prediction (np.ndarray): The predicted values. + label (str): The class label to calculate precision for. + + Returns: + float: Precision value for the specified class. 
+ """ + true_positives = np.sum( + (prediction == label) & (ground_truth == label) + ) + total_predicted = np.sum(prediction == label) + if total_predicted > 0: + return true_positives / total_predicted + else: + return 0.0 + + def __str__(self) -> str: + """ + Returns a string representation of the metric. + + Returns: + str: The name of the metric, "Macro Precision". + """ + return "Macro Precision" + + +class R2Score(Metric): + """ + Implementation of R-squared (R²) metric for regression tasks. + """ + + def __call__( + self, ground_truth: np.ndarray, prediction: np.ndarray + ) -> float: + """ + Calculate the R-squared (R²) value between + ground truth and predictions. + + Args: + ground_truth (np.ndarray): The true values. + prediction (np.ndarray): The predicted values. - def __call__(self): - raise NotImplementedError("To be implemented.") + Returns: + float: The computed R-squared value. + """ + ss_total = np.sum((ground_truth - np.mean(ground_truth)) ** 2) + ss_residual = np.sum((ground_truth - prediction) ** 2) + r2_score = 1 - (ss_residual / ss_total) + return r2_score -# add here concrete implementations of the Metric class - \ No newline at end of file + def __str__(self) -> str: + """ + Returns a string representation of the metric. + + Returns: + str: The string "R² Score". + """ + return "R² Score" + + +class MeanAbsoluteError(Metric): + """ + Implementation of Mean Absolute Error (MAE) metric for regression tasks. + """ + + def __call__( + self, ground_truth: np.ndarray, prediction: np.ndarray + ) -> float: + """ + Calculate the Mean Absolute Error between + ground truth and predictions. + + Args: + ground_truth (np.ndarray): The true values. + prediction (np.ndarray): The predicted values. + + Returns: + float: The computed Mean Absolute Error. + """ + error = np.mean(np.abs(ground_truth - prediction)) + return error + + def __str__(self) -> str: + """ + Returns a string representation of the metric. 
+ + Returns: + str: The name of the metric, "Mean Absolute Error". + """ + return "Mean Absolute Error" + + +def get_regression_metrics() -> list[Metric]: + """ + Get a list of regression metrics. + + Returns: + list[Metric]: A list of regression metrics. + """ + return [MeanSquaredError(), R2Score(), MeanAbsoluteError()] + + +def get_classification_metrics() -> list[Metric]: + """ + Get a list of classification metrics. + + Returns: + list[Metric]: A list of classification metrics. + """ + return [Accuracy(), BalancedAccuracy(), MacroPrecision()] diff --git a/autoop/core/ml/metric.py:Zone.Identifier b/autoop/core/ml/metric.py:Zone.Identifier deleted file mode 100644 index e69de29bb..000000000 diff --git a/autoop/core/ml/model/__init__.py b/autoop/core/ml/model/__init__.py index 1ac8cf82c..175414aa4 100644 --- a/autoop/core/ml/model/__init__.py +++ b/autoop/core/ml/model/__init__.py @@ -1,13 +1,78 @@ +""" +This module provides a factory function to + retrieve machine learning models by name. + +Classes: + Model: Base class for all models. + MultipleLinearRegression: Implements multiple linear regression. + DecisionTreeRegressionModel: Implements decision tree regression. + LassoRegressionModel: Implements lasso regression. + KNearestNeighbors: Implements k-nearest neighbors classification. + DecisionTreeClassificationModel: Implements decision tree classification. + RandomForestClassifierModel: Implements random forest classification. + +Constants: + REGRESSION_MODELS (list): List of available regression model names. + CLASSIFICATION_MODELS (list): List of available classification model names. + +Functions: + get_model(model_name: str) -> Model: + Factory function to get a model by name. + Args: + model_name (str): The name of the model to retrieve. + Returns: + Model: An instance of the requested model. + Raises: + ValueError: If the model name is not recognized. 
+""" from autoop.core.ml.model.model import Model -from autoop.core.ml.model.regression import MultipleLinearRegression +from autoop.core.ml.model.regression.mulitple_linear_regression import ( + MultipleLinearRegression, +) +from autoop.core.ml.model.regression.decision_tree import ( + DecisionTreeRegressionModel, +) +from autoop.core.ml.model.regression.lasso_regression import ( + LassoRegressionModel, +) +from autoop.core.ml.model.classification.knn import ( + KNearestNeighbors, +) +from autoop.core.ml.model.classification.decision_tree_classification import ( + DecisionTreeClassificationModel, +) +from autoop.core.ml.model.classification.random_forest import ( + RandomForestClassifierModel, +) REGRESSION_MODELS = [ -] # add your models as str here + "MultipleLinearRegression", + "DecisionTreeRegressionModel", + "LassoRegressionModel", +] CLASSIFICATION_MODELS = [ -] # add your models as str here + "KNNModel", + "DecisionTreeClassificationModel", + "RandomForestModel", +] + def get_model(model_name: str) -> Model: """Factory function to get a model by name.""" - raise NotImplementedError("To be implemented.") \ No newline at end of file + if model_name == "MultipleLinearRegression": + return MultipleLinearRegression() + elif model_name == "DecisionTreeRegressionModel": + return DecisionTreeRegressionModel() + elif model_name == "LassoRegressionModel": + return LassoRegressionModel() + elif model_name == "KNNModel": + return KNearestNeighbors() + elif model_name == "DecisionTreeClassificationModel": + return DecisionTreeClassificationModel() + elif model_name == "RandomForestModel": + return RandomForestClassifierModel() + else: + raise ValueError(f"'{model_name}' is not recognized. 
" + f"Please use a valid model name.") diff --git a/autoop/core/ml/model/__init__.py:Zone.Identifier b/autoop/core/ml/model/__init__.py:Zone.Identifier deleted file mode 100644 index e69de29bb..000000000 diff --git a/autoop/core/ml/model/classification/__init__.py b/autoop/core/ml/model/classification/__init__.py index e69de29bb..451dad3c4 100644 --- a/autoop/core/ml/model/classification/__init__.py +++ b/autoop/core/ml/model/classification/__init__.py @@ -0,0 +1,6 @@ +""" +This is the initialization file for the classification module. + +It is used to initialize the classification submodule of +the autoop.core.ml.model package. +""" diff --git a/autoop/core/ml/model/classification/__init__.py:Zone.Identifier b/autoop/core/ml/model/classification/__init__.py:Zone.Identifier deleted file mode 100644 index e69de29bb..000000000 diff --git a/autoop/core/ml/model/classification/classification_models.py b/autoop/core/ml/model/classification/classification_models.py new file mode 100644 index 000000000..b03bc35ba --- /dev/null +++ b/autoop/core/ml/model/classification/classification_models.py @@ -0,0 +1,24 @@ +from autoop.core.ml.model.classification.decision_tree_classification import ( + DecisionTreeClassificationModel, +) +from autoop.core.ml.model.classification.random_forest import ( + RandomForestClassifierModel, +) +from autoop.core.ml.model.classification.knn import ( + KNearestNeighbors, +) + + +def get_classification_models() -> dict: + """ + Retrieves a dictionary of available classification models. + + Returns: + dict: A dictionary where keys are model names and values are the + corresponding model classes. 
+ """ + return { + "Decision Tree Classification": DecisionTreeClassificationModel, + "Random Forest": RandomForestClassifierModel, + "KNN": KNearestNeighbors, + } diff --git a/autoop/core/ml/model/classification/decision_tree_classification.py b/autoop/core/ml/model/classification/decision_tree_classification.py new file mode 100644 index 000000000..3fc331e76 --- /dev/null +++ b/autoop/core/ml/model/classification/decision_tree_classification.py @@ -0,0 +1,121 @@ +from sklearn.tree import DecisionTreeClassifier +from autoop.core.ml.model import Model +from autoop.core.ml.artifact import Artifact +import numpy as np +from pydantic import PrivateAttr, Field + + +class DecisionTreeClassificationModel(Model): + """ + Decision Tree Classification model that uses scikit-learn's + DecisionTreeClassifier to perform classification tasks. + + Attributes: + _model (DecisionTreeClassifier): The decision tree classification + model from scikit-learn. + max_depth (int): Maximum depth of the tree. + min_samples_split (int): Minimum number of samples required to + split an internal node. + """ + + _model: DecisionTreeClassifier = PrivateAttr() + max_depth: int = Field( + default=None, + ge=1, + description="Maximum depth of the tree" + ) + min_samples_split: int = Field( + default=2, + ge=2, + description="Minimum number of samples required to " + "split an internal node" + ) + + def __init__( + self, + name: str = "test_model", + asset_path: str = "./tmp", + version: str = "0.1", + **data + ) -> None: + """ + Initializes the Decision Tree Classification model + with specified hyperparameters. + + Args: + name (str): The name of the model. + Defaults to "test_model". + asset_path (str): Path to store model artifacts. + Defaults to "./tmp". + version (str): Version of the model. Defaults to "0.1". + **data: Additional hyperparameters for the model. 
+ """ + super().__init__( + name=name, asset_path=asset_path, version=version, **data + ) + self._model = DecisionTreeClassifier( + max_depth=self.max_depth, + min_samples_split=self.min_samples_split + ) + self._parameters = { + "max_depth": self.max_depth, + "min_samples_split": self.min_samples_split + } + self._type = "classification" + + def fit(self, observations: np.ndarray, ground_truth: np.ndarray) -> None: + """ + Fits the Decision Tree Classification model + to the provided training data. + + Args: + observations (np.ndarray): Feature matrix of + shape (n_samples, p_features). + ground_truth (np.ndarray): Target vector of + shape (n_samples,). + """ + self._model.fit(observations, ground_truth) + self._parameters["tree_structure"] = self._model.tree_ + self._parameters["trained"] = True + + def predict(self, observations: np.ndarray) -> np.ndarray: + """ + Predicts the class labels for the provided observations. + + Args: + observations (np.ndarray): Feature matrix of + shape (n_samples, p_features). + + Returns: + np.ndarray: Predicted class labels of shape (n_samples,). + """ + return self._model.predict(observations) + + def save(self, directory: str) -> None: + """ + Save the model using the Artifact class. + + Args: + directory (str): The directory where the model should be saved. + """ + model_data = { + "model": self._model, + "parameters": self._parameters + } + self._artifact.data = str(model_data).encode() + self._artifact.save(directory) + + def load(self, directory: str, artifact_id: str) -> None: + """ + Load the model using the Artifact class. + + Args: + directory (str): The directory where the model is stored. + artifact_id (str): The unique ID of the model + artifact to be loaded. 
+ """ + loaded_artifact = Artifact.load(directory, artifact_id) + model_data = eval(loaded_artifact.data.decode()) + self._model = model_data["model"] + self._parameters = model_data["parameters"] + self._type = "classification" diff --git a/autoop/core/ml/model/classification/knn.py b/autoop/core/ml/model/classification/knn.py new file mode 100644 index 000000000..1beb318dc --- /dev/null +++ b/autoop/core/ml/model/classification/knn.py @@ -0,0 +1,112 @@ +import numpy as np +from sklearn.neighbors import KNeighborsClassifier +from autoop.core.ml.artifact import Artifact +from autoop.core.ml.model import Model +from pydantic import PrivateAttr, Field + + +class KNearestNeighbors(Model): + """ + K-Nearest Neighbors (KNN) model implementation classifies + an observation by analyzing the classes of its `k` nearest neighbors. + + Attributes: + k (int): Number of nearest neighbors to consider. + """ + + k: int = Field(default=3, ge=1, description="Number of neighbors") + _model: KNeighborsClassifier = PrivateAttr() + _parameters: dict = PrivateAttr(default_factory=dict) + + def __init__( + self, + k: int = 3, + name: str = "test_model", + asset_path: str = "./tmp", + version: str = "0.1", + **data + ) -> None: + """ + Initializes the KNN model with a specified value of "k". + + Args: + k (int, optional): Number of neighbors. Defaults to 3. + name (str, optional): Name of the model. Defaults to "test_model". + asset_path (str, optional): Path to store model artifacts. + Defaults to "./tmp". + version (str, optional): Version of the model. Defaults to "0.1". + **data: Additional data for initialization. + """ + super().__init__( + name=name, asset_path=asset_path, version=version, **data + ) + self.k = k + self._parameters["k"] = k + self._type = "classification" + self._model = KNeighborsClassifier(n_neighbors=k) + + def fit(self, observations: np.ndarray, ground_truth: np.ndarray) -> None: + """ + Fits the KNN model to the provided training data. 
+ + Args: + observations (np.ndarray): Feature matrix of + shape (n_samples, n_features). + ground_truth (np.ndarray): Target vector of shape (n_samples,). + """ + X = np.asarray(observations) + self._model.fit(X, ground_truth) + self._parameters = { + "observations": observations, + "ground_truth": ground_truth, + "k": self.k + } + + def predict(self, observations: np.ndarray) -> np.ndarray: + """ + Predicts the class of each observation. + + Args: + observations (np.ndarray): Feature matrix of + shape (n_samples, n_features). + + Returns: + np.ndarray: Predicted class labels of shape (n_samples,). + """ + return self._model.predict(observations) + + def save(self, directory: str) -> None: + """ + Save the model using the Artifact class. + + Args: + directory (str): The directory where the model should be saved. + """ + model_data = { + "parameters": self._parameters + } + self._artifact.data = str(model_data).encode() + self._artifact.save(directory) + + def load(self, directory: str, artifact_id: str) -> None: + """ + Load the model using the Artifact class. + + Args: + directory (str): The directory where the model is stored. + artifact_id (str): The unique ID of the model + artifact to be loaded. 
+ """ + loaded_artifact = Artifact.load(directory, artifact_id) + model_data = eval(loaded_artifact.data.decode()) + self._parameters = model_data["parameters"] + self.k = self._parameters.get("k", 3) + self._model = KNeighborsClassifier(n_neighbors=self.k) + if ( + "observations" in self._parameters and "ground_truth" in + self._parameters + ): + self._model.fit( + self._parameters["observations"], + self._parameters["ground_truth"] + ) diff --git a/autoop/core/ml/model/classification/random_forest.py b/autoop/core/ml/model/classification/random_forest.py new file mode 100644 index 000000000..80cd1b2e4 --- /dev/null +++ b/autoop/core/ml/model/classification/random_forest.py @@ -0,0 +1,112 @@ +import numpy as np +from typing import Dict +from sklearn.ensemble import RandomForestClassifier +from autoop.core.ml.artifact import Artifact +from autoop.core.ml.model import Model +from pydantic import PrivateAttr, Field + + +class RandomForestClassifierModel(Model): + """ + Random Forest Classifier model implementation + + Uses a RandomForestClassifier from scikit-learn to perform classification + tasks + """ + + n_estimators: int = Field( + default=100, ge=1, description="Number of trees in the forest" + ) + _model: RandomForestClassifier = PrivateAttr() + _hyperparameters: Dict = PrivateAttr(default_factory=dict) + + def __init__( + self, + name: str = "test_model", + asset_path: str = "./tmp", + version: str = "0.1", + n_estimators: int = 100, + max_depth: int = None, + **data + ) -> None: + """ + Initializes the Random Forest Classifier model + with specified hyperparameters + + Args: + name (str): Name of the model. Defaults to "test_model". + asset_path (str): Path to store model artifacts. + Defaults to "./tmp". + version (str): Version of the model. Defaults to "0.1". + n_estimators (int): Number of trees in the forest. Defaults to 100. + max_depth (int): Maximum depth of the trees. Defaults to None. + **data: Additional hyperparameters for the model. 
+ """ + super().__init__( + name=name, asset_path=asset_path, version=version, **data + ) + self._hyperparameters["n_estimators"] = n_estimators + self._hyperparameters["max_depth"] = max_depth + + self._model = RandomForestClassifier( + n_estimators=n_estimators, max_depth=max_depth + ) + self._type = "classification" + + def fit(self, observations: np.ndarray, ground_truth: np.ndarray) -> None: + """ + Fits the Random Forest model to the provided training data. + + Args: + observations (np.ndarray): Feature matrix of + shape (n_samples, n_features). + ground_truth (np.ndarray): Target vector of + shape (n_samples,). + """ + self._model.fit(observations, ground_truth) + self._parameters["trained"] = True + + def predict(self, observations: np.ndarray) -> np.ndarray: + """ + Predicts the values for the provided observations. + + Args: + observations (np.ndarray): Feature matrix of + shape (n_samples, n_features). + + Returns: + np.ndarray: Predicted values of shape (n_samples,). + """ + return self._model.predict(observations) + + def save(self, directory: str) -> None: + """ + Save the model using the Artifact class. + + Args: + directory (str): The directory where the model should be saved. + """ + model_data = { + "parameters": self._parameters, + "hyperparameters": self._hyperparameters, + } + self._artifact.data = str(model_data).encode() + self._artifact.save(directory) + + def load(self, directory: str, artifact_id: str) -> None: + """ + Load the model using the Artifact class. + + Args: + directory (str): The directory where the model is stored. + artifact_id (str): The unique ID of the model artifact + to be loaded. 
+ """ + loaded_artifact = Artifact.load(directory, artifact_id) + model_data = eval(loaded_artifact.data.decode()) + self._parameters = model_data["parameters"] + self._hyperparameters = model_data["hyperparameters"] + self._model = RandomForestClassifier( + n_estimators=self._hyperparameters["n_estimators"], + max_depth=self._hyperparameters["max_depth"], + ) diff --git a/autoop/core/ml/model/model.py b/autoop/core/ml/model/model.py index 35f176927..99a5dc1a9 100644 --- a/autoop/core/ml/model/model.py +++ b/autoop/core/ml/model/model.py @@ -1,10 +1,153 @@ - -from abc import abstractmethod from autoop.core.ml.artifact import Artifact import numpy as np +from typing import Dict, Union, List +from abc import ABC, abstractmethod +from pydantic import BaseModel, PrivateAttr from copy import deepcopy -from typing import Literal -class Model: - pass # your code (attribute and methods) here - + +class Model(BaseModel, ABC): + """ + Abstract base class for all learning models. + + This class defines the standard structure for machine learning models, + including methods for fitting, predicting, saving, and loading models. + + Attributes: + _parameters (Dict): Stores model-specific parameters. + _artifact (Artifact): Manages the artifact for the model. + _type (str): The type of the model (e.g., classification, regression). + """ + + _parameters: Dict = PrivateAttr(default_factory=dict) + _artifact: Artifact = PrivateAttr() + _type: str = PrivateAttr() + + def __init__( + self, + name: str, + asset_path: str, + version: str, + **data: Dict[str, Union[str, int, float, bool, List, dict]], + ) -> None: + """ + Initializes the Model with an associated Artifact. + + Args: + name (str): Name of the model artifact. + asset_path (str): Path for storing the artifact. + version (str): Version of the model. + **data (Dict[str, Union[str, int, float, bool, List, dict]]): + Additional data passed to the BaseModel initializer. 
+ """ + super().__init__(**data) + self._artifact = Artifact( + name=name, + asset_path=asset_path, + version=version, + data=b"", # Placeholder for model binary data + metadata={}, + type="model", + tags=["machine_learning"] + ) + + @property + def parameters(self) -> Dict: + """ + Returns a deepcopy of the model parameters. + + Returns: + Dict: A deepcopy of the model parameters. + """ + return deepcopy(self._parameters) + + @abstractmethod + def fit(self, observations: np.ndarray, ground_truth: np.ndarray) -> None: + """ + Fits the model to the provided training data. + + Args: + observations (np.ndarray): Feature matrix of + shape (n_samples, n_features). + ground_truth (np.ndarray): Target vector of shape (n_samples,). + """ + pass + + @abstractmethod + def predict(self, observations: np.ndarray) -> np.ndarray: + """ + Predicts the values for the provided observations. + + Args: + observations (np.ndarray): Feature matrix of + shape (n_samples, n_features). + + Returns: + np.ndarray: Predicted values of shape (n_samples,). + """ + pass + + def save(self, directory: str) -> None: + """ + Save the model using the Artifact class. + + Args: + directory (str): The directory where the model should be saved. + """ + model_data = {"parameters": self._parameters} + self._artifact.data = str(model_data).encode() + self._artifact.asset_path = f"{directory}/{self._artifact.name}.bin" + + def load(self, directory: str, artifact_id: str) -> None: + """ + Load the model using the Artifact class. + + Args: + directory (str): The directory where the model is stored. + artifact_id (str): Unique ID of the model artifact to be loaded. + """ + self._artifact.asset_path = f"{directory}/{artifact_id}.bin" + loaded_data = self._artifact.read() + model_data = eval(loaded_data.decode()) + self._parameters = model_data.get("parameters", {}) + + def set_params( + self, **params: Dict[str, Union[str, int, float, bool, List, dict]] + ) -> None: + """ + Set the model parameters. 
+ + Args: + **params (Dict[str, Union[str, int, float, bool, List, dict]]): + Arbitrary keyword arguments representing model parameters. + """ + for key, value in params.items(): + self._parameters[key] = value + + def get_params(self) -> Dict: + """ + Get the model parameters. + + Returns: + Dict: The current model parameters. + """ + return self.parameters + + def is_trained(self) -> bool: + """ + Check if the model has been trained. + + Returns: + bool: True if the model has been trained, False otherwise. + """ + return bool(self._parameters) + + @property + def type(self) -> str: + """ + Get the type of the model. + + Returns: + str: The type of the model. + """ + return self._type diff --git a/autoop/core/ml/model/model.py:Zone.Identifier b/autoop/core/ml/model/model.py:Zone.Identifier deleted file mode 100644 index e69de29bb..000000000 diff --git a/autoop/core/ml/model/regression/__init__.py b/autoop/core/ml/model/regression/__init__.py index 44661350e..0ced89a81 100644 --- a/autoop/core/ml/model/regression/__init__.py +++ b/autoop/core/ml/model/regression/__init__.py @@ -1,2 +1,6 @@ +""" +This is the initialization file for the regression module. -from autoop.core.ml.model.regression.multiple_linear_regression import MultipleLinearRegression \ No newline at end of file +It is used to initialize the regression submodule of +the autoop.core.ml.model package. 
+""" diff --git a/autoop/core/ml/model/regression/__init__.py:Zone.Identifier b/autoop/core/ml/model/regression/__init__.py:Zone.Identifier deleted file mode 100644 index e69de29bb..000000000 diff --git a/autoop/core/ml/model/regression/decision_tree.py b/autoop/core/ml/model/regression/decision_tree.py new file mode 100644 index 000000000..4e26fba57 --- /dev/null +++ b/autoop/core/ml/model/regression/decision_tree.py @@ -0,0 +1,116 @@ +from sklearn.tree import DecisionTreeRegressor +from autoop.core.ml.model import Model +from autoop.core.ml.artifact import Artifact +import numpy as np +from pydantic import PrivateAttr, Field + + +class DecisionTreeRegressionModel(Model): + """ + Decision Tree Regression model that uses scikit-learn's + DecisionTreeRegressor to perform regression tasks. + + Attributes: + _model (DecisionTreeRegressor): The decision tree regression model from + scikit-learn. + max_depth (int): Maximum depth of the tree. + min_samples_split (int): Minimum number of samples required to split + an internal node. + """ + + _model: DecisionTreeRegressor = PrivateAttr() + max_depth: int = Field( + default=None, ge=1, description="Maximum depth of the tree" + ) + min_samples_split: int = Field( + default=2, + ge=2, + description="Minimum number of samples required to " + "split an internal node", + ) + + def __init__( + self, + name: str = "test_model", + asset_path: str = "./tmp", + version: str = "0.1", + **data, + ) -> None: + """ + Initializes the Decision Tree Regression model with specified + hyperparameters. + + Args: + name (str): Name of the model. Defaults to "test_model". + asset_path (str): Path to store model artifacts. + Defaults to "./tmp". + version (str): Version of the model. Defaults to "0.1". + **data: Additional parameters passed to the model. 
+ """ + super().__init__( + name=name, asset_path=asset_path, version=version, **data + ) + self._model = DecisionTreeRegressor( + max_depth=self.max_depth, + min_samples_split=self.min_samples_split, + ) + self._parameters = { + "max_depth": self.max_depth, + "min_samples_split": self.min_samples_split, + } + self._type = "regression" + + def fit(self, observations: np.ndarray, ground_truth: np.ndarray) -> None: + """ + Fits the Decision Tree Regression model to the provided training data. + + Args: + observations (np.ndarray): Feature matrix of + shape (n_samples, p_features). + ground_truth (np.ndarray): Target vector of shape (n_samples,). + """ + self._model.fit(observations, ground_truth) + self._parameters["tree_structure"] = self._model.tree_ + self._parameters["trained"] = True + + def predict(self, observations: np.ndarray) -> np.ndarray: + """ + Predicts the values for the provided observations. + + Args: + observations (np.ndarray): Feature matrix of + shape (n_samples, p_features). + + Returns: + np.ndarray: Predicted values of shape (n_samples,). + """ + return self._model.predict(observations) + + def save(self, directory: str) -> None: + """ + Save the model using the Artifact class. + + Args: + directory (str): The directory where the model should be saved. + """ + model_data = { + "model": self._model, + "parameters": self._parameters, + } + self._artifact.data = str(model_data).encode() + self._artifact.save(directory) + + def load(self, directory: str, artifact_id: str) -> None: + """ + Load the model using the Artifact class. + + Args: + directory (str): The directory where the model is stored. + artifact_id (str): The unique ID of the model + artifact to be loaded. 
+ """ + loaded_artifact = Artifact.load(directory, artifact_id) + model_data = eval(loaded_artifact.data.decode()) + self._model = model_data["model"] + self._parameters = model_data["parameters"] + self._type = "regression" diff --git a/autoop/core/ml/model/regression/lasso_regression.py b/autoop/core/ml/model/regression/lasso_regression.py new file mode 100644 index 000000000..ae536d74e --- /dev/null +++ b/autoop/core/ml/model/regression/lasso_regression.py @@ -0,0 +1,101 @@ +from sklearn.linear_model import Lasso as SKLasso +from autoop.core.ml.model import Model +from autoop.core.ml.artifact import Artifact +import numpy as np +from pydantic import PrivateAttr + + +class LassoRegressionModel(Model): + """ + Lasso Regression model that performs linear regression using L1 + regularization. It uses scikit-learn's Lasso implementation. + + Attributes: + _model (SKLasso): The Lasso regression model from scikit-learn. + """ + + _model: SKLasso = PrivateAttr() + + def __init__( + self, + alpha: float = 1.0, + name: str = "lasso_model", + asset_path: str = "./tmp", + version: str = "0.1", + **data, + ) -> None: + """ + Initializes the Lasso model with a specified alpha parameter. + + Args: + alpha (float): Regularization strength (default: 1.0) + name (str): Name of the model. Defaults to "lasso_model" + asset_path (str): Path to store model artifacts + Defaults to "./tmp" + version (str): Version of the model. Defaults to "0.1" + **data: Additional parameters for the model + """ + super().__init__( + name=name, asset_path=asset_path, version=version, **data + ) + self._model = SKLasso(alpha=alpha) + self._parameters = { + "hyperparameters": self._model.get_params() + } + self._type = "regression" + + def fit(self, observations: np.ndarray, ground_truth: np.ndarray) -> None: + """ + Fits the Lasso regression model to the provided training data. + + Args: + observations (np.ndarray): Feature matrix of + shape (n_samples, p_features). 
+ ground_truth (np.ndarray): Target vector of + shape (n_samples,). + """ + self._model.fit(observations, ground_truth) + self._parameters["coefficients"] = self._model.coef_ + self._parameters["intercept"] = self._model.intercept_ + + def predict(self, observations: np.ndarray) -> np.ndarray: + """ + Predicts the values for the provided observations. + + Args: + observations (np.ndarray): Feature matrix of + shape (n_samples, p_features). + + Returns: + np.ndarray: Predicted values of shape (n_samples,). + """ + return self._model.predict(observations) + + def save(self, directory: str) -> None: + """ + Save the model using the Artifact class. + + Args: + directory (str): The directory where the model should be saved. + """ + model_data = { + "model": self._model, + "parameters": self._parameters, + } + self._artifact.data = str(model_data).encode() + self._artifact.save(directory) + + def load(self, directory: str, artifact_id: str) -> None: + """ + Load the model using the Artifact class. + + Args: + directory (str): The directory where the model is stored. + artifact_id (str): The unique ID of the model + artifact to be loaded. + """ + loaded_artifact = Artifact.load(directory, artifact_id) + model_data = eval(loaded_artifact.data.decode()) + self._model = model_data["model"] + self._parameters = model_data["parameters"] + self._type = "regression" diff --git a/autoop/core/ml/model/regression/mulitple_linear_regression.py b/autoop/core/ml/model/regression/mulitple_linear_regression.py new file mode 100644 index 000000000..3b563c743 --- /dev/null +++ b/autoop/core/ml/model/regression/mulitple_linear_regression.py @@ -0,0 +1,164 @@ +import numpy as np +from autoop.core.ml.artifact import Artifact +from autoop.core.ml.model import Model +from pydantic import PrivateAttr +from typing import Dict + + +class MultipleLinearRegression(Model): + """ + Multiple Linear Regression model implementation that inherits from the + base "Model" class. 
+ + Calculates the linear correlation between features (the observations) and + a target variable (ground truth). + + Attributes: + _weights (np.ndarray): Stores the coefficients (weights) and intercept + of the model. + _artifact (Artifact): Manages the model artifact. + """ + + _weights: np.ndarray = PrivateAttr(default=None) + _artifact: Artifact = PrivateAttr(default=None) + + def __init__( + self, + name: str = "test_model", + asset_path: str = "./tmp", + version: str = "0.1", + regularization: float = 0.0, + **data, + ) -> None: + """ + Initializes the Multiple Linear Regression model with test-friendly + defaults. + + Args: + name (str): Model name, default "test_model" for testing. + asset_path (str): Model asset path, default "./tmp". + version (str): Model version, default "0.1". + regularization (float): Regularization strength (default: 0.0). + **data: Additional parameters for the model. + """ + super().__init__( + name=name, asset_path=asset_path, version=version, **data + ) + self._parameters = { + "regularization": regularization, + } + self._type = "regression" + + @property + def weights(self) -> Dict: + """ + Returns a copy of the model weights. + + Returns: + Dict: Copy of the weights, or None if the model is not trained. + """ + return self._weights.copy() if self._weights is not None else None + + def fit(self, observations: np.ndarray, ground_truth: np.ndarray) -> None: + """ + Fits the multiple linear regression model to the provided + training data. + Applies L2 regularization if the regularization parameter is greater + than 0. + + Args: + observations (np.ndarray): Feature matrix of + shape (n_samples, p_features). + ground_truth (np.ndarray): Target vector of shape (n_samples,). 
+ """ + regularization = self._parameters.get("regularization", 0.0) + observations_b: np.ndarray = np.c_[ + observations, np.ones(observations.shape[0]) + ] + + XtX = np.matmul(observations_b.T, observations_b) + XtY = np.matmul(observations_b.T, ground_truth) + + if regularization > 0: + identity_matrix = np.eye(XtX.shape[0]) + identity_matrix[-1, -1] = 0 + XtX += regularization * identity_matrix + + try: + XtX_inv = np.linalg.pinv(XtX) + self._weights = np.matmul(XtX_inv, XtY) + self._parameters["weights"] = self._weights + except np.linalg.LinAlgError as e: + raise ValueError(f"Error inverting matrix during training: {e}") + + def predict(self, observations: np.ndarray) -> np.ndarray: + """ + Predicts the values for the provided observations. + + Args: + observations (np.ndarray): Feature matrix of + shape (n_samples, p_features). + + Returns: + np.ndarray: Predicted values of shape (n_samples,). + + Raises: + ValueError: If the model is not trained. + """ + if self._weights is None: + raise ValueError( + "Model has not been trained yet. " + "Please fit the model before predicting." + ) + + observations_b = np.c_[ + observations, np.ones(observations.shape[0]) + ] + return np.matmul(observations_b, self._weights) + + def save(self, directory: str) -> None: + """ + Save the model using the Artifact class. + + Args: + directory (str): The directory where the model should be saved. + """ + self._artifact = Artifact( + asset_path=f"{directory}/{self.__class__.__name__}.bin", + version="1.0.0", + data=b"", + metadata={}, + type_="model:regression", + tags=["multiple_linear_regression"], + ) + + model_data = { + "parameters": self._parameters, + } + self._artifact.data = str(model_data).encode() + + self._artifact.save() + + def load(self, directory: str, artifact_id: str) -> None: + """ + Load the model using the Artifact class. + + Args: + directory (str): The directory where the model is stored. 
+ artifact_id (str): The unique ID of the model + artifact to be loaded. + """ + self._artifact = Artifact( + asset_path=f"{directory}/{artifact_id}.bin", + version="1.0.0", + data=b"", + metadata={}, + type_="model:regression", + tags=["multiple_linear_regression"], + ) + + loaded_data = self._artifact.read() + model_data = eval(loaded_data.decode()) + self._parameters = model_data["parameters"] + self._weights = self._parameters.get("weights") + self._artifact.asset_path = f"{directory}/{artifact_id}.bin" diff --git a/autoop/core/ml/model/regression/regression_models.py b/autoop/core/ml/model/regression/regression_models.py new file mode 100644 index 000000000..545ec7949 --- /dev/null +++ b/autoop/core/ml/model/regression/regression_models.py @@ -0,0 +1,24 @@ +from autoop.core.ml.model.regression.mulitple_linear_regression import ( + MultipleLinearRegression, +) +from autoop.core.ml.model.regression.decision_tree import ( + DecisionTreeRegressionModel, +) +from autoop.core.ml.model.regression.lasso_regression import ( + LassoRegressionModel, +) + + +def get_regression_models() -> dict: + """ + Retrieves a dictionary of available regression models. + + Returns: + dict: A dictionary where keys are model names and values are the + corresponding model classes. 
+ """ + return { + "Linear Regression": MultipleLinearRegression, + "Random Forest": DecisionTreeRegressionModel, + "Lasso Regression": LassoRegressionModel, + } diff --git a/autoop/core/ml/pipeline.py b/autoop/core/ml/pipeline.py index a3297a96d..5f5cb9b8d 100644 --- a/autoop/core/ml/pipeline.py +++ b/autoop/core/ml/pipeline.py @@ -1,25 +1,54 @@ -from typing import List import pickle +import numpy as np +from typing import List from autoop.core.ml.artifact import Artifact from autoop.core.ml.dataset import Dataset from autoop.core.ml.model import Model from autoop.core.ml.feature import Feature from autoop.core.ml.metric import Metric from autoop.functional.preprocessing import preprocess_features -import numpy as np -class Pipeline(): - - def __init__(self, - metrics: List[Metric], - dataset: Dataset, - model: Model, - input_features: List[Feature], - target_feature: Feature, - split=0.8, - ): +class Pipeline: + """ + A class representing a machine learning pipeline. The pipeline handles + dataset preprocessing, model training, evaluation, and artifact + management. + + Attributes: + _dataset (Dataset): The dataset used in the pipeline. + _model (Model): The model used in the pipeline. + _input_features (List[Feature]): List of input features. + _target_feature (Feature): The target feature. + _metrics (List[Metric]): List of evaluation metrics. + _split (float): The train-test split ratio. + _artifacts (dict): Dictionary of artifacts generated during execution. + """ + + def __init__( + self, + metrics: List[Metric], + dataset: Dataset, + model: Model, + input_features: List[Feature], + target_feature: Feature, + split: float = 0.8, + ) -> None: + """ + Initializes the pipeline. + + Args: + metrics (List[Metric]): List of evaluation metrics. + dataset (Dataset): The dataset to be used. + model (Model): The model to be used. + input_features (List[Feature]): List of input features. + target_feature (Feature): The target feature. 
+ split (float, optional): Train-test split ratio. Defaults to 0.8. + + Raises: + ValueError: If target feature type and model type are incompatible. + """ self._dataset = dataset self._model = model self._input_features = input_features @@ -27,12 +56,26 @@ def __init__(self, self._metrics = metrics self._artifacts = {} self._split = split - if target_feature.type == "categorical" and model.type != "classification": - raise ValueError("Model type must be classification for categorical target feature") + is_categorical = target_feature.type == "categorical" + is_not_classification = model.type != "classification" + + if is_categorical and is_not_classification: + raise ValueError( + "Model type must be classification for " + "categorical target feature" + ) if target_feature.type == "continuous" and model.type != "regression": - raise ValueError("Model type must be regression for continuous target feature") + raise ValueError( + "Model type must be regression for continuous target feature" + ) - def __str__(self): + def __str__(self) -> str: + """ + Returns a string representation of the pipeline. + + Returns: + str: A formatted string describing the pipeline. + """ return f""" Pipeline( model={self._model.type}, @@ -44,12 +87,22 @@ def __str__(self): """ @property - def model(self): + def model(self) -> Model: + """ + Returns the model used in the pipeline. + + Returns: + Model: The model instance. + """ return self._model @property def artifacts(self) -> List[Artifact]: - """Used to get the artifacts generated during the pipeline execution to be saved + """ + Retrieves the artifacts generated during the pipeline execution. + + Returns: + List[Artifact]: List of artifacts. 
""" artifacts = [] for name, artifact in self._artifacts.items(): @@ -67,40 +120,88 @@ def artifacts(self) -> List[Artifact]: "target_feature": self._target_feature, "split": self._split, } - artifacts.append(Artifact(name="pipeline_config", data=pickle.dumps(pipeline_data))) - artifacts.append(self._model.to_artifact(name=f"pipeline_model_{self._model.type}")) + artifacts.append( + Artifact(name="pipeline_config", data=pickle.dumps(pipeline_data)) + ) + artifacts.append( + self._model.to_artifact(name=f"pipeline_model_{self._model.type}") + ) return artifacts - - def _register_artifact(self, name: str, artifact): + + def _register_artifact(self, name: str, artifact: Artifact) -> None: + """ + Registers an artifact in the pipeline. + + Args: + name (str): The name of the artifact. + artifact: The artifact object. + """ self._artifacts[name] = artifact - def _preprocess_features(self): - (target_feature_name, target_data, artifact) = preprocess_features([self._target_feature], self._dataset)[0] + def _preprocess_features(self) -> None: + """ + Preprocesses input and target features, registers artifacts, and + prepares data for training and testing. 
+ """ + (target_feature_name, target_data, artifact) = preprocess_features( + [self._target_feature], self._dataset + )[0] self._register_artifact(target_feature_name, artifact) - input_results = preprocess_features(self._input_features, self._dataset) + input_results = preprocess_features( + self._input_features, self._dataset + ) for (feature_name, data, artifact) in input_results: self._register_artifact(feature_name, artifact) - # Get the input vectors and output vector, sort by feature name for consistency self._output_vector = target_data - self._input_vectors = [data for (feature_name, data, artifact) in input_results] + self._input_vectors = [ + data for (feature_name, data, artifact) in input_results + ] - def _split_data(self): - # Split the data into training and testing sets + def _split_data(self) -> None: + """ + Splits the data into training and + testing sets based on the split ratio. + """ split = self._split - self._train_X = [vector[:int(split * len(vector))] for vector in self._input_vectors] - self._test_X = [vector[int(split * len(vector)):] for vector in self._input_vectors] - self._train_y = self._output_vector[:int(split * len(self._output_vector))] - self._test_y = self._output_vector[int(split * len(self._output_vector)):] + self._train_X = [ + vector[: int(split * len(vector))] + for vector in self._input_vectors + ] + self._test_X = [ + vector[int(split * len(vector)):] + for vector in self._input_vectors + ] + self._train_y = self._output_vector[ + : int(split * len(self._output_vector)) + ] + self._test_y = self._output_vector[ + int(split * len(self._output_vector)): + ] def _compact_vectors(self, vectors: List[np.array]) -> np.array: + """ + Compacts a list of vectors into a single array by concatenating them. + + Args: + vectors (List[np.array]): List of numpy arrays. + + Returns: + np.array: A single concatenated array. 
+ """ return np.concatenate(vectors, axis=1) - def _train(self): + def _train(self) -> None: + """ + Trains the model using the training data. + """ X = self._compact_vectors(self._train_X) Y = self._train_y self._model.fit(X, Y) - def _evaluate(self): + def _evaluate(self) -> None: + """ + Evaluates the model using the testing data and calculates metrics. + """ X = self._compact_vectors(self._test_X) Y = self._test_y self._metrics_results = [] @@ -110,15 +211,30 @@ def _evaluate(self): self._metrics_results.append((metric, result)) self._predictions = predictions - def execute(self): + def execute(self) -> dict: + """ + Executes the pipeline, including preprocessing, splitting, training, + and evaluation. Also calculates training metrics. + + Returns: + dict: A dictionary containing training metrics, evaluation metrics, + and predictions. + """ self._preprocess_features() self._split_data() self._train() self._evaluate() + + train_X = self._compact_vectors(self._train_X) + train_Y = self._train_y + train_metrics_results = [] + train_predictions = self._model.predict(train_X) + for metric in self._metrics: + train_result = metric.evaluate(train_predictions, train_Y) + train_metrics_results.append((metric, train_result)) + return { - "metrics": self._metrics_results, + "training_metrics": train_metrics_results, + "evaluation_metrics": self._metrics_results, "predictions": self._predictions, } - - - \ No newline at end of file diff --git a/autoop/core/ml/pipeline.py:Zone.Identifier b/autoop/core/ml/pipeline.py:Zone.Identifier deleted file mode 100644 index e69de29bb..000000000 diff --git a/autoop/core/storage.py b/autoop/core/storage.py index 440cc833b..683d72e38 100644 --- a/autoop/core/storage.py +++ b/autoop/core/storage.py @@ -1,64 +1,103 @@ from abc import ABC, abstractmethod import os -from typing import List, Union +from typing import List from glob import glob + class NotFoundError(Exception): - def __init__(self, path): + """ + Exception raised when a 
specified path is not found. + """ + + def __init__(self, path: str) -> None: + """ + Initializes the NotFoundError with the specified path. + + Args: + path (str): The path that was not found. + """ super().__init__(f"Path not found: {path}") + class Storage(ABC): + """ + Abstract base class for defining a storage interface. + """ @abstractmethod - def save(self, data: bytes, path: str): + def save(self, data: bytes, path: str) -> None: """ - Save data to a given path + Save data to a given path. + Args: - data (bytes): Data to save - path (str): Path to save data + data (bytes): Data to save. + path (str): Path to save data. """ pass @abstractmethod def load(self, path: str) -> bytes: """ - Load data from a given path + Load data from a given path. + Args: - path (str): Path to load data + path (str): Path to load data. + Returns: - bytes: Loaded data + bytes: Loaded data. """ pass @abstractmethod - def delete(self, path: str): + def delete(self, path: str) -> None: """ - Delete data at a given path + Delete data at a given path. + Args: - path (str): Path to delete data + path (str): Path to delete data. """ pass @abstractmethod - def list(self, path: str) -> list: + def list(self, path: str) -> List[str]: """ - List all paths under a given path + List all paths under a given path. + Args: - path (str): Path to list + path (str): Path to list. + Returns: - list: List of paths + List[str]: List of paths. """ pass class LocalStorage(Storage): + """ + LocalStorage implements the Storage interface for local file system. + """ - def __init__(self, base_path: str="./assets"): + def __init__(self, base_path: str = "./assets") -> None: + """ + Initializes the LocalStorage with a base path. Creates the base path + directory if it does not exist. + + Args: + base_path (str): The base path for local storage. + Defaults to "./assets". 
+ """ self._base_path = base_path if not os.path.exists(self._base_path): os.makedirs(self._base_path) - def save(self, data: bytes, key: str): + def save(self, data: bytes, key: str) -> None: + """ + Save data to a specific key under the base path. + + Args: + data (bytes): Data to save. + key (str): Key representing the relative path to save data. + """ path = self._join_path(key) if not os.path.exists(path): os.makedirs(os.path.dirname(path), exist_ok=True) @@ -66,28 +105,68 @@ def save(self, data: bytes, key: str): f.write(data) def load(self, key: str) -> bytes: + """ + Load data from a specific key under the base path. + + Args: + key (str): Key representing the relative path to load data. + + Returns: + bytes: Loaded data. + """ path = self._join_path(key) self._assert_path_exists(path) with open(path, 'rb') as f: return f.read() - def delete(self, key: str="/"): - self._assert_path_exists(self._join_path(key)) + def delete(self, key: str = "/") -> None: + """ + Delete data at a specific key under the base path. + + Args: + key (str): Key representing the relative path to delete data. + Defaults to "/". + """ path = self._join_path(key) + self._assert_path_exists(path) os.remove(path) def list(self, prefix: str) -> List[str]: + """ + List all files under a specified prefix. + + Args: + prefix (str): Prefix representing the relative path to list files. + + Returns: + List[str]: List of file paths under the specified prefix. + """ path = self._join_path(prefix) self._assert_path_exists(path) keys = glob(path + "/**/*", recursive=True) return list(filter(os.path.isfile, keys)) - def _assert_path_exists(self, path: str): + def _assert_path_exists(self, path: str) -> None: + """ + Assert that a given path exists. Raises NotFoundError if not. + + Args: + path (str): Path to check. + + Raises: + NotFoundError: If the path does not exist. 
+ """ if not os.path.exists(path): raise NotFoundError(path) - + def _join_path(self, path: str) -> str: - return os.path.join(self._base_path, path) + """ + Join the base path with the given relative path. + Args: + path (str): Relative path to join. - \ No newline at end of file + Returns: + str: The full path. + """ + return os.path.join(self._base_path, path) diff --git a/autoop/core/storage.py:Zone.Identifier b/autoop/core/storage.py:Zone.Identifier deleted file mode 100644 index e69de29bb..000000000 diff --git a/autoop/functional/feature.py b/autoop/functional/feature.py index 4f0ea5dff..ad1d8786d 100644 --- a/autoop/functional/feature.py +++ b/autoop/functional/feature.py @@ -1,13 +1,39 @@ - from typing import List from autoop.core.ml.dataset import Dataset from autoop.core.ml.feature import Feature +import pandas as pd +import io + def detect_feature_types(dataset: Dataset) -> List[Feature]: - """Assumption: only categorical and numerical features and no NaN values. + """ + Detects the feature types (categorical or numerical) from a dataset. + Args: - dataset: Dataset + dataset: Dataset - an instance of Dataset containing the data. + Returns: - List[Feature]: List of features with their types. + List[Feature]: A list of Feature objects with their detected + types (categorical or numerical). """ - raise NotImplementedError("This should be implemented by you.") + + if dataset.data is None: + raise ValueError("The Dataset object does not contain any valid data. 
" + "Please ensure that the dataset is properly " + "initialized with data.") + + if not isinstance(dataset.data, (str, bytes)): + raise TypeError("The Dataset object's data should be " + "of type str or bytes.") + + csv_data = dataset.data.decode("utf-8") + data = pd.read_csv(io.StringIO(csv_data)) + features = [] + + for column in data.columns: + feature_type = ( + "categorical" if data[column].dtype == "object" else "numerical" + ) + features.append(Feature(name=column, type=feature_type)) + + return features diff --git a/autoop/functional/feature.py:Zone.Identifier b/autoop/functional/feature.py:Zone.Identifier deleted file mode 100644 index e69de29bb..000000000 diff --git a/autoop/functional/preprocessing.py b/autoop/functional/preprocessing.py index 8614f8c1b..929d9a2e4 100644 --- a/autoop/functional/preprocessing.py +++ b/autoop/functional/preprocessing.py @@ -1,31 +1,50 @@ from typing import List, Tuple from autoop.core.ml.feature import Feature from autoop.core.ml.dataset import Dataset -import pandas as pd import numpy as np from sklearn.preprocessing import OneHotEncoder, StandardScaler -def preprocess_features(features: List[Feature], dataset: Dataset) -> List[Tuple[str, np.ndarray, dict]]: - """Preprocess features. + +def preprocess_features( + features: List[Feature], dataset: Dataset +) -> List[Tuple[str, np.ndarray, dict]]: + """ + Preprocess features. + Args: features (List[Feature]): List of features. dataset (Dataset): Dataset object. + Returns: - List[str, Tuple[np.ndarray, dict]]: List of preprocessed features. Each ndarray of shape (N, ...) + List[Tuple[str, np.ndarray, dict]]: List of preprocessed features. + Each tuple contains the feature name, preprocessed data, and an + artifact dictionary. 
""" results = [] raw = dataset.read() + for feature in features: if feature.type == "categorical": encoder = OneHotEncoder() - data = encoder.fit_transform(raw[feature.name].values.reshape(-1, 1)).toarray() - aritfact = {"type": "OneHotEncoder", "encoder": encoder.get_params()} - results.append((feature.name, data, aritfact)) + data = encoder.fit_transform( + raw[feature.name].values.reshape(-1, 1) + ).toarray() + artifact = { + "type": "OneHotEncoder", + "encoder": encoder.get_params(), + } + results.append((feature.name, data, artifact)) if feature.type == "numerical": scaler = StandardScaler() - data = scaler.fit_transform(raw[feature.name].values.reshape(-1, 1)) - artifact = {"type": "StandardScaler", "scaler": scaler.get_params()} + data = scaler.fit_transform( + raw[feature.name].values.reshape(-1, 1) + ) + artifact = { + "type": "StandardScaler", + "scaler": scaler.get_params(), + } results.append((feature.name, data, artifact)) + # Sort for consistency results = list(sorted(results, key=lambda x: x[0])) return results diff --git a/autoop/functional/preprocessing.py:Zone.Identifier b/autoop/functional/preprocessing.py:Zone.Identifier deleted file mode 100644 index e69de29bb..000000000 diff --git a/autoop/tests/main.py b/autoop/tests/main.py index d8754d5d0..5afeddc81 100644 --- a/autoop/tests/main.py +++ b/autoop/tests/main.py @@ -1,4 +1,3 @@ - import unittest from autoop.tests.test_database import TestDatabase from autoop.tests.test_storage import TestStorage @@ -6,4 +5,4 @@ from autoop.tests.test_pipeline import TestPipeline if __name__ == '__main__': - unittest.main() \ No newline at end of file + unittest.main() diff --git a/autoop/tests/main.py:Zone.Identifier b/autoop/tests/main.py:Zone.Identifier deleted file mode 100644 index e69de29bb..000000000 diff --git a/autoop/tests/test_database.py b/autoop/tests/test_database.py index e035846fc..c604ec20b 100644 --- a/autoop/tests/test_database.py +++ b/autoop/tests/test_database.py @@ -6,21 +6,39 @@ import 
tempfile class TestDatabase(unittest.TestCase): - - def setUp(self): + """ + Unit tests for the Database class, ensuring correct functionality for + initialization, CRUD operations, persistence, and data consistency. + """ + + def setUp(self) -> None: + """ + Set up the test environment by initializing a temporary LocalStorage + and a Database instance. + """ self.storage = LocalStorage(tempfile.mkdtemp()) self.db = Database(self.storage) - def test_init(self): + def test_init(self) -> None: + """ + Test that the Database is initialized correctly. + """ self.assertIsInstance(self.db, Database) - def test_set(self): + def test_set(self) -> None: + """ + Test that an entry can be set in the database and retrieved correctly. + """ id = str(random.randint(0, 100)) entry = {"key": random.randint(0, 100)} self.db.set("collection", id, entry) self.assertEqual(self.db.get("collection", id)["key"], entry["key"]) - def test_delete(self): + def test_delete(self) -> None: + """ + Test that an entry can be deleted from the database, and the deletion + persists after refreshing the database. + """ id = str(random.randint(0, 100)) value = {"key": random.randint(0, 100)} self.db.set("collection", id, value) @@ -29,14 +47,22 @@ def test_delete(self): self.db.refresh() self.assertIsNone(self.db.get("collection", id)) - def test_persistance(self): + def test_persistance(self) -> None: + """ + Test that data persists between different instances of the Database + using the same storage. + """ id = str(random.randint(0, 100)) value = {"key": random.randint(0, 100)} self.db.set("collection", id, value) other_db = Database(self.storage) self.assertEqual(other_db.get("collection", id)["key"], value["key"]) - def test_refresh(self): + def test_refresh(self) -> None: + """ + Test that the refresh method updates the database instance to reflect + changes made by other Database instances. 
+ """ key = str(random.randint(0, 100)) value = {"key": random.randint(0, 100)} other_db = Database(self.storage) @@ -44,7 +70,10 @@ def test_refresh(self): other_db.refresh() self.assertEqual(other_db.get("collection", key)["key"], value["key"]) - def test_list(self): + def test_list(self) -> None: + """ + Test that all entries in a collection can be listed correctly. + """ key = str(random.randint(0, 100)) value = {"key": random.randint(0, 100)} self.db.set("collection", key, value) diff --git a/autoop/tests/test_database.py:Zone.Identifier b/autoop/tests/test_database.py:Zone.Identifier deleted file mode 100644 index e69de29bb..000000000 diff --git a/autoop/tests/test_features.py b/autoop/tests/test_features.py index d9384062c..53ee8a905 100644 --- a/autoop/tests/test_features.py +++ b/autoop/tests/test_features.py @@ -7,11 +7,20 @@ from autoop.functional.feature import detect_feature_types class TestFeatures(unittest.TestCase): - + """ + Unit tests for feature detection functionalities. + """ + def setUp(self) -> None: + """ + Placeholder for setup logic. Currently, no initialization is required. + """ pass - def test_detect_features_continuous(self): + def test_detect_features_continuous(self) -> None: + """ + Test detection of continuous numerical features in the Iris dataset. + """ iris = load_iris() df = pd.DataFrame( iris.data, @@ -32,7 +41,11 @@ def test_detect_features_continuous(self): self.assertEqual(feature.name in iris.feature_names, True) self.assertEqual(feature.type, "numerical") - def test_detect_features_with_categories(self): + def test_detect_features_with_categories(self) -> None: + """ + Test detection of features with a mix of numerical and categorical types + in the Adult dataset. 
+ """ data = fetch_openml(name="adult", version=1, parser="auto") df = pd.DataFrame( data.data, diff --git a/autoop/tests/test_features.py:Zone.Identifier b/autoop/tests/test_features.py:Zone.Identifier deleted file mode 100644 index e69de29bb..000000000 diff --git a/autoop/tests/test_pipeline.py b/autoop/tests/test_pipeline.py index a7a0cc391..725f5e98d 100644 --- a/autoop/tests/test_pipeline.py +++ b/autoop/tests/test_pipeline.py @@ -6,12 +6,19 @@ from autoop.core.ml.dataset import Dataset from autoop.core.ml.feature import Feature from autoop.functional.feature import detect_feature_types -from autoop.core.ml.model.regression import MultipleLinearRegression +from autoop.core.ml.model.regression.mulitple_linear_regression import MultipleLinearRegression + from autoop.core.ml.metric import MeanSquaredError class TestPipeline(unittest.TestCase): - + """ + Unit tests for the Pipeline class. + """ + def setUp(self) -> None: + """ + Set up the test environment by initializing dataset, features, and pipeline. + """ data = fetch_openml(name="adult", version=1, parser="auto") df = pd.DataFrame( data.data, @@ -33,26 +40,41 @@ def setUp(self) -> None: ) self.ds_size = data.data.shape[0] - def test_init(self): + def test_init(self) -> None: + """ + Test initialization of the Pipeline class. + """ self.assertIsInstance(self.pipeline, Pipeline) - def test_preprocess_features(self): + def test_preprocess_features(self) -> None: + """ + Test preprocessing of features in the pipeline. + """ self.pipeline._preprocess_features() self.assertEqual(len(self.pipeline._artifacts), len(self.features)) - def test_split_data(self): + def test_split_data(self) -> None: + """ + Test data splitting functionality in the pipeline. 
+ """ self.pipeline._preprocess_features() self.pipeline._split_data() self.assertEqual(self.pipeline._train_X[0].shape[0], int(0.8 * self.ds_size)) self.assertEqual(self.pipeline._test_X[0].shape[0], self.ds_size - int(0.8 * self.ds_size)) - def test_train(self): + def test_train(self) -> None: + """ + Test training functionality of the pipeline. + """ self.pipeline._preprocess_features() self.pipeline._split_data() self.pipeline._train() self.assertIsNotNone(self.pipeline._model.parameters) - def test_evaluate(self): + def test_evaluate(self) -> None: + """ + Test evaluation functionality of the pipeline. + """ self.pipeline._preprocess_features() self.pipeline._split_data() self.pipeline._train() diff --git a/autoop/tests/test_pipeline.py:Zone.Identifier b/autoop/tests/test_pipeline.py:Zone.Identifier deleted file mode 100644 index e69de29bb..000000000 diff --git a/autoop/tests/test_storage.py b/autoop/tests/test_storage.py index c958fb7e6..6a17ac0f6 100644 --- a/autoop/tests/test_storage.py +++ b/autoop/tests/test_storage.py @@ -1,4 +1,3 @@ - import unittest from autoop.core.storage import LocalStorage, NotFoundError @@ -6,15 +5,30 @@ import tempfile class TestStorage(unittest.TestCase): + """ + Unit tests for the LocalStorage class, testing initialization, storage, + retrieval, deletion, and listing of files. + """ - def setUp(self): + def setUp(self) -> None: + """ + Set up the test environment by creating a temporary directory + and initializing a LocalStorage instance. + """ temp_dir = tempfile.mkdtemp() self.storage = LocalStorage(temp_dir) - def test_init(self): + def test_init(self) -> None: + """ + Test that the LocalStorage instance is initialized correctly. + """ self.assertIsInstance(self.storage, LocalStorage) - def test_store(self): + def test_store(self) -> None: + """ + Test storing and retrieving data in the storage, ensuring + data integrity and handling of missing keys. 
+ """ key = str(random.randint(0, 100)) test_bytes = bytes([random.randint(0, 255) for _ in range(100)]) key = "test/path" @@ -27,7 +41,11 @@ def test_store(self): except Exception as e: self.assertIsInstance(e, NotFoundError) - def test_delete(self): + def test_delete(self) -> None: + """ + Test deleting an entry from the storage and handling missing keys + after deletion. + """ key = str(random.randint(0, 100)) test_bytes = bytes([random.randint(0, 255) for _ in range(100)]) key = "test/path" @@ -38,7 +56,10 @@ def test_delete(self): except Exception as e: self.assertIsInstance(e, NotFoundError) - def test_list(self): + def test_list(self) -> None: + """ + Test listing keys in a directory within the storage. + """ key = str(random.randint(0, 100)) test_bytes = bytes([random.randint(0, 255) for _ in range(100)]) random_keys = [f"test/{random.randint(0, 100)}" for _ in range(10)] diff --git a/autoop/tests/test_storage.py:Zone.Identifier b/autoop/tests/test_storage.py:Zone.Identifier deleted file mode 100644 index e69de29bb..000000000 diff --git a/datasets/Fish[1].csv b/datasets/Fish[1].csv new file mode 100644 index 000000000..2cd724b2d --- /dev/null +++ b/datasets/Fish[1].csv @@ -0,0 +1,160 @@ +Species,Weight,Length1,Length2,Length3,Height,Width +Bream,242,23.2,25.4,30,11.52,4.02 +Bream,290,24,26.3,31.2,12.48,4.3056 +Bream,340,23.9,26.5,31.1,12.3778,4.6961 +Bream,363,26.3,29,33.5,12.73,4.4555 +Bream,430,26.5,29,34,12.444,5.134 +Bream,450,26.8,29.7,34.7,13.6024,4.9274 +Bream,500,26.8,29.7,34.5,14.1795,5.2785 +Bream,390,27.6,30,35,12.67,4.69 +Bream,450,27.6,30,35.1,14.0049,4.8438 +Bream,500,28.5,30.7,36.2,14.2266,4.9594 +Bream,475,28.4,31,36.2,14.2628,5.1042 +Bream,500,28.7,31,36.2,14.3714,4.8146 +Bream,500,29.1,31.5,36.4,13.7592,4.368 +Bream,340,29.5,32,37.3,13.9129,5.0728 +Bream,600,29.4,32,37.2,14.9544,5.1708 +Bream,600,29.4,32,37.2,15.438,5.58 +Bream,700,30.4,33,38.3,14.8604,5.2854 +Bream,700,30.4,33,38.5,14.938,5.1975 +Bream,610,30.9,33.5,38.6,15.633,5.1338 
+Bream,650,31,33.5,38.7,14.4738,5.7276 +Bream,575,31.3,34,39.5,15.1285,5.5695 +Bream,685,31.4,34,39.2,15.9936,5.3704 +Bream,620,31.5,34.5,39.7,15.5227,5.2801 +Bream,680,31.8,35,40.6,15.4686,6.1306 +Bream,700,31.9,35,40.5,16.2405,5.589 +Bream,725,31.8,35,40.9,16.36,6.0532 +Bream,720,32,35,40.6,16.3618,6.09 +Bream,714,32.7,36,41.5,16.517,5.8515 +Bream,850,32.8,36,41.6,16.8896,6.1984 +Bream,1000,33.5,37,42.6,18.957,6.603 +Bream,920,35,38.5,44.1,18.0369,6.3063 +Bream,955,35,38.5,44,18.084,6.292 +Bream,925,36.2,39.5,45.3,18.7542,6.7497 +Bream,975,37.4,41,45.9,18.6354,6.7473 +Bream,950,38,41,46.5,17.6235,6.3705 +Roach,40,12.9,14.1,16.2,4.1472,2.268 +Roach,69,16.5,18.2,20.3,5.2983,2.8217 +Roach,78,17.5,18.8,21.2,5.5756,2.9044 +Roach,87,18.2,19.8,22.2,5.6166,3.1746 +Roach,120,18.6,20,22.2,6.216,3.5742 +Roach,0,19,20.5,22.8,6.4752,3.3516 +Roach,110,19.1,20.8,23.1,6.1677,3.3957 +Roach,120,19.4,21,23.7,6.1146,3.2943 +Roach,150,20.4,22,24.7,5.8045,3.7544 +Roach,145,20.5,22,24.3,6.6339,3.5478 +Roach,160,20.5,22.5,25.3,7.0334,3.8203 +Roach,140,21,22.5,25,6.55,3.325 +Roach,160,21.1,22.5,25,6.4,3.8 +Roach,169,22,24,27.2,7.5344,3.8352 +Roach,161,22,23.4,26.7,6.9153,3.6312 +Roach,200,22.1,23.5,26.8,7.3968,4.1272 +Roach,180,23.6,25.2,27.9,7.0866,3.906 +Roach,290,24,26,29.2,8.8768,4.4968 +Roach,272,25,27,30.6,8.568,4.7736 +Roach,390,29.5,31.7,35,9.485,5.355 +Whitefish,270,23.6,26,28.7,8.3804,4.2476 +Whitefish,270,24.1,26.5,29.3,8.1454,4.2485 +Whitefish,306,25.6,28,30.8,8.778,4.6816 +Whitefish,540,28.5,31,34,10.744,6.562 +Whitefish,800,33.7,36.4,39.6,11.7612,6.5736 +Whitefish,1000,37.3,40,43.5,12.354,6.525 +Parkki,55,13.5,14.7,16.5,6.8475,2.3265 +Parkki,60,14.3,15.5,17.4,6.5772,2.3142 +Parkki,90,16.3,17.7,19.8,7.4052,2.673 +Parkki,120,17.5,19,21.3,8.3922,2.9181 +Parkki,150,18.4,20,22.4,8.8928,3.2928 +Parkki,140,19,20.7,23.2,8.5376,3.2944 +Parkki,170,19,20.7,23.2,9.396,3.4104 +Parkki,145,19.8,21.5,24.1,9.7364,3.1571 +Parkki,200,21.2,23,25.8,10.3458,3.6636 
+Parkki,273,23,25,28,11.088,4.144 +Parkki,300,24,26,29,11.368,4.234 +Perch,5.9,7.5,8.4,8.8,2.112,1.408 +Perch,32,12.5,13.7,14.7,3.528,1.9992 +Perch,40,13.8,15,16,3.824,2.432 +Perch,51.5,15,16.2,17.2,4.5924,2.6316 +Perch,70,15.7,17.4,18.5,4.588,2.9415 +Perch,100,16.2,18,19.2,5.2224,3.3216 +Perch,78,16.8,18.7,19.4,5.1992,3.1234 +Perch,80,17.2,19,20.2,5.6358,3.0502 +Perch,85,17.8,19.6,20.8,5.1376,3.0368 +Perch,85,18.2,20,21,5.082,2.772 +Perch,110,19,21,22.5,5.6925,3.555 +Perch,115,19,21,22.5,5.9175,3.3075 +Perch,125,19,21,22.5,5.6925,3.6675 +Perch,130,19.3,21.3,22.8,6.384,3.534 +Perch,120,20,22,23.5,6.11,3.4075 +Perch,120,20,22,23.5,5.64,3.525 +Perch,130,20,22,23.5,6.11,3.525 +Perch,135,20,22,23.5,5.875,3.525 +Perch,110,20,22,23.5,5.5225,3.995 +Perch,130,20.5,22.5,24,5.856,3.624 +Perch,150,20.5,22.5,24,6.792,3.624 +Perch,145,20.7,22.7,24.2,5.9532,3.63 +Perch,150,21,23,24.5,5.2185,3.626 +Perch,170,21.5,23.5,25,6.275,3.725 +Perch,225,22,24,25.5,7.293,3.723 +Perch,145,22,24,25.5,6.375,3.825 +Perch,188,22.6,24.6,26.2,6.7334,4.1658 +Perch,180,23,25,26.5,6.4395,3.6835 +Perch,197,23.5,25.6,27,6.561,4.239 +Perch,218,25,26.5,28,7.168,4.144 +Perch,300,25.2,27.3,28.7,8.323,5.1373 +Perch,260,25.4,27.5,28.9,7.1672,4.335 +Perch,265,25.4,27.5,28.9,7.0516,4.335 +Perch,250,25.4,27.5,28.9,7.2828,4.5662 +Perch,250,25.9,28,29.4,7.8204,4.2042 +Perch,300,26.9,28.7,30.1,7.5852,4.6354 +Perch,320,27.8,30,31.6,7.6156,4.7716 +Perch,514,30.5,32.8,34,10.03,6.018 +Perch,556,32,34.5,36.5,10.2565,6.3875 +Perch,840,32.5,35,37.3,11.4884,7.7957 +Perch,685,34,36.5,39,10.881,6.864 +Perch,700,34,36,38.3,10.6091,6.7408 +Perch,700,34.5,37,39.4,10.835,6.2646 +Perch,690,34.6,37,39.3,10.5717,6.3666 +Perch,900,36.5,39,41.4,11.1366,7.4934 +Perch,650,36.5,39,41.4,11.1366,6.003 +Perch,820,36.6,39,41.3,12.4313,7.3514 +Perch,850,36.9,40,42.3,11.9286,7.1064 +Perch,900,37,40,42.5,11.73,7.225 +Perch,1015,37,40,42.4,12.3808,7.4624 +Perch,820,37.1,40,42.5,11.135,6.63 +Perch,1100,39,42,44.6,12.8002,6.8684 
+Perch,1000,39.8,43,45.2,11.9328,7.2772 +Perch,1100,40.1,43,45.5,12.5125,7.4165 +Perch,1000,40.2,43.5,46,12.604,8.142 +Perch,1000,41.1,44,46.6,12.4888,7.5958 +Pike,200,30,32.3,34.8,5.568,3.3756 +Pike,300,31.7,34,37.8,5.7078,4.158 +Pike,300,32.7,35,38.8,5.9364,4.3844 +Pike,300,34.8,37.3,39.8,6.2884,4.0198 +Pike,430,35.5,38,40.5,7.29,4.5765 +Pike,345,36,38.5,41,6.396,3.977 +Pike,456,40,42.5,45.5,7.28,4.3225 +Pike,510,40,42.5,45.5,6.825,4.459 +Pike,540,40.1,43,45.8,7.786,5.1296 +Pike,500,42,45,48,6.96,4.896 +Pike,567,43.2,46,48.7,7.792,4.87 +Pike,770,44.8,48,51.2,7.68,5.376 +Pike,950,48.3,51.7,55.1,8.9262,6.1712 +Pike,1250,52,56,59.7,10.6863,6.9849 +Pike,1600,56,60,64,9.6,6.144 +Pike,1550,56,60,64,9.6,6.144 +Pike,1650,59,63.4,68,10.812,7.48 +Smelt,6.7,9.3,9.8,10.8,1.7388,1.0476 +Smelt,7.5,10,10.5,11.6,1.972,1.16 +Smelt,7,10.1,10.6,11.6,1.7284,1.1484 +Smelt,9.7,10.4,11,12,2.196,1.38 +Smelt,9.8,10.7,11.2,12.4,2.0832,1.2772 +Smelt,8.7,10.8,11.3,12.6,1.9782,1.2852 +Smelt,10,11.3,11.8,13.1,2.2139,1.2838 +Smelt,9.9,11.3,11.8,13.1,2.2139,1.1659 +Smelt,9.8,11.4,12,13.2,2.2044,1.1484 +Smelt,12.2,11.5,12.2,13.4,2.0904,1.3936 +Smelt,13.4,11.7,12.4,13.5,2.43,1.269 +Smelt,12.2,12.1,13,13.8,2.277,1.2558 +Smelt,19.7,13.2,14.3,15.2,2.8728,2.0672 +Smelt,19.9,13.8,15,16.2,2.9322,1.8792 \ No newline at end of file diff --git a/datasets/Iris.csv b/datasets/Iris.csv new file mode 100644 index 000000000..1bf42f254 --- /dev/null +++ b/datasets/Iris.csv @@ -0,0 +1,151 @@ +Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species +1,5.1,3.5,1.4,0.2,Iris-setosa +2,4.9,3.0,1.4,0.2,Iris-setosa +3,4.7,3.2,1.3,0.2,Iris-setosa +4,4.6,3.1,1.5,0.2,Iris-setosa +5,5.0,3.6,1.4,0.2,Iris-setosa +6,5.4,3.9,1.7,0.4,Iris-setosa +7,4.6,3.4,1.4,0.3,Iris-setosa +8,5.0,3.4,1.5,0.2,Iris-setosa +9,4.4,2.9,1.4,0.2,Iris-setosa +10,4.9,3.1,1.5,0.1,Iris-setosa +11,5.4,3.7,1.5,0.2,Iris-setosa +12,4.8,3.4,1.6,0.2,Iris-setosa +13,4.8,3.0,1.4,0.1,Iris-setosa +14,4.3,3.0,1.1,0.1,Iris-setosa 
+15,5.8,4.0,1.2,0.2,Iris-setosa +16,5.7,4.4,1.5,0.4,Iris-setosa +17,5.4,3.9,1.3,0.4,Iris-setosa +18,5.1,3.5,1.4,0.3,Iris-setosa +19,5.7,3.8,1.7,0.3,Iris-setosa +20,5.1,3.8,1.5,0.3,Iris-setosa +21,5.4,3.4,1.7,0.2,Iris-setosa +22,5.1,3.7,1.5,0.4,Iris-setosa +23,4.6,3.6,1.0,0.2,Iris-setosa +24,5.1,3.3,1.7,0.5,Iris-setosa +25,4.8,3.4,1.9,0.2,Iris-setosa +26,5.0,3.0,1.6,0.2,Iris-setosa +27,5.0,3.4,1.6,0.4,Iris-setosa +28,5.2,3.5,1.5,0.2,Iris-setosa +29,5.2,3.4,1.4,0.2,Iris-setosa +30,4.7,3.2,1.6,0.2,Iris-setosa +31,4.8,3.1,1.6,0.2,Iris-setosa +32,5.4,3.4,1.5,0.4,Iris-setosa +33,5.2,4.1,1.5,0.1,Iris-setosa +34,5.5,4.2,1.4,0.2,Iris-setosa +35,4.9,3.1,1.5,0.1,Iris-setosa +36,5.0,3.2,1.2,0.2,Iris-setosa +37,5.5,3.5,1.3,0.2,Iris-setosa +38,4.9,3.1,1.5,0.1,Iris-setosa +39,4.4,3.0,1.3,0.2,Iris-setosa +40,5.1,3.4,1.5,0.2,Iris-setosa +41,5.0,3.5,1.3,0.3,Iris-setosa +42,4.5,2.3,1.3,0.3,Iris-setosa +43,4.4,3.2,1.3,0.2,Iris-setosa +44,5.0,3.5,1.6,0.6,Iris-setosa +45,5.1,3.8,1.9,0.4,Iris-setosa +46,4.8,3.0,1.4,0.3,Iris-setosa +47,5.1,3.8,1.6,0.2,Iris-setosa +48,4.6,3.2,1.4,0.2,Iris-setosa +49,5.3,3.7,1.5,0.2,Iris-setosa +50,5.0,3.3,1.4,0.2,Iris-setosa +51,7.0,3.2,4.7,1.4,Iris-versicolor +52,6.4,3.2,4.5,1.5,Iris-versicolor +53,6.9,3.1,4.9,1.5,Iris-versicolor +54,5.5,2.3,4.0,1.3,Iris-versicolor +55,6.5,2.8,4.6,1.5,Iris-versicolor +56,5.7,2.8,4.5,1.3,Iris-versicolor +57,6.3,3.3,4.7,1.6,Iris-versicolor +58,4.9,2.4,3.3,1.0,Iris-versicolor +59,6.6,2.9,4.6,1.3,Iris-versicolor +60,5.2,2.7,3.9,1.4,Iris-versicolor +61,5.0,2.0,3.5,1.0,Iris-versicolor +62,5.9,3.0,4.2,1.5,Iris-versicolor +63,6.0,2.2,4.0,1.0,Iris-versicolor +64,6.1,2.9,4.7,1.4,Iris-versicolor +65,5.6,2.9,3.6,1.3,Iris-versicolor +66,6.7,3.1,4.4,1.4,Iris-versicolor +67,5.6,3.0,4.5,1.5,Iris-versicolor +68,5.8,2.7,4.1,1.0,Iris-versicolor +69,6.2,2.2,4.5,1.5,Iris-versicolor +70,5.6,2.5,3.9,1.1,Iris-versicolor +71,5.9,3.2,4.8,1.8,Iris-versicolor +72,6.1,2.8,4.0,1.3,Iris-versicolor +73,6.3,2.5,4.9,1.5,Iris-versicolor 
+74,6.1,2.8,4.7,1.2,Iris-versicolor +75,6.4,2.9,4.3,1.3,Iris-versicolor +76,6.6,3.0,4.4,1.4,Iris-versicolor +77,6.8,2.8,4.8,1.4,Iris-versicolor +78,6.7,3.0,5.0,1.7,Iris-versicolor +79,6.0,2.9,4.5,1.5,Iris-versicolor +80,5.7,2.6,3.5,1.0,Iris-versicolor +81,5.5,2.4,3.8,1.1,Iris-versicolor +82,5.5,2.4,3.7,1.0,Iris-versicolor +83,5.8,2.7,3.9,1.2,Iris-versicolor +84,6.0,2.7,5.1,1.6,Iris-versicolor +85,5.4,3.0,4.5,1.5,Iris-versicolor +86,6.0,3.4,4.5,1.6,Iris-versicolor +87,6.7,3.1,4.7,1.5,Iris-versicolor +88,6.3,2.3,4.4,1.3,Iris-versicolor +89,5.6,3.0,4.1,1.3,Iris-versicolor +90,5.5,2.5,4.0,1.3,Iris-versicolor +91,5.5,2.6,4.4,1.2,Iris-versicolor +92,6.1,3.0,4.6,1.4,Iris-versicolor +93,5.8,2.6,4.0,1.2,Iris-versicolor +94,5.0,2.3,3.3,1.0,Iris-versicolor +95,5.6,2.7,4.2,1.3,Iris-versicolor +96,5.7,3.0,4.2,1.2,Iris-versicolor +97,5.7,2.9,4.2,1.3,Iris-versicolor +98,6.2,2.9,4.3,1.3,Iris-versicolor +99,5.1,2.5,3.0,1.1,Iris-versicolor +100,5.7,2.8,4.1,1.3,Iris-versicolor +101,6.3,3.3,6.0,2.5,Iris-virginica +102,5.8,2.7,5.1,1.9,Iris-virginica +103,7.1,3.0,5.9,2.1,Iris-virginica +104,6.3,2.9,5.6,1.8,Iris-virginica +105,6.5,3.0,5.8,2.2,Iris-virginica +106,7.6,3.0,6.6,2.1,Iris-virginica +107,4.9,2.5,4.5,1.7,Iris-virginica +108,7.3,2.9,6.3,1.8,Iris-virginica +109,6.7,2.5,5.8,1.8,Iris-virginica +110,7.2,3.6,6.1,2.5,Iris-virginica +111,6.5,3.2,5.1,2.0,Iris-virginica +112,6.4,2.7,5.3,1.9,Iris-virginica +113,6.8,3.0,5.5,2.1,Iris-virginica +114,5.7,2.5,5.0,2.0,Iris-virginica +115,5.8,2.8,5.1,2.4,Iris-virginica +116,6.4,3.2,5.3,2.3,Iris-virginica +117,6.5,3.0,5.5,1.8,Iris-virginica +118,7.7,3.8,6.7,2.2,Iris-virginica +119,7.7,2.6,6.9,2.3,Iris-virginica +120,6.0,2.2,5.0,1.5,Iris-virginica +121,6.9,3.2,5.7,2.3,Iris-virginica +122,5.6,2.8,4.9,2.0,Iris-virginica +123,7.7,2.8,6.7,2.0,Iris-virginica +124,6.3,2.7,4.9,1.8,Iris-virginica +125,6.7,3.3,5.7,2.1,Iris-virginica +126,7.2,3.2,6.0,1.8,Iris-virginica +127,6.2,2.8,4.8,1.8,Iris-virginica +128,6.1,3.0,4.9,1.8,Iris-virginica 
+129,6.4,2.8,5.6,2.1,Iris-virginica +130,7.2,3.0,5.8,1.6,Iris-virginica +131,7.4,2.8,6.1,1.9,Iris-virginica +132,7.9,3.8,6.4,2.0,Iris-virginica +133,6.4,2.8,5.6,2.2,Iris-virginica +134,6.3,2.8,5.1,1.5,Iris-virginica +135,6.1,2.6,5.6,1.4,Iris-virginica +136,7.7,3.0,6.1,2.3,Iris-virginica +137,6.3,3.4,5.6,2.4,Iris-virginica +138,6.4,3.1,5.5,1.8,Iris-virginica +139,6.0,3.0,4.8,1.8,Iris-virginica +140,6.9,3.1,5.4,2.1,Iris-virginica +141,6.7,3.1,5.6,2.4,Iris-virginica +142,6.9,3.1,5.1,2.3,Iris-virginica +143,5.8,2.7,5.1,1.9,Iris-virginica +144,6.8,3.2,5.9,2.3,Iris-virginica +145,6.7,3.3,5.7,2.5,Iris-virginica +146,6.7,3.0,5.2,2.3,Iris-virginica +147,6.3,2.5,5.0,1.9,Iris-virginica +148,6.5,3.0,5.2,2.0,Iris-virginica +149,6.2,3.4,5.4,2.3,Iris-virginica +150,5.9,3.0,5.1,1.8,Iris-virginica diff --git a/datasets/Real estate.csv b/datasets/Real estate.csv new file mode 100644 index 000000000..3968197d5 --- /dev/null +++ b/datasets/Real estate.csv @@ -0,0 +1,415 @@ +No,X1 transaction date,X2 house age,X3 distance to the nearest MRT station,X4 number of convenience stores,X5 latitude,X6 longitude,Y house price of unit area +1,2012.917,32,84.87882,10,24.98298,121.54024,37.9 +2,2012.917,19.5,306.5947,9,24.98034,121.53951,42.2 +3,2013.583,13.3,561.9845,5,24.98746,121.54391,47.3 +4,2013.500,13.3,561.9845,5,24.98746,121.54391,54.8 +5,2012.833,5,390.5684,5,24.97937,121.54245,43.1 +6,2012.667,7.1,2175.03,3,24.96305,121.51254,32.1 +7,2012.667,34.5,623.4731,7,24.97933,121.53642,40.3 +8,2013.417,20.3,287.6025,6,24.98042,121.54228,46.7 +9,2013.500,31.7,5512.038,1,24.95095,121.48458,18.8 +10,2013.417,17.9,1783.18,3,24.96731,121.51486,22.1 +11,2013.083,34.8,405.2134,1,24.97349,121.53372,41.4 +12,2013.333,6.3,90.45606,9,24.97433,121.5431,58.1 +13,2012.917,13,492.2313,5,24.96515,121.53737,39.3 +14,2012.667,20.4,2469.645,4,24.96108,121.51046,23.8 +15,2013.500,13.2,1164.838,4,24.99156,121.53406,34.3 +16,2013.583,35.7,579.2083,2,24.9824,121.54619,50.5 
+17,2013.250,0,292.9978,6,24.97744,121.54458,70.1 +18,2012.750,17.7,350.8515,1,24.97544,121.53119,37.4 +19,2013.417,16.9,368.1363,8,24.9675,121.54451,42.3 +20,2012.667,1.5,23.38284,7,24.96772,121.54102,47.7 +21,2013.417,4.5,2275.877,3,24.96314,121.51151,29.3 +22,2013.417,10.5,279.1726,7,24.97528,121.54541,51.6 +23,2012.917,14.7,1360.139,1,24.95204,121.54842,24.6 +24,2013.083,10.1,279.1726,7,24.97528,121.54541,47.9 +25,2013.000,39.6,480.6977,4,24.97353,121.53885,38.8 +26,2013.083,29.3,1487.868,2,24.97542,121.51726,27 +27,2012.667,3.1,383.8624,5,24.98085,121.54391,56.2 +28,2013.250,10.4,276.449,5,24.95593,121.53913,33.6 +29,2013.500,19.2,557.478,4,24.97419,121.53797,47 +30,2013.083,7.1,451.2438,5,24.97563,121.54694,57.1 +31,2013.500,25.9,4519.69,0,24.94826,121.49587,22.1 +32,2012.750,29.6,769.4034,7,24.98281,121.53408,25 +33,2012.750,37.9,488.5727,1,24.97349,121.53451,34.2 +34,2013.250,16.5,323.655,6,24.97841,121.54281,49.3 +35,2012.750,15.4,205.367,7,24.98419,121.54243,55.1 +36,2013.500,13.9,4079.418,0,25.01459,121.51816,27.3 +37,2012.917,14.7,1935.009,2,24.96386,121.51458,22.9 +38,2013.167,12,1360.139,1,24.95204,121.54842,25.3 +39,2012.667,3.1,577.9615,6,24.97201,121.54722,47.7 +40,2013.167,16.2,289.3248,5,24.98203,121.54348,46.2 +41,2013.000,13.6,4082.015,0,24.94155,121.50381,15.9 +42,2013.500,16.8,4066.587,0,24.94297,121.50342,18.2 +43,2013.417,36.1,519.4617,5,24.96305,121.53758,34.7 +44,2012.750,34.4,512.7871,6,24.98748,121.54301,34.1 +45,2013.583,2.7,533.4762,4,24.97445,121.54765,53.9 +46,2013.083,36.6,488.8193,8,24.97015,121.54494,38.3 +47,2013.417,21.7,463.9623,9,24.9703,121.54458,42 +48,2013.583,35.9,640.7391,3,24.97563,121.53715,61.5 +49,2013.417,24.2,4605.749,0,24.94684,121.49578,13.4 +50,2012.667,29.4,4510.359,1,24.94925,121.49542,13.2 +51,2013.417,21.7,512.5487,4,24.974,121.53842,44.2 +52,2013.083,31.3,1758.406,1,24.95402,121.55282,20.7 +53,2013.583,32.1,1438.579,3,24.97419,121.5175,27 +54,2013.083,13.3,492.2313,5,24.96515,121.53737,38.9 
+55,2013.083,16.1,289.3248,5,24.98203,121.54348,51.7 +56,2012.833,31.7,1160.632,0,24.94968,121.53009,13.7 +57,2013.417,33.6,371.2495,8,24.97254,121.54059,41.9 +58,2012.917,3.5,56.47425,7,24.95744,121.53711,53.5 +59,2013.500,30.3,4510.359,1,24.94925,121.49542,22.6 +60,2013.083,13.3,336.0532,5,24.95776,121.53438,42.4 +61,2013.417,11,1931.207,2,24.96365,121.51471,21.3 +62,2013.500,5.3,259.6607,6,24.97585,121.54516,63.2 +63,2012.917,17.2,2175.877,3,24.96303,121.51254,27.7 +64,2013.583,2.6,533.4762,4,24.97445,121.54765,55 +65,2013.333,17.5,995.7554,0,24.96305,121.54915,25.3 +66,2013.417,40.1,123.7429,8,24.97635,121.54329,44.3 +67,2013.000,1,193.5845,6,24.96571,121.54089,50.7 +68,2013.500,8.5,104.8101,5,24.96674,121.54067,56.8 +69,2013.417,30.4,464.223,6,24.97964,121.53805,36.2 +70,2012.833,12.5,561.9845,5,24.98746,121.54391,42 +71,2013.583,6.6,90.45606,9,24.97433,121.5431,59 +72,2013.083,35.5,640.7391,3,24.97563,121.53715,40.8 +73,2013.583,32.5,424.5442,8,24.97587,121.53913,36.3 +74,2013.167,13.8,4082.015,0,24.94155,121.50381,20 +75,2012.917,6.8,379.5575,10,24.98343,121.53762,54.4 +76,2013.500,12.3,1360.139,1,24.95204,121.54842,29.5 +77,2013.583,35.9,616.4004,3,24.97723,121.53767,36.8 +78,2012.833,20.5,2185.128,3,24.96322,121.51237,25.6 +79,2012.917,38.2,552.4371,2,24.97598,121.53381,29.8 +80,2013.000,18,1414.837,1,24.95182,121.54887,26.5 +81,2013.500,11.8,533.4762,4,24.97445,121.54765,40.3 +82,2013.000,30.8,377.7956,6,24.96427,121.53964,36.8 +83,2013.083,13.2,150.9347,7,24.96725,121.54252,48.1 +84,2012.917,25.3,2707.392,3,24.96056,121.50831,17.7 +85,2013.083,15.1,383.2805,7,24.96735,121.54464,43.7 +86,2012.750,0,338.9679,9,24.96853,121.54413,50.8 +87,2012.833,1.8,1455.798,1,24.9512,121.549,27 +88,2013.583,16.9,4066.587,0,24.94297,121.50342,18.3 +89,2012.917,8.9,1406.43,0,24.98573,121.52758,48 +90,2013.500,23,3947.945,0,24.94783,121.50243,25.3 +91,2012.833,0,274.0144,1,24.9748,121.53059,45.4 +92,2013.250,9.1,1402.016,0,24.98569,121.5276,43.2 
+93,2012.917,20.6,2469.645,4,24.96108,121.51046,21.8 +94,2012.917,31.9,1146.329,0,24.9492,121.53076,16.1 +95,2012.917,40.9,167.5989,5,24.9663,121.54026,41 +96,2012.917,8,104.8101,5,24.96674,121.54067,51.8 +97,2013.417,6.4,90.45606,9,24.97433,121.5431,59.5 +98,2013.083,28.4,617.4424,3,24.97746,121.53299,34.6 +99,2013.417,16.4,289.3248,5,24.98203,121.54348,51 +100,2013.417,6.4,90.45606,9,24.97433,121.5431,62.2 +101,2013.500,17.5,964.7496,4,24.98872,121.53411,38.2 +102,2012.833,12.7,170.1289,1,24.97371,121.52984,32.9 +103,2013.083,1.1,193.5845,6,24.96571,121.54089,54.4 +104,2012.750,0,208.3905,6,24.95618,121.53844,45.7 +105,2012.667,32.7,392.4459,6,24.96398,121.5425,30.5 +106,2012.833,0,292.9978,6,24.97744,121.54458,71 +107,2013.083,17.2,189.5181,8,24.97707,121.54308,47.1 +108,2013.333,12.2,1360.139,1,24.95204,121.54842,26.6 +109,2013.417,31.4,592.5006,2,24.9726,121.53561,34.1 +110,2013.583,4,2147.376,3,24.96299,121.51284,28.4 +111,2013.083,8.1,104.8101,5,24.96674,121.54067,51.6 +112,2013.583,33.3,196.6172,7,24.97701,121.54224,39.4 +113,2013.417,9.9,2102.427,3,24.96044,121.51462,23.1 +114,2013.333,14.8,393.2606,6,24.96172,121.53812,7.6 +115,2012.667,30.6,143.8383,8,24.98155,121.54142,53.3 +116,2013.083,20.6,737.9161,2,24.98092,121.54739,46.4 +117,2013.000,30.9,6396.283,1,24.94375,121.47883,12.2 +118,2013.000,13.6,4197.349,0,24.93885,121.50383,13 +119,2013.500,25.3,1583.722,3,24.96622,121.51709,30.6 +120,2013.500,16.6,289.3248,5,24.98203,121.54348,59.6 +121,2013.167,13.3,492.2313,5,24.96515,121.53737,31.3 +122,2013.500,13.6,492.2313,5,24.96515,121.53737,48 +123,2013.250,31.5,414.9476,4,24.98199,121.54464,32.5 +124,2013.417,0,185.4296,0,24.9711,121.5317,45.5 +125,2012.917,9.9,279.1726,7,24.97528,121.54541,57.4 +126,2013.167,1.1,193.5845,6,24.96571,121.54089,48.6 +127,2013.083,38.6,804.6897,4,24.97838,121.53477,62.9 +128,2013.250,3.8,383.8624,5,24.98085,121.54391,55 +129,2013.083,41.3,124.9912,6,24.96674,121.54039,60.7 +130,2013.417,38.5,216.8329,7,24.98086,121.54162,41 
+131,2013.250,29.6,535.527,8,24.98092,121.53653,37.5 +132,2013.500,4,2147.376,3,24.96299,121.51284,30.7 +133,2013.167,26.6,482.7581,5,24.97433,121.53863,37.5 +134,2012.833,18,373.3937,8,24.9866,121.54082,39.5 +135,2012.667,33.4,186.9686,6,24.96604,121.54211,42.2 +136,2012.917,18.9,1009.235,0,24.96357,121.54951,20.8 +137,2012.750,11.4,390.5684,5,24.97937,121.54245,46.8 +138,2013.500,13.6,319.0708,6,24.96495,121.54277,47.4 +139,2013.167,10,942.4664,0,24.97843,121.52406,43.5 +140,2012.667,12.9,492.2313,5,24.96515,121.53737,42.5 +141,2013.250,16.2,289.3248,5,24.98203,121.54348,51.4 +142,2013.333,5.1,1559.827,3,24.97213,121.51627,28.9 +143,2013.417,19.8,640.6071,5,24.97017,121.54647,37.5 +144,2013.500,13.6,492.2313,5,24.96515,121.53737,40.1 +145,2013.083,11.9,1360.139,1,24.95204,121.54842,28.4 +146,2012.917,2.1,451.2438,5,24.97563,121.54694,45.5 +147,2012.750,0,185.4296,0,24.9711,121.5317,52.2 +148,2012.750,3.2,489.8821,8,24.97017,121.54494,43.2 +149,2013.500,16.4,3780.59,0,24.93293,121.51203,45.1 +150,2012.667,34.9,179.4538,8,24.97349,121.54245,39.7 +151,2013.250,35.8,170.7311,7,24.96719,121.54269,48.5 +152,2013.500,4.9,387.7721,9,24.98118,121.53788,44.7 +153,2013.333,12,1360.139,1,24.95204,121.54842,28.9 +154,2013.250,6.5,376.1709,6,24.95418,121.53713,40.9 +155,2013.500,16.9,4066.587,0,24.94297,121.50342,20.7 +156,2013.167,13.8,4082.015,0,24.94155,121.50381,15.6 +157,2013.583,30.7,1264.73,0,24.94883,121.52954,18.3 +158,2013.250,16.1,815.9314,4,24.97886,121.53464,35.6 +159,2013.000,11.6,390.5684,5,24.97937,121.54245,39.4 +160,2012.667,15.5,815.9314,4,24.97886,121.53464,37.4 +161,2012.917,3.5,49.66105,8,24.95836,121.53756,57.8 +162,2013.417,19.2,616.4004,3,24.97723,121.53767,39.6 +163,2012.750,16,4066.587,0,24.94297,121.50342,11.6 +164,2013.500,8.5,104.8101,5,24.96674,121.54067,55.5 +165,2012.833,0,185.4296,0,24.9711,121.5317,55.2 +166,2012.917,13.7,1236.564,1,24.97694,121.55391,30.6 +167,2013.417,0,292.9978,6,24.97744,121.54458,73.6 
+168,2013.417,28.2,330.0854,8,24.97408,121.54011,43.4 +169,2013.083,27.6,515.1122,5,24.96299,121.5432,37.4 +170,2013.417,8.4,1962.628,1,24.95468,121.55481,23.5 +171,2013.333,24,4527.687,0,24.94741,121.49628,14.4 +172,2013.083,3.6,383.8624,5,24.98085,121.54391,58.8 +173,2013.583,6.6,90.45606,9,24.97433,121.5431,58.1 +174,2013.083,41.3,401.8807,4,24.98326,121.5446,35.1 +175,2013.417,4.3,432.0385,7,24.9805,121.53778,45.2 +176,2013.083,30.2,472.1745,3,24.97005,121.53758,36.5 +177,2012.833,13.9,4573.779,0,24.94867,121.49507,19.2 +178,2013.083,33,181.0766,9,24.97697,121.54262,42 +179,2013.500,13.1,1144.436,4,24.99176,121.53456,36.7 +180,2013.083,14,438.8513,1,24.97493,121.5273,42.6 +181,2012.667,26.9,4449.27,0,24.94898,121.49621,15.5 +182,2013.167,11.6,201.8939,8,24.98489,121.54121,55.9 +183,2013.500,13.5,2147.376,3,24.96299,121.51284,23.6 +184,2013.500,17,4082.015,0,24.94155,121.50381,18.8 +185,2012.750,14.1,2615.465,0,24.95495,121.56174,21.8 +186,2012.750,31.4,1447.286,3,24.97285,121.5173,21.5 +187,2013.167,20.9,2185.128,3,24.96322,121.51237,25.7 +188,2013.000,8.9,3078.176,0,24.95464,121.56627,22 +189,2012.917,34.8,190.0392,8,24.97707,121.54312,44.3 +190,2012.917,16.3,4066.587,0,24.94297,121.50342,20.5 +191,2013.500,35.3,616.5735,8,24.97945,121.53642,42.3 +192,2013.167,13.2,750.0704,2,24.97371,121.54951,37.8 +193,2013.167,43.8,57.58945,7,24.9675,121.54069,42.7 +194,2013.417,9.7,421.479,5,24.98246,121.54477,49.3 +195,2013.500,15.2,3771.895,0,24.93363,121.51158,29.3 +196,2013.333,15.2,461.1016,5,24.95425,121.5399,34.6 +197,2013.000,22.8,707.9067,2,24.981,121.54713,36.6 +198,2013.250,34.4,126.7286,8,24.96881,121.54089,48.2 +199,2013.083,34,157.6052,7,24.96628,121.54196,39.1 +200,2013.417,18.2,451.6419,8,24.96945,121.5449,31.6 +201,2013.417,17.4,995.7554,0,24.96305,121.54915,25.5 +202,2013.417,13.1,561.9845,5,24.98746,121.54391,45.9 +203,2012.917,38.3,642.6985,3,24.97559,121.53713,31.5 +204,2012.667,15.6,289.3248,5,24.98203,121.54348,46.1 
+205,2013.000,18,1414.837,1,24.95182,121.54887,26.6 +206,2013.083,12.8,1449.722,3,24.97289,121.51728,21.4 +207,2013.250,22.2,379.5575,10,24.98343,121.53762,44 +208,2013.083,38.5,665.0636,3,24.97503,121.53692,34.2 +209,2012.750,11.5,1360.139,1,24.95204,121.54842,26.2 +210,2012.833,34.8,175.6294,8,24.97347,121.54271,40.9 +211,2013.500,5.2,390.5684,5,24.97937,121.54245,52.2 +212,2013.083,0,274.0144,1,24.9748,121.53059,43.5 +213,2013.333,17.6,1805.665,2,24.98672,121.52091,31.1 +214,2013.083,6.2,90.45606,9,24.97433,121.5431,58 +215,2013.583,18.1,1783.18,3,24.96731,121.51486,20.9 +216,2013.333,19.2,383.7129,8,24.972,121.54477,48.1 +217,2013.250,37.8,590.9292,1,24.97153,121.53559,39.7 +218,2012.917,28,372.6242,6,24.97838,121.54119,40.8 +219,2013.417,13.6,492.2313,5,24.96515,121.53737,43.8 +220,2012.750,29.3,529.7771,8,24.98102,121.53655,40.2 +221,2013.333,37.2,186.5101,9,24.97703,121.54265,78.3 +222,2013.333,9,1402.016,0,24.98569,121.5276,38.5 +223,2013.583,30.6,431.1114,10,24.98123,121.53743,48.5 +224,2013.250,9.1,1402.016,0,24.98569,121.5276,42.3 +225,2013.333,34.5,324.9419,6,24.97814,121.5417,46 +226,2013.250,1.1,193.5845,6,24.96571,121.54089,49 +227,2013.000,16.5,4082.015,0,24.94155,121.50381,12.8 +228,2012.917,32.4,265.0609,8,24.98059,121.53986,40.2 +229,2013.417,11.9,3171.329,0,25.00115,121.51776,46.6 +230,2013.583,31,1156.412,0,24.9489,121.53095,19 +231,2013.500,4,2147.376,3,24.96299,121.51284,33.4 +232,2012.833,16.2,4074.736,0,24.94235,121.50357,14.7 +233,2012.917,27.1,4412.765,1,24.95032,121.49587,17.4 +234,2013.333,39.7,333.3679,9,24.98016,121.53932,32.4 +235,2013.250,8,2216.612,4,24.96007,121.51361,23.9 +236,2012.750,12.9,250.631,7,24.96606,121.54297,39.3 +237,2013.167,3.6,373.8389,10,24.98322,121.53765,61.9 +238,2013.167,13,732.8528,0,24.97668,121.52518,39 +239,2013.083,12.8,732.8528,0,24.97668,121.52518,40.6 +240,2013.500,18.1,837.7233,0,24.96334,121.54767,29.7 +241,2013.083,11,1712.632,2,24.96412,121.5167,28.8 
+242,2013.500,13.7,250.631,7,24.96606,121.54297,41.4 +243,2012.833,2,2077.39,3,24.96357,121.51329,33.4 +244,2013.417,32.8,204.1705,8,24.98236,121.53923,48.2 +245,2013.083,4.8,1559.827,3,24.97213,121.51627,21.7 +246,2013.417,7.5,639.6198,5,24.97258,121.54814,40.8 +247,2013.417,16.4,389.8219,6,24.96412,121.54273,40.6 +248,2013.333,21.7,1055.067,0,24.96211,121.54928,23.1 +249,2013.000,19,1009.235,0,24.96357,121.54951,22.3 +250,2012.833,18,6306.153,1,24.95743,121.47516,15 +251,2013.167,39.2,424.7132,7,24.97429,121.53917,30 +252,2012.917,31.7,1159.454,0,24.9496,121.53018,13.8 +253,2012.833,5.9,90.45606,9,24.97433,121.5431,52.7 +254,2012.667,30.4,1735.595,2,24.96464,121.51623,25.9 +255,2012.667,1.1,329.9747,5,24.98254,121.54395,51.8 +256,2013.417,31.5,5512.038,1,24.95095,121.48458,17.4 +257,2012.667,14.6,339.2289,1,24.97519,121.53151,26.5 +258,2013.250,17.3,444.1334,1,24.97501,121.5273,43.9 +259,2013.417,0,292.9978,6,24.97744,121.54458,63.3 +260,2013.083,17.7,837.7233,0,24.96334,121.54767,28.8 +261,2013.250,17,1485.097,4,24.97073,121.517,30.7 +262,2013.167,16.2,2288.011,3,24.95885,121.51359,24.4 +263,2012.917,15.9,289.3248,5,24.98203,121.54348,53 +264,2013.417,3.9,2147.376,3,24.96299,121.51284,31.7 +265,2013.167,32.6,493.657,7,24.96968,121.54522,40.6 +266,2012.833,15.7,815.9314,4,24.97886,121.53464,38.1 +267,2013.250,17.8,1783.18,3,24.96731,121.51486,23.7 +268,2012.833,34.7,482.7581,5,24.97433,121.53863,41.1 +269,2013.417,17.2,390.5684,5,24.97937,121.54245,40.1 +270,2013.000,17.6,837.7233,0,24.96334,121.54767,23 +271,2013.333,10.8,252.5822,1,24.9746,121.53046,117.5 +272,2012.917,17.7,451.6419,8,24.96945,121.5449,26.5 +273,2012.750,13,492.2313,5,24.96515,121.53737,40.5 +274,2013.417,13.2,170.1289,1,24.97371,121.52984,29.3 +275,2013.167,27.5,394.0173,7,24.97305,121.53994,41 +276,2012.667,1.5,23.38284,7,24.96772,121.54102,49.7 +277,2013.000,19.1,461.1016,5,24.95425,121.5399,34 +278,2013.417,21.2,2185.128,3,24.96322,121.51237,27.7 
+279,2012.750,0,208.3905,6,24.95618,121.53844,44 +280,2013.417,2.6,1554.25,3,24.97026,121.51642,31.1 +281,2013.250,2.3,184.3302,6,24.96581,121.54086,45.4 +282,2013.333,4.7,387.7721,9,24.98118,121.53788,44.8 +283,2012.917,2,1455.798,1,24.9512,121.549,25.6 +284,2013.417,33.5,1978.671,2,24.98674,121.51844,23.5 +285,2012.917,15,383.2805,7,24.96735,121.54464,34.4 +286,2013.167,30.1,718.2937,3,24.97509,121.53644,55.3 +287,2012.917,5.9,90.45606,9,24.97433,121.5431,56.3 +288,2013.000,19.2,461.1016,5,24.95425,121.5399,32.9 +289,2013.583,16.6,323.6912,6,24.97841,121.5428,51 +290,2013.333,13.9,289.3248,5,24.98203,121.54348,44.5 +291,2013.083,37.7,490.3446,0,24.97217,121.53471,37 +292,2012.833,3.4,56.47425,7,24.95744,121.53711,54.4 +293,2013.083,17.5,395.6747,5,24.95674,121.534,24.5 +294,2012.667,12.6,383.2805,7,24.96735,121.54464,42.5 +295,2013.500,26.4,335.5273,6,24.9796,121.5414,38.1 +296,2013.167,18.2,2179.59,3,24.96299,121.51252,21.8 +297,2012.750,12.5,1144.436,4,24.99176,121.53456,34.1 +298,2012.833,34.9,567.0349,4,24.97003,121.5458,28.5 +299,2013.333,16.7,4082.015,0,24.94155,121.50381,16.7 +300,2013.167,33.2,121.7262,10,24.98178,121.54059,46.1 +301,2013.083,2.5,156.2442,4,24.96696,121.53992,36.9 +302,2012.750,38,461.7848,0,24.97229,121.53445,35.7 +303,2013.500,16.5,2288.011,3,24.95885,121.51359,23.2 +304,2013.500,38.3,439.7105,0,24.97161,121.53423,38.4 +305,2013.417,20,1626.083,3,24.96622,121.51668,29.4 +306,2013.083,16.2,289.3248,5,24.98203,121.54348,55 +307,2013.500,14.4,169.9803,1,24.97369,121.52979,50.2 +308,2012.833,10.3,3079.89,0,24.9546,121.56627,24.7 +309,2013.417,16.4,289.3248,5,24.98203,121.54348,53 +310,2013.250,30.3,1264.73,0,24.94883,121.52954,19.1 +311,2013.583,16.4,1643.499,2,24.95394,121.55174,24.7 +312,2013.167,21.3,537.7971,4,24.97425,121.53814,42.2 +313,2013.583,35.4,318.5292,9,24.97071,121.54069,78 +314,2013.333,8.3,104.8101,5,24.96674,121.54067,42.8 +315,2013.250,3.7,577.9615,6,24.97201,121.54722,41.6 
+316,2013.083,15.6,1756.411,2,24.9832,121.51812,27.3 +317,2013.250,13.3,250.631,7,24.96606,121.54297,42 +318,2012.750,15.6,752.7669,2,24.97795,121.53451,37.5 +319,2013.333,7.1,379.5575,10,24.98343,121.53762,49.8 +320,2013.250,34.6,272.6783,5,24.95562,121.53872,26.9 +321,2012.750,13.5,4197.349,0,24.93885,121.50383,18.6 +322,2012.917,16.9,964.7496,4,24.98872,121.53411,37.7 +323,2013.000,12.9,187.4823,1,24.97388,121.52981,33.1 +324,2013.417,28.6,197.1338,6,24.97631,121.54436,42.5 +325,2012.667,12.4,1712.632,2,24.96412,121.5167,31.3 +326,2013.083,36.6,488.8193,8,24.97015,121.54494,38.1 +327,2013.500,4.1,56.47425,7,24.95744,121.53711,62.1 +328,2013.417,3.5,757.3377,3,24.97538,121.54971,36.7 +329,2012.833,15.9,1497.713,3,24.97003,121.51696,23.6 +330,2013.000,13.6,4197.349,0,24.93885,121.50383,19.2 +331,2013.083,32,1156.777,0,24.94935,121.53046,12.8 +332,2013.333,25.6,4519.69,0,24.94826,121.49587,15.6 +333,2013.167,39.8,617.7134,2,24.97577,121.53475,39.6 +334,2012.750,7.8,104.8101,5,24.96674,121.54067,38.4 +335,2012.917,30,1013.341,5,24.99006,121.5346,22.8 +336,2013.583,27.3,337.6016,6,24.96431,121.54063,36.5 +337,2012.833,5.1,1867.233,2,24.98407,121.51748,35.6 +338,2012.833,31.3,600.8604,5,24.96871,121.54651,30.9 +339,2012.917,31.5,258.186,9,24.96867,121.54331,36.3 +340,2013.333,1.7,329.9747,5,24.98254,121.54395,50.4 +341,2013.333,33.6,270.8895,0,24.97281,121.53265,42.9 +342,2013.000,13,750.0704,2,24.97371,121.54951,37 +343,2012.667,5.7,90.45606,9,24.97433,121.5431,53.5 +344,2013.000,33.5,563.2854,8,24.98223,121.53597,46.6 +345,2013.500,34.6,3085.17,0,24.998,121.5155,41.2 +346,2012.667,0,185.4296,0,24.9711,121.5317,37.9 +347,2013.417,13.2,1712.632,2,24.96412,121.5167,30.8 +348,2013.583,17.4,6488.021,1,24.95719,121.47353,11.2 +349,2012.833,4.6,259.6607,6,24.97585,121.54516,53.7 +350,2012.750,7.8,104.8101,5,24.96674,121.54067,47 +351,2013.000,13.2,492.2313,5,24.96515,121.53737,42.3 +352,2012.833,4,2180.245,3,24.96324,121.51241,28.6 
+353,2012.833,18.4,2674.961,3,24.96143,121.50827,25.7 +354,2013.500,4.1,2147.376,3,24.96299,121.51284,31.3 +355,2013.417,12.2,1360.139,1,24.95204,121.54842,30.1 +356,2013.250,3.8,383.8624,5,24.98085,121.54391,60.7 +357,2012.833,10.3,211.4473,1,24.97417,121.52999,45.3 +358,2013.417,0,338.9679,9,24.96853,121.54413,44.9 +359,2013.167,1.1,193.5845,6,24.96571,121.54089,45.1 +360,2013.500,5.6,2408.993,0,24.95505,121.55964,24.7 +361,2012.667,32.9,87.30222,10,24.983,121.54022,47.1 +362,2013.083,41.4,281.205,8,24.97345,121.54093,63.3 +363,2013.417,17.1,967.4,4,24.98872,121.53408,40 +364,2013.500,32.3,109.9455,10,24.98182,121.54086,48 +365,2013.417,35.3,614.1394,7,24.97913,121.53666,33.1 +366,2012.917,17.3,2261.432,4,24.96182,121.51222,29.5 +367,2012.750,14.2,1801.544,1,24.95153,121.55254,24.8 +368,2012.833,15,1828.319,2,24.96464,121.51531,20.9 +369,2013.417,18.2,350.8515,1,24.97544,121.53119,43.1 +370,2012.667,20.2,2185.128,3,24.96322,121.51237,22.8 +371,2012.750,15.9,289.3248,5,24.98203,121.54348,42.1 +372,2013.500,4.1,312.8963,5,24.95591,121.53956,51.7 +373,2013.000,33.9,157.6052,7,24.96628,121.54196,41.5 +374,2013.083,0,274.0144,1,24.9748,121.53059,52.2 +375,2013.250,5.4,390.5684,5,24.97937,121.54245,49.5 +376,2013.250,21.7,1157.988,0,24.96165,121.55011,23.8 +377,2013.417,14.7,1717.193,2,24.96447,121.51649,30.5 +378,2013.333,3.9,49.66105,8,24.95836,121.53756,56.8 +379,2013.333,37.3,587.8877,8,24.97077,121.54634,37.4 +380,2013.333,0,292.9978,6,24.97744,121.54458,69.7 +381,2013.333,14.1,289.3248,5,24.98203,121.54348,53.3 +382,2013.417,8,132.5469,9,24.98298,121.53981,47.3 +383,2013.000,16.3,3529.564,0,24.93207,121.51597,29.3 +384,2012.667,29.1,506.1144,4,24.97845,121.53889,40.3 +385,2012.750,16.1,4066.587,0,24.94297,121.50342,12.9 +386,2013.000,18.3,82.88643,10,24.983,121.54026,46.6 +387,2012.833,0,185.4296,0,24.9711,121.5317,55.3 +388,2013.250,16.2,2103.555,3,24.96042,121.51462,25.6 +389,2013.500,10.4,2251.938,4,24.95957,121.51353,27.3 
+390,2013.250,40.9,122.3619,8,24.96756,121.5423,67.7 +391,2013.500,32.8,377.8302,9,24.97151,121.5435,38.6 +392,2013.583,6.2,1939.749,1,24.95155,121.55387,31.3 +393,2013.083,42.7,443.802,6,24.97927,121.53874,35.3 +394,2013.000,16.9,967.4,4,24.98872,121.53408,40.3 +395,2013.500,32.6,4136.271,1,24.95544,121.4963,24.7 +396,2012.917,21.2,512.5487,4,24.974,121.53842,42.5 +397,2012.667,37.1,918.6357,1,24.97198,121.55063,31.9 +398,2013.417,13.1,1164.838,4,24.99156,121.53406,32.2 +399,2013.417,14.7,1717.193,2,24.96447,121.51649,23 +400,2012.917,12.7,170.1289,1,24.97371,121.52984,37.3 +401,2013.250,26.8,482.7581,5,24.97433,121.53863,35.5 +402,2013.083,7.6,2175.03,3,24.96305,121.51254,27.7 +403,2012.833,12.7,187.4823,1,24.97388,121.52981,28.5 +404,2012.667,30.9,161.942,9,24.98353,121.53966,39.7 +405,2013.333,16.4,289.3248,5,24.98203,121.54348,41.2 +406,2012.667,23,130.9945,6,24.95663,121.53765,37.2 +407,2013.167,1.9,372.1386,7,24.97293,121.54026,40.5 +408,2013.000,5.2,2408.993,0,24.95505,121.55964,22.3 +409,2013.417,18.5,2175.744,3,24.9633,121.51243,28.1 +410,2013.000,13.7,4082.015,0,24.94155,121.50381,15.4 +411,2012.667,5.6,90.45606,9,24.97433,121.5431,50 +412,2013.250,18.8,390.9696,7,24.97923,121.53986,40.6 +413,2013.000,8.1,104.8101,5,24.96674,121.54067,52.5 +414,2013.500,6.5,90.45606,9,24.97433,121.5431,63.9 diff --git a/requirements.txt:Zone.Identifier b/requirements.txt:Zone.Identifier deleted file mode 100644 index e69de29bb..000000000