Skip to content

Commit

Permalink
Add AGA-WebUI (#89)
Browse files Browse the repository at this point in the history
Co-authored-by: Yuan Tian <[email protected]>
Co-authored-by: Anirudh Dagar <[email protected]>
  • Loading branch information
3 people authored Nov 9, 2024
1 parent 49cb475 commit 9df3ed7
Show file tree
Hide file tree
Showing 19 changed files with 1,717 additions and 1 deletion.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,15 @@
.aim/
AutogluonModels/
data/
user_data/
sample_dataset/
results/
cookies.json
cdk.out/
.metaflow
*.feather
.langchain.db
output_*.csv

# Byte-compiled / optimized / DLL files
__pycache__/
Expand Down
26 changes: 26 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -93,3 +93,29 @@ aga ./data ./config --output-filename my_output.csv --config-overrides "autogluo
```

`autogluon-assistant-tools` provides more functionality and utilities for benchmarking, wrapped around autogluon-assistant. Please check out the [repo](https://github.com/autogluon/autogluon-assistant-tools/) for more details.

## Autogluon Assistant Web UI
The Autogluon Assistant Web UI is a user-friendly application that allows users to leverage the capabilities of the Autogluon-Assistant library through an intuitive web interface.

The web UI enables users to upload datasets, configure Autogluon-Assistant runs with customized settings, preview data, monitor execution progress, view and download results, and supports secure, isolated sessions for concurrent users.

#### To run the Autogluon Assistant Web UI:
Navigate to the project directory and run the app:
````
cd src/autogluon_assistant/ui && streamlit run app.py
````
The Autogluon Assistant Web UI should now be accessible in your web browser at `http://localhost:8501`.

#### Add a GPT-4 Model to the LLM Options:
If you’d like to add an additional GPT-4 model to the language model (LLM) dropdown:

1. Navigate to `src/autogluon_assistant/ui/constants.py`

2. Locate the `LLM_OPTIONS` variable, which looks like this:
````
LLM_OPTIONS = ["Claude 3.5 with Amazon Bedrock"]
````
3. Add "GPT 4o" to the list
````
LLM_OPTIONS = ["Claude 3.5 with Amazon Bedrock", "GPT 4o"]
````
7 changes: 6 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,12 @@ dependencies = [
"sentence-transformers>=3.1.0",
"tenacity>=8.2.2,<10.0",
"gensim>=4.3",
"pandas>=2.2"
"pandas>=2.2",
"stqdm==0.0.5",
"streamlit==1.37.1",
"streamlit-aggrid==1.0.5",
"streamlit-extras==0.4.7",
"psutil>=5.9.8",
]

[project.optional-dependencies]
Expand Down
10 changes: 10 additions & 0 deletions src/autogluon_assistant/ui/.streamlit/config.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
[server]
maxUploadSize = 4096
enableStaticServing = true
[theme]
primaryColor="#4C7DE7"
backgroundColor="#FFFFFF"
secondaryBackgroundColor="#fbfcfc"
textColor="#404040"
[client]
showSidebarNavigation = false
82 changes: 82 additions & 0 deletions src/autogluon_assistant/ui/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
from copy import deepcopy

import streamlit as st
import streamlit.components.v1 as components
from constants import DEFAULT_SESSION_VALUES, LOGO_PATH
from pages.demo import main as demo
from pages.feature import main as feature
from pages.nav_bar import nav_bar
from pages.preview import main as preview
from pages.task import main as run
from pages.tutorial import main as tutorial

st.set_page_config(
page_title="AutoGluon Assistant",
page_icon=LOGO_PATH,
layout="wide",
initial_sidebar_state="collapsed",
)

# fontawesome
st.markdown(
"""
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
""",
unsafe_allow_html=True,
)

# Bootstrap 4.1.3
st.markdown(
"""
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/[email protected]/dist/css/bootstrap.min.css" integrity="sha384-MCw98/SFnGE8fJT3GXwEOngsV7Zt27NXFoaoApmYm81iuXoPkFOJwJ8ERdknLPMO" crossorigin="anonymous">
""",
unsafe_allow_html=True,
)
with open("style.css") as f:
st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html=True)


reload_warning = """
<script>
window.onbeforeunload = function () {
return "Are you sure want to LOGOUT the session ?";
};
</script>
"""

components.html(reload_warning, height=0)


def initial_session_state():
"""
Initial Session State
"""
for key, default_value in DEFAULT_SESSION_VALUES.items():
if key not in st.session_state:
st.session_state[key] = (
deepcopy(default_value) if isinstance(default_value, (dict, list)) else default_value
)


def main():
initial_session_state()
nav_bar()
tutorial()
demo()
feature()
run()
preview()

st.markdown(
"""
<script src="https://code.jquery.com/jquery-3.3.1.slim.min.js" integrity="sha384-q8i/X+965DzO0rT7abK41JStQIAqVgRVzpbzo5smXKp4YfRvH+8abtTE1Pi6jizo" crossorigin="anonymous"></script>
<script src="https://cdn.jsdelivr.net/npm/[email protected]/dist/umd/popper.min.js" integrity="sha384-ZMP7rVo3mIykV+2+9J3UJ46jBk0WLaUAdn689aCwoqbBJiSnjAK/l8WvCWPIPm49" crossorigin="anonymous"></script>
<script src="https://cdn.jsdelivr.net/npm/[email protected]/dist/js/bootstrap.min.js" integrity="sha384-ChfqqxuZUCnJSK3+MXmPNIyE6ZbWh2IMqE241rYiqJxyMiZ6OW/JmZQ5stwEULTy" crossorigin="anonymous"></script>
""",
unsafe_allow_html=True,
)


if __name__ == "__main__":
main()
134 changes: 134 additions & 0 deletions src/autogluon_assistant/ui/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
from copy import deepcopy

BASE_DATA_DIR = "./user_data"


# Preset configurations
PRESET_DEFAULT_CONFIG = {
"Best Quality": {"time_limit": "4 hrs", "feature_generation": True},
"High Quality": {"time_limit": "1 hr", "feature_generation": False},
"Medium Quality": {"time_limit": "10 mins", "feature_generation": False},
}
DEFAULT_PRESET = "Medium Quality"

PRESET_MAPPING = {
"Best Quality": "best_quality",
"High Quality": "high_quality",
"Medium Quality": "medium_quality",
}
PRESET_OPTIONS = ["Best Quality", "High Quality", "Medium Quality"]

# Time limit configurations (in seconds)
TIME_LIMIT_MAPPING = {
"1 min": 60,
"10 mins": 600,
"30 mins": 1800,
"1 hr": 3600,
"2 hrs": 7200,
"4 hrs": 14400,
}

DEFAULT_TIME_LIMIT = "10 mins"

TIME_LIMIT_OPTIONS = ["1 min", "10 mins", "30 mins", "1 hr", "2 hrs", "4 hrs"]

# LLM configurations
LLM_MAPPING = {
"Claude 3.5 with Amazon Bedrock": "anthropic.claude-3-5-sonnet-20241022-v2:0",
"GPT 4o": "gpt-4o-mini-2024-07-18",
}

LLM_OPTIONS = ["Claude 3.5 with Amazon Bedrock"]

# Provider configuration
PROVIDER_MAPPING = {"Claude 3.5 with Amazon Bedrock": "bedrock", "GPT 4o": "openai"}


API_KEY_LOCATION = {"Claude 3.5 with Amazon Bedrock": "BEDROCK_API_KEY", "GPT 4o": "OPENAI_API_KEY"}

INITIAL_STAGE = {
"Task Understanding": [],
"Feature Generation": [],
"Model Training": [],
"Prediction": [],
}
# Initial Session state
DEFAULT_SESSION_VALUES = {
"config_overrides": [],
"preset": DEFAULT_PRESET,
"time_limit": DEFAULT_TIME_LIMIT,
"llm": None,
"pid": None,
"logs": "",
"process": None,
"clicked": False,
"task_running": False,
"output_file": None,
"output_filename": None,
"task_description": "",
"sample_description": "",
"return_code": None,
"task_canceled": False,
"uploaded_files": {},
"sample_files": {},
"selected_dataset": None,
"sample_dataset_dir": None,
"description_uploader_key": 0,
"sample_dataset_selector": None,
"current_stage": None,
"feature_generation": False,
"stage_status": {},
"show_remaining_time": False,
"model_path": None,
"increment_time": 0,
"progress_bar": None,
"increment": 2,
"zip_path": None,
"stage_container": deepcopy(INITIAL_STAGE),
}

# Message to display different logging stage
STATUS_BAR_STAGE = {
"Task loaded!": 10,
"Model training starts": 25,
"Fitting model": 50,
"AutoGluon training complete": 80,
"Prediction starts": 90,
}

STAGE_COMPLETE_SIGNAL = [
"Task understanding complete",
"Automatic feature generation complete",
"Model training complete",
"Prediction complete",
]

# Stage Names
STAGE_TASK_UNDERSTANDING = "Task Understanding"
STAGE_FEATURE_GENERATION = "Feature Generation"
STAGE_MODEL_TRAINING = "Model Training"
STAGE_PREDICTION = "Prediction"

# Log Messages
MSG_TASK_UNDERSTANDING = "Task understanding starts"
MSG_FEATURE_GENERATION = "Automatic feature generation starts"
MSG_MODEL_TRAINING = "Model training starts"
MSG_PREDICTION = "Prediction starts"

# Mapping
STAGE_MESSAGES = {
MSG_TASK_UNDERSTANDING: STAGE_TASK_UNDERSTANDING,
MSG_FEATURE_GENERATION: STAGE_FEATURE_GENERATION,
MSG_MODEL_TRAINING: STAGE_MODEL_TRAINING,
MSG_PREDICTION: STAGE_PREDICTION,
}
# DataSet Options
DATASET_OPTIONS = ["Sample Dataset", "Upload Dataset"]

# Captions under DataSet Options
CAPTIONS = ["Run with sample dataset", "Upload Train, Test and Output (Optional) Dataset"]

DEMO_URL = "https://automl-mm-bench.s3.amazonaws.com/autogluon-assistant/aga-kaggle-demo.mp4"

SAMPLE_DATASET_DESCRIPTION = """You are solving this data science tasks:The dataset presented here (knot theory) comprises a lot of numerical features. Some of the features may be missing, with nan value. Your task is to predict the 'signature', which has 18 unique integers. The evaluation metric is the classification accuracy."""
LOGO_PATH = "static/page_icon.png"
57 changes: 57 additions & 0 deletions src/autogluon_assistant/ui/file_uploader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import os

import pandas as pd
import streamlit as st
from utils import get_user_data_dir


def save_description_file(description):
"""
Save the task description to a file in the user's data directory.
Args:
description (str): The task description to save.
"""
try:
user_data_dir = get_user_data_dir()
description_file = os.path.join(user_data_dir, "description.txt")
with open(description_file, "w") as f:
f.write(description)
except Exception as e:
print(f"Error saving file: {str(e)}")


def description_file_uploader():
"""
Handle Description file uploads
"""
uploaded_file = st.file_uploader(
"Upload task description file",
type="txt",
key=st.session_state.description_uploader_key,
help="Accepted file format: .txt",
label_visibility="collapsed",
)
if uploaded_file:
task_description = uploaded_file.read().decode("utf-8")
st.session_state.task_description = task_description
save_description_file(st.session_state.task_description)
st.session_state.description_uploader_key += 1
st.rerun()


def file_uploader():
"""
Handle file uploads
"""
st.markdown("#### Upload Dataset")
uploaded_files = st.file_uploader(
"Select the dataset", accept_multiple_files=True, label_visibility="collapsed", type=["csv", "xlsx"]
)
st.session_state.uploaded_files = {}
for file in uploaded_files:
if file.name.endswith(".csv"):
df = pd.read_csv(file)
elif file.name.endswith(".xlsx"):
df = pd.read_excel(file)
st.session_state.uploaded_files[file.name] = {"file": file, "df": df}
Loading

0 comments on commit 9df3ed7

Please sign in to comment.