-
Notifications
You must be signed in to change notification settings - Fork 80
Lazy loading/streaming with priority-based query dispatch #1058
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: dev
Are you sure you want to change the base?
Changes from 10 commits
5b7c048
be629fb
7b51b55
a0e9c07
3f73714
05a64a7
1255105
8ce17da
a37c70f
4c99834
00b6610
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,16 +1,17 @@ | ||
| from dash import dcc, callback | ||
| import dash_bootstrap_components as dbc | ||
| from dash.dependencies import Input, Output | ||
| from typing import List, Optional, Tuple, Union | ||
| import pandas as pd | ||
| import logging | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [pylint] reported by reviewdog 🐶 |
||
| from dateutil.relativedelta import * # type: ignore | ||
| import plotly.express as px | ||
| import plotly.graph_objects as go | ||
| from pages.utils.graph_utils import baby_blue | ||
| from queries.commits_query import commits_query as cmq | ||
| from pages.utils.job_utils import nodata_graph | ||
| import time | ||
| from pages.utils.query_status import load_query_data | ||
| import datetime as dt | ||
| import cache_manager.cache_facade as cf | ||
| from components.visualization import VisualizationAIO | ||
|
|
||
| PAGE = "affiliation" | ||
|
|
@@ -75,36 +76,22 @@ | |
| ], | ||
| background=True, | ||
| ) | ||
| def commit_domains_graph(repolist, num, start_date, end_date): | ||
| # wait for data to asynchronously download and become available. | ||
| while not_cached := cf.get_uncached(func_name=cmq.__name__, repolist=repolist): | ||
| logging.warning(f"COMMITS_OVER_TIME_VIZ - WAITING ON DATA TO BECOME AVAILABLE") | ||
| time.sleep(0.5) | ||
|
|
||
| start = time.perf_counter() | ||
| logging.warning(f"{VIZ_ID}- START") | ||
|
|
||
| # GET ALL DATA FROM POSTGRES CACHE | ||
| df = cf.retrieve_from_cache( | ||
| tablename=cmq.__name__, | ||
| repolist=repolist, | ||
| ) | ||
|
|
||
| # test if there is data | ||
| if df.empty: | ||
| logging.warning(f"{VIZ_ID} - NO DATA AVAILABLE") | ||
| def commit_domains_graph( | ||
| repolist: List[int], num, start_date: Optional[str], end_date: Optional[str] | ||
| ) -> Tuple[go.Figure, bool]: | ||
| # Wait for and load query data (includes timeout, error handling, and validation) | ||
| df = load_query_data(cmq, repolist, VIZ_ID) | ||
| if df is None: | ||
| return nodata_graph | ||
|
|
||
| # function for all data pre processing, COULD HAVE ADDITIONAL INPUTS AND OUTPUTS | ||
| df = process_data(df, num, start_date, end_date) | ||
|
|
||
| fig = create_figure(df) | ||
|
|
||
| logging.warning(f"{VIZ_ID} - END - {time.perf_counter() - start}") | ||
| return fig | ||
|
|
||
|
|
||
| def process_data(df: pd.DataFrame, num, start_date, end_date): | ||
| def process_data(df: pd.DataFrame, num, start_date: Optional[str], end_date: Optional[str]) -> pd.DataFrame: | ||
| # TODO: create docstring | ||
|
|
||
| # convert to datetime objects rather than strings | ||
|
|
@@ -148,7 +135,7 @@ def process_data(df: pd.DataFrame, num, start_date, end_date): | |
| return df | ||
|
|
||
|
|
||
| def create_figure(df: pd.DataFrame): | ||
| def create_figure(df: pd.DataFrame) -> go.Figure: | ||
| # graph generation | ||
| fig = px.pie(df, names="domains", values="occurrences", color_discrete_sequence=baby_blue) | ||
| fig.update_traces( | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,18 +1,19 @@ | ||
| from dash import dcc, callback | ||
| import dash_bootstrap_components as dbc | ||
| from dash.dependencies import Input, Output | ||
| from typing import List, Optional, Tuple, Union | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [pylint] reported by reviewdog 🐶 |
||
| import pandas as pd | ||
| import logging | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [pylint] reported by reviewdog 🐶 |
||
| from dateutil.relativedelta import * # type: ignore | ||
| import plotly.express as px | ||
| import plotly.graph_objects as go | ||
| from pages.utils.graph_utils import baby_blue | ||
| from queries.affiliation_query import affiliation_query as aq | ||
| from pages.utils.job_utils import nodata_graph | ||
| import time | ||
| from pages.utils.query_status import load_query_data | ||
| import datetime as dt | ||
| from rapidfuzz import fuzz | ||
| import app | ||
| import cache_manager.cache_facade as cf | ||
| from components.visualization import VisualizationAIO | ||
|
|
||
| PAGE = "affiliation" | ||
|
|
@@ -76,23 +77,12 @@ | |
| ], | ||
| background=True, | ||
| ) | ||
| def gh_org_affiliation_graph(repolist, num, start_date, end_date, bot_switch): | ||
| # wait for data to asynchronously download and become available. | ||
| while not_cached := cf.get_uncached(func_name=aq.__name__, repolist=repolist): | ||
| logging.warning(f"{VIZ_ID}- WAITING ON DATA TO BECOME AVAILABLE") | ||
| time.sleep(0.5) | ||
|
|
||
| start = time.perf_counter() | ||
| logging.warning(f"{VIZ_ID}- START") | ||
|
|
||
| # GET ALL DATA FROM POSTGRES CACHE | ||
| df = cf.retrieve_from_cache( | ||
| tablename=aq.__name__, | ||
| repolist=repolist, | ||
| ) | ||
| # test if there is data | ||
| if df.empty: | ||
| logging.warning(f"{VIZ_ID} - NO DATA AVAILABLE") | ||
| def gh_org_affiliation_graph( | ||
| repolist: List[int], num, start_date: Optional[str], end_date, bot_switch: bool | ||
| ) -> Tuple[go.Figure, bool]: | ||
| # Wait for and load query data (includes timeout, error handling, and validation) | ||
| df = load_query_data(aq, repolist, VIZ_ID) | ||
| if df is None: | ||
| return nodata_graph | ||
|
|
||
| # remove bot data | ||
|
|
@@ -103,12 +93,10 @@ def gh_org_affiliation_graph(repolist, num, start_date, end_date, bot_switch): | |
| df = process_data(df, num, start_date, end_date) | ||
|
|
||
| fig = create_figure(df) | ||
|
|
||
| logging.warning(f"{VIZ_ID} - END - {time.perf_counter() - start}") | ||
| return fig | ||
|
|
||
|
|
||
| def process_data(df: pd.DataFrame, num, start_date, end_date): | ||
| def process_data(df: pd.DataFrame, num, start_date: Optional[str], end_date: Optional[str]) -> pd.DataFrame: | ||
| """Implement your custom data-processing logic in this function. | ||
| The output of this function is the data you intend to create a visualization with, | ||
| requiring no further processing.""" | ||
|
|
@@ -183,7 +171,7 @@ def fuzzy_match(df, name): | |
| return [i for i, x in enumerate(matches) if x] | ||
|
|
||
|
|
||
| def create_figure(df: pd.DataFrame): | ||
| def create_figure(df: pd.DataFrame) -> go.Figure: | ||
| # graph generation | ||
| fig = px.pie( | ||
| df, | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,17 +1,18 @@ | ||
| from dash import dcc, callback | ||
| import dash_bootstrap_components as dbc | ||
| from dash.dependencies import Input, Output | ||
| from typing import List, Optional, Tuple, Union | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [pylint] reported by reviewdog 🐶 |
||
| import pandas as pd | ||
| import logging | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [pylint] reported by reviewdog 🐶 |
||
| from dateutil.relativedelta import * # type: ignore | ||
| import plotly.express as px | ||
| import plotly.graph_objects as go | ||
| from pages.utils.graph_utils import baby_blue | ||
| from queries.affiliation_query import affiliation_query as aq | ||
| from pages.utils.job_utils import nodata_graph | ||
| import time | ||
| from pages.utils.query_status import load_query_data | ||
| import datetime as dt | ||
| import app | ||
| import cache_manager.cache_facade as cf | ||
| from components.visualization import VisualizationAIO | ||
|
|
||
| PAGE = "affiliation" | ||
|
|
@@ -116,7 +117,9 @@ | |
| ], | ||
| background=True, | ||
| ) | ||
| def org_associated_activity_graph(repolist, num, start_date, end_date, email_filter, bot_switch): | ||
| def org_associated_activity_graph( | ||
| repolist: List[int], num, start_date: Optional[str], end_date, email_filter, bot_switch: bool | ||
| ) -> Tuple[go.Figure, bool]: | ||
| """Each contribution is associated with a contributor. That contributor can be associated with | ||
|
|
||
| more than one different email. Hence each contribution is associated with all of the emails that a contributor has historically used. | ||
|
|
@@ -129,24 +132,8 @@ def org_associated_activity_graph(repolist, num, start_date, end_date, email_fil | |
|
|
||
| will have many emails. We acknowledge that this will almost always contribute to an overcount but will never undercount." | ||
| """ | ||
|
|
||
| # wait for data to asynchronously download and become available. | ||
| while not_cached := cf.get_uncached(func_name=aq.__name__, repolist=repolist): | ||
| logging.warning(f"{VIZ_ID}- WAITING ON DATA TO BECOME AVAILABLE") | ||
| time.sleep(0.5) | ||
|
|
||
| start = time.perf_counter() | ||
| logging.warning(f"{VIZ_ID}- START") | ||
|
|
||
| # GET ALL DATA FROM POSTGRES CACHE | ||
| df = cf.retrieve_from_cache( | ||
| tablename=aq.__name__, | ||
| repolist=repolist, | ||
| ) | ||
|
|
||
| # test if there is data | ||
| if df.empty: | ||
| logging.warning(f"{VIZ_ID} - NO DATA AVAILABLE") | ||
| df = load_query_data(aq, repolist, VIZ_ID) | ||
| if df is None: | ||
| return nodata_graph | ||
|
|
||
| # remove bot data | ||
|
|
@@ -157,12 +144,10 @@ def org_associated_activity_graph(repolist, num, start_date, end_date, email_fil | |
| df = process_data(df, num, start_date, end_date, email_filter) | ||
|
|
||
| fig = create_figure(df) | ||
|
|
||
| logging.warning(f"{VIZ_ID} - END - {time.perf_counter() - start}") | ||
| return fig | ||
|
|
||
|
|
||
| def process_data(df: pd.DataFrame, num, start_date, end_date, email_filter): | ||
| def process_data(df: pd.DataFrame, num, start_date: Optional[str], end_date, email_filter): | ||
| # convert to datetime objects rather than strings | ||
| df["created_at"] = pd.to_datetime(df["created_at"], utc=True) | ||
|
|
||
|
|
@@ -214,7 +199,7 @@ def process_data(df: pd.DataFrame, num, start_date, end_date, email_filter): | |
| return df | ||
|
|
||
|
|
||
| def create_figure(df: pd.DataFrame): | ||
| def create_figure(df: pd.DataFrame) -> go.Figure: | ||
| # graph generation | ||
| fig = px.bar(df, x="domains", y="occurrences", color_discrete_sequence=[baby_blue[8]]) | ||
| fig.update_xaxes(rangeslider_visible=True, range=[-0.5, 15]) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,17 +1,18 @@ | ||
| from dash import dcc, callback | ||
| import dash_bootstrap_components as dbc | ||
| from dash.dependencies import Input, Output | ||
| from typing import List, Optional, Tuple, Union | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [pylint] reported by reviewdog 🐶 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [pylint] reported by reviewdog 🐶 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [pylint] reported by reviewdog 🐶 |
||
| import pandas as pd | ||
| import logging | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [pylint] reported by reviewdog 🐶 |
||
| from dateutil.relativedelta import * # type: ignore | ||
| import plotly.express as px | ||
| import plotly.graph_objects as go | ||
| from pages.utils.graph_utils import baby_blue | ||
| from queries.affiliation_query import affiliation_query as aq | ||
| from pages.utils.job_utils import nodata_graph | ||
| import time | ||
| from pages.utils.query_status import load_query_data | ||
| import datetime as dt | ||
| import app | ||
| import cache_manager.cache_facade as cf | ||
| from components.visualization import VisualizationAIO | ||
|
|
||
| PAGE = "affiliation" | ||
|
|
@@ -140,23 +141,9 @@ def compay_associated_activity_graph( | |
| email_filter, | ||
| bot_switch, | ||
| ): | ||
| # wait for data to asynchronously download and become available. | ||
| while not_cached := cf.get_uncached(func_name=aq.__name__, repolist=repolist): | ||
| logging.warning(f"{VIZ_ID}- WAITING ON DATA TO BECOME AVAILABLE") | ||
| time.sleep(0.5) | ||
|
|
||
| start = time.perf_counter() | ||
| logging.warning(f"{VIZ_ID}- START") | ||
|
|
||
| # GET ALL DATA FROM POSTGRES CACHE | ||
| df = cf.retrieve_from_cache( | ||
| tablename=aq.__name__, | ||
| repolist=repolist, | ||
| ) | ||
|
|
||
| # test if there is data | ||
| if df.empty: | ||
| logging.warning(f"{VIZ_ID} - NO DATA AVAILABLE") | ||
| # Wait for and load query data (includes timeout, error handling, and validation) | ||
| df = load_query_data(aq, repolist, VIZ_ID) | ||
| if df is None: | ||
| return nodata_graph | ||
|
|
||
| # remove bot data | ||
|
|
@@ -167,12 +154,10 @@ def compay_associated_activity_graph( | |
| df = process_data(df, contributions, contributors, start_date, end_date, email_filter) | ||
|
|
||
| fig = create_figure(df) | ||
|
|
||
| logging.warning(f"{VIZ_ID} - END - {time.perf_counter() - start}") | ||
| return fig | ||
|
|
||
|
|
||
| def process_data(df: pd.DataFrame, contributions, contributors, start_date, end_date, email_filter): | ||
| def process_data(df: pd.DataFrame, contributions, contributors, start_date: Optional[str], end_date, email_filter): | ||
| # convert to datetime objects rather than strings | ||
| df["created_at"] = pd.to_datetime(df["created_at"], utc=True) | ||
|
|
||
|
|
@@ -231,7 +216,7 @@ def process_data(df: pd.DataFrame, contributions, contributors, start_date, end_ | |
| return df | ||
|
|
||
|
|
||
| def create_figure(df: pd.DataFrame): | ||
| def create_figure(df: pd.DataFrame) -> go.Figure: | ||
| # graph generation | ||
| fig = px.bar(df, x="domains", y="contributors", color_discrete_sequence=[baby_blue[8]]) | ||
| fig.update_xaxes(rangeslider_visible=True, range=[-0.5, 15]) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,17 +1,18 @@ | ||
| from dash import dcc, callback | ||
| import dash_bootstrap_components as dbc | ||
| from dash.dependencies import Input, Output | ||
| from typing import List, Optional, Tuple, Union | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [pylint] reported by reviewdog 🐶 |
||
| import pandas as pd | ||
| import logging | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [pylint] reported by reviewdog 🐶 |
||
| from dateutil.relativedelta import * # type: ignore | ||
| import plotly.express as px | ||
| import plotly.graph_objects as go | ||
| from pages.utils.graph_utils import baby_blue | ||
| from queries.affiliation_query import affiliation_query as aq | ||
| from pages.utils.job_utils import nodata_graph | ||
| import time | ||
| from pages.utils.query_status import load_query_data | ||
| import datetime as dt | ||
| import app | ||
| import cache_manager.cache_facade as cf | ||
| from components.visualization import VisualizationAIO | ||
|
|
||
| PAGE = "affiliation" | ||
|
|
@@ -75,24 +76,12 @@ | |
| ], | ||
| background=True, | ||
| ) | ||
| def unique_domains_graph(repolist, num, start_date, end_date, bot_switch): | ||
| # wait for data to asynchronously download and become available. | ||
| while not_cached := cf.get_uncached(func_name=aq.__name__, repolist=repolist): | ||
| logging.warning(f"{VIZ_ID}- WAITING ON DATA TO BECOME AVAILABLE") | ||
| time.sleep(0.5) | ||
|
|
||
| start = time.perf_counter() | ||
| logging.warning(f"{VIZ_ID}- START") | ||
|
|
||
| # GET ALL DATA FROM POSTGRES CACHE | ||
| df = cf.retrieve_from_cache( | ||
| tablename=aq.__name__, | ||
| repolist=repolist, | ||
| ) | ||
|
|
||
| # test if there is data | ||
| if df.empty: | ||
| logging.warning(f"{VIZ_ID} - NO DATA AVAILABLE") | ||
| def unique_domains_graph( | ||
| repolist: List[int], num, start_date: Optional[str], end_date, bot_switch: bool | ||
| ) -> Tuple[go.Figure, bool]: | ||
| # Wait for and load query data (includes timeout, error handling, and validation) | ||
| df = load_query_data(aq, repolist, VIZ_ID) | ||
| if df is None: | ||
| return nodata_graph | ||
|
|
||
| # remove bot data | ||
|
|
@@ -103,12 +92,10 @@ def unique_domains_graph(repolist, num, start_date, end_date, bot_switch): | |
| df = process_data(df, num, start_date, end_date) | ||
|
|
||
| fig = create_figure(df) | ||
|
|
||
| logging.warning(f"{VIZ_ID} - END - {time.perf_counter() - start}") | ||
| return fig | ||
|
|
||
|
|
||
| def process_data(df: pd.DataFrame, num, start_date, end_date): | ||
| def process_data(df: pd.DataFrame, num, start_date: Optional[str], end_date: Optional[str]) -> pd.DataFrame: | ||
| # convert to datetime objects rather than strings | ||
| df["created_at"] = pd.to_datetime(df["created_at"], utc=True) | ||
|
|
||
|
|
@@ -150,7 +137,7 @@ def process_data(df: pd.DataFrame, num, start_date, end_date): | |
| return df | ||
|
|
||
|
|
||
| def create_figure(df: pd.DataFrame): | ||
| def create_figure(df: pd.DataFrame) -> go.Figure: | ||
| # graph generation | ||
| fig = px.pie(df, names="domains", values="occurences", color_discrete_sequence=baby_blue) | ||
| fig.update_traces( | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[pylint] reported by reviewdog 🐶
W0611: Unused Union imported from typing (unused-import)