Skip to content
Draft
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 11 additions & 24 deletions 8Knot/pages/affiliation/visualizations/commit_domains.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,17 @@
from dash import dcc, callback
import dash_bootstrap_components as dbc
from dash.dependencies import Input, Output
from typing import List, Optional, Tuple, Union
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[pylint] reported by reviewdog 🐶
W0611: Unused Union imported from typing (unused-import)

import pandas as pd
import logging
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[pylint] reported by reviewdog 🐶
W0611: Unused import logging (unused-import)

from dateutil.relativedelta import * # type: ignore
import plotly.express as px
import plotly.graph_objects as go
from pages.utils.graph_utils import baby_blue
from queries.commits_query import commits_query as cmq
from pages.utils.job_utils import nodata_graph
import time
from pages.utils.query_status import load_query_data
import datetime as dt
import cache_manager.cache_facade as cf
from components.visualization import VisualizationAIO

PAGE = "affiliation"
Expand Down Expand Up @@ -75,36 +76,22 @@
],
background=True,
)
def commit_domains_graph(repolist, num, start_date, end_date):
# wait for data to asynchronously download and become available.
while not_cached := cf.get_uncached(func_name=cmq.__name__, repolist=repolist):
logging.warning(f"COMMITS_OVER_TIME_VIZ - WAITING ON DATA TO BECOME AVAILABLE")
time.sleep(0.5)

start = time.perf_counter()
logging.warning(f"{VIZ_ID}- START")

# GET ALL DATA FROM POSTGRES CACHE
df = cf.retrieve_from_cache(
tablename=cmq.__name__,
repolist=repolist,
)

# test if there is data
if df.empty:
logging.warning(f"{VIZ_ID} - NO DATA AVAILABLE")
def commit_domains_graph(
repolist: List[int], num, start_date: Optional[str], end_date: Optional[str]
) -> Tuple[go.Figure, bool]:
# Wait for and load query data (includes timeout, error handling, and validation)
df = load_query_data(cmq, repolist, VIZ_ID)
if df is None:
return nodata_graph

# function for all data pre processing, COULD HAVE ADDITIONAL INPUTS AND OUTPUTS
df = process_data(df, num, start_date, end_date)

fig = create_figure(df)

logging.warning(f"{VIZ_ID} - END - {time.perf_counter() - start}")
return fig


def process_data(df: pd.DataFrame, num, start_date, end_date):
def process_data(df: pd.DataFrame, num, start_date: Optional[str], end_date: Optional[str]) -> pd.DataFrame:
# TODO: create docstring

# convert to datetime objects rather than strings
Expand Down Expand Up @@ -148,7 +135,7 @@ def process_data(df: pd.DataFrame, num, start_date, end_date):
return df


def create_figure(df: pd.DataFrame):
def create_figure(df: pd.DataFrame) -> go.Figure:
# graph generation
fig = px.pie(df, names="domains", values="occurrences", color_discrete_sequence=baby_blue)
fig.update_traces(
Expand Down
34 changes: 11 additions & 23 deletions 8Knot/pages/affiliation/visualizations/gh_org_affiliation.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,19 @@
from dash import dcc, callback
import dash_bootstrap_components as dbc
from dash.dependencies import Input, Output
from typing import List, Optional, Tuple, Union
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[pylint] reported by reviewdog 🐶
W0611: Unused Union imported from typing (unused-import)

import pandas as pd
import logging
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[pylint] reported by reviewdog 🐶
W0611: Unused import logging (unused-import)

from dateutil.relativedelta import * # type: ignore
import plotly.express as px
import plotly.graph_objects as go
from pages.utils.graph_utils import baby_blue
from queries.affiliation_query import affiliation_query as aq
from pages.utils.job_utils import nodata_graph
import time
from pages.utils.query_status import load_query_data
import datetime as dt
from rapidfuzz import fuzz
import app
import cache_manager.cache_facade as cf
from components.visualization import VisualizationAIO

PAGE = "affiliation"
Expand Down Expand Up @@ -76,23 +77,12 @@
],
background=True,
)
def gh_org_affiliation_graph(repolist, num, start_date, end_date, bot_switch):
# wait for data to asynchronously download and become available.
while not_cached := cf.get_uncached(func_name=aq.__name__, repolist=repolist):
logging.warning(f"{VIZ_ID}- WAITING ON DATA TO BECOME AVAILABLE")
time.sleep(0.5)

start = time.perf_counter()
logging.warning(f"{VIZ_ID}- START")

# GET ALL DATA FROM POSTGRES CACHE
df = cf.retrieve_from_cache(
tablename=aq.__name__,
repolist=repolist,
)
# test if there is data
if df.empty:
logging.warning(f"{VIZ_ID} - NO DATA AVAILABLE")
def gh_org_affiliation_graph(
repolist: List[int], num, start_date: Optional[str], end_date, bot_switch: bool
) -> Tuple[go.Figure, bool]:
# Wait for and load query data (includes timeout, error handling, and validation)
df = load_query_data(aq, repolist, VIZ_ID)
if df is None:
return nodata_graph

# remove bot data
Expand All @@ -103,12 +93,10 @@ def gh_org_affiliation_graph(repolist, num, start_date, end_date, bot_switch):
df = process_data(df, num, start_date, end_date)

fig = create_figure(df)

logging.warning(f"{VIZ_ID} - END - {time.perf_counter() - start}")
return fig


def process_data(df: pd.DataFrame, num, start_date, end_date):
def process_data(df: pd.DataFrame, num, start_date: Optional[str], end_date: Optional[str]) -> pd.DataFrame:
"""Implement your custom data-processing logic in this function.
The output of this function is the data you intend to create a visualization with,
requiring no further processing."""
Expand Down Expand Up @@ -183,7 +171,7 @@ def fuzzy_match(df, name):
return [i for i, x in enumerate(matches) if x]


def create_figure(df: pd.DataFrame):
def create_figure(df: pd.DataFrame) -> go.Figure:
# graph generation
fig = px.pie(
df,
Expand Down
35 changes: 10 additions & 25 deletions 8Knot/pages/affiliation/visualizations/org_associated_activity.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
from dash import dcc, callback
import dash_bootstrap_components as dbc
from dash.dependencies import Input, Output
from typing import List, Optional, Tuple, Union
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[pylint] reported by reviewdog 🐶
W0611: Unused Union imported from typing (unused-import)

import pandas as pd
import logging
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[pylint] reported by reviewdog 🐶
W0611: Unused import logging (unused-import)

from dateutil.relativedelta import * # type: ignore
import plotly.express as px
import plotly.graph_objects as go
from pages.utils.graph_utils import baby_blue
from queries.affiliation_query import affiliation_query as aq
from pages.utils.job_utils import nodata_graph
import time
from pages.utils.query_status import load_query_data
import datetime as dt
import app
import cache_manager.cache_facade as cf
from components.visualization import VisualizationAIO

PAGE = "affiliation"
Expand Down Expand Up @@ -116,7 +117,9 @@
],
background=True,
)
def org_associated_activity_graph(repolist, num, start_date, end_date, email_filter, bot_switch):
def org_associated_activity_graph(
repolist: List[int], num, start_date: Optional[str], end_date, email_filter, bot_switch: bool
) -> Tuple[go.Figure, bool]:
"""Each contribution is associated with a contributor. That contributor can be associated with

more than one different email. Hence each contribution is associated with all of the emails that a contributor has historically used.
Expand All @@ -129,24 +132,8 @@ def org_associated_activity_graph(repolist, num, start_date, end_date, email_fil

will have many emails. We acknowledge that this will almost always contribute to an overcount but will never undercount."
"""

# wait for data to asynchronously download and become available.
while not_cached := cf.get_uncached(func_name=aq.__name__, repolist=repolist):
logging.warning(f"{VIZ_ID}- WAITING ON DATA TO BECOME AVAILABLE")
time.sleep(0.5)

start = time.perf_counter()
logging.warning(f"{VIZ_ID}- START")

# GET ALL DATA FROM POSTGRES CACHE
df = cf.retrieve_from_cache(
tablename=aq.__name__,
repolist=repolist,
)

# test if there is data
if df.empty:
logging.warning(f"{VIZ_ID} - NO DATA AVAILABLE")
df = load_query_data(aq, repolist, VIZ_ID)
if df is None:
return nodata_graph

# remove bot data
Expand All @@ -157,12 +144,10 @@ def org_associated_activity_graph(repolist, num, start_date, end_date, email_fil
df = process_data(df, num, start_date, end_date, email_filter)

fig = create_figure(df)

logging.warning(f"{VIZ_ID} - END - {time.perf_counter() - start}")
return fig


def process_data(df: pd.DataFrame, num, start_date, end_date, email_filter):
def process_data(df: pd.DataFrame, num, start_date: Optional[str], end_date, email_filter):
# convert to datetime objects rather than strings
df["created_at"] = pd.to_datetime(df["created_at"], utc=True)

Expand Down Expand Up @@ -214,7 +199,7 @@ def process_data(df: pd.DataFrame, num, start_date, end_date, email_filter):
return df


def create_figure(df: pd.DataFrame):
def create_figure(df: pd.DataFrame) -> go.Figure:
# graph generation
fig = px.bar(df, x="domains", y="occurrences", color_discrete_sequence=[baby_blue[8]])
fig.update_xaxes(rangeslider_visible=True, range=[-0.5, 15])
Expand Down
31 changes: 8 additions & 23 deletions 8Knot/pages/affiliation/visualizations/org_core_contributors.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
from dash import dcc, callback
import dash_bootstrap_components as dbc
from dash.dependencies import Input, Output
from typing import List, Optional, Tuple, Union
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[pylint] reported by reviewdog 🐶
W0611: Unused List imported from typing (unused-import)

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[pylint] reported by reviewdog 🐶
W0611: Unused Tuple imported from typing (unused-import)

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[pylint] reported by reviewdog 🐶
W0611: Unused Union imported from typing (unused-import)

import pandas as pd
import logging
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[pylint] reported by reviewdog 🐶
W0611: Unused import logging (unused-import)

from dateutil.relativedelta import * # type: ignore
import plotly.express as px
import plotly.graph_objects as go
from pages.utils.graph_utils import baby_blue
from queries.affiliation_query import affiliation_query as aq
from pages.utils.job_utils import nodata_graph
import time
from pages.utils.query_status import load_query_data
import datetime as dt
import app
import cache_manager.cache_facade as cf
from components.visualization import VisualizationAIO

PAGE = "affiliation"
Expand Down Expand Up @@ -140,23 +141,9 @@ def compay_associated_activity_graph(
email_filter,
bot_switch,
):
# wait for data to asynchronously download and become available.
while not_cached := cf.get_uncached(func_name=aq.__name__, repolist=repolist):
logging.warning(f"{VIZ_ID}- WAITING ON DATA TO BECOME AVAILABLE")
time.sleep(0.5)

start = time.perf_counter()
logging.warning(f"{VIZ_ID}- START")

# GET ALL DATA FROM POSTGRES CACHE
df = cf.retrieve_from_cache(
tablename=aq.__name__,
repolist=repolist,
)

# test if there is data
if df.empty:
logging.warning(f"{VIZ_ID} - NO DATA AVAILABLE")
# Wait for and load query data (includes timeout, error handling, and validation)
df = load_query_data(aq, repolist, VIZ_ID)
if df is None:
return nodata_graph

# remove bot data
Expand All @@ -167,12 +154,10 @@ def compay_associated_activity_graph(
df = process_data(df, contributions, contributors, start_date, end_date, email_filter)

fig = create_figure(df)

logging.warning(f"{VIZ_ID} - END - {time.perf_counter() - start}")
return fig


def process_data(df: pd.DataFrame, contributions, contributors, start_date, end_date, email_filter):
def process_data(df: pd.DataFrame, contributions, contributors, start_date: Optional[str], end_date, email_filter):
# convert to datetime objects rather than strings
df["created_at"] = pd.to_datetime(df["created_at"], utc=True)

Expand Down Expand Up @@ -231,7 +216,7 @@ def process_data(df: pd.DataFrame, contributions, contributors, start_date, end_
return df


def create_figure(df: pd.DataFrame):
def create_figure(df: pd.DataFrame) -> go.Figure:
# graph generation
fig = px.bar(df, x="domains", y="contributors", color_discrete_sequence=[baby_blue[8]])
fig.update_xaxes(rangeslider_visible=True, range=[-0.5, 15])
Expand Down
35 changes: 11 additions & 24 deletions 8Knot/pages/affiliation/visualizations/unqiue_domains.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
from dash import dcc, callback
import dash_bootstrap_components as dbc
from dash.dependencies import Input, Output
from typing import List, Optional, Tuple, Union
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[pylint] reported by reviewdog 🐶
W0611: Unused Union imported from typing (unused-import)

import pandas as pd
import logging
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[pylint] reported by reviewdog 🐶
W0611: Unused import logging (unused-import)

from dateutil.relativedelta import * # type: ignore
import plotly.express as px
import plotly.graph_objects as go
from pages.utils.graph_utils import baby_blue
from queries.affiliation_query import affiliation_query as aq
from pages.utils.job_utils import nodata_graph
import time
from pages.utils.query_status import load_query_data
import datetime as dt
import app
import cache_manager.cache_facade as cf
from components.visualization import VisualizationAIO

PAGE = "affiliation"
Expand Down Expand Up @@ -75,24 +76,12 @@
],
background=True,
)
def unique_domains_graph(repolist, num, start_date, end_date, bot_switch):
# wait for data to asynchronously download and become available.
while not_cached := cf.get_uncached(func_name=aq.__name__, repolist=repolist):
logging.warning(f"{VIZ_ID}- WAITING ON DATA TO BECOME AVAILABLE")
time.sleep(0.5)

start = time.perf_counter()
logging.warning(f"{VIZ_ID}- START")

# GET ALL DATA FROM POSTGRES CACHE
df = cf.retrieve_from_cache(
tablename=aq.__name__,
repolist=repolist,
)

# test if there is data
if df.empty:
logging.warning(f"{VIZ_ID} - NO DATA AVAILABLE")
def unique_domains_graph(
repolist: List[int], num, start_date: Optional[str], end_date, bot_switch: bool
) -> Tuple[go.Figure, bool]:
# Wait for and load query data (includes timeout, error handling, and validation)
df = load_query_data(aq, repolist, VIZ_ID)
if df is None:
return nodata_graph

# remove bot data
Expand All @@ -103,12 +92,10 @@ def unique_domains_graph(repolist, num, start_date, end_date, bot_switch):
df = process_data(df, num, start_date, end_date)

fig = create_figure(df)

logging.warning(f"{VIZ_ID} - END - {time.perf_counter() - start}")
return fig


def process_data(df: pd.DataFrame, num, start_date, end_date):
def process_data(df: pd.DataFrame, num, start_date: Optional[str], end_date: Optional[str]) -> pd.DataFrame:
# convert to datetime objects rather than strings
df["created_at"] = pd.to_datetime(df["created_at"], utc=True)

Expand Down Expand Up @@ -150,7 +137,7 @@ def process_data(df: pd.DataFrame, num, start_date, end_date):
return df


def create_figure(df: pd.DataFrame):
def create_figure(df: pd.DataFrame) -> go.Figure:
# graph generation
fig = px.pie(df, names="domains", values="occurences", color_discrete_sequence=baby_blue)
fig.update_traces(
Expand Down
Loading
Loading