Skip to content

Commit

Permalink
updated API and added wordcloud
Browse files Browse the repository at this point in the history
  • Loading branch information
kasperwelbers committed Aug 14, 2023
1 parent 2ca4dfc commit 3b563ed
Show file tree
Hide file tree
Showing 15 changed files with 735 additions and 232 deletions.
384 changes: 380 additions & 4 deletions package-lock.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
"lodash": "^4.17.21",
"lottie-react": "^2.3.1",
"react-highlight-words": "^0.20.0",
"react-wordcloud": "^1.2.7",
"recharts": "^2.7.1",
"typescript": "^4.4.3",
"web-vitals": "^2.1.4"
Expand Down
Binary file modified public/port-0.0.0-py3-none-any.whl
Binary file not shown.
Binary file modified src/framework/processing/py/dist/port-0.0.0-py3-none-any.whl
Binary file not shown.
102 changes: 77 additions & 25 deletions src/framework/processing/py/port/api/props.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ class Translations(TypedDict):
en: str
nl: str


@dataclass
class Translatable:
"""Wrapper class for Translations"""
Expand Down Expand Up @@ -80,6 +79,75 @@ def toDict(self):
dict["cancel"] = self.cancel.toDict()
return dict

@dataclass
class PropsUIDataVisualizationGroup:
    """Grouping variable for aggregating the data

    Attributes:
        column: name of the column to group by
        label: Optionally, a label to display in the visualization (default is the column name).
            None is serialized as-is; NOTE(review): the column-name fallback is presumably applied by the consumer - confirm
        dateFormat: if given, transforms a data column to the specified date format for aggregation
    """
    column: str
    label: Optional[str] = None
    dateFormat: Optional[str] = None

    def toDict(self) -> dict:
        """Serialize this group to a plain dictionary tagged with its "__type__"."""
        # Built as a literal so the builtin `dict` is not shadowed by a local name.
        return {
            "__type__": "PropsUIDataVisualizationGroup",
            "column": self.column,
            "label": self.label,
            "dateFormat": self.dateFormat,
        }

@dataclass
class PropsUIDataVisualizationValue:
    """Value to aggregate

    Attributes:
        column: name of the column to aggregate
        label: Optionally, a label to display in the visualization (default is the column name).
            None is serialized as-is; NOTE(review): the column-name fallback is presumably applied by the consumer - confirm
        aggregate: function for aggregating the values (defaults to "count")
        addZeroes: if true, add zeroes for missing values (defaults to False)
    """
    column: str
    label: Optional[str] = None
    aggregate: Optional[str] = "count"
    addZeroes: Optional[bool] = False

    def toDict(self) -> dict:
        """Serialize this value specification to a plain dictionary tagged with its "__type__"."""
        # Built as a literal so the builtin `dict` is not shadowed by a local name.
        return {
            "__type__": "PropsUIDataVisualizationValue",
            "column": self.column,
            "label": self.label,
            "aggregate": self.aggregate,
            "addZeroes": self.addZeroes,
        }

@dataclass
class PropsUIDataVisualization:
    """Data visualization

    Attributes:
        title: title of the visualization
        type: type of visualization
        group: grouping variable for aggregating the data
        values: list of values to aggregate
    """
    title: Translatable
    type: str
    group: PropsUIDataVisualizationGroup
    values: list[PropsUIDataVisualizationValue]

    def toDict(self) -> dict:
        """Serialize this visualization to a plain dictionary tagged with its "__type__".

        The title, the group and every entry of values are serialized through
        their own toDict() methods; type is emitted unchanged.
        """
        # Built as a literal so the builtin `dict` is not shadowed by a local name.
        return {
            "__type__": "PropsUIDataVisualization",
            "title": self.title.toDict(),
            "type": self.type,
            "group": self.group.toDict(),
            "values": [entry.toDict() for entry in self.values],
        }


@dataclass
class PropsUIPromptConsentFormTable:
Expand All @@ -89,37 +157,28 @@ class PropsUIPromptConsentFormTable:
id: a unique string to identify the table after donation
title: title of the table
data_frame: table to be shown
visualizations: optional list of visualizations to be shown
"""
id: str
title: Translatable
data_frame: pd.DataFrame
visualizations: Optional[list[PropsUIDataVisualization]] = None

def translate_visualizations(self):
if self.visualizations is None:
return None
return [vis.toDict() for vis in self.visualizations]

def toDict(self):
dict = {}
dict["__type__"] = "PropsUIPromptConsentFormTable"
dict["id"] = self.id
dict["title"] = self.title.toDict()
dict["data_frame"] = self.data_frame.to_json()
dict["visualizations"] = self.translate_visualizations()
return dict

@dataclass
class PropsUIDataVisualization:
    """Instructions for which ConsentFormTables to visualize and how

    Attributes:
        id: id of the table (same as in PropsUIPromptConsentFormTable) to use as data
        settings: configuration of the visualization. What type of visualization, what columns to use, etc.
    """
    id: str
    settings: dict

    def toDict(self) -> dict:
        """Serialize these instructions to a plain dictionary tagged with its "__type__".

        The settings object is placed in the result as-is (not copied).
        """
        # Built as a literal so the builtin `dict` is not shadowed by a local name.
        return {
            "__type__": "PropsUIDataVisualization",
            "id": self.id,
            "settings": self.settings,
        }

@dataclass
class PropsUIPromptConsentForm:
Expand All @@ -131,7 +190,6 @@ class PropsUIPromptConsentForm:
"""
tables: list[PropsUIPromptConsentFormTable]
meta_tables: list[PropsUIPromptConsentFormTable]
visualizations: Optional[list[PropsUIDataVisualization]] = None

def translate_tables(self):
output = []
Expand All @@ -145,17 +203,11 @@ def translate_meta_tables(self):
output.append(table.toDict())
return output

def translate_visualizations(self):
if self.visualizations is None:
return None
return [vis.toDict() for vis in self.visualizations]

def toDict(self):
dict = {}
dict["__type__"] = "PropsUIPromptConsentForm"
dict["tables"] = self.translate_tables()
dict["metaTables"] = self.translate_meta_tables()
dict["visualizations"] = self.translate_visualizations()
return dict


Expand Down
86 changes: 29 additions & 57 deletions src/framework/processing/py/port/script.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,39 +146,9 @@ def create_consent_form(table_list: list[props.PropsUIPromptConsentFormTable]) -
"""
Assembles all donated data in consent form to be displayed
"""
return props.PropsUIPromptConsentForm(table_list, meta_tables=[], visualizations=specify_visualizations())
return props.PropsUIPromptConsentForm(table_list, meta_tables=[])


def create_consent_form_tables(unique_table_id: str, title: props.Translatable, df: pd.DataFrame) -> list[props.PropsUIPromptConsentFormTable]:
    """
    Wrap the extracted data in consent form tables that can be rendered on screen.

    Chunking is currently disabled: the whole data frame is treated as a single
    chunk, so the result is a one-element list whose table keeps the given id
    and title. If more than one chunk were present, each chunk would get its
    own table with a 1-based index appended to both the id and every
    translation of the title.

    NOTE (Kasper): is chunking necessary? It should not matter how many rows a
    table has as long as the UI does not render them all at once. It was
    removed for now because it also complicates linking visualizations to tables.
    """
    # Previously: chunks = helpers.split_dataframe(df, 5000)
    chunks = [df]

    if len(chunks) == 1:
        return [props.PropsUIPromptConsentFormTable(unique_table_id, title, chunks[0])]

    tables = []
    for index, chunk in enumerate(chunks, start=1):
        indexed_title = props.Translatable(
            {lang: f"{text} {index}" for lang, text in title.translations.items()}
        )
        tables.append(
            props.PropsUIPromptConsentFormTable(f"{unique_table_id}_{index}", indexed_title, chunk)
        )
    return tables

def create_consent_form_visualization(unique_vis_id: str, title: props.Translatable, df: pd.DataFrame, settings: dict) -> props.PropsUIDataVisualization:
    """
    Build a visualization of the extracted data.

    The four arguments are forwarded positionally, in the order given, to
    props.PropsUIDataVisualization.

    NOTE(review): confirm that the props.PropsUIDataVisualization constructor
    accepts these four positional arguments in this order.
    """
    visualization = props.PropsUIDataVisualization(unique_vis_id, title, df, settings)
    return visualization

def return_empty_result_set():
result = {}

Expand Down Expand Up @@ -232,57 +202,69 @@ def extract_netflix(netflix_zip: str, selected_user: str) -> list[props.PropsUIP
df = netflix.ratings_to_df(netflix_zip, selected_user)
if not df.empty:
table_title = props.Translatable({"en": "Netflix ratings", "nl": "Netflix ratings"})
tables = create_consent_form_tables("netflix_ratings", table_title, df)
tables_to_render.extend(tables)
wordcloud = props.PropsUIDataVisualization(
title=props.Translatable({"en": "Highest ratings", "nl": "Hoogste ratings"}),
type="wordcloud",
group= props.PropsUIDataVisualizationGroup(column="Title Name"),
values= [props.PropsUIDataVisualizationValue(label='N', column='Thumbs Value', aggregate='sum')]
)
table = props.PropsUIPromptConsentFormTable("netflix_rating", table_title, df, [wordcloud])
tables_to_render.append(table)

# Extract the viewing activity
df = netflix.viewing_activity_to_df(netflix_zip, selected_user)
if not df.empty:
table_title = props.Translatable({"en": "Netflix viewings", "nl": "Netflix viewings"})
tables = create_consent_form_tables("netflix_viewings", table_title, df)
tables_to_render.extend(tables)
date_graph = props.PropsUIDataVisualization(
title=props.Translatable({"en": "Number of viewings over time", "nl": "Aantal gezien over tijd"}),
type="area",
group= props.PropsUIDataVisualizationGroup(column="Start Time", dateFormat="auto"),
values= [props.PropsUIDataVisualizationValue(label='N', column='Duration', addZeroes= True)]
)
table = props.PropsUIPromptConsentFormTable("netflix_viewings", table_title, df, [date_graph])
tables_to_render.append(table)

# Extract the clickstream
df = netflix.clickstream_to_df(netflix_zip, selected_user)
if not df.empty:
table_title = props.Translatable({"en": "Netflix clickstream", "nl": "Netflix clickstream"})
tables = create_consent_form_tables("netflix_clickstream", table_title, df)
tables_to_render.extend(tables)
table = props.PropsUIPromptConsentFormTable("netflix_clickstream", table_title, df)
tables_to_render.append(table)

# Extract my list
df = netflix.my_list_to_df(netflix_zip, selected_user)
if not df.empty:
table_title = props.Translatable({"en": "Netflix bookmarks", "nl": "Netflix bookmarks"})
tables = create_consent_form_tables("netflix_my_list", table_title, df)
tables_to_render.extend(tables)
table = props.PropsUIPromptConsentFormTable("netflix_my_list", table_title, df)
tables_to_render.append(table)

# Extract Indicated preferences
df = netflix.indicated_preferences_to_df(netflix_zip, selected_user)
if not df.empty:
table_title = props.Translatable({"en": "Netflix indicated preferences", "nl": "Netflix indicated preferences"})
tables = create_consent_form_tables("netflix_indicated_preferences", table_title, df)
tables_to_render.extend(tables)
table = props.PropsUIPromptConsentFormTable("netflix_indicated_preferences", table_title, df)
tables_to_render.append(table)

# Extract playback related events
df = netflix.playback_related_events_to_df(netflix_zip, selected_user)
if not df.empty:
table_title = props.Translatable({"en": "Netflix playback related events", "nl": "Netflix playback related events"})
tables = create_consent_form_tables("netflix_playback", table_title, df)
tables_to_render.extend(tables)
table = props.PropsUIPromptConsentFormTable("netflix_playback", table_title, df)
tables_to_render.append(table)

# Extract search history
df = netflix.search_history_to_df(netflix_zip, selected_user)
if not df.empty:
table_title = props.Translatable({"en": "Netflix search history", "nl": "Netflix search history"})
tables = create_consent_form_tables("netflix_search", table_title, df)
tables_to_render.extend(tables)
table = props.PropsUIPromptConsentFormTable("netflix_search", table_title, df)
tables_to_render.append(table)

# Extract messages sent by netflix
df = netflix.messages_sent_by_netflix_to_df(netflix_zip, selected_user)
if not df.empty:
table_title = props.Translatable({"en": "Netflix messages", "nl": "Netflix messages"})
tables = create_consent_form_tables("netflix_messages", table_title, df)
tables_to_render.extend(tables)
table = props.PropsUIPromptConsentFormTable("netflix_messages", table_title, df)
tables_to_render.append(table)

return tables_to_render

Expand All @@ -298,16 +280,6 @@ def extract_users(netflix_zip):
return users


##################################################################
# Visualization settings

def specify_visualizations():
    """Return the list of visualizations to show.

    Currently a single entry: a "keyword_frequency" visualization over the
    "title" keyword, with id "netflix_viewings".
    """
    return [
        props.PropsUIDataVisualization(
            id="netflix_viewings",
            settings={"type": "keyword_frequency", "keyword": "title"},
        )
    ]


##########################################
# Functions provided by Eyra did not change

Expand Down
Loading

0 comments on commit 3b563ed

Please sign in to comment.