Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions docs/topics/cost-calculations.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,10 @@ The proportional usage described above does not take into account the underutili

Group-level costs are calculated by aggregating the user-level costs for all users within a group. Each entry in the [`/costs-per-user`](#id-costs-per-user) endpoint includes a `usergroup` key that indicates the group to which the user belongs.

:::{note}
Only user group memberships from the most recent date are considered, because querying historical user group memberships incurs a heavy performance penalty.
:::

There are a few extra endpoints provided by the cost monitoring application to help track user group costs.

### `/total-costs-per-group`
Expand Down
13 changes: 2 additions & 11 deletions src/jupyterhub_cost_monitoring/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,12 +83,6 @@ def total_costs(

@app.get("/user-groups")
def user_groups(
from_date: str | None = Query(
None, alias="from", description="Start date in YYYY-MM-DDTHH:MMZ format"
),
to_date: str | None = Query(
None, alias="to", description="End date in YYYY-MM-DDTHH:MMZ format"
),
hub: str | None = Query(None, description="Name of the hub to filter results"),
username: str | None = Query(
None, description="Name of the user to filter results"
Expand All @@ -98,11 +92,10 @@ def user_groups(
),
):
"""
Endpoint to serve user group memberships.
Endpoint to serve user group memberships. Note that only the most recent date for each user group membership is returned.
"""
date_range = parse_from_to_in_query_params(from_date, to_date)

return query_user_groups(date_range, hub, username, usergroup)
return query_user_groups(hub, username, usergroup)


@app.get("/users-with-multiple-groups")
Expand Down Expand Up @@ -269,8 +262,6 @@ def costs_per_user(
if not usergroup or ("all" in [u.lower() for u in usergroup]):
usergroup = [None]

logger.info(f"Limit parameter: {limit}")

# Get per-user costs by combining AWS costs with Prometheus usage data
results = []
for ug in usergroup:
Expand Down
29 changes: 22 additions & 7 deletions src/jupyterhub_cost_monitoring/query_cost_aws.py
Original file line number Diff line number Diff line change
Expand Up @@ -574,21 +574,36 @@ def query_total_costs_per_user(
results.append(entry)
results = [x for x in results if x["hub"] != "binder"] # Exclude binder hubs
user_groups = query_user_groups(date_range, hub, user)
seen = set()
list_groups = []
# Ensure uniquely keyed entries when double-counting group costs
for r in results:
matched = False
for entry in user_groups:
if (r["date"] == entry["date"]) and (
r["hub"] == entry["hub"] and (r["user"] == entry["username"])
):
if "usergroup" not in r.keys():
if r["hub"] == entry["hub"] and r["user"] == entry["username"]:
key = (
r["date"],
r["hub"],
r["user"],
r["component"],
entry["usergroup"],
)
if key in seen:
continue
seen.add(key)
if "usergroup" not in r:
r["usergroup"] = entry["usergroup"]
matched = True
else:
r_copy = copy.deepcopy(r)
r_copy["usergroup"] = entry["usergroup"]
list_groups.append(r_copy)
if r.get("usergroup") is None:
logger.debug(f"No username match for group membership: {r}")
r["usergroup"] = "none"
matched = True
if not matched:
key = (r["date"], r["hub"], r["user"], r["component"], "none")
if key not in seen:
seen.add(key)
r["usergroup"] = "none"
results.extend(list_groups)
if limit:
limit = int(limit)
Expand Down
62 changes: 29 additions & 33 deletions src/jupyterhub_cost_monitoring/query_usage.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@

import os
from collections import defaultdict
from datetime import datetime, timezone
from datetime import datetime, timedelta, timezone

import escapism
import requests
from yarl import URL

from .cache import ttl_lru_cache
from .const_usage import USAGE_MAP, USER_GROUP_INFO
from .date_utils import DateRange
from .date_utils import DateRange, get_now_date
from .logs import get_logger

logger = get_logger(__name__)
Expand Down Expand Up @@ -46,11 +46,11 @@ def query_prometheus(query: str, date_range: DateRange, step: str) -> requests.R
"step": step,
}
query_api = URL(prometheus_api.with_path("/api/v1/query_range"))
response = requests.get(query_api, params=parameters)
logger.info(f"Querying Prometheus: {response.url}")
response.raise_for_status()
result = response.json()
return result
with requests.get(query_api, params=parameters) as response:
logger.info(f"Querying Prometheus: {response.url}")
response.raise_for_status()
result = response.json()
return result


def query_usage(
Expand Down Expand Up @@ -243,11 +243,15 @@ def _calculate_daily_cost_factors(

@ttl_lru_cache(seconds_to_live=3600)
def query_user_groups(
date_range: DateRange,
hub_name: str | None = None,
user_name: str | None = None,
group_name: str | None = None,
) -> list[dict]:
"""
Get user group information from the Prometheus server for the most recent day.
"""
now_date = get_now_date() - timedelta(days=1)
date_range = DateRange(start_date=now_date, end_date=now_date)
response = query_prometheus(USER_GROUP_INFO, date_range, step="1d")
result = _process_user_groups(response, hub_name, user_name, group_name)
return result
Expand All @@ -260,33 +264,25 @@ def _process_user_groups(
group_name: str | None = None,
) -> list[dict]:
"""
Process the response from the Prometheus server to extract user group information.
Process the response from the Prometheus server to extract user group information. Note that only the most recent date of user group membership is used.
"""
result = []
unique_keys = set()
for data in response["data"]["result"]:
for value in data["values"]:
date = datetime.fromtimestamp(value[0], tz=timezone.utc).strftime(
"%Y-%m-%d"
)
hub = data["metric"]["namespace"]
user = data["metric"]["username"]
user_escaped = data["metric"]["username_escaped"]
group = data["metric"]["usergroup"]
key = (date, hub, user, user_escaped, group)
if key not in unique_keys:
unique_keys.add(key)
result.append(
{
"date": date,
"hub": hub,
"username": user,
"username_escaped": user_escaped,
"usergroup": group,
}
)
result = _filter_json(
result, hub=hub_name, username=user_name, usergroup=group_name
hub = data["metric"]["namespace"]
user = data["metric"]["username"]
user_escaped = data["metric"]["username_escaped"]
group = data["metric"]["usergroup"]
key = (hub, user, user_escaped, group)
if key not in unique_keys:
unique_keys.add(key)
result.append(
{
"hub": hub,
"username": user,
"username_escaped": user_escaped,
"usergroup": group,
}
)
return result

Expand All @@ -297,7 +293,7 @@ def query_users_with_multiple_groups(
hub_name: str | None = None,
user_name: str | None = None,
) -> list[dict]:
response = query_user_groups(date_range, hub_name=hub_name, user_name=user_name)
response = query_user_groups(hub_name=hub_name, user_name=user_name)
grouped = defaultdict(
lambda: {"username": None, "hub": None, "usergroups": [], "has_multiple": False}
)
Expand Down Expand Up @@ -327,7 +323,7 @@ def query_users_with_no_groups(
hub_name: str | None = None,
user_name: str | None = None,
) -> list[dict]:
response = query_user_groups(date_range, hub_name=hub_name, user_name=user_name)
response = query_user_groups(hub_name=hub_name, user_name=user_name)
grouped = defaultdict(lambda: {"username": None, "hub": None})
for entry in response:
key = (entry["username"], entry["hub"])
Expand Down
1 change: 0 additions & 1 deletion tests/test_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ def test_get_user_group_info(mock_prometheus_user_group_info, env_vars):
from src.jupyterhub_cost_monitoring.query_usage import query_user_groups

response = query_user_groups(
date_range,
hub_name=None,
user_name=None,
group_name=None,
Expand Down