Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: rebase upstream changes to d78fab9 #7

Merged
merged 1 commit into from
May 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions dashboard/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Step 1: Builds and tests
FROM node:12.22.12-bullseye AS build
FROM node:14.21.3-bullseye AS build

ARG kubeflowversion
ARG commit
Expand All @@ -24,7 +24,9 @@ RUN BUILDARCH="$(dpkg --print-architecture)" && npm rebuild && \
npm prune --production

# Step 2: Packages assets for serving
FROM node:12.22.12-alpine AS serve
FROM node:14.21.3-alpine3.17 AS serve

USER node

ENV NODE_ENV=production
WORKDIR /app
Expand Down
47 changes: 0 additions & 47 deletions dashboard/Makefile

This file was deleted.

10 changes: 10 additions & 0 deletions dashboard/app/api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import {KubernetesService} from './k8s_service';
import {Interval, MetricsService} from './metrics_service';

export const ERRORS = {
no_metrics_service_configured: 'No metrics service configured',
operation_not_supported: 'Operation not supported',
invalid_links_config: 'Cannot load dashboard menu link',
invalid_settings: 'Cannot load dashboard settings'
Expand All @@ -28,6 +29,15 @@ export class Api {
*/
routes(): Router {
return Router()
.get('/metrics', async (req: Request, res: Response) => {
if (!this.metricsService) {
return apiError({
res, code: 405,
error: ERRORS.operation_not_supported,
});
}
res.json(this.metricsService.getChartsLink());
})
.get(
'/metrics/:type((node|podcpu|podmem))',
async (req: Request, res: Response) => {
Expand Down
30 changes: 25 additions & 5 deletions dashboard/app/api_test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,22 @@ describe('Main API', () => {
port = addressInfo.port;
});

it('Should return a 405 status code', (done) => {
get(`http://localhost:${port}/api/metrics/podcpu`, (res) => {
expect(res.statusCode).toBe(405);
done();
it('Should return a 405 status code', async () => {
  /**
   * Requests `path` on the test server and resolves once the response has
   * been checked for a 405 status. The promise is typed `Promise<void>` so
   * that the argument-less `resolve()` type-checks, and a request error
   * rejects it so the spec fails immediately rather than timing out.
   */
  const expectNotSupported = (path: string): Promise<void> =>
    new Promise<void>((resolve, reject) => {
      get(`http://localhost:${port}${path}`, (res) => {
        expect(res.statusCode).toBe(405);
        resolve();
      }).on('error', reject);
    });

  // Both the charts-link endpoint and the typed metrics endpoint must be
  // rejected when no metrics service is configured.
  await Promise.all([
    expectNotSupported('/api/metrics'),
    expectNotSupported('/api/metrics/podcpu'),
  ]);
});
});

Expand All @@ -47,7 +58,7 @@ describe('Main API', () => {
mockK8sService = jasmine.createSpyObj<KubernetesService>(['']);
mockProfilesService = jasmine.createSpyObj<DefaultApi>(['']);
mockMetricsService = jasmine.createSpyObj<MetricsService>([
'getNodeCpuUtilization', 'getPodCpuUtilization', 'getPodMemoryUsage'
'getNodeCpuUtilization', 'getPodCpuUtilization', 'getPodMemoryUsage', 'getChartsLink'
]);

testApp = express();
Expand All @@ -64,6 +75,15 @@ describe('Main API', () => {
}
});

// With a metrics service configured, GET /api/metrics must answer 200 and
// delegate to the service's getChartsLink().
it('Should retrieve charts link in Metrics service', (done) => {
  const chartsLinkUrl = `http://localhost:${port}/api/metrics`;

  get(chartsLinkUrl, (response) => {
    const {statusCode} = response;
    expect(statusCode).toBe(200);
    expect(mockMetricsService.getChartsLink).toHaveBeenCalled();
    done();
  });
});

it('Should retrieve Node CPU Utilization for default 15m interval',
async () => {
const defaultInterval = new Promise((resolve) => {
Expand Down
11 changes: 11 additions & 0 deletions dashboard/app/metrics_service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@ export interface TimeSeriesPoint {
value: number;
}

/**
 * Describes the link to an external metrics dashboard, as returned by
 * `MetricsService.getChartsLink()`.
 */
export interface MetricsInfo {
/** URL of the metrics dashboard; `undefined` when no URL is available. */
resourceChartsLink: string | undefined;
/** Text to display for the button that redirects to the dashboard. */
resourceChartsLinkText: string;
}

/**
* Interface definition for implementers of metrics services capable of
* returning time-series resource utilization metrics for the Kubeflow system.
Expand All @@ -39,4 +44,10 @@ export interface MetricsService {
* @param interval
*/
getPodMemoryUsage(interval: Interval): Promise<TimeSeriesPoint[]>;

/**
* Return a MetricsInfo object containing the url of the metric dashboard and the
* text to display for the redirect button.
*/
getChartsLink(): MetricsInfo;
}
90 changes: 90 additions & 0 deletions dashboard/app/prometheus_metrics_service.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import {Interval, MetricsInfo, MetricsService, TimeSeriesPoint} from "./metrics_service";
import {PrometheusDriver, RangeVector, ResponseType} from 'prometheus-query';

/**
 * MetricsService implementation that reads resource utilization from a
 * Prometheus server.
 *
 * Every metric is fetched with a range query issued through the supplied
 * `PrometheusDriver` and flattened into a list of `TimeSeriesPoint`s.
 */
export class PrometheusMetricsService implements MetricsService {
  /** Value passed as the `step` argument of every range query. */
  private static readonly QUERY_STEP = 10;

  /** Window, in minutes, used when the requested interval is not recognised. */
  private static readonly FALLBACK_WINDOW_MINUTES = 15;

  /**
   * @param prometheusDriver Client used to run range queries.
   * @param dashboardUrl URL of the external metrics dashboard, or `undefined`
   *     when there is none; returned as-is by `getChartsLink()`.
   */
  constructor(
      private readonly prometheusDriver: PrometheusDriver,
      private readonly dashboardUrl: string | undefined) {}

  /** Returns the per-instance node CPU rate over the requested interval. */
  async getNodeCpuUtilization(interval: Interval): Promise<TimeSeriesPoint[]> {
    return this.fetchSeries(
        'sum(rate(node_cpu_seconds_total[5m])) by (instance)', interval);
  }

  /** Returns the summed container CPU rate over the requested interval. */
  async getPodCpuUtilization(interval: Interval): Promise<TimeSeriesPoint[]> {
    return this.fetchSeries(
        'sum(rate(container_cpu_usage_seconds_total[5m]))', interval);
  }

  /** Returns the summed container memory usage over the requested interval. */
  async getPodMemoryUsage(interval: Interval): Promise<TimeSeriesPoint[]> {
    return this.fetchSeries('sum(container_memory_usage_bytes)', interval);
  }

  /**
   * Return a MetricsInfo object containing the url of the metric dashboard
   * and the text to display for the redirect button.
   */
  getChartsLink(): MetricsInfo {
    return {
      resourceChartsLink: this.dashboardUrl,
      resourceChartsLinkText: 'View in dashboard',
    };
  }

  /**
   * Runs `query` over the window that ends now and spans `interval`, then
   * converts the result to time-series points.
   *
   * The clock is read exactly once so that `end - start` always equals the
   * requested window length.
   */
  private async fetchSeries(query: string, interval: Interval):
      Promise<TimeSeriesPoint[]> {
    const end = Date.now();
    const start = this.getCorrespondingTime(interval, end);
    const series = await this.queryPrometheus(query, start, end);
    return this.convertToTimeSeriesPoints(series);
  }

  /**
   * Issues a range query and returns its series.
   *
   * @param query PromQL expression to evaluate.
   * @param start Start of the range, in epoch milliseconds.
   * @param end End of the range, in epoch milliseconds; defaults to now.
   * @returns The matrix result, or an empty list (after logging a warning)
   *     when the server answers with any other result type.
   */
  private async queryPrometheus(
      query: string, start: number,
      end: number = Date.now()): Promise<RangeVector[]> {
    const result = await this.prometheusDriver.rangeQuery(
        query, start, end, PrometheusMetricsService.QUERY_STEP);
    if (result.resultType !== ResponseType.MATRIX) {
      console.warn(`The prometheus server returned invalid result type: ${
          result.resultType}`);
      return [];
    }
    return result.result as RangeVector[];
  }

  /**
   * Returns the epoch-millisecond timestamp at which the window for
   * `interval` starts, measured back from `now`.
   */
  private getCorrespondingTime(interval: Interval, now: number = Date.now()):
      number {
    return now - this.getWindowMinutes(interval) * 60 * 1000;
  }

  /**
   * Maps an `Interval` to its length in minutes.
   *
   * An unrecognised value is logged and mapped to the fallback window rather
   * than to zero, which would produce a zero-length range query.
   */
  private getWindowMinutes(interval: Interval): number {
    switch (interval) {
      case Interval.Last5m:
        return 5;
      case Interval.Last15m:
        return 15;
      case Interval.Last30m:
        return 30;
      case Interval.Last60m:
        return 60;
      case Interval.Last180m:
        return 180;
      default:
        console.warn(`Unknown interval "${
            String(interval)}", falling back to the last ${
            PrometheusMetricsService.FALLBACK_WINDOW_MINUTES} minutes.`);
        return PrometheusMetricsService.FALLBACK_WINDOW_MINUTES;
    }
  }

  /**
   * Flattens range vectors into points, one per sample. Each point is
   * labelled with the series' labels rendered as `name=value` pairs joined
   * by commas.
   */
  private convertToTimeSeriesPoints(series: RangeVector[]): TimeSeriesPoint[] {
    const timeSeriesPoints: TimeSeriesPoint[] = [];
    for (const serie of series) {
      const label = Object.entries(serie.metric.labels)
                        .map(([name, value]) => `${name}=${value}`)
                        .join(',');

      // `public/components/resource-chart.js` multiplies the timestamp by
      // 1000 and the value by 100, so both are pre-divided here.
      for (const sample of serie.values) {
        timeSeriesPoints.push({
          timestamp: sample.time.getTime() / 1000,
          label,
          value: sample.value / 100,
        });
      }
    }
    return timeSeriesPoints;
  }
}
142 changes: 142 additions & 0 deletions dashboard/app/prometheus_metrics_service_test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
import {Metric, PrometheusDriver, QueryResult, ResponseType} from "prometheus-query";
import {PrometheusMetricsService} from "./prometheus_metrics_service";
import {Interval, MetricsService, TimeSeriesPoint} from "./metrics_service";
import {SampleValue} from "prometheus-query/dist/types";

/**
 * Names of the MetricsService methods that take an interval and resolve to
 * time-series points. `getChartsLink` is excluded because it takes no
 * interval and is not backed by a Prometheus query.
 */
type MetricsServiceKeys = Exclude<keyof MetricsService, 'getChartsLink'>;

/** Methods exercised by the shared, per-method test cases. */
const methods: MetricsServiceKeys[] = [
  'getNodeCpuUtilization',
  'getPodCpuUtilization',
  'getPodMemoryUsage',
];

/**
 * PromQL expression each method is expected to send. Keyed by method name so
 * that a missing or misspelled entry is a compile error.
 */
const queries: Record<MetricsServiceKeys, string> = {
  getNodeCpuUtilization: 'sum(rate(node_cpu_seconds_total[5m])) by (instance)',
  getPodCpuUtilization: 'sum(rate(container_cpu_usage_seconds_total[5m]))',
  getPodMemoryUsage: 'sum(container_memory_usage_bytes)',
};

/** Epoch-millisecond timestamp shared by every fixture sample. */
const fixedDate = 1557705600000;

/**
 * Builds a series labelled `instance=<instance>` that holds a single sample
 * of `value` taken at `fixedDate`.
 */
function seriesFor(instance: string, value: number) {
  return {
    metric: {labels: {instance}} as Metric,
    values: [{time: new Date(fixedDate), value} as SampleValue],
  };
}

/** Matrix response without any series. */
const emptyDataSet: QueryResult = {
  resultType: ResponseType.MATRIX,
  result: [],
};

/** Matrix response with one series. */
const singleInstanceDataSet: QueryResult = {
  resultType: ResponseType.MATRIX,
  result: [seriesFor('one', 95.5)],
};

/** Matrix response with three series, in a fixed order. */
const multipleInstancesDataSet: QueryResult = {
  resultType: ResponseType.MATRIX,
  result: [
    seriesFor('one', 1.0),
    seriesFor('two', 2.0),
    seriesFor('three', 3.0),
  ],
};

/**
 * Specs for PrometheusMetricsService. The Jasmine clock is pinned to
 * `fixedDate` so the start/end arguments of each range query are predictable.
 */
describe('PrometheusMetricsService', () => {
  let prometheusDriverClient: jasmine.SpyObj<PrometheusDriver>;
  let service: PrometheusMetricsService;

  beforeEach(() => {
    jasmine.clock().install();
    jasmine.clock().mockDate(new Date(fixedDate));
    prometheusDriverClient = jasmine.createSpyObj<PrometheusDriver>(
        'prometheusDriverClient', ['rangeQuery']);

    service = new PrometheusMetricsService(prometheusDriverClient, undefined);
  });

  afterEach(() => {
    jasmine.clock().uninstall();
  });

  /**
   * Makes the driver answer with `dataSet`, but only for the exact range
   * query that `method` is expected to issue for a 5 minute interval.
   */
  const stubRangeQuery =
      (method: typeof methods[number], dataSet: QueryResult): void => {
        prometheusDriverClient.rangeQuery
            .withArgs(
                queries[method],
                Date.now() - 5 * 60 * 1000,
                Date.now(),
                10,
                )
            .and.returnValue(Promise.resolve(dataSet));
      };

  // Iterate over all methods since they have the same behavior
  methods.forEach((method) => {
    // Suite bodies must be synchronous: Jasmine does not wait for a promise
    // returned from `describe`, so only the specs themselves are async.
    describe(method, () => {
      it('Empty return', async () => {
        stubRangeQuery(method, emptyDataSet);

        const emptyResult = await service[method](Interval.Last5m);
        expect(emptyResult).toEqual(Array.of<TimeSeriesPoint>());
      });

      it('One instance', async () => {
        stubRangeQuery(method, singleInstanceDataSet);

        const singleInstanceResult = await service[method](Interval.Last5m);
        expect(singleInstanceResult).toEqual(Array.of<TimeSeriesPoint>({
          timestamp: fixedDate / 1000,
          value: 0.955,
          label: 'instance=one',
        }));
      });

      it('Multiple instances', async () => {
        stubRangeQuery(method, multipleInstancesDataSet);

        const multipleInstancesResult = await service[method](Interval.Last5m);
        expect(multipleInstancesResult)
            .toEqual(Array.of<TimeSeriesPoint>(
                {
                  timestamp: fixedDate / 1000,
                  value: 0.010,
                  label: 'instance=one',
                },
                {
                  timestamp: fixedDate / 1000,
                  value: 0.020,
                  label: 'instance=two',
                },
                {
                  timestamp: fixedDate / 1000,
                  value: 0.030,
                  label: 'instance=three',
                }));
      });
    });
  });

  describe('getChartsLink', () => {
    it('Returns the configured dashboard url and button text', () => {
      expect(service.getChartsLink()).toEqual({
        resourceChartsLink: undefined,
        resourceChartsLinkText: 'View in dashboard',
      });
      expect(prometheusDriverClient.rangeQuery).not.toHaveBeenCalled();
    });
  });
});
Loading
Loading