diff --git a/charts/spark-thrift-server/Chart.yaml b/charts/spark-thrift-server/Chart.yaml
new file mode 100644
index 00000000..ed7a7193
--- /dev/null
+++ b/charts/spark-thrift-server/Chart.yaml
@@ -0,0 +1,24 @@
+apiVersion: v2
+name: spark-thrift-server
+description: A helm chart to deploy the Spark Thrift Server
+
+# A chart can be either an 'application' or a 'library' chart.
+#
+# Application charts are a collection of templates that can be packaged into versioned archives
+# to be deployed.
+#
+# Library charts provide useful utilities or functions for the chart developer. They're included as
+# a dependency of application charts to inject those utilities and functions into the rendering
+# pipeline. Library charts do not define any templates and therefore cannot be deployed.
+type: application
+
+# This is the chart version. This version number should be incremented each time you make changes
+# to the chart and its templates, including the app version.
+# Versions are expected to follow Semantic Versioning (https://semver.org/)
+version: 0.1.0
+
+# This is the version number of the application being deployed. This version number should be
+# incremented each time you make changes to the application. Versions are not expected to
+# follow Semantic Versioning. They should reflect the version the application is using.
+# It is recommended to use it with quotes.
+appVersion: "3.3.1"
diff --git a/charts/spark-thrift-server/run.sh b/charts/spark-thrift-server/run.sh
new file mode 100644
index 00000000..bc68622a
--- /dev/null
+++ b/charts/spark-thrift-server/run.sh
@@ -0,0 +1,43 @@
+#!/bin/bash
+#
+# Container entrypoint: starts the Spark Thrift Server in the foreground.
+#
+# Rendered through Helm's tpl function before being stored in the ConfigMap:
+# template actions are resolved at render time, shell variables at run time.
+#
+# Required environment (set by the Deployment through the downward API):
+#   POD_NAME  name of the pod running this script
+#   POD_IP    IP address of that pod
+set -euo pipefail
+
+: "${POD_NAME:?POD_NAME must be set}"
+: "${POD_IP:?POD_IP must be set}"
+
+export SPARK_LOG_DIR=/tmp
+export SPARK_NO_DAEMONIZE=true
+
+# Collect the arguments in an array so optional flags can be appended without
+# relying on a trailing line continuation.
+args=(
+  "--master=k8s://https://{{ .Values.spark.k8sApiServerHost }}:{{ .Values.spark.k8sApiServerPort }}"
+  "--conf=spark.driver.host=${POD_IP}"
+  "--conf=spark.dynamicAllocation.enabled=true"
+  # TODO: make configurable
+  "--conf=spark.dynamicAllocation.minExecutors=1"
+  "--conf=spark.dynamicAllocation.shuffleTracking.enabled=true"
+  "--conf=spark.kubernetes.container.image={{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
+  "--conf=spark.kubernetes.executor.podNamePrefix=${POD_NAME}-${RANDOM}"
+  "--conf=spark.kubernetes.driver.pod.name=${POD_NAME}"
+  # TODO: make configurable
+  "--conf=spark.kubernetes.executor.request.cores=400m"
+  "--conf=spark.kubernetes.namespace={{ .Release.Namespace }}"
+  "--conf=spark.jars.ivy=/tmp/ivy"
+  "--conf=spark.jars.packages={{ include "spark-thrift-server.sparkPackages" . }}"
+  "--conf=spark.ui.enabled={{ .Values.spark.enableWebUi | ternary "true" "false" }}"
+  {{- if .Values.spark.enableDelta }}
+  "--conf=spark.sql.extensions=io.delta.sql.DeltaSparkSessionExtension"
+  "--conf=spark.sql.catalog.spark_catalog=org.apache.spark.sql.delta.catalog.DeltaCatalog"
+  {{- end }}
+)
+
+exec /opt/spark/sbin/start-thriftserver.sh "${args[@]}"
diff --git a/charts/spark-thrift-server/templates/_helpers.tpl b/charts/spark-thrift-server/templates/_helpers.tpl
new file mode 100644
index 00000000..99f9df9c
--- /dev/null
+++ b/charts/spark-thrift-server/templates/_helpers.tpl
@@ -0,0 +1,70 @@
+{{/*
+Expand the name of the chart.
+*/}}
+{{- define "spark-thrift-server.name" -}}
+{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Create a default fully qualified app name.
+We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
+If release name contains chart name it will be used as a full name.
+*/}}
+{{- define "spark-thrift-server.fullname" -}}
+{{- if .Values.fullnameOverride }}
+{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- $name := default .Chart.Name .Values.nameOverride }}
+{{- if contains $name .Release.Name }}
+{{- .Release.Name | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
+{{- end }}
+{{- end }}
+{{- end }}
+
+{{/*
+Create chart name and version as used by the chart label.
+*/}}
+{{- define "spark-thrift-server.chart" -}}
+{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Common labels
+*/}}
+{{- define "spark-thrift-server.labels" -}}
+helm.sh/chart: {{ include "spark-thrift-server.chart" . }}
+{{ include "spark-thrift-server.selectorLabels" . }}
+{{- if .Chart.AppVersion }}
+app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
+{{- end }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+{{- end }}
+
+{{/*
+Selector labels
+*/}}
+{{- define "spark-thrift-server.selectorLabels" -}}
+app.kubernetes.io/name: {{ include "spark-thrift-server.name" . }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+{{- end }}
+
+{{/*
+Create the name of the service account to use
+*/}}
+{{- define "spark-thrift-server.serviceAccountName" -}}
+{{- default (include "spark-thrift-server.fullname" .) .Values.serviceAccount.name }}
+{{- end }}
+
+
+{{/*
+Build the list of maven packages to be added at run time
+*/}}
+{{- define "spark-thrift-server.sparkPackages" -}}
+{{- $local := list -}}
+{{- if .Values.spark.enableDelta -}}
+{{- $local = printf "io.delta:delta-core_2.12:%s" .Values.spark.deltaVersion | append $local -}}
+{{- end -}}
+{{- join "," $local -}}
+{{- end }}
diff --git a/charts/spark-thrift-server/templates/configmap.yaml b/charts/spark-thrift-server/templates/configmap.yaml
new file mode 100644
index 00000000..c5b6eb3e
--- /dev/null
+++ b/charts/spark-thrift-server/templates/configmap.yaml
@@ -0,0 +1,10 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ include "spark-thrift-server.fullname" . }}
+  labels:
+    {{- include "spark-thrift-server.labels" . | nindent 4 }}
+binaryData:
+{{- range $path, $_ := .Files.Glob "run.sh" }}
+  run.sh: {{ tpl ($.Files.Get $path) $ | b64enc }}
+{{ end }}
diff --git a/charts/spark-thrift-server/templates/deployment.yaml b/charts/spark-thrift-server/templates/deployment.yaml
new file mode 100644
index 00000000..59910b32
--- /dev/null
+++ b/charts/spark-thrift-server/templates/deployment.yaml
@@ -0,0 +1,76 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ include "spark-thrift-server.fullname" . }}
+  labels:
+    {{- include "spark-thrift-server.labels" . | nindent 4 }}
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      {{- include "spark-thrift-server.selectorLabels" . | nindent 6 }}
+  template:
+    metadata:
+      annotations:
+        # Roll the pod whenever the rendered run.sh changes: the script is mounted
+        # with subPath, so a running container never sees ConfigMap updates.
+        checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum | quote }}
+        {{- with .Values.podAnnotations }}
+        {{- toYaml . | nindent 8 }}
+        {{- end }}
+      labels:
+        {{- include "spark-thrift-server.selectorLabels" . | nindent 8 }}
+    spec:
+      serviceAccountName: {{ include "spark-thrift-server.serviceAccountName" . }}
+      containers:
+        - name: {{ .Chart.Name }}
+          image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
+
+          {{/*
+          securityContext:
+            runAsUser: 0
+          */}}
+
+          args:
+            - "/scripts/run.sh"
+          env:
+            - name: POD_NAME
+              valueFrom:
+                fieldRef:
+                  fieldPath: metadata.name
+            - name: POD_IP
+              valueFrom:
+                fieldRef:
+                  fieldPath: status.podIP
+          imagePullPolicy: {{ .Values.image.pullPolicy }}
+          ports:
+            - name: thrift
+              containerPort: 10000
+              protocol: TCP
+            {{- if .Values.spark.enableWebUi }}
+            - name: ui
+              containerPort: 4040
+              protocol: TCP
+            {{- end }}
+          livenessProbe:
+            tcpSocket:
+              port: thrift
+          readinessProbe:
+            tcpSocket:
+              port: thrift
+          startupProbe:
+            tcpSocket:
+              port: thrift
+            periodSeconds: 10
+            failureThreshold: 18 # 3 minutes
+          resources:
+            {{- toYaml .Values.resources | nindent 12 }}
+          volumeMounts:
+            - name: scripts
+              mountPath: /scripts/run.sh
+              subPath: run.sh
+      volumes:
+        - name: scripts
+          configMap:
+            name: {{ include "spark-thrift-server.fullname" . }}
+            defaultMode: 0755
diff --git a/charts/spark-thrift-server/templates/role-binding.yaml b/charts/spark-thrift-server/templates/role-binding.yaml
new file mode 100644
index 00000000..cef80c7c
--- /dev/null
+++ b/charts/spark-thrift-server/templates/role-binding.yaml
@@ -0,0 +1,16 @@
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: {{ include "spark-thrift-server.fullname" . }}
+  labels:
+    {{- include "spark-thrift-server.labels" . | nindent 4 }}
+subjects:
+- kind: ServiceAccount
+  name: {{ include "spark-thrift-server.serviceAccountName" . }}
+  namespace: {{ .Release.Namespace }}
+roleRef:
+  kind: ClusterRole
+  name: edit # TODO: still broader than needed; replace with a dedicated Role limited to pods, services, configmaps and persistentvolumeclaims
+  apiGroup: rbac.authorization.k8s.io
+
+
diff --git a/charts/spark-thrift-server/templates/serviceaccount.yaml b/charts/spark-thrift-server/templates/serviceaccount.yaml
new file mode 100644
index 00000000..1d0b6c27
--- /dev/null
+++ b/charts/spark-thrift-server/templates/serviceaccount.yaml
@@ -0,0 +1,10 @@
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: {{ include "spark-thrift-server.serviceAccountName" . }}
+  labels:
+    {{- include "spark-thrift-server.labels" . | nindent 4 }}
+  {{- with .Values.serviceAccount.annotations }}
+  annotations:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
diff --git a/charts/spark-thrift-server/templates/thrift-service.yaml b/charts/spark-thrift-server/templates/thrift-service.yaml
new file mode 100644
index 00000000..29b3700e
--- /dev/null
+++ b/charts/spark-thrift-server/templates/thrift-service.yaml
@@ -0,0 +1,15 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ include "spark-thrift-server.fullname" . }}
+  labels:
+    {{- include "spark-thrift-server.labels" . | nindent 4 }}
+spec:
+  type: {{ .Values.service.type }}
+  ports:
+    - port: {{ .Values.service.port }}
+      targetPort: thrift
+      protocol: TCP
+      name: thrift
+  selector:
+    {{- include "spark-thrift-server.selectorLabels" . | nindent 4 }}
diff --git a/charts/spark-thrift-server/values.yaml b/charts/spark-thrift-server/values.yaml
new file mode 100644
index 00000000..a08ec78b
--- /dev/null
+++ b/charts/spark-thrift-server/values.yaml
@@ -0,0 +1,39 @@
+
+image:
+  repository: apache/spark
+  pullPolicy: IfNotPresent
+  # Overrides the image tag whose default is the chart appVersion.
+  tag: ""
+
+nameOverride: ""
+fullnameOverride: ""
+
+serviceAccount:
+  annotations: {}
+  name: ""
+
+podAnnotations: {}
+
+service:
+  type: ClusterIP
+  port: 10015
+
+resources: {}
+  # We usually recommend not to specify default resources and to leave this as a conscious
+  # choice for the user. This also increases chances charts run on environments with little
+  # resources, such as Minikube. If you do want to specify resources, uncomment the following
+  # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
+  # limits:
+  #   cpu: 100m
+  #   memory: 128Mi
+  # requests:
+  #   cpu: 100m
+  #   memory: 128Mi
+
+spark:
+  k8sApiServerHost: kubernetes.default
+  k8sApiServerPort: 443
+  deltaVersion: "2.2.0" # keep quoted so YAML never parses the version as a number
+
+  enableDelta: false
+  enableWebUi: true