Skip to content

Commit e8866d0

Browse files
committed
Added option to store datanode data on persistent volumes
1 parent a28441b commit e8866d0

File tree

3 files changed

+110
-1
lines changed

3 files changed

+110
-1
lines changed

charts/README.md

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,8 @@ HDFS on K8s supports the following features:
2626
file data. File data should also survive datanode crash or restart. HDFS on
2727
K8s stores the file data on the local disks of the K8s cluster nodes using
2828
K8s HostPath volumes. (We plan to switch to a better mechanism, K8s
29-
persistent local volumes)
29+
persistent local volumes).
30+
HDFS on K8s supports storing file data on persistent volumes as well.
3031
- Kerberos: Vanilla HDFS is not secure. Intruders can easily write custom
3132
client code, put a fake user name in requests and steal data. Production
3233
HDFS often secures itself using Kerberos. HDFS on K8s supports Kerberos.
@@ -368,6 +369,36 @@ node when the pod restarts.
368369
--set hdfs-simple-namenode-k8s.nodeSelector.hdfs-namenode-selector=hdfs-namenode-0
369370
```
370371

372+
### Using persistent volumes for datanodes
373+
374+
You can store file data on persistent volumes instead of hostPath volumes.
375+
In this case, datanode pods are managed by StatefulSet instead of DaemonSet.
376+
377+
To install the chart in this mode, run
378+
379+
```
380+
$ helm install -n my-hdfs charts/hdfs-k8s \
381+
--set hdfs-datanode-k8s.persistence.enabled=true
382+
```
383+
384+
By default, 2 datanodes are created, each with a 100Gi volume.
385+
386+
You can customize the number of datanodes. For example, to create 3 datanodes, run
387+
388+
```
389+
$ helm install -n my-hdfs charts/hdfs-k8s \
390+
--set hdfs-datanode-k8s.persistence.enabled=true \
391+
--set hdfs-datanode-k8s.persistence.replicas=3
392+
```
393+
394+
You can also customize other persistence properties, in the same way as for namenodes. For example
395+
396+
```
397+
$ helm install -n my-hdfs charts/hdfs-k8s \
398+
--set hdfs-datanode-k8s.persistence.enabled=true \
399+
--set hdfs-datanode-k8s.persistence.size=200Gi
400+
```
401+
371402
# Security
372403

373404
## K8s secret containing Kerberos keytab files

charts/hdfs-datanode-k8s/templates/datanode-daemonset.yaml

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,17 +32,47 @@ data:
3232
done
3333
echo $_CLUSTER_ID | grep -q -v null
3434
---
35+
{{- if .Values.persistence.enabled }}
36+
# Required to generate StatefulSet pod names.
37+
apiVersion: v1
38+
kind: Service
39+
metadata:
40+
name: {{ template "hdfs-k8s.datanode.fullname" . }}
41+
labels:
42+
app: {{ template "hdfs-k8s.datanode.name" . }}
43+
chart: {{ template "hdfs-k8s.subchart" . }}
44+
release: {{ .Release.Name }}
45+
annotations:
46+
# TODO: Deprecated. Replace tolerate-unready-endpoints with
47+
# v1.Service.PublishNotReadyAddresses.
48+
service.alpha.kubernetes.io/tolerate-unready-endpoints: "true"
49+
spec:
50+
clusterIP: None
51+
selector:
52+
app: {{ template "hdfs-k8s.datanode.name" . }}
53+
release: {{ .Release.Name }}
54+
{{- end }}
55+
---
56+
{{- if .Values.persistence.enabled }}
57+
apiVersion: apps/v1beta1
58+
kind: StatefulSet
59+
{{- else }}
3560
# Deleting a daemonset may need some trick. See
3661
# https://github.com/kubernetes/kubernetes/issues/33245#issuecomment-261250489
3762
apiVersion: extensions/v1beta1
3863
kind: DaemonSet
64+
{{- end }}
3965
metadata:
4066
name: {{ template "hdfs-k8s.datanode.fullname" . }}
4167
labels:
4268
app: {{ template "hdfs-k8s.datanode.name" . }}
4369
chart: {{ template "hdfs-k8s.subchart" . }}
4470
release: {{ .Release.Name }}
4571
spec:
72+
{{- if .Values.persistence.enabled }}
73+
serviceName: {{ template "hdfs-k8s.datanode.fullname" . }}
74+
# The datanode count is defined as persistence.replicas in values.yaml.
replicas: {{ .Values.persistence.replicas }}
75+
{{- end }}
4676
template:
4777
metadata:
4878
labels:
@@ -115,10 +145,15 @@ spec:
115145
- name: hdfs-config
116146
mountPath: /etc/hadoop-custom-conf
117147
readOnly: true
148+
{{- if .Values.persistence.enabled }}
149+
- name: hdfs-data-0
150+
mountPath: /hadoop/dfs/data/0
151+
{{- else }}
118152
{{- range $index, $path := .Values.global.dataNodeHostPath }}
119153
- name: hdfs-data-{{ $index }}
120154
mountPath: /hadoop/dfs/data/{{ $index }}
121155
{{- end }}
156+
{{- end }}
122157
{{- if .Values.global.kerberosEnabled }}
123158
- name: kerberos-config
124159
mountPath: /etc/krb5.conf
@@ -167,11 +202,13 @@ spec:
167202
configMap:
168203
name: {{ template "hdfs-k8s.datanode.fullname" . }}-scripts
169204
defaultMode: 0744
205+
{{- if not .Values.persistence.enabled }}
170206
{{- range $index, $path := .Values.global.dataNodeHostPath }}
171207
- name: hdfs-data-{{ $index }}
172208
hostPath:
173209
path: {{ $path }}
174210
{{- end }}
211+
{{- end }}
175212
- name: hdfs-config
176213
configMap:
177214
name: {{ template "hdfs-k8s.config.fullname" . }}
@@ -189,3 +226,25 @@ spec:
189226
emptyDir: {}
190227
{{- end }}
191228
{{- end }}
229+
{{- if .Values.persistence.enabled }}
230+
volumeClaimTemplates:
231+
- metadata:
232+
name: hdfs-data-0
233+
spec:
234+
accessModes:
235+
- {{ .Values.persistence.accessMode | quote }}
236+
resources:
237+
requests:
238+
storage: {{ .Values.persistence.size | quote }}
239+
{{- if .Values.persistence.storageClass }}
240+
{{- if (eq "-" .Values.persistence.storageClass) }}
241+
storageClassName: ""
242+
{{- else }}
243+
storageClassName: "{{ .Values.persistence.storageClass }}"
244+
{{- end }}
245+
{{- end }}
246+
{{- if .Values.persistence.selector }}
247+
selector:
248+
{{ toYaml .Values.persistence.selector | indent 10 }}
249+
{{- end }}
250+
{{- end }}

charts/hdfs-k8s/values.yaml

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,25 @@ hdfs-datanode-k8s:
123123
nodeSelector: {}
124124
tolerations: []
125125
affinity: {}
126+
persistence:
127+
enabled: false
128+
replicas: 2
129+
accessMode: ReadWriteOnce
130+
size: 100Gi
131+
## Persistent Volume Storage Class
132+
## If defined, storageClassName: <storageClass>
133+
## If set to "-", storageClassName: "", which disables dynamic provisioning
134+
## If undefined (the default) or set to null, no storageClassName spec is
135+
## set, choosing the default provisioner. (gp2 on AWS, standard on
136+
## GKE, AWS & OpenStack)
137+
##
138+
# storageClass: "-"
139+
140+
## To choose a suitable persistent volume from available static volumes, selectors
141+
## are used.
142+
# selector:
143+
# matchLabels:
144+
# volume-type: hdfs-ssd
126145

127146
## ------------------------------------------------------------------------------
128147
## hdfs-krb5-k8s:

0 commit comments

Comments
 (0)