In big data scenarios, ECS instances of the ecs.d1ne.6xlarge type are used to run Spark jobs. Each instance ships with twelve 5 TB HDD data disks, and each disk must be partitioned, formatted, and mounted by hand. When the cluster has many nodes, doing this manually is tedious and time-consuming. This topic describes how to use LVM volumes to simplify formatting and mounting the data disks.
Prerequisites
- A managed Kubernetes cluster is created.
- The data disk devices on the cluster's worker nodes have not been manually formatted or mounted.
Deploy the LVM plug-in
Use the following templates to register the CSI driver and to deploy the CSI node plug-in DaemonSet, the CSI provisioner Deployment, and the node-storage-manager DaemonSet.
apiVersion: storage.k8s.io/v1beta1
kind: CSIDriver
metadata:
  name: localplugin.csi.alibabacloud.com
spec:
  attachRequired: false
  podInfoOnMount: true
---
kind: DaemonSet
apiVersion: apps/v1
metadata:
  name: csi-local-plugin
  namespace: kube-system
spec:
  selector:
    matchLabels:
      app: csi-local-plugin
  template:
    metadata:
      labels:
        app: csi-local-plugin
    spec:
      tolerations:
        - operator: Exists
      serviceAccount: admin
      priorityClassName: system-node-critical
      hostNetwork: true
      hostPID: true
      containers:
        - name: driver-registrar
          image: registry.cn-hangzhou.aliyuncs.com/acs/csi-node-driver-registrar:v1.1.0
          imagePullPolicy: Always
          args:
            - "--v=5"
            - "--csi-address=/csi/csi.sock"
            - "--kubelet-registration-path=/var/lib/kubelet/csi-plugins/localplugin.csi.alibabacloud.com/csi.sock"
          env:
            - name: KUBE_NODE_NAME
              valueFrom:
                fieldRef:
                  apiVersion: v1
                  fieldPath: spec.nodeName
          volumeMounts:
            - name: plugin-dir
              mountPath: /csi
            - name: registration-dir
              mountPath: /registration
        - name: csi-localplugin
          securityContext:
            privileged: true
            capabilities:
              add: ["SYS_ADMIN"]
            allowPrivilegeEscalation: true
          image: registry.cn-hangzhou.aliyuncs.com/acs/csi-plugin:v1.14.8.41-bce68b74-aliyun
          imagePullPolicy: "Always"
          args:
            - "--endpoint=$(CSI_ENDPOINT)"
            - "--v=5"
            - "--nodeid=$(KUBE_NODE_NAME)"
            - "--driver=localplugin.csi.alibabacloud.com"
          env:
            - name: KUBE_NODE_NAME
              valueFrom:
                fieldRef:
                  apiVersion: v1
                  fieldPath: spec.nodeName
            - name: DRIVER_VENDOR
              value: localplugin.csi.alibabacloud.com
            - name: CSI_ENDPOINT
              value: unix://var/lib/kubelet/csi-plugins/localplugin.csi.alibabacloud.com/csi.sock
          volumeMounts:
            - name: pods-mount-dir
              mountPath: /var/lib/kubelet
              mountPropagation: "Bidirectional"
            - mountPath: /dev
              mountPropagation: "HostToContainer"
              name: host-dev
            - mountPath: /var/log/
              name: host-log
      volumes:
        - name: plugin-dir
          hostPath:
            path: /var/lib/kubelet/csi-plugins/localplugin.csi.alibabacloud.com
            type: DirectoryOrCreate
        - name: registration-dir
          hostPath:
            path: /var/lib/kubelet/plugins_registry
            type: DirectoryOrCreate
        - name: pods-mount-dir
          hostPath:
            path: /var/lib/kubelet
            type: Directory
        - name: host-dev
          hostPath:
            path: /dev
        - name: host-log
          hostPath:
            path: /var/log/
  updateStrategy:
    rollingUpdate:
      maxUnavailable: 10%
    type: RollingUpdate
kind: Deployment
apiVersion: apps/v1
metadata:
  name: csi-local-provisioner
  namespace: kube-system
spec:
  selector:
    matchLabels:
      app: csi-local-provisioner
  replicas: 2
  template:
    metadata:
      labels:
        app: csi-local-provisioner
    spec:
      tolerations:
        - operator: "Exists"
      affinity:
        nodeAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 1
              preference:
                matchExpressions:
                  - key: node-role.kubernetes.io/master
                    operator: Exists
      priorityClassName: system-node-critical
      serviceAccount: admin
      hostNetwork: true
      containers:
        - name: external-local-provisioner
          image: registry.cn-hangzhou.aliyuncs.com/acs/csi-provisioner:v1.6.0-b6f763a43-ack
          args:
            - "--csi-address=$(ADDRESS)"
            - "--feature-gates=Topology=True"
            - "--volume-name-prefix=lvm"
            - "--strict-topology=true"
            - "--timeout=150s"
            - "--extra-create-metadata=true"
            - "--enable-leader-election=true"
            - "--leader-election-type=leases"
            - "--retry-interval-start=500ms"
            - "--v=5"
          env:
            - name: ADDRESS
              value: /socketDir/csi.sock
          imagePullPolicy: "Always"
          volumeMounts:
            - name: socket-dir
              mountPath: /socketDir
        - name: external-local-resizer
          image: registry.cn-hangzhou.aliyuncs.com/acs/csi-resizer:v0.3.0
          args:
            - "--v=5"
            - "--csi-address=$(ADDRESS)"
            - "--leader-election"
          env:
            - name: ADDRESS
              value: /socketDir/csi.sock
          imagePullPolicy: "Always"
          volumeMounts:
            - name: socket-dir
              mountPath: /socketDir/
      volumes:
        - name: socket-dir
          hostPath:
            path: /var/lib/kubelet/csi-plugins/localplugin.csi.alibabacloud.com
            type: DirectoryOrCreate
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: node-storage-manager
  namespace: kube-system
spec:
  selector:
    matchLabels:
      app: node-storage-manager
  template:
    metadata:
      labels:
        app: node-storage-manager
    spec:
      containers:
        - args:
            - --nodeid=$(KUBE_NODE_NAME)
          env:
            - name: KUBE_NODE_NAME
              valueFrom:
                fieldRef:
                  apiVersion: v1
                  fieldPath: spec.nodeName
          image: registry.cn-hangzhou.aliyuncs.com/plugins/node-storage-manager:v1.14.8-bac4c12
          imagePullPolicy: Always
          name: node-storage-manager
          securityContext:
            allowPrivilegeEscalation: true
            capabilities:
              add:
                - SYS_ADMIN
            privileged: true
          volumeMounts:
            - mountPath: /dev
              mountPropagation: HostToContainer
              name: host-dev
            - mountPath: /var/log/
              name: host-log
            - mountPath: /host/etc
              name: etc
      hostNetwork: true
      hostPID: true
      priorityClassName: system-node-critical
      restartPolicy: Always
      serviceAccount: admin
      serviceAccountName: admin
      tolerations:
        - operator: Exists
      volumes:
        - hostPath:
            path: /dev
            type: ""
          name: host-dev
        - hostPath:
            path: /var/log/
            type: ""
          name: host-log
        - hostPath:
            path: /etc
            type: ""
          name: etc
  updateStrategy:
    rollingUpdate:
      maxUnavailable: 10%
    type: RollingUpdate
Configure the node white list, black list, and pvConfig as needed. Through the following ConfigMap, you enable LVM on the target nodes:
apiVersion: v1
kind: ConfigMap
metadata:
  name: cm-node-storage
  namespace: kube-system
data:
  volumegroup.json: |-
    {
      "volumegroup1": {
        "nodeList": {
          "whiteList": {
            "nodeName": ["all-cluster-nodes"],
            "nodeLabel": ["diskType=ssd", "diskType=hdd"]
          },
          "blackList": {
            "nodeName": [],
            "nodeLabel": []
          }
        },
        "pvConfig": {
          "globalConfig": ["aliyun-local-disk"],
          "specialConfig": {
            "nodeName": {},
            "nodeLabel": {}
          }
        },
        "status": "In_Use"
      }
    }
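To create a volume group only on a subset of nodes, narrow the white list instead of using the all-cluster-nodes keyword. The following is a minimal sketch of a second entry you could add next to volumegroup1 in volumegroup.json; the volumegroup2 name and the diskType=hdd node label are illustrative assumptions, and you should verify against the plug-in's matching rules that a label-only white list behaves as expected:
    "volumegroup2": {
      "nodeList": {
        "whiteList": {
          "nodeName": [],
          "nodeLabel": ["diskType=hdd"]
        },
        "blackList": {
          "nodeName": [],
          "nodeLabel": []
        }
      },
      "pvConfig": {
        "globalConfig": ["aliyun-local-disk"],
        "specialConfig": {
          "nodeName": {},
          "nodeLabel": {}
        }
      },
      "status": "In_Use"
    }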
Configure a PVC
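The PVC template is not shown on this page, so the following is a minimal sketch. The StorageClass name csi-local-lvm is an assumption; its vgName must match the volumegroup1 volume group defined in the ConfigMap above, and the PVC name lvm-pvc-im is the name referenced by the Alluxio tieredstore configuration below. Create the PVC in the namespace into which Alluxio will be deployed (see the notes at the end of this topic).
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: csi-local-lvm          # assumed name; any name works as long as the PVC references it
provisioner: localplugin.csi.alibabacloud.com
parameters:
  volumeType: LVM              # provision an LVM logical volume
  vgName: volumegroup1         # must match the volume group in cm-node-storage
  fsType: ext4
reclaimPolicy: Delete
volumeBindingMode: WaitForFirstConsumer
allowVolumeExpansion: true
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: lvm-pvc-im             # referenced by the tieredstore configuration below
spec:
  accessModes:
    - ReadWriteOnce
  storageClassName: csi-local-lvm
  resources:
    requests:
      storage: 12000Gi         # sized to match the 12000G tieredstore quota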
Configure Alluxio
Use the following template to configure Alluxio.
# The Alluxio Open Foundation licenses this work under the Apache License, version 2.0
# (the "License"). You may not use this work except in compliance with the License, which is
# available at www.apache.org/licenses/LICENSE-2.0
#
# This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
# either express or implied, as more fully set forth in the License.
#
# See the NOTICE file distributed with this work for information regarding copyright ownership.
#
# This should not be modified in the usual case.
fullnameOverride: alluxio
## Common ##
# Docker Image
image: registry-vpc.cn-beijing.aliyuncs.com/alluxio/alluxio
imageTag: 2.3.0
imagePullPolicy: IfNotPresent
# Security Context
user: 0
group: 0
fsGroup: 0
# Site properties for all the components
properties:
fs.oss.accessKeyId: YOUR-ACCESS-KEY-ID
fs.oss.accessKeySecret: YOUR-ACCESS-KEY-SECRET
fs.oss.endpoint: oss-cn-beijing-internal.aliyuncs.com
alluxio.master.mount.table.root.ufs: oss://cloudnativeai/
alluxio.master.persistence.blacklist: .staging,_temporary
alluxio.security.stale.channel.purge.interval: 365d
alluxio.user.metrics.collection.enabled: 'true'
alluxio.user.short.circuit.enabled: 'true'
alluxio.user.file.write.tier.default: 1
alluxio.user.block.size.bytes.default: 64MB #default 64MB
alluxio.user.file.writetype.default: CACHE_THROUGH
alluxio.user.file.metadata.load.type: ONCE
alluxio.user.file.readtype.default: CACHE
#alluxio.worker.allocator.class: alluxio.worker.block.allocator.MaxFreeAllocator
alluxio.worker.allocator.class: alluxio.worker.block.allocator.RoundRobinAllocator
alluxio.worker.file.buffer.size: 128MB
alluxio.worker.evictor.class: alluxio.worker.block.evictor.LRUEvictor
alluxio.job.master.client.threads: 5000
alluxio.job.worker.threadpool.size: 300
# Recommended JVM Heap options for running in Docker
# Ref: https://developers.redhat.com/blog/2017/03/14/java-inside-docker/
# These JVM options are common to all Alluxio services
# jvmOptions:
# - "-XX:+UnlockExperimentalVMOptions"
# - "-XX:+UseCGroupMemoryLimitForHeap"
# - "-XX:MaxRAMFraction=2"
# Mount Persistent Volumes to all components
# mounts:
# - name: <persistentVolume claimName>
# path: <mountPath>
# Use labels to run Alluxio on a subset of the K8s nodes
## Master ##
master:
count: 1 # Controls the number of StatefulSets. For multiMaster mode increase this to >1.
replicas: 1 # Controls #replicas in a StatefulSet and should not be modified in the usual case.
enableLivenessProbe: false
enableReadinessProbe: false
args: # Arguments to Docker entrypoint
- master-only
- --no-format
# Properties for the master component
properties:
# Example: use ROCKS DB instead of Heap
# alluxio.master.metastore: ROCKS
# alluxio.master.metastore.dir: /metastore
resources:
# The default xmx is 8G
limits:
cpu: "4"
memory: "8G"
requests:
cpu: "1"
memory: "1G"
ports:
embedded: 19200
rpc: 19998
web: 19999
hostPID: true
hostNetwork: true
# dnsPolicy will be ClusterFirstWithHostNet if hostNetwork: true
# and ClusterFirst if hostNetwork: false
# You can specify dnsPolicy here to override this inference
# dnsPolicy: ClusterFirst
# JVM options specific to the master container
jvmOptions:
nodeSelector:
alluxio: 'true'
jobMaster:
args:
- job-master
# Properties for the jobMaster component
enableLivenessProbe: false
enableReadinessProbe: false
properties:
resources:
limits:
cpu: "4"
memory: "8G"
requests:
cpu: "1"
memory: "1G"
ports:
embedded: 20003
rpc: 20001
web: 20002
# JVM options specific to the jobMaster container
jvmOptions:
# Alluxio supports journal type of UFS and EMBEDDED
# UFS journal with HDFS example
# journal:
# type: "UFS"
# folder: "hdfs://{$hostname}:{$hostport}/journal"
# EMBEDDED journal to /journal example
# journal:
# type: "EMBEDDED"
# folder: "/journal"
journal:
type: "UFS" # "UFS" or "EMBEDDED"
ufsType: "local" # Ignored if type is "EMBEDDED". "local" or "HDFS"
folder: "/journal" # Master journal folder
# volumeType controls the type of journal volume.
# It can be "persistentVolumeClaim" or "emptyDir"
volumeType: emptyDir
size: 1Gi
# Attributes to use when the journal is persistentVolumeClaim
storageClass: "standard"
accessModes:
- ReadWriteOnce
# Attributes to use when the journal is emptyDir
medium: ""
# Configuration for journal formatting job
format:
runFormat: false # Change to true to format journal
job:
activeDeadlineSeconds: 30
ttlSecondsAfterFinished: 10
resources:
limits:
cpu: "4"
memory: "8G"
requests:
cpu: "1"
memory: "1G"
# You can enable metastore to use ROCKS DB instead of Heap
# metastore:
# volumeType: persistentVolumeClaim # Options: "persistentVolumeClaim" or "emptyDir"
# size: 1Gi
# mountPath: /metastore
# # Attributes to use when the metastore is persistentVolumeClaim
# storageClass: "standard"
# accessModes:
# - ReadWriteOnce
# # Attributes to use when the metastore is emptyDir
# medium: ""
## Worker ##
worker:
args:
- worker-only
- --no-format
enableLivenessProbe: false
enableReadinessProbe: false
# Properties for the worker component
properties:
resources:
limits:
cpu: "4"
memory: "4G"
requests:
cpu: "1"
memory: "2G"
ports:
rpc: 29999
web: 30000
hostPID: true
hostNetwork: true
# dnsPolicy will be ClusterFirstWithHostNet if hostNetwork: true
# and ClusterFirst if hostNetwork: false
# You can specify dnsPolicy here to override this inference
# dnsPolicy: ClusterFirst
# JVM options specific to the worker container
jvmOptions:
nodeSelector:
alluxio: 'true'
jobWorker:
args:
- job-worker
enableLivenessProbe: false
enableReadinessProbe: false
# Properties for the jobWorker component
properties:
resources:
limits:
cpu: "4"
memory: "4G"
requests:
cpu: "1"
memory: "1G"
ports:
rpc: 30001
data: 30002
web: 30003
# JVM options specific to the jobWorker container
jvmOptions:
# Tiered Storage
# emptyDir example
# - level: 0
# alias: MEM
# mediumtype: MEM
# path: /dev/shm
# type: emptyDir
# quota: 1G
#
# hostPath example
# - level: 0
# alias: MEM
# mediumtype: MEM
# path: /dev/shm
# type: hostPath
# quota: 1G
#
# persistentVolumeClaim example
# - level: 1
# alias: SSD
# mediumtype: SSD
# type: persistentVolumeClaim
# name: alluxio-ssd
# path: /dev/ssd
# quota: 10G
#
# multi-part mediumtype example
# - level: 1
# alias: SSD,HDD
# mediumtype: SSD,HDD
# type: persistentVolumeClaim
# name: alluxio-ssd,alluxio-hdd
# path: /dev/ssd,/dev/hdd
# quota: 10G,10G
tieredstore:
levels:
- level: 0
alias: HDD
mediumtype: HDD-0
path: /mnt/disk1
type: persistentVolumeClaim
name: lvm-pvc-im
quota: 12000G
high: 0.95
low: 0.7
# Short circuit related properties
shortCircuit:
enabled: true
# The policy for short circuit can be "local" or "uuid",
# local means the cache directory is in the same mount namespace,
# uuid means interact with domain socket
policy: uuid
# volumeType controls the type of shortCircuit volume.
# It can be "persistentVolumeClaim" or "hostPath"
volumeType: hostPath
size: 1Mi
# Attributes to use if the domain socket volume is PVC
pvcName: alluxio-worker-domain-socket
accessModes:
- ReadWriteOnce
storageClass: standard
# Attributes to use if the domain socket volume is hostPath
hostPath: "/tmp/alluxio-domain" # The hostPath directory to use
## FUSE ##
fuse:
image: registry-vpc.cn-beijing.aliyuncs.com/alluxio/alluxio-fuse
imageTag: 2.3.0
imagePullPolicy: IfNotPresent
# Change both to true to deploy FUSE
enabled: false
clientEnabled: false
# Properties for the FUSE component
properties:
# Customize the MaxDirectMemorySize
# These options are specific to the FUSE daemon
jvmOptions:
- "-XX:MaxDirectMemorySize=2g"
hostNetwork: true
hostPID: true
dnsPolicy: ClusterFirstWithHostNet
user: 0
group: 0
fsGroup: 0
args:
- fuse
- --fuse-opts=allow_other
# Mount path in the host
mountPath: /mnt/alluxio-fuse
resources:
requests:
cpu: "0.5"
memory: "1G"
limits:
cpu: "4"
memory: "4G"
nodeSelector:
alluxio: 'true'
## Secrets ##
# Format: (<name>:<mount path under /secrets/>):
# secrets:
# master: # Shared by master and jobMaster containers
# alluxio-hdfs-config: hdfsConfig
# worker: # Shared by worker and jobWorker containers
# alluxio-hdfs-config: hdfsConfig
Note
- tieredstore must be configured with the PVC created above:
tieredstore:
  levels:
    - level: 0
      alias: HDD
      mediumtype: HDD-0
      path: /mnt/disk1
      type: persistentVolumeClaim
      name: lvm-pvc-im
      quota: 12000G
      high: 0.95
      low: 0.7
- Alluxio must be deployed in the same namespace as the PVC.