GPUカスタムメトリクスを使用したクラスタの自動スケーリング

要件:
手順
Last updated
Was this helpful?

Last updated
Was this helpful?
Was this helpful?
# Register the xplat-fke Helm chart repository, then refresh the local chart
# index. The lines are joined with "\" so the shell sees a single command.
helm repo add xplat-fke \
  https://registry.fke.fptcloud.com/chartrepo/xplat-fke \
  && helm repo update
# Install the kube-prometheus-stack chart into the "prometheus" namespace,
# creating the namespace if it does not exist yet.
# The chart reference sits on its own continued line: a "\ " in the middle of
# a line escapes the space instead of continuing the command.
helm install --wait --generate-name \
  -n prometheus --create-namespace \
  xplat-fke/kube-prometheus-stack
# Store the name(s) of the Service(s) in the "prometheus" namespace that carry
# the label app=kube-prometheus-stack-prometheus (one name per line).
prometheus_service=$(
  kubectl get services \
    --namespace prometheus \
    --selector app=kube-prometheus-stack-prometheus \
    --output jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}'
)
# Install prometheus-adapter and point it at the Prometheus Service whose name
# is held in ${prometheus_service}.
# The URL host uses the in-cluster Service DNS form
#   <service>.<namespace>.svc.cluster.local
# The --set argument is quoted so the expanded value is passed as one word.
helm install --wait --generate-name \
  -n prometheus --create-namespace \
  xplat-fke/prometheus-adapter \
  --set "prometheus.url=http://${prometheus_service}.prometheus.svc.cluster.local"
# Example of a resolved Prometheus Service host:
#   prometheus-kube-prometheus-prometheus.prometheus.svc.cluster.local
#
# HorizontalPodAutoscaler that scales the "my-gpu-app" Deployment between
# minReplicas and maxReplicas based on a per-pod DCGM GPU metric.
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: my-gpu-app
spec:
  maxReplicas: 3  # Update this accordingly
  minReplicas: 1
  scaleTargetRef:
    # Deployments are served from apps/v1; apps/v1beta1 was removed in
    # Kubernetes 1.16.
    apiVersion: apps/v1
    kind: Deployment
    name: my-gpu-app  # Name of the Deployment to autoscale
  metrics:
    - type: Pods  # Scale pods based on a GPU metric
      pods:
        metric:
          name: DCGM_FI_PROF_GR_ENGINE_ACTIVE  # Add the DCGM metric here accordingly
        target:
          type: AverageValue
          # Target value of the metric averaged across the pods.
          averageValue: 0.8