Lesson 3.3: Resources and Limits (CPU, Memory)
Installing the metrics-server add-on
[root@master reqandlimits]# cat metrics-server.yml
apiVersion: v1
kind: ServiceAccount
metadata:
  labels:
    k8s-app: metrics-server
  name: metrics-server
  namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  labels:
    k8s-app: metrics-server
    rbac.authorization.k8s.io/aggregate-to-admin: "true"
    rbac.authorization.k8s.io/aggregate-to-edit: "true"
    rbac.authorization.k8s.io/aggregate-to-view: "true"
  name: system:aggregated-metrics-reader
rules:
- apiGroups:
  - metrics.k8s.io
  resources:
  - pods
  - nodes
  verbs:
  - get
  - list
  - watch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  labels:
    k8s-app: metrics-server
  name: system:metrics-server
rules:
- apiGroups:
  - ""
  resources:
  - nodes/metrics
  verbs:
  - get
- apiGroups:
  - ""
  resources:
  - pods
  - nodes
  verbs:
  - get
  - list
  - watch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  labels:
    k8s-app: metrics-server
  name: metrics-server-auth-reader
  namespace: kube-system
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: extension-apiserver-authentication-reader
subjects:
- kind: ServiceAccount
  name: metrics-server
  namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  labels:
    k8s-app: metrics-server
  name: metrics-server:system:auth-delegator
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: system:auth-delegator
subjects:
- kind: ServiceAccount
  name: metrics-server
  namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  labels:
    k8s-app: metrics-server
  name: system:metrics-server
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: system:metrics-server
subjects:
- kind: ServiceAccount
  name: metrics-server
  namespace: kube-system
---
apiVersion: v1
kind: Service
metadata:
  labels:
    k8s-app: metrics-server
  name: metrics-server
  namespace: kube-system
spec:
  ports:
  - name: https
    port: 443
    protocol: TCP
    targetPort: https
  selector:
    k8s-app: metrics-server
---
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    k8s-app: metrics-server
  name: metrics-server
  namespace: kube-system
spec:
  selector:
    matchLabels:
      k8s-app: metrics-server
  strategy:
    rollingUpdate:
      maxUnavailable: 0
  template:
    metadata:
      labels:
        k8s-app: metrics-server
    spec:
      containers:
      - args:
        - --cert-dir=/tmp
        - --secure-port=10250
        - --kubelet-preferred-address-types=InternalIP,ExternalIP,Hostname
        - --kubelet-use-node-status-port
        - --kubelet-insecure-tls
        - --metric-resolution=15s
        image: registry.k8s.io/metrics-server/metrics-server:v0.7.1
        imagePullPolicy: IfNotPresent
        livenessProbe:
          failureThreshold: 3
          httpGet:
            path: /livez
            port: https
            scheme: HTTPS
          periodSeconds: 10
        name: metrics-server
        ports:
        - containerPort: 10250
          name: https
          protocol: TCP
        readinessProbe:
          failureThreshold: 3
          httpGet:
            path: /readyz
            port: https
            scheme: HTTPS
          initialDelaySeconds: 20
          periodSeconds: 10
        resources:
          requests:
            cpu: 100m
            memory: 200Mi
        securityContext:
          allowPrivilegeEscalation: false
          capabilities:
            drop:
            - ALL
          readOnlyRootFilesystem: true
          runAsNonRoot: true
          runAsUser: 1000
          seccompProfile:
            type: RuntimeDefault
        volumeMounts:
        - mountPath: /tmp
          name: tmp-dir
      nodeSelector:
        kubernetes.io/os: linux
      priorityClassName: system-cluster-critical
      serviceAccountName: metrics-server
      volumes:
      - emptyDir: {}
        name: tmp-dir
---
apiVersion: apiregistration.k8s.io/v1
kind: APIService
metadata:
  labels:
    k8s-app: metrics-server
  name: v1beta1.metrics.k8s.io
spec:
  group: metrics.k8s.io
  groupPriorityMinimum: 100
  insecureSkipTLSVerify: true
  service:
    name: metrics-server
    namespace: kube-system
  version: v1beta1
  versionPriority: 100
[root@master reqandlimits]# kubectl apply -f metrics-server.yml
serviceaccount/metrics-server created
clusterrole.rbac.authorization.k8s.io/system:aggregated-metrics-reader created
clusterrole.rbac.authorization.k8s.io/system:metrics-server created
rolebinding.rbac.authorization.k8s.io/metrics-server-auth-reader created
clusterrolebinding.rbac.authorization.k8s.io/metrics-server:system:auth-delegator created
clusterrolebinding.rbac.authorization.k8s.io/system:metrics-server created
service/metrics-server created
deployment.apps/metrics-server created
apiservice.apiregistration.k8s.io/v1beta1.metrics.k8s.io created
[root@master reqandlimits]# kubectl get pods -n kube-system | grep metrics-server
metrics-server-67fc4df55-f5s2q 1/1 Running 0 72s
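Before relying on kubectl top, you can optionally confirm that the aggregated Metrics API registered by the manifest above (the v1beta1.metrics.k8s.io APIService) reports as Available, and check the Deployment logs if it does not. These checks were not part of the original run:
kubectl get apiservice v1beta1.metrics.k8s.io
kubectl -n kube-system logs deployment/metrics-server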
With the metrics-server add-on installed, node (and Pod) resource metrics are now available through kubectl top:
[root@master reqandlimits]# kubectl top node
NAME CPU(cores) CPU(%) MEMORY(bytes) MEMORY(%)
cka-cluster2-control-plane 112m 5% 663Mi 18%
cka-cluster2-worker 33m 1% 218Mi 6%
cka-cluster2-worker2 22m 1% 148Mi 4%
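On reasonably recent kubectl versions, the same views can be sorted, which helps spot the busiest nodes and Pods on larger clusters (assuming your kubectl supports --sort-by for top; not shown in the original run):
kubectl top node --sort-by=memory
kubectl top pod -A --sort-by=cpu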
Now we will do some stress testing. These examples demonstrate how Kubernetes handles memory requests and limits; let's break down each case to understand what's happening. The key concepts are summarized below, with a minimal example sketched right after the list.
Key Concepts
- Resource Requests:
  - The amount of memory or CPU that Kubernetes guarantees to a container.
  - Used by the scheduler to decide which node can accommodate the Pod.
- Resource Limits:
  - The maximum amount of memory or CPU that a container can use.
  - If a container exceeds its memory limit, it may be terminated (OOMKilled).
- OOMKilled:
  - The Out Of Memory (OOM) killer terminates a container that exceeds its memory limit.
- Pending State:
  - A Pod remains in the Pending state if the scheduler cannot find a node with sufficient resources to satisfy the Pod's requests.
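As referenced above, here is a minimal sketch of how requests and limits fit together in a Pod spec. The Pod name, container name, and image are illustrative and not taken from the lab:
apiVersion: v1
kind: Pod
metadata:
  name: resource-demo            # illustrative name, not part of the lab
spec:
  containers:
  - name: app
    image: nginx                 # any small image works for this sketch
    resources:
      requests:                  # what the scheduler reserves when placing the Pod
        cpu: 250m
        memory: 64Mi
      limits:                    # hard ceiling; exceeding the memory limit risks an OOMKill
        cpu: 500m
        memory: 128Mi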
Case I: Pod with Proper Requests and Limits
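The manifests in the three cases below target the mem-example namespace. The original transcript does not show its creation, so if it does not already exist in your cluster, create it first:
kubectl create namespace mem-example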
[root@master reqandlimits]# cat mem-request.yml
apiVersion: v1
kind: Pod
metadata:
  name: memory-demo
  namespace: mem-example
spec:
  containers:
  - name: memory-demo-ctr
    image: benchpilot/stress
    resources:
      requests:
        memory: "100Mi"
      limits:
        memory: "200Mi"
    command: ["stress"]
    args: ["--vm", "1", "--vm-bytes", "150M", "--vm-hang", "1"]
[root@master reqandlimits]# kubectl apply -f mem-request.yml
pod/memory-demo created
[root@master reqandlimits]# kubectl get pods -n mem-example
NAME READY STATUS RESTARTS AGE
memory-demo 1/1 Running 0 33s
[root@master reqandlimits]# kubectl top pod memory-demo -n mem-example
NAME CPU(cores) MEMORY(bytes)
memory-demo 45m 152Mi
Explanation:
- Requests: The container requests 100Mi of memory.
- Limits: The container is limited to 200Mi of memory.
- The stress command allocates 150M of memory, which is within the limit.
Outcome:
- The Pod runs successfully because:
  - The requested memory (100Mi) is available on a node.
  - The allocated memory (150M) stays within the limit (200Mi).
- The Pod is using 152Mi of memory, which is within the limit.
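To compare the configured values with the live usage reported by kubectl top, the spec can be read back directly (a quick check that was not part of the original run):
kubectl get pod memory-demo -n mem-example -o jsonpath='{.spec.containers[0].resources}'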
Case II: Pod Exceeding Memory Limit
[root@master reqandlimits]# cat mem-request2.yml
apiVersion: v1
kind: Pod
metadata:
  name: memory-demo-2
  namespace: mem-example
spec:
  containers:
  - name: memory-demo-ctr
    image: benchpilot/stress
    resources:
      requests:
        memory: "50Mi"
      limits:
        memory: "100Mi"
    command: ["stress"]
    args: ["--vm", "1", "--vm-bytes", "200M", "--vm-hang", "1"]
[root@master reqandlimits]# kubectl apply -f mem-request2.yml -n mem-example
pod/memory-demo-2 created
[root@master reqandlimits]# kubectl get pods -n mem-example
NAME READY STATUS RESTARTS AGE
memory-demo 1/1 Running 0 3m1s
memory-demo-2 0/1 OOMKilled 0 7s
[root@master reqandlimits]# kubectl describe pod memory-demo-2 -n mem-example
Warning BackOff 10s (x3 over 26s) kubelet Back-off restarting failed container memory-demo-ctr in pod memory-demo-2_mem-example(7e422f65-50e5-4724-9b7e-8a71506a7b1b)
Explanation:
- Requests: The container requests 50Mi of memory.
- Limits: The container is limited to 100Mi of memory.
- The stress command tries to allocate 200M of memory, which exceeds the limit.
Outcome:
- The container is terminated by the OOM Killer because it exceeds its memory limit (100Mi).
- Kubernetes restarts the container, but it keeps exceeding the limit, so it fails repeatedly and enters a restart back-off (the BackOff events above).
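To confirm why the container keeps dying, its last termination state can be inspected; with the manifest above this should typically report OOMKilled (a quick check that was not part of the original run):
kubectl get pod memory-demo-2 -n mem-example -o jsonpath='{.status.containerStatuses[0].lastState.terminated.reason}'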
Case III: Insufficient Memory on Nodes
[root@master reqandlimits]# cat mem-request3.yml
apiVersion: v1
kind: Pod
metadata:
  name: memory-demo-3
  namespace: mem-example
spec:
  containers:
  - name: memory-demo-ctr
    image: benchpilot/stress
    resources:
      requests:
        memory: "1000Gi"
      limits:
        memory: "1000Gi"
    command: ["stress"]
    args: ["--vm", "1", "--vm-bytes", "150M", "--vm-hang", "1"]
[root@master reqandlimits]# kubectl apply -f mem-request3.yml -n mem-example
pod/memory-demo-3 created
[root@master reqandlimits]# kubectl get pods -n mem-example
NAME READY STATUS RESTARTS AGE
memory-demo 1/1 Running 0 7m12s
memory-demo-3 0/1 Pending 0 8s
[root@master reqandlimits]# kubectl describe pod memory-demo-3 -n mem-example
Events:
Type Reason Age From Message
---- ------ ---- ---- -------
Warning FailedScheduling 23s default-scheduler 0/3 nodes are available: 1 node(s) had untolerated taint {node-role.kubernetes.io/control-plane: }, 2 Insufficient memory. preemption: 0/3 nodes are available: 1 Preemption is not helpful for scheduling, 2 No preemption victims found for incoming pod.
Explanation:
- Requests: The container requests 1000Gi of memory.
- Limits: The container is limited to 1000Gi of memory.
- The stress command would only allocate 150M of memory, but the 1000Gi request is far more than any node in the cluster can offer.
Outcome:
- The Pod remains in the Pending state because no node has 1000Gi of allocatable memory.
- The scheduler cannot find a node that satisfies the request, as the FailedScheduling event shows.
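To see how much memory the scheduler actually has to work with, each node's allocatable capacity can be listed and compared against the 1000Gi request (a quick check that was not part of the original run):
kubectl get nodes -o custom-columns=NAME:.metadata.name,ALLOCATABLE_MEMORY:.status.allocatable.memory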