Lesson 3.3: Resources and Limits (CPU, Memory)
Installing the metrics-server add-on
[root@master reqandlimits]# cat metrics-server.yml
apiVersion: v1
kind: ServiceAccount
metadata:
  labels:
    k8s-app: metrics-server
  name: metrics-server
  namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  labels:
    k8s-app: metrics-server
    rbac.authorization.k8s.io/aggregate-to-admin: "true"
    rbac.authorization.k8s.io/aggregate-to-edit: "true"
    rbac.authorization.k8s.io/aggregate-to-view: "true"
  name: system:aggregated-metrics-reader
rules:
- apiGroups:
  - metrics.k8s.io
  resources:
  - pods
  - nodes
  verbs:
  - get
  - list
  - watch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  labels:
    k8s-app: metrics-server
  name: system:metrics-server
rules:
- apiGroups:
  - ""
  resources:
  - nodes/metrics
  verbs:
  - get
- apiGroups:
  - ""
  resources:
  - pods
  - nodes
  verbs:
  - get
  - list
  - watch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  labels:
    k8s-app: metrics-server
  name: metrics-server-auth-reader
  namespace: kube-system
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: extension-apiserver-authentication-reader
subjects:
- kind: ServiceAccount
  name: metrics-server
  namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  labels:
    k8s-app: metrics-server
  name: metrics-server:system:auth-delegator
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: system:auth-delegator
subjects:
- kind: ServiceAccount
  name: metrics-server
  namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  labels:
    k8s-app: metrics-server
  name: system:metrics-server
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: system:metrics-server
subjects:
- kind: ServiceAccount
  name: metrics-server
  namespace: kube-system
---
apiVersion: v1
kind: Service
metadata:
  labels:
    k8s-app: metrics-server
  name: metrics-server
  namespace: kube-system
spec:
  ports:
  - name: https
    port: 443
    protocol: TCP
    targetPort: https
  selector:
    k8s-app: metrics-server
---
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    k8s-app: metrics-server
  name: metrics-server
  namespace: kube-system
spec:
  selector:
    matchLabels:
      k8s-app: metrics-server
  strategy:
    rollingUpdate:
      maxUnavailable: 0
  template:
    metadata:
      labels:
        k8s-app: metrics-server
    spec:
      containers:
      - args:
        - --cert-dir=/tmp
        - --secure-port=10250
        - --kubelet-preferred-address-types=InternalIP,ExternalIP,Hostname
        - --kubelet-use-node-status-port
        - --kubelet-insecure-tls
        - --metric-resolution=15s
        image: registry.k8s.io/metrics-server/metrics-server:v0.7.1
        imagePullPolicy: IfNotPresent
        livenessProbe:
          failureThreshold: 3
          httpGet:
            path: /livez
            port: https
            scheme: HTTPS
          periodSeconds: 10
        name: metrics-server
        ports:
        - containerPort: 10250
          name: https
          protocol: TCP
        readinessProbe:
          failureThreshold: 3
          httpGet:
            path: /readyz
            port: https
            scheme: HTTPS
          initialDelaySeconds: 20
          periodSeconds: 10
        resources:
          requests:
            cpu: 100m
            memory: 200Mi
        securityContext:
          allowPrivilegeEscalation: false
          capabilities:
            drop:
            - ALL
          readOnlyRootFilesystem: true
          runAsNonRoot: true
          runAsUser: 1000
          seccompProfile:
            type: RuntimeDefault
        volumeMounts:
        - mountPath: /tmp
          name: tmp-dir
      nodeSelector:
        kubernetes.io/os: linux
      priorityClassName: system-cluster-critical
      serviceAccountName: metrics-server
      volumes:
      - emptyDir: {}
        name: tmp-dir
---
apiVersion: apiregistration.k8s.io/v1
kind: APIService
metadata:
  labels:
    k8s-app: metrics-server
  name: v1beta1.metrics.k8s.io
spec:
  group: metrics.k8s.io
  groupPriorityMinimum: 100
  insecureSkipTLSVerify: true
  service:
    name: metrics-server
    namespace: kube-system
  version: v1beta1
  versionPriority: 100
[root@master reqandlimits]# kubectl apply -f metrics-server.yml
serviceaccount/metrics-server created
clusterrole.rbac.authorization.k8s.io/system:aggregated-metrics-reader created
clusterrole.rbac.authorization.k8s.io/system:metrics-server created
rolebinding.rbac.authorization.k8s.io/metrics-server-auth-reader created
clusterrolebinding.rbac.authorization.k8s.io/metrics-server:system:auth-delegator created
clusterrolebinding.rbac.authorization.k8s.io/system:metrics-server created
service/metrics-server created
deployment.apps/metrics-server created
apiservice.apiregistration.k8s.io/v1beta1.metrics.k8s.io created
[root@master reqandlimits]# kubectl get pods -n kube-system | grep metrics-server
metrics-server-67fc4df55-f5s2q 1/1 Running 0 72s
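Before relying on kubectl top, you can optionally confirm that the aggregated Metrics API registered by the manifest above (the v1beta1.metrics.k8s.io APIService) reports as Available, and check the Deployment logs if it does not. These checks were not part of the original run:
kubectl get apiservice v1beta1.metrics.k8s.io
kubectl -n kube-system logs deployment/metrics-server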
With the metrics-server add-on installed, node (and Pod) resource metrics are now available through kubectl top:
[root@master reqandlimits]# kubectl top node
NAME CPU(cores) CPU(%) MEMORY(bytes) MEMORY(%)
cka-cluster2-control-plane 112m 5% 663Mi 18%
cka-cluster2-worker 33m 1% 218Mi 6%
cka-cluster2-worker2 22m 1% 148Mi 4%
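On reasonably recent kubectl versions, the same views can be sorted, which helps spot the busiest nodes and Pods on larger clusters (assuming your kubectl supports --sort-by for top; not shown in the original run):
kubectl top node --sort-by=memory
kubectl top pod -A --sort-by=cpu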
Now we will do some stress testing. These examples demonstrate how Kubernetes handles memory requests and limits; let's break down each case to understand what's happening. The key concepts are summarized below, with a minimal example sketched right after the list.
Key Concepts
- Resource Requests:
  - The amount of memory or CPU that Kubernetes guarantees to a container.
  - Used by the scheduler to decide which node can accommodate the Pod.
- Resource Limits:
  - The maximum amount of memory or CPU that a container can use.
  - If a container exceeds its memory limit, it may be terminated (OOMKilled).
- OOMKilled:
  - The Out Of Memory (OOM) killer terminates a container that exceeds its memory limit.
- Pending State:
  - A Pod remains in the Pending state if the scheduler cannot find a node with sufficient resources to satisfy the Pod's requests.
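As referenced above, here is a minimal sketch of how requests and limits fit together in a Pod spec. The Pod name, container name, and image are illustrative and not taken from the lab:
apiVersion: v1
kind: Pod
metadata:
  name: resource-demo            # illustrative name, not part of the lab
spec:
  containers:
  - name: app
    image: nginx                 # any small image works for this sketch
    resources:
      requests:                  # what the scheduler reserves when placing the Pod
        cpu: 250m
        memory: 64Mi
      limits:                    # hard ceiling; exceeding the memory limit risks an OOMKill
        cpu: 500m
        memory: 128Mi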
Case I: Pod with Proper Requests and Limits
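The manifests in the three cases below target the mem-example namespace. The original transcript does not show its creation, so if it does not already exist in your cluster, create it first:
kubectl create namespace mem-example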
[root@master reqandlimits]# cat mem-request.yml
apiVersion: v1
kind: Pod
metadata:
  name: memory-demo
  namespace: mem-example
spec:
  containers:
  - name: memory-demo-ctr
    image: benchpilot/stress
    resources:
      requests:
        memory: "100Mi"
      limits:
        memory: "200Mi"
    command: ["stress"]
    args: ["--vm", "1", "--vm-bytes", "150M", "--vm-hang", "1"]
[root@master reqandlimits]# kubectl apply -f mem-request.yml
pod/memory-demo created
[root@master reqandlimits]# kubectl get pods -n mem-example
NAME READY STATUS RESTARTS AGE
memory-demo 1/1 Running 0 33s
[root@master reqandlimits]# kubectl top pod memory-demo -n mem-example
NAME CPU(cores) MEMORY(bytes)
memory-demo 45m 152Mi
Explanation:
- Requests: The container requests 100Mi of memory.
- Limits: The container is limited to 200Mi of memory.
- The stress command allocates 150M of memory, which is within the limit.
Outcome:
- The Pod runs successfully because:
  - The requested memory (100Mi) is available on a node.
  - The allocated memory (150M) stays within the limit (200Mi).
- The Pod is using 152Mi of memory, which is within the limit.
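To compare the configured values with the live usage reported by kubectl top, the spec can be read back directly (a quick check that was not part of the original run):
kubectl get pod memory-demo -n mem-example -o jsonpath='{.spec.containers[0].resources}'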
Case II: Pod Exceeding Memory Limit
[root@master reqandlimits]# cat mem-request2.yml
apiVersion: v1
kind: Pod
metadata:
  name: memory-demo-2
  namespace: mem-example
spec:
  containers:
  - name: memory-demo-ctr
    image: benchpilot/stress
    resources:
      requests:
        memory: "50Mi"
      limits:
        memory: "100Mi"
    command: ["stress"]
    args: ["--vm", "1", "--vm-bytes", "200M", "--vm-hang", "1"]
[root@master reqandlimits]# kubectl apply -f mem-request2.yml -n mem-example
pod/memory-demo-2 created
[root@master reqandlimits]# kubectl get pods -n mem-example
NAME READY STATUS RESTARTS AGE
memory-demo 1/1 Running 0 3m1s
memory-demo-2 0/1 OOMKilled 0 7s
[root@master reqandlimits]# kubectl describe pod memory-demo-2 -n mem-example
Warning BackOff 10s (x3 over 26s) kubelet Back-off restarting failed container memory-demo-ctr in pod memory-demo-2_mem-example(7e422f65-50e5-4724-9b7e-8a71506a7b1b)
Explanation:
- Requests: The container requests 50Mi of memory.
- Limits: The container is limited to 100Mi of memory.
- The stress command tries to allocate 200M of memory, which exceeds the limit.
Outcome:
- The container is terminated by the OOM Killer because it exceeds its memory limit (100Mi).
- Kubernetes restarts the container, but it keeps exceeding the limit, so it fails repeatedly and enters a restart back-off (the BackOff events above).
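To confirm why the container keeps dying, its last termination state can be inspected; with the manifest above this should typically report OOMKilled (a quick check that was not part of the original run):
kubectl get pod memory-demo-2 -n mem-example -o jsonpath='{.status.containerStatuses[0].lastState.terminated.reason}'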
Case III: Insufficient Memory on Nodes
[root@master reqandlimits]# cat mem-request3.yml
apiVersion: v1
kind: Pod
metadata:
  name: memory-demo-3
  namespace: mem-example
spec:
  containers:
  - name: memory-demo-ctr
    image: benchpilot/stress
    resources:
      requests:
        memory: "1000Gi"
      limits:
        memory: "1000Gi"
    command: ["stress"]
    args: ["--vm", "1", "--vm-bytes", "150M", "--vm-hang", "1"]
[root@master reqandlimits]# kubectl apply -f mem-request3.yml -n mem-example
pod/memory-demo-3 created
[root@master reqandlimits]# kubectl get pods -n mem-example
NAME READY STATUS RESTARTS AGE
memory-demo 1/1 Running 0 7m12s
memory-demo-3 0/1 Pending 0 8s
[root@master reqandlimits]# kubectl describe pod memory-demo-3 -n mem-example
Events:
Type Reason Age From Message
---- ------ ---- ---- -------
Warning FailedScheduling 23s default-scheduler 0/3 nodes are available: 1 node(s) had untolerated taint {node-role.kubernetes.io/control-plane: }, 2 Insufficient memory. preemption: 0/3 nodes are available: 1 Preemption is not helpful for scheduling, 2 No preemption victims found for incoming pod.
Explanation:
- Requests: The container requests 1000Gi of memory.
- Limits: The container is limited to 1000Gi of memory.
- The stress command would only allocate 150M of memory, but the 1000Gi request is far more than any node in the cluster can offer.
Outcome:
- The Pod remains in the Pending state because no node has 1000Gi of allocatable memory.
- The scheduler cannot find a node that satisfies the request, as the FailedScheduling event shows.
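To see how much memory the scheduler actually has to work with, each node's allocatable capacity can be listed and compared against the 1000Gi request (a quick check that was not part of the original run):
kubectl get nodes -o custom-columns=NAME:.metadata.name,ALLOCATABLE_MEMORY:.status.allocatable.memory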