Pods, Deployments, Services, Ingress, ConfigMaps, Helm, RBAC — container orchestration at scale.
# ── Pod with multiple containers, probes, and lifecycle ──
apiVersion: v1
kind: Pod
metadata:
  name: web-app
  namespace: production
  labels:
    app: web
    tier: frontend
    env: production
  annotations:
    description: "Main web application pod"
spec:
  serviceAccountName: web-app-sa
  restartPolicy: Always
  terminationGracePeriodSeconds: 60
  nodeSelector:
    kubernetes.io/os: linux
    node-role: worker
  # fsGroup is a pod-level field (controls volume file ownership);
  # it is not valid inside a container securityContext
  securityContext:
    fsGroup: 2000
  # ── Init Container ──
  initContainers:
    - name: db-migrate
      image: myapp:1.5.0
      command: ["./migrate", "--direction", "up"]
      envFrom:
        - secretRef:
            name: db-credentials
      resources:
        limits:
          memory: "256Mi"
          cpu: "250m"
  # ── Main Container ──
  containers:
    - name: web
      image: myapp:1.5.0
      imagePullPolicy: IfNotPresent
      command: ["./start.sh"]
      args: ["--port", "8080", "--log-level", "info"]
      workingDir: /app
      ports:
        - name: http
          containerPort: 8080
          protocol: TCP
        - name: metrics
          containerPort: 9090
          protocol: TCP
      # ── Environment Variables ──
      env:
        - name: APP_ENV
          value: "production"
        - name: POD_IP
          valueFrom:
            fieldRef:
              fieldPath: status.podIP
        - name: NODE_NAME
          valueFrom:
            fieldRef:
              fieldPath: spec.nodeName
        - name: DB_PASSWORD
          valueFrom:
            secretKeyRef:
              name: db-credentials
              key: password
        - name: MAX_CONNECTIONS
          valueFrom:
            resourceFieldRef:
              containerName: web
              resource: limits.cpu
      envFrom:
        - configMapRef:
            name: app-config
        # prefix is a sibling of the ref: secret keys are injected as APP_*
        - secretRef:
            name: app-secrets
          prefix: "APP_"
      # ── Resource Requests & Limits ──
      resources:
        requests:
          memory: "128Mi"
          cpu: "100m"
          ephemeral-storage: "1Gi"
        limits:
          memory: "512Mi"
          cpu: "500m"
          ephemeral-storage: "2Gi"
      # ── Liveness Probe ──
      livenessProbe:
        httpGet:
          path: /healthz
          port: http
          scheme: HTTP
        initialDelaySeconds: 15
        periodSeconds: 20
        timeoutSeconds: 5
        successThreshold: 1
        failureThreshold: 3
      # ── Readiness Probe ──
      readinessProbe:
        httpGet:
          path: /ready
          port: http
          httpHeaders:
            - name: X-Custom-Header
              value: "k8s-probe"
        initialDelaySeconds: 5
        periodSeconds: 10
        timeoutSeconds: 3
        successThreshold: 1
        failureThreshold: 3
      # ── Startup Probe ──
      startupProbe:
        httpGet:
          path: /healthz
          port: http
        initialDelaySeconds: 0
        periodSeconds: 5
        failureThreshold: 30 # max 150s startup
      # ── Lifecycle Hooks ──
      lifecycle:
        postStart:
          exec:
            command: ["/bin/sh", "-c", "echo 'Container started' >> /var/log/startup.log"]
        preStop:
          exec:
            command: ["/bin/sh", "-c", "sleep 10 && ./graceful-shutdown.sh"]
      # ── Volume Mounts ──
      volumeMounts:
        - name: config-volume
          mountPath: /app/config
          readOnly: true
        - name: secret-volume
          mountPath: /app/secrets
          readOnly: true
        - name: data-volume
          mountPath: /app/data
        - name: tmp-logs
          mountPath: /var/log
      # ── Security Context (container-level fields only) ──
      securityContext:
        runAsNonRoot: true
        runAsUser: 1000
        runAsGroup: 3000
        allowPrivilegeEscalation: false
        readOnlyRootFilesystem: true
        capabilities:
          drop:
            - ALL
    # ── Sidecar Container ──
    - name: log-collector
      image: fluentd:1.16
      volumeMounts:
        - name: tmp-logs
          mountPath: /var/log
          readOnly: true
      resources:
        limits:
          memory: "128Mi"
          cpu: "100m"
  # ── Volumes ──
  volumes:
    - name: config-volume
      configMap:
        name: app-config
        items:
          - key: config.yaml
            path: config.yaml
    - name: secret-volume
      secret:
        secretName: app-secrets
        defaultMode: 0400
    - name: data-volume
      emptyDir:
        medium: Memory
        sizeLimit: "64Mi"
    - name: tmp-logs
      emptyDir: {}

| Policy | Behavior |
|---|---|
| Always | Restart on any exit (default for Deployments/DaemonSets) |
| OnFailure | Restart only on non-zero exit code (Jobs) |
| Never | Never restart (one-shot Jobs, debugging) |
| Method | Use Case |
|---|---|
| httpGet | HTTP/HTTPS endpoint health check |
| tcpSocket | TCP port open check |
| exec | Run command inside container (exit 0 = healthy) |
| grpc | gRPC health checking (K8s 1.24+) |
| Policy | Description |
|---|---|
| IfNotPresent | Pull only if image not present (default) |
| Always | Always pull latest on pod creation |
| Never | Never pull, use local image only |
| Field | Description |
|---|---|
| runAsNonRoot | Container must run as non-root |
| runAsUser | UID to run the container process |
| runAsGroup | GID for the container process |
| fsGroup | GID for volume file ownership |
| readOnlyRootFilesystem | Mount root filesystem as read-only |
| allowPrivilegeEscalation | Allow setuid binaries (default true) |
| seccompProfile | Seccomp profile (RuntimeDefault, Unconfined) |
| seLinuxOptions | SELinux labels for the container |
# ── Alternative probe types ──
spec:
  containers:
    - name: app
      image: myapp:latest
      # TCP socket probe (database connectivity)
      livenessProbe:
        tcpSocket:
          port: 5432
        initialDelaySeconds: 10
        periodSeconds: 15
      # Exec probe (file existence check)
      readinessProbe:
        exec:
          command:
            - cat
            - /tmp/ready
        initialDelaySeconds: 5
        periodSeconds: 10
      # gRPC probe (Kubernetes 1.24+)
      startupProbe:
        grpc:
          port: 9090
          service: myapp.Health
        periodSeconds: 5
        failureThreshold: 20

Size `failureThreshold * periodSeconds` to cover the worst-case startup time.

Use `preStop` with a sleep to allow time for deregistration from load balancers and in-flight requests to complete. Without this, the kubelet sends SIGTERM immediately and the pod may be killed before finishing.

# ── Production Deployment with rolling update ──
apiVersion: apps/v1
kind: Deployment
metadata:
  name: api-server
  namespace: production
  labels:
    app: api
    version: v2
spec:
  replicas: 3
  # ── Selector must match template labels ──
  selector:
    matchLabels:
      app: api
  # ── Rolling Update Strategy ──
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxSurge: 1 # max 1 pod above desired
      maxUnavailable: 0 # no pods below desired
  # ── Revision History ──
  revisionHistoryLimit: 10
  # ── Progress Deadline ──
  progressDeadlineSeconds: 600
  template:
    metadata:
      labels:
        app: api
        version: v2
      annotations:
        prometheus.io/scrape: "true"
        prometheus.io/port: "9090"
    spec:
      serviceAccountName: api-sa
      terminationGracePeriodSeconds: 30
      # ── Anti-affinity: spread pods across nodes ──
      affinity:
        podAntiAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              podAffinityTerm:
                labelSelector:
                  matchLabels:
                    app: api
                topologyKey: kubernetes.io/hostname
      # ── Topology spread (K8s 1.19+) ──
      topologySpreadConstraints:
        - maxSkew: 1
          topologyKey: topology.kubernetes.io/zone
          whenUnsatisfiable: DoNotSchedule
          labelSelector:
            matchLabels:
              app: api
      containers:
        - name: api
          image: api-server:2.5.0
          ports:
            - containerPort: 8080
          env:
            - name: VERSION
              value: "2.5.0"
          resources:
            requests:
              memory: "256Mi"
              cpu: "200m"
            limits:
              memory: "1Gi"
              cpu: "1000m"
          livenessProbe:
            httpGet:
              path: /healthz
              port: 8080
            initialDelaySeconds: 15
            periodSeconds: 20
          readinessProbe:
            httpGet:
              path: /ready
              port: 8080
            initialDelaySeconds: 5
            periodSeconds: 10

| Strategy | Description |
|---|---|
| RollingUpdate | Gradually replace pods (default). Zero-downtime. |
| Recreate | Kill all pods, then create new ones. Brief downtime. |
| Field | Description |
|---|---|
| maxSurge | Max pods above desired count during update |
| maxUnavailable | Max pods below desired count during update |
| revisionHistoryLimit | Number of old ReplicaSets to retain (default 10) |
| progressDeadlineSeconds | Max time to wait for rollout progress |
# ── DaemonSet: run on every (or selected) node ──
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: node-exporter
  namespace: monitoring
  labels:
    app: node-exporter
spec:
  selector:
    matchLabels:
      app: node-exporter
  # ── Update strategy ──
  updateStrategy:
    type: RollingUpdate
    rollingUpdate:
      maxSurge: 0
      maxUnavailable: 1
  template:
    metadata:
      labels:
        app: node-exporter
    spec:
      # Run on specific nodes only
      nodeSelector:
        kubernetes.io/os: linux
      tolerations:
        - key: node-role.kubernetes.io/control-plane
          effect: NoSchedule
        - key: node-role.kubernetes.io/master
          effect: NoSchedule
      containers:
        - name: node-exporter
          image: prom/node-exporter:v1.8.0
          args:
            - "--path.sysfs=/host/sys"
            - "--path.rootfs=/host/root"
          ports:
            - containerPort: 9100
          volumeMounts:
            - name: sys
              mountPath: /host/sys
              readOnly: true
            - name: root
              mountPath: /host/root
              readOnly: true
          resources:
            limits:
              memory: "64Mi"
              cpu: "100m"
      volumes:
        - name: sys
          hostPath:
            path: /sys
        - name: root
          hostPath:
            path: /
---
# ── StatefulSet for stateful workloads ──
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: postgres-cluster
  namespace: databases
spec:
  serviceName: postgres-headless # required: headless Service
  replicas: 3
  selector:
    matchLabels:
      app: postgres
  # ── Pod Management Policy ──
  podManagementPolicy: OrderedReady # or Parallel
  # ── Update Strategy ──
  updateStrategy:
    type: RollingUpdate
    rollingUpdate:
      partition: 0 # pods with ordinal >= partition are updated
  # ── Persistent Volume Claim Templates ──
  volumeClaimTemplates:
    - metadata:
        name: postgres-data
      spec:
        accessModes: ["ReadWriteOnce"]
        storageClassName: fast-ssd
        resources:
          requests:
            storage: 50Gi
  template:
    metadata:
      labels:
        app: postgres
    spec:
      containers:
        - name: postgres
          image: postgres:16-alpine
          ports:
            - containerPort: 5432
          env:
            - name: POSTGRES_DB
              value: mydb
            - name: POSTGRES_USER
              valueFrom:
                secretKeyRef:
                  name: postgres-credentials
                  key: username
            - name: POSTGRES_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: postgres-credentials
                  key: password
            - name: POD_NAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
          volumeMounts:
            - name: postgres-data
              mountPath: /var/lib/postgresql/data
          livenessProbe:
            exec:
              command: ["pg_isready", "-U", "postgres"]
            periodSeconds: 10
          readinessProbe:
            exec:
              command: ["pg_isready", "-U", "postgres"]
            periodSeconds: 5
---
# ── Job: batch processing ──
apiVersion: batch/v1
kind: Job
metadata:
  name: data-migration
  namespace: batch
spec:
  # ── Completion & Parallelism ──
  completions: 1
  parallelism: 1
  backoffLimit: 6 # max retries before marking failed
  activeDeadlineSeconds: 3600 # max runtime
  ttlSecondsAfterFinished: 86400 # auto-cleanup (K8s 1.23+)
  template:
    metadata:
      labels:
        job: data-migration
    spec:
      restartPolicy: Never # Never or OnFailure
      containers:
        - name: migrate
          image: myapp:1.5.0
          command: ["./migrate", "--direction", "up"]
          envFrom:
            - secretRef:
                name: db-credentials
---
# ── CronJob: scheduled tasks ──
apiVersion: batch/v1
kind: CronJob
metadata:
  name: nightly-backup
  namespace: batch
spec:
  schedule: "0 2 * * *" # daily at 2 AM UTC
  timeZone: "UTC" # K8s 1.27+; the field is camelCase "timeZone"
  # ── Concurrency ──
  concurrencyPolicy: Forbid # Allow, Forbid, or Replace
  # ── History Limits ──
  successfulJobsHistoryLimit: 3
  failedJobsHistoryLimit: 1
  # ── Starting Deadline ──
  startingDeadlineSeconds: 200
  jobTemplate:
    spec:
      backoffLimit: 2
      ttlSecondsAfterFinished: 7200
      template:
        spec:
          restartPolicy: OnFailure
          containers:
            - name: backup
              image: postgres:16-alpine
              command:
                - /bin/bash
                - -c
                - |
                  pg_dump -h $DB_HOST -U $DB_USER -d $DB_NAME > /backup/backup-$(date +%Y%m%d).sql
              env:
                - name: DB_HOST
                  value: "postgres-primary"
                - name: DB_USER
                  valueFrom:
                    secretKeyRef:
                      name: postgres-credentials
                      key: username
                - name: DB_NAME
                  value: "production"
                - name: PGPASSWORD
                  valueFrom:
                    secretKeyRef:
                      name: postgres-credentials
                      key: password
              volumeMounts:
                - name: backup-storage
                  mountPath: /backup
          volumes:
            - name: backup-storage
              persistentVolumeClaim:
                claimName: backup-pvc

StatefulSet pods get stable names: postgres-cluster-0, postgres-cluster-1, etc. Each pod gets its own PVC from volumeClaimTemplates. Use partition in the update strategy to do canary rollouts.

Use concurrencyPolicy: Forbid for CronJobs to prevent overlapping runs. Replace cancels the running job and starts a new one. Allow (default) runs them concurrently, which can cause issues.

# ── ClusterIP Service (internal only) ──
apiVersion: v1
kind: Service
metadata:
  name: api-service
  namespace: production
  labels:
    app: api
spec:
  type: ClusterIP # default type
  selector:
    app: api # matches pod labels
  ports:
    - name: http
      port: 80 # service port
      targetPort: 8080 # container port (name or number)
      protocol: TCP
    - name: metrics
      port: 9090
      targetPort: 9090
      protocol: TCP
  # ── Session Affinity ──
  sessionAffinity: None # or ClientIP (sticky sessions)
  # NOTE(review): sessionAffinityConfig only takes effect when
  # sessionAffinity is ClientIP — inert with None as written
  sessionAffinityConfig:
    clientIP:
      timeoutSeconds: 10800
---
# ── NodePort Service ──
apiVersion: v1
kind: Service
metadata:
  name: web-nodeport
spec:
  type: NodePort
  selector:
    app: web
  ports:
    - port: 80
      targetPort: 8080
      nodePort: 30080 # 30000-32767 range
      protocol: TCP
---
# ── LoadBalancer Service (cloud provisioned) ──
apiVersion: v1
kind: Service
metadata:
  name: web-lb
  annotations:
    # Cloud-specific annotations
    service.beta.kubernetes.io/aws-load-balancer-type: "nlb"
    service.beta.kubernetes.io/aws-load-balancer-internal: "true"
    service.beta.kubernetes.io/aws-load-balancer-cross-zone-load-balancing-enabled: "true"
spec:
  type: LoadBalancer
  selector:
    app: web
  ports:
    - port: 443
      targetPort: 8080
      protocol: TCP
  # Assign a specific IP (if supported by cloud provider)
  loadBalancerIP: "10.0.100.200"
  # Health check port (only honored with externalTrafficPolicy: Local)
  healthCheckNodePort: 31080
  # External traffic policy
  externalTrafficPolicy: Local # Local or Cluster
---
# ── Headless Service (for StatefulSets) ──
apiVersion: v1
kind: Service
metadata:
  name: postgres-headless
  namespace: databases
spec:
  type: ClusterIP
  clusterIP: None # headless: no cluster IP assigned
  selector:
    app: postgres
  ports:
    - port: 5432
      targetPort: 5432
# DNS: postgres-cluster-0.postgres-headless.databases.svc.cluster.local
# DNS: postgres-cluster-1.postgres-headless.databases.svc.cluster.local
---
# ── ExternalName Service (alias to external DNS) ──
apiVersion: v1
kind: Service
metadata:
  name: external-db
  namespace: production
spec:
  type: ExternalName
  externalName: db.prod.example.com

| Type | Description | Use Case |
|---|---|---|
| ClusterIP | Internal cluster IP only | Internal microservices |
| NodePort | Exposes on each node IP at static port | On-prem, dev/testing |
| LoadBalancer | Cloud LB + NodePort + ClusterIP | External-facing apps |
| ExternalName | DNS CNAME to external name | External service alias |
| Headless | clusterIP: None, DNS returns pod IPs | StatefulSets, custom LBs |
| Record | Example |
|---|---|
| Service | api-service.production.svc.cluster.local |
| Pod (StatefulSet) | postgres-0.postgres-headless.databases.svc.cluster.local |
| Pod (general) | 10-244-1-5.default.pod.cluster.local |
| Namespace | production.svc.cluster.local |
| Headless pod A record | Returns all pod IPs directly |
# ── Ingress with nginx controller ──
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: app-ingress
  namespace: production
  annotations:
    # Regex capture groups below require regex mode; $2 refers to the
    # second capture group of the matched path.
    nginx.ingress.kubernetes.io/use-regex: "true"
    nginx.ingress.kubernetes.io/rewrite-target: /$2
    nginx.ingress.kubernetes.io/ssl-redirect: "true"
    nginx.ingress.kubernetes.io/proxy-body-size: "50m"
    nginx.ingress.kubernetes.io/proxy-read-timeout: "300"
    nginx.ingress.kubernetes.io/cors-allow-origin: "https://app.example.com"
    nginx.ingress.kubernetes.io/enable-cors: "true"
    nginx.ingress.kubernetes.io/rate-limit: "100"
    nginx.ingress.kubernetes.io/rate-limit-window: "1m"
    # WAF / auth annotations
    nginx.ingress.kubernetes.io/auth-url: "https://auth.example.com/verify"
    nginx.ingress.kubernetes.io/auth-signin: "https://auth.example.com/login"
spec:
  ingressClassName: nginx # IngressClass reference
  # ── TLS ──
  tls:
    - hosts:
        - app.example.com
        - api.example.com
      secretName: app-tls-cert
  # ── Rules ──
  rules:
    - host: app.example.com
      http:
        paths:
          # Regex paths must use ImplementationSpecific, not Prefix
          - path: /api(/|$)(.*)
            pathType: ImplementationSpecific
            backend:
              service:
                name: api-service
                port:
                  number: 80
          # Empty first group so rewrite-target /$2 forwards the full path
          - path: /()(.*)
            pathType: ImplementationSpecific
            backend:
              service:
                name: web-service
                port:
                  number: 80
    - host: api.example.com
      http:
        paths:
          # NOTE(review): rewrite-target applies ingress-wide; /v1 has no
          # capture groups, so $2 is empty here — verify intended rewrite
          - path: /v1
            pathType: Prefix
            backend:
              service:
                name: api-service
                port:
                  number: 80
---
# ── IngressClass (define ingress controller) ──
apiVersion: networking.k8s.io/v1
kind: IngressClass
metadata:
  name: nginx
  annotations:
    ingressclass.kubernetes.io/is-default-class: "true"
spec:
  controller: k8s.io/ingress-nginx
---
# ── NetworkPolicy: restrict traffic ──
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: api-network-policy
  namespace: production
spec:
  podSelector:
    matchLabels:
      app: api
  policyTypes:
    - Ingress
    - Egress
  # ── Ingress Rules ──
  ingress:
    - from:
        # From pods in the same namespace with label tier=frontend
        - podSelector:
            matchLabels:
              tier: frontend
        # From the monitoring namespace (namespaceSelector AND podSelector
        # in one entry: prometheus pods in the monitoring namespace)
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: monitoring
          podSelector:
            matchLabels:
              app: prometheus
      ports:
        - protocol: TCP
          port: 8080
  # ── Egress Rules ──
  egress:
    - to:
        - podSelector:
            matchLabels:
              app: postgres
      ports:
        - protocol: TCP
          port: 5432
    # Allow DNS resolution (required for most workloads);
    # TCP 53 is needed for large responses / retries
    - to: []
      ports:
        - protocol: UDP
          port: 53
        - protocol: TCP
          port: 53

Use externalTrafficPolicy: Local on LoadBalancer/NodePort services to preserve client source IP. The tradeoff is that traffic only goes to nodes with running pods, potentially creating uneven distribution.

# ── ConfigMap with data and binaryData ──
apiVersion: v1
kind: ConfigMap
metadata:
  name: app-config
  namespace: production
  labels:
    app: web
data:
  # Key-value pairs
  APP_ENV: "production"
  LOG_LEVEL: "info"
  MAX_CONNECTIONS: "100"
  CACHE_TTL: "3600"
  # File-like keys (use subPath for individual files)
  config.yaml: |
    server:
      port: 8080
      host: 0.0.0.0
    database:
      pool_size: 20
      timeout: 30
    logging:
      level: info
      format: json
  nginx.conf: |
    upstream backend {
      server api-service:80;
    }
    server {
      listen 80;
      location / {
        proxy_pass http://backend;
      }
    }
immutable: true # K8s 1.21+: cannot be updated after creation
---
# ── Secret types ──
# Opaque: generic key-value secret (default)
apiVersion: v1
kind: Secret
metadata:
  name: app-secrets
  namespace: production
type: Opaque
data:
  # Values must be base64 encoded
  DATABASE_URL: cG9zdGdyZXNxbDovL2FwcDpzZWNyZXRAMnBvc3RncmVzOjU0MzIvYXBwZGI=
  API_KEY: YWJjZGVmMTIzNDU2Nzg5MA==
  JWT_SECRET: bXktc3VwZXItc2VjcmV0LWtleS0yMDI0
immutable: true
---
# ── Docker registry secret ──
apiVersion: v1
kind: Secret
metadata:
  name: docker-registry-secret
  namespace: production
type: kubernetes.io/dockerconfigjson
data:
  .dockerconfigjson: eyJhdXRocyI6eyJodHRwczovL2luZGV4LmRvY2tlci5pby92MS8iOnsidXNlcm5hbWUiOiJ1c2VyIiwicGFzc3dvcmQiOiJwYXNzIiwiYXV0aCI6ImRYTmxjanB3WVhOeiJ9fX0=
---
# ── TLS secret (for Ingress) ──
apiVersion: v1
kind: Secret
metadata:
  name: app-tls-cert
  namespace: production
type: kubernetes.io/tls
data:
  tls.crt: LS0tLS1CRUdJTi... # placeholder: base64-encoded cert
  tls.key: LS0tLS1CRUdJTi... # placeholder: base64-encoded key
---
# ── Service account token secret (K8s 1.24+: auto-created)
apiVersion: v1
kind: Secret
metadata:
  name: my-sa-token
  namespace: production
  annotations:
    kubernetes.io/service-account.name: my-service-account
type: kubernetes.io/service-account-token
---
# ── Inject ConfigMap and Secret as environment variables ──
apiVersion: v1
kind: Pod
metadata:
  name: app-with-config
spec:
  containers:
    - name: app
      image: myapp:latest
      env:
        # Single key from ConfigMap
        - name: LOG_LEVEL
          valueFrom:
            configMapKeyRef:
              name: app-config
              key: LOG_LEVEL
        # Single key from Secret
        - name: DATABASE_URL
          valueFrom:
            secretKeyRef:
              name: app-secrets
              key: DATABASE_URL
      # All keys from ConfigMap as env vars.
      # Note: "optional" lives on configMapRef/secretRef, not on the
      # envFrom entry; "prefix" is a sibling of the ref.
      envFrom:
        - prefix: "APP_"
          configMapRef:
            name: app-config
            optional: false
        - prefix: ""
          secretRef:
            name: app-secrets
            optional: false
---
# ── Mount ConfigMap and Secret as volumes ──
apiVersion: v1
kind: Pod
metadata:
  name: app-with-volumes
spec:
  containers:
    - name: app
      image: myapp:latest
      volumeMounts:
        # ── ConfigMap volume (all keys as files) ──
        - name: config-volume
          mountPath: /etc/app/config
          readOnly: true
        # ── Single file from ConfigMap using subPath ──
        - name: config-volume
          mountPath: /etc/app/config.yaml
          subPath: config.yaml
          readOnly: true
        # ── Secret volume ──
        - name: secret-volume
          mountPath: /etc/app/secrets
          readOnly: true
        # ── Single secret key using subPath ──
        - name: secret-volume
          mountPath: /etc/app/jwt-secret
          subPath: JWT_SECRET
          readOnly: true
  volumes:
    # ── ConfigMap volume ──
    - name: config-volume
      configMap:
        name: app-config
        defaultMode: 0644 # file permissions
        items: # optional: select specific keys
          - key: config.yaml
            path: config.yaml
            mode: 0644
          - key: nginx.conf
            path: nginx.conf
            mode: 0644
    # ── Secret volume ──
    - name: secret-volume
      secret:
        secretName: app-secrets
        defaultMode: 0400 # read-only for secrets

| Type | Description |
|---|---|
| Opaque | Generic key-value (base64 encoded) |
| kubernetes.io/dockerconfigjson | Docker registry credentials |
| kubernetes.io/tls | TLS certificate and private key |
| kubernetes.io/basic-auth | Username and password |
| kubernetes.io/ssh-auth | SSH private key |
| kubernetes.io/service-account-token | Service account token |
| bootstrap.kubernetes.io/token | Bootstrap tokens |
| Feature | ConfigMap | Secret |
|---|---|---|
| Data storage | Plain text | Base64 encoded |
| Security | Not encrypted | Can be encrypted at rest |
| Size limit | 1 MiB | 1 MiB |
| Use case | Non-sensitive config | Passwords, keys, certs |
| immutable | K8s 1.21+ | K8s 1.21+ |
| Versioning | No | No (use external tools) |
Set `immutable: true` for ConfigMaps and Secrets that don't change. This significantly improves kubelet performance because it skips watching for updates. Great for production deployments.

# ── StorageClass for dynamic provisioning ──
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: fast-ssd
  annotations:
    storageclass.kubernetes.io/is-default-class: "true"
provisioner: ebs.csi.aws.com
parameters:
  type: gp3
  iops: "5000"
  throughput: "250"
  encrypted: "true"
  kmsKeyId: "arn:aws:kms:us-east-1:123456789:key/abc"
  fsType: ext4
reclaimPolicy: Delete # Delete or Retain
allowVolumeExpansion: true # allow resizing PVCs
volumeBindingMode: WaitForFirstConsumer
mountOptions:
  - debug
---
# ── Regional storage (multi-AZ) ──
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: standard-storage
# NOTE(review): in-tree kubernetes.io/aws-ebs provisioner is deprecated;
# prefer ebs.csi.aws.com as above — confirm cluster version
provisioner: kubernetes.io/aws-ebs
parameters:
  type: gp3
  zone: us-east-1a
reclaimPolicy: Retain
volumeBindingMode: Immediate
---
# ── PersistentVolume (static provisioning) ──
apiVersion: v1
kind: PersistentVolume
metadata:
  name: data-pv
  labels:
    app: database
    environment: production
spec:
  capacity:
    storage: 100Gi
  volumeMode: Filesystem # or Block
  accessModes:
    - ReadWriteOnce # RWO: single node
  persistentVolumeReclaimPolicy: Retain # Retain, Delete, Recycle
  storageClassName: fast-ssd
  # ── AWS EBS volume ──
  csi:
    driver: ebs.csi.aws.com
    volumeHandle: vol-0abcdef1234567890
    fsType: ext4
  # ── NFS volume ──
  # nfs:
  #   server: 10.0.1.100
  #   path: /exports/data
  # NFS-specific mount options — enable together with the nfs block above,
  # NOT with the EBS csi volume (nfsvers on an ext4 mount would fail):
  # mountOptions:
  #   - hard
  #   - nfsvers=4.1
  # ── Host path (dev only) ──
  # hostPath:
  #   path: /mnt/data
  #   type: DirectoryOrCreate
---
# ── PersistentVolumeClaim ──
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: app-data-pvc
  namespace: production
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 50Gi
  storageClassName: fast-ssd
  volumeMode: Filesystem # or Block (for raw block storage)
---
# ── PVC with selector (bind to specific PV) ──
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: specific-pvc
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 100Gi
  storageClassName: "" # empty string disables dynamic provisioning
  selector:
    matchLabels:
      app: database

| Mode | Abbrev | Description |
|---|---|---|
| ReadWriteOnce | RWO | Single node read-write (most common) |
| ReadOnlyMany | ROX | Multiple nodes read-only |
| ReadWriteMany | RWX | Multiple nodes read-write (NFS, CephFS) |
| ReadWriteOncePod | RWOP | Single pod read-write (K8s 1.27+) |
| Type | Description | Use Case |
|---|---|---|
| hostPath | Node filesystem path | Development / testing only |
| emptyDir | Ephemeral pod-local storage | Caches, temp data, scratch space |
| nfs | Network File System | Shared storage across nodes |
| awsEBS | AWS EBS block storage | Single-AZ persistent storage |
| azureDisk | Azure managed disk | Azure single-zone storage |
| gcePersistentDisk | Google Compute disk | GCP single-zone storage |
| local | Local persistent volumes | On-prem SSDs/HDDs |
| csi | CSI driver volumes | Any CSI-compatible storage |
# ── Pod using PVC with expanded size ──
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: expandable-pvc
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 20Gi # initial size
  storageClassName: fast-ssd # must allowVolumeExpansion: true
---
# Later, expand the PVC (no pod restart needed for Filesystem mode)
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: expandable-pvc
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 50Gi # expanded size
  storageClassName: fast-ssd
---
# ── Pod referencing the PVC ──
apiVersion: v1
kind: Pod
metadata:
  name: app-with-pvc
spec:
  containers:
    - name: app
      image: myapp:latest
      volumeMounts:
        - name: data
          mountPath: /app/data
  volumes:
    - name: data
      persistentVolumeClaim:
        claimName: expandable-pvc

| Policy | Description |
|---|---|
| Delete | PV deleted when PVC is deleted (dynamic) |
| Retain | PV kept when PVC deleted (manual cleanup) |
| Recycle | Run rm -rf on volume (deprecated, use Delete) |
| Mode | Description |
|---|---|
| Immediate | Bind PV as soon as PVC created (default) |
| WaitForFirstConsumer | Delay binding until pod scheduled (zone-aware) |
Use the `WaitForFirstConsumer` volumeBindingMode for multi-zone clusters. This ensures the PV is provisioned in the same availability zone as the pod, avoiding cross-zone latency and costs.

`emptyDir` is ephemeral — its data is deleted when the pod is removed. Use `medium: Memory` for a tmpfs-backed emptyDir (fast, but counts against memory limits). For shared RWX storage across nodes, use NFS, CephFS, or Longhorn.

# ── ServiceAccount ──
apiVersion: v1
kind: ServiceAccount
metadata:
  name: app-service-account
  namespace: production
automountServiceAccountToken: false # disable auto-mount unless needed
secrets: [] # K8s 1.24+: no auto token secret
---
# ── Role: namespace-scoped permissions ──
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: pod-reader
  namespace: production
rules:
  - apiGroups: [""]
    resources: ["pods", "pods/log"]
    verbs: ["get", "list", "watch"]
  - apiGroups: [""]
    resources: ["pods/exec"]
    verbs: ["create"]
  - apiGroups: ["apps"]
    resources: ["deployments"]
    verbs: ["get", "list", "watch", "patch"]
  - apiGroups: ["batch"]
    resources: ["jobs", "cronjobs"]
    verbs: ["get", "list", "watch"]
---
# ── RoleBinding: bind Role to subjects ──
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: read-pods
  namespace: production
subjects:
  - kind: ServiceAccount
    name: app-service-account
    namespace: production
  - kind: User
    name: jane
    apiGroup: rbac.authorization.k8s.io
roleRef:
  kind: Role
  name: pod-reader
  apiGroup: rbac.authorization.k8s.io
---
# ── ClusterRole: cluster-wide permissions ──
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: secret-reader
rules:
  - apiGroups: [""]
    resources: ["secrets"]
    verbs: ["get", "list", "watch"]
  - nonResourceURLs: ["/healthz", "/readyz", "/livez"]
    verbs: ["get"]
---
# ── ClusterRoleBinding: bind ClusterRole cluster-wide ──
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: read-secrets-global
subjects:
  - kind: ServiceAccount
    name: monitoring-sa
    namespace: monitoring
  # NOTE(review): system:masters is already cluster-admin; this binding
  # adds nothing for that group — confirm it is intentional
  - kind: Group
    name: system:masters
    apiGroup: rbac.authorization.k8s.io
roleRef:
  kind: ClusterRole
  name: secret-reader
  apiGroup: rbac.authorization.k8s.io
---
# ── Bind ClusterRole to specific namespace via RoleBinding ──
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: read-secrets-production
  namespace: production
subjects:
  - kind: ServiceAccount
    name: deploy-sa
    namespace: production
roleRef:
  kind: ClusterRole
  name: secret-reader # cluster-wide role scoped to this namespace
  apiGroup: rbac.authorization.k8s.io
---
# ── Pod Security Standards (K8s 1.23+) ──
# Enforce via namespace labels (K8s 1.25+)
apiVersion: v1
kind: Namespace
metadata:
  name: production
  labels:
    # Enforce: blocks non-compliant pods
    pod-security.kubernetes.io/enforce: restricted
    # Audit: logs violations (does not block)
    pod-security.kubernetes.io/audit: restricted
    # Warn: warns but allows (useful during migration)
    pod-security.kubernetes.io/warn: restricted
---
# Namespace for privileged workloads
apiVersion: v1
kind: Namespace
metadata:
  name: system
  labels:
    pod-security.kubernetes.io/enforce: privileged
    pod-security.kubernetes.io/audit: privileged
    pod-security.kubernetes.io/warn: privileged

| Level | Description |
|---|---|
| Privileged | Unrestricted — all pods allowed |
| Baseline | Minimally restrictive — prevents known privilege escalations |
| Restricted | Heavily restricted — follows best practices (drop ALL, runAsNonRoot, no hostNetwork, seccomp) |
| Verb | Description |
|---|---|
| get | Get a specific resource |
| list | List all resources of a type |
| watch | Watch for changes to resources |
| create | Create new resources |
| update | Update existing resources |
| patch | Partially update a resource |
| delete | Delete a resource |
| deletecollection | Delete multiple resources |
| * | All verbs (use sparingly!) |
# ── ResourceQuota: limit namespace resource consumption ──
apiVersion: v1
kind: ResourceQuota
metadata:
  name: namespace-quota
  namespace: production
spec:
  hard:
    # Compute resources
    requests.cpu: "20"
    requests.memory: "40Gi"
    limits.cpu: "40"
    limits.memory: "80Gi"
    ephemeral-storage: "100Gi"
    # Object counts
    pods: "50"
    services: "20"
    persistentvolumeclaims: "30"
    configmaps: "50"
    secrets: "50"
    replicationcontrollers: "10"
    # batch resources need the count/<resource>.<group> quota syntax
    count/jobs.batch: "10"
    count/cronjobs.batch: "5"
---
# ── LimitRange: set default/min/max resource constraints ──
apiVersion: v1
kind: LimitRange
metadata:
  name: default-limits
  namespace: production
spec:
  limits:
    - type: Container
      default: # default limits if not specified
        cpu: "500m"
        memory: "512Mi"
      defaultRequest: # default requests if not specified
        cpu: "100m"
        memory: "128Mi"
      max:
        cpu: "4"
        memory: "8Gi"
      min:
        cpu: "50m"
        memory: "64Mi"
      maxLimitRequestRatio:
        cpu: "10" # limit/request ratio cap

Set automountServiceAccountToken: false for pods that don't need API access.

The restricted profile enforces: runAsNonRoot, drop ALL capabilities, seccomp: RuntimeDefault, no hostNetwork/hostPID/hostIPC.

# ── Chart.yaml ──
apiVersion: v2
name: myapp
description: Production-grade web application Helm chart
type: application
version: 2.5.0 # chart version (semver)
appVersion: "1.5.0" # version of the application being deployed
kubeVersion: ">=1.28.0-0"
keywords:
  - web
  - api
  - production
home: https://github.com/myorg/myapp
sources:
  - https://github.com/myorg/myapp
maintainers:
  - name: DevOps Team
    email: devops@example.com
dependencies:
  - name: postgresql
    version: "15.x"
    repository: https://charts.bitnami.com/bitnami
    condition: postgresql.enabled
  - name: redis
    version: "18.x"
    repository: https://charts.bitnami.com/bitnami
    condition: redis.enabled

# ── values.yaml: default configuration ──
# Global settings
global:
  imageRegistry: ""
  imagePullSecrets: []
  namespaceOverride: ""

# Replica count
replicaCount: 3

# Container image
image:
  repository: myapp
  tag: "1.5.0"
  pullPolicy: IfNotPresent

# Service configuration
service:
  type: ClusterIP
  port: 80
  targetPort: 8080
  annotations: {}

# Ingress configuration
ingress:
  enabled: true
  className: nginx
  annotations:
    nginx.ingress.kubernetes.io/ssl-redirect: "true"
  hosts:
    - host: app.example.com
      paths:
        - path: /
          pathType: Prefix
  tls:
    - secretName: app-tls
      hosts:
        - app.example.com

# Resource requests and limits
resources:
  requests:
    cpu: 100m
    memory: 128Mi
  limits:
    cpu: 500m
    memory: 512Mi

# Horizontal Pod Autoscaler
autoscaling:
  enabled: true
  minReplicas: 3
  maxReplicas: 10
  targetCPUUtilizationPercentage: 70
  targetMemoryUtilizationPercentage: 80

# Pod Disruption Budget
pdb:
  enabled: true
  minAvailable: 1

# Liveness / readiness probes
livenessProbe:
  httpGet:
    path: /healthz
    port: http
  initialDelaySeconds: 15
  periodSeconds: 20
readinessProbe:
  httpGet:
    path: /ready
    port: http
  initialDelaySeconds: 5
  periodSeconds: 10

# Node selector, tolerations, affinity
nodeSelector: {}
tolerations: []
affinity: {}

# Toggles for optional components
postgresql:
  enabled: true
redis:
  enabled: false

# ── templates/deployment.yaml ──
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "myapp.fullname" . }}
  labels:
    {{- include "myapp.labels" . | nindent 4 }}
spec:
  replicas: {{ .Values.replicaCount }}
  selector:
    matchLabels:
      {{- include "myapp.selectorLabels" . | nindent 6 }}
  template:
    metadata:
      labels:
        {{- include "myapp.selectorLabels" . | nindent 8 }}
      annotations:
        # Roll pods when the ConfigMap content changes
        checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }}
    spec:
      serviceAccountName: {{ include "myapp.serviceAccountName" . }}
      {{- with .Values.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      containers:
        - name: {{ .Chart.Name }}
          image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
          imagePullPolicy: {{ .Values.image.pullPolicy }}
          ports:
            - name: http
              containerPort: {{ .Values.service.targetPort }}
              protocol: TCP
          livenessProbe:
            {{- toYaml .Values.livenessProbe | nindent 12 }}
          readinessProbe:
            {{- toYaml .Values.readinessProbe | nindent 12 }}
          resources:
            {{- toYaml .Values.resources | nindent 12 }}
          envFrom:
            - configMapRef:
                name: {{ include "myapp.fullname" . }}

# ── templates/service.yaml ──
apiVersion: v1
kind: Service
metadata:
  name: {{ include "myapp.fullname" . }}
  labels:
    {{- include "myapp.labels" . | nindent 4 }}
  {{- with .Values.service.annotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
spec:
  type: {{ .Values.service.type }}
  ports:
    # targetPort references the named container port, so the container
    # port number can change without touching the Service
    - port: {{ .Values.service.port }}
      targetPort: http
      protocol: TCP
      name: http
  selector:
    {{- include "myapp.selectorLabels" . | nindent 4 }}
# ── templates/ingress.yaml ──
{{- if .Values.ingress.enabled -}}
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: {{ include "myapp.fullname" . }}
  labels:
    {{- include "myapp.labels" . | nindent 4 }}
  {{- with .Values.ingress.annotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
spec:
  {{- if .Values.ingress.className }}
  ingressClassName: {{ .Values.ingress.className }}
  {{- end }}
  {{- if .Values.ingress.tls }}
  tls:
    {{- range .Values.ingress.tls }}
    - hosts:
        {{- range .hosts }}
        - {{ . | quote }}
        {{- end }}
      secretName: {{ .secretName }}
    {{- end }}
  {{- end }}
  rules:
    {{- range .Values.ingress.hosts }}
    - host: {{ .host | quote }}
      http:
        paths:
          {{- range .paths }}
          - path: {{ .path }}
            pathType: {{ .pathType }}
            backend:
              service:
                # $ escapes the range scope to reach the chart root context
                name: {{ include "myapp.fullname" $ }}
                port:
                  name: http
          {{- end }}
    {{- end }}
{{- end }}
# ── Helm CLI commands ──
# Add repositories
helm repo add bitnami https://charts.bitnami.com/bitnami
helm repo add jetstack https://charts.jetstack.io
helm repo update
# Search for charts
helm search repo nginx
helm search repo nginx --versions
helm show chart bitnami/nginx
# Install / upgrade
helm install myapp ./chart --namespace production --create-namespace
helm install myapp ./chart -f values-prod.yaml --set replicaCount=5
helm upgrade myapp ./chart --reuse-values
helm upgrade --install myapp ./chart # install or upgrade (idempotent)
# Rollback
helm rollback myapp 1 # rollback to revision 1
helm rollback myapp 0 # rollback to previous revision
helm history myapp # see revision history
# Debug / template
helm template myapp ./chart # render templates locally
helm lint ./chart # lint the chart
helm diff upgrade myapp ./chart # show diff (helm-diff plugin)
# Uninstall
helm uninstall myapp --namespace production
# List
helm list --all-namespaces
helm list -n production
# Package & dependency
helm dependency build ./chart # download dependencies
helm package ./chart              # create .tgz archive
| Function | Example | Description |
|---|---|---|
| include | include "myapp.labels" . | Include a named template |
| toYaml | toYaml .Values.resources | Convert to YAML block with indent |
| nindent | nindent 4 | Newline + indent |
| indent | indent 4 | Indent without newline |
| default | .Values.tag | default "latest" | Set default value |
| quote | .Values.host | quote | Wrap in double quotes |
| trimSuffix | trimSuffix "-snapshot" | Remove suffix from string |
| replace | replace " " "-" | String replacement |
| b64enc / b64dec | b64enc .Values.secret | Base64 encode/decode |
| sha256sum | sha256sum .Config | SHA256 hash |
| lookup | lookup "v1" "Secret" "ns" "name" | Read cluster resource |
| semver | semver ">=1.28" | Semantic version comparison |
Use helm upgrade --install for CI/CD pipelines. It is idempotent — it installs if the release does not exist, or upgrades if it does. Add --wait to block until all pods are ready.
Prefer --values files for environment-specific configs instead of --set flags. --set values are stored in the release secret, making it hard to track changes. Use --reuse-values when upgrading to preserve previous overrides.
# ── kubectl: essential commands ──
# ── Cluster Info ──
kubectl version --short
kubectl cluster-info
kubectl get nodes -o wide
kubectl describe node worker-01
kubectl top nodes
kubectl cordon worker-01 # mark node unschedulable
kubectl uncordon worker-01 # mark node schedulable
kubectl drain worker-01 --ignore-daemonsets --delete-emptydir-data
# ── Pods ──
kubectl get pods -n production -o wide
kubectl get pods -n production --sort-by=.metadata.creationTimestamp
kubectl get pods --all-namespaces --field-selector=status.phase=Failed
kubectl get pods -l app=api -l tier=frontend
kubectl get pods -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.status.phase}{"\n"}{end}'
kubectl describe pod web-app-xyz123 -n production
kubectl logs web-app-xyz123 -n production -f --tail=100
kubectl logs web-app-xyz123 -c web-container # specific container
kubectl logs --previous web-app-xyz123 # crashed container logs
kubectl exec -it web-app-xyz123 -- /bin/sh
kubectl exec -it web-app-xyz123 -c sidecar -- /bin/sh
# ── Port Forward ──
kubectl port-forward svc/api-service 8080:80 -n production
kubectl port-forward pod/web-app-xyz123 8080:8080 -n production
kubectl port-forward deploy/api-server 8080:8080
# ── Copy files ──
kubectl cp pod/web-app:/app/config.yaml ./local-config.yaml
kubectl cp ./local-config.yaml pod/web-app:/app/config.yaml
# ── Rollout Management ──
kubectl rollout status deploy/api-server -n production
kubectl rollout history deploy/api-server --revision=2
kubectl rollout undo deploy/api-server # rollback to previous
kubectl rollout undo deploy/api-server --to-revision=2
kubectl rollout restart deploy/api-server # restart all pods
kubectl set image deploy/api-server web=api:v2.6.0 --record   # note: --record is deprecated (since K8s 1.22)
# ── Advanced querying & debugging ──
# ── Output formats ──
kubectl get pods -o yaml # full YAML
kubectl get pods -o json # full JSON
kubectl get pods -o name # just resource names
kubectl get pods -o wide # with node IP
kubectl get pods -o custom-columns=NAME:.metadata.name,STATUS:.status.phase,NODE:.spec.nodeName
kubectl get pods -o jsonpath='{.items[0].status.podIP}'
# ── Events (critical for debugging) ──
kubectl get events --sort-by=.metadata.creationTimestamp
kubectl get events -n production --field-selector type=Warning
kubectl get events --field-selector involvedObject.kind=Pod
kubectl get events --field-selector involvedObject.name=web-app-xyz123
# ── Resource usage ──
kubectl top pods -n production --sort-by=memory
kubectl top pods -n production --sort-by=cpu
kubectl top pods -l app=api --containers
# ── Labels & selectors ──
kubectl label pod web-app env=staging --overwrite
kubectl label node worker-01 disktype=ssd
kubectl annotate pod web-app description="Debug pod"
kubectl get pods -l 'environment in (production,staging),tier notin (frontend)'
# ── Apply / edit / patch ──
kubectl apply -f deployment.yaml
kubectl apply -f ./manifests/ --recursive
kubectl edit deploy api-server
kubectl patch deploy api-server --type merge -p '{"spec":{"replicas":5}}'
kubectl patch deploy api-server --type='json' -p='[{"op":"replace","path":"/spec/replicas","value":5}]'
kubectl delete -f deployment.yaml
kubectl delete pod web-app --grace-period=0 --force # force delete
# ── Config maps & secrets ──
kubectl create configmap app-config --from-literal=KEY=VALUE
kubectl create configmap app-config --from-file=config.yaml
kubectl create configmap app-config --from-env-file=.env
kubectl create secret generic app-secret --from-literal=password=secret
kubectl create secret docker-registry regcred --docker-server=registry.example.com --docker-username=user --docker-password=pass
kubectl create secret generic tls-secret --from-file=tls.crt=cert.pem --from-file=tls.key=key.pem
# ── Prometheus: basic alerting rules ──
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: app-alerts
  namespace: monitoring
spec:
  groups:
    - name: app.rules
      rules:
        # High error rate: >5% of requests returning 5xx over 5 minutes
        - alert: HighErrorRate
          expr: |
            sum(rate(http_requests_total{status=~"5.."}[5m]))
            / sum(rate(http_requests_total[5m])) > 0.05
          for: 5m
          labels:
            severity: critical
          annotations:
            summary: "High error rate on {{ $labels.service }}"
            description: "Error rate is {{ $value | humanizePercentage }}"
        # Pod restart loop
        - alert: PodCrashLooping
          expr: rate(kube_pod_container_status_restarts_total[15m]) > 0.1
          for: 15m
          labels:
            severity: warning
          annotations:
            summary: "Pod {{ $labels.pod }} is crash looping"
        # High memory usage relative to the container's configured limit
        - alert: HighMemoryUsage
          expr: |
            container_memory_working_set_bytes
            / container_spec_memory_limit_bytes > 0.9
          for: 5m
          labels:
            severity: warning
          annotations:
            summary: "Container {{ $labels.container }} using >90% memory"
        # PVC nearly full
        - alert: PVCAlmostFull
          expr: |
            (kubelet_volume_stats_used_bytes
            / kubelet_volume_stats_capacity_bytes) > 0.85
          for: 5m
          labels:
            severity: warning
          annotations:
            summary: "PVC {{ $labels.persistentvolumeclaim }} is {{ $value | humanizePercentage }} full"
| Shortcut | Full Command |
|---|---|
| kubectl run nginx --image=nginx | Create a pod interactively |
| kubectl expose deploy nginx --port=80 | Create a Service for a Deployment |
| kubectl autoscale deploy nginx --min=2 --max=10 --cpu=80 | Create an HPA |
| kubectl create job backup --image=myapp -- ./backup.sh | Create a one-off Job |
| kubectl debug pod/web --image=busybox --target=web | Ephemeral debug container (alpha in K8s 1.18, stable in 1.25) |
| kubectl get all -n production | Get all common resources |
| kubectl api-resources | List all API resource types |
| kubectl api-versions | List available API versions |
| Step | Command / Check |
|---|---|
| 1. Check pod status | kubectl get pods -o wide |
| 2. Describe pod | kubectl describe pod <name> |
| 3. Check events | kubectl get events --sort-by=.metadata.creationTimestamp |
| 4. View logs | kubectl logs <pod> --previous |
| 5. Exec into pod | kubectl exec -it <pod> -- /bin/sh |
| 6. Check node | kubectl describe node <node> |
| 7. Check resources | kubectl top pods / kubectl top nodes |
| 8. Network debug | kubectl run tmp --image=busybox --rm -it -- wget -qO- http://svc:port |
# ── Metrics Server: required for HPA and kubectl top ──
# Install via Helm:
# helm install metrics-server metrics-server/metrics-server \
# --namespace kube-system --set args="{--kubelet-insecure-tls}"
# Verify
kubectl top nodes
kubectl top pods -A
# ── HorizontalPodAutoscaler ──
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: api-hpa
  namespace: production
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: api-server
  minReplicas: 3
  maxReplicas: 10
  metrics:
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: 80
  behavior:
    # Scale down slowly (max 10% of pods per minute, after a 5-minute
    # stabilization window) but scale up aggressively
    scaleDown:
      stabilizationWindowSeconds: 300
      policies:
        - type: Percent
          value: 10
          periodSeconds: 60
    scaleUp:
      stabilizationWindowSeconds: 30
      policies:
        - type: Percent
          value: 100
          periodSeconds: 15
        - type: Pods
          value: 4
          periodSeconds: 15
      selectPolicy: Max
kubectl get events --sort-by=.metadata.creationTimestamp is the single most useful debugging command in Kubernetes. Events tell you why pods aren't scheduling, why containers are crashing, why PVCs aren't binding, and more.
Metrics Server is required for kubectl top and HPA to work. In production, consider replacing Metrics Server with Prometheus Adapter for more advanced metrics (custom metrics, request rates, queue depths).
A Pod is the smallest deployable unit in Kubernetes. It encapsulates one or more containers that share the same network namespace (same IP, can communicate via localhost), storage volumes, and lifecycle. K8s uses Pods (not individual containers) because:
Containers in the same Pod can reach each other over localhost.
RollingUpdate gradually replaces old pods with new ones, ensuring zero downtime:
spec:
  replicas: 3
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxSurge: 1         # allow 1 extra pod during update
      maxUnavailable: 0   # never go below desired count
Use kubectl rollout status to monitor, kubectl rollout undo to rollback.
Both store configuration data as key-value pairs, but:
Secrets support typed formats (kubernetes.io/dockerconfigjson, kubernetes.io/tls, kubernetes.io/basic-auth); both can be injected as environment variables via envFrom or mounted as volumes. Best practice: set immutable: true and enable encryption at rest, or use external secret managers (Vault, Sealed Secrets, External Secrets Operator).
Services provide stable network endpoints for a set of Pods:
Headless Service (clusterIP: None): No cluster IP assigned. DNS returns individual pod IPs. Used for StatefulSets and custom load balancing.
Services use label selectors to route traffic. Endpoints are automatically updated as pods are added/removed. CoreDNS resolves {service-name}.{namespace}.svc.cluster.local.
StatefulSet: stable pod names (postgres-0, postgres-1), ordered creation/deletion, each pod gets its own PVC from volumeClaimTemplates, stable network identities via headless Service. Used for databases, message queues, distributed systems like Kafka/ZooKeeper.
Key difference: if a StatefulSet pod dies and is rescheduled, it gets the same name, same DNS, and mounts the same PVC — preserving its identity and data.
RBAC (Role-Based Access Control) has four objects:
Example: A ClusterRole can be bound to a namespace via a RoleBinding, giving the subject permissions only within that namespace.
# ServiceAccount can only read pods in the "production" namespace
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: pod-reader
  namespace: production
rules:
  - apiGroups: [""]      # "" means the core API group
    resources: ["pods"]
    verbs: ["get", "list", "watch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: read-pods
  namespace: production
subjects:
  - kind: ServiceAccount
    name: deploy-sa
    namespace: production
roleRef:
  kind: Role
  name: pod-reader
  apiGroup: rbac.authorization.k8s.io
All three support httpGet, tcpSocket, exec, and grpc (K8s 1.24+). Set initialDelaySeconds appropriately to avoid false-positive failures during startup.
1. kubectl get events --sort-by=.metadata.creationTimestamp
2. kubectl describe pod <name> — look for State, Last State, Events
3. kubectl logs <pod> --previous — see why the container exited
4. kubectl describe node <node> — the pod might be getting evicted due to resource pressure
A PersistentVolume (PV) is a piece of storage in the cluster provisioned by an administrator or dynamically by a StorageClass. A PersistentVolumeClaim (PVC) is a user's request for storage.
Dynamic provisioning: When a PVC references a StorageClass with a provisioner (e.g., ebs.csi.aws.com), the CSI driver automatically creates a PV and binds it to the PVC. No manual PV creation needed.
Init containers run to completion before any main containers start. They are perfect for:
- Running database migrations (./migrate --up) before the app starts
- Fixing volume permissions (chown volumes that will be used by the main container)
Init containers run sequentially (one after another). If any init container fails, K8s restarts it according to the pod's restartPolicy. They can use different images than the main container and have separate resource limits.
# Example: block startup until the database answers, then run migrations
initContainers:
  - name: wait-for-db
    image: busybox:1.36
    # Poll the Postgres service port every 2s until it accepts connections
    command: ['sh', '-c', 'until nc -z postgres-service 5432; do sleep 2; done']
  - name: run-migrations
    image: myapp:1.5.0
    command: ['./migrate', '--direction', 'up']