Monitoring Docker Containers with cAdvisor and Prometheus
Set up comprehensive Docker container monitoring with cAdvisor for metrics collection, Prometheus for storage and alerting, and Grafana for visualization.
Why Container Monitoring Matters
Running Docker containers without monitoring is like driving without a dashboard. You need to know CPU usage, memory consumption, network I/O, and disk usage for every container — in real-time and historically.
<div style="margin:2.5rem auto;max-width:600px;width:100%;text-align:center;"><svg viewBox="0 0 600 220" xmlns="http://www.w3.org/2000/svg" style="width:100%;height:auto;"><rect width="600" height="220" rx="12" fill="#1a1a2e"/><rect x="200" y="15" width="200" height="40" rx="8" fill="#6366f1"/><text x="300" y="40" text-anchor="middle" fill="#ffffff" font-size="13" font-family="system-ui" font-weight="bold">Orchestrator</text><line x1="250" y1="55" x2="100" y2="90" stroke="#e2e8f0" stroke-width="1.5" stroke-dasharray="4,3"/><line x1="300" y1="55" x2="300" y2="90" stroke="#e2e8f0" stroke-width="1.5" stroke-dasharray="4,3"/><line x1="350" y1="55" x2="500" y2="90" stroke="#e2e8f0" stroke-width="1.5" stroke-dasharray="4,3"/><rect x="40" y="90" width="120" height="110" rx="8" fill="none" stroke="#3b82f6" stroke-width="1.5"/><text x="100" y="110" text-anchor="middle" fill="#3b82f6" font-size="11" font-family="system-ui">Node 1</text><rect x="55" y="120" width="90" height="25" rx="4" fill="#6366f1" opacity="0.7"/><text x="100" y="137" text-anchor="middle" fill="#ffffff" font-size="10" font-family="system-ui">Container A</text><rect x="55" y="150" width="90" height="25" rx="4" fill="#a855f7" opacity="0.7"/><text x="100" y="167" text-anchor="middle" fill="#ffffff" font-size="10" font-family="system-ui">Container B</text><rect x="240" y="90" width="120" height="110" rx="8" fill="none" stroke="#3b82f6" stroke-width="1.5"/><text x="300" y="110" text-anchor="middle" fill="#3b82f6" font-size="11" font-family="system-ui">Node 2</text><rect x="255" y="120" width="90" height="25" rx="4" fill="#2dd4bf" opacity="0.7"/><text x="300" y="137" text-anchor="middle" fill="#1a1a2e" font-size="10" font-family="system-ui">Container C</text><rect x="255" y="150" width="90" height="25" rx="4" fill="#6366f1" opacity="0.7"/><text x="300" y="167" text-anchor="middle" fill="#ffffff" font-size="10" font-family="system-ui">Container A</text><rect x="440" y="90" width="120" height="110" rx="8" 
fill="none" stroke="#3b82f6" stroke-width="1.5"/><text x="500" y="110" text-anchor="middle" fill="#3b82f6" font-size="11" font-family="system-ui">Node 3</text><rect x="455" y="120" width="90" height="25" rx="4" fill="#a855f7" opacity="0.7"/><text x="500" y="137" text-anchor="middle" fill="#ffffff" font-size="10" font-family="system-ui">Container B</text><rect x="455" y="150" width="90" height="25" rx="4" fill="#f59e0b" opacity="0.7"/><text x="500" y="167" text-anchor="middle" fill="#1a1a2e" font-size="10" font-family="system-ui">Container D</text></svg><p style="margin-top:0.75rem;font-size:0.85rem;color:#94a3b8;font-style:italic;line-height:1.4;">Container orchestration distributes workloads across multiple nodes for resilience and scale.</p></div>
The Monitoring Stack
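Our monitoring stack consists of three components: cAdvisor for metrics collection, Prometheus for storage and alerting, and Grafana for visualization.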
Docker Compose Setup
version: "3.8"

services:
  # Per-container resource metrics; prometheus.yml scrapes this service at
  # cadvisor:8080.
  cadvisor:
    # NOTE(review): consider pinning a release tag instead of `latest` so
    # upgrades are deliberate.
    image: gcr.io/cadvisor/cadvisor:latest
    container_name: cadvisor
    privileged: true
    devices:
      - /dev/kmsg:/dev/kmsg
    volumes:
      # Host paths, all mounted read-only.
      - /:/rootfs:ro
      - /var/run:/var/run:ro
      - /sys:/sys:ro
      - /var/lib/docker/:/var/lib/docker:ro
      - /dev/disk/:/dev/disk:ro
    ports:
      - "8080:8080"
    restart: unless-stopped

  # Host-level metrics. prometheus.yml scrapes node-exporter:9100 and the
  # HighDiskUsage alert reads node_filesystem_* series, so this service has to
  # exist on the Compose network under exactly this name.
  node-exporter:
    image: prom/node-exporter:latest
    container_name: node-exporter
    command:
      # Read host filesystem data from the bind mount below.
      - '--path.rootfs=/host'
    pid: host
    volumes:
      - /:/host:ro,rslave
    restart: unless-stopped

  prometheus:
    image: prom/prometheus:latest
    container_name: prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--storage.tsdb.retention.time=30d'
      - '--web.enable-lifecycle'
    volumes:
      # Config files are only read by Prometheus, so mount them read-only.
      - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro
      - ./prometheus/alert-rules.yml:/etc/prometheus/alert-rules.yml:ro
      - prometheus_data:/prometheus
    extra_hosts:
      # Lets the "docker" scrape job resolve host.docker.internal on Linux,
      # where Docker does not define that name by default.
      - "host.docker.internal:host-gateway"
    ports:
      - "9090:9090"
    restart: unless-stopped

  grafana:
    image: grafana/grafana:latest
    container_name: grafana
    environment:
      GF_SECURITY_ADMIN_USER: admin
      # Placeholder: replace before deploying and keep the real value out of
      # version control.
      GF_SECURITY_ADMIN_PASSWORD: your-grafana-password
      GF_SERVER_ROOT_URL: https://monitoring.example.com
    volumes:
      - grafana_data:/var/lib/grafana
      - ./grafana/provisioning:/etc/grafana/provisioning
    ports:
      - "3000:3000"
    restart: unless-stopped

# Named volumes that persist Prometheus and Grafana data.
volumes:
  prometheus_data: {}
  grafana_data: {}
Prometheus Configuration
# prometheus/prometheus.yml
# Scrape and rule-evaluation settings for the monitoring stack.
global:
  scrape_interval: 15s
  evaluation_interval: 15s

# Alerting rules, kept in a separate file.
rule_files:
  - 'alert-rules.yml'

scrape_configs:
  # Per-container metrics from the cadvisor service.
  - job_name: 'cadvisor'
    static_configs:
      - targets:
          - 'cadvisor:8080'

  # Prometheus scraping its own metrics endpoint.
  - job_name: 'prometheus'
    static_configs:
      - targets:
          - 'localhost:9090'

  # Host-level metrics.
  - job_name: 'node-exporter'
    static_configs:
      - targets:
          - 'node-exporter:9100'

  # NOTE(review): presumably the Docker daemon's own metrics endpoint on the
  # host; confirm it is enabled and listening on 9323.
  - job_name: 'docker'
    static_configs:
      - targets:
          - 'host.docker.internal:9323'
Alert Rules
# prometheus/alert-rules.yml
# Alerting rules loaded by Prometheus through `rule_files` in prometheus.yml.
groups:
  - name: container-alerts
    rules:
      # Per-container CPU rate, expressed as a percentage of one core.
      - alert: ContainerHighCPU
        expr: rate(container_cpu_usage_seconds_total{name!=""}[5m]) * 100 > 80
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Container {{ $labels.name }} high CPU usage"
          description: "Container {{ $labels.name }} CPU usage is above 80% for 5 minutes"

      # Containers without a memory limit report a limit of 0. The `> 0`
      # filter drops those series so the ratio never becomes +Inf, which would
      # otherwise satisfy `> 85` and fire for every unlimited container.
      - alert: ContainerHighMemory
        expr: container_memory_usage_bytes{name!=""} / (container_spec_memory_limit_bytes{name!=""} > 0) * 100 > 85
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Container {{ $labels.name }} high memory usage"

      # Fires per container once its last-seen timestamp is over a minute old.
      # absent() cannot express this: it returns a result only when no matching
      # series exist at all, and with a regex matcher the result carries no
      # `name` label for the summary to print.
      # NOTE(review): no `for:` delay, on the assumption that cAdvisor stops
      # exporting a removed container's series soon after it exits; confirm
      # against your cAdvisor version.
      - alert: ContainerDown
        expr: time() - container_last_seen{name!=""} > 60
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: "Container {{ $labels.name }} is down"

      # Root filesystem usage from node-exporter's node_filesystem_* series.
      - alert: HighDiskUsage
        expr: (node_filesystem_size_bytes{mountpoint="/"} - node_filesystem_avail_bytes{mountpoint="/"}) / node_filesystem_size_bytes{mountpoint="/"} * 100 > 85
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Disk usage above 85%"
<div style="margin:2.5rem auto;max-width:600px;width:100%;text-align:center;"><svg viewBox="0 0 600 200" xmlns="http://www.w3.org/2000/svg" style="width:100%;height:auto;"><rect width="600" height="200" rx="12" fill="#1a1a2e"/><rect x="30" y="30" width="100" height="130" rx="6" fill="none" stroke="#3b82f6" stroke-width="1.5"/><text x="80" y="55" text-anchor="middle" fill="#3b82f6" font-size="10" font-family="monospace">docker-</text><text x="80" y="70" text-anchor="middle" fill="#3b82f6" font-size="10" font-family="monospace">compose</text><text x="80" y="85" text-anchor="middle" fill="#3b82f6" font-size="10" font-family="monospace">.yml</text><line x1="45" y1="95" x2="115" y2="95" stroke="#3b82f6" stroke-width="0.5" opacity="0.5"/><rect x="50" y="105" width="50" height="8" rx="2" fill="#94a3b8" opacity="0.3"/><rect x="50" y="118" width="60" height="8" rx="2" fill="#94a3b8" opacity="0.3"/><rect x="50" y="131" width="40" height="8" rx="2" fill="#94a3b8" opacity="0.3"/><path d="M135,95 L175,95" stroke="#e2e8f0" stroke-width="2" marker-end="url(#arrow2)"/><defs><marker id="arrow2" markerWidth="8" markerHeight="6" refX="8" refY="3" orient="auto"><path d="M0,0 L8,3 L0,6" fill="#e2e8f0"/></marker></defs><rect x="180" y="20" width="130" height="35" rx="6" fill="#6366f1" opacity="0.85"/><text x="245" y="42" text-anchor="middle" fill="#ffffff" font-size="11" font-family="system-ui">Web App</text><rect x="180" y="62" width="130" height="35" rx="6" fill="#a855f7" opacity="0.85"/><text x="245" y="84" text-anchor="middle" fill="#ffffff" font-size="11" font-family="system-ui">API Server</text><rect x="180" y="104" width="130" height="35" rx="6" fill="#2dd4bf" opacity="0.85"/><text x="245" y="126" text-anchor="middle" fill="#1a1a2e" font-size="11" font-family="system-ui">Database</text><rect x="180" y="146" width="130" height="35" rx="6" fill="#f59e0b" opacity="0.85"/><text x="245" y="168" text-anchor="middle" fill="#1a1a2e" font-size="11" 
font-family="system-ui">Cache</text><rect x="370" y="40" width="200" height="130" rx="8" fill="none" stroke="#e2e8f0" stroke-width="1" stroke-dasharray="5,4"/><text x="470" y="62" text-anchor="middle" fill="#e2e8f0" font-size="10" font-family="system-ui">Docker Network</text><line x1="310" y1="37" x2="390" y2="80" stroke="#94a3b8" stroke-width="1" opacity="0.5"/><line x1="310" y1="79" x2="390" y2="100" stroke="#94a3b8" stroke-width="1" opacity="0.5"/><line x1="310" y1="121" x2="390" y2="120" stroke="#94a3b8" stroke-width="1" opacity="0.5"/><line x1="310" y1="163" x2="390" y2="140" stroke="#94a3b8" stroke-width="1" opacity="0.5"/><circle cx="400" cy="80" r="5" fill="#6366f1"/><circle cx="400" cy="100" r="5" fill="#a855f7"/><circle cx="400" cy="120" r="5" fill="#2dd4bf"/><circle cx="400" cy="140" r="5" fill="#f59e0b"/><text x="470" y="85" text-anchor="middle" fill="#94a3b8" font-size="10" font-family="system-ui">:3000</text><text x="470" y="105" text-anchor="middle" fill="#94a3b8" font-size="10" font-family="system-ui">:8080</text><text x="470" y="125" text-anchor="middle" fill="#94a3b8" font-size="10" font-family="system-ui">:5432</text><text x="470" y="145" text-anchor="middle" fill="#94a3b8" font-size="10" font-family="system-ui">:6379</text></svg><p style="margin-top:0.75rem;font-size:0.85rem;color:#94a3b8;font-style:italic;line-height:1.4;">Docker Compose defines your entire application stack in a single YAML file.</p></div>
Useful PromQL Queries
# CPU usage per container (percentage of one core)
rate(container_cpu_usage_seconds_total{name!=""}[5m]) * 100
# Memory usage per container (MiB)
container_memory_usage_bytes{name!=""} / 1024 / 1024
# Network received per container (bytes/sec)
rate(container_network_receive_bytes_total{name!=""}[5m])
# Network transmitted per container (bytes/sec)
rate(container_network_transmit_bytes_total{name!=""}[5m])
# Disk read per container (bytes/sec)
rate(container_fs_reads_bytes_total{name!=""}[5m])
# Container restarts in the last hour. cAdvisor exposes no restart counter,
# so count how often the container's start time changed instead.
changes(container_start_time_seconds{name!=""}[1h])
# Top 10 containers by memory usage
topk(10, container_memory_usage_bytes{name!=""})
Grafana Dashboard
Import the Docker Container dashboard (ID: 11600) from grafana.com for a pre-built view. For custom dashboards, use these panels:
Container Overview Panel
{
  "title": "Container CPU Usage",
  "type": "timeseries",
  "datasource": "Prometheus",
  "targets": [
    {
      "expr": "rate(container_cpu_usage_seconds_total{name!=''}[5m]) * 100",
      "legendFormat": "{{ name }}"
    }
  ],
  "fieldConfig": {
    "defaults": {
      "unit": "percent",
      "thresholds": {
        "steps": [
          {
            "value": 0,
            "color": "green"
          },
          {
            "value": 50,
            "color": "yellow"
          },
          {
            "value": 80,
            "color": "red"
          }
        ]
      }
    }
  }
}
Alertmanager Integration
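Route Prometheus alerts through Alertmanager to ntfy for push notifications. For this to take effect, an Alertmanager instance must be running alongside the stack, and Prometheus must be pointed at it with an `alerting.alertmanagers` entry in `prometheus.yml`: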
# alertmanager.yml
# Sends every alert to a single webhook receiver.
route:
  receiver: 'ntfy'
  group_wait: 30s
  group_interval: 5m
  repeat_interval: 4h

receivers:
  - name: 'ntfy'
    webhook_configs:
      - url: 'https://notify.example.com/ops-alerts'
        send_resolved: true
<div style="margin:2.5rem auto;max-width:600px;width:100%;text-align:center;"><svg viewBox="0 0 600 200" xmlns="http://www.w3.org/2000/svg" style="width:100%;height:auto;"><rect width="600" height="200" rx="12" fill="#1a1a2e"/><rect x="15" y="10" width="570" height="25" rx="6" fill="#6366f1" opacity="0.3"/><circle cx="30" cy="22" r="4" fill="#ef4444"/><circle cx="42" cy="22" r="4" fill="#f59e0b"/><circle cx="54" cy="22" r="4" fill="#2dd4bf"/><text x="300" y="27" text-anchor="middle" fill="#ffffff" font-size="10" font-family="system-ui">Monitoring Dashboard</text><rect x="20" y="45" width="130" height="55" rx="6" fill="#6366f1" opacity="0.2"/><text x="85" y="65" text-anchor="middle" fill="#94a3b8" font-size="9" font-family="system-ui">CPU Usage</text><text x="85" y="88" text-anchor="middle" fill="#2dd4bf" font-size="18" font-family="system-ui" font-weight="bold">23%</text><rect x="160" y="45" width="130" height="55" rx="6" fill="#6366f1" opacity="0.2"/><text x="225" y="65" text-anchor="middle" fill="#94a3b8" font-size="9" font-family="system-ui">Memory</text><text x="225" y="88" text-anchor="middle" fill="#f59e0b" font-size="18" font-family="system-ui" font-weight="bold">6.2 GB</text><rect x="300" y="45" width="130" height="55" rx="6" fill="#6366f1" opacity="0.2"/><text x="365" y="65" text-anchor="middle" fill="#94a3b8" font-size="9" font-family="system-ui">Requests/s</text><text x="365" y="88" text-anchor="middle" fill="#6366f1" font-size="18" font-family="system-ui" font-weight="bold">1.2K</text><rect x="440" y="45" width="140" height="55" rx="6" fill="#6366f1" opacity="0.2"/><text x="510" y="65" text-anchor="middle" fill="#94a3b8" font-size="9" font-family="system-ui">Uptime</text><text x="510" y="88" text-anchor="middle" fill="#2dd4bf" font-size="18" font-family="system-ui" font-weight="bold">99.9%</text><rect x="20" y="110" width="560" height="80" rx="6" fill="#6366f1" opacity="0.1"/><text x="45" y="125" fill="#94a3b8" font-size="8" 
font-family="system-ui">Response Time (ms)</text><polyline points="40,170 80,155 120,160 160,140 200,145 240,135 280,150 320,130 360,125 400,140 440,120 480,115 520,125 560,110" fill="none" stroke="#6366f1" stroke-width="2"/><polyline points="40,170 80,155 120,160 160,140 200,145 240,135 280,150 320,130 360,125 400,140 440,120 480,115 520,125 560,110" fill="url(#chartGrad)" stroke="none" opacity="0.3"/><defs><linearGradient id="chartGrad" x1="0" y1="0" x2="0" y2="1"><stop offset="0%" stop-color="#6366f1"/><stop offset="100%" stop-color="transparent"/></linearGradient></defs><line x1="40" y1="130" x2="560" y2="130" stroke="#e2e8f0" stroke-width="0.3" opacity="0.2"/><line x1="40" y1="150" x2="560" y2="150" stroke="#e2e8f0" stroke-width="0.3" opacity="0.2"/><line x1="40" y1="170" x2="560" y2="170" stroke="#e2e8f0" stroke-width="0.3" opacity="0.2"/></svg><p style="margin-top:0.75rem;font-size:0.85rem;color:#94a3b8;font-style:italic;line-height:1.4;">Real-time monitoring dashboard showing CPU, memory, request rate, and response time trends.</p></div>
Resource Usage
The monitoring stack itself is lightweight:
Total overhead: ~280MB RAM for monitoring 50+ containers.
At TechSaaS, we run Grafana + Loki + Promtail for log aggregation and add cAdvisor + Prometheus for container metrics. The entire monitoring stack uses about 127MB and gives us full visibility into every container.
Need production monitoring for your Docker infrastructure? Contact [email protected].
Need help with devops?
TechSaaS provides expert consulting and managed services for cloud infrastructure, DevOps, and AI/ML operations.