add container-monitor example

This commit is contained in:
2025-11-28 20:52:48 +05:30
parent 600993f626
commit bbe4990878
7 changed files with 774 additions and 515 deletions

View File

@ -0,0 +1,49 @@
# Container Monitor TUI
A beautiful terminal-based container monitoring tool that combines syscall tracking, file I/O monitoring, and network traffic analysis using eBPF.
## Features
- 🎯 **Interactive Cgroup Selection** - Navigate and select cgroups with arrow keys
- 📊 **Real-time Monitoring** - Live graphs and statistics
- 🔥 **Syscall Tracking** - Total syscall count per cgroup
- 💾 **File I/O Monitoring** - Read/write operations and bytes with graphs
- 🌐 **Network Traffic** - RX/TX packets and bytes with live graphs
-**Efficient Caching** - Reduced /proc lookups for better performance
- 🎨 **Beautiful TUI** - Clean, colorful terminal interface
## Requirements
- Python 3.7+
- pythonbpf
- Root privileges (for eBPF)
## Installation
```bash
# Ensure you have pythonbpf installed
pip install pythonbpf
# Run the monitor
sudo $(which python) container_monitor.py
```
## Usage
1. **Selection Screen**: Use ↑↓ arrow keys to navigate through cgroups, press ENTER to select
2. **Monitoring Screen**: View real-time graphs and statistics, press ESC or 'b' to go back
3. **Exit**: Press 'q' at any time to quit
## Architecture
- `container_monitor.py` - Main BPF program combining all three tracers
- `data_collector.py` - Data collection, caching, and history management
- `tui. py` - Terminal user interface with selection and monitoring screens
## BPF Programs
- **vfs_read/vfs_write** - Track file I/O operations
- **__netif_receive_skb/__dev_queue_xmit** - Track network traffic
- **raw_syscalls/sys_enter** - Count all syscalls
All programs filter by cgroup ID for per-container monitoring.

View File

@ -0,0 +1,221 @@
"""Container Monitor - TUI-based cgroup monitoring combining syscall, file I/O, and network tracking."""
import time
import os
from pathlib import Path
from pythonbpf import bpf, map, section, bpfglobal, struct, BPF
from pythonbpf.maps import HashMap
from pythonbpf.helper import get_current_cgroup_id
from ctypes import c_int32, c_uint64, c_void_p
from vmlinux import struct_pt_regs, struct_sk_buff
from data_collector import ContainerDataCollector
from tui import ContainerMonitorTUI
# ==================== BPF Structs ====================
@bpf
@struct
class read_stats:
bytes: c_uint64
ops: c_uint64
@bpf
@struct
class write_stats:
bytes: c_uint64
ops: c_uint64
@bpf
@struct
class net_stats:
rx_packets: c_uint64
tx_packets: c_uint64
rx_bytes: c_uint64
tx_bytes: c_uint64
# ==================== BPF Maps ====================
@bpf
@map
def read_map() -> HashMap:
return HashMap(key=c_uint64, value=read_stats, max_entries=1024)
@bpf
@map
def write_map() -> HashMap:
return HashMap(key=c_uint64, value=write_stats, max_entries=1024)
@bpf
@map
def net_stats_map() -> HashMap:
return HashMap(key=c_uint64, value=net_stats, max_entries=1024)
@bpf
@map
def syscall_count() -> HashMap:
return HashMap(key=c_uint64, value=c_uint64, max_entries=1024)
# ==================== File I/O Tracing ====================
@bpf
@section("kprobe/vfs_read")
def trace_read(ctx: struct_pt_regs) -> c_int32:
cg = get_current_cgroup_id()
count = c_uint64(ctx.dx)
ptr = read_map.lookup(cg)
if ptr:
s = read_stats()
s.bytes = ptr.bytes + count
s.ops = ptr.ops + 1
read_map.update(cg, s)
else:
s = read_stats()
s.bytes = count
s.ops = c_uint64(1)
read_map.update(cg, s)
return c_int32(0)
@bpf
@section("kprobe/vfs_write")
def trace_write(ctx1: struct_pt_regs) -> c_int32:
cg = get_current_cgroup_id()
count = c_uint64(ctx1.dx)
ptr = write_map.lookup(cg)
if ptr:
s = write_stats()
s.bytes = ptr.bytes + count
s.ops = ptr.ops + 1
write_map.update(cg, s)
else:
s = write_stats()
s.bytes = count
s.ops = c_uint64(1)
write_map.update(cg, s)
return c_int32(0)
# ==================== Network I/O Tracing ====================
@bpf
@section("kprobe/__netif_receive_skb")
def trace_netif_rx(ctx2: struct_pt_regs) -> c_int32:
cgroup_id = get_current_cgroup_id()
skb = struct_sk_buff(ctx2.di)
pkt_len = c_uint64(skb.len)
stats_ptr = net_stats_map.lookup(cgroup_id)
if stats_ptr:
stats = net_stats()
stats.rx_packets = stats_ptr.rx_packets + 1
stats.tx_packets = stats_ptr.tx_packets
stats.rx_bytes = stats_ptr.rx_bytes + pkt_len
stats.tx_bytes = stats_ptr.tx_bytes
net_stats_map.update(cgroup_id, stats)
else:
stats = net_stats()
stats.rx_packets = c_uint64(1)
stats.tx_packets = c_uint64(0)
stats.rx_bytes = pkt_len
stats.tx_bytes = c_uint64(0)
net_stats_map.update(cgroup_id, stats)
return c_int32(0)
@bpf
@section("kprobe/__dev_queue_xmit")
def trace_dev_xmit(ctx3: struct_pt_regs) -> c_int32:
cgroup_id = get_current_cgroup_id()
skb = struct_sk_buff(ctx3.di)
pkt_len = c_uint64(skb.len)
stats_ptr = net_stats_map.lookup(cgroup_id)
if stats_ptr:
stats = net_stats()
stats.rx_packets = stats_ptr.rx_packets
stats.tx_packets = stats_ptr.tx_packets + 1
stats.rx_bytes = stats_ptr.rx_bytes
stats.tx_bytes = stats_ptr.tx_bytes + pkt_len
net_stats_map.update(cgroup_id, stats)
else:
stats = net_stats()
stats.rx_packets = c_uint64(0)
stats.tx_packets = c_uint64(1)
stats.rx_bytes = c_uint64(0)
stats.tx_bytes = pkt_len
net_stats_map.update(cgroup_id, stats)
return c_int32(0)
# ==================== Syscall Tracing ====================
@bpf
@section("tracepoint/raw_syscalls/sys_enter")
def count_syscalls(ctx: c_void_p) -> c_int32:
cgroup_id = get_current_cgroup_id()
count_ptr = syscall_count.lookup(cgroup_id)
if count_ptr:
new_count = count_ptr + c_uint64(1)
syscall_count.update(cgroup_id, new_count)
else:
syscall_count.update(cgroup_id, c_uint64(1))
return c_int32(0)
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
# ==================== Main ====================
if __name__ == "__main__":
print("🔥 Loading BPF programs...")
# Load and attach BPF program
b = BPF()
b.load()
b.attach_all()
# Get map references and enable struct deserialization
read_map_ref = b["read_map"]
write_map_ref = b["write_map"]
net_stats_map_ref = b["net_stats_map"]
syscall_count_ref = b["syscall_count"]
read_map_ref.set_value_struct("read_stats")
write_map_ref.set_value_struct("write_stats")
net_stats_map_ref.set_value_struct("net_stats")
print("✅ BPF programs loaded and attached")
# Setup data collector
collector = ContainerDataCollector(
read_map_ref,
write_map_ref,
net_stats_map_ref,
syscall_count_ref
)
# Create and run TUI
tui = ContainerMonitorTUI(collector)
tui.run()

View File

@ -0,0 +1,208 @@
"""Data collection and management for container monitoring."""
import os
import time
from pathlib import Path
from typing import Dict, List, Set, Optional, Tuple
from dataclasses import dataclass
from collections import deque, defaultdict
@dataclass
class CgroupInfo:
"""Information about a cgroup."""
id: int
name: str
path: str
@dataclass
class ContainerStats:
"""Statistics for a container/cgroup."""
cgroup_id: int
cgroup_name: str
# File I/O
read_ops: int = 0
read_bytes: int = 0
write_ops: int = 0
write_bytes: int = 0
# Network I/O
rx_packets: int = 0
rx_bytes: int = 0
tx_packets: int = 0
tx_bytes: int = 0
# Syscalls
syscall_count: int = 0
# Timestamp
timestamp: float = 0.0
class ContainerDataCollector:
"""Collects and manages container monitoring data from BPF."""
def __init__(self, read_map, write_map, net_stats_map, syscall_map, history_size: int = 100):
self.read_map = read_map
self.write_map = write_map
self.net_stats_map = net_stats_map
self.syscall_map = syscall_map
# Caching
self._cgroup_cache: Dict[int, CgroupInfo] = {}
self._cgroup_cache_time = 0
self._cache_ttl = 5.
0 # Refresh cache every 5 seconds
# Historical data for graphing
self._history_size = history_size
self._history: Dict[int, deque] = defaultdict(lambda: deque(maxlen=history_size))
def get_all_cgroups(self) -> List[CgroupInfo]:
"""Get all cgroups with caching."""
current_time = time.time()
# Use cached data if still valid
if current_time - self._cgroup_cache_time < self._cache_ttl:
return list(self._cgroup_cache.values())
# Refresh cache
self._refresh_cgroup_cache()
return list(self._cgroup_cache.values())
def _refresh_cgroup_cache(self):
"""Refresh the cgroup cache from /proc."""
cgroup_map: Dict[int, Set[str]] = defaultdict(set)
# Scan /proc to find all cgroups
for proc_dir in Path("/proc").glob("[0-9]*"):
try:
cgroup_file = proc_dir / "cgroup"
if not cgroup_file.exists():
continue
with open(cgroup_file) as f:
for line in f:
parts = line.strip().split(":")
if len(parts) >= 3:
cgroup_path = parts[2]
cgroup_mount = f"/sys/fs/cgroup{cgroup_path}"
if os.path.exists(cgroup_mount):
stat_info = os.stat(cgroup_mount)
cgroup_id = stat_info.st_ino
cgroup_map[cgroup_id].add(cgroup_path)
except (PermissionError, FileNotFoundError, OSError):
continue
# Update cache with best names
new_cache = {}
for cgroup_id, paths in cgroup_map.items():
# Pick the most descriptive path
best_path = self._get_best_cgroup_path(paths)
name = self._get_cgroup_name(best_path)
new_cache[cgroup_id] = CgroupInfo(
id=cgroup_id,
name=name,
path=best_path
)
self._cgroup_cache = new_cache
self._cgroup_cache_time = time.time()
def _get_best_cgroup_path(self, paths: Set[str]) -> str:
"""Select the most descriptive cgroup path."""
path_list = list(paths)
# Prefer paths with more components (more specific)
# Prefer paths containing docker, podman, etc.
for keyword in ['docker', 'podman', 'kubernetes', 'k8s', 'systemd']:
for path in path_list:
if keyword in path.lower():
return path
# Return longest path (most specific)
return max(path_list, key=lambda p: (len(p.split('/')), len(p)))
def _get_cgroup_name(self, path: str) -> str:
"""Extract a friendly name from cgroup path."""
if not path or path == "/":
return "root"
# Remove leading/trailing slashes
path = path.strip("/")
# Try to extract container ID or service name
parts = path.split("/")
# For Docker: /docker/<container_id>
if "docker" in path.lower():
for i, part in enumerate(parts):
if part.lower() == "docker" and i + 1 < len(parts):
container_id = parts[i + 1][:12] # Short ID
return f"docker:{container_id}"
# For systemd services
if "system. slice" in path:
for part in parts:
if part.endswith(". service"):
return part.replace(".service", "")
# For user slices
if "user.slice" in path:
return f"user:{parts[-1]}" if parts else "user"
# Default: use last component
return parts[-1] if parts else path
def get_stats_for_cgroup(self, cgroup_id: int) -> ContainerStats:
"""Get current statistics for a specific cgroup."""
cgroup_info = self._cgroup_cache.get(cgroup_id)
cgroup_name = cgroup_info.name if cgroup_info else f"cgroup-{cgroup_id}"
stats = ContainerStats(
cgroup_id=cgroup_id,
cgroup_name=cgroup_name,
timestamp=time.time()
)
# Get file I/O stats
read_stat = self.read_map.lookup(cgroup_id)
if read_stat:
stats.read_ops = int(read_stat.ops)
stats.read_bytes = int(read_stat.bytes)
write_stat = self.write_map.lookup(cgroup_id)
if write_stat:
stats.write_ops = int(write_stat.ops)
stats.write_bytes = int(write_stat.bytes)
# Get network stats
net_stat = self.net_stats_map.lookup(cgroup_id)
if net_stat:
stats.rx_packets = int(net_stat.rx_packets)
stats.rx_bytes = int(net_stat.rx_bytes)
stats.tx_packets = int(net_stat.tx_packets)
stats.tx_bytes = int(net_stat.tx_bytes)
# Get syscall count
syscall_cnt = self.syscall_map.lookup(cgroup_id)
if syscall_cnt is not None:
stats.syscall_count = int(syscall_cnt)
# Add to history
self._history[cgroup_id].append(stats)
return stats
def get_history(self, cgroup_id: int) -> List[ContainerStats]:
"""Get historical statistics for graphing."""
return list(self._history[cgroup_id])
def get_cgroup_info(self, cgroup_id: int) -> Optional[CgroupInfo]:
"""Get cached cgroup information."""
return self._cgroup_cache.get(cgroup_id)

View File

@ -1,191 +0,0 @@
import logging
import time
import os
from pathlib import Path
from pythonbpf import bpf, map, section, bpfglobal, struct, compile, BPF
from pythonbpf.maps import HashMap
from pythonbpf.helper import get_current_cgroup_id
from ctypes import c_int32, c_uint64
from vmlinux import struct_pt_regs
@bpf
@struct
class read_stats:
bytes: c_uint64
ops: c_uint64
@bpf
@struct
class write_stats:
bytes: c_uint64
ops: c_uint64
@bpf
@map
def read_map() -> HashMap:
return HashMap(key=c_uint64, value=read_stats, max_entries=1024)
@bpf
@map
def write_map() -> HashMap:
return HashMap(key=c_uint64, value=write_stats, max_entries=1024)
@bpf
@section("kprobe/vfs_read")
def trace_read(ctx: struct_pt_regs) -> c_int32:
cg = get_current_cgroup_id()
count = c_uint64(ctx.dx)
ptr = read_map.lookup(cg)
if ptr:
s = read_stats()
s.bytes = ptr.bytes + count
s.ops = ptr.ops + 1
read_map.update(cg, s)
else:
s = read_stats()
s.bytes = count
s.ops = c_uint64(1)
read_map.update(cg, s)
return c_int32(0)
@bpf
@section("kprobe/vfs_write")
def trace_write(ctx1: struct_pt_regs) -> c_int32:
cg = get_current_cgroup_id()
count = c_uint64(ctx1.dx)
ptr = write_map.lookup(cg)
if ptr:
s = write_stats()
s.bytes = ptr.bytes + count
s.ops = ptr.ops + 1
write_map.update(cg, s)
else:
s = write_stats()
s.bytes = count
s.ops = c_uint64(1)
write_map.update(cg, s)
return c_int32(0)
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
# Load and attach BPF program
b = BPF()
b.load()
b.attach_all()
# Get map references and enable struct deserialization
read_map_ref = b["read_map"]
write_map_ref = b["write_map"]
read_map_ref.set_value_struct("read_stats")
write_map_ref.set_value_struct("write_stats")
def get_cgroup_ids():
"""Get all cgroup IDs from the system"""
cgroup_ids = set()
# Get cgroup IDs from running processes
for proc_dir in Path("/proc").glob("[0-9]*"):
try:
cgroup_file = proc_dir / "cgroup"
if cgroup_file.exists():
with open(cgroup_file) as f:
for line in f:
# Parse cgroup path and get inode
parts = line.strip().split(":")
if len(parts) >= 3:
cgroup_path = parts[2]
# Try to get the cgroup inode which is used as ID
cgroup_mount = f"/sys/fs/cgroup{cgroup_path}"
if os.path.exists(cgroup_mount):
stat_info = os.stat(cgroup_mount)
cgroup_ids.add(stat_info.st_ino)
except (PermissionError, FileNotFoundError, OSError):
continue
return cgroup_ids
# Display function
def display_stats():
"""Read and display file I/O statistics from BPF maps"""
print("\n" + "=" * 80)
print(f"{'CGROUP ID':<20} {'READ OPS':<15} {'READ BYTES':<20} {'WRITE OPS':<15} {'WRITE BYTES':<20}")
print("=" * 80)
# Get cgroup IDs from the system
cgroup_ids = get_cgroup_ids()
if not cgroup_ids:
print("No cgroups found...")
print("=" * 80)
return
# Initialize totals
total_read_ops = 0
total_read_bytes = 0
total_write_ops = 0
total_write_bytes = 0
# Track which cgroups have data
cgroups_with_data = []
# Display stats for each cgroup
for cgroup_id in sorted(cgroup_ids):
# Get read stats using lookup
read_ops = 0
read_bytes = 0
read_stat = read_map_ref.lookup(cgroup_id)
if read_stat:
read_ops = int(read_stat.ops)
read_bytes = int(read_stat.bytes)
total_read_ops += read_ops
total_read_bytes += read_bytes
# Get write stats using lookup
write_ops = 0
write_bytes = 0
write_stat = write_map_ref.lookup(cgroup_id)
if write_stat:
write_ops = int(write_stat.ops)
write_bytes = int(write_stat.bytes)
total_write_ops += write_ops
total_write_bytes += write_bytes
# Only print if there's data for this cgroup
if read_stat or write_stat:
print(f"{cgroup_id:<20} {read_ops:<15} {read_bytes:<20} {write_ops:<15} {write_bytes:<20}")
cgroups_with_data.append(cgroup_id)
if not cgroups_with_data:
print("No data collected yet...")
print("=" * 80)
print(f"{'TOTAL':<20} {total_read_ops:<15} {total_read_bytes:<20} {total_write_ops:<15} {total_write_bytes:<20}")
print()
# Main loop
if __name__ == "__main__":
print("Tracing file I/O operations... Press Ctrl+C to exit\n")
try:
while True:
time.sleep(5) # Update every 5 seconds
display_stats()
except KeyboardInterrupt:
print("\nStopped")

View File

@ -1,192 +0,0 @@
import logging
import time
import os
from pathlib import Path
from pythonbpf import bpf, map, section, bpfglobal, struct, compile, BPF
from pythonbpf.maps import HashMap
from pythonbpf.helper import get_current_cgroup_id
from ctypes import c_int32, c_uint64, c_void_p, c_uint32
from vmlinux import struct_sk_buff, struct_pt_regs
@bpf
@struct
class net_stats:
rx_packets: c_uint64
tx_packets: c_uint64
rx_bytes: c_uint64
tx_bytes: c_uint64
@bpf
@map
def net_stats_map() -> HashMap:
return HashMap(key=c_uint64, value=net_stats, max_entries=1024)
@bpf
@section("kprobe/__netif_receive_skb")
def trace_netif_rx(ctx: struct_pt_regs) -> c_int32:
cgroup_id = get_current_cgroup_id()
# Read skb pointer from first argument (PT_REGS_PARM1)
skb = struct_sk_buff(ctx.di) # x86_64: first arg in rdi
# Read skb->len
pkt_len = c_uint64(skb.len)
stats_ptr = net_stats_map.lookup(cgroup_id)
if stats_ptr:
stats = net_stats()
stats.rx_packets = stats_ptr.rx_packets + 1
stats.tx_packets = stats_ptr.tx_packets
stats.rx_bytes = stats_ptr.rx_bytes + pkt_len
stats.tx_bytes = stats_ptr.tx_bytes
net_stats_map.update(cgroup_id, stats)
else:
stats = net_stats()
stats.rx_packets = c_uint64(1)
stats.tx_packets = c_uint64(0)
stats.rx_bytes = pkt_len
stats.tx_bytes = c_uint64(0)
net_stats_map.update(cgroup_id, stats)
return c_int32(0)
@bpf
@section("kprobe/__dev_queue_xmit")
def trace_dev_xmit(ctx1: struct_pt_regs) -> c_int32:
cgroup_id = get_current_cgroup_id()
# Read skb pointer from first argument
skb = struct_sk_buff(ctx1.di)
pkt_len = c_uint64(skb.len)
stats_ptr = net_stats_map.lookup(cgroup_id)
if stats_ptr:
stats = net_stats()
stats.rx_packets = stats_ptr.rx_packets
stats.tx_packets = stats_ptr.tx_packets + 1
stats.rx_bytes = stats_ptr.rx_bytes
stats.tx_bytes = stats_ptr.tx_bytes + pkt_len
net_stats_map.update(cgroup_id, stats)
else:
stats = net_stats()
stats.rx_packets = c_uint64(0)
stats.tx_packets = c_uint64(1)
stats.rx_bytes = c_uint64(0)
stats.tx_bytes = pkt_len
net_stats_map.update(cgroup_id, stats)
return c_int32(0)
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
# Load and attach BPF program
b = BPF()
b.load()
b.attach_all()
# Get map reference and enable struct deserialization
net_stats_map_ref = b["net_stats_map"]
net_stats_map_ref.set_value_struct("net_stats")
def get_cgroup_ids():
"""Get all cgroup IDs from the system"""
cgroup_ids = set()
# Get cgroup IDs from running processes
for proc_dir in Path("/proc").glob("[0-9]*"):
try:
cgroup_file = proc_dir / "cgroup"
if cgroup_file.exists():
with open(cgroup_file) as f:
for line in f:
# Parse cgroup path and get inode
parts = line.strip().split(":")
if len(parts) >= 3:
cgroup_path = parts[2]
# Try to get the cgroup inode which is used as ID
cgroup_mount = f"/sys/fs/cgroup{cgroup_path}"
if os.path.exists(cgroup_mount):
stat_info = os.stat(cgroup_mount)
cgroup_ids.add(stat_info.st_ino)
except (PermissionError, FileNotFoundError, OSError):
continue
return cgroup_ids
# Display function
def display_stats():
"""Read and display network I/O statistics from BPF maps"""
print("\n" + "=" * 100)
print(f"{'CGROUP ID':<20} {'RX PACKETS':<15} {'RX BYTES':<20} {'TX PACKETS':<15} {'TX BYTES':<20}")
print("=" * 100)
# Get cgroup IDs from the system
cgroup_ids = get_cgroup_ids()
if not cgroup_ids:
print("No cgroups found...")
print("=" * 100)
return
# Initialize totals
total_rx_packets = 0
total_rx_bytes = 0
total_tx_packets = 0
total_tx_bytes = 0
# Track which cgroups have data
cgroups_with_data = []
# Display stats for each cgroup
for cgroup_id in sorted(cgroup_ids):
# Get network stats using lookup
rx_packets = 0
rx_bytes = 0
tx_packets = 0
tx_bytes = 0
net_stat = net_stats_map_ref.lookup(cgroup_id)
if net_stat:
rx_packets = int(net_stat.rx_packets)
rx_bytes = int(net_stat.rx_bytes)
tx_packets = int(net_stat.tx_packets)
tx_bytes = int(net_stat.tx_bytes)
total_rx_packets += rx_packets
total_rx_bytes += rx_bytes
total_tx_packets += tx_packets
total_tx_bytes += tx_bytes
print(f"{cgroup_id:<20} {rx_packets:<15} {rx_bytes:<20} {tx_packets:<15} {tx_bytes:<20}")
cgroups_with_data.append(cgroup_id)
if not cgroups_with_data:
print("No data collected yet...")
print("=" * 100)
print(f"{'TOTAL':<20} {total_rx_packets:<15} {total_rx_bytes:<20} {total_tx_packets:<15} {total_tx_bytes:<20}")
print()
# Main loop
if __name__ == "__main__":
print("Tracing network I/O operations... Press Ctrl+C to exit\n")
try:
while True:
time.sleep(5) # Update every 5 seconds
display_stats()
except KeyboardInterrupt:
print("\nStopped")

View File

@ -1,132 +0,0 @@
import time
import os
from pathlib import Path
from pythonbpf import bpf, map, section, bpfglobal, BPF
from pythonbpf.maps import HashMap
from pythonbpf.helper import get_current_cgroup_id
from ctypes import c_void_p, c_int32, c_uint64
@bpf
@map
def syscall_count() -> HashMap:
"""Map tracking syscall count by cgroup ID"""
return HashMap(key=c_uint64, value=c_uint64, max_entries=1024)
@bpf
@section("tracepoint/raw_syscalls/sys_enter")
def count_syscalls(ctx: c_void_p) -> c_int32:
"""
Increment syscall counter for the current cgroup.
Attached to raw_syscalls/sys_enter tracepoint to catch all syscalls.
"""
cgroup_id = get_current_cgroup_id()
# Lookup current count
count_ptr = syscall_count.lookup(cgroup_id)
if count_ptr:
# Increment existing count
new_count = count_ptr + c_uint64(1)
syscall_count.update(cgroup_id, new_count)
else:
# First syscall for this cgroup
syscall_count.update(cgroup_id, c_uint64(1))
return c_int32(0)
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
# Load and attach BPF program
b = BPF()
b.load()
b.attach_all()
# Get map reference
syscall_count_ref = b["syscall_count"]
def get_cgroup_ids():
"""Get all cgroup IDs from the system"""
cgroup_ids = set()
# Get cgroup IDs from running processes
for proc_dir in Path("/proc").glob("[0-9]*"):
try:
cgroup_file = proc_dir / "cgroup"
if cgroup_file.exists():
with open(cgroup_file) as f:
for line in f:
# Parse cgroup path and get inode
parts = line.strip().split(":")
if len(parts) >= 3:
cgroup_path = parts[2]
# Try to get the cgroup inode which is used as ID
cgroup_mount = f"/sys/fs/cgroup{cgroup_path}"
if os.path.exists(cgroup_mount):
stat_info = os.stat(cgroup_mount)
cgroup_ids.add(stat_info.st_ino)
except (PermissionError, FileNotFoundError, OSError):
continue
return cgroup_ids
# Display function
def display_stats():
"""Read and display syscall statistics from BPF maps"""
print("\n" + "=" * 60)
print(f"{'CGROUP ID':<20} {'SYSCALL COUNT':<20}")
print("=" * 60)
# Get cgroup IDs from the system
cgroup_ids = get_cgroup_ids()
if not cgroup_ids:
print("No cgroups found...")
print("=" * 60)
return
# Initialize totals
total_syscalls = 0
# Track which cgroups have data
cgroups_with_data = []
# Display stats for each cgroup
for cgroup_id in sorted(cgroup_ids):
# Get syscall count using lookup
syscall_cnt = 0
count = syscall_count_ref.lookup(cgroup_id)
if count is not None:
syscall_cnt = int(count)
total_syscalls += syscall_cnt
print(f"{cgroup_id:<20} {syscall_cnt:<20}")
cgroups_with_data.append(cgroup_id)
if not cgroups_with_data:
print("No data collected yet...")
print("=" * 60)
print(f"{'TOTAL':<20} {total_syscalls:<20}")
print()
# Main loop
if __name__ == "__main__":
print("Tracing syscalls per cgroup... Press Ctrl+C to exit\n")
try:
while True:
time.sleep(5) # Update every 5 seconds
display_stats()
except KeyboardInterrupt:
print("\nStopped")

View File

@ -0,0 +1,296 @@
"""Terminal User Interface for container monitoring."""
import sys
import time
import curses
from typing import Optional, List
from data_collector import ContainerDataCollector, CgroupInfo, ContainerStats
class ContainerMonitorTUI:
"""TUI for container monitoring with cgroup selection and live graphs."""
def __init__(self, collector: ContainerDataCollector):
self.collector = collector
self.selected_cgroup: Optional[int] = None
self.current_screen = "selection" # "selection" or "monitoring"
self.selected_index = 0
self.scroll_offset = 0
def run(self):
"""Run the TUI application."""
curses.wrapper(self._main_loop)
def _main_loop(self, stdscr):
"""Main curses loop."""
# Configure curses
curses.curs_set(0) # Hide cursor
stdscr.nodelay(True) # Non-blocking input
stdscr.timeout(100) # Refresh every 100ms
# Initialize colors
curses.start_color()
curses.init_pair(1, curses.COLOR_CYAN, curses.COLOR_BLACK)
curses.init_pair(2, curses.COLOR_GREEN, curses.COLOR_BLACK)
curses.init_pair(3, curses.COLOR_YELLOW, curses.COLOR_BLACK)
curses.init_pair(4, curses.COLOR_RED, curses.COLOR_BLACK)
curses.init_pair(5, curses.COLOR_MAGENTA, curses.COLOR_BLACK)
curses.init_pair(6, curses.COLOR_WHITE, curses.COLOR_BLUE)
while True:
stdscr.clear()
try:
if self.current_screen == "selection":
self._draw_selection_screen(stdscr)
elif self.current_screen == "monitoring":
self._draw_monitoring_screen(stdscr)
stdscr.refresh()
# Handle input
key = stdscr.getch()
if key != -1:
if not self._handle_input(key):
break # Exit requested
except KeyboardInterrupt:
break
except Exception as e:
# Show error
stdscr.addstr(0, 0, f"Error: {str(e)}")
stdscr.refresh()
time.sleep(2)
def _draw_selection_screen(self, stdscr):
"""Draw the cgroup selection screen."""
height, width = stdscr.getmaxyx()
# Header
title = "🐳 Container Monitor - Select Cgroup"
stdscr.attron(curses.color_pair(6))
stdscr.addstr(0, (width - len(title)) // 2, title)
stdscr.attroff(curses.color_pair(6))
# Instructions
instructions = "↑↓: Navigate | ENTER: Select | q: Quit | r: Refresh"
stdscr.attron(curses.color_pair(3))
stdscr.addstr(1, (width - len(instructions)) // 2, instructions)
stdscr.attroff(curses.color_pair(3))
# Get cgroups
cgroups = self.collector.get_all_cgroups()
if not cgroups:
msg = "No cgroups found. Waiting for activity..."
stdscr.attron(curses.color_pair(4))
stdscr.addstr(height // 2, (width - len(msg)) // 2, msg)
stdscr.attroff(curses.color_pair(4))
return
# Sort cgroups by name
cgroups.sort(key=lambda c: c.name)
# Adjust selection bounds
if self.selected_index >= len(cgroups):
self.selected_index = len(cgroups) - 1
if self.selected_index < 0:
self.selected_index = 0
# Calculate visible range
list_height = height - 6
if self.selected_index < self.scroll_offset:
self.scroll_offset = self.selected_index
elif self.selected_index >= self.scroll_offset + list_height:
self.scroll_offset = self.selected_index - list_height + 1
# Draw cgroup list
start_y = 3
stdscr.attron(curses.color_pair(1))
stdscr.addstr(start_y, 2, "" * (width - 4))
stdscr.attroff(curses.color_pair(1))
for i in range(list_height):
idx = self.scroll_offset + i
if idx >= len(cgroups):
break
cgroup = cgroups[idx]
y = start_y + 1 + i
if idx == self.selected_index:
# Highlight selected
stdscr.attron(curses.color_pair(2) | curses.A_BOLD | curses.A_REVERSE)
line = f"{cgroup.name:<40} ID: {cgroup.id}"
stdscr.addstr(y, 2, line[:width - 4])
stdscr.attroff(curses.color_pair(2) | curses.A_BOLD | curses.A_REVERSE)
else:
line = f" {cgroup.name:<40} ID: {cgroup.id}"
stdscr.addstr(y, 2, line[:width - 4])
# Footer with count
footer = f"Total cgroups: {len(cgroups)}"
stdscr.attron(curses.color_pair(1))
stdscr.addstr(height - 2, (width - len(footer)) // 2, footer)
stdscr.attroff(curses.color_pair(1))
def _draw_monitoring_screen(self, stdscr):
"""Draw the monitoring screen for selected cgroup."""
height, width = stdscr.getmaxyx()
if self.selected_cgroup is None:
return
# Get current stats
stats = self.collector.get_stats_for_cgroup(self.selected_cgroup)
history = self.collector.get_history(self.selected_cgroup)
# Header
title = f"📊 Monitoring: {stats.cgroup_name}"
stdscr.attron(curses.color_pair(6))
stdscr.addstr(0, (width - len(title)) // 2, title)
stdscr.attroff(curses.color_pair(6))
# Instructions
instructions = "ESC/b: Back | q: Quit"
stdscr.attron(curses.color_pair(3))
stdscr.addstr(1, (width - len(instructions)) // 2, instructions)
stdscr.attroff(curses.color_pair(3))
# Syscall count (large number display)
y = 3
stdscr.attron(curses.color_pair(5) | curses.A_BOLD)
stdscr.addstr(y, 2, "SYSCALLS")
stdscr.attroff(curses.color_pair(5) | curses.A_BOLD)
syscall_str = f"{stats.syscall_count:,}"
stdscr.attron(curses.color_pair(2) | curses.A_BOLD)
stdscr.addstr(y + 1, 4, syscall_str)
stdscr.attroff(curses.color_pair(2) | curses.A_BOLD)
# Network I/O graphs
y = 6
self._draw_section_header(stdscr, y, "NETWORK I/O", 1)
# RX graph
y += 2
rx_label = f"RX: {self._format_bytes(stats.rx_bytes)} ({stats.rx_packets:,} packets)"
stdscr.addstr(y, 2, rx_label)
if len(history) > 1:
self._draw_bar_graph(stdscr, y + 1, 2, width - 4, 3,
[s.rx_bytes for s in history],
curses.color_pair(2))
# TX graph
y += 5
tx_label = f"TX: {self._format_bytes(stats.tx_bytes)} ({stats.tx_packets:,} packets)"
stdscr.addstr(y, 2, tx_label)
if len(history) > 1:
self._draw_bar_graph(stdscr, y + 1, 2, width - 4, 3,
[s.tx_bytes for s in history],
curses.color_pair(3))
# File I/O graphs
y += 5
self._draw_section_header(stdscr, y, "FILE I/O", 1)
# Read graph
y += 2
read_label = f"READ: {self._format_bytes(stats.read_bytes)} ({stats.read_ops:,} ops)"
stdscr.addstr(y, 2, read_label)
if len(history) > 1:
self._draw_bar_graph(stdscr, y + 1, 2, width - 4, 3,
[s.read_bytes for s in history],
curses.color_pair(4))
# Write graph
y += 5
write_label = f"WRITE: {self._format_bytes(stats.write_bytes)} ({stats.write_ops:,} ops)"
stdscr.addstr(y, 2, write_label)
if len(history) > 1:
self._draw_bar_graph(stdscr, y + 1, 2, width - 4, 3,
[s.write_bytes for s in history],
curses.color_pair(5))
def _draw_section_header(self, stdscr, y: int, title: str, color_pair: int):
"""Draw a section header."""
height, width = stdscr.getmaxyx()
stdscr.attron(curses.color_pair(color_pair) | curses.A_BOLD)
stdscr.addstr(y, 2, title)
stdscr.addstr(y, len(title) + 3, "" * (width - len(title) - 5))
stdscr.attroff(curses.color_pair(color_pair) | curses.A_BOLD)
def _draw_bar_graph(self, stdscr, y: int, x: int, width: int, height: int,
data: List[float], color_pair: int):
"""Draw a simple bar graph."""
if not data or width < 2:
return
# Normalize data to graph width
max_val = max(data) if max(data) > 0 else 1
# Take last 'width' data points
recent_data = data[-width:]
# Draw bars
for row in range(height):
threshold = (height - row) / height
bar_line = ""
for val in recent_data:
normalized = val / max_val
if normalized >= threshold:
bar_line += ""
elif normalized >= threshold - 0.2:
bar_line += ""
elif normalized >= threshold - 0.4:
bar_line += ""
elif normalized >= threshold - 0.6:
bar_line += ""
else:
bar_line += " "
try:
stdscr.attron(color_pair)
stdscr.addstr(y + row, x, bar_line[:width])
stdscr.attroff(color_pair)
except:
pass # Ignore errors at screen edges
def _format_bytes(self, bytes_val: int) -> str:
"""Format bytes into human-readable string."""
for unit in ['B', 'KB', 'MB', 'GB', 'TB']:
if bytes_val < 1024.0:
return f"{bytes_val:.2f} {unit}"
bytes_val /= 1024.
0
return f"{bytes_val:.2f} PB"
def _handle_input(self, key: int) -> bool:
"""Handle keyboard input. Returns False to exit."""
if key == ord('q') or key == ord('Q'):
return False # Exit
if self.current_screen == "selection":
if key == curses.KEY_UP:
self.selected_index = max(0, self.selected_index - 1)
elif key == curses.KEY_DOWN:
cgroups = self.collector.get_all_cgroups()
self.selected_index = min(len(cgroups) - 1, self.selected_index + 1)
elif key == ord('\n') or key == curses.KEY_ENTER or key == 10:
# Select cgroup
cgroups = self.collector.get_all_cgroups()
if cgroups and 0 <= self.selected_index < len(cgroups):
cgroups.sort(key=lambda c: c.name)
self.selected_cgroup = cgroups[self.selected_index].id
self.current_screen = "monitoring"
elif key == ord('r') or key == ord('R'):
# Force refresh cache
self.collector._cgroup_cache_time = 0
elif self.current_screen == "monitoring":
if key == 27 or key == ord('b') or key == ord('B'): # ESC or 'b'
self.current_screen = "selection"
self.selected_cgroup = None
return True # Continue running