mirror of
https://github.com/varun-r-mallya/Python-BPF.git
synced 2025-12-31 21:06:25 +00:00
add container-monitor example
This commit is contained in:
49
BCC-Examples/container-monitor/README.md
Normal file
49
BCC-Examples/container-monitor/README.md
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
# Container Monitor TUI
|
||||||
|
|
||||||
|
A beautiful terminal-based container monitoring tool that combines syscall tracking, file I/O monitoring, and network traffic analysis using eBPF.
|
||||||
|
|
||||||
|
## Features
|
||||||
|
|
||||||
|
- 🎯 **Interactive Cgroup Selection** - Navigate and select cgroups with arrow keys
|
||||||
|
- 📊 **Real-time Monitoring** - Live graphs and statistics
|
||||||
|
- 🔥 **Syscall Tracking** - Total syscall count per cgroup
|
||||||
|
- 💾 **File I/O Monitoring** - Read/write operations and bytes with graphs
|
||||||
|
- 🌐 **Network Traffic** - RX/TX packets and bytes with live graphs
|
||||||
|
- ⚡ **Efficient Caching** - Reduced /proc lookups for better performance
|
||||||
|
- 🎨 **Beautiful TUI** - Clean, colorful terminal interface
|
||||||
|
|
||||||
|
## Requirements
|
||||||
|
|
||||||
|
- Python 3.7+
|
||||||
|
- pythonbpf
|
||||||
|
- Root privileges (for eBPF)
|
||||||
|
|
||||||
|
## Installation
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Ensure you have pythonbpf installed
|
||||||
|
pip install pythonbpf
|
||||||
|
|
||||||
|
# Run the monitor
|
||||||
|
sudo $(which python) container_monitor.py
|
||||||
|
```
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
1. **Selection Screen**: Use ↑↓ arrow keys to navigate through cgroups, press ENTER to select
|
||||||
|
2. **Monitoring Screen**: View real-time graphs and statistics, press ESC or 'b' to go back
|
||||||
|
3. **Exit**: Press 'q' at any time to quit
|
||||||
|
|
||||||
|
## Architecture
|
||||||
|
|
||||||
|
- `container_monitor.py` - Main BPF program combining all three tracers
|
||||||
|
- `data_collector.py` - Data collection, caching, and history management
|
||||||
|
- `tui. py` - Terminal user interface with selection and monitoring screens
|
||||||
|
|
||||||
|
## BPF Programs
|
||||||
|
|
||||||
|
- **vfs_read/vfs_write** - Track file I/O operations
|
||||||
|
- **__netif_receive_skb/__dev_queue_xmit** - Track network traffic
|
||||||
|
- **raw_syscalls/sys_enter** - Count all syscalls
|
||||||
|
|
||||||
|
All programs filter by cgroup ID for per-container monitoring.
|
||||||
221
BCC-Examples/container-monitor/container_monitor.py
Normal file
221
BCC-Examples/container-monitor/container_monitor.py
Normal file
@ -0,0 +1,221 @@
|
|||||||
|
"""Container Monitor - TUI-based cgroup monitoring combining syscall, file I/O, and network tracking."""
|
||||||
|
|
||||||
|
import time
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
from pythonbpf import bpf, map, section, bpfglobal, struct, BPF
|
||||||
|
from pythonbpf.maps import HashMap
|
||||||
|
from pythonbpf.helper import get_current_cgroup_id
|
||||||
|
from ctypes import c_int32, c_uint64, c_void_p
|
||||||
|
from vmlinux import struct_pt_regs, struct_sk_buff
|
||||||
|
|
||||||
|
from data_collector import ContainerDataCollector
|
||||||
|
from tui import ContainerMonitorTUI
|
||||||
|
|
||||||
|
|
||||||
|
# ==================== BPF Structs ====================
|
||||||
|
|
||||||
|
@bpf
|
||||||
|
@struct
|
||||||
|
class read_stats:
|
||||||
|
bytes: c_uint64
|
||||||
|
ops: c_uint64
|
||||||
|
|
||||||
|
|
||||||
|
@bpf
|
||||||
|
@struct
|
||||||
|
class write_stats:
|
||||||
|
bytes: c_uint64
|
||||||
|
ops: c_uint64
|
||||||
|
|
||||||
|
|
||||||
|
@bpf
|
||||||
|
@struct
|
||||||
|
class net_stats:
|
||||||
|
rx_packets: c_uint64
|
||||||
|
tx_packets: c_uint64
|
||||||
|
rx_bytes: c_uint64
|
||||||
|
tx_bytes: c_uint64
|
||||||
|
|
||||||
|
|
||||||
|
# ==================== BPF Maps ====================
|
||||||
|
|
||||||
|
@bpf
|
||||||
|
@map
|
||||||
|
def read_map() -> HashMap:
|
||||||
|
return HashMap(key=c_uint64, value=read_stats, max_entries=1024)
|
||||||
|
|
||||||
|
|
||||||
|
@bpf
|
||||||
|
@map
|
||||||
|
def write_map() -> HashMap:
|
||||||
|
return HashMap(key=c_uint64, value=write_stats, max_entries=1024)
|
||||||
|
|
||||||
|
|
||||||
|
@bpf
|
||||||
|
@map
|
||||||
|
def net_stats_map() -> HashMap:
|
||||||
|
return HashMap(key=c_uint64, value=net_stats, max_entries=1024)
|
||||||
|
|
||||||
|
|
||||||
|
@bpf
|
||||||
|
@map
|
||||||
|
def syscall_count() -> HashMap:
|
||||||
|
return HashMap(key=c_uint64, value=c_uint64, max_entries=1024)
|
||||||
|
|
||||||
|
|
||||||
|
# ==================== File I/O Tracing ====================
|
||||||
|
|
||||||
|
@bpf
|
||||||
|
@section("kprobe/vfs_read")
|
||||||
|
def trace_read(ctx: struct_pt_regs) -> c_int32:
|
||||||
|
cg = get_current_cgroup_id()
|
||||||
|
count = c_uint64(ctx.dx)
|
||||||
|
ptr = read_map.lookup(cg)
|
||||||
|
if ptr:
|
||||||
|
s = read_stats()
|
||||||
|
s.bytes = ptr.bytes + count
|
||||||
|
s.ops = ptr.ops + 1
|
||||||
|
read_map.update(cg, s)
|
||||||
|
else:
|
||||||
|
s = read_stats()
|
||||||
|
s.bytes = count
|
||||||
|
s.ops = c_uint64(1)
|
||||||
|
read_map.update(cg, s)
|
||||||
|
|
||||||
|
return c_int32(0)
|
||||||
|
|
||||||
|
|
||||||
|
@bpf
|
||||||
|
@section("kprobe/vfs_write")
|
||||||
|
def trace_write(ctx1: struct_pt_regs) -> c_int32:
|
||||||
|
cg = get_current_cgroup_id()
|
||||||
|
count = c_uint64(ctx1.dx)
|
||||||
|
ptr = write_map.lookup(cg)
|
||||||
|
|
||||||
|
if ptr:
|
||||||
|
s = write_stats()
|
||||||
|
s.bytes = ptr.bytes + count
|
||||||
|
s.ops = ptr.ops + 1
|
||||||
|
write_map.update(cg, s)
|
||||||
|
else:
|
||||||
|
s = write_stats()
|
||||||
|
s.bytes = count
|
||||||
|
s.ops = c_uint64(1)
|
||||||
|
write_map.update(cg, s)
|
||||||
|
|
||||||
|
return c_int32(0)
|
||||||
|
|
||||||
|
|
||||||
|
# ==================== Network I/O Tracing ====================
|
||||||
|
|
||||||
|
@bpf
|
||||||
|
@section("kprobe/__netif_receive_skb")
|
||||||
|
def trace_netif_rx(ctx2: struct_pt_regs) -> c_int32:
|
||||||
|
cgroup_id = get_current_cgroup_id()
|
||||||
|
skb = struct_sk_buff(ctx2.di)
|
||||||
|
pkt_len = c_uint64(skb.len)
|
||||||
|
|
||||||
|
stats_ptr = net_stats_map.lookup(cgroup_id)
|
||||||
|
|
||||||
|
if stats_ptr:
|
||||||
|
stats = net_stats()
|
||||||
|
stats.rx_packets = stats_ptr.rx_packets + 1
|
||||||
|
stats.tx_packets = stats_ptr.tx_packets
|
||||||
|
stats.rx_bytes = stats_ptr.rx_bytes + pkt_len
|
||||||
|
stats.tx_bytes = stats_ptr.tx_bytes
|
||||||
|
net_stats_map.update(cgroup_id, stats)
|
||||||
|
else:
|
||||||
|
stats = net_stats()
|
||||||
|
stats.rx_packets = c_uint64(1)
|
||||||
|
stats.tx_packets = c_uint64(0)
|
||||||
|
stats.rx_bytes = pkt_len
|
||||||
|
stats.tx_bytes = c_uint64(0)
|
||||||
|
net_stats_map.update(cgroup_id, stats)
|
||||||
|
|
||||||
|
return c_int32(0)
|
||||||
|
|
||||||
|
|
||||||
|
@bpf
|
||||||
|
@section("kprobe/__dev_queue_xmit")
|
||||||
|
def trace_dev_xmit(ctx3: struct_pt_regs) -> c_int32:
|
||||||
|
cgroup_id = get_current_cgroup_id()
|
||||||
|
skb = struct_sk_buff(ctx3.di)
|
||||||
|
pkt_len = c_uint64(skb.len)
|
||||||
|
|
||||||
|
stats_ptr = net_stats_map.lookup(cgroup_id)
|
||||||
|
|
||||||
|
if stats_ptr:
|
||||||
|
stats = net_stats()
|
||||||
|
stats.rx_packets = stats_ptr.rx_packets
|
||||||
|
stats.tx_packets = stats_ptr.tx_packets + 1
|
||||||
|
stats.rx_bytes = stats_ptr.rx_bytes
|
||||||
|
stats.tx_bytes = stats_ptr.tx_bytes + pkt_len
|
||||||
|
net_stats_map.update(cgroup_id, stats)
|
||||||
|
else:
|
||||||
|
stats = net_stats()
|
||||||
|
stats.rx_packets = c_uint64(0)
|
||||||
|
stats.tx_packets = c_uint64(1)
|
||||||
|
stats.rx_bytes = c_uint64(0)
|
||||||
|
stats.tx_bytes = pkt_len
|
||||||
|
net_stats_map.update(cgroup_id, stats)
|
||||||
|
|
||||||
|
return c_int32(0)
|
||||||
|
|
||||||
|
|
||||||
|
# ==================== Syscall Tracing ====================
|
||||||
|
|
||||||
|
@bpf
|
||||||
|
@section("tracepoint/raw_syscalls/sys_enter")
|
||||||
|
def count_syscalls(ctx: c_void_p) -> c_int32:
|
||||||
|
cgroup_id = get_current_cgroup_id()
|
||||||
|
count_ptr = syscall_count.lookup(cgroup_id)
|
||||||
|
|
||||||
|
if count_ptr:
|
||||||
|
new_count = count_ptr + c_uint64(1)
|
||||||
|
syscall_count.update(cgroup_id, new_count)
|
||||||
|
else:
|
||||||
|
syscall_count.update(cgroup_id, c_uint64(1))
|
||||||
|
|
||||||
|
return c_int32(0)
|
||||||
|
|
||||||
|
|
||||||
|
@bpf
|
||||||
|
@bpfglobal
|
||||||
|
def LICENSE() -> str:
|
||||||
|
return "GPL"
|
||||||
|
|
||||||
|
|
||||||
|
# ==================== Main ====================
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
print("🔥 Loading BPF programs...")
|
||||||
|
|
||||||
|
# Load and attach BPF program
|
||||||
|
b = BPF()
|
||||||
|
b.load()
|
||||||
|
b.attach_all()
|
||||||
|
|
||||||
|
# Get map references and enable struct deserialization
|
||||||
|
read_map_ref = b["read_map"]
|
||||||
|
write_map_ref = b["write_map"]
|
||||||
|
net_stats_map_ref = b["net_stats_map"]
|
||||||
|
syscall_count_ref = b["syscall_count"]
|
||||||
|
|
||||||
|
read_map_ref.set_value_struct("read_stats")
|
||||||
|
write_map_ref.set_value_struct("write_stats")
|
||||||
|
net_stats_map_ref.set_value_struct("net_stats")
|
||||||
|
|
||||||
|
print("✅ BPF programs loaded and attached")
|
||||||
|
|
||||||
|
# Setup data collector
|
||||||
|
collector = ContainerDataCollector(
|
||||||
|
read_map_ref,
|
||||||
|
write_map_ref,
|
||||||
|
net_stats_map_ref,
|
||||||
|
syscall_count_ref
|
||||||
|
)
|
||||||
|
|
||||||
|
# Create and run TUI
|
||||||
|
tui = ContainerMonitorTUI(collector)
|
||||||
|
tui.run()
|
||||||
208
BCC-Examples/container-monitor/data_collector.py
Normal file
208
BCC-Examples/container-monitor/data_collector.py
Normal file
@ -0,0 +1,208 @@
|
|||||||
|
"""Data collection and management for container monitoring."""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Dict, List, Set, Optional, Tuple
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from collections import deque, defaultdict
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class CgroupInfo:
|
||||||
|
"""Information about a cgroup."""
|
||||||
|
id: int
|
||||||
|
name: str
|
||||||
|
path: str
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class ContainerStats:
|
||||||
|
"""Statistics for a container/cgroup."""
|
||||||
|
cgroup_id: int
|
||||||
|
cgroup_name: str
|
||||||
|
|
||||||
|
# File I/O
|
||||||
|
read_ops: int = 0
|
||||||
|
read_bytes: int = 0
|
||||||
|
write_ops: int = 0
|
||||||
|
write_bytes: int = 0
|
||||||
|
|
||||||
|
# Network I/O
|
||||||
|
rx_packets: int = 0
|
||||||
|
rx_bytes: int = 0
|
||||||
|
tx_packets: int = 0
|
||||||
|
tx_bytes: int = 0
|
||||||
|
|
||||||
|
# Syscalls
|
||||||
|
syscall_count: int = 0
|
||||||
|
|
||||||
|
# Timestamp
|
||||||
|
timestamp: float = 0.0
|
||||||
|
|
||||||
|
|
||||||
|
class ContainerDataCollector:
|
||||||
|
"""Collects and manages container monitoring data from BPF."""
|
||||||
|
|
||||||
|
def __init__(self, read_map, write_map, net_stats_map, syscall_map, history_size: int = 100):
|
||||||
|
self.read_map = read_map
|
||||||
|
self.write_map = write_map
|
||||||
|
self.net_stats_map = net_stats_map
|
||||||
|
self.syscall_map = syscall_map
|
||||||
|
|
||||||
|
# Caching
|
||||||
|
self._cgroup_cache: Dict[int, CgroupInfo] = {}
|
||||||
|
self._cgroup_cache_time = 0
|
||||||
|
self._cache_ttl = 5.
|
||||||
|
0 # Refresh cache every 5 seconds
|
||||||
|
|
||||||
|
# Historical data for graphing
|
||||||
|
self._history_size = history_size
|
||||||
|
self._history: Dict[int, deque] = defaultdict(lambda: deque(maxlen=history_size))
|
||||||
|
|
||||||
|
def get_all_cgroups(self) -> List[CgroupInfo]:
|
||||||
|
"""Get all cgroups with caching."""
|
||||||
|
current_time = time.time()
|
||||||
|
|
||||||
|
# Use cached data if still valid
|
||||||
|
if current_time - self._cgroup_cache_time < self._cache_ttl:
|
||||||
|
return list(self._cgroup_cache.values())
|
||||||
|
|
||||||
|
# Refresh cache
|
||||||
|
self._refresh_cgroup_cache()
|
||||||
|
return list(self._cgroup_cache.values())
|
||||||
|
|
||||||
|
def _refresh_cgroup_cache(self):
|
||||||
|
"""Refresh the cgroup cache from /proc."""
|
||||||
|
cgroup_map: Dict[int, Set[str]] = defaultdict(set)
|
||||||
|
|
||||||
|
# Scan /proc to find all cgroups
|
||||||
|
for proc_dir in Path("/proc").glob("[0-9]*"):
|
||||||
|
try:
|
||||||
|
cgroup_file = proc_dir / "cgroup"
|
||||||
|
if not cgroup_file.exists():
|
||||||
|
continue
|
||||||
|
|
||||||
|
with open(cgroup_file) as f:
|
||||||
|
for line in f:
|
||||||
|
parts = line.strip().split(":")
|
||||||
|
if len(parts) >= 3:
|
||||||
|
cgroup_path = parts[2]
|
||||||
|
cgroup_mount = f"/sys/fs/cgroup{cgroup_path}"
|
||||||
|
|
||||||
|
if os.path.exists(cgroup_mount):
|
||||||
|
stat_info = os.stat(cgroup_mount)
|
||||||
|
cgroup_id = stat_info.st_ino
|
||||||
|
cgroup_map[cgroup_id].add(cgroup_path)
|
||||||
|
|
||||||
|
except (PermissionError, FileNotFoundError, OSError):
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Update cache with best names
|
||||||
|
new_cache = {}
|
||||||
|
for cgroup_id, paths in cgroup_map.items():
|
||||||
|
# Pick the most descriptive path
|
||||||
|
best_path = self._get_best_cgroup_path(paths)
|
||||||
|
name = self._get_cgroup_name(best_path)
|
||||||
|
|
||||||
|
new_cache[cgroup_id] = CgroupInfo(
|
||||||
|
id=cgroup_id,
|
||||||
|
name=name,
|
||||||
|
path=best_path
|
||||||
|
)
|
||||||
|
|
||||||
|
self._cgroup_cache = new_cache
|
||||||
|
self._cgroup_cache_time = time.time()
|
||||||
|
|
||||||
|
def _get_best_cgroup_path(self, paths: Set[str]) -> str:
|
||||||
|
"""Select the most descriptive cgroup path."""
|
||||||
|
path_list = list(paths)
|
||||||
|
|
||||||
|
# Prefer paths with more components (more specific)
|
||||||
|
# Prefer paths containing docker, podman, etc.
|
||||||
|
for keyword in ['docker', 'podman', 'kubernetes', 'k8s', 'systemd']:
|
||||||
|
for path in path_list:
|
||||||
|
if keyword in path.lower():
|
||||||
|
return path
|
||||||
|
|
||||||
|
# Return longest path (most specific)
|
||||||
|
return max(path_list, key=lambda p: (len(p.split('/')), len(p)))
|
||||||
|
|
||||||
|
def _get_cgroup_name(self, path: str) -> str:
|
||||||
|
"""Extract a friendly name from cgroup path."""
|
||||||
|
if not path or path == "/":
|
||||||
|
return "root"
|
||||||
|
|
||||||
|
# Remove leading/trailing slashes
|
||||||
|
path = path.strip("/")
|
||||||
|
|
||||||
|
# Try to extract container ID or service name
|
||||||
|
parts = path.split("/")
|
||||||
|
|
||||||
|
# For Docker: /docker/<container_id>
|
||||||
|
if "docker" in path.lower():
|
||||||
|
for i, part in enumerate(parts):
|
||||||
|
if part.lower() == "docker" and i + 1 < len(parts):
|
||||||
|
container_id = parts[i + 1][:12] # Short ID
|
||||||
|
return f"docker:{container_id}"
|
||||||
|
|
||||||
|
# For systemd services
|
||||||
|
if "system. slice" in path:
|
||||||
|
for part in parts:
|
||||||
|
if part.endswith(". service"):
|
||||||
|
return part.replace(".service", "")
|
||||||
|
|
||||||
|
# For user slices
|
||||||
|
if "user.slice" in path:
|
||||||
|
return f"user:{parts[-1]}" if parts else "user"
|
||||||
|
|
||||||
|
# Default: use last component
|
||||||
|
return parts[-1] if parts else path
|
||||||
|
|
||||||
|
def get_stats_for_cgroup(self, cgroup_id: int) -> ContainerStats:
|
||||||
|
"""Get current statistics for a specific cgroup."""
|
||||||
|
cgroup_info = self._cgroup_cache.get(cgroup_id)
|
||||||
|
cgroup_name = cgroup_info.name if cgroup_info else f"cgroup-{cgroup_id}"
|
||||||
|
|
||||||
|
stats = ContainerStats(
|
||||||
|
cgroup_id=cgroup_id,
|
||||||
|
cgroup_name=cgroup_name,
|
||||||
|
timestamp=time.time()
|
||||||
|
)
|
||||||
|
|
||||||
|
# Get file I/O stats
|
||||||
|
read_stat = self.read_map.lookup(cgroup_id)
|
||||||
|
if read_stat:
|
||||||
|
stats.read_ops = int(read_stat.ops)
|
||||||
|
stats.read_bytes = int(read_stat.bytes)
|
||||||
|
|
||||||
|
write_stat = self.write_map.lookup(cgroup_id)
|
||||||
|
if write_stat:
|
||||||
|
stats.write_ops = int(write_stat.ops)
|
||||||
|
stats.write_bytes = int(write_stat.bytes)
|
||||||
|
|
||||||
|
# Get network stats
|
||||||
|
net_stat = self.net_stats_map.lookup(cgroup_id)
|
||||||
|
if net_stat:
|
||||||
|
stats.rx_packets = int(net_stat.rx_packets)
|
||||||
|
stats.rx_bytes = int(net_stat.rx_bytes)
|
||||||
|
stats.tx_packets = int(net_stat.tx_packets)
|
||||||
|
stats.tx_bytes = int(net_stat.tx_bytes)
|
||||||
|
|
||||||
|
# Get syscall count
|
||||||
|
syscall_cnt = self.syscall_map.lookup(cgroup_id)
|
||||||
|
if syscall_cnt is not None:
|
||||||
|
stats.syscall_count = int(syscall_cnt)
|
||||||
|
|
||||||
|
# Add to history
|
||||||
|
self._history[cgroup_id].append(stats)
|
||||||
|
|
||||||
|
return stats
|
||||||
|
|
||||||
|
def get_history(self, cgroup_id: int) -> List[ContainerStats]:
|
||||||
|
"""Get historical statistics for graphing."""
|
||||||
|
return list(self._history[cgroup_id])
|
||||||
|
|
||||||
|
def get_cgroup_info(self, cgroup_id: int) -> Optional[CgroupInfo]:
|
||||||
|
"""Get cached cgroup information."""
|
||||||
|
return self._cgroup_cache.get(cgroup_id)
|
||||||
@ -1,191 +0,0 @@
|
|||||||
import logging
|
|
||||||
import time
|
|
||||||
import os
|
|
||||||
from pathlib import Path
|
|
||||||
from pythonbpf import bpf, map, section, bpfglobal, struct, compile, BPF
|
|
||||||
from pythonbpf.maps import HashMap
|
|
||||||
from pythonbpf.helper import get_current_cgroup_id
|
|
||||||
from ctypes import c_int32, c_uint64
|
|
||||||
from vmlinux import struct_pt_regs
|
|
||||||
|
|
||||||
|
|
||||||
@bpf
|
|
||||||
@struct
|
|
||||||
class read_stats:
|
|
||||||
bytes: c_uint64
|
|
||||||
ops: c_uint64
|
|
||||||
|
|
||||||
|
|
||||||
@bpf
|
|
||||||
@struct
|
|
||||||
class write_stats:
|
|
||||||
bytes: c_uint64
|
|
||||||
ops: c_uint64
|
|
||||||
|
|
||||||
|
|
||||||
@bpf
|
|
||||||
@map
|
|
||||||
def read_map() -> HashMap:
|
|
||||||
return HashMap(key=c_uint64, value=read_stats, max_entries=1024)
|
|
||||||
|
|
||||||
|
|
||||||
@bpf
|
|
||||||
@map
|
|
||||||
def write_map() -> HashMap:
|
|
||||||
return HashMap(key=c_uint64, value=write_stats, max_entries=1024)
|
|
||||||
|
|
||||||
|
|
||||||
@bpf
|
|
||||||
@section("kprobe/vfs_read")
|
|
||||||
def trace_read(ctx: struct_pt_regs) -> c_int32:
|
|
||||||
cg = get_current_cgroup_id()
|
|
||||||
count = c_uint64(ctx.dx)
|
|
||||||
ptr = read_map.lookup(cg)
|
|
||||||
if ptr:
|
|
||||||
s = read_stats()
|
|
||||||
s.bytes = ptr.bytes + count
|
|
||||||
s.ops = ptr.ops + 1
|
|
||||||
read_map.update(cg, s)
|
|
||||||
else:
|
|
||||||
s = read_stats()
|
|
||||||
s.bytes = count
|
|
||||||
s.ops = c_uint64(1)
|
|
||||||
read_map.update(cg, s)
|
|
||||||
|
|
||||||
return c_int32(0)
|
|
||||||
|
|
||||||
|
|
||||||
@bpf
|
|
||||||
@section("kprobe/vfs_write")
|
|
||||||
def trace_write(ctx1: struct_pt_regs) -> c_int32:
|
|
||||||
cg = get_current_cgroup_id()
|
|
||||||
count = c_uint64(ctx1.dx)
|
|
||||||
ptr = write_map.lookup(cg)
|
|
||||||
|
|
||||||
if ptr:
|
|
||||||
s = write_stats()
|
|
||||||
s.bytes = ptr.bytes + count
|
|
||||||
s.ops = ptr.ops + 1
|
|
||||||
write_map.update(cg, s)
|
|
||||||
else:
|
|
||||||
s = write_stats()
|
|
||||||
s.bytes = count
|
|
||||||
s.ops = c_uint64(1)
|
|
||||||
write_map.update(cg, s)
|
|
||||||
|
|
||||||
return c_int32(0)
|
|
||||||
|
|
||||||
|
|
||||||
@bpf
|
|
||||||
@bpfglobal
|
|
||||||
def LICENSE() -> str:
|
|
||||||
return "GPL"
|
|
||||||
|
|
||||||
|
|
||||||
# Load and attach BPF program
|
|
||||||
b = BPF()
|
|
||||||
b.load()
|
|
||||||
b.attach_all()
|
|
||||||
|
|
||||||
# Get map references and enable struct deserialization
|
|
||||||
read_map_ref = b["read_map"]
|
|
||||||
write_map_ref = b["write_map"]
|
|
||||||
read_map_ref.set_value_struct("read_stats")
|
|
||||||
write_map_ref.set_value_struct("write_stats")
|
|
||||||
|
|
||||||
|
|
||||||
def get_cgroup_ids():
|
|
||||||
"""Get all cgroup IDs from the system"""
|
|
||||||
cgroup_ids = set()
|
|
||||||
|
|
||||||
# Get cgroup IDs from running processes
|
|
||||||
for proc_dir in Path("/proc").glob("[0-9]*"):
|
|
||||||
try:
|
|
||||||
cgroup_file = proc_dir / "cgroup"
|
|
||||||
if cgroup_file.exists():
|
|
||||||
with open(cgroup_file) as f:
|
|
||||||
for line in f:
|
|
||||||
# Parse cgroup path and get inode
|
|
||||||
parts = line.strip().split(":")
|
|
||||||
if len(parts) >= 3:
|
|
||||||
cgroup_path = parts[2]
|
|
||||||
# Try to get the cgroup inode which is used as ID
|
|
||||||
cgroup_mount = f"/sys/fs/cgroup{cgroup_path}"
|
|
||||||
if os.path.exists(cgroup_mount):
|
|
||||||
stat_info = os.stat(cgroup_mount)
|
|
||||||
cgroup_ids.add(stat_info.st_ino)
|
|
||||||
except (PermissionError, FileNotFoundError, OSError):
|
|
||||||
continue
|
|
||||||
|
|
||||||
return cgroup_ids
|
|
||||||
|
|
||||||
|
|
||||||
# Display function
|
|
||||||
def display_stats():
|
|
||||||
"""Read and display file I/O statistics from BPF maps"""
|
|
||||||
print("\n" + "=" * 80)
|
|
||||||
print(f"{'CGROUP ID':<20} {'READ OPS':<15} {'READ BYTES':<20} {'WRITE OPS':<15} {'WRITE BYTES':<20}")
|
|
||||||
print("=" * 80)
|
|
||||||
|
|
||||||
# Get cgroup IDs from the system
|
|
||||||
cgroup_ids = get_cgroup_ids()
|
|
||||||
|
|
||||||
if not cgroup_ids:
|
|
||||||
print("No cgroups found...")
|
|
||||||
print("=" * 80)
|
|
||||||
return
|
|
||||||
|
|
||||||
# Initialize totals
|
|
||||||
total_read_ops = 0
|
|
||||||
total_read_bytes = 0
|
|
||||||
total_write_ops = 0
|
|
||||||
total_write_bytes = 0
|
|
||||||
|
|
||||||
# Track which cgroups have data
|
|
||||||
cgroups_with_data = []
|
|
||||||
|
|
||||||
# Display stats for each cgroup
|
|
||||||
for cgroup_id in sorted(cgroup_ids):
|
|
||||||
# Get read stats using lookup
|
|
||||||
read_ops = 0
|
|
||||||
read_bytes = 0
|
|
||||||
read_stat = read_map_ref.lookup(cgroup_id)
|
|
||||||
if read_stat:
|
|
||||||
read_ops = int(read_stat.ops)
|
|
||||||
read_bytes = int(read_stat.bytes)
|
|
||||||
total_read_ops += read_ops
|
|
||||||
total_read_bytes += read_bytes
|
|
||||||
|
|
||||||
# Get write stats using lookup
|
|
||||||
write_ops = 0
|
|
||||||
write_bytes = 0
|
|
||||||
write_stat = write_map_ref.lookup(cgroup_id)
|
|
||||||
if write_stat:
|
|
||||||
write_ops = int(write_stat.ops)
|
|
||||||
write_bytes = int(write_stat.bytes)
|
|
||||||
total_write_ops += write_ops
|
|
||||||
total_write_bytes += write_bytes
|
|
||||||
|
|
||||||
# Only print if there's data for this cgroup
|
|
||||||
if read_stat or write_stat:
|
|
||||||
print(f"{cgroup_id:<20} {read_ops:<15} {read_bytes:<20} {write_ops:<15} {write_bytes:<20}")
|
|
||||||
cgroups_with_data.append(cgroup_id)
|
|
||||||
|
|
||||||
if not cgroups_with_data:
|
|
||||||
print("No data collected yet...")
|
|
||||||
|
|
||||||
print("=" * 80)
|
|
||||||
print(f"{'TOTAL':<20} {total_read_ops:<15} {total_read_bytes:<20} {total_write_ops:<15} {total_write_bytes:<20}")
|
|
||||||
print()
|
|
||||||
|
|
||||||
|
|
||||||
# Main loop
|
|
||||||
if __name__ == "__main__":
|
|
||||||
print("Tracing file I/O operations... Press Ctrl+C to exit\n")
|
|
||||||
|
|
||||||
try:
|
|
||||||
while True:
|
|
||||||
time.sleep(5) # Update every 5 seconds
|
|
||||||
display_stats()
|
|
||||||
except KeyboardInterrupt:
|
|
||||||
print("\nStopped")
|
|
||||||
@ -1,192 +0,0 @@
|
|||||||
import logging
|
|
||||||
import time
|
|
||||||
import os
|
|
||||||
from pathlib import Path
|
|
||||||
from pythonbpf import bpf, map, section, bpfglobal, struct, compile, BPF
|
|
||||||
from pythonbpf.maps import HashMap
|
|
||||||
from pythonbpf.helper import get_current_cgroup_id
|
|
||||||
from ctypes import c_int32, c_uint64, c_void_p, c_uint32
|
|
||||||
from vmlinux import struct_sk_buff, struct_pt_regs
|
|
||||||
|
|
||||||
|
|
||||||
@bpf
|
|
||||||
@struct
|
|
||||||
class net_stats:
|
|
||||||
rx_packets: c_uint64
|
|
||||||
tx_packets: c_uint64
|
|
||||||
rx_bytes: c_uint64
|
|
||||||
tx_bytes: c_uint64
|
|
||||||
|
|
||||||
|
|
||||||
@bpf
|
|
||||||
@map
|
|
||||||
def net_stats_map() -> HashMap:
|
|
||||||
return HashMap(key=c_uint64, value=net_stats, max_entries=1024)
|
|
||||||
|
|
||||||
|
|
||||||
@bpf
|
|
||||||
@section("kprobe/__netif_receive_skb")
|
|
||||||
def trace_netif_rx(ctx: struct_pt_regs) -> c_int32:
|
|
||||||
cgroup_id = get_current_cgroup_id()
|
|
||||||
|
|
||||||
# Read skb pointer from first argument (PT_REGS_PARM1)
|
|
||||||
skb = struct_sk_buff(ctx.di) # x86_64: first arg in rdi
|
|
||||||
|
|
||||||
# Read skb->len
|
|
||||||
pkt_len = c_uint64(skb.len)
|
|
||||||
|
|
||||||
|
|
||||||
stats_ptr = net_stats_map.lookup(cgroup_id)
|
|
||||||
|
|
||||||
if stats_ptr:
|
|
||||||
stats = net_stats()
|
|
||||||
stats.rx_packets = stats_ptr.rx_packets + 1
|
|
||||||
stats.tx_packets = stats_ptr.tx_packets
|
|
||||||
stats.rx_bytes = stats_ptr.rx_bytes + pkt_len
|
|
||||||
stats.tx_bytes = stats_ptr.tx_bytes
|
|
||||||
net_stats_map.update(cgroup_id, stats)
|
|
||||||
else:
|
|
||||||
stats = net_stats()
|
|
||||||
stats.rx_packets = c_uint64(1)
|
|
||||||
stats.tx_packets = c_uint64(0)
|
|
||||||
stats.rx_bytes = pkt_len
|
|
||||||
stats.tx_bytes = c_uint64(0)
|
|
||||||
net_stats_map.update(cgroup_id, stats)
|
|
||||||
|
|
||||||
return c_int32(0)
|
|
||||||
|
|
||||||
@bpf
|
|
||||||
@section("kprobe/__dev_queue_xmit")
|
|
||||||
def trace_dev_xmit(ctx1: struct_pt_regs) -> c_int32:
|
|
||||||
cgroup_id = get_current_cgroup_id()
|
|
||||||
|
|
||||||
# Read skb pointer from first argument
|
|
||||||
skb = struct_sk_buff(ctx1.di)
|
|
||||||
pkt_len = c_uint64(skb.len)
|
|
||||||
|
|
||||||
stats_ptr = net_stats_map.lookup(cgroup_id)
|
|
||||||
|
|
||||||
if stats_ptr:
|
|
||||||
stats = net_stats()
|
|
||||||
stats.rx_packets = stats_ptr.rx_packets
|
|
||||||
stats.tx_packets = stats_ptr.tx_packets + 1
|
|
||||||
stats.rx_bytes = stats_ptr.rx_bytes
|
|
||||||
stats.tx_bytes = stats_ptr.tx_bytes + pkt_len
|
|
||||||
net_stats_map.update(cgroup_id, stats)
|
|
||||||
else:
|
|
||||||
stats = net_stats()
|
|
||||||
stats.rx_packets = c_uint64(0)
|
|
||||||
stats.tx_packets = c_uint64(1)
|
|
||||||
stats.rx_bytes = c_uint64(0)
|
|
||||||
stats.tx_bytes = pkt_len
|
|
||||||
net_stats_map.update(cgroup_id, stats)
|
|
||||||
|
|
||||||
return c_int32(0)
|
|
||||||
|
|
||||||
|
|
||||||
@bpf
|
|
||||||
@bpfglobal
|
|
||||||
def LICENSE() -> str:
|
|
||||||
return "GPL"
|
|
||||||
|
|
||||||
# Load and attach BPF program
|
|
||||||
b = BPF()
|
|
||||||
b.load()
|
|
||||||
b.attach_all()
|
|
||||||
|
|
||||||
# Get map reference and enable struct deserialization
|
|
||||||
net_stats_map_ref = b["net_stats_map"]
|
|
||||||
net_stats_map_ref.set_value_struct("net_stats")
|
|
||||||
|
|
||||||
|
|
||||||
def get_cgroup_ids():
|
|
||||||
"""Get all cgroup IDs from the system"""
|
|
||||||
cgroup_ids = set()
|
|
||||||
|
|
||||||
# Get cgroup IDs from running processes
|
|
||||||
for proc_dir in Path("/proc").glob("[0-9]*"):
|
|
||||||
try:
|
|
||||||
cgroup_file = proc_dir / "cgroup"
|
|
||||||
if cgroup_file.exists():
|
|
||||||
with open(cgroup_file) as f:
|
|
||||||
for line in f:
|
|
||||||
# Parse cgroup path and get inode
|
|
||||||
parts = line.strip().split(":")
|
|
||||||
if len(parts) >= 3:
|
|
||||||
cgroup_path = parts[2]
|
|
||||||
# Try to get the cgroup inode which is used as ID
|
|
||||||
cgroup_mount = f"/sys/fs/cgroup{cgroup_path}"
|
|
||||||
if os.path.exists(cgroup_mount):
|
|
||||||
stat_info = os.stat(cgroup_mount)
|
|
||||||
cgroup_ids.add(stat_info.st_ino)
|
|
||||||
except (PermissionError, FileNotFoundError, OSError):
|
|
||||||
continue
|
|
||||||
|
|
||||||
return cgroup_ids
|
|
||||||
|
|
||||||
|
|
||||||
# Display function
|
|
||||||
def display_stats():
|
|
||||||
"""Read and display network I/O statistics from BPF maps"""
|
|
||||||
print("\n" + "=" * 100)
|
|
||||||
print(f"{'CGROUP ID':<20} {'RX PACKETS':<15} {'RX BYTES':<20} {'TX PACKETS':<15} {'TX BYTES':<20}")
|
|
||||||
print("=" * 100)
|
|
||||||
|
|
||||||
# Get cgroup IDs from the system
|
|
||||||
cgroup_ids = get_cgroup_ids()
|
|
||||||
|
|
||||||
if not cgroup_ids:
|
|
||||||
print("No cgroups found...")
|
|
||||||
print("=" * 100)
|
|
||||||
return
|
|
||||||
|
|
||||||
# Initialize totals
|
|
||||||
total_rx_packets = 0
|
|
||||||
total_rx_bytes = 0
|
|
||||||
total_tx_packets = 0
|
|
||||||
total_tx_bytes = 0
|
|
||||||
|
|
||||||
# Track which cgroups have data
|
|
||||||
cgroups_with_data = []
|
|
||||||
|
|
||||||
# Display stats for each cgroup
|
|
||||||
for cgroup_id in sorted(cgroup_ids):
|
|
||||||
# Get network stats using lookup
|
|
||||||
rx_packets = 0
|
|
||||||
rx_bytes = 0
|
|
||||||
tx_packets = 0
|
|
||||||
tx_bytes = 0
|
|
||||||
|
|
||||||
net_stat = net_stats_map_ref.lookup(cgroup_id)
|
|
||||||
if net_stat:
|
|
||||||
rx_packets = int(net_stat.rx_packets)
|
|
||||||
rx_bytes = int(net_stat.rx_bytes)
|
|
||||||
tx_packets = int(net_stat.tx_packets)
|
|
||||||
tx_bytes = int(net_stat.tx_bytes)
|
|
||||||
|
|
||||||
total_rx_packets += rx_packets
|
|
||||||
total_rx_bytes += rx_bytes
|
|
||||||
total_tx_packets += tx_packets
|
|
||||||
total_tx_bytes += tx_bytes
|
|
||||||
|
|
||||||
print(f"{cgroup_id:<20} {rx_packets:<15} {rx_bytes:<20} {tx_packets:<15} {tx_bytes:<20}")
|
|
||||||
cgroups_with_data.append(cgroup_id)
|
|
||||||
|
|
||||||
if not cgroups_with_data:
|
|
||||||
print("No data collected yet...")
|
|
||||||
|
|
||||||
print("=" * 100)
|
|
||||||
print(f"{'TOTAL':<20} {total_rx_packets:<15} {total_rx_bytes:<20} {total_tx_packets:<15} {total_tx_bytes:<20}")
|
|
||||||
print()
|
|
||||||
|
|
||||||
|
|
||||||
# Main loop
|
|
||||||
if __name__ == "__main__":
|
|
||||||
print("Tracing network I/O operations... Press Ctrl+C to exit\n")
|
|
||||||
|
|
||||||
try:
|
|
||||||
while True:
|
|
||||||
time.sleep(5) # Update every 5 seconds
|
|
||||||
display_stats()
|
|
||||||
except KeyboardInterrupt:
|
|
||||||
print("\nStopped")
|
|
||||||
@ -1,132 +0,0 @@
|
|||||||
import time
|
|
||||||
import os
|
|
||||||
from pathlib import Path
|
|
||||||
from pythonbpf import bpf, map, section, bpfglobal, BPF
|
|
||||||
from pythonbpf.maps import HashMap
|
|
||||||
from pythonbpf.helper import get_current_cgroup_id
|
|
||||||
from ctypes import c_void_p, c_int32, c_uint64
|
|
||||||
|
|
||||||
|
|
||||||
@bpf
|
|
||||||
@map
|
|
||||||
def syscall_count() -> HashMap:
|
|
||||||
"""Map tracking syscall count by cgroup ID"""
|
|
||||||
return HashMap(key=c_uint64, value=c_uint64, max_entries=1024)
|
|
||||||
|
|
||||||
|
|
||||||
@bpf
|
|
||||||
@section("tracepoint/raw_syscalls/sys_enter")
|
|
||||||
def count_syscalls(ctx: c_void_p) -> c_int32:
|
|
||||||
"""
|
|
||||||
Increment syscall counter for the current cgroup.
|
|
||||||
Attached to raw_syscalls/sys_enter tracepoint to catch all syscalls.
|
|
||||||
"""
|
|
||||||
cgroup_id = get_current_cgroup_id()
|
|
||||||
|
|
||||||
# Lookup current count
|
|
||||||
count_ptr = syscall_count.lookup(cgroup_id)
|
|
||||||
|
|
||||||
if count_ptr:
|
|
||||||
# Increment existing count
|
|
||||||
new_count = count_ptr + c_uint64(1)
|
|
||||||
syscall_count.update(cgroup_id, new_count)
|
|
||||||
else:
|
|
||||||
# First syscall for this cgroup
|
|
||||||
syscall_count.update(cgroup_id, c_uint64(1))
|
|
||||||
|
|
||||||
return c_int32(0)
|
|
||||||
|
|
||||||
|
|
||||||
@bpf
|
|
||||||
@bpfglobal
|
|
||||||
def LICENSE() -> str:
|
|
||||||
return "GPL"
|
|
||||||
|
|
||||||
|
|
||||||
# Load and attach BPF program
|
|
||||||
b = BPF()
|
|
||||||
b.load()
|
|
||||||
b.attach_all()
|
|
||||||
|
|
||||||
# Get map reference
|
|
||||||
syscall_count_ref = b["syscall_count"]
|
|
||||||
|
|
||||||
|
|
||||||
def get_cgroup_ids():
|
|
||||||
"""Get all cgroup IDs from the system"""
|
|
||||||
cgroup_ids = set()
|
|
||||||
|
|
||||||
# Get cgroup IDs from running processes
|
|
||||||
for proc_dir in Path("/proc").glob("[0-9]*"):
|
|
||||||
try:
|
|
||||||
cgroup_file = proc_dir / "cgroup"
|
|
||||||
if cgroup_file.exists():
|
|
||||||
with open(cgroup_file) as f:
|
|
||||||
for line in f:
|
|
||||||
# Parse cgroup path and get inode
|
|
||||||
parts = line.strip().split(":")
|
|
||||||
if len(parts) >= 3:
|
|
||||||
cgroup_path = parts[2]
|
|
||||||
# Try to get the cgroup inode which is used as ID
|
|
||||||
cgroup_mount = f"/sys/fs/cgroup{cgroup_path}"
|
|
||||||
if os.path.exists(cgroup_mount):
|
|
||||||
stat_info = os.stat(cgroup_mount)
|
|
||||||
cgroup_ids.add(stat_info.st_ino)
|
|
||||||
except (PermissionError, FileNotFoundError, OSError):
|
|
||||||
continue
|
|
||||||
|
|
||||||
return cgroup_ids
|
|
||||||
|
|
||||||
|
|
||||||
# Display function
|
|
||||||
def display_stats():
|
|
||||||
"""Read and display syscall statistics from BPF maps"""
|
|
||||||
print("\n" + "=" * 60)
|
|
||||||
print(f"{'CGROUP ID':<20} {'SYSCALL COUNT':<20}")
|
|
||||||
print("=" * 60)
|
|
||||||
|
|
||||||
# Get cgroup IDs from the system
|
|
||||||
cgroup_ids = get_cgroup_ids()
|
|
||||||
|
|
||||||
if not cgroup_ids:
|
|
||||||
print("No cgroups found...")
|
|
||||||
print("=" * 60)
|
|
||||||
return
|
|
||||||
|
|
||||||
# Initialize totals
|
|
||||||
total_syscalls = 0
|
|
||||||
|
|
||||||
# Track which cgroups have data
|
|
||||||
cgroups_with_data = []
|
|
||||||
|
|
||||||
# Display stats for each cgroup
|
|
||||||
for cgroup_id in sorted(cgroup_ids):
|
|
||||||
# Get syscall count using lookup
|
|
||||||
syscall_cnt = 0
|
|
||||||
|
|
||||||
count = syscall_count_ref.lookup(cgroup_id)
|
|
||||||
if count is not None:
|
|
||||||
syscall_cnt = int(count)
|
|
||||||
total_syscalls += syscall_cnt
|
|
||||||
|
|
||||||
print(f"{cgroup_id:<20} {syscall_cnt:<20}")
|
|
||||||
cgroups_with_data.append(cgroup_id)
|
|
||||||
|
|
||||||
if not cgroups_with_data:
|
|
||||||
print("No data collected yet...")
|
|
||||||
|
|
||||||
print("=" * 60)
|
|
||||||
print(f"{'TOTAL':<20} {total_syscalls:<20}")
|
|
||||||
print()
|
|
||||||
|
|
||||||
|
|
||||||
# Main loop
|
|
||||||
if __name__ == "__main__":
|
|
||||||
print("Tracing syscalls per cgroup... Press Ctrl+C to exit\n")
|
|
||||||
|
|
||||||
try:
|
|
||||||
while True:
|
|
||||||
time.sleep(5) # Update every 5 seconds
|
|
||||||
display_stats()
|
|
||||||
except KeyboardInterrupt:
|
|
||||||
print("\nStopped")
|
|
||||||
296
BCC-Examples/container-monitor/tui.py
Normal file
296
BCC-Examples/container-monitor/tui.py
Normal file
@ -0,0 +1,296 @@
|
|||||||
|
"""Terminal User Interface for container monitoring."""
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
import curses
|
||||||
|
from typing import Optional, List
|
||||||
|
from data_collector import ContainerDataCollector, CgroupInfo, ContainerStats
|
||||||
|
|
||||||
|
|
||||||
|
class ContainerMonitorTUI:
|
||||||
|
"""TUI for container monitoring with cgroup selection and live graphs."""
|
||||||
|
|
||||||
|
def __init__(self, collector: ContainerDataCollector):
|
||||||
|
self.collector = collector
|
||||||
|
self.selected_cgroup: Optional[int] = None
|
||||||
|
self.current_screen = "selection" # "selection" or "monitoring"
|
||||||
|
self.selected_index = 0
|
||||||
|
self.scroll_offset = 0
|
||||||
|
|
||||||
|
def run(self):
|
||||||
|
"""Run the TUI application."""
|
||||||
|
curses.wrapper(self._main_loop)
|
||||||
|
|
||||||
|
def _main_loop(self, stdscr):
|
||||||
|
"""Main curses loop."""
|
||||||
|
# Configure curses
|
||||||
|
curses.curs_set(0) # Hide cursor
|
||||||
|
stdscr.nodelay(True) # Non-blocking input
|
||||||
|
stdscr.timeout(100) # Refresh every 100ms
|
||||||
|
|
||||||
|
# Initialize colors
|
||||||
|
curses.start_color()
|
||||||
|
curses.init_pair(1, curses.COLOR_CYAN, curses.COLOR_BLACK)
|
||||||
|
curses.init_pair(2, curses.COLOR_GREEN, curses.COLOR_BLACK)
|
||||||
|
curses.init_pair(3, curses.COLOR_YELLOW, curses.COLOR_BLACK)
|
||||||
|
curses.init_pair(4, curses.COLOR_RED, curses.COLOR_BLACK)
|
||||||
|
curses.init_pair(5, curses.COLOR_MAGENTA, curses.COLOR_BLACK)
|
||||||
|
curses.init_pair(6, curses.COLOR_WHITE, curses.COLOR_BLUE)
|
||||||
|
|
||||||
|
while True:
|
||||||
|
stdscr.clear()
|
||||||
|
|
||||||
|
try:
|
||||||
|
if self.current_screen == "selection":
|
||||||
|
self._draw_selection_screen(stdscr)
|
||||||
|
elif self.current_screen == "monitoring":
|
||||||
|
self._draw_monitoring_screen(stdscr)
|
||||||
|
|
||||||
|
stdscr.refresh()
|
||||||
|
|
||||||
|
# Handle input
|
||||||
|
key = stdscr.getch()
|
||||||
|
if key != -1:
|
||||||
|
if not self._handle_input(key):
|
||||||
|
break # Exit requested
|
||||||
|
|
||||||
|
except KeyboardInterrupt:
|
||||||
|
break
|
||||||
|
except Exception as e:
|
||||||
|
# Show error
|
||||||
|
stdscr.addstr(0, 0, f"Error: {str(e)}")
|
||||||
|
stdscr.refresh()
|
||||||
|
time.sleep(2)
|
||||||
|
|
||||||
|
def _draw_selection_screen(self, stdscr):
|
||||||
|
"""Draw the cgroup selection screen."""
|
||||||
|
height, width = stdscr.getmaxyx()
|
||||||
|
|
||||||
|
# Header
|
||||||
|
title = "🐳 Container Monitor - Select Cgroup"
|
||||||
|
stdscr.attron(curses.color_pair(6))
|
||||||
|
stdscr.addstr(0, (width - len(title)) // 2, title)
|
||||||
|
stdscr.attroff(curses.color_pair(6))
|
||||||
|
|
||||||
|
# Instructions
|
||||||
|
instructions = "↑↓: Navigate | ENTER: Select | q: Quit | r: Refresh"
|
||||||
|
stdscr.attron(curses.color_pair(3))
|
||||||
|
stdscr.addstr(1, (width - len(instructions)) // 2, instructions)
|
||||||
|
stdscr.attroff(curses.color_pair(3))
|
||||||
|
|
||||||
|
# Get cgroups
|
||||||
|
cgroups = self.collector.get_all_cgroups()
|
||||||
|
|
||||||
|
if not cgroups:
|
||||||
|
msg = "No cgroups found. Waiting for activity..."
|
||||||
|
stdscr.attron(curses.color_pair(4))
|
||||||
|
stdscr.addstr(height // 2, (width - len(msg)) // 2, msg)
|
||||||
|
stdscr.attroff(curses.color_pair(4))
|
||||||
|
return
|
||||||
|
|
||||||
|
# Sort cgroups by name
|
||||||
|
cgroups.sort(key=lambda c: c.name)
|
||||||
|
|
||||||
|
# Adjust selection bounds
|
||||||
|
if self.selected_index >= len(cgroups):
|
||||||
|
self.selected_index = len(cgroups) - 1
|
||||||
|
if self.selected_index < 0:
|
||||||
|
self.selected_index = 0
|
||||||
|
|
||||||
|
# Calculate visible range
|
||||||
|
list_height = height - 6
|
||||||
|
if self.selected_index < self.scroll_offset:
|
||||||
|
self.scroll_offset = self.selected_index
|
||||||
|
elif self.selected_index >= self.scroll_offset + list_height:
|
||||||
|
self.scroll_offset = self.selected_index - list_height + 1
|
||||||
|
|
||||||
|
# Draw cgroup list
|
||||||
|
start_y = 3
|
||||||
|
stdscr.attron(curses.color_pair(1))
|
||||||
|
stdscr.addstr(start_y, 2, "─" * (width - 4))
|
||||||
|
stdscr.attroff(curses.color_pair(1))
|
||||||
|
|
||||||
|
for i in range(list_height):
|
||||||
|
idx = self.scroll_offset + i
|
||||||
|
if idx >= len(cgroups):
|
||||||
|
break
|
||||||
|
|
||||||
|
cgroup = cgroups[idx]
|
||||||
|
y = start_y + 1 + i
|
||||||
|
|
||||||
|
if idx == self.selected_index:
|
||||||
|
# Highlight selected
|
||||||
|
stdscr.attron(curses.color_pair(2) | curses.A_BOLD | curses.A_REVERSE)
|
||||||
|
line = f"► {cgroup.name:<40} ID: {cgroup.id}"
|
||||||
|
stdscr.addstr(y, 2, line[:width - 4])
|
||||||
|
stdscr.attroff(curses.color_pair(2) | curses.A_BOLD | curses.A_REVERSE)
|
||||||
|
else:
|
||||||
|
line = f" {cgroup.name:<40} ID: {cgroup.id}"
|
||||||
|
stdscr.addstr(y, 2, line[:width - 4])
|
||||||
|
|
||||||
|
# Footer with count
|
||||||
|
footer = f"Total cgroups: {len(cgroups)}"
|
||||||
|
stdscr.attron(curses.color_pair(1))
|
||||||
|
stdscr.addstr(height - 2, (width - len(footer)) // 2, footer)
|
||||||
|
stdscr.attroff(curses.color_pair(1))
|
||||||
|
|
||||||
|
def _draw_monitoring_screen(self, stdscr):
|
||||||
|
"""Draw the monitoring screen for selected cgroup."""
|
||||||
|
height, width = stdscr.getmaxyx()
|
||||||
|
|
||||||
|
if self.selected_cgroup is None:
|
||||||
|
return
|
||||||
|
|
||||||
|
# Get current stats
|
||||||
|
stats = self.collector.get_stats_for_cgroup(self.selected_cgroup)
|
||||||
|
history = self.collector.get_history(self.selected_cgroup)
|
||||||
|
|
||||||
|
# Header
|
||||||
|
title = f"📊 Monitoring: {stats.cgroup_name}"
|
||||||
|
stdscr.attron(curses.color_pair(6))
|
||||||
|
stdscr.addstr(0, (width - len(title)) // 2, title)
|
||||||
|
stdscr.attroff(curses.color_pair(6))
|
||||||
|
|
||||||
|
# Instructions
|
||||||
|
instructions = "ESC/b: Back | q: Quit"
|
||||||
|
stdscr.attron(curses.color_pair(3))
|
||||||
|
stdscr.addstr(1, (width - len(instructions)) // 2, instructions)
|
||||||
|
stdscr.attroff(curses.color_pair(3))
|
||||||
|
|
||||||
|
# Syscall count (large number display)
|
||||||
|
y = 3
|
||||||
|
stdscr.attron(curses.color_pair(5) | curses.A_BOLD)
|
||||||
|
stdscr.addstr(y, 2, "SYSCALLS")
|
||||||
|
stdscr.attroff(curses.color_pair(5) | curses.A_BOLD)
|
||||||
|
|
||||||
|
syscall_str = f"{stats.syscall_count:,}"
|
||||||
|
stdscr.attron(curses.color_pair(2) | curses.A_BOLD)
|
||||||
|
stdscr.addstr(y + 1, 4, syscall_str)
|
||||||
|
stdscr.attroff(curses.color_pair(2) | curses.A_BOLD)
|
||||||
|
|
||||||
|
# Network I/O graphs
|
||||||
|
y = 6
|
||||||
|
self._draw_section_header(stdscr, y, "NETWORK I/O", 1)
|
||||||
|
|
||||||
|
# RX graph
|
||||||
|
y += 2
|
||||||
|
rx_label = f"RX: {self._format_bytes(stats.rx_bytes)} ({stats.rx_packets:,} packets)"
|
||||||
|
stdscr.addstr(y, 2, rx_label)
|
||||||
|
if len(history) > 1:
|
||||||
|
self._draw_bar_graph(stdscr, y + 1, 2, width - 4, 3,
|
||||||
|
[s.rx_bytes for s in history],
|
||||||
|
curses.color_pair(2))
|
||||||
|
|
||||||
|
# TX graph
|
||||||
|
y += 5
|
||||||
|
tx_label = f"TX: {self._format_bytes(stats.tx_bytes)} ({stats.tx_packets:,} packets)"
|
||||||
|
stdscr.addstr(y, 2, tx_label)
|
||||||
|
if len(history) > 1:
|
||||||
|
self._draw_bar_graph(stdscr, y + 1, 2, width - 4, 3,
|
||||||
|
[s.tx_bytes for s in history],
|
||||||
|
curses.color_pair(3))
|
||||||
|
|
||||||
|
# File I/O graphs
|
||||||
|
y += 5
|
||||||
|
self._draw_section_header(stdscr, y, "FILE I/O", 1)
|
||||||
|
|
||||||
|
# Read graph
|
||||||
|
y += 2
|
||||||
|
read_label = f"READ: {self._format_bytes(stats.read_bytes)} ({stats.read_ops:,} ops)"
|
||||||
|
stdscr.addstr(y, 2, read_label)
|
||||||
|
if len(history) > 1:
|
||||||
|
self._draw_bar_graph(stdscr, y + 1, 2, width - 4, 3,
|
||||||
|
[s.read_bytes for s in history],
|
||||||
|
curses.color_pair(4))
|
||||||
|
|
||||||
|
# Write graph
|
||||||
|
y += 5
|
||||||
|
write_label = f"WRITE: {self._format_bytes(stats.write_bytes)} ({stats.write_ops:,} ops)"
|
||||||
|
stdscr.addstr(y, 2, write_label)
|
||||||
|
if len(history) > 1:
|
||||||
|
self._draw_bar_graph(stdscr, y + 1, 2, width - 4, 3,
|
||||||
|
[s.write_bytes for s in history],
|
||||||
|
curses.color_pair(5))
|
||||||
|
|
||||||
|
def _draw_section_header(self, stdscr, y: int, title: str, color_pair: int):
|
||||||
|
"""Draw a section header."""
|
||||||
|
height, width = stdscr.getmaxyx()
|
||||||
|
stdscr.attron(curses.color_pair(color_pair) | curses.A_BOLD)
|
||||||
|
stdscr.addstr(y, 2, title)
|
||||||
|
stdscr.addstr(y, len(title) + 3, "─" * (width - len(title) - 5))
|
||||||
|
stdscr.attroff(curses.color_pair(color_pair) | curses.A_BOLD)
|
||||||
|
|
||||||
|
def _draw_bar_graph(self, stdscr, y: int, x: int, width: int, height: int,
|
||||||
|
data: List[float], color_pair: int):
|
||||||
|
"""Draw a simple bar graph."""
|
||||||
|
if not data or width < 2:
|
||||||
|
return
|
||||||
|
|
||||||
|
# Normalize data to graph width
|
||||||
|
max_val = max(data) if max(data) > 0 else 1
|
||||||
|
|
||||||
|
# Take last 'width' data points
|
||||||
|
recent_data = data[-width:]
|
||||||
|
|
||||||
|
# Draw bars
|
||||||
|
for row in range(height):
|
||||||
|
threshold = (height - row) / height
|
||||||
|
bar_line = ""
|
||||||
|
|
||||||
|
for val in recent_data:
|
||||||
|
normalized = val / max_val
|
||||||
|
if normalized >= threshold:
|
||||||
|
bar_line += "█"
|
||||||
|
elif normalized >= threshold - 0.2:
|
||||||
|
bar_line += "▓"
|
||||||
|
elif normalized >= threshold - 0.4:
|
||||||
|
bar_line += "▒"
|
||||||
|
elif normalized >= threshold - 0.6:
|
||||||
|
bar_line += "░"
|
||||||
|
else:
|
||||||
|
bar_line += " "
|
||||||
|
|
||||||
|
try:
|
||||||
|
stdscr.attron(color_pair)
|
||||||
|
stdscr.addstr(y + row, x, bar_line[:width])
|
||||||
|
stdscr.attroff(color_pair)
|
||||||
|
except:
|
||||||
|
pass # Ignore errors at screen edges
|
||||||
|
|
||||||
|
def _format_bytes(self, bytes_val: int) -> str:
|
||||||
|
"""Format bytes into human-readable string."""
|
||||||
|
for unit in ['B', 'KB', 'MB', 'GB', 'TB']:
|
||||||
|
if bytes_val < 1024.0:
|
||||||
|
return f"{bytes_val:.2f} {unit}"
|
||||||
|
bytes_val /= 1024.
|
||||||
|
0
|
||||||
|
return f"{bytes_val:.2f} PB"
|
||||||
|
|
||||||
|
def _handle_input(self, key: int) -> bool:
|
||||||
|
"""Handle keyboard input. Returns False to exit."""
|
||||||
|
if key == ord('q') or key == ord('Q'):
|
||||||
|
return False # Exit
|
||||||
|
|
||||||
|
if self.current_screen == "selection":
|
||||||
|
if key == curses.KEY_UP:
|
||||||
|
self.selected_index = max(0, self.selected_index - 1)
|
||||||
|
elif key == curses.KEY_DOWN:
|
||||||
|
cgroups = self.collector.get_all_cgroups()
|
||||||
|
self.selected_index = min(len(cgroups) - 1, self.selected_index + 1)
|
||||||
|
elif key == ord('\n') or key == curses.KEY_ENTER or key == 10:
|
||||||
|
# Select cgroup
|
||||||
|
cgroups = self.collector.get_all_cgroups()
|
||||||
|
if cgroups and 0 <= self.selected_index < len(cgroups):
|
||||||
|
cgroups.sort(key=lambda c: c.name)
|
||||||
|
self.selected_cgroup = cgroups[self.selected_index].id
|
||||||
|
self.current_screen = "monitoring"
|
||||||
|
elif key == ord('r') or key == ord('R'):
|
||||||
|
# Force refresh cache
|
||||||
|
self.collector._cgroup_cache_time = 0
|
||||||
|
|
||||||
|
elif self.current_screen == "monitoring":
|
||||||
|
if key == 27 or key == ord('b') or key == ord('B'): # ESC or 'b'
|
||||||
|
self.current_screen = "selection"
|
||||||
|
self.selected_cgroup = None
|
||||||
|
|
||||||
|
return True # Continue running
|
||||||
Reference in New Issue
Block a user