mirror of https://github.com/varun-r-mallya/Python-BPF.git, synced 2025-12-31 21:06:25 +00:00
Add enhanced vfsreadlat BCC example with live plotly and dash graphs on browser
BCC-Examples/vfsreadlat_plotly/bpf_program.py (Normal file, 101 lines added)
@@ -0,0 +1,101 @@
"""BPF program and userspace driver for tracing VFS read latency."""

from pythonbpf import bpf, map, struct, section, bpfglobal, BPF
from pythonbpf.helper import ktime, pid
from pythonbpf.maps import HashMap, PerfEventArray
from ctypes import c_void_p, c_uint64
import argparse
from data_collector import LatencyCollector
from dashboard import LatencyDashboard


@bpf
@struct
class latency_event:
    pid: c_uint64
    delta_us: c_uint64


@bpf
@map
def start() -> HashMap:
    """Map to store start timestamps by PID."""
    return HashMap(key=c_uint64, value=c_uint64, max_entries=10240)


@bpf
@map
def events() -> PerfEventArray:
    """Perf event array for sending latency events to userspace."""
    return PerfEventArray(key_size=c_uint64, value_size=c_uint64)


@bpf
@section("kprobe/vfs_read")
def do_entry(ctx: c_void_p) -> c_uint64:
    """Record start time when vfs_read is called."""
    p, ts = pid(), ktime()
    start.update(p, ts)
    return 0  # type: ignore [return-value]


@bpf
@section("kretprobe/vfs_read")
def do_return(ctx: c_void_p) -> c_uint64:
    """Calculate and record latency when vfs_read returns."""
    p = pid()
    tsp = start.lookup(p)

    if tsp:
        delta_ns = ktime() - tsp

        # Only track latencies > 1 microsecond
        if delta_ns > 1000:
            evt = latency_event()
            evt.pid, evt.delta_us = p, delta_ns // 1000
            events.output(evt)

        start.delete(p)

    return 0  # type: ignore [return-value]


@bpf
@bpfglobal
def LICENSE() -> str:
    return "GPL"


def parse_args():
    """Parse command line arguments."""
    parser = argparse.ArgumentParser(
        description="Monitor VFS read latency with live dashboard"
    )
    parser.add_argument(
        "--host", default="0.0.0.0", help="Dashboard host (default: 0.0.0.0)"
    )
    parser.add_argument(
        "--port", type=int, default=8050, help="Dashboard port (default: 8050)"
    )
    parser.add_argument(
        "--buffer", type=int, default=10000, help="Recent data buffer size"
    )
    return parser.parse_args()


args = parse_args()

# Load BPF program
print("Loading BPF program...")
b = BPF()
b.load()
b.attach_all()
print("✅ BPF program loaded and attached")

# Set up data collector
collector = LatencyCollector(b, buffer_size=args.buffer)
collector.start()

# Create and run dashboard
dashboard = LatencyDashboard(collector)
dashboard.run(host=args.host, port=args.port)
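The two probes above use the standard entry/return latency pattern: do_entry stashes a timestamp keyed by PID in the start map, and do_return pops it, converts the delta to microseconds, and emits a latency_event through the perf buffer. A minimal pure-Python sketch of that pattern (illustrative only, not part of the commit; in the real program the dictionary is the BPF HashMap and the timing happens in kernel context):

import time

start_ts = {}  # stands in for the per-PID BPF HashMap


def on_entry(task_pid: int) -> None:
    # kprobe side: remember when the read began
    start_ts[task_pid] = time.monotonic_ns()


def on_return(task_pid: int):
    # kretprobe side: compute the delta and drop sub-microsecond reads
    ts = start_ts.pop(task_pid, None)
    if ts is None:
        return None
    delta_ns = time.monotonic_ns() - ts
    return delta_ns // 1000 if delta_ns > 1000 else None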
BCC-Examples/vfsreadlat_plotly/dashboard.py (Normal file, 282 lines added)
@@ -0,0 +1,282 @@
"""Plotly Dash dashboard for visualizing latency data."""

import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np


class LatencyDashboard:
    """Interactive dashboard for latency visualization."""

    def __init__(self, collector, title: str = "VFS Read Latency Monitor"):
        self.collector = collector
        self.app = dash.Dash(__name__)
        self.app.title = title
        self._setup_layout()
        self._setup_callbacks()

    def _setup_layout(self):
        """Create dashboard layout."""
        self.app.layout = html.Div(
            [
                html.H1(
                    "🔥 VFS Read Latency Dashboard",
                    style={
                        "textAlign": "center",
                        "color": "#2c3e50",
                        "marginBottom": 20,
                    },
                ),
                # Stats cards
                html.Div(
                    [
                        self._create_stat_card(
                            "total-samples", "📊 Total Samples", "#3498db"
                        ),
                        self._create_stat_card(
                            "mean-latency", "⚡ Mean Latency", "#e74c3c"
                        ),
                        self._create_stat_card(
                            "p99-latency", "🔥 P99 Latency", "#f39c12"
                        ),
                    ],
                    style={
                        "display": "flex",
                        "justifyContent": "space-around",
                        "marginBottom": 30,
                    },
                ),
                # Graphs - ✅ Make sure these IDs match the callback outputs
                dcc.Graph(id="dual-histogram", style={"height": "450px"}),
                dcc.Graph(id="log2-buckets", style={"height": "350px"}),
                dcc.Graph(id="timeseries-graph", style={"height": "300px"}),
                # Auto-update
                dcc.Interval(id="interval-component", interval=1000, n_intervals=0),
            ],
            style={"padding": 20, "fontFamily": "Arial, sans-serif"},
        )

    def _create_stat_card(self, id_name: str, title: str, color: str):
        """Create a statistics card."""
        return html.Div(
            [
                html.H3(title, style={"color": color}),
                html.H2(id=id_name, style={"fontSize": 48, "color": "#2c3e50"}),
            ],
            className="stat-box",
            style={
                "background": "white",
                "padding": 20,
                "borderRadius": 10,
                "boxShadow": "0 4px 6px rgba(0,0,0,0.1)",
                "textAlign": "center",
                "flex": 1,
                "margin": "0 10px",
            },
        )

    def _setup_callbacks(self):
        """Set up dashboard callbacks."""

        @self.app.callback(
            [
                Output("total-samples", "children"),
                Output("mean-latency", "children"),
                Output("p99-latency", "children"),
                Output("dual-histogram", "figure"),  # ✅ Match layout IDs
                Output("log2-buckets", "figure"),  # ✅ Match layout IDs
                Output("timeseries-graph", "figure"),  # ✅ Match layout IDs
            ],
            [Input("interval-component", "n_intervals")],
        )
        def update_dashboard(n):
            stats = self.collector.get_stats()

            if stats.total == 0:
                return self._empty_state()

            return (
                f"{stats.total:,}",
                f"{stats.mean:.1f} µs",
                f"{stats.p99:.1f} µs",
                self._create_dual_histogram(),
                self._create_log2_buckets(),
                self._create_timeseries(),
            )

    def _empty_state(self):
        """Return empty state for dashboard."""
        empty_fig = go.Figure()
        empty_fig.update_layout(
            title="Waiting for data... Generate some disk I/O!", template="plotly_white"
        )
        # ✅ Return 6 values (3 stats + 3 figures)
        return "0", "0 µs", "0 µs", empty_fig, empty_fig, empty_fig

    def _create_dual_histogram(self) -> go.Figure:
        """Create side-by-side linear and log2 histograms."""
        latencies = self.collector.get_all_latencies()

        # Create subplots
        fig = make_subplots(
            rows=1,
            cols=2,
            subplot_titles=("Linear Scale", "Log2 Scale"),
            horizontal_spacing=0.12,
        )

        # Linear histogram
        fig.add_trace(
            go.Histogram(
                x=latencies,
                nbinsx=50,
                marker_color="rgb(55, 83, 109)",
                opacity=0.75,
                name="Linear",
            ),
            row=1,
            col=1,
        )

        # Log2 histogram
        log2_latencies = np.log2(latencies + 1)  # +1 to avoid log2(0)
        fig.add_trace(
            go.Histogram(
                x=log2_latencies,
                nbinsx=30,
                marker_color="rgb(243, 156, 18)",
                opacity=0.75,
                name="Log2",
            ),
            row=1,
            col=2,
        )

        # Update axes
        fig.update_xaxes(title_text="Latency (µs)", row=1, col=1)
        fig.update_xaxes(title_text="log2(Latency in µs)", row=1, col=2)
        fig.update_yaxes(title_text="Count", row=1, col=1)
        fig.update_yaxes(title_text="Count", row=1, col=2)

        fig.update_layout(
            title_text="📊 Latency Distribution (Linear vs Log2)",
            template="plotly_white",
            showlegend=False,
            height=450,
        )

        return fig

    def _create_log2_buckets(self) -> go.Figure:
        """Create bar chart of log2 buckets (like BCC histogram)."""
        buckets = self.collector.get_histogram_buckets()

        if not buckets:
            fig = go.Figure()
            fig.update_layout(
                title="🔥 Log2 Histogram - Waiting for data...", template="plotly_white"
            )
            return fig

        # Sort buckets
        sorted_buckets = sorted(buckets.keys())
        counts = [buckets[b] for b in sorted_buckets]
        total = sum(counts)

        # Create labels (e.g., "8-16µs", "16-32µs")
        labels = []
        hover_text = []
        for bucket in sorted_buckets:
            lower = 2**bucket
            upper = 2 ** (bucket + 1)
            labels.append(f"{lower}-{upper}")

            # Calculate percentage
            pct = (buckets[bucket] / total) * 100 if total > 0 else 0
            hover_text.append(
                f"Range: {lower}-{upper} µs<br>"
                f"Count: {buckets[bucket]:,}<br>"
                f"Percentage: {pct:.2f}%"
            )

        # Create bar chart
        fig = go.Figure()

        fig.add_trace(
            go.Bar(
                x=labels,
                y=counts,
                marker=dict(
                    color=counts,
                    colorscale="YlOrRd",
                    showscale=True,
                    colorbar=dict(title="Count"),
                ),
                text=counts,
                textposition="outside",
                hovertext=hover_text,
                hoverinfo="text",
            )
        )

        fig.update_layout(
            title="🔥 Log2 Histogram (BCC-style buckets)",
            xaxis_title="Latency Range (µs)",
            yaxis_title="Count",
            template="plotly_white",
            height=350,
            xaxis=dict(tickangle=-45),
        )

        return fig

    def _create_timeseries(self) -> go.Figure:
        """Create time series figure."""
        recent = self.collector.get_recent_latencies()

        if not recent:
            fig = go.Figure()
            fig.update_layout(
                title="⏱️ Real-time Latency - Waiting for data...",
                template="plotly_white",
            )
            return fig

        times = [d["time"] for d in recent]
        lats = [d["latency"] for d in recent]

        fig = go.Figure()
        fig.add_trace(
            go.Scatter(
                x=times,
                y=lats,
                mode="lines",
                line=dict(color="rgb(231, 76, 60)", width=2),
                fill="tozeroy",
                fillcolor="rgba(231, 76, 60, 0.2)",
            )
        )

        fig.update_layout(
            title="⏱️ Real-time Latency (Last 10,000 samples)",
            xaxis_title="Time (seconds)",
            yaxis_title="Latency (µs)",
            template="plotly_white",
            height=300,
        )

        return fig

    def run(self, host: str = "0.0.0.0", port: int = 8050, debug: bool = False):
        """Run the dashboard server."""
        print(f"\n{'=' * 60}")
        print(f"🚀 Dashboard running at: http://{host}:{port}")
        print(" Access from your browser to see live graphs")
        print(
            " Generate disk I/O to see data: dd if=/dev/zero of=/tmp/test bs=1M count=100"
        )
        print(f"{'=' * 60}\n")
        self.app.run(debug=debug, host=host, port=port)
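Because LatencyDashboard only calls four collector methods (get_stats, get_all_latencies, get_recent_latencies, get_histogram_buckets), the dashboard can be previewed in a browser without root access or a loaded BPF program. A hypothetical stand-in collector fed with synthetic samples, assuming dash, plotly, and numpy are installed and the two modules are importable from the current directory:

# Smoke-test sketch (not part of the commit): drive LatencyDashboard with fake data.
import time
import numpy as np
from dashboard import LatencyDashboard
from data_collector import LatencyStats


class FakeCollector:
    """Implements only the methods the dashboard calls, with synthetic latencies."""

    def __init__(self):
        self._lat = np.random.lognormal(mean=3.0, sigma=0.8, size=5000)  # µs
        self._t0 = time.time()

    def get_all_latencies(self):
        return self._lat

    def get_recent_latencies(self):
        return [{"time": i * 0.01, "latency": float(v)} for i, v in enumerate(self._lat[-1000:])]

    def get_stats(self):
        return LatencyStats.from_array(self._lat)

    def get_histogram_buckets(self):
        log_buckets = np.floor(np.log2(self._lat + 1)).astype(int)
        buckets = {}
        for b in log_buckets:
            buckets[int(b)] = buckets.get(int(b), 0) + 1
        return buckets


LatencyDashboard(FakeCollector()).run(host="127.0.0.1", port=8050)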
BCC-Examples/vfsreadlat_plotly/data_collector.py (Normal file, 96 lines added)
@@ -0,0 +1,96 @@
"""Data collection and management."""

import threading
import time
import numpy as np
from collections import deque
from dataclasses import dataclass
from typing import List, Dict


@dataclass
class LatencyStats:
    """Statistics computed from latency data."""

    total: int = 0
    mean: float = 0.0
    median: float = 0.0
    min: float = 0.0
    max: float = 0.0
    p95: float = 0.0
    p99: float = 0.0

    @classmethod
    def from_array(cls, data: np.ndarray) -> "LatencyStats":
        """Compute stats from numpy array."""
        if len(data) == 0:
            return cls()

        return cls(
            total=len(data),
            mean=float(np.mean(data)),
            median=float(np.median(data)),
            min=float(np.min(data)),
            max=float(np.max(data)),
            p95=float(np.percentile(data, 95)),
            p99=float(np.percentile(data, 99)),
        )


class LatencyCollector:
    """Collects and manages latency data from BPF."""

    def __init__(self, bpf_object, buffer_size: int = 10000):
        self.bpf = bpf_object
        self.all_latencies: List[float] = []
        self.recent_latencies = deque(maxlen=buffer_size)  # type: ignore [var-annotated]
        self.start_time = time.time()
        self._lock = threading.Lock()
        self._poll_thread = None

    def callback(self, cpu: int, event):
        """Callback for BPF events."""
        with self._lock:
            self.all_latencies.append(event.delta_us)
            self.recent_latencies.append(
                {"time": time.time() - self.start_time, "latency": event.delta_us}
            )

    def start(self):
        """Start collecting data."""
        self.bpf["events"].open_perf_buffer(self.callback, struct_name="latency_event")

        def poll_loop():
            while True:
                self.bpf["events"].poll(100)

        self._poll_thread = threading.Thread(target=poll_loop, daemon=True)
        self._poll_thread.start()
        print("✅ Data collection started")

    def get_all_latencies(self) -> np.ndarray:
        """Get all latencies as numpy array."""
        with self._lock:
            return np.array(self.all_latencies) if self.all_latencies else np.array([])

    def get_recent_latencies(self) -> List[Dict]:
        """Get recent latencies with timestamps."""
        with self._lock:
            return list(self.recent_latencies)

    def get_stats(self) -> LatencyStats:
        """Compute current statistics."""
        return LatencyStats.from_array(self.get_all_latencies())

    def get_histogram_buckets(self) -> Dict[int, int]:
        """Get log2 histogram buckets."""
        latencies = self.get_all_latencies()
        if len(latencies) == 0:
            return {}

        log_buckets = np.floor(np.log2(latencies + 1)).astype(int)
        buckets = {}  # type: ignore [var-annotated]
        for bucket in log_buckets:
            buckets[bucket] = buckets.get(bucket, 0) + 1

        return buckets
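The bucketing in get_histogram_buckets mirrors the BCC-style histogram rendered by dashboard.py: each latency lands in bucket floor(log2(latency + 1)), so bucket 4 covers roughly 16-32 µs. A short standalone check of that mapping (assumes numpy is installed; sample values are made up for illustration):

import numpy as np

latencies = np.array([3.0, 17.0, 20.0, 75.0, 300.0])  # latencies in µs
log_buckets = np.floor(np.log2(latencies + 1)).astype(int)

buckets = {}
for bucket in log_buckets:
    buckets[int(bucket)] = buckets.get(int(bucket), 0) + 1

for bucket in sorted(buckets):
    print(f"{2**bucket}-{2**(bucket + 1)} µs: {buckets[bucket]}")
# 17.0 and 20.0 both fall into the 16-32 µs bucket, matching the dashboard's labels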