Files
sysprof/src/libsysprof-profile/sysprof-linux-instrument.c
Christian Hergert 3de9f39348 libsysprof-profile: parse address ranges with GRegex
This uses G_REGEX_OPTIMIZE as well to get the benefit of the JIT when using
PCRE2 via GLib.

This turns out to take about half (or less) of the time that it takes to
use sscanf() to parse the template/input string. 0.03 vs 0.08 seconds on
my system.

If someone wants to take this further, and just look at the input string
rather than rely on GRegex, that'd be fine by me too.
2023-06-05 16:32:08 -07:00

431 lines
15 KiB
C

/* sysprof-linux-instrument.c
*
* Copyright 2023 Christian Hergert <chergert@redhat.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* SPDX-License-Identifier: GPL-3.0-or-later
*/
#include "config.h"
#include <stdio.h>
#include "sysprof-linux-instrument-private.h"
#include "sysprof-podman-private.h"
#include "sysprof-recording-private.h"
#include "line-reader-private.h"
/* Instance structure: the instrument carries no fields of its own beyond
 * the parent SysprofInstrument instance.
 */
struct _SysprofLinuxInstrument
{
SysprofInstrument parent_instance;
};
/* Property identifiers. No entries are declared between PROP_0 and N_PROPS,
 * and neither value is referenced by the code in this file.
 */
enum {
PROP_0,
N_PROPS
};
/* Registers SysprofLinuxInstrument as a final type deriving from
 * SYSPROF_TYPE_INSTRUMENT.
 */
G_DEFINE_FINAL_TYPE (SysprofLinuxInstrument, sysprof_linux_instrument, SYSPROF_TYPE_INSTRUMENT)
/* Compiled once in sysprof_linux_instrument_class_init() and used by
 * add_mmaps() to pick apart each line of a process' memory map listing.
 */
static GRegex *address_range_regex;
/* Returns a newly allocated, NULL-terminated string vector naming the
 * policy this instrument requires. The result is a copy produced by
 * g_strdupv(), so the static table below is never handed out directly.
 */
static char **
sysprof_linux_instrument_list_required_policy (SysprofInstrument *instrument)
{
  static const char *required_policies[] = {
    "org.gnome.sysprof3.profile",
    NULL
  };
  char **copy;

  copy = g_strdupv ((char **)required_policies);

  return copy;
}
/* add_mmaps:
 * @recording: recording whose capture writer receives the map records
 * @pid: process the mappings belong to; must be greater than zero
 * @mapsstr: text to parse, one mapping per line (the caller passes the
 *   "maps" value reported for the process)
 * @ignore_inode: when TRUE, every mapping is recorded with inode 0
 *
 * Parses @mapsstr line by line with address_range_regex and adds one map
 * record per matching line. The regex is expected to capture, in order:
 * start address, end address, file offset, inode and path. Lines that do
 * not match, or that lack any of those five groups, are skipped.
 *
 * A trailing " (deleted)" marker is removed from the path before it is
 * recorded. The "[vdso]" mapping is always recorded with inode 0 and
 * offset 0.
 */
static void
add_mmaps (SysprofRecording *recording,
           GPid              pid,
           const char       *mapsstr,
           gboolean          ignore_inode)
{
  static const char deleted_suffix[] = " (deleted)";
  const int deleted_suffix_len = (int) (sizeof deleted_suffix - 1);
  SysprofCaptureWriter *writer;
  LineReader reader;
  const char *line;
  gsize line_len;

  g_assert (SYSPROF_IS_RECORDING (recording));
  g_assert (mapsstr != NULL);
  g_assert (pid > 0);

  writer = _sysprof_recording_writer (recording);

  line_reader_init (&reader, (char *)mapsstr, -1);

  while ((line = line_reader_next (&reader, &line_len)))
    {
      g_autoptr(GMatchInfo) match_info = NULL;
      g_autofree char *file = NULL;
      guint64 begin_addr;
      guint64 end_addr;
      guint64 inode;
      guint64 offset;
      gboolean is_vdso;
      int begin_addr_begin;
      int begin_addr_end;
      int end_addr_begin;
      int end_addr_end;
      int offset_begin;
      int offset_end;
      int inode_begin;
      int inode_end;
      int path_begin;
      int path_end;

      if (!g_regex_match_full (address_range_regex, line, line_len, 0, 0, &match_info, NULL))
        continue;

      if (!g_match_info_fetch_pos (match_info, 1, &begin_addr_begin, &begin_addr_end) ||
          !g_match_info_fetch_pos (match_info, 2, &end_addr_begin, &end_addr_end) ||
          !g_match_info_fetch_pos (match_info, 3, &offset_begin, &offset_end) ||
          !g_match_info_fetch_pos (match_info, 4, &inode_begin, &inode_end) ||
          !g_match_info_fetch_pos (match_info, 5, &path_begin, &path_end))
        continue;

      /* The numeric fields are parsed in place; each one is followed by a
       * non-digit separator in the line so the conversion stops there.
       */
      begin_addr = g_ascii_strtoull (&line[begin_addr_begin], NULL, 16);
      end_addr = g_ascii_strtoull (&line[end_addr_begin], NULL, 16);
      offset = g_ascii_strtoull (&line[offset_begin], NULL, 16);
      inode = g_ascii_strtoull (&line[inode_begin], NULL, 10);

      /* Drop a trailing " (deleted)" marker. The length check keeps the
       * comparison inside the captured path, and the comparison covers the
       * whole marker including the closing parenthesis.
       */
      if (path_end - path_begin >= deleted_suffix_len &&
          memcmp (&line[path_end - deleted_suffix_len],
                  deleted_suffix,
                  deleted_suffix_len) == 0)
        path_end -= deleted_suffix_len;

      file = g_strndup (&line[path_begin], path_end - path_begin);

      is_vdso = strcmp ("[vdso]", file) == 0;

      if (ignore_inode || is_vdso)
        inode = 0;

      if (is_vdso)
        offset = 0;

      sysprof_capture_writer_add_map (writer,
                                      SYSPROF_CAPTURE_CURRENT_TIME,
                                      -1,
                                      pid,
                                      begin_addr,
                                      end_addr,
                                      offset,
                                      inode,
                                      file);
    }
}
/* populate_overlays:
 * @recording: recording to add overlay records and files to
 * @podman: podman state used to look up container layers
 * @pid: process identifier as seen on the host
 * @cgroup: cgroup description of the process; must not be NULL
 *
 * Inspects @cgroup to decide whether the process runs inside a podman
 * container or a Flatpak sandbox.
 *
 * For podman, an overlay record is written for each layer returned by
 * sysprof_podman_get_layers() and the process' /run/.containerenv is added
 * to the recording. For Flatpak, the process' /.flatpak-info is added.
 *
 * Returns: the future from _sysprof_recording_add_file() in either of those
 *   cases, otherwise a future already resolved to TRUE.
 */
static DexFuture *
populate_overlays (SysprofRecording *recording,
                   SysprofPodman    *podman,
                   int               pid,
                   const char       *cgroup)
{
  static GRegex *flatpak_regex;
  static GRegex *podman_regex;
  g_autoptr(GMatchInfo) podman_match = NULL;
  SysprofCaptureWriter *writer;

  g_assert (SYSPROF_IS_RECORDING (recording));
  g_assert (cgroup != NULL);

  if (strcmp (cgroup, "") == 0)
    return dex_future_new_for_boolean (TRUE);

  writer = _sysprof_recording_writer (recording);

  /* This function tries to discover the podman container that contains the
   * process identified on the host as @pid. We can only do anything with this
   * if the pids are in containers that the running user controls (so that we
   * can actually access the overlays).
   *
   * This stuff, and I want to emphasize, is a giant hack. Just like containers
   * are on Linux. But if we are really careful, we can make this work for the
   * one particular use case I care about, which is podman/toolbox on Fedora
   * Workstation/Silverblue.
   *
   * -- Christian
   */

  if G_UNLIKELY (podman_regex == NULL)
    {
      podman_regex = g_regex_new ("libpod-([a-z0-9]{64})\\.scope", G_REGEX_OPTIMIZE, 0, NULL);
      g_assert (podman_regex != NULL);
    }

  if G_UNLIKELY (flatpak_regex == NULL)
    {
      /* Application IDs may contain digits (e.g. "com.github.tchx84.Flatseal"),
       * so they must be part of the accepted character class.
       */
      flatpak_regex = g_regex_new ("app-flatpak-([a-zA-Z0-9_\\-\\.]+)-[0-9]+\\.scope", G_REGEX_OPTIMIZE, 0, NULL);
      g_assert (flatpak_regex != NULL);
    }

  if (g_regex_match (podman_regex, cgroup, 0, &podman_match))
    {
      g_autofree char *word = g_match_info_fetch (podman_match, 1);
      g_autofree char *path = g_strdup_printf ("/proc/%d/root/run/.containerenv", pid);
      g_auto(GStrv) layers = NULL;

      /* The captured 64-character identifier selects the container whose
       * layers are recorded, numbered in the order they are returned.
       */
      if ((layers = sysprof_podman_get_layers (podman, word)))
        {
          for (guint i = 0; layers[i]; i++)
            sysprof_capture_writer_add_overlay (writer,
                                                SYSPROF_CAPTURE_CURRENT_TIME,
                                                -1, pid, i, layers[i], "/");
        }

      return _sysprof_recording_add_file (recording, path, FALSE);
    }

  /* Only whether the scope name matches is of interest here, so no match
   * info is requested.
   */
  if (g_regex_match (flatpak_regex, cgroup, 0, NULL))
    {
      g_autofree char *path = g_strdup_printf ("/proc/%d/root/.flatpak-info", pid);

      return _sysprof_recording_add_file (recording, path, FALSE);
    }

  return dex_future_new_for_boolean (TRUE);
}
/* add_process_info:
 * @recording: recording to add process information to
 * @process_info: variant of type "aa{sv}", one dictionary per process
 *
 * For every dictionary in @process_info that carries a "pid" entry, this
 * writes a process record, stores the process' mountinfo contents in the
 * recording, adds its memory maps and queues overlay discovery. Missing
 * "cmdline", "comm", "mountinfo", "maps" and "cgroup" entries are treated
 * as empty strings. Dictionaries without a "pid" are skipped.
 *
 * Returns: a future combining all queued overlay futures, or a future
 *   already resolved to TRUE when nothing was queued.
 */
static DexFuture *
add_process_info (SysprofRecording *recording,
                  GVariant         *process_info)
{
  g_autoptr(SysprofPodman) podman = NULL;
  g_autoptr(GPtrArray) futures = NULL;
  SysprofCaptureWriter *writer;
  GVariantIter iter;
  GVariant *pidinfo;

  g_assert (SYSPROF_IS_RECORDING (recording));
  g_assert (process_info != NULL);
  g_assert (g_variant_is_of_type (process_info, G_VARIANT_TYPE ("aa{sv}")));

  writer = _sysprof_recording_writer (recording);
  podman = sysprof_podman_snapshot_current_user ();
  futures = g_ptr_array_new_with_free_func (dex_unref);

  /* Loop through all the PIDs the server notified us about */
  g_variant_iter_init (&iter, process_info);
  while (g_variant_iter_loop (&iter, "@a{sv}", &pidinfo))
    {
      g_autofree char *mount_path = NULL;
      GVariantDict dict;
      const char *cmdline;
      const char *comm;
      const char *mountinfo;
      const char *maps;
      const char *cgroup;
      gboolean ignore_inode;
      gint32 pid;

      g_variant_dict_init (&dict, pidinfo);

      if (!g_variant_dict_lookup (&dict, "pid", "i", &pid))
        goto skip;

      if (!g_variant_dict_lookup (&dict, "cmdline", "&s", &cmdline))
        cmdline = "";

      if (!g_variant_dict_lookup (&dict, "comm", "&s", &comm))
        comm = "";

      if (!g_variant_dict_lookup (&dict, "mountinfo", "&s", &mountinfo))
        mountinfo = "";

      if (!g_variant_dict_lookup (&dict, "maps", "&s", &maps))
        maps = "";

      if (!g_variant_dict_lookup (&dict, "cgroup", "&s", &cgroup))
        cgroup = "";

      /* Notify the capture that a process was spawned. The command line is
       * preferred; the short command name is used when it is empty.
       */
      sysprof_capture_writer_add_process (writer,
                                          SYSPROF_CAPTURE_CURRENT_TIME,
                                          -1,
                                          pid,
                                          *cmdline ? cmdline : comm);

      /* Give the capture access to the mountinfo of that process to aid
       * in resolving symbols later on.
       */
      mount_path = g_strdup_printf ("/proc/%d/mountinfo", pid);
      _sysprof_recording_add_file_data (recording, mount_path, mountinfo, -1);

      /* Ignore inodes from podman/toolbox because they appear to always be
       * wrong. We'll have to rely on CRC/build-id instead.
       */
      ignore_inode = strstr (cgroup, "/libpod-") != NULL;
      add_mmaps (recording, pid, maps, ignore_inode);

      /* We might have overlays that need to be applied to the process
       * which can be rather cumbersome for old-style Podman using
       * FUSE overlayfs.
       */
      g_ptr_array_add (futures, populate_overlays (recording, podman, pid, cgroup));

    skip:
      /* The "&s" lookups above borrow from the dictionary, so it is only
       * cleared once everything that uses those strings has run.
       */
      g_variant_dict_clear (&dict);
    }

  if (futures->len > 0)
    return dex_future_allv ((DexFuture **)futures->pdata, futures->len);

  return dex_future_new_for_boolean (TRUE);
}
/* Fiber body for the prepare phase. @user_data is the SysprofRecording.
 *
 * Requests /proc/cpuinfo and /proc/mounts be added to the recording and
 * waits for both requests together. Resolves to TRUE on success, or to the
 * error reported while waiting.
 */
static DexFuture *
sysprof_linux_instrument_prepare_fiber (gpointer user_data)
{
  SysprofRecording *recording = user_data;
  g_autoptr(GError) error = NULL;
  DexFuture *system_files;

  g_assert (SYSPROF_IS_RECORDING (recording));

  /* Basic system information; both files are requested before waiting so
   * that their contents can be gathered concurrently.
   */
  system_files = dex_future_all (_sysprof_recording_add_file (recording, "/proc/cpuinfo", TRUE),
                                 _sysprof_recording_add_file (recording, "/proc/mounts", TRUE),
                                 NULL);

  if (dex_await (system_files, &error))
    return dex_future_new_for_boolean (TRUE);

  return dex_future_new_for_error (g_steal_pointer (&error));
}
/* SysprofInstrument "prepare" implementation.
 *
 * Spawns sysprof_linux_instrument_prepare_fiber() with a new reference to
 * @recording, released through g_object_unref by the destroy callback.
 * The NULL scheduler and 0 stack size presumably select libdex defaults.
 *
 * Returns: the future produced by dex_scheduler_spawn().
 */
static DexFuture *
sysprof_linux_instrument_prepare (SysprofInstrument *instrument,
                                  SysprofRecording  *recording)
{
  DexFuture *fiber;

  g_assert (SYSPROF_IS_INSTRUMENT (instrument));
  g_assert (SYSPROF_IS_RECORDING (recording));

  fiber = dex_scheduler_spawn (NULL, 0,
                               sysprof_linux_instrument_prepare_fiber,
                               g_object_ref (recording),
                               g_object_unref);

  return fiber;
}
/* Fiber body for the record phase. @user_data is the SysprofRecording.
 *
 * Connects to the system bus, asks org.gnome.Sysprof3 for information about
 * processes via GetProcessInfo, and feeds the reply to add_process_info().
 *
 * Resolves to an error if the bus or the D-Bus call fails. Failures while
 * awaiting add_process_info() are not propagated; the fiber still resolves
 * to TRUE.
 */
static DexFuture *
sysprof_linux_instrument_record_fiber (gpointer user_data)
{
  SysprofRecording *recording = user_data;
  g_autoptr(GDBusConnection) connection = NULL;
  g_autoptr(GVariant) reply = NULL;
  g_autoptr(GVariant) processes = NULL;
  g_autoptr(GError) error = NULL;
  DexFuture *call;

  g_assert (SYSPROF_IS_RECORDING (recording));

  /* The bus is needed to talk to sysprofd directly */
  connection = dex_await_object (dex_bus_get (G_BUS_TYPE_SYSTEM), &error);
  if (connection == NULL)
    return dex_future_new_for_error (g_steal_pointer (&error));

  /* Request details on processes so that records describing them can be
   * added to the recording.
   */
  call = dex_dbus_connection_call (connection,
                                   "org.gnome.Sysprof3",
                                   "/org/gnome/Sysprof3",
                                   "org.gnome.Sysprof3.Service",
                                   "GetProcessInfo",
                                   g_variant_new ("(s)", "pid,maps,mountinfo,cmdline,comm,cgroup"),
                                   G_VARIANT_TYPE ("(aa{sv})"),
                                   G_DBUS_CALL_FLAGS_ALLOW_INTERACTIVE_AUTHORIZATION,
                                   G_MAXINT);
  reply = dex_await_variant (call, &error);
  if (reply == NULL)
    return dex_future_new_for_error (g_steal_pointer (&error));

  /* The reply is a one-element tuple wrapping the array of dictionaries */
  processes = g_variant_get_child_value (reply, 0);
  dex_await (add_process_info (recording, processes), NULL);

  return dex_future_new_for_boolean (TRUE);
}
/* SysprofInstrument "record" implementation.
 *
 * Spawns sysprof_linux_instrument_record_fiber() with a new reference to
 * @recording, released through g_object_unref by the destroy callback.
 * @cancellable is not used by this implementation.
 *
 * Returns: the future produced by dex_scheduler_spawn().
 */
static DexFuture *
sysprof_linux_instrument_record (SysprofInstrument *instrument,
                                 SysprofRecording  *recording,
                                 GCancellable      *cancellable)
{
  DexFuture *fiber;

  g_assert (SYSPROF_IS_INSTRUMENT (instrument));
  g_assert (SYSPROF_IS_RECORDING (recording));

  fiber = dex_scheduler_spawn (NULL, 0,
                               sysprof_linux_instrument_record_fiber,
                               g_object_ref (recording),
                               g_object_unref);

  return fiber;
}
/* SysprofInstrument "process_started" implementation.
 *
 * Adds the mountinfo file of the process identified by @pid to @recording.
 *
 * Returns: the future from _sysprof_recording_add_file().
 */
static DexFuture *
sysprof_linux_instrument_process_started (SysprofInstrument *instrument,
                                          SysprofRecording  *recording,
                                          int                pid)
{
  g_autofree char *mountinfo_path = NULL;

  g_assert (SYSPROF_IS_INSTRUMENT (instrument));
  g_assert (SYSPROF_IS_RECORDING (recording));

  /* @pid is a signed int, so it is formatted with %d */
  mountinfo_path = g_strdup_printf ("/proc/%d/mountinfo", pid);

  /* TODO: If we get the process cgroup and keep our saved podman
   * state around, we could populate overlays for new processes.
   */

  return _sysprof_recording_add_file (recording, mountinfo_path, TRUE);
}
/* GObject finalize handler. There is nothing of our own to release, so
 * this only chains up to the parent class.
 */
static void
sysprof_linux_instrument_finalize (GObject *object)
{
  GObjectClass *parent_class = G_OBJECT_CLASS (sysprof_linux_instrument_parent_class);

  parent_class->finalize (object);
}
/* Class initializer: installs the GObject and SysprofInstrument virtual
 * functions and compiles address_range_regex, which add_mmaps() uses to
 * parse memory map lines.
 *
 * add_mmaps() fetches five capture groups from each match, so the pattern
 * captures, in order:
 *
 *   1. start address (hexadecimal)
 *   2. end address (hexadecimal)
 *   3. file offset (hexadecimal)
 *   4. inode (decimal)
 *   5. path (may be empty)
 *
 * The permission flags and the major:minor device numbers are matched but
 * not captured. Device numbers are hexadecimal and not limited to two
 * digits.
 */
static void
sysprof_linux_instrument_class_init (SysprofLinuxInstrumentClass *klass)
{
  GObjectClass *object_class = G_OBJECT_CLASS (klass);
  SysprofInstrumentClass *instrument_class = SYSPROF_INSTRUMENT_CLASS (klass);
  g_autoptr(GError) error = NULL;

  object_class->finalize = sysprof_linux_instrument_finalize;

  instrument_class->list_required_policy = sysprof_linux_instrument_list_required_policy;
  instrument_class->prepare = sysprof_linux_instrument_prepare;
  instrument_class->record = sysprof_linux_instrument_record;
  instrument_class->process_started = sysprof_linux_instrument_process_started;

  address_range_regex = g_regex_new ("^([0-9a-f]+)-([0-9a-f]+) [r\\-][w\\-][x\\-][ps\\-] ([0-9a-f]+) [0-9a-f]+:[0-9a-f]+ ([0-9]+) +(.*)$",
                                     G_REGEX_OPTIMIZE,
                                     G_REGEX_MATCH_DEFAULT,
                                     &error);
  g_assert_no_error (error);
}
/* Instance initializer. Empty: the instance structure declares no fields
 * beyond the parent instance, so there is nothing to set up here.
 */
static void
sysprof_linux_instrument_init (SysprofLinuxInstrument *self)
{
}
/* Creates a new SysprofLinuxInstrument with no construct properties and
 * returns it as a SysprofInstrument.
 */
SysprofInstrument *
_sysprof_linux_instrument_new (void)
{
  SysprofInstrument *instrument;

  instrument = g_object_new (SYSPROF_TYPE_LINUX_INSTRUMENT, NULL);

  return instrument;
}