/* mapped-ring-buffer.c
 *
 * Copyright 2020 Christian Hergert <chergert@redhat.com>
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 * SPDX-License-Identifier: GPL-3.0-or-later
 */

#include "config.h"

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <unistd.h>

#include "sysprof-capture-util-private.h"
#include "sysprof-macros-internal.h"
#include "sysprof-platform.h"

#include "mapped-ring-buffer.h"

#define DEFAULT_N_PAGES 32
#define BUFFER_MAX_SIZE ((UINT32_MAX/2)-_sysprof_getpagesize())

enum {
  MODE_READER    = 1,
  MODE_WRITER    = 2,
  MODE_READWRITE = 3,
};

/*
 * MappedRingHeader is the header of the first page of the
 * buffer. We use the whole buffer so that we can double map
 * the body of the buffer.
 */
typedef struct _MappedRingHeader
{
  uint32_t head;
  uint32_t tail;
  uint32_t offset;
  uint32_t size;
} MappedRingHeader;

SYSPROF_STATIC_ASSERT (sizeof (MappedRingHeader) == 16, "MappedRingHeader changed size");

/*
 * MappedRingBuffer is used to wrap both the reader and writer
 * mapping structures.
 */
struct _MappedRingBuffer
{
  volatile int ref_count;
  int          mode;
  int          fd;
  void        *map;
  size_t       body_size;
  size_t       page_size;
};

static inline MappedRingHeader *
get_header (MappedRingBuffer *self)
{
  return (MappedRingHeader *)self->map;
}

static inline void *
get_body_at_pos (MappedRingBuffer *self,
                 size_t            pos)
{
  assert (pos < (self->body_size + self->body_size));

  return (uint8_t *)self->map + self->page_size + pos;
}

static void *
map_head_and_body_twice (int    fd,
                         size_t head_size,
                         size_t body_size)
{
  void *map;
  void *second;

  /* First we map FD to the total size we want so that we can be
   * certain the OS will give us a contiguous mapping for our buffers
   * even though we can't dereference a portion of the mapping yet.
   *
   * We'll overwrite the secondary mapping in a moment to deal with
   * wraparound for the ring buffer.
   */
  map = mmap (NULL,
              head_size + body_size + body_size,
              PROT_READ | PROT_WRITE,
              MAP_SHARED,
              fd,
              0);

  if (map == MAP_FAILED)
    return NULL;

  /* At this point, we have [HEAD|BODY|BUSERR] mapped. But we want to map
   * the body again over what would currently cause a BusError. That way
   * when we need to wraparound we don't need to copy anything, we just
   * have to check in mapped_ring_buffer_allocate() that the size does not
   * step over what would be the real reader position.
   *
   * By mmap()'ing over the old region, the previous region is automatically
   * munmap()'d for us.
   */
  second = mmap ((uint8_t *)map + head_size + body_size,
                 body_size,
                 PROT_READ | PROT_WRITE,
                 MAP_SHARED | MAP_FIXED,
                 fd,
                 head_size);

  if (second == MAP_FAILED)
    {
      munmap (map, head_size + body_size + body_size);
      return NULL;
    }

  assert (second == (void *)((uint8_t *)map + head_size + body_size));

  return map;
}
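
/* Illustrative sketch only (not part of the upstream API): assuming a
 * 4 KiB page size and the default 32-page body (head_size = 4096,
 * body_size = 131072), the mapping produced above lays out as:
 *
 *   map + 0      .. map + 4096   : header page     (file offset 0)
 *   map + 4096   .. map + 135168 : body, 1st view  (file offset 4096)
 *   map + 135168 .. map + 266240 : body, 2nd view  (file offset 4096)
 *
 * Both body views alias the same file pages, so a record that starts
 * near the end of the first view can be written or read straight
 * through into the second view without any wraparound copying.
 */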

/**
 * mapped_ring_buffer_new_reader:
 * @buffer_size: the size of the buffer, which must be page-aligned,
 *   or 0 to use the default size
 *
 * Creates a new #MappedRingBuffer.
 *
 * This should be called by the process reading the buffer. It should
 * then pass the FD of the buffer to another process or thread to
 * advance the ring buffer by writing data.
 *
 * The other process or thread should create a new #MappedRingBuffer
 * using mapped_ring_buffer_new_writer() with the FD retrieved from
 * the reader using mapped_ring_buffer_get_fd(). If crossing a process
 * boundary, you probably also want to dup() the FD and set O_CLOEXEC.
 *
 * @buffer_size must be a multiple of the system's page size, which can
 * be retrieved using sysprof_getpagesize().
 *
 * Returns: (transfer full) (nullable): a #MappedRingBuffer, or %NULL on failure
 */
MappedRingBuffer *
mapped_ring_buffer_new_reader (size_t buffer_size)
{
  MappedRingBuffer *self;
  MappedRingHeader *header;
  size_t page_size;
  void *map;
  int fd;

  assert ((buffer_size % _sysprof_getpagesize ()) == 0);
  assert (buffer_size < BUFFER_MAX_SIZE);

  page_size = _sysprof_getpagesize ();

  /* Add 1 page for coordination header */
  if (buffer_size == 0)
    buffer_size = page_size * DEFAULT_N_PAGES;
  buffer_size += page_size;

  /* Create our memfd (or tmpfs) for writing */
  if ((fd = sysprof_memfd_create ("[sysprof-ring-buffer]")) == -1)
    return NULL;

  /* Size our memfd to reserve space */
  if (ftruncate (fd, buffer_size) != 0)
    {
      close (fd);
      return NULL;
    }

  /* Map ring buffer [HEAD|BODY|BODY] */
  if (!(map = map_head_and_body_twice (fd, page_size, buffer_size - page_size)))
    {
      close (fd);
      return NULL;
    }

  /* Setup initial header */
  header = map;
  header->head = 0;
  header->tail = 0;
  header->offset = page_size;
  header->size = buffer_size - page_size;

  self = sysprof_malloc0 (sizeof (MappedRingBuffer));
  if (self == NULL)
    {
      munmap (map, page_size + ((buffer_size - page_size) * 2));
      close (fd);
      return NULL;
    }

  self->ref_count = 1;
  self->mode = MODE_READER;
  self->body_size = buffer_size - page_size;
  self->fd = fd;
  self->map = map;
  self->page_size = page_size;

  return sysprof_steal_pointer (&self);
}
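
/* A minimal usage sketch for the reader side (illustrative only; the
 * surrounding process/IPC plumbing is assumed and not part of this file):
 *
 *   MappedRingBuffer *reader = mapped_ring_buffer_new_reader (0);
 *
 *   if (reader != NULL)
 *     {
 *       int fd = mapped_ring_buffer_get_fd (reader);
 *
 *       // Hand @fd to the producer (another thread, or another process
 *       // via dup() + FD passing) so it can call
 *       // mapped_ring_buffer_new_writer (fd).
 *
 *       // ... later, consume frames with mapped_ring_buffer_drain() ...
 *
 *       mapped_ring_buffer_unref (reader);
 *     }
 */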

MappedRingBuffer *
mapped_ring_buffer_new_readwrite (size_t buffer_size)
{
  MappedRingBuffer *self;

  if ((self = mapped_ring_buffer_new_reader (buffer_size)))
    self->mode = MODE_READWRITE;

  return self;
}

/**
 * mapped_ring_buffer_new_writer:
 * @fd: a FD to map
 *
 * Creates a new #MappedRingBuffer using a copy of @fd.
 *
 * The caller may close(fd) after calling this function regardless of
 * success creating the #MappedRingBuffer.
 *
 * Returns: (transfer full) (nullable): a new #MappedRingBuffer
 */
MappedRingBuffer *
mapped_ring_buffer_new_writer (int fd)
{
  MappedRingBuffer *self;
  MappedRingHeader *header;
  ssize_t buffer_size;
  size_t page_size;
  void *map;

  assert (fd > -1);

  page_size = _sysprof_getpagesize ();

  /* Make our own copy of the FD */
  if ((fd = dup (fd)) < 0)
    {
      fprintf (stderr, "Failed to dup() fd, cannot continue\n");
      return NULL;
    }

  /* Seek to end to get buffer size */
  if ((buffer_size = lseek (fd, 0, SEEK_END)) < 0)
    {
      fprintf (stderr, "Failed to seek to end of file. Cannot determine buffer size.\n");
      close (fd);
      return NULL;
    }

  /* Ensure the buffer is large enough for a header page plus body */
  if (buffer_size < (page_size + page_size))
    {
      fprintf (stderr, "Buffer is too small, cannot continue.\n");
      close (fd);
      return NULL;
    }

  /* Make sure it is less than our max size */
  if ((buffer_size - page_size) > BUFFER_MAX_SIZE)
    {
      fprintf (stderr, "Buffer is too large, cannot continue.\n");
      close (fd);
      return NULL;
    }

  /* Ensure we have a page-aligned buffer */
  if ((buffer_size % page_size) != 0)
    {
      fprintf (stderr, "Invalid buffer size, not page aligned.\n");
      close (fd);
      return NULL;
    }

  /* Map ring buffer [HEAD|BODY|BODY] */
  if (!(map = map_head_and_body_twice (fd, page_size, buffer_size - page_size)))
    {
      close (fd);
      return NULL;
    }

  /* Validate we got proper data in header */
  header = map;
  if (header->offset != page_size ||
      header->size != (buffer_size - page_size))
    {
      munmap (map, page_size + ((buffer_size - page_size) * 2));
      close (fd);
      return NULL;
    }

  self = sysprof_malloc0 (sizeof (MappedRingBuffer));
  if (self == NULL)
    {
      munmap (map, page_size + ((buffer_size - page_size) * 2));
      close (fd);
      return NULL;
    }

  self->ref_count = 1;
  self->mode = MODE_WRITER;
  self->fd = fd;
  self->body_size = buffer_size - page_size;
  self->map = map;
  self->page_size = page_size;

  return sysprof_steal_pointer (&self);
}
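
/* A minimal usage sketch for the writer side (illustrative only;
 * received_fd is a placeholder for however the FD reached this
 * process or thread, which is not shown here):
 *
 *   MappedRingBuffer *writer = mapped_ring_buffer_new_writer (received_fd);
 *
 *   // The FD is dup()'d internally, so the caller's copy can be closed.
 *   close (received_fd);
 *
 *   if (writer != NULL)
 *     {
 *       // ... write frames with mapped_ring_buffer_allocate() and
 *       // mapped_ring_buffer_advance() ...
 *
 *       mapped_ring_buffer_unref (writer);
 *     }
 */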

static void
mapped_ring_buffer_finalize (MappedRingBuffer *self)
{
  if (self->map != NULL)
    {
      munmap (self->map, self->page_size + self->body_size + self->body_size);
      self->map = NULL;
    }

  if (self->fd != -1)
    {
      close (self->fd);
      self->fd = -1;
    }

  free (self);
}

void
mapped_ring_buffer_unref (MappedRingBuffer *self)
{
  assert (self != NULL);
  assert (self->ref_count > 0);

  if (__atomic_fetch_sub (&self->ref_count, 1, __ATOMIC_SEQ_CST) == 1)
    mapped_ring_buffer_finalize (self);
}

MappedRingBuffer *
mapped_ring_buffer_ref (MappedRingBuffer *self)
{
  assert (self != NULL);
  assert (self->ref_count > 0);

  __atomic_fetch_add (&self->ref_count, 1, __ATOMIC_SEQ_CST);

  return self;
}

int
mapped_ring_buffer_get_fd (MappedRingBuffer *self)
{
  assert (self != NULL);

  return self->fd;
}

/**
 * mapped_ring_buffer_allocate:
 * @self: a #MappedRingBuffer
 * @length: the number of bytes to reserve, which must be a multiple of 8
 *
 * Ensures that @length bytes are available at the next position in
 * the ring buffer and returns a pointer to the beginning of that zone.
 *
 * If the reader has not read enough bytes to allow @length to be added
 * then a mark will be added or incremented notifying the peer of how
 * many records they have lost and %NULL is returned.
 *
 * You must always check for %NULL before dereferencing the result of
 * this function as space may not be immediately available.
 *
 * This only ensures that the space is available for you to write. To
 * notify the peer that the zone is ready for reading you must call
 * mapped_ring_buffer_advance() with the number of bytes to advance.
 * This is useful in case you need to allocate more memory than you
 * might need up-front but commit a smaller amount.
 *
 * Returns: (nullable): a pointer to data of at least @length bytes
 *   or %NULL if there is not enough space.
 */
void *
mapped_ring_buffer_allocate (MappedRingBuffer *self,
                             size_t            length)
{
  MappedRingHeader *header;
  uint32_t headpos;
  uint32_t tailpos;

  assert (self != NULL);
  assert (self->mode & MODE_WRITER);
  assert (length > 0);
  assert (length < self->body_size);
  assert ((length & 0x7) == 0);

  header = get_header (self);
  __atomic_load (&header->head, &headpos, __ATOMIC_SEQ_CST);
  __atomic_load (&header->tail, &tailpos, __ATOMIC_SEQ_CST);

  /* We need to check that there is enough space for @length at the
   * current position in the write buffer. We cannot fully catch up
   * to head, we must be at least one byte short of it. If we do not
   * have enough space, then return NULL.
   *
   * When we have finished writing our frame data, we will push the tail
   * forward with an atomic write.
   */

  if (tailpos == headpos)
    return get_body_at_pos (self, tailpos);

  if (headpos < tailpos)
    headpos += self->body_size;

  if (tailpos + length < headpos)
    return get_body_at_pos (self, tailpos);

  return NULL;
}
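
/* A minimal sketch of the allocate/advance write pattern (illustrative
 * only; @writer is assumed to come from mapped_ring_buffer_new_writer(),
 * and the frame layout below is hypothetical, not the sysprof capture
 * format):
 *
 *   struct { uint32_t len; uint32_t counter; } frame = { 8, 42 };
 *   void *dst = mapped_ring_buffer_allocate (writer, sizeof frame);
 *
 *   if (dst != NULL)
 *     {
 *       memcpy (dst, &frame, sizeof frame);
 *       // Only now does the reader get to see the frame.
 *       mapped_ring_buffer_advance (writer, sizeof frame);
 *     }
 *   else
 *     {
 *       // Buffer is full; drop the frame or retry later.
 *     }
 */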

/**
 * mapped_ring_buffer_advance:
 * @self: a #MappedRingBuffer
 * @length: an 8-byte aligned length
 *
 * Advances the ring buffer @length bytes forward. @length must be
 * 8-byte aligned so that the buffer may avoid memcpy() to read
 * framing data.
 *
 * This should only be called by a writer created with
 * mapped_ring_buffer_new_writer().
 *
 * Call this after writing your data into the buffer using
 * mapped_ring_buffer_allocate().
 *
 * It is a programming error to call this with a value greater
 * than the value passed to mapped_ring_buffer_allocate().
 */
void
mapped_ring_buffer_advance (MappedRingBuffer *self,
                            size_t            length)
{
  MappedRingHeader *header;
  uint32_t tail;

  assert (self != NULL);
  assert (self->mode & MODE_WRITER);
  assert (length > 0);
  assert (length < self->body_size);
  assert ((length & 0x7) == 0);

  header = get_header (self);
  tail = header->tail;

  /* Calculate the new tail position */
  tail = tail + length;
  if (tail >= self->body_size)
    tail -= self->body_size;

  /* We have already checked that we could advance the buffer when the
   * application called mapped_ring_buffer_allocate(), so at this point
   * we just update the position, as the only way the head could have
   * moved is forward.
   */
  __atomic_store (&header->tail, &tail, __ATOMIC_SEQ_CST);
}

/**
 * mapped_ring_buffer_drain:
 * @self: a #MappedRingBuffer
 * @callback: (scope call): a callback to execute for each frame
 * @user_data: closure data for @callback
 *
 * Drains the buffer by calling @callback for each frame.
 *
 * This should only be called by a reader created with
 * mapped_ring_buffer_new_reader().
 *
 * Returns: %TRUE if the buffer was drained, %FALSE if @callback prematurely
 *   returned while draining.
 */
bool
mapped_ring_buffer_drain (MappedRingBuffer         *self,
                          MappedRingBufferCallback  callback,
                          void                     *user_data)
{
  MappedRingHeader *header;
  uint32_t headpos;
  uint32_t tailpos;

  assert (self != NULL);
  assert (self->mode & MODE_READER);
  assert (callback != NULL);

  header = get_header (self);
  __atomic_load (&header->head, &headpos, __ATOMIC_SEQ_CST);
  __atomic_load (&header->tail, &tailpos, __ATOMIC_SEQ_CST);

  assert (headpos < self->body_size);
  assert (tailpos < self->body_size);

  if (headpos == tailpos)
    return true;

  /* If head needs to wrap around to get to tail, we can just rely on
   * our double mapping instead of manually wrapping/copying data.
   */
  if (tailpos < headpos)
    tailpos += self->body_size;

  assert (headpos < tailpos);

  while (headpos < tailpos)
    {
      const void *data = get_body_at_pos (self, headpos);
      size_t len = tailpos - headpos;
      uint32_t new_headpos;

      if (!callback (data, &len, user_data))
        return false;

      if (len > (tailpos - headpos))
        return false;

      headpos += len;

      if (headpos >= self->body_size)
        new_headpos = headpos - self->body_size;
      else
        new_headpos = headpos;

      __atomic_store (&header->head, &new_headpos, __ATOMIC_SEQ_CST);
    }

  return true;
}
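
/* A minimal sketch of a drain callback (illustrative only; the signature
 * mirrors how @callback is invoked above, and the framing used here, a
 * leading uint32_t length, is hypothetical, not the sysprof capture
 * format):
 *
 *   static bool
 *   example_drain_cb (const void *data,
 *                     size_t     *length,
 *                     void       *user_data)
 *   {
 *     // On entry, *length is the number of contiguous bytes available.
 *     // Before returning true, set *length to the number of bytes
 *     // actually consumed; it must not exceed the value passed in.
 *     const uint32_t *frame_len = data;
 *
 *     if (*length < sizeof *frame_len || *frame_len > *length)
 *       return false;
 *
 *     *length = *frame_len;
 *     return true;
 *   }
 *
 *   // ... mapped_ring_buffer_drain (reader, example_drain_cb, NULL);
 */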

/**
 * mapped_ring_buffer_is_empty:
 * @self: a #MappedRingBuffer
 *
 * Checks whether the ring buffer is currently empty.
 *
 * This should only be called by a reader created with
 * mapped_ring_buffer_new_reader().
 *
 * Returns: %TRUE if the buffer is empty, %FALSE otherwise
 */
bool
mapped_ring_buffer_is_empty (MappedRingBuffer *self)
{
  MappedRingHeader *header;
  uint32_t headpos, tailpos;

  header = get_header (self);

  __atomic_load (&header->head, &headpos, __ATOMIC_SEQ_CST);
  __atomic_load (&header->tail, &tailpos, __ATOMIC_SEQ_CST);

  return headpos == tailpos;
}

/**
 * mapped_ring_buffer_clear:
 * @self: a #MappedRingBuffer
 *
 * Resets the head and tail positions back to 0.
 *
 * This function is only safe to call when you control both the reader
 * and writer sides with mapped_ring_buffer_new_readwrite(), or are in
 * control of when each side reads or writes.
 */
void
mapped_ring_buffer_clear (MappedRingBuffer *self)
{
  MappedRingHeader *header;

  assert (self != NULL);

  header = get_header (self);
  header->head = 0;
  header->tail = 0;
}