Skip to content

Commit

Permalink
gc: Start implementing a mark-and-sweep garbage collector
Browse files Browse the repository at this point in the history
This gc is heavily inspired by https://github.com/Manishearth/rust-gc. I
didn't know a lot about garbage collectors going into this, and that project
provided me with a design I could replicate.

I do believe that this is its own work, but I want to disclose the
source anyway.
  • Loading branch information
simonwuelker committed Mar 3, 2024
1 parent 3fe57b3 commit 792b6f2
Show file tree
Hide file tree
Showing 7 changed files with 393 additions and 0 deletions.
2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ members = [
"tests/runners/html5lib",
"crates/util/buildutils",
"crates/js",
"crates/gc",
]
resolver = "2"

Expand Down Expand Up @@ -56,6 +57,7 @@ ciphers = { path = "crates/crypto/ciphers" }
hash = { path = "crates/crypto/hash" }
tls = { path = "crates/crypto/tls" }
js = { path = "crates/js" }
gc = { path = "crates/gc" }

log = "0.4"
criterion = { version = "0.4", features = ["html_reports"] }
Expand Down
13 changes: 13 additions & 0 deletions crates/gc/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
[package]
name = "gc"
version = "0.1.0"
authors.workspace = true
edition.workspace = true
repository.workspace = true
license.workspace = true

[dependencies]
log = { workspace = true }

[lints]
workspace = true
85 changes: 85 additions & 0 deletions crates/gc/src/cell.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
use std::{cell::Cell, ptr::NonNull};

use crate::{heap::HEAP, Trace};

/// Bit of `GcCell::flags` that stores the mark state (the most significant bit of a `usize`)
const MARKED_BIT: usize = 1 << (usize::BITS - 1);
/// Mask selecting every bit of `GcCell::flags` except [`MARKED_BIT`], i.e. the root count
const ROOTS_MASK: usize = !MARKED_BIT;

/// A single allocation tracked by the garbage collector
///
/// Bundles the collector's bookkeeping state (`flags`, `next`) with the
/// stored value.
pub(crate) struct GcCell<T: ?Sized> {
/// Contains root count and whether or not the cell is marked
///
/// Highest bit indicates mark state, lower bits are the root count.
pub(crate) flags: Cell<usize>,

/// Link to the next cell, or `None` for the last cell
///
/// `GcCell`s make up a linked list, to keep track of all allocated objects
pub(crate) next: Cell<Option<NonNull<GcCell<dyn Trace>>>>,

/// The actual value allocated
///
/// Declared last, as `T` may be unsized.
pub(crate) value: T,
}

impl<T> GcCell<T>
where
    T: Trace + 'static,
{
    /// Moves `value` into a freshly allocated cell and registers that cell
    /// with the heap of the current thread.
    ///
    /// The cell starts out unmarked and with a root count of one. The returned
    /// pointer refers to a leaked `Box`.
    pub fn new(value: T) -> NonNull<Self> {
        let boxed = Box::new(Self {
            flags: Cell::new(0x1), // Not marked, one root
            next: Cell::new(None),
            value,
        });
        let cell_ptr = NonNull::from(Box::leak(boxed));

        HEAP.with(|heap| {
            // SAFETY: The ptr is valid, as we just constructed it
            unsafe { heap.borrow_mut().register_cell(cell_ptr) }
        });

        cell_ptr
    }
}

impl<T> GcCell<T>
where
    T: ?Sized + Trace,
{
    /// Returns a reference to the value stored in this cell
    #[inline]
    pub const fn value(&self) -> &T {
        &self.value
    }

    /// Marks the cell and all its successors
    ///
    /// A cell that is already marked is left untouched and is not traced again.
    pub fn mark(&self) {
        if self.is_marked() {
            return;
        }

        // Set the bit *before* tracing, so that a trace which leads back to
        // this cell hits the early return above.
        self.flags.set(self.flags.get() | MARKED_BIT);

        // Also mark all of the connected cells
        self.value.trace();
    }

    /// Clears the mark bit, the root count is left unchanged
    pub fn unmark(&self) {
        self.flags.set(self.flags.get() & ROOTS_MASK);
    }

    /// Returns the number of roots that currently refer to this cell
    #[must_use]
    pub fn num_roots(&self) -> usize {
        self.flags.get() & ROOTS_MASK
    }

    /// Returns `true` if the mark bit is set
    #[must_use]
    pub fn is_marked(&self) -> bool {
        self.flags.get() & MARKED_BIT != 0
    }

    /// Removes one root from this cell
    ///
    /// Takes `&self`, since the counter lives in a [`std::cell::Cell`]; no
    /// exclusive access to the stored value is needed.
    ///
    /// # Panics
    /// Panics if the cell has no roots. Subtracting anyway would either
    /// overflow or borrow from the mark bit and corrupt both pieces of state.
    pub fn decrement_root_count(&self) {
        assert!(
            self.num_roots() != 0,
            "Attempted to remove a root from a gc cell without roots"
        );

        self.flags.set(self.flags.get() - 1);
    }

    /// Adds one root to this cell
    ///
    /// Takes `&self`, since the counter lives in a [`std::cell::Cell`]; no
    /// exclusive access to the stored value is needed.
    ///
    /// # Panics
    /// Panics if the root count is already at its maximum, as incrementing
    /// further would spill into the mark bit.
    pub fn increment_root_count(&self) {
        if self.num_roots() == ROOTS_MASK {
            panic!("Maximum number of gc roots exceeded");
        }

        self.flags.set(self.flags.get() + 1);
    }
}
120 changes: 120 additions & 0 deletions crates/gc/src/heap.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
use std::{
cell::{Cell, RefCell},
mem,
ptr::{self, NonNull},
};

use crate::{cell::GcCell, Trace};

/// Initial value of `Heap::collect_if_memory_usage_above`
///
/// `Heap::register_cell` starts a collection once the tracked allocation
/// size exceeds the threshold.
const COLLECT_IF_MEMORY_USAGE_ABOVE: usize = 0x1000;

thread_local! {
/// The garbage-collected heap of the current thread, initially empty
pub static HEAP: RefCell<Heap> = RefCell::new(Heap {
bytes_allocated: 0,
collect_if_memory_usage_above: COLLECT_IF_MEMORY_USAGE_ABOVE,
head: None,
});
}

/// Runs a garbage collection on the heap of the current thread, regardless
/// of how much memory is in use.
///
/// Returns the number of bytes that were freed
pub fn collect_garbage() -> usize {
    HEAP.with(|heap| {
        let mut heap = heap.borrow_mut();
        heap.collect_garbage()
    })
}

/// Bookkeeping for all cells allocated on one thread
pub(crate) struct Heap {
/// Running total of the sizes recorded for all registered cells
bytes_allocated: usize,
/// Registering a cell triggers a collection once `bytes_allocated` exceeds this value
collect_if_memory_usage_above: usize,

/// The most recently allocated gc cell
head: Option<NonNull<GcCell<dyn Trace>>>,
}

impl Heap {
    /// Adds a freshly allocated cell to the list of cells tracked by this heap
    ///
    /// The cell becomes the new head of the list. If the tracked allocation
    /// size exceeds the collection threshold afterwards, a garbage collection
    /// is performed immediately.
    ///
    /// # Safety
    /// `cell` must point to a valid `GcCell` that was allocated with `Box` and
    /// leaked, and that is not part of any list yet. The heap takes over
    /// responsibility for freeing it.
    pub(crate) unsafe fn register_cell(&mut self, cell: NonNull<GcCell<dyn Trace>>) {
        // SAFETY: The caller guarantees that `cell` points to a valid cell
        let cell_ref = unsafe { cell.as_ref() };
        debug_assert!(cell_ref.next.get().is_none());

        // Make the new cell the head in the linked list of allocated cells
        let old_head = self.head.replace(cell);
        cell_ref.next.set(old_head);

        // Measure the cell itself. `size_of_val(&cell)` would only measure
        // the (fat) pointer, which is the same for every cell.
        self.bytes_allocated += mem::size_of_val(cell_ref);

        if self.bytes_allocated > self.collect_if_memory_usage_above {
            self.collect_garbage();
        }
    }

    /// Performs a garbage collection on this thread's heap
    ///
    /// Every cell with at least one root is marked (marking also traces the
    /// value of the cell). Afterwards all cells that ended up unmarked are
    /// unlinked and freed, and the mark bit of the surviving cells is reset.
    ///
    /// Returns the number of bytes that were freed
    fn collect_garbage(&mut self) -> usize {
        log::debug!("Collecting garbage...");

        // Mark phase
        let mut cursor = self.head;
        while let Some(cell_ptr) = cursor {
            // SAFETY: All the pointers in the chain are guaranteed to point to
            // valid GcCells
            let cell = unsafe { cell_ptr.as_ref() };

            if cell.num_roots() > 0 {
                cell.mark();
            }
            cursor = cell.next.get();
        }

        // Sweep phase
        //
        // `link` is always the slot (either `self.head` or the `next` field of
        // a surviving cell) that points at the cell under inspection. Garbage
        // is unlinked through that slot *before* it is freed, so the list
        // never refers to a dead cell - even if several adjacent cells are
        // collected in a row.
        let mut total_freed_size = 0;
        let mut link = Cell::from_mut(&mut self.head);
        while let Some(cell_ptr) = link.get() {
            // SAFETY: All the pointers in the chain are guaranteed to point to
            // valid GcCells
            let cell = unsafe { cell_ptr.as_ref() };

            if cell.is_marked() {
                // Survivor: reset the mark for the next collection
                cell.unmark();
                link = &cell.next;
            } else {
                link.set(cell.next.get());
                total_freed_size += mem::size_of_val(cell);

                // SAFETY: Registered cells are leaked boxes (see the contract
                // of `register_cell`). The cell was just unlinked, so this is
                // the only remaining pointer to it that the heap knows about.
                // Rebuilding the box drops the value *and* releases the
                // allocation, which `drop_in_place` alone would not do.
                drop(unsafe { Box::from_raw(cell_ptr.as_ptr()) });
            }
        }

        self.bytes_allocated -= total_freed_size;
        log::debug!("Freed 0x{total_freed_size:x} bytes during garbage collection");

        total_freed_size
    }
}

impl Drop for Heap {
    /// Runs one last collection when the heap is torn down.
    fn drop(&mut self) {
        // The amount of memory that was reclaimed is of no interest here
        let _bytes_freed = self.collect_garbage();

        // Whatever survived the collection is leaked
    }
}
125 changes: 125 additions & 0 deletions crates/gc/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
//! A mark-and-sweep garbage collector
mod cell;
mod heap;
mod trace;

use cell::GcCell;
pub use heap::collect_garbage;
pub use trace::Trace;

use std::{cell::Cell, fmt, ops::Deref, ptr::NonNull};

/// A pointer to the gc heap
///
/// Methods are implemented on the `Gc` itself, so as to not
/// interfere with the methods on `T`.
///
/// Cloning a [Gc] does not perform a deep copy: the clone refers to the same
/// heap cell and is counted as an additional root of that cell.
pub struct Gc<T>
where
    T: 'static + Trace,
{
    /// Whether this pointer is currently counted in the root count of the
    /// cell it refers to
    is_rooted: bool,

    /// The value stored
    ///
    /// It is an invariant of this type that `referenced_cell` must always
    /// point to a valid `GcCell<T>` (ie. it may never be dangling).
    referenced_cell: Cell<NonNull<GcCell<T>>>,
}

impl<T> Clone for Gc<T>
where
    T: 'static + Trace,
{
    /// Creates another rooted pointer to the same cell
    ///
    /// A derived `Clone` would copy `is_rooted` without touching the root
    /// count of the cell. Dropping both pointers would then remove two roots
    /// where only one was ever added.
    ///
    /// # Panics
    /// Panics if the cell already has the maximum number of roots.
    fn clone(&self) -> Self {
        let cell = Self::cell(self);

        // The count is updated through the shared reference (it lives in a
        // `Cell`), as borrows of the stored value may be alive while cloning
        // and a `&mut GcCell` must not be created next to them.
        // `usize::MAX >> 1` is the largest root count that fits below the
        // mark bit.
        assert!(
            cell.num_roots() < usize::MAX >> 1,
            "Maximum number of gc roots exceeded"
        );
        cell.flags.set(cell.flags.get() + 1);

        Self {
            is_rooted: true,
            referenced_cell: Cell::new(self.referenced_cell.get()),
        }
    }
}

impl<T> Gc<T>
where
    T: 'static + Trace,
{
    /// Allocate a value on the gc-heap
    ///
    /// The new pointer starts out rooted.
    #[must_use]
    pub fn new(value: T) -> Self {
        // Allocate a new cell on the thread-local heap for this value. The
        // cell is created with a root count of one, which accounts for the
        // pointer returned here.
        Self {
            is_rooted: true,
            referenced_cell: Cell::new(GcCell::new(value)),
        }
    }

    /// Turns an unrooted pointer into a root of the cell it refers to
    fn make_root(value: &mut Self) {
        debug_assert!(!value.is_rooted);

        Self::cell_mut(value).increment_root_count();
        value.is_rooted = true;
    }

    /// Stops counting a rooted pointer as a root of the cell it refers to
    fn unroot(value: &mut Self) {
        debug_assert!(value.is_rooted);

        Self::cell_mut(value).decrement_root_count();
        value.is_rooted = false;
    }

    /// Returns a shared reference to the cell that `value` points to
    fn cell(value: &Self) -> &GcCell<T> {
        let raw_ptr = value.referenced_cell.get();

        // SAFETY: `referenced_cell` must always point to a valid GcCell
        unsafe { raw_ptr.as_ref() }
    }

    /// Returns an exclusive reference to the cell that `value` points to
    fn cell_mut(value: &mut Self) -> &mut GcCell<T> {
        let raw_ptr = value.referenced_cell.get_mut();

        // SAFETY: `referenced_cell` must always point to a valid GcCell
        unsafe { raw_ptr.as_mut() }
    }

    /// Marks the cell that `value` points to, see `GcCell::mark`
    pub fn mark(value: &Self) {
        Self::cell(value).mark()
    }
}

impl<T> fmt::Debug for Gc<T>
where
    T: 'static + Trace + fmt::Debug,
{
    /// Forwards to the `Debug` implementation of the stored value
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let stored: &T = Self::cell(self).value();
        fmt::Debug::fmt(stored, f)
    }
}

impl<T> fmt::Display for Gc<T>
where
    T: 'static + Trace + fmt::Display,
{
    /// Forwards to the `Display` implementation of the stored value
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let stored: &T = Self::cell(self).value();
        fmt::Display::fmt(stored, f)
    }
}

impl<T> Drop for Gc<T>
where
    T: 'static + Trace,
{
    /// Gives up the root held by this pointer, if it holds one
    fn drop(&mut self) {
        // Unrooted pointers were never counted, so there is nothing to undo
        if !self.is_rooted {
            return;
        }

        Self::cell_mut(self).decrement_root_count();
    }
}

impl<T> Deref for Gc<T>
where
    T: 'static + Trace,
{
    type Target = T;

    /// Borrows the value stored in the referenced cell
    #[inline]
    fn deref(&self) -> &Self::Target {
        let cell = Gc::cell(self);
        cell.value()
    }
}
26 changes: 26 additions & 0 deletions crates/gc/src/trace.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
/// Trait for tracing active gc objects
///
/// Containers should simply forward trace calls to each
/// of their referenced objects.
///
/// # Safety
/// [Trace] is unsafe, since failing to trace all references
/// can lead to them being garbage-collected while still in use.
pub unsafe trait Trace {
    /// Visits every gc reference that is held by `self`
    fn trace(&self);
}

/// Used to impl an empty trace implementation for types that can never
/// contain `Gc<T>` types.
///
/// Takes a comma-separated list of types, the trailing comma is optional.
macro_rules! empty_trace {
    ($($ty:ty),* $(,)?) => {
        $(
            // SAFETY: The type cannot hold any gc references, so there is
            // nothing that an empty `trace` could miss
            unsafe impl Trace for $ty {
                fn trace(&self) {}
            }
        )*
    };
}

empty_trace!(
    (), bool, char, str, String,
    u8, u16, u32, u64, u128, usize,
    i8, i16, i32, i64, i128, isize,
    f32, f64,
);
Loading

0 comments on commit 792b6f2

Please sign in to comment.