-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
gc: Start implementing a mark-and-sweep garbage collector
This GC is heavily inspired by https://github.com/Manishearth/rust-gc. I didn't know a lot about garbage collectors going into this, and that project provided me with a design I could replicate. I do believe that this is its own work, but I want to disclose the source anyway.
- Loading branch information
1 parent
3fe57b3
commit 792b6f2
Showing
7 changed files
with
393 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
[package] | ||
name = "gc" | ||
version = "0.1.0" | ||
authors.workspace = true | ||
edition.workspace = true | ||
repository.workspace = true | ||
license.workspace = true | ||
|
||
[dependencies] | ||
log = { workspace = true } | ||
|
||
[lints] | ||
workspace = true |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
use std::{cell::Cell, ptr::NonNull}; | ||
|
||
use crate::{heap::HEAP, Trace}; | ||
|
||
const MARKED_BIT: usize = 1 << (usize::BITS - 1); | ||
const ROOTS_MASK: usize = !MARKED_BIT; | ||
|
||
pub(crate) struct GcCell<T: ?Sized> { | ||
/// Contains root count and whether or not the cell is marked | ||
/// | ||
/// Highest bit indicates mark state, lower bits are the root count. | ||
pub(crate) flags: Cell<usize>, | ||
|
||
/// `GcCell`s make up a linked list, to keep track of all allocated objects | ||
pub(crate) next: Cell<Option<NonNull<GcCell<dyn Trace>>>>, | ||
|
||
/// The actual value allocated | ||
pub(crate) value: T, | ||
} | ||
|
||
impl<T> GcCell<T> | ||
where | ||
T: Trace + 'static, | ||
{ | ||
pub fn new(value: T) -> NonNull<Self> { | ||
let cell = Self { | ||
flags: Cell::new(0x1), // Not marked, one root | ||
next: Cell::new(None), | ||
value, | ||
}; | ||
|
||
let cell = NonNull::from(Box::leak(Box::new(cell))); | ||
|
||
// SAFETY: The ptr is valid, as we just constructed it | ||
unsafe { HEAP.with(|heap| heap.borrow_mut().register_cell(cell)) } | ||
|
||
cell | ||
} | ||
} | ||
|
||
impl<T> GcCell<T> | ||
where | ||
T: ?Sized + Trace, | ||
{ | ||
#[inline] | ||
pub const fn value(&self) -> &T { | ||
&self.value | ||
} | ||
|
||
/// Marks the cell and all its successors | ||
pub fn mark(&self) { | ||
if !self.is_marked() { | ||
self.flags.set(self.flags.get() | MARKED_BIT); | ||
|
||
// Also mark all of the connected cells | ||
self.value.trace(); | ||
} | ||
} | ||
|
||
pub fn unmark(&self) { | ||
self.flags.set(self.flags.get() & ROOTS_MASK); | ||
} | ||
|
||
#[must_use] | ||
pub fn num_roots(&self) -> usize { | ||
self.flags.get() & ROOTS_MASK | ||
} | ||
|
||
#[must_use] | ||
pub fn is_marked(&self) -> bool { | ||
self.flags.get() & MARKED_BIT != 0 | ||
} | ||
|
||
pub fn decrement_root_count(&mut self) { | ||
self.flags.set(self.flags.get() - 1); | ||
} | ||
|
||
pub fn increment_root_count(&mut self) { | ||
if self.num_roots() == ROOTS_MASK { | ||
panic!("Maximum number of gc roots exceeded"); | ||
} | ||
|
||
self.flags.set(self.flags.get() + 1); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
use std::{ | ||
cell::{Cell, RefCell}, | ||
mem, | ||
ptr::{self, NonNull}, | ||
}; | ||
|
||
use crate::{cell::GcCell, Trace}; | ||
|
||
const COLLECT_IF_MEMORY_USAGE_ABOVE: usize = 0x1000; | ||
|
||
thread_local! { | ||
pub static HEAP: RefCell<Heap> = RefCell::new(Heap { | ||
bytes_allocated: 0, | ||
collect_if_memory_usage_above: COLLECT_IF_MEMORY_USAGE_ABOVE, | ||
head: None, | ||
}); | ||
} | ||
|
||
/// Forces a garbage collection | ||
/// | ||
/// Returns the number of bytes that were freed | ||
pub fn collect_garbage() -> usize { | ||
HEAP.with(|heap| heap.borrow_mut().collect_garbage()) | ||
} | ||
|
||
pub(crate) struct Heap { | ||
bytes_allocated: usize, | ||
collect_if_memory_usage_above: usize, | ||
|
||
/// The most recently allocated gc cell | ||
head: Option<NonNull<GcCell<dyn Trace>>>, | ||
} | ||
|
||
impl Heap { | ||
pub(crate) unsafe fn register_cell(&mut self, cell: NonNull<GcCell<dyn Trace>>) { | ||
debug_assert!(cell.as_ref().next.get().is_none()); | ||
|
||
// Make the new cell the head in the linked list of allocated cells | ||
let old_head = self.head.replace(cell); | ||
cell.as_ref().next.set(old_head); | ||
|
||
self.bytes_allocated += mem::size_of_val(&cell); | ||
|
||
if self.bytes_allocated > self.collect_if_memory_usage_above { | ||
self.collect_garbage(); | ||
} | ||
} | ||
|
||
/// Performs a garbage collection on this threads heap | ||
/// | ||
/// Returns the number of bytes that were freed | ||
fn collect_garbage(&mut self) -> usize { | ||
log::debug!("Collecting garbage..."); | ||
|
||
// Mark phase | ||
let mut next = self.head; | ||
while let Some(next_cell) = next { | ||
// SAFETY: All the pointers in the chain are guaranteed to point to | ||
// valid GcCells | ||
let cell = unsafe { next_cell.as_ref() }; | ||
|
||
if cell.num_roots() > 0 { | ||
cell.mark(); | ||
} | ||
next = cell.next.get(); | ||
} | ||
|
||
// Collect all unmarked nodes | ||
struct UnmarkedCell<'a> { | ||
cell: NonNull<GcCell<dyn Trace>>, | ||
linked_by: &'a Cell<Option<NonNull<GcCell<dyn Trace>>>>, | ||
} | ||
|
||
let mut unmarked_cells = vec![]; | ||
let mut next = Cell::from_mut(&mut self.head); | ||
while let Some(next_cell) = next.get() { | ||
// SAFETY: All the pointers in the chain are guaranteed to point to | ||
// valid GcCells | ||
let cell = unsafe { next_cell.as_ref() }; | ||
|
||
if cell.is_marked() { | ||
cell.unmark(); | ||
} else { | ||
let unmarked_cell = UnmarkedCell { | ||
cell: next_cell, | ||
linked_by: next, | ||
}; | ||
unmarked_cells.push(unmarked_cell); | ||
} | ||
next = &cell.next; | ||
} | ||
|
||
// Sweep Phase | ||
let mut total_freed_size = 0; | ||
for mut unmarked_cell in unmarked_cells { | ||
total_freed_size += mem::size_of_val(&unmarked_cell.cell); | ||
|
||
// Remove the unmarked cell from the linked list | ||
// SAFETY: The cell ptr is guaranteed to point to a valid cell | ||
let cell_to_be_dropped = unsafe { unmarked_cell.cell.as_mut() }; | ||
unmarked_cell.linked_by.set(cell_to_be_dropped.next.get()); | ||
|
||
// SAFETY: The cell ptr is guaranteed to point to a valid cell | ||
unsafe { ptr::drop_in_place(ptr::from_mut(cell_to_be_dropped)) }; | ||
} | ||
|
||
self.bytes_allocated -= total_freed_size; | ||
log::debug!("Freed 0x{total_freed_size:x} bytes during garbage collection"); | ||
|
||
total_freed_size | ||
} | ||
} | ||
|
||
impl Drop for Heap { | ||
fn drop(&mut self) { | ||
self.collect_garbage(); | ||
|
||
// Remaining memory is leaked | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,125 @@ | ||
//! A mark-and-sweep garbage collection | ||
mod cell; | ||
mod heap; | ||
mod trace; | ||
|
||
use cell::GcCell; | ||
pub use heap::collect_garbage; | ||
pub use trace::Trace; | ||
|
||
use std::{cell::Cell, fmt, ops::Deref, ptr::NonNull}; | ||
|
||
/// A pointer to the gc heap | ||
/// | ||
/// Methods are implemented on the `Gc` itself, so as to not | ||
/// interfer with the methods on `T`. | ||
/// | ||
/// Cloning a [Gc] does not perform a deep copy. | ||
#[derive(Clone)] | ||
pub struct Gc<T> | ||
where | ||
T: 'static + Trace, | ||
{ | ||
is_rooted: bool, | ||
|
||
/// The value stored | ||
/// | ||
/// It is a invariant of this type that `cell` must always point to a valid | ||
/// `GcCell<T>` (ie. it may never be dangling). | ||
referenced_cell: Cell<NonNull<GcCell<T>>>, | ||
} | ||
|
||
impl<T> Gc<T> | ||
where | ||
T: 'static + Trace, | ||
{ | ||
/// Allocate a value on the gc-heap | ||
/// | ||
/// The new pointer starts out rooted. | ||
#[must_use] | ||
pub fn new(value: T) -> Self { | ||
// Allocate a new cell on the thread-local heap for this value | ||
let gc_cell = GcCell::new(value); | ||
|
||
let mut gc = Self { | ||
is_rooted: true, | ||
referenced_cell: Cell::new(gc_cell), | ||
}; | ||
|
||
gc | ||
} | ||
|
||
fn make_root(value: &mut Self) { | ||
debug_assert!(!value.is_rooted); | ||
|
||
Self::cell_mut(value).increment_root_count(); | ||
value.is_rooted = true; | ||
} | ||
|
||
fn unroot(value: &mut Self) { | ||
debug_assert!(value.is_rooted); | ||
|
||
Self::cell_mut(value).decrement_root_count(); | ||
value.is_rooted = false; | ||
} | ||
|
||
fn cell(value: &Self) -> &GcCell<T> { | ||
let raw_ptr = value.referenced_cell.get(); | ||
|
||
// SAFETY: self.cell must always point to a GcCell | ||
unsafe { raw_ptr.as_ref() } | ||
} | ||
|
||
fn cell_mut(value: &mut Self) -> &mut GcCell<T> { | ||
let raw_ptr = value.referenced_cell.get_mut(); | ||
|
||
// SAFETY: self.cell must always point to a GcCell | ||
unsafe { raw_ptr.as_mut() } | ||
} | ||
|
||
pub fn mark(value: &Self) { | ||
Self::cell(value).mark() | ||
} | ||
} | ||
|
||
impl<T> fmt::Debug for Gc<T> | ||
where | ||
T: 'static + Trace + fmt::Debug, | ||
{ | ||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||
Self::cell(self).value().fmt(f) | ||
} | ||
} | ||
|
||
impl<T> fmt::Display for Gc<T> | ||
where | ||
T: 'static + Trace + fmt::Display, | ||
{ | ||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||
Self::cell(self).value().fmt(f) | ||
} | ||
} | ||
|
||
impl<T> Drop for Gc<T> | ||
where | ||
T: 'static + Trace, | ||
{ | ||
fn drop(&mut self) { | ||
if self.is_rooted { | ||
Self::cell_mut(self).decrement_root_count(); | ||
} | ||
} | ||
} | ||
|
||
impl<T> Deref for Gc<T> | ||
where | ||
T: 'static + Trace, | ||
{ | ||
type Target = T; | ||
|
||
#[inline] | ||
fn deref(&self) -> &T { | ||
Gc::cell(self).value() | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
/// Trait for tracing the gc objects that are referenced by a value
///
/// Containers should simply forward trace calls to each
/// of their referenced objects.
///
/// # Safety
/// [Trace] is unsafe, since failing to trace all references
/// can lead to them being garbage-collected while still in use.
pub unsafe trait Trace {
    /// Traces every gc object that is referenced by `self`
    fn trace(&self);
}

/// Used to impl an empty trace implementation for types that can never
/// contain `Gc<T>` types.
///
/// Takes a comma separated list of types, the trailing comma is optional.
macro_rules! empty_trace {
    ($($ty:ty),* $(,)?) => {
        $(
            // SAFETY: The type does not contain any gc references,
            // so there is nothing that could be missed while tracing
            unsafe impl Trace for $ty {
                #[inline]
                fn trace(&self) {}
            }
        )*
    };
}

empty_trace!(
    u8, u16, u32, u64, u128, usize, i8, i16, i32, i64, i128, isize, f32, f64, bool, char, str,
    String, (),
);
Oops, something went wrong.