From 424482775da78b5d582373b119c434cf28897ae0 Mon Sep 17 00:00:00 2001
From: Albin Cassirer <cassirer@google.com>
Date: Mon, 1 Jun 2020 01:30:05 -0700
Subject: [PATCH] Reverb: Expand the documentation of Table.

PiperOrigin-RevId: 314094641
Change-Id: I5930c638316c6e2b95b684bd7bc8b7694a3d500e
---
 reverb/cc/table.h | 38 +++++++++++++++++++++-----------------
 reverb/server.py  | 46 ++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 65 insertions(+), 19 deletions(-)

diff --git a/reverb/cc/table.h b/reverb/cc/table.h
index b2699909..6c3d8ca7 100644
--- a/reverb/cc/table.h
+++ b/reverb/cc/table.h
@@ -48,29 +48,33 @@ struct TableItem {
   std::vector<std::shared_ptr<ChunkStore::Chunk>> chunks;
 };
 
-// A Table is a structure for storing `PriorityItem` objects. The Table uses two
-// instances of ItemSelectorInterface, one for sampling (sampler) and another
-// for removing (remover). PriorityItems are registered with both the sampler
-// and remover when inserted in the `Table`. The `Table` uses the sampler to
-// determine which items it should return when `Table::Sample()` is called.
-// Similarly, the remover is used to determine which items should be deleted to
-// ensure capacity.
+// A `Table` is a structure for storing `TableItem` objects. The table uses two
+// instances of `ItemSelectorInterface`, one for sampling (`sampler`) and
+// another for removing (`remover`). All item operations (insert/update/delete)
+// on the table are propagated to the sampler and remover with the original
+// operation on the table. The `Table` uses the sampler to determine which items
+// it should return when `Table::Sample()` is called. Similarly, the remover is
+// used to determine which items should be deleted to ensure capacity.
 //
 // A `RateLimiter` is used to set the ratio of inserted to sampled
 // items. This means that calls to `Table::InsertOrAssign()` and
 // `Table::Sample()` may be blocked by the `RateLimiter` as it enforces this
 // ratio.
 //
-// Please note that the removing implementation only limits the number of items
-// in the table, not the number of timesteps (or actual memory) on this
-// server. When we delete an item of a table, the reference counts for
-// its chunks decreases and we can maybe delete the chunks. However, this is not
-// guaranteed, as other tables might still hold references to the
-// chunks in which case no memory is freed up. This means you must be careful
-// when choosing the remover strategy. A dangerous example would be using a FIFO
-// remover for one table and then introducing another with table with a
-// LIFO remover. In this scenario, the two tables would not share any
-// chunks and would this require twice the amount of storage.
+// Please note that the remover is only used to limit the number of items in
+// the table, not the number of data elements nor the memory used. Each item
+// references one or more chunks, each chunk holds one or more data elements and
+// consumes an "unknown" amount of memory. Each chunk can be referenced by any
+// number of items across all tables on the server. Deleting a single item from
+// one table simply decrements the reference count of the chunks it references
+// and only when a chunk is referenced by zero items is it destroyed and its
+// memory deallocated.
+//
+// This means you must be careful when choosing the remover strategy. A
+// dangerous example would be using a FIFO remover for one table and then
+// introducing another table with a LIFO remover. In this scenario, the
+// two tables would not share any chunks and would thus require twice the
+// amount of memory compared to two tables with the same type of remover.
 //
 class Table {
  public:
diff --git a/reverb/server.py b/reverb/server.py
index 3a9dbbd8..367e11c1 100644
--- a/reverb/server.py
+++ b/reverb/server.py
@@ -51,8 +51,50 @@ def build_internal_extensions(
 
 
 class Table:
-  # TODO(b/157149247): Improve docstring.
-  """Table defines how items are selected for sampling and removal."""
+  """Item collection with configurable strategies for insertion and sampling.
+
+  A `Table` is the structure used to interact with the data stored on a server.
+  Each table can contain a limited number of "items" that can be retrieved
+  according to the strategy defined by the `sampler`. The size of a table, in
+  terms of number of items, is limited to `max_size`. When items are inserted
+  into an already full table the `remover` is used to decide which item should
+  be removed.
+
+  In addition to the selection strategies used to select items for retrieval and
+  removal the flow of data is controlled by a `RateLimiter`. A rate limiter
+  controls high-level relations between inserts and samples by defining a
+  target ratio between the two and what level of deviations from the target is
+  acceptable. This is particularly useful when scaling up from single machine
+  use cases to distributed systems as the same "logical" throughput can be kept
+  constant even though the scale has changed by orders of magnitude.
+
+  It is important to note that "data elements" and "items" are related but
+  distinct types of entities.
+
+    Data element:
+      - The actual data written using `Writer.append`.
+      - Immutable once written.
+      - Is not stored in a `Table`.
+      - Can be referenced by items from one or more distinct tables.
+      - Cannot be retrieved in any other way than as a part of an item.
+
+    Item:
+      - The entity stored in a `Table`.
+      - Inserted using `Writer.create_item`.
+      - References one or more data elements, creating a "sequence".
+
+  The fact that data elements can be referenced by more than one item from one
+  or multiple tables means that one has to be careful not to equate the size of
+  a table (in terms of items) with the amount of data it references. The data
+  will remain in memory on the server until the last item that references it is
+  removed from its table. Removing an item from a table does therefore not
+  necessarily result in any (significant) change in memory usage and one must be
+  careful when selecting remover strategies for a multi table server. Consider
+  for example a server with two tables. One has a FIFO remover and the other a
+  LIFO remover. In this scenario, the two tables would not share any chunks and
+  would eventually consume twice the amount of memory compared to a similar
+  setup where the two tables share the same type of removal strategy.
+  """
 
   def __init__(self,
                name: str,