From d3b80beead6e7914425c811afe57e30960bf5bc2 Mon Sep 17 00:00:00 2001 From: Ben Thompson Date: Mon, 6 Feb 2023 11:12:47 -0500 Subject: [PATCH 1/3] Use cloudpickle instead of JSON serialization. --- docs/changelog.md | 4 +++ docs/usage/index.md | 2 +- testbook/client.py | 52 +++++++++++++------------------- testbook/exceptions.py | 2 +- testbook/tests/test_reference.py | 24 +++------------ 5 files changed, 31 insertions(+), 53 deletions(-) diff --git a/docs/changelog.md b/docs/changelog.md index 9e2143d..8a758d1 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -1,5 +1,9 @@ # Changelog +## Unreleased + +- replaced JSON serialization with cloudpickle. This allows extracting a much wider range of objects from the notebook subprocess. + ## 0.4.2 - Documentation and CoC updates to improve developer access (Thank you PyLadies Vancouver!) diff --git a/docs/usage/index.md b/docs/usage/index.md index f7e46bc..33f6ca3 100644 --- a/docs/usage/index.md +++ b/docs/usage/index.md @@ -63,7 +63,7 @@ my_list = ['list', 'from', 'notebook'] Reference objects to functions can be called with, -- explicit JSON serializable values (like `dict`, `list`, `int`, `float`, `str`, `bool`, etc) +- explicit cloudpickle-serializable values (almost all Python objects) - other reference objects ```{code-block} python diff --git a/testbook/client.py b/testbook/client.py index 5dc94d3..9b96a16 100644 --- a/testbook/client.py +++ b/testbook/client.py @@ -6,6 +6,7 @@ from nbclient import NotebookClient from nbclient.exceptions import CellExecutionError from nbformat.v4 import new_code_cell +import cloudpickle from .exceptions import ( TestbookCellTagNotFoundError, @@ -39,7 +40,7 @@ def ref(self, name: str) -> Union[TestbookObjectReference, Any]: # Check if exists self.inject(name, pop=True) try: - self.inject(f"import json; json.dumps({name})", pop=True) + self.inject(f"import cloudpickle; cloudpickle.dumps({name})", pop=True) return self.value(name) except Exception: return TestbookObjectReference(self, name) @@ -248,9 +249,8 @@ def inject( def value(self, code: str) -> Any: """ - Execute given code in the kernel and return JSON serializeable result. + Execute given code in the kernel and returns the serializeable result. - If the result is not JSON serializeable, it raises `TestbookAttributeError`. This error object will also contain an attribute called `save_varname` which can be used to create a reference object with :meth:`ref`. @@ -277,34 +277,24 @@ def value(self, code: str) -> Any: 'code provided does not produce execute_result' ) - save_varname = random_varname() - - inject_code = f""" - import json - from IPython import get_ipython - from IPython.display import JSON - - {save_varname} = get_ipython().last_execution_result.result - - json.dumps({save_varname}) - JSON({{"value" : {save_varname}}}) - """ - - try: - outputs = self.inject(inject_code, pop=True).outputs - - if outputs[0].output_type == "error": - # will receive error when `allow_errors` is set to True - raise TestbookRuntimeError( - outputs[0].evalue, outputs[0].traceback, outputs[0].ename - ) - - return outputs[0].data['application/json']['value'] - - except TestbookRuntimeError: - e = TestbookSerializeError('could not JSON serialize output') - e.save_varname = save_varname - raise e + import tempfile + with tempfile.NamedTemporaryFile() as tmp: + try: + inject_code = f""" + import cloudpickle + with open('{tmp.name}', 'wb') as f: + cloudpickle.dump(get_ipython().last_execution_result.result, f) + """ + outputs = self.inject(inject_code, pop=True).outputs + if len(outputs) > 0 and outputs[0].output_type == "error": + # will receive error when `allow_errors` is set to True + raise TestbookRuntimeError( + outputs[0].evalue, outputs[0].traceback, outputs[0].ename + ) + with open(tmp.name, 'rb') as f: + return cloudpickle.load(f) + except TestbookRuntimeError: + raise TestbookSerializeError('could not serialize output') @contextmanager def patch(self, target, **kwargs): diff --git a/testbook/exceptions.py b/testbook/exceptions.py index 2f7f7aa..dacbe29 100644 --- a/testbook/exceptions.py +++ b/testbook/exceptions.py @@ -10,7 +10,7 @@ class TestbookCellTagNotFoundError(TestbookError): class TestbookSerializeError(TestbookError): - """Raised when output cannot be JSON serialized""" + """Raised when output cannot be serialized""" pass diff --git a/testbook/tests/test_reference.py b/testbook/tests/test_reference.py index c766aa2..3d1e57b 100644 --- a/testbook/tests/test_reference.py +++ b/testbook/tests/test_reference.py @@ -47,24 +47,8 @@ def test_function_call_with_ref_object(notebook): assert double(a) == [2, 4, 6] -def test_reference(notebook): +def test_nontrivial_pickling(notebook): Foo = notebook.ref("Foo") - - # Check that when a non-serializeable object is returned, it returns - # a reference to that object instead - f = Foo('bar') - - assert repr(f) == "\"\"" - - # Valid attribute access - assert f.say_hello() - - # Invalid attribute access - with pytest.raises(TestbookAttributeError): - f.does_not_exist - - assert f.say_hello() == 'Hello bar!' - - # non JSON-serializeable output - with pytest.raises(TestbookSerializeError): - f.resolve() + f = Foo("bar") + assert repr(f) == "" + assert(f.say_hello() == "Hello bar!") \ No newline at end of file From c9e2ce9e6b9fcc861e3a9636f35efec830325b25 Mon Sep 17 00:00:00 2001 From: Ben Thompson Date: Mon, 6 Feb 2023 11:25:05 -0500 Subject: [PATCH 2/3] Explicit reference semantics. --- testbook/client.py | 22 ++++++++++------------ testbook/tests/test_reference.py | 21 +++++++++++---------- 2 files changed, 21 insertions(+), 22 deletions(-) diff --git a/testbook/client.py b/testbook/client.py index 9b96a16..16382a5 100644 --- a/testbook/client.py +++ b/testbook/client.py @@ -39,17 +39,13 @@ def ref(self, name: str) -> Union[TestbookObjectReference, Any]: # Check if exists self.inject(name, pop=True) - try: - self.inject(f"import cloudpickle; cloudpickle.dumps({name})", pop=True) - return self.value(name) - except Exception: - return TestbookObjectReference(self, name) + return TestbookObjectReference(self, name) def get(self, item): - return self.ref(item) + return self.value(item) def __getitem__(self, item): - return self.ref(item) + return self.value(item) @staticmethod def _construct_call_code( @@ -251,9 +247,6 @@ def value(self, code: str) -> Any: """ Execute given code in the kernel and returns the serializeable result. - This error object will also contain an attribute called `save_varname` which - can be used to create a reference object with :meth:`ref`. - Parameters ---------- code: str @@ -276,14 +269,17 @@ def value(self, code: str) -> Any: raise TestbookExecuteResultNotFoundError( 'code provided does not produce execute_result' ) + + save_varname = random_varname() import tempfile with tempfile.NamedTemporaryFile() as tmp: try: inject_code = f""" import cloudpickle + {save_varname} = get_ipython().last_execution_result.result with open('{tmp.name}', 'wb') as f: - cloudpickle.dump(get_ipython().last_execution_result.result, f) + cloudpickle.dump({save_varname}, f) """ outputs = self.inject(inject_code, pop=True).outputs if len(outputs) > 0 and outputs[0].output_type == "error": @@ -294,7 +290,9 @@ def value(self, code: str) -> Any: with open(tmp.name, 'rb') as f: return cloudpickle.load(f) except TestbookRuntimeError: - raise TestbookSerializeError('could not serialize output') + e = TestbookSerializeError('could not serialize output') + e.save_varname = save_varname + raise e @contextmanager def patch(self, target, **kwargs): diff --git a/testbook/tests/test_reference.py b/testbook/tests/test_reference.py index 3d1e57b..21cb5a3 100644 --- a/testbook/tests/test_reference.py +++ b/testbook/tests/test_reference.py @@ -12,28 +12,28 @@ def notebook(): def test_create_reference(notebook): a = notebook.ref("a") - assert repr(a) == "[1, 2, 3]" + assert repr(a) == "'[1, 2, 3]'" -def test_create_reference_getitem(notebook): - a = notebook["a"] - assert repr(a) == "[1, 2, 3]" +def test_create_reference_resolve(notebook): + a = notebook.ref("a") + assert a.resolve() == [1, 2, 3] -def test_create_reference_get(notebook): +def test_notebook_get_value(notebook): a = notebook.get("a") - assert repr(a) == "[1, 2, 3]" + assert a == [1, 2, 3] def test_eq_in_notebook(notebook): a = notebook.ref("a") a.append(4) - assert a == [1, 2, 3, 4] + assert a.resolve() == [1, 2, 3, 4] def test_eq_in_notebook_ref(notebook): a, b = notebook.ref("a"), notebook.ref("b") - assert a == b + assert a.resolve()[:3] == b def test_function_call(notebook): @@ -43,8 +43,9 @@ def test_function_call(notebook): def test_function_call_with_ref_object(notebook): double, a = notebook.ref("double"), notebook.ref("a") - - assert double(a) == [2, 4, 6] + # a.append(4) above applied to the referenced "a" and therefore is + # reflected here + assert double(a) == [2, 4, 6, 8] def test_nontrivial_pickling(notebook): From 0b65427ce2948cd335158e3e0cda98a18a899589 Mon Sep 17 00:00:00 2001 From: Ben Thompson Date: Mon, 6 Feb 2023 11:30:01 -0500 Subject: [PATCH 3/3] Describe the changes in the changelog. --- docs/changelog.md | 18 +++++++++++++++--- requirements.txt | 1 + 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/docs/changelog.md b/docs/changelog.md index 8a758d1..81c34cc 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -1,8 +1,20 @@ # Changelog -## Unreleased - -- replaced JSON serialization with cloudpickle. This allows extracting a much wider range of objects from the notebook subprocess. +## 0.5.0 + +- replaced JSON serialization with cloudpickle. This allows extracting a much + wider range of objects from the notebook subprocess. +- Reference semantics have changed. + - Old behavior of `tb.get(name)` and `tb[name]`: + - a reference would be returned for non-JSON-serializable objects. + - a value would be returned for JSON-serializable objects. + - Old behavior of `tb.ref(name)` was identical to `tb.get(name)`. + - However, now almost all objects are serializable and as a result, under + the old semantics, a reference would almost never be returned. Therefore, + when a reference is desired, we now require explicitly requesting a + reference. The new behavior of `tb.get(name)` and `tb[name]` is to always + return the deserialized object and to never return a reference. The new + behavior of `tb.ref(name)` is to always return a reference. ## 0.4.2 diff --git a/requirements.txt b/requirements.txt index 0f9ff69..02ba471 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,3 @@ nbformat>=5.0.4 nbclient>=0.4.0 +cloudpickle >= 2.0.0 \ No newline at end of file