From 89a25dc74b826f050587851fd38669dbe6d65b96 Mon Sep 17 00:00:00 2001
From: Ivan Vilata-i-Balaguer
Date: Thu, 11 Jan 2024 13:53:19 +0100
Subject: [PATCH] Implement optimized iteration in `B2Dataset`.

And adapt the related tests.

Also remove the readme notice about the operation not being optimized.

Fixes #2.
---
 README.rst                     |  7 -------
 b2h5py/blosc2.py               | 12 ++++++++++++
 b2h5py/tests/test_b2dataset.py |  9 ++++-----
 3 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/README.rst b/README.rst
index f9baef0..b385c10 100644
--- a/README.rst
+++ b/README.rst
@@ -33,13 +33,6 @@ Finally, you may explicitly enable optimizations for a given h5py dataset by wra
     b2dset = b2h5py.B2Dataset(dset)
     # ... slicing ``b2dset`` will use Blosc2 optimization ...
 
-Please note that, for the moment, plain iteration in ``B2Dataset`` instances is not optimized (as it falls back to plain ``Dataset`` slicing). This does not affect the other approaches further above. Instead of ``for row in b2dset:`` loops, you may prefer to use slicing like::
-
-    for i in range(len(b2dset)):
-        # ... operate with ``b2dset[i]`` or ``b2dset[i, ...]`` ...
-
-We recommend that you test which approach works better for your datasets. This limitation may be fixed in the future.
-
 Building
 --------
 
diff --git a/b2h5py/blosc2.py b/b2h5py/blosc2.py
index d1ab8e5..a92e12a 100644
--- a/b2h5py/blosc2.py
+++ b/b2h5py/blosc2.py
@@ -235,6 +235,18 @@ def is_b2_fast_slicing(self) -> bool:
         """Whether or not Blosc2 optimized slicing is enabled"""
         return getattr(self, opt_dataset_ok_prop)
 
+    def __iter__(self):
+        # Reimplemented here so that rows are read with our getitem,
+        # not with the one of the base dataset.
+        shape = self.__dataset.shape
+        if len(shape) < 1:
+            # Scalar: delegate to the base iteration and let it fail.  A plain
+            # ``return iter(...)`` would just end this generator silently.
+            yield from self.__dataset
+            return
+        for row in range(shape[0]):
+            yield self[row]
+
     def __getitem__(self, args):
         try:
             selection = opt_slice_check(self, args)
diff --git a/b2h5py/tests/test_b2dataset.py b/b2h5py/tests/test_b2dataset.py
index 600ad27..af89145 100644
--- a/b2h5py/tests/test_b2dataset.py
+++ b/b2h5py/tests/test_b2dataset.py
@@ -36,11 +36,10 @@ def testB2Dataset(self):
         self.assertArrayEqual(b2dataset[::2], self.arr[::2])
 
     def testIter(self):
-        """Iteration does not hang"""
+        """Iteration does use optimization"""
         b2dataset = B2Dataset(self.dset)
         self.assertTrue(b2dataset.is_b2_fast_slicing)
 
-        b2dsiter = iter(b2dataset)
-        next(b2dsiter)
-        next(b2dsiter)
-        return
+        with checking_opt_slicing():
+            for (b2row, arow) in zip(b2dataset, self.arr):
+                self.assertArrayEqual(b2row, arow)