Pytest benchmark override with cpu and gpu times #21

Open · wants to merge 20 commits into base: main
58 changes: 58 additions & 0 deletions benchmarks/test_benchmark_demonstration.py
@@ -0,0 +1,58 @@
# Remove this file after adding actual benchmarks
import pytest
import numpy as np
import cupy as cp
from qutip.core.data import Dense

from qutip_cupy import CuPyDense

import benchmark_tools
from benchmark_tools.cpu_gpu_times_wrapper import GpuWrapper

# Set device_id
cp.cuda.Device(benchmark_tools._DEVICE).use()

# Supported dtypes
dtype_list = ["CuPyDense", "CuPyDense_half_precision", "Dense"]
dtype_ids = ["CuPy", "CuPy_half", "qutip(Dense)"]


@pytest.fixture(params=dtype_list, ids=dtype_ids)
def dtype(request):
    return request.param


@pytest.fixture(scope="function", params=[50, 100, 1000])  # , 4000])
def size(request):
    return request.param


@pytest.mark.benchmark()
def test_matmul(dtype, size, benchmark, request):
    # Group benchmarks by size and record the dtype as extra_info.
    group = request.node.callspec.id  # noqa:F821
    group = group.split("-")
    benchmark.group = "-".join(group[1:])
    benchmark.extra_info["dtype"] = group[0]

    array = np.random.uniform(size=(size, size)) + 1.0j * np.random.uniform(
        size=(size, size)
    )

    if dtype == "CuPyDense":
        arr = CuPyDense(array)
    elif dtype == "CuPyDense_half_precision":
        arr = CuPyDense(array, dtype=cp.complex64)
    elif dtype == "Dense":
        arr = Dense(array)

    def matmul_(arr):
        return arr @ arr

    benchmark2 = GpuWrapper(benchmark)
    cp_mult = benchmark2.pedanticupy(matmul_, (arr,))

    np_mult = matmul_(array)

    np.testing.assert_array_almost_equal(cp_mult.to_array(), np_mult)
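
As a quick way to exercise just this demonstration file, a subset of the same pytest-benchmark flags that the runner added in src/benchmark_tools/benchmark.py passes can be used directly. This is only a sketch, not part of the PR; benchmark.main() remains the intended entry point.

import pytest

pytest.main(
    [
        "benchmarks/test_benchmark_demonstration.py",
        "--benchmark-only",
        "--benchmark-columns=Mean,StdDev,rounds,Iterations",
        "--benchmark-autosave",
    ]
)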
6 changes: 6 additions & 0 deletions setup.cfg
@@ -38,5 +38,11 @@ where = src
[options.extras_require]
tests =
    pytest>=6.0
benchmarks =
    pytest>=6.0
    pytest-benchmark>=3.4.1
    pandas>=1.0
    matplotlib>=3.0
full =
    %(tests)s
    %(benchmarks)s
1 change: 1 addition & 0 deletions src/benchmark_tools/__init__.py
@@ -0,0 +1 @@
_DEVICE = 0
158 changes: 158 additions & 0 deletions src/benchmark_tools/benchmark.py
@@ -0,0 +1,158 @@
# This has been mostly borrowed from the qutip-tensorflow implementation
import cupy as cp
import json
import pandas as pd
import matplotlib.pyplot as plt
import pytest
import argparse
import glob
from pathlib import Path

import benchmark_tools


def unravel(data, key):
    """Transforms {key: {another_key: value, another_key2: value2}} into
    {key_another_key: value, key_another_key2: value2}."""
    for d in data:
        values = d.pop(key)
        for k, v in values.items():
            d[key + "_" + k] = v
    return data


def benchmark_to_dataframe(filepath):
    """Loads a JSON file where the benchmark is stored and returns a dataframe
    with the benchmark information."""
    with open(filepath) as f:
        data = json.load(f)
        data = data["benchmarks"]
        data = unravel(data, "options")
        data = unravel(data, "stats")
        data = unravel(data, "params")
        data = unravel(data, "extra_info")
        data = pd.DataFrame(data)

        # Set operation properly (for example: matmul instead of:
        # UNSERIALIZABLE[<function Qobj.__matmul__ at 0x...)
        # The name of the operation is obtained from the group name
        data["params_get_operation"] = data.group.str.split("-")
        data["params_get_operation"] = [d[-1] for d in data.params_get_operation]
        print(data.params_get_operation)
        return data


def plot_benchmark(df, destination_folder):
    """Plots results using matplotlib. It iterates over params_get_operation
    and extra_info_dtype and plots time vs N (for NxN matrices)."""
    grouped = df.groupby(["params_get_operation"])
    for operation, group in grouped:
        for dtype, g in group.groupby("extra_info_dtype"):
            plt.errorbar(
                g.params_size, g.stats_mean, g.stats_stddev, fmt=".-", label=dtype
            )

        plt.title(f"{operation}")
        plt.legend()
        plt.yscale("log")
        plt.xscale("log")
        plt.xlabel("Size")
        plt.ylabel("Time (s)")
        plt.savefig(f"{destination_folder}/{operation}.png")
        plt.close()


def run_benchmarks(args):
    "Run pytest benchmark with sensible defaults."
    pytest.main(
        [
            "benchmarks",
            "--benchmark-only",
            "--benchmark-columns=Mean,StdDev,rounds,Iterations",
            "--benchmark-sort=name",
            "--benchmark-autosave",
            "-Wdefault",
        ]
        + args
    )


def get_latest_benchmark_path():
    """Returns the path to the latest benchmark run from `./.benchmarks/`"""

    benchmark_paths = glob.glob("./.benchmarks/*/*.json")
    dates = ["".join(_b.split("/")[-1].split("_")[2:4]) for _b in benchmark_paths]
    benchmarks = {date: value for date, value in zip(dates, benchmark_paths)}

    dates.sort()
    latest = dates[-1]
    benchmark_latest = benchmarks[latest]

    return benchmark_latest


def main(args=[]):
    parser = argparse.ArgumentParser(
        description="""Run and plot the benchmarks.
        The script also accepts the same arguments
        as pytest/pytest-benchmark. The script must be run
        from the root of the repository."""
    )
    parser.add_argument(
        "--save_csv",
        default=".benchmarks/latest.csv",
        help="""Path where the latest benchmark results will be
        stored as csv. If empty it will not store results as
        csv. Default: .benchmarks/latest.csv""",
    )
    parser.add_argument(
        "--save_plots",
        default=".benchmarks/figures",
        help="""Path where the plots will be saved. If empty,
        it will not save the plots. Default:
        .benchmarks/figures""",
    )
    parser.add_argument(
        "--plot_only",
        action="store_true",
        help="""If included, it will not run the benchmarks but
        just plot the latest results from the .benchmarks/ folder.
        """,
    )

    parser.add_argument(
        "--device_id",
        type=int,
        default=0,
        help="""Device id for benchmarking.
        """,
    )

    if args:
        args, other_args = parser.parse_known_args(args)
    else:
        args, other_args = parser.parse_known_args()

    benchmark_tools._DEVICE = args.device_id

    if not args.plot_only:
        run_benchmarks(other_args)

    with cp.cuda.device.Device(benchmark_tools._DEVICE) as device:

        print("The specifications for your current device are:")
        print(device.attributes)

    benchmark_latest = get_latest_benchmark_path()
    benchmark_latest = benchmark_to_dataframe(benchmark_latest)

    # Save results as csv
    if args.save_csv:
        benchmark_latest.to_csv(args.save_csv)

    if args.save_plots:
        Path(args.save_plots).mkdir(parents=True, exist_ok=True)
        plot_benchmark(benchmark_latest, args.save_plots)


if __name__ == "__main__":
    main()
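
For reference, the runner above can also be driven programmatically once the package is installed from the src/ layout. This is a sketch only; the flag values shown simply restate the parser defaults.

from benchmark_tools.benchmark import main

# Equivalent to `python src/benchmark_tools/benchmark.py` with explicit defaults.
main(["--device_id", "0", "--save_csv", ".benchmarks/latest.csv"])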
158 changes: 158 additions & 0 deletions src/benchmark_tools/cpu_gpu_times_wrapper.py
@@ -0,0 +1,158 @@
import warnings

# from pytest_benchmark.fixture import BenchmarkFixture
Contributor: Delete this commented-out line.
from pytest_benchmark.fixture import FixtureAlreadyUsed
from pytest_benchmark.stats import Metadata
from cupyx.time import repeat as cp_repeat


class GpuWrapper(object):
    """
    This class will wrap a pytest-benchmark type Fixture
    in order to provide separate CPU and GPU timings.
    The GpuWrapper instance will share all its attributes
    transparently with the pytest-benchmark fixture instance.
    This class relies on CuPy's experimental timing
    utility ``cupyx.time.repeat``.
    """

    def __init__(self, wrapped_class, iterations=30, rounds=5, warmup_rounds=10):
Contributor: I think "wrapped_class" is really just a "wrapped_obj" here, so we should name it appropriately.

        # we need to initialize wrapped_class this way
        # since we are overriding __setattr__
        self.__dict__["wrapped_class"] = wrapped_class
        self.iterations = iterations
        self.rounds = rounds
        self.warmup_rounds = warmup_rounds

    def __getattr__(self, attr):
        # orig_attr = self.wrapped_class.__getattribute__(attr)
Contributor: Also delete this commented-out code.

        orig_attr = getattr(self.wrapped_class, attr)
        if callable(orig_attr):

            def hooked(*args, **kwargs):
                result = orig_attr(*args, **kwargs)
                # prevent wrapped_class from becoming unwrapped
                if result == self.wrapped_class:
                    return self
                return result

            return hooked
        else:
            return orig_attr

    def __setattr__(self, attr, value):
        setattr(self.wrapped_class, attr, value)

    def pedanticupy(
        self,
        function_to_benchmark,
        args=(),
        kwargs={},
        iterations=30,
        rounds=5,
        warmup_rounds=10,
    ):
        """
        By using this method you have the same control over the benchmark as
        when using pytest-benchmark's own ``pedantic``,
        with the exception of providing a special setup function.
        """
        if self._mode:
            self.has_error = True
            raise FixtureAlreadyUsed(
                "Fixture can only be used once. Previously it was used in %s mode."
                % self._mode
            )
        try:
            self._mode = "benchmark.pedantic(...)"
            return self._raw_pedantic(
                function_to_benchmark,
                args,
                kwargs,
                iterations=iterations,
                rounds=rounds,
                warmup_rounds=warmup_rounds,
            )
        except Exception:
            self.has_error = True
            raise

    def _raw_pedantic(
        self,
        function_to_benchmark,
        args=(),
        kwargs={},
        iterations=30,
        rounds=5,
        warmup_rounds=10,
    ):

        if not isinstance(iterations, int) or iterations < 1:
            raise ValueError("Must have positive int for `iterations`.")

        if not isinstance(rounds, int) or rounds < 1:
            raise ValueError("Must have positive int for `rounds`.")

        if not isinstance(warmup_rounds, int) or warmup_rounds < 0:
            raise ValueError("Must have non-negative int for `warmup_rounds`.")

        # The values set on the GpuWrapper instance take precedence over the
        # arguments passed to this call.
        iterations = self.iterations
        rounds = self.rounds
        warmup_rounds = self.warmup_rounds

        if self.enabled:
            self.stats = self._make_stats(iterations)
            self.stats.group = "device_all"
            self.statscpu = self._make_stats(iterations)
            self.statscpu.group = "device_cpu"
            self.statsgpu = self._make_stats(iterations)
            self.statsgpu.group = "device_gpu"

            self._logger.debug(
                "  Running %s rounds x %s iterations ..." % (rounds, iterations),
                yellow=True,
                bold=True,
            )

            for _ in range(rounds):
                with warnings.catch_warnings():
                    warnings.filterwarnings(
                        action="ignore",
                        category=FutureWarning,
                        message=r"cupyx.time.repeat is experimental.",
                    )

                    results = cp_repeat(
                        function_to_benchmark,
                        args,
                        kwargs,
                        n_warmup=warmup_rounds,
                        max_duration=self._max_time,
                        n_repeat=iterations,
                    )

                for tim_cpu, tim_gpu in zip(results.cpu_times, results.gpu_times[0]):

                    self.stats.update(tim_cpu + tim_gpu)
                    self.statscpu.update(tim_cpu)
                    self.statsgpu.update(tim_gpu)

        function_result = function_to_benchmark(*args, **kwargs)
        return function_result

    def _make_stats(self, iterations):
        bench_stats = Metadata(
            self,
            iterations=iterations,
            options={
                "disable_gc": self._disable_gc,
                "timer": self._timer,
                "min_rounds": self._min_rounds,
                "max_time": self._max_time,
                "min_time": self._min_time,
                "warmup": self._warmup,
            },
        )
        self._add_stats(bench_stats)

        return bench_stats
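
A minimal usage sketch for GpuWrapper, mirroring benchmarks/test_benchmark_demonstration.py above; the test name and array size here are illustrative only, not part of the PR.

import cupy as cp

from benchmark_tools.cpu_gpu_times_wrapper import GpuWrapper


def test_square_matmul(benchmark):
    arr = cp.random.random((256, 256))
    wrapped = GpuWrapper(benchmark)
    # Records combined, CPU-only and GPU-only timings under the groups
    # "device_all", "device_cpu" and "device_gpu".
    wrapped.pedanticupy(lambda a: a @ a, (arr,))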