From 7f26e6ad7c0b9e669b8a8272941d45a9e7eb1cb8 Mon Sep 17 00:00:00 2001 From: Weizheng Lu Date: Tue, 23 Apr 2024 19:00:35 +0800 Subject: [PATCH] update book title (#38) --- .github/workflows/deploy.yml | 2 +- README.md | 4 +- ch-dask-dataframe/indexing.ipynb | 235 +++-- ch-dask-dataframe/map-partitions.ipynb | 495 ++++++---- ch-dask-dataframe/read-write.ipynb | 892 +++++++++++++++--- ch-data-science/index.md | 2 +- ch-mpi-large-model/data-parallel.md | 2 +- ch-ray-data/data-load-inspect-save.ipynb | 645 +++++++++---- ch-ray-data/data-transform.ipynb | 459 ++++++--- ch-ray-data/map-map-batches.svg | 4 + ch-ray-data/preprocessor.ipynb | 97 +- .../tune-algorithm-scheduler.ipynb | 19 +- conf.py | 2 +- datasets.py | 42 + .../ch-dask-dataframe/map-partitions.drawio | 61 ++ .../ch-mpi-large-model/data-paralism.drawio | 628 ------------ .../ch-mpi-large-model/data-parallel.drawio | 74 +- .../pipeline-parallel.drawio | 54 +- .../pipline-parallel.drawio | 500 ---------- drawio/ch-ray-data/map-map-batches.drawio | 24 +- img/ch-dask-dataframe/map-partitions.svg | 4 + img/ch-dask-dataframe/nyc-flights-graph.svg | 170 ++-- img/ch-mpi-large-model/data-parallel.svg | 2 +- img/ch-mpi-large-model/pipeline-parallel.svg | 2 +- img/ch-ray-data/map-map-batches.svg | 2 +- img/mpi-logo.png | Bin 0 -> 20734 bytes index.md | 27 +- 27 files changed, 2305 insertions(+), 2143 deletions(-) create mode 100644 ch-ray-data/map-map-batches.svg create mode 100644 datasets.py create mode 100644 drawio/ch-dask-dataframe/map-partitions.drawio delete mode 100644 drawio/ch-mpi-large-model/data-paralism.drawio delete mode 100644 drawio/ch-mpi-large-model/pipline-parallel.drawio create mode 100644 img/ch-dask-dataframe/map-partitions.svg create mode 100644 img/mpi-logo.png diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 35b6ad6..ced8a1b 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -34,4 +34,4 @@ jobs: run: ssh-keyscan -p ${{ secrets.REMOTE_PORT }} -H ${{ secrets.REMOTE_HOST }} >> ~/.ssh/known_hosts - name: Deploy with rsync - run: rsync -avz _build/html/* ${{ secrets.REMOTE_USER }}@${{ secrets.REMOTE_HOST }}:/var/www/distributed-python/ + run: rsync -avz _build/html/* ${{ secrets.REMOTE_USER }}@${{ secrets.REMOTE_HOST }}:/var/www/scale-python/ diff --git a/README.md b/README.md index 33b2120..0f77b8d 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ -# Python 分布式编程 +# Python 数据科学加速 -开源的、面向下一代人工智能应用的 Python 分布式编程书籍。 +开源的、面向下一代数据科学和人工智能应用的 Python 并行加速编程书籍。 ## 内容介绍 diff --git a/ch-dask-dataframe/indexing.ipynb b/ch-dask-dataframe/indexing.ipynb index 51eed82..8206544 100644 --- a/ch-dask-dataframe/indexing.ipynb +++ b/ch-dask-dataframe/indexing.ipynb @@ -16,15 +16,27 @@ "hide-cell" ] }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/luweizheng/miniconda3/envs/dispy/lib/python3.11/site-packages/distributed/node.py:182: UserWarning: Port 8787 is already in use.\n", + "Perhaps you already have a cluster running?\n", + "Hosting the HTTP server on port 51899 instead\n", + " warnings.warn(\n" + ] + } + ], "source": [ "%config InlineBackend.figure_format = 'svg'\n", "import os\n", - "import urllib\n", - "import shutil\n", - "from zipfile import ZipFile\n", + "import sys\n", + "sys.path.append(\"..\")\n", + "from datasets import nyc_flights\n", "\n", "import dask\n", + "dask.config.set({'dataframe.query-planning': False})\n", "import dask.dataframe as dd\n", "import pandas as pd\n", "from dask.distributed import LocalCluster, Client\n", @@ -52,7 +64,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -123,7 +135,7 @@ "3 qux three 4 40" ] }, - "execution_count": 15, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -147,7 +159,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -220,7 +232,7 @@ "qux three 4 40" ] }, - "execution_count": 19, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -239,7 +251,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -310,7 +322,7 @@ "3 qux three 4 40" ] }, - "execution_count": 20, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -343,7 +355,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -370,7 +382,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -397,7 +409,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -433,28 +445,28 @@ " \n", " \n", " 2018-01-01 00:00:00\n", - " 984\n", - " 0.660595\n", + " 992\n", + " -0.711756\n", " \n", " \n", " 2018-01-01 00:00:01\n", - " 960\n", - " -0.747564\n", + " 1018\n", + " -0.838596\n", " \n", " \n", " 2018-01-01 00:00:02\n", - " 1039\n", - " 0.777117\n", + " 1000\n", + " -0.735968\n", " \n", " \n", " 2018-01-01 00:00:03\n", - " 1038\n", - " -0.501949\n", + " 1004\n", + " 0.904384\n", " \n", " \n", " 2018-01-01 00:00:04\n", - " 992\n", - " 0.767979\n", + " 1021\n", + " 0.025423\n", " \n", " \n", " ...\n", @@ -463,28 +475,28 @@ " \n", " \n", " 2022-12-31 23:59:55\n", - " 1005\n", - " -0.102774\n", + " 1020\n", + " 0.961542\n", " \n", " \n", " 2022-12-31 23:59:56\n", - " 1040\n", - " -0.648857\n", + " 963\n", + " -0.663948\n", " \n", " \n", " 2022-12-31 23:59:57\n", - " 1019\n", - " -0.310174\n", + " 1010\n", + " 0.510401\n", " \n", " \n", " 2022-12-31 23:59:58\n", - " 987\n", - " 0.889037\n", + " 964\n", + " -0.882126\n", " \n", " \n", " 2022-12-31 23:59:59\n", - " 977\n", - " -0.078216\n", + " 1020\n", + " -0.532950\n", " \n", " \n", "\n", @@ -494,22 +506,22 @@ "text/plain": [ " id x\n", "timestamp \n", - "2018-01-01 00:00:00 984 0.660595\n", - "2018-01-01 00:00:01 960 -0.747564\n", - "2018-01-01 00:00:02 1039 0.777117\n", - "2018-01-01 00:00:03 1038 -0.501949\n", - "2018-01-01 00:00:04 992 0.767979\n", + "2018-01-01 00:00:00 992 -0.711756\n", + "2018-01-01 00:00:01 1018 -0.838596\n", + "2018-01-01 00:00:02 1000 -0.735968\n", + "2018-01-01 00:00:03 1004 0.904384\n", + "2018-01-01 00:00:04 1021 0.025423\n", "... ... ...\n", - "2022-12-31 23:59:55 1005 -0.102774\n", - "2022-12-31 23:59:56 1040 -0.648857\n", - "2022-12-31 23:59:57 1019 -0.310174\n", - "2022-12-31 23:59:58 987 0.889037\n", - "2022-12-31 23:59:59 977 -0.078216\n", + "2022-12-31 23:59:55 1020 0.961542\n", + "2022-12-31 23:59:56 963 -0.663948\n", + "2022-12-31 23:59:57 1010 0.510401\n", + "2022-12-31 23:59:58 964 -0.882126\n", + "2022-12-31 23:59:59 1020 -0.532950\n", "\n", "[157766400 rows x 2 columns]" ] }, - "execution_count": 4, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -527,31 +539,31 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 8, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/luweizheng/miniconda3/envs/dispy/lib/python3.11/site-packages/dask/dataframe/io/csv.py:640: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " head = reader(BytesIO(b_sample), nrows=sample_rows, **head_kwargs)\n" + ] + }, { "data": { "text/plain": [ "(None, None, None, None, None, None, None)" ] }, - "execution_count": 5, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "folder_path = os.path.join(os.getcwd(), \"../data/\")\n", - "download_url = \"https://dp.godaai.org/nyc-flights.zip\"\n", - "zip_file_path = os.path.join(folder_path, \"nyc-flights.zip\")\n", - "if not os.path.exists(os.path.join(folder_path, \"nyc-flights\")):\n", - " with urllib.request.urlopen(download_url) as response, open(zip_file_path, 'wb') as out_file:\n", - " shutil.copyfileobj(response, out_file)\n", - " zf = ZipFile(zip_file_path, 'r')\n", - " zf.extractall(folder_path)\n", - " zf.close()\n", - "file_path = os.path.join(folder_path, \"nyc-flights\", \"*.csv\")\n", + "folder_path = nyc_flights()\n", + "file_path = os.path.join(folder_path, \"*.csv\")\n", "flights_ddf = dd.read_csv(file_path,\n", " parse_dates={'Date': [0, 1, 2]},\n", " dtype={'TailNum': object,\n", @@ -569,7 +581,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -602,7 +614,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -633,16 +645,38 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 11, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-04-23 16:05:06,483 - distributed.shuffle._scheduler_plugin - WARNING - Shuffle 008ee90768895dabe7a3e94389222068 initialized by task ('shuffle-transfer-008ee90768895dabe7a3e94389222068', 0) executed on worker tcp://127.0.0.1:51911\n", + "2024-04-23 16:05:06,505 - distributed.shuffle._scheduler_plugin - WARNING - Shuffle 008ee90768895dabe7a3e94389222068 deactivated due to stimulus 'task-finished-1713859506.50483'\n" + ] + }, { "name": "stdout", "output_type": "stream", "text": [ " col2\n", "col1 \n", - "01 a\n", + "01 a\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-04-23 16:05:06,545 - distributed.shuffle._scheduler_plugin - WARNING - Shuffle 01fddf4f11082a43a6075f7888029dd3 initialized by task ('shuffle-transfer-01fddf4f11082a43a6075f7888029dd3', 1) executed on worker tcp://127.0.0.1:51912\n", + "2024-04-23 16:05:06,604 - distributed.shuffle._scheduler_plugin - WARNING - Shuffle 01fddf4f11082a43a6075f7888029dd3 deactivated due to stimulus 'task-finished-1713859506.6028118'\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ " col2\n", "col1 \n", "02 c\n", @@ -675,17 +709,25 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 12, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-04-23 16:05:16,522 - distributed.shuffle._scheduler_plugin - WARNING - Shuffle d162433f4ca23d129354be4d414ea589 initialized by task ('shuffle-transfer-d162433f4ca23d129354be4d414ea589', 999) executed on worker tcp://127.0.0.1:51914\n", + "2024-04-23 16:05:27,101 - distributed.shuffle._scheduler_plugin - WARNING - Shuffle d162433f4ca23d129354be4d414ea589 deactivated due to stimulus 'task-finished-1713859527.100699'\n" + ] + }, { "name": "stdout", "output_type": "stream", "text": [ "before set_index npartitions: 1826\n", - "after set_index npartitions: 163\n", - "CPU times: user 6.1 s, sys: 3.47 s, total: 9.57 s\n", - "Wall time: 19.6 s\n" + "after set_index npartitions: 165\n", + "CPU times: user 6.63 s, sys: 3.65 s, total: 10.3 s\n", + "Wall time: 20.6 s\n" ] } ], @@ -706,7 +748,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -716,15 +758,23 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 14, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-04-23 16:05:38,056 - distributed.shuffle._scheduler_plugin - WARNING - Shuffle d162433f4ca23d129354be4d414ea589 initialized by task ('shuffle-transfer-d162433f4ca23d129354be4d414ea589', 999) executed on worker tcp://127.0.0.1:51914\n", + "2024-04-23 16:05:49,629 - distributed.shuffle._scheduler_plugin - WARNING - Shuffle d162433f4ca23d129354be4d414ea589 deactivated due to stimulus 'task-finished-1713859549.629161'\n" + ] + }, { "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 3.25 s, sys: 1.09 s, total: 4.34 s\n", - "Wall time: 11.7 s\n" + "CPU times: user 3.24 s, sys: 1.7 s, total: 4.94 s\n", + "Wall time: 11.9 s\n" ] } ], @@ -743,15 +793,15 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 1.94 s, sys: 743 ms, total: 2.68 s\n", - "Wall time: 8.18 s\n" + "CPU times: user 1.88 s, sys: 1.09 s, total: 2.97 s\n", + "Wall time: 8.38 s\n" ] } ], @@ -782,9 +832,17 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 17, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/4n/v40br47s46ggrjm9bdm64lwh0000gn/T/ipykernel_76150/639704942.py:3: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " pdf = pd.read_csv(file_path,\n" + ] + }, { "data": { "text/html": [ @@ -837,14 +895,14 @@ "1 JFK 9.311532" ] }, - "execution_count": 13, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# pandas\n", - "file_path = os.path.join(folder_path, \"nyc-flights\", \"1991.csv\")\n", + "file_path = os.path.join(folder_path, \"1991.csv\")\n", "pdf = pd.read_csv(file_path,\n", " parse_dates={'Date': [0, 1, 2]},\n", " dtype={'TailNum': object,\n", @@ -865,7 +923,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 18, "metadata": {}, "outputs": [ { @@ -920,7 +978,7 @@ "1 JFK 9.311532" ] }, - "execution_count": 22, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -940,9 +998,23 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 19, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/luweizheng/miniconda3/envs/dispy/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dispy/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dispy/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dispy/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n" + ] + }, { "data": { "text/html": [ @@ -995,7 +1067,7 @@ "1 JFK 10.766914" ] }, - "execution_count": 14, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -1017,6 +1089,13 @@ "source": [ "client.shutdown()" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/ch-dask-dataframe/map-partitions.ipynb b/ch-dask-dataframe/map-partitions.ipynb index e077d3f..e3357cb 100644 --- a/ch-dask-dataframe/map-partitions.ipynb +++ b/ch-dask-dataframe/map-partitions.ipynb @@ -4,57 +4,37 @@ "cell_type": "markdown", "metadata": {}, "source": [ + "(sec-dask-map_partitions)=\n", "# `map_partitions`\n", "\n", - "除了 {numref}`sec-dask-dataframe-shuffle` 中提到的一些需要通信的计算外,有一种最简单的并行方式,英文术语为 Embarrassingly Parallel,中文可翻译为易并行。它指的是该类计算不需要太多跨 Worker 的协调和通信。比如,对某个字段加一,每个 Worker 内执行加法操作即可,Worker 之间没有通信的开销。Dask DataFrame 中可以使用 `map_partitions()` 来做这类 Embarrassingly Parallel 的操作。`map_partitions(func)` 的参数是一个 `func`,这个 `func` 将在每个 Partition 上执行。\n", + "除了 {numref}`sec-dask-dataframe-shuffle` 中提到的一些需要通信的计算外,有一种最简单的并行方式,英文术语为 Embarrassingly Parallel,中文可翻译为**易并行**。它指的是该类计算不需要太多跨 Worker 的协调和通信。比如,对某个字段加一,每个 Worker 内执行加法操作即可,Worker 之间没有通信的开销。Dask DataFrame 中可以使用 `map_partitions(func)` 来做这类 Embarrassingly Parallel 的操作。`map_partitions(func)` 的参数是一个 `func`,这个 `func` 将在每个 pandas DataFrame 上执行,`func` 内可以使用 pandas DataFrame 的各类操作。如 {numref}`fig-dask-map-partitions` 所示,`map_partitions(func)` 对原来的 pandas DataFrame 进行了转换操作。\n", "\n", - "下面的案例对缺失值进行填充,它没有跨 Worker 的通信开销,因此是一种 Embarrassingly Parallel 的典型应用场景。" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import urllib\n", - "import shutil\n", - "from zipfile import ZipFile\n", - "import warnings\n", + "```{figure} ../img/ch-dask-dataframe/map-partitions.svg\n", + "---\n", + "width: 600px\n", + "name: fig-dask-map-partitions\n", + "---\n", + "map_partitions()\n", + "```\n", "\n", - "warnings.simplefilter(action='ignore', category=FutureWarning)\n", + "## 案例:纽约出租车数据\n", "\n", - "folder_path = os.path.join(os.getcwd(), \"../data/\")\n", - "download_url_prefix = \"https://gender-pay-gap.service.gov.uk/viewing/download-data/\"\n", - "file_path_prefix = os.path.join(folder_path, \"gender-pay\")\n", - "if not os.path.exists(file_path_prefix):\n", - " os.makedirs(file_path_prefix)\n", - "for year in [2017, 2018, 2019, 2020, 2021, 2022]:\n", - " download_url = download_url_prefix + str(year)\n", - " file_path = os.path.join(file_path_prefix, f\"{str(year)}.csv\")\n", - " if not os.path.exists(file_path):\n", - " with urllib.request.urlopen(download_url) as response, open(file_path, 'wb') as out_file:\n", - " shutil.copyfileobj(response, out_file)" + "我们使用纽约出租车数据集进行简单的数据预处理:计算每个订单的时长。原数据集中,`tpep_pickup_datetime` 和 `tpep_dropoff_datetime` 分别为乘客上车和下车时间,现在只需要将下车时间 `tpep_dropoff_datetime` 减去上车时间 `tpep_pickup_datetime`。这个计算没有跨 Worker 的通信开销,因此是一种 Embarrassingly Parallel 的典型应用场景。" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 4, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/luweizheng/miniconda3/envs/dispy/lib/python3.11/site-packages/distributed/node.py:182: UserWarning: Port 8787 is already in use.\n", - "Perhaps you already have a cluster running?\n", - "Hosting the HTTP server on port 57481 instead\n", - " warnings.warn(\n" - ] - } - ], + "outputs": [], "source": [ + "import sys\n", + "sys.path.append(\"..\")\n", + "from datasets import nyc_taxi\n", + "\n", + "import pandas as pd\n", + "import dask\n", + "dask.config.set({'dataframe.query-planning': False})\n", "import dask.dataframe as dd\n", "import pandas as pd\n", "from dask.distributed import LocalCluster, Client\n", @@ -65,31 +45,249 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ - "ddf = dd.read_csv(os.path.join(file_path_prefix, \"*.csv\"),\n", - " dtype={'CompanyNumber': 'str', 'DiffMeanHourlyPercent': 'float64'})\n", + "dataset_path = nyc_taxi()\n", + "ddf = dd.read_parquet(dataset_path)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
VendorIDtrip_durationtpep_pickup_datetimetpep_dropoff_datetimepassenger_counttrip_distanceRatecodeIDstore_and_fwd_flagPULocationIDDOLocationIDpayment_typefare_amountextramta_taxtip_amounttolls_amountimprovement_surchargetotal_amountcongestion_surchargeAirport_fee
0112532023-06-01 00:08:482023-06-01 00:29:411.03.401.0N140238121.93.500.56.700.01.033.602.50.00
116142023-06-01 00:15:042023-06-01 00:25:180.03.401.0N50151115.63.500.53.000.01.023.602.50.00
2111232023-06-01 00:48:242023-06-01 01:07:071.010.201.0N13897140.87.750.510.000.01.060.050.01.75
3214062023-06-01 00:54:032023-06-01 01:17:293.09.831.0N100244139.41.000.58.880.01.053.282.50.00
425142023-06-01 00:18:442023-06-01 00:27:181.01.171.0N13723419.31.000.50.720.01.015.022.50.00
\n", + "
" + ], + "text/plain": [ + " VendorID trip_duration tpep_pickup_datetime tpep_dropoff_datetime \\\n", + "0 1 1253 2023-06-01 00:08:48 2023-06-01 00:29:41 \n", + "1 1 614 2023-06-01 00:15:04 2023-06-01 00:25:18 \n", + "2 1 1123 2023-06-01 00:48:24 2023-06-01 01:07:07 \n", + "3 2 1406 2023-06-01 00:54:03 2023-06-01 01:17:29 \n", + "4 2 514 2023-06-01 00:18:44 2023-06-01 00:27:18 \n", + "\n", + " passenger_count trip_distance RatecodeID store_and_fwd_flag \\\n", + "0 1.0 3.40 1.0 N \n", + "1 0.0 3.40 1.0 N \n", + "2 1.0 10.20 1.0 N \n", + "3 3.0 9.83 1.0 N \n", + "4 1.0 1.17 1.0 N \n", + "\n", + " PULocationID DOLocationID payment_type fare_amount extra mta_tax \\\n", + "0 140 238 1 21.9 3.50 0.5 \n", + "1 50 151 1 15.6 3.50 0.5 \n", + "2 138 97 1 40.8 7.75 0.5 \n", + "3 100 244 1 39.4 1.00 0.5 \n", + "4 137 234 1 9.3 1.00 0.5 \n", + "\n", + " tip_amount tolls_amount improvement_surcharge total_amount \\\n", + "0 6.70 0.0 1.0 33.60 \n", + "1 3.00 0.0 1.0 23.60 \n", + "2 10.00 0.0 1.0 60.05 \n", + "3 8.88 0.0 1.0 53.28 \n", + "4 0.72 0.0 1.0 15.02 \n", + "\n", + " congestion_surcharge Airport_fee \n", + "0 2.5 0.00 \n", + "1 2.5 0.00 \n", + "2 0.0 1.75 \n", + "3 2.5 0.00 \n", + "4 2.5 0.00 " + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def transform(df):\n", + " df[\"trip_duration\"] = (df[\"tpep_dropoff_datetime\"] - df[\"tpep_pickup_datetime\"]).dt.seconds\n", + " # 将 `trip_duration` 挪到前面\n", + " dur_column = df.pop('trip_duration')\n", + " df.insert(1, dur_column.name, dur_column)\n", + " return df\n", "\n", - "def fillna(df):\n", - " return df.fillna(value={\"PostCode\": \"UNKNOWN\"})\n", - " \n", - "ddf = ddf.map_partitions(fillna)" + "ddf = ddf.map_partitions(transform)\n", + "ddf.compute()\n", + "ddf.head(5)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Dask DataFrame 模拟了 pandas DataFrame,如果这个 API 的计算模式是 Embarrassingly Parallel,它的底层很可能就是使用 `map_partitions()` 实现的。\n", + "Dask DataFrame 的某些 API 的计算模式是 Embarrassingly Parallel,它的底层很可能就是使用 `map_partitions()` 实现的。\n", "\n", - "{numref}`sec-dask-dataframe-indexing` 提到过,Dask DataFrame 会在某个列上进行切分。我们可以在 `map_partitions()` 的 `func` 中实现任何我们想做的事情,但如果对这些切分的列做了改动,需要 `clear_divisions()` 或者重新 `set_index()`。" + "{numref}`sec-dask-dataframe-indexing` 提到过,Dask DataFrame 会在某个列上进行切分,但如果对这些切分的列做了改动,需要 `clear_divisions()` 或者重新 `set_index()`。" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -114,43 +312,29 @@ " \n", " \n", " \n", - " EmployerName\n", - " EmployerId\n", - " Address\n", - " PostCode\n", - " CompanyNumber\n", - " SicCodes\n", - " DiffMeanHourlyPercent\n", - " DiffMedianHourlyPercent\n", - " DiffMeanBonusPercent\n", - " DiffMedianBonusPercent\n", - " MaleBonusPercent\n", - " FemaleBonusPercent\n", - " MaleLowerQuartile\n", - " FemaleLowerQuartile\n", - " MaleLowerMiddleQuartile\n", - " FemaleLowerMiddleQuartile\n", - " MaleUpperMiddleQuartile\n", - " FemaleUpperMiddleQuartile\n", - " MaleTopQuartile\n", - " FemaleTopQuartile\n", - " CompanyLinkToGPGInfo\n", - " ResponsiblePerson\n", - " EmployerSize\n", - " CurrentName\n", - " SubmittedAfterTheDeadline\n", - " DueDate\n", - " DateSubmitted\n", + " VendorID\n", + " trip_duration\n", + " tpep_pickup_datetime\n", + " tpep_dropoff_datetime\n", + " passenger_count\n", + " trip_distance\n", + " RatecodeID\n", + " store_and_fwd_flag\n", + " PULocationID\n", + " DOLocationID\n", + " payment_type\n", + " fare_amount\n", + " extra\n", + " mta_tax\n", + " tip_amount\n", + " tolls_amount\n", + " improvement_surcharge\n", + " total_amount\n", + " congestion_surcharge\n", + " Airport_fee\n", " \n", " \n", - " npartitions=6\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " npartitions=1\n", " \n", " \n", " \n", @@ -176,17 +360,17 @@ " \n", " \n", " \n", - " string\n", + " int32\n", + " int32\n", + " datetime64[us]\n", + " datetime64[us]\n", " int64\n", - " string\n", - " string\n", - " string\n", - " string\n", - " float64\n", - " float64\n", - " float64\n", - " float64\n", " float64\n", + " int64\n", + " string\n", + " int32\n", + " int32\n", + " int64\n", " float64\n", " float64\n", " float64\n", @@ -196,73 +380,6 @@ " float64\n", " float64\n", " float64\n", - " string\n", - " string\n", - " string\n", - " string\n", - " bool\n", - " string\n", - " string\n", - " \n", - " \n", - " \n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " \n", - " \n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", " \n", " \n", " \n", @@ -286,62 +403,22 @@ " ...\n", " ...\n", " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " \n", - " \n", - " \n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", " \n", " \n", "\n", "\n", - "
Dask Name: fillna, 3 graph layers
" + "
Dask Name: transform, 2 graph layers
" ], "text/plain": [ "Dask DataFrame Structure:\n", - " EmployerName EmployerId Address PostCode CompanyNumber SicCodes DiffMeanHourlyPercent DiffMedianHourlyPercent DiffMeanBonusPercent DiffMedianBonusPercent MaleBonusPercent FemaleBonusPercent MaleLowerQuartile FemaleLowerQuartile MaleLowerMiddleQuartile FemaleLowerMiddleQuartile MaleUpperMiddleQuartile FemaleUpperMiddleQuartile MaleTopQuartile FemaleTopQuartile CompanyLinkToGPGInfo ResponsiblePerson EmployerSize CurrentName SubmittedAfterTheDeadline DueDate DateSubmitted\n", - "npartitions=6 \n", - " string int64 string string string string float64 float64 float64 float64 float64 float64 float64 float64 float64 float64 float64 float64 float64 float64 string string string string bool string string\n", - " ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...\n", - "... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...\n", - " ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...\n", - " ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...\n", - "Dask Name: fillna, 3 graph layers" + " VendorID trip_duration tpep_pickup_datetime tpep_dropoff_datetime passenger_count trip_distance RatecodeID store_and_fwd_flag PULocationID DOLocationID payment_type fare_amount extra mta_tax tip_amount tolls_amount improvement_surcharge total_amount congestion_surcharge Airport_fee\n", + "npartitions=1 \n", + " int32 int32 datetime64[us] datetime64[us] int64 float64 int64 string int32 int32 int64 float64 float64 float64 float64 float64 float64 float64 float64 float64\n", + " ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...\n", + "Dask Name: transform, 2 graph layers" ] }, - "execution_count": 9, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -352,7 +429,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ diff --git a/ch-dask-dataframe/read-write.ipynb b/ch-dask-dataframe/read-write.ipynb index 3b1f5fe..a9ba5cd 100644 --- a/ch-dask-dataframe/read-write.ipynb +++ b/ch-dask-dataframe/read-write.ipynb @@ -40,63 +40,40 @@ "\n", "## 数据切分与并行读取\n", "\n", - "我们以读取逗号分隔的数值(Comma-Separated Values,CSV)文件为例,来展示 Dask DataFrame 与 pandas 的区别。" + "### 案例:飞机起降数据\n", + "\n", + "飞机起降数据集由多个逗号分隔的数值(Comma-Separated Values,CSV)文件组成,每个文件对应一个年份,我们以读取多个 CSV 为例,来展示 Dask DataFrame 与 pandas 的区别。" ] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 14, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/Users/luweizheng/Projects/godaai/distributed-python/ch-dask-dataframe/../data/nyc-flights/*.csv\n" - ] - } - ], + "outputs": [], "source": [ "import os\n", "import glob\n", - "import urllib\n", - "import shutil\n", - "from zipfile import ZipFile\n", - "import warnings\n", "\n", + "import sys\n", + "sys.path.append(\"..\")\n", + "from datasets import nyc_flights\n", + "\n", + "import warnings\n", "warnings.simplefilter(action='ignore', category=FutureWarning)\n", "\n", - "folder_path = os.path.join(os.getcwd(), \"../data/\")\n", - "download_url = \"https://dp.godaai.org/nyc-flights.zip\"\n", - "zip_file_path = os.path.join(folder_path, \"nyc-flights.zip\")\n", - "if not os.path.exists(os.path.join(folder_path, \"nyc-flights\")):\n", - " with urllib.request.urlopen(download_url) as response, open(zip_file_path, 'wb') as out_file:\n", - " shutil.copyfileobj(response, out_file)\n", - " zf = ZipFile(zip_file_path, 'r')\n", - " zf.extractall(folder_path)\n", - " zf.close()\n", + "folder_path = nyc_flights()\n", "\n", - "file_path = os.path.join(folder_path, \"nyc-flights\", \"*.csv\")\n", - "print(file_path)" + "file_path = os.path.join(folder_path, \"*.csv\")" ] }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 15, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/luweizheng/miniconda3/envs/dispy/lib/python3.11/site-packages/distributed/node.py:182: UserWarning: Port 8787 is already in use.\n", - "Perhaps you already have a cluster running?\n", - "Hosting the HTTP server on port 55077 instead\n", - " warnings.warn(\n" - ] - } - ], + "outputs": [], "source": [ + "import dask\n", + "dask.config.set({'dataframe.query-planning': False})\n", "import dask.dataframe as dd\n", "import pandas as pd\n", "from dask.distributed import LocalCluster, Client\n", @@ -114,7 +91,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ @@ -130,7 +107,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -153,9 +130,17 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 18, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n" + ] + }, { "data": { "text/html": [ @@ -302,7 +287,7 @@ "[3 rows x 21 columns]" ] }, - "execution_count": 4, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -313,7 +298,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -457,7 +442,7 @@ "[3 rows x 21 columns]" ] }, - "execution_count": 5, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -477,7 +462,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -486,255 +471,255 @@ "\n", "\n", "\n", - "\n", + "\n", "\n", - "-7150382809904974857\n", + "3521720512806484412\n", "\n", "read-csv\n", "\n", - "\n", + "\n", "\n", - "-6194689680917011400\n", + "6736009212905862099\n", "\n", "0\n", "\n", - "\n", + "\n", "\n", - "-7150382809904974857->-6194689680917011400\n", + "3521720512806484412->6736009212905862099\n", "\n", "\n", "\n", - "\n", + "\n", "\n", - "6071941830101125281\n", + "-1563468653830855738\n", "\n", "to_pyarrow_string\n", "\n", - "\n", + "\n", "\n", - "-6194689680917011400->6071941830101125281\n", + "6736009212905862099->-1563468653830855738\n", "\n", "\n", "\n", - "\n", + "\n", "\n", - "-65660599333282282\n", + "2484881100534163769\n", "\n", "read-csv\n", "\n", - "\n", + "\n", "\n", - "312951075184987025\n", + "-2555422199051202395\n", "\n", "1\n", "\n", - "\n", + "\n", "\n", - "-65660599333282282->312951075184987025\n", + "2484881100534163769->-2555422199051202395\n", "\n", "\n", "\n", - "\n", + "\n", "\n", - "-8135483071169533727\n", + "2675850518608036870\n", "\n", "to_pyarrow_string\n", "\n", - "\n", + "\n", "\n", - "312951075184987025->-8135483071169533727\n", + "-2555422199051202395->2675850518608036870\n", "\n", "\n", "\n", - "\n", + "\n", "\n", - "-1102500011605602925\n", + "1680754195385590727\n", "\n", "read-csv\n", "\n", - "\n", + "\n", "\n", - "-8978480336772077469\n", + "3952218557050796030\n", "\n", "2\n", "\n", - "\n", + "\n", "\n", - "-1102500011605602925->-8978480336772077469\n", + "1680754195385590727->3952218557050796030\n", "\n", "\n", "\n", - "\n", + "\n", "\n", - "-1050760860597841152\n", + "4484414144468516194\n", "\n", "to_pyarrow_string\n", "\n", - "\n", + "\n", "\n", - "-8978480336772077469->-1050760860597841152\n", + "3952218557050796030->4484414144468516194\n", "\n", "\n", "\n", - "\n", + "\n", "\n", - "-1906626916754175967\n", + "1719447117322426477\n", "\n", "read-csv\n", "\n", - "\n", + "\n", "\n", - "-2470839580670079044\n", + "-7986884760556757161\n", "\n", "3\n", "\n", - "\n", + "\n", "\n", - "-1906626916754175967->-2470839580670079044\n", + "1719447117322426477->-7986884760556757161\n", "\n", "\n", "\n", - "\n", + "\n", "\n", - "4071937302134756076\n", + "8723733316907408802\n", "\n", "to_pyarrow_string\n", "\n", - "\n", + "\n", "\n", - "-2470839580670079044->4071937302134756076\n", + "-7986884760556757161->8723733316907408802\n", "\n", "\n", "\n", - "\n", + "\n", "\n", - "5178095293817516608\n", + "5958766289761319085\n", "\n", "read-csv\n", "\n", - "\n", + "\n", "\n", - "4036801175431919381\n", + "5566785284180098089\n", "\n", "4\n", "\n", - "\n", + "\n", "\n", - "5178095293817516608->4036801175431919381\n", + "5958766289761319085->5566785284180098089\n", "\n", "\n", "\n", - "\n", + "\n", "\n", - "8508987607055959954\n", + "7919606411758835760\n", "\n", "to_pyarrow_string\n", "\n", - "\n", + "\n", "\n", - "4036801175431919381->8508987607055959954\n", + "5566785284180098089->7919606411758835760\n", "\n", "\n", "\n", - "\n", + "\n", "\n", - "-9029329607453142400\n", + "-121519200098467208\n", "\n", "read-csv\n", "\n", - "\n", + "\n", "\n", - "-856272853540776985\n", + "7676068657297728386\n", "\n", "5\n", "\n", - "\n", + "\n", "\n", - "-9029329607453142400->-856272853540776985\n", + "-121519200098467208->7676068657297728386\n", "\n", "\n", "\n", - "\n", + "\n", "\n", - "-2363636268343853305\n", + "2288159746428799421\n", "\n", "to_pyarrow_string\n", "\n", - "\n", + "\n", "\n", - "-856272853540776985->-2363636268343853305\n", + "7676068657297728386->2288159746428799421\n", "\n", "\n", "\n", - "\n", + "\n", "\n", - "8774990811659256194\n", + "-5288713516117402287\n", "\n", "0\n", "\n", - "\n", + "\n", "\n", - "6071941830101125281->8774990811659256194\n", + "-1563468653830855738->-5288713516117402287\n", "\n", "\n", "\n", - "\n", + "\n", "\n", - "3881916782686559828\n", + "8264956528619452963\n", "\n", "1\n", "\n", - "\n", + "\n", "\n", - "-8135483071169533727->3881916782686559828\n", + "2675850518608036870->8264956528619452963\n", "\n", "\n", "\n", - "\n", + "\n", "\n", - "-8057186534920993363\n", + "-8072504171972468356\n", "\n", "2\n", "\n", - "\n", + "\n", "\n", - "-1050760860597841152->-8057186534920993363\n", + "4484414144468516194->-8072504171972468356\n", "\n", "\n", "\n", - "\n", + "\n", "\n", - "1098126126831493759\n", + "5481165872764386894\n", "\n", "3\n", "\n", - "\n", + "\n", "\n", - "4071937302134756076->1098126126831493759\n", + "8723733316907408802->5481165872764386894\n", "\n", "\n", "\n", - "\n", + "\n", "\n", - "7605766882933492184\n", + "-6457937444843166297\n", "\n", "4\n", "\n", - "\n", + "\n", "\n", - "8508987607055959954->7605766882933492184\n", + "7919606411758835760->-6457937444843166297\n", "\n", "\n", "\n", - "\n", + "\n", "\n", - "-4333336434674061007\n", + "49703311258832128\n", "\n", "5\n", "\n", - "\n", + "\n", "\n", - "-2363636268343853305->-4333336434674061007\n", + "2288159746428799421->49703311258832128\n", "\n", "\n", "\n", @@ -745,7 +730,7 @@ "" ] }, - "execution_count": 6, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -779,9 +764,17 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 21, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n" + ] + }, { "name": "stdout", "output_type": "stream", @@ -813,10 +806,10 @@ "name": "stderr", "output_type": "stream", "text": [ - "2024-02-20 13:26:56,131 - distributed.worker - WARNING - Compute Failed\n", - "Key: ('to_pyarrow_string-be40c5ef4347788c673803e766c9f5ac', 5)\n", + "2024-04-23 15:57:08,658 - distributed.worker - WARNING - Compute Failed\n", + "Key: ('to_pyarrow_string-1ab0cc1658d1dd5cfa26f384700ac39c', 5)\n", "Function: execute_task\n", - "args: ((subgraph_callable-104274d7-85ea-4720-954d-78db2282e8c8, [(, , 0, 24979433, b'\\n'), None, True, True]))\n", + "args: ((subgraph_callable-490521c8fd346c8bafcb4403419b222a, [(, , 0, 24979433, b'\\n'), None, True, True]))\n", "kwargs: {}\n", "Exception: 'ValueError(\\'Mismatched dtypes found in `pd.read_csv`/`pd.read_table`.\\\\n\\\\n+----------------+---------+----------+\\\\n| Column | Found | Expected |\\\\n+----------------+---------+----------+\\\\n| CRSElapsedTime | float64 | int64 |\\\\n| TailNum | object | float64 |\\\\n+----------------+---------+----------+\\\\n\\\\nThe following columns also raised exceptions on conversion:\\\\n\\\\n- TailNum\\\\n ValueError(\"could not convert string to float: \\\\\\'N14346\\\\\\'\")\\\\n\\\\nUsually this is due to dask\\\\\\'s dtype inference failing, and\\\\n*may* be fixed by specifying dtypes manually by adding:\\\\n\\\\ndtype={\\\\\\'CRSElapsedTime\\\\\\': \\\\\\'float64\\\\\\',\\\\n \\\\\\'TailNum\\\\\\': \\\\\\'object\\\\\\'}\\\\n\\\\nto the call to `read_csv`/`read_table`.\\')'\n", "\n" @@ -841,9 +834,17 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 22, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n" + ] + }, { "data": { "text/html": [ @@ -990,7 +991,7 @@ "[3 rows x 21 columns]" ] }, - "execution_count": 8, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -1013,7 +1014,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 23, "metadata": {}, "outputs": [ { @@ -1074,7 +1075,7 @@ "[0 rows x 21 columns]" ] }, - "execution_count": 9, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -1092,9 +1093,515 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 24, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n" + ] + }, { "name": "stdout", "output_type": "stream", @@ -1104,17 +1611,41 @@ "JFK 255485\n", "LGA 591305\n", "Name: Origin, dtype: int64\n", - "CPU times: user 1.67 s, sys: 328 ms, total: 2 s\n", - "Wall time: 5.49 s\n" + "CPU times: user 288 ms, sys: 63.2 ms, total: 351 ms\n", + "Wall time: 1.44 s\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n" ] } ], "source": [ "%%time\n", - "ddf = dd.read_csv(file_path, \n", - " parse_dates={\"Date\": [0, 1, 2]},\n", - " dtype={\"TailNum\": str, \"CRSElapsedTime\": float, \"Cancelled\": bool},\n", - " blocksize=50_000)\n", + "ddf = dd.read_csv(file_path,\n", + " parse_dates={'Date': [0, 1, 2]},\n", + " dtype={'TailNum': object,\n", + " 'CRSElapsedTime': float,\n", + " 'Cancelled': bool},\n", + " blocksize=500_000)\n", "\n", "origin_cnt = ddf[~ddf.Cancelled].groupby(\"Origin\").Origin.count().compute()\n", "print(origin_cnt)" @@ -1122,9 +1653,65 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n", + "/Users/luweizheng/miniconda3/envs/dask/lib/python3.11/site-packages/dask/dataframe/io/csv.py:195: FutureWarning: Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated. Combine the desired columns with pd.to_datetime after parsing instead.\n", + " df = reader(bio, **kwargs)\n" + ] + }, { "name": "stdout", "output_type": "stream", @@ -1134,8 +1721,8 @@ "JFK 255485\n", "LGA 591305\n", "Name: Origin, dtype: int64\n", - "CPU times: user 51.4 ms, sys: 11.1 ms, total: 62.5 ms\n", - "Wall time: 539 ms\n" + "CPU times: user 65.5 ms, sys: 17.8 ms, total: 83.4 ms\n", + "Wall time: 1.05 s\n" ] } ], @@ -1144,7 +1731,8 @@ "ddf = dd.read_csv(file_path, \n", " parse_dates={\"Date\": [0, 1, 2]},\n", " dtype={\"TailNum\": str, \"CRSElapsedTime\": float, \"Cancelled\": bool},\n", - " blocksize=5_000_000)\n", + " blocksize=5_000_000\n", + " )\n", "\n", "origin_cnt = ddf[~ddf.Cancelled].groupby(\"Origin\").Origin.count().compute()\n", "print(origin_cnt)" @@ -1223,7 +1811,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 13, "metadata": { "tags": [ "hide-cell" @@ -1258,7 +1846,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.7" + "version": "3.11.8" } }, "nbformat": 4, diff --git a/ch-data-science/index.md b/ch-data-science/index.md index 7ddbb49..188b9b9 100644 --- a/ch-data-science/index.md +++ b/ch-data-science/index.md @@ -1,6 +1,6 @@ # 数据科学 -Python 分布式编程主要服务于数据科学和人工智能等领域,本章主要介绍数据科学相关背景知识,如果相关基础较好,也可以直接跳过本章。 +本章主要介绍数据科学相关背景知识,如果相关基础较好,也可以直接跳过本章。 ```{tableofcontents} ``` \ No newline at end of file diff --git a/ch-mpi-large-model/data-parallel.md b/ch-mpi-large-model/data-parallel.md index 8cff42d..65e9904 100644 --- a/ch-mpi-large-model/data-parallel.md +++ b/ch-mpi-large-model/data-parallel.md @@ -5,7 +5,7 @@ ```{figure} ../img/ch-mpi-large-model/data-parallel.svg --- -width: 600px +width: 500px name: fig-data-parallel-img --- 数据并行示意图 diff --git a/ch-ray-data/data-load-inspect-save.ipynb b/ch-ray-data/data-load-inspect-save.ipynb index 39b9421..3eaf6b7 100644 --- a/ch-ray-data/data-load-inspect-save.ipynb +++ b/ch-ray-data/data-load-inspect-save.ipynb @@ -12,7 +12,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 14, "metadata": { "tags": [ "hide-cell" @@ -23,11 +23,16 @@ "name": "stderr", "output_type": "stream", "text": [ - "2023-12-13 21:10:50,705\tINFO worker.py:1673 -- Started a local Ray instance.\n" + "2024-04-23 15:46:00,903\tINFO worker.py:1740 -- Started a local Ray instance. View the dashboard at \u001b[1m\u001b[32mhttp://127.0.0.1:8267 \u001b[39m\u001b[22m\n" ] }, { "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "c1d146493b2841bf93612d26a92c2490", + "version_major": 2, + "version_minor": 0 + }, "text/html": [ "
\n", "
\n", @@ -48,23 +53,27 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", - " \n", + " \n", + " \n", + " \n", + "\n", + "\n", "
Python version:3.11.53.11.7
Ray version:2.8.02.11.0
Dashboard:http://127.0.0.1:8267
\n", "\n", "
\n", "
\n" ], "text/plain": [ - "RayContext(dashboard_url='', python_version='3.11.5', ray_version='2.8.0', ray_commit='105355bd253d6538ed34d331f6a4bdf0e38ace3a', protocol_version=None)" + "RayContext(dashboard_url='127.0.0.1:8267', python_version='3.11.7', ray_version='2.11.0', ray_commit='2eb4a8119f903f79b78c01eaa9db06c6c390051c')" ] }, - "execution_count": 16, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -72,9 +81,12 @@ "source": [ "import os\n", "import shutil\n", - "import urllib.request\n", "from pathlib import Path\n", "\n", + "import sys\n", + "sys.path.append(\"..\")\n", + "from datasets import nyc_taxi\n", + "\n", "import ray\n", "\n", "if ray.is_initialized:\n", @@ -91,12 +103,12 @@ "\n", "Ray Data 提供了很多预置的数据加载方法,包括读取文件、读取 pandas DataFrame 这种内存数据、读取数据库中的数据。这里我们以一个纽约出租车司机的例子来演示读取 Parquet 文件。\n", "\n", - "首先下载该数据,下面的代码会判断是否下载到目标文件夹,如果已经下载,则无需重复下载。" + "首先下载该数据,并使用 `ray.data` 提供的 [`read_parquet()`](https://docs.ray.io/en/latest/data/api/doc/ray.data.read_parquet.html) 方法读取数据,得到一个 `Dataset`。。" ] }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 15, "metadata": { "tags": [ "hide-output" @@ -104,52 +116,102 @@ }, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "文件夹 /Users/luweizheng/Projects/py-101/distributed-python/ch-ray-data/../data/nyc-taxi 已存在,无需操作。\n" - ] - } - ], - "source": [ - "folder_path = os.path.join(os.getcwd(), \"../data/nyc-taxi\")\n", - "download_url = \"https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2023-06.parquet\"\n", - "file_name = download_url.split(\"/\")[-1]\n", - "parquet_file_path = os.path.join(folder_path, file_name)\n", - "if not os.path.exists(folder_path):\n", - " # 创建文件夹\n", - " os.makedirs(folder_path)\n", - " print(f\"文件夹 {folder_path} 不存在,已创建。\")\n", - " # 下载并保存 Parquet 文件\n", - " with urllib.request.urlopen(download_url) as response, open(parquet_file_path, 'wb') as out_file:\n", - " shutil.copyfileobj(response, out_file)\n", - " print(\"数据已下载并保存为 Parquet 文件。\")\n", - "else:\n", - " print(f\"文件夹 {folder_path} 已存在,无需操作。\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "使用 `ray.data` 提供的 [`read_parquet()`](https://docs.ray.io/en/latest/data/api/doc/ray.data.read_parquet.html) 方法读取数据,得到一个 `Dataset`。" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "661d0924a50a4963810566b722a7ad6d", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Parquet Files Sample 0: 0%| | 0/2 [00:00 TaskPoolMapOperator[ReadParquet] -> LimitOperator[limit=1]\n" ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "7bd391923ff1435bb3f0e03a5136aa0d", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "- ReadParquet->SplitBlocks(5) 1: 0%| | 0/12 [00:00 TaskPoolMapOperator[ReadParquet] -> LimitOperator[limit=2]\n", - "2023-12-13 21:10:57,881\tINFO streaming_executor.py:105 -- Execution config: ExecutionOptions(resource_limits=ExecutionResources(cpu=None, gpu=None, object_store_memory=None), locality_with_output=False, preserve_order=False, actor_locality_enabled=True, verbose_progress=False)\n", - "2023-12-13 21:10:57,882\tINFO streaming_executor.py:107 -- Tip: For detailed progress reporting, run `ray.data.DataContext.get_current().execution_options.verbose_progress = True`\n", - "\u001b[36m(ReadParquet->SplitBlocks(173) pid=42048)\u001b[0m /Users/luweizheng/anaconda3/envs/dispy/lib/python3.11/site-packages/ray/data/_internal/arrow_block.py:128: FutureWarning: promote has been superseded by mode='default'.\n", - "\u001b[36m(ReadParquet->SplitBlocks(173) pid=42048)\u001b[0m return transform_pyarrow.concat(tables) \n", - " " + "2024-04-23 15:46:04,082\tINFO streaming_executor.py:112 -- Starting execution of Dataset. Full logs are in /tmp/ray/session_2024-04-23_15-45-59_228244_75431/logs\n", + "2024-04-23 15:46:04,083\tINFO streaming_executor.py:113 -- Execution plan of Dataset: InputDataBuffer[Input] -> TaskPoolMapOperator[ReadParquet] -> LimitOperator[limit=2]\n" ] }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'passenger_count': 1, 'tip_amount': 6.7, 'payment_type': 1}\n", - "{'passenger_count': 1, 'tip_amount': 10.0, 'payment_type': 1}\n" - ] + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "1353777be82f46c6ac53dddadf0eac47", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "- ReadParquet->SplitBlocks(17) 1: 0%| | 0/12 [00:00 6.0\n", ")\n", @@ -368,27 +457,30 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 19, "metadata": {}, "outputs": [ { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-12-13 21:10:58,370\tINFO plan.py:757 -- Using autodetected parallelism=173 for stage ReadParquet to satisfy output blocks of size at least DataContext.get_current().target_min_block_size=1.0MiB.\n", - "2023-12-13 21:10:58,372\tINFO plan.py:762 -- To satisfy the requested parallelism of 173, each read task output is split into 173 smaller blocks.\n", - "2023-12-13 21:10:58,389\tINFO plan.py:757 -- Using autodetected parallelism=173 for stage ReadParquet to satisfy output blocks of size at least DataContext.get_current().target_min_block_size=1.0MiB.\n", - "2023-12-13 21:10:58,390\tINFO plan.py:762 -- To satisfy the requested parallelism of 173, each read task output is split into 173 smaller blocks.\n", - " \r" - ] + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "408a4a5a85ea48a4a36a4b85342197f8", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Read progress 0: 0%| | 0/12 [00:00 TaskPoolMapOperator[ReadParquet] -> LimitOperator[limit=1]\n", - "2023-12-13 21:11:00,841\tINFO streaming_executor.py:105 -- Execution config: ExecutionOptions(resource_limits=ExecutionResources(cpu=None, gpu=None, object_store_memory=None), locality_with_output=False, preserve_order=False, actor_locality_enabled=True, verbose_progress=False)\n", - "2023-12-13 21:11:00,842\tINFO streaming_executor.py:107 -- Tip: For detailed progress reporting, run `ray.data.DataContext.get_current().execution_options.verbose_progress = True`\n", - "\u001b[36m(ReadParquet->SplitBlocks(173) pid=42046)\u001b[0m /Users/luweizheng/anaconda3/envs/dispy/lib/python3.11/site-packages/ray/data/_internal/arrow_block.py:128: FutureWarning: promote has been superseded by mode='default'.\n", - "\u001b[36m(ReadParquet->SplitBlocks(173) pid=42046)\u001b[0m return transform_pyarrow.concat(tables)\n" + "2024-04-23 15:46:06,493\tINFO streaming_executor.py:112 -- Starting execution of Dataset. Full logs are in /tmp/ray/session_2024-04-23_15-45-59_228244_75431/logs\n", + "2024-04-23 15:46:06,493\tINFO streaming_executor.py:113 -- Execution plan of Dataset: InputDataBuffer[Input] -> TaskPoolMapOperator[ReadParquet] -> LimitOperator[limit=1]\n" ] }, { "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "79e31b15590646f191fd10002e54d78a", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "- ReadParquet->SplitBlocks(17) 1: 0%| | 0/12 [00:00 TaskPoolMapOperator[ReadParquet] -> LimitOperator[limit=2]\n", - "2023-12-13 21:11:01,261\tINFO streaming_executor.py:105 -- Execution config: ExecutionOptions(resource_limits=ExecutionResources(cpu=None, gpu=None, object_store_memory=None), locality_with_output=False, preserve_order=False, actor_locality_enabled=True, verbose_progress=False)\n", - "2023-12-13 21:11:01,262\tINFO streaming_executor.py:107 -- Tip: For detailed progress reporting, run `ray.data.DataContext.get_current().execution_options.verbose_progress = True`\n", - " \r" + "2024-04-23 15:46:06,732\tINFO streaming_executor.py:112 -- Starting execution of Dataset. Full logs are in /tmp/ray/session_2024-04-23_15-45-59_228244_75431/logs\n", + "2024-04-23 15:46:06,732\tINFO streaming_executor.py:113 -- Execution plan of Dataset: InputDataBuffer[Input] -> TaskPoolMapOperator[ReadParquet] -> LimitOperator[limit=2]\n" ] }, { "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "7aebbbdf009849248dec2f702f64010a", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "- ReadParquet->SplitBlocks(17) 1: 0%| | 0/12 [00:00 TaskPoolMapOperator[ReadParquet] -> TaskPoolMapOperator[Write]\n", - "2023-12-13 21:11:01,756\tINFO streaming_executor.py:105 -- Execution config: ExecutionOptions(resource_limits=ExecutionResources(cpu=None, gpu=None, object_store_memory=None), locality_with_output=False, preserve_order=False, actor_locality_enabled=True, verbose_progress=False)\n", - "2023-12-13 21:11:01,758\tINFO streaming_executor.py:107 -- Tip: For detailed progress reporting, run `ray.data.DataContext.get_current().execution_options.verbose_progress = True`\n", - "Running: 1.0/8.0 CPU, 0.0/0.0 GPU, 173.03 MiB/512.0 MiB object_store_memory: 0%| | 0/173 [00:00 TaskPoolMapOperator[ReadParquet] -> TaskPoolMapOperator[Write]\n" ] }, { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-12-13 21:11:03,071\tWARNING plan.py:577 -- Warning: The Ray cluster currently does not have any available CPUs. The Dataset job will hang unless more CPUs are freed up. A common reason is that cluster resources are used by Actors or Tune trials; see the following link for more details: https://docs.ray.io/en/latest/data/data-internals.html#ray-data-and-tune\n" - ] + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "80397722f6684cf9ae389f2666280041", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "- ReadParquet->SplitBlocks(17) 1: 0%| | 0/12 [00:00 TaskPoolMapOperator[ReadParquet] -> AllToAllOperator[Repartition] -> TaskPoolMapOperator[Write]\n", - "2023-12-13 21:29:25,053\tINFO streaming_executor.py:105 -- Execution config: ExecutionOptions(resource_limits=ExecutionResources(cpu=None, gpu=None, object_store_memory=None), locality_with_output=False, preserve_order=False, actor_locality_enabled=True, verbose_progress=False)\n", - "2023-12-13 21:29:25,054\tINFO streaming_executor.py:107 -- Tip: For detailed progress reporting, run `ray.data.DataContext.get_current().execution_options.verbose_progress = True`\n", - "\n", - "\u001b[A\n", - "Running: 0.0/8.0 CPU, 0.0/0.0 GPU, 0.0 MiB/512.0 MiB object_store_memory: 0%| | 0/3 [00:00 TaskPoolMapOperator[ReadParquet] -> AllToAllOperator[Repartition] -> TaskPoolMapOperator[Write]\n" ] }, { - "name": "stderr", - "output_type": "stream", - "text": [ - "Running: 1.0/8.0 CPU, 0.0/0.0 GPU, 173.91 MiB/512.0 MiB object_store_memory: 0%| | 0/3 [00:00SplitBlocks(173) pid=42044)\u001b[0m /Users/luweizheng/anaconda3/envs/dispy/lib/python3.11/site-packages/ray/data/_internal/arrow_block.py:128: FutureWarning: promote has been superseded by mode='default'.\n", - "\n", - "Running: 1.0/8.0 CPU, 0.0/0.0 GPU, 173.91 MiB/512.0 MiB object_store_memory: 0%| | 0/3 [00:00SplitBlocks(173) pid=42044)\u001b[0m return transform_pyarrow.concat(tables) \n", - "\n", - "Running: 0.0/8.0 CPU, 0.0/0.0 GPU, 0.0 MiB/512.0 MiB object_store_memory: 0%| | 0/3 [00:01SplitBlocks(17) 1: 0%| | 0/12 [00:00\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + "\n", + "
Python version:3.11.7
Ray version:2.11.0
Dashboard:http://127.0.0.1:8265
\n", + "\n", + "
\n", + "\n" + ], + "text/plain": [ + "RayContext(dashboard_url='127.0.0.1:8265', python_version='3.11.7', ray_version='2.11.0', ray_commit='2eb4a8119f903f79b78c01eaa9db06c6c390051c')" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ "import os\n", - "import shutil\n", - "import warnings\n", - "import urllib.request\n", + "import sys\n", "from typing import Any, Dict\n", "\n", + "sys.path.append(\"..\")\n", + "from datasets import nyc_taxi\n", + "\n", "import numpy as np\n", "import pandas as pd\n", "import torch\n", "import ray\n", "\n", + "import warnings\n", "warnings.simplefilter(action='ignore', category=FutureWarning)\n", "\n", "if ray.is_initialized:\n", " ray.shutdown()\n", "\n", - "ray.init()\n", - "\n", - "folder_path = os.path.join(os.getcwd(), \"../data/nyc-taxi\")\n", - "download_url = \"https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2023-06.parquet\"\n", - "file_name = download_url.split(\"/\")[-1]\n", - "parquet_file_path = os.path.join(folder_path, file_name)\n", - "if not os.path.exists(folder_path):\n", - " # 创建文件夹\n", - " os.makedirs(folder_path)\n", - " print(f\"文件夹 {folder_path} 不存在,已创建。\")\n", - " # 下载并保存 Parquet 文件\n", - " with urllib.request.urlopen(download_url) as response, open(parquet_file_path, 'wb') as out_file:\n", - " shutil.copyfileobj(response, out_file)\n", - " print(\"数据已下载并保存为 Parquet 文件。\")\n", - "else:\n", - " print(f\"文件夹 {folder_path} 已存在,无需操作。\")" + "ray.init()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "读取数据到 `Dataset` 类,先查看原有的数据格式,其中 `tpep_pickup_datetime` 和 `tpep_dropoff_datetime` 分别为乘客上车和下车时间,包含了日期和时间。" + "读取数据到 `Dataset` 类,查看原有的数据格式。" ] }, { @@ -106,12 +139,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "128b9acfa02540eca47e5a1f91c66b4f", + "model_id": "f76cbb15ee5640edb2fbb39e0ce7ad0c", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "Parquet Files Sample 0: 0%| | 0/1 [00:00 TaskPoolMapOperator[ReadParquet] -> LimitOperator[limit=1]\n", - "2024-02-15 19:12:01,510\tINFO streaming_executor.py:113 -- Execution config: ExecutionOptions(resource_limits=ExecutionResources(cpu=None, gpu=None, object_store_memory=None), exclude_resources=ExecutionResources(cpu=0, gpu=0, object_store_memory=0), locality_with_output=False, preserve_order=False, actor_locality_enabled=True, verbose_progress=False)\n", - "2024-02-15 19:12:01,510\tINFO streaming_executor.py:115 -- Tip: For detailed progress reporting, run `ray.data.DataContext.get_current().execution_options.verbose_progress = True`\n" + "2024-04-23 15:42:28,728\tINFO dataset.py:2370 -- Tip: Use `take_batch()` instead of `take() / show()` to return records in pandas or numpy batch format.\n", + "2024-04-23 15:42:28,731\tINFO streaming_executor.py:112 -- Starting execution of Dataset. Full logs are in /tmp/ray/session_2024-04-23_15-42-25_575692_75375/logs\n", + "2024-04-23 15:42:28,731\tINFO streaming_executor.py:113 -- Execution plan of Dataset: InputDataBuffer[Input] -> TaskPoolMapOperator[ReadParquet] -> LimitOperator[limit=1]\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "c3e92b7a33ea4421b9e828dacf491111", + "model_id": "395f0b0d7c5e4dfc8ba32f921ae2af2c", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "Running 0: 0%| | 0/1 [00:00SplitBlocks(5) 1: 0%| | 0/12 [00:00SplitBlocks(50) pid=5966)\u001b[0m /Users/luweizheng/miniconda3/envs/dispy/lib/python3.11/site-packages/ray/data/_internal/arrow_block.py:148: FutureWarning: promote has been superseded by mode='default'.\n", - "\u001b[36m(ReadParquet->SplitBlocks(50) pid=5966)\u001b[0m return transform_pyarrow.concat(tables)\n" - ] + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "fb37e85d9c9a49d48c284d6656c89ab4", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "- limit=1 2: 0%| | 0/12 [00:00 TaskPoolMapOperator[ReadParquet] -> TaskPoolMapOperator[Map(transform_row)] -> LimitOperator[limit=1]\n", - "2024-02-15 19:12:02,069\tINFO streaming_executor.py:113 -- Execution config: ExecutionOptions(resource_limits=ExecutionResources(cpu=None, gpu=None, object_store_memory=None), exclude_resources=ExecutionResources(cpu=0, gpu=0, object_store_memory=0), locality_with_output=False, preserve_order=False, actor_locality_enabled=True, verbose_progress=False)\n", - "2024-02-15 19:12:02,070\tINFO streaming_executor.py:115 -- Tip: For detailed progress reporting, run `ray.data.DataContext.get_current().execution_options.verbose_progress = True`\n" + "2024-04-23 15:42:29,570\tINFO streaming_executor.py:112 -- Starting execution of Dataset. Full logs are in /tmp/ray/session_2024-04-23_15-42-25_575692_75375/logs\n", + "2024-04-23 15:42:29,571\tINFO streaming_executor.py:113 -- Execution plan of Dataset: InputDataBuffer[Input] -> TaskPoolMapOperator[ReadParquet] -> TaskPoolMapOperator[Map(transform_row)] -> LimitOperator[limit=1]\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "28fbc3a6677f4b1b809b0662a977f691", + "model_id": "d94215b543d749b3babaa614cc28ffa3", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "- ReadParquet->SplitBlocks(5) 1: 0%| | 0/12 [00:00 TaskPoolMapOperator[ReadParquet] -> TaskPoolMapOperator[MapBatches(transform_df)] -> LimitOperator[limit=10]\n", - "2024-02-15 19:12:04,790\tINFO streaming_executor.py:113 -- Execution config: ExecutionOptions(resource_limits=ExecutionResources(cpu=None, gpu=None, object_store_memory=None), exclude_resources=ExecutionResources(cpu=0, gpu=0, object_store_memory=0), locality_with_output=False, preserve_order=False, actor_locality_enabled=True, verbose_progress=False)\n", - "2024-02-15 19:12:04,790\tINFO streaming_executor.py:115 -- Tip: For detailed progress reporting, run `ray.data.DataContext.get_current().execution_options.verbose_progress = True`\n" + "2024-04-23 15:42:43,882\tINFO streaming_executor.py:112 -- Starting execution of Dataset. Full logs are in /tmp/ray/session_2024-04-23_15-42-25_575692_75375/logs\n", + "2024-04-23 15:42:43,882\tINFO streaming_executor.py:113 -- Execution plan of Dataset: InputDataBuffer[Input] -> TaskPoolMapOperator[ReadParquet] -> TaskPoolMapOperator[MapBatches(transform_df)] -> LimitOperator[limit=1]\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "3af6e0ef7f5643a2b57cc3fcb0292e1c", + "model_id": "c5464c754fb243e4a607eb3368707918", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "Running 0: 0%| | 0/1 [00:00SplitBlocks(5) 1: 0%| | 0/12 [00:00SplitBlocks(50) pid=5960)\u001b[0m /Users/luweizheng/miniconda3/envs/dispy/lib/python3.11/site-packages/ray/data/_internal/arrow_block.py:148: FutureWarning: promote has been superseded by mode='default'.\n", - "\u001b[36m(ReadParquet->SplitBlocks(50) pid=5960)\u001b[0m return transform_pyarrow.concat(tables)\n" - ] + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "9499c0dac4f14c55acde44de35bdb088", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "- MapBatches(transform_df) 2: 0%| | 0/12 [00:00 TaskPoolMapOperator[ReadParquet] -> TaskPoolMapOperator[MapBatches()]\n", - "2024-02-15 19:12:05,225\tINFO streaming_executor.py:113 -- Execution config: ExecutionOptions(resource_limits=ExecutionResources(cpu=None, gpu=None, object_store_memory=None), exclude_resources=ExecutionResources(cpu=0, gpu=0, object_store_memory=0), locality_with_output=False, preserve_order=False, actor_locality_enabled=True, verbose_progress=False)\n", - "2024-02-15 19:12:05,226\tINFO streaming_executor.py:115 -- Tip: For detailed progress reporting, run `ray.data.DataContext.get_current().execution_options.verbose_progress = True`\n" + "2024-04-23 15:42:44,780\tINFO streaming_executor.py:112 -- Starting execution of Dataset. Full logs are in /tmp/ray/session_2024-04-23_15-42-25_575692_75375/logs\n", + "2024-04-23 15:42:44,780\tINFO streaming_executor.py:113 -- Execution plan of Dataset: InputDataBuffer[Input] -> TaskPoolMapOperator[ReadParquet] -> TaskPoolMapOperator[MapBatches()]\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "b77916ceb4b74390badf1461ce5fca01", + "model_id": "8390741d4e5548c9980167341e3d35a6", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "- ReadParquet->SplitBlocks(5) 1: 0%| | 0/12 [00:00) 2: 0%| | 0/12 [00:00 TaskPoolMapOperator[ReadParquet] -> TaskPoolMapOperator[MapBatches(transform_df)] -> LimitOperator[limit=100] -> ActorPoolMapOperator[MapBatches(TorchPredictor)] -> LimitOperator[limit=3]\n", - "2024-02-15 19:12:06,831\tINFO streaming_executor.py:113 -- Execution config: ExecutionOptions(resource_limits=ExecutionResources(cpu=None, gpu=None, object_store_memory=None), exclude_resources=ExecutionResources(cpu=0, gpu=0, object_store_memory=0), locality_with_output=False, preserve_order=False, actor_locality_enabled=True, verbose_progress=False)\n", - "2024-02-15 19:12:06,832\tINFO streaming_executor.py:115 -- Tip: For detailed progress reporting, run `ray.data.DataContext.get_current().execution_options.verbose_progress = True`\n", - "2024-02-15 19:12:06,846\tINFO actor_pool_map_operator.py:114 -- MapBatches(TorchPredictor): Waiting for 2 pool actors to start...\n" + "2024-04-23 15:42:58,123\tWARNING util.py:560 -- The argument ``compute`` is deprecated in Ray 2.9. Please specify argument ``concurrency`` instead. For more information, see https://docs.ray.io/en/master/data/transforming-data.html#stateful-transforms.\n", + "2024-04-23 15:42:58,126\tINFO streaming_executor.py:112 -- Starting execution of Dataset. Full logs are in /tmp/ray/session_2024-04-23_15-42-25_575692_75375/logs\n", + "2024-04-23 15:42:58,126\tINFO streaming_executor.py:113 -- Execution plan of Dataset: InputDataBuffer[Input] -> TaskPoolMapOperator[ReadParquet] -> TaskPoolMapOperator[MapBatches(transform_df)] -> LimitOperator[limit=100] -> ActorPoolMapOperator[MapBatches(TorchPredictor)] -> LimitOperator[limit=3]\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "95bb2562908a46ec97ef831ae5ad6072", + "model_id": "37cb4284f0594a68968ff52547dc771a", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "- ReadParquet->SplitBlocks(5) 1: 0%| | 0/12 [00:00SplitBlocks(50) pid=5964)\u001b[0m /Users/luweizheng/miniconda3/envs/dispy/lib/python3.11/site-packages/ray/data/_internal/arrow_block.py:148: FutureWarning: promote has been superseded by mode='default'.\n", - "\u001b[36m(ReadParquet->SplitBlocks(50) pid=5964)\u001b[0m return transform_pyarrow.concat(tables)\n", - "2024-02-15 19:12:08,283\tWARNING actor_pool_map_operator.py:278 -- To ensure full parallelization across an actor pool of size 2, the Dataset should consist of at least 2 distinct blocks. Consider increasing the parallelism when creating the Dataset.\n" + "2024-04-23 15:43:00,636\tWARNING actor_pool_map_operator.py:292 -- To ensure full parallelization across an actor pool of size 2, the Dataset should consist of at least 2 distinct blocks. Consider increasing the parallelism when creating the Dataset.\n" ] }, { "data": { "text/plain": [ - "[{'output': 3.4000000953674316},\n", - " {'output': 3.4000000953674316},\n", - " {'output': 10.199999809265137}]" + "[{'output': 0.9700000286102295},\n", + " {'output': 1.100000023841858},\n", + " {'output': 2.509999990463257}]" ] }, "execution_count": 6, @@ -492,16 +691,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "2024-02-15 19:12:08,306\tWARNING plan.py:588 -- Warning: The Ray cluster currently does not have any available CPUs. The Dataset job will hang unless more CPUs are freed up. A common reason is that cluster resources are used by Actors or Tune trials; see the following link for more details: https://docs.ray.io/en/latest/data/data-internals.html#ray-data-and-tune\n", - "2024-02-15 19:12:08,332\tINFO streaming_executor.py:112 -- Executing DAG InputDataBuffer[Input] -> AllToAllOperator[Aggregate] -> LimitOperator[limit=20]\n", - "2024-02-15 19:12:08,333\tINFO streaming_executor.py:113 -- Execution config: ExecutionOptions(resource_limits=ExecutionResources(cpu=None, gpu=None, object_store_memory=None), exclude_resources=ExecutionResources(cpu=0, gpu=0, object_store_memory=0), locality_with_output=False, preserve_order=False, actor_locality_enabled=True, verbose_progress=False)\n", - "2024-02-15 19:12:08,333\tINFO streaming_executor.py:115 -- Tip: For detailed progress reporting, run `ray.data.DataContext.get_current().execution_options.verbose_progress = True`\n" + "2024-04-23 15:43:00,727\tINFO streaming_executor.py:112 -- Starting execution of Dataset. Full logs are in /tmp/ray/session_2024-04-23_15-42-25_575692_75375/logs\n", + "2024-04-23 15:43:00,727\tINFO streaming_executor.py:113 -- Execution plan of Dataset: InputDataBuffer[Input] -> AllToAllOperator[Aggregate] -> LimitOperator[limit=20]\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "54684a32e65142cda779ee0745f7b35e", + "model_id": "d97d55b85438459ea63d23561f58165d", "version_major": 2, "version_minor": 0 }, @@ -515,7 +712,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "f2083525d44f443bacf0c63eb410f32b", + "model_id": "78efa492f1cc4dd3ad583d32ae78393e", "version_major": 2, "version_minor": 0 }, @@ -529,7 +726,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "618743a4df1c44b19c29e9aa9839f31d", + "model_id": "164a214aecbd4cbc918974f34097577a", "version_major": 2, "version_minor": 0 }, @@ -543,7 +740,21 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "36ab45bb3d474ab589df37f2b39f7e69", + "model_id": "bb86ebe071484972a761a2776034e671", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "- limit=20 4: 0%| | 0/4 [00:00 + + +
map()
map()
map_batches()
map_batche...
Text is not SVG - cannot display
\ No newline at end of file diff --git a/ch-ray-data/preprocessor.ipynb b/ch-ray-data/preprocessor.ipynb index 9c793d0..3d6fc37 100644 --- a/ch-ray-data/preprocessor.ipynb +++ b/ch-ray-data/preprocessor.ipynb @@ -27,7 +27,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 3, "metadata": { "tags": [ "hide-cell" @@ -35,20 +35,21 @@ }, "outputs": [ { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/luweizheng/anaconda3/envs/dispy/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n", - "2023-12-15 14:07:48,341\tINFO util.py:159 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.\n", - "2023-12-15 14:07:50,654\tINFO worker.py:1673 -- Started a local Ray instance.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "文件夹 /Users/luweizheng/Projects/py-101/distributed-python/ch-ray-data/../data/nyc-taxi 已存在,无需操作。\n" + "ename": "KeyboardInterrupt", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[3], line 13\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mray\u001b[39;00m\n\u001b[1;32m 12\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m ray\u001b[38;5;241m.\u001b[39mis_initialized:\n\u001b[0;32m---> 13\u001b[0m \u001b[43mray\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mshutdown\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 15\u001b[0m ray\u001b[38;5;241m.\u001b[39minit()\n", + "File \u001b[0;32m~/miniconda3/envs/dispy/lib/python3.11/site-packages/ray/_private/client_mode_hook.py:103\u001b[0m, in \u001b[0;36mclient_mode_hook..wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 101\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m func\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m \u001b[38;5;241m!=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124minit\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m is_client_mode_enabled_by_default:\n\u001b[1;32m 102\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mgetattr\u001b[39m(ray, func\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m)(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m--> 103\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/miniconda3/envs/dispy/lib/python3.11/site-packages/ray/_private/worker.py:1838\u001b[0m, in \u001b[0;36mshutdown\u001b[0;34m(_exiting_interpreter)\u001b[0m\n\u001b[1;32m 1836\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m _global_node\u001b[38;5;241m.\u001b[39mis_head():\n\u001b[1;32m 1837\u001b[0m _global_node\u001b[38;5;241m.\u001b[39mdestroy_external_storage()\n\u001b[0;32m-> 1838\u001b[0m \u001b[43m_global_node\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mkill_all_processes\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcheck_alive\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mallow_graceful\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m 1839\u001b[0m _global_node \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1840\u001b[0m storage\u001b[38;5;241m.\u001b[39m_reset()\n", + "File \u001b[0;32m~/miniconda3/envs/dispy/lib/python3.11/site-packages/ray/_private/node.py:1604\u001b[0m, in \u001b[0;36mNode.kill_all_processes\u001b[0;34m(self, check_alive, allow_graceful, wait)\u001b[0m\n\u001b[1;32m 1598\u001b[0m \u001b[38;5;66;03m# Kill the raylet first. This is important for suppressing errors at\u001b[39;00m\n\u001b[1;32m 1599\u001b[0m \u001b[38;5;66;03m# shutdown because we give the raylet a chance to exit gracefully and\u001b[39;00m\n\u001b[1;32m 1600\u001b[0m \u001b[38;5;66;03m# clean up its child worker processes. If we were to kill the plasma\u001b[39;00m\n\u001b[1;32m 1601\u001b[0m \u001b[38;5;66;03m# store (or Redis) first, that could cause the raylet to exit\u001b[39;00m\n\u001b[1;32m 1602\u001b[0m \u001b[38;5;66;03m# ungracefully, leading to more verbose output from the workers.\u001b[39;00m\n\u001b[1;32m 1603\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m ray_constants\u001b[38;5;241m.\u001b[39mPROCESS_TYPE_RAYLET \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mall_processes:\n\u001b[0;32m-> 1604\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_kill_process_type\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1605\u001b[0m \u001b[43m \u001b[49m\u001b[43mray_constants\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mPROCESS_TYPE_RAYLET\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1606\u001b[0m \u001b[43m \u001b[49m\u001b[43mcheck_alive\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcheck_alive\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1607\u001b[0m \u001b[43m \u001b[49m\u001b[43mallow_graceful\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mallow_graceful\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1608\u001b[0m \u001b[43m \u001b[49m\u001b[43mwait\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mwait\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1609\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1611\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m ray_constants\u001b[38;5;241m.\u001b[39mPROCESS_TYPE_GCS_SERVER \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mall_processes:\n\u001b[1;32m 1612\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_kill_process_type(\n\u001b[1;32m 1613\u001b[0m ray_constants\u001b[38;5;241m.\u001b[39mPROCESS_TYPE_GCS_SERVER,\n\u001b[1;32m 1614\u001b[0m check_alive\u001b[38;5;241m=\u001b[39mcheck_alive,\n\u001b[1;32m 1615\u001b[0m allow_graceful\u001b[38;5;241m=\u001b[39mallow_graceful,\n\u001b[1;32m 1616\u001b[0m wait\u001b[38;5;241m=\u001b[39mwait,\n\u001b[1;32m 1617\u001b[0m )\n", + "File \u001b[0;32m~/miniconda3/envs/dispy/lib/python3.11/site-packages/ray/_private/node.py:1425\u001b[0m, in \u001b[0;36mNode._kill_process_type\u001b[0;34m(self, process_type, allow_graceful, check_alive, wait)\u001b[0m\n\u001b[1;32m 1423\u001b[0m \u001b[38;5;66;03m# Ensure thread safety\u001b[39;00m\n\u001b[1;32m 1424\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mremoval_lock:\n\u001b[0;32m-> 1425\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_kill_process_impl\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1426\u001b[0m \u001b[43m \u001b[49m\u001b[43mprocess_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1427\u001b[0m \u001b[43m \u001b[49m\u001b[43mallow_graceful\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mallow_graceful\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1428\u001b[0m \u001b[43m \u001b[49m\u001b[43mcheck_alive\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcheck_alive\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1429\u001b[0m \u001b[43m \u001b[49m\u001b[43mwait\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mwait\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1430\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/miniconda3/envs/dispy/lib/python3.11/site-packages/ray/_private/node.py:1481\u001b[0m, in \u001b[0;36mNode._kill_process_impl\u001b[0;34m(self, process_type, allow_graceful, check_alive, wait)\u001b[0m\n\u001b[1;32m 1479\u001b[0m timeout_seconds \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[1;32m 1480\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1481\u001b[0m \u001b[43mprocess\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mwait\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtimeout_seconds\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1482\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m subprocess\u001b[38;5;241m.\u001b[39mTimeoutExpired:\n\u001b[1;32m 1483\u001b[0m \u001b[38;5;28;01mpass\u001b[39;00m\n", + "File \u001b[0;32m~/miniconda3/envs/dispy/lib/python3.11/subprocess.py:1264\u001b[0m, in \u001b[0;36mPopen.wait\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m 1262\u001b[0m endtime \u001b[38;5;241m=\u001b[39m _time() \u001b[38;5;241m+\u001b[39m timeout\n\u001b[1;32m 1263\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1264\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_wait\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1265\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyboardInterrupt\u001b[39;00m:\n\u001b[1;32m 1266\u001b[0m \u001b[38;5;66;03m# https://bugs.python.org/issue25942\u001b[39;00m\n\u001b[1;32m 1267\u001b[0m \u001b[38;5;66;03m# The first keyboard interrupt waits briefly for the child to\u001b[39;00m\n\u001b[1;32m 1268\u001b[0m \u001b[38;5;66;03m# exit under the common assumption that it also received the ^C\u001b[39;00m\n\u001b[1;32m 1269\u001b[0m \u001b[38;5;66;03m# generated SIGINT and will exit rapidly.\u001b[39;00m\n\u001b[1;32m 1270\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m timeout \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", + "File \u001b[0;32m~/miniconda3/envs/dispy/lib/python3.11/subprocess.py:2040\u001b[0m, in \u001b[0;36mPopen._wait\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m 2038\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m TimeoutExpired(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs, timeout)\n\u001b[1;32m 2039\u001b[0m delay \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mmin\u001b[39m(delay \u001b[38;5;241m*\u001b[39m \u001b[38;5;241m2\u001b[39m, remaining, \u001b[38;5;241m.05\u001b[39m)\n\u001b[0;32m-> 2040\u001b[0m time\u001b[38;5;241m.\u001b[39msleep(delay)\n\u001b[1;32m 2041\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 2042\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mreturncode \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", + "\u001b[0;31mKeyboardInterrupt\u001b[0m: " ] } ], @@ -58,68 +59,40 @@ "import urllib.request\n", "from typing import Any, Dict\n", "\n", + "import sys\n", + "sys.path.append(\"..\")\n", + "from datasets import nyc_taxi\n", + "\n", "import ray\n", "\n", "if ray.is_initialized:\n", " ray.shutdown()\n", "\n", - "ray.init()\n", - "\n", - "folder_path = os.path.join(os.getcwd(), \"../data/nyc-taxi\")\n", - "download_url = \"https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2023-06.parquet\"\n", - "file_name = download_url.split(\"/\")[-1]\n", - "parquet_file_path = os.path.join(folder_path, file_name)\n", - "if not os.path.exists(folder_path):\n", - " # 创建文件夹\n", - " os.makedirs(folder_path)\n", - " print(f\"文件夹 {folder_path} 不存在,已创建。\")\n", - " # 下载并保存 Parquet 文件\n", - " with urllib.request.urlopen(download_url) as response, open(parquet_file_path, 'wb') as out_file:\n", - " shutil.copyfileobj(response, out_file)\n", - " print(\"数据已下载并保存为 Parquet 文件。\")\n", - "else:\n", - " print(f\"文件夹 {folder_path} 已存在,无需操作。\")" + "ray.init()" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, "outputs": [ { - "name": "stderr", - "output_type": "stream", - "text": [ - "(pid=6866) Parquet Files Sample 0: 0%| | 0/1 [00:00 TaskPoolMapOperator[ReadParquet] -> LimitOperator[limit=1]\n", - "2023-12-15 14:17:24,492\tINFO streaming_executor.py:105 -- Execution config: ExecutionOptions(resource_limits=ExecutionResources(cpu=None, gpu=None, object_store_memory=None), locality_with_output=False, preserve_order=False, actor_locality_enabled=True, verbose_progress=False)\n", - "2023-12-15 14:17:24,493\tINFO streaming_executor.py:107 -- Tip: For detailed progress reporting, run `ray.data.DataContext.get_current().execution_options.verbose_progress = True`\n", - " \r" + "ename": "NameError", + "evalue": "name 'nyc_taxi' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[2], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mray\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdata\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpreprocessors\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m MinMaxScaler\n\u001b[0;32m----> 3\u001b[0m dataset_path \u001b[38;5;241m=\u001b[39m \u001b[43mnyc_taxi\u001b[49m()\n\u001b[1;32m 4\u001b[0m ds \u001b[38;5;241m=\u001b[39m ray\u001b[38;5;241m.\u001b[39mdata\u001b[38;5;241m.\u001b[39mread_parquet(dataset_path,\n\u001b[1;32m 5\u001b[0m columns\u001b[38;5;241m=\u001b[39m[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtrip_distance\u001b[39m\u001b[38;5;124m\"\u001b[39m])\n\u001b[1;32m 6\u001b[0m ds\u001b[38;5;241m.\u001b[39mtake(\u001b[38;5;241m1\u001b[39m)\n", + "\u001b[0;31mNameError\u001b[0m: name 'nyc_taxi' is not defined" ] - }, - { - "data": { - "text/plain": [ - "[{'trip_distance': 3.4}]" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" } ], "source": [ "from ray.data.preprocessors import MinMaxScaler\n", "\n", - "ds = ray.data.read_parquet(parquet_file_path,\n", + "dataset_path = nyc_taxi()\n", + "ds = ray.data.read_parquet(dataset_path,\n", " columns=[\"trip_distance\"])\n", "ds.take(1)" ] @@ -133,7 +106,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -189,7 +162,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -310,7 +283,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.5" + "version": "3.11.7" } }, "nbformat": 4, diff --git a/ch-ray-train-tune/tune-algorithm-scheduler.ipynb b/ch-ray-train-tune/tune-algorithm-scheduler.ipynb index 1f17d97..205b29c 100644 --- a/ch-ray-train-tune/tune-algorithm-scheduler.ipynb +++ b/ch-ray-train-tune/tune-algorithm-scheduler.ipynb @@ -116,11 +116,11 @@ "outputs": [], "source": [ "import os\n", - "import urllib\n", - "import shutil\n", "import tempfile\n", "\n", - "from zipfile import ZipFile\n", + "import sys\n", + "sys.path.append(\"..\")\n", + "from datasets import nyc_flights\n", "\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", @@ -141,17 +141,8 @@ "from ray.tune.integration.xgboost import TuneReportCheckpointCallback\n", "from ray.tune.schedulers import PopulationBasedTraining\n", "\n", - "folder_path = os.path.join(os.getcwd(), \"../data/\")\n", - "download_url = \"https://dp.godaai.org/nyc-flights.zip\"\n", - "zip_file_path = os.path.join(folder_path, \"nyc-flights.zip\")\n", - "if not os.path.exists(os.path.join(folder_path, \"nyc-flights\")):\n", - " with urllib.request.urlopen(download_url) as response, open(zip_file_path, 'wb') as out_file:\n", - " shutil.copyfileobj(response, out_file)\n", - " zf = ZipFile(zip_file_path, 'r')\n", - " zf.extractall(folder_path)\n", - " zf.close()\n", - "\n", - "file_path = os.path.join(folder_path, \"nyc-flights\", \"1991.csv\")" + "folder_path = nyc_flights()\n", + "file_path = os.path.join(folder_path, \"1991.csv\")" ] }, { diff --git a/conf.py b/conf.py index 47937e7..cb16516 100644 --- a/conf.py +++ b/conf.py @@ -49,7 +49,7 @@ } html_static_path = ["_static"] html_css_files = ["custom.css"] -html_title = 'Python 分布式编程' +html_title = 'Python 数据科学加速' latex_engine = 'pdflatex' myst_enable_extensions = ['colon_fence', 'dollarmath', 'linkify', 'substitution', 'tasklist'] myst_url_schemes = ['mailto', 'http', 'https'] diff --git a/datasets.py b/datasets.py new file mode 100644 index 0000000..8331ca0 --- /dev/null +++ b/datasets.py @@ -0,0 +1,42 @@ +import os +import shutil +import urllib + +def nyc_taxi(data_path: str = "../data/nyc-taxi"): + + folder_path = os.path.join(os.getcwd(), "../data/nyc-taxi") + download_url = [ + "https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2023-01.parquet", + "https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2023-02.parquet", + "https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2023-03.parquet", + "https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2023-04.parquet", + "https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2023-05.parquet", + "https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2023-06.parquet", + ] + if not os.path.exists(folder_path): + os.makedirs(folder_path) + for url in download_url: + file_name = url.split("/")[-1] + parquet_file_path = os.path.join(folder_path, file_name) + if not os.path.exists(os.path.join(folder_path, file_name)): + print(f"正在下载 {file_name}") + with urllib.request.urlopen(url) as response, open(parquet_file_path, 'wb') as out_file: + shutil.copyfileobj(response, out_file) + + return folder_path + + +def nyc_flights(data_path: str = "../data/"): + folder_path = os.path.join(os.getcwd(), data_path) + download_url = "https://dp.godaai.org/nyc-flights.zip" + zip_file_path = os.path.join(folder_path, "nyc-flights.zip") + if not os.path.exists(os.path.join(folder_path, "nyc-flights")): + print(f"正在下载 nyc-flights.zip") + with urllib.request.urlopen(download_url) as response, open(zip_file_path, 'wb') as out_file: + shutil.copyfileobj(response, out_file) + zf = ZipFile(zip_file_path, 'r') + zf.extractall(folder_path) + zf.close() + + folder_path = os.path.join(folder_path, "nyc-flights") + return folder_path \ No newline at end of file diff --git a/drawio/ch-dask-dataframe/map-partitions.drawio b/drawio/ch-dask-dataframe/map-partitions.drawio new file mode 100644 index 0000000..7dc874d --- /dev/null +++ b/drawio/ch-dask-dataframe/map-partitions.drawio @@ -0,0 +1,61 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/drawio/ch-mpi-large-model/data-paralism.drawio b/drawio/ch-mpi-large-model/data-paralism.drawio deleted file mode 100644 index b006cdd..0000000 --- a/drawio/ch-mpi-large-model/data-paralism.drawio +++ /dev/null @@ -1,628 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/drawio/ch-mpi-large-model/data-parallel.drawio b/drawio/ch-mpi-large-model/data-parallel.drawio index 8d88f2b..e0cb786 100644 --- a/drawio/ch-mpi-large-model/data-parallel.drawio +++ b/drawio/ch-mpi-large-model/data-parallel.drawio @@ -1,99 +1,99 @@ - + - + - + - + - + - - - - + - + - + - + - - - - + - + - + - + - - - - + - + - + - - - - - + + - + - + - + - + - + - + - + + + + + + + + + + + + + diff --git a/drawio/ch-mpi-large-model/pipeline-parallel.drawio b/drawio/ch-mpi-large-model/pipeline-parallel.drawio index 35bf846..79860f4 100644 --- a/drawio/ch-mpi-large-model/pipeline-parallel.drawio +++ b/drawio/ch-mpi-large-model/pipeline-parallel.drawio @@ -1,97 +1,97 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - - + + - + - + - + - + diff --git a/drawio/ch-mpi-large-model/pipline-parallel.drawio b/drawio/ch-mpi-large-model/pipline-parallel.drawio deleted file mode 100644 index 59ca275..0000000 --- a/drawio/ch-mpi-large-model/pipline-parallel.drawio +++ /dev/null @@ -1,500 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/drawio/ch-ray-data/map-map-batches.drawio b/drawio/ch-ray-data/map-map-batches.drawio index 651165b..198ae47 100644 --- a/drawio/ch-ray-data/map-map-batches.drawio +++ b/drawio/ch-ray-data/map-map-batches.drawio @@ -1,10 +1,10 @@ - + - + - + @@ -32,13 +32,13 @@ - + - + @@ -55,14 +55,14 @@ - + - + @@ -77,19 +77,19 @@ - + - + - + - + @@ -98,7 +98,7 @@ - + diff --git a/img/ch-dask-dataframe/map-partitions.svg b/img/ch-dask-dataframe/map-partitions.svg new file mode 100644 index 0000000..8200247 --- /dev/null +++ b/img/ch-dask-dataframe/map-partitions.svg @@ -0,0 +1,4 @@ + + + +
map_partitions()
map_partit...
pandas
DataFrame
pandas...
Text is not SVG - cannot display
\ No newline at end of file diff --git a/img/ch-dask-dataframe/nyc-flights-graph.svg b/img/ch-dask-dataframe/nyc-flights-graph.svg index b9d0a86..b21a31d 100644 --- a/img/ch-dask-dataframe/nyc-flights-graph.svg +++ b/img/ch-dask-dataframe/nyc-flights-graph.svg @@ -1,262 +1,262 @@ - - + --7150382809904974857 +3521720512806484412 read-csv - + --6194689680917011400 +6736009212905862099 0 - + --7150382809904974857->-6194689680917011400 +3521720512806484412->6736009212905862099 - + -6071941830101125281 +-1563468653830855738 to_pyarrow_string - + --6194689680917011400->6071941830101125281 +6736009212905862099->-1563468653830855738 - + --65660599333282282 +2484881100534163769 read-csv - + -312951075184987025 +-2555422199051202395 1 - + --65660599333282282->312951075184987025 +2484881100534163769->-2555422199051202395 - + --8135483071169533727 +2675850518608036870 to_pyarrow_string - + -312951075184987025->-8135483071169533727 +-2555422199051202395->2675850518608036870 - + --1102500011605602925 +1680754195385590727 read-csv - + --8978480336772077469 +3952218557050796030 2 - + --1102500011605602925->-8978480336772077469 +1680754195385590727->3952218557050796030 - + --1050760860597841152 +4484414144468516194 to_pyarrow_string - + --8978480336772077469->-1050760860597841152 +3952218557050796030->4484414144468516194 - + --1906626916754175967 +1719447117322426477 read-csv - + --2470839580670079044 +-7986884760556757161 3 - + --1906626916754175967->-2470839580670079044 +1719447117322426477->-7986884760556757161 - + -4071937302134756076 +8723733316907408802 to_pyarrow_string - + --2470839580670079044->4071937302134756076 +-7986884760556757161->8723733316907408802 - + -5178095293817516608 +5958766289761319085 read-csv - + -4036801175431919381 +5566785284180098089 4 - + -5178095293817516608->4036801175431919381 +5958766289761319085->5566785284180098089 - + -8508987607055959954 +7919606411758835760 to_pyarrow_string - + -4036801175431919381->8508987607055959954 +5566785284180098089->7919606411758835760 - + --9029329607453142400 +-121519200098467208 read-csv - + --856272853540776985 +7676068657297728386 5 - + --9029329607453142400->-856272853540776985 +-121519200098467208->7676068657297728386 - + --2363636268343853305 +2288159746428799421 to_pyarrow_string - + --856272853540776985->-2363636268343853305 +7676068657297728386->2288159746428799421 - + -8774990811659256194 +-5288713516117402287 0 - + -6071941830101125281->8774990811659256194 +-1563468653830855738->-5288713516117402287 - + -3881916782686559828 +8264956528619452963 1 - + --8135483071169533727->3881916782686559828 +2675850518608036870->8264956528619452963 - + --8057186534920993363 +-8072504171972468356 2 - + --1050760860597841152->-8057186534920993363 +4484414144468516194->-8072504171972468356 - + -1098126126831493759 +5481165872764386894 3 - + -4071937302134756076->1098126126831493759 +8723733316907408802->5481165872764386894 - + -7605766882933492184 +-6457937444843166297 4 - + -8508987607055959954->7605766882933492184 +7919606411758835760->-6457937444843166297 - + --4333336434674061007 +49703311258832128 5 - + --2363636268343853305->-4333336434674061007 +2288159746428799421->49703311258832128 diff --git a/img/ch-mpi-large-model/data-parallel.svg b/img/ch-mpi-large-model/data-parallel.svg index 8a096fa..a3815ae 100644 --- a/img/ch-mpi-large-model/data-parallel.svg +++ b/img/ch-mpi-large-model/data-parallel.svg @@ -1,4 +1,4 @@ -
GPU 0
GPU 0
模型拷贝
模型拷贝
GPU 1
GPU 1
模型拷贝
模型拷贝
GPU 2
GPU 2
模型拷贝
模型拷贝
GPU 2
GPU 2
模型拷贝
模型拷贝
训练数据
训练数据
Text is not SVG - cannot display
\ No newline at end of file +
GPU 0
GPU 0
模型拷贝
模型拷贝
GPU 1
GPU 1
模型拷贝
模型拷贝
GPU 2
GPU 2
模型拷贝
模型拷贝
GPU 3
GPU 3
模型拷贝
模型拷贝
训练数据
训练数据
Text is not SVG - cannot display
\ No newline at end of file diff --git a/img/ch-mpi-large-model/pipeline-parallel.svg b/img/ch-mpi-large-model/pipeline-parallel.svg index 03d030d..fed840e 100644 --- a/img/ch-mpi-large-model/pipeline-parallel.svg +++ b/img/ch-mpi-large-model/pipeline-parallel.svg @@ -1,4 +1,4 @@ -
GPU 0
GPU 0
模型第 0 层
模型第 0 层
GPU 1
GPU 1
模型第 1 层
模型第 1 层
GPU 2
GPU 2
模型第 2 层
模型第 2 层
GPU 3
GPU 3
模型第 3 层
模型第 3 层
前向传播
前向传播
前向传播
前向传播
Text is not SVG - cannot display
\ No newline at end of file +
GPU 0
GPU 0
模型第 0 层
模型第 0 层
GPU 1
GPU 1
模型第 1 层
模型第 1 层
GPU 2
GPU 2
模型第 2 层
模型第 2 层
GPU 3
GPU 3
模型第 3 层
模型第 3 层
前向传播
前向传播
反向传播
反向传播
Text is not SVG - cannot display
\ No newline at end of file diff --git a/img/ch-ray-data/map-map-batches.svg b/img/ch-ray-data/map-map-batches.svg index 30c8538..90be5d0 100644 --- a/img/ch-ray-data/map-map-batches.svg +++ b/img/ch-ray-data/map-map-batches.svg @@ -1,4 +1,4 @@ -
map()
map()
map_batches()
map_batche...
Text is not SVG - cannot display
\ No newline at end of file +
map()
map()
map_batches()
map_batche...
Text is not SVG - cannot display
\ No newline at end of file diff --git a/img/mpi-logo.png b/img/mpi-logo.png new file mode 100644 index 0000000000000000000000000000000000000000..6d41a887b045a10ba665c3e5ea4c4a44cff091a8 GIT binary patch literal 20734 zcmb??Ra=}*(=C?Z7J|DC1h?SsPH=Y%?(PJ47+`RM2Y1)t?gSYmc#y$ezj@wk|Ac*T z%TaZgbl2)t)zROSWzbQGP+(wS(B)($)nH&?C!zl%NFSl^R6Fxa7#I{W8wrVTauO00 z-&~!nZ0s#zU}VvA)4ep*SMefex_P>1KT%Q8-VpgBQ_Ks<;v%R<%LRXfiwFaFN`8?3 zjQvH+LZc5>CX&ir>NDz(P_t#^E39wBzf~or_BZ0}kk6~XUT4d9toO{Fep+>+=77s{C&7o^~NDWj(3Fdo3t++5$qgz1;~X} zRJ((<_IG6mjGbc{Ai!P$k?u5M7gt_f=eNx)+U=q^gaU&M+P~IC{)m>{8gf}M`}gz< z3lBMpFl?VQjVBKQ1@_S#QlAOWqOag!kWvTvx?C}0ANjuL!FZQi@5LZQ%bAs8FK5{7 z{~NV>x@=T`G-GE>9Z=ei@>ZO=X+le2!w;@?N|%DA)pf}Ho(!obfk1Ts9DIN5UELxJ zRDTs3^?la71f0&~iN)pVZrPt!v3BFZ+|lS|lQ#CGX~Hx)d=%Z^u2MOtfVuhr)3lj$ zO%`Eqlrp%3Mu!@EdTQ?mn{UU$_>bo~Kh$ZwH-hgzbK>k-OP~~AB!pTHm(x;ezqG7W zL2_~u4Ffb|@p_N-zT3xVGc`8Al#}0we;NKyQ}tgea#-~D&TzpHog@|ul(3+s?H~ef zIz+<2T)WTTJ&a)j!uh_n$svw93BgEE!_zp5_`(I8=wq>?jP`VWuTHLYOXToI>Ri6_|76IhjKfAG(gbqLw80+3ziW@iTJI+2D=$z zgA{s!g--K?c-HAGo z41YZRM4YAI`uGYP5Qd!rw_u9C0@D(_S|>n)B-20fSJE3d2NAyyye;2`CydeBkH1ZP z0mBq-+z&(&9Yunllh#2ZzLXSC(xHLHrnF0@DuyeU%1Xj0CT@?HO?KIX{}@?dnn8d! zOt>4V8@FwyW%|cVo^2_^JVQ$n|A5;Hi!~YRGx%fxeH(D;-Nmr^`8*W!x7&r8KV=U} z5J?c(RwDUuFI^I{9)evcCS^JWjXjM977@}R(rXxYKbLCAFBNyH2rQpL1G6vn2`ge< zawGs{;AgoA8hkaHFPTjt8gjN&h5#WzHzjb4-$KTfKv&dArG_S-!k88_MdVv5J>av7 zJc*R}sxqH^yQq(-k3@+Y`Y*>tor;W(Ts@U;$pHBPRv92C=o+*;Rt>C7wo5w!`O&JR zE`z*4#w_`C#*{(wZ>2A7aH_vWf2)TS)oB$-Z#VS|h09506{eX*nPr(poBi1NI;2Y5 zl@gND`bni+^r&v0wuF*Me4)f=zO6~AiGR&r}%jRcrWbvbIff#=B7tFE^A?wwOW|F8KwWgMT#8)$n3DR+ z;--d4$4t`v@AWZ7a zCrsqJUvxj{I5ZvCJq@f4X4x5<2aH76?wU7Fmu?(1SC=zydGyt&+Ldok=~Kr822mgp7fnz0ZZ5?|F+wvFXMI*>HO&G7v##ea&@P_OruX1p`zhm z;4d;0xX!t3w~@Fevskdi8GAbyENX(&cJ)r4CY6@s4?PDR1~*oOR$tqm88H8qE z6$D;{D&&7C#R$>}T8P4^9LS$Ocpwb?hW*_cO3pfOOk=Q!2Pgg_;TbN7{1wd*_sNZb zg^8;QaO%^h=^g0pg}`hkB`Go3DVsPtKPnR&kEej=B|RoHB()@?B;_Xsl2QQaFzGh# zNyq8MQIrbdkrU(k(RjL+-_LI-MDCvMpzNNpa2ljHp02qz@0U-^)Fc`I)^%YZBPJ)^ z!g`O!=`Y_`thq66_u5BT26je1ryLG59>!l9K3KjaBY?sh!saAFgND)Oay4?ea=UOU zV$FX&8jP<(_R3R}Qj5NDmd}+>*@QJcG%bkJn#R;I8gdHJb+Zm|6|qTljN2@oY2p-$ z&Lu0-yXjhWW+c2R-pa<&WYhl8MQyZg*tC|h%pX^s#6Gy$%i43CARW6Ndk*|+_0`M6 zy2tY^;^p_(Gdqmx@IN&M;C5v%!`~g-w>>9`4ipZus}~c}6Y-YBTaGcCi>g0W>v~MS zH=Z91i?@rbZg(?J>tomt+2h(nHmn+}ZNVTaqq$qNiJh1o725TblP%om^O(PiA@~88oK3c zZDp3qj*E{&h!V37x{v)@A-QWK;N{*ehoA-6c=#biVY1I8T|uMw-|VWz9EMESx2pzb zP704ZdaZ+ugCrrcm&r3S;W8y7{fQn+0*lK$zj#u)`{PrF>vvs7oL>LlO8DVa9)061 zYJWiBrR2NZ8F?T-&qEVj^@$0nIHG&aziXM*i_t6iv;HT#rI_2sPtkAdD0m^4befb| zB>Ua}ChjgXt8v^~VJ3MRHz%7f-z)G+d^r3vHX1*YU)x={qiJtGx2CRUb0P#(YjoM} zS`O}he$fO~TG#Ly3IyZ^bVDLu&lS~o$&0jryPk-(l4lVxeEqK|SmzKqBR`!V8g}fF# zo8OEiADyZ^sHEk`i+K4Xzg&3(9IOtu?!&I1Irw!0p_mfcSytB#1_l-9zXuj3D~A9E zMifR)QcS}e7Lx0eWVvj(rEoM2x&_a+t;yVw${II2Qe{LDckSqOsw02T|@n{8@7xDbkz^jZWSB&+X>{ ze#*w(XMr05=YIFy_s^R7!Y4w9UOm>2n{H?A_a&b{AtAtph*Dtv|NO9pA}Pk1H#R+xT7zw_+60-_|KF(MKNpG#`)l0yFtIY1 zHcp`0`0p#~3D+kn|LR(J#-D%T@qjR7W@+GRAkkR44n|togV^ zSht$N7S%IN0~xW%0@cIiLWe&)GYe~p%Q|YTjoGogo`HevgmwDh*!b?cxL%e(TDl5L z>Aw2mJp1E}U~f_asZj1bXCKnoegK&`jeUc|gdVL)r&|M0NS1#<_X0mzkSsJ*&oU^<|Io{T_gLXjbsJ20s=79zrNv_CiXP ze1`ML!imU*80lSm6JJF*fD~KDh(C+$b^S+-`Tn?;ZxxFG9BxQRpECtk*lLbvG4l#l zzO0+Je-oc=IN*fJ+3GD2GN}9|52AyCIiz*kVIn0-!S;ZJV9H&FuA8e^x~jk;ukOaf zBU3$VVTh0*bW1xVmJ&EbgXCsdM@5B2;Q!x5<|2}4^?etPmRi>fwZE{~N9Yv{bs#aZ zk|?Qc+#3$fOn8U4J6=Wow^o14P#`t(ImqhAm1Qc3%#oyt13j>Lg@W>T62>olQ5Y3^ zwWzHB=PYVCv5cwjnrmXeMUz%31La0?XOCEMP=qy8-cn}z=;6t`Wp%Mup zvI2_5+C9(Q4kJdhXldfH9#BK@rslmKx8lxdF;yyT_QoB8;3#RZ2&|w}e%|V+UZQ23 zGz4w@3GxvCp@hvF@E}~mM>&vQr415{#6X75l~Nii`8|F@QXrkB>n?_?{bb`+n~rPc zbI7C2xhheM8Meqk9Ih2=;sZGn8Z&gduMTT}Dru@%YRv^a?Q43EC6BVH>fFyn*WQ&{ z%zXmOoyLc?PD?YSLPbqvv5@*tog`YcK>qdtXDJUlVnN`-&iNUV9)0B76S|>mg*-!F zg_h0}YR&&5m+61f`7g*7I*omFvvNAidn+W53@I;msP7p_&yB=W2^@XJp*x}p!heE| zsr@|ovTOWC=$c3c7DO})_iC_o1Y1Q&AGpaG|F+Wtv_OWi_|b6x882|NG6*s{g<^c4 zIe)Z*d8-mn-)@JXOxmVmgE>)0i)u=>L3X!I*XL?;Jl?dx$Wc=S=^|^-=D&2QcY)70L0ThBYcIjk3=q zTNc0fZr{fBjx=qbh;S*g@3+jk#sSGqhz39Qgs-J>d6dW**6Iv$>kob@tlEk7&4e48 z$-$kmAy0uZq7%-m*VC*dD<|D8DnrB8t{M%~Zr`_%of=9p{j8OUUw2)Tu$lOlWS~{L z>!wMuOiqr)M1Ja;N{H6?=J(%UMrz30UW?sVSS5req{XrounrCgmXbpSnE59)d>_HQ zr20LKwMCPoZgfaYd4UY1FDH?yL+pfS8eSajvv=ICc=L(KgO-Qfon&rM!N^N9w)GP@ zRiS@IlP3?K_HNyE(yMBfkL7YZs_CAdJqnd?kG)>6gng>rhbUUd_gunU%x~~Hf*2%v zJ;7^$fJdSa5ZUpSeeeA>o)a{cozcp;1?yHU(FT>t`R<++XbUb1{hMBSg&F8)z)LmcZ<<4CRJR)Uv zEvdc6UStRzkqo`S`*vv-3Mu9h0_QjhV6_Oa;$))U1wVnqlyb6VkuZcSoHVTcV?zJ# z$0o?t#iMwajI6ASy>1Am1dMnM|rJ*T^nD##*AFuNiFHwIWa(8JKJn#1Y? zT*yV&T`C%B#rg?N~<}<0um2et?A56u$Ke)6&`fKO4{jYa>jyC*N+qNDpa!=QPFEq&pMhR`K$HGi9Mu4V*L5jw!ml&;%#BVzHiT7q3}n8 z(uM%_;GU*0w}YwR<=g(2v6DkKse8kQ-yIbSWD(%NPKT|kw)=5^tpgUqx}Q_O2Wt^sV8n%cAfUy~k z^4Kc=sel17~rL`Pzg-2jF+z*-b6-5 zGTNQ?T(S?Fx3=?v$}m7-UgyAHmue+c zV7%(R)3-&_t}*@~iY7T$(3JCewlz-dZ01CU)FDf2#?WEo`+~BE#)ZP52mSKg8^~a` zF1RcAbhtFWjvWgSL0rZ<{^CGLCR4XooR@k`~FNw>n?=LlY=9#pJO#U|>LXPpa z=b+T`2lGDSQ`lM4db(^Pt6~`z&NyoT8hIXeM`;mG?vZW50-3(mY`+z{Zp*(@d z?^!_)*oK2{f&jE)aCBi>$92#>pIIkaU2bEA;EnXYcP|=FEFFIB$3KjxDV|{`DPTv6 z$ZC*G?Ev99cMtJKLV|($!U?~9b5~cOk4OtKRdKgs)=Ch~H4aBk=s_v_Rw->@r`aTZ zE&dL7X_v$87{z4s6c^C(WfI-S^&sBv$G|c#f{rR#fV6G-R-)ay*t|y-hdw!@N!k$2 zZ}~9feqd~l0G^6Ti&u{62SR7M=`gdDCWu_AI(F_sN|$n*TTq#xFR4|oz51J$JCkv1 znw_8Fva8R{wt?|_*xa9$re--{n@G}Q92%oio`av@sBZDVX3U?x_@6H5(q*s;1LeO;iY`|DoG>&RB?Cp>2ak zdi-Y$8E>;OZ4*T{3|7YJX!VH}0JSlRClm)0do~E{p!BY-kCH2|i+dswyoZ3g!wABq;j(F3n zb~Ye)pEjN{AEH08e?y?A8rre2>H zH)zpew2#QP?ht3#W8(|zcT5W5;v*Q2$9)}G9_8zT#+0o-kv~iKsZu`CW+_==7Ad&a z%~#Jbp>ma%&wzxZ!dux0n^EK*A`nhA!%PXEk48d8v0RN}^3+W?{P7wA?35=e5b`?b z*;TbP)~L?EBk>U)wlyD|8z>s|u*_j(IX$klMX`RgPohld(eDo53J7mk5E0Co60~UM zziNsobF?2JD2vLj->=+z?~jh0zbw0_XU#Kw+Gvz-AWKs828_M@m752aE*WFGM}i!T z@$XD1lUj?O;X5%O8GsUI)qG9qzgI~5q;uJP$Uc1JJc7Lct>!0vK0e@5`aX{xV(Eg< zs5qL!tUKC};IO*Ox?VJyiO@wlk$y^#oCwHZ5|Z;3H*lJ66NVoF+T(qmhL!Gv9KDGe zKYSsK0y^j-0jqU)a9w{{V!|e{^d3q8p3+S z%1y|LrH<(piJM+&_daKrgHnl%<}U#&s_tvhW;&qMn7u+2O|cu~0q>`FZmet@$kJl^ z)$xS)!fhv9APjkig;JsWPrlV|)Ec~g z5p2nlhCG&tS?aeaW<3a*@{@LQrMu<~43J?UxN`UdD-cZ0y82HSMIwq0Dn6tUYhL!{ z$+>|BJx?ia;FL#{kF8p~b7{z1aT$W{27eqa{yt2j4&!T@G3yXT3w|s_<*xil{Ic(} zyMe-pdc;OK5DnunasNZt-FYDFH)+@xtO(T%n9_NHpvR51gn(5xE$3gygMJ`?j9LnX zcsVVQM^1v!OU7d-DD8L}A%l1rJ;l6XP6k!4eD|G=@^8z1B&uQt zpACk3fc8m0>2LZNIv9tMo&)nDq`tnTOw4Nh$N&OhoOWh~NJ~#4Ia1KU+jA%Ft5*LN5a=AoKUasF`3xQ|Zteq*=>0L-s1o z?-!TP#{b2@!J$yb-%GkK8lyLpk; zV@TC`a#?Z9FvYpi(}}RAwuR-9;@bV;*0&Z#(JsHS;?J&u92U2IpU8<9Y}9Dbjy^BP zM86A&^+}GRlUg9Fl7URaNBjmd-f1Md#^zf>ZLwQIZJZ7?TRWe{m_OWcs(%z+7&KX! z_0*S%+~;;FW{i9!0#Bp*F-=WRmuAK~x>&5BOaFZEWD-!WE@kAZ_7%9BtgkI8QJ?wZ z6sVfl6NYRfM7Il*8vRKipF}tOV8=x|gO~|(Vj;rs1gXx|Li2EmwygcAF>HY)iJr&L zk*=RfvjyIVU7DBeF$pbjOLmMeXP&Dc!99+ckcSi&oD!Ky6Mc;kId$8f?~TfNZtr_C z)2l+q!m9M5u}DPeXM7yxd8s&Gc{E`PIq~k%^7n}l95fn-by91$e$%)ORDoimQcRDu0w(7-510Mx=+U zC*aPH1FEq4Kl4ckqR`7?L>9}&W(&rSdJ-6Md?Ppl<&t%hY9SmMn=-EK<35ci@>V(X znle`0_8LlW*Msqsdq~rPypPoXG#42g_1U{0MS4{R^6m?1JxK3#jrcS39*;K|LLfS4 zKiP7FH7-WSZYwT1Iov<-*P#baW?M30G>V)aWB)Xs7tys8&q|^lj3(c1VnAG8!}%;m z_BY%RLE~Y@Awg-YE}zO7bBWg$#gu5h-2HmSUxP6k6R#3k+%5C(femRvwmUV*2sH2E zw90EKR*03q<{Pgca6xNYY)AYgjlHEjqHPmXhv74K7~5p9>kkgFTT!bUYONdEV)=?)% zHoZxMk;EPmU1eZAkSbm-lYJ3`QR~tbpGL8+R(&lr3|DI~Igf4CybU){IhI|y7H`4q zQY8d;BGfTYyC0_pQ&emJua@X%n%PxmZH<|wVT8n52Y(!&S~M2Cut@1h29VKG+deT zqa&7Rf$~WuqN50fP1^vlBhmg76RC2dHMr#8#T^Xny)h0zYNtkj?T33a=Axm|%(WAo zshZ8lZG(=zrAs4=na)0L6#q-rIaKSK1{dA0$ztPW9-ajcBxJ?93om}|TN~mOA~tR4 zq7iSOgnvuJXV7Qzu=y120WHaDEGUH}`)F*k3;o2>g{sWnV~%-DeUp@mwO{`Y+1wS9 zACZ4nbzMdx@fDZ&X722vsXOp8dZdHlgR4Icaa8+i!@^7^H{%};(a{2B_@^8ZB~Yp$ zJp?unN+P>_3e*IrYa*HAbvg(Lo^Dc^G&F61=Oe3{09XM2$ z$2s%(ZPRsE3xcC%$H$)npFcGiY@}2d{9K5nRKR$4=E?_#%PKN4lU%*qcZ&PRX^ws( z9+<{T0a7_WJK#s}7|&ou2^-)nx<3Uf*!?y1-WYkRmQCII0EgI2s&5a>V){y_)qN#U zNjmTLWFlAJVG)?FVffL941E9LHIn{X;?rPQmDFD%AEgv%CiJozb1j?<9Uw$P!-RXK zQN*k(zd2AbsvvinM%=of5VZfw*Cex0GMX|K?BbEn{|5_-8hmfl?MVq(SV{h<_z3-~ zdenhEH@*_(KAZd?jyN|l5uU-Nl<`7l){f|H(8`Kj35z7zTk{M1NiIVpcbI9H|%`=Mq) zgnOk@{D^DC(fvqQJ6{{oWzPGTRhH%Z% zH#bYM^zp7^P4cI_yk%dNYs*Umjg221$MlD@d8}&igvuir#HQlBT|BzYUGjvXgc0@n zZ!~$KD3oL_Yw4_S_n%E89l*&U1V&if*#Xq8agZ#S~6IL zShaMp!1a@jIepAg4G|o8GY}P4!G~prsM_a;jR_P+_9oqK@vlGV9X4rr(D!gM+A6=- z@sAo+gd@NS$OKPs*_j46j|VIyzJ^5*=nGORN&j65tR8Cmq3i!k)XmH#v^l1|Uc+Ym z($k67&FM=}mo>&l(R&pWyI|{}_dJNxf)t^~K9bXuun{zxL9;=4>O_ZLse;titx*Fm z`%JP~O*L!RU%X8ZOoO<0Jm8XVR`e(LU?P$DD>9c72PulBfBoltmDFP14AkiID}QXO zfvo&U3=msuv>alj#oDh+wlrkqGGY#)7mC=TTXQUn@(wWu(wv- zUPlP&cXK&a?6`Pbmb4C zlOWVsVeJ8@++7V3nN9;&dKP08otMD;*>@tVA@qsF(P}E^8$#9ChZ67mlruGT&==*A ztJ|MME#{A&Omr5JnM@w{mnlnGp`J`6HwEybS;DmXWCNB#t>G0#xHG9gm^!8G zU)>+YgCR6v#eWy4BTa0RzS{uGnE$9p1GR%ukH~KhRyw%&|HlPT`J_cO%+E zNy3y9+W?1FwZZpWtiM(VM1-x#N1pR62ITY7KS5z~MROUuN8RtoU!T6zp2uo^=Y_K$ zapm`vkBttU{KxWjQvX^H3wNt@>3F`(Tsw%riYg9IMI;$R_@>U)+~Wwc62swSz`g$Y zAB%0p0qajgAnuojUEXd9oX<4n%gfYcHK|!)+$@INoPat^*Wt~{E{IxYPac3 zgpav^1#4GhE_Em8SVS^x!S6|9P zn34_33=XyIUl2oFw>|`gIk-BaT;E-g)+{8kc1sMkW_YKG)-Pc~+8dd;q-e z0AV;IfT#J6+Ij?RB#oH2pR}7?Y~qiufn2}AWBEUH{><=$dvjmbj?qhQt(Ibl(em!E zSWX!&9n&uT8XR$eC6Eqhe`(9axSJR@8?HQK<&yJGND=jqGYaj`4SUvI0KRtlg(Zq1 z&njUpHr4Bb!Ue?h)H}uT{X>hd8Wq^O4WA?vMnzo#l>JQLT+v4)0Y*hF{=MHTE%rP! z=M3;TG(Kq?;HhO_bRat4Cer{qc7$gi0cV|ApYQaLFNO*rTT9<{QdYCCpnb&dal12Q^j(?`MeX|Z102% z@4WctLuoSl5|Y~)z0S}R7;%KW?3y$9!|R#ecfz6nYj$)X zed^5&}DGS18OM^Yk5dj zy{z-}GO@yg_3I^gq6{}ulm;%l4Nz|ZlPv?S<{$y7)!rq+iOSD4UILm0=UTTmw!W?8 zlHUzZTc(T0(6pIp4qK>@uvf56|Zjnq;rV_j?0WUmiM z7mAlrE#`dfdR@e;MzC5*w^>Xp3C#Hk)xIZQ+k0N~m)7nTzq|u&`m5}&bkw^eymeGL z>ko!2_$J>R?X^JGjbSFb+`8`C(^m}Pv9rt;<_K{D*Ol6-D-Y~vhb_1o<+IX3Uw$|g zGJ(_ZR>CG>T9jSh7LK0C+pDKTsm^1tcyGKh#;;AgH`pJa~0YW|4FE_Z>x zh;gI3Q0@%dnn_P)dFSRSmkEX3=={f^c4k~I5;Z#06rOngG{^T^rzP8+j_qLkD7*S% z(ym=!Ce@cz{6i!`0AtPdT$$9U?|DZJChpI({^(xCGNTnn{2#s=V=2+7XyZ6KtF<)x zDaxcp>N-4`!UdB5fGSN75?S$m@M<#_V-NkQ+L;o|2_BySQ&V|QACC4#K9e)T#FtLQ zK3RT}wZDBVIq0j<4xAv;9=UCS*Y6oZX(?s#K5ef>a?}E$?R+3q8Ffua70g-kER)Q% z)Z~RiX)x~EFC0_fhUV^#m)#CutcR9{DP$mfI`H6Ii@s8$W<3|B)1>=afMresE>=9~ zv3m`3_DCnF?k7gLPjBO>aVC`7jjybb^Re`y?7sMmuK{;%l*Q!5QNA1^S$lQEW0~3K zD)Fe4vA6;&in%qr$G`7*gKqN?&c5iH63bDVQf@c{8R#h)(EvH7n#8m4y%h?0x_6qt z12179brv8o*={lz-6jQg9oHZHGz{mzRyP`AXc=bSavSN%RVMp;!jAS`WfWJ->}SIj z%CtOZ@+#J!ndo?&27o-c-SHp(ydc7x4#S+$X7?B}kPT>c&9zk^?ST#Oy0#xtE~(tS zxFkI$0NumEZg%fg3JB^mjjCxaF^UAH?d|=(Ls2&@K7NT`xI8>h1$LTX+L}Gdp`~uA z@KyCINwz$fCr(U@=$Q1=)X4GcE!XKIvDMrRB`>AX*0Q*$Lu(CC`V*p!Xeypgz)Wey zD%)Axp?Q{6O6~j)89Nh7C6l{qwf_82iHGt08OrJ_oR!(qTlhTMue~pIrt;yifs-F4 zna*X$^E|6FN158errj;%PWVwQj%9#>j4T#I1&U9o+bw!R@1

%%8a*9tWGYdF?C=&wgy$Sj(vgcH7yu#E78 zQEyiY>o{@zu6c!Az14PVXZJxjiw$|)!=CQHbI?nK`{-B-`*5S8y;g>!3}2+WyGA`8 z<<_eEKC!Tt!dbFSGZM+-kxrm(>;SKDyWZDi=3}c=$jTUBTEUsB$D8!tgPYg*k=Ut# z(MUqYYaK%#@Y~rL9e`wWo2X!De}b85K0q> z+SL}5h5rGN=!~cZr(+(iUy@=fhwZssz2Mu6zSLi*y(e`-Agy_<)9359A9tLlu5QQK ztF3Q|A*&uo`mwtv(?pc$;GYDs!ZPm`>z!?RD3?@%Me;8 z1PC#c9R{OXUmgHiaa`4VHV z6y#q{)-pSr!wBBF5z0Wt#7|vip?ja#LuF8>iOR5R1QKkZTgo-UaLLx-USdr){RH^% z^^cQ=06L_fWy+kx=y8GDeuAS$F?}V8ZhJ7BZJ)%bJcUgxnt3(T{Sw{HW;yMg&ev7C-&qy7$rqarwz^0dgwYb}#I5}&>*W6wjXg7L&_ic)vgD8} z&A#!jR&w9Wn73=m)PDUz1$Fy@G~amFc^C7MJ;JrYVgk3n30>7pR>Jpi`vsS)OAXgF z6swpo8QO9*WV5^vHUB8o{4(x9zH)2O(2>sZ{&OD6$*3v$Rd^C=@_%{Ch(A5A>)z3F zMwF~vU=y#@@bz9!_mkiJ4Nat>#IDmXPzw)=(05kilg90EYO=9$k_L`3Q!fcUpX}W` zkLgHm3gZ@??uJ1t=&=!ldUOBKhr#=yM82PwI{F|n=wWg=HYF(y(T79}RDZNR$M!GmfX8*F!Z4uoIDn264_V2$N z+)@zgvq(a)@59(>aLM<|PCt=oi;w=)>!e{XAOcP*-bmecT~|IyD=UVEJr)2x%9PQB z!|<+xms9{LU}!6WSt}6B3SB;bp(ueUGe_%{Sr$Q8al_76PH z>xxZ>+VI25+0K#!GA*3uA;$DvxG(H zVuvMgMoB~h45vi1=Pr$(XdH#s((JJ_Kdz8FVV!lt6xlg)5F=c=Q{(p zch?e@BQDU?R2GIDl|*Z$-EkFxnMj}TLlXjz*&0>#VEM@dwp6}{R)62~OPIfO zF9MfaG)JwAz}@Jkh&rqyks{e+Fo>r(tj~)>#C#cmdKRq?CYD-l!XMTT`iBGMs7w&7 z07_b*n*dL3?vwZ7*7dV?5tSk)30DM9`Ha0GcY$OhuahO;{PR-5YiAy4dmvpiY2{W= z7=_PyLxLCUY67#chA5#sw&GAjmouBtYnl_iwhCUh5&|>15B!9K0PSPcu zSmZocw}vpT=K=*41_M*d;i(m|9N~-whj-RpgFEeCXU>)-tFAKrw@GL2zK5)vrO}{! zHi@h_m|g-`=}0*`$qX!_N)auPCMpyk<|WPx*-|Ln2nFnsy+$(mBrHHI3W}*2u}X=F zJ5O7B9fMu_Zw4_v)=9*}F+Cga&eV866 zIcQsN<%FT_*RT5Z{Teu0?YIg<(`5RxBY6QVQwCkB>3Aa^|70R|+k9Is6z?)NO@9}& z--(Y zh;m}DAVcwnqASkPRDOpRxSj-nWWx_lm`$zb+l*O>K9xJTf3GnE7=<27L{x_rr`NyL zqb#PhtnjBa^Q-d5d-6}_`TQ^^kDge!HqgOv*pBfNlXC(#6(+K1aTjLMao7-63 zN8e1s8!vl6OMB@WwRC~Fe9myvU3#t_`fe}Wke_SJ!czwvN*O)dl!7!^-3qK8{VY&B4STRtUH{tLsk2f(rO_QT_Ng!LajpC5Jq(TRapj|hSr~6U zpA>qCS>u|CF~Nb6qJp)jP4{ouJiZc>PqaeC^1iZZX-MKd{+D4(mOb5cPNM*%S#GCv zb{jsvHzNKYjmFL7PM!3ubc*W$MQ|m4I5pOHI5+`HK7c5o^T>V3PGlu)JPae7!uc<@ zkV_tpQ2DBVkou$N>E-KDW~~T?(%rad6v0y_ofwcTAd+qL6Wm~_3cC#sqMR!!&KK^> zBb7 zT>;*0KJtubLNw5D3bZlu!ZJTkm<}f6{Z=9Z7VnOm?j;|7agxPH3Z~}acrjvj9Q2&f zJ`Wo2Z*$6E+MqjT7IMWz@C=3l&TEC+v8f+9h`h#d1z?NF6GNG4@0odUeHBY3Kz=#k zZLD^5Y&^v}-o9~vC%QphM_4xngA%P3;!5zQ?DL5Xx<9E{%ni@BS*xZATqoA3lLZWh z*$6SPa&e#`REV1hgvqV0ug;XT`d{I=KC<*O9OzP@U5-kE<3tVvexnC(| zYD^iuxD5ketK(}7>6gWSK73*A9{P>E$}jvw=8JWrFf_-jJogM~dtF(lGn4KF0kIy8 z^fPm~P5=rq7U9v~(zVl904h7s&gEgMI?aA}snUhcx4Byz-}`J5EKdn+6M>e8Oy=B) z9dm&pPkZ!ixa;RLJUBsOb2y8%F`AWFIh~;}dsx`Dy+<|Qe&0DQmF9^8rl&|b&Zn3@ zBpYEkeOkqNH!4p+z&qV;oys*>+oD}XFNT7yxu0pXJGJxJI)gFG{(gPRlPPS>52P4O zIVIIM^J$KaPNDOJN+myQdW^%^^pB>an?4m6F+DCG46{fdJcQB7|eL7L4|C^5qv1xGn%h~-qQ%&jTz=%;z6;*Y~QTYjL11L_Ys36>lf zP<8wdmuYlcPG<8onFD?_ustH{Ml$k3y}|OIG+VJJ!=tDBfQEKWlqoH2-Ue;;RL8%u zzapnL2B( -F*5*b$? zVFevyvLT5UT#s{9XtxWl-}w3GoXdXXctk{RhPBn#X|d$WIw6p*`+^x)!~Yk|yrke` zi~nVBG?IT=wT5zrM+O&>R%Hj+{SdFQTk*F=IcRr6RF>8M7y-bJGP0X zkL0;A-nqdQUQ$+Fg0_6Z=UzFv_rG4;YcpQ;ieI znLDeNJWAbw$SI++!*_=63#^$^iV5$)c0MyDd04T1I?EG^v92BxBnf)N!b!~6MMLiW z2xuWi`e(N8=LVV$ji*tkSJA92zo7($c(h|F6FIjzc1YVL^xPkkpB0n6^-Q}zsl^l_ z$7^|Ra8L+#J&fW^@gt!EVrV%bbwSLp^8fWl?Pnx%Uumz>yV@FOgY7D(Z#J}H!G z$sO88WN4(Txb-~1ii+{hfefSs8jt>^n5_ghycFXH3;^g9pW>(fQs8GL(7co`GU{$gsN6=35IeEtVE1@yH?x~S%FN{`?42ue)t^RJ_0-$0*#mjWM} z^9_fOR(_v&#X$mSn@^}%S^}|==D~HI%bHfICNHQ66_P)jT~Bk0w4p6f31y((-4TAe z*jZ^x=Zj0uqxG@fq3a2^GP?P_`LIAI6Py4SOYu?dbp7Iq!Eixc-l;x;3g+ zQ#DGBkXR)~X=BBz72|HHQlpBZcFdSnxsg_FwPKW@MeQQCP0cI_Em)y3YH&KChlq$^F&FmM!nmiJ#?GRB~Vq5WL2sn)-XmzvXjG zaBhTS%*oD42F6(p9T+~4rt%V+3Vs3cO>+@87(nz&z0TryMGV7>_7@}AkBc0lpE{rB z7)BjtnPl^7-Z|Vd>oV%w&P<+@aY%V}Yv~7bn9LolMS=j6f@ZvRx=H## zG)Ot5y%TV$T4s@y%L@J_y~u#RlmfvWQl=XkdELxRg%8PbtGot2{OidSqfuEw5`OY)8o!+ja5LICQe7K{UJ z(545=bQr+O@>Q{k0yeIM=<>kVrtbxm;jjFbaT_iUo}?c0dS#Y{`Ld&;&h?0`s6DR3 zTZFs|SB}~GvLZmKcA7|g@pGj?Ync*{?zx#qpYtj<1-8E_wB#}#_EgZK2cXuxW0Q!7 z?%q9>Um`{X^_SB4S$d(Ebf?PmMc1+FhYqjp3et@#b|SCw)1K!vudor;-1FOR;_pa>eSsrywoNor9LfNm0m zcr4sw(s0rhSBw4CItt+VehP;LB_9e3Gz6CNX8c1!E6}#}^K6W-xpsUgwah z2Yd^8c)*~h!Pna>12I^FZsjAXr)5y{nKA*lYgmvjE`{K*-pwC0QKnukZ9xBdkSfG1 zWF$(rH!&$=wl)h~7vh%P+x)0CPHomby6?F*pzT=>8JvC=v(YzfF-vk5* zr*mj>l025lwe$6+q)Lvpghh9L+P98zxFD6FaydKX(sv=C^-LurI=+nc8GH1I2jYG0 z+i2diU0Dwx_=~odR_OF{s8x2J25tB%$QX=BTG5<7?|VD8 z$s{w`kL7YUBTX)^%T_vBq=Nvtf5WR70nt%KoojdupYp&s!SY*Al*HOqG~m zP*cigr1*eeV#Plb7{Nfz3lP-wb7yh2gpa$hBQ8uU?hXqhCP4DWtZgG;Bn**=Z+=9( z-!~zycOrsXRq&PcXl&^A4Tlf2_AmhRI#sjELU-B8gp$yDM+;3glJ-KS;?~I5Qi*f1 z-h$?BFF{j%?NvWINwoIL&hTcmJF9a)=YxkoVr((6b#=m&93T`%RL_M85ns=hReZI3+ zz(@TIHcjO9kA6V(_e|M!q*t7HC2x0T`rWT-$oBc`8rGfpk1>$9CTkZfWQB&UGj-;Pa}vXr+QjyQ7sM^OY~)d7weO}WUU7m% z+_hzAYnN@jjp%-?NRWge9;a24LoRmCVZ8n2=4d&t@M=i5(c#5v9Zrz9E#hM@ zcU+Y_qDob)qkiefaeU*z$(2v81e@fzxbytW4!0oC=`!X9AFE3sAMx^Fg^ac^bFw12 z+i2143dm~m%M+XVV9xG$dz-M`+d^mEViC1Iq_N@c@zv(G*jvQUHD40T*#Sw7Pp@4a zF6g(pu>CGzKt*7I{<>Ox2eu3R5i*_@)3I}5)7_d~nu9biHcSoDk~MM`9=wr3JW1Yc z5nSW`b$2q3vTn29E!1)LDoA{(Y3h&4+mqotP>0If;3t$e-x(8E(TTf6j zRz?bCzC0&*rcp-Al1U@;GQC*FhUX*UNQ_I>6IJ3RtoO$DA#G!@QvK)l!IQ-~V?>&W zVv_q&n}aj4;OETtn`l>h+WFIQMI7LARkngc1*Th5go9qSJ2YBWMQ(;>R)h;$p z+eh+&K7^jdXX?%Mk+&FTNMChQ{q8t~v;(XFKXD#Ju0wu-lMjw_ts*x!W-dxxpY9C( zSW)|=v*w-OxR_ab{PFKq z%p{OL;=S}5#!s=N?p=isYL5v_5BBTnJJ%XY)j_FisP1VIHxW+RuS9^nA~{Xl{(qO? z{L^s)%J#avO%B>hg<>BDELHb#1|roGS$owjzT{L2{Ft*&^yt|LhZ3)pBj{^GW{vUn zjgWD*r$pgorU%cokBLT%GYwWt!{9o*Y}>ha zxkf?}0Wr<6Xc|qCh@Ev1=gfLISqOKCMsSa<+2h%(kX)1DO;m*ofk$esm~|D&AjC`j z!Re-TWoJr9q@PF{bFNz_4Mo`(I8NW?!3?k<{DK6!*v4^ zvf8Ptw8|}IllziFYEImhsB+z=>~1E5v*#y2f1YzVZ{?MYAMH&zKlhV2K-i=e`oAM{ z=)V}>+e(pBR(Rh9SsVLSDppwWDV@C_T`%bo&^g#o>0R6~4#TxUmir{PoRkEEQxWAE z;CN@R)L|N-8w?)n#1Y>30JWy-2{&?8tz<)0Vz#A^8B|%e1NXVNwxd)p!c5hSUVWR0 zaIU8)9}U{I2GY>boBiVg_>!Nc)NU*LCsg;F^q=)c0k6N#xgr<1#{vYqYGaY^`bKN% z6x-RJYKcxR5ar<-+G3I=ENPE|jw7*QJ?%0iy2b;SwEY3qL$!D8)bU;YQ|5KZ>DW+u zF}h9$IYX3{fOpEkUovPf1$bn=C{3msIv~E9R}FS1q7lj}ZOik5=7z^EjD-;0G3?vv zO$;~qja3IRUozfWUA3P^8A?#hDQF!O%)sWJ_Zgt#e>c=asFS<2?sOv0CQW8 z(i@O4h@*fL+x`G<2?NAvv?Y!IrL1BQ+#h3r__Cb*z@CX5hv|V#%XhlbO7#SnYVC-9 z!kE@Wzq^Lh#4rMBl!g7juTkO{1&w=x*hwt zMObuv6Eg7!_t|=Tg5Av2aDR>O6AQf}W>{>MTPYtbQ6+0TfDrdhINc)41uZ5<2bVp;$GN01V>EVvdI}-o zJJ>1VCC3S6P>_VdC$1-WBT;v~-$1G0<`j?cx4ve^Jv99ARyZ}E{rua>KgAD^t!lM= zd^@L6w>w}eHhEX{v>&zl3m??DlDaE(x+}v&6fJ#ZnO!D%Zdtb3G(aWGi8mC9ivIaF ztrg;wO3}w6FKOE&Ez{P$Kn@Sv;=-K-t|qY^r{zX#^mGWRD|Lw*Eg9cLdYL23R-LaS zIbjK@X6cstp=i`yy*XAMB8=AyJ0#nlujr+;Cmge9>DbJVFkDTDlqrSa46wU?R=N?S zW4j~;;;{uZk7utSc)9-z0QD$)xhF2rD{B|bWIz0_XIEw>c)Q(|pu61k&olC08j6pX zF9Eqw%kn5xiY6z)-kp}8xs~A$1>2hhUTsb<#NW}o-kd0yM>iY2E&TZqX$9}96QvZ= zjMWP4yX&dK@&lhH3;vz!-Z^Foxq(eNMLVTNIuAY~NO}S-4ZS*;lnwBfP zrSDJqnWF+z2Yu@e9vs-Mkrh+clq_S*S6~E*A6ch(W_9!kcG7kVeSau)2a2+*PSixa zL8~VZ)tNx>zXe``II0Z@i0EToZ#5t!W(V`^y340_iQl7$&30)OrqF5in<>m);M zQb{dC4%2*o|L*j^=D%!3`L8Ng{)53P+~|I*p2%G}1~GqXqB>}3bhQjM%haE}{y#{C B2r2*o literal 0 HcmV?d00001 diff --git a/index.md b/index.md index 8e591bf..816cc3c 100644 --- a/index.md +++ b/index.md @@ -1,19 +1,21 @@ -# Python 分布式编程 +# Python 数据科学加速 ::::{grid} 2 :reverse: :::{grid-item} -:columns: 4 +:columns: 3 :class: sd-m-auto ::: :::{grid-item} -:columns: 8 +:columns: 9 :class: sd-fs-3 -开源的、面向下一代人工智能应用的 Python 分布式编程书籍。 +Dask、Ray、Xorbits、mpi4py + +开源的、面向下一代数据科学和人工智能应用的 Python 数据科学并行加速书籍。 % The SVG rendering breaks latex builds for the GitHub badge, so only include in HTML ```{only} html @@ -24,7 +26,7 @@ :::: -::::{card-carousel} 3 +::::{card-carousel} 2 :::{card} :margin: 3 @@ -48,7 +50,9 @@ :height: 100 ``` ::: +:::: +::::{card-carousel} 2 :::{card} :margin: 3 @@ -61,6 +65,19 @@ :height: 100 ``` ::: + +:::{card} +:margin: 3 +:class-body: text-center +:class-header: bg-light text-center + +**mpi4py** +^^^ +```{image} ./img/mpi-logo.png +:height: 100 +``` +::: + :::: ## 主要作者