From 29ae99d0a9ffe05ea05a16940ec669b524334a5c Mon Sep 17 00:00:00 2001 From: Reed Johnson Date: Tue, 27 Aug 2024 16:42:55 -0500 Subject: [PATCH 1/5] Changed the default for upload max_image_size --- .../workflows/core_steps/sinks/roboflow/dataset_upload/v1.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inference/core/workflows/core_steps/sinks/roboflow/dataset_upload/v1.py b/inference/core/workflows/core_steps/sinks/roboflow/dataset_upload/v1.py index 14df54def..f61825f48 100644 --- a/inference/core/workflows/core_steps/sinks/roboflow/dataset_upload/v1.py +++ b/inference/core/workflows/core_steps/sinks/roboflow/dataset_upload/v1.py @@ -139,10 +139,10 @@ class BlockManifest(WorkflowBlockManifest): examples=[10, 60], ) max_image_size: Tuple[int, int] = Field( - default=(512, 512), + default=(1920, 1080), description="Maximum size of the image to be registered - bigger images will be " "downsized preserving aspect ratio. Format of data: `(width, height)`", - examples=[(512, 512), (1920, 1080)], + examples=[(1920, 1080), (512, 512)], ) compression_level: int = Field( default=75, From a8657ca8f89c419e97d6a36e07f3c6322b07f993 Mon Sep 17 00:00:00 2001 From: Reed Johnson Date: Tue, 27 Aug 2024 16:57:44 -0500 Subject: [PATCH 2/5] Increased default compression to 100 --- .../workflows/core_steps/sinks/roboflow/dataset_upload/v1.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inference/core/workflows/core_steps/sinks/roboflow/dataset_upload/v1.py b/inference/core/workflows/core_steps/sinks/roboflow/dataset_upload/v1.py index f61825f48..f3dc459dd 100644 --- a/inference/core/workflows/core_steps/sinks/roboflow/dataset_upload/v1.py +++ b/inference/core/workflows/core_steps/sinks/roboflow/dataset_upload/v1.py @@ -145,7 +145,7 @@ class BlockManifest(WorkflowBlockManifest): examples=[(1920, 1080), (512, 512)], ) compression_level: int = Field( - default=75, + default=100, gt=0, le=100, description="Compression level for images registered", From 89dfa2b3cbe83f9f4f24f005e82564b48d3a6160 Mon Sep 17 00:00:00 2001 From: Reed Johnson Date: Wed, 28 Aug 2024 17:17:09 -0500 Subject: [PATCH 3/5] Change compression to 95 --- .../workflows/core_steps/sinks/roboflow/dataset_upload/v1.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inference/core/workflows/core_steps/sinks/roboflow/dataset_upload/v1.py b/inference/core/workflows/core_steps/sinks/roboflow/dataset_upload/v1.py index f3dc459dd..5f218e58b 100644 --- a/inference/core/workflows/core_steps/sinks/roboflow/dataset_upload/v1.py +++ b/inference/core/workflows/core_steps/sinks/roboflow/dataset_upload/v1.py @@ -145,7 +145,7 @@ class BlockManifest(WorkflowBlockManifest): examples=[(1920, 1080), (512, 512)], ) compression_level: int = Field( - default=100, + default=95, gt=0, le=100, description="Compression level for images registered", From 86759d44b7f2d29a5db84b1ff32fbb2519ec8e2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20P=C4=99czek?= Date: Fri, 30 Aug 2024 09:07:53 +0200 Subject: [PATCH 4/5] Bring back the previous state --- .../core_steps/sinks/roboflow/dataset_upload/v1.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/inference/core/workflows/core_steps/sinks/roboflow/dataset_upload/v1.py b/inference/core/workflows/core_steps/sinks/roboflow/dataset_upload/v1.py index 5bce35e62..488b6952b 100644 --- a/inference/core/workflows/core_steps/sinks/roboflow/dataset_upload/v1.py +++ b/inference/core/workflows/core_steps/sinks/roboflow/dataset_upload/v1.py @@ -139,13 
+139,13 @@ class BlockManifest(WorkflowBlockManifest): examples=[10, 60], ) max_image_size: Tuple[int, int] = Field( - default=(1920, 1080), + default=(512, 512), description="Maximum size of the image to be registered - bigger images will be " "downsized preserving aspect ratio. Format of data: `(width, height)`", - examples=[(1920, 1080), (512, 512)], + examples=[(512, 512), (1920, 1080)], ) compression_level: int = Field( - default=95, + default=75, gt=0, le=100, description="Compression level for images registered", From 56aa0c7dc2f09190b710c236ac3e9465efaa7367 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20P=C4=99czek?= Date: Fri, 30 Aug 2024 10:30:11 +0200 Subject: [PATCH 5/5] Added implementation and tests for block v2 --- inference/core/workflows/core_steps/loader.py | 4 + .../sinks/roboflow/dataset_upload/v1.py | 17 + .../sinks/roboflow/dataset_upload/v2.py | 322 +++++++++++++++ .../models_predictions_tests/conftest.py | 9 +- .../models_predictions_tests/test_sam2.py | 31 +- .../core/utils/test_sqlite_wrapper.py | 38 +- ...test_workflow_with_active_learning_sink.py | 177 +++++++++ .../test_workflow_with_contours_detection.py | 4 +- ...ith_counting_pixels_with_dominant_color.py | 4 +- .../test_workflow_with_dominant_color.py | 4 +- .../execution/test_workflow_with_sahi.py | 4 +- .../execution/test_workflow_with_sam2.py | 7 +- .../execution/test_workflow_with_sift.py | 4 +- ...t_workflow_with_stitch_for_dynamic_crop.py | 4 +- .../roboflow_dataset_upload/__init__.py | 0 .../test_v1.py} | 0 .../roboflow_dataset_upload/test_v2.py | 374 ++++++++++++++++++ .../transformations/test_detection_offset.py | 3 +- 18 files changed, 962 insertions(+), 44 deletions(-) create mode 100644 inference/core/workflows/core_steps/sinks/roboflow/dataset_upload/v2.py create mode 100644 tests/workflows/integration_tests/execution/test_workflow_with_active_learning_sink.py create mode 100644 tests/workflows/unit_tests/core_steps/sinks/roboflow/roboflow_dataset_upload/__init__.py rename tests/workflows/unit_tests/core_steps/sinks/roboflow/{test_roboflow_dataset_upload.py => roboflow_dataset_upload/test_v1.py} (100%) create mode 100644 tests/workflows/unit_tests/core_steps/sinks/roboflow/roboflow_dataset_upload/test_v2.py diff --git a/inference/core/workflows/core_steps/loader.py b/inference/core/workflows/core_steps/loader.py index 8d63b5070..57f7f12db 100644 --- a/inference/core/workflows/core_steps/loader.py +++ b/inference/core/workflows/core_steps/loader.py @@ -102,6 +102,9 @@ from inference.core.workflows.core_steps.sinks.roboflow.dataset_upload.v1 import ( RoboflowDatasetUploadBlockV1, ) +from inference.core.workflows.core_steps.sinks.roboflow.dataset_upload.v2 import ( + RoboflowDatasetUploadBlockV2, +) from inference.core.workflows.core_steps.transformations.absolute_static_crop.v1 import ( AbsoluteStaticCropBlockV1, ) @@ -287,6 +290,7 @@ def load_blocks() -> List[Type[WorkflowBlock]]: ImageThresholdBlockV1, ImageContoursDetectionBlockV1, ClipComparisonBlockV2, + RoboflowDatasetUploadBlockV2, ] diff --git a/inference/core/workflows/core_steps/sinks/roboflow/dataset_upload/v1.py b/inference/core/workflows/core_steps/sinks/roboflow/dataset_upload/v1.py index 488b6952b..ba57666ef 100644 --- a/inference/core/workflows/core_steps/sinks/roboflow/dataset_upload/v1.py +++ b/inference/core/workflows/core_steps/sinks/roboflow/dataset_upload/v1.py @@ -1,3 +1,20 @@ +""" +***************************************************************** +* WARNING! 
* +***************************************************************** +This module contains the utility functions used by +RoboflowDatasetUploadBlockV2. + +We do not recommend making multiple blocks dependent on the same code, +but the change between v1 and v2 was basically the default value of +some parameter - hence we decided not to replicate the code. + +If you need to modify this module, beware that you may introduce +a change to RoboflowDatasetUploadBlockV2! If that happens, +that is probably the time to disentangle those blocks and copy the +code. +""" + import hashlib import json import logging diff --git a/inference/core/workflows/core_steps/sinks/roboflow/dataset_upload/v2.py b/inference/core/workflows/core_steps/sinks/roboflow/dataset_upload/v2.py new file mode 100644 index 000000000..0d03667ab --- /dev/null +++ b/inference/core/workflows/core_steps/sinks/roboflow/dataset_upload/v2.py @@ -0,0 +1,322 @@ +import random +from concurrent.futures import ThreadPoolExecutor +from typing import List, Literal, Optional, Tuple, Type, Union + +import supervision as sv +from fastapi import BackgroundTasks +from pydantic import ConfigDict, Field +from typing_extensions import Annotated + +from inference.core.cache.base import BaseCache +from inference.core.workflows.core_steps.sinks.roboflow.dataset_upload.v1 import ( + register_datapoint_at_roboflow, +) +from inference.core.workflows.execution_engine.entities.base import ( + Batch, + OutputDefinition, + WorkflowImageData, +) +from inference.core.workflows.execution_engine.entities.types import ( + BATCH_OF_BOOLEAN_KIND, + BATCH_OF_CLASSIFICATION_PREDICTION_KIND, + BATCH_OF_INSTANCE_SEGMENTATION_PREDICTION_KIND, + BATCH_OF_KEYPOINT_DETECTION_PREDICTION_KIND, + BATCH_OF_OBJECT_DETECTION_PREDICTION_KIND, + BATCH_OF_STRING_KIND, + BOOLEAN_KIND, + FLOAT_KIND, + ROBOFLOW_PROJECT_KIND, + STRING_KIND, + ImageInputField, + StepOutputImageSelector, + StepOutputSelector, + WorkflowImageSelector, + WorkflowParameterSelector, +) +from inference.core.workflows.prototypes.block import ( + BlockResult, + WorkflowBlock, + WorkflowBlockManifest, +) + +FloatZeroToHundred = Annotated[float, Field(ge=0.0, le=100.0)] + +SHORT_DESCRIPTION = "Save images and predictions in your Roboflow Dataset" + +LONG_DESCRIPTION = """ +This block lets users save their images and predictions into a Roboflow dataset. Persisting data from +production environments helps iteratively build more robust models. + +The block provides configuration options to decide how data should be stored and what limits +should be applied. We advise using this block in combination with rate limiter blocks to effectively +collect data that the model struggles with. 
+""" + +WORKSPACE_NAME_CACHE_EXPIRE = 900 # 15 min +TIMESTAMP_FORMAT = "%Y_%m_%d" +DUPLICATED_STATUS = "Duplicated image" +BatchCreationFrequency = Literal["never", "daily", "weekly", "monthly"] + + +class BlockManifest(WorkflowBlockManifest): + model_config = ConfigDict( + json_schema_extra={ + "name": "Roboflow Dataset Upload", + "version": "v2", + "short_description": SHORT_DESCRIPTION, + "long_description": LONG_DESCRIPTION, + "license": "Apache-2.0", + "block_type": "sink", + } + ) + type: Literal["roboflow_core/roboflow_dataset_upload@v2"] + images: Union[WorkflowImageSelector, StepOutputImageSelector] = ImageInputField + predictions: Optional[ + StepOutputSelector( + kind=[ + BATCH_OF_OBJECT_DETECTION_PREDICTION_KIND, + BATCH_OF_INSTANCE_SEGMENTATION_PREDICTION_KIND, + BATCH_OF_KEYPOINT_DETECTION_PREDICTION_KIND, + BATCH_OF_CLASSIFICATION_PREDICTION_KIND, + ] + ) + ] = Field( + default=None, + description="Reference q detection-like predictions", + examples=["$steps.object_detection_model.predictions"], + ) + target_project: Union[ + WorkflowParameterSelector(kind=[ROBOFLOW_PROJECT_KIND]), str + ] = Field( + description="name of Roboflow dataset / project to be used as target for collected data", + examples=["my_dataset", "$inputs.target_al_dataset"], + ) + usage_quota_name: str = Field( + description="Unique name for Roboflow project pointed by `target_project` parameter, that identifies " + "usage quota applied for this block.", + examples=["quota-for-data-sampling-1"], + ) + data_percentage: Union[ + FloatZeroToHundred, WorkflowParameterSelector(kind=[FLOAT_KIND]) + ] = Field( + description="Percent of data that will be saved (in range [0.0, 100.0])", + examples=[True, False, "$inputs.persist_predictions"], + ) + persist_predictions: Union[bool, WorkflowParameterSelector(kind=[BOOLEAN_KIND])] = ( + Field( + default=True, + description="Boolean flag to decide if predictions should be registered along with images", + examples=[True, False, "$inputs.persist_predictions"], + ) + ) + minutely_usage_limit: int = Field( + default=10, + description="Maximum number of data registration requests per minute accounted in scope of " + "single server or whole Roboflow platform, depending on context of usage.", + examples=[10, 60], + ) + hourly_usage_limit: int = Field( + default=100, + description="Maximum number of data registration requests per hour accounted in scope of " + "single server or whole Roboflow platform, depending on context of usage.", + examples=[10, 60], + ) + daily_usage_limit: int = Field( + default=1000, + description="Maximum number of data registration requests per day accounted in scope of " + "single server or whole Roboflow platform, depending on context of usage.", + examples=[10, 60], + ) + max_image_size: Tuple[int, int] = Field( + default=(1920, 1080), + description="Maximum size of the image to be registered - bigger images will be " + "downsized preserving aspect ratio. 
Format of data: `(width, height)`", + examples=[(1920, 1080), (512, 512)], + ) + compression_level: int = Field( + default=95, + gt=0, + le=100, + description="Compression level for images registered", + examples=[95, 75], + ) + registration_tags: List[ + Union[WorkflowParameterSelector(kind=[STRING_KIND]), str] + ] = Field( + default_factory=list, + description="Tags to be attached to registered datapoints", + examples=[["location-florida", "factory-name", "$inputs.dynamic_tag"]], + ) + disable_sink: Union[bool, WorkflowParameterSelector(kind=[BOOLEAN_KIND])] = Field( + default=False, + description="boolean flag that can be also reference to input - to arbitrarily disable " + "data collection for specific request", + examples=[True, "$inputs.disable_active_learning"], + ) + fire_and_forget: Union[bool, WorkflowParameterSelector(kind=[BOOLEAN_KIND])] = ( + Field( + default=True, + description="Boolean flag dictating if sink is supposed to be executed in the background, " + "not waiting on status of registration before end of workflow run. Use `True` if best-effort " + "registration is needed, use `False` while debugging and if error handling is needed", + ) + ) + labeling_batch_prefix: Union[str, WorkflowParameterSelector(kind=[STRING_KIND])] = ( + Field( + default="workflows_data_collector", + description="Prefix of the name for labeling batches that will be registered in Roboflow app", + examples=["my_labeling_batch_name"], + ) + ) + labeling_batches_recreation_frequency: BatchCreationFrequency = Field( + default="never", + description="Frequency in which new labeling batches are created in Roboflow app. New batches " + "are created with name prefix provided in `labeling_batch_prefix` in given time intervals." + "Useful in organising labeling flow.", + examples=["never", "daily"], + ) + + @classmethod + def accepts_batch_input(cls) -> bool: + return True + + @classmethod + def describe_outputs(cls) -> List[OutputDefinition]: + return [ + OutputDefinition(name="error_status", kind=[BATCH_OF_BOOLEAN_KIND]), + OutputDefinition(name="message", kind=[BATCH_OF_STRING_KIND]), + ] + + @classmethod + def get_execution_engine_compatibility(cls) -> Optional[str]: + return ">=1.0.0,<2.0.0" + + +class RoboflowDatasetUploadBlockV2(WorkflowBlock): + + def __init__( + self, + cache: BaseCache, + api_key: Optional[str], + background_tasks: Optional[BackgroundTasks], + thread_pool_executor: Optional[ThreadPoolExecutor], + ): + self._cache = cache + self._api_key = api_key + self._background_tasks = background_tasks + self._thread_pool_executor = thread_pool_executor + + @classmethod + def get_init_parameters(cls) -> List[str]: + return ["cache", "api_key", "background_tasks", "thread_pool_executor"] + + @classmethod + def get_manifest(cls) -> Type[WorkflowBlockManifest]: + return BlockManifest + + def run( + self, + images: Batch[WorkflowImageData], + predictions: Optional[Batch[Union[sv.Detections, dict]]], + target_project: str, + usage_quota_name: str, + data_percentage: float, + minutely_usage_limit: int, + persist_predictions: bool, + hourly_usage_limit: int, + daily_usage_limit: int, + max_image_size: Tuple[int, int], + compression_level: int, + registration_tags: List[str], + disable_sink: bool, + fire_and_forget: bool, + labeling_batch_prefix: str, + labeling_batches_recreation_frequency: BatchCreationFrequency, + ) -> BlockResult: + if self._api_key is None: + raise ValueError( + "RoboflowDataCollector block cannot run without Roboflow API key. 
" + "If you do not know how to get API key - visit " + "https://docs.roboflow.com/api-reference/authentication#retrieve-an-api-key to learn how to " + "retrieve one." + ) + if disable_sink: + return [ + { + "error_status": False, + "message": "Sink was disabled by parameter `disable_sink`", + } + for _ in range(len(images)) + ] + result = [] + predictions = [None] * len(images) if predictions is None else predictions + for image, prediction in zip(images, predictions): + error_status, message = maybe_register_datapoint_at_roboflow( + image=image, + prediction=prediction, + target_project=target_project, + usage_quota_name=usage_quota_name, + data_percentage=data_percentage, + persist_predictions=persist_predictions, + minutely_usage_limit=minutely_usage_limit, + hourly_usage_limit=hourly_usage_limit, + daily_usage_limit=daily_usage_limit, + max_image_size=max_image_size, + compression_level=compression_level, + registration_tags=registration_tags, + fire_and_forget=fire_and_forget, + labeling_batch_prefix=labeling_batch_prefix, + new_labeling_batch_frequency=labeling_batches_recreation_frequency, + cache=self._cache, + background_tasks=self._background_tasks, + thread_pool_executor=self._thread_pool_executor, + api_key=self._api_key, + ) + result.append({"error_status": error_status, "message": message}) + return result + + +def maybe_register_datapoint_at_roboflow( + image: WorkflowImageData, + prediction: Optional[Union[sv.Detections, dict]], + target_project: str, + usage_quota_name: str, + data_percentage: float, + persist_predictions: bool, + minutely_usage_limit: int, + hourly_usage_limit: int, + daily_usage_limit: int, + max_image_size: Tuple[int, int], + compression_level: int, + registration_tags: List[str], + fire_and_forget: bool, + labeling_batch_prefix: str, + new_labeling_batch_frequency: BatchCreationFrequency, + cache: BaseCache, + background_tasks: Optional[BackgroundTasks], + thread_pool_executor: Optional[ThreadPoolExecutor], + api_key: str, +) -> Tuple[bool, str]: + normalised_probability = data_percentage / 100 + if random.random() < normalised_probability: + return register_datapoint_at_roboflow( + image=image, + prediction=prediction, + target_project=target_project, + usage_quota_name=usage_quota_name, + persist_predictions=persist_predictions, + minutely_usage_limit=minutely_usage_limit, + hourly_usage_limit=hourly_usage_limit, + daily_usage_limit=daily_usage_limit, + max_image_size=max_image_size, + compression_level=compression_level, + registration_tags=registration_tags, + fire_and_forget=fire_and_forget, + labeling_batch_prefix=labeling_batch_prefix, + new_labeling_batch_frequency=new_labeling_batch_frequency, + cache=cache, + background_tasks=background_tasks, + thread_pool_executor=thread_pool_executor, + api_key=api_key, + ) + return False, "Registration skipped due to sampling settings" diff --git a/tests/inference/models_predictions_tests/conftest.py b/tests/inference/models_predictions_tests/conftest.py index b15466df0..30878871b 100644 --- a/tests/inference/models_predictions_tests/conftest.py +++ b/tests/inference/models_predictions_tests/conftest.py @@ -1,14 +1,13 @@ +import json import os.path import shutil import zipfile -from typing import Generator +from typing import Dict, Generator import cv2 import numpy as np import pytest import requests -import json -from typing import Dict from inference.core.env import MODEL_CACHE_DIR @@ -29,12 +28,12 @@ ) - @pytest.fixture(scope="function") def sam2_multipolygon_response() -> Dict: with 
open(SAM2_MULTI_POLY_RESPONSE_PATH) as f: return json.load(f) + @pytest.fixture(scope="function") def example_image() -> np.ndarray: return cv2.imread(EXAMPLE_IMAGE_PATH) @@ -197,6 +196,7 @@ def sam2_small_model() -> Generator[str, None, None]: yield model_id shutil.rmtree(model_cache_dir) + @pytest.fixture(scope="function") def sam2_tiny_model() -> Generator[str, None, None]: model_id = "sam2/hiera_tiny" @@ -217,6 +217,7 @@ def sam2_small_truck_logits() -> Generator[np.ndarray, None, None]: def sam2_small_truck_mask_from_cached_logits() -> Generator[np.ndarray, None, None]: yield np.load(SAM2_TRUCK_MASK_FROM_CACHE) + def fetch_and_place_model_in_cache( model_id: str, model_package_url: str, diff --git a/tests/inference/models_predictions_tests/test_sam2.py b/tests/inference/models_predictions_tests/test_sam2.py index d1c4299bc..4fbfa9bf0 100644 --- a/tests/inference/models_predictions_tests/test_sam2.py +++ b/tests/inference/models_predictions_tests/test_sam2.py @@ -1,27 +1,27 @@ -import numpy as np -import pytest -import torch import json -import requests from copy import deepcopy +from io import BytesIO +from typing import Dict +import numpy as np +import pytest +import requests +import torch from PIL import Image -from io import BytesIO -from inference.core.entities.requests.sam2 import Sam2PromptSet -from inference.models.sam2 import SegmentAnything2 -from inference.models.sam2.segment_anything2 import ( - hash_prompt_set, - maybe_load_low_res_logits_from_cache, + +from inference.core.entities.requests.sam2 import Sam2PromptSet, Sam2SegmentationRequest +from inference.core.entities.responses.sam2 import Sam2SegmentationPrediction +from inference.core.workflows.core_steps.common.utils import ( + convert_inference_detections_batch_to_sv_detections, ) from inference.core.workflows.core_steps.models.foundation.segment_anything2.v1 import ( convert_sam2_segmentation_response_to_inference_instances_seg_response, ) -from inference.core.workflows.core_steps.common.utils import ( - convert_inference_detections_batch_to_sv_detections, +from inference.models.sam2 import SegmentAnything2 +from inference.models.sam2.segment_anything2 import ( + hash_prompt_set, + maybe_load_low_res_logits_from_cache, ) -from inference.core.entities.responses.sam2 import Sam2SegmentationPrediction -from inference.core.entities.requests.sam2 import Sam2SegmentationRequest -from typing import Dict @pytest.mark.slow @@ -236,6 +236,7 @@ def test_sam2_multi_poly(sam2_tiny_model: str, sam2_multipolygon_response: Dict) except Exception as e: raise e + def test_model_clears_cache_properly(sam2_small_model, truck_image): cache_size = 2 model = SegmentAnything2( diff --git a/tests/inference/unit_tests/core/utils/test_sqlite_wrapper.py b/tests/inference/unit_tests/core/utils/test_sqlite_wrapper.py index 3de78313a..784e8f1d9 100644 --- a/tests/inference/unit_tests/core/utils/test_sqlite_wrapper.py +++ b/tests/inference/unit_tests/core/utils/test_sqlite_wrapper.py @@ -8,7 +8,9 @@ def test_count_empty_table(): # given conn = sqlite3.connect(":memory:") - q = SQLiteWrapper(db_file_path="", table_name="test", columns={"col1": "TEXT"}, connection=conn) + q = SQLiteWrapper( + db_file_path="", table_name="test", columns={"col1": "TEXT"}, connection=conn + ) # then assert q.count(connection=conn) == 0 @@ -18,7 +20,9 @@ def test_count_empty_table(): def test_insert(): # given conn = sqlite3.connect(":memory:") - q = SQLiteWrapper(db_file_path="", table_name="test", columns={"col1": "TEXT"}, connection=conn) + q = SQLiteWrapper( + 
db_file_path="", table_name="test", columns={"col1": "TEXT"}, connection=conn + ) # when q.insert(values={"col1": "lorem"}, connection=conn) @@ -31,7 +35,9 @@ def test_insert(): def test_insert_incorrect_columns(): # given conn = sqlite3.connect(":memory:") - q = SQLiteWrapper(db_file_path="", table_name="test", columns={"col1": "TEXT"}, connection=conn) + q = SQLiteWrapper( + db_file_path="", table_name="test", columns={"col1": "TEXT"}, connection=conn + ) with pytest.raises(ValueError): q.insert(values={"col2": "lorem"}, connection=conn) @@ -42,7 +48,9 @@ def test_insert_incorrect_columns(): def test_select_no_limit(): # given conn = sqlite3.connect(":memory:") - q = SQLiteWrapper(db_file_path="", table_name="test", columns={"col1": "TEXT"}, connection=conn) + q = SQLiteWrapper( + db_file_path="", table_name="test", columns={"col1": "TEXT"}, connection=conn + ) # when q.insert(values={"col1": "lorem"}, connection=conn) @@ -50,17 +58,16 @@ def test_select_no_limit(): values = q.select(connection=conn) # then - assert values == [ - {"id": 1, "col1": "lorem"}, - {"id": 2, "col1": "ipsum"} - ] + assert values == [{"id": 1, "col1": "lorem"}, {"id": 2, "col1": "ipsum"}] conn.close() def test_select_limit(): # given conn = sqlite3.connect(":memory:") - q = SQLiteWrapper(db_file_path="", table_name="test", columns={"col1": "TEXT"}, connection=conn) + q = SQLiteWrapper( + db_file_path="", table_name="test", columns={"col1": "TEXT"}, connection=conn + ) # when q.insert(values={"col1": "lorem"}, connection=conn) @@ -77,7 +84,9 @@ def test_select_limit(): def test_flush_no_limit(): # given conn = sqlite3.connect(":memory:") - q = SQLiteWrapper(db_file_path="", table_name="test", columns={"col1": "TEXT"}, connection=conn) + q = SQLiteWrapper( + db_file_path="", table_name="test", columns={"col1": "TEXT"}, connection=conn + ) # when q.insert(values={"col1": "lorem"}, connection=conn) @@ -85,10 +94,7 @@ def test_flush_no_limit(): values = q.flush(connection=conn) # then - assert values == [ - {"id": 1, "col1": "lorem"}, - {"id": 2, "col1": "ipsum"} - ] + assert values == [{"id": 1, "col1": "lorem"}, {"id": 2, "col1": "ipsum"}] assert q.count(connection=conn) == 0 conn.close() @@ -96,7 +102,9 @@ def test_flush_no_limit(): def test_flush_limit(): # given conn = sqlite3.connect(":memory:") - q = SQLiteWrapper(db_file_path="", table_name="test", columns={"col1": "TEXT"}, connection=conn) + q = SQLiteWrapper( + db_file_path="", table_name="test", columns={"col1": "TEXT"}, connection=conn + ) # when q.insert(values={"col1": "lorem"}, connection=conn) diff --git a/tests/workflows/integration_tests/execution/test_workflow_with_active_learning_sink.py b/tests/workflows/integration_tests/execution/test_workflow_with_active_learning_sink.py new file mode 100644 index 000000000..1d6658723 --- /dev/null +++ b/tests/workflows/integration_tests/execution/test_workflow_with_active_learning_sink.py @@ -0,0 +1,177 @@ +import numpy as np +import pytest + +from inference.core.env import WORKFLOWS_MAX_CONCURRENT_STEPS +from inference.core.managers.base import ModelManager +from inference.core.workflows.core_steps.common.entities import StepExecutionMode +from inference.core.workflows.execution_engine.core import ExecutionEngine +from tests.workflows.integration_tests.execution.workflows_gallery_collector.decorators import ( + add_to_workflows_gallery, +) + +ACTIVE_LEARNING_WORKFLOW = { + "version": "1.0", + "inputs": [ + {"type": "WorkflowImage", "name": "image"}, + {"type": "WorkflowParameter", "name": "data_percentage", 
"default_value": 50.0}, + { + "type": "WorkflowParameter", + "name": "persist_predictions", + "default_value": True, + }, + {"type": "WorkflowParameter", "name": "tag", "default_value": "my_tag"}, + {"type": "WorkflowParameter", "name": "disable_sink", "default_value": False}, + {"type": "WorkflowParameter", "name": "fire_and_forget", "default_value": True}, + { + "type": "WorkflowParameter", + "name": "labeling_batch_prefix", + "default_value": "some", + }, + ], + "steps": [ + { + "type": "roboflow_core/roboflow_object_detection_model@v1", + "name": "general_detection", + "image": "$inputs.image", + "model_id": "yolov8n-640", + "class_filter": ["dog"], + }, + { + "type": "roboflow_core/dynamic_crop@v1", + "name": "cropping", + "image": "$inputs.image", + "predictions": "$steps.general_detection.predictions", + }, + { + "type": "roboflow_core/roboflow_classification_model@v1", + "name": "breds_classification", + "image": "$steps.cropping.crops", + "model_id": "dog-breed-xpaq6/1", + }, + { + "type": "roboflow_core/roboflow_dataset_upload@v2", + "name": "data_collection", + "images": "$steps.cropping.crops", + "predictions": "$steps.breds_classification.predictions", + "target_project": "my_project", + "usage_quota_name": "my_quota", + "data_percentage": "$inputs.data_percentage", + "persist_predictions": "$inputs.persist_predictions", + "minutely_usage_limit": 10, + "hourly_usage_limit": 100, + "daily_usage_limit": 1000, + "max_image_size": (100, 200), + "compression_level": 85, + "registration_tags": ["a", "b", "$inputs.tag"], + "disable_sink": "$inputs.disable_sink", + "fire_and_forget": "$inputs.fire_and_forget", + "labeling_batch_prefix": "$inputs.labeling_batch_prefix", + "labeling_batches_recreation_frequency": "never", + }, + ], + "outputs": [ + { + "type": "JsonField", + "name": "predictions", + "selector": "$steps.breds_classification.predictions", + }, + { + "type": "JsonField", + "name": "registration_message", + "selector": "$steps.data_collection.message", + }, + ], +} + + +@add_to_workflows_gallery( + category="Workflows enhanced by Roboflow Platform", + use_case_title="Data Collection for Active Learning", + use_case_description=""" +This example showcases how to stack models on top of each other - in this particular +case, we detect objects using object detection models, requesting only "dogs" bounding boxes +in the output of prediction. Additionally, we register cropped images in Roboflow dataset. + +Thanks to this setup, we are able to collect production data and continuously train better models over time. 
+""", + workflow_definition=ACTIVE_LEARNING_WORKFLOW, +) +def test_detection_plus_classification_workflow_when_minimal_valid_input_provided( + model_manager: ModelManager, + dogs_image: np.ndarray, + roboflow_api_key: str, +) -> None: + # given + workflow_init_parameters = { + "workflows_core.model_manager": model_manager, + "workflows_core.api_key": roboflow_api_key, + "workflows_core.step_execution_mode": StepExecutionMode.LOCAL, + } + execution_engine = ExecutionEngine.init( + workflow_definition=ACTIVE_LEARNING_WORKFLOW, + init_parameters=workflow_init_parameters, + max_concurrent_steps=WORKFLOWS_MAX_CONCURRENT_STEPS, + ) + # when + result = execution_engine.run( + runtime_parameters={ + "image": dogs_image, + "data_percentage": 0.0, + } + ) + + assert isinstance(result, list), "Expected list to be delivered" + assert len(result) == 1, "Expected 1 element in the output for one input image" + assert set(result[0].keys()) == { + "predictions", + "registration_message", + }, "Expected all declared outputs to be delivered" + assert ( + len(result[0]["predictions"]) == 2 + ), "Expected 2 dogs crops on input image, hence 2 nested classification results" + assert [result[0]["predictions"][0]["top"], result[0]["predictions"][1]["top"]] == [ + "116.Parson_russell_terrier", + "131.Wirehaired_pointing_griffon", + ], "Expected predictions to be as measured in reference run" + assert ( + result[0]["registration_message"] + == ["Registration skipped due to sampling settings"] * 2 + ), "Expected data not registered due to sampling" + + +def test_detection_plus_classification_workflow_when_nothing_to_be_registered( + model_manager: ModelManager, + crowd_image: np.ndarray, + roboflow_api_key: str, +) -> None: + # given + workflow_init_parameters = { + "workflows_core.model_manager": model_manager, + "workflows_core.api_key": roboflow_api_key, + "workflows_core.step_execution_mode": StepExecutionMode.LOCAL, + } + execution_engine = ExecutionEngine.init( + workflow_definition=ACTIVE_LEARNING_WORKFLOW, + init_parameters=workflow_init_parameters, + max_concurrent_steps=WORKFLOWS_MAX_CONCURRENT_STEPS, + ) + # when + result = execution_engine.run( + runtime_parameters={ + "image": crowd_image, + "data_percentage": 0.0, + } + ) + + assert isinstance(result, list), "Expected list to be delivered" + assert len(result) == 1, "Expected 1 element in the output for one input image" + assert set(result[0].keys()) == { + "predictions", + "registration_message", + }, "Expected all declared outputs to be delivered" + assert ( + len(result[0]["predictions"]) == 0 + ), "Expected 0 dogs crops on input image, hence 0 nested classification results" + assert ( + len(result[0]["registration_message"]) == 0 + ), "Expected 0 dogs crops on input image, hence 0 nested statuses of registration" diff --git a/tests/workflows/integration_tests/execution/test_workflow_with_contours_detection.py b/tests/workflows/integration_tests/execution/test_workflow_with_contours_detection.py index cb79534c7..31b670991 100644 --- a/tests/workflows/integration_tests/execution/test_workflow_with_contours_detection.py +++ b/tests/workflows/integration_tests/execution/test_workflow_with_contours_detection.py @@ -4,7 +4,9 @@ from inference.core.managers.base import ModelManager from inference.core.workflows.core_steps.common.entities import StepExecutionMode from inference.core.workflows.execution_engine.core import ExecutionEngine -from tests.workflows.integration_tests.execution.workflows_gallery_collector.decorators import add_to_workflows_gallery 
+from tests.workflows.integration_tests.execution.workflows_gallery_collector.decorators import ( + add_to_workflows_gallery, +) WORKFLOW_WITH_CONTOUR_DETECTION = { "version": "1.0", diff --git a/tests/workflows/integration_tests/execution/test_workflow_with_counting_pixels_with_dominant_color.py b/tests/workflows/integration_tests/execution/test_workflow_with_counting_pixels_with_dominant_color.py index 0595258d9..438acbcf7 100644 --- a/tests/workflows/integration_tests/execution/test_workflow_with_counting_pixels_with_dominant_color.py +++ b/tests/workflows/integration_tests/execution/test_workflow_with_counting_pixels_with_dominant_color.py @@ -5,7 +5,9 @@ from inference.core.managers.base import ModelManager from inference.core.workflows.core_steps.common.entities import StepExecutionMode from inference.core.workflows.execution_engine.core import ExecutionEngine -from tests.workflows.integration_tests.execution.workflows_gallery_collector.decorators import add_to_workflows_gallery +from tests.workflows.integration_tests.execution.workflows_gallery_collector.decorators import ( + add_to_workflows_gallery, +) WORKFLOW_WITH_PIXELS_COUNTING = { "version": "1.0", diff --git a/tests/workflows/integration_tests/execution/test_workflow_with_dominant_color.py b/tests/workflows/integration_tests/execution/test_workflow_with_dominant_color.py index 5141a327c..65062094e 100644 --- a/tests/workflows/integration_tests/execution/test_workflow_with_dominant_color.py +++ b/tests/workflows/integration_tests/execution/test_workflow_with_dominant_color.py @@ -4,7 +4,9 @@ from inference.core.managers.base import ModelManager from inference.core.workflows.core_steps.common.entities import StepExecutionMode from inference.core.workflows.execution_engine.core import ExecutionEngine -from tests.workflows.integration_tests.execution.workflows_gallery_collector.decorators import add_to_workflows_gallery +from tests.workflows.integration_tests.execution.workflows_gallery_collector.decorators import ( + add_to_workflows_gallery, +) MINIMAL_DOMINANT_COLOR_WORKFLOW = { "version": "1.0", diff --git a/tests/workflows/integration_tests/execution/test_workflow_with_sahi.py b/tests/workflows/integration_tests/execution/test_workflow_with_sahi.py index 41494a175..b662b5ff4 100644 --- a/tests/workflows/integration_tests/execution/test_workflow_with_sahi.py +++ b/tests/workflows/integration_tests/execution/test_workflow_with_sahi.py @@ -7,7 +7,9 @@ from inference.core.managers.base import ModelManager from inference.core.workflows.core_steps.common.entities import StepExecutionMode from inference.core.workflows.execution_engine.core import ExecutionEngine -from tests.workflows.integration_tests.execution.workflows_gallery_collector.decorators import add_to_workflows_gallery +from tests.workflows.integration_tests.execution.workflows_gallery_collector.decorators import ( + add_to_workflows_gallery, +) SAHI_WORKFLOW = { "version": "1.0.0", diff --git a/tests/workflows/integration_tests/execution/test_workflow_with_sam2.py b/tests/workflows/integration_tests/execution/test_workflow_with_sam2.py index 25998df2a..822446f8e 100644 --- a/tests/workflows/integration_tests/execution/test_workflow_with_sam2.py +++ b/tests/workflows/integration_tests/execution/test_workflow_with_sam2.py @@ -11,7 +11,9 @@ BlockManifest, ) from inference.core.workflows.execution_engine.core import ExecutionEngine -from tests.workflows.integration_tests.execution.workflows_gallery_collector.decorators import add_to_workflows_gallery +from 
tests.workflows.integration_tests.execution.workflows_gallery_collector.decorators import ( + add_to_workflows_gallery, +) @pytest.mark.parametrize("images_field_alias", ["images", "image"]) @@ -241,5 +243,6 @@ def test_grounded_sam2_workflow( "dog", ], "Expected class names to be correct" assert result[0]["sam_predictions"].data["parent_id"].tolist() == [ - 'image.[0]', 'image.[0]' + "image.[0]", + "image.[0]", ], "Expected parent_ids to be correct" diff --git a/tests/workflows/integration_tests/execution/test_workflow_with_sift.py b/tests/workflows/integration_tests/execution/test_workflow_with_sift.py index a33e36468..f2b9c11ee 100644 --- a/tests/workflows/integration_tests/execution/test_workflow_with_sift.py +++ b/tests/workflows/integration_tests/execution/test_workflow_with_sift.py @@ -4,7 +4,9 @@ from inference.core.managers.base import ModelManager from inference.core.workflows.core_steps.common.entities import StepExecutionMode from inference.core.workflows.execution_engine.core import ExecutionEngine -from tests.workflows.integration_tests.execution.workflows_gallery_collector.decorators import add_to_workflows_gallery +from tests.workflows.integration_tests.execution.workflows_gallery_collector.decorators import ( + add_to_workflows_gallery, +) WORKFLOW_WITH_SIFT = { "version": "1.0", diff --git a/tests/workflows/integration_tests/execution/test_workflow_with_stitch_for_dynamic_crop.py b/tests/workflows/integration_tests/execution/test_workflow_with_stitch_for_dynamic_crop.py index e33512d07..3b0eb0815 100644 --- a/tests/workflows/integration_tests/execution/test_workflow_with_stitch_for_dynamic_crop.py +++ b/tests/workflows/integration_tests/execution/test_workflow_with_stitch_for_dynamic_crop.py @@ -4,7 +4,9 @@ from inference.core.managers.base import ModelManager from inference.core.workflows.core_steps.common.entities import StepExecutionMode from inference.core.workflows.execution_engine.core import ExecutionEngine -from tests.workflows.integration_tests.execution.workflows_gallery_collector.decorators import add_to_workflows_gallery +from tests.workflows.integration_tests.execution.workflows_gallery_collector.decorators import ( + add_to_workflows_gallery, +) WORKFLOW_WITH_DYNAMIC_CROP_AND_STITCH = { "version": "1.0.0", diff --git a/tests/workflows/unit_tests/core_steps/sinks/roboflow/roboflow_dataset_upload/__init__.py b/tests/workflows/unit_tests/core_steps/sinks/roboflow/roboflow_dataset_upload/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/workflows/unit_tests/core_steps/sinks/roboflow/test_roboflow_dataset_upload.py b/tests/workflows/unit_tests/core_steps/sinks/roboflow/roboflow_dataset_upload/test_v1.py similarity index 100% rename from tests/workflows/unit_tests/core_steps/sinks/roboflow/test_roboflow_dataset_upload.py rename to tests/workflows/unit_tests/core_steps/sinks/roboflow/roboflow_dataset_upload/test_v1.py diff --git a/tests/workflows/unit_tests/core_steps/sinks/roboflow/roboflow_dataset_upload/test_v2.py b/tests/workflows/unit_tests/core_steps/sinks/roboflow/roboflow_dataset_upload/test_v2.py new file mode 100644 index 000000000..6a9134881 --- /dev/null +++ b/tests/workflows/unit_tests/core_steps/sinks/roboflow/roboflow_dataset_upload/test_v2.py @@ -0,0 +1,374 @@ +from typing import Optional, Union +from unittest import mock +from unittest.mock import MagicMock + +import numpy as np +import pytest +from fastapi import BackgroundTasks +from pydantic import ValidationError + +from inference.core.cache import MemoryCache +from 
inference.core.workflows.core_steps.sinks.roboflow.dataset_upload import v2 +from inference.core.workflows.core_steps.sinks.roboflow.dataset_upload.v2 import ( + BlockManifest, + RoboflowDatasetUploadBlockV2, + maybe_register_datapoint_at_roboflow, +) +from inference.core.workflows.execution_engine.entities.base import ( + Batch, + ImageParentMetadata, + WorkflowImageData, +) + + +@mock.patch.object(v2, "register_datapoint_at_roboflow") +@mock.patch.object(v2, "random") +def test_maybe_register_datapoint_at_roboflow_when_data_sampled_off( + random_mock: MagicMock, + register_datapoint_at_roboflow_mock: MagicMock, +) -> None: + # given + random_mock.random.return_value = 0.38 + + # when + result = maybe_register_datapoint_at_roboflow( + image=MagicMock(), + prediction=MagicMock(), + target_project="some", + usage_quota_name="some", + data_percentage=36.0, + persist_predictions=True, + minutely_usage_limit=10, + hourly_usage_limit=100, + daily_usage_limit=1000, + max_image_size=(128, 128), + compression_level=75, + registration_tags=[], + fire_and_forget=False, + labeling_batch_prefix="some", + new_labeling_batch_frequency="never", + cache=MagicMock(), + background_tasks=MagicMock(), + thread_pool_executor=MagicMock(), + api_key="XXX", + ) + + # then + register_datapoint_at_roboflow_mock.assert_not_called() + assert result == (False, "Registration skipped due to sampling settings") + + +@mock.patch.object(v2, "register_datapoint_at_roboflow") +@mock.patch.object(v2, "random") +def test_maybe_register_datapoint_at_roboflow_when_data_sample_accepted( + random_mock: MagicMock, + register_datapoint_at_roboflow_mock: MagicMock, +) -> None: + # given + random_mock.random.return_value = 0.38 + register_datapoint_at_roboflow_mock.return_value = (False, "ok") + + # when + result = maybe_register_datapoint_at_roboflow( + image=MagicMock(), + prediction=MagicMock(), + target_project="some", + usage_quota_name="some", + data_percentage=40.0, + persist_predictions=True, + minutely_usage_limit=10, + hourly_usage_limit=100, + daily_usage_limit=1000, + max_image_size=(128, 128), + compression_level=75, + registration_tags=[], + fire_and_forget=False, + labeling_batch_prefix="some", + new_labeling_batch_frequency="never", + cache=MagicMock(), + background_tasks=MagicMock(), + thread_pool_executor=MagicMock(), + api_key="XXX", + ) + + # then + register_datapoint_at_roboflow_mock.assert_called_once(), "Expected to be called when data is sampled" + assert result == (False, "ok"), "Expected to see mock return value" + + +@pytest.mark.parametrize("image_field_name", ["image", "images"]) +@pytest.mark.parametrize("image_selector", ["$inputs.image", "$steps.some.image"]) +@pytest.mark.parametrize("predictions", ["$steps.some.predictions", None]) +def test_manifest_parsing_when_valid_input_provided_and_fields_not_linked( + image_field_name: str, + image_selector: str, + predictions: Optional[str], +) -> None: + # given + raw_manifest = { + "type": "roboflow_core/roboflow_dataset_upload@v2", + "name": "some", + image_field_name: image_selector, + "predictions": predictions, + "target_project": "some1", + "usage_quota_name": "my_quota", + "data_percentage": 37.5, + "persist_predictions": False, + "minutely_usage_limit": 10, + "hourly_usage_limit": 100, + "daily_usage_limit": 1000, + "max_image_size": (100, 200), + "compression_level": 100, + "registration_tags": ["a", "b"], + "disable_sink": True, + "fire_and_forget": False, + "labeling_batch_prefix": "my_batch", + "labeling_batches_recreation_frequency": "never", + } 
+ + # when + result = BlockManifest.model_validate(raw_manifest) + + # then + assert result == BlockManifest( + type="roboflow_core/roboflow_dataset_upload@v2", + name="some", + images=image_selector, + predictions=predictions, + target_project="some1", + usage_quota_name="my_quota", + data_percentage=37.5, + persist_predictions=False, + minutely_usage_limit=10, + hourly_usage_limit=100, + daily_usage_limit=1000, + max_image_size=(100, 200), + compression_level=100, + registration_tags=["a", "b"], + disable_sink=True, + fire_and_forget=False, + labeling_batch_prefix="my_batch", + labeling_batches_recreation_frequency="never", + ) + + +@pytest.mark.parametrize("image_field_name", ["image", "images"]) +@pytest.mark.parametrize("image_selector", ["$inputs.image", "$steps.some.image"]) +@pytest.mark.parametrize("predictions", ["$steps.some.predictions", None]) +def test_manifest_parsing_when_valid_input_provided_and_fields_linked( + image_field_name: str, + image_selector: str, + predictions: Optional[str], +) -> None: + # given + raw_manifest = { + "type": "roboflow_core/roboflow_dataset_upload@v2", + "name": "some", + image_field_name: image_selector, + "predictions": predictions, + "target_project": "some1", + "usage_quota_name": "my_quota", + "data_percentage": "$inputs.data_percentage", + "persist_predictions": "$inputs.persist_predictions", + "minutely_usage_limit": 10, + "hourly_usage_limit": 100, + "daily_usage_limit": 1000, + "max_image_size": (100, 200), + "compression_level": 100, + "registration_tags": ["a", "b", "$inputs.tag"], + "disable_sink": "$inputs.disable_sink", + "fire_and_forget": "$inputs.fire_and_forget", + "labeling_batch_prefix": "$inputs.labeling_batch_prefix", + "labeling_batches_recreation_frequency": "never", + } + + # when + result = BlockManifest.model_validate(raw_manifest) + + # then + assert result == BlockManifest( + type="roboflow_core/roboflow_dataset_upload@v2", + name="some", + images=image_selector, + predictions=predictions, + target_project="some1", + usage_quota_name="my_quota", + data_percentage="$inputs.data_percentage", + persist_predictions="$inputs.persist_predictions", + minutely_usage_limit=10, + hourly_usage_limit=100, + daily_usage_limit=1000, + max_image_size=(100, 200), + compression_level=100, + registration_tags=["a", "b", "$inputs.tag"], + disable_sink="$inputs.disable_sink", + fire_and_forget="$inputs.fire_and_forget", + labeling_batch_prefix="$inputs.labeling_batch_prefix", + labeling_batches_recreation_frequency="never", + ) + + +@pytest.mark.parametrize("compression_level", [0, -1, 101]) +def test_manifest_parsing_when_compression_level_invalid( + compression_level: float, +) -> None: + raw_manifest = { + "type": "roboflow_core/roboflow_dataset_upload@v2", + "name": "some", + "images": "$inputs.image", + "predictions": None, + "target_project": "some1", + "usage_quota_name": "my_quota", + "data_percentage": "$inputs.data_percentage", + "persist_predictions": "$inputs.persist_predictions", + "minutely_usage_limit": 10, + "hourly_usage_limit": 100, + "daily_usage_limit": 1000, + "max_image_size": (100, 200), + "compression_level": compression_level, + "registration_tags": ["a", "b", "$inputs.tag"], + "disable_sink": "$inputs.disable_sink", + "fire_and_forget": "$inputs.fire_and_forget", + "labeling_batch_prefix": "$inputs.labeling_batch_prefix", + "labeling_batches_recreation_frequency": "never", + } + + # when + with pytest.raises(ValidationError): + _ = BlockManifest.model_validate(raw_manifest) + + 
+@pytest.mark.parametrize("data_percentage", [-1.0, 101.0]) +def test_manifest_parsing_when_data_percentage_invalid(data_percentage: float) -> None: + raw_manifest = { + "type": "roboflow_core/roboflow_dataset_upload@v2", + "name": "some", + "images": "$inputs.image", + "predictions": None, + "target_project": "some1", + "usage_quota_name": "my_quota", + "data_percentage": data_percentage, + "persist_predictions": "$inputs.persist_predictions", + "minutely_usage_limit": 10, + "hourly_usage_limit": 100, + "daily_usage_limit": 1000, + "max_image_size": (100, 200), + "compression_level": 85, + "registration_tags": ["a", "b", "$inputs.tag"], + "disable_sink": "$inputs.disable_sink", + "fire_and_forget": "$inputs.fire_and_forget", + "labeling_batch_prefix": "$inputs.labeling_batch_prefix", + "labeling_batches_recreation_frequency": "never", + } + + # when + with pytest.raises(ValidationError): + _ = BlockManifest.model_validate(raw_manifest) + + +@mock.patch.object(v2, "register_datapoint_at_roboflow") +def test_run_sink_when_data_sampled_off( + register_datapoint_at_roboflow_mock: MagicMock, +) -> None: + # given + background_tasks = BackgroundTasks() + cache = MemoryCache() + data_collector_block = RoboflowDatasetUploadBlockV2( + cache=cache, + api_key="my_api_key", + background_tasks=background_tasks, + thread_pool_executor=None, + ) + image = WorkflowImageData( + parent_metadata=ImageParentMetadata(parent_id="parent"), + numpy_image=np.zeros((512, 256, 3), dtype=np.uint8), + ) + register_datapoint_at_roboflow_mock.return_value = False, "OK" + indices = [(0,), (1,), (2,)] + + # when + result = data_collector_block.run( + images=Batch(content=[image, image, image], indices=indices), + predictions=None, + target_project="my_project", + usage_quota_name="my_quota", + data_percentage=0.0, + persist_predictions=True, + minutely_usage_limit=10, + hourly_usage_limit=100, + daily_usage_limit=1000, + max_image_size=(128, 128), + compression_level=75, + registration_tags=["some"], + disable_sink=False, + fire_and_forget=False, + labeling_batch_prefix="my_batch", + labeling_batches_recreation_frequency="never", + ) + + # then + assert ( + result + == [ + { + "error_status": False, + "message": "Registration skipped due to sampling settings", + } + ] + * 3 + ), "Expected nothing registered" + register_datapoint_at_roboflow_mock.assert_not_called() + + +@mock.patch.object(v2, "register_datapoint_at_roboflow") +def test_run_sink_when_data_sampled( + register_datapoint_at_roboflow_mock: MagicMock, +) -> None: + # given + background_tasks = BackgroundTasks() + cache = MemoryCache() + data_collector_block = RoboflowDatasetUploadBlockV2( + cache=cache, + api_key="my_api_key", + background_tasks=background_tasks, + thread_pool_executor=None, + ) + image = WorkflowImageData( + parent_metadata=ImageParentMetadata(parent_id="parent"), + numpy_image=np.zeros((512, 256, 3), dtype=np.uint8), + ) + register_datapoint_at_roboflow_mock.return_value = False, "OK" + indices = [(0,), (1,), (2,)] + + # when + result = data_collector_block.run( + images=Batch(content=[image, image, image], indices=indices), + predictions=None, + target_project="my_project", + usage_quota_name="my_quota", + data_percentage=100.1, # more than 1.0 to ensure always True on sampling + persist_predictions=True, + minutely_usage_limit=10, + hourly_usage_limit=100, + daily_usage_limit=1000, + max_image_size=(128, 128), + compression_level=75, + registration_tags=["some"], + disable_sink=False, + fire_and_forget=False, + 
labeling_batch_prefix="my_batch", + labeling_batches_recreation_frequency="never", + ) + + # then + assert ( + result + == [ + { + "error_status": False, + "message": "OK", + } + ] + * 3 + ), "Expected data registered" + assert register_datapoint_at_roboflow_mock.call_count == 3 diff --git a/tests/workflows/unit_tests/core_steps/transformations/test_detection_offset.py b/tests/workflows/unit_tests/core_steps/transformations/test_detection_offset.py index f20967c3a..f62815361 100644 --- a/tests/workflows/unit_tests/core_steps/transformations/test_detection_offset.py +++ b/tests/workflows/unit_tests/core_steps/transformations/test_detection_offset.py @@ -109,6 +109,7 @@ def test_offset_detection() -> None: detections["parent_id"][0] ), "New detection id (random) must be assigned" + def test_offset_detection_when_larger_than_image() -> None: # given detections = sv.Detections( @@ -144,8 +145,6 @@ def test_offset_detection_when_larger_than_image() -> None: ), "New detection id (random) must be assigned" - - def test_offset_detection_when_nothing_predicted() -> None: # given detections = sv.Detections.empty()