diff --git a/.github/workflows/.github-ci.yml b/.github/workflows/.github-ci.yml
index 1f86384..3e98321 100644
--- a/.github/workflows/.github-ci.yml
+++ b/.github/workflows/.github-ci.yml
@@ -34,4 +34,4 @@ jobs:
         pylint --jobs=0 $(git ls-files '*.py') --ignore-paths=example --rcfile=.pylintrc
     - name: Test with pytest
       run: |
-        pytest AutoMLOps --cov=AutoMLOps
+        pytest tests --cov=AutoMLOps
diff --git a/AutoMLOps/AutoMLOps.py b/AutoMLOps/AutoMLOps.py
index 702da33..d70e654 100644
--- a/AutoMLOps/AutoMLOps.py
+++ b/AutoMLOps/AutoMLOps.py
@@ -17,49 +17,35 @@
 # pylint: disable=C0103
 # pylint: disable=line-too-long
+# pylint: disable=unused-import

 import functools
 import logging
 import os
-import re
 import sys
 import subprocess
 from typing import Callable, Dict, List, Optional

-from AutoMLOps import BuilderUtils
-from AutoMLOps import ComponentBuilder
-from AutoMLOps import PipelineBuilder
-from AutoMLOps import CloudRunBuilder
+from AutoMLOps.utils.constants import (
+    BASE_DIR,
+    GENERATED_DEFAULTS_FILE,
+    GENERATED_DIRS,
+    GENERATED_RESOURCES_SH_FILE,
+    OUTPUT_DIR
+)
+from AutoMLOps.utils.utils import (
+    execute_process,
+    make_dirs,
+    read_yaml_file,
+    validate_schedule,
+)
+from AutoMLOps.frameworks.kfp import builder as KfpBuilder
+from AutoMLOps.frameworks.kfp import scaffold as KfpScaffold
+from AutoMLOps.deployments.cloudbuild import builder as CloudBuildBuilder

 logging.basicConfig(stream=sys.stdout, level=logging.INFO, format='%(message)s')
 logger = logging.getLogger()

-TOP_LVL_NAME = 'AutoMLOps/'
-DEFAULTS_FILE = TOP_LVL_NAME + 'configs/defaults.yaml'
-PIPELINE_SPEC_SH_FILE = TOP_LVL_NAME + 'scripts/build_pipeline_spec.sh'
-BUILD_COMPONENTS_SH_FILE = TOP_LVL_NAME + 'scripts/build_components.sh'
-RUN_PIPELINE_SH_FILE = TOP_LVL_NAME + 'scripts/run_pipeline.sh'
-RUN_ALL_SH_FILE = TOP_LVL_NAME + 'scripts/run_all.sh'
-RESOURCES_SH_FILE = TOP_LVL_NAME + 'scripts/create_resources.sh'
-SUBMIT_JOB_FILE = TOP_LVL_NAME + 'scripts/submit_to_runner_svc.sh'
-CLOUDBUILD_FILE = TOP_LVL_NAME + 'cloudbuild.yaml'
-PIPELINE_FILE = TOP_LVL_NAME + 'pipelines/pipeline.py'
-DEFAULT_IMAGE = 'python:3.9-slim'
-COMPONENT_BASE = TOP_LVL_NAME + 'components/component_base'
-COMPONENT_BASE_SRC = TOP_LVL_NAME + 'components/component_base/src'
-OUTPUT_DIR = BuilderUtils.TMPFILES_DIR
-DIRS = [
-    TOP_LVL_NAME,
-    TOP_LVL_NAME + 'components',
-    TOP_LVL_NAME + 'components/component_base',
-    TOP_LVL_NAME + 'components/component_base/src',
-    TOP_LVL_NAME + 'configs',
-    TOP_LVL_NAME + 'images',
-    TOP_LVL_NAME + 'pipelines',
-    TOP_LVL_NAME + 'pipelines/runtime_parameters',
-    TOP_LVL_NAME + 'scripts',
-    TOP_LVL_NAME + 'scripts/pipeline_spec']
-
 def go(project_id: str,
        pipeline_params: Dict,
        af_registry_location: Optional[str] = 'us-central1',
@@ -145,29 +131,31 @@ def generate(project_id: str,

     Args:
         See go() function.
""" - BuilderUtils.validate_schedule(schedule_pattern, run_local) + # Validate that run_local=False if schedule_pattern parameter is set + validate_schedule(schedule_pattern, run_local) + + # Set defaults if none were given for bucket name and pipeline runner sa default_bucket_name = f'{project_id}-bucket' if gs_bucket_name is None else gs_bucket_name default_pipeline_runner_sa = f'vertex-pipelines@{project_id}.iam.gserviceaccount.com' if pipeline_runner_sa is None else pipeline_runner_sa - BuilderUtils.make_dirs(DIRS) - _create_default_config(af_registry_location, af_registry_name, cb_trigger_location, - cb_trigger_name, cloud_run_location, cloud_run_name, - cloud_tasks_queue_location, cloud_tasks_queue_name, csr_branch_name, - csr_name, gs_bucket_location, default_bucket_name, - default_pipeline_runner_sa, project_id, schedule_location, - schedule_name, schedule_pattern, vpc_connector) - _create_scripts(run_local) - _create_cloudbuild_config(run_local) - # copy tmp pipeline file over to AutoMLOps dir - BuilderUtils.execute_process(f'cp {BuilderUtils.PIPELINE_TMPFILE} {PIPELINE_FILE}', to_null=False) - # Create components and pipelines - components_path_list = BuilderUtils.get_components_list() - for path in components_path_list: - ComponentBuilder.formalize(path, TOP_LVL_NAME, DEFAULTS_FILE, use_kfp_spec) - PipelineBuilder.formalize(custom_training_job_specs, DEFAULTS_FILE, pipeline_params, TOP_LVL_NAME) - _create_requirements() - _create_dockerfile() - if not run_local: - CloudRunBuilder.formalize(TOP_LVL_NAME, DEFAULTS_FILE) + + # Make necessary directories + make_dirs(GENERATED_DIRS) + + # Switch statement to go here for different frameworks and deployments: + + # Build files required to run a Kubeflow Pipeline + KfpBuilder.build(project_id, pipeline_params, af_registry_location, + af_registry_name, cb_trigger_location, cb_trigger_name, + cloud_run_location, cloud_run_name, cloud_tasks_queue_location, + cloud_tasks_queue_name, csr_branch_name, csr_name, + custom_training_job_specs, gs_bucket_location, default_bucket_name, + default_pipeline_runner_sa, run_local, schedule_location, + schedule_name, schedule_pattern, use_kfp_spec, + vpc_connector) + + CloudBuildBuilder.build(af_registry_location, af_registry_name, cloud_run_location, + cloud_run_name, default_pipeline_runner_sa, project_id, + run_local, schedule_pattern, vpc_connector) def run(run_local: bool): """Builds, compiles, and submits the PipelineJob. @@ -175,17 +163,21 @@ def run(run_local: bool): Args: run_local: Flag that determines whether to use Cloud Run CI/CD. """ - BuilderUtils.execute_process('./'+RESOURCES_SH_FILE, to_null=False) + # Build resources + execute_process('./' + GENERATED_RESOURCES_SH_FILE, to_null=False) + + # Build, compile, and submit pipeline job if run_local: - os.chdir(TOP_LVL_NAME) + os.chdir(BASE_DIR) try: - subprocess.run(['./scripts/run_all.sh'], shell=True, check=True, - stderr=subprocess.STDOUT) + subprocess.run(['./scripts/run_all.sh'], shell=True, check=True, stderr=subprocess.STDOUT) except subprocess.CalledProcessError as e: logging.info(e) os.chdir('../') else: _push_to_csr() + + # Log generated resources _resources_generation_manifest(run_local) def _resources_generation_manifest(run_local: bool): @@ -194,15 +186,15 @@ def _resources_generation_manifest(run_local: bool): Args: run_local: Flag that determines whether to use Cloud Run CI/CD. 
""" - defaults = BuilderUtils.read_yaml_file(DEFAULTS_FILE) + defaults = read_yaml_file(GENERATED_DEFAULTS_FILE) logging.info('\n' - '#################################################################\n' - '# #\n' - '# RESOURCES MANIFEST #\n' - '#---------------------------------------------------------------#\n' - '# Generated resources can be found at the following urls #\n' - '# #\n' - '#################################################################\n') + '#################################################################\n' + '# #\n' + '# RESOURCES MANIFEST #\n' + '#---------------------------------------------------------------#\n' + '# Generated resources can be found at the following urls #\n' + '# #\n' + '#################################################################\n') # pylint: disable=logging-fstring-interpolation logging.info(f'''Google Cloud Storage Bucket: https://console.cloud.google.com/storage/{defaults['gcp']['gs_bucket_name']}''') logging.info(f'''Artifact Registry: https://console.cloud.google.com/artifacts/docker/{defaults['gcp']['project_id']}/{defaults['gcp']['af_registry_location']}/{defaults['gcp']['af_registry_name']}''') @@ -222,545 +214,35 @@ def _push_to_csr(): """Initializes a git repo if one doesn't already exist, then pushes to the specified branch and triggers the cloudbuild job. """ - defaults = BuilderUtils.read_yaml_file(DEFAULTS_FILE) + defaults = read_yaml_file(GENERATED_DEFAULTS_FILE) + if not os.path.exists('.git'): - BuilderUtils.execute_process('git init', to_null=False) - BuilderUtils.execute_process('''git config --global credential.'https://source.developers.google.com'.helper gcloud.sh''', to_null=False) - BuilderUtils.execute_process(f'''git remote add origin https://source.developers.google.com/p/{defaults['gcp']['project_id']}/r/{defaults['gcp']['cloud_source_repository']}''', to_null=False) - BuilderUtils.execute_process(f'''git checkout -B {defaults['gcp']['cloud_source_repository_branch']}''', to_null=False) + + # Initialize git and configure credentials + execute_process('git init', to_null=False) + execute_process('''git config --global credential.'https://source.developers.google.com'.helper gcloud.sh''', to_null=False) + + # Add repo and branch + execute_process(f'''git remote add origin https://source.developers.google.com/p/{defaults['gcp']['project_id']}/r/{defaults['gcp']['cloud_source_repository']}''', to_null=False) + execute_process(f'''git checkout -B {defaults['gcp']['cloud_source_repository_branch']}''', to_null=False) has_remote_branch = subprocess.check_output([f'''git ls-remote origin {defaults['gcp']['cloud_source_repository_branch']}'''], shell=True, stderr=subprocess.STDOUT) - if not has_remote_branch: - # This will initialize the branch, a second push will be required to trigger the cloudbuild job after initializing - BuilderUtils.execute_process('touch .gitkeep', to_null=False) # needed to keep dir here - BuilderUtils.execute_process('git add .gitkeep', to_null=False) - BuilderUtils.execute_process('''git commit -m 'init' ''', to_null=False) - BuilderUtils.execute_process(f'''git push origin {defaults['gcp']['cloud_source_repository_branch']} --force''', to_null=False) - BuilderUtils.execute_process(f'touch {TOP_LVL_NAME}scripts/pipeline_spec/.gitkeep', to_null=False) # needed to keep dir here - BuilderUtils.execute_process('git add .', to_null=False) - BuilderUtils.execute_process('''git commit -m 'Run AutoMLOps' ''', to_null=False) - BuilderUtils.execute_process(f'''git push origin 
{defaults['gcp']['cloud_source_repository_branch']} --force''', to_null=False) + # This will initialize the branch, a second push will be required to trigger the cloudbuild job after initializing + if not has_remote_branch: + execute_process('touch .gitkeep', to_null=False) # needed to keep dir here + execute_process('git add .gitkeep', to_null=False) + execute_process('''git commit -m 'init' ''', to_null=False) + execute_process(f'''git push origin {defaults['gcp']['cloud_source_repository_branch']} --force''', to_null=False) + + # Add, commit, and push changes to CSR + execute_process(f'touch {BASE_DIR}scripts/pipeline_spec/.gitkeep', to_null=False) # needed to keep dir here + execute_process('git add .', to_null=False) + execute_process('''git commit -m 'Run AutoMLOps' ''', to_null=False) + execute_process(f'''git push origin {defaults['gcp']['cloud_source_repository_branch']} --force''', to_null=False) # pylint: disable=logging-fstring-interpolation logging.info(f'''Pushing code to {defaults['gcp']['cloud_source_repository_branch']} branch, triggering cloudbuild...''') logging.info(f'''Cloudbuild job running at: https://console.cloud.google.com/cloud-build/builds;region={defaults['gcp']['cb_trigger_location']}''') -def _create_default_config(af_registry_location: str, - af_registry_name: str, - cb_trigger_location: str, - cb_trigger_name: str, - cloud_run_location: str, - cloud_run_name: str, - cloud_tasks_queue_location: str, - cloud_tasks_queue_name: str, - csr_branch_name: str, - csr_name: str, - gs_bucket_location: str, - gs_bucket_name: str, - pipeline_runner_sa: str, - project_id: str, - schedule_location: str, - schedule_name: str, - schedule_pattern: str, - vpc_connector: str): - """Writes default variables to defaults.yaml. This defaults - file is used by subsequent functions and by the pipeline - files themselves. - - Args: - af_registry_location: Region of the Artifact Registry. - af_registry_name: Artifact Registry name where components are stored. - cb_trigger_location: The location of the cloudbuild trigger. - cb_trigger_name: The name of the cloudbuild trigger. - cloud_run_location: The location of the cloud runner service. - cloud_run_name: The name of the cloud runner service. - cloud_tasks_queue_location: The location of the cloud tasks queue. - cloud_tasks_queue_name: The name of the cloud tasks queue. - csr_branch_name: The name of the csr branch to push to to trigger cb job. - csr_name: The name of the cloud source repo to use. - gs_bucket_location: Region of the GS bucket. - gs_bucket_name: GS bucket name where pipeline run metadata is stored. - pipeline_runner_sa: Service Account to runner PipelineJobs. - project_id: The project ID. - schedule_location: The location of the scheduler resource. - schedule_name: The name of the scheduler resource. - schedule_pattern: Cron formatted value used to create a Scheduled retrain job. - vpc_connector: The name of the vpc connector to use. 
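# --- Editor's aside (illustrative, not part of the diff) ---------------------
# The deleted _create_default_config() above and the new
# read_yaml_file(GENERATED_DEFAULTS_FILE) calls work against the same
# two-section defaults config. A minimal sketch of the dict shape the rest of
# this file indexes into; every value below is a placeholder, not a real
# project setting:

example_defaults = {
    'gcp': {
        'project_id': 'my-project',
        'af_registry_location': 'us-central1',
        'af_registry_name': 'my-artifact-registry',
        'cb_trigger_location': 'us-central1',
        'cb_trigger_name': 'my-cb-trigger',
        'cloud_run_location': 'us-central1',
        'cloud_run_name': 'my-runner-svc',
        'cloud_schedule_pattern': 'No Schedule Specified',
        'cloud_source_repository': 'my-csr-repo',
        'cloud_source_repository_branch': 'automlops',
        'gs_bucket_name': 'my-project-bucket',
        'pipeline_runner_service_account': 'vertex-pipelines@my-project.iam.gserviceaccount.com',
        'vpc_connector': 'No VPC Specified',
    },
    'pipelines': {
        'pipeline_region': 'us-central1',
        'pipeline_storage_path': 'gs://my-project-bucket/pipeline_root',
    },
}
# -----------------------------------------------------------------------------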
- """ - defaults = (BuilderUtils.LICENSE + - f'# These values are descriptive only - do not change.\n' - f'# Rerun AutoMLOps.generate() to change these values.\n' - f'gcp:\n' - f' af_registry_location: {af_registry_location}\n' - f' af_registry_name: {af_registry_name}\n' - f' cb_trigger_location: {cb_trigger_location}\n' - f' cb_trigger_name: {cb_trigger_name}\n' - f' cloud_run_location: {cloud_run_location}\n' - f' cloud_run_name: {cloud_run_name}\n' - f' cloud_tasks_queue_location: {cloud_tasks_queue_location}\n' - f' cloud_tasks_queue_name: {cloud_tasks_queue_name}\n' - f' cloud_schedule_location: {schedule_location}\n' - f' cloud_schedule_name: {schedule_name}\n' - f' cloud_schedule_pattern: {schedule_pattern}\n' - f' cloud_source_repository: {csr_name}\n' - f' cloud_source_repository_branch: {csr_branch_name}\n' - f' gs_bucket_name: {gs_bucket_name}\n' - f' pipeline_runner_service_account: {pipeline_runner_sa}\n' - f' project_id: {project_id}\n' - f' vpc_connector: {vpc_connector}\n' - f'\n' - f'pipelines:\n' - f' parameter_values_path: {BuilderUtils.PARAMETER_VALUES_PATH}\n' - f' pipeline_component_directory: components\n' - f' pipeline_job_spec_path: {BuilderUtils.PIPELINE_JOB_SPEC_PATH}\n' - f' pipeline_region: {gs_bucket_location}\n' - f' pipeline_storage_path: gs://{gs_bucket_name}/pipeline_root\n') - BuilderUtils.write_file(DEFAULTS_FILE, defaults, 'w+') - -def _create_scripts(run_local: bool): - """Writes various shell scripts used for pipeline and component - construction, as well as pipeline execution. - - Args: - run_local: Flag that determines whether to use Cloud Run CI/CD. - """ - build_pipeline_spec = ( - '#!/bin/bash\n' + BuilderUtils.LICENSE + - '# Builds the pipeline specs\n' - f'# This script should run from the {TOP_LVL_NAME} directory\n' - '# Change directory in case this is not the script root.\n' - '\n' - 'CONFIG_FILE=configs/defaults.yaml\n' - '\n' - 'python3 -m pipelines.pipeline --config $CONFIG_FILE\n') - build_components = ( - '#!/bin/bash\n' + BuilderUtils.LICENSE + - '# Submits a Cloud Build job that builds and deploys the components\n' - f'# This script should run from the {TOP_LVL_NAME} directory\n' - '# Change directory in case this is not the script root.\n' - '\n' - 'gcloud builds submit .. --config cloudbuild.yaml --timeout=3600\n') - run_pipeline = ( - '#!/bin/bash\n' + BuilderUtils.LICENSE + - '# Submits the PipelineJob to Vertex AI\n' - f'# This script should run from the {TOP_LVL_NAME} directory\n' - '# Change directory in case this is not the script root.\n' - '\n' - 'CONFIG_FILE=configs/defaults.yaml\n' - '\n' - 'python3 -m pipelines.pipeline_runner --config $CONFIG_FILE\n') - run_all = ( - '#!/bin/bash\n' + BuilderUtils.LICENSE + - '# Builds components, pipeline specs, and submits the PipelineJob.\n' - f'# This script should run from the {TOP_LVL_NAME} directory\n' - '# Change directory in case this is not the script root.\n' - '\n' - '''GREEN='\033[0;32m'\n''' - '''NC='\033[0m'\n''' - '\n' - 'echo -e "${GREEN} BUILDING COMPONENTS ${NC}"\n' - 'gcloud builds submit .. 
--config cloudbuild.yaml --timeout=3600\n' - '\n' - 'echo -e "${GREEN} BUILDING PIPELINE SPEC ${NC}"\n' - './scripts/build_pipeline_spec.sh\n' - '\n' - 'echo -e "${GREEN} RUNNING PIPELINE JOB ${NC}"\n' - './scripts/run_pipeline.sh\n') - BuilderUtils.write_and_chmod(PIPELINE_SPEC_SH_FILE, build_pipeline_spec) - BuilderUtils.write_and_chmod(BUILD_COMPONENTS_SH_FILE, build_components) - BuilderUtils.write_and_chmod(RUN_PIPELINE_SH_FILE, run_pipeline) - BuilderUtils.write_and_chmod(RUN_ALL_SH_FILE, run_all) - _create_resources_scripts(run_local) - -def _create_resources_scripts(run_local: bool): - """Writes create_resources.sh and create_scheduler.sh, which creates a specified - artifact registry and gs bucket if they do not already exist. Also creates - a service account to run Vertex AI Pipelines. Requires a defaults.yaml - config to pull config vars from. - - Args: - run_local: Flag that determines whether to use Cloud Run CI/CD. - """ - defaults = BuilderUtils.read_yaml_file(DEFAULTS_FILE) - left_bracket = '{' - right_bracket = '}' - newline = '\n' - # pylint: disable=anomalous-backslash-in-string - create_resources_script = ( - '#!/bin/bash\n' + BuilderUtils.LICENSE + - f'# This script will create an artifact registry and gs bucket if they do not already exist.\n' - f'\n' - f'''GREEN='\033[0;32m'\n''' - f'''NC='\033[0m'\n''' - f'''AF_REGISTRY_NAME={defaults['gcp']['af_registry_name']}\n''' - f'''AF_REGISTRY_LOCATION={defaults['gcp']['af_registry_location']}\n''' - f'''PROJECT_ID={defaults['gcp']['project_id']}\n''' - f'''PROJECT_NUMBER=`gcloud projects describe {defaults['gcp']['project_id']} --format 'value(projectNumber)'`\n''' - f'''BUCKET_NAME={defaults['gcp']['gs_bucket_name']}\n''' - f'''BUCKET_LOCATION={defaults['pipelines']['pipeline_region']}\n''' - f'''SERVICE_ACCOUNT_NAME={defaults['gcp']['pipeline_runner_service_account'].split('@')[0]}\n''' - f'''SERVICE_ACCOUNT_FULL={defaults['gcp']['pipeline_runner_service_account']}\n''' - f'''CLOUD_SOURCE_REPO={defaults['gcp']['cloud_source_repository']}\n''' - f'''CLOUD_SOURCE_REPO_BRANCH={defaults['gcp']['cloud_source_repository_branch']}\n''' - f'''CB_TRIGGER_LOCATION={defaults['gcp']['cb_trigger_location']}\n''' - f'''CB_TRIGGER_NAME={defaults['gcp']['cb_trigger_name']}\n''' - f'''CLOUD_TASKS_QUEUE_LOCATION={defaults['gcp']['cloud_tasks_queue_location']}\n''' - f'''CLOUD_TASKS_QUEUE_NAME={defaults['gcp']['cloud_tasks_queue_name']}\n''' - f'\n' - f'echo -e "$GREEN Updating required API services in project $PROJECT_ID $NC"\n' - f'gcloud services enable cloudresourcemanager.googleapis.com \{newline}' - f' aiplatform.googleapis.com \{newline}' - f' artifactregistry.googleapis.com \{newline}' - f' cloudbuild.googleapis.com \{newline}' - f' cloudscheduler.googleapis.com \{newline}' - f' cloudtasks.googleapis.com \{newline}' - f' compute.googleapis.com \{newline}' - f' iam.googleapis.com \{newline}' - f' iamcredentials.googleapis.com \{newline}' - f' ml.googleapis.com \{newline}' - f' run.googleapis.com \{newline}' - f' storage.googleapis.com \{newline}' - f' sourcerepo.googleapis.com\n' - f'\n' - f'echo -e "$GREEN Checking for Artifact Registry: $AF_REGISTRY_NAME in project $PROJECT_ID $NC"\n' - f'if ! 
(gcloud artifacts repositories list --project="$PROJECT_ID" --location=$AF_REGISTRY_LOCATION | grep -E "(^|[[:blank:]])$AF_REGISTRY_NAME($|[[:blank:]])"); then\n' - f'\n' - f' echo "Creating Artifact Registry: ${left_bracket}AF_REGISTRY_NAME{right_bracket} in project $PROJECT_ID"\n' - f' gcloud artifacts repositories create "$AF_REGISTRY_NAME" \{newline}' - f' --repository-format=docker \{newline}' - f' --location=$AF_REGISTRY_LOCATION \{newline}' - f' --project="$PROJECT_ID" \{newline}' - f' --description="Artifact Registry ${left_bracket}AF_REGISTRY_NAME{right_bracket} in ${left_bracket}AF_REGISTRY_LOCATION{right_bracket}." \n' - f'\n' - f'else\n' - f'\n' - f' echo "Artifact Registry: ${left_bracket}AF_REGISTRY_NAME{right_bracket} already exists in project $PROJECT_ID"\n' - f'\n' - f'fi\n' - f'\n' - f'\n' - f'echo -e "$GREEN Checking for GS Bucket: $BUCKET_NAME in project $PROJECT_ID $NC"\n' - f'if !(gsutil ls -b gs://$BUCKET_NAME | grep --fixed-strings "$BUCKET_NAME"); then\n' - f'\n' - f' echo "Creating GS Bucket: ${left_bracket}BUCKET_NAME{right_bracket} in project $PROJECT_ID"\n' - f' gsutil mb -l ${left_bracket}BUCKET_LOCATION{right_bracket} gs://$BUCKET_NAME\n' - f'\n' - f'else\n' - f'\n' - f' echo "GS Bucket: ${left_bracket}BUCKET_NAME{right_bracket} already exists in project $PROJECT_ID"\n' - f'\n' - f'fi\n' - f'\n' - f'echo -e "$GREEN Checking for Service Account: $SERVICE_ACCOUNT_NAME in project $PROJECT_ID $NC"\n' - f'if ! (gcloud iam service-accounts list --project="$PROJECT_ID" | grep -E "(^|[[:blank:]])$SERVICE_ACCOUNT_FULL($|[[:blank:]])"); then\n' - f'\n' - f' echo "Creating Service Account: ${left_bracket}SERVICE_ACCOUNT_NAME{right_bracket} in project $PROJECT_ID"\n' - f' gcloud iam service-accounts create $SERVICE_ACCOUNT_NAME \{newline}' - f' --description="For submitting PipelineJobs" \{newline}' - f' --display-name="Pipeline Runner Service Account"\n' - f'else\n' - f'\n' - f' echo "Service Account: ${left_bracket}SERVICE_ACCOUNT_NAME{right_bracket} already exists in project $PROJECT_ID"\n' - f'\n' - f'fi\n' - f'\n' - f'echo -e "$GREEN Updating required IAM roles in project $PROJECT_ID $NC"\n' - f'gcloud projects add-iam-policy-binding $PROJECT_ID \{newline}' - f' --member="serviceAccount:$SERVICE_ACCOUNT_FULL" \{newline}' - f' --role="roles/aiplatform.user" \{newline}' - f' --no-user-output-enabled\n' - f'\n' - f'gcloud projects add-iam-policy-binding $PROJECT_ID \{newline}' - f' --member="serviceAccount:$SERVICE_ACCOUNT_FULL" \{newline}' - f' --role="roles/artifactregistry.reader" \{newline}' - f' --no-user-output-enabled\n' - f'\n' - f'gcloud projects add-iam-policy-binding $PROJECT_ID \{newline}' - f' --member="serviceAccount:$SERVICE_ACCOUNT_FULL" \{newline}' - f' --role="roles/bigquery.user" \{newline}' - f' --no-user-output-enabled\n' - f'\n' - f'gcloud projects add-iam-policy-binding $PROJECT_ID \{newline}' - f' --member="serviceAccount:$SERVICE_ACCOUNT_FULL" \{newline}' - f' --role="roles/bigquery.dataEditor" \{newline}' - f' --no-user-output-enabled\n' - f'\n' - f'gcloud projects add-iam-policy-binding $PROJECT_ID \{newline}' - f' --member="serviceAccount:$SERVICE_ACCOUNT_FULL" \{newline}' - f' --role="roles/iam.serviceAccountUser" \{newline}' - f' --no-user-output-enabled\n' - f'\n' - f'gcloud projects add-iam-policy-binding $PROJECT_ID \{newline}' - f' --member="serviceAccount:$SERVICE_ACCOUNT_FULL" \{newline}' - f' --role="roles/storage.admin" \{newline}' - f' --no-user-output-enabled\n' - f'\n' - f'gcloud projects add-iam-policy-binding $PROJECT_ID 
\{newline}' - f' --member="serviceAccount:$SERVICE_ACCOUNT_FULL" \{newline}' - f' --role="roles/run.admin" \{newline}' - f' --no-user-output-enabled\n' - f'\n' - f'gcloud projects add-iam-policy-binding $PROJECT_ID \{newline}' - f' --member="serviceAccount:$PROJECT_NUMBER@cloudbuild.gserviceaccount.com" \{newline}' - f' --role="roles/run.admin" \{newline}' - f' --no-user-output-enabled\n' - f'\n' - f'gcloud projects add-iam-policy-binding $PROJECT_ID \{newline}' - f' --member="serviceAccount:$PROJECT_NUMBER@cloudbuild.gserviceaccount.com" \{newline}' - f' --role="roles/iam.serviceAccountUser" \{newline}' - f' --no-user-output-enabled\n' - f'\n' - f'gcloud projects add-iam-policy-binding $PROJECT_ID \{newline}' - f' --member="serviceAccount:$PROJECT_NUMBER@cloudbuild.gserviceaccount.com" \{newline}' - f' --role="roles/cloudtasks.enqueuer" \{newline}' - f' --no-user-output-enabled\n' - f'\n' - f'gcloud projects add-iam-policy-binding $PROJECT_ID \{newline}' - f' --member="serviceAccount:$PROJECT_NUMBER@cloudbuild.gserviceaccount.com" \{newline}' - f' --role="roles/cloudscheduler.admin" \{newline}' - f' --no-user-output-enabled\n' - f'\n' - f'echo -e "$GREEN Checking for Cloud Source Repository: $CLOUD_SOURCE_REPO in project $PROJECT_ID $NC"\n' - f'if ! (gcloud source repos list --project="$PROJECT_ID" | grep -E "(^|[[:blank:]])$CLOUD_SOURCE_REPO($|[[:blank:]])"); then\n' - f'\n' - f' echo "Creating Cloud Source Repository: ${left_bracket}CLOUD_SOURCE_REPO{right_bracket} in project $PROJECT_ID"\n' - f' gcloud source repos create $CLOUD_SOURCE_REPO\n' - f'\n' - f'else\n' - f'\n' - f' echo "Cloud Source Repository: ${left_bracket}CLOUD_SOURCE_REPO{right_bracket} already exists in project $PROJECT_ID"\n' - f'\n' - f'fi\n') - if not run_local: - create_resources_script += ( - f'\n' - f'# Create cloud tasks queue\n' - f'echo -e "$GREEN Checking for Cloud Tasks Queue: $CLOUD_TASKS_QUEUE_NAME in project $PROJECT_ID $NC"\n' - f'if ! (gcloud tasks queues list --location $CLOUD_TASKS_QUEUE_LOCATION | grep -E "(^|[[:blank:]])$CLOUD_TASKS_QUEUE_NAME($|[[:blank:]])"); then\n' - f'\n' - f' echo "Creating Cloud Tasks Queue: ${left_bracket}CLOUD_TASKS_QUEUE_NAME{right_bracket} in project $PROJECT_ID"\n' - f' gcloud tasks queues create $CLOUD_TASKS_QUEUE_NAME \{newline}' - f' --location=$CLOUD_TASKS_QUEUE_LOCATION\n' - f'\n' - f'else\n' - f'\n' - f' echo "Cloud Tasks Queue: ${left_bracket}CLOUD_TASKS_QUEUE_NAME{right_bracket} already exists in project $PROJECT_ID"\n' - f'\n' - f'fi\n' - f'\n' - f'# Create cloud build trigger\n' - f'echo -e "$GREEN Checking for Cloudbuild Trigger: $CB_TRIGGER_NAME in project $PROJECT_ID $NC"\n' - f'if ! 
(gcloud beta builds triggers list --project="$PROJECT_ID" --region="$CB_TRIGGER_LOCATION" | grep -E "(^|[[:blank:]])name: $CB_TRIGGER_NAME($|[[:blank:]])"); then\n' - f'\n' - f' echo "Creating Cloudbuild Trigger on branch $CLOUD_SOURCE_REPO_BRANCH in project $PROJECT_ID for repo ${left_bracket}CLOUD_SOURCE_REPO{right_bracket}"\n' - f' gcloud beta builds triggers create cloud-source-repositories \{newline}' - f' --region=$CB_TRIGGER_LOCATION \{newline}' - f' --name=$CB_TRIGGER_NAME \{newline}' - f' --repo=$CLOUD_SOURCE_REPO \{newline}' - f' --branch-pattern="$CLOUD_SOURCE_REPO_BRANCH" \{newline}' - f' --build-config={TOP_LVL_NAME}cloudbuild.yaml\n' - f'\n' - f'else\n' - f'\n' - f' echo "Cloudbuild Trigger already exists in project $PROJECT_ID for repo ${left_bracket}CLOUD_SOURCE_REPO{right_bracket}"\n' - f'\n' - f'fi\n') - BuilderUtils.write_and_chmod(RESOURCES_SH_FILE, create_resources_script) - -def _create_cloudbuild_config(run_local: bool): - """Writes a cloudbuild.yaml to the base directory. - Requires a defaults.yaml config to pull config vars from. - - Args: - run_local: Flag that determines whether to use Cloud Run CI/CD. - """ - defaults = BuilderUtils.read_yaml_file(DEFAULTS_FILE) - vpc_connector = defaults['gcp']['vpc_connector'] - vpc_connector_tail = '' - if vpc_connector != 'No VPC Specified': - vpc_connector_tail = ( - f'\n' - f' "--ingress", "internal",\n' - f' "--vpc-connector", "{vpc_connector}",\n' - f' "--vpc-egress", "all-traffic"') - vpc_connector_tail += ']\n' - - cloudbuild_comp_config = (BuilderUtils.LICENSE + - f'steps:\n' - f'# ==============================================================================\n' - f'# BUILD CUSTOM IMAGES\n' - f'# ==============================================================================\n' - f'\n' - f''' # build the component_base image\n''' - f''' - name: "gcr.io/cloud-builders/docker"\n''' - f''' args: [ "build", "-t", "{defaults['gcp']['af_registry_location']}-docker.pkg.dev/{defaults['gcp']['project_id']}/{defaults['gcp']['af_registry_name']}/components/component_base:latest", "." ]\n''' - f''' dir: "{TOP_LVL_NAME}components/component_base"\n''' - f''' id: "build_component_base"\n''' - f''' waitFor: ["-"]\n''' - f'\n' - f''' # build the run_pipeline image\n''' - f''' - name: 'gcr.io/cloud-builders/docker'\n''' - f''' args: [ "build", "-t", "{defaults['gcp']['af_registry_location']}-docker.pkg.dev/{defaults['gcp']['project_id']}/{defaults['gcp']['af_registry_name']}/run_pipeline:latest", "-f", "cloud_run/run_pipeline/Dockerfile", "." 
]\n''' - f''' dir: "{TOP_LVL_NAME}"\n''' - f''' id: "build_pipeline_runner_svc"\n''' - f''' waitFor: ['build_component_base']\n''') - cloudbuild_cloudrun_config = ( - f'\n' - f'# ==============================================================================\n' - f'# PUSH & DEPLOY CUSTOM IMAGES\n' - f'# ==============================================================================\n' - f'\n' - f''' # push the component_base image\n''' - f''' - name: "gcr.io/cloud-builders/docker"\n''' - f''' args: ["push", "{defaults['gcp']['af_registry_location']}-docker.pkg.dev/{defaults['gcp']['project_id']}/{defaults['gcp']['af_registry_name']}/components/component_base:latest"]\n''' - f''' dir: "{TOP_LVL_NAME}components/component_base"\n''' - f''' id: "push_component_base"\n''' - f''' waitFor: ["build_pipeline_runner_svc"]\n''' - f'\n' - f''' # push the run_pipeline image\n''' - f''' - name: "gcr.io/cloud-builders/docker"\n''' - f''' args: ["push", "{defaults['gcp']['af_registry_location']}-docker.pkg.dev/{defaults['gcp']['project_id']}/{defaults['gcp']['af_registry_name']}/run_pipeline:latest"]\n''' - f''' dir: "{TOP_LVL_NAME}"\n''' - f''' id: "push_pipeline_runner_svc"\n''' - f''' waitFor: ["push_component_base"]\n''' - f'\n' - f''' # deploy the cloud run service\n''' - f''' - name: "gcr.io/google.com/cloudsdktool/cloud-sdk"\n''' - f''' entrypoint: gcloud\n''' - f''' args: ["run",\n''' - f''' "deploy",\n''' - f''' "{defaults['gcp']['cloud_run_name']}",\n''' - f''' "--image",\n''' - f''' "{defaults['gcp']['af_registry_location']}-docker.pkg.dev/{defaults['gcp']['project_id']}/{defaults['gcp']['af_registry_name']}/run_pipeline:latest",\n''' - f''' "--region",\n''' - f''' "{defaults['gcp']['cloud_run_location']}",\n''' - f''' "--service-account",\n''' - f''' "{defaults['gcp']['pipeline_runner_service_account']}",{vpc_connector_tail}''' - f''' id: "deploy_pipeline_runner_svc"\n''' - f''' waitFor: ["push_pipeline_runner_svc"]\n''' - f'\n' - f''' # Copy runtime parameters\n''' - f''' - name: 'gcr.io/cloud-builders/gcloud'\n''' - f''' entrypoint: bash\n''' - f''' args:\n''' - f''' - '-e'\n''' - f''' - '-c'\n''' - f''' - |\n''' - f''' cp -r {TOP_LVL_NAME}cloud_run/queueing_svc .\n''' - f''' id: "setup_queueing_svc"\n''' - f''' waitFor: ["deploy_pipeline_runner_svc"]\n''' - f'\n' - f''' # Install dependencies\n''' - f''' - name: python\n''' - f''' entrypoint: pip\n''' - f''' args: ["install", "-r", "queueing_svc/requirements.txt", "--user"]\n''' - f''' id: "install_queueing_svc_deps"\n''' - f''' waitFor: ["setup_queueing_svc"]\n''' - f'\n' - f''' # Submit to queue\n''' - f''' - name: python\n''' - f''' entrypoint: python\n''' - f''' args: ["queueing_svc/main.py", "--setting", "queue_job"]\n''' - f''' id: "submit_job_to_queue"\n''' - f''' waitFor: ["install_queueing_svc_deps"]\n''') - cloudbuild_scheduler_config = ( - '\n' - ''' # Create Scheduler Job\n''' - ''' - name: python\n''' - ''' entrypoint: python\n''' - ''' args: ["queueing_svc/main.py", "--setting", "schedule_job"]\n''' - ''' id: "schedule_job"\n''' - ''' waitFor: ["submit_job_to_queue"]\n''') - custom_comp_image = ( - f'\n' - f'images:\n' - f''' # custom component images\n''' - f''' - "{defaults['gcp']['af_registry_location']}-docker.pkg.dev/{defaults['gcp']['project_id']}/{defaults['gcp']['af_registry_name']}/components/component_base:latest"\n''') - cloudrun_image = ( - f''' # Cloud Run image\n''' - f''' - 
"{defaults['gcp']['af_registry_location']}-docker.pkg.dev/{defaults['gcp']['project_id']}/{defaults['gcp']['af_registry_name']}/run_pipeline:latest"\n''') - - if run_local: - cb_file_contents = cloudbuild_comp_config + custom_comp_image - else: - if defaults['gcp']['cloud_schedule_pattern'] == 'No Schedule Specified': - cb_file_contents = cloudbuild_comp_config + cloudbuild_cloudrun_config + custom_comp_image + cloudrun_image - else: - cb_file_contents = cloudbuild_comp_config + cloudbuild_cloudrun_config + cloudbuild_scheduler_config + custom_comp_image + cloudrun_image - BuilderUtils.write_file(CLOUDBUILD_FILE, cb_file_contents, 'w+') - -def _create_requirements(): - """Writes a requirements.txt to the component_base directory. - Infers pip requirements from the python srcfiles using - pipreqs. Takes user-inputted requirements, and addes some - default gcp packages as well as packages that are often missing - in setup.py files (e.g db_types, pyarrow, gcsfs, fsspec). - """ - reqs_filename = f'{COMPONENT_BASE}/requirements.txt' - default_gcp_reqs = [ - 'google-cloud-aiplatform', - 'google-cloud-appengine-logging', - 'google-cloud-audit-log', - 'google-cloud-bigquery', - 'google-cloud-bigquery-storage', - 'google-cloud-bigtable', - 'google-cloud-core', - 'google-cloud-dataproc', - 'google-cloud-datastore', - 'google-cloud-dlp', - 'google-cloud-firestore', - 'google-cloud-kms', - 'google-cloud-language', - 'google-cloud-logging', - 'google-cloud-monitoring', - 'google-cloud-notebooks', - 'google-cloud-pipeline-components', - 'google-cloud-pubsub', - 'google-cloud-pubsublite', - 'google-cloud-recommendations-ai', - 'google-cloud-resource-manager', - 'google-cloud-scheduler', - 'google-cloud-spanner', - 'google-cloud-speech', - 'google-cloud-storage', - 'google-cloud-tasks', - 'google-cloud-translate', - 'google-cloud-videointelligence', - 'google-cloud-vision', - 'db_dtypes', - 'pyarrow', - 'gcsfs', - 'fsspec'] - # Infer reqs using pipreqs - BuilderUtils.execute_process(f'python3 -m pipreqs.pipreqs {COMPONENT_BASE} --mode no-pin --force', to_null=False) - pipreqs = BuilderUtils.read_file(reqs_filename).splitlines() - # Get user-inputted requirements from .tmpfiles dir - user_inp_reqs = [] - components_path_list = BuilderUtils.get_components_list() - for component_path in components_path_list: - component_spec = BuilderUtils.read_yaml_file(component_path) - reqs = component_spec['implementation']['container']['command'][2] - formatted_reqs = re.findall('\'([^\']*)\'', reqs) - user_inp_reqs.extend(formatted_reqs) - # Remove duplicates - set_of_requirements = set(user_inp_reqs) if user_inp_reqs else set(pipreqs + default_gcp_reqs) - reqs_str = ''.join(r+'\n' for r in sorted(set_of_requirements)) - BuilderUtils.delete_file(reqs_filename) - BuilderUtils.write_file(reqs_filename, reqs_str, 'w') - -def _create_dockerfile(): - """Writes a Dockerfile to the component_base directory.""" - # pylint: disable=anomalous-backslash-in-string - dockerfile = (BuilderUtils.LICENSE + - f'FROM {DEFAULT_IMAGE}\n' - f'RUN python -m pip install --upgrade pip\n' - f'COPY requirements.txt .\n' - f'RUN python -m pip install -r \ \n' - f' requirements.txt --quiet --no-cache-dir \ \n' - f' && rm -f requirements.txt\n' - f'COPY ./src /pipelines/component/src\n' - f'ENTRYPOINT ["/bin/bash"]\n') - BuilderUtils.write_file(f'{COMPONENT_BASE}/Dockerfile', dockerfile, 'w') - def component(func: Optional[Callable] = None, *, packages_to_install: Optional[List[str]] = None): @@ -784,7 +266,7 @@ def my_function_one(input: str, 
output: Output[Model]): component, packages_to_install=packages_to_install) else: - return ComponentBuilder.create_component_scaffold( + return KfpScaffold.create_component_scaffold( func=func, packages_to_install=packages_to_install) @@ -821,7 +303,7 @@ def pipeline(bq_table: str, name=name, description=description) else: - return PipelineBuilder.create_pipeline_scaffold( + return KfpScaffold.create_pipeline_scaffold( func=func, name=name, description=description) diff --git a/AutoMLOps/CloudRunBuilder.py b/AutoMLOps/CloudRunBuilder.py deleted file mode 100644 index d068b3a..0000000 --- a/AutoMLOps/CloudRunBuilder.py +++ /dev/null @@ -1,391 +0,0 @@ -# Copyright 2023 Google LLC. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Builds cloud_run files.""" - -# pylint: disable=C0103 -# pylint: disable=line-too-long - -from AutoMLOps import BuilderUtils - -def formalize(top_lvl_name: str, - defaults_file: str,): - """Constructs and writes a Dockerfile, requirements.txt, and - main.py to the cloud_run/run_pipeline directory. Also - constructs and writes a main.py, requirements.txt, and - pipeline_parameter_values.json to the - cloud_run/queueing_svc directory. - - Args: - top_lvl_name: Top directory name. - defaults_file: Path to the default config variables yaml. - """ - BuilderUtils.make_dirs([top_lvl_name + 'cloud_run', - top_lvl_name + 'cloud_run/run_pipeline', - top_lvl_name + 'cloud_run/queueing_svc']) - create_dockerfile(top_lvl_name) - create_requirements(top_lvl_name) - create_mains(top_lvl_name, defaults_file) - # copy runtime parameters over to queueing_svc dir - BuilderUtils.execute_process(f'''cp -r {top_lvl_name + BuilderUtils.PARAMETER_VALUES_PATH} {top_lvl_name + 'cloud_run/queueing_svc'}''', to_null=False) - -def create_dockerfile(top_lvl_name: str): - """Writes a Dockerfile to the cloud_run/run_pipeline directory. - - Args: - top_lvl_name: Top directory name. - """ - cloudrun_base = top_lvl_name + 'cloud_run/run_pipeline' - dockerfile = (BuilderUtils.LICENSE + - 'FROM python:3.9-slim\n' - '\n' - '# Allow statements and log messages to immediately appear in the Knative logs\n' - 'ENV PYTHONUNBUFFERED True\n' - '\n' - '# Copy local code to the container image.\n' - 'ENV APP_HOME /app\n' - 'WORKDIR $APP_HOME\n' - 'COPY ./ ./\n' - '\n' - '# Upgrade pip\n' - 'RUN python -m pip install --upgrade pip\n' - '# Install requirements\n' - 'RUN pip install --no-cache-dir -r /app/cloud_run/run_pipeline/requirements.txt\n' - '# Compile pipeline spec\n' - 'RUN ./scripts/build_pipeline_spec.sh\n' - '# Change Directories\n' - 'WORKDIR "/app/cloud_run/run_pipeline"\n' - '# Run flask api server\n' - 'CMD exec gunicorn --bind :$PORT --workers 1 --threads 8 --timeout 0 main:app\n' - ) - BuilderUtils.write_file(f'{cloudrun_base}/Dockerfile', dockerfile, 'w') - -def create_requirements(top_lvl_name: str): - """Writes a requirements.txt to the cloud_run/run_pipeline - directory, and a requirements.txt to the cloud_run/queueing_svc - directory. 
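# --- Editor's aside (illustrative, not part of the diff) ---------------------
# The component() and pipeline() decorators a few hunks above now delegate to
# KfpScaffold.create_component_scaffold / create_pipeline_scaffold. A minimal
# usage sketch, assuming the public API stays exposed as AutoMLOps.component
# and AutoMLOps.pipeline; function bodies and argument values are placeholders:

from AutoMLOps import AutoMLOps

@AutoMLOps.component(packages_to_install=['pandas'])
def create_dataset(bq_table: str, data_path: str):
    """Example component: would export bq_table to a CSV at data_path."""

@AutoMLOps.pipeline(name='sketch-pipeline', description='illustration only')
def pipeline(bq_table: str, data_path: str):
    create_dataset_task = create_dataset(bq_table=bq_table, data_path=data_path)
# -----------------------------------------------------------------------------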
- - Args: - top_lvl_name: Top directory name. - """ - cloudrun_base = top_lvl_name + 'cloud_run/run_pipeline' - queueing_svc_base = top_lvl_name + 'cloud_run/queueing_svc' - cloud_run_reqs = ( - 'kfp\n' - 'google-cloud-aiplatform\n' - 'google-cloud-pipeline-components\n' - 'Flask\n' - 'gunicorn\n' - 'pyyaml\n' - ) - queueing_svc_reqs = ( - 'google-cloud\n' - 'google-cloud-tasks\n' - 'google-api-python-client\n' - 'google-cloud-run\n' - 'google-cloud-scheduler\n' - ) - BuilderUtils.write_file(f'{cloudrun_base}/requirements.txt', cloud_run_reqs, 'w') - BuilderUtils.write_file(f'{queueing_svc_base}/requirements.txt', queueing_svc_reqs, 'w') - -def create_mains(top_lvl_name: str, - defaults_file: str): - """Writes a main.py to the cloud_run/run_pipeline - directory. This file contains code for running - a flask service that will act as a pipeline - runner service. Also writes a main.py to - the cloud_run/queueing_svc directory. This file - contains code for submitting a job to the cloud - runner service, and creating a cloud scheduler job. - - Args: - top_lvl_name: Top directory name. - defaults_file: Path to the default config variables yaml. - """ - defaults = BuilderUtils.read_yaml_file(defaults_file) - cloudrun_base = top_lvl_name + 'cloud_run/run_pipeline' - queueing_svc_base = top_lvl_name + 'cloud_run/queueing_svc' - left_bracket = '{' - right_bracket = '}' - cloud_run_code = (BuilderUtils.LICENSE + - f'''"""Cloud Run to run pipeline spec"""\n''' - f'''import logging\n''' - f'''import os\n''' - f'''from typing import Tuple\n''' - f'\n' - f'''import flask\n''' - f'''from google.cloud import aiplatform\n''' - f'''import yaml\n''' - f'\n' - f'''app = flask.Flask(__name__)\n''' - f'\n' - f'''logger = logging.getLogger()\n''' - f'''log_level = os.environ.get('LOG_LEVEL', 'INFO')\n''' - f'''logger.setLevel(log_level)\n''' - f'\n' - f'''CONFIG_FILE = '../../configs/defaults.yaml'\n''' - f'''PIPELINE_SPEC_PATH_LOCAL = '../../scripts/pipeline_spec/pipeline_job.json'\n''' - f'\n' - f'''@app.route('/', methods=['POST'])\n''' - f'''def process_request() -> flask.Response:\n''' - f''' """HTTP web service to trigger pipeline execution.\n''' - f'\n' - f''' Returns:\n''' - f''' The response text, or any set of values that can be turned into a\n''' - f''' Response object using `make_response`\n''' - f''' .\n''' - f''' """\n''' - f''' content_type = flask.request.headers['content-type']\n''' - f''' if content_type == 'application/json':\n''' - f''' request_json = flask.request.json\n''' - f'\n' - f''' logging.debug('JSON Recieved:')\n''' - f''' logging.debug(request_json)\n''' - f'\n' - f''' with open(CONFIG_FILE, 'r', encoding='utf-8') as config_file:\n''' - f''' config = yaml.load(config_file, Loader=yaml.FullLoader)\n''' - f'\n' - f''' logging.debug('Calling run_pipeline()')\n''' - f''' dashboard_uri, resource_name = run_pipeline(\n''' - f''' project_id=config['gcp']['project_id'],\n''' - f''' pipeline_root=config['pipelines']['pipeline_storage_path'],\n''' - f''' pipeline_runner_sa=config['gcp']['pipeline_runner_service_account'],\n''' - f''' pipeline_params=request_json,\n''' - f''' pipeline_spec_path=PIPELINE_SPEC_PATH_LOCAL)\n''' - f''' return flask.make_response({left_bracket}\n''' - f''' 'dashboard_uri': dashboard_uri,\n''' - f''' 'resource_name': resource_name\n''' - f''' {right_bracket}, 200)\n''' - f'\n' - f''' else:\n''' - f''' raise ValueError(f'Unknown content type: {left_bracket}content_type{right_bracket}')\n''' - f'\n' - f'''def run_pipeline(\n''' - f''' project_id: str,\n''' - f''' 
pipeline_root: str,\n''' - f''' pipeline_runner_sa: str,\n''' - f''' pipeline_params: dict,\n''' - f''' pipeline_spec_path: str,\n''' - f''' display_name: str = 'mlops-pipeline-run',\n''' - f''' enable_caching: bool = False) -> Tuple[str, str]:\n''' - f''' """Executes a pipeline run.\n''' - f'\n' - f''' Args:\n''' - f''' project_id: The project_id.\n''' - f''' pipeline_root: GCS location of the pipeline runs metadata.\n''' - f''' pipeline_runner_sa: Service Account to runner PipelineJobs.\n''' - f''' pipeline_params: Pipeline parameters values.\n''' - f''' pipeline_spec_path: Location of the pipeline spec JSON.\n''' - f''' display_name: Name to call the pipeline.\n''' - f''' enable_caching: Should caching be enabled (Boolean)\n''' - f''' """\n''' - f''' logging.debug('Pipeline Parms Configured:')\n''' - f''' logging.debug(pipeline_params)\n''' - f'\n' - f''' aiplatform.init(project=project_id)\n''' - f''' job = aiplatform.PipelineJob(\n''' - f''' display_name = display_name,\n''' - f''' template_path = pipeline_spec_path,\n''' - f''' pipeline_root = pipeline_root,\n''' - f''' parameter_values = pipeline_params,\n''' - f''' enable_caching = enable_caching)\n''' - f''' logging.debug('AI Platform job built. Submitting...')\n''' - f''' job.submit(service_account=pipeline_runner_sa)\n''' - f''' logging.debug('Job sent!')\n''' - f''' dashboard_uri = job._dashboard_uri()\n''' - f''' resource_name = job.resource_name\n''' - f''' return dashboard_uri, resource_name\n''' - f'\n' - f'''if __name__ == '__main__':\n''' - f''' app.run(debug=True, host='0.0.0.0', port=int(os.environ.get('PORT', 8080)))\n''') - - queueing_svc_code = (BuilderUtils.LICENSE + - f'''"""Submit pipeline job using Cloud Tasks and create Cloud Scheduler Job."""\n''' - f'''import argparse\n''' - f'''import json\n''' - f'\n' - f'''from google.cloud import run_v2\n''' - f'''from google.cloud import scheduler_v1\n''' - f'''from google.cloud import tasks_v2\n''' - f'\n' - f'''CLOUD_RUN_LOCATION = '{defaults['gcp']['cloud_run_location']}'\n''' - f'''CLOUD_RUN_NAME = '{defaults['gcp']['cloud_run_name']}'\n''' - f'''CLOUD_TASKS_QUEUE_LOCATION = '{defaults['gcp']['cloud_tasks_queue_location']}'\n''' - f'''CLOUD_TASKS_QUEUE_NAME = '{defaults['gcp']['cloud_tasks_queue_name']}'\n''' - f'''PARAMETER_VALUES_PATH = 'queueing_svc/pipeline_parameter_values.json'\n''' - f'''PIPELINE_RUNNER_SA = '{defaults['gcp']['pipeline_runner_service_account']}'\n''' - f'''PROJECT_ID = '{defaults['gcp']['project_id']}'\n''' - f'''SCHEDULE_LOCATION = '{defaults['gcp']['cloud_schedule_location']}'\n''' - f'''SCHEDULE_NAME = '{defaults['gcp']['cloud_schedule_name']}'\n''' - f'''SCHEDULE_PATTERN = '{defaults['gcp']['cloud_schedule_pattern']}'\n''' - f'\n' - f'''def get_runner_svc_uri(\n''' - f''' cloud_run_location: str,\n''' - f''' cloud_run_name: str,\n''' - f''' project_id: str):\n''' - f''' """Fetches the uri for the given cloud run instance.\n''' - f'\n' - f''' Args:\n''' - f''' cloud_run_location: The location of the cloud runner service.\n''' - f''' cloud_run_name: The name of the cloud runner service.\n''' - f''' project_id: The project ID.\n''' - f''' Returns:\n''' - f''' str: Uri of the Cloud Run instance.\n''' - f''' """\n''' - f''' client = run_v2.ServicesClient()\n''' - f''' parent = client.service_path(project_id, cloud_run_location, cloud_run_name)\n''' - f''' request = run_v2.GetServiceRequest(name=parent)\n''' - f''' response = client.get_service(request=request)\n''' - f''' return response.uri\n''' - f'\n' - f'''def get_json_bytes(file_path: 
str):\n''' - f''' """Reads a json file at the specified path and returns as bytes.\n''' - f'\n' - f''' Args:\n''' - f''' file_path: Path of the json file.\n''' - f''' Returns:\n''' - f''' bytes: Encode bytes of the file.\n''' - f''' """\n''' - f''' try:\n''' - f''' with open(file_path, 'r', encoding='utf-8') as file:\n''' - f''' data = json.load(file)\n''' - f''' file.close()\n''' - f''' except OSError as err:\n''' - f''' raise Exception(f'Error reading json file. {left_bracket}err{right_bracket}') from err\n''' - f''' return json.dumps(data).encode()\n''' - f'\n' - f'''def create_cloud_task(\n''' - f''' cloud_tasks_queue_location: str,\n''' - f''' cloud_tasks_queue_name: str,\n''' - f''' parameter_values_path: str,\n''' - f''' pipeline_runner_sa: str,\n''' - f''' project_id: str,\n''' - f''' runner_svc_uri: str):\n''' - f''' """Create a task to the queue with the runtime parameters.\n''' - f'\n' - f''' Args:\n''' - f''' cloud_run_location: The location of the cloud runner service.\n''' - f''' cloud_run_name: The name of the cloud runner service.\n''' - f''' cloud_tasks_queue_location: The location of the cloud tasks queue.\n''' - f''' cloud_tasks_queue_name: The name of the cloud tasks queue.\n''' - f''' parameter_values_path: Path to json pipeline params.\n''' - f''' pipeline_runner_sa: Service Account to runner PipelineJobs.\n''' - f''' project_id: The project ID.\n''' - f''' runner_svc_uri: Uri of the Cloud Run instance.\n''' - f''' """\n''' - f''' client = tasks_v2.CloudTasksClient()\n''' - f''' parent = client.queue_path(project_id, cloud_tasks_queue_location, cloud_tasks_queue_name)\n''' - f''' task = {left_bracket}\n''' - f''' 'http_request': {left_bracket}\n''' - f''' 'http_method': tasks_v2.HttpMethod.POST,\n''' - f''' 'url': runner_svc_uri,\n''' - f''' 'oidc_token': {left_bracket}\n''' - f''' 'service_account_email': pipeline_runner_sa,\n''' - f''' 'audience': runner_svc_uri\n''' - f''' {right_bracket},\n''' - f''' 'headers': {left_bracket}\n''' - f''' 'Content-Type': 'application/json'\n''' - f''' {right_bracket}\n''' - f''' {right_bracket}\n''' - f''' {right_bracket}\n''' - f''' task['http_request']['body'] = get_json_bytes(parameter_values_path)\n''' - f''' response = client.create_task(request={left_bracket}'parent': parent, 'task': task{right_bracket})\n''' - f''' print(f'Created task {left_bracket}response.name{right_bracket}')\n''' - f'\n' - f'''def create_cloud_scheduler_job(\n''' - f''' parameter_values_path: str,\n''' - f''' pipeline_runner_sa: str,\n''' - f''' project_id: str,\n''' - f''' runner_svc_uri: str,\n''' - f''' schedule_location: str,\n''' - f''' schedule_name: str,\n''' - f''' schedule_pattern: str):\n''' - f''' """Creates a scheduled pipeline job.\n''' - f'\n' - f''' Args:\n''' - f''' parameter_values_path: Path to json pipeline params.\n''' - f''' pipeline_runner_sa: Service Account to runner PipelineJobs.\n''' - f''' project_id: The project ID.\n''' - f''' runner_svc_uri: Uri of the Cloud Run instance.\n''' - f''' schedule_location: The location of the scheduler resource.\n''' - f''' schedule_name: The name of the scheduler resource.\n''' - f''' schedule_pattern: Cron formatted value used to create a Scheduled retrain job.\n''' - f''' """\n''' - f''' client = scheduler_v1.CloudSchedulerClient()\n''' - f''' parent = f'projects/{left_bracket}project_id{right_bracket}/locations/{left_bracket}schedule_location{right_bracket}'\n''' - f''' name = f'{left_bracket}parent{right_bracket}/jobs/{left_bracket}schedule_name{right_bracket}'\n''' - f'\n' - f''' request 
= scheduler_v1.ListJobsRequest(parent=parent)\n''' - f''' page_result = client.list_jobs(request=request)\n''' - f''' for response in page_result:\n''' - f''' if response.name == name:\n''' - f''' print(f'Cloud Scheduler {left_bracket}schedule_name{right_bracket} resource already exists in '\n''' - f''' f'project {left_bracket}project_id{right_bracket}.')\n''' - f''' return\n''' - f'\n' - f''' oidc_token = scheduler_v1.OidcToken(\n''' - f''' service_account_email=pipeline_runner_sa,\n''' - f''' audience=runner_svc_uri)\n''' - f'\n' - f''' target = scheduler_v1.HttpTarget(\n''' - f''' uri=runner_svc_uri,\n''' - f''' http_method=scheduler_v1.HttpMethod(1), # HTTP POST\n''' - f''' headers={left_bracket}'Content-Type': 'application/json'{right_bracket},\n''' - f''' body=get_json_bytes(parameter_values_path),\n''' - f''' oidc_token=oidc_token)\n''' - f'\n' - f''' job = scheduler_v1.Job(\n''' - f''' name=f'{left_bracket}parent{right_bracket}/jobs/{left_bracket}schedule_name{right_bracket}',\n''' - f''' description='AutoMLOps cloud scheduled run.',\n''' - f''' http_target=target,\n''' - f''' schedule=schedule_pattern)\n''' - f'\n' - f''' request = scheduler_v1.CreateJobRequest(\n''' - f''' parent=parent,\n''' - f''' job=job)\n''' - f'\n' - f''' response = client.create_job(request=request)\n''' - f''' print(response)\n''' - f'\n' - f'''if __name__ == '__main__':\n''' - f''' parser = argparse.ArgumentParser()\n''' - f''' parser.add_argument('--setting', type=str,\n''' - f''' help='The config file for setting default values.')\n''' - f''' args = parser.parse_args()\n''' - f'\n' - f''' uri = get_runner_svc_uri(\n''' - f''' cloud_run_location=CLOUD_RUN_LOCATION,\n''' - f''' cloud_run_name=CLOUD_RUN_NAME,\n''' - f''' project_id=PROJECT_ID)\n''' - f'\n' - f''' if args.setting == 'queue_job':\n''' - f''' create_cloud_task(\n''' - f''' cloud_tasks_queue_location=CLOUD_TASKS_QUEUE_LOCATION,\n''' - f''' cloud_tasks_queue_name=CLOUD_TASKS_QUEUE_NAME,\n''' - f''' parameter_values_path=PARAMETER_VALUES_PATH,\n''' - f''' pipeline_runner_sa=PIPELINE_RUNNER_SA,\n''' - f''' project_id=PROJECT_ID,\n''' - f''' runner_svc_uri=uri)\n''' - f'\n' - f''' if args.setting == 'schedule_job':\n''' - f''' create_cloud_scheduler_job(\n''' - f''' parameter_values_path=PARAMETER_VALUES_PATH,\n''' - f''' pipeline_runner_sa=PIPELINE_RUNNER_SA,\n''' - f''' project_id=PROJECT_ID,\n''' - f''' runner_svc_uri=uri,\n''' - f''' schedule_location=SCHEDULE_LOCATION,\n''' - f''' schedule_name=SCHEDULE_NAME,\n''' - f''' schedule_pattern=SCHEDULE_PATTERN)\n''') - BuilderUtils.write_file(f'{cloudrun_base}/main.py', cloud_run_code, 'w') - BuilderUtils.write_file(f'{queueing_svc_base}/main.py', queueing_svc_code, 'w') diff --git a/AutoMLOps/ComponentBuilder.py b/AutoMLOps/ComponentBuilder.py deleted file mode 100644 index 8b042c3..0000000 --- a/AutoMLOps/ComponentBuilder.py +++ /dev/null @@ -1,240 +0,0 @@ -# Copyright 2023 Google LLC. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
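# --- Editor's aside (illustrative, not part of the diff) ---------------------
# The generated run_pipeline Flask service shown above expects a POST whose
# JSON body is the pipeline parameter values; Cloud Tasks and Cloud Scheduler
# attach an OIDC token for the pipeline runner service account. A minimal
# sketch of calling the service directly; the URL and parameters are
# placeholders:

import json
import urllib.request

import google.auth.transport.requests
import google.oauth2.id_token

RUNNER_SVC_URI = 'https://my-runner-svc-xxxxx-uc.a.run.app'  # placeholder
params = {'bq_table': 'my-project.my_dataset.my_table'}      # placeholder

token = google.oauth2.id_token.fetch_id_token(
    google.auth.transport.requests.Request(), audience=RUNNER_SVC_URI)
req = urllib.request.Request(
    RUNNER_SVC_URI,
    data=json.dumps(params).encode(),
    headers={'Content-Type': 'application/json',
             'Authorization': f'Bearer {token}'},
    method='POST')
with urllib.request.urlopen(req) as resp:
    print(json.loads(resp.read()))  # expects dashboard_uri and resource_name
# -----------------------------------------------------------------------------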
- -"""Builds component files.""" - -# pylint: disable=C0103 -# pylint: disable=line-too-long - -import inspect -from typing import Callable, List, Optional, TypeVar, Union - -import docstring_parser - -from AutoMLOps import BuilderUtils - -T = TypeVar('T') - -def formalize(component_path: str, - top_lvl_name: str, - defaults_file: str, - use_kfp_spec: bool): - """Constructs and writes component.yaml and {component_name}.py files. - component.yaml: Contains the Kubeflow custom component definition. - {component_name}.py: Contains the python code from the Jupyter cell. - - Args: - component_path: Path to the temporary component yaml. This file - is used to create the permanent component.yaml, and deleted - after calling AutoMLOps.generate(). - top_lvl_name: Top directory name. - defaults_file: Path to the default config variables yaml. - use_kfp_spec: Flag that determines the format of the component yamls. - """ - component_spec = BuilderUtils.read_yaml_file(component_path) - if use_kfp_spec: - component_spec['name'] = component_spec['name'].replace(' ', '_').lower() - component_dir = top_lvl_name + 'components/' + component_spec['name'] - task_filepath = (top_lvl_name + 'components/component_base/src/' + - component_spec['name'] + '.py') - BuilderUtils.make_dirs([component_dir]) - create_task(component_spec, task_filepath, use_kfp_spec) - create_component(component_spec, component_dir, defaults_file) - -def create_task(component_spec: dict, task_filepath: str, use_kfp_spec: bool): - """Writes cell python code to a file with required imports. - - Args: - component_spec: Component definition dictionary. - Contains cell code which is temporarily stored in - component_spec['implementation']['container']['command'] - task_filepath: Path to the file to be written. - use_kfp_spec: Flag that determines the format of the component yamls. - Raises: - Exception: If the imports tmpfile does not exist. - """ - custom_code = component_spec['implementation']['container']['command'][-1] - default_imports = (BuilderUtils.LICENSE + - 'import argparse\n' - 'import json\n' - 'from kfp.v2.components import executor\n') - if not use_kfp_spec: - custom_imports = ('import kfp\n' - 'from kfp.v2 import dsl\n' - 'from kfp.v2.dsl import *\n' - 'from typing import *\n' - '\n') - else: - custom_imports = '' # included as part of the kfp spec - main_func = ( - '\n' - '''def main():\n''' - ''' """Main executor."""\n''' - ''' parser = argparse.ArgumentParser()\n''' - ''' parser.add_argument('--executor_input', type=str)\n''' - ''' parser.add_argument('--function_to_execute', type=str)\n''' - '\n' - ''' args, _ = parser.parse_known_args()\n''' - ''' executor_input = json.loads(args.executor_input)\n''' - ''' function_to_execute = globals()[args.function_to_execute]\n''' - '\n' - ''' executor.Executor(\n''' - ''' executor_input=executor_input,\n''' - ''' function_to_execute=function_to_execute).execute()\n''' - '\n' - '''if __name__ == '__main__':\n''' - ''' main()\n''') - f_contents = default_imports + custom_imports + custom_code + main_func - BuilderUtils.write_file(task_filepath, f_contents, 'w+') - -def create_component(component_spec: dict, - component_dir: str, - defaults_file: str): - """Updates the component_spec to include the correct image - and startup command, then writes the component.yaml. - Requires a defaults.yaml config to pull config vars from. - - Args: - component_spec: Component definition dictionary. - component_dir: Path of the component directory. 
- defaults_file: Path to the default config variables yaml. - Raises: - Exception: If an error is encountered writing the file. - """ - defaults = BuilderUtils.read_yaml_file(defaults_file) - component_spec['implementation']['container']['image'] = ( - f'''{defaults['gcp']['af_registry_location']}-docker.pkg.dev/''' - f'''{defaults['gcp']['project_id']}/''' - f'''{defaults['gcp']['af_registry_name']}/''' - f'''components/component_base:latest''') - component_spec['implementation']['container']['command'] = [ - 'python3', - f'''/pipelines/component/src/{component_spec['name']+'.py'}'''] - filename = component_dir + '/component.yaml' - BuilderUtils.write_file(filename, BuilderUtils.LICENSE, 'w') - BuilderUtils.write_yaml_file(filename, component_spec, 'a') - -def create_component_scaffold(func: Optional[Callable] = None, - *, - packages_to_install: Optional[List[str]] = None): - """Creates a tmp component scaffold which will be used by - the formalize function. Code is temporarily stored in - component_spec['implementation']['container']['command']. - - Args: - func: The python function to create a component from. The function - should have type annotations for all its arguments, indicating how - it is intended to be used (e.g. as an input/output Artifact object, - a plain parameter, or a path to a file). - packages_to_install: A list of optional packages to install before - executing func. These will always be installed at component runtime. - """ - # Todo: - # Figure out what to do with package_to_install - name = func.__name__ - parsed_docstring = docstring_parser.parse(inspect.getdoc(func)) - description = parsed_docstring.short_description - # make yaml - component_spec = {} - component_spec['name'] = name - if description: - component_spec['description'] = description - component_spec['inputs'] = get_function_parameters(func) - component_spec['implementation'] = {} - component_spec['implementation']['container'] = {} - component_spec['implementation']['container']['image'] = 'TBD' - component_spec['implementation']['container']['command'] = get_packages_to_install_command(func, packages_to_install) - component_spec['implementation']['container']['args'] = ['--executor_input', - {'executorInput': None}, '--function_to_execute', name] - filename = BuilderUtils.TMPFILES_DIR + f'/{name}.yaml' - BuilderUtils.make_dirs([BuilderUtils.TMPFILES_DIR]) # if it doesn't already exist - BuilderUtils.write_yaml_file(filename, component_spec, 'w') - -def get_packages_to_install_command(func: Optional[Callable] = None, - packages_to_install: Optional[List[str]] = None): - """Returns a list of formatted list of commands, including code for tmp storage. - - Args: - func: The python function to create a component from. The function - should have type annotations for all its arguments, indicating how - it is intended to be used (e.g. as an input/output Artifact object, - a plain parameter, or a path to a file). - packages_to_install: A list of optional packages to install before - executing func. These will always be installed at component runtime. - """ - newline = '\n' - if not packages_to_install: - packages_to_install = [] - concat_package_list = ' '.join( - [repr(str(package)) for package in packages_to_install]) - # pylint: disable=anomalous-backslash-in-string - install_python_packages_script = ( - f'''if ! 
[ -x "$(command -v pip)" ]; then{newline}''' - f''' python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip{newline}''' - f'''fi{newline}''' - f'''PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet \{newline}''' - f''' --no-warn-script-location {concat_package_list} && "$0" "$@"{newline}''' - f'''{newline}''') - src_code = BuilderUtils.get_function_source_definition(func) - return ['sh', '-c', install_python_packages_script, src_code] - -def get_function_parameters(func: Callable) -> dict: - """Returns a formatted list of parameters. - - Args: - func: The python function to create a component from. The function - should have type annotations for all its arguments, indicating how - it is intended to be used (e.g. as an input/output Artifact object, - a plain parameter, or a path to a file). - Returns: - list: Params list with types converted to kubeflow spec. - Raises: - Exception: If parameter type hints are not provided. - """ - signature = inspect.signature(func) - parameters = list(signature.parameters.values()) - parsed_docstring = docstring_parser.parse(inspect.getdoc(func)) - doc_dict = {p.arg_name: p.description for p in parsed_docstring.params} - - parameter_holder = [] - for param in parameters: - metadata = {} - metadata['name'] = param.name - metadata['description'] = doc_dict.get(param.name) - metadata['type'] = maybe_strip_optional_from_annotation( - param.annotation) - parameter_holder.append(metadata) - # pylint: disable=protected-access - if metadata['type'] == inspect._empty: - raise TypeError( - f'''Missing type hint for parameter "{metadata['name']}". ''' - f'''Please specify the type for this parameter.''') - return BuilderUtils.update_params(parameter_holder) - -def maybe_strip_optional_from_annotation(annotation: T) -> T: - """Strips 'Optional' from 'Optional[]' if applicable. - For example:: - Optional[str] -> str - str -> str - List[int] -> List[int] - Args: - annotation: The original type annotation which may or may not has - `Optional`. - Returns: - The type inside Optional[] if Optional exists, otherwise the original type. - """ - if getattr(annotation, '__origin__', - None) is Union and annotation.__args__[1] is type(None): - return annotation.__args__[0] - return annotation diff --git a/AutoMLOps/PipelineBuilder.py b/AutoMLOps/PipelineBuilder.py deleted file mode 100644 index 35d0f95..0000000 --- a/AutoMLOps/PipelineBuilder.py +++ /dev/null @@ -1,291 +0,0 @@ -# Copyright 2023 Google LLC. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Builds pipeline files.""" - -# pylint: disable=C0103 -# pylint: disable=line-too-long - -import json -from typing import Callable, Dict, List, Optional - -from AutoMLOps import BuilderUtils - -DEFAULT_PIPELINE_NAME = 'automlops-pipeline' - -def formalize(custom_training_job_specs: List[Dict], - defaults_file: str, - pipeline_parameter_values: dict, - top_lvl_name: str): - """Constructs and writes pipeline.py, pipeline_runner.py, and pipeline_parameter_values.json files. 
- pipeline.py: Generates a Kubeflow pipeline spec from custom components. - pipeline_runner.py: Sends a PipelineJob to Vertex AI using pipeline spec. - pipeline_parameter_values.json: Provides runtime parameters for the PipelineJob. - - Args: - custom_training_job_specs: Specifies the specs to run the training job with. - defaults_file: Path to the default config variables yaml. - pipeline_parameter_values: Dictionary of runtime parameters for the PipelineJob. - top_lvl_name: Top directory name. - Raises: - Exception: If an error is encountered reading/writing to a file. - """ - defaults = BuilderUtils.read_yaml_file(defaults_file) - pipeline_file = top_lvl_name + 'pipelines/pipeline.py' - pipeline_runner_file = top_lvl_name + 'pipelines/pipeline_runner.py' - pipeline_params_file = top_lvl_name + BuilderUtils.PARAMETER_VALUES_PATH - # construct pipeline.py - pipeline_imports = get_pipeline_imports(custom_training_job_specs, defaults['gcp']['project_id']) - pipeline_argparse = get_pipeline_argparse() - try: - with open(pipeline_file, 'r+', encoding='utf-8') as file: - pipeline_scaffold = file.read() - file.seek(0, 0) - file.write(BuilderUtils.LICENSE) - file.write(pipeline_imports) - for line in pipeline_scaffold.splitlines(): - file.write(' ' + line + '\n') - file.write(pipeline_argparse) - file.close() - except OSError as err: - raise OSError(f'Error interacting with file. {err}') from err - # construct pipeline_runner.py - BuilderUtils.write_file(pipeline_runner_file, get_pipeline_runner(), 'w+') - # construct pipeline_parameter_values.json - serialized_params = json.dumps(pipeline_parameter_values, indent=4) - BuilderUtils.write_file(pipeline_params_file, serialized_params, 'w+') - -def get_pipeline_imports(custom_training_job_specs: List[Dict], project_id: str) -> str: - """Generates python code that imports modules and loads all custom components. - Args: - custom_training_job_specs: Specifies the specs to run the training job with. - project_id: The project_id to run the pipeline. - - Returns: - str: Python pipeline_imports code. - """ - components_list = BuilderUtils.get_components_list(full_path=False) - gcpc_imports = ( - 'from functools import partial\n' - 'from google_cloud_pipeline_components.v1.custom_job import create_custom_training_job_op_from_component\n') - quote = '\'' - newline_tab = '\n ' - return ( - f'''import argparse\n''' - f'''import os\n''' - f'''{gcpc_imports if custom_training_job_specs else ''}''' - f'''import kfp\n''' - f'''from kfp.v2 import compiler, dsl\n''' - f'''from kfp.v2.dsl import *\n''' - f'''from typing import *\n''' - f'''import yaml\n''' - f'\n' - f'''def load_custom_component(component_name: str):\n''' - f''' component_path = os.path.join('components',\n''' - f''' component_name,\n''' - f''' 'component.yaml')\n''' - f''' return kfp.components.load_component_from_file(component_path)\n''' - f'\n' - f'''def create_training_pipeline(pipeline_job_spec_path: str):\n''' - f''' {newline_tab.join(f'{component} = load_custom_component(component_name={quote}{component}{quote})' for component in components_list)}\n''' - f'\n' - f'''{get_custom_job_specs(custom_training_job_specs, project_id)}''') - -def get_custom_job_specs(custom_training_job_specs: List[Dict], project_id: str) -> str: - """Generates python code that creates a custom training op from the specified component. - Args: - custom_training_job_specs: Specifies the specs to run the training job with. - project_id: The project_id to run the pipeline. 
- - Returns: - str: Python custom training op code. - """ - quote = '\'' - newline_tab = '\n ' - output_string = '' if not custom_training_job_specs else ( - f''' {newline_tab.join(f'{spec["component_spec"]}_custom_training_job_specs = {format_spec_dict(spec)}' for spec in custom_training_job_specs)}''' - f'\n' - f''' {newline_tab.join(f'{spec["component_spec"]}_job_op = create_custom_training_job_op_from_component(**{spec["component_spec"]}_custom_training_job_specs)' for spec in custom_training_job_specs)}''' - f'\n' - f''' {newline_tab.join(f'{spec["component_spec"]} = partial({spec["component_spec"]}_job_op, project={quote}{project_id}{quote})' for spec in custom_training_job_specs)}''' - f'\n') - return output_string - -def format_spec_dict(job_spec: dict) -> str: - """Takes in a job spec dictionary and removes the quotes around the component op name. - e.g. 'component_spec': 'train_model' becomes 'component_spec': train_model. - This is necessary to in order for the op to be callable within the Python code. - - Args: - job_spec: Dictionary with job spec info. - - Returns: - str: Python formatted dictionary code. - """ - quote = '\'' - left_bracket = '{' - right_bracket = '}' - newline = '\n' - - return ( - f'''{left_bracket}\n''' - f''' {f'{newline} '.join(f" {quote}{k}{quote}: {quote if k != 'component_spec' else ''}{v}{quote if k != 'component_spec' else ''}," for k, v in job_spec.items())}{newline}''' - f''' {right_bracket}\n''') - -def get_pipeline_argparse() -> str: - """Generates python code that loads default pipeline parameters from the defaults config_file. - - Returns: - str: Python pipeline_argparse code. - """ - return ( - '''if __name__ == '__main__':\n''' - ''' parser = argparse.ArgumentParser()\n''' - ''' parser.add_argument('--config', type=str,\n''' - ''' help='The config file for setting default values.')\n''' - '\n' - ''' args = parser.parse_args()\n''' - '\n' - ''' with open(args.config, 'r', encoding='utf-8') as config_file:\n''' - ''' config = yaml.load(config_file, Loader=yaml.FullLoader)\n''' - '\n' - ''' pipeline = create_training_pipeline(\n''' - ''' pipeline_job_spec_path=config['pipelines']['pipeline_job_spec_path'])\n''') - -def get_pipeline_runner() -> str: - """Generates python code that sends a PipelineJob to Vertex AI. - - Returns: - str: Python pipeline_runner code. 
- """ - return (BuilderUtils.LICENSE + - '''import argparse\n''' - '''import json\n''' - '''import logging\n''' - '''import os\n''' - '''import yaml\n''' - '\n' - '''from google.cloud import aiplatform\n''' - '\n' - '''logger = logging.getLogger()\n''' - '''log_level = os.environ.get('LOG_LEVEL', 'INFO')\n''' - '''logger.setLevel(log_level)\n''' - '\n' - '''def run_pipeline(\n''' - ''' project_id: str,\n''' - ''' pipeline_root: str,\n''' - ''' pipeline_runner_sa: str,\n''' - ''' parameter_values_path: str,\n''' - ''' pipeline_spec_path: str,\n''' - ''' display_name: str = 'mlops-pipeline-run',\n''' - ''' enable_caching: bool = False):\n''' - ''' """Executes a pipeline run.\n''' - '\n' - ''' Args:\n''' - ''' project_id: The project_id.\n''' - ''' pipeline_root: GCS location of the pipeline runs metadata.\n''' - ''' pipeline_runner_sa: Service Account to runner PipelineJobs.\n''' - ''' parameter_values_path: Location of parameter values JSON.\n''' - ''' pipeline_spec_path: Location of the pipeline spec JSON.\n''' - ''' display_name: Name to call the pipeline.\n''' - ''' enable_caching: Should caching be enabled (Boolean)\n''' - ''' """\n''' - ''' with open(parameter_values_path, 'r') as file:\n''' - ''' try:\n''' - ''' pipeline_params = json.load(file)\n''' - ''' except ValueError as exc:\n''' - ''' print(exc)\n''' - ''' logging.debug('Pipeline Parms Configured:')\n''' - ''' logging.debug(pipeline_params)\n''' - '\n' - ''' aiplatform.init(project=project_id)\n''' - ''' job = aiplatform.PipelineJob(\n''' - ''' display_name = display_name,\n''' - ''' template_path = pipeline_spec_path,\n''' - ''' pipeline_root = pipeline_root,\n''' - ''' parameter_values = pipeline_params,\n''' - ''' enable_caching = enable_caching)\n''' - ''' logging.debug('AI Platform job built. Submitting...')\n''' - ''' job.submit(service_account=pipeline_runner_sa)\n''' - ''' logging.debug('Job sent!')\n''' - '\n' - '''if __name__ == '__main__':\n''' - ''' parser = argparse.ArgumentParser()\n''' - ''' parser.add_argument('--config', type=str,\n''' - ''' help='The config file for setting default values.')\n''' - ''' args = parser.parse_args()\n''' - '\n' - ''' with open(args.config, 'r', encoding='utf-8') as config_file:\n''' - ''' config = yaml.load(config_file, Loader=yaml.FullLoader)\n''' - '\n' - ''' run_pipeline(project_id=config['gcp']['project_id'],\n''' - ''' pipeline_root=config['pipelines']['pipeline_storage_path'],\n''' - ''' pipeline_runner_sa=config['gcp']['pipeline_runner_service_account'],\n''' - ''' parameter_values_path=config['pipelines']['parameter_values_path'],\n''' - ''' pipeline_spec_path=config['pipelines']['pipeline_job_spec_path']) \n''') - -def create_pipeline_scaffold(func: Optional[Callable] = None, - *, - name: Optional[str] = None, - description: Optional[str] = None): - """Creates a temporary pipeline scaffold which will - be used by the formalize function. - - Args: - func: The python function to create a pipeline from. The function - should have type annotations for all its arguments, indicating how - it is intended to be used (e.g. as an input/output Artifact object, - a plain parameter, or a path to a file). - name: The name of the pipeline. - description: Short description of what the pipeline does. 
- """ - pipeline_scaffold = (get_pipeline_decorator(name, description) + - BuilderUtils.get_function_source_definition(func) + - get_compile_step(func.__name__)) - BuilderUtils.make_dirs([BuilderUtils.TMPFILES_DIR]) # if it doesn't already exist - BuilderUtils.write_file(BuilderUtils.PIPELINE_TMPFILE, pipeline_scaffold, 'w') - -def get_pipeline_decorator(name: Optional[str] = None, - description: Optional[str] = None): - """Creates the kfp pipeline decorator. - - Args: - name: The name of the pipeline. - description: Short description of what the pipeline does. - - Returns: - str: Python compile function call. - """ - default_name = DEFAULT_PIPELINE_NAME if not name else name - name_str = f'''(\n name='{default_name}',\n''' - desc_str = f''' description='{description}',\n''' if description else '' - ending_str = ')\n' - return '@dsl.pipeline' + name_str + desc_str + ending_str - -def get_compile_step(func_name: str): - """Creates the compile function call. - - Args: - func_name: The name of the pipeline function. - - Returns: - str: Python compile function call. - """ - return ( - f'\n' - f'compiler.Compiler().compile(\n' - f' pipeline_func={func_name},\n' - f' package_path=pipeline_job_spec_path)\n' - f'\n' - ) diff --git a/AutoMLOps/__init__.py b/AutoMLOps/__init__.py index 39c6a88..82d54a7 100644 --- a/AutoMLOps/__init__.py +++ b/AutoMLOps/__init__.py @@ -23,6 +23,6 @@ series of directories to support the creation of Vertex Pipelines. """ # pylint: disable=invalid-name -__version__ = '1.1.0' +__version__ = '1.1.1' __author__ = 'Sean Rastatter' __credits__ = 'Google' diff --git a/AutoMLOps/deployments/__init__.py b/AutoMLOps/deployments/__init__.py new file mode 100644 index 0000000..2379f87 --- /dev/null +++ b/AutoMLOps/deployments/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2023 Google LLC. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/AutoMLOps/deployments/cloudbuild/__init__.py b/AutoMLOps/deployments/cloudbuild/__init__.py new file mode 100644 index 0000000..2379f87 --- /dev/null +++ b/AutoMLOps/deployments/cloudbuild/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2023 Google LLC. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/AutoMLOps/deployments/cloudbuild/builder.py b/AutoMLOps/deployments/cloudbuild/builder.py new file mode 100644 index 0000000..b7fc674 --- /dev/null +++ b/AutoMLOps/deployments/cloudbuild/builder.py @@ -0,0 +1,56 @@ +# Copyright 2023 Google LLC. All Rights Reserved. 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Builds the Cloud Build deployment configuration."""
+
+# pylint: disable=line-too-long
+
+from AutoMLOps.utils.utils import write_file
+from AutoMLOps.utils.constants import (
+    BASE_DIR,
+    GENERATED_CLOUDBUILD_FILE
+)
+from AutoMLOps.deployments.cloudbuild.constructs.scripts import CloudBuildScripts
+
+def build(af_registry_location: str,
+          af_registry_name: str,
+          cloud_run_location: str,
+          cloud_run_name: str,
+          pipeline_runner_sa: str,
+          project_id: str,
+          run_local: bool,
+          schedule_pattern: str,
+          vpc_connector: str):
+    """Constructs and writes the cloudbuild.yaml config used to deploy resources and run Kubeflow pipelines.
+
+    Args:
+        af_registry_location: Region of the Artifact Registry.
+        af_registry_name: Artifact Registry name where components are stored.
+        cloud_run_location: The location of the cloud runner service.
+        cloud_run_name: The name of the cloud runner service.
+        pipeline_runner_sa: Service Account to run PipelineJobs.
+        project_id: The project ID.
+        run_local: Flag that determines whether to use Cloud Run CI/CD.
+        schedule_pattern: Cron formatted value used to create a Scheduled retrain job.
+        vpc_connector: The name of the vpc connector to use.
+    """
+    # Get scripts builder object
+    cb_scripts = CloudBuildScripts(
+        af_registry_location, af_registry_name, cloud_run_location,
+        cloud_run_name, pipeline_runner_sa, project_id,
+        run_local, schedule_pattern, BASE_DIR,
+        vpc_connector)
+
+    # Write cloud build config
+    write_file(GENERATED_CLOUDBUILD_FILE, cb_scripts.create_kfp_cloudbuild_config, 'w+')
diff --git a/examples/training/AutoMLOps/scripts/pipeline_spec/.gitkeep b/AutoMLOps/deployments/cloudbuild/constructs/__init__.py
similarity index 100%
rename from examples/training/AutoMLOps/scripts/pipeline_spec/.gitkeep
rename to AutoMLOps/deployments/cloudbuild/constructs/__init__.py
diff --git a/AutoMLOps/deployments/cloudbuild/constructs/scripts.py b/AutoMLOps/deployments/cloudbuild/constructs/scripts.py
new file mode 100644
index 0000000..404f958
--- /dev/null
+++ b/AutoMLOps/deployments/cloudbuild/constructs/scripts.py
@@ -0,0 +1,189 @@
+# Copyright 2023 Google LLC. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
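(For orientation only: a minimal sketch of driving the new deployment builder directly. The project, region, and service names below are illustrative placeholders, not values from this change; in practice AutoMLOps.generate() supplies them.)

    from AutoMLOps.deployments.cloudbuild import builder as CloudBuildBuilder

    # Writes the generated cloudbuild.yaml (GENERATED_CLOUDBUILD_FILE) for a
    # hypothetical project; every argument value here is a placeholder.
    CloudBuildBuilder.build(
        af_registry_location='us-central1',
        af_registry_name='vertex-mlops-af',
        cloud_run_location='us-central1',
        cloud_run_name='run-pipeline-svc',
        pipeline_runner_sa='vertex-pipelines@my-project.iam.gserviceaccount.com',
        project_id='my-project',
        run_local=False,
        schedule_pattern='No Schedule Specified',
        vpc_connector='No VPC Specified')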
+ +"""Code strings for Cloudbuild scripts.""" + +# pylint: disable=line-too-long + +from AutoMLOps.utils.constants import GENERATED_LICENSE + +class CloudBuildScripts(): + """Generates CloudBuild yaml config file.""" + def __init__(self, + af_registry_location: str, + af_registry_name: str, + cloud_run_location: str, + cloud_run_name: str, + pipeline_runner_sa: str, + project_id: str, + run_local: str, + schedule_pattern: str, + base_dir: str, + vpc_connector: str): + """Constructs scripts for resource deployment and running Kubeflow pipelines. + + Args: + af_registry_location: Region of the Artifact Registry. + af_registry_name: Artifact Registry name where components are stored. + cloud_run_location: The location of the cloud runner service. + cloud_run_name: The name of the cloud runner service. + pipeline_runner_sa: Service Account to runner PipelineJobs. + project_id: The project ID. + run_local: Flag that determines whether to use Cloud Run CI/CD. + schedule_pattern: Cron formatted value used to create a Scheduled retrain job. + base_dir: Top directory name. + vpc_connector: The name of the vpc connector to use. + """ + + # Set passed variables as hidden attributes + self.__base_dir = base_dir + self.__run_local = run_local + + # Parse defaults file for hidden class attributes + self.__af_registry_name = af_registry_name + self.__af_registry_location = af_registry_location + self.__project_id = project_id + self.__pipeline_runner_service_account = pipeline_runner_sa + self.__vpc_connector = vpc_connector + self.__cloud_run_name = cloud_run_name + self.__cloud_run_location = cloud_run_location + self.__cloud_schedule_pattern = schedule_pattern + + # Set generated scripts as public attributes + self.create_kfp_cloudbuild_config = self._create_kfp_cloudbuild_config() + + def _create_kfp_cloudbuild_config(self): + """Builds the content of cloudbuild.yaml. + + Args: + str: Text content of cloudbuild.yaml. + """ + vpc_connector_tail = '' + if self.__vpc_connector != 'No VPC Specified': + vpc_connector_tail = ( + f'\n' + f' "--ingress", "internal",\n' + f' "--vpc-connector", "{self.__vpc_connector}",\n' + f' "--vpc-egress", "all-traffic"') + vpc_connector_tail += ']\n' + + cloudbuild_comp_config = ( + GENERATED_LICENSE + + f'steps:\n' + f'# ==============================================================================\n' + f'# BUILD CUSTOM IMAGES\n' + f'# ==============================================================================\n' + f'\n' + f''' # build the component_base image\n''' + f''' - name: "gcr.io/cloud-builders/docker"\n''' + f''' args: [ "build", "-t", "{self.__af_registry_location}-docker.pkg.dev/{self.__project_id}/{self.__af_registry_name}/components/component_base:latest", "." ]\n''' + f''' dir: "{self.__base_dir}components/component_base"\n''' + f''' id: "build_component_base"\n''' + f''' waitFor: ["-"]\n''' + f'\n' + f''' # build the run_pipeline image\n''' + f''' - name: 'gcr.io/cloud-builders/docker'\n''' + f''' args: [ "build", "-t", "{self.__af_registry_location}-docker.pkg.dev/{self.__project_id}/{self.__af_registry_name}/run_pipeline:latest", "-f", "cloud_run/run_pipeline/Dockerfile", "." 
]\n''' + f''' dir: "{self.__base_dir}"\n''' + f''' id: "build_pipeline_runner_svc"\n''' + f''' waitFor: ['build_component_base']\n''') + + cloudbuild_cloudrun_config = ( + f'\n' + f'# ==============================================================================\n' + f'# PUSH & DEPLOY CUSTOM IMAGES\n' + f'# ==============================================================================\n' + f'\n' + f''' # push the component_base image\n''' + f''' - name: "gcr.io/cloud-builders/docker"\n''' + f''' args: ["push", "{self.__af_registry_location}-docker.pkg.dev/{self.__project_id}/{self.__af_registry_name}/components/component_base:latest"]\n''' + f''' dir: "{self.__base_dir}components/component_base"\n''' + f''' id: "push_component_base"\n''' + f''' waitFor: ["build_pipeline_runner_svc"]\n''' + f'\n' + f''' # push the run_pipeline image\n''' + f''' - name: "gcr.io/cloud-builders/docker"\n''' + f''' args: ["push", "{self.__af_registry_location}-docker.pkg.dev/{self.__project_id}/{self.__af_registry_name}/run_pipeline:latest"]\n''' + f''' dir: "{self.__base_dir}"\n''' + f''' id: "push_pipeline_runner_svc"\n''' + f''' waitFor: ["push_component_base"]\n''' + f'\n' + f''' # deploy the cloud run service\n''' + f''' - name: "gcr.io/google.com/cloudsdktool/cloud-sdk"\n''' + f''' entrypoint: gcloud\n''' + f''' args: ["run",\n''' + f''' "deploy",\n''' + f''' "{self.__cloud_run_name}",\n''' + f''' "--image",\n''' + f''' "{self.__af_registry_location}-docker.pkg.dev/{self.__project_id}/{self.__af_registry_name}/run_pipeline:latest",\n''' + f''' "--region",\n''' + f''' "{self.__cloud_run_location}",\n''' + f''' "--service-account",\n''' + f''' "{self.__pipeline_runner_service_account}",{vpc_connector_tail}''' + f''' id: "deploy_pipeline_runner_svc"\n''' + f''' waitFor: ["push_pipeline_runner_svc"]\n''' + f'\n' + f''' # Copy runtime parameters\n''' + f''' - name: 'gcr.io/cloud-builders/gcloud'\n''' + f''' entrypoint: bash\n''' + f''' args:\n''' + f''' - '-e'\n''' + f''' - '-c'\n''' + f''' - |\n''' + f''' cp -r {self.__base_dir}cloud_run/queueing_svc .\n''' + f''' id: "setup_queueing_svc"\n''' + f''' waitFor: ["deploy_pipeline_runner_svc"]\n''' + f'\n' + f''' # Install dependencies\n''' + f''' - name: python\n''' + f''' entrypoint: pip\n''' + f''' args: ["install", "-r", "queueing_svc/requirements.txt", "--user"]\n''' + f''' id: "install_queueing_svc_deps"\n''' + f''' waitFor: ["setup_queueing_svc"]\n''' + f'\n' + f''' # Submit to queue\n''' + f''' - name: python\n''' + f''' entrypoint: python\n''' + f''' args: ["queueing_svc/main.py", "--setting", "queue_job"]\n''' + f''' id: "submit_job_to_queue"\n''' + f''' waitFor: ["install_queueing_svc_deps"]\n''') + + cloudbuild_scheduler_config = ( + '\n' + ''' # Create Scheduler Job\n''' + ''' - name: python\n''' + ''' entrypoint: python\n''' + ''' args: ["queueing_svc/main.py", "--setting", "schedule_job"]\n''' + ''' id: "schedule_job"\n''' + ''' waitFor: ["submit_job_to_queue"]\n''') + + custom_comp_image = ( + f'\n' + f'images:\n' + f''' # custom component images\n''' + f''' - "{self.__af_registry_location}-docker.pkg.dev/{self.__project_id}/{self.__af_registry_name}/components/component_base:latest"\n''') + + cloudrun_image = ( + f''' # Cloud Run image\n''' + f''' - "{self.__af_registry_location}-docker.pkg.dev/{self.__project_id}/{self.__af_registry_name}/run_pipeline:latest"\n''') + + if self.__run_local: + cb_file_contents = cloudbuild_comp_config + custom_comp_image + else: + if self.__cloud_schedule_pattern == 'No Schedule Specified': + cb_file_contents = 
cloudbuild_comp_config + cloudbuild_cloudrun_config + custom_comp_image + cloudrun_image
+            else:
+                cb_file_contents = cloudbuild_comp_config + cloudbuild_cloudrun_config + cloudbuild_scheduler_config + custom_comp_image + cloudrun_image
+
+        return cb_file_contents
diff --git a/AutoMLOps/deployments/github_actions/.gitkeep b/AutoMLOps/deployments/github_actions/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/AutoMLOps/deployments/gitlab_ci/.gitkeep b/AutoMLOps/deployments/gitlab_ci/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/AutoMLOps/deployments/jenkins/.gitkeep b/AutoMLOps/deployments/jenkins/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/AutoMLOps/frameworks/__init__.py b/AutoMLOps/frameworks/__init__.py
new file mode 100644
index 0000000..2379f87
--- /dev/null
+++ b/AutoMLOps/frameworks/__init__.py
@@ -0,0 +1,13 @@
+# Copyright 2023 Google LLC. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/AutoMLOps/frameworks/airflow/.gitkeep b/AutoMLOps/frameworks/airflow/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/AutoMLOps/frameworks/argo/.gitkeep b/AutoMLOps/frameworks/argo/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/AutoMLOps/frameworks/base.py b/AutoMLOps/frameworks/base.py
new file mode 100644
index 0000000..ae06dcd
--- /dev/null
+++ b/AutoMLOps/frameworks/base.py
@@ -0,0 +1,53 @@
+# Copyright 2023 Google LLC. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Defines parent classes for a Component and Pipeline."""
+
+# pylint: disable=C0103
+# pylint: disable=line-too-long
+
+from typing import Dict, List
+from AutoMLOps.utils.utils import read_yaml_file
+
+class Component():
+    """Parent class that defines a general abstraction of a Component."""
+    def __init__(self, component_spec: dict, defaults_file: str):
+        """Instantiate Component scripts object with all necessary attributes.
+
+        Args:
+            component_spec (dict): Dictionary of component specs including details
+                of component image, startup command, and args.
+            defaults_file (str): Path to the default config variables yaml.
+ """ + self._component_spec = component_spec + + # Parse defaults file for hidden class attributes + defaults = read_yaml_file(defaults_file) + self._af_registry_location = defaults['gcp']['af_registry_location'] + self._project_id = defaults['gcp']['project_id'] + self._af_registry_name = defaults['gcp']['af_registry_name'] + +class Pipeline(): + """Parent class that defined a general abstraction of a Pipeline """ + def __init__(self, custom_training_job_specs: List[Dict], defaults_file: str): + """Instantiate Pipeline scripts object with all necessary attributes. + + Args: + custom_training_job_specs (List[Dict]): Specifies the specs to run the training job with. + defaults_file (str): Path to the default config variables yaml. + """ + self._custom_training_job_specs = custom_training_job_specs + + defaults = read_yaml_file(defaults_file) + self._project_id = defaults['gcp']['project_id'] diff --git a/AutoMLOps/frameworks/kfp/__init__.py b/AutoMLOps/frameworks/kfp/__init__.py new file mode 100644 index 0000000..2379f87 --- /dev/null +++ b/AutoMLOps/frameworks/kfp/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2023 Google LLC. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/AutoMLOps/frameworks/kfp/builder.py b/AutoMLOps/frameworks/kfp/builder.py new file mode 100644 index 0000000..aa05a19 --- /dev/null +++ b/AutoMLOps/frameworks/kfp/builder.py @@ -0,0 +1,271 @@ +# Copyright 2023 Google LLC. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Builds KFP components and pipeline.""" + +# pylint: disable=line-too-long + +import json + +from typing import Dict, List, Optional +from AutoMLOps.utils.utils import ( + execute_process, + get_components_list, + make_dirs, + read_yaml_file, + write_and_chmod, + write_file, + write_yaml_file +) +from AutoMLOps.utils.constants import ( + BASE_DIR, + DEFAULT_IMAGE, + GENERATED_BUILD_COMPONENTS_SH_FILE, + GENERATED_CLOUDBUILD_FILE, + GENERATED_DEFAULTS_FILE, + GENERATED_COMPONENT_BASE, + GENERATED_PIPELINE_FILE, + GENERATED_PIPELINE_SPEC_SH_FILE, + GENERATED_RESOURCES_SH_FILE, + GENERATED_RUN_PIPELINE_SH_FILE, + GENERATED_RUN_ALL_SH_FILE, + PIPELINE_TMPFILE, + GENERATED_LICENSE, + GENERATED_PARAMETER_VALUES_PATH +) +from AutoMLOps.frameworks.kfp.constructs.cloudrun import KfpCloudRun +from AutoMLOps.frameworks.kfp.constructs.component import KfpComponent +from AutoMLOps.frameworks.kfp.constructs.pipeline import KfpPipeline +from AutoMLOps.frameworks.kfp.constructs.scripts import KfpScripts + +def build(project_id: str, + pipeline_params: Dict, + af_registry_location: Optional[str], + af_registry_name: Optional[str], + cb_trigger_location: Optional[str], + cb_trigger_name: Optional[str], + cloud_run_location: Optional[str], + cloud_run_name: Optional[str], + cloud_tasks_queue_location: Optional[str], + cloud_tasks_queue_name: Optional[str], + csr_branch_name: Optional[str], + csr_name: Optional[str], + custom_training_job_specs: Optional[List[Dict]], + gs_bucket_location: Optional[str], + gs_bucket_name: Optional[str], + pipeline_runner_sa: Optional[str], + run_local: Optional[bool], + schedule_location: Optional[str], + schedule_name: Optional[str], + schedule_pattern: Optional[str], + use_kfp_spec: Optional[bool], + vpc_connector: Optional[str]): + """Constructs scripts for resource deployment and running Kubeflow pipelines. + + Args: + af_registry_location: Region of the Artifact Registry. + af_registry_name: Artifact Registry name where components are stored. + cb_trigger_location: The location of the cloudbuild trigger. + cb_trigger_name: The name of the cloudbuild trigger. + cloud_run_location: The location of the cloud runner service. + cloud_run_name: The name of the cloud runner service. + cloud_tasks_queue_location: The location of the cloud tasks queue. + cloud_tasks_queue_name: The name of the cloud tasks queue. + csr_branch_name: The name of the csr branch to push to to trigger cb job. + csr_name: The name of the cloud source repo to use. + default_image: The image to use in the dockerfile. + gs_bucket_location: Region of the GS bucket. + gs_bucket_name: GS bucket name where pipeline run metadata is stored. + pipeline_runner_sa: Service Account to runner PipelineJobs. + project_id: The project ID. + run_local: Flag that determines whether to use Cloud Run CI/CD. + schedule_location: The location of the scheduler resource. + schedule_name: The name of the scheduler resource. + schedule_pattern: Cron formatted value used to create a Scheduled retrain job. + base_dir: Top directory name. + vpc_connector: The name of the vpc connector to use. 
+ """ + + # Get scripts builder object + kfp_scripts = KfpScripts( + af_registry_location, af_registry_name, cb_trigger_location, + cb_trigger_name, cloud_run_location, cloud_run_name, + cloud_tasks_queue_location, cloud_tasks_queue_name, csr_branch_name, + csr_name, DEFAULT_IMAGE, gs_bucket_location, gs_bucket_name, + pipeline_runner_sa, project_id, run_local, schedule_location, + schedule_name, schedule_pattern, BASE_DIR, vpc_connector) + + # Write defaults.yaml + write_file(GENERATED_DEFAULTS_FILE, kfp_scripts.defaults, 'w+') + + # Write scripts for building pipeline, building components, running pipeline, and running all files + write_and_chmod(GENERATED_PIPELINE_SPEC_SH_FILE, kfp_scripts.build_pipeline_spec) + write_and_chmod(GENERATED_BUILD_COMPONENTS_SH_FILE, kfp_scripts.build_components) + write_and_chmod(GENERATED_RUN_PIPELINE_SH_FILE, kfp_scripts.run_pipeline) + write_and_chmod(GENERATED_RUN_ALL_SH_FILE, kfp_scripts.run_all) + + # Write scripts to create resources and cloud build config + write_and_chmod(GENERATED_RESOURCES_SH_FILE, kfp_scripts.create_resources_script) + write_file(GENERATED_CLOUDBUILD_FILE, kfp_scripts.create_cloudbuild_config, 'w+') + + # Copy tmp pipeline file over to AutoMLOps directory + execute_process(f'cp {PIPELINE_TMPFILE} {GENERATED_PIPELINE_FILE}', to_null=False) + + # Create components and pipelines + components_path_list = get_components_list() + for path in components_path_list: + build_component(path, BASE_DIR, GENERATED_DEFAULTS_FILE, use_kfp_spec) + build_pipeline(custom_training_job_specs, GENERATED_DEFAULTS_FILE, pipeline_params, BASE_DIR) + + # Write dockerfile to the component base directory + write_file(f'{GENERATED_COMPONENT_BASE}/Dockerfile', kfp_scripts.dockerfile, 'w') + + # Write requirements.txt to the component base directory + write_file(f'{GENERATED_COMPONENT_BASE}/requirements.txt', kfp_scripts.requirements, 'w') + + # Build the cloud run files + if not run_local: + build_cloudrun(BASE_DIR, GENERATED_DEFAULTS_FILE) + + +def build_component(component_path: str, + base_dir: str, + defaults_file: str, + use_kfp_spec: bool): + """Constructs and writes component.yaml and {component_name}.py files. + component.yaml: Contains the Kubeflow custom component definition. + {component_name}.py: Contains the python code from the Jupyter cell. + + Args: + component_path: Path to the temporary component yaml. This file + is used to create the permanent component.yaml, and deleted + after calling AutoMLOps.generate(). + base_dir: Top directory name. + defaults_file: Path to the default config variables yaml. + use_kfp_spec: Flag that determines the format of the component yamls. 
+ """ + # Read in component specs + component_spec = read_yaml_file(component_path) + + # If using kfp, remove spaces in name and convert to lowercase + if use_kfp_spec: + component_spec['name'] = component_spec['name'].replace(' ', '_').lower() + + # Set and create directory for component, and set directory for task + component_dir = base_dir + 'components/' + component_spec['name'] + task_filepath = (base_dir + + 'components/component_base/src/' + + component_spec['name'] + + '.py') + make_dirs([component_dir]) + + # Initialize component scripts builder + kfp_comp = KfpComponent(component_spec, defaults_file) + + # Write task script to component base + write_file(task_filepath, kfp_comp.task, 'w+') + + # Update component_spec to include correct image and startup command + component_spec['implementation']['container']['image'] = kfp_comp.compspec_image + component_spec['implementation']['container']['command'] = [ + 'python3', + f'''/pipelines/component/src/{component_spec['name']+'.py'}'''] + + # Write license and component spec to the appropriate component.yaml file + filename = component_dir + '/component.yaml' + write_file(filename, GENERATED_LICENSE, 'w') + write_yaml_file(filename, component_spec, 'a') + + +def build_pipeline(custom_training_job_specs: List[Dict], + defaults_file: str, + pipeline_parameter_values: dict, + base_dir: str): + """Constructs and writes pipeline.py, pipeline_runner.py, and pipeline_parameter_values.json files. + pipeline.py: Generates a Kubeflow pipeline spec from custom components. + pipeline_runner.py: Sends a PipelineJob to Vertex AI using pipeline spec. + pipeline_parameter_values.json: Provides runtime parameters for the PipelineJob. + + Args: + custom_training_job_specs: Specifies the specs to run the training job with. + defaults_file: Path to the default config variables yaml. + pipeline_parameter_values: Dictionary of runtime parameters for the PipelineJob. + base_dir: Top directory name. + Raises: + Exception: If an error is encountered reading/writing to a file. + """ + # Set paths + pipeline_file = base_dir + 'pipelines/pipeline.py' + pipeline_runner_file = base_dir + 'pipelines/pipeline_runner.py' + pipeline_params_file = base_dir + GENERATED_PARAMETER_VALUES_PATH + + # Initializes pipeline scripts builder + kfp_pipeline = KfpPipeline(custom_training_job_specs, defaults_file) + try: + with open(pipeline_file, 'r+', encoding='utf-8') as file: + pipeline_scaffold = file.read() + file.seek(0, 0) + file.write(GENERATED_LICENSE) + file.write(kfp_pipeline.pipeline_imports) + for line in pipeline_scaffold.splitlines(): + file.write(' ' + line + '\n') + file.write(kfp_pipeline.pipeline_argparse) + file.close() + except OSError as err: + raise OSError(f'Error interacting with file. {err}') from err + + # Construct pipeline_runner.py + write_file(pipeline_runner_file, kfp_pipeline.pipeline_runner, 'w+') + + # Construct pipeline_parameter_values.json + serialized_params = json.dumps(pipeline_parameter_values, indent=4) + write_file(pipeline_params_file, serialized_params, 'w+') + +def build_cloudrun(base_dir: str, + defaults_file: str,): + """Constructs and writes a Dockerfile, requirements.txt, and + main.py to the cloud_run/run_pipeline directory. Also + constructs and writes a main.py, requirements.txt, and + pipeline_parameter_values.json to the + cloud_run/queueing_svc directory. + + Args: + base_dir: Top directory name. + defaults_file: Path to the default config variables yaml. 
+ """ + # Make new directories + make_dirs([base_dir + 'cloud_run', + base_dir + 'cloud_run/run_pipeline', + base_dir + 'cloud_run/queueing_svc']) + + # Initialize cloud run scripts object + cloudrun_scripts = KfpCloudRun(defaults_file) + + # Set new folders as variables + cloudrun_base = base_dir + 'cloud_run/run_pipeline' + queueing_svc_base = base_dir + 'cloud_run/queueing_svc' + + # Write cloud run dockerfile + write_file(f'{cloudrun_base}/Dockerfile', cloudrun_scripts.dockerfile, 'w') + + # Write requirements files for cloud run base and queueing svc + write_file(f'{cloudrun_base}/requirements.txt', cloudrun_scripts.cloudrun_base_reqs, 'w') + write_file(f'{queueing_svc_base}/requirements.txt', cloudrun_scripts.queueing_svc_reqs, 'w') + + # Write main code files for cloud run base and queueing svc + write_file(f'{cloudrun_base}/main.py', cloudrun_scripts.cloudrun_base, 'w') + write_file(f'{queueing_svc_base}/main.py', cloudrun_scripts.queueing_svc, 'w') + + # Copy runtime parameters over to queueing_svc dir + execute_process(f'''cp -r {base_dir + GENERATED_PARAMETER_VALUES_PATH} {base_dir + 'cloud_run/queueing_svc'}''', to_null=False) diff --git a/AutoMLOps/frameworks/kfp/constructs/__init__.py b/AutoMLOps/frameworks/kfp/constructs/__init__.py new file mode 100644 index 0000000..2379f87 --- /dev/null +++ b/AutoMLOps/frameworks/kfp/constructs/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2023 Google LLC. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/AutoMLOps/frameworks/kfp/constructs/cloudrun.py b/AutoMLOps/frameworks/kfp/constructs/cloudrun.py new file mode 100644 index 0000000..3444896 --- /dev/null +++ b/AutoMLOps/frameworks/kfp/constructs/cloudrun.py @@ -0,0 +1,400 @@ +# Copyright 2023 Google LLC. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Code strings for a kfp cloud run instance.""" + +# pylint: disable=line-too-long + +from AutoMLOps.utils.utils import read_yaml_file +from AutoMLOps.utils.constants import ( + GENERATED_LICENSE, + LEFT_BRACKET, + RIGHT_BRACKET +) + +class KfpCloudRun(): + """Generates files related to cloud runner service.""" + def __init__(self, defaults_file: str): + """Instantiate Cloud Run scripts object with all necessary attributes. + + Args: + defaults_file (str): Path to the default config variables yaml. 
+ """ + + # Parse defaults file for hidden class attributes + defaults = read_yaml_file(defaults_file) + self.__project_id = defaults['gcp']['project_id'] + self.__pipeline_runner_service_account = defaults['gcp']['pipeline_runner_service_account'] + self.__cloud_tasks_queue_location = defaults['gcp']['cloud_tasks_queue_location'] + self.__cloud_tasks_queue_name = defaults['gcp']['cloud_tasks_queue_name'] + self.__cloud_run_name = defaults['gcp']['cloud_run_name'] + self.__cloud_run_location = defaults['gcp']['cloud_run_location'] + self.__cloud_schedule_pattern = defaults['gcp']['cloud_schedule_pattern'] + self.__cloud_schedule_location = defaults['gcp']['cloud_schedule_location'] + self.__cloud_schedule_name = defaults['gcp']['cloud_schedule_name'] + + # Set generated scripts as public attributes + self.dockerfile = self._create_dockerfile() + self.cloudrun_base_reqs = self._create_cloudrun_base_reqs() + self.queueing_svc_reqs = self._create_queuing_svc_reqs() + self.cloudrun_base = self._create_cloudrun_base() + self.queueing_svc = self._create_queueing_svc() + + def _create_dockerfile(self): + """Returns text for a Dockerfile that will be added to the cloudrun/run_pipeline directory. + + Returns: + str: Dockerfile text. + """ + return ( + GENERATED_LICENSE + + 'FROM python:3.9-slim\n' + '\n' + '# Allow statements and log messages to immediately appear in the Knative logs\n' + 'ENV PYTHONUNBUFFERED True\n' + '\n' + '# Copy local code to the container image.\n' + 'ENV APP_HOME /app\n' + 'WORKDIR $APP_HOME\n' + 'COPY ./ ./\n' + '\n' + '# Upgrade pip\n' + 'RUN python -m pip install --upgrade pip\n' + '# Install requirements\n' + 'RUN pip install --no-cache-dir -r /app/cloud_run/run_pipeline/requirements.txt\n' + '# Compile pipeline spec\n' + 'RUN ./scripts/build_pipeline_spec.sh\n' + '# Change Directories\n' + 'WORKDIR "/app/cloud_run/run_pipeline"\n' + '# Run flask api server\n' + 'CMD exec gunicorn --bind :$PORT --workers 1 --threads 8 --timeout 0 main:app\n' + ) + + def _create_cloudrun_base_reqs(self): + """Returns the text of a cloudrun base requirements file to be written to the cloud_run/run_pipeline directory. + + Returns: + str: Package requirements for cloudrun base. + """ + return ( + 'kfp\n' + 'google-cloud-aiplatform\n' + 'google-cloud-pipeline-components\n' + 'Flask\n' + 'gunicorn\n' + 'pyyaml\n' + ) + + def _create_queuing_svc_reqs(self): + """Returns the text of a queueing svc requirements file to be written to the cloud_run/queueing_svc directory. + + Returns: + str: Package requirements for queueing svc. + """ + return ( + 'google-cloud\n' + 'google-cloud-tasks\n' + 'google-api-python-client\n' + 'google-cloud-run\n' + 'google-cloud-scheduler\n' + ) + + def _create_cloudrun_base(self): + """Creates content for a main.py to be written to the cloud_run/run_pipeline + directory. This file contains code for running a flask service that will act as + a pipeline runner service. + + Returns: + str: Content of cloudrun main.py. 
+ """ + return ( + GENERATED_LICENSE + + f'''"""Cloud Run to run pipeline spec"""\n''' + f'''import logging\n''' + f'''import os\n''' + f'''from typing import Tuple\n''' + f'\n' + f'''import flask\n''' + f'''from google.cloud import aiplatform\n''' + f'''import yaml\n''' + f'\n' + f'''app = flask.Flask(__name__)\n''' + f'\n' + f'''logger = logging.getLogger()\n''' + f'''log_level = os.environ.get('LOG_LEVEL', 'INFO')\n''' + f'''logger.setLevel(log_level)\n''' + f'\n' + f'''CONFIG_FILE = '../../configs/defaults.yaml'\n''' + f'''PIPELINE_SPEC_PATH_LOCAL = '../../scripts/pipeline_spec/pipeline_job.json'\n''' + f'\n' + f'''@app.route('/', methods=['POST'])\n''' + f'''def process_request() -> flask.Response:\n''' + f''' """HTTP web service to trigger pipeline execution.\n''' + f'\n' + f''' Returns:\n''' + f''' The response text, or any set of values that can be turned into a\n''' + f''' Response object using `make_response`\n''' + f''' .\n''' + f''' """\n''' + f''' content_type = flask.request.headers['content-type']\n''' + f''' if content_type == 'application/json':\n''' + f''' request_json = flask.request.json\n''' + f'\n' + f''' logging.debug('JSON Recieved:')\n''' + f''' logging.debug(request_json)\n''' + f'\n' + f''' with open(CONFIG_FILE, 'r', encoding='utf-8') as config_file:\n''' + f''' config = yaml.load(config_file, Loader=yaml.FullLoader)\n''' + f'\n' + f''' logging.debug('Calling run_pipeline()')\n''' + f''' dashboard_uri, resource_name = run_pipeline(\n''' + f''' project_id=config['gcp']['project_id'],\n''' + f''' pipeline_root=config['pipelines']['pipeline_storage_path'],\n''' + f''' pipeline_runner_sa=config['gcp']['pipeline_runner_service_account'],\n''' + f''' pipeline_params=request_json,\n''' + f''' pipeline_spec_path=PIPELINE_SPEC_PATH_LOCAL)\n''' + f''' return flask.make_response({LEFT_BRACKET}\n''' + f''' 'dashboard_uri': dashboard_uri,\n''' + f''' 'resource_name': resource_name\n''' + f''' {RIGHT_BRACKET}, 200)\n''' + f'\n' + f''' else:\n''' + f''' raise ValueError(f'Unknown content type: {LEFT_BRACKET}content_type{RIGHT_BRACKET}')\n''' + f'\n' + f'''def run_pipeline(\n''' + f''' project_id: str,\n''' + f''' pipeline_root: str,\n''' + f''' pipeline_runner_sa: str,\n''' + f''' pipeline_params: dict,\n''' + f''' pipeline_spec_path: str,\n''' + f''' display_name: str = 'mlops-pipeline-run',\n''' + f''' enable_caching: bool = False) -> Tuple[str, str]:\n''' + f''' """Executes a pipeline run.\n''' + f'\n' + f''' Args:\n''' + f''' project_id: The project_id.\n''' + f''' pipeline_root: GCS location of the pipeline runs metadata.\n''' + f''' pipeline_runner_sa: Service Account to runner PipelineJobs.\n''' + f''' pipeline_params: Pipeline parameters values.\n''' + f''' pipeline_spec_path: Location of the pipeline spec JSON.\n''' + f''' display_name: Name to call the pipeline.\n''' + f''' enable_caching: Should caching be enabled (Boolean)\n''' + f''' """\n''' + f''' logging.debug('Pipeline Parms Configured:')\n''' + f''' logging.debug(pipeline_params)\n''' + f'\n' + f''' aiplatform.init(project=project_id)\n''' + f''' job = aiplatform.PipelineJob(\n''' + f''' display_name = display_name,\n''' + f''' template_path = pipeline_spec_path,\n''' + f''' pipeline_root = pipeline_root,\n''' + f''' parameter_values = pipeline_params,\n''' + f''' enable_caching = enable_caching)\n''' + f''' logging.debug('AI Platform job built. 
Submitting...')\n''' + f''' job.submit(service_account=pipeline_runner_sa)\n''' + f''' logging.debug('Job sent!')\n''' + f''' dashboard_uri = job._dashboard_uri()\n''' + f''' resource_name = job.resource_name\n''' + f''' return dashboard_uri, resource_name\n''' + f'\n' + f'''if __name__ == '__main__':\n''' + f''' app.run(debug=True, host='0.0.0.0', port=int(os.environ.get('PORT', 8080)))\n''' + ) + + def _create_queueing_svc(self): + """Creates content for a main.py to be written to the cloud_run/queueing_svc + directory. This file contains code for submitting a job to the cloud runner + service, and creating a cloud scheduler job. + + Returns: + str: Content of queueing svc main.py. + """ + return ( + GENERATED_LICENSE + + f'''"""Submit pipeline job using Cloud Tasks and create Cloud Scheduler Job."""\n''' + f'''import argparse\n''' + f'''import json\n''' + f'\n' + f'''from google.cloud import run_v2\n''' + f'''from google.cloud import scheduler_v1\n''' + f'''from google.cloud import tasks_v2\n''' + f'\n' + f'''CLOUD_RUN_LOCATION = '{self.__cloud_run_location}'\n''' + f'''CLOUD_RUN_NAME = '{self.__cloud_run_name}'\n''' + f'''CLOUD_TASKS_QUEUE_LOCATION = '{self.__cloud_tasks_queue_location}'\n''' + f'''CLOUD_TASKS_QUEUE_NAME = '{self.__cloud_tasks_queue_name}'\n''' + f'''PARAMETER_VALUES_PATH = 'queueing_svc/pipeline_parameter_values.json'\n''' + f'''PIPELINE_RUNNER_SA = '{self.__pipeline_runner_service_account}'\n''' + f'''PROJECT_ID = '{self.__project_id}'\n''' + f'''SCHEDULE_LOCATION = '{self.__cloud_schedule_location}'\n''' + f'''SCHEDULE_PATTERN = '{self.__cloud_schedule_pattern}'\n''' + f'''SCHEDULE_NAME = '{self.__cloud_schedule_name}'\n''' + f'\n' + f'''def get_runner_svc_uri(\n''' + f''' cloud_run_location: str,\n''' + f''' cloud_run_name: str,\n''' + f''' project_id: str):\n''' + f''' """Fetches the uri for the given cloud run instance.\n''' + f'\n' + f''' Args:\n''' + f''' cloud_run_location: The location of the cloud runner service.\n''' + f''' cloud_run_name: The name of the cloud runner service.\n''' + f''' project_id: The project ID.\n''' + f''' Returns:\n''' + f''' str: Uri of the Cloud Run instance.\n''' + f''' """\n''' + f''' client = run_v2.ServicesClient()\n''' + f''' parent = client.service_path(project_id, cloud_run_location, cloud_run_name)\n''' + f''' request = run_v2.GetServiceRequest(name=parent)\n''' + f''' response = client.get_service(request=request)\n''' + f''' return response.uri\n''' + f'\n' + f'''def get_json_bytes(file_path: str):\n''' + f''' """Reads a json file at the specified path and returns as bytes.\n''' + f'\n' + f''' Args:\n''' + f''' file_path: Path of the json file.\n''' + f''' Returns:\n''' + f''' bytes: Encode bytes of the file.\n''' + f''' """\n''' + f''' try:\n''' + f''' with open(file_path, 'r', encoding='utf-8') as file:\n''' + f''' data = json.load(file)\n''' + f''' file.close()\n''' + f''' except OSError as err:\n''' + f''' raise Exception(f'Error reading json file. 
{LEFT_BRACKET}err{RIGHT_BRACKET}') from err\n''' + f''' return json.dumps(data).encode()\n''' + f'\n' + f'''def create_cloud_task(\n''' + f''' cloud_tasks_queue_location: str,\n''' + f''' cloud_tasks_queue_name: str,\n''' + f''' parameter_values_path: str,\n''' + f''' pipeline_runner_sa: str,\n''' + f''' project_id: str,\n''' + f''' runner_svc_uri: str):\n''' + f''' """Create a task to the queue with the runtime parameters.\n''' + f'\n' + f''' Args:\n''' + f''' cloud_run_location: The location of the cloud runner service.\n''' + f''' cloud_run_name: The name of the cloud runner service.\n''' + f''' cloud_tasks_queue_location: The location of the cloud tasks queue.\n''' + f''' cloud_tasks_queue_name: The name of the cloud tasks queue.\n''' + f''' parameter_values_path: Path to json pipeline params.\n''' + f''' pipeline_runner_sa: Service Account to runner PipelineJobs.\n''' + f''' project_id: The project ID.\n''' + f''' runner_svc_uri: Uri of the Cloud Run instance.\n''' + f''' """\n''' + f''' client = tasks_v2.CloudTasksClient()\n''' + f''' parent = client.queue_path(project_id, cloud_tasks_queue_location, cloud_tasks_queue_name)\n''' + f''' task = {LEFT_BRACKET}\n''' + f''' 'http_request': {LEFT_BRACKET}\n''' + f''' 'http_method': tasks_v2.HttpMethod.POST,\n''' + f''' 'url': runner_svc_uri,\n''' + f''' 'oidc_token': {LEFT_BRACKET}\n''' + f''' 'service_account_email': pipeline_runner_sa,\n''' + f''' 'audience': runner_svc_uri\n''' + f''' {RIGHT_BRACKET},\n''' + f''' 'headers': {LEFT_BRACKET}\n''' + f''' 'Content-Type': 'application/json'\n''' + f''' {RIGHT_BRACKET}\n''' + f''' {RIGHT_BRACKET}\n''' + f''' {RIGHT_BRACKET}\n''' + f''' task['http_request']['body'] = get_json_bytes(parameter_values_path)\n''' + f''' response = client.create_task(request={LEFT_BRACKET}'parent': parent, 'task': task{RIGHT_BRACKET})\n''' + f''' print(f'Created task {LEFT_BRACKET}response.name{RIGHT_BRACKET}')\n''' + f'\n' + f'''def create_cloud_scheduler_job(\n''' + f''' parameter_values_path: str,\n''' + f''' pipeline_runner_sa: str,\n''' + f''' project_id: str,\n''' + f''' runner_svc_uri: str,\n''' + f''' schedule_location: str,\n''' + f''' schedule_name: str,\n''' + f''' schedule_pattern: str):\n''' + f''' """Creates a scheduled pipeline job.\n''' + f'\n' + f''' Args:\n''' + f''' parameter_values_path: Path to json pipeline params.\n''' + f''' pipeline_runner_sa: Service Account to runner PipelineJobs.\n''' + f''' project_id: The project ID.\n''' + f''' runner_svc_uri: Uri of the Cloud Run instance.\n''' + f''' schedule_location: The location of the scheduler resource.\n''' + f''' schedule_name: The name of the scheduler resource.\n''' + f''' schedule_pattern: Cron formatted value used to create a Scheduled retrain job.\n''' + f''' """\n''' + f''' client = scheduler_v1.CloudSchedulerClient()\n''' + f''' parent = f'projects/{LEFT_BRACKET}project_id{RIGHT_BRACKET}/locations/{LEFT_BRACKET}schedule_location{RIGHT_BRACKET}'\n''' + f''' name = f'{LEFT_BRACKET}parent{RIGHT_BRACKET}/jobs/{LEFT_BRACKET}schedule_name{RIGHT_BRACKET}'\n''' + f'\n' + f''' request = scheduler_v1.ListJobsRequest(parent=parent)\n''' + f''' page_result = client.list_jobs(request=request)\n''' + f''' for response in page_result:\n''' + f''' if response.name == name:\n''' + f''' print(f'Cloud Scheduler {LEFT_BRACKET}schedule_name{RIGHT_BRACKET} resource already exists in '\n''' + f''' f'project {LEFT_BRACKET}project_id{RIGHT_BRACKET}.')\n''' + f''' return\n''' + f'\n' + f''' oidc_token = scheduler_v1.OidcToken(\n''' + f''' 
service_account_email=pipeline_runner_sa,\n''' + f''' audience=runner_svc_uri)\n''' + f'\n' + f''' target = scheduler_v1.HttpTarget(\n''' + f''' uri=runner_svc_uri,\n''' + f''' http_method=scheduler_v1.HttpMethod(1), # HTTP POST\n''' + f''' headers={LEFT_BRACKET}'Content-Type': 'application/json'{RIGHT_BRACKET},\n''' + f''' body=get_json_bytes(parameter_values_path),\n''' + f''' oidc_token=oidc_token)\n''' + f'\n' + f''' job = scheduler_v1.Job(\n''' + f''' name=f'{LEFT_BRACKET}parent{RIGHT_BRACKET}/jobs/{LEFT_BRACKET}schedule_name{RIGHT_BRACKET}',\n''' + f''' description='AutoMLOps cloud scheduled run.',\n''' + f''' http_target=target,\n''' + f''' schedule=schedule_pattern)\n''' + f'\n' + f''' request = scheduler_v1.CreateJobRequest(\n''' + f''' parent=parent,\n''' + f''' job=job)\n''' + f'\n' + f''' response = client.create_job(request=request)\n''' + f''' print(response)\n''' + f'\n' + f'''if __name__ == '__main__':\n''' + f''' parser = argparse.ArgumentParser()\n''' + f''' parser.add_argument('--setting', type=str,\n''' + f''' help='The config file for setting default values.')\n''' + f''' args = parser.parse_args()\n''' + f'\n' + f''' uri = get_runner_svc_uri(\n''' + f''' cloud_run_location=CLOUD_RUN_LOCATION,\n''' + f''' cloud_run_name=CLOUD_RUN_NAME,\n''' + f''' project_id=PROJECT_ID)\n''' + f'\n' + f''' if args.setting == 'queue_job':\n''' + f''' create_cloud_task(\n''' + f''' cloud_tasks_queue_location=CLOUD_TASKS_QUEUE_LOCATION,\n''' + f''' cloud_tasks_queue_name=CLOUD_TASKS_QUEUE_NAME,\n''' + f''' parameter_values_path=PARAMETER_VALUES_PATH,\n''' + f''' pipeline_runner_sa=PIPELINE_RUNNER_SA,\n''' + f''' project_id=PROJECT_ID,\n''' + f''' runner_svc_uri=uri)\n''' + f'\n' + f''' if args.setting == 'schedule_job':\n''' + f''' create_cloud_scheduler_job(\n''' + f''' parameter_values_path=PARAMETER_VALUES_PATH,\n''' + f''' pipeline_runner_sa=PIPELINE_RUNNER_SA,\n''' + f''' project_id=PROJECT_ID,\n''' + f''' runner_svc_uri=uri,\n''' + f''' schedule_location=SCHEDULE_LOCATION,\n''' + f''' schedule_name=SCHEDULE_NAME,\n''' + f''' schedule_pattern=SCHEDULE_PATTERN)\n''') diff --git a/AutoMLOps/frameworks/kfp/constructs/component.py b/AutoMLOps/frameworks/kfp/constructs/component.py new file mode 100644 index 0000000..86a8cb2 --- /dev/null +++ b/AutoMLOps/frameworks/kfp/constructs/component.py @@ -0,0 +1,85 @@ +# Copyright 2023 Google LLC. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Code strings for a kfp component.""" + +# pylint: disable=line-too-long + +from AutoMLOps.utils.constants import GENERATED_LICENSE +from AutoMLOps.frameworks.base import Component + +class KfpComponent(Component): + """Child class that generates files related to kfp components.""" + def __init__(self, component_spec: dict, defaults_file: str): + """Instantiate Component scripts object with all necessary attributes. + + Args: + component_spec (dict): Dictionary of component specs including details + of component image, startup command, and args. 
+ defaults_file (str): Path to the default config variables yaml. + """ + super().__init__(component_spec, defaults_file) + + # Get generated scripts as public attributes + self.task = self._create_task() + self.compspec_image = self._create_compspec_image() + + def _create_task(self): + """Creates the content of the cell python code to be written to a file with required imports. + + Returns: + str: Contents of component base source code. + """ + custom_code = self._component_spec['implementation']['container']['command'][-1] + default_imports = ( + GENERATED_LICENSE + + 'import argparse\n' + 'import json\n' + 'import kfp\n' + 'from kfp.v2 import dsl\n' + 'from kfp.v2.components import executor\n' + 'from kfp.v2.dsl import *\n' + 'from typing import *\n' + '\n') + main_func = ( + '\n' + '''def main():\n''' + ''' """Main executor."""\n''' + ''' parser = argparse.ArgumentParser()\n''' + ''' parser.add_argument('--executor_input', type=str)\n''' + ''' parser.add_argument('--function_to_execute', type=str)\n''' + '\n' + ''' args, _ = parser.parse_known_args()\n''' + ''' executor_input = json.loads(args.executor_input)\n''' + ''' function_to_execute = globals()[args.function_to_execute]\n''' + '\n' + ''' executor.Executor(\n''' + ''' executor_input=executor_input,\n''' + ''' function_to_execute=function_to_execute).execute()\n''' + '\n' + '''if __name__ == '__main__':\n''' + ''' main()\n''') + return default_imports + custom_code + main_func + + def _create_compspec_image(self): + """Write the correct image for the component spec. + + Returns: + str: Component spec image. + """ + return ( + f'''{self._af_registry_location}-docker.pkg.dev/''' + f'''{self._project_id}/''' + f'''{self._af_registry_name}/''' + f'''components/component_base:latest''') diff --git a/AutoMLOps/frameworks/kfp/constructs/pipeline.py b/AutoMLOps/frameworks/kfp/constructs/pipeline.py new file mode 100644 index 0000000..7bd6e5e --- /dev/null +++ b/AutoMLOps/frameworks/kfp/constructs/pipeline.py @@ -0,0 +1,177 @@ +# Copyright 2023 Google LLC. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Code strings for a kfp pipeline.""" + +# pylint: disable=line-too-long + +from typing import Dict, List + +from AutoMLOps.utils.utils import get_components_list, format_spec_dict +from AutoMLOps.utils.constants import GENERATED_LICENSE +from AutoMLOps.frameworks.base import Pipeline + +class KfpPipeline(Pipeline): + """Child class that generates files related to kfp pipelines.""" + def __init__(self, custom_training_job_specs: List[Dict], defaults_file: str): + """Instantiate Pipeline scripts object with all necessary attributes. + + Args: + custom_training_job_specs (List[Dict]): Specifies the specs to run the training job with. + defaults_file (str): Path to the default config variables yaml. 
+ """ + super().__init__(custom_training_job_specs, defaults_file) + self.pipeline_imports = self._get_pipeline_imports() + self.pipeline_argparse = self._get_pipeline_argparse() + self.pipeline_runner = self._get_pipeline_runner() + + def _get_pipeline_imports(self): + """Generates python code that imports modules and loads all custom components. + + Returns: + str: Python pipeline_imports code. + """ + components_list = get_components_list(full_path=False) + gcpc_imports = ( + 'from functools import partial\n' + 'from google_cloud_pipeline_components.v1.custom_job import create_custom_training_job_op_from_component\n') + quote = '\'' + newline_tab = '\n ' + + # If there is a custom training job specified, write those to feed to pipeline imports + if not self._custom_training_job_specs: + custom_specs = '' + else: + custom_specs = ( + f''' {newline_tab.join(f'{spec["component_spec"]}_custom_training_job_specs = {format_spec_dict(spec)}' for spec in self._custom_training_job_specs)}''' + f'\n' + f''' {newline_tab.join(f'{spec["component_spec"]}_job_op = create_custom_training_job_op_from_component(**{spec["component_spec"]}_custom_training_job_specs)' for spec in self._custom_training_job_specs)}''' + f'\n' + f''' {newline_tab.join(f'{spec["component_spec"]} = partial({spec["component_spec"]}_job_op, project={quote}{self._project_id}{quote})' for spec in self._custom_training_job_specs)}''' + f'\n') + + # Return standard code and customized specs + return ( + f'''import argparse\n''' + f'''import os\n''' + f'''{gcpc_imports if self._custom_training_job_specs else ''}''' + f'''import kfp\n''' + f'''from kfp.v2 import compiler, dsl\n''' + f'''from kfp.v2.dsl import *\n''' + f'''from typing import *\n''' + f'''import yaml\n''' + f'\n' + f'''def load_custom_component(component_name: str):\n''' + f''' component_path = os.path.join('components',\n''' + f''' component_name,\n''' + f''' 'component.yaml')\n''' + f''' return kfp.components.load_component_from_file(component_path)\n''' + f'\n' + f'''def create_training_pipeline(pipeline_job_spec_path: str):\n''' + f''' {newline_tab.join(f'{component} = load_custom_component(component_name={quote}{component}{quote})' for component in components_list)}\n''' + f'\n' + f'''{custom_specs}''') + + def _get_pipeline_argparse(self): + """Generates python code that loads default pipeline parameters from the defaults config_file. + + Returns: + str: Python pipeline_argparse code. + """ + return ( + '''if __name__ == '__main__':\n''' + ''' parser = argparse.ArgumentParser()\n''' + ''' parser.add_argument('--config', type=str,\n''' + ''' help='The config file for setting default values.')\n''' + '\n' + ''' args = parser.parse_args()\n''' + '\n' + ''' with open(args.config, 'r', encoding='utf-8') as config_file:\n''' + ''' config = yaml.load(config_file, Loader=yaml.FullLoader)\n''' + '\n' + ''' pipeline = create_training_pipeline(\n''' + ''' pipeline_job_spec_path=config['pipelines']['pipeline_job_spec_path'])\n''') + + def _get_pipeline_runner(self): + """Generates python code that sends a PipelineJob to Vertex AI. + + Returns: + str: Python pipeline_runner code. 
+ """ + return ( + GENERATED_LICENSE + + '''import argparse\n''' + '''import json\n''' + '''import logging\n''' + '''import os\n''' + '''import yaml\n''' + '\n' + '''from google.cloud import aiplatform\n''' + '\n' + '''logger = logging.getLogger()\n''' + '''log_level = os.environ.get('LOG_LEVEL', 'INFO')\n''' + '''logger.setLevel(log_level)\n''' + '\n' + '''def run_pipeline(\n''' + ''' project_id: str,\n''' + ''' pipeline_root: str,\n''' + ''' pipeline_runner_sa: str,\n''' + ''' parameter_values_path: str,\n''' + ''' pipeline_spec_path: str,\n''' + ''' display_name: str = 'mlops-pipeline-run',\n''' + ''' enable_caching: bool = False):\n''' + ''' """Executes a pipeline run.\n''' + '\n' + ''' Args:\n''' + ''' project_id: The project_id.\n''' + ''' pipeline_root: GCS location of the pipeline runs metadata.\n''' + ''' pipeline_runner_sa: Service Account to runner PipelineJobs.\n''' + ''' parameter_values_path: Location of parameter values JSON.\n''' + ''' pipeline_spec_path: Location of the pipeline spec JSON.\n''' + ''' display_name: Name to call the pipeline.\n''' + ''' enable_caching: Should caching be enabled (Boolean)\n''' + ''' """\n''' + ''' with open(parameter_values_path, 'r') as file:\n''' + ''' try:\n''' + ''' pipeline_params = json.load(file)\n''' + ''' except ValueError as exc:\n''' + ''' print(exc)\n''' + ''' logging.debug('Pipeline Parms Configured:')\n''' + ''' logging.debug(pipeline_params)\n''' + '\n' + ''' aiplatform.init(project=project_id)\n''' + ''' job = aiplatform.PipelineJob(\n''' + ''' display_name = display_name,\n''' + ''' template_path = pipeline_spec_path,\n''' + ''' pipeline_root = pipeline_root,\n''' + ''' parameter_values = pipeline_params,\n''' + ''' enable_caching = enable_caching)\n''' + ''' logging.debug('AI Platform job built. Submitting...')\n''' + ''' job.submit(service_account=pipeline_runner_sa)\n''' + ''' logging.debug('Job sent!')\n''' + '\n' + '''if __name__ == '__main__':\n''' + ''' parser = argparse.ArgumentParser()\n''' + ''' parser.add_argument('--config', type=str,\n''' + ''' help='The config file for setting default values.')\n''' + ''' args = parser.parse_args()\n''' + '\n' + ''' with open(args.config, 'r', encoding='utf-8') as config_file:\n''' + ''' config = yaml.load(config_file, Loader=yaml.FullLoader)\n''' + '\n' + ''' run_pipeline(project_id=config['gcp']['project_id'],\n''' + ''' pipeline_root=config['pipelines']['pipeline_storage_path'],\n''' + ''' pipeline_runner_sa=config['gcp']['pipeline_runner_service_account'],\n''' + ''' parameter_values_path=config['pipelines']['parameter_values_path'],\n''' + ''' pipeline_spec_path=config['pipelines']['pipeline_job_spec_path']) \n''') diff --git a/AutoMLOps/frameworks/kfp/constructs/scripts.py b/AutoMLOps/frameworks/kfp/constructs/scripts.py new file mode 100644 index 0000000..6ed01ec --- /dev/null +++ b/AutoMLOps/frameworks/kfp/constructs/scripts.py @@ -0,0 +1,624 @@ +# Copyright 2023 Google LLC. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Code strings for kfp scripts.""" + +# pylint: disable=anomalous-backslash-in-string +# pylint: disable=line-too-long + +import re + +from AutoMLOps.utils.utils import ( + execute_process, + get_components_list, + read_file, + read_yaml_file +) +from AutoMLOps.utils.constants import ( + GENERATED_LICENSE, + NEWLINE, + LEFT_BRACKET, + RIGHT_BRACKET, + GENERATED_COMPONENT_BASE, + GENERATED_PARAMETER_VALUES_PATH, + GENERATED_PIPELINE_JOB_SPEC_PATH +) + +class KfpScripts(): + """Generates files related to running kubeflow pipelines.""" + def __init__(self, + af_registry_location: str, + af_registry_name: str, + cb_trigger_location: str, + cb_trigger_name: str, + cloud_run_location: str, + cloud_run_name: str, + cloud_tasks_queue_location: str, + cloud_tasks_queue_name: str, + csr_branch_name: str, + csr_name: str, + default_image: str, + gs_bucket_location: str, + gs_bucket_name: str, + pipeline_runner_sa: str, + project_id: str, + run_local: str, + schedule_location: str, + schedule_name: str, + schedule_pattern: str, + base_dir: str, + vpc_connector: str): + """Constructs scripts for resource deployment and running Kubeflow pipelines. + + Args: + af_registry_location: Region of the Artifact Registry. + af_registry_name: Artifact Registry name where components are stored. + cb_trigger_location: The location of the cloudbuild trigger. + cb_trigger_name: The name of the cloudbuild trigger. + cloud_run_location: The location of the cloud runner service. + cloud_run_name: The name of the cloud runner service. + cloud_tasks_queue_location: The location of the cloud tasks queue. + cloud_tasks_queue_name: The name of the cloud tasks queue. + csr_branch_name: The name of the csr branch to push to to trigger cb job. + csr_name: The name of the cloud source repo to use. + default_image: The image to use in the dockerfile. + gs_bucket_location: Region of the GS bucket. + gs_bucket_name: GS bucket name where pipeline run metadata is stored. + pipeline_runner_sa: Service Account to runner PipelineJobs. + project_id: The project ID. + run_local: Flag that determines whether to use Cloud Run CI/CD. + schedule_location: The location of the scheduler resource. + schedule_name: The name of the scheduler resource. + schedule_pattern: Cron formatted value used to create a Scheduled retrain job. + base_dir: Top directory name. + vpc_connector: The name of the vpc connector to use. 
+ """ + # Set passed variables as hidden attributes + self.__base_dir = base_dir + self.__run_local = run_local + + # Parse defaults file for hidden class attributes + self.__af_registry_name = af_registry_name + self.__af_registry_location = af_registry_location + self.__project_id = project_id + self.__gs_bucket_name = gs_bucket_name + self.__gs_bucket_location = gs_bucket_location + self.__pipeline_region = gs_bucket_location + self.__pipeline_runner_service_account = pipeline_runner_sa + self.__cloud_source_repository = csr_name + self.__cloud_source_repository_branch = csr_branch_name + self.__cb_trigger_location = cb_trigger_location + self.__cb_trigger_name = cb_trigger_name + self.__cloud_tasks_queue_location = cloud_tasks_queue_location + self.__cloud_tasks_queue_name = cloud_tasks_queue_name + self.__vpc_connector = vpc_connector + self.__cloud_run_name = cloud_run_name + self.__cloud_run_location = cloud_run_location + self.__cloud_schedule_location = schedule_location + self.__cloud_schedule_name = schedule_name + self.__cloud_schedule_pattern = schedule_pattern + self.__default_image = default_image + + # Set generated scripts as public attributes + self.build_pipeline_spec = self._build_pipeline_spec() + self.build_components = self._build_components() + self.run_pipeline = self._run_pipeline() + self.run_all = self._run_all() + self.create_resources_script = self._create_resources_script() + self.create_cloudbuild_config = self._create_cloudbuild_config() + self.dockerfile = self._create_dockerfile() + self.defaults = self._create_default_config() + self.requirements = self._create_requirements() + + def _build_pipeline_spec(self): + """Builds content of a shell script to build the pipeline specs. + + Returns: + str: Text of script to build pipeline specs. + """ + return ( + '#!/bin/bash\n' + GENERATED_LICENSE + + '# Builds the pipeline specs\n' + f'# This script should run from the {self.__base_dir} directory\n' + '# Change directory in case this is not the script root.\n' + '\n' + 'CONFIG_FILE=configs/defaults.yaml\n' + '\n' + 'python3 -m pipelines.pipeline --config $CONFIG_FILE\n') + + def _build_components(self): + """Builds content of a shell script to build components. + + Returns: + str: Text of script to build components. + """ + return ( + '#!/bin/bash\n' + GENERATED_LICENSE + + '# Submits a Cloud Build job that builds and deploys the components\n' + f'# This script should run from the {self.__base_dir} directory\n' + '# Change directory in case this is not the script root.\n' + '\n' + 'gcloud builds submit .. --config cloudbuild.yaml --timeout=3600\n') + + def _run_pipeline(self): + """Builds content of a shell script to run the pipeline. + + Returns: + str: Text of script to run pipeline. + """ + return ( + '#!/bin/bash\n' + GENERATED_LICENSE + + '# Submits the PipelineJob to Vertex AI\n' + f'# This script should run from the {self.__base_dir} directory\n' + '# Change directory in case this is not the script root.\n' + '\n' + 'CONFIG_FILE=configs/defaults.yaml\n' + '\n' + 'python3 -m pipelines.pipeline_runner --config $CONFIG_FILE\n') + + def _run_all(self): + """Builds content of a shell script to run all other shell scripts. + + Returns: + str: Text of script to run all other scripts. 
+ """ + return ( + '#!/bin/bash\n' + GENERATED_LICENSE + + '# Builds components, pipeline specs, and submits the PipelineJob.\n' + f'# This script should run from the {self.__base_dir} directory\n' + '# Change directory in case this is not the script root.\n' + '\n' + '''GREEN='\033[0;32m'\n''' + '''NC='\033[0m'\n''' + '\n' + 'echo -e "${GREEN} BUILDING COMPONENTS ${NC}"\n' + 'gcloud builds submit .. --config cloudbuild.yaml --timeout=3600\n' + '\n' + 'echo -e "${GREEN} BUILDING PIPELINE SPEC ${NC}"\n' + './scripts/build_pipeline_spec.sh\n' + '\n' + 'echo -e "${GREEN} RUNNING PIPELINE JOB ${NC}"\n' + './scripts/run_pipeline.sh\n') + + def _create_resources_script(self): + """Builds content of create_resources.sh, which creates a specified + artifact registry and gs bucket if they do not already exist. Also creates + a service account to run Vertex AI Pipelines. + + Returns: + str: Text to be written to create_resources.sh + """ + create_resources_script = ( + '#!/bin/bash\n' + GENERATED_LICENSE + + f'# This script will create an artifact registry and gs bucket if they do not already exist.\n' + f'\n' + f'''GREEN='\033[0;32m'\n''' + f'''NC='\033[0m'\n''' + f'''AF_REGISTRY_NAME={self.__af_registry_name}\n''' + f'''AF_REGISTRY_LOCATION={self.__af_registry_location}\n''' + f'''PROJECT_ID={self.__project_id}\n''' + f'''PROJECT_NUMBER=`gcloud projects describe {self.__project_id} --format 'value(projectNumber)'`\n''' + f'''BUCKET_NAME={self.__gs_bucket_name}\n''' + f'''BUCKET_LOCATION={self.__pipeline_region}\n''' + f'''SERVICE_ACCOUNT_NAME={self.__pipeline_runner_service_account.split('@')[0]}\n''' + f'''SERVICE_ACCOUNT_FULL={self.__pipeline_runner_service_account}\n''' + f'''CLOUD_SOURCE_REPO={self.__cloud_source_repository}\n''' + f'''CLOUD_SOURCE_REPO_BRANCH={self.__cloud_source_repository_branch}\n''' + f'''CB_TRIGGER_LOCATION={self.__cb_trigger_location}\n''' + f'''CB_TRIGGER_NAME={self.__cb_trigger_name}\n''' + f'''CLOUD_TASKS_QUEUE_LOCATION={self.__cloud_tasks_queue_location}\n''' + f'''CLOUD_TASKS_QUEUE_NAME={self.__cloud_tasks_queue_name}\n''' + f'\n' + f'echo -e "$GREEN Updating required API services in project $PROJECT_ID $NC"\n' + f'gcloud services enable cloudresourcemanager.googleapis.com \{NEWLINE}' + f' aiplatform.googleapis.com \{NEWLINE}' + f' artifactregistry.googleapis.com \{NEWLINE}' + f' cloudbuild.googleapis.com \{NEWLINE}' + f' cloudscheduler.googleapis.com \{NEWLINE}' + f' cloudtasks.googleapis.com \{NEWLINE}' + f' compute.googleapis.com \{NEWLINE}' + f' iam.googleapis.com \{NEWLINE}' + f' iamcredentials.googleapis.com \{NEWLINE}' + f' ml.googleapis.com \{NEWLINE}' + f' run.googleapis.com \{NEWLINE}' + f' storage.googleapis.com \{NEWLINE}' + f' sourcerepo.googleapis.com\n' + f'\n' + f'echo -e "$GREEN Checking for Artifact Registry: $AF_REGISTRY_NAME in project $PROJECT_ID $NC"\n' + f'if ! (gcloud artifacts repositories list --project="$PROJECT_ID" --location=$AF_REGISTRY_LOCATION | grep -E "(^|[[:blank:]])$AF_REGISTRY_NAME($|[[:blank:]])"); then\n' + f'\n' + f' echo "Creating Artifact Registry: ${LEFT_BRACKET}AF_REGISTRY_NAME{RIGHT_BRACKET} in project $PROJECT_ID"\n' + f' gcloud artifacts repositories create "$AF_REGISTRY_NAME" \{NEWLINE}' + f' --repository-format=docker \{NEWLINE}' + f' --location=$AF_REGISTRY_LOCATION \{NEWLINE}' + f' --project="$PROJECT_ID" \{NEWLINE}' + f' --description="Artifact Registry ${LEFT_BRACKET}AF_REGISTRY_NAME{RIGHT_BRACKET} in ${LEFT_BRACKET}AF_REGISTRY_LOCATION{RIGHT_BRACKET}." 
\n' + f'\n' + f'else\n' + f'\n' + f' echo "Artifact Registry: ${LEFT_BRACKET}AF_REGISTRY_NAME{RIGHT_BRACKET} already exists in project $PROJECT_ID"\n' + f'\n' + f'fi\n' + f'\n' + f'\n' + f'echo -e "$GREEN Checking for GS Bucket: $BUCKET_NAME in project $PROJECT_ID $NC"\n' + f'if !(gsutil ls -b gs://$BUCKET_NAME | grep --fixed-strings "$BUCKET_NAME"); then\n' + f'\n' + f' echo "Creating GS Bucket: ${LEFT_BRACKET}BUCKET_NAME{RIGHT_BRACKET} in project $PROJECT_ID"\n' + f' gsutil mb -l ${LEFT_BRACKET}BUCKET_LOCATION{RIGHT_BRACKET} gs://$BUCKET_NAME\n' + f'\n' + f'else\n' + f'\n' + f' echo "GS Bucket: ${LEFT_BRACKET}BUCKET_NAME{RIGHT_BRACKET} already exists in project $PROJECT_ID"\n' + f'\n' + f'fi\n' + f'\n' + f'echo -e "$GREEN Checking for Service Account: $SERVICE_ACCOUNT_NAME in project $PROJECT_ID $NC"\n' + f'if ! (gcloud iam service-accounts list --project="$PROJECT_ID" | grep -E "(^|[[:blank:]])$SERVICE_ACCOUNT_FULL($|[[:blank:]])"); then\n' + f'\n' + f' echo "Creating Service Account: ${LEFT_BRACKET}SERVICE_ACCOUNT_NAME{RIGHT_BRACKET} in project $PROJECT_ID"\n' + f' gcloud iam service-accounts create $SERVICE_ACCOUNT_NAME \{NEWLINE}' + f' --description="For submitting PipelineJobs" \{NEWLINE}' + f' --display-name="Pipeline Runner Service Account"\n' + f'else\n' + f'\n' + f' echo "Service Account: ${LEFT_BRACKET}SERVICE_ACCOUNT_NAME{RIGHT_BRACKET} already exists in project $PROJECT_ID"\n' + f'\n' + f'fi\n' + f'\n' + f'echo -e "$GREEN Updating required IAM roles in project $PROJECT_ID $NC"\n' + f'gcloud projects add-iam-policy-binding $PROJECT_ID \{NEWLINE}' + f' --member="serviceAccount:$SERVICE_ACCOUNT_FULL" \{NEWLINE}' + f' --role="roles/aiplatform.user" \{NEWLINE}' + f' --no-user-output-enabled\n' + f'\n' + f'gcloud projects add-iam-policy-binding $PROJECT_ID \{NEWLINE}' + f' --member="serviceAccount:$SERVICE_ACCOUNT_FULL" \{NEWLINE}' + f' --role="roles/artifactregistry.reader" \{NEWLINE}' + f' --no-user-output-enabled\n' + f'\n' + f'gcloud projects add-iam-policy-binding $PROJECT_ID \{NEWLINE}' + f' --member="serviceAccount:$SERVICE_ACCOUNT_FULL" \{NEWLINE}' + f' --role="roles/bigquery.user" \{NEWLINE}' + f' --no-user-output-enabled\n' + f'\n' + f'gcloud projects add-iam-policy-binding $PROJECT_ID \{NEWLINE}' + f' --member="serviceAccount:$SERVICE_ACCOUNT_FULL" \{NEWLINE}' + f' --role="roles/bigquery.dataEditor" \{NEWLINE}' + f' --no-user-output-enabled\n' + f'\n' + f'gcloud projects add-iam-policy-binding $PROJECT_ID \{NEWLINE}' + f' --member="serviceAccount:$SERVICE_ACCOUNT_FULL" \{NEWLINE}' + f' --role="roles/iam.serviceAccountUser" \{NEWLINE}' + f' --no-user-output-enabled\n' + f'\n' + f'gcloud projects add-iam-policy-binding $PROJECT_ID \{NEWLINE}' + f' --member="serviceAccount:$SERVICE_ACCOUNT_FULL" \{NEWLINE}' + f' --role="roles/storage.admin" \{NEWLINE}' + f' --no-user-output-enabled\n' + f'\n' + f'gcloud projects add-iam-policy-binding $PROJECT_ID \{NEWLINE}' + f' --member="serviceAccount:$SERVICE_ACCOUNT_FULL" \{NEWLINE}' + f' --role="roles/run.admin" \{NEWLINE}' + f' --no-user-output-enabled\n' + f'\n' + f'gcloud projects add-iam-policy-binding $PROJECT_ID \{NEWLINE}' + f' --member="serviceAccount:$PROJECT_NUMBER@cloudbuild.gserviceaccount.com" \{NEWLINE}' + f' --role="roles/run.admin" \{NEWLINE}' + f' --no-user-output-enabled\n' + f'\n' + f'gcloud projects add-iam-policy-binding $PROJECT_ID \{NEWLINE}' + f' --member="serviceAccount:$PROJECT_NUMBER@cloudbuild.gserviceaccount.com" \{NEWLINE}' + f' --role="roles/iam.serviceAccountUser" \{NEWLINE}' + f' 
--no-user-output-enabled\n' + f'\n' + f'gcloud projects add-iam-policy-binding $PROJECT_ID \{NEWLINE}' + f' --member="serviceAccount:$PROJECT_NUMBER@cloudbuild.gserviceaccount.com" \{NEWLINE}' + f' --role="roles/cloudtasks.enqueuer" \{NEWLINE}' + f' --no-user-output-enabled\n' + f'\n' + f'gcloud projects add-iam-policy-binding $PROJECT_ID \{NEWLINE}' + f' --member="serviceAccount:$PROJECT_NUMBER@cloudbuild.gserviceaccount.com" \{NEWLINE}' + f' --role="roles/cloudscheduler.admin" \{NEWLINE}' + f' --no-user-output-enabled\n' + f'\n' + f'echo -e "$GREEN Checking for Cloud Source Repository: $CLOUD_SOURCE_REPO in project $PROJECT_ID $NC"\n' + f'if ! (gcloud source repos list --project="$PROJECT_ID" | grep -E "(^|[[:blank:]])$CLOUD_SOURCE_REPO($|[[:blank:]])"); then\n' + f'\n' + f' echo "Creating Cloud Source Repository: ${LEFT_BRACKET}CLOUD_SOURCE_REPO{RIGHT_BRACKET} in project $PROJECT_ID"\n' + f' gcloud source repos create $CLOUD_SOURCE_REPO\n' + f'\n' + f'else\n' + f'\n' + f' echo "Cloud Source Repository: ${LEFT_BRACKET}CLOUD_SOURCE_REPO{RIGHT_BRACKET} already exists in project $PROJECT_ID"\n' + f'\n' + f'fi\n') + + if not self.__run_local: + create_resources_script += ( + f'\n' + f'# Create cloud tasks queue\n' + f'echo -e "$GREEN Checking for Cloud Tasks Queue: $CLOUD_TASKS_QUEUE_NAME in project $PROJECT_ID $NC"\n' + f'if ! (gcloud tasks queues list --location $CLOUD_TASKS_QUEUE_LOCATION | grep -E "(^|[[:blank:]])$CLOUD_TASKS_QUEUE_NAME($|[[:blank:]])"); then\n' + f'\n' + f' echo "Creating Cloud Tasks Queue: ${LEFT_BRACKET}CLOUD_TASKS_QUEUE_NAME{RIGHT_BRACKET} in project $PROJECT_ID"\n' + f' gcloud tasks queues create $CLOUD_TASKS_QUEUE_NAME \{NEWLINE}' + f' --location=$CLOUD_TASKS_QUEUE_LOCATION\n' + f'\n' + f'else\n' + f'\n' + f' echo "Cloud Tasks Queue: ${LEFT_BRACKET}CLOUD_TASKS_QUEUE_NAME{RIGHT_BRACKET} already exists in project $PROJECT_ID"\n' + f'\n' + f'fi\n' + f'\n' + f'# Create cloud build trigger\n' + f'echo -e "$GREEN Checking for Cloudbuild Trigger: $CB_TRIGGER_NAME in project $PROJECT_ID $NC"\n' + f'if ! (gcloud beta builds triggers list --project="$PROJECT_ID" --region="$CB_TRIGGER_LOCATION" | grep -E "(^|[[:blank:]])name: $CB_TRIGGER_NAME($|[[:blank:]])"); then\n' + f'\n' + f' echo "Creating Cloudbuild Trigger on branch $CLOUD_SOURCE_REPO_BRANCH in project $PROJECT_ID for repo ${LEFT_BRACKET}CLOUD_SOURCE_REPO{RIGHT_BRACKET}"\n' + f' gcloud beta builds triggers create cloud-source-repositories \{NEWLINE}' + f' --region=$CB_TRIGGER_LOCATION \{NEWLINE}' + f' --name=$CB_TRIGGER_NAME \{NEWLINE}' + f' --repo=$CLOUD_SOURCE_REPO \{NEWLINE}' + f' --branch-pattern="$CLOUD_SOURCE_REPO_BRANCH" \{NEWLINE}' + f' --build-config={self.__base_dir}cloudbuild.yaml\n' + f'\n' + f'else\n' + f'\n' + f' echo "Cloudbuild Trigger already exists in project $PROJECT_ID for repo ${LEFT_BRACKET}CLOUD_SOURCE_REPO{RIGHT_BRACKET}"\n' + f'\n' + f'fi\n') + + return create_resources_script + + def _create_cloudbuild_config(self): + """Builds the content of cloudbuild.yaml. + + Args: + str: Text content of cloudbuild.yaml. 
+ """ + vpc_connector_tail = '' + if self.__vpc_connector != 'No VPC Specified': + vpc_connector_tail = ( + f'\n' + f' "--ingress", "internal",\n' + f' "--vpc-connector", "{self.__vpc_connector}",\n' + f' "--vpc-egress", "all-traffic"') + vpc_connector_tail += ']\n' + + cloudbuild_comp_config = ( + GENERATED_LICENSE + + f'steps:\n' + f'# ==============================================================================\n' + f'# BUILD CUSTOM IMAGES\n' + f'# ==============================================================================\n' + f'\n' + f''' # build the component_base image\n''' + f''' - name: "gcr.io/cloud-builders/docker"\n''' + f''' args: [ "build", "-t", "{self.__af_registry_location}-docker.pkg.dev/{self.__project_id}/{self.__af_registry_name}/components/component_base:latest", "." ]\n''' + f''' dir: "{self.__base_dir}components/component_base"\n''' + f''' id: "build_component_base"\n''' + f''' waitFor: ["-"]\n''' + f'\n' + f''' # build the run_pipeline image\n''' + f''' - name: 'gcr.io/cloud-builders/docker'\n''' + f''' args: [ "build", "-t", "{self.__af_registry_location}-docker.pkg.dev/{self.__project_id}/{self.__af_registry_name}/run_pipeline:latest", "-f", "cloud_run/run_pipeline/Dockerfile", "." ]\n''' + f''' dir: "{self.__base_dir}"\n''' + f''' id: "build_pipeline_runner_svc"\n''' + f''' waitFor: ['build_component_base']\n''') + + cloudbuild_cloudrun_config = ( + f'\n' + f'# ==============================================================================\n' + f'# PUSH & DEPLOY CUSTOM IMAGES\n' + f'# ==============================================================================\n' + f'\n' + f''' # push the component_base image\n''' + f''' - name: "gcr.io/cloud-builders/docker"\n''' + f''' args: ["push", "{self.__af_registry_location}-docker.pkg.dev/{self.__project_id}/{self.__af_registry_name}/components/component_base:latest"]\n''' + f''' dir: "{self.__base_dir}components/component_base"\n''' + f''' id: "push_component_base"\n''' + f''' waitFor: ["build_pipeline_runner_svc"]\n''' + f'\n' + f''' # push the run_pipeline image\n''' + f''' - name: "gcr.io/cloud-builders/docker"\n''' + f''' args: ["push", "{self.__af_registry_location}-docker.pkg.dev/{self.__project_id}/{self.__af_registry_name}/run_pipeline:latest"]\n''' + f''' dir: "{self.__base_dir}"\n''' + f''' id: "push_pipeline_runner_svc"\n''' + f''' waitFor: ["push_component_base"]\n''' + f'\n' + f''' # deploy the cloud run service\n''' + f''' - name: "gcr.io/google.com/cloudsdktool/cloud-sdk"\n''' + f''' entrypoint: gcloud\n''' + f''' args: ["run",\n''' + f''' "deploy",\n''' + f''' "{self.__cloud_run_name}",\n''' + f''' "--image",\n''' + f''' "{self.__af_registry_location}-docker.pkg.dev/{self.__project_id}/{self.__af_registry_name}/run_pipeline:latest",\n''' + f''' "--region",\n''' + f''' "{self.__cloud_run_location}",\n''' + f''' "--service-account",\n''' + f''' "{self.__pipeline_runner_service_account}",{vpc_connector_tail}''' + f''' id: "deploy_pipeline_runner_svc"\n''' + f''' waitFor: ["push_pipeline_runner_svc"]\n''' + f'\n' + f''' # Copy runtime parameters\n''' + f''' - name: 'gcr.io/cloud-builders/gcloud'\n''' + f''' entrypoint: bash\n''' + f''' args:\n''' + f''' - '-e'\n''' + f''' - '-c'\n''' + f''' - |\n''' + f''' cp -r {self.__base_dir}cloud_run/queueing_svc .\n''' + f''' id: "setup_queueing_svc"\n''' + f''' waitFor: ["deploy_pipeline_runner_svc"]\n''' + f'\n' + f''' # Install dependencies\n''' + f''' - name: python\n''' + f''' entrypoint: pip\n''' + f''' args: ["install", "-r", 
"queueing_svc/requirements.txt", "--user"]\n''' + f''' id: "install_queueing_svc_deps"\n''' + f''' waitFor: ["setup_queueing_svc"]\n''' + f'\n' + f''' # Submit to queue\n''' + f''' - name: python\n''' + f''' entrypoint: python\n''' + f''' args: ["queueing_svc/main.py", "--setting", "queue_job"]\n''' + f''' id: "submit_job_to_queue"\n''' + f''' waitFor: ["install_queueing_svc_deps"]\n''') + + cloudbuild_scheduler_config = ( + '\n' + ''' # Create Scheduler Job\n''' + ''' - name: python\n''' + ''' entrypoint: python\n''' + ''' args: ["queueing_svc/main.py", "--setting", "schedule_job"]\n''' + ''' id: "schedule_job"\n''' + ''' waitFor: ["submit_job_to_queue"]\n''') + + custom_comp_image = ( + f'\n' + f'images:\n' + f''' # custom component images\n''' + f''' - "{self.__af_registry_location}-docker.pkg.dev/{self.__project_id}/{self.__af_registry_name}/components/component_base:latest"\n''') + + cloudrun_image = ( + f''' # Cloud Run image\n''' + f''' - "{self.__af_registry_location}-docker.pkg.dev/{self.__project_id}/{self.__af_registry_name}/run_pipeline:latest"\n''') + + if self.__run_local: + cb_file_contents = cloudbuild_comp_config + custom_comp_image + else: + if self.__cloud_schedule_pattern == 'No Schedule Specified': + cb_file_contents = cloudbuild_comp_config + cloudbuild_cloudrun_config + custom_comp_image + cloudrun_image + else: + cb_file_contents = cloudbuild_comp_config + cloudbuild_cloudrun_config + cloudbuild_scheduler_config + custom_comp_image + cloudrun_image + + return cb_file_contents + + def _create_dockerfile(self): + """Creates the content of a Dockerfile to be written to the component_base directory. + + Args: + default_image: Default image used for this process. + + Returns: + str: Text content of dockerfile. + """ + return ( + GENERATED_LICENSE + + f'FROM {self.__default_image}\n' + f'RUN python -m pip install --upgrade pip\n' + f'COPY requirements.txt .\n' + f'RUN python -m pip install -r \ \n' + f' requirements.txt --quiet --no-cache-dir \ \n' + f' && rm -f requirements.txt\n' + f'COPY ./src /pipelines/component/src\n' + f'ENTRYPOINT ["/bin/bash"]\n') + + def _create_default_config(self): + """Creates default defaults.yaml file contents. This defaults + file is used by subsequent functions and by the pipeline + files themselves. 
+ + Returns: + str: Defaults yaml file content + """ + return ( + GENERATED_LICENSE + + f'# These values are descriptive only - do not change.\n' + f'# Rerun AutoMLOps.generate() to change these values.\n' + f'gcp:\n' + f' af_registry_location: {self.__af_registry_location}\n' + f' af_registry_name: {self.__af_registry_name}\n' + f' cb_trigger_location: {self.__cb_trigger_location}\n' + f' cb_trigger_name: {self.__cb_trigger_name}\n' + f' cloud_run_location: {self.__cloud_run_location}\n' + f' cloud_run_name: {self.__cloud_run_name}\n' + f' cloud_tasks_queue_location: {self.__cloud_tasks_queue_location}\n' + f' cloud_tasks_queue_name: {self.__cloud_tasks_queue_name}\n' + f' cloud_schedule_location: {self.__cloud_schedule_location}\n' + f' cloud_schedule_name: {self.__cloud_schedule_name}\n' + f' cloud_schedule_pattern: {self.__cloud_schedule_pattern}\n' + f' cloud_source_repository: {self.__cloud_source_repository}\n' + f' cloud_source_repository_branch: {self.__cloud_source_repository_branch}\n' + f' gs_bucket_name: {self.__gs_bucket_name}\n' + f' pipeline_runner_service_account: {self.__pipeline_runner_service_account}\n' + f' project_id: {self.__project_id}\n' + f' vpc_connector: {self.__vpc_connector}\n' + f'\n' + f'pipelines:\n' + f' parameter_values_path: {GENERATED_PARAMETER_VALUES_PATH}\n' + f' pipeline_component_directory: components\n' + f' pipeline_job_spec_path: {GENERATED_PIPELINE_JOB_SPEC_PATH}\n' + f' pipeline_region: {self.__gs_bucket_location}\n' + f' pipeline_storage_path: gs://{self.__gs_bucket_name}/pipeline_root\n') + + def _create_requirements(self): + """Writes a requirements.txt to the component_base directory. + Infers pip requirements from the python srcfiles using + pipreqs. Takes user-inputted requirements, and addes some + default gcp packages as well as packages that are often missing + in setup.py files (e.g db_types, pyarrow, gcsfs, fsspec). 
+ """ + reqs_filename = f'{GENERATED_COMPONENT_BASE}/requirements.txt' + default_gcp_reqs = [ + 'google-cloud-aiplatform', + 'google-cloud-appengine-logging', + 'google-cloud-audit-log', + 'google-cloud-bigquery', + 'google-cloud-bigquery-storage', + 'google-cloud-bigtable', + 'google-cloud-core', + 'google-cloud-dataproc', + 'google-cloud-datastore', + 'google-cloud-dlp', + 'google-cloud-firestore', + 'google-cloud-kms', + 'google-cloud-language', + 'google-cloud-logging', + 'google-cloud-monitoring', + 'google-cloud-notebooks', + 'google-cloud-pipeline-components', + 'google-cloud-pubsub', + 'google-cloud-pubsublite', + 'google-cloud-recommendations-ai', + 'google-cloud-resource-manager', + 'google-cloud-scheduler', + 'google-cloud-spanner', + 'google-cloud-speech', + 'google-cloud-storage', + 'google-cloud-tasks', + 'google-cloud-translate', + 'google-cloud-videointelligence', + 'google-cloud-vision', + 'db_dtypes', + 'pyarrow', + 'gcsfs', + 'fsspec'] + # Infer reqs using pipreqs + execute_process(f'python3 -m pipreqs.pipreqs {GENERATED_COMPONENT_BASE} --mode no-pin --force', to_null=False) + pipreqs = read_file(reqs_filename).splitlines() + # Get user-inputted requirements from .tmpfiles dir + user_inp_reqs = [] + components_path_list = get_components_list() + for component_path in components_path_list: + component_spec = read_yaml_file(component_path) + reqs = component_spec['implementation']['container']['command'][2] + formatted_reqs = re.findall('\'([^\']*)\'', reqs) + user_inp_reqs.extend(formatted_reqs) + # Remove duplicates + set_of_requirements = set(user_inp_reqs) if user_inp_reqs else set(pipreqs + default_gcp_reqs) + reqs_str = ''.join(r+'\n' for r in sorted(set_of_requirements)) + return reqs_str diff --git a/AutoMLOps/frameworks/kfp/scaffold.py b/AutoMLOps/frameworks/kfp/scaffold.py new file mode 100644 index 0000000..75eca7e --- /dev/null +++ b/AutoMLOps/frameworks/kfp/scaffold.py @@ -0,0 +1,209 @@ +# Copyright 2023 Google LLC. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Builds temporary component scaffold yaml files.""" + +# pylint: disable=C0103 +# pylint: disable=line-too-long + +import inspect +from typing import Callable, List, Optional, TypeVar, Union + +import docstring_parser + +from AutoMLOps.utils.constants import ( + DEFAULT_PIPELINE_NAME, + PIPELINE_TMPFILE, + TMPFILES_DIR +) +from AutoMLOps.utils.utils import ( + get_function_source_definition, + make_dirs, + update_params, + write_file, + write_yaml_file +) + +T = TypeVar('T') + +def create_component_scaffold(func: Optional[Callable] = None, + *, + packages_to_install: Optional[List[str]] = None): + """Creates a tmp component scaffold which will be used by + the formalize function. Code is temporarily stored in + component_spec['implementation']['container']['command']. + + Args: + func: The python function to create a component from. The function + should have type annotations for all its arguments, indicating how + it is intended to be used (e.g. 
as an input/output Artifact object, + a plain parameter, or a path to a file). + packages_to_install: A list of optional packages to install before + executing func. These will always be installed at component runtime. + """ + # Extract name, docstring, and component description + name = func.__name__ + parsed_docstring = docstring_parser.parse(inspect.getdoc(func)) + description = parsed_docstring.short_description + + # Instantiate component yaml attributes + component_spec = {} + component_spec['name'] = name + if description: + component_spec['description'] = description + component_spec['inputs'] = get_function_parameters(func) + component_spec['implementation'] = {} + component_spec['implementation']['container'] = {} + component_spec['implementation']['container']['image'] = 'TBD' + component_spec['implementation']['container']['command'] = get_packages_to_install_command(func, packages_to_install) + component_spec['implementation']['container']['args'] = ['--executor_input', + {'executorInput': None}, + '--function_to_execute', + name] + # Write component yaml + filename = TMPFILES_DIR + f'/{name}.yaml' + make_dirs([TMPFILES_DIR]) + write_yaml_file(filename, component_spec, 'w') + +def get_packages_to_install_command(func: Optional[Callable] = None, + packages_to_install: Optional[List[str]] = None): + """Returns a list of formatted list of commands, including code for tmp storage. + + Args: + func: The python function to create a component from. The function + should have type annotations for all its arguments, indicating how + it is intended to be used (e.g. as an input/output Artifact object, + a plain parameter, or a path to a file). + packages_to_install: A list of optional packages to install before + executing func. These will always be installed at component runtime. + """ + # pylint: disable=anomalous-backslash-in-string + newline = '\n' + if not packages_to_install: + packages_to_install = [] + concat_package_list = ' '.join([repr(str(package)) for package in packages_to_install]) + install_python_packages_script = ( + f'''if ! [ -x "$(command -v pip)" ]; then{newline}''' + f''' python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip{newline}''' + f'''fi{newline}''' + f'''PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet \{newline}''' + f''' --no-warn-script-location {concat_package_list} && "$0" "$@"{newline}''' + f'''{newline}''') + src_code = get_function_source_definition(func) + return ['sh', '-c', install_python_packages_script, src_code] + +def get_function_parameters(func: Callable) -> dict: + """Returns a formatted list of parameters. + + Args: + func: The python function to create a component from. The function + should have type annotations for all its arguments, indicating how + it is intended to be used (e.g. as an input/output Artifact object, + a plain parameter, or a path to a file). + Returns: + list: Params list with types converted to kubeflow spec. + Raises: + Exception: If parameter type hints are not provided. 
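# --- Editor's illustrative aside (not part of the diff above) ---
# A minimal sketch of how create_component_scaffold() consumes a plain,
# fully type-hinted function. The function name, arguments, and package
# list below are hypothetical placeholders, not part of this change.
from AutoMLOps.frameworks.kfp.scaffold import create_component_scaffold

def create_dataset(bq_table: str, data_path: str, project_id: str):
    """Loads a BigQuery table to a CSV at data_path (placeholder body)."""

create_component_scaffold(func=create_dataset,
                          packages_to_install=['google-cloud-bigquery', 'pandas'])
# Writes .tmpfiles/create_dataset.yaml, with the function source stored under
# component_spec['implementation']['container']['command'] until the builder
# formalizes it; omitting a parameter type hint raises a TypeError.
# --- end aside ---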
+ """ + signature = inspect.signature(func) + parameters = list(signature.parameters.values()) + parsed_docstring = docstring_parser.parse(inspect.getdoc(func)) + doc_dict = {p.arg_name: p.description for p in parsed_docstring.params} + + # Extract parameter metadata + parameter_holder = [] + for param in parameters: + metadata = {} + metadata['name'] = param.name + metadata['description'] = doc_dict.get(param.name) + metadata['type'] = maybe_strip_optional_from_annotation( + param.annotation) + parameter_holder.append(metadata) + # pylint: disable=protected-access + if metadata['type'] == inspect._empty: + raise TypeError( + f'''Missing type hint for parameter "{metadata['name']}". ''' + f'''Please specify the type for this parameter.''') + return update_params(parameter_holder) + +def maybe_strip_optional_from_annotation(annotation: T) -> T: + """Strips 'Optional' from 'Optional[]' if applicable. + For example:: + Optional[str] -> str + str -> str + List[int] -> List[int] + Args: + annotation: The original type annotation which may or may not has `Optional`. + Returns: + The type inside Optional[] if Optional exists, otherwise the original type. + """ + if getattr(annotation, '__origin__', None) is Union and annotation.__args__[1] is type(None): + return annotation.__args__[0] + else: + return annotation + +def create_pipeline_scaffold(func: Optional[Callable] = None, + *, + name: Optional[str] = None, + description: Optional[str] = None): + """Creates a temporary pipeline scaffold which will + be used by the formalize function. + + Args: + func: The python function to create a pipeline from. The function + should have type annotations for all its arguments, indicating how + it is intended to be used (e.g. as an input/output Artifact object, + a plain parameter, or a path to a file). + name: The name of the pipeline. + description: Short description of what the pipeline does. + """ + pipeline_scaffold = (get_pipeline_decorator(name, description) + + get_function_source_definition(func) + + get_compile_step(func.__name__)) + make_dirs([TMPFILES_DIR]) # if it doesn't already exist + write_file(PIPELINE_TMPFILE, pipeline_scaffold, 'w') + +def get_pipeline_decorator(name: Optional[str] = None, + description: Optional[str] = None): + """Creates the kfp pipeline decorator. + + Args: + name: The name of the pipeline. + description: Short description of what the pipeline does. + + Returns: + str: Python compile function call. + """ + default_name = DEFAULT_PIPELINE_NAME if not name else name + name_str = f'''(\n name='{default_name}',\n''' + desc_str = f''' description='{description}',\n''' if description else '' + ending_str = ')\n' + return '@dsl.pipeline' + name_str + desc_str + ending_str + +def get_compile_step(func_name: str): + """Creates the compile function call. + + Args: + func_name: The name of the pipeline function. + + Returns: + str: Python compile function call. + """ + return ( + f'\n' + f'compiler.Compiler().compile(\n' + f' pipeline_func={func_name},\n' + f' package_path=pipeline_job_spec_path)\n' + f'\n' + ) diff --git a/AutoMLOps/frameworks/ray/.gitkeep b/AutoMLOps/frameworks/ray/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/AutoMLOps/frameworks/tfx/.gitkeep b/AutoMLOps/frameworks/tfx/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/AutoMLOps/utils/__init__.py b/AutoMLOps/utils/__init__.py new file mode 100644 index 0000000..2379f87 --- /dev/null +++ b/AutoMLOps/utils/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2023 Google LLC. 
All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/AutoMLOps/utils/constants.py b/AutoMLOps/utils/constants.py new file mode 100644 index 0000000..be392b4 --- /dev/null +++ b/AutoMLOps/utils/constants.py @@ -0,0 +1,82 @@ +# Copyright 2023 Google LLC. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Sets global constants.""" + +# pylint: disable=C0103 +# pylint: disable=line-too-long + +# temporary files +TMPFILES_DIR = '.tmpfiles' +IMPORTS_TMPFILE = f'{TMPFILES_DIR}/imports.py' +CELL_TMPFILE = f'{TMPFILES_DIR}/cell.py' +PIPELINE_TMPFILE = f'{TMPFILES_DIR}/pipeline_scaffold.py' + +# Apache license +GENERATED_LICENSE = ( + '# Licensed under the Apache License, Version 2.0 (the "License");\n' + '# you may not use this file except in compliance with the License.\n' + '# You may obtain a copy of the License at\n' + '#\n' + '# http://www.apache.org/licenses/LICENSE-2.0\n' + '#\n' + '# Unless required by applicable law or agreed to in writing, software\n' + '# distributed under the License is distributed on an "AS IS" BASIS,\n' + '# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n' + '# See the License for the specific language governing permissions and\n' + '# limitations under the License.\n' + '#\n' + '# DISCLAIMER: This code is generated as part of the AutoMLOps output.\n' + '\n' +) + +# AutoMLOps file paths +BASE_DIR = 'AutoMLOps/' +DEFAULT_IMAGE = 'python:3.9-slim' +GENERATED_DEFAULTS_FILE = BASE_DIR + 'configs/defaults.yaml' +GENERATED_PIPELINE_SPEC_SH_FILE = BASE_DIR + 'scripts/build_pipeline_spec.sh' +GENERATED_BUILD_COMPONENTS_SH_FILE = BASE_DIR + 'scripts/build_components.sh' +GENERATED_RUN_PIPELINE_SH_FILE = BASE_DIR + 'scripts/run_pipeline.sh' +GENERATED_RUN_ALL_SH_FILE = BASE_DIR + 'scripts/run_all.sh' +GENERATED_RESOURCES_SH_FILE = BASE_DIR + 'scripts/create_resources.sh' +GENERATED_SUBMIT_JOB_FILE = BASE_DIR + 'scripts/submit_to_runner_svc.sh' +GENERATED_CLOUDBUILD_FILE = BASE_DIR + 'cloudbuild.yaml' +GENERATED_PIPELINE_FILE = BASE_DIR + 'pipelines/pipeline.py' +GENERATED_COMPONENT_BASE = BASE_DIR + 'components/component_base' +GENERATED_COMPONENT_BASE_SRC = BASE_DIR + 'components/component_base/src' +GENERATED_PARAMETER_VALUES_PATH = 'pipelines/runtime_parameters/pipeline_parameter_values.json' +GENERATED_PIPELINE_JOB_SPEC_PATH = 'scripts/pipeline_spec/pipeline_job.json' +GENERATED_DIRS = [ + BASE_DIR, + BASE_DIR + 'components', + BASE_DIR + 'components/component_base', + BASE_DIR + 
'components/component_base/src', + BASE_DIR + 'configs', + BASE_DIR + 'images', + BASE_DIR + 'pipelines', + BASE_DIR + 'pipelines/runtime_parameters', + BASE_DIR + 'scripts', + BASE_DIR + 'scripts/pipeline_spec' +] + +# KFP Spec output_file location +OUTPUT_DIR = TMPFILES_DIR + +# Generated kfp pipeline metadata name +DEFAULT_PIPELINE_NAME = 'automlops-pipeline' + +# Character substitution constants +LEFT_BRACKET = '{' +RIGHT_BRACKET = '}' +NEWLINE = '\n' diff --git a/AutoMLOps/BuilderUtils.py b/AutoMLOps/utils/utils.py similarity index 87% rename from AutoMLOps/BuilderUtils.py rename to AutoMLOps/utils/utils.py index 6948440..b69bbb1 100644 --- a/AutoMLOps/BuilderUtils.py +++ b/AutoMLOps/utils/utils.py @@ -27,27 +27,7 @@ from typing import Callable import yaml -TMPFILES_DIR = '.tmpfiles' -IMPORTS_TMPFILE = f'{TMPFILES_DIR}/imports.py' -CELL_TMPFILE = f'{TMPFILES_DIR}/cell.py' -PIPELINE_TMPFILE = f'{TMPFILES_DIR}/pipeline_scaffold.py' -PARAMETER_VALUES_PATH = 'pipelines/runtime_parameters/pipeline_parameter_values.json' -PIPELINE_JOB_SPEC_PATH = 'scripts/pipeline_spec/pipeline_job.json' -LICENSE = ( - '# Licensed under the Apache License, Version 2.0 (the "License");\n' - '# you may not use this file except in compliance with the License.\n' - '# You may obtain a copy of the License at\n' - '#\n' - '# http://www.apache.org/licenses/LICENSE-2.0\n' - '#\n' - '# Unless required by applicable law or agreed to in writing, software\n' - '# distributed under the License is distributed on an "AS IS" BASIS,\n' - '# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n' - '# See the License for the specific language governing permissions and\n' - '# limitations under the License.\n' - '#\n' - '# DISCLAIMER: This code is generated as part of the AutoMLOps output.\n' - '\n') +from AutoMLOps.utils.constants import TMPFILES_DIR def make_dirs(directories: list): """Makes directories with the specified names. @@ -205,9 +185,11 @@ def execute_process(command: str, to_null: bool): """ stdout = subprocess.DEVNULL if to_null else None try: - subprocess.run([command], shell=True, check=True, - stdout=stdout, - stderr=subprocess.STDOUT) + subprocess.run([command], + shell=True, + check=True, + stdout=stdout, + stderr=subprocess.STDOUT) except subprocess.CalledProcessError as err: raise RuntimeError(f'Error executing process. {err}') from err @@ -252,7 +234,7 @@ def update_params(params: list) -> list: param['type'] = python_kfp_types_mapper[param['type']] except KeyError as err: raise ValueError(f'Unsupported python type - we only support ' - f'primitive types at this time. {err}') from err + f'primitive types at this time. {err}') from err return params def get_function_source_definition(func: Callable) -> str: @@ -279,3 +261,24 @@ def get_function_source_definition(func: Callable) -> str: f'It is probably not properly indented.') return '\n'.join(source_code_lines) + +def format_spec_dict(job_spec: dict) -> str: + """Takes in a job spec dictionary and removes the quotes around the component op name. + e.g. 'component_spec': 'train_model' becomes 'component_spec': train_model. + This is necessary to in order for the op to be callable within the Python code. + + Args: + job_spec: Dictionary with job spec info. + + Returns: + str: Python formatted dictionary code. 
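# --- Editor's illustrative aside (not part of the diff above) ---
# A rough sketch of what format_spec_dict() returns for a hypothetical custom
# training job spec; the point is that the component op name is left unquoted
# so the generated pipeline code can call it directly.
from AutoMLOps.utils.utils import format_spec_dict

job_spec = {'component_spec': 'train_model',
            'display_name': 'train-model-job',
            'machine_type': 'n1-standard-4'}
print(format_spec_dict(job_spec))
# Prints (whitespace approximate):
# {
#    'component_spec': train_model,
#    'display_name': 'train-model-job',
#    'machine_type': 'n1-standard-4',
# }
# --- end aside ---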
+ """ + quote = '\'' + left_bracket = '{' + right_bracket = '}' + newline = '\n' + + return ( + f'''{left_bracket}\n''' + f''' {f'{newline} '.join(f" {quote}{k}{quote}: {quote if k != 'component_spec' else ''}{v}{quote if k != 'component_spec' else ''}," for k, v in job_spec.items())}{newline}''' + f''' {right_bracket}\n''') diff --git a/CHANGELOG.md b/CHANGELOG.md index 802b9a6..d883a72 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,16 @@ # Change Log All notable changes to this project will be documented in this file. +## [1.1.1] - 2023-05-17 + +### Added +- Refactored backend modules to be structured based on frameworks (e.g. kfp, tfx, etc.) and deployments (e.g. cloudbuild, github actions, etc.) +- Added some unit tests for the utils.py module. + +### Changed + +- Moved unit tests to /tests directory. + ## [1.1.0] - 2023-04-28 ### Added diff --git a/README.md b/README.md index 391d851..a9a55a9 100644 --- a/README.md +++ b/README.md @@ -230,7 +230,7 @@ The [example notebook](./examples/training/00_training_example.ipynb) comes with [Tony DiLoreto](mailto:tonydiloreto@google.com): Project Manager -[Allegra Noto](mailto:allegranoto@google.com): Engineer +[Allegra Noto](mailto:allegranoto@google.com): Senior Project Engineer # Disclaimer diff --git a/examples/inferencing/00_batch_prediction_example.ipynb b/examples/inferencing/00_batch_prediction_example.ipynb index 373eea7..b2ab701 100644 --- a/examples/inferencing/00_batch_prediction_example.ipynb +++ b/examples/inferencing/00_batch_prediction_example.ipynb @@ -565,7 +565,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "id": "7ef279e8", "metadata": { "scrolled": false @@ -577,11 +577,11 @@ "text": [ "INFO: Successfully saved requirements file in AutoMLOps/components/component_base/requirements.txt\n", "\u001b[0;32m Updating required API services in project automlops-sandbox \u001b[0m\n", - "Operation \"operations/acat.p2-45373616427-dbcac703-c9e9-48f7-a57e-c598c1f3737a\" finished successfully.\n", + "Operation \"operations/acat.p2-45373616427-79a68013-c0ab-4ea0-b749-caa21aa87a80\" finished successfully.\n", "\u001b[0;32m Checking for Artifact Registry: vertex-mlops-af in project automlops-sandbox \u001b[0m\n", "Listing items under project automlops-sandbox, location us-central1.\n", "\n", - "vertex-mlops-af DOCKER STANDARD_REPOSITORY Artifact Registry vertex-mlops-af in us-central1. us-central1 Google-managed key 2023-01-11T17:12:26 2023-04-28T17:09:46 33808.993\n", + "vertex-mlops-af DOCKER STANDARD_REPOSITORY Artifact Registry vertex-mlops-af in us-central1. 
us-central1 Google-managed key 2023-01-11T17:12:26 2023-05-15T13:15:35 40493.254\n", "Artifact Registry: vertex-mlops-af already exists in project automlops-sandbox\n", "\u001b[0;32m Checking for GS Bucket: automlops-sandbox-bucket in project automlops-sandbox \u001b[0m\n", "gs://automlops-sandbox-bucket/\n", @@ -599,10 +599,50 @@ "\u001b[0;32m Checking for Cloudbuild Trigger: automlops-trigger in project automlops-sandbox \u001b[0m\n", "name: automlops-trigger\n", "Cloudbuild Trigger already exists in project automlops-sandbox for repo AutoMLOps-repo\n", - "[automlops 21f023f] Run AutoMLOps\n", - " 2 files changed, 38 insertions(+), 6 deletions(-)\n", + "Initialized empty Git repository in /Users/srastatter/Documents/2023/MLOps-graduation/AutoMLOps-github/examples/inferencing/.git/\n", + "Switched to a new branch 'automlops'\n", + "[automlops (root-commit) fbf3dfe] Run AutoMLOps\n", + " 38 files changed, 6876 insertions(+)\n", + " create mode 100644 .ipynb_checkpoints/00_batch_prediction_example-checkpoint.ipynb\n", + " create mode 100644 .ipynb_checkpoints/automlops_batch_prediction_pipeline-checkpoint.ipynb\n", + " create mode 100644 .ipynb_checkpoints/automlops_example_batch_predict-checkpoint.ipynb\n", + " create mode 100644 .ipynb_checkpoints/automlops_example_notebook-checkpoint.ipynb\n", + " create mode 100644 .tmpfiles/batch_predict.yaml\n", + " create mode 100644 .tmpfiles/batch_prediction.yaml\n", + " create mode 100644 .tmpfiles/create_dataset.yaml\n", + " create mode 100644 .tmpfiles/imports.py\n", + " create mode 100644 .tmpfiles/pipeline_scaffold.py\n", + " create mode 100644 00_batch_prediction_example.ipynb\n", + " create mode 100644 AutoMLOps/cloud_run/queueing_svc/main.py\n", + " create mode 100644 AutoMLOps/cloud_run/queueing_svc/pipeline_parameter_values.json\n", + " create mode 100644 AutoMLOps/cloud_run/queueing_svc/requirements.txt\n", + " create mode 100644 AutoMLOps/cloud_run/run_pipeline/Dockerfile\n", + " create mode 100644 AutoMLOps/cloud_run/run_pipeline/main.py\n", + " create mode 100644 AutoMLOps/cloud_run/run_pipeline/requirements.txt\n", + " create mode 100644 AutoMLOps/cloudbuild.yaml\n", + " create mode 100644 AutoMLOps/components/batch_predict/component.yaml\n", + " create mode 100644 AutoMLOps/components/batch_prediction/component.yaml\n", + " create mode 100644 AutoMLOps/components/component_base/Dockerfile\n", + " create mode 100644 AutoMLOps/components/component_base/requirements.txt\n", + " create mode 100644 AutoMLOps/components/component_base/src/batch_predict.py\n", + " create mode 100644 AutoMLOps/components/component_base/src/batch_prediction.py\n", + " create mode 100644 AutoMLOps/components/component_base/src/create_dataset.py\n", + " create mode 100644 AutoMLOps/components/create_dataset/component.yaml\n", + " create mode 100644 AutoMLOps/configs/defaults.yaml\n", + " create mode 100644 AutoMLOps/pipelines/pipeline.py\n", + " create mode 100644 AutoMLOps/pipelines/pipeline_runner.py\n", + " create mode 100644 AutoMLOps/pipelines/runtime_parameters/pipeline_parameter_values.json\n", + " create mode 100755 AutoMLOps/scripts/build_components.sh\n", + " create mode 100755 AutoMLOps/scripts/build_pipeline_spec.sh\n", + " create mode 100755 AutoMLOps/scripts/create_resources.sh\n", + " create mode 100644 AutoMLOps/scripts/pipeline_spec/.gitkeep\n", + " create mode 100755 AutoMLOps/scripts/run_all.sh\n", + " create mode 100755 AutoMLOps/scripts/run_pipeline.sh\n", + " create mode 100644 data/.DS_Store\n", + " create mode 100644 
data/Dry_Beans_Dataset_Inferencing.csv\n", + " create mode 100644 data/load_data_to_bq.py\n", "To https://source.developers.google.com/p/automlops-sandbox/r/AutoMLOps-repo\n", - " a67d808..21f023f automlops -> automlops\n", + " + b905e9a...fbf3dfe automlops -> automlops (forced update)\n", "Pushing code to automlops branch, triggering cloudbuild...\n", "Cloudbuild job running at: https://console.cloud.google.com/cloud-build/builds;region=us-central1\n", "\n", diff --git a/examples/inferencing/AutoMLOps/cloud_run/queueing_svc/main.py b/examples/inferencing/AutoMLOps/cloud_run/queueing_svc/main.py index f5e2786..e8f3542 100644 --- a/examples/inferencing/AutoMLOps/cloud_run/queueing_svc/main.py +++ b/examples/inferencing/AutoMLOps/cloud_run/queueing_svc/main.py @@ -28,8 +28,8 @@ PIPELINE_RUNNER_SA = 'vertex-pipelines@automlops-sandbox.iam.gserviceaccount.com' PROJECT_ID = 'automlops-sandbox' SCHEDULE_LOCATION = 'us-central1' -SCHEDULE_NAME = 'AutoMLOps-schedule' SCHEDULE_PATTERN = 'No Schedule Specified' +SCHEDULE_NAME = 'AutoMLOps-schedule' def get_runner_svc_uri( cloud_run_location: str, diff --git a/examples/inferencing/AutoMLOps/components/component_base/src/batch_predict.py b/examples/inferencing/AutoMLOps/components/component_base/src/batch_predict.py index ae4c9cf..bf0148e 100644 --- a/examples/inferencing/AutoMLOps/components/component_base/src/batch_predict.py +++ b/examples/inferencing/AutoMLOps/components/component_base/src/batch_predict.py @@ -14,9 +14,9 @@ import argparse import json -from kfp.v2.components import executor import kfp from kfp.v2 import dsl +from kfp.v2.components import executor from kfp.v2.dsl import * from typing import * diff --git a/examples/inferencing/AutoMLOps/components/component_base/src/batch_prediction.py b/examples/inferencing/AutoMLOps/components/component_base/src/batch_prediction.py index 0b05cda..c2f5423 100644 --- a/examples/inferencing/AutoMLOps/components/component_base/src/batch_prediction.py +++ b/examples/inferencing/AutoMLOps/components/component_base/src/batch_prediction.py @@ -14,9 +14,9 @@ import argparse import json -from kfp.v2.components import executor import kfp from kfp.v2 import dsl +from kfp.v2.components import executor from kfp.v2.dsl import * from typing import * diff --git a/examples/inferencing/AutoMLOps/components/component_base/src/create_dataset.py b/examples/inferencing/AutoMLOps/components/component_base/src/create_dataset.py index e8e0ff9..ab7342a 100644 --- a/examples/inferencing/AutoMLOps/components/component_base/src/create_dataset.py +++ b/examples/inferencing/AutoMLOps/components/component_base/src/create_dataset.py @@ -14,9 +14,9 @@ import argparse import json -from kfp.v2.components import executor import kfp from kfp.v2 import dsl +from kfp.v2.components import executor from kfp.v2.dsl import * from typing import * diff --git a/examples/training/.gitkeep b/examples/training/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/examples/training/00_training_example.ipynb b/examples/training/00_training_example.ipynb index 6a7e19e..e6b7e99 100644 --- a/examples/training/00_training_example.ipynb +++ b/examples/training/00_training_example.ipynb @@ -513,7 +513,7 @@ "\n", " serving_container = 'us-docker.pkg.dev/vertex-ai/prediction/sklearn-cpu.1-0:latest'\n", " uploaded_model = aiplatform.Model.upload(\n", - " artifact_uri = model_directory,\n", + " artifact_uri=model_directory,\n", " model_id=model_id,\n", " display_name=model_name,\n", " parent_model=parent_model,\n", @@ -618,7 +618,12 @@ } ], 
"source": [ - "AutoMLOps.generate(project_id=PROJECT_ID, pipeline_params=pipeline_params, use_kfp_spec=False, run_local=False, schedule_pattern='0 */12 * * *')" + "AutoMLOps.generate(project_id=PROJECT_ID,\n", + " pipeline_params=pipeline_params,\n", + " use_kfp_spec=False,\n", + " run_local=False,\n", + " schedule_pattern='0 */12 * * *'\n", + ")" ] }, { @@ -635,11 +640,11 @@ "text": [ "INFO: Successfully saved requirements file in AutoMLOps/components/component_base/requirements.txt\n", "\u001b[0;32m Updating required API services in project automlops-sandbox \u001b[0m\n", - "Operation \"operations/acat.p2-45373616427-0c976264-e517-4012-b829-7426f048f58e\" finished successfully.\n", + "Operation \"operations/acat.p2-45373616427-7b5e44db-ef7e-4eae-b7bf-58d8e58e41cc\" finished successfully.\n", "\u001b[0;32m Checking for Artifact Registry: vertex-mlops-af in project automlops-sandbox \u001b[0m\n", "Listing items under project automlops-sandbox, location us-central1.\n", "\n", - "vertex-mlops-af DOCKER STANDARD_REPOSITORY Artifact Registry vertex-mlops-af in us-central1. us-central1 Google-managed key 2023-01-11T17:12:26 2023-04-28T16:07:31 32607.905\n", + "vertex-mlops-af DOCKER STANDARD_REPOSITORY Artifact Registry vertex-mlops-af in us-central1. us-central1 Google-managed key 2023-01-11T17:12:26 2023-05-15T10:54:34 36487.292\n", "Artifact Registry: vertex-mlops-af already exists in project automlops-sandbox\n", "\u001b[0;32m Checking for GS Bucket: automlops-sandbox-bucket in project automlops-sandbox \u001b[0m\n", "gs://automlops-sandbox-bucket/\n", @@ -657,10 +662,14 @@ "\u001b[0;32m Checking for Cloudbuild Trigger: automlops-trigger in project automlops-sandbox \u001b[0m\n", "name: automlops-trigger\n", "Cloudbuild Trigger already exists in project automlops-sandbox for repo AutoMLOps-repo\n", - "[automlops a6665dc] Run AutoMLOps\n", - " 7 files changed, 16 insertions(+), 266 deletions(-)\n", + "[automlops 9ccc88c] Run AutoMLOps\n", + " 10 files changed, 15 insertions(+), 115 deletions(-)\n", + " delete mode 100644 AutoMLOps/.DS_Store\n", + " delete mode 100644 AutoMLOps/cloud_run/.DS_Store\n", + " delete mode 100644 AutoMLOps/components/.DS_Store\n", + " delete mode 100644 AutoMLOps/components/component_base/.DS_Store\n", "To https://source.developers.google.com/p/automlops-sandbox/r/AutoMLOps-repo\n", - " + c5ada7f...a6665dc automlops -> automlops (forced update)\n", + " c3db0b3..9ccc88c automlops -> automlops\n", "Pushing code to automlops branch, triggering cloudbuild...\n", "Cloudbuild job running at: https://console.cloud.google.com/cloud-build/builds;region=us-central1\n", "\n", @@ -687,7 +696,12 @@ } ], "source": [ - "AutoMLOps.go(project_id=PROJECT_ID, pipeline_params=pipeline_params, use_kfp_spec=False, run_local=False, schedule_pattern='0 */12 * * *')" + "AutoMLOps.go(project_id=PROJECT_ID,\n", + " pipeline_params=pipeline_params,\n", + " use_kfp_spec=False,\n", + " run_local=False,\n", + " schedule_pattern='0 */12 * * *'\n", + ")" ] }, { @@ -702,7 +716,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 13, "id": "89f42645", "metadata": {}, "outputs": [ @@ -769,9 +783,11 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 14, "id": "c92f80a8", - "metadata": {}, + "metadata": { + "scrolled": true + }, "outputs": [ { "name": "stdout", @@ -779,11 +795,11 @@ "text": [ "INFO: Successfully saved requirements file in AutoMLOps/components/component_base/requirements.txt\n", "\u001b[0;32m Updating required API services in project 
automlops-sandbox \u001b[0m\n", - "Operation \"operations/acat.p2-45373616427-5c716d85-fcb9-4552-b46d-e35fab927098\" finished successfully.\n", + "Operation \"operations/acat.p2-45373616427-2167cec8-5caa-4784-b1a7-a5d08d191743\" finished successfully.\n", "\u001b[0;32m Checking for Artifact Registry: vertex-mlops-af in project automlops-sandbox \u001b[0m\n", "Listing items under project automlops-sandbox, location us-central1.\n", "\n", - "vertex-mlops-af DOCKER STANDARD_REPOSITORY Artifact Registry vertex-mlops-af in us-central1. us-central1 Google-managed key 2023-01-11T17:12:26 2023-04-28T14:31:14 30024.911\n", + "vertex-mlops-af DOCKER STANDARD_REPOSITORY Artifact Registry vertex-mlops-af in us-central1. us-central1 Google-managed key 2023-01-11T17:12:26 2023-05-15T11:13:50 37301.233\n", "Artifact Registry: vertex-mlops-af already exists in project automlops-sandbox\n", "\u001b[0;32m Checking for GS Bucket: automlops-sandbox-bucket in project automlops-sandbox \u001b[0m\n", "gs://automlops-sandbox-bucket/\n", @@ -796,51 +812,54 @@ "AutoMLOps-repo automlops-sandbox https://source.developers.google.com/p/automlops-sandbox/r/AutoMLOps-repo\n", "Cloud Source Repository: AutoMLOps-repo already exists in project automlops-sandbox\n", "\u001b[0;32m BUILDING COMPONENTS \u001b[0m\n", - "Creating temporary tarball archive of 39 file(s) totalling 2.8 MiB before compression.\n", - "Uploading tarball of [..] to [gs://automlops-sandbox_cloudbuild/source/1682707179.355166-bca32254629049eeae9f7746b36d45dc.tgz]\n", - "Created [https://cloudbuild.googleapis.com/v1/projects/automlops-sandbox/locations/global/builds/6ac7d7b1-37d6-41f3-9093-fb20d9371d29].\n", - "Logs are available at [ https://console.cloud.google.com/cloud-build/builds/6ac7d7b1-37d6-41f3-9093-fb20d9371d29?project=45373616427 ].\n", + "Creating temporary tarball archive of 36 file(s) totalling 2.7 MiB before compression.\n", + "Uploading tarball of [..] 
to [gs://automlops-sandbox_cloudbuild/source/1684164252.433548-48128357d44949458503d65e48481e91.tgz]\n", + "Created [https://cloudbuild.googleapis.com/v1/projects/automlops-sandbox/locations/global/builds/3994fcb0-0bc8-4d73-be32-cb4a47c300c2].\n", + "Logs are available at [ https://console.cloud.google.com/cloud-build/builds/3994fcb0-0bc8-4d73-be32-cb4a47c300c2?project=45373616427 ].\n", "----------------------------- REMOTE BUILD OUTPUT ------------------------------\n", - "starting build \"6ac7d7b1-37d6-41f3-9093-fb20d9371d29\"\n", + "starting build \"3994fcb0-0bc8-4d73-be32-cb4a47c300c2\"\n", "\n", "FETCHSOURCE\n", - "Fetching storage object: gs://automlops-sandbox_cloudbuild/source/1682707179.355166-bca32254629049eeae9f7746b36d45dc.tgz#1682707180281309\n", - "Copying gs://automlops-sandbox_cloudbuild/source/1682707179.355166-bca32254629049eeae9f7746b36d45dc.tgz#1682707180281309...\n", + "Fetching storage object: gs://automlops-sandbox_cloudbuild/source/1684164252.433548-48128357d44949458503d65e48481e91.tgz#1684164253324677\n", + "Copying gs://automlops-sandbox_cloudbuild/source/1684164252.433548-48128357d44949458503d65e48481e91.tgz#1684164253324677...\n", "/ [1 files][ 1.1 MiB/ 1.1 MiB] \n", "Operation completed over 1 objects/1.1 MiB.\n", "BUILD\n", "Starting Step #0 - \"build_component_base\"\n", "Step #0 - \"build_component_base\": Already have image (with digest): gcr.io/cloud-builders/docker\n", - "Step #0 - \"build_component_base\": Sending build context to Docker daemon 22.02kB\n", + "Step #0 - \"build_component_base\": Sending build context to Docker daemon 14.85kB\n", "Step #0 - \"build_component_base\": Step 1/6 : FROM python:3.9-slim\n", "Step #0 - \"build_component_base\": 3.9-slim: Pulling from library/python\n", - "Step #0 - \"build_component_base\": 26c5c85e47da: Already exists\n", - "Step #0 - \"build_component_base\": 9e79879be9c7: Pulling fs layer\n", - "Step #0 - \"build_component_base\": bdf2b0f347d4: Pulling fs layer\n", - "Step #0 - \"build_component_base\": d65cf2430a1a: Pulling fs layer\n", - "Step #0 - \"build_component_base\": a8f1be862cc9: Pulling fs layer\n", - "Step #0 - \"build_component_base\": a8f1be862cc9: Waiting\n", - "Step #0 - \"build_component_base\": d65cf2430a1a: Verifying Checksum\n", - "Step #0 - \"build_component_base\": d65cf2430a1a: Download complete\n", - "Step #0 - \"build_component_base\": 9e79879be9c7: Verifying Checksum\n", - "Step #0 - \"build_component_base\": 9e79879be9c7: Download complete\n", - "Step #0 - \"build_component_base\": bdf2b0f347d4: Verifying Checksum\n", - "Step #0 - \"build_component_base\": bdf2b0f347d4: Download complete\n", - "Step #0 - \"build_component_base\": a8f1be862cc9: Verifying Checksum\n", - "Step #0 - \"build_component_base\": a8f1be862cc9: Download complete\n", - "Step #0 - \"build_component_base\": 9e79879be9c7: Pull complete\n", - "Step #0 - \"build_component_base\": bdf2b0f347d4: Pull complete\n", - "Step #0 - \"build_component_base\": d65cf2430a1a: Pull complete\n", - "Step #0 - \"build_component_base\": a8f1be862cc9: Pull complete\n", - "Step #0 - \"build_component_base\": Digest: sha256:a321a8513911c55888b9c1cc981a5ba646271447a82ece1b62e4a6a8ff1d431b\n", + "Step #0 - \"build_component_base\": 9e3ea8720c6d: Pulling fs layer\n", + "Step #0 - \"build_component_base\": fe9f5cfcf49b: Pulling fs layer\n", + "Step #0 - \"build_component_base\": a6a0b3a8b7c2: Pulling fs layer\n", + "Step #0 - \"build_component_base\": 3fb8f3d6dea7: Pulling fs layer\n", + "Step #0 - \"build_component_base\": f6c74773a296: 
Pulling fs layer\n", + "Step #0 - \"build_component_base\": 3fb8f3d6dea7: Waiting\n", + "Step #0 - \"build_component_base\": f6c74773a296: Waiting\n", + "Step #0 - \"build_component_base\": fe9f5cfcf49b: Verifying Checksum\n", + "Step #0 - \"build_component_base\": fe9f5cfcf49b: Download complete\n", + "Step #0 - \"build_component_base\": 3fb8f3d6dea7: Verifying Checksum\n", + "Step #0 - \"build_component_base\": 3fb8f3d6dea7: Download complete\n", + "Step #0 - \"build_component_base\": a6a0b3a8b7c2: Verifying Checksum\n", + "Step #0 - \"build_component_base\": a6a0b3a8b7c2: Download complete\n", + "Step #0 - \"build_component_base\": f6c74773a296: Verifying Checksum\n", + "Step #0 - \"build_component_base\": f6c74773a296: Download complete\n", + "Step #0 - \"build_component_base\": 9e3ea8720c6d: Download complete\n", + "Step #0 - \"build_component_base\": 9e3ea8720c6d: Pull complete\n", + "Step #0 - \"build_component_base\": fe9f5cfcf49b: Pull complete\n", + "Step #0 - \"build_component_base\": a6a0b3a8b7c2: Pull complete\n", + "Step #0 - \"build_component_base\": 3fb8f3d6dea7: Pull complete\n", + "Step #0 - \"build_component_base\": f6c74773a296: Pull complete\n", + "Step #0 - \"build_component_base\": Digest: sha256:1fc44d17b4ca49a8715af80786f21fa5ed8cfd257a1e14e24f4a79b4ec329388\n", "Step #0 - \"build_component_base\": Status: Downloaded newer image for python:3.9-slim\n", - "Step #0 - \"build_component_base\": ---> dafea68fa71e\n", + "Step #0 - \"build_component_base\": ---> 68ea36c1d076\n", "Step #0 - \"build_component_base\": Step 2/6 : RUN python -m pip install --upgrade pip\n", - "Step #0 - \"build_component_base\": ---> Running in da3576991143\n", + "Step #0 - \"build_component_base\": ---> Running in f8906afef511\n", "Step #0 - \"build_component_base\": Requirement already satisfied: pip in /usr/local/lib/python3.9/site-packages (22.0.4)\n", "Step #0 - \"build_component_base\": Collecting pip\n", "Step #0 - \"build_component_base\": Downloading pip-23.1.2-py3-none-any.whl (2.1 MB)\n", - "Step #0 - \"build_component_base\": ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 2.1/2.1 MB 32.5 MB/s eta 0:00:00\n", + "Step #0 - \"build_component_base\": ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 2.1/2.1 MB 26.5 MB/s eta 0:00:00\n", "Step #0 - \"build_component_base\": Installing collected packages: pip\n", "Step #0 - \"build_component_base\": Attempting uninstall: pip\n", "Step #0 - \"build_component_base\": Found existing installation: pip 22.0.4\n", @@ -848,49 +867,49 @@ "Step #0 - \"build_component_base\": Successfully uninstalled pip-22.0.4\n", "Step #0 - \"build_component_base\": Successfully installed pip-23.1.2\n", "Step #0 - \"build_component_base\": \u001b[91mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. 
It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\n", - "Step #0 - \"build_component_base\": \u001b[0mRemoving intermediate container da3576991143\n", - "Step #0 - \"build_component_base\": ---> 809f5acd1757\n", + "Step #0 - \"build_component_base\": \u001b[0mRemoving intermediate container f8906afef511\n", + "Step #0 - \"build_component_base\": ---> d75d06f7b77b\n", "Step #0 - \"build_component_base\": Step 3/6 : COPY requirements.txt .\n", - "Step #0 - \"build_component_base\": ---> 9fa87ddfb382\n", + "Step #0 - \"build_component_base\": ---> 6e1b88b92a33\n", "Step #0 - \"build_component_base\": Step 4/6 : RUN python -m pip install -r requirements.txt --quiet --no-cache-dir && rm -f requirements.txt\n", - "Step #0 - \"build_component_base\": ---> Running in 1d5f1030b5c6\n", + "Step #0 - \"build_component_base\": ---> Running in 2f695b011320\n", "Step #0 - \"build_component_base\": \u001b[91mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\n", - "Step #0 - \"build_component_base\": \u001b[0mRemoving intermediate container 1d5f1030b5c6\n", - "Step #0 - \"build_component_base\": ---> 24daccb7963a\n", + "Step #0 - \"build_component_base\": \u001b[0mRemoving intermediate container 2f695b011320\n", + "Step #0 - \"build_component_base\": ---> 48df72520a5d\n", "Step #0 - \"build_component_base\": Step 5/6 : COPY ./src /pipelines/component/src\n", - "Step #0 - \"build_component_base\": ---> 9f475415d7e7\n", + "Step #0 - \"build_component_base\": ---> d32d47f5e843\n", "Step #0 - \"build_component_base\": Step 6/6 : ENTRYPOINT [\"/bin/bash\"]\n", - "Step #0 - \"build_component_base\": ---> Running in 6416ed3d4be0\n", - "Step #0 - \"build_component_base\": Removing intermediate container 6416ed3d4be0\n", - "Step #0 - \"build_component_base\": ---> 9d81406eb32b\n", - "Step #0 - \"build_component_base\": Successfully built 9d81406eb32b\n", + "Step #0 - \"build_component_base\": ---> Running in 49e12b15473b\n", + "Step #0 - \"build_component_base\": Removing intermediate container 49e12b15473b\n", + "Step #0 - \"build_component_base\": ---> 9db80a17406a\n", + "Step #0 - \"build_component_base\": Successfully built 9db80a17406a\n", "Step #0 - \"build_component_base\": Successfully tagged us-central1-docker.pkg.dev/automlops-sandbox/vertex-mlops-af/components/component_base:latest\n", "Finished Step #0 - \"build_component_base\"\n", "Starting Step #1 - \"build_pipeline_runner_svc\"\n", "Step #1 - \"build_pipeline_runner_svc\": Already have image (with digest): gcr.io/cloud-builders/docker\n", - "Step #1 - \"build_pipeline_runner_svc\": Sending build context to Docker daemon 103.4kB\n", + "Step #1 - \"build_pipeline_runner_svc\": Sending build context to Docker daemon 72.7kB\n", "Step #1 - \"build_pipeline_runner_svc\": Step 1/10 : FROM python:3.9-slim\n", - "Step #1 - \"build_pipeline_runner_svc\": ---> dafea68fa71e\n", + "Step #1 - \"build_pipeline_runner_svc\": ---> 68ea36c1d076\n", "Step #1 - \"build_pipeline_runner_svc\": Step 2/10 : ENV PYTHONUNBUFFERED True\n", - "Step #1 - \"build_pipeline_runner_svc\": ---> Running in 2893f13fd8fd\n", - "Step #1 - \"build_pipeline_runner_svc\": Removing intermediate container 2893f13fd8fd\n", - "Step #1 - \"build_pipeline_runner_svc\": ---> 3c46ba244d84\n", + "Step #1 - \"build_pipeline_runner_svc\": ---> Running in fafbc8f836f9\n", + "Step #1 - 
\"build_pipeline_runner_svc\": Removing intermediate container fafbc8f836f9\n", + "Step #1 - \"build_pipeline_runner_svc\": ---> 814a51164458\n", "Step #1 - \"build_pipeline_runner_svc\": Step 3/10 : ENV APP_HOME /app\n", - "Step #1 - \"build_pipeline_runner_svc\": ---> Running in 8e6bbf46484f\n", - "Step #1 - \"build_pipeline_runner_svc\": Removing intermediate container 8e6bbf46484f\n", - "Step #1 - \"build_pipeline_runner_svc\": ---> 56e4a9b5b492\n", + "Step #1 - \"build_pipeline_runner_svc\": ---> Running in b5fb6510bb69\n", + "Step #1 - \"build_pipeline_runner_svc\": Removing intermediate container b5fb6510bb69\n", + "Step #1 - \"build_pipeline_runner_svc\": ---> 2ce7f9fc15fe\n", "Step #1 - \"build_pipeline_runner_svc\": Step 4/10 : WORKDIR $APP_HOME\n", - "Step #1 - \"build_pipeline_runner_svc\": ---> Running in 20ec87b9348f\n", - "Step #1 - \"build_pipeline_runner_svc\": Removing intermediate container 20ec87b9348f\n", - "Step #1 - \"build_pipeline_runner_svc\": ---> 0bb86b18cd63\n", + "Step #1 - \"build_pipeline_runner_svc\": ---> Running in ee542dbfa222\n", + "Step #1 - \"build_pipeline_runner_svc\": Removing intermediate container ee542dbfa222\n", + "Step #1 - \"build_pipeline_runner_svc\": ---> 36f4bf1e3cf3\n", "Step #1 - \"build_pipeline_runner_svc\": Step 5/10 : COPY ./ ./\n", - "Step #1 - \"build_pipeline_runner_svc\": ---> 60f95f22aef2\n", + "Step #1 - \"build_pipeline_runner_svc\": ---> eab480fb05ef\n", "Step #1 - \"build_pipeline_runner_svc\": Step 6/10 : RUN python -m pip install --upgrade pip\n", - "Step #1 - \"build_pipeline_runner_svc\": ---> Running in 038119c90d70\n", + "Step #1 - \"build_pipeline_runner_svc\": ---> Running in 3faf58269916\n", "Step #1 - \"build_pipeline_runner_svc\": Requirement already satisfied: pip in /usr/local/lib/python3.9/site-packages (22.0.4)\n", "Step #1 - \"build_pipeline_runner_svc\": Collecting pip\n", "Step #1 - \"build_pipeline_runner_svc\": Downloading pip-23.1.2-py3-none-any.whl (2.1 MB)\n", - "Step #1 - \"build_pipeline_runner_svc\": ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 2.1/2.1 MB 9.8 MB/s eta 0:00:00\n", + "Step #1 - \"build_pipeline_runner_svc\": ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 2.1/2.1 MB 25.3 MB/s eta 0:00:00\n", "Step #1 - \"build_pipeline_runner_svc\": Installing collected packages: pip\n", "Step #1 - \"build_pipeline_runner_svc\": Attempting uninstall: pip\n", "Step #1 - \"build_pipeline_runner_svc\": Found existing installation: pip 22.0.4\n", @@ -898,23 +917,91 @@ "Step #1 - \"build_pipeline_runner_svc\": Successfully uninstalled pip-22.0.4\n", "Step #1 - \"build_pipeline_runner_svc\": Successfully installed pip-23.1.2\n", "Step #1 - \"build_pipeline_runner_svc\": \u001b[91mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. 
It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\n", - "Step #1 - \"build_pipeline_runner_svc\": \u001b[0mRemoving intermediate container 038119c90d70\n", - "Step #1 - \"build_pipeline_runner_svc\": ---> 8f69b2cf40ca\n", + "Step #1 - \"build_pipeline_runner_svc\": \u001b[0mRemoving intermediate container 3faf58269916\n", + "Step #1 - \"build_pipeline_runner_svc\": ---> 303a81e9be5c\n", "Step #1 - \"build_pipeline_runner_svc\": Step 7/10 : RUN pip install --no-cache-dir -r /app/cloud_run/run_pipeline/requirements.txt\n", - "Step #1 - \"build_pipeline_runner_svc\": ---> Running in 66b647424ea6\n", + "Step #1 - \"build_pipeline_runner_svc\": ---> Running in a5ed64f4acff\n", "Step #1 - \"build_pipeline_runner_svc\": Collecting kfp (from -r /app/cloud_run/run_pipeline/requirements.txt (line 1))\n", - "Step #1 - \"build_pipeline_runner_svc\": Downloading kfp-1.8.20.tar.gz (304 kB)\n", - "Step #1 - \"build_pipeline_runner_svc\": ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 304.8/304.8 kB 6.1 MB/s eta 0:00:00\n", + "Step #1 - \"build_pipeline_runner_svc\": Downloading kfp-1.8.21.tar.gz (304 kB)\n", + "Step #1 - \"build_pipeline_runner_svc\": ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 304.9/304.9 kB 10.1 MB/s eta 0:00:00\n", "Step #1 - \"build_pipeline_runner_svc\": Preparing metadata (setup.py): started\n", "Step #1 - \"build_pipeline_runner_svc\": Preparing metadata (setup.py): finished with status 'done'\n", - "Step #1 - \"build_pipeline_runner_svc\": Collecting google-cloud-aiplatform (from -r /app/cloud_run/run_pipeline/requirements.txt (line 2))\n" + "Step #1 - \"build_pipeline_runner_svc\": Collecting google-cloud-aiplatform (from -r /app/cloud_run/run_pipeline/requirements.txt (line 2))\n", + "Step #1 - \"build_pipeline_runner_svc\": Downloading google_cloud_aiplatform-1.25.0-py2.py3-none-any.whl (2.6 MB)\n", + "Step #1 - \"build_pipeline_runner_svc\": ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 2.6/2.6 MB 88.3 MB/s eta 0:00:00\n", + "Step #1 - \"build_pipeline_runner_svc\": Collecting google-cloud-pipeline-components (from -r /app/cloud_run/run_pipeline/requirements.txt (line 3))\n", + "Step #1 - \"build_pipeline_runner_svc\": Downloading google_cloud_pipeline_components-1.0.43-py3-none-any.whl (1.0 MB)\n", + "Step #1 - \"build_pipeline_runner_svc\": ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.0/1.0 MB 207.8 MB/s eta 0:00:00\n", + "Step #1 - \"build_pipeline_runner_svc\": Collecting Flask (from -r /app/cloud_run/run_pipeline/requirements.txt (line 4))\n", + "Step #1 - \"build_pipeline_runner_svc\": Downloading Flask-2.3.2-py3-none-any.whl (96 kB)\n", + "Step #1 - \"build_pipeline_runner_svc\": ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 96.9/96.9 kB 164.4 MB/s eta 0:00:00\n", + "Step #1 - \"build_pipeline_runner_svc\": Collecting gunicorn (from -r /app/cloud_run/run_pipeline/requirements.txt (line 5))\n", + "Step #1 - \"build_pipeline_runner_svc\": Downloading gunicorn-20.1.0-py3-none-any.whl (79 kB)\n", + "Step #1 - \"build_pipeline_runner_svc\": ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 79.5/79.5 kB 164.9 MB/s eta 0:00:00\n", + "Step #1 - \"build_pipeline_runner_svc\": Collecting pyyaml (from -r /app/cloud_run/run_pipeline/requirements.txt (line 6))\n", + "Step #1 - \"build_pipeline_runner_svc\": Downloading PyYAML-6.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (661 kB)\n", + "Step #1 - \"build_pipeline_runner_svc\": ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 661.8/661.8 kB 210.7 MB/s eta 0:00:00\n", + "Step #1 - 
\"build_pipeline_runner_svc\": Collecting absl-py<2,>=0.9 (from kfp->-r /app/cloud_run/run_pipeline/requirements.txt (line 1))\n", + "Step #1 - \"build_pipeline_runner_svc\": Downloading absl_py-1.4.0-py3-none-any.whl (126 kB)\n", + "Step #1 - \"build_pipeline_runner_svc\": ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 126.5/126.5 kB 168.8 MB/s eta 0:00:00\n", + "Step #1 - \"build_pipeline_runner_svc\": Collecting pyyaml (from -r /app/cloud_run/run_pipeline/requirements.txt (line 6))\n", + "Step #1 - \"build_pipeline_runner_svc\": Downloading PyYAML-5.4.1-cp39-cp39-manylinux1_x86_64.whl (630 kB)\n", + "Step #1 - \"build_pipeline_runner_svc\": ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 630.1/630.1 kB 195.9 MB/s eta 0:00:00\n", + "Step #1 - \"build_pipeline_runner_svc\": Collecting google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0,<3.0.0dev,>=1.31.5 (from kfp->-r /app/cloud_run/run_pipeline/requirements.txt (line 1))\n", + "Step #1 - \"build_pipeline_runner_svc\": Downloading google_api_core-2.11.0-py3-none-any.whl (120 kB)\n", + "Step #1 - \"build_pipeline_runner_svc\": ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 120.3/120.3 kB 177.9 MB/s eta 0:00:00\n", + "Step #1 - \"build_pipeline_runner_svc\": Collecting google-cloud-storage<3,>=1.20.0 (from kfp->-r /app/cloud_run/run_pipeline/requirements.txt (line 1))\n", + "Step #1 - \"build_pipeline_runner_svc\": Downloading google_cloud_storage-2.9.0-py2.py3-none-any.whl (113 kB)\n", + "Step #1 - \"build_pipeline_runner_svc\": ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 113.5/113.5 kB 171.8 MB/s eta 0:00:00\n", + "Step #1 - \"build_pipeline_runner_svc\": Collecting kubernetes<26,>=8.0.0 (from kfp->-r /app/cloud_run/run_pipeline/requirements.txt (line 1))\n", + "Step #1 - \"build_pipeline_runner_svc\": Downloading kubernetes-25.3.0-py2.py3-none-any.whl (1.4 MB)\n", + "Step #1 - \"build_pipeline_runner_svc\": ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.4/1.4 MB 201.2 MB/s eta 0:00:00\n", + "Step #1 - \"build_pipeline_runner_svc\": Collecting google-api-python-client<2,>=1.7.8 (from kfp->-r /app/cloud_run/run_pipeline/requirements.txt (line 1))\n", + "Step #1 - \"build_pipeline_runner_svc\": Downloading google_api_python_client-1.12.11-py2.py3-none-any.whl (62 kB)\n", + "Step #1 - \"build_pipeline_runner_svc\": ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 62.1/62.1 kB 144.6 MB/s eta 0:00:00\n", + "Step #1 - \"build_pipeline_runner_svc\": Collecting google-auth<3,>=1.6.1 (from kfp->-r /app/cloud_run/run_pipeline/requirements.txt (line 1))\n", + "Step #1 - \"build_pipeline_runner_svc\": Downloading google_auth-2.18.0-py2.py3-none-any.whl (178 kB)\n", + "Step #1 - \"build_pipeline_runner_svc\": ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 178.9/178.9 kB 178.8 MB/s eta 0:00:00\n", + "Step #1 - \"build_pipeline_runner_svc\": Collecting requests-toolbelt<1,>=0.8.0 (from kfp->-r /app/cloud_run/run_pipeline/requirements.txt (line 1))\n", + "Step #1 - \"build_pipeline_runner_svc\": Downloading requests_toolbelt-0.10.1-py2.py3-none-any.whl (54 kB)\n", + "Step #1 - \"build_pipeline_runner_svc\": ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 54.5/54.5 kB 141.5 MB/s eta 0:00:00\n", + "Step #1 - \"build_pipeline_runner_svc\": Collecting cloudpickle<3,>=2.0.0 (from kfp->-r /app/cloud_run/run_pipeline/requirements.txt (line 1))\n", + "Step #1 - \"build_pipeline_runner_svc\": Downloading cloudpickle-2.2.1-py3-none-any.whl (25 kB)\n", + "Step #1 - \"build_pipeline_runner_svc\": Collecting kfp-server-api<2.0.0,>=1.1.2 (from kfp->-r /app/cloud_run/run_pipeline/requirements.txt (line 1))\n", + "Step #1 - 
\"build_pipeline_runner_svc\": Downloading kfp-server-api-1.8.5.tar.gz (58 kB)\n", + "Step #1 - \"build_pipeline_runner_svc\": ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 58.1/58.1 kB 144.4 MB/s eta 0:00:00\n", + "Step #1 - \"build_pipeline_runner_svc\": Preparing metadata (setup.py): started\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Step #1 - \"build_pipeline_runner_svc\": Downloading google_cloud_aiplatform\n", + "Step #1 - \"build_pipeline_runner_svc\": Preparing metadata (setup.py): finished with status 'done'\n", + "Step #1 - \"build_pipeline_runner_svc\": Collecting jsonschema<5,>=3.0.1 (from kfp->-r /app/cloud_run/run_pipeline/requirements.txt (line 1))\n", + "Step #1 - \"build_pipeline_runner_svc\": Downloading jsonschema-4.17.3-py3-none-any.whl (90 kB)\n", + "Step #1 - \"build_pipeline_runner_svc\": ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 90.4/90.4 kB 150.9 MB/s eta 0:00:00\n", + "Step #1 - \"build_pipeline_runner_svc\": Collecting tabulate<1,>=0.8.6 (from kfp->-r /app/cloud_run/run_pipeline/requirements.txt (line 1))\n", + "Step #1 - \"build_pipeline_runner_svc\": Downloading tabulate-0.9.0-py3-none-any.whl (35 kB)\n", + "Step #1 - \"build_pipeline_runner_svc\": Collecting click<9,>=7.1.2 (from kfp->-r /app/cloud_run/run_pipeline/requirements.txt (line 1))\n", + "Step #1 - \"build_pipeline_runner_svc\": Downloading click-8.1.3-py3-none-any.whl (96 kB)\n", + "Step #1 - \"build_pipeline_runner_svc\": ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 96.6/96.6 kB 169.6 MB/s eta 0:00:00\n", + "Step #1 - \"build_pipeline_runner_svc\": Collecting Deprecated<2,>=1.2.7 (from kfp->-r /app/cloud_run/run_pipeline/requirements.txt (line 1))\n", + "Step #1 - \"build_pipeline_runner_svc\": Downloading Deprecated-1.2.13-py2.py3-none-any.whl (9.6 kB)\n", + "Step #1 - \"build_pipeline_runner_svc\": Collecting strip-hints<1,>=0.1.8 (from kfp->-r /app/cloud_run/run_pipeline/requirements.txt (line 1))\n", + "Step #1 - \"build_pipeline_runner_svc\": Downloading strip-hints-0.1.10.tar.gz (29 kB)\n", + "Step #1 - \"build_pipeline_runner_svc\": Preparing metadata (setup.py): started\n", + "Step #1 - \"build_pipeline_runner_svc\": Preparing metadata (setup.py): finished with status 'done'\n", + "Step #1 - \"build_pipeline_runner_svc\": Collecting docstring-parser<1,>=0.7.3 (from kfp->-r /app/cloud_run/run_pipeline/requirements.txt (line 1))\n", + "Step #1 - \"build_pipeline_runner_svc\": Downloading docstring_parser-0.15-py3-none-any.whl (36 kB)\n", + "Step #1 - \"build_pipeline_runner_svc\": Collecting kfp-pipeline-spec<0.2.0,>=0.1.16 (from kfp->-r /app/cloud_run/run_pipeline/requirements.txt (line 1))\n", + "Step #1 - \"build_pipeline_runner_svc\": Downloading kfp_pipeline_spec-0.1.16-py3-none-any.whl (19 kB)\n", + "Step #1 - \"build_pipeline_runner_svc\": Collecting fire<1,>=0.3.1 (from kfp->-r /app/cloud_run/run_pipeline/requirements.txt (line 1))\n", + "Step #1 - \"build_pipeline_runner_svc\": Downloading fire-0.5.0.tar.gz (88 kB)\n", + "Step #1 - \"build_pipeline_runner_svc\": ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 88.3/88.3 kB 150.4 MB/s eta 0:00:00\n", + "Step #1 - \"build_pipeline_runner_svc\": Preparing metadata (setup.py): started\n", + "Step #1 - \"build_pipeline_runner_svc\": Preparing metadata (setup.py): finished with status 'done'\n", + "Step #1 - \"build_pipeline_runner_svc\": Collecting protobuf<4,>=3.13.0 (\n", "#################################################################\n", "# #\n", "# RESOURCES MANIFEST #\n", @@ -1168,7 +1255,7 @@ " vertex_model: Output[Model]\n", 
"):\n", " from google.cloud import aiplatform\n", - " aiplatform.init(project=project_id, location=region)\n", + " aiplatform.init(project=project, location=region)\n", " # Check if model exists\n", " models = aiplatform.Model.list()\n", " model_name = 'beans-model'\n", @@ -1180,14 +1267,14 @@ " version_description='challenger version'\n", " else:\n", " parent_model = None\n", - " model_id = model_name\n", + " model_id=model_name\n", " is_default_version=True\n", " version_aliases=['champion', 'custom-training', 'decision-tree']\n", " version_description='first version'\n", "\n", " serving_container = 'us-docker.pkg.dev/vertex-ai/prediction/sklearn-cpu.1-0:latest'\n", " uploaded_model = aiplatform.Model.upload(\n", - " artifact_uri = model.uri,\n", + " artifact_uri=model.uri,\n", " model_id=model_id,\n", " display_name=model_name,\n", " parent_model=parent_model,\n", @@ -1203,7 +1290,7 @@ " machine_type='n1-standard-4',\n", " deployed_model_display_name='deployed-beans-model')\n", " vertex_endpoint.uri = endpoint.resource_name\n", - " vertex_model.uri = deployed_model.resource_name" + " vertex_model.uri = endpoint.resource_name" ] }, { @@ -1290,7 +1377,12 @@ } ], "source": [ - "AutoMLOps.generate(project_id=PROJECT_ID, pipeline_params=pipeline_params, use_kfp_spec=True, run_local=False, schedule_pattern='0 */12 * * *')" + "AutoMLOps.generate(project_id=PROJECT_ID,\n", + " pipeline_params=pipeline_params,\n", + " use_kfp_spec=True,\n", + " run_local=False,\n", + " schedule_pattern='0 */12 * * *'\n", + ")" ] }, { @@ -1305,11 +1397,11 @@ "text": [ "INFO: Successfully saved requirements file in AutoMLOps/components/component_base/requirements.txt\n", "\u001b[0;32m Updating required API services in project automlops-sandbox \u001b[0m\n", - "Operation \"operations/acat.p2-45373616427-63b9e2cb-b713-406f-b083-afe8cc4260f1\" finished successfully.\n", + "Operation \"operations/acat.p2-45373616427-180ae521-226a-40b8-a57c-0014fe278082\" finished successfully.\n", "\u001b[0;32m Checking for Artifact Registry: vertex-mlops-af in project automlops-sandbox \u001b[0m\n", "Listing items under project automlops-sandbox, location us-central1.\n", "\n", - "vertex-mlops-af DOCKER STANDARD_REPOSITORY Artifact Registry vertex-mlops-af in us-central1. us-central1 Google-managed key 2023-01-11T17:12:26 2023-04-28T14:47:46 30801.594\n", + "vertex-mlops-af DOCKER STANDARD_REPOSITORY Artifact Registry vertex-mlops-af in us-central1. 
us-central1 Google-managed key 2023-01-11T17:12:26 2023-05-15T12:18:01 39689.266\n", "Artifact Registry: vertex-mlops-af already exists in project automlops-sandbox\n", "\u001b[0;32m Checking for GS Bucket: automlops-sandbox-bucket in project automlops-sandbox \u001b[0m\n", "gs://automlops-sandbox-bucket/\n", @@ -1327,12 +1419,10 @@ "\u001b[0;32m Checking for Cloudbuild Trigger: automlops-trigger in project automlops-sandbox \u001b[0m\n", "name: automlops-trigger\n", "Cloudbuild Trigger already exists in project automlops-sandbox for repo AutoMLOps-repo\n", - "[automlops b4726ae] Run AutoMLOps\n", - " 17 files changed, 2183 insertions(+), 410 deletions(-)\n", - " create mode 100644 .ipynb_checkpoints/00_training_example-checkpoint.ipynb\n", - " create mode 100644 AutoMLOps/scripts/pipeline_spec/pipeline_job.json\n", + "[automlops b905e9a] Run AutoMLOps\n", + " 6 files changed, 103 insertions(+), 12 deletions(-)\n", "To https://source.developers.google.com/p/automlops-sandbox/r/AutoMLOps-repo\n", - " fae262b..b4726ae automlops -> automlops\n", + " 0084a05..b905e9a automlops -> automlops\n", "Pushing code to automlops branch, triggering cloudbuild...\n", "Cloudbuild job running at: https://console.cloud.google.com/cloud-build/builds;region=us-central1\n", "\n", @@ -1373,6 +1463,14 @@ " }]\n", ")" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "28122ed3", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/examples/training/AutoMLOps/cloud_run/queueing_svc/main.py b/examples/training/AutoMLOps/cloud_run/queueing_svc/main.py index dd1655a..7952eb4 100644 --- a/examples/training/AutoMLOps/cloud_run/queueing_svc/main.py +++ b/examples/training/AutoMLOps/cloud_run/queueing_svc/main.py @@ -28,8 +28,8 @@ PIPELINE_RUNNER_SA = 'vertex-pipelines@automlops-sandbox.iam.gserviceaccount.com' PROJECT_ID = 'automlops-sandbox' SCHEDULE_LOCATION = 'us-central1' -SCHEDULE_NAME = 'AutoMLOps-schedule' SCHEDULE_PATTERN = '0 */12 * * *' +SCHEDULE_NAME = 'AutoMLOps-schedule' def get_runner_svc_uri( cloud_run_location: str, diff --git a/examples/training/AutoMLOps/cloud_run/queueing_svc/pipeline_parameter_values.json b/examples/training/AutoMLOps/cloud_run/queueing_svc/pipeline_parameter_values.json index 64e58c9..c430ba8 100644 --- a/examples/training/AutoMLOps/cloud_run/queueing_svc/pipeline_parameter_values.json +++ b/examples/training/AutoMLOps/cloud_run/queueing_svc/pipeline_parameter_values.json @@ -1,7 +1,7 @@ { "bq_table": "automlops-sandbox.test_dataset.dry-beans", - "model_directory": "gs://automlops-sandbox-bucket/trained_models/2023-04-28 16:26:29.543609", - "data_path": "gs://automlops-sandbox-bucket/data.csv", + "model_directory": "gs://automlops-sandbox-bucket/trained_models/2023-05-15 13:26:12.561035", + "data_path": "gs://automlops-sandbox-bucket/data", "project_id": "automlops-sandbox", "region": "us-central1" } \ No newline at end of file diff --git a/examples/training/AutoMLOps/components/component_base/requirements.txt b/examples/training/AutoMLOps/components/component_base/requirements.txt index 2e84fc4..7bbe550 100644 --- a/examples/training/AutoMLOps/components/component_base/requirements.txt +++ b/examples/training/AutoMLOps/components/component_base/requirements.txt @@ -1,3 +1,4 @@ + db_dtypes fsspec gcsfs @@ -30,10 +31,4 @@ google-cloud-tasks google-cloud-translate google-cloud-videointelligence google-cloud-vision -kfp -kfp_pipeline_spec -pandas -protobuf pyarrow -scikit_learn -tensorflow diff --git 
a/examples/training/AutoMLOps/components/component_base/src/create_dataset.py b/examples/training/AutoMLOps/components/component_base/src/create_dataset.py index 4ed8f0b..a4eea12 100644 --- a/examples/training/AutoMLOps/components/component_base/src/create_dataset.py +++ b/examples/training/AutoMLOps/components/component_base/src/create_dataset.py @@ -14,9 +14,9 @@ import argparse import json -from kfp.v2.components import executor import kfp from kfp.v2 import dsl +from kfp.v2.components import executor from kfp.v2.dsl import * from typing import * @@ -65,7 +65,7 @@ def load_bq_data(query: str, client: bigquery.Client) -> pd.DataFrame: dataframe = load_bq_data(get_query(bq_table), bq_client) le = preprocessing.LabelEncoder() dataframe['Class'] = le.fit_transform(dataframe['Class']) - dataframe.to_csv(data_path, index=False) + dataframe.to_csv(data_path) def main(): """Main executor.""" diff --git a/examples/training/AutoMLOps/components/component_base/src/deploy_model.py b/examples/training/AutoMLOps/components/component_base/src/deploy_model.py index 8cf7746..63f46d0 100644 --- a/examples/training/AutoMLOps/components/component_base/src/deploy_model.py +++ b/examples/training/AutoMLOps/components/component_base/src/deploy_model.py @@ -14,9 +14,9 @@ import argparse import json -from kfp.v2.components import executor import kfp from kfp.v2 import dsl +from kfp.v2.components import executor from kfp.v2.dsl import * from typing import * diff --git a/examples/training/AutoMLOps/components/component_base/src/train_model.py b/examples/training/AutoMLOps/components/component_base/src/train_model.py index cd7098f..f06b151 100644 --- a/examples/training/AutoMLOps/components/component_base/src/train_model.py +++ b/examples/training/AutoMLOps/components/component_base/src/train_model.py @@ -14,9 +14,9 @@ import argparse import json -from kfp.v2.components import executor import kfp from kfp.v2 import dsl +from kfp.v2.components import executor from kfp.v2.dsl import * from typing import * diff --git a/examples/training/AutoMLOps/pipelines/runtime_parameters/pipeline_parameter_values.json b/examples/training/AutoMLOps/pipelines/runtime_parameters/pipeline_parameter_values.json index 64e58c9..c430ba8 100644 --- a/examples/training/AutoMLOps/pipelines/runtime_parameters/pipeline_parameter_values.json +++ b/examples/training/AutoMLOps/pipelines/runtime_parameters/pipeline_parameter_values.json @@ -1,7 +1,7 @@ { "bq_table": "automlops-sandbox.test_dataset.dry-beans", - "model_directory": "gs://automlops-sandbox-bucket/trained_models/2023-04-28 16:26:29.543609", - "data_path": "gs://automlops-sandbox-bucket/data.csv", + "model_directory": "gs://automlops-sandbox-bucket/trained_models/2023-05-15 13:26:12.561035", + "data_path": "gs://automlops-sandbox-bucket/data", "project_id": "automlops-sandbox", "region": "us-central1" } \ No newline at end of file diff --git a/setup.py b/setup.py index b3f2915..6210ada 100644 --- a/setup.py +++ b/setup.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
"""Setup AutoMLOps modules""" +from setuptools import find_packages from setuptools import setup with open('README.md', 'r', encoding='utf-8') as file: @@ -19,7 +20,7 @@ setup( name='google-cloud-automlops', - version='1.1.0', + version='1.1.1', description='AutoMLOps is a service that generates a production-style \ MLOps pipeline from Jupyter Notebooks.', long_description=readme_contents, @@ -28,7 +29,7 @@ author='Sean Rastatter', author_email='srastatter@google.com', license='Apache-2.0', - packages=['AutoMLOps'], + packages=find_packages(), install_requires=['docopt==0.6.2', 'docstring-parser==0.15', 'pipreqs==0.4.11', diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..2379f87 --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2023 Google LLC. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/AutoMLOps/AutoMLOps_test.py b/tests/unit/AutoMLOps_test.py similarity index 100% rename from AutoMLOps/AutoMLOps_test.py rename to tests/unit/AutoMLOps_test.py diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py new file mode 100644 index 0000000..2379f87 --- /dev/null +++ b/tests/unit/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2023 Google LLC. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tests/unit/deployments/.gitkeep b/tests/unit/deployments/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/tests/unit/deployments/__init__.py b/tests/unit/deployments/__init__.py new file mode 100644 index 0000000..2379f87 --- /dev/null +++ b/tests/unit/deployments/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2023 Google LLC. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/tests/unit/deployments/github_actions/.gitkeep b/tests/unit/deployments/github_actions/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/tests/unit/deployments/gitlab_ci/.gitkeep b/tests/unit/deployments/gitlab_ci/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/tests/unit/deployments/jenkins/.gitkeep b/tests/unit/deployments/jenkins/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/tests/unit/frameworks/__init__.py b/tests/unit/frameworks/__init__.py new file mode 100644 index 0000000..2379f87 --- /dev/null +++ b/tests/unit/frameworks/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2023 Google LLC. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tests/unit/frameworks/airflow/.gitkeep b/tests/unit/frameworks/airflow/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/tests/unit/frameworks/argo/.gitkeep b/tests/unit/frameworks/argo/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/tests/unit/frameworks/kfp/__init__.py b/tests/unit/frameworks/kfp/__init__.py new file mode 100644 index 0000000..2379f87 --- /dev/null +++ b/tests/unit/frameworks/kfp/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2023 Google LLC. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tests/unit/utils/__init__.py b/tests/unit/utils/__init__.py new file mode 100644 index 0000000..2379f87 --- /dev/null +++ b/tests/unit/utils/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2023 Google LLC. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tests/unit/utils/utils_test.py b/tests/unit/utils/utils_test.py new file mode 100644 index 0000000..77b99d1 --- /dev/null +++ b/tests/unit/utils/utils_test.py @@ -0,0 +1,395 @@ +# Copyright 2023 Google LLC. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Unit tests for the utils module."""
+
+# pylint: disable=line-too-long
+# pylint: disable=missing-function-docstring
+
+import os
+import pathlib
+import pytest
+import yaml
+
+from AutoMLOps.utils.utils import (
+    delete_file,
+    make_dirs,
+    read_file,
+    read_yaml_file,
+    write_and_chmod,
+    write_file,
+    write_yaml_file,
+)
+
+@pytest.fixture
+def write_yaml():
+    file_path = pathlib.Path('testing.yaml')
+    file_path.write_text(
+        '# ===================================================\n'
+        '# Test Yaml File\n'
+        '# ===================================================\n'
+        '\n'
+        'Test1:\n'
+        '  - name: "my_name1"\n'
+        '    id: "my_id1"\n'
+        '    description: my_description1'
+        '\n'
+        'Test2:\n'
+        '  - name: "my_name2"\n'
+        '    id: "my_id2"\n'
+        '    description: my_description2', encoding='utf-8')
+    yield file_path
+    file_path.unlink()
+
+def test_make_dirs():
+    # Create a list of directories to create.
+    directories = ['dir1', 'dir2']
+
+    # Call the `make_dirs` function.
+    make_dirs(directories)
+
+    # Assert that the directories were created.
+    for directory in directories:
+        assert os.path.exists(directory)
+        os.rmdir(directory)
+
+def test_read_yaml_file():
+    # Create a yaml file.
+    with open('test.yaml', 'w', encoding='utf-8') as file:
+        yaml.dump({'key1': 'value1', 'key2': 'value2'}, file)
+
+    # Call the `read_yaml_file` function.
+    file_dict = read_yaml_file('test.yaml')
+
+    # Assert that the file_dict contains the expected values.
+    assert file_dict == {'key1': 'value1', 'key2': 'value2'}
+
+    # Remove test file
+    os.remove('test.yaml')
+
+def test_write_yaml_file():
+    # Call the `write_yaml_file` function.
+    write_yaml_file('test.yaml', {'key1': 'value1', 'key2': 'value2'}, 'w')
+
+    # Assert that the file contains the expected values.
+    with open('test.yaml', 'r', encoding='utf-8') as file:
+        file_dict = yaml.safe_load(file)
+    assert file_dict == {'key1': 'value1', 'key2': 'value2'}
+
+    # Call the `write_yaml_file` function with an invalid mode.
+    with pytest.raises(IOError):
+        write_yaml_file('test.yaml', {'key1': 'value1', 'key2': 'value2'}, 'r')
+
+    # Remove test file
+    os.remove('test.yaml')
+
+    # TODO: write_yaml_file currently accepts invalid content and file path
+    # arguments without raising; confirm whether that is the intended behavior.
+
+def test_read_file():
+    # Create a file.
+    with open('test.txt', 'w', encoding='utf-8') as file:
+        file.write('This is a test file.')
+
+    # Call the `read_file` function.
+    contents = read_file('test.txt')
+
+    # Assert that the contents of the file are correct.
+    assert contents == 'This is a test file.'
+
+    # Remove test file
+    os.remove('test.txt')
+
+    # TODO: read_file is expected to raise FileNotFoundError for a missing path,
+    # but it currently does not; re-enable this check once that is fixed.
+    #with pytest.raises(FileNotFoundError):
+    #    read_file('invalid_file_path.txt')
+
+def test_write_file():
+    # Create a file.
+    with open('test.txt', 'w', encoding='utf-8') as file:
+        file.write('This is a test file.')
+
+    # Call the `write_file` function.
+    write_file('test.txt', 'This is a test file.', 'w')
+
+    # Assert that the file exists.
+    assert os.path.exists('test.txt')
+
+    # Assert that the contents of the file are correct.
+    with open('test.txt', 'r', encoding='utf-8') as file:
+        contents = file.read()
+    assert contents == 'This is a test file.'
+
+    # Remove test file
+    os.remove('test.txt')
+
+    # Call the `write_file` function with a non-string file path.
+    with pytest.raises(OSError):
+        write_file(15, 'This is a test file.', 'w')
+
+def test_write_and_chmod():
+    # Create a file.
+    with open('test.txt', 'w', encoding='utf-8') as file:
+        file.write('This is a test file.')
+
+    # Call the `write_and_chmod` function.
+    write_and_chmod('test.txt', 'This is a test file.')
+
+    # Assert that the file exists and is executable.
+    assert os.path.exists('test.txt')
+    assert os.access('test.txt', os.X_OK)
+
+    # Delete the file.
+    os.remove('test.txt')
+
+    # TODO: write_and_chmod was expected to raise OSError for an invalid file
+    # path, but it currently succeeds; revisit what counts as an invalid path
+    # before re-enabling this check.
+    #with pytest.raises(OSError):
+    #    write_and_chmod('invalid_file_path.txt', 'This is a test file.')
+
+def test_delete_file():
+    # Create a file.
+    with open('test.txt', 'w', encoding='utf-8') as file:
+        file.write('This is a test file.')
+
+    # Call the `delete_file` function.
+    delete_file('test.txt')
+
+    # Assert that the file does not exist.
+    assert not os.path.exists('test.txt')
+
+    # TODO: delete_file is expected to raise OSError for a missing file path,
+    # but it currently does not; re-enable this check once that is fixed.
+    #with pytest.raises(OSError):
+    #    delete_file('invalid_file_path.txt')
+
+# TBD
+# @pytest.mark.parametrize('full_path', [True, False])
+# def test_get_components_list(full_path: bool) -> None:
+#     # Create a temporary directory
+#     tmp_dir = pathlib.Path('.tmpfiles')
+#     try:
+#         tmp_dir.mkdir()
+#     except FileExistsError:
+#         pass
+
+#     # Create some component yaml files
+#     component_yaml_1 = tmp_dir / 'component_1.yaml'
+#     component_yaml_1.write_text('name: example_component\ndescription: Custom component that takes in a BQ table and writes it to GCS.')
+#     component_yaml_2 = tmp_dir / 'component_2.yml'
+#     component_yaml_2.write_text('name: example_component_2\ndescription: Custom component that trains a decision tree on the training data.')
+
+#     # Get the list of component yaml files
+#     components_list = get_components_list(full_path)
+#     print(components_list)
+
+#     # Check that the list contains the correct files
+#     if full_path:
+#         assert components_list == [tmp_dir / 'component_1.yaml', tmp_dir / 'component_2.yml']
+#     else:
+#         assert components_list == ['component_1', 'component_2']
+
+#     # Clean up the temporary directory
+#     tmp_dir.rmdir()
+
+
+# def test_read_yaml_file(write_yaml):
+
+#     assert(read_yaml_file(write_yaml) ==
+#         {'Test1' : [
+#             {
+#                 'name': 'my_name1',
+#                 'id': 'my_id1',
+#                 'description': 'my_description1'
+#             }
+#         ],
+#         'Test2': [
+#             {
+#                 'name': 'my_name2',
+#                 'id': 'my_id2',
+#                 'description': 'my_description2'
+#             }
+#         ]})
+
+# def test_write_yaml_file():
+#     assert True
+
+# def test_read_file():
+#     assert True
+
+# def test_write_file():
+#     assert True
+
+# def test_write_and_chmod():
+#     assert True
+
+# def test_delete_file():
+#     assert True
+
+# def test_get_components_list():
+#     assert True
+
+# def test_is_component_config():
+#     assert True
+
+# def test_execute_script():
+#     assert True
+
+# def test_validate_schedule():
+
+#     # Check that error is raised when it should be
+#     with pytest.raises(Exception, match='run_local must be set to False to use Cloud Scheduler.'):
+#         validate_schedule(schedule_pattern="*",
+#                           run_local=True)
+
+#     # Check that error is not raised when it shouldn't be
+#     validate_schedule(schedule_pattern="*",
+#
run_local=False) + +# validate_schedule(schedule_pattern="No Schedule Specified", +# run_local=True) + +# validate_schedule(schedule_pattern="No Schedule Specified", +# run_local=False) + +# def test_validate_name(): + +# # Check that an error is raised when it should be +# with pytest.raises(Exception, match="Pipeline and Component names must be of type string."): +# validate_name(name=10) + +# # Check that error is not raised when it shouldn't be +# validate_name(name="My Name") + +# def test_validate_params(): + +# # Test for user providing a value for 'name' that is not a string +# with pytest.raises(Exception, match = 'Parameter name must be of type string.'): +# validate_params([ +# { +# 'name': 1, +# 'type': str, +# 'description': 'my_description' +# } +# ]) + +# # Test for user providing a value for 'type' that is not a valid python type +# with pytest.raises(Exception, match = 'Parameter type must be a valid python type.'): +# validate_params([ +# { +# 'name': 'my_name', +# 'type': 1, +# 'description': 'my_description' +# } +# ]) + +# # Test for user missing a required parameter value +# with pytest.raises(Exception, match = "Parameter {'name': 'my_name', 'description': 'my_description'} does not contain required keys. 'type'"): +# validate_params([ +# { +# 'name': 'my_name', +# 'description': 'my_description' +# } +# ]) + +# # don't think this can be tested +# validate_params([ +# { +# 'name': 'my_name', +# 'name': ',ajksdfj', +# 'type': int, +# 'type': float +# } +# ]) + +# # Test that a correct list of dictionaries passes as expected +# validate_params([ +# { +# 'name': 'my_name', +# 'type': str, +# 'description': 'my_description' +# } +# ]) + +# def test_validate_pipeline_structure(): +# assert True + +# def test_update_params(): + +# # Test for an exception with an incorrect value for 'type' +# with pytest.raises(Exception): +# update_params([ +# { +# 'name': 'my_name_1', +# 'type': str, +# 'description': 'my_description_1' +# }, +# { +# 'name': 'my_name_2', +# 'type': 10 +# } +# ]) + +# # Test for an exception with an incorrect value for 'type' +# with pytest.raises(Exception): +# update_params([ +# { +# 'name': 'my_name_1', +# 'type': str, +# 'description': 'my_description_1' +# }, +# { +# 'name': 'my_name_2', +# 'type': 'wrong_type' +# } +# ]) + +# # Test that correctly formatted parameters will pass +# update_params([ +# { +# 'name': 'my_name_1', +# 'type': str, +# 'description': 'my_description_1' +# }, +# { +# 'name': 'my_name_2', +# 'type': int +# } +# ]) + +# # Test that correctly formatted parameters will pass +# update_params([ +# { +# 'name': 'my_name_1', +# 'type': str, +# 'description': 'my_description_1' +# }, +# { +# 'name': 'my_name_2', +# 'type': float +# } +# ])
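
The commented-out validation tests above could eventually be enabled along these lines; a minimal sketch, assuming validate_schedule is importable from AutoMLOps.utils.utils like the other helpers in this file and raises an Exception carrying the message quoted in the commented test (not part of this change set):

# Sketch only: assumes validate_schedule lives in AutoMLOps.utils.utils and
# raises an Exception with the quoted message when a concrete schedule_pattern
# is combined with run_local=True.
import pytest

from AutoMLOps.utils.utils import validate_schedule


def test_validate_schedule():
    # A concrete schedule pattern with run_local=True should be rejected.
    with pytest.raises(Exception, match='run_local must be set to False to use Cloud Scheduler.'):
        validate_schedule(schedule_pattern='*', run_local=True)

    # These combinations should pass without raising.
    validate_schedule(schedule_pattern='*', run_local=False)
    validate_schedule(schedule_pattern='No Schedule Specified', run_local=True)
    validate_schedule(schedule_pattern='No Schedule Specified', run_local=False)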