# Patch summary (four files are touched):
#   README.md
#     Both manual image-build recipes replace `docker buildx use default` with
#     `docker buildx use typesense-builder || docker buildx create --name typesense-builder
#     --driver docker-container --use --bootstrap`.
#   cli/src/commands/abstract_build_docker.py
#     Adds the static method AbstractBuildDocker.setup_buildx() and calls it from
#     build_docker_file() right after `tags = [image]`. setup_buildx() runs
#     `docker buildx use typesense-builder`; if that raises subprocess.CalledProcessError it runs
#     the `docker buildx create ...` command instead. It returns the stripped output of whichever
#     command ran.
#   scraper/dev/docker/Dockerfile.base
#     Joins the chromium-driver `apt-get install` onto a single line; the package is unchanged.
#   scraper/src/config/browser_handler.py
#     init() now reads CHROMIUMDRIVER_PATH from the environment (default "/usr/bin/chromedriver").
#     If that path is a file it is passed to Service(executable_path=...); otherwise the previous
#     ChromeDriverManager().install() path is used.
# NOTE(review): setup_buildx() catches only CalledProcessError, so any non-zero exit from
#   `buildx use` (not just a missing builder) triggers `buildx create` - confirm that is intended.
# NOTE(review): presumably a missing `docker` executable raises a different exception type and
#   propagates to the caller - verify against the subprocess documentation.
# NOTE(review): both branches of the new if/else in init() end with the same
#   `driver = webdriver.Chrome(service=webdriver_service, options=chrome_options)` statement.
# NOTE(review): the browser_handler.py hunk uses `os`, but no import of it is visible in this
#   patch - confirm the module already imports it.
# NOTE(review): line breaks appear to have been collapsed in this copy of the patch; it would
#   presumably need them restored before it can be applied - TODO confirm against the original.
diff --git a/README.md b/README.md index a2a720a..8a251cb 100644 --- a/README.md +++ b/README.md @@ -148,7 +148,7 @@ pipenv shell # Build a new version of the base Docker container - ONLY NEEDED WHEN WE CHANGE DEPENDENCIES export SCRAPER_BASE_VERSION="0.8.0" # Only need to change this when we update dependencies -docker buildx use default # use same buildx context for all containers to build +docker buildx use typesense-builder || docker buildx create --name typesense-builder --driver docker-container --use --bootstrap # use same buildx context for all containers to build docker buildx build --load -f ./scraper/dev/docker/Dockerfile.base -t typesense/docsearch-scraper-base:${SCRAPER_BASE_VERSION} . docker push typesense/docsearch-scraper-base:${SCRAPER_BASE_VERSION} docker tag typesense/docsearch-scraper-base:${SCRAPER_BASE_VERSION} typesense/docsearch-scraper-base:latest @@ -157,7 +157,7 @@ docker push typesense/docsearch-scraper-base:latest # Build a new version of the scraper Docker container export SCRAPER_VERSION="0.9.1" export SCRAPER_BASE_VERSION="latest" -docker buildx use default # use same buildx context for all containers to build +docker buildx use typesense-builder || docker buildx create --name typesense-builder --driver docker-container --use --bootstrap # use same buildx context for all containers to build docker buildx build -f ./scraper/dev/docker/Dockerfile --build-arg SCRAPER_BASE_VERSION=${SCRAPER_BASE_VERSION} -t typesense/docsearch-scraper:${SCRAPER_VERSION} . 
docker push typesense/docsearch-scraper:${SCRAPER_VERSION} docker tag typesense/docsearch-scraper:${SCRAPER_VERSION} typesense/docsearch-scraper:latest diff --git a/cli/src/commands/abstract_build_docker.py b/cli/src/commands/abstract_build_docker.py index 9e464ee..674c8fd 100644 --- a/cli/src/commands/abstract_build_docker.py +++ b/cli/src/commands/abstract_build_docker.py @@ -6,6 +6,7 @@ class AbstractBuildDocker(AbstractCommand): def build_docker_file(file, image="typesense/docsearch-scraper-dev", local_tag=False): tags = [image] + AbstractBuildDocker.setup_buildx() if local_tag: tag = AbstractBuildDocker.get_local_tag().decode() @@ -30,3 +31,11 @@ def get_local_tag(): from subprocess import check_output return check_output( ['git', 'describe', '--abbrev=0', '--tags']).strip() + + @staticmethod + def setup_buildx(): + from subprocess import check_output, CalledProcessError + try: + return check_output(['docker', 'buildx', 'use', 'typesense-builder']).strip() + except CalledProcessError: + return check_output(['docker', 'buildx', 'create', '--name', 'typesense-builder', '--driver', 'docker-container', '--use', '--bootstrap']).strip() diff --git a/scraper/dev/docker/Dockerfile.base b/scraper/dev/docker/Dockerfile.base index 38fb543..8fdb004 100644 --- a/scraper/dev/docker/Dockerfile.base +++ b/scraper/dev/docker/Dockerfile.base @@ -32,8 +32,7 @@ RUN apt-get update -y && apt-get install -yq \ libgconf-2-4 \ default-jdk -RUN apt-get update -y && apt-get install -yq \ - chromium-driver +RUN apt-get update -y && apt-get install -yq chromium-driver RUN wget -q https://github.com/SeleniumHQ/selenium/releases/download/selenium-4.4.0/selenium-server-4.4.0.jar RUN wget -q https://repo1.maven.org/maven2/org/testng/testng/7.6.1/testng-7.6.1.jar diff --git a/scraper/src/config/browser_handler.py b/scraper/src/config/browser_handler.py index 3e43e69..2135078 100644 --- a/scraper/src/config/browser_handler.py +++ b/scraper/src/config/browser_handler.py @@ -29,8 +29,15 @@ def 
init(config_original_content, js_render, user_agent): chrome_options.add_argument('--headless') chrome_options.add_argument('user-agent={0}'.format(user_agent)) - webdriver_service = Service(ChromeDriverManager().install()) - driver = webdriver.Chrome(service=webdriver_service, options=chrome_options) + CHROMIUMDRIVER_PATH = os.environ.get('CHROMIUMDRIVER_PATH', "/usr/bin/chromedriver") + + if os.path.isfile(CHROMIUMDRIVER_PATH): + webdriver_service = Service(executable_path=CHROMIUMDRIVER_PATH) + driver = webdriver.Chrome(service=webdriver_service, options=chrome_options) + else: + webdriver_service = Service(ChromeDriverManager().install()) + driver = webdriver.Chrome(service=webdriver_service, options=chrome_options) + CustomDownloaderMiddleware.driver = driver JsExecutor.driver = driver return driver