feat: multi-arch #58

Merged 2 commits on Oct 9, 2024
README.md (12 changes: 7 additions & 5 deletions)
@@ -117,11 +117,11 @@ echo 'command -v pyenv >/dev/null || export PATH="$PYENV_ROOT/bin:$PATH"' >> ~/.
echo 'eval "$(pyenv init -)"' >> ~/.bashrc
source ~/.bashrc

-# Install Python 3.10 inside pyenv:
-pyenv install 3.10
+# Install Python 3.11 inside pyenv:
+pyenv install 3.11

# Set the active version of Python:
-pyenv local 3.10
+pyenv local 3.11

# Upgrade pip:
pip install --upgrade pip
@@ -139,7 +139,7 @@ source ~/.bashrc

# Ensure that you are in the "typesense-docsearch-scraper" directory.
# Then, install the Python dependencies for this project:
-pipenv --python 3.10
+pipenv --python 3.11
pipenv lock --clear
pipenv install

@@ -148,14 +148,16 @@ pipenv shell

# Build a new version of the base Docker container - ONLY NEEDED WHEN WE CHANGE DEPENDENCIES
export SCRAPER_BASE_VERSION="0.8.0" # Only need to change this when we update dependencies
-docker buildx build -f ./scraper/dev/docker/Dockerfile.base -t typesense/docsearch-scraper-base:${SCRAPER_BASE_VERSION} .
+docker buildx use typesense-builder || docker buildx create --name typesense-builder --driver docker-container --use --bootstrap # use same buildx context for all containers to build
+docker buildx build --load -f ./scraper/dev/docker/Dockerfile.base -t typesense/docsearch-scraper-base:${SCRAPER_BASE_VERSION} .
docker push typesense/docsearch-scraper-base:${SCRAPER_BASE_VERSION}
docker tag typesense/docsearch-scraper-base:${SCRAPER_BASE_VERSION} typesense/docsearch-scraper-base:latest
docker push typesense/docsearch-scraper-base:latest

# Build a new version of the scraper Docker container
export SCRAPER_VERSION="0.9.1"
export SCRAPER_BASE_VERSION="latest"
+docker buildx use typesense-builder || docker buildx create --name typesense-builder --driver docker-container --use --bootstrap # use same buildx context for all containers to build
docker buildx build -f ./scraper/dev/docker/Dockerfile --build-arg SCRAPER_BASE_VERSION=${SCRAPER_BASE_VERSION} -t typesense/docsearch-scraper:${SCRAPER_VERSION} .
docker push typesense/docsearch-scraper:${SCRAPER_VERSION}
docker tag typesense/docsearch-scraper:${SCRAPER_VERSION} typesense/docsearch-scraper:latest
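Note: the base-image build above uses --load, which produces a single-architecture image in the local engine. To publish a true multi-arch manifest, the same builder can build and push both platforms in one step. The sketch below is illustrative only (it reuses the tags exported above and is not part of the README change itself):

# Build linux/amd64 + linux/arm64 and push the combined manifest in one step
docker buildx build --platform linux/amd64,linux/arm64 \
  -f ./scraper/dev/docker/Dockerfile \
  --build-arg SCRAPER_BASE_VERSION=${SCRAPER_BASE_VERSION} \
  -t typesense/docsearch-scraper:${SCRAPER_VERSION} \
  --push .

# Confirm which architectures the pushed tag contains
docker buildx imagetools inspect typesense/docsearch-scraper:${SCRAPER_VERSION}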
cli/src/commands/abstract_build_docker.py (16 changes: 15 additions & 1 deletion)
@@ -6,13 +6,19 @@ class AbstractBuildDocker(AbstractCommand):
    def build_docker_file(file, image="typesense/docsearch-scraper-dev",
                          local_tag=False):
        tags = [image]
+       AbstractBuildDocker.setup_buildx()

        if local_tag:
            tag = AbstractBuildDocker.get_local_tag().decode()
            tags.append(image + ":" + tag)

-       cmd = ["docker", "build"] + [param for tag in tags for param in
+       cmd = ["docker", "buildx", "build"] + [param for tag in tags for param in
                                               ['-t', tag]] + ["-f", file, "."]
+       if local_tag:
+           cmd += ["--platform", "linux/amd64,linux/arm64", "--push"]
+       else:
+           cmd += ["--load"]

        return AbstractCommand.exec_shell_command(cmd)

    def get_options(self):
@@ -25,3 +31,11 @@ def get_local_tag():
        from subprocess import check_output
        return check_output(
            ['git', 'describe', '--abbrev=0', '--tags']).strip()
+
+   @staticmethod
+   def setup_buildx():
+       from subprocess import check_output, CalledProcessError
+       try:
+           return check_output(['docker', 'buildx', 'use', 'typesense-builder']).strip()
+       except CalledProcessError:
+           return check_output(['docker', 'buildx', 'create', '--name', 'typesense-builder', '--driver', 'docker-container', '--use', '--bootstrap']).strip()
scraper/dev/docker/Dockerfile (2 changes: 1 addition & 1 deletion)
@@ -1,4 +1,4 @@
-ARG SCRAPER_BASE_VERSION
+ARG SCRAPER_BASE_VERSION=0.9.1
FROM typesense/docsearch-scraper-base:$SCRAPER_BASE_VERSION
LABEL maintainer="[email protected]"

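With a default on the ARG, the scraper image now builds even if SCRAPER_BASE_VERSION is not supplied; an explicit --build-arg (as in the README) still overrides it. A hedged example, using illustrative tags:

# Uses the base version baked into the Dockerfile (0.9.1)
docker buildx build -f ./scraper/dev/docker/Dockerfile -t typesense/docsearch-scraper:dev --load .

# Overrides the default, as the README does
docker buildx build -f ./scraper/dev/docker/Dockerfile --build-arg SCRAPER_BASE_VERSION=latest -t typesense/docsearch-scraper:dev --load .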
scraper/dev/docker/Dockerfile.base (18 changes: 7 additions & 11 deletions)
@@ -1,4 +1,5 @@
-FROM --platform=linux/amd64 ubuntu:22.04
+# syntax=docker/dockerfile:1.4
+FROM debian:12-slim
LABEL maintainer="[email protected]"

# Install selenium
@@ -14,8 +15,8 @@ WORKDIR /home/seleuser

RUN apt-get update -y && apt-get install -yq \
software-properties-common\
-python3.10
-RUN add-apt-repository -y ppa:openjdk-r/ppa
+python3.11

RUN apt-get update -y && apt-get install -yq \
curl \
wget \
@@ -31,12 +32,7 @@ RUN apt-get update -y && apt-get install -yq \
libgconf-2-4 \
default-jdk

-# https://www.ubuntuupdates.org/package/google_chrome/stable/main/base/google-chrome-stable for references around the latest versions
-RUN curl -sS -o - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add
-RUN echo "deb [arch=amd64] https://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google-chrome.list
-RUN apt-get update -y && apt-get install -yq \
-google-chrome-stable=128.0.6613.84-1 \
-unzip
+RUN apt-get update -y && apt-get install -yq chromium-driver

RUN wget -q https://github.com/SeleniumHQ/selenium/releases/download/selenium-4.4.0/selenium-server-4.4.0.jar
RUN wget -q https://repo1.maven.org/maven2/org/testng/testng/7.6.1/testng-7.6.1.jar
@@ -50,7 +46,7 @@ ENV LANG C.UTF-8
ENV PIPENV_HIDE_EMOJIS 1
RUN apt-get update -y && apt-get install -yq \
python3-pip
-RUN pip3 install pipenv
+RUN pip3 install pipenv --break-system-packages

USER 1000
-RUN pipenv sync --python 3.10
+RUN pipenv sync --python 3.11
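Two notes on the new base: Debian 12 marks its system Python as externally managed (PEP 668), which is why pip now needs --break-system-packages, and chromedriver now comes from Debian's chromium-driver package (built for both amd64 and arm64) instead of a pinned google-chrome-stable. A quick, hedged sanity check of the rebuilt image — the tag is whatever you just built, and --entrypoint is used in case the image defines one:

# Confirm the Debian-packaged chromedriver and Python are present in the base image
docker run --rm --entrypoint chromedriver typesense/docsearch-scraper-base:latest --version
docker run --rm --entrypoint python3 typesense/docsearch-scraper-base:latest --version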
scraper/src/config/browser_handler.py (11 changes: 9 additions & 2 deletions)
@@ -29,8 +29,15 @@ def init(config_original_content, js_render, user_agent):
        chrome_options.add_argument('--headless')
        chrome_options.add_argument('user-agent={0}'.format(user_agent))

-       webdriver_service = Service(ChromeDriverManager().install())
-       driver = webdriver.Chrome(service=webdriver_service, options=chrome_options)
+       CHROMIUMDRIVER_PATH = os.environ.get('CHROMIUMDRIVER_PATH', "/usr/bin/chromedriver")
+
+       if os.path.isfile(CHROMIUMDRIVER_PATH):
+           webdriver_service = Service(executable_path=CHROMIUMDRIVER_PATH)
+           driver = webdriver.Chrome(service=webdriver_service, options=chrome_options)
+       else:
+           webdriver_service = Service(ChromeDriverManager().install())
+           driver = webdriver.Chrome(service=webdriver_service, options=chrome_options)
+
        CustomDownloaderMiddleware.driver = driver
        JsExecutor.driver = driver
        return driver
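The new lookup prefers an explicitly installed chromedriver and only falls back to webdriver-manager's download when that file is missing, so both the Docker image (which ships /usr/bin/chromedriver via chromium-driver) and local runs keep working. A small usage sketch, assuming a shell session in which you run the scraper; the path below is the Debian/Ubuntu default:

# Point the scraper at a locally installed chromedriver; if the path does not exist,
# browser_handler falls back to downloading one via webdriver-manager.
export CHROMIUMDRIVER_PATH=/usr/bin/chromedriver
[ -x "$CHROMIUMDRIVER_PATH" ] && "$CHROMIUMDRIVER_PATH" --version || echo "no chromedriver at $CHROMIUMDRIVER_PATH; webdriver-manager will fetch one"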