Skip to content

Commit

Permalink
Docker integ test with async API (#1003)
Browse files Browse the repository at this point in the history
Signed-off-by: Norman Jordan <[email protected]>
  • Loading branch information
normanj-bitquill authored Jan 16, 2025
1 parent be1df0f commit 5884fea
Show file tree
Hide file tree
Showing 24 changed files with 1,077 additions and 35 deletions.
8 changes: 6 additions & 2 deletions docker/integ-test/.env
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,13 @@ MASTER_UI_PORT=8080
MASTER_PORT=7077
UI_PORT=4040
SPARK_CONNECT_PORT=15002
PPL_JAR=../../ppl-spark-integration/target/scala-2.12/ppl-spark-integration-assembly-0.7.0-SNAPSHOT.jar
FLINT_JAR=../../flint-spark-integration/target/scala-2.12/flint-spark-integration-assembly-0.7.0-SNAPSHOT.jar
PPL_JAR=./ppl-spark-integration/target/scala-2.12/ppl-spark-integration-assembly-0.7.0-SNAPSHOT.jar
FLINT_JAR=./flint-spark-integration/target/scala-2.12/flint-spark-integration-assembly-0.7.0-SNAPSHOT.jar
SQL_APP_JAR=./spark-sql-application/target/scala-2.12/sql-job-assembly-0.7.0-SNAPSHOT.jar
OPENSEARCH_NODE_MEMORY=512m
OPENSEARCH_ADMIN_PASSWORD=C0rrecthorsebatterystaple.
OPENSEARCH_PORT=9200
OPENSEARCH_PA_PORT=9600
OPENSEARCH_DASHBOARDS_PORT=5601
S3_ACCESS_KEY=Vt7jnvi5BICr1rkfsheT
S3_SECRET_KEY=5NK3StGvoGCLUWvbaGN0LBUf9N6sjE94PEzLdqwO
73 changes: 73 additions & 0 deletions docker/integ-test/configuration-updater/apply-configuration.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
#!/bin/sh

# Copyright OpenSearch Contributors
# SPDX-License-Identifier: Apache-2.0

# Applies the integration-test configuration to Minio, OpenSearch Dashboards
# and OpenSearch using their HTTP APIs.
#
# Environment variables read:
#   S3_ACCESS_KEY              access key of the Minio service account to create
#   S3_SECRET_KEY              secret key of the Minio service account to create
#   OPENSEARCH_ADMIN_PASSWORD  password of the OpenSearch "admin" user
#
# The script is best-effort: a failed request is reported on stdout but does
# not abort the remaining steps, and the script's exit status is that of the
# final echo.
#
# NOTE: curl's -q must remain the first argument on the command line, which
# is why it directly follows "curl" wherever it is used.

# Login to Minio (session cookie is stored for the following Minio requests)
curl -q \
  -c /tmp/minio-cookies.txt \
  -H 'Content-Type: application/json' \
  -d '{"accessKey": "minioadmin", "secretKey": "minioadmin"}' \
  http://minio-S3:9001/api/v1/login
# Delete the test bucket
curl -b /tmp/minio-cookies.txt \
  -X DELETE \
  http://minio-S3:9001/api/v1/buckets/test
# Create the integ-test bucket
curl -q \
  -b /tmp/minio-cookies.txt \
  -X POST \
  -H 'Content-Type: application/json' \
  -d '{"name": "integ-test", "versioning": {"enabled": true, "excludePrefixes": [], "excludeFolders": false}, "locking": true}' \
  http://minio-S3:9001/api/v1/buckets
# Create the access key
curl -q \
  -b /tmp/minio-cookies.txt \
  -X POST \
  -H 'Content-Type: application/json' \
  -d "{\"policy\": \"\", \"accessKey\": \"${S3_ACCESS_KEY}\", \"secretKey\": \"${S3_SECRET_KEY}\", \"description\": \"\", \"comment\": \"\", \"name\": \"\", \"expiry\": null}" \
  http://minio-S3:9001/api/v1/service-account-credentials

# Login to OpenSearch Dashboards
# -f makes curl exit non-zero on an HTTP error status (>= 400); without it the
# success branch below would also be taken when the server rejects the login.
echo ">>> Login to OpenSearch dashboards"
if curl -q \
  -f \
  -c /tmp/opensearch-cookies.txt \
  -X POST \
  -H 'Content-Type: application/json' \
  -H 'Osd-Version: 2.18.0' \
  -H 'Osd-Xsrf: fetch' \
  -d "{\"username\": \"admin\", \"password\": \"${OPENSEARCH_ADMIN_PASSWORD}\"}" \
  'http://opensearch-dashboards:5601/auth/login?dataSourceId='
then
  echo " >>> Login successful"
else
  echo " >>> Login failed"
fi
# Create the S3/Glue datasource (uses the Dashboards session cookie from the login)
echo ">>> Creating datasource"
if curl -q \
  -f \
  -b /tmp/opensearch-cookies.txt \
  -X POST \
  -H 'Content-Type: application/json' \
  -H 'Osd-Version: 2.18.0' \
  -H 'Osd-Xsrf: fetch' \
  -d "{\"name\": \"mys3\", \"allowedRoles\": [], \"connector\": \"s3glue\", \"properties\": {\"glue.auth.type\": \"iam_role\", \"glue.auth.role_arn\": \"arn:aws:iam::123456789012:role/S3Access\", \"glue.indexstore.opensearch.uri\": \"http://opensearch:9200\", \"glue.indexstore.opensearch.auth\": \"basicauth\", \"glue.indexstore.opensearch.auth.username\": \"admin\", \"glue.indexstore.opensearch.auth.password\": \"${OPENSEARCH_ADMIN_PASSWORD}\"}}" \
  http://opensearch-dashboards:5601/api/directquery/dataconnections
then
  echo " >>> S3 datasource created"
else
  echo " >>> Failed to create S3 datasource"
fi

# Store the Spark execution engine configuration as a persistent cluster setting
echo ">>> Setting cluster settings"
if curl -v \
  -f \
  -u "admin:${OPENSEARCH_ADMIN_PASSWORD}" \
  -X PUT \
  -H 'Content-Type: application/json' \
  -d '{"persistent": {"plugins.query.executionengine.spark.config": "{\"applicationId\":\"integ-test\",\"executionRoleARN\":\"arn:aws:iam::xxxxx:role/emr-job-execution-role\",\"region\":\"us-west-2\", \"sparkSubmitParameters\": \"--conf spark.dynamicAllocation.enabled=false\"}"}}' \
  http://opensearch:9200/_cluster/settings
then
  echo " >>> Successfully set cluster settings"
else
  echo " >>> Failed to set cluster settings"
fi
139 changes: 109 additions & 30 deletions docker/integ-test/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,13 +1,35 @@
services:
metastore:
build: ./metastore
container_name: metastore
ports:
- "${THRIFT_PORT:-9083}:9083"
volumes:
- type: bind
source: ./metastore/hive-site.xml
target: /opt/apache-hive-2.3.9-bin/conf/hive-site.xml
- type: bind
source: ./metastore/hive-log4j2.properties
target: /opt/apache-hive-2.3.9-bin/conf/hive-log4j2.properties
- type: volume
source: metastore-data
target: /data
networks:
- opensearch-net

spark:
image: bitnami/spark:${SPARK_VERSION:-3.5.3}
build:
context: ./spark
dockerfile: Dockerfile
args:
SPARK_VERSION: ${SPARK_VERSION:-3.5.3}
container_name: spark
entrypoint: /opt/bitnami/scripts/spark/spark-master-entrypoint.sh
ports:
- "${MASTER_UI_PORT:-8080}:8080"
- "${MASTER_PORT:-7077}:7077"
- "${UI_PORT:-4040}:4040"
- "${SPARK_CONNECT_PORT}:15002"
entrypoint: /opt/bitnami/scripts/spark/master-entrypoint.sh
environment:
- SPARK_MODE=master
- SPARK_RPC_AUTHENTICATION_ENABLED=no
Expand All @@ -17,19 +39,10 @@ services:
- SPARK_PUBLIC_DNS=localhost
volumes:
- type: bind
source: ./spark-master-entrypoint.sh
target: /opt/bitnami/scripts/spark/master-entrypoint.sh
- type: bind
source: ./spark-defaults.conf
target: /opt/bitnami/spark/conf/spark-defaults.conf
- type: bind
source: ./log4j2.properties
target: /opt/bitnami/spark/conf/log4j2.properties
- type: bind
source: $PPL_JAR
source: ../../$PPL_JAR
target: /opt/bitnami/spark/jars/ppl-spark-integration.jar
- type: bind
source: $FLINT_JAR
source: ../../$FLINT_JAR
target: /opt/bitnami/spark/jars/flint-spark-integration.jar
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8080/"]
Expand All @@ -40,9 +53,22 @@ services:
start_interval: 5s
networks:
- opensearch-net
depends_on:
metastore:
condition: service_started
opensearch:
condition: service_healthy
opensearch-dashboards:
condition: service_healthy
configuration-updater:
condition: service_completed_successfully

spark-worker:
image: bitnami/spark:${SPARK_VERSION:-3.5.3}
build:
context: ./spark
dockerfile: Dockerfile
args:
SPARK_VERSION: ${SPARK_VERSION:-3.5.3}
container_name: spark-worker
environment:
- SPARK_MODE=worker
Expand All @@ -56,32 +82,43 @@ services:
- SPARK_PUBLIC_DNS=localhost
volumes:
- type: bind
source: ./spark-defaults.conf
target: /opt/bitnami/spark/conf/spark-defaults.conf
- type: bind
source: ./log4j2.properties
target: /opt/bitnami/spark/conf/log4j2.properties
- type: bind
source: $PPL_JAR
source: ../../$PPL_JAR
target: /opt/bitnami/spark/jars/ppl-spark-integration.jar
- type: bind
source: $FLINT_JAR
source: ../../$FLINT_JAR
target: /opt/bitnami/spark/jars/flint-spark-integration.jar
networks:
- opensearch-net
depends_on:
- spark
metastore:
condition: service_started
spark:
condition: service_healthy

spark-submit:
build:
context: ../../
dockerfile: docker/integ-test/spark-submit/Dockerfile
args:
FLINT_JAR: ${FLINT_JAR}
PPL_JAR: ${PPL_JAR}
SQL_APP_JAR: ${SQL_APP_JAR}
depends_on:
metastore:
condition: service_completed_successfully

opensearch:
image: opensearchproject/opensearch:${OPENSEARCH_VERSION:-latest}
build: ./opensearch
container_name: opensearch
environment:
- cluster.name=opensearch-cluster
- node.name=opensearch
- discovery.seed_hosts=opensearch
- cluster.initial_cluster_manager_nodes=opensearch
- discovery.type=single-node
- bootstrap.memory_lock=true
- plugins.security.system_indices.enabled=false
- plugins.security.system_indices.permission.enabled=false
- plugins.security.ssl.http.enabled=false
- plugins.query.datasources.encryption.masterkey=9a515c99d4313f140a6607053502f4d6
- OPENSEARCH_JAVA_OPTS=-Xms${OPENSEARCH_NODE_MEMORY:-512m} -Xmx${OPENSEARCH_NODE_MEMORY:-512m}
- OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_ADMIN_PASSWORD}
ulimits:
Expand All @@ -92,12 +129,18 @@ services:
soft: 65536
hard: 65536
volumes:
- opensearch-data:/usr/share/opensearch/data
- type: volume
source: opensearch-data
target: /usr/share/opensearch/data
- type: bind
source: /var/run/docker.sock
target: /var/run/docker.sock
ports:
- ${OPENSEARCH_PORT:-9200}:9200
- 9600:9600
- ${OPENSEARCH_PA_PORT:-9600}:9600
expose:
- "${OPENSEARCH_PORT:-9200}"
- "9300"
healthcheck:
test: ["CMD", "curl", "-f", "-u", "admin:${OPENSEARCH_ADMIN_PASSWORD}", "http://localhost:9200/_cluster/health"]
interval: 1m
Expand All @@ -107,6 +150,9 @@ services:
start_interval: 5s
networks:
- opensearch-net
depends_on:
minio:
condition: service_healthy

opensearch-dashboards:
image: opensearchproject/opensearch-dashboards:${DASHBOARDS_VERSION}
Expand All @@ -119,8 +165,16 @@ services:
OPENSEARCH_HOSTS: '["http://opensearch:9200"]'
networks:
- opensearch-net
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:5601/"]
interval: 1m
timeout: 5s
retries: 3
start_period: 30s
start_interval: 5s
depends_on:
- opensearch
opensearch:
condition: service_healthy

minio:
image: minio/minio
Expand All @@ -132,12 +186,37 @@ services:
- "9001:9001"
volumes:
- minio-data:/data
healthcheck:
test: ["CMD", "curl", "-q", "-f", "http://localhost:9000/minio/health/live"]
interval: 1m
timeout: 5s
retries: 3
start_period: 30s
start_interval: 5s
networks:
- opensearch-net

configuration-updater:
image: alpine/curl:latest
entrypoint: /bin/sh
command: /apply-configuration.sh
environment:
- S3_ACCESS_KEY=${S3_ACCESS_KEY}
- S3_SECRET_KEY=${S3_SECRET_KEY}
- OPENSEARCH_ADMIN_PASSWORD=${OPENSEARCH_ADMIN_PASSWORD}
volumes:
- type: bind
source: configuration-updater/apply-configuration.sh
target: /apply-configuration.sh
depends_on:
opensearch-dashboards:
condition: service_healthy
networks:
- opensearch-net

volumes:
metastore-data:
opensearch-data:
minio-data:

networks:
opensearch-net:
29 changes: 29 additions & 0 deletions docker/integ-test/metastore/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Copyright OpenSearch Contributors
# SPDX-License-Identifier: Apache-2.0

# Image that runs the Hive 2.3.9 metastore service (listening on 9083) as the
# unprivileged "hive" user, with the Hadoop 3.3.4 client and AWS jars copied
# into Hive's lib directory.
FROM openjdk:21-jdk-bookworm

# Make "curl | tar" steps fail when the download fails: the default /bin/sh -c
# only reports the exit status of the last command in a pipeline.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

WORKDIR /opt

ENV HADOOP_HOME=/opt/hadoop-3.3.4 \
    HIVE_HOME=/opt/apache-hive-2.3.9-bin

# Create the runtime user first so that ownership can be fixed in the same
# layer that unpacks Hive (a later "chown -R" layer would duplicate the files).
RUN groupadd -f -r hive --gid=1000 \
    && useradd -r -g hive --uid=1000 -d "${HIVE_HOME}" hive

# Download Hive and Hadoop, then copy the Hadoop client and AWS jars into
# Hive's lib directory.
# -f turns an HTTP error into a non-zero exit instead of piping an error page
# into tar.
RUN curl -fsSL https://archive.apache.org/dist/hive/hive-2.3.9/apache-hive-2.3.9-bin.tar.gz | tar zxf - \
    && curl -fsSL https://archive.apache.org/dist/hadoop/common/hadoop-3.3.4/hadoop-3.3.4.tar.gz | tar zxf - \
    && cp \
        "${HADOOP_HOME}/share/hadoop/client/hadoop-client-api-3.3.4.jar" \
        "${HADOOP_HOME}/share/hadoop/client/hadoop-client-runtime-3.3.4.jar" \
        "${HADOOP_HOME}/share/hadoop/tools/lib/hadoop-aws-3.3.4.jar" \
        "${HADOOP_HOME}/share/hadoop/tools/lib/aws-java-sdk-bundle-1.12.262.jar" \
        "${HIVE_HOME}/lib/" \
    && chown hive:hive -R "${HIVE_HOME}"

# Data directory owned by the hive user.
RUN mkdir /data \
    && chown hive:hive /data

WORKDIR $HIVE_HOME
EXPOSE 9083
ENTRYPOINT ["/opt/apache-hive-2.3.9-bin/bin/hive", "--service", "metastore"]
USER hive
Loading

0 comments on commit 5884fea

Please sign in to comment.