diff --git a/docker/integ-test/.env b/docker/integ-test/.env
index cf73bdc89..7d8995956 100644
--- a/docker/integ-test/.env
+++ b/docker/integ-test/.env
@@ -5,9 +5,13 @@ MASTER_UI_PORT=8080
 MASTER_PORT=7077
 UI_PORT=4040
 SPARK_CONNECT_PORT=15002
-PPL_JAR=../../ppl-spark-integration/target/scala-2.12/ppl-spark-integration-assembly-0.7.0-SNAPSHOT.jar
-FLINT_JAR=../../flint-spark-integration/target/scala-2.12/flint-spark-integration-assembly-0.7.0-SNAPSHOT.jar
+PPL_JAR=./ppl-spark-integration/target/scala-2.12/ppl-spark-integration-assembly-0.7.0-SNAPSHOT.jar
+FLINT_JAR=./flint-spark-integration/target/scala-2.12/flint-spark-integration-assembly-0.7.0-SNAPSHOT.jar
+SQL_APP_JAR=./spark-sql-application/target/scala-2.12/sql-job-assembly-0.7.0-SNAPSHOT.jar
 OPENSEARCH_NODE_MEMORY=512m
 OPENSEARCH_ADMIN_PASSWORD=C0rrecthorsebatterystaple.
 OPENSEARCH_PORT=9200
+OPENSEARCH_PA_PORT=9600
 OPENSEARCH_DASHBOARDS_PORT=5601
+S3_ACCESS_KEY=Vt7jnvi5BICr1rkfsheT
+S3_SECRET_KEY=5NK3StGvoGCLUWvbaGN0LBUf9N6sjE94PEzLdqwO
diff --git a/docker/integ-test/configuration-updater/apply-configuration.sh b/docker/integ-test/configuration-updater/apply-configuration.sh
new file mode 100644
index 000000000..7946c75cd
--- /dev/null
+++ b/docker/integ-test/configuration-updater/apply-configuration.sh
@@ -0,0 +1,73 @@
+#!/bin/sh
+
+# Copyright OpenSearch Contributors
+# SPDX-License-Identifier: Apache-2.0
+
+# Login to Minio
+curl -q \
+  -c /tmp/minio-cookies.txt \
+  -H 'Content-Type: application/json' \
+  -d '{"accessKey": "minioadmin", "secretKey": "minioadmin"}' \
+  http://minio-S3:9001/api/v1/login
+# Delete the test bucket
+curl -b /tmp/minio-cookies.txt \
+  -X DELETE \
+  http://minio-S3:9001/api/v1/buckets/test
+# Create the integ-test bucket
+curl -q \
+  -b /tmp/minio-cookies.txt \
+  -X POST \
+  -H 'Content-Type: application/json' \
+  -d '{"name": "integ-test", "versioning": {"enabled": true, "excludePrefixes": [], "excludeFolders": false}, "locking": true}' \
+  http://minio-S3:9001/api/v1/buckets
+# Create the access key
+curl -q \
+  -b /tmp/minio-cookies.txt \
+  -X POST \
+  -H 'Content-Type: application/json' \
+  -d "{\"policy\": \"\", \"accessKey\": \"${S3_ACCESS_KEY}\", \"secretKey\": \"${S3_SECRET_KEY}\", \"description\": \"\", \"comment\": \"\", \"name\": \"\", \"expiry\": null}" \
+  http://minio-S3:9001/api/v1/service-account-credentials
+
+# Login to OpenSearch Dashboards
+echo ">>> Login to OpenSearch dashboards"
+curl -q \
+  -c /tmp/opensearch-cookies.txt \
+  -X POST \
+  -H 'Content-Type: application/json' \
+  -H 'Osd-Version: 2.18.0' \
+  -H 'Osd-Xsrf: fetch' \
+  -d "{\"username\": \"admin\", \"password\": \"${OPENSEARCH_ADMIN_PASSWORD}\"}" \
+  'http://opensearch-dashboards:5601/auth/login?dataSourceId='
+if [ "$?"
-eq "0" ]; then + echo " >>> Login successful" +else + echo " >>> Login failed" +fi +# Create the S3/Glue datasource +echo ">>> Creating datasource" +curl -q \ + -b /tmp/opensearch-cookies.txt \ + -X POST \ + -H 'Content-Type: application/json' \ + -H 'Osd-Version: 2.18.0' \ + -H 'Osd-Xsrf: fetch' \ + -d "{\"name\": \"mys3\", \"allowedRoles\": [], \"connector\": \"s3glue\", \"properties\": {\"glue.auth.type\": \"iam_role\", \"glue.auth.role_arn\": \"arn:aws:iam::123456789012:role/S3Access\", \"glue.indexstore.opensearch.uri\": \"http://opensearch:9200\", \"glue.indexstore.opensearch.auth\": \"basicauth\", \"glue.indexstore.opensearch.auth.username\": \"admin\", \"glue.indexstore.opensearch.auth.password\": \"${OPENSEARCH_ADMIN_PASSWORD}\"}}" \ + http://opensearch-dashboards:5601/api/directquery/dataconnections +if [ "$?" -eq "0" ]; then + echo " >>> S3 datasource created" +else + echo " >>> Failed to create S3 datasource" +fi + +echo ">>> Setting cluster settings" +curl -v \ + -u "admin:${OPENSEARCH_ADMIN_PASSWORD}" \ + -X PUT \ + -H 'Content-Type: application/json' \ + -d '{"persistent": {"plugins.query.executionengine.spark.config": "{\"applicationId\":\"integ-test\",\"executionRoleARN\":\"arn:aws:iam::xxxxx:role/emr-job-execution-role\",\"region\":\"us-west-2\", \"sparkSubmitParameters\": \"--conf spark.dynamicAllocation.enabled=false\"}"}}' \ + http://opensearch:9200/_cluster/settings +if [ "$?" -eq "0" ]; then + echo " >>> Successfully set cluster settings" +else + echo " >>> Failed to set cluster settings" +fi diff --git a/docker/integ-test/docker-compose.yml b/docker/integ-test/docker-compose.yml index c5ee53d7d..9fe79dc22 100644 --- a/docker/integ-test/docker-compose.yml +++ b/docker/integ-test/docker-compose.yml @@ -1,13 +1,35 @@ services: + metastore: + build: ./metastore + container_name: metastore + ports: + - "${THRIFT_PORT:-9083}:9083" + volumes: + - type: bind + source: ./metastore/hive-site.xml + target: /opt/apache-hive-2.3.9-bin/conf/hive-site.xml + - type: bind + source: ./metastore/hive-log4j2.properties + target: /opt/apache-hive-2.3.9-bin/conf/hive-log4j2.properties + - type: volume + source: metastore-data + target: /data + networks: + - opensearch-net + spark: - image: bitnami/spark:${SPARK_VERSION:-3.5.3} + build: + context: ./spark + dockerfile: Dockerfile + args: + SPARK_VERSION: ${SPARK_VERSION:-3.5.3} container_name: spark + entrypoint: /opt/bitnami/scripts/spark/spark-master-entrypoint.sh ports: - "${MASTER_UI_PORT:-8080}:8080" - "${MASTER_PORT:-7077}:7077" - "${UI_PORT:-4040}:4040" - "${SPARK_CONNECT_PORT}:15002" - entrypoint: /opt/bitnami/scripts/spark/master-entrypoint.sh environment: - SPARK_MODE=master - SPARK_RPC_AUTHENTICATION_ENABLED=no @@ -17,19 +39,10 @@ services: - SPARK_PUBLIC_DNS=localhost volumes: - type: bind - source: ./spark-master-entrypoint.sh - target: /opt/bitnami/scripts/spark/master-entrypoint.sh - - type: bind - source: ./spark-defaults.conf - target: /opt/bitnami/spark/conf/spark-defaults.conf - - type: bind - source: ./log4j2.properties - target: /opt/bitnami/spark/conf/log4j2.properties - - type: bind - source: $PPL_JAR + source: ../../$PPL_JAR target: /opt/bitnami/spark/jars/ppl-spark-integration.jar - type: bind - source: $FLINT_JAR + source: ../../$FLINT_JAR target: /opt/bitnami/spark/jars/flint-spark-integration.jar healthcheck: test: ["CMD", "curl", "-f", "http://localhost:8080/"] @@ -40,9 +53,22 @@ services: start_interval: 5s networks: - opensearch-net + depends_on: + metastore: + condition: service_started + opensearch: + 
condition: service_healthy + opensearch-dashboards: + condition: service_healthy + configuration-updater: + condition: service_completed_successfully spark-worker: - image: bitnami/spark:${SPARK_VERSION:-3.5.3} + build: + context: ./spark + dockerfile: Dockerfile + args: + SPARK_VERSION: ${SPARK_VERSION:-3.5.3} container_name: spark-worker environment: - SPARK_MODE=worker @@ -56,32 +82,43 @@ services: - SPARK_PUBLIC_DNS=localhost volumes: - type: bind - source: ./spark-defaults.conf - target: /opt/bitnami/spark/conf/spark-defaults.conf - - type: bind - source: ./log4j2.properties - target: /opt/bitnami/spark/conf/log4j2.properties - - type: bind - source: $PPL_JAR + source: ../../$PPL_JAR target: /opt/bitnami/spark/jars/ppl-spark-integration.jar - type: bind - source: $FLINT_JAR + source: ../../$FLINT_JAR target: /opt/bitnami/spark/jars/flint-spark-integration.jar networks: - opensearch-net depends_on: - - spark + metastore: + condition: service_started + spark: + condition: service_healthy + + spark-submit: + build: + context: ../../ + dockerfile: docker/integ-test/spark-submit/Dockerfile + args: + FLINT_JAR: ${FLINT_JAR} + PPL_JAR: ${PPL_JAR} + SQL_APP_JAR: ${SQL_APP_JAR} + depends_on: + metastore: + condition: service_completed_successfully opensearch: - image: opensearchproject/opensearch:${OPENSEARCH_VERSION:-latest} + build: ./opensearch container_name: opensearch environment: - cluster.name=opensearch-cluster - node.name=opensearch - - discovery.seed_hosts=opensearch - - cluster.initial_cluster_manager_nodes=opensearch + - discovery.type=single-node - bootstrap.memory_lock=true + - plugins.security.system_indices.enabled=false + - plugins.security.system_indices.permission.enabled=false - plugins.security.ssl.http.enabled=false + - plugins.query.datasources.encryption.masterkey=9a515c99d4313f140a6607053502f4d6 - OPENSEARCH_JAVA_OPTS=-Xms${OPENSEARCH_NODE_MEMORY:-512m} -Xmx${OPENSEARCH_NODE_MEMORY:-512m} - OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_ADMIN_PASSWORD} ulimits: @@ -92,12 +129,18 @@ services: soft: 65536 hard: 65536 volumes: - - opensearch-data:/usr/share/opensearch/data + - type: volume + source: opensearch-data + target: /usr/share/opensearch/data + - type: bind + source: /var/run/docker.sock + target: /var/run/docker.sock ports: - ${OPENSEARCH_PORT:-9200}:9200 - - 9600:9600 + - ${OPENSEARCH_PA_PORT:-9600}:9600 expose: - "${OPENSEARCH_PORT:-9200}" + - "9300" healthcheck: test: ["CMD", "curl", "-f", "-u", "admin:${OPENSEARCH_ADMIN_PASSWORD}", "http://localhost:9200/_cluster/health"] interval: 1m @@ -107,6 +150,9 @@ services: start_interval: 5s networks: - opensearch-net + depends_on: + minio: + condition: service_healthy opensearch-dashboards: image: opensearchproject/opensearch-dashboards:${DASHBOARDS_VERSION} @@ -119,8 +165,16 @@ services: OPENSEARCH_HOSTS: '["http://opensearch:9200"]' networks: - opensearch-net + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:5601/"] + interval: 1m + timeout: 5s + retries: 3 + start_period: 30s + start_interval: 5s depends_on: - - opensearch + opensearch: + condition: service_healthy minio: image: minio/minio @@ -132,12 +186,37 @@ services: - "9001:9001" volumes: - minio-data:/data + healthcheck: + test: ["CMD", "curl", "-q", "-f", "http://localhost:9000/minio/health/live"] + interval: 1m + timeout: 5s + retries: 3 + start_period: 30s + start_interval: 5s + networks: + - opensearch-net + + configuration-updater: + image: alpine/curl:latest + entrypoint: /bin/sh + command: /apply-configuration.sh + environment: + - 
S3_ACCESS_KEY=${S3_ACCESS_KEY} + - S3_SECRET_KEY=${S3_SECRET_KEY} + - OPENSEARCH_ADMIN_PASSWORD=${OPENSEARCH_ADMIN_PASSWORD} + volumes: + - type: bind + source: configuration-updater/apply-configuration.sh + target: /apply-configuration.sh + depends_on: + opensearch-dashboards: + condition: service_healthy networks: - opensearch-net volumes: + metastore-data: opensearch-data: minio-data: - networks: opensearch-net: diff --git a/docker/integ-test/metastore/Dockerfile b/docker/integ-test/metastore/Dockerfile new file mode 100644 index 000000000..79a7d725c --- /dev/null +++ b/docker/integ-test/metastore/Dockerfile @@ -0,0 +1,29 @@ +# Copyright OpenSearch Contributors +# SPDX-License-Identifier: Apache-2.0 + +FROM openjdk:21-jdk-bookworm + +WORKDIR /opt + +ENV HADOOP_HOME=/opt/hadoop-3.3.4 +ENV HIVE_HOME=/opt/apache-hive-2.3.9-bin + +#RUN apt-get update +RUN curl -L https://archive.apache.org/dist/hive/hive-2.3.9/apache-hive-2.3.9-bin.tar.gz | tar zxf - +RUN curl -L https://archive.apache.org/dist/hadoop/common/hadoop-3.3.4/hadoop-3.3.4.tar.gz | tar zxf - +RUN cp $HADOOP_HOME/share/hadoop/client/hadoop-client-api-3.3.4.jar $HIVE_HOME/lib/ +RUN cp $HADOOP_HOME/share/hadoop/client/hadoop-client-runtime-3.3.4.jar $HIVE_HOME/lib/ +RUN cp $HADOOP_HOME/share/hadoop/tools/lib/hadoop-aws-3.3.4.jar $HIVE_HOME/lib/ +RUN cp $HADOOP_HOME/share/hadoop/tools/lib/aws-java-sdk-bundle-1.12.262.jar $HIVE_HOME/lib/ + +RUN groupadd -f -r hive --gid=1000 +RUN useradd -r -g hive --uid=1000 -d ${HIVE_HOME} hive +RUN chown hive:hive -R ${HIVE_HOME} + +RUN mkdir /data +RUN chown hive:hive /data + +WORKDIR $HIVE_HOME +EXPOSE 9083 +ENTRYPOINT ["/opt/apache-hive-2.3.9-bin/bin/hive", "--service", "metastore"] +USER hive diff --git a/docker/integ-test/metastore/hive-log4j2.properties b/docker/integ-test/metastore/hive-log4j2.properties new file mode 100644 index 000000000..7e6a3b08e --- /dev/null +++ b/docker/integ-test/metastore/hive-log4j2.properties @@ -0,0 +1,62 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+status = INFO
+name = HiveLog4j2
+packages = org.apache.hadoop.hive.ql.log
+
+# list of properties
+property.hive.log.level = INFO
+property.hive.root.logger = console
+property.hive.log.dir = ${sys:java.io.tmpdir}/${sys:user.name}
+property.hive.log.file = hive.log
+property.hive.perflogger.log.level = INFO
+
+# list of all appenders
+appenders = console
+
+# console appender
+appender.console.type = Console
+appender.console.name = console
+appender.console.target = SYSTEM_ERR
+appender.console.layout.type = PatternLayout
+appender.console.layout.pattern = %d{ISO8601} %5p [%t] %c{2}: %m%n
+
+# list of all loggers
+loggers = NIOServerCnxn, ClientCnxnSocketNIO, DataNucleus, Datastore, JPOX, PerfLogger
+
+logger.NIOServerCnxn.name = org.apache.zookeeper.server.NIOServerCnxn
+logger.NIOServerCnxn.level = WARN
+
+logger.ClientCnxnSocketNIO.name = org.apache.zookeeper.ClientCnxnSocketNIO
+logger.ClientCnxnSocketNIO.level = WARN
+
+logger.DataNucleus.name = DataNucleus
+logger.DataNucleus.level = ERROR
+
+logger.Datastore.name = Datastore
+logger.Datastore.level = ERROR
+
+logger.JPOX.name = JPOX
+logger.JPOX.level = ERROR
+
+logger.PerfLogger.name = org.apache.hadoop.hive.ql.log.PerfLogger
+logger.PerfLogger.level = ${sys:hive.perflogger.log.level}
+
+# root logger
+rootLogger.level = ${sys:hive.log.level}
+rootLogger.appenderRefs = root
+rootLogger.appenderRef.root.ref = ${sys:hive.root.logger}
diff --git a/docker/integ-test/metastore/hive-site.xml b/docker/integ-test/metastore/hive-site.xml
new file mode 100644
index 000000000..e235306eb
--- /dev/null
+++ b/docker/integ-test/metastore/hive-site.xml
@@ -0,0 +1,53 @@
+<configuration>
+  <property>
+    <name>hive.metastore.schema.verification</name>
+    <value>false</value>
+  </property>
+  <property>
+    <name>hive.metastore.warehouse.dir</name>
+    <value>file:///tmp</value>
+  </property>
+  <property>
+    <name>fs.default.name</name>
+    <value>file:///tmp</value>
+  </property>
+  <property>
+    <name>javax.jdo.option.ConnectionURL</name>
+    <value>jdbc:derby:;databaseName=/data/metastore_db;create=true</value>
+  </property>
+  <property>
+    <name>javax.jdo.option.ConnectionDriverName</name>
+    <value>org.apache.derby.jdbc.EmbeddedDriver</value>
+  </property>
+  <property>
+    <name>datanucleus.schema.autoCreateTables</name>
+    <value>true</value>
+  </property>
+  <property>
+    <name>fs.s3a.impl</name>
+    <value>org.apache.hadoop.fs.s3a.S3AFileSystem</value>
+  </property>
+  <property>
+    <name>fs.s3a.path.style.access</name>
+    <value>true</value>
+  </property>
+  <property>
+    <name>fs.s3a.access.key</name>
+    <value>Vt7jnvi5BICr1rkfsheT</value>
+  </property>
+  <property>
+    <name>fs.s3a.secret.key</name>
+    <value>5NK3StGvoGCLUWvbaGN0LBUf9N6sjE94PEzLdqwO</value>
+  </property>
+  <property>
+    <name>fs.s3a.endpoint</name>
+    <value>http://minio-S3:9000</value>
+  </property>
+  <property>
+    <name>fs.s3a.connection.ssl.enabled</name>
+    <value>false</value>
+  </property>
+</configuration>
diff --git a/docker/integ-test/opensearch/Dockerfile b/docker/integ-test/opensearch/Dockerfile
new file mode 100644
index 000000000..da042516e
--- /dev/null
+++ b/docker/integ-test/opensearch/Dockerfile
@@ -0,0 +1,44 @@
+# Copyright OpenSearch Contributors
+# SPDX-License-Identifier: Apache-2.0
+
+FROM opensearchproject/opensearch:latest
+
+USER root
+
+RUN mkdir /tmp/alter-emr-jar
+WORKDIR /tmp/alter-emr-jar
+
+ENV AWS_VERSION=1.12.651
+
+RUN curl -O -L https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-emrserverless/${AWS_VERSION}/aws-java-sdk-emrserverless-${AWS_VERSION}.jar
+RUN curl -O -L https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-core/${AWS_VERSION}/aws-java-sdk-core-${AWS_VERSION}.jar
+
+COPY emr-src /tmp/alter-emr-jar/emr-src
+WORKDIR /tmp/alter-emr-jar/emr-src
+RUN /usr/share/opensearch/jdk/bin/javac -cp ../aws-java-sdk-emrserverless-${AWS_VERSION}.jar:../aws-java-sdk-core-${AWS_VERSION}.jar com/amazonaws/services/emrserverless/AWSEMRServerlessClientBuilder.java org/opensearch/spark/emrserverless/DockerEMRServerlessClient.java
+RUN mkdir /tmp/alter-emr-jar/extracted
+WORKDIR /tmp/alter-emr-jar/extracted
+RUN /usr/share/opensearch/jdk/bin/jar -xf
../aws-java-sdk-emrserverless-${AWS_VERSION}.jar +RUN cp ../emr-src/com/amazonaws/services/emrserverless/AWSEMRServerlessClientBuilder.class com/amazonaws/services/emrserverless/ +RUN mkdir -p org/opensearch/spark/emrserverless +RUN cp ../emr-src/org/opensearch/spark/emrserverless/DockerEMRServerlessClient.class org/opensearch/spark/emrserverless/ +RUN /usr/share/opensearch/jdk/bin/jar -cfM /usr/share/opensearch/plugins/opensearch-sql/aws-java-sdk-emrserverless-*.jar META-INF/MANIFEST.MF * +RUN chown opensearch:opensearch /usr/share/opensearch/plugins/opensearch-sql/aws-java-sdk-emrserverless-*.jar +RUN rm -rf /tmp/alter-emr-jar + +RUN yum install -y docker util-linux + +COPY opensearch-docker-it-entrypoint.sh /usr/share/opensearch/opensearch-docker-it-entrypoint.sh +COPY docker-command-runner.sh /usr/share/opensearch/docker-command-runner.sh +COPY opensearch_security.policy /usr/share/opensearch/config/opensearch-performance-analyzer/opensearch_security.policy +COPY log4j2.properties /usr/share/opensearch/config/log4j2.properties + +RUN chown opensearch:opensearch /usr/share/opensearch/config/opensearch-performance-analyzer/opensearch_security.policy +RUN chown opensearch:opensearch /usr/share/opensearch/config/log4j2.properties + +WORKDIR /usr/share/opensearch +ENTRYPOINT ["./opensearch-docker-it-entrypoint.sh"] +CMD ["opensearch"] + +EXPOSE 9200 +EXPOSE 9300 diff --git a/docker/integ-test/opensearch/docker-command-runner.sh b/docker/integ-test/opensearch/docker-command-runner.sh new file mode 100755 index 000000000..25c1927ca --- /dev/null +++ b/docker/integ-test/opensearch/docker-command-runner.sh @@ -0,0 +1,32 @@ +#!/bin/bash + +# Copyright OpenSearch Contributors +# SPDX-License-Identifier: Apache-2.0 + +function process_files { + for cmd_file in `ls -1`; do + echo "$cmd_file" | grep -q 'cmd$' + if [ "$?" -eq "0" ]; then + stdout_filename=$(echo $cmd_file | sed -e 's/cmd$/stdout/') + stderr_filename=$(echo $cmd_file | sed -e 's/cmd$/stderr/') + exit_code_filename=$(echo $cmd_file | sed -e 's/cmd/exitCode/') + + /usr/bin/docker $(cat $cmd_file) > $stdout_filename 2> $stderr_filename + echo "$?" > $exit_code_filename + + rm $cmd_file + fi + done +} + +if [ ! 
-d '/tmp/docker' ]; then + mkdir /tmp/docker + chown opensearch:opensearch /tmp/docker +fi + +cd /tmp/docker +while true; do + process_files + sleep 1 +done + diff --git a/docker/integ-test/opensearch/emr-src/com/amazonaws/services/emrserverless/AWSEMRServerlessClientBuilder.java b/docker/integ-test/opensearch/emr-src/com/amazonaws/services/emrserverless/AWSEMRServerlessClientBuilder.java new file mode 100644 index 000000000..4666b494f --- /dev/null +++ b/docker/integ-test/opensearch/emr-src/com/amazonaws/services/emrserverless/AWSEMRServerlessClientBuilder.java @@ -0,0 +1,34 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package com.amazonaws.services.emrserverless; + +import com.amazonaws.ClientConfigurationFactory; +import com.amazonaws.client.AwsSyncClientParams; +import com.amazonaws.client.builder.AwsSyncClientBuilder; + +import org.opensearch.spark.emrserverless.DockerEMRServerlessClient; + +public class AWSEMRServerlessClientBuilder extends AwsSyncClientBuilder { + private static final ClientConfigurationFactory CLIENT_CONFIG_FACTORY = new ClientConfigurationFactory(); + + private AWSEMRServerlessClientBuilder() { + super(CLIENT_CONFIG_FACTORY); + } + + public static AWSEMRServerlessClientBuilder standard() { + return new AWSEMRServerlessClientBuilder(); + } + + public static AWSEMRServerless defaultClient() { + return (AWSEMRServerless) standard().build(); + } + + protected AWSEMRServerless build(AwsSyncClientParams params) { + DockerEMRServerlessClient client = new DockerEMRServerlessClient(CLIENT_CONFIG_FACTORY.getConfig()); + client.setServiceNameIntern("emr"); + return client; + } +} diff --git a/docker/integ-test/opensearch/emr-src/org/opensearch/spark/emrserverless/DockerEMRServerlessClient.java b/docker/integ-test/opensearch/emr-src/org/opensearch/spark/emrserverless/DockerEMRServerlessClient.java new file mode 100644 index 000000000..88cc448a9 --- /dev/null +++ b/docker/integ-test/opensearch/emr-src/org/opensearch/spark/emrserverless/DockerEMRServerlessClient.java @@ -0,0 +1,216 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.spark.emrserverless; + +import com.amazonaws.AmazonWebServiceClient; +import com.amazonaws.AmazonWebServiceRequest; +import com.amazonaws.ClientConfiguration; +import com.amazonaws.ResponseMetadata; +import com.amazonaws.services.emrserverless.AWSEMRServerless; +import com.amazonaws.services.emrserverless.model.CancelJobRunRequest; +import com.amazonaws.services.emrserverless.model.CancelJobRunResult; +import com.amazonaws.services.emrserverless.model.CreateApplicationRequest; +import com.amazonaws.services.emrserverless.model.CreateApplicationResult; +import com.amazonaws.services.emrserverless.model.DeleteApplicationRequest; +import com.amazonaws.services.emrserverless.model.DeleteApplicationResult; +import com.amazonaws.services.emrserverless.model.GetApplicationRequest; +import com.amazonaws.services.emrserverless.model.GetApplicationResult; +import com.amazonaws.services.emrserverless.model.GetDashboardForJobRunRequest; +import com.amazonaws.services.emrserverless.model.GetDashboardForJobRunResult; +import com.amazonaws.services.emrserverless.model.GetJobRunRequest; +import com.amazonaws.services.emrserverless.model.GetJobRunResult; +import com.amazonaws.services.emrserverless.model.ListApplicationsRequest; +import com.amazonaws.services.emrserverless.model.ListApplicationsResult; +import 
com.amazonaws.services.emrserverless.model.ListJobRunsRequest; +import com.amazonaws.services.emrserverless.model.ListJobRunsResult; +import com.amazonaws.services.emrserverless.model.ListTagsForResourceRequest; +import com.amazonaws.services.emrserverless.model.ListTagsForResourceResult; +import com.amazonaws.services.emrserverless.model.StartApplicationRequest; +import com.amazonaws.services.emrserverless.model.StartApplicationResult; +import com.amazonaws.services.emrserverless.model.StartJobRunRequest; +import com.amazonaws.services.emrserverless.model.StartJobRunResult; +import com.amazonaws.services.emrserverless.model.StopApplicationRequest; +import com.amazonaws.services.emrserverless.model.StopApplicationResult; +import com.amazonaws.services.emrserverless.model.TagResourceRequest; +import com.amazonaws.services.emrserverless.model.TagResourceResult; +import com.amazonaws.services.emrserverless.model.UntagResourceRequest; +import com.amazonaws.services.emrserverless.model.UntagResourceResult; +import com.amazonaws.services.emrserverless.model.UpdateApplicationRequest; +import com.amazonaws.services.emrserverless.model.UpdateApplicationResult; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.Collectors; + +public class DockerEMRServerlessClient extends AmazonWebServiceClient implements AWSEMRServerless { + private static final AtomicInteger JOB_ID = new AtomicInteger(1); + + public DockerEMRServerlessClient(ClientConfiguration clientConfiguration) { + super(clientConfiguration); + setEndpointPrefix("emr"); + } + + @Override + public CancelJobRunResult cancelJobRun(final CancelJobRunRequest cancelJobRunRequest) { + return null; + } + + @Override + public CreateApplicationResult createApplication(final CreateApplicationRequest createApplicationRequest) { + return null; + } + + @Override + public DeleteApplicationResult deleteApplication(final DeleteApplicationRequest deleteApplicationRequest) { + return null; + } + + @Override + public GetApplicationResult getApplication(final GetApplicationRequest getApplicationRequest) { + return null; + } + + @Override + public GetDashboardForJobRunResult getDashboardForJobRun( + final GetDashboardForJobRunRequest getDashboardForJobRunRequest) { + return null; + } + + @Override + public GetJobRunResult getJobRun(final GetJobRunRequest getJobRunRequest) { + return null; + } + + @Override + public ListApplicationsResult listApplications(final ListApplicationsRequest listApplicationsRequest) { + return null; + } + + @Override + public ListJobRunsResult listJobRuns(final ListJobRunsRequest listJobRunsRequest) { + return null; + } + + @Override + public ListTagsForResourceResult listTagsForResource( + final ListTagsForResourceRequest listTagsForResourceRequest) { + return null; + } + + @Override + public StartApplicationResult startApplication(final StartApplicationRequest startApplicationRequest) { + return null; + } + + @Override + public StartJobRunResult startJobRun(final StartJobRunRequest startJobRunRequest) { + String entryPoint = startJobRunRequest.getJobDriver().getSparkSubmit().getEntryPoint(); + List entryPointArguments = startJobRunRequest.getJobDriver().getSparkSubmit().getEntryPointArguments(); + String sparkSubmitParameters = startJobRunRequest.getJobDriver().getSparkSubmit().getSparkSubmitParameters(); + 
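+        // What follows emulates an EMR Serverless job run locally: build a `docker run` command
+        // for the integ-test-spark-submit image, drop the --conf options that only apply on real
+        // EMR (the Flint datasource settings and the Glue metastore client factory), rewrite the
+        // EMR JAVA_HOME values to the Bitnami JVM path, then hand the command to the
+        // docker-command-runner.sh poller through a file in /tmp/docker and wait for its exit-code file.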
+
+        final int jobId = JOB_ID.getAndIncrement();
+
+        List<String> runContainerCmd = new ArrayList<>();
+        runContainerCmd.add("run");
+        runContainerCmd.add("-d");
+        runContainerCmd.add("--rm");
+        runContainerCmd.add("--env");
+        runContainerCmd.add("SERVERLESS_EMR_JOB_ID=" + jobId);
+        runContainerCmd.add("--network");
+        runContainerCmd.add("integ-test_opensearch-net");
+        runContainerCmd.add("integ-test-spark-submit:latest");
+        runContainerCmd.add("/opt/bitnami/spark/bin/spark-submit");
+        runContainerCmd.add("--deploy-mode");
+        runContainerCmd.add("client");
+        runContainerCmd.add("--exclude-packages");
+        runContainerCmd.add("org.opensearch:opensearch-spark-standalone_2.12,org.opensearch:opensearch-spark-sql-application_2.12,org.opensearch:opensearch-spark-ppl_2.12");
+        runContainerCmd.add("--master");
+        runContainerCmd.add("local[2]");
+
+        runContainerCmd.addAll(Arrays.asList(sparkSubmitParameters.split(" ")));
+        runContainerCmd.addAll(entryPointArguments);
+
+        final List<String> cmd = runContainerCmd.stream().filter(s -> !s.isBlank()).collect(Collectors.toList());
+
+        for (int i = 1; i < cmd.size(); i++) {
+            if (cmd.get(i - 1).equals("--conf")) {
+                if (cmd.get(i).startsWith("spark.datasource.flint.customAWSCredentialsProvider=") ||
+                        cmd.get(i).startsWith("spark.datasource.flint.") ||
+                        cmd.get(i).startsWith("spark.hadoop.hive.metastore.client.factory.class=")) {
+                    cmd.remove(i - 1);
+                    cmd.remove(i - 1);
+                    i -= 2;
+                } else if (cmd.get(i).startsWith("spark.emr-serverless.driverEnv.JAVA_HOME=")) {
+                    cmd.set(i, "spark.emr-serverless.driverEnv.JAVA_HOME=/opt/bitnami/java");
+                } else if (cmd.get(i).startsWith("spark.executorEnv.JAVA_HOME=")) {
+                    cmd.set(i, "spark.executorEnv.JAVA_HOME=/opt/bitnami/java");
+                }
+            }
+        }
+
+        cmd.add(cmd.size() - 1, "/app/spark-sql-application.jar");
+
+        System.out.println(">>> " + String.join(" ", cmd));
+
+        try {
+            File dockerDir = new File("/tmp/docker");
+            File cmdFile = File.createTempFile("docker_", ".cmd", dockerDir);
+            FileOutputStream fos = new FileOutputStream(cmdFile);
+            fos.write(String.join(" ", cmd).getBytes(StandardCharsets.UTF_8));
+            fos.close();
+
+            String cmdFilename = cmdFile.getName();
+            String filenameBase = cmdFilename.substring(7, cmdFilename.length() - 4);
+            File exitCodeFile = new File(dockerDir, "docker_" + filenameBase + ".exitCode");
+            for (int i = 0; i < 600 && !exitCodeFile.exists(); i++) {
+                Thread.sleep(100L);
+            }
+
+            if (exitCodeFile.exists()) {
+                StartJobRunResult startJobResult = new StartJobRunResult();
+                startJobResult.setApplicationId(startJobRunRequest.getApplicationId());
+                startJobResult.setArn("arn:aws:emr-containers:foo:123456789012:/virtualclusters/0/jobruns/" + jobId);
+                startJobResult.setJobRunId(Integer.toString(jobId));
+                return startJobResult;
+            }
+        } catch (IOException | InterruptedException e) {
+            e.printStackTrace();
+        }
+
+        return null;
+    }
+
+    @Override
+    public StopApplicationResult stopApplication(final StopApplicationRequest stopApplicationRequest) {
+        return null;
+    }
+
+    @Override
+    public TagResourceResult tagResource(final TagResourceRequest tagResourceRequest) {
+        return null;
+    }
+
+    @Override
+    public UntagResourceResult untagResource(final UntagResourceRequest untagResourceRequest) {
+        return null;
+    }
+
+    @Override
+    public UpdateApplicationResult updateApplication(final UpdateApplicationRequest updateApplicationRequest) {
+        return null;
+    }
+
+    @Override
+    public ResponseMetadata getCachedResponseMetadata(final AmazonWebServiceRequest amazonWebServiceRequest) {
+        return null;
+    }
+}
diff --git
a/docker/integ-test/log4j2.properties b/docker/integ-test/opensearch/log4j2.properties similarity index 100% rename from docker/integ-test/log4j2.properties rename to docker/integ-test/opensearch/log4j2.properties diff --git a/docker/integ-test/opensearch/opensearch-docker-it-entrypoint.sh b/docker/integ-test/opensearch/opensearch-docker-it-entrypoint.sh new file mode 100755 index 000000000..277f531a9 --- /dev/null +++ b/docker/integ-test/opensearch/opensearch-docker-it-entrypoint.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +# Copyright OpenSearch Contributors +# SPDX-License-Identifier: Apache-2.0 + +./docker-command-runner.sh & +echo $! > /var/run/docker-command-runner.pid + +su opensearch ./opensearch-docker-entrypoint.sh "$@" + +kill -TERM `cat /var/run/docker-command-runner.pid` diff --git a/docker/integ-test/opensearch/opensearch_security.policy b/docker/integ-test/opensearch/opensearch_security.policy new file mode 100644 index 000000000..8e4de3246 --- /dev/null +++ b/docker/integ-test/opensearch/opensearch_security.policy @@ -0,0 +1,18 @@ +grant { + permission java.lang.management.ManagementPermission "control"; + permission java.net.SocketPermission "localhost:9600","connect,resolve"; + permission java.lang.RuntimePermission "getClassLoader"; + permission java.io.FilePermission "/tmp/docker/-", "read,write,delete"; +}; + +grant codebase "file:${java.home}/../lib/tools.jar" { + permission java.security.AllPermission; +}; + +grant codeBase "jrt:/jdk.attach" { + permission java.security.AllPermission; +}; + +grant codeBase "jrt:/jdk.internal.jvmstat" { + permission java.security.AllPermission; +}; diff --git a/docker/integ-test/spark-submit/Dockerfile b/docker/integ-test/spark-submit/Dockerfile new file mode 100644 index 000000000..d61caad52 --- /dev/null +++ b/docker/integ-test/spark-submit/Dockerfile @@ -0,0 +1,20 @@ +# Copyright OpenSearch Contributors +# SPDX-License-Identifier: Apache-2.0 + +FROM bitnami/spark:3.5.3 +ARG FLINT_JAR +ARG PPL_JAR +ARG SQL_APP_JAR +ENV FLINT_JAR $FLINT_JAR +ENV PPL_JAR $PPL_JAR +ENV SQL_APP_JAR $SQL_APP_JAR + +COPY docker/integ-test/spark/spark-defaults.conf /opt/bitnami/spark/conf/spark-defaults.conf +COPY ${FLINT_JAR} /opt/bitnami/spark/jars/flint-spark-integration.jar +COPY ${PPL_JAR} /opt/bitnami/spark/jars/ppl-spark-integration.jar + +USER root +RUN mkdir /app +COPY ${SQL_APP_JAR} /app/spark-sql-application.jar + +USER 1001 diff --git a/docker/integ-test/spark-defaults.conf b/docker/integ-test/spark-submit/spark-defaults.conf similarity index 74% rename from docker/integ-test/spark-defaults.conf rename to docker/integ-test/spark-submit/spark-defaults.conf index 19b9e4ec1..acd49625b 100644 --- a/docker/integ-test/spark-defaults.conf +++ b/docker/integ-test/spark-submit/spark-defaults.conf @@ -25,11 +25,21 @@ # spark.serializer org.apache.spark.serializer.KryoSerializer # spark.driver.memory 5g # spark.executor.extraJavaOptions -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three" +spark.sql.catalogImplementation hive +spark.hadoop.hive.metastore.uris thrift://metastore:9083 spark.sql.extensions org.opensearch.flint.spark.FlintPPLSparkExtensions,org.opensearch.flint.spark.FlintSparkExtensions spark.sql.catalog.dev org.apache.spark.opensearch.catalog.OpenSearchCatalog +spark.sql.catalog.mys3 org.opensearch.sql.FlintDelegatingSessionCatalog spark.datasource.flint.host opensearch spark.datasource.flint.port 9200 spark.datasource.flint.scheme http spark.datasource.flint.auth basic spark.datasource.flint.auth.username admin 
spark.datasource.flint.auth.password C0rrecthorsebatterystaple. +spark.sql.warehouse.dir s3a://integ-test +spark.hadoop.fs.s3a.impl org.apache.hadoop.fs.s3a.S3AFileSystem +spark.hadoop.fs.s3a.path.style.access true +spark.hadoop.fs.s3a.access.key Vt7jnvi5BICr1rkfsheT +spark.hadoop.fs.s3a.secret.key 5NK3StGvoGCLUWvbaGN0LBUf9N6sjE94PEzLdqwO +spark.hadoop.fs.s3a.endpoint http://minio-S3:9000 +spark.hadoop.fs.s3a.connection.ssl.enabled false \ No newline at end of file diff --git a/docker/integ-test/spark/Dockerfile b/docker/integ-test/spark/Dockerfile new file mode 100644 index 000000000..f180320a6 --- /dev/null +++ b/docker/integ-test/spark/Dockerfile @@ -0,0 +1,14 @@ +# Copyright OpenSearch Contributors +# SPDX-License-Identifier: Apache-2.0 + +ARG SPARK_VERSION=3.5.3 +FROM bitnami/spark:${SPARK_VERSION} + +USER root +RUN apt update +RUN apt install -y curl + +USER 1001 +COPY ./spark-defaults.conf /opt/bitnami/spark/conf/spark-defaults.conf +COPY ./log4j2.properties /opt/bitnami/spark/conf/log4j2.properties +COPY ./spark-master-entrypoint.sh /opt/bitnami/scripts/spark/spark-master-entrypoint.sh diff --git a/docker/integ-test/spark/log4j2.properties b/docker/integ-test/spark/log4j2.properties new file mode 100644 index 000000000..ab96e03ba --- /dev/null +++ b/docker/integ-test/spark/log4j2.properties @@ -0,0 +1,69 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Set everything to be logged to the console +rootLogger.level = info +rootLogger.appenderRef.stdout.ref = console + +# In the pattern layout configuration below, we specify an explicit `%ex` conversion +# pattern for logging Throwables. If this was omitted, then (by default) Log4J would +# implicitly add an `%xEx` conversion pattern which logs stacktraces with additional +# class packaging information. That extra information can sometimes add a substantial +# performance overhead, so we disable it in our default logging config. +# For more information, see SPARK-39361. +appender.console.type = Console +appender.console.name = console +appender.console.target = SYSTEM_ERR +appender.console.layout.type = PatternLayout +appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n%ex + +# Set the default spark-shell/spark-sql log level to WARN. When running the +# spark-shell/spark-sql, the log level for these classes is used to overwrite +# the root logger's log level, so that the user can have different defaults +# for the shell and regular Spark apps. 
+logger.repl.name = org.apache.spark.repl.Main +logger.repl.level = warn + +logger.thriftserver.name = org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver +logger.thriftserver.level = warn + +# Settings to quiet third party logs that are too verbose +logger.jetty1.name = org.sparkproject.jetty +logger.jetty1.level = warn +logger.jetty2.name = org.sparkproject.jetty.util.component.AbstractLifeCycle +logger.jetty2.level = error +logger.replexprTyper.name = org.apache.spark.repl.SparkIMain$exprTyper +logger.replexprTyper.level = info +logger.replSparkILoopInterpreter.name = org.apache.spark.repl.SparkILoop$SparkILoopInterpreter +logger.replSparkILoopInterpreter.level = info +logger.parquet1.name = org.apache.parquet +logger.parquet1.level = error +logger.parquet2.name = parquet +logger.parquet2.level = error + +# SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support +logger.RetryingHMSHandler.name = org.apache.hadoop.hive.metastore.RetryingHMSHandler +logger.RetryingHMSHandler.level = fatal +logger.FunctionRegistry.name = org.apache.hadoop.hive.ql.exec.FunctionRegistry +logger.FunctionRegistry.level = error + +# For deploying Spark ThriftServer +# SPARK-34128: Suppress undesirable TTransportException warnings involved in THRIFT-4805 +appender.console.filter.1.type = RegexFilter +appender.console.filter.1.regex = .*Thrift error occurred during processing of message.* +appender.console.filter.1.onMatch = deny +appender.console.filter.1.onMismatch = neutral diff --git a/docker/integ-test/spark/spark-defaults.conf b/docker/integ-test/spark/spark-defaults.conf new file mode 100644 index 000000000..acd49625b --- /dev/null +++ b/docker/integ-test/spark/spark-defaults.conf @@ -0,0 +1,45 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Default system properties included when running spark-submit. +# This is useful for setting default environmental settings. 
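+#
+# The settings at the bottom of this file are specific to this integration-test cluster: they point
+# Spark at the metastore container, the OpenSearch server (Flint and PPL extensions), and the Minio
+# S3 bucket.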
+ +# Example: +# spark.master spark://master:7077 +# spark.eventLog.enabled true +# spark.eventLog.dir hdfs://namenode:8021/directory +# spark.serializer org.apache.spark.serializer.KryoSerializer +# spark.driver.memory 5g +# spark.executor.extraJavaOptions -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three" +spark.sql.catalogImplementation hive +spark.hadoop.hive.metastore.uris thrift://metastore:9083 +spark.sql.extensions org.opensearch.flint.spark.FlintPPLSparkExtensions,org.opensearch.flint.spark.FlintSparkExtensions +spark.sql.catalog.dev org.apache.spark.opensearch.catalog.OpenSearchCatalog +spark.sql.catalog.mys3 org.opensearch.sql.FlintDelegatingSessionCatalog +spark.datasource.flint.host opensearch +spark.datasource.flint.port 9200 +spark.datasource.flint.scheme http +spark.datasource.flint.auth basic +spark.datasource.flint.auth.username admin +spark.datasource.flint.auth.password C0rrecthorsebatterystaple. +spark.sql.warehouse.dir s3a://integ-test +spark.hadoop.fs.s3a.impl org.apache.hadoop.fs.s3a.S3AFileSystem +spark.hadoop.fs.s3a.path.style.access true +spark.hadoop.fs.s3a.access.key Vt7jnvi5BICr1rkfsheT +spark.hadoop.fs.s3a.secret.key 5NK3StGvoGCLUWvbaGN0LBUf9N6sjE94PEzLdqwO +spark.hadoop.fs.s3a.endpoint http://minio-S3:9000 +spark.hadoop.fs.s3a.connection.ssl.enabled false \ No newline at end of file diff --git a/docker/integ-test/spark-master-entrypoint.sh b/docker/integ-test/spark/spark-master-entrypoint.sh similarity index 88% rename from docker/integ-test/spark-master-entrypoint.sh rename to docker/integ-test/spark/spark-master-entrypoint.sh index a21c20643..51caa787b 100755 --- a/docker/integ-test/spark-master-entrypoint.sh +++ b/docker/integ-test/spark/spark-master-entrypoint.sh @@ -1,5 +1,8 @@ #!/bin/bash +# Copyright OpenSearch Contributors +# SPDX-License-Identifier: Apache-2.0 + function start_spark_connect() { sc_version=$(ls -1 /opt/bitnami/spark/jars/spark-core_*.jar | sed -e 's/^.*\/spark-core_//' -e 's/\.jar$//' -e 's/-/:/') diff --git a/docs/docker/integ-test/README.md b/docs/docker/integ-test/README.md new file mode 100644 index 000000000..30bf2b1f2 --- /dev/null +++ b/docs/docker/integ-test/README.md @@ -0,0 +1,226 @@ +# Docker Cluster for Integration Testing + +## Introduction + +The docker cluster in `docker/integ-test` is designed to be used for integration testing. It supports the following +use cases: +1. Submitting queries directly to Spark in order to test the PPL extension for Spark. +2. Submitting queries directly to Spark that use the OpenSearch datasource. Useful for testing the Flint extension + for Spark. +3. Using the Async API to submit queries to the OpenSearch server. Useful for testing the EMR workflow and querying + S3/Glue datasources. A local container is run rather than using the AWS EMR service. + +The cluster consists of several containers and handles configuring them. No tables are created. + +## Overview + +![Docker Containers](images/integ-test-containers.png "Docker Containers") + +All containers run in a dedicated docker network. + +### OpenSearch Dashboards + +An OpenSearch dashboards server that is connected to the OpenSearch server. It is exposed to the host OS, +so it can be accessed with a browser. + +### OpenSearch + +An OpenSearch server. It is running in standalone mode. It is exposed to the host OS. It is configured to have +an S3/Glue datasource with the name `mys3`. System indices and system indices permissions are disabled. + +This container also has a docker volume used to persist data such as local indices. 
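+
+To confirm that the `mys3` datasource was registered once the cluster is up, you can query the SQL plugin's
+datasources API (this example assumes the default `OPENSEARCH_PORT` and `OPENSEARCH_ADMIN_PASSWORD` values
+from the `.env` file):
+```shell
+curl \
+  -u 'admin:C0rrecthorsebatterystaple.' \
+  'http://localhost:9200/_plugins/_query/_datasources/mys3'
+```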
+
+### Spark
+
+The Spark master node. It is configured to use an external Hive metastore in the container `metastore`. The
+Spark master also has the Flint and PPL extensions installed. It can use locally built Jar files when building
+the docker image.
+
+Spark Connect is also running in this container and can be used to easily issue queries to run. The port for
+Spark Connect is exposed to the host OS.
+
+Spark is configured to have an OpenSearch datastore with the catalog name `dev`. Indices on the OpenSearch
+server can be queried as `dev.default.<index name>`.
+
+### Spark Worker
+
+The Spark worker node. It is configured to use an external Hive metastore in the container `metastore`. The
+Spark worker also has the Flint and PPL extensions installed. It can use locally built Jar files when building
+the docker image.
+
+### Spark Submit
+
+A temporary container that runs queries for an Async API session. It is started by the OpenSearch container. It
+does not connect to the Spark cluster and instead runs the queries locally. It will keep looking for more
+queries to run until it reaches its timeout (3 minutes by default).
+
+The Spark submit container is configured to use an external Hive metastore in the container `metastore`. The
+Flint and PPL extensions are installed. When building the docker image, locally built Jar files can be used.
+
+### Metastore (Hive)
+
+A Hive server that is used as a metastore for the Spark containers. It is configured to use the Minio
+container in the bucket `integ-test`.
+
+This container also has a docker volume used to persist the metastore.
+
+### Minio (S3)
+
+A Minio server that acts as an S3 server. It is used as part of the workflow of executing an S3/Glue query.
+It will contain the S3 tables data.
+
+This container also has a docker volume used to persist the S3 data.
+
+### Configuration-Updater
+
+A temporary container that is used to configure the OpenSearch and Minio containers. It is run after both
+of those have started up. For Minio, it will add the `integ-test` bucket and create an access key. For
+OpenSearch, it will create the S3/Glue datasource and apply a cluster configuration.
+
+## Running the Cluster
+
+To start the cluster, go to the directory `docker/integ-test` and use Docker Compose. When starting the
+cluster, wait for the `spark-worker` container to finish starting up. It is the last container to start.
+
+Start the cluster in the foreground:
+```shell
+docker compose up
+```
+
+Start the cluster in the background:
+```shell
+docker compose up -d
+```
+
+Stop the cluster:
+```shell
+docker compose down
+```
+
+## Creating Tables in S3
+
+Tables need to be created in Spark as external tables. Their location must be set to a path under `s3a://integ-test/`.
+You can use `spark-shell` on the Spark master container to do this:
+```shell
+docker exec -it spark spark-shell
+```
+
+Example for creating a table and adding data:
+```scala
+spark.sql("CREATE EXTERNAL TABLE foo (id int, name varchar(100)) location 's3a://integ-test/foo'")
+spark.sql("INSERT INTO foo (id, name) VALUES(1, 'Foo')")
+```
+
+## Querying an S3 Table
+
+A REST call to the OpenSearch container can be used to query the table using the Async API.
+
+[Async Query Creation API](https://github.com/opensearch-project/sql/blob/main/docs/user/interfaces/asyncqueryinterface.rst#async-query-creation-api)
+```shell
+curl \
+  -u 'admin:C0rrecthorsebatterystaple.'
\
+  -X POST \
+  -H 'Content-Type: application/json' \
+  -d '{"datasource": "mys3", "lang": "sql", "query": "SELECT * FROM mys3.default.foo"}' \
+  http://localhost:9200/_plugins/_async_query
+```
+
+Sample response:
+```json
+{
+  "queryId": "HlbM61kX6MDkAktO",
+  "sessionId": "1Giy65ZnzNlmsPAm"
+}
+```
+
+When the query is finished, the results can be retrieved with a REST call to the OpenSearch container.
+
+[Async Query Result API](https://github.com/opensearch-project/sql/blob/main/docs/user/interfaces/asyncqueryinterface.rst#async-query-result-api)
+```shell
+curl \
+  -u 'admin:C0rrecthorsebatterystaple.' \
+  -X GET \
+  'http://localhost:9200/_plugins/_async_query/HlbM61kX6MDkAktO'
+```
+
+Sample response:
+```json
+{
+  "status": "SUCCESS",
+  "schema": [
+    {
+      "name": "id",
+      "type": "integer"
+    },
+    {
+      "name": "name",
+      "type": "string"
+    }
+  ],
+  "datarows": [
+    [
+      1,
+      "Foo"
+    ]
+  ],
+  "total": 1,
+  "size": 1
+}
+```
+
+## Configuration of the Cluster
+
+There are several settings that can be adjusted for the cluster.
+
+* SPARK_VERSION - the tag of the `bitnami/spark` docker image to use
+* OPENSEARCH_VERSION - the tag of the `opensearchproject/opensearch` docker image to use
+* DASHBOARDS_VERSION - the tag of the `opensearchproject/opensearch-dashboards` docker image to use
+* MASTER_UI_PORT - port on the host OS to map to the master UI port (8080) of the Spark master
+* MASTER_PORT - port on the host OS to map to the master port (7077) on the Spark master
+* UI_PORT - port on the host OS to map to the UI port (4040) on the Spark master
+* SPARK_CONNECT_PORT - port on the host OS to map to the Spark Connect port (15002) on the Spark master
+* PPL_JAR - The relative path to the PPL extension Jar file. Must be within the base directory of this repository
+* FLINT_JAR - The relative path to the Flint extension Jar file. Must be within the base directory of this
+  repository
+* SQL_APP_JAR - The relative path to the SQL application Jar file. Must be within the base directory of this
+  repository
+* OPENSEARCH_NODE_MEMORY - Amount of memory to allocate for the OpenSearch server
+* OPENSEARCH_ADMIN_PASSWORD - Password for the admin user of the OpenSearch server
+* OPENSEARCH_PORT - port on the host OS to map to port 9200 on the OpenSearch server
+* OPENSEARCH_PA_PORT - port on the host OS to map to the performance analyzer port (9600) on the OpenSearch
+  server
+* OPENSEARCH_DASHBOARDS_PORT - port on the host OS to map to port 5601 on the OpenSearch Dashboards server
+* S3_ACCESS_KEY - access key to create on the Minio container
+* S3_SECRET_KEY - secret key to create on the Minio container
+
+## Async API Overview
+
+[Async API Interfaces](https://github.com/opensearch-project/sql/blob/main/docs/user/interfaces/asyncqueryinterface.rst)
+
+[Async API Documentation](https://opensearch.org/docs/latest/search-plugins/async/index/)
+
+The Async API is able to query S3/Glue datasources. This is done by calling the AWS EMR Serverless service to
+run the query in a docker container. The docker container uses Spark and is able to access the Glue catalog and
+retrieve data from S3.
+
+For the docker cluster, Minio is used in place of S3. Docker itself is used in place of AWS EMR.
+
+![OpenSearch Async API Sequence Diagram](images/OpenSearch_Async_API.png "Sequence Diagram")
+
+1. Client submits a request to the async query API endpoint
+2. OpenSearch server creates a special index (if it doesn't exist). This index is used to store async API requests
+   along with some state information.
+3. 
OpenSearch server checks if the query is for an S3/Glue datasource. If it is not, then OpenSearch can handle
+   the request on its own.
+4. OpenSearch uses docker to start a new container to process queries for the current async API session.
+5. OpenSearch returns the queryId and sessionId to the Client.
+6. Spark submit docker container starts up.
+7. Spark submit docker container searches the index from step 2 for a query to run in the current session.
+8. Spark submit docker container creates a special OpenSearch index (if it doesn't exist). This index is used to
+   store the results of the async API queries.
+9. Spark submit docker container looks up the table metadata from the `metastore` container.
+10. Spark submit docker container retrieves the data from the Minio container.
+11. Spark submit docker container writes the results to the OpenSearch index from step 8.
+12. Client submits a request to the async query results API endpoint using the queryId from step 5.
+13. OpenSearch returns the results to the Client.
diff --git a/docs/docker/integ-test/images/OpenSearch_Async_API.png b/docs/docker/integ-test/images/OpenSearch_Async_API.png
new file mode 100644
index 000000000..882f73902
Binary files /dev/null and b/docs/docker/integ-test/images/OpenSearch_Async_API.png differ
diff --git a/docs/docker/integ-test/images/integ-test-containers.png b/docs/docker/integ-test/images/integ-test-containers.png
new file mode 100644
index 000000000..92878e187
Binary files /dev/null and b/docs/docker/integ-test/images/integ-test-containers.png differ
diff --git a/docs/spark-docker.md b/docs/docker/spark-docker.md
similarity index 96%
rename from docs/spark-docker.md
rename to docs/docker/spark-docker.md
index d1200e2b3..9c505f1cd 100644
--- a/docs/spark-docker.md
+++ b/docs/docker/spark-docker.md
@@ -19,7 +19,7 @@ sbt clean
 sbt assembly
 ```
 
-Refer to the [Developer Guide](../DEVELOPER_GUIDE.md) for more information.
+Refer to the [Developer Guide](../../DEVELOPER_GUIDE.md) for more information.
 
 ## Using Docker Compose
 
@@ -65,7 +65,7 @@ spark.sql("INSERT INTO test_table (id, name) VALUES(2, 'Bar')")
 spark.sql("source=test_table | eval x = id + 5 | fields x, name").show()
 ```
 
-For further information, see the [Spark PPL Test Instructions](ppl-lang/local-spark-ppl-test-instruction.md)
+For further information, see the [Spark PPL Test Instructions](../ppl-lang/local-spark-ppl-test-instruction.md)
 
 ## Manual Setup
diff --git a/docs/spark-emr-docker.md b/docs/docker/spark-emr-docker.md
similarity index 98%
rename from docs/spark-emr-docker.md
rename to docs/docker/spark-emr-docker.md
index 7eef4d250..e56295736 100644
--- a/docs/spark-emr-docker.md
+++ b/docs/docker/spark-emr-docker.md
@@ -18,7 +18,7 @@ sbt clean
 sbt assembly
 ```
 
-Refer to the [Developer Guide](../DEVELOPER_GUIDE.md) for more information.
+Refer to the [Developer Guide](../../DEVELOPER_GUIDE.md) for more information.
 
 ## Using Docker Compose