feat: Support gzip (#153)

Lightning-AI · Jun 25, 2024 · 3e5bb40
1 parent c469a13
commit 3e5bb40
Show file tree

Hide file tree

Showing 2 changed files with 61 additions and 0 deletions.
diff --git a/src/litserve/server.py b/src/litserve/server.py
@@ -34,6 +34,7 @@
 import sys
 
 from fastapi.responses import StreamingResponse
+from starlette.middleware.gzip import GZipMiddleware
 
 from litserve import LitAPI
 from litserve.connector import _Connector
@@ -380,6 +381,9 @@ def __init__(
         lit_api.stream = stream
         lit_api.sanitize(max_batch_size, spec=spec)
         self.app = FastAPI(lifespan=self.lifespan)
+        # gzip does not play nicely with streaming, see https://github.com/tiangolo/fastapi/discussions/8448
+        if not stream:
+            self.app.add_middleware(GZipMiddleware, minimum_size=1000)
         self.lit_api = lit_api
         self.lit_spec = spec
         self.workers_per_device = workers_per_device

diff --git a/tests/test_compression.py b/tests/test_compression.py
@@ -0,0 +1,57 @@
+# Copyright The Lightning AI team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from fastapi import Request, Response
+from fastapi.testclient import TestClient
+
+from litserve import LitAPI, LitServer
+
+# trivially compressible content
+test_output = {"result": "0" * 100000}
+
+
+class LargeOutputLitAPI(LitAPI):  # test stub: ignores its input and always answers with the large `test_output` payload
+    def setup(self, device):
+        pass  # nothing to initialise for this stub
+
+    def decode_request(self, request: Request):
+        pass  # request content is ignored; implicitly returns None
+
+    def predict(self, x):
+        pass  # no inference is performed; implicitly returns None
+
+    def encode_response(self, output) -> Response:
+        return test_output  # NOTE(review): returns a plain dict despite the `Response` annotation; presumably the server serialises it - confirm before changing
+
+
+def test_compression():  # the large response is gzip-encoded only when the client advertises gzip support
+    server = LitServer(LargeOutputLitAPI(), accelerator="cpu", devices=1, workers_per_device=1)
+
+    # compressed: the client accepts gzip, so the body must arrive gzip-encoded and smaller than the raw payload
+    with TestClient(server.app) as client:
+        response = client.post("/predict", headers={"Accept-Encoding": "gzip"}, json={})
+        assert response.status_code == 200
+        assert response.headers["Content-Encoding"] == "gzip"
+        content_length = int(response.headers["Content-Length"])
+        assert 0 < content_length < 100000  # fewer bytes on the wire than the 100000-character string alone
+        assert response.json() == test_output  # the body must still decode to the original payload
+
+    # uncompressed: an empty Accept-Encoding header advertises no encodings, so the body must be sent as-is
+    with TestClient(server.app) as client:
+        response = client.post("/predict", headers={"Accept-Encoding": ""}, json={})
+        assert response.status_code == 200
+        assert "Content-Encoding" not in response.headers
+        content_length = int(response.headers["Content-Length"])
+        assert content_length > 100000  # the JSON body wraps the 100000-character string, so it is larger than that
+        assert response.json() == test_output