Improve docs for OpenAI spec (#100)
* Improve OpenAI spec docs

* Add client code

* Add test dependency
lantiga authored May 21, 2024
1 parent 1978f3e commit 58502cb
Showing 3 changed files with 71 additions and 19 deletions.
86 changes: 68 additions & 18 deletions README.md
@@ -549,44 +549,94 @@

You can serve LLMs through an endpoint that follows the OpenAI API specification by passing LitServe's `OpenAISpec` as the `spec` argument to `LitServer`:

```python
from transformers import pipeline
import litserve as ls


class GPT2LitAPI(ls.LitAPI):
    def setup(self, device):
        self.generator = pipeline('text-generation', model='gpt2', device=device)

    def predict(self, prompt):
        out = self.generator(prompt)
        return out[0]["generated_text"]


if __name__ == '__main__':
    api = GPT2LitAPI()
    server = ls.LitServer(api, accelerator='auto', spec=ls.OpenAISpec())
    server.run(port=8000)
```

In this case, `predict` is expected to take an input with the following shape:
```python
[{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "Hello there"},
{"role": "assistant", "content": "Hello, how can I help?"},
{"role": "user", "content": "What is the capital of Australia?"}]
```

and produce an output with one of the following shapes:
- `"Canberra"`
- `{"content": "Canberra"}`
- `[{"content": "Canberra"}]`
- `{"role": "assistant", "content": "Canberra"}`
- `[{"role": "assistant", "content": "Canberra"}]`

By default, LitServe uses `OpenAISpec`'s implementation of `LitAPI.decode_request` and `LitAPI.encode_response`, so you don't need to provide them in `LitAPI`.

The server can then be queried with any HTTP client; for example, using `requests`:

```python
import requests

response = requests.post(
    "http://127.0.0.1:8000/v1/chat/completions",
    json={
        "model": "my-gpt2",
        "messages": [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "Hello!"},
        ],
    },
)
print(response.json())
```
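
Because the endpoint follows the OpenAI API, the official `openai` Python client (v1+) works as well. A minimal sketch, assuming the package is installed and that the server does not validate the API key (the key below is a placeholder):

```python
from openai import OpenAI

# Point the client at the local LitServe endpoint. The api_key is a
# placeholder; this sketch assumes the server does not check it.
client = OpenAI(base_url="http://127.0.0.1:8000/v1", api_key="not-used")

response = client.chat.completions.create(
    model="my-gpt2",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello!"},
    ],
)
print(response.choices[0].message.content)
```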

You can also customize the behavior of `decode_request` and `encode_response` by
overriding them in `LitAPI`. In this case:

- `decode_request` takes a `litserve.specs.openai.ChatCompletionRequest` as input
- `encode_response` returns a `litserve.specs.openai.ChatCompletionResponseChoice`

See the OpenAI [Pydantic models](src/litserve/specs/openai.py) for reference.
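
For instance, here is a minimal illustrative sketch of overriding `decode_request` to keep only the last user message. The class name and echo-style `predict` are hypothetical, and the sketch assumes the items in `ChatCompletionRequest.messages` expose `role` and `content` attributes:

```python
import litserve as ls
from litserve.specs.openai import ChatCompletionRequest


class LastUserMessageAPI(ls.LitAPI):
    def setup(self, device):
        pass

    def decode_request(self, request: ChatCompletionRequest):
        # Keep only the content of the most recent user message.
        user_messages = [m for m in request.messages if m.role == "user"]
        return user_messages[-1].content if user_messages else ""

    def predict(self, prompt):
        return f"You said: {prompt}"


if __name__ == "__main__":
    server = ls.LitServer(LastUserMessageAPI(), spec=ls.OpenAISpec())
    server.run(port=8000)
```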

Here is an example of overriding `encode_response` in `LitAPI`:

```python
import litserve as ls
from litserve.specs.openai import ChatCompletionResponseChoice, ChatMessage


class GPT2LitAPI(ls.LitAPI):
    def setup(self, device):
        self.model = None

    def predict(self, x):
        return {"role": "assistant", "content": "This is a generated output"}

    def encode_response(self, output: dict) -> ChatCompletionResponseChoice:
        return ChatCompletionResponseChoice(
            index=0,
            message=ChatMessage(role="assistant", content="This is a custom encoded output"),
            finish_reason="stop",
        )


if __name__ == "__main__":
    server = ls.LitServer(GPT2LitAPI(), spec=ls.OpenAISpec())
    server.run(port=8000)
```
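
Queried as shown earlier, this server always returns a choice whose message content is "This is a custom encoded output": `encode_response` replaces whatever `predict` produced with the custom `ChatCompletionResponseChoice`.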

1 change: 1 addition & 0 deletions _requirements/test.txt
@@ -10,3 +10,4 @@ psutil
requests
lightning >2.0.0
torch >2.0.0
transformers
3 changes: 2 additions & 1 deletion src/litserve/__init__.py
@@ -15,5 +15,6 @@
from litserve.api import LitAPI
from litserve.server import LitServer, Request, Response
from litserve import examples
from litserve.specs.openai import OpenAISpec

__all__ = ["LitAPI", "LitServer", "Request", "Response", "examples"]
__all__ = ["LitAPI", "LitServer", "Request", "Response", "examples", "OpenAISpec"]
