com.openai.unity 8.2.2 (#281)

- Added generic parameters to methods that support structured output
RageAgainstThePixel · Aug 19, 2024 · 84b4e90 · 84b4e90
1 parent dffa9c2
commit 84b4e90
Show file tree

Hide file tree

Showing 21 changed files with 593 additions and 249 deletions.
diff --git a/Documentation~/README.md b/Documentation~/README.md
@@ -113,7 +113,7 @@ The recommended installation method is though the unity package manager and [Ope
   - [Streaming](#chat-streaming)
   - [Tools](#chat-tools)
   - [Vision](#chat-vision)
-  - [Json Schema](#chat-json-schema) :new:
+  - [Structured Outputs](#chat-structured-outputs) :new:
   - [Json Mode](#chat-json-mode)
 - [Audio](#audio)
   - [Create Speech](#create-speech)
@@ -814,62 +814,86 @@ Structured Outputs is the evolution of JSON mode. While both ensure valid JSON i
 > - When using JSON mode, always instruct the model to produce JSON via some message in the conversation, for example via your system message. If you don't include an explicit instruction to generate JSON, the model may generate an unending stream of whitespace and the request may run continually until it reaches the token limit. To help ensure you don't forget, the API will throw an error if the string "JSON" does not appear somewhere in the context.
 > - The JSON in the message the model returns may be partial (i.e. cut off) if `finish_reason` is length, which indicates the generation exceeded max_tokens or the conversation exceeded the token limit. To guard against this, check `finish_reason` before parsing the response.
 
+First define the structure of your responses. These will be used as your schema.
+These are the objects you'll deserialize to, so be sure to use standard Json object models.
+
 ```csharp
-var mathSchema = new JsonSchema("math_response", @"
+public class MathResponse
 {
-  ""type"": ""object"",
-  ""properties"": {
-    ""steps"": {
-      ""type"": ""array"",
-      ""items"": {
-        ""type"": ""object"",
-        ""properties"": {
-          ""explanation"": {
-            ""type"": ""string""
-          },
-          ""output"": {
-            ""type"": ""string""
-          }
-        },
-        ""required"": [
-          ""explanation"",
-          ""output""
-        ],
-        ""additionalProperties"": false
-      }
-    },
-    ""final_answer"": {
-      ""type"": ""string""
-    }
-  },
-  ""required"": [
-    ""steps"",
-    ""final_answer""
-  ],
-  ""additionalProperties"": false
-}");
-var assistant = await OpenAIClient.AssistantsEndpoint.CreateAssistantAsync(
+    [JsonProperty("steps")]
+    public IReadOnlyList<MathStep> Steps { get; private set; }
+
+    [JsonProperty("final_answer")]
+    public string FinalAnswer { get; private set; }
+}
+
+public class MathStep
+{
+    [JsonProperty("explanation")]
+    public string Explanation { get; private set; }
+
+    [JsonProperty("output")]
+    public string Output { get; private set; }
+}
+```
+
+To use, simply specify the `MathResponse` type as the generic type argument when calling `CreateAssistantAsync`, `CreateRunAsync`, or `CreateThreadAndRunAsync`.
+
+```csharp
+var assistant = await OpenAIClient.AssistantsEndpoint.CreateAssistantAsync<MathResponse>(
     new CreateAssistantRequest(
         name: "Math Tutor",
         instructions: "You are a helpful math tutor. Guide the user through the solution step by step.",
-        model: "gpt-4o-2024-08-06",
-        jsonSchema: mathSchema));
+        model: "gpt-4o-2024-08-06"));
 ThreadResponse thread = null;
 
 try
 {
-    var run = await assistant.CreateThreadAndRunAsync("how can I solve 8x + 7 = -23",
-        async @event =>
+    async Task StreamEventHandler(IServerSentEvent @event)
+    {
+        try
         {
-            Debug.Log(@event.ToJsonString());
-            await Task.CompletedTask;
-        });
+            switch (@event)
+            {
+                case MessageResponse message:
+                    if (message.Status != MessageStatus.Completed)
+                    {
+                        Debug.Log(@event.ToJsonString());
+                        break;
+                    }
+
+                    var mathResponse = message.FromSchema<MathResponse>();
+
+                    for (var i = 0; i < mathResponse.Steps.Count; i++)
+                    {
+                        var step = mathResponse.Steps[i];
+                        Debug.Log($"Step {i}: {step.Explanation}");
+                        Debug.Log($"Result: {step.Output}");
+                    }
+
+                    Debug.Log($"Final Answer: {mathResponse.FinalAnswer}");
+                    break;
+                default:
+                    Debug.Log(@event.ToJsonString());
+                    break;
+            }
+        }
+        catch (Exception e)
+        {
+            Debug.Log(e);
+            throw;
+        }
+
+        await Task.CompletedTask;
+    }
+
+    var run = await assistant.CreateThreadAndRunAsync("how can I solve 8x + 7 = -23", StreamEventHandler);
     thread = await run.GetThreadAsync();
     run = await run.WaitForStatusChangeAsync();
     Debug.Log($"Created thread and run: {run.ThreadId} -> {run.Id} -> {run.CreatedAt}");
     var messages = await thread.ListMessagesAsync();
 
-    foreach (var response in messages.Items)
+    foreach (var response in messages.Items.OrderBy(response => response.CreatedAt))
     {
         Debug.Log($"{response.Role}: {response.PrintContent()}");
     }
@@ -881,7 +905,6 @@ finally
     if (thread != null)
     {
         var isDeleted = await thread.DeleteAsync(deleteToolResources: true);
-        Assert.IsTrue(isDeleted);
     }
 }
 ```
@@ -1251,7 +1274,7 @@ var result = await api.ChatEndpoint.GetCompletionAsync(chatRequest);
 Debug.Log($"{result.FirstChoice.Message.Role}: {result.FirstChoice} | Finish Reason: {result.FirstChoice.FinishDetails}");
 ```
 
-#### [Chat Json Schema](https://platform.openai.com/docs/guides/structured-outputs)
+#### [Chat Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs)
 
 The evolution of  [Json Mode](#chat-json-mode). While both ensure valid JSON is produced, only Structured Outputs ensure schema adherence.
 
@@ -1260,55 +1283,54 @@ The evolution of  [Json Mode](#chat-json-mode). While both ensure valid JSON is
 > - When using JSON mode, always instruct the model to produce JSON via some message in the conversation, for example via your system message. If you don't include an explicit instruction to generate JSON, the model may generate an unending stream of whitespace and the request may run continually until it reaches the token limit. To help ensure you don't forget, the API will throw an error if the string "JSON" does not appear somewhere in the context.
 > - The JSON in the message the model returns may be partial (i.e. cut off) if `finish_reason` is length, which indicates the generation exceeded max_tokens or the conversation exceeded the token limit. To guard against this, check `finish_reason` before parsing the response.
 
+First define the structure of your responses. These will be used as your schema.
+These are the objects you'll deserialize to, so be sure to use standard Json object models.
+
+```csharp
+public class MathResponse
+{
+    // [JsonProperty] maps the "steps" field and lets Newtonsoft.Json populate the private setter.
+    [JsonProperty("steps")]
+    public IReadOnlyList<MathStep> Steps { get; private set; }
+
+    // Maps the snake_case "final_answer" field onto this property.
+    [JsonProperty("final_answer")]
+    public string FinalAnswer { get; private set; }
+}
+
+public class MathStep
+{
+    // Maps the "explanation" field onto this property.
+    [JsonProperty("explanation")]
+    public string Explanation { get; private set; }
+
+    // Maps the "output" field onto this property.
+    [JsonProperty("output")]
+    public string Output { get; private set; }
+}
+```
+
+To use, simply specify the `MathResponse` type as the generic type argument when requesting a completion.
+
 ```csharp
 var messages = new List<Message>
 {
     new(Role.System, "You are a helpful math tutor. Guide the user through the solution step by step."),
     new(Role.User, "how can I solve 8x + 7 = -23")
 };
 
-var mathSchema = new JsonSchema("math_response", @"
-{
-  ""type"": ""object"",
-  ""properties"": {
-    ""steps"": {
-      ""type"": ""array"",
-      ""items"": {
-        ""type"": ""object"",
-        ""properties"": {
-          ""explanation"": {
-            ""type"": ""string""
-          },
-          ""output"": {
-            ""type"": ""string""
-          }
-        },
-        ""required"": [
-          ""explanation"",
-          ""output""
-        ],
-        ""additionalProperties"": false
-      }
-    },
-    ""final_answer"": {
-      ""type"": ""string""
-    }
-  },
-  ""required"": [
-    ""steps"",
-    ""final_answer""
-  ],
-  ""additionalProperties"": false
-}");
-var chatRequest = new ChatRequest(messages, model: new("gpt-4o-2024-08-06"), jsonSchema: mathSchema);
-var response = await OpenAIClient.ChatEndpoint.GetCompletionAsync(chatRequest);
+var chatRequest = new ChatRequest(messages, model: new("gpt-4o-2024-08-06"));
+var (mathResponse, chatResponse) = await OpenAIClient.ChatEndpoint.GetCompletionAsync<MathResponse>(chatRequest);
 
-foreach (var choice in response.Choices)
+for (var i = 0; i < mathResponse.Steps.Count; i++)
 {
-    Debug.Log($"[{choice.Index}] {choice.Message.Role}: {choice} | Finish Reason: {choice.FinishReason}");
+    var step = mathResponse.Steps[i];
+    Debug.Log($"Step {i}: {step.Explanation}");
+    Debug.Log($"Result: {step.Output}");
 }
 
-response.GetUsage();
+Debug.Log($"Final Answer: {mathResponse.FinalAnswer}");
+chatResponse.GetUsage();
 ```
 
 #### [Chat Json Mode](https://platform.openai.com/docs/guides/text-generation/json-mode)

diff --git a/Runtime/Assistants/AssistantsEndpoint.cs b/Runtime/Assistants/AssistantsEndpoint.cs
@@ -29,6 +29,27 @@ public async Task<ListResponse<AssistantResponse>> ListAssistantsAsync(ListQuery
             return response.Deserialize<ListResponse<AssistantResponse>>(client);
         }
 
+        /// <summary>
+        /// Create an assistant whose response format is set from <typeparamref name="T"/> for structured outputs.
+        /// </summary>
+        /// <typeparam name="T">The type passed as the <see cref="JsonSchema"/> source for structured outputs.</typeparam>
+        /// <param name="request">Optional, <see cref="CreateAssistantRequest"/>. When null a new request is created; otherwise its response format is overwritten.</param>
+        /// <param name="cancellationToken">Optional, <see cref="CancellationToken"/>.</param>
+        /// <returns><see cref="AssistantResponse"/>.</returns>
+        public async Task<AssistantResponse> CreateAssistantAsync<T>(CreateAssistantRequest request = null, CancellationToken cancellationToken = default)
+        {
+            if (request == null)
+            {
+                request = new CreateAssistantRequest(jsonSchema: typeof(T));
+            }
+            else
+            {
+                request.ResponseFormatObject = new ResponseFormatObject(typeof(T)); // replaces any response format already set on the caller's request
+            }
+
+            return await CreateAssistantAsync(request, cancellationToken);
+        }
+
         /// <summary>
         /// Create an assistant.
         /// </summary>

diff --git a/Runtime/Assistants/CreateAssistantRequest.cs b/Runtime/Assistants/CreateAssistantRequest.cs
@@ -290,7 +290,7 @@ public CreateAssistantRequest(
         /// </remarks>
         [Preserve]
         [JsonProperty("response_format", DefaultValueHandling = DefaultValueHandling.Ignore)]
-        public ResponseFormatObject ResponseFormatObject { get; }
+        public ResponseFormatObject ResponseFormatObject { get; internal set; }
 
         [JsonIgnore]
         public ChatResponseFormat ResponseFormat => ResponseFormatObject ?? ChatResponseFormat.Auto;

diff --git a/Runtime/Chat/ChatEndpoint.cs b/Runtime/Chat/ChatEndpoint.cs
@@ -37,6 +37,21 @@ public async Task<ChatResponse> GetCompletionAsync(ChatRequest chatRequest, Canc
             return response.Deserialize<ChatResponse>(client);
         }
 
+        /// <summary>
+        /// Creates a completion for the chat message and deserializes the first choice into <typeparamref name="T"/>.
+        /// </summary>
+        /// <typeparam name="T">The type passed as the <see cref="JsonSchema"/> source for structured outputs, and the type the first choice is deserialized into.</typeparam>
+        /// <param name="chatRequest">The chat request which contains the message content. Must not be null; its response format is overwritten.</param>
+        /// <param name="cancellationToken">Optional, <see cref="CancellationToken"/>.</param>
+        /// <returns>A tuple of the first choice deserialized as <typeparamref name="T"/> and the full <see cref="ChatResponse"/>.</returns>
+        public async Task<(T, ChatResponse)> GetCompletionAsync<T>(ChatRequest chatRequest, CancellationToken cancellationToken = default)
+        {
+            chatRequest.ResponseFormatObject = new ResponseFormatObject(typeof(T)); // replaces any response format already set on the caller's request
+            var response = await GetCompletionAsync(chatRequest, cancellationToken);
+            var output = JsonConvert.DeserializeObject<T>(response.FirstChoice, OpenAIClient.JsonSerializationOptions);
+            return (output, response);
+        }
+
         /// <summary>
         /// Created a completion for the chat message and stream the results to the <paramref name="resultHandler"/> as they come in.
         /// </summary>
@@ -57,6 +72,49 @@ public async Task<ChatResponse> StreamCompletionAsync(ChatRequest chatRequest, A
                 return Task.CompletedTask;
             }, streamUsage, cancellationToken);
 
+        /// <summary>
+        /// Creates a completion for the chat message and streams the results to the <paramref name="resultHandler"/> as they come in.
+        /// </summary>
+        /// <typeparam name="T">The type passed as the <see cref="JsonSchema"/> source for structured outputs.</typeparam>
+        /// <param name="chatRequest">The chat request which contains the message content.</param>
+        /// <param name="resultHandler">An <see cref="Action{ChatResponse}"/> to be invoked as each new result arrives.</param>
+        /// <param name="streamUsage">
+        /// Optional, If set, an additional chunk will be streamed before the 'data: [DONE]' message.
+        /// The 'usage' field on this chunk shows the token usage statistics for the entire request,
+        /// and the 'choices' field will always be an empty array. All other chunks will also include a 'usage' field,
+        /// but with a null value.
+        /// </param>
+        /// <param name="cancellationToken">Optional, <see cref="CancellationToken"/>.</param>
+        /// <returns>A tuple of the <typeparamref name="T"/> result and the <see cref="ChatResponse"/>.</returns>
+        public async Task<(T, ChatResponse)> StreamCompletionAsync<T>(ChatRequest chatRequest, Action<ChatResponse> resultHandler, bool streamUsage = false, CancellationToken cancellationToken = default)
+            => await StreamCompletionAsync<T>(chatRequest, async response =>
+            {
+                resultHandler.Invoke(response); // wraps the synchronous handler so the Func-based overload can be reused
+                await Task.CompletedTask;
+            }, streamUsage, cancellationToken);
+
+        /// <summary>
+        /// Creates a completion for the chat message and streams the results to the <paramref name="resultHandler"/> as they come in.
+        /// </summary>
+        /// <typeparam name="T">The type passed as the <see cref="JsonSchema"/> source for structured outputs, and the type the first choice is deserialized into.</typeparam>
+        /// <param name="chatRequest">The chat request which contains the message content. Must not be null; its response format is overwritten.</param>
+        /// <param name="resultHandler">A <see cref="Func{ChatResponse, Task}"/> to be invoked as each new result arrives.</param>
+        /// <param name="streamUsage">
+        /// Optional, If set, an additional chunk will be streamed before the 'data: [DONE]' message.
+        /// The 'usage' field on this chunk shows the token usage statistics for the entire request,
+        /// and the 'choices' field will always be an empty array. All other chunks will also include a 'usage' field,
+        /// but with a null value.
+        /// </param>
+        /// <param name="cancellationToken">Optional, <see cref="CancellationToken"/>.</param>
+        /// <returns>A tuple of the first choice deserialized as <typeparamref name="T"/> and the full <see cref="ChatResponse"/>.</returns>
+        public async Task<(T, ChatResponse)> StreamCompletionAsync<T>(ChatRequest chatRequest, Func<ChatResponse, Task> resultHandler, bool streamUsage = false, CancellationToken cancellationToken = default)
+        {
+            chatRequest.ResponseFormatObject = new ResponseFormatObject(typeof(T)); // replaces any response format already set on the caller's request
+            var response = await StreamCompletionAsync(chatRequest, resultHandler, streamUsage, cancellationToken);
+            var output = JsonConvert.DeserializeObject<T>(response.FirstChoice, OpenAIClient.JsonSerializationOptions);
+            return (output, response);
+        }
+
         /// <summary>
         /// Created a completion for the chat message and stream the results to the <paramref name="resultHandler"/> as they come in.
         /// </summary>

diff --git a/Runtime/Chat/ChatRequest.cs b/Runtime/Chat/ChatRequest.cs
@@ -287,7 +287,7 @@ public ChatRequest(
         /// </remarks>
         [Preserve]
         [JsonProperty("response_format")]
-        public ResponseFormatObject ResponseFormatObject { get; }
+        public ResponseFormatObject ResponseFormatObject { get; internal set; }
 
         [JsonIgnore]
         public ChatResponseFormat ResponseFormat => ResponseFormatObject ?? ChatResponseFormat.Auto;