diff --git a/OpenAI-DotNet-Proxy/OpenAI-DotNet-Proxy.csproj b/OpenAI-DotNet-Proxy/OpenAI-DotNet-Proxy.csproj index 841a82c2..1cc80358 100644 --- a/OpenAI-DotNet-Proxy/OpenAI-DotNet-Proxy.csproj +++ b/OpenAI-DotNet-Proxy/OpenAI-DotNet-Proxy.csproj @@ -22,8 +22,10 @@ true false false - 8.2.0 + 8.4.0 +Version 8.4.0 +- Added support for Realtime Websocket proxy forwarding Version 8.2.0 - Deprecated ValidateAuthentication for ValidateAuthenticationAsync Version 8.1.1 diff --git a/OpenAI-DotNet-Proxy/Proxy/AbstractAuthenticationFilter.cs b/OpenAI-DotNet-Proxy/Proxy/AbstractAuthenticationFilter.cs index c0d7ca75..cb37a033 100644 --- a/OpenAI-DotNet-Proxy/Proxy/AbstractAuthenticationFilter.cs +++ b/OpenAI-DotNet-Proxy/Proxy/AbstractAuthenticationFilter.cs @@ -1,7 +1,6 @@ // Licensed under the MIT License. See LICENSE in the project root for license information. using Microsoft.AspNetCore.Http; -using System; using System.Threading.Tasks; namespace OpenAI.Proxy @@ -9,9 +8,6 @@ namespace OpenAI.Proxy /// public abstract class AbstractAuthenticationFilter : IAuthenticationFilter { - [Obsolete("Use ValidateAuthenticationAsync")] - public virtual void ValidateAuthentication(IHeaderDictionary request) { } - /// public abstract Task ValidateAuthenticationAsync(IHeaderDictionary request); } diff --git a/OpenAI-DotNet-Proxy/Proxy/EndpointRouteBuilder.cs b/OpenAI-DotNet-Proxy/Proxy/EndpointRouteBuilder.cs index 1f44a7b5..6ba4b250 100644 --- a/OpenAI-DotNet-Proxy/Proxy/EndpointRouteBuilder.cs +++ b/OpenAI-DotNet-Proxy/Proxy/EndpointRouteBuilder.cs @@ -9,8 +9,10 @@ using System.IO; using System.Linq; using System.Net.Http; +using System.Net.WebSockets; using System.Security.Authentication; using System.Text.Json; +using System.Threading; using System.Threading.Tasks; namespace OpenAI.Proxy @@ -24,6 +26,9 @@ public static class EndpointRouteBuilder HeaderNames.TransferEncoding, HeaderNames.KeepAlive, HeaderNames.Upgrade, + HeaderNames.Host, + HeaderNames.SecWebSocketKey, + HeaderNames.SecWebSocketVersion, "Proxy-Connection", "Proxy-Authenticate", "Proxy-Authentication-Info", @@ -52,22 +57,25 @@ public static class EndpointRouteBuilder public static void MapOpenAIEndpoints(this IEndpointRouteBuilder endpoints, OpenAIClient openAIClient, IAuthenticationFilter authenticationFilter, string routePrefix = "") { endpoints.Map($"{routePrefix}{openAIClient.OpenAIClientSettings.BaseRequest}{{**endpoint}}", HandleRequest); + return; async Task HandleRequest(HttpContext httpContext, string endpoint) { try { -#pragma warning disable CS0618 // Type or member is obsolete - // ReSharper disable once MethodHasAsyncOverload - authenticationFilter.ValidateAuthentication(httpContext.Request.Headers); -#pragma warning restore CS0618 // Type or member is obsolete - await authenticationFilter.ValidateAuthenticationAsync(httpContext.Request.Headers); + if (httpContext.WebSockets.IsWebSocketRequest) + { + await ProcessWebSocketRequest(httpContext, endpoint).ConfigureAwait(false); + return; + } + await authenticationFilter.ValidateAuthenticationAsync(httpContext.Request.Headers).ConfigureAwait(false); var method = new HttpMethod(httpContext.Request.Method); + var uri = new Uri(string.Format( - openAIClient.OpenAIClientSettings.BaseRequestUrlFormat, - $"{endpoint}{httpContext.Request.QueryString}" - )); + openAIClient.OpenAIClientSettings.BaseRequestUrlFormat, + $"{endpoint}{httpContext.Request.QueryString}" + )); using var request = new HttpRequestMessage(method, uri); request.Content = new StreamContent(httpContext.Request.Body); @@ -76,7 +84,7 @@ async Task HandleRequest(HttpContext httpContext, string endpoint) request.Content.Headers.ContentType = System.Net.Http.Headers.MediaTypeHeaderValue.Parse(httpContext.Request.ContentType); } - var proxyResponse = await openAIClient.Client.SendAsync(request, HttpCompletionOption.ResponseHeadersRead); + var proxyResponse = await openAIClient.Client.SendAsync(request, HttpCompletionOption.ResponseHeadersRead, httpContext.RequestAborted).ConfigureAwait(false); httpContext.Response.StatusCode = (int)proxyResponse.StatusCode; foreach (var (key, value) in proxyResponse.Headers) @@ -96,32 +104,120 @@ async Task HandleRequest(HttpContext httpContext, string endpoint) if (httpContext.Response.ContentType.Equals(streamingContent)) { - var stream = await proxyResponse.Content.ReadAsStreamAsync(); - await WriteServerStreamEventsAsync(httpContext, stream); + var stream = await proxyResponse.Content.ReadAsStreamAsync().ConfigureAwait(false); + await WriteServerStreamEventsAsync(httpContext, stream).ConfigureAwait(false); } else { - await proxyResponse.Content.CopyToAsync(httpContext.Response.Body); + await proxyResponse.Content.CopyToAsync(httpContext.Response.Body, httpContext.RequestAborted).ConfigureAwait(false); } } catch (AuthenticationException authenticationException) { httpContext.Response.StatusCode = StatusCodes.Status401Unauthorized; - await httpContext.Response.WriteAsync(authenticationException.Message); + await httpContext.Response.WriteAsync(authenticationException.Message).ConfigureAwait(false); + } + catch (WebSocketException) + { + // ignore + throw; } catch (Exception e) { + if (httpContext.Response.HasStarted) { throw; } httpContext.Response.StatusCode = StatusCodes.Status500InternalServerError; var response = JsonSerializer.Serialize(new { error = new { e.Message, e.StackTrace } }); - await httpContext.Response.WriteAsync(response); + await httpContext.Response.WriteAsync(response).ConfigureAwait(false); } static async Task WriteServerStreamEventsAsync(HttpContext httpContext, Stream contentStream) { var responseStream = httpContext.Response.Body; - await contentStream.CopyToAsync(responseStream, httpContext.RequestAborted); - await responseStream.FlushAsync(httpContext.RequestAborted); + await contentStream.CopyToAsync(responseStream, httpContext.RequestAborted).ConfigureAwait(false); + await responseStream.FlushAsync(httpContext.RequestAborted).ConfigureAwait(false); + } + } + + async Task ProcessWebSocketRequest(HttpContext httpContext, string endpoint) + { + using var clientWebsocket = await httpContext.WebSockets.AcceptWebSocketAsync().ConfigureAwait(false); + + try + { + await authenticationFilter.ValidateAuthenticationAsync(httpContext.Request.Headers).ConfigureAwait(false); + } + catch (AuthenticationException authenticationException) + { + var message = JsonSerializer.Serialize(new + { + type = "error", + error = new + { + type = "invalid_request_error", + code = "invalid_session_token", + message = authenticationException.Message + } + }); + await clientWebsocket.SendAsync(System.Text.Encoding.UTF8.GetBytes(message), WebSocketMessageType.Text, true, httpContext.RequestAborted).ConfigureAwait(false); + await clientWebsocket.CloseAsync(WebSocketCloseStatus.PolicyViolation, authenticationException.Message, httpContext.RequestAborted).ConfigureAwait(false); + return; } + + if (endpoint.EndsWith("echo")) + { + await EchoAsync(clientWebsocket, httpContext.RequestAborted); + return; + } + + using var hostWebsocket = new ClientWebSocket(); + + foreach (var header in openAIClient.WebsocketHeaders) + { + hostWebsocket.Options.SetRequestHeader(header.Key, header.Value); + } + + var uri = new Uri(string.Format( + openAIClient.OpenAIClientSettings.BaseWebSocketUrlFormat, + $"{endpoint}{httpContext.Request.QueryString}" + )); + await hostWebsocket.ConnectAsync(uri, httpContext.RequestAborted).ConfigureAwait(false); + var receive = ProxyWebSocketMessages(clientWebsocket, hostWebsocket, httpContext.RequestAborted); + var send = ProxyWebSocketMessages(hostWebsocket, clientWebsocket, httpContext.RequestAborted); + await Task.WhenAll(receive, send).ConfigureAwait(false); + return; + + async Task ProxyWebSocketMessages(WebSocket fromSocket, WebSocket toSocket, CancellationToken cancellationToken) + { + var buffer = new byte[1024 * 4]; + var memoryBuffer = buffer.AsMemory(); + + while (fromSocket.State == WebSocketState.Open && !cancellationToken.IsCancellationRequested) + { + var result = await fromSocket.ReceiveAsync(memoryBuffer, cancellationToken).ConfigureAwait(false); + + if (fromSocket.CloseStatus.HasValue || result.MessageType == WebSocketMessageType.Close) + { + await toSocket.CloseOutputAsync(fromSocket.CloseStatus ?? WebSocketCloseStatus.NormalClosure, fromSocket.CloseStatusDescription ?? "Closing", cancellationToken).ConfigureAwait(false); + break; + } + + await toSocket.SendAsync(memoryBuffer[..result.Count], result.MessageType, result.EndOfMessage, cancellationToken).ConfigureAwait(false); + } + } + } + + static async Task EchoAsync(WebSocket webSocket, CancellationToken cancellationToken) + { + var buffer = new byte[1024 * 4]; + var receiveResult = await webSocket.ReceiveAsync(new ArraySegment(buffer), cancellationToken); + + while (!receiveResult.CloseStatus.HasValue) + { + await webSocket.SendAsync(new ArraySegment(buffer, 0, receiveResult.Count), receiveResult.MessageType, receiveResult.EndOfMessage, cancellationToken); + receiveResult = await webSocket.ReceiveAsync(new ArraySegment(buffer), cancellationToken); + } + + await webSocket.CloseAsync(receiveResult.CloseStatus.Value, receiveResult.CloseStatusDescription, cancellationToken); } } } diff --git a/OpenAI-DotNet-Proxy/Proxy/IAuthenticationFilter.cs b/OpenAI-DotNet-Proxy/Proxy/IAuthenticationFilter.cs index 1d54ce5f..ffe6633f 100644 --- a/OpenAI-DotNet-Proxy/Proxy/IAuthenticationFilter.cs +++ b/OpenAI-DotNet-Proxy/Proxy/IAuthenticationFilter.cs @@ -1,7 +1,6 @@ // Licensed under the MIT License. See LICENSE in the project root for license information. using Microsoft.AspNetCore.Http; -using System; using System.Security.Authentication; using System.Threading.Tasks; @@ -12,9 +11,6 @@ namespace OpenAI.Proxy /// public interface IAuthenticationFilter { - [Obsolete("Use ValidateAuthenticationAsync")] - void ValidateAuthentication(IHeaderDictionary request); - /// /// Checks the headers for your user issued token. /// If it's not valid, then throw . diff --git a/OpenAI-DotNet-Proxy/Proxy/OpenAIProxy.cs b/OpenAI-DotNet-Proxy/Proxy/OpenAIProxy.cs index 4513a8bb..27cedaf2 100644 --- a/OpenAI-DotNet-Proxy/Proxy/OpenAIProxy.cs +++ b/OpenAI-DotNet-Proxy/Proxy/OpenAIProxy.cs @@ -6,6 +6,7 @@ using Microsoft.AspNetCore.Server.Kestrel.Core; using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.Hosting; +using Microsoft.Extensions.Logging; using System; using System.Threading.Tasks; @@ -41,6 +42,7 @@ public void Configure(IApplicationBuilder app, IWebHostEnvironment env) SetupServices(app.ApplicationServices); app.UseHttpsRedirection(); + app.UseWebSockets(); app.UseRouting(); app.UseEndpoints(endpoints => { @@ -62,6 +64,12 @@ public static IHost CreateDefaultHost(string[] args, OpenAIClient openAIClien webBuilder.UseStartup(); webBuilder.ConfigureKestrel(ConfigureKestrel); }) + .ConfigureLogging(logger => + { + logger.ClearProviders(); + logger.AddConsole(); + logger.SetMinimumLevel(LogLevel.Debug); + }) .ConfigureServices(services => { services.AddSingleton(openAIClient); @@ -77,6 +85,9 @@ public static IHost CreateDefaultHost(string[] args, OpenAIClient openAIClien public static WebApplication CreateWebApplication(string[] args, OpenAIClient openAIClient) where T : class, IAuthenticationFilter { var builder = WebApplication.CreateBuilder(args); + builder.Logging.ClearProviders(); + builder.Logging.AddConsole(); + builder.Logging.SetMinimumLevel(LogLevel.Debug); builder.WebHost.ConfigureKestrel(ConfigureKestrel); builder.Services.AddSingleton(openAIClient); builder.Services.AddSingleton(); diff --git a/OpenAI-DotNet-Proxy/Proxy/OpenAIProxyStartup.cs b/OpenAI-DotNet-Proxy/Proxy/OpenAIProxyStartup.cs deleted file mode 100644 index 55a4e927..00000000 --- a/OpenAI-DotNet-Proxy/Proxy/OpenAIProxyStartup.cs +++ /dev/null @@ -1,30 +0,0 @@ -// Licensed under the MIT License. See LICENSE in the project root for license information. - -using Microsoft.AspNetCore.Builder; -using Microsoft.AspNetCore.Hosting; -using Microsoft.Extensions.DependencyInjection; -using Microsoft.Extensions.Hosting; -using System; - -namespace OpenAI.Proxy -{ - [Obsolete("Use OpenAIProxy")] - public class OpenAIProxyStartup - { - private OpenAIProxy openAIProxy; - - private OpenAIProxy OpenAIProxy => openAIProxy ??= new OpenAIProxy(); - - public void ConfigureServices(IServiceCollection services) - => OpenAIProxy.ConfigureServices(services); - - public void Configure(IApplicationBuilder app, IWebHostEnvironment env) - => OpenAIProxy.Configure(app, env); - - public static IHost CreateDefaultHost(string[] args, OpenAIClient openAIClient) - where T : class, IAuthenticationFilter => OpenAIProxy.CreateDefaultHost(args, openAIClient); - - public static WebApplication CreateWebApplication(string[] args, OpenAIClient openAIClient) - where T : class, IAuthenticationFilter => OpenAIProxy.CreateWebApplication(args, openAIClient); - } -} diff --git a/OpenAI-DotNet-Tests-Proxy/OpenAI-DotNet-Tests-Proxy.csproj b/OpenAI-DotNet-Tests-Proxy/OpenAI-DotNet-Tests-Proxy.csproj index 71bc8545..47610ba5 100644 --- a/OpenAI-DotNet-Tests-Proxy/OpenAI-DotNet-Tests-Proxy.csproj +++ b/OpenAI-DotNet-Tests-Proxy/OpenAI-DotNet-Tests-Proxy.csproj @@ -10,5 +10,11 @@ + + + + + PreserveNewest + diff --git a/OpenAI-DotNet-Tests-Proxy/Program.cs b/OpenAI-DotNet-Tests-Proxy/Program.cs index 06fd684e..4c2ebd76 100644 --- a/OpenAI-DotNet-Tests-Proxy/Program.cs +++ b/OpenAI-DotNet-Tests-Proxy/Program.cs @@ -36,7 +36,8 @@ public static void Main(string[] args) var auth = OpenAIAuthentication.LoadFromEnv(); var settings = new OpenAIClientSettings(/* your custom settings if using Azure OpenAI */); using var openAIClient = new OpenAIClient(auth, settings); - OpenAIProxy.CreateWebApplication(args, openAIClient).Run(); + using var app = OpenAIProxy.CreateWebApplication(args, openAIClient); + app.Run(); } } } diff --git a/OpenAI-DotNet-Tests-Proxy/Properties/launchSettings.json b/OpenAI-DotNet-Tests-Proxy/Properties/launchSettings.json index 15e70edf..eb124f81 100644 --- a/OpenAI-DotNet-Tests-Proxy/Properties/launchSettings.json +++ b/OpenAI-DotNet-Tests-Proxy/Properties/launchSettings.json @@ -28,4 +28,4 @@ } } } -} +} \ No newline at end of file diff --git a/OpenAI-DotNet-Tests-Proxy/appsettings.Development.json b/OpenAI-DotNet-Tests-Proxy/appsettings.Development.json deleted file mode 100644 index 0c208ae9..00000000 --- a/OpenAI-DotNet-Tests-Proxy/appsettings.Development.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "Logging": { - "LogLevel": { - "Default": "Information", - "Microsoft.AspNetCore": "Warning" - } - } -} diff --git a/OpenAI-DotNet-Tests/AbstractTestFixture.cs b/OpenAI-DotNet-Tests/AbstractTestFixture.cs index cf155bf5..75300267 100644 --- a/OpenAI-DotNet-Tests/AbstractTestFixture.cs +++ b/OpenAI-DotNet-Tests/AbstractTestFixture.cs @@ -2,8 +2,14 @@ using Microsoft.AspNetCore.Hosting; using Microsoft.AspNetCore.Mvc.Testing; +using Microsoft.AspNetCore.TestHost; +using Microsoft.Extensions.Configuration; using System; +using System.IO; using System.Net.Http; +using System.Net.WebSockets; +using System.Threading; +using System.Threading.Tasks; namespace OpenAI.Tests { @@ -22,20 +28,58 @@ protected override void ConfigureWebHost(IWebHostBuilder builder) protected readonly HttpClient HttpClient; + protected readonly WebSocketClient WebSocket; + protected readonly OpenAIClient OpenAIClient; protected AbstractTestFixture() { var webApplicationFactory = new TestProxyFactory(); - HttpClient = webApplicationFactory.CreateClient(); - var domain = $"{HttpClient.BaseAddress?.Authority}:{HttpClient.BaseAddress?.Port}"; - var settings = new OpenAIClientSettings(domain: domain); + HttpClient = webApplicationFactory.CreateClient(new WebApplicationFactoryClientOptions + { + BaseAddress = GetBaseAddressFromLaunchSettings() + }); + WebSocket = webApplicationFactory.Server.CreateWebSocketClient(); + var settings = new OpenAIClientSettings(domain: HttpClient.BaseAddress?.Authority); var auth = new OpenAIAuthentication(TestUserToken); - HttpClient.Timeout = TimeSpan.FromMinutes(3); + OpenAIClient = new OpenAIClient(auth, settings, HttpClient) { - EnableDebug = true + EnableDebug = true, + CreateWebsocketAsync = CreateWebsocketAsync }; + + return; + + async Task CreateWebsocketAsync(Uri uri, CancellationToken cancellationToken) + { + var websocketClient = webApplicationFactory.Server.CreateWebSocketClient(); + websocketClient.ConfigureRequest = request => + { + foreach (var (key, value) in OpenAIClient.WebsocketHeaders) + { + request.Headers[key] = value; + } + }; + var websocket = await websocketClient.ConnectAsync(uri, cancellationToken); + return websocket; + } + } + + private static Uri GetBaseAddressFromLaunchSettings() + { + var projectDir = Directory.GetCurrentDirectory(); + var launchSettings = Path.Combine(projectDir, "Properties", "launchSettings.json"); + var config = new ConfigurationBuilder() + .AddJsonFile(launchSettings, optional: false) + .Build(); + var applicationUrl = config["profiles:OpenAI_DotNet_Tests_Proxy:applicationUrl"]; + if (string.IsNullOrEmpty(applicationUrl)) + { + throw new InvalidOperationException("Base address not found in launchSettings.json"); + } + var hosts = applicationUrl.Split(";"); + return new Uri(hosts[0]); } } } diff --git a/OpenAI-DotNet-Tests/OpenAI-DotNet-Tests.csproj b/OpenAI-DotNet-Tests/OpenAI-DotNet-Tests.csproj index 491a81aa..e471aef8 100644 --- a/OpenAI-DotNet-Tests/OpenAI-DotNet-Tests.csproj +++ b/OpenAI-DotNet-Tests/OpenAI-DotNet-Tests.csproj @@ -12,6 +12,7 @@ + diff --git a/OpenAI-DotNet-Tests/TestFixture_00_01_Authentication.cs b/OpenAI-DotNet-Tests/TestFixture_00_01_Authentication.cs index e536da1c..82404e8b 100644 --- a/OpenAI-DotNet-Tests/TestFixture_00_01_Authentication.cs +++ b/OpenAI-DotNet-Tests/TestFixture_00_01_Authentication.cs @@ -177,11 +177,14 @@ public void Test_11_AzureConfigurationSettings() public void Test_12_CustomDomainConfigurationSettings() { var auth = new OpenAIAuthentication("sess-customIssuedToken"); - var settings = new OpenAIClientSettings(domain: "api.your-custom-domain.com"); + const string domain = "api.your-custom-domain.com"; + var settings = new OpenAIClientSettings(domain: domain); var api = new OpenAIClient(auth, settings); Console.WriteLine(api.OpenAIClientSettings.BaseRequest); Console.WriteLine(api.OpenAIClientSettings.BaseRequestUrlFormat); - Assert.AreEqual("https://api.your-custom-domain.com/v1/{0}", api.OpenAIClientSettings.BaseRequestUrlFormat); + Console.WriteLine(api.OpenAIClientSettings.BaseWebSocketUrlFormat); + Assert.AreEqual($"https://{domain}/v1/{{0}}", api.OpenAIClientSettings.BaseRequestUrlFormat); + Assert.AreEqual($"wss://{domain}/v1/{{0}}", api.OpenAIClientSettings.BaseWebSocketUrlFormat); } [TearDown] diff --git a/OpenAI-DotNet-Tests/TestFixture_00_01_Proxy.cs b/OpenAI-DotNet-Tests/TestFixture_00_01_Proxy.cs index 9a2b063b..fd47d1a3 100644 --- a/OpenAI-DotNet-Tests/TestFixture_00_01_Proxy.cs +++ b/OpenAI-DotNet-Tests/TestFixture_00_01_Proxy.cs @@ -1,15 +1,50 @@ // Licensed under the MIT License. See LICENSE in the project root for license information. +using Microsoft.Extensions.Configuration; using NUnit.Framework; using System; +using System.IO; using System.Net; using System.Net.Http; +using System.Net.WebSockets; +using System.Text; +using System.Threading; using System.Threading.Tasks; namespace OpenAI.Tests { internal class TestFixture_00_01_Proxy : AbstractTestFixture { + [Test] + public void Test_00_Proxy_Host_And_Ports() + { + var projectDir = Directory.GetCurrentDirectory(); + var launchSettings = Path.Combine(projectDir, "Properties", "launchSettings.json"); + + var config = new ConfigurationBuilder() + .AddJsonFile(launchSettings, optional: false) + .Build(); + + var applicationUrl = config["profiles:OpenAI_DotNet_Tests_Proxy:applicationUrl"]; + Assert.IsNotNull(applicationUrl); + var hosts = applicationUrl.Split(";"); + var https = hosts[0]; + Assert.AreEqual("https://localhost:7133", https); + var http = hosts[1]; + Assert.AreEqual("http://localhost:5105", http); + + var httpsUri = new Uri(https); + Assert.AreEqual("localhost", httpsUri.Host); + Assert.AreEqual(7133, httpsUri.Port); + + var httpUri = new Uri(http); + Assert.AreEqual("localhost", httpUri.Host); + Assert.AreEqual(5105, httpUri.Port); + + Assert.AreEqual(httpsUri.Host, HttpClient.BaseAddress?.Host); + Assert.AreEqual(httpsUri.Port, HttpClient.BaseAddress?.Port); + } + [Test] public async Task Test_01_Health() { @@ -35,11 +70,9 @@ public async Task Test_02_Client_Authenticated() [Test] public async Task Test_03_Client_Unauthenticated() { - var webApplicationFactory = new TestProxyFactory(); - var httpClient = webApplicationFactory.CreateClient(); - var settings = new OpenAIClientSettings(domain: "localhost:7133"); + var settings = new OpenAIClientSettings(domain: HttpClient.BaseAddress?.Authority); var auth = new OpenAIAuthentication("sess-invalid-token"); - var openAIClient = new OpenAIClient(auth, settings, httpClient); + var openAIClient = new OpenAIClient(auth, settings, HttpClient); try { @@ -47,12 +80,57 @@ public async Task Test_03_Client_Unauthenticated() } catch (HttpRequestException httpRequestException) { + Console.WriteLine(httpRequestException); // System.Net.Http.HttpRequestException : GetModelsAsync Failed! HTTP status code: Unauthorized | Response body: User is not authorized - Assert.IsTrue(httpRequestException.StatusCode == HttpStatusCode.Unauthorized); + Assert.AreEqual(HttpStatusCode.Unauthorized, httpRequestException.StatusCode); + } + catch (Exception e) + { + Console.WriteLine(e); + } + } + + [Test] + public async Task Test_04_Client_Websocket_Authentication() + { + try + { + using var cts = new CancellationTokenSource(TimeSpan.FromSeconds(10)); + var realtimeUri = new Uri(string.Format(OpenAIClient.OpenAIClientSettings.BaseWebSocketUrlFormat, "echo")); + Console.WriteLine(realtimeUri); + using var websocket = await OpenAIClient.CreateWebsocketAsync.Invoke(realtimeUri, cts.Token); + + if (websocket.State != WebSocketState.Open) + { + throw new Exception($"Failed to open WebSocket connection. Current state: {websocket.State}"); + } + + var data = new byte[1024]; + var buffer = new byte[1024 * 4]; + var random = new Random(); + random.NextBytes(data); + await websocket.SendAsync(new ArraySegment(data), WebSocketMessageType.Binary, true, cts.Token); + var receiveResult = await websocket.ReceiveAsync(new ArraySegment(buffer), cts.Token); + Assert.AreEqual(WebSocketMessageType.Binary, receiveResult.MessageType); + Assert.AreEqual(data.Length, receiveResult.Count); + var receivedData = buffer[..receiveResult.Count]; + Assert.AreEqual(data.Length, receivedData.Length); + Assert.AreEqual(data, receivedData); + var message = $"hello world! {DateTime.UtcNow}"; + var messageData = Encoding.UTF8.GetBytes(message); + await websocket.SendAsync(new ArraySegment(messageData), WebSocketMessageType.Text, true, cts.Token); + receiveResult = await websocket.ReceiveAsync(new ArraySegment(buffer), cts.Token); + Assert.AreEqual(WebSocketMessageType.Text, receiveResult.MessageType); + Assert.AreEqual(messageData.Length, receiveResult.Count); + Assert.AreEqual(messageData, buffer[..receiveResult.Count]); + var decodedMessage = Encoding.UTF8.GetString(buffer, 0, receiveResult.Count); + Assert.AreEqual(message, decodedMessage); + await websocket.CloseAsync(WebSocketCloseStatus.NormalClosure, "Test completed", cts.Token); } catch (Exception e) { Console.WriteLine(e); + throw; } } } diff --git a/OpenAI-DotNet-Tests/TestFixture_00_02_Extensions.cs b/OpenAI-DotNet-Tests/TestFixture_00_02_Extensions.cs index 5f3cec94..630fea38 100644 --- a/OpenAI-DotNet-Tests/TestFixture_00_02_Extensions.cs +++ b/OpenAI-DotNet-Tests/TestFixture_00_02_Extensions.cs @@ -7,8 +7,8 @@ using System; using System.Collections.Generic; using System.Linq; +using System.Numerics; using System.Text.Json; -using System.Text.Json.Nodes; using System.Threading.Tasks; namespace OpenAI.Tests @@ -35,7 +35,7 @@ public async Task Test_01_02_Tool_Funcs() var tools = new List { Tool.FromFunc("test_func", Function), - Tool.FromFunc("test_func_with_args", FunctionWithArgs), + Tool.FromFunc("test_func_with_args", FunctionWithArgs), Tool.FromFunc("test_func_weather", () => WeatherService.GetCurrentWeatherAsync("my location", WeatherService.WeatherUnit.Celsius)), Tool.FromFunc, string>("test_func_with_array_args", FunctionWithArrayArgs), Tool.FromFunc("test_single_return_arg", arg1 => arg1), @@ -50,52 +50,44 @@ public async Task Test_01_02_Tool_Funcs() Assert.IsNotNull(tools); var tool = tools[0]; Assert.IsNotNull(tool); - var result = tool.InvokeFunction(); + var toolCall = new ToolCall("toolCall_0", tool.Function.Name); + var result = tool.InvokeFunction(toolCall); Assert.AreEqual("success", result); var toolWithArgs = tools[1]; Assert.IsNotNull(toolWithArgs); - toolWithArgs.Function.Arguments = new JsonObject - { - ["arg1"] = "arg1", - ["arg2"] = "arg2" - }; - var resultWithArgs = toolWithArgs.InvokeFunction(); - Assert.AreEqual("arg1 arg2", resultWithArgs); + var testValue = new { arg1 = DateTime.UtcNow, arg2 = Vector3.One }; + toolCall = new ToolCall("toolCall_1", toolWithArgs.Function.Name, JsonSerializer.Serialize(testValue, OpenAIClient.JsonSerializationOptions)); + var resultWithArgs = toolWithArgs.InvokeFunction(toolCall); + Console.WriteLine(resultWithArgs); var toolWeather = tools[2]; Assert.IsNotNull(toolWeather); - var resultWeather = await toolWeather.InvokeFunctionAsync(); + toolCall = new ToolCall("toolCall_2", toolWeather.Function.Name); + var resultWeather = await toolWeather.InvokeFunctionAsync(toolCall); Assert.IsFalse(string.IsNullOrWhiteSpace(resultWeather)); Console.WriteLine(resultWeather); var toolWithArrayArgs = tools[3]; Assert.IsNotNull(toolWithArrayArgs); - toolWithArrayArgs.Function.Arguments = new JsonObject - { - ["args"] = new JsonArray { 1, 2, 3, 4, 5 } - }; - var resultWithArrayArgs = toolWithArrayArgs.InvokeFunction(); - Assert.AreEqual("1, 2, 3, 4, 5", resultWithArrayArgs); + var arrayTestValue = new { list = new List { 1, 2, 3, 4, 5 } }; + toolCall = new ToolCall("toolCall_3", toolWithArrayArgs.Function.Name, JsonSerializer.Serialize(arrayTestValue, OpenAIClient.JsonSerializationOptions)); + var resultWithArrayArgs = toolWithArrayArgs.InvokeFunction(toolCall); + Assert.AreEqual("{\"list\":[1,2,3,4,5]}", resultWithArrayArgs); Console.WriteLine(resultWithArrayArgs); - var singleReturnArg = tools[4]; - Assert.IsNotNull(singleReturnArg); - singleReturnArg.Function.Arguments = new JsonObject - { - ["arg1"] = "arg1" - }; - var resultSingleReturnArg = singleReturnArg.InvokeFunction(); + var toolSingleReturnArg = tools[4]; + Assert.IsNotNull(toolSingleReturnArg); + var singleReturnArgTestValue = new Dictionary { { "arg1", "arg1" } }; + toolCall = new ToolCall("toolCall_4", toolSingleReturnArg.Function.Name, JsonSerializer.Serialize(singleReturnArgTestValue, OpenAIClient.JsonSerializationOptions)); + var resultSingleReturnArg = toolSingleReturnArg.InvokeFunction(toolCall); Assert.AreEqual("arg1", resultSingleReturnArg); Console.WriteLine(resultSingleReturnArg); var toolNoSpecifiers = tools[5]; Assert.IsNotNull(toolNoSpecifiers); - toolNoSpecifiers.Function.Arguments = new JsonObject - { - ["arg1"] = "arg1" - }; - var resultNoSpecifiers = toolNoSpecifiers.InvokeFunction(); + toolCall = new ToolCall("toolCall_5", toolNoSpecifiers.Function.Name, JsonSerializer.Serialize(singleReturnArgTestValue, OpenAIClient.JsonSerializationOptions)); + var resultNoSpecifiers = toolNoSpecifiers.InvokeFunction(toolCall); Assert.AreEqual("arg1", resultNoSpecifiers); Console.WriteLine(resultNoSpecifiers); } @@ -105,14 +97,14 @@ private string Function() return "success"; } - private string FunctionWithArgs(string arg1, string arg2) + private string FunctionWithArgs(DateTime arg1, Vector3 arg2) { - return $"{arg1} {arg2}"; + return JsonSerializer.Serialize(new { arg1, arg2 }, OpenAIClient.JsonSerializationOptions); } - private string FunctionWithArrayArgs(List args) + private string FunctionWithArrayArgs(List list) { - return string.Join(", ", args); + return JsonSerializer.Serialize(new { list }, OpenAIClient.JsonSerializationOptions); } [Test] @@ -134,8 +126,8 @@ async Task Test(int id) // Delay a little bit to simulate calling OpenAi API: await Task.Delay(50); - - var result = tool.InvokeFunction(); + var toolCall = new ToolCall($"toolCall_{id}", tool.Function.Name); + var result = tool.InvokeFunction(toolCall); Assert.AreEqual(id, result); } } diff --git a/OpenAI-DotNet-Tests/TestFixture_03_Threads.cs b/OpenAI-DotNet-Tests/TestFixture_03_Threads.cs index 71c2978e..de38c5c1 100644 --- a/OpenAI-DotNet-Tests/TestFixture_03_Threads.cs +++ b/OpenAI-DotNet-Tests/TestFixture_03_Threads.cs @@ -240,7 +240,7 @@ public async Task Test_03_01_CreateRun() } [Test] - public async Task Test_03_03_01_CreateRun_Streaming() + public async Task Test_03_02_01_CreateRun_Streaming() { Assert.NotNull(OpenAIClient.ThreadsEndpoint); var assistant = await OpenAIClient.AssistantsEndpoint.CreateAssistantAsync( @@ -269,7 +269,6 @@ public async Task Test_03_03_01_CreateRun_Streaming() break; case RunStepResponse runStepEvent: Assert.NotNull(runStepEvent); - switch (runStepEvent.Object) { case "thread.run.step.delta": @@ -285,7 +284,6 @@ public async Task Test_03_03_01_CreateRun_Streaming() break; case MessageResponse messageEvent: Assert.NotNull(messageEvent); - switch (messageEvent.Object) { case "thread.message.delta": @@ -328,13 +326,14 @@ public async Task Test_03_03_01_CreateRun_Streaming() } [Test] - public async Task Test_03_03_02_CreateRun_Streaming_ToolCalls() + public async Task Test_03_02_02_CreateRun_Streaming_ToolCalls() { Assert.NotNull(OpenAIClient.ThreadsEndpoint); var tools = new List { Tool.GetOrCreateTool(typeof(WeatherService), nameof(WeatherService.GetCurrentWeatherAsync)) }; + Assert.IsTrue(tools.All(tool => tool.Function?.Arguments == null), "Expected all tool function arguments to be null"); var assistantRequest = new CreateAssistantRequest(tools: tools, instructions: "You are a helpful weather assistant. Use the appropriate unit based on geographical location."); var assistant = await OpenAIClient.AssistantsEndpoint.CreateAssistantAsync(assistantRequest); Assert.NotNull(assistant); @@ -468,9 +467,9 @@ public async Task Test_04_02_CreateThreadAndRun_Streaming() try { var run = await assistant.CreateThreadAndRunAsync("I need to solve the equation `3x + 11 = 14`. Can you help me?", - async @event => + async streamEvent => { - Console.WriteLine(@event.ToJsonString()); + Console.WriteLine(streamEvent.ToJsonString()); await Task.CompletedTask; }); Assert.IsNotNull(run); @@ -503,11 +502,11 @@ public async Task Test_04_02_CreateThreadAndRun_Streaming() public async Task Test_04_03_CreateThreadAndRun_Streaming_ToolCalls() { Assert.NotNull(OpenAIClient.ThreadsEndpoint); - var tools = new List { Tool.GetOrCreateTool(typeof(DateTimeUtility), nameof(DateTimeUtility.GetDateTime)) }; + Assert.IsTrue(tools.All(tool => tool.Function?.Arguments == null), "Expected all tool function arguments to be null"); var assistantRequest = new CreateAssistantRequest( instructions: "You are a helpful assistant.", tools: tools); @@ -516,11 +515,13 @@ public async Task Test_04_03_CreateThreadAndRun_Streaming_ToolCalls() ThreadResponse thread = null; // check if any exceptions thrown in stream event handler var exceptionThrown = false; + var hasInvokedCallback = false; try { async Task StreamEventHandler(IServerSentEvent streamEvent) { + hasInvokedCallback = true; Console.WriteLine($"{streamEvent.ToJsonString()}"); try @@ -536,8 +537,9 @@ async Task StreamEventHandler(IServerSentEvent streamEvent) var toolOutputs = await assistant.GetToolOutputsAsync(runResponse); var toolRun = await runResponse.SubmitToolOutputsAsync(toolOutputs, StreamEventHandler); Assert.NotNull(toolRun); - Assert.IsTrue(toolRun.Status == RunStatus.Completed); + Assert.IsTrue(toolRun.Status == RunStatus.Completed, $"Failed to complete submit tool outputs! {toolRun.Status}"); } + break; case Error errorResponse: throw errorResponse.Exception ?? new Exception(errorResponse.Message); @@ -551,10 +553,11 @@ async Task StreamEventHandler(IServerSentEvent streamEvent) } var run = await assistant.CreateThreadAndRunAsync("What date is it?", StreamEventHandler); - Assert.NotNull(thread); Assert.IsNotNull(run); + Assert.IsTrue(hasInvokedCallback); + Assert.NotNull(thread); Assert.IsFalse(exceptionThrown); - Assert.IsTrue(run.Status == RunStatus.Completed); + Assert.IsTrue(run.Status == RunStatus.Completed, $"Failed to complete run! {run.Status}"); } finally { @@ -576,6 +579,7 @@ public async Task Test_04_04_CreateThreadAndRun_SubmitToolOutput() Tool.CodeInterpreter, Tool.GetOrCreateTool(typeof(WeatherService), nameof(WeatherService.GetCurrentWeatherAsync)) }; + Assert.IsTrue(tools.All(tool => tool.Function?.Arguments == null), "Expected all tool function arguments to be null"); var assistantRequest = new CreateAssistantRequest(tools: tools, instructions: "You are a helpful weather assistant. Use the appropriate unit based on geographical location."); var assistant = await OpenAIClient.AssistantsEndpoint.CreateAssistantAsync(assistantRequest); Assert.IsNotNull(assistant); @@ -737,10 +741,11 @@ async Task StreamEventHandler(IServerSentEvent @event) run = await run.WaitForStatusChangeAsync(); Assert.IsNotNull(run); Assert.IsTrue(run.Status == RunStatus.Completed); + Console.WriteLine($"Created thread and run: {run.ThreadId} -> {run.Id} -> {run.CreatedAt}"); Assert.NotNull(thread); var messages = await thread.ListMessagesAsync(); - foreach (var response in messages.Items.OrderBy(response => response.CreatedAt)) + foreach (var response in messages.Items) { Console.WriteLine($"{response.Role}: {response.PrintContent()}"); } diff --git a/OpenAI-DotNet-Tests/TestFixture_04_Chat.cs b/OpenAI-DotNet-Tests/TestFixture_04_Chat.cs index 2cccddd8..7f2ce6ff 100644 --- a/OpenAI-DotNet-Tests/TestFixture_04_Chat.cs +++ b/OpenAI-DotNet-Tests/TestFixture_04_Chat.cs @@ -30,12 +30,9 @@ public async Task Test_01_01_GetChatCompletion() Assert.IsNotNull(response); Assert.IsNotNull(response.Choices); Assert.IsNotEmpty(response.Choices); - - foreach (var choice in response.Choices) - { - Console.WriteLine($"[{choice.Index}] {choice.Message.Role}: {choice} | Finish Reason: {choice.FinishReason}"); - } - + Assert.AreEqual(1, response.Choices.Count); + Assert.IsNotNull(response.FirstChoice); + Console.WriteLine($"{response.FirstChoice.Message.Role}: {response.FirstChoice} | Finish Reason: {response.FirstChoice.FinishReason}"); response.GetUsage(); } @@ -78,7 +75,56 @@ public async Task Test_01_02_GetChatStreamingCompletion() } [Test] - public async Task Test_01_03_JsonMode() + public async Task Test_01_03_GetChatCompletion_Modalities() + { + Assert.IsNotNull(OpenAIClient.ChatEndpoint); + + var messages = new List + { + new(Role.System, "You are a helpful assistant."), + new(Role.User, "Is a golden retriever a good family dog?"), + }; + + var chatRequest = new ChatRequest(messages, Model.GPT4oAudio, audioConfig: Voice.Alloy); + Assert.IsNotNull(chatRequest); + Assert.IsNotNull(chatRequest.AudioConfig); + Assert.AreEqual(Model.GPT4oAudio.Id, chatRequest.Model); + Assert.AreEqual(Voice.Alloy.Id, chatRequest.AudioConfig.Voice); + Assert.AreEqual(AudioFormat.Pcm16, chatRequest.AudioConfig.Format); + Assert.AreEqual(Modality.Text | Modality.Audio, chatRequest.Modalities); + var response = await OpenAIClient.ChatEndpoint.GetCompletionAsync(chatRequest); + Assert.IsNotNull(response); + Assert.IsNotNull(response.Choices); + Assert.IsNotEmpty(response.Choices); + Assert.AreEqual(1, response.Choices.Count); + Assert.IsNotNull(response.FirstChoice); + Console.WriteLine($"{response.FirstChoice.Message.Role}: {response.FirstChoice} | Finish Reason: {response.FirstChoice.FinishReason}"); + Assert.IsNotNull(response.FirstChoice.Message.AudioOutput.Data); + Assert.IsFalse(response.FirstChoice.Message.AudioOutput.Data.IsEmpty); + response.GetUsage(); + + messages.Add(response.FirstChoice.Message); + messages.Add(new(Role.User, "What are some other good family dog breeds?")); + + chatRequest = new ChatRequest(messages, Model.GPT4oAudio, audioConfig: Voice.Alloy); + Assert.IsNotNull(chatRequest); + Assert.IsNotNull(messages[2]); + Assert.AreEqual(Role.Assistant, messages[2].Role); + Assert.IsNotNull(messages[2].AudioOutput); + response = await OpenAIClient.ChatEndpoint.GetCompletionAsync(chatRequest); + Assert.IsNotNull(response); + Assert.IsNotNull(response.Choices); + Assert.IsNotEmpty(response.Choices); + Assert.AreEqual(1, response.Choices.Count); + Assert.IsFalse(string.IsNullOrWhiteSpace(response.FirstChoice)); + Console.WriteLine($"{response.FirstChoice.Message.Role}: {response.FirstChoice} | Finish Reason: {response.FirstChoice.FinishReason}"); + Assert.IsNotNull(response.FirstChoice.Message.AudioOutput.Data); + Assert.IsFalse(response.FirstChoice.Message.AudioOutput.Data.IsEmpty); + response.GetUsage(); + } + + [Test] + public async Task Test_01_04_JsonMode() { Assert.IsNotNull(OpenAIClient.ChatEndpoint); var messages = new List @@ -101,7 +147,7 @@ public async Task Test_01_03_JsonMode() } [Test] - public async Task Test_01_04_GetChatStreamingCompletionEnumerableAsync() + public async Task Test_01_05_GetChatStreamingCompletionEnumerableAsync() { Assert.IsNotNull(OpenAIClient.ChatEndpoint); var messages = new List @@ -149,6 +195,7 @@ public async Task Test_02_01_GetChatToolCompletion() { Tool.GetOrCreateTool(typeof(WeatherService), nameof(WeatherService.GetCurrentWeatherAsync)) }; + Assert.IsTrue(tools.All(tool => tool.Function?.Arguments == null)); var chatRequest = new ChatRequest(messages, tools: tools, toolChoice: "none"); var response = await OpenAIClient.ChatEndpoint.GetCompletionAsync(chatRequest); Assert.IsNotNull(response); @@ -219,6 +266,7 @@ public async Task Test_02_02_GetChatToolCompletion_Streaming() { Tool.GetOrCreateTool(typeof(WeatherService), nameof(WeatherService.GetCurrentWeatherAsync)) }; + Assert.IsTrue(tools.All(tool => tool.Function?.Arguments == null)); var chatRequest = new ChatRequest(messages, tools: tools, toolChoice: "none"); var response = await OpenAIClient.ChatEndpoint.StreamCompletionAsync(chatRequest, partialResponse => { @@ -302,6 +350,7 @@ public async Task Test_02_03_ChatCompletion_Multiple_Tools_Streaming() }; var tools = Tool.GetAllAvailableTools(false, forceUpdate: true, clearCache: true); + Assert.IsTrue(tools.All(tool => tool.Function?.Arguments == null)); var chatRequest = new ChatRequest(messages, model: Model.GPT4o, tools: tools, toolChoice: "auto", parallelToolCalls: true); var response = await OpenAIClient.ChatEndpoint.StreamCompletionAsync(chatRequest, partialResponse => { @@ -309,7 +358,7 @@ public async Task Test_02_03_ChatCompletion_Multiple_Tools_Streaming() if (partialResponse.Usage != null) { return; } Assert.NotNull(partialResponse.Choices); Assert.NotZero(partialResponse.Choices.Count); - }); + }, true); Assert.IsTrue(response.FirstChoice.FinishReason == "tool_calls"); messages.Add(response.FirstChoice.Message); @@ -347,6 +396,7 @@ public async Task Test_02_04_GetChatToolForceCompletion() } var tools = Tool.GetAllAvailableTools(false, forceUpdate: true, clearCache: true); + Assert.IsTrue(tools.All(tool => tool.Function?.Arguments == null)); var chatRequest = new ChatRequest(messages, tools: tools, toolChoice: "none"); var response = await OpenAIClient.ChatEndpoint.GetCompletionAsync(chatRequest); Assert.IsNotNull(response); @@ -384,6 +434,39 @@ public async Task Test_02_04_GetChatToolForceCompletion() Console.WriteLine($"{Role.Tool}: {functionResult}"); } + [Test] + public async Task Test_02_05_GetChat_Enumerable_TestToolCalls_Streaming() + { + Assert.IsNotNull(OpenAIClient.ChatEndpoint); + + var messages = new List + { + new(Role.System, "You must extract the name from the input"), + new(Role.User, "My name is Joe") + }; + + var tools = new List + { + Tool.FromFunc("extract_first_name", (string name) => name) + }; + + var request = new ChatRequest(messages, tools); + + await foreach (var streamResponse in OpenAIClient.ChatEndpoint.StreamCompletionEnumerableAsync(request)) + { + Console.WriteLine(streamResponse.ToJsonString()); + + if (streamResponse.FirstChoice.Message is { } message) + { + foreach (var tool in message.ToolCalls) + { + var output = tool.InvokeFunction(); + Console.WriteLine($"Output from StreamCompletionEnumerableAsync: {output}"); + } + } + } + } + [Test] public async Task Test_03_01_GetChatVision() { @@ -425,7 +508,7 @@ public async Task Test_03_02_GetChatVisionStreaming() if (partialResponse.Usage != null) { return; } Assert.NotNull(partialResponse.Choices); Assert.NotZero(partialResponse.Choices.Count); - }); + }, true); Assert.IsNotNull(response); Assert.IsNotNull(response.Choices); Console.WriteLine($"{response.FirstChoice.Message.Role}: {response.FirstChoice} | Finish Reason: {response.FirstChoice.FinishDetails}"); @@ -496,40 +579,7 @@ public async Task Test_04_02_GetChatLogProbsStreaming() } [Test] - public async Task Test_05_02_GetChat_Enumerable_TestToolCalls_Streaming() - { - Assert.IsNotNull(OpenAIClient.ChatEndpoint); - - var messages = new List - { - new(Role.System, "You must extract the name from the input"), - new(Role.User, "My name is Joe") - }; - - var tools = new List - { - Tool.FromFunc("extract_first_name", (string name) => name) - }; - - var request = new ChatRequest(messages, tools); - - await foreach (var streamResponse in OpenAIClient.ChatEndpoint.StreamCompletionEnumerableAsync(request)) - { - Console.WriteLine(streamResponse.ToJsonString()); - - if (streamResponse.FirstChoice.Message is { } message) - { - foreach (var tool in message.ToolCalls) - { - var output = tool.InvokeFunction(); - Console.WriteLine($"Output from StreamCompletionEnumerableAsync: {output}"); - } - } - } - } - - [Test] - public async Task Test_06_01_GetChat_JsonSchema() + public async Task Test_05_01_GetChat_JsonSchema() { Assert.IsNotNull(OpenAIClient.ChatEndpoint); @@ -562,7 +612,7 @@ public async Task Test_06_01_GetChat_JsonSchema() } [Test] - public async Task Test_06_01_GetChat_JsonSchema_Streaming() + public async Task Test_05_02_GetChat_JsonSchema_Streaming() { Assert.IsNotNull(OpenAIClient.ChatEndpoint); diff --git a/OpenAI-DotNet-Tests/TestFixture_13_Realtime.cs b/OpenAI-DotNet-Tests/TestFixture_13_Realtime.cs new file mode 100644 index 00000000..e2d44bf1 --- /dev/null +++ b/OpenAI-DotNet-Tests/TestFixture_13_Realtime.cs @@ -0,0 +1,179 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using NUnit.Framework; +using OpenAI.Models; +using OpenAI.Realtime; +using System; +using System.Collections.Generic; +using System.Threading; +using System.Threading.Tasks; + +namespace OpenAI.Tests +{ + internal class TestFixture_13_Realtime : AbstractTestFixture + { + [Test] + public async Task Test_01_RealtimeSession() + { + RealtimeSession session = null; + + try + { + Assert.IsNotNull(OpenAIClient.RealtimeEndpoint); + var cts = new CancellationTokenSource(TimeSpan.FromSeconds(20)); + var wasGoodbyeCalled = false; + var tools = new List + { + Tool.FromFunc("goodbye", () => + { + cts.Cancel(); + wasGoodbyeCalled = true; + return "Goodbye!"; + }) + }; + + var options = new Options(Model.GPT4oRealtime, tools: tools); + session = await OpenAIClient.RealtimeEndpoint.CreateSessionAsync(options, cts.Token); + Assert.IsNotNull(session); + Assert.IsNotNull(session.Options); + Assert.AreEqual(Model.GPT4oRealtime.Id, options.Model); + Assert.AreEqual(options.Model, session.Options.Model); + Assert.IsNotNull(options.Tools); + Assert.IsNotEmpty(options.Tools); + Assert.AreEqual(1, options.Tools.Count); + Assert.AreEqual(options.Tools.Count, session.Options.Tools.Count); + Assert.AreEqual(options.Tools[0].Name, session.Options.Tools[0].Name); + Assert.AreEqual(Modality.Audio | Modality.Text, options.Modalities); + Assert.AreEqual(Modality.Audio | Modality.Text, session.Options.Modalities); + var responseTask = session.ReceiveUpdatesAsync(SessionEvents, cts.Token); + + await session.SendAsync(new ConversationItemCreateRequest("Hello!"), cts.Token); + await session.SendAsync(new CreateResponseRequest(), cts.Token); + await session.SendAsync(new InputAudioBufferAppendRequest(new ReadOnlyMemory(new byte[1024 * 4])), cts.Token); + await session.SendAsync(new ConversationItemCreateRequest("Goodbye!"), cts.Token); + await session.SendAsync(new CreateResponseRequest(), cts.Token); + + void SessionEvents(IServerEvent @event) + { + switch (@event) + { + case ResponseAudioTranscriptResponse transcriptResponse: + Console.WriteLine(transcriptResponse.ToString()); + break; + case ResponseFunctionCallArgumentsResponse functionCallResponse: + if (functionCallResponse.IsDone) + { + ToolCall toolCall = functionCallResponse; + toolCall.InvokeFunction(); + } + + break; + } + } + + await responseTask.ConfigureAwait(true); + Assert.IsTrue(wasGoodbyeCalled); + } + catch (Exception e) + { + switch (e) + { + case ObjectDisposedException: + // ignore + break; + default: + Console.WriteLine(e); + throw; + } + } + finally + { + session?.Dispose(); + } + } + + [Test] + public async Task Test_01_RealtimeSession_IAsyncEnumerable() + { + RealtimeSession session = null; + + try + { + Assert.IsNotNull(OpenAIClient.RealtimeEndpoint); + var cts = new CancellationTokenSource(TimeSpan.FromSeconds(20)); + var wasGoodbyeCalled = false; + var tools = new List + { + Tool.FromFunc("goodbye", () => + { + cts.Cancel(); + wasGoodbyeCalled = true; + return "Goodbye!"; + }) + }; + + var options = new Options(Model.GPT4oRealtime, tools: tools); + session = await OpenAIClient.RealtimeEndpoint.CreateSessionAsync(options, cts.Token); + Assert.IsNotNull(session); + Assert.IsNotNull(session.Options); + Assert.AreEqual(Model.GPT4oRealtime.Id, options.Model); + Assert.AreEqual(options.Model, session.Options.Model); + Assert.IsNotNull(options.Tools); + Assert.IsNotEmpty(options.Tools); + Assert.AreEqual(1, options.Tools.Count); + Assert.AreEqual(options.Tools.Count, session.Options.Tools.Count); + Assert.AreEqual(options.Tools[0].Name, session.Options.Tools[0].Name); + Assert.AreEqual(Modality.Audio | Modality.Text, options.Modalities); + Assert.AreEqual(Modality.Audio | Modality.Text, session.Options.Modalities); + + await foreach (var @event in session.ReceiveUpdatesAsync(cts.Token).ConfigureAwait(false)) + { + switch (@event) + { + case ConversationItemCreatedResponse: + Console.WriteLine("conversation created"); + break; + case SessionResponse sessionResponse: + if (sessionResponse.Type != "session.created") { return; } + await session.SendAsync(new ConversationItemCreateRequest("Hello!"), cts.Token); + await session.SendAsync(new CreateResponseRequest(), cts.Token); + await session.SendAsync(new InputAudioBufferAppendRequest(new ReadOnlyMemory(new byte[1024 * 4])), cts.Token); + await session.SendAsync(new ConversationItemCreateRequest("Goodbye!"), cts.Token); + await session.SendAsync(new CreateResponseRequest(), cts.Token); + break; + case ResponseAudioTranscriptResponse transcriptResponse: + Console.WriteLine(transcriptResponse.ToString()); + break; + case ResponseFunctionCallArgumentsResponse functionCallResponse: + if (functionCallResponse.IsDone) + { + ToolCall toolCall = functionCallResponse; + // ReSharper disable once MethodHasAsyncOverloadWithCancellation + toolCall.InvokeFunction(); + } + + break; + } + } + + Assert.IsTrue(wasGoodbyeCalled); + } + catch (Exception e) + { + switch (e) + { + case ObjectDisposedException: + // ignore + break; + default: + Console.WriteLine(e); + throw; + } + } + finally + { + session?.Dispose(); + } + } + } +} diff --git a/OpenAI-DotNet/Assistants/AssistantExtensions.cs b/OpenAI-DotNet/Assistants/AssistantExtensions.cs index 07c55e2e..2141fd06 100644 --- a/OpenAI-DotNet/Assistants/AssistantExtensions.cs +++ b/OpenAI-DotNet/Assistants/AssistantExtensions.cs @@ -1,10 +1,8 @@ // Licensed under the MIT License. See LICENSE in the project root for license information. -using OpenAI.Files; using OpenAI.Threads; using System; using System.Collections.Generic; -using System.IO; using System.Linq; using System.Threading; using System.Threading.Tasks; @@ -23,7 +21,7 @@ public static class AssistantExtensions public static async Task ModifyAsync(this AssistantResponse assistant, CreateAssistantRequest request, CancellationToken cancellationToken = default) => await assistant.Client.AssistantsEndpoint.ModifyAssistantAsync( assistantId: assistant.Id, - request: request ?? new(assistant), + request: request ?? new CreateAssistantRequest(assistant), cancellationToken: cancellationToken).ConfigureAwait(false); /// @@ -63,14 +61,6 @@ from vectorStoreId in assistant.ToolResources?.FileSearch?.VectorStoreIds return deleteTasks.TrueForAll(task => task.Result); } - [Obsolete("use new overload with Func instead.")] - public static async Task CreateThreadAndRunAsync(this AssistantResponse assistant, CreateThreadRequest request, Action streamEventHandler, CancellationToken cancellationToken = default) - => await CreateThreadAndRunAsync(assistant, request, streamEventHandler == null ? null : async serverSentEvent => - { - streamEventHandler.Invoke(serverSentEvent); - await Task.CompletedTask; - }, cancellationToken); - /// /// Create a thread and run it. /// @@ -112,10 +102,29 @@ public static string InvokeToolCall(this AssistantResponse assistant, ToolCall t throw new InvalidOperationException($"Cannot invoke built in tool {toolCall.Type}"); } - var tool = assistant.Tools.FirstOrDefault(tool => tool.IsFunction && tool.Function.Name == toolCall.FunctionCall.Name) ?? - throw new InvalidOperationException($"Failed to find a valid tool for [{toolCall.Id}] {toolCall.FunctionCall.Name}"); - tool.Function.Arguments = toolCall.FunctionCall.Arguments; - return tool.InvokeFunction(); + var tool = assistant.Tools.FirstOrDefault(tool => tool.IsFunction && tool.Function.Name == toolCall.Function.Name) ?? + throw new InvalidOperationException($"Failed to find a valid tool for [{toolCall.Id}] {toolCall.Function.Name}"); + return tool.InvokeFunction(toolCall); + } + + /// + /// Invoke the assistant's tool function using the . + /// + /// The expected signature return type. + /// . + /// . + /// Tool output result as . + /// Only call this directly on your if you know the method is synchronous. + public static T InvokeToolCall(this AssistantResponse assistant, ToolCall toolCall) + { + if (!toolCall.IsFunction) + { + throw new InvalidOperationException($"Cannot invoke built in tool {toolCall.Type}"); + } + + var tool = assistant.Tools.FirstOrDefault(tool => tool.IsFunction && tool.Function.Name == toolCall.Function.Name) ?? + throw new InvalidOperationException($"Failed to find a valid tool for [{toolCall.Id}] {toolCall.Function.Name}"); + return tool.InvokeFunction(toolCall); } /// @@ -132,10 +141,29 @@ public static async Task InvokeToolCallAsync(this AssistantResponse assi throw new InvalidOperationException($"Cannot invoke built in tool {toolCall.Type}"); } - var tool = assistant.Tools.FirstOrDefault(tool => tool.IsFunction && tool.Function.Name == toolCall.FunctionCall.Name) ?? - throw new InvalidOperationException($"Failed to find a valid tool for [{toolCall.Id}] {toolCall.FunctionCall.Name}"); - tool.Function.Arguments = toolCall.FunctionCall.Arguments; - return await tool.InvokeFunctionAsync(cancellationToken).ConfigureAwait(false); + var tool = assistant.Tools.FirstOrDefault(tool => tool.Type == "function" && tool.Function.Name == toolCall.Function.Name) ?? + throw new InvalidOperationException($"Failed to find a valid tool for [{toolCall.Id}] {toolCall.Function.Name}"); + return await tool.InvokeFunctionAsync(toolCall, cancellationToken); + } + + /// + /// Invoke the assistant's tool function using the . + /// + /// The expected signature return type. + /// . + /// . + /// Optional, . + /// Tool output result as . + public static async Task InvokeToolCallAsync(this AssistantResponse assistant, ToolCall toolCall, CancellationToken cancellationToken = default) + { + if (!toolCall.IsFunction) + { + throw new InvalidOperationException($"Cannot invoke built in tool {toolCall.Type}"); + } + + var tool = assistant.Tools.FirstOrDefault(tool => tool.Type == "function" && tool.Function.Name == toolCall.Function.Name) ?? + throw new InvalidOperationException($"Failed to find a valid tool for [{toolCall.Id}] {toolCall.Function.Name}"); + return await tool.InvokeFunctionAsync(toolCall, cancellationToken); } /// @@ -148,16 +176,6 @@ public static async Task InvokeToolCallAsync(this AssistantResponse assi public static ToolOutput GetToolOutput(this AssistantResponse assistant, ToolCall toolCall) => new(toolCall.Id, assistant.InvokeToolCall(toolCall)); - /// - /// Calls each tool's function, with the provided arguments from the toolCalls and returns the outputs. - /// - /// . - /// A collection of s. - /// A collection of s. - [Obsolete("Use GetToolOutputsAsync instead.")] - public static IReadOnlyList GetToolOutputs(this AssistantResponse assistant, IEnumerable toolCalls) - => toolCalls.Select(assistant.GetToolOutput).ToList(); - /// /// Calls the tool's function, with the provided arguments from the toolCall and returns the output. /// @@ -178,147 +196,29 @@ public static async Task GetToolOutputAsync(this AssistantResponse a /// A collection of s. /// Optional, . /// A collection of s. - public static async Task> GetToolOutputsAsync(this AssistantResponse assistant, IEnumerable toolCalls, CancellationToken cancellationToken = default) - => await Task.WhenAll(toolCalls.Select(async toolCall => await assistant.GetToolOutputAsync(toolCall, cancellationToken).ConfigureAwait(false))).ConfigureAwait(false); + public static async Task> GetToolOutputsAsync(this AssistantResponse assistant, IEnumerable toolCalls, CancellationToken cancellationToken = default) + => await Task.WhenAll(toolCalls.Select(toolCall => assistant.GetToolOutputAsync(toolCall, cancellationToken))).ConfigureAwait(false); /// /// Calls each tool's function, with the provided arguments from the toolCalls and returns the outputs. /// /// . - /// The to complete the tool calls for. + /// A collection of s. /// Optional, . /// A collection of s. - public static async Task> GetToolOutputsAsync(this AssistantResponse assistant, RunResponse run, CancellationToken cancellationToken = default) - => await GetToolOutputsAsync(assistant, run.RequiredAction.SubmitToolOutputs.ToolCalls, cancellationToken).ConfigureAwait(false); - - #endregion Tools - - #region Files (Obsolete) - - /// - /// Returns a list of assistant files. - /// - /// . - /// . - /// Optional, . - /// . - [Obsolete("Files removed from Assistants. Files now belong to ToolResources.")] - public static async Task> ListFilesAsync(this AssistantResponse assistant, ListQuery query = null, CancellationToken cancellationToken = default) - => await assistant.Client.AssistantsEndpoint.ListFilesAsync(assistant.Id, query, cancellationToken).ConfigureAwait(false); - - /// - /// Attach a file to the . - /// - /// . - /// - /// A (with purpose="assistants") that the assistant should use. - /// Useful for tools like retrieval and code_interpreter that can access files. - /// - /// Optional, . - /// . - [Obsolete("Files removed from Assistants. Files now belong to ToolResources.")] - public static async Task AttachFileAsync(this AssistantResponse assistant, FileResponse file, CancellationToken cancellationToken = default) - => await assistant.Client.AssistantsEndpoint.AttachFileAsync(assistant.Id, file, cancellationToken).ConfigureAwait(false); - - /// - /// Uploads a new file at the specified and attaches it to the . - /// - /// . - /// The local file path to upload. - /// Optional, . - /// . - [Obsolete("Files removed from Assistants. Files now belong to ToolResources.")] - public static async Task UploadFileAsync(this AssistantResponse assistant, string filePath, CancellationToken cancellationToken = default) - { - var file = await assistant.Client.FilesEndpoint.UploadFileAsync(new FileUploadRequest(filePath, FilePurpose.Assistants), cancellationToken).ConfigureAwait(false); - return await assistant.AttachFileAsync(file, cancellationToken).ConfigureAwait(false); - } - - /// - /// Uploads a new file at the specified path and attaches it to the assistant. - /// - /// . - /// The file contents to upload. - /// The name of the file. - /// Optional, . - /// . - [Obsolete("Files removed from Assistants. Files now belong to ToolResources.")] - public static async Task UploadFileAsync(this AssistantResponse assistant, Stream stream, string fileName, CancellationToken cancellationToken = default) - { - var file = await assistant.Client.FilesEndpoint.UploadFileAsync(new FileUploadRequest(stream, fileName, FilePurpose.Assistants), cancellationToken).ConfigureAwait(false); - return await assistant.AttachFileAsync(file, cancellationToken).ConfigureAwait(false); - } - - /// - /// Retrieves the . - /// - /// . - /// The ID of the file we're getting. - /// Optional, . - /// . - [Obsolete("Files removed from Assistants. Files now belong to ToolResources.")] - public static async Task RetrieveFileAsync(this AssistantResponse assistant, string fileId, CancellationToken cancellationToken = default) - => await assistant.Client.AssistantsEndpoint.RetrieveFileAsync(assistant.Id, fileId, cancellationToken).ConfigureAwait(false); - - /// - /// Remove the file from the assistant it is attached to. - /// - /// - /// Note that removing an AssistantFile does not delete the original File object, - /// it simply removes the association between that File and the Assistant. - /// To delete a File, use . - /// - /// . - /// Optional, . - /// True, if file was removed. - [Obsolete("Files removed from Assistants. Files now belong to ToolResources.")] - public static async Task RemoveFileAsync(this AssistantFileResponse file, CancellationToken cancellationToken = default) - => await file.Client.AssistantsEndpoint.RemoveFileAsync(file.AssistantId, file.Id, cancellationToken).ConfigureAwait(false); - - /// - /// Remove the file from the assistant it is attached to. - /// - /// - /// Note that removing an AssistantFile does not delete the original File object, - /// it simply removes the association between that File and the Assistant. - /// To delete a File, use . - /// - /// . - /// The ID of the file to remove. - /// Optional, . - /// True, if file was removed. - [Obsolete("Files removed from Assistants. Files now belong to ToolResources.")] - public static async Task RemoveFileAsync(this AssistantResponse assistant, string fileId, CancellationToken cancellationToken = default) - => await assistant.Client.AssistantsEndpoint.RemoveFileAsync(assistant.Id, fileId, cancellationToken).ConfigureAwait(false); - - /// - /// Removes and Deletes a file from the assistant. - /// - /// . - /// Optional, . - /// True, if the file was successfully removed from the assistant and deleted. - [Obsolete("Files removed from Assistants. Files now belong to ToolResources.")] - public static async Task DeleteFileAsync(this AssistantFileResponse file, CancellationToken cancellationToken = default) - { - var isRemoved = await file.RemoveFileAsync(cancellationToken).ConfigureAwait(false); - return isRemoved && await file.Client.FilesEndpoint.DeleteFileAsync(file.Id, cancellationToken).ConfigureAwait(false); - } + public static async Task> GetToolOutputsAsync(this AssistantResponse assistant, IEnumerable toolCalls, CancellationToken cancellationToken = default) + => await Task.WhenAll(toolCalls.Select(toolCall => assistant.GetToolOutputAsync(toolCall, cancellationToken))).ConfigureAwait(false); /// - /// Removes and Deletes a file from the . + /// Calls each tool's function, with the provided arguments from the toolCalls and returns the outputs. /// /// . - /// The ID of the file to delete. + /// The to complete the tool calls for. /// Optional, . - /// True, if the file was successfully removed from the assistant and deleted. - [Obsolete("Files removed from Assistants. Files now belong to ToolResources.")] - public static async Task DeleteFileAsync(this AssistantResponse assistant, string fileId, CancellationToken cancellationToken = default) - { - var isRemoved = await assistant.Client.AssistantsEndpoint.RemoveFileAsync(assistant.Id, fileId, cancellationToken).ConfigureAwait(false); - if (!isRemoved) { return false; } - return await assistant.Client.FilesEndpoint.DeleteFileAsync(fileId, cancellationToken).ConfigureAwait(false); - } + /// A collection of s. + public static async Task> GetToolOutputsAsync(this AssistantResponse assistant, RunResponse run, CancellationToken cancellationToken = default) + => await GetToolOutputsAsync(assistant, run.RequiredAction.SubmitToolOutputs.ToolCalls, cancellationToken).ConfigureAwait(false); - #endregion Files (Obsolete) + #endregion Tools } } diff --git a/OpenAI-DotNet/Assistants/AssistantFileResponse.cs b/OpenAI-DotNet/Assistants/AssistantFileResponse.cs deleted file mode 100644 index b1f3a153..00000000 --- a/OpenAI-DotNet/Assistants/AssistantFileResponse.cs +++ /dev/null @@ -1,49 +0,0 @@ -// Licensed under the MIT License. See LICENSE in the project root for license information. - -using System; -using System.Text.Json.Serialization; - -namespace OpenAI.Assistants -{ - /// - /// File attached to an assistant. - /// - [Obsolete("Removed. Use Assistant.ToolResources instead.")] - public sealed class AssistantFileResponse : BaseResponse - { - /// - /// The identifier, which can be referenced in API endpoints. - /// - [JsonInclude] - [JsonPropertyName("id")] - public string Id { get; private set; } - - /// - /// The object type, which is always assistant.file. - /// - [JsonInclude] - [JsonPropertyName("object")] - public string Object { get; private set; } - - /// - /// The Unix timestamp (in seconds) for when the assistant file was created. - /// - [JsonInclude] - [JsonPropertyName("created_at")] - public int CreatedAtUnixTimeSeconds { get; private set; } - - [JsonIgnore] - public DateTime CreatedAt => DateTimeOffset.FromUnixTimeSeconds(CreatedAtUnixTimeSeconds).DateTime; - - /// - /// The assistant ID that the file is attached to. - /// - [JsonInclude] - [JsonPropertyName("assistant_id")] - public string AssistantId { get; private set; } - - public static implicit operator string(AssistantFileResponse file) => file?.ToString(); - - public override string ToString() => Id; - } -} diff --git a/OpenAI-DotNet/Assistants/AssistantResponse.cs b/OpenAI-DotNet/Assistants/AssistantResponse.cs index eb01da86..376282dd 100644 --- a/OpenAI-DotNet/Assistants/AssistantResponse.cs +++ b/OpenAI-DotNet/Assistants/AssistantResponse.cs @@ -88,15 +88,6 @@ public sealed class AssistantResponse : BaseResponse [JsonPropertyName("tool_resources")] public ToolResources ToolResources { get; private set; } - /// - /// A list of file IDs attached to this assistant. - /// There can be a maximum of 20 files attached to the assistant. - /// Files are ordered by their creation date in ascending order. - /// - [JsonIgnore] - [Obsolete("Files removed from Assistants. Files now belong to ToolResources.")] - public IReadOnlyList FileIds => null; - /// /// Set of 16 key-value pairs that can be attached to an object. /// This can be useful for storing additional information about the object in a structured format. @@ -130,14 +121,16 @@ public sealed class AssistantResponse : BaseResponse /// which guarantees the message the model generates is valid JSON. /// /// - /// Important: When using JSON mode, you must also instruct the model to produce JSON yourself via a system or user message. - /// Without this, the model may generate an unending stream of whitespace until the generation reaches the token limit, - /// resulting in a long-running and seemingly "stuck" request. Also note that the message content may be partially cut off if finish_reason="length", + /// Important: When using JSON mode you must still instruct the model to produce JSON yourself via some conversation message, + /// for example via your system message. If you don't do this, the model may generate an unending stream of + /// whitespace until the generation reaches the token limit, which may take a lot of time and give the appearance + /// of a "stuck" request. Also note that the message content may be partial (i.e. cut off) if finish_reason="length", /// which indicates the generation exceeded max_tokens or the conversation exceeded the max context length. /// [JsonInclude] [JsonPropertyName("response_format")] [JsonConverter(typeof(ResponseFormatConverter))] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)] public ResponseFormatObject ResponseFormatObject { get; private set; } [JsonIgnore] diff --git a/OpenAI-DotNet/Assistants/AssistantsEndpoint.cs b/OpenAI-DotNet/Assistants/AssistantsEndpoint.cs index 980cf2f7..c63bf24a 100644 --- a/OpenAI-DotNet/Assistants/AssistantsEndpoint.cs +++ b/OpenAI-DotNet/Assistants/AssistantsEndpoint.cs @@ -1,8 +1,6 @@ // Licensed under the MIT License. See LICENSE in the project root for license information. using OpenAI.Extensions; -using OpenAI.Files; -using System; using System.Text.Json; using System.Threading; using System.Threading.Tasks; @@ -104,83 +102,5 @@ public async Task DeleteAssistantAsync(string assistantId, CancellationTok var responseAsString = await response.ReadAsStringAsync(EnableDebug, cancellationToken).ConfigureAwait(false); return response.Deserialize(responseAsString, client)?.Deleted ?? false; } - - #region Files (Obsolete) - - /// - /// Returns a list of assistant files. - /// - /// The ID of the assistant the file belongs to. - /// . - /// Optional, . - /// . - [Obsolete("Files removed from Assistants. Files now belong to ToolResources.")] - public async Task> ListFilesAsync(string assistantId, ListQuery query = null, CancellationToken cancellationToken = default) - { - using var response = await client.Client.GetAsync(GetUrl($"/{assistantId}/files", query), cancellationToken).ConfigureAwait(false); - var responseAsString = await response.ReadAsStringAsync(EnableDebug, cancellationToken).ConfigureAwait(false); - return response.Deserialize>(responseAsString, client); - } - - /// - /// Attach a file to an assistant. - /// - /// The ID of the assistant for which to attach a file. - /// - /// A (with purpose="assistants") that the assistant should use. - /// Useful for tools like retrieval and code_interpreter that can access files. - /// - /// Optional, . - /// . - [Obsolete("Files removed from Assistants. Files now belong to ToolResources.")] - public async Task AttachFileAsync(string assistantId, FileResponse file, CancellationToken cancellationToken = default) - { - if (file?.Purpose?.Equals(FilePurpose.Assistants) != true) - { - throw new InvalidOperationException($"{nameof(file)}.{nameof(file.Purpose)} must be 'assistants'!"); - } - - using var payload = JsonSerializer.Serialize(new { file_id = file.Id }, OpenAIClient.JsonSerializationOptions).ToJsonStringContent(); - using var response = await client.Client.PostAsync(GetUrl($"/{assistantId}/files"), payload, cancellationToken).ConfigureAwait(false); - var responseAsString = await response.ReadAsStringAsync(EnableDebug, payload, cancellationToken).ConfigureAwait(false); - return response.Deserialize(responseAsString, client); - } - - /// - /// Retrieves an AssistantFile. - /// - /// The ID of the assistant who the file belongs to. - /// The ID of the file we're getting. - /// Optional, . - /// . - [Obsolete("Files removed from Assistants. Files now belong to ToolResources.")] - public async Task RetrieveFileAsync(string assistantId, string fileId, CancellationToken cancellationToken = default) - { - using var response = await client.Client.GetAsync(GetUrl($"/{assistantId}/files/{fileId}"), cancellationToken).ConfigureAwait(false); - var responseAsString = await response.ReadAsStringAsync(EnableDebug, cancellationToken).ConfigureAwait(false); - return response.Deserialize(responseAsString, client); - } - - /// - /// Remove an assistant file. - /// - /// - /// Note that removing an AssistantFile does not delete the original File object, - /// it simply removes the association between that File and the Assistant. - /// To delete a File, use the File delete endpoint instead. - /// - /// The ID of the assistant that the file belongs to. - /// The ID of the file to delete. - /// Optional, . - /// True, if file was removed. - [Obsolete("Files removed from Assistants. Files now belong to ToolResources.")] - public async Task RemoveFileAsync(string assistantId, string fileId, CancellationToken cancellationToken = default) - { - using var response = await client.Client.DeleteAsync(GetUrl($"/{assistantId}/files/{fileId}"), cancellationToken).ConfigureAwait(false); - var responseAsString = await response.ReadAsStringAsync(EnableDebug, cancellationToken).ConfigureAwait(false); - return response.Deserialize(responseAsString, client)?.Deleted ?? false; - } - - #endregion Files (Obsolete) } } diff --git a/OpenAI-DotNet/Assistants/CreateAssistantRequest.cs b/OpenAI-DotNet/Assistants/CreateAssistantRequest.cs index d6441a78..5d6a47d2 100644 --- a/OpenAI-DotNet/Assistants/CreateAssistantRequest.cs +++ b/OpenAI-DotNet/Assistants/CreateAssistantRequest.cs @@ -1,7 +1,6 @@ // Licensed under the MIT License. See LICENSE in the project root for license information. using OpenAI.Extensions; -using System; using System.Collections.Generic; using System.Linq; using System.Text.Json.Serialization; @@ -98,19 +97,6 @@ public CreateAssistantRequest( { } - [Obsolete("use new .ctr")] - public CreateAssistantRequest( - AssistantResponse assistant, - string model, - string name, - string description, - string instructions, - IEnumerable tools, - IEnumerable files, - IReadOnlyDictionary metadata) - { - } - /// /// Constructor. /// @@ -270,9 +256,10 @@ public CreateAssistantRequest( /// which guarantees the message the model generates is valid JSON. /// /// - /// Important: When using JSON mode, you must also instruct the model to produce JSON yourself via a system or user message. - /// Without this, the model may generate an unending stream of whitespace until the generation reaches the token limit, - /// resulting in a long-running and seemingly "stuck" request. Also note that the message content may be partially cut off if finish_reason="length", + /// Important: When using JSON mode you must still instruct the model to produce JSON yourself via some conversation message, + /// for example via your system message. If you don't do this, the model may generate an unending stream of + /// whitespace until the generation reaches the token limit, which may take a lot of time and give the appearance + /// of a "stuck" request. Also note that the message content may be partial (i.e. cut off) if finish_reason="length", /// which indicates the generation exceeded max_tokens or the conversation exceeded the max context length. /// [JsonPropertyName("response_format")] diff --git a/OpenAI-DotNet/Audio/AudioTranslationRequest.cs b/OpenAI-DotNet/Audio/AudioTranslationRequest.cs index 21812c0a..7e602096 100644 --- a/OpenAI-DotNet/Audio/AudioTranslationRequest.cs +++ b/OpenAI-DotNet/Audio/AudioTranslationRequest.cs @@ -70,7 +70,7 @@ public AudioTranslationRequest( Stream audio, string audioName, string model = null, - string prompt = "response should be in english.", + string prompt = null, AudioResponseFormat responseFormat = AudioResponseFormat.Json, float? temperature = null) { diff --git a/OpenAI-DotNet/Audio/SpeechRequest.cs b/OpenAI-DotNet/Audio/SpeechRequest.cs index 072ae40a..6f7947d9 100644 --- a/OpenAI-DotNet/Audio/SpeechRequest.cs +++ b/OpenAI-DotNet/Audio/SpeechRequest.cs @@ -16,11 +16,11 @@ public sealed class SpeechRequest /// The voice to use when generating the audio. /// The format to audio in. Supported formats are mp3, opus, aac, flac, wav and pcm. /// The speed of the generated audio. Select a value from 0.25 to 4.0. 1.0 is the default. - public SpeechRequest(string input, Model model = null, SpeechVoice voice = SpeechVoice.Alloy, SpeechResponseFormat responseFormat = SpeechResponseFormat.MP3, float? speed = null) + public SpeechRequest(string input, Model model = null, Voice voice = null, SpeechResponseFormat responseFormat = SpeechResponseFormat.MP3, float? speed = null) { Input = !string.IsNullOrWhiteSpace(input) ? input : throw new ArgumentException("Input cannot be null or empty.", nameof(input)); Model = string.IsNullOrWhiteSpace(model?.Id) ? Models.Model.TTS_1 : model; - Voice = voice; + Voice = string.IsNullOrWhiteSpace(voice?.Id) ? OpenAI.Voice.Alloy : voice; ResponseFormat = responseFormat; Speed = speed; } @@ -29,19 +29,22 @@ public SpeechRequest(string input, Model model = null, SpeechVoice voice = Speec /// One of the available TTS models. Defaults to tts-1. /// [JsonPropertyName("model")] + [FunctionProperty("One of the available TTS models. Defaults to tts-1.", true, "tts-1", "tts-1-hd")] public string Model { get; } /// /// The text to generate audio for. The maximum length is 4096 characters. /// [JsonPropertyName("input")] + [FunctionProperty("The text to generate audio for. The maximum length is 4096 characters.", true)] public string Input { get; } /// /// The voice to use when generating the audio. /// [JsonPropertyName("voice")] - public SpeechVoice Voice { get; } + [FunctionProperty("The voice to use when generating the audio.", true, "alloy", "echo", "fable", "onyx", "nova", "shimmer")] + public string Voice { get; } /// /// The format to audio in. Supported formats are mp3, opus, aac, flac, wav and pcm. @@ -49,12 +52,14 @@ public SpeechRequest(string input, Model model = null, SpeechVoice voice = Speec [JsonPropertyName("response_format")] [JsonIgnore(Condition = JsonIgnoreCondition.Never)] [JsonConverter(typeof(Extensions.JsonStringEnumConverter))] + [FunctionProperty("The format to audio in. Supported formats are mp3, opus, aac, flac, wav and pcm.", false, SpeechResponseFormat.MP3)] public SpeechResponseFormat ResponseFormat { get; } /// /// The speed of the generated audio. Select a value from 0.25 to 4.0. 1.0 is the default. /// [JsonPropertyName("speed")] + [FunctionProperty("The speed of the generated audio. Select a value from 0.25 to 4.0. 1.0 is the default.", false, 1.0f)] public float? Speed { get; } } } diff --git a/OpenAI-DotNet/Audio/SpeechVoice.cs b/OpenAI-DotNet/Audio/SpeechVoice.cs index ba644021..0086a505 100644 --- a/OpenAI-DotNet/Audio/SpeechVoice.cs +++ b/OpenAI-DotNet/Audio/SpeechVoice.cs @@ -1,7 +1,10 @@ // Licensed under the MIT License. See LICENSE in the project root for license information. +using System; + namespace OpenAI.Audio { + [Obsolete("Use OpenAI.Voice instead.")] public enum SpeechVoice { Alloy = 0, diff --git a/OpenAI-DotNet/Authentication/OpenAIClientSettings.cs b/OpenAI-DotNet/Authentication/OpenAIClientSettings.cs index 065e88c4..b242e3f9 100644 --- a/OpenAI-DotNet/Authentication/OpenAIClientSettings.cs +++ b/OpenAI-DotNet/Authentication/OpenAIClientSettings.cs @@ -10,6 +10,9 @@ namespace OpenAI /// public sealed class OpenAIClientSettings { + internal const string WS = "ws://"; + internal const string WSS = "wss://"; + internal const string Http = "http://"; internal const string Https = "https://"; internal const string OpenAIDomain = "api.openai.com"; internal const string DefaultOpenAIApiVersion = "v1"; @@ -22,10 +25,11 @@ public sealed class OpenAIClientSettings public OpenAIClientSettings() { ResourceName = OpenAIDomain; - ApiVersion = "v1"; + ApiVersion = DefaultOpenAIApiVersion; DeploymentId = string.Empty; BaseRequest = $"/{ApiVersion}/"; BaseRequestUrlFormat = $"{Https}{ResourceName}{BaseRequest}{{0}}"; + BaseWebSocketUrlFormat = $"{WSS}{ResourceName}{BaseRequest}{{0}}"; UseOAuthAuthentication = true; } @@ -52,11 +56,23 @@ public OpenAIClientSettings(string domain, string apiVersion = DefaultOpenAIApiV apiVersion = DefaultOpenAIApiVersion; } - ResourceName = domain.Contains("http") ? domain : $"{Https}{domain}"; + ResourceName = domain.Contains(Http) + ? domain + : $"{Https}{domain}"; + + if (domain.Contains(Http)) + { + domain = domain.Replace(Http, string.Empty); + domain = domain.Replace(Https, string.Empty); + } + ApiVersion = apiVersion; DeploymentId = string.Empty; BaseRequest = $"/{ApiVersion}/"; BaseRequestUrlFormat = $"{ResourceName}{BaseRequest}{{0}}"; + BaseWebSocketUrlFormat = ResourceName.Contains(Https) + ? $"{WSS}{domain}{BaseRequest}{{0}}" + : $"{WS}{domain}{BaseRequest}{{0}}"; UseOAuthAuthentication = true; } @@ -99,24 +115,24 @@ public OpenAIClientSettings(string resourceName, string deploymentId, string api ApiVersion = apiVersion; BaseRequest = "/openai/"; BaseRequestUrlFormat = $"{Https}{ResourceName}.{AzureOpenAIDomain}{BaseRequest}{{0}}"; + BaseWebSocketUrlFormat = $"{WSS}{ResourceName}.{AzureOpenAIDomain}{BaseRequest}{{0}}"; defaultQueryParameters.Add("api-version", ApiVersion); UseOAuthAuthentication = useActiveDirectoryAuthentication; } public string ResourceName { get; } - public string ApiVersion { get; } - public string DeploymentId { get; } + public string ApiVersion { get; } + public string BaseRequest { get; } internal string BaseRequestUrlFormat { get; } - internal bool UseOAuthAuthentication { get; } + internal string BaseWebSocketUrlFormat { get; } - [Obsolete("Use IsAzureOpenAI")] - public bool IsAzureDeployment => IsAzureOpenAI; + internal bool UseOAuthAuthentication { get; } public bool IsAzureOpenAI => BaseRequestUrlFormat.Contains(AzureOpenAIDomain); diff --git a/OpenAI-DotNet/Chat/AudioConfig.cs b/OpenAI-DotNet/Chat/AudioConfig.cs new file mode 100644 index 00000000..7a249f6f --- /dev/null +++ b/OpenAI-DotNet/Chat/AudioConfig.cs @@ -0,0 +1,27 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using System.Text.Json.Serialization; + +namespace OpenAI.Chat +{ + public sealed class AudioConfig + { + public AudioConfig() { } + + public AudioConfig(Voice voice, AudioFormat format = AudioFormat.Pcm16) + { + Voice = string.IsNullOrWhiteSpace(voice?.Id) ? OpenAI.Voice.Alloy : voice; + Format = format; + } + + [JsonInclude] + [JsonPropertyName("voice")] + public string Voice { get; private set; } + + [JsonInclude] + [JsonPropertyName("format")] + public AudioFormat Format { get; private set; } + + public static implicit operator AudioConfig(Voice voice) => new(voice); + } +} diff --git a/OpenAI-DotNet/Chat/AudioFormat.cs b/OpenAI-DotNet/Chat/AudioFormat.cs new file mode 100644 index 00000000..6a73c71b --- /dev/null +++ b/OpenAI-DotNet/Chat/AudioFormat.cs @@ -0,0 +1,20 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using System.Runtime.Serialization; + +namespace OpenAI.Chat +{ + public enum AudioFormat + { + [EnumMember(Value = "pcm16")] + Pcm16 = 1, + [EnumMember(Value = "opus")] + Opus, + [EnumMember(Value = "mp3")] + Mp3, + [EnumMember(Value = "wav")] + Wav, + [EnumMember(Value = "flac")] + Flac + } +} diff --git a/OpenAI-DotNet/Chat/AudioOutput.cs b/OpenAI-DotNet/Chat/AudioOutput.cs new file mode 100644 index 00000000..49701bd3 --- /dev/null +++ b/OpenAI-DotNet/Chat/AudioOutput.cs @@ -0,0 +1,31 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using System; +using System.Text.Json.Serialization; + +namespace OpenAI.Chat +{ + [JsonConverter(typeof(AudioOutputConverter))] + public sealed class AudioOutput + { + internal AudioOutput(string id, int expiresAtUnixSeconds, ReadOnlyMemory data, string transcript) + { + Id = id; + ExpiresAtUnixSeconds = expiresAtUnixSeconds; + Data = data; + Transcript = transcript; + } + + public string Id { get; } + + public int ExpiresAtUnixSeconds { get; } + + public DateTime ExpiresAt => DateTimeOffset.FromUnixTimeSeconds(ExpiresAtUnixSeconds).DateTime; + + public ReadOnlyMemory Data { get; } + + public string Transcript { get; } + + public override string ToString() => Transcript ?? string.Empty; + } +} diff --git a/OpenAI-DotNet/Chat/ChatRequest.cs b/OpenAI-DotNet/Chat/ChatRequest.cs index c2ecd4cc..025cfc2d 100644 --- a/OpenAI-DotNet/Chat/ChatRequest.cs +++ b/OpenAI-DotNet/Chat/ChatRequest.cs @@ -22,7 +22,7 @@ public ChatRequest( int? maxTokens = null, int? number = null, double? presencePenalty = null, - ChatResponseFormat responseFormat = ChatResponseFormat.Text, + ChatResponseFormat responseFormat = ChatResponseFormat.Auto, int? seed = null, string[] stops = null, double? temperature = null, @@ -30,9 +30,10 @@ public ChatRequest( int? topLogProbs = null, bool? parallelToolCalls = null, JsonSchema jsonSchema = null, + AudioConfig audioConfig = null, string user = null) : this(messages, model, frequencyPenalty, logitBias, maxTokens, number, presencePenalty, - responseFormat, seed, stops, temperature, topP, topLogProbs, parallelToolCalls, jsonSchema, user) + responseFormat, seed, stops, temperature, topP, topLogProbs, parallelToolCalls, jsonSchema, audioConfig, user) { var toolList = tools?.ToList(); @@ -103,8 +104,7 @@ public ChatRequest( /// Up to 4 sequences where the API will stop generating further tokens. /// /// - /// The maximum number of tokens allowed for the generated answer. - /// By default, the number of tokens the model can return will be (4096 - prompt tokens). + /// An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens. /// /// /// Number between -2.0 and 2.0. @@ -136,14 +136,17 @@ public ChatRequest( /// An integer between 0 and 5 specifying the number of most likely tokens to return at each token position, /// each with an associated log probability. /// - /// - /// Whether to enable parallel function calling during tool use. - /// /// /// The to use for structured JSON outputs.
///
/// /// + /// + /// Whether to enable parallel function calling during tool use. + /// + /// + /// Parameters for audio output. . + /// /// /// A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. /// @@ -155,7 +158,7 @@ public ChatRequest( int? maxTokens = null, int? number = null, double? presencePenalty = null, - ChatResponseFormat responseFormat = ChatResponseFormat.Text, + ChatResponseFormat responseFormat = ChatResponseFormat.Auto, int? seed = null, string[] stops = null, double? temperature = null, @@ -163,6 +166,7 @@ public ChatRequest( int? topLogProbs = null, bool? parallelToolCalls = null, JsonSchema jsonSchema = null, + AudioConfig audioConfig = null, string user = null) { Messages = messages?.ToList(); @@ -173,9 +177,25 @@ public ChatRequest( } Model = string.IsNullOrWhiteSpace(model) ? Models.Model.GPT4o : model; + + if (audioConfig != null && !Model.Contains("audio")) + { + throw new ArgumentException("Audio settings are only valid for models that support audio output", nameof(audioConfig)); + } + + if (Model.Contains("audio")) + { + Modalities = Modality.Text | Modality.Audio; + AudioConfig = audioConfig ?? new(Voice.Alloy); + } + else + { + Modalities = Modality.Text; + } + FrequencyPenalty = frequencyPenalty; LogitBias = logitBias; - MaxTokens = maxTokens; + MaxCompletionTokens = maxTokens; Number = number; PresencePenalty = presencePenalty; @@ -185,7 +205,11 @@ public ChatRequest( } else { - ResponseFormatObject = responseFormat; + ResponseFormatObject = responseFormat switch + { + ChatResponseFormat.Text or ChatResponseFormat.Json => responseFormat, + _ => null + }; } Seed = seed; @@ -210,6 +234,18 @@ public ChatRequest( [JsonPropertyName("model")] public string Model { get; } + /// + /// Whether or not to store the output of this chat completion request for use in our model distillation or evals products. + /// + [JsonPropertyName("store")] + public bool? Store { get; set; } + + /// + /// Developer-defined tags and values used for filtering completions in the dashboard. + /// + [JsonPropertyName("metadata")] + public IReadOnlyDictionary Metadata { get; set; } + /// /// Number between -2.0 and 2.0. /// Positive values penalize new tokens based on their existing frequency in the text so far, @@ -258,8 +294,15 @@ public ChatRequest( /// By default, the number of tokens the model can return will be (4096 - prompt tokens). /// [JsonPropertyName("max_tokens")] + [Obsolete("Use MaxCompletionTokens instead")] public int? MaxTokens { get; } + /// + /// An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens. + /// + [JsonPropertyName("max_completion_tokens")] + public int? MaxCompletionTokens { get; } + /// /// How many chat completion choices to generate for each input message.
/// Defaults to 1 @@ -267,6 +310,24 @@ public ChatRequest( [JsonPropertyName("n")] public int? Number { get; } + [JsonPropertyName("modalities")] + [JsonConverter(typeof(ModalityConverter))] + public Modality Modalities { get; } + + /// + /// Configuration for a Predicted Output, which can greatly improve response times when large parts of the model response are known ahead of time. + /// This is most common when you are regenerating a file with only minor changes to most of the content. + /// + [JsonPropertyName("prediction")] + public object Prediction { get; set; } + + /// + /// Parameters for audio output. + /// Required when audio output is requested with modalities: ["audio"]. + /// + [JsonPropertyName("audio")] + public AudioConfig AudioConfig { get; } + /// /// Number between -2.0 and 2.0. /// Positive values penalize new tokens based on whether they appear in the text so far, @@ -276,6 +337,11 @@ public ChatRequest( [JsonPropertyName("presence_penalty")] public double? PresencePenalty { get; } + [JsonPropertyName("response_format")] + [JsonConverter(typeof(ResponseFormatConverter))] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + public ResponseFormatObject ResponseFormatObject { get; internal set; } + /// /// An object specifying the format that the model must output. /// Setting to or enables JSON mode, @@ -290,11 +356,6 @@ public ChatRequest( [JsonIgnore] public ChatResponseFormat ResponseFormat => ResponseFormatObject ?? ChatResponseFormat.Auto; - [JsonPropertyName("response_format")] - [JsonConverter(typeof(ResponseFormatConverter))] - [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)] - public ResponseFormatObject ResponseFormatObject { get; internal set; } - /// /// This feature is in Beta. If specified, our system will make a best effort to sample deterministically, /// such that repeated requests with the same seed and parameters should return the same result. @@ -304,6 +365,17 @@ public ChatRequest( [JsonPropertyName("seed")] public int? Seed { get; } + /// + /// Specifies the latency tier to use for processing the request. This parameter is relevant for customers subscribed to the scale tier service:
+ /// - If set to 'auto', and the Project is Scale tier enabled, the system will utilize scale tier credits until they are exhausted.
+ /// - If set to 'auto', and the Project is not Scale tier enabled, the request will be processed using the default service tier with a lower uptime SLA and no latency guarantee.
+ /// - If set to 'default', the request will be processed using the default service tier with a lower uptime SLA and no latency guarantee.
+ /// - When not set, the default behavior is 'auto'.
+ /// When this parameter is set, the response body will include the service_tier utilized. + ///
+ [JsonPropertyName("service_tier")] + public string ServiceTier { get; set; } + /// /// Up to 4 sequences where the API will stop generating further tokens. /// diff --git a/OpenAI-DotNet/Chat/ChatResponse.cs b/OpenAI-DotNet/Chat/ChatResponse.cs index 89a1cc47..25a995f3 100644 --- a/OpenAI-DotNet/Chat/ChatResponse.cs +++ b/OpenAI-DotNet/Chat/ChatResponse.cs @@ -71,9 +71,11 @@ public IReadOnlyList Choices } [JsonIgnore] - public Choice FirstChoice => Choices?.FirstOrDefault(choice => choice.Index == 0); + public Choice FirstChoice + => Choices?.FirstOrDefault(choice => choice.Index == 0); - public override string ToString() => FirstChoice?.ToString() ?? string.Empty; + public override string ToString() + => FirstChoice?.ToString() ?? string.Empty; public static implicit operator string(ChatResponse response) => response?.ToString(); diff --git a/OpenAI-DotNet/Chat/Choice.cs b/OpenAI-DotNet/Chat/Choice.cs index 0a2a2ca3..04c8cb4b 100644 --- a/OpenAI-DotNet/Chat/Choice.cs +++ b/OpenAI-DotNet/Chat/Choice.cs @@ -56,7 +56,8 @@ public sealed class Choice : IAppendable [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)] public LogProbs LogProbs { get; private set; } - public override string ToString() => Message?.Content?.ToString() ?? Delta?.Content ?? string.Empty; + public override string ToString() + => Message?.ToString() ?? Delta?.Content ?? string.Empty; public static implicit operator string(Choice choice) => choice?.ToString(); diff --git a/OpenAI-DotNet/Chat/Conversation.cs b/OpenAI-DotNet/Chat/Conversation.cs index bf03f14b..0b1d30bc 100644 --- a/OpenAI-DotNet/Chat/Conversation.cs +++ b/OpenAI-DotNet/Chat/Conversation.cs @@ -1,6 +1,8 @@ // Licensed under the MIT License. See LICENSE in the project root for license information. +using System.Collections.Concurrent; using System.Collections.Generic; +using System.Linq; using System.Text.Json; using System.Text.Json.Serialization; @@ -11,19 +13,27 @@ public sealed class Conversation [JsonConstructor] public Conversation(List messages) { - this.messages = messages; + this.messages = new ConcurrentQueue(); + + if (messages != null) + { + foreach (var message in messages) + { + this.messages.Enqueue(message); + } + } } - private readonly List messages; + private readonly ConcurrentQueue messages; [JsonPropertyName("messages")] - public IReadOnlyList Messages => messages; + public IReadOnlyList Messages => messages.ToList(); /// /// Appends to the end of . /// /// The message to add to the . - public void AppendMessage(Message message) => messages.Add(message); + public void AppendMessage(Message message) => messages.Enqueue(message); public override string ToString() => JsonSerializer.Serialize(this, OpenAIClient.JsonSerializationOptions); diff --git a/OpenAI-DotNet/Chat/Delta.cs b/OpenAI-DotNet/Chat/Delta.cs index 83639aca..3cb5d4f7 100644 --- a/OpenAI-DotNet/Chat/Delta.cs +++ b/OpenAI-DotNet/Chat/Delta.cs @@ -1,6 +1,5 @@ // Licensed under the MIT License. See LICENSE in the project root for license information. -using System; using System.Collections.Generic; using System.Text.Json.Serialization; @@ -22,12 +21,19 @@ public sealed class Delta [JsonPropertyName("content")] public string Content { get; private set; } + /// + /// The refusal message generated by the model. + /// + [JsonInclude] + [JsonPropertyName("refusal")] + public string Refusal { get; private set; } + /// /// The tool calls generated by the model, such as function calls. /// [JsonInclude] [JsonPropertyName("tool_calls")] - public IReadOnlyList ToolCalls { get; private set; } + public IReadOnlyList ToolCalls { get; private set; } /// /// Optional, The name of the author of this message.
@@ -37,14 +43,6 @@ public sealed class Delta [JsonPropertyName("name")] public string Name { get; private set; } - /// - /// The function that should be called, as generated by the model. - /// - [JsonInclude] - [Obsolete("Replaced by ToolCalls")] - [JsonPropertyName("function_call")] - public Function Function { get; private set; } - public override string ToString() => Content ?? string.Empty; public static implicit operator string(Delta delta) => delta?.ToString(); diff --git a/OpenAI-DotNet/Chat/Message.cs b/OpenAI-DotNet/Chat/Message.cs index 8fac92e4..c4e481ce 100644 --- a/OpenAI-DotNet/Chat/Message.cs +++ b/OpenAI-DotNet/Chat/Message.cs @@ -49,21 +49,36 @@ public Message(Role role, string content, string name = null) } /// + [Obsolete("use overload with ToolCall")] public Message(Tool tool, string content) : this(Role.Tool, content, tool.Function.Name) { ToolCallId = tool.Id; } + /// + public Message(ToolCall toolCall, string content) + : this(Role.Tool, content, toolCall.Function.Name) + { + ToolCallId = toolCall.Id; + } + + [Obsolete("use overload with ToolCall")] + public Message(Tool tool, IEnumerable content) + : this(Role.Tool, content, tool.Function.Name) + { + ToolCallId = tool.Id; + } + /// /// Creates a new message to insert into a chat conversation. /// - /// Tool used for message. + /// ToolCall used for message. /// Tool function response. - public Message(Tool tool, IEnumerable content) - : this(Role.Tool, content, tool.Function.Name) + public Message(ToolCall toolCall, IEnumerable content) + : this(Role.Tool, content, toolCall.Function.Name) { - ToolCallId = tool.Id; + ToolCallId = toolCall.Id; } /// @@ -109,7 +124,7 @@ public Message(string toolCallId, string toolFunctionName, IEnumerable [JsonPropertyName("refusal")] public string Refusal { get; private set; } - private List toolCalls; + private List toolCalls; /// /// The tool calls generated by the model, such as function calls. @@ -117,7 +132,7 @@ public Message(string toolCallId, string toolFunctionName, IEnumerable [JsonInclude] [JsonPropertyName("tool_calls")] [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] - public IReadOnlyList ToolCalls + public IReadOnlyList ToolCalls { get => toolCalls; private set => toolCalls = value?.ToList(); @@ -129,15 +144,21 @@ public IReadOnlyList ToolCalls public string ToolCallId { get; private set; } /// - /// The function that should be called, as generated by the model. + /// If the audio output modality is requested, this object contains data about the audio response from the model. /// [JsonInclude] - [Obsolete("Replaced by ToolCalls")] - [JsonPropertyName("function_call")] - [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)] - public Function Function { get; private set; } + [JsonPropertyName("audio")] + public AudioOutput AudioOutput { get; private set; } - public override string ToString() => Content?.ToString() ?? string.Empty; + public override string ToString() + { + if (string.IsNullOrWhiteSpace(Content?.ToString())) + { + return AudioOutput?.ToString() ?? string.Empty; + } + + return Content?.ToString() ?? string.Empty; + } public static implicit operator string(Message message) => message?.ToString(); @@ -154,6 +175,11 @@ internal void AppendFrom(Delta other) Content += other.Content; } + if (!string.IsNullOrWhiteSpace(other?.Refusal)) + { + Refusal += other.Refusal; + } + if (!string.IsNullOrWhiteSpace(other?.Name)) { Name = other.Name; @@ -161,7 +187,7 @@ internal void AppendFrom(Delta other) if (other is { ToolCalls: not null }) { - toolCalls ??= new List(); + toolCalls ??= new List(); toolCalls.AppendFrom(other.ToolCalls); } } diff --git a/OpenAI-DotNet/Common/BaseResponse.cs b/OpenAI-DotNet/Common/BaseResponse.cs index 2de45d4d..3da974c4 100644 --- a/OpenAI-DotNet/Common/BaseResponse.cs +++ b/OpenAI-DotNet/Common/BaseResponse.cs @@ -126,10 +126,10 @@ private TimeSpan ConvertTimestampToTimespan(string timestamp) * Although their presence may not actually exist, we can still have this section in the parser, there is no * negative impact for a missing hours segment because the capture groups are flagged as optional. */ - int.TryParse(match.Groups["h"].Value.Replace("h", string.Empty), out var h); - int.TryParse(match.Groups["m"].Value.Replace("m", string.Empty), out var m); - int.TryParse(match.Groups["s"].Value.Replace("s", string.Empty), out var s); - int.TryParse(match.Groups["ms"].Value.Replace("ms", string.Empty), out var ms); + int.TryParse(match.Groups["h"]?.Value.Replace("h", string.Empty), out var h); + int.TryParse(match.Groups["m"]?.Value.Replace("m", string.Empty), out var m); + int.TryParse(match.Groups["s"]?.Value.Replace("s", string.Empty), out var s); + int.TryParse(match.Groups["ms"]?.Value.Replace("ms", string.Empty), out var ms); return new TimeSpan(h, m, s) + TimeSpan.FromMilliseconds(ms); } diff --git a/OpenAI-DotNet/Common/CompletionTokensDetails.cs b/OpenAI-DotNet/Common/CompletionTokensDetails.cs new file mode 100644 index 00000000..7d12ef3d --- /dev/null +++ b/OpenAI-DotNet/Common/CompletionTokensDetails.cs @@ -0,0 +1,53 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using System.Text.Json.Serialization; + +namespace OpenAI +{ + public sealed class CompletionTokensDetails + { + public CompletionTokensDetails() { } + + private CompletionTokensDetails( + int? reasoningTokens, + int? audioTokens, + int? textTokens, + int? acceptedPredictionTokens, + int? rejectedPredictionTokens) + { + ReasoningTokens = reasoningTokens; + AudioTokens = audioTokens; + TextTokens = textTokens; + AcceptedPredictionTokens = acceptedPredictionTokens; + RejectedPredictionTokens = rejectedPredictionTokens; + } + + [JsonInclude] + [JsonPropertyName("reasoning_tokens")] + public int? ReasoningTokens { get; private set; } + + [JsonInclude] + [JsonPropertyName("audio_tokens")] + public int? AudioTokens { get; private set; } + + [JsonInclude] + [JsonPropertyName("text_tokens")] + public int? TextTokens { get; private set; } + + [JsonInclude] + [JsonPropertyName("accepted_prediction_tokens")] + public int? AcceptedPredictionTokens { get; private set; } + + [JsonInclude] + [JsonPropertyName("rejected_prediction_tokens")] + public int? RejectedPredictionTokens { get; private set; } + + public static CompletionTokensDetails operator +(CompletionTokensDetails a, CompletionTokensDetails b) + => new( + (a?.ReasoningTokens ?? 0) + (b?.ReasoningTokens ?? 0), + (a?.AudioTokens ?? 0) + (b?.AudioTokens ?? 0), + (a?.TextTokens ?? 0) + (b?.TextTokens ?? 0), + (a?.AcceptedPredictionTokens ?? 0) + (b?.AcceptedPredictionTokens ?? 0), + (a?.RejectedPredictionTokens ?? 0) + (b?.RejectedPredictionTokens ?? 0)); + } +} diff --git a/OpenAI-DotNet/Common/Content.cs b/OpenAI-DotNet/Common/Content.cs index 59a2a6f9..055c9224 100644 --- a/OpenAI-DotNet/Common/Content.cs +++ b/OpenAI-DotNet/Common/Content.cs @@ -33,6 +33,12 @@ public Content(ImageFile imageFile) ImageFile = imageFile; } + public Content(InputAudio inputAudio) + { + Type = ContentType.InputAudio; + InputAudio = inputAudio; + } + public Content(ContentType type, string input) { Type = type; @@ -47,6 +53,8 @@ public Content(ContentType type, string input) break; case ContentType.ImageFile: throw new ArgumentException("Use the ImageFile constructor for ImageFile content."); + case ContentType.InputAudio: + throw new ArgumentException("Use the InputAudio constructor for InputAudio content."); default: throw new ArgumentOutOfRangeException(nameof(type)); } @@ -79,19 +87,26 @@ public Content(ContentType type, string input) [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)] public ImageFile ImageFile { get; private set; } + [JsonInclude] + [JsonPropertyName("input_audio")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)] + public InputAudio InputAudio { get; private set; } + public static implicit operator Content(string input) => new(ContentType.Text, input); public static implicit operator Content(ImageUrl imageUrl) => new(imageUrl); public static implicit operator Content(ImageFile imageFile) => new(imageFile); + public static implicit operator Content(InputAudio inputAudio) => new(inputAudio); + public override string ToString() => Type switch { ContentType.Text => Text?.ToString(), ContentType.ImageUrl => ImageUrl?.ToString(), ContentType.ImageFile => ImageFile?.ToString(), - _ => throw new ArgumentOutOfRangeException(nameof(Type)) + _ => string.Empty, } ?? string.Empty; public void AppendFrom(Content other) @@ -150,6 +165,18 @@ public void AppendFrom(Content other) ImageFile.AppendFrom(other.ImageFile); } } + + if (other.InputAudio != null) + { + if (InputAudio == null) + { + InputAudio = other.InputAudio; + } + else + { + InputAudio.AppendFrom(other.InputAudio); + } + } } } } diff --git a/OpenAI-DotNet/Common/ContentType.cs b/OpenAI-DotNet/Common/ContentType.cs index be2427f0..e1d9f00e 100644 --- a/OpenAI-DotNet/Common/ContentType.cs +++ b/OpenAI-DotNet/Common/ContentType.cs @@ -11,6 +11,8 @@ public enum ContentType [EnumMember(Value = "image_url")] ImageUrl, [EnumMember(Value = "image_file")] - ImageFile + ImageFile, + [EnumMember(Value = "input_audio")] + InputAudio } } diff --git a/OpenAI-DotNet/Common/Error.cs b/OpenAI-DotNet/Common/Error.cs index 641b649a..79bb6875 100644 --- a/OpenAI-DotNet/Common/Error.cs +++ b/OpenAI-DotNet/Common/Error.cs @@ -90,5 +90,8 @@ public override string ToString() return builder.ToString(); } + + public static implicit operator Exception(Error error) + => error.Exception ?? new Exception(error.ToString()); } } diff --git a/OpenAI-DotNet/Common/FileCitation.cs b/OpenAI-DotNet/Common/FileCitation.cs index 09e69954..5f228609 100644 --- a/OpenAI-DotNet/Common/FileCitation.cs +++ b/OpenAI-DotNet/Common/FileCitation.cs @@ -1,6 +1,5 @@ // Licensed under the MIT License. See LICENSE in the project root for license information. -using System; using System.Text.Json.Serialization; namespace OpenAI @@ -13,14 +12,5 @@ public sealed class FileCitation [JsonInclude] [JsonPropertyName("file_id")] public string FileId { get; private set; } - - /// - /// The specific quote in the file. - /// - [Obsolete("Removed")] - [JsonInclude] - [JsonPropertyName("quote")] - [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)] - public string Quote { get; private set; } } } diff --git a/OpenAI-DotNet/Common/Function.cs b/OpenAI-DotNet/Common/Function.cs index d89f846f..a227a018 100644 --- a/OpenAI-DotNet/Common/Function.cs +++ b/OpenAI-DotNet/Common/Function.cs @@ -46,7 +46,7 @@ public Function(string name, string description = null, JsonNode parameters = nu { if (!Regex.IsMatch(name, NameRegex)) { - throw new ArgumentException($"The name of the function does not conform to naming standards: {NameRegex}"); + throw new ArgumentException($"The name of the function does not conform to naming standards: {NameRegex} \"{name}\""); } Name = name; @@ -69,17 +69,16 @@ public Function(string name, string description = null, JsonNode parameters = nu /// An optional JSON describing the parameters of the function that the model can generate. /// /// - /// Whether to enable strict schema adherence when generating the function call.
- /// If set to true, the model will follow the exact schema defined in the parameters field.
- /// Only a subset of JSON Schema is supported when strict is true.
- /// Learn more about Structured Outputs in the function calling guide.
+ /// Whether to enable strict schema adherence when generating the function call. + /// If set to true, the model will follow the exact schema defined in the parameters field. + /// Only a subset of JSON Schema is supported when strict is true. Learn more about Structured Outputs in the function calling guide.
/// /// public Function(string name, string description, string parameters, bool? strict = null) { if (!Regex.IsMatch(name, NameRegex)) { - throw new ArgumentException($"The name of the function does not conform to naming standards: {NameRegex}"); + throw new ArgumentException($"The name of the function does not conform to naming standards: {NameRegex} \"{name}\""); } Name = name; @@ -99,7 +98,7 @@ private Function(string name, string description, MethodInfo method, object inst { if (!Regex.IsMatch(name, NameRegex)) { - throw new ArgumentException($"The name of the function does not conform to naming standards: {NameRegex}"); + throw new ArgumentException($"The name of the function does not conform to naming standards: {NameRegex} \"{name}\""); } if (functionCache.ContainsKey(name)) @@ -172,6 +171,10 @@ public static Function FromFunc /// The optional description of the function. ///
@@ -240,7 +243,7 @@ public JsonNode Arguments /// [JsonInclude] [JsonPropertyName("strict")] - [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)] + [JsonIgnore(Condition = JsonIgnoreCondition.Always)] public bool? Strict { get; private set; } /// @@ -296,7 +299,8 @@ public string Invoke() { var (function, invokeArgs) = ValidateFunctionArguments(); - if (function.MethodInfo.ReturnType == typeof(Task)) + if (function.MethodInfo.ReturnType == typeof(Task) || + function.MethodInfo.ReturnType == typeof(Task<>)) { throw new InvalidOperationException("Cannot invoke an async function synchronously. Use InvokeAsync() instead."); } @@ -315,6 +319,10 @@ public string Invoke() Console.WriteLine(e); return JsonSerializer.Serialize(new { error = e.Message }, OpenAIClient.JsonSerializationOptions); } + finally + { + Arguments = null; + } } /// @@ -328,7 +336,8 @@ public T Invoke() { var (function, invokeArgs) = ValidateFunctionArguments(); - if (function.MethodInfo.ReturnType == typeof(Task)) + if (function.MethodInfo.ReturnType == typeof(Task) || + function.MethodInfo.ReturnType == typeof(Task<>)) { throw new InvalidOperationException("Cannot invoke an async function synchronously. Use InvokeAsync() instead."); } @@ -340,6 +349,10 @@ public T Invoke() Console.WriteLine(e); throw; } + finally + { + Arguments = null; + } } /// @@ -366,6 +379,10 @@ public async Task InvokeAsync(CancellationToken cancellationToken = defa Console.WriteLine(e); return JsonSerializer.Serialize(new { error = e.Message }, OpenAIClient.JsonSerializationOptions); } + finally + { + Arguments = null; + } } /// @@ -392,6 +409,10 @@ public async Task InvokeAsync(CancellationToken cancellationToken = defaul Console.WriteLine(e); throw; } + finally + { + Arguments = null; + } } private static T InvokeInternal(Function function, object[] invokeArgs) @@ -402,11 +423,11 @@ private static T InvokeInternal(Function function, object[] invokeArgs) private static async Task InvokeInternalAsync(Function function, object[] invokeArgs) { - var result = InvokeInternal(function, invokeArgs); + var result = function.MethodInfo.Invoke(function.Instance, invokeArgs); if (result is not Task task) { - return result; + return result == null ? default : (T)result; } await task; @@ -434,7 +455,7 @@ private static async Task InvokeInternalAsync(Function function, object[] throw new InvalidOperationException($"Failed to find a valid method to invoke for {Name}"); } - var requestedArgs = arguments != null + var requestedArgs = Arguments != null ? JsonSerializer.Deserialize>(Arguments.ToString(), OpenAIClient.JsonSerializationOptions) : new(); var methodParams = function.MethodInfo.GetParameters(); diff --git a/OpenAI-DotNet/Common/InputAudio.cs b/OpenAI-DotNet/Common/InputAudio.cs new file mode 100644 index 00000000..ba1cc6db --- /dev/null +++ b/OpenAI-DotNet/Common/InputAudio.cs @@ -0,0 +1,59 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using System; +using System.Text.Json.Serialization; + +namespace OpenAI +{ + public sealed class InputAudio + { + public InputAudio() { } + + public InputAudio(ReadOnlyMemory memory, InputAudioFormat format) + : this(memory.Span, format) + { + } + + public InputAudio(ReadOnlySpan span, InputAudioFormat format) + : this($"data:audio/{format};base64,{Convert.ToBase64String(span)}", format) + { + } + + public InputAudio(byte[] data, InputAudioFormat format) + : this($"data:audio/{format};base64,{Convert.ToBase64String(data)}", format) + { + } + + public InputAudio(string data, InputAudioFormat format) + { + Data = data; + Format = format; + } + + [JsonInclude] + [JsonPropertyName("data")] + public string Data { get; private set; } + + [JsonInclude] + [JsonPropertyName("format")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)] + public InputAudioFormat Format { get; private set; } + + public override string ToString() => Data; + + public void AppendFrom(InputAudio other) + { + if (other == null) { return; } + + if (other.Format > 0) + { + Format = other.Format; + } + + if (!string.IsNullOrWhiteSpace(other.Data)) + { + Data += other.Data; + } + } + } +} diff --git a/OpenAI-DotNet/Common/InputAudioFormat.cs b/OpenAI-DotNet/Common/InputAudioFormat.cs new file mode 100644 index 00000000..a822a8a6 --- /dev/null +++ b/OpenAI-DotNet/Common/InputAudioFormat.cs @@ -0,0 +1,14 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using System.Runtime.Serialization; + +namespace OpenAI +{ + public enum InputAudioFormat + { + [EnumMember(Value = "wav")] + Wav = 1, + [EnumMember(Value = "mp3")] + Mp3 + } +} diff --git a/OpenAI-DotNet/Common/JsonSchema.cs b/OpenAI-DotNet/Common/JsonSchema.cs index dfa136aa..5fe330a1 100644 --- a/OpenAI-DotNet/Common/JsonSchema.cs +++ b/OpenAI-DotNet/Common/JsonSchema.cs @@ -68,7 +68,7 @@ public JsonSchema(string name, JsonNode schema, string description = null, bool /// [JsonInclude] [JsonPropertyName("strict")] - public bool Strict { get; private set; } = true; + public bool Strict { get; private set; } /// /// The schema for the response format, described as a JSON Schema object. diff --git a/OpenAI-DotNet/Common/Modality.cs b/OpenAI-DotNet/Common/Modality.cs new file mode 100644 index 00000000..879b6b40 --- /dev/null +++ b/OpenAI-DotNet/Common/Modality.cs @@ -0,0 +1,17 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using System; +using System.Runtime.Serialization; + +namespace OpenAI +{ + [Flags] + public enum Modality + { + None = 0, + [EnumMember(Value = "text")] + Text = 1 << 0, + [EnumMember(Value = "audio")] + Audio = 1 << 1 + } +} diff --git a/OpenAI-DotNet/Common/OpenAIBaseEndpoint.cs b/OpenAI-DotNet/Common/OpenAIBaseEndpoint.cs index 98421da2..d2c6e56b 100644 --- a/OpenAI-DotNet/Common/OpenAIBaseEndpoint.cs +++ b/OpenAI-DotNet/Common/OpenAIBaseEndpoint.cs @@ -30,6 +30,11 @@ public abstract class OpenAIBaseEndpoint /// protected virtual bool? IsAzureDeployment => null; + /// + /// Indicates if the endpoint is for a WebSocket. + /// + protected virtual bool? IsWebSocketEndpoint => null; + /// /// Gets the full formatted url for the API endpoint. /// @@ -48,7 +53,10 @@ protected string GetUrl(string endpoint = "", Dictionary queryPa route = $"{Root}{endpoint}"; } - var result = string.Format(client.OpenAIClientSettings.BaseRequestUrlFormat, route); + var baseUrlFormat = IsWebSocketEndpoint == true + ? client.OpenAIClientSettings.BaseWebSocketUrlFormat + : client.OpenAIClientSettings.BaseRequestUrlFormat; + var result = string.Format(baseUrlFormat, route); foreach (var defaultQueryParameter in client.OpenAIClientSettings.DefaultQueryParameters) { diff --git a/OpenAI-DotNet/Common/PromptTokensDetails.cs b/OpenAI-DotNet/Common/PromptTokensDetails.cs new file mode 100644 index 00000000..d376fd65 --- /dev/null +++ b/OpenAI-DotNet/Common/PromptTokensDetails.cs @@ -0,0 +1,46 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using System.Text.Json.Serialization; + +namespace OpenAI +{ + public sealed class PromptTokensDetails + { + public PromptTokensDetails() { } + + private PromptTokensDetails( + int? cachedTokens = null, + int? audioTokens = null, + int? textTokens = null, + int? imageTokens = null) + { + CachedTokens = cachedTokens; + AudioTokens = audioTokens; + TextTokens = textTokens; + ImageTokens = imageTokens; + } + + [JsonInclude] + [JsonPropertyName("cached_tokens")] + public int? CachedTokens { get; private set; } + + [JsonInclude] + [JsonPropertyName("audio_tokens")] + public int? AudioTokens { get; private set; } + + [JsonInclude] + [JsonPropertyName("text_tokens")] + public int? TextTokens { get; private set; } + + [JsonInclude] + [JsonPropertyName("image_tokens")] + public int? ImageTokens { get; private set; } + + public static PromptTokensDetails operator +(PromptTokensDetails a, PromptTokensDetails b) + => new( + (a?.CachedTokens ?? 0) + (b?.CachedTokens ?? 0), + (a?.AudioTokens ?? 0) + (b?.AudioTokens ?? 0), + (a?.TextTokens ?? 0) + (b?.TextTokens ?? 0), + (a?.ImageTokens ?? 0) + (b?.ImageTokens ?? 0)); + } +} diff --git a/OpenAI-DotNet/Common/ResponseFormatObject.cs b/OpenAI-DotNet/Common/ResponseFormatObject.cs index cb3581fc..30a9f869 100644 --- a/OpenAI-DotNet/Common/ResponseFormatObject.cs +++ b/OpenAI-DotNet/Common/ResponseFormatObject.cs @@ -25,6 +25,7 @@ public ResponseFormatObject(JsonSchema schema) [JsonInclude] [JsonPropertyName("type")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)] [JsonConverter(typeof(Extensions.JsonStringEnumConverter))] public ChatResponseFormat Type { get; private set; } diff --git a/OpenAI-DotNet/Common/Tool.cs b/OpenAI-DotNet/Common/Tool.cs index 5dba0dc3..d8ede744 100644 --- a/OpenAI-DotNet/Common/Tool.cs +++ b/OpenAI-DotNet/Common/Tool.cs @@ -24,6 +24,7 @@ public Tool(Function function) Type = nameof(function); } + [Obsolete("use new OpenAI.Tools.ToolCall class")] public Tool(string toolCallId, string functionName, JsonNode functionArguments, bool? strict = null) { Function = new Function(functionName, arguments: functionArguments, strict); @@ -41,9 +42,6 @@ public Tool(FileSearchOptions fileSearchOptions) public static implicit operator Tool(FileSearchOptions fileSearchOptions) => new(fileSearchOptions); - [Obsolete("Use FileSearch")] - public static Tool Retrieval { get; } = new() { Type = "file_search" }; - public static Tool FileSearch { get; } = new() { Type = "file_search" }; public static Tool CodeInterpreter { get; } = new() { Type = "code_interpreter" }; @@ -111,18 +109,37 @@ public void AppendFrom(Tool other) } } - /// - /// Invokes the function and returns the result as json. - /// - /// The result of the function as json. - /// Only call this directly if you know the method is synchronous. - public string InvokeFunction() + #region Tool Calling + + private void ValidateToolCall(ToolCall toolCall) { if (!IsFunction) { throw new InvalidOperationException("This tool is not a function."); } + if (Function.Name != toolCall.Function.Name) + { + throw new InvalidOperationException("Tool does not match tool call!"); + } + } + + [Obsolete("Use overload with ToolCall parameter")] + public string InvokeFunction() + => IsFunction + ? Function.Invoke() + : throw new InvalidOperationException("This tool is not a function."); + + /// + /// Invokes the function and returns the result as json. + /// + /// The with the function arguments to invoke. + /// The result of the function as json. + /// Raised if function call is invalid or tool is not a function. + public string InvokeFunction(ToolCall toolCall) + { + ValidateToolCall(toolCall); + Function.Arguments = toolCall.Function.Arguments; return Function.Invoke(); } @@ -131,14 +148,23 @@ public string InvokeFunction() /// /// The type to deserialize the result to. /// The result of the function. - /// Only call this directly if you know the method is synchronous. + [Obsolete("Use overload with ToolCall parameter")] public T InvokeFunction() - { - if (!IsFunction) - { - throw new InvalidOperationException("This tool is not a function."); - } + => IsFunction + ? Function.Invoke() + : throw new InvalidOperationException("This tool is not a function."); + /// + /// Invokes the function and returns the result. + /// + /// The type to deserialize the result to. + /// The with the function arguments to invoke. + /// The result of the function. + /// Raised if function call is invalid or tool is not a function. + public T InvokeFunction(ToolCall toolCall) + { + ValidateToolCall(toolCall); + Function.Arguments = toolCall.Function.Arguments; return Function.Invoke(); } @@ -147,14 +173,24 @@ public T InvokeFunction() /// /// Optional, A token to cancel the request. /// The result of the function as json. + [Obsolete("Use overload with ToolCall parameter")] public async Task InvokeFunctionAsync(CancellationToken cancellationToken = default) - { - if (!IsFunction) - { - throw new InvalidOperationException("This tool is not a function."); - } + => IsFunction + ? await Function.InvokeAsync(cancellationToken) + : throw new InvalidOperationException("This tool is not a function."); - return await Function.InvokeAsync(cancellationToken).ConfigureAwait(false); + /// + /// Invokes the function and returns the result as json. + /// + /// The with the function arguments to invoke. + /// Optional, A token to cancel the request. + /// The result of the function as json. + /// Raised if function call is invalid or tool is not a function. + public async Task InvokeFunctionAsync(ToolCall toolCall, CancellationToken cancellationToken = default) + { + ValidateToolCall(toolCall); + Function.Arguments = toolCall.Function.Arguments; + return await Function.InvokeAsync(cancellationToken); } /// @@ -163,16 +199,29 @@ public async Task InvokeFunctionAsync(CancellationToken cancellationToke /// The type to deserialize the result to. /// Optional, A token to cancel the request. /// The result of the function. + [Obsolete("Use overload with ToolCall parameter")] public async Task InvokeFunctionAsync(CancellationToken cancellationToken = default) - { - if (!IsFunction) - { - throw new InvalidOperationException("This tool is not a function."); - } + => IsFunction + ? await Function.InvokeAsync(cancellationToken) + : throw new InvalidOperationException("This tool is not a function."); - return await Function.InvokeAsync(cancellationToken).ConfigureAwait(false); + /// + /// Invokes the function and returns the result. + /// + /// The type to deserialize the result to. + /// The with the function arguments to invoke. + /// Optional, A token to cancel the request. + /// The result of the function. + /// Raised if function call is invalid or tool is not a function. + public async Task InvokeFunctionAsync(ToolCall toolCall, CancellationToken cancellationToken = default) + { + ValidateToolCall(toolCall); + Function.Arguments = toolCall.Function.Arguments; + return await Function.InvokeAsync(cancellationToken); } + #endregion Tool Calling + #region Tool Cache private static readonly List toolCache = @@ -210,7 +259,7 @@ where method.IsStatic where functionAttribute != null let name = GetFunctionName(type, method) let description = functionAttribute.Description - select Function.GetOrCreateFunction(name, description, method, strict: true) + select Function.GetOrCreateFunction(name, description, method, strict: false) into function select new Tool(function)); @@ -343,7 +392,7 @@ private static Tool GetOrCreateToolInternal(Type type, MethodInfo method, string return tool; } - tool = new Tool(Function.GetOrCreateFunction(functionName, description, method, instance, strict: true)); + tool = new Tool(Function.GetOrCreateFunction(functionName, description, method, instance)); toolCache.Add(tool); return tool; } @@ -362,11 +411,20 @@ private static bool TryGetTool(string name, object instance, out Tool tool) return false; } + internal static bool TryGetTool(ToolCall toolCall, out Tool tool) + { + tool = toolCache + .Where(knownTool => knownTool.Type == toolCall.Type) + .FirstOrDefault(knownTool => knownTool.Function.Name == toolCall.Function.Name); + return tool != null; + } + private static string GetFunctionName(Type type, MethodInfo methodInfo) { - // todo possibly use string hash instead to mitigate long names? - // todo possibly use AssemblyQualifiedName? - return $"{type.FullName}.{methodInfo.Name}".Replace('.', '_'); + var baseName = methodInfo.Name.Replace('.', '_'); + var hashedFullyQualifiedName = $"{type.AssemblyQualifiedName}".GenerateGuid().ToString("N"); + var nameLength = baseName.Length <= 32 ? baseName.Length : 32; + return $"{baseName[..nameLength]}_{hashedFullyQualifiedName}"; } #endregion Tool Cache diff --git a/OpenAI-DotNet/Common/ToolCall.cs b/OpenAI-DotNet/Common/ToolCall.cs new file mode 100644 index 00000000..29503535 --- /dev/null +++ b/OpenAI-DotNet/Common/ToolCall.cs @@ -0,0 +1,141 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using OpenAI.Extensions; +using System; +using System.Collections.Generic; +using System.Text.Json.Nodes; +using System.Text.Json.Serialization; +using System.Threading; +using System.Threading.Tasks; + +namespace OpenAI +{ + public sealed class ToolCall : IAppendable + { + public ToolCall() { } + + public ToolCall(string toolCallId, string functionName, JsonNode functionArguments = null) + { + Id = toolCallId; + Function = new Function(functionName, arguments: functionArguments); + Type = "function"; + } + + [JsonInclude] + [JsonPropertyName("id")] + public string Id { get; private set; } + + [JsonInclude] + [JsonPropertyName("index")] + public int? Index { get; private set; } + + [JsonInclude] + [JsonPropertyName("type")] + public string Type { get; private set; } + + [JsonInclude] + [JsonPropertyName("function")] + public Function Function { get; private set; } + + [JsonIgnore] + public bool IsFunction => Type == "function"; + + public void AppendFrom(ToolCall other) + { + if (other == null) { return; } + + if (!string.IsNullOrWhiteSpace(other.Id)) + { + Id = other.Id; + } + + if (other.Index.HasValue) + { + Index = other.Index.Value; + } + + if (!string.IsNullOrWhiteSpace(other.Type)) + { + Type = other.Type; + } + + if (other.Function != null) + { + if (Function == null) + { + Function = new Function(other.Function); + } + else + { + Function.AppendFrom(other.Function); + } + } + } + + /// + /// Invokes the function and returns the result as json. + /// + /// The result of the function as json. + /// If tool is not a function or tool is not registered. + public string InvokeFunction() + => TryGetToolCache(this, out var tool) + ? tool.InvokeFunction(this) + : throw new InvalidOperationException($"Tool \"{Function.Name}\" is not registered!"); + + /// + /// Invokes the function and returns the result. + /// + /// The type to deserialize the result to. + /// The result of the function. + /// If tool is not a function or tool is not registered. + public T InvokeFunction() + => TryGetToolCache(this, out var tool) + ? tool.InvokeFunction(this) + : throw new InvalidOperationException($"Tool \"{Function.Name}\" is not registered!"); + + /// + /// Invokes the function and returns the result as json. + /// + /// Optional, A token to cancel the request. + /// The result of the function as json. + /// If tool is not a function or tool is not registered. + public async Task InvokeFunctionAsync(CancellationToken cancellationToken = default) + => TryGetToolCache(this, out var tool) + ? await tool.InvokeFunctionAsync(this, cancellationToken) + : throw new InvalidOperationException($"Tool \"{Function.Name}\" is not registered!"); + + /// + /// Invokes the function and returns the result. + /// + /// The type to deserialize the result to. + /// Optional, A token to cancel the request. + /// The result of the function. + /// If tool is not a function or tool is not registered. + public async Task InvokeFunctionAsync(CancellationToken cancellationToken = default) + { + return TryGetToolCache(this, out var tool) + ? await tool.InvokeFunctionAsync(this, cancellationToken) + : throw new InvalidOperationException($"Tool \"{Function.Name}\" is not registered!"); + } + + private static bool TryGetToolCache(ToolCall toolCall, out Tool tool) + { + tool = null; + + if (toolCache.TryGetValue(toolCall.Function.Name, out tool)) + { + return true; + } + + if (Tool.TryGetTool(toolCall, out tool)) + { + toolCache[toolCall.Function.Name] = tool; + return true; + } + + return false; + } + + private static readonly Dictionary toolCache = new(); + } +} diff --git a/OpenAI-DotNet/Common/Usage.cs b/OpenAI-DotNet/Common/Usage.cs index 6b8f3c59..ede54b8e 100644 --- a/OpenAI-DotNet/Common/Usage.cs +++ b/OpenAI-DotNet/Common/Usage.cs @@ -9,11 +9,18 @@ public sealed class Usage { public Usage() { } - private Usage(int? promptTokens, int? completionTokens, int? totalTokens) + private Usage( + int? completionTokens, + int? promptTokens, + int? totalTokens, + CompletionTokensDetails completionTokensDetails, + PromptTokensDetails promptTokensDetails) { PromptTokens = promptTokens; CompletionTokens = completionTokens; TotalTokens = totalTokens; + CompletionTokensDetails = completionTokensDetails; + PromptTokensDetails = promptTokensDetails; } [JsonInclude] @@ -31,6 +38,16 @@ private Usage(int? promptTokens, int? completionTokens, int? totalTokens) [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)] public int? TotalTokens { get; private set; } + [JsonInclude] + [JsonPropertyName("completion_tokens_details")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)] + public CompletionTokensDetails CompletionTokensDetails { get; private set; } + + [JsonInclude] + [JsonPropertyName("prompt_tokens_details")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)] + public PromptTokensDetails PromptTokensDetails { get; private set; } + internal void AppendFrom(Usage other) { if (other?.PromptTokens != null) @@ -47,14 +64,27 @@ internal void AppendFrom(Usage other) { TotalTokens = other.TotalTokens.Value; } + + if (other?.CompletionTokensDetails != null) + { + CompletionTokensDetails = other.CompletionTokensDetails; + } + + if (other?.PromptTokensDetails != null) + { + PromptTokensDetails = other.PromptTokensDetails; + } } - public override string ToString() => JsonSerializer.Serialize(this, OpenAIClient.JsonSerializationOptions); + public override string ToString() + => JsonSerializer.Serialize(this, OpenAIClient.JsonSerializationOptions); public static Usage operator +(Usage a, Usage b) => new( (a.PromptTokens ?? 0) + (b.PromptTokens ?? 0), (a.CompletionTokens ?? 0) + (b.CompletionTokens ?? 0), - (a.TotalTokens ?? 0) + (b.TotalTokens ?? 0)); + (a.TotalTokens ?? 0) + (b.TotalTokens ?? 0), + a.CompletionTokensDetails + b.CompletionTokensDetails, + a.PromptTokensDetails + b.PromptTokensDetails); } } diff --git a/OpenAI-DotNet/Common/Voice.cs b/OpenAI-DotNet/Common/Voice.cs new file mode 100644 index 00000000..3598d36c --- /dev/null +++ b/OpenAI-DotNet/Common/Voice.cs @@ -0,0 +1,46 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + + +namespace OpenAI +{ + public class Voice + { + public Voice(string id) { Id = id; } + + public string Id { get; } + + public override string ToString() => Id; + + public static implicit operator string(Voice voice) => voice?.ToString(); + + public static implicit operator Voice(string id) => new(id); + + public static readonly Voice Alloy = new("alloy"); + public static readonly Voice Ash = new("ash"); + public static readonly Voice Ballad = new("ballad"); + public static readonly Voice Coral = new("coral"); + public static readonly Voice Echo = new("echo"); + public static readonly Voice Fable = new("fable"); + public static readonly Voice Onyx = new("onyx"); + public static readonly Voice Nova = new("nova"); + public static readonly Voice Sage = new("sage"); + public static readonly Voice Shimmer = new("shimmer"); + public static readonly Voice Verse = new("verse"); + +#pragma warning disable CS0618 // Type or member is obsolete + public static implicit operator Voice(Audio.SpeechVoice voice) + { + return voice switch + { + Audio.SpeechVoice.Alloy => Alloy, + Audio.SpeechVoice.Echo => Echo, + Audio.SpeechVoice.Fable => Fable, + Audio.SpeechVoice.Onyx => Onyx, + Audio.SpeechVoice.Nova => Nova, + Audio.SpeechVoice.Shimmer => Shimmer, + _ => null + }; + } +#pragma warning restore CS0618 // Type or member is obsolete + } +} diff --git a/OpenAI-DotNet/Extensions/AudioOutputConverter.cs b/OpenAI-DotNet/Extensions/AudioOutputConverter.cs new file mode 100644 index 00000000..7eb42992 --- /dev/null +++ b/OpenAI-DotNet/Extensions/AudioOutputConverter.cs @@ -0,0 +1,65 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using OpenAI.Chat; +using System; +using System.Text.Json; +using System.Text.Json.Serialization; + +namespace OpenAI +{ + internal class AudioOutputConverter : JsonConverter + { + public override AudioOutput Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options) + { + string id = null; + var expiresAt = 0; + string b64Data = null; + string transcript = null; + ReadOnlyMemory data = null; + + while (reader.Read()) + { + if (reader.TokenType == JsonTokenType.EndObject) + { + break; + } + + if (reader.TokenType == JsonTokenType.PropertyName) + { + var propertyName = reader.GetString(); + reader.Read(); + + switch (propertyName) + { + case "id": + id = reader.GetString(); + break; + case "expires_at": + expiresAt = reader.GetInt32(); + break; + case "data": + b64Data = reader.GetString(); + break; + case "transcript": + transcript = reader.GetString(); + break; + default: + throw new JsonException(propertyName); + } + } + } + + if (!string.IsNullOrWhiteSpace(b64Data)) + { + data = Convert.FromBase64String(b64Data); + } + + return new AudioOutput(id, expiresAt, data, transcript); + } + + public override void Write(Utf8JsonWriter writer, AudioOutput value, JsonSerializerOptions options) + { + JsonSerializer.Serialize(writer, new { id = value.Id }); + } + } +} diff --git a/OpenAI-DotNet/Extensions/ModalityConverter.cs b/OpenAI-DotNet/Extensions/ModalityConverter.cs new file mode 100644 index 00000000..30c9d324 --- /dev/null +++ b/OpenAI-DotNet/Extensions/ModalityConverter.cs @@ -0,0 +1,35 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using System; +using System.Text.Json; +using System.Text.Json.Serialization; + +namespace OpenAI +{ + internal class ModalityConverter : JsonConverter + { + public override void Write(Utf8JsonWriter writer, Modality value, JsonSerializerOptions options) + { + writer.WriteStartArray(); + if (value.HasFlag(Modality.Text)) { writer.WriteStringValue("text"); } + if (value.HasFlag(Modality.Audio)) { writer.WriteStringValue("audio"); } + writer.WriteEndArray(); + } + + public override Modality Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options) + { + var modalityArray = JsonDocument.ParseValue(ref reader).RootElement.EnumerateArray(); + var modality = Modality.None; + foreach (var modalityString in modalityArray) + { + modality |= modalityString.GetString() switch + { + "text" => Modality.Text, + "audio" => Modality.Audio, + _ => throw new NotImplementedException($"Unknown modality: {modalityString}") + }; + } + return modality; + } + } +} diff --git a/OpenAI-DotNet/Extensions/RealtimeClientEventConverter.cs b/OpenAI-DotNet/Extensions/RealtimeClientEventConverter.cs new file mode 100644 index 00000000..79be91d9 --- /dev/null +++ b/OpenAI-DotNet/Extensions/RealtimeClientEventConverter.cs @@ -0,0 +1,35 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using OpenAI.Realtime; +using System; +using System.Text.Json; +using System.Text.Json.Serialization; + +namespace OpenAI +{ + internal class RealtimeClientEventConverter : JsonConverter + { + public override void Write(Utf8JsonWriter writer, IRealtimeEvent value, JsonSerializerOptions options) + => throw new NotImplementedException(); + + public override IRealtimeEvent Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options) + { + var root = JsonDocument.ParseValue(ref reader).RootElement; + var type = root.GetProperty("type").GetString(); + + return type switch + { + "session.update" => root.Deserialize(options), + "input_audio_buffer.append" => root.Deserialize(options), + "input_audio_buffer.commit" => root.Deserialize(options), + "input_audio_buffer.clear" => root.Deserialize(options), + "conversation.item.create" => root.Deserialize(options), + "conversation.item.truncate" => root.Deserialize(options), + "conversation.item.delete" => root.Deserialize(options), + "response.create" => root.Deserialize(options), + "response.cancel" => root.Deserialize(options), + _ => throw new NotImplementedException($"Unknown {nameof(IRealtimeEvent)}: {type}") + }; + } + } +} diff --git a/OpenAI-DotNet/Extensions/RealtimeServerEventConverter.cs b/OpenAI-DotNet/Extensions/RealtimeServerEventConverter.cs new file mode 100644 index 00000000..2455ff85 --- /dev/null +++ b/OpenAI-DotNet/Extensions/RealtimeServerEventConverter.cs @@ -0,0 +1,45 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using OpenAI.Realtime; +using System; +using System.Text.Json; +using System.Text.Json.Serialization; + +namespace OpenAI +{ + internal class RealtimeServerEventConverter : JsonConverter + { + public override void Write(Utf8JsonWriter writer, IServerEvent value, JsonSerializerOptions options) + => throw new NotImplementedException(); + + public override IServerEvent Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options) + { + var root = JsonDocument.ParseValue(ref reader).RootElement; + var type = root.GetProperty("type").GetString()!; + + return type switch + { + "error" => root.Deserialize(options), + _ when type.StartsWith("session") => root.Deserialize(options), + "conversation.created" => root.Deserialize(options), + "conversation.item.created" => root.Deserialize(options), + _ when type.StartsWith("conversation.item.input_audio_transcription") => root.Deserialize(options), + "conversation.item.truncated" => root.Deserialize(options), + "conversation.item.deleted" => root.Deserialize(options), + "input_audio_buffer.committed" => root.Deserialize(options), + "input_audio_buffer.cleared" => root.Deserialize(options), + "input_audio_buffer.speech_started" => root.Deserialize(options), + "input_audio_buffer.speech_stopped" => root.Deserialize(options), + _ when type.StartsWith("response.audio_transcript") => root.Deserialize(options), + _ when type.StartsWith("response.audio") => root.Deserialize(), + _ when type.StartsWith("response.content_part") => root.Deserialize(options), + _ when type.StartsWith("response.function_call_arguments") => root.Deserialize(options), + _ when type.StartsWith("response.output_item") => root.Deserialize(options), + _ when type.StartsWith("response.text") => root.Deserialize(options), + _ when type.StartsWith("response") => root.Deserialize(options), + _ when type.StartsWith("rate_limits") => root.Deserialize(options), + _ => throw new NotImplementedException($"Unknown {nameof(IServerEvent)}: {type}") + }; + } + } +} diff --git a/OpenAI-DotNet/Extensions/ResponseFormatConverter.cs b/OpenAI-DotNet/Extensions/ResponseFormatConverter.cs index 68dccbad..7b0de4d1 100644 --- a/OpenAI-DotNet/Extensions/ResponseFormatConverter.cs +++ b/OpenAI-DotNet/Extensions/ResponseFormatConverter.cs @@ -27,8 +27,15 @@ public override ResponseFormatObject Read(ref Utf8JsonReader reader, Type typeTo public override void Write(Utf8JsonWriter writer, ResponseFormatObject value, JsonSerializerOptions options) { - // serialize the object normally - JsonSerializer.Serialize(writer, value, options); + switch (value.Type) + { + case ChatResponseFormat.Auto: + // ignore + break; + default: + JsonSerializer.Serialize(writer, value, options); + break; + } } } } diff --git a/OpenAI-DotNet/Extensions/StringExtensions.cs b/OpenAI-DotNet/Extensions/StringExtensions.cs index e071e4cf..f1173dce 100644 --- a/OpenAI-DotNet/Extensions/StringExtensions.cs +++ b/OpenAI-DotNet/Extensions/StringExtensions.cs @@ -1,7 +1,10 @@ // Licensed under the MIT License. See LICENSE in the project root for license information. +using System; using System.Linq; using System.Net.Http; +using System.Security.Cryptography; +using System.Text; using System.Text.Encodings.Web; using System.Text.Json; @@ -9,6 +12,14 @@ namespace OpenAI.Extensions { internal static class StringExtensions { + /// + /// Generates a based on the string. + /// + /// The string to generate the . + /// A new that represents the string. + public static Guid GenerateGuid(this string @string) + => new(MD5.HashData(Encoding.UTF8.GetBytes(@string))); + /// /// Attempts to get the event data from the string data. /// Returns false once the stream is done. diff --git a/OpenAI-DotNet/Extensions/TaskExtensions.cs b/OpenAI-DotNet/Extensions/TaskExtensions.cs new file mode 100644 index 00000000..b3c48352 --- /dev/null +++ b/OpenAI-DotNet/Extensions/TaskExtensions.cs @@ -0,0 +1,59 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using System; +using System.Threading; +using System.Threading.Tasks; + +namespace OpenAI.Extensions +{ + internal static class TaskExtensions + { + /// + /// Runs with . + /// + /// The to run. + /// . + /// + public static async Task WithCancellation(this Task task, CancellationToken cancellationToken) + { + var tcs = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); + + await using (cancellationToken.Register(state => ((TaskCompletionSource)state).TrySetResult(null), tcs)) + { + var resultTask = await Task.WhenAny(task, tcs.Task); + + if (resultTask == tcs.Task) + { + throw new OperationCanceledException(cancellationToken); + } + + await task; + } + } + + /// + /// Runs with . + /// + /// Task return type. + /// The to run. + /// . + /// + /// result. + public static async Task WithCancellation(this Task task, CancellationToken cancellationToken) + { + var tcs = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); + + await using (cancellationToken.Register(state => ((TaskCompletionSource)state).TrySetResult(null), tcs)) + { + var resultTask = await Task.WhenAny(task, tcs.Task); + + if (resultTask == tcs.Task) + { + throw new OperationCanceledException(cancellationToken); + } + + return await task; + } + } + } +} diff --git a/OpenAI-DotNet/Extensions/WebSocket.cs b/OpenAI-DotNet/Extensions/WebSocket.cs new file mode 100644 index 00000000..2def0a72 --- /dev/null +++ b/OpenAI-DotNet/Extensions/WebSocket.cs @@ -0,0 +1,396 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using System; +using System.Collections.Concurrent; +using System.Collections.Generic; +using System.IO; +using System.Net.WebSockets; +using System.Text; +using System.Threading; +using System.Threading.Tasks; + +namespace OpenAI.Extensions +{ + internal class WebSocket : IDisposable + { + public WebSocket(string url, IReadOnlyDictionary requestHeaders = null, IReadOnlyList subProtocols = null) + : this(new Uri(url), requestHeaders, subProtocols) + { + } + + public WebSocket(Uri uri, IReadOnlyDictionary requestHeaders = null, IReadOnlyList subProtocols = null) + { + var protocol = uri.Scheme; + + if (!protocol.Equals("ws") && !protocol.Equals("wss")) + { + throw new ArgumentException($"Unsupported protocol: {protocol}"); + } + + Address = uri; + RequestHeaders = requestHeaders ?? new Dictionary(); + SubProtocols = subProtocols ?? new List(); + CreateWebsocketAsync = (_, _) => Task.FromResult(new ClientWebSocket()); + RunMessageQueue(); + } + + private async void RunMessageQueue() + { + while (_semaphore != null) + { + while (_events.TryDequeue(out var action)) + { + try + { + action.Invoke(); + } + catch (Exception e) + { + Console.WriteLine(e); + OnError?.Invoke(e); + } + } + + await Task.Delay(16); + } + } + + ~WebSocket() => Dispose(false); + + #region IDisposable + + private void Dispose(bool disposing) + { + if (disposing) + { + lock (_lock) + { + if (State == State.Open) + { + CloseAsync().Wait(); + } + + _socket?.Dispose(); + _socket = null; + + _lifetimeCts?.Cancel(); + _lifetimeCts?.Dispose(); + _lifetimeCts = null; + + _semaphore?.Dispose(); + _semaphore = null; + } + } + } + + public void Dispose() + { + Dispose(true); + GC.SuppressFinalize(this); + } + + #endregion IDisposable + + public event Action OnOpen; + + public event Action OnMessage; + + public event Action OnError; + + public event Action OnClose; + + public Uri Address { get; } + + public IReadOnlyDictionary RequestHeaders { get; } + + public IReadOnlyList SubProtocols { get; } + + public State State => _socket?.State switch + { + WebSocketState.Connecting => State.Connecting, + WebSocketState.Open => State.Open, + WebSocketState.CloseSent or WebSocketState.CloseReceived => State.Closing, + _ => State.Closed + }; + + private readonly object _lock = new(); + private System.Net.WebSockets.WebSocket _socket; + private SemaphoreSlim _semaphore = new(1, 1); + private CancellationTokenSource _lifetimeCts; + private readonly ConcurrentQueue _events = new(); + + public async void Connect() + => await ConnectAsync().ConfigureAwait(false); + + // used for unit testing websocket server + internal Func> CreateWebsocketAsync; + + public async Task ConnectAsync(CancellationToken cancellationToken = default) + { + try + { + if (State == State.Open) + { + Console.WriteLine("Websocket is already open!"); + return; + } + + // ReSharper disable once MethodHasAsyncOverload + _lifetimeCts?.Cancel(); + _lifetimeCts?.Dispose(); + _lifetimeCts = new CancellationTokenSource(); + using var cts = CancellationTokenSource.CreateLinkedTokenSource(_lifetimeCts.Token, cancellationToken); + + _socket = await CreateWebsocketAsync.Invoke(Address, cts.Token).ConfigureAwait(false); + + if (_socket is ClientWebSocket clientWebSocket) + { + foreach (var requestHeader in RequestHeaders) + { + clientWebSocket.Options.SetRequestHeader(requestHeader.Key, requestHeader.Value); + } + + foreach (var subProtocol in SubProtocols) + { + clientWebSocket.Options.AddSubProtocol(subProtocol); + } + + await clientWebSocket.ConnectAsync(Address, cts.Token).ConfigureAwait(false); + } + + _events.Enqueue(() => OnOpen?.Invoke()); + var buffer = new Memory(new byte[8192]); + + while (State == State.Open) + { + ValueWebSocketReceiveResult result; + using var stream = new MemoryStream(); + + do + { + result = await _socket.ReceiveAsync(buffer, cts.Token).ConfigureAwait(false); + stream.Write(buffer.Span[..result.Count]); + } while (!result.EndOfMessage); + + await stream.FlushAsync(cts.Token).ConfigureAwait(false); + var memory = new ReadOnlyMemory(stream.GetBuffer(), 0, (int)stream.Length); + + if (result.MessageType != WebSocketMessageType.Close) + { + _events.Enqueue(() => OnMessage?.Invoke(new DataFrame((OpCode)(int)result.MessageType, memory))); + } + else + { + await CloseAsync(cancellationToken: CancellationToken.None).ConfigureAwait(false); + break; + } + } + + try + { + await _semaphore.WaitAsync(CancellationToken.None).ConfigureAwait(false); + } + finally + { + _semaphore.Release(); + } + } + catch (Exception e) + { + switch (e) + { + case TaskCanceledException: + case OperationCanceledException: + break; + default: + Console.WriteLine(e); + _events.Enqueue(() => OnError?.Invoke(e)); + _events.Enqueue(() => OnClose?.Invoke(CloseStatusCode.AbnormalClosure, e.Message)); + break; + } + } + } + + public async Task SendAsync(string text, CancellationToken cancellationToken = default) + => await Internal_SendAsync(Encoding.UTF8.GetBytes(text), WebSocketMessageType.Text, cancellationToken).ConfigureAwait(false); + + public async Task SendAsync(ArraySegment data, CancellationToken cancellationToken = default) + => await Internal_SendAsync(data, WebSocketMessageType.Binary, cancellationToken).ConfigureAwait(false); + + private async Task Internal_SendAsync(ArraySegment data, WebSocketMessageType opCode, CancellationToken cancellationToken) + { + try + { + using var cts = CancellationTokenSource.CreateLinkedTokenSource(_lifetimeCts.Token, cancellationToken); + await _semaphore.WaitAsync(cts.Token).ConfigureAwait(false); + + if (State != State.Open) + { + throw new InvalidOperationException("WebSocket is not ready!"); + } + + await _socket.SendAsync(data, opCode, true, cts.Token).ConfigureAwait(false); + } + catch (Exception e) + { + switch (e) + { + case TaskCanceledException: + case OperationCanceledException: + break; + default: + Console.WriteLine(e); + _events.Enqueue(() => OnError?.Invoke(e)); + break; + } + } + finally + { + _semaphore.Release(); + } + } + + public async void Close() + => await CloseAsync(); + + public async Task CloseAsync(CloseStatusCode code = CloseStatusCode.Normal, string reason = "", CancellationToken cancellationToken = default) + { + try + { + if (State == State.Open) + { + await _socket.CloseAsync((WebSocketCloseStatus)(int)code, reason, cancellationToken).ConfigureAwait(false); + _events.Enqueue(() => OnClose?.Invoke(code, reason)); + } + } + catch (Exception e) + { + switch (e) + { + case ObjectDisposedException: + case TaskCanceledException: + case OperationCanceledException: + _events.Enqueue(() => OnClose?.Invoke(code, reason)); + break; + default: + Console.WriteLine(e); + _events.Enqueue(() => OnError?.Invoke(e)); + break; + } + } + } + } + + internal class DataFrame + { + public OpCode Type { get; } + + public ReadOnlyMemory Data { get; } + + public string Text { get; } + + public DataFrame(OpCode type, ReadOnlyMemory data) + { + Type = type; + Data = data; + Text = type == OpCode.Text + ? Encoding.UTF8.GetString(data.Span) + : string.Empty; + } + } + + internal enum CloseStatusCode : ushort + { + /// + /// Indicates a normal closure, meaning that the purpose for which the connection was established has been fulfilled. + /// + Normal = 1000, + /// + /// Indicates that an endpoint is "going away", such as a server going down or a browser having navigated away from a page. + /// + GoingAway = 1001, + /// + /// Indicates that an endpoint is terminating the connection due to a protocol error. + /// + ProtocolError = 1002, + /// + /// Indicates that an endpoint is terminating the connection because it has received a type of data it cannot accept + /// (e.g., an endpoint that understands only text data MAY send this if it receives a binary message). + /// + UnsupportedData = 1003, + /// + /// Reserved and MUST NOT be set as a status code in a Close control frame by an endpoint. + /// The specific meaning might be defined in the future. + /// + Reserved = 1004, + /// + /// Reserved and MUST NOT be set as a status code in a Close control frame by an endpoint. + /// It is designated for use in applications expecting a status code to indicate that no status code was actually present. + /// + NoStatus = 1005, + /// + /// Reserved and MUST NOT be set as a status code in a Close control frame by an endpoint. + /// It is designated for use in applications expecting a status code to indicate that the connection was closed abnormally, + /// e.g., without sending or receiving a Close control frame. + /// + AbnormalClosure = 1006, + /// + /// Indicates that an endpoint is terminating the connection because it has received data within a message + /// that was not consistent with the type of the message. + /// + InvalidPayloadData = 1007, + /// + /// Indicates that an endpoint is terminating the connection because it received a message that violates its policy. + /// This is a generic status code that can be returned when there is no other more suitable status code (e.g., 1003 or 1009) + /// or if there is a need to hide specific details about the policy. + /// + PolicyViolation = 1008, + /// + /// Indicates that an endpoint is terminating the connection because it has received a message that is too big for it to process. + /// + TooBigToProcess = 1009, + /// + /// Indicates that an endpoint (client) is terminating the connection because it has expected the server to negotiate + /// one or more extension, but the server didn't return them in the response message of the WebSocket handshake. + /// The list of extensions that are needed SHOULD appear in the /reason/ part of the Close frame. Note that this status code + /// is not used by the server, because it can fail the WebSocket handshake instead. + /// + MandatoryExtension = 1010, + /// + /// Indicates that a server is terminating the connection because it encountered an unexpected condition that prevented it from fulfilling the request. + /// + ServerError = 1011, + /// + /// Reserved and MUST NOT be set as a status code in a Close control frame by an endpoint. + /// It is designated for use in applications expecting a status code to indicate that the connection was closed due to a failure to perform a TLS handshake + /// (e.g., the server certificate can't be verified). + /// + TlsHandshakeFailure = 1015 + } + + internal enum OpCode + { + Text, + Binary + } + + internal enum State : ushort + { + /// + /// The connection has not yet been established. + /// + Connecting = 0, + /// + /// The connection has been established and communication is possible. + /// + Open = 1, + /// + /// The connection is going through the closing handshake or close has been requested. + /// + Closing = 2, + /// + /// The connection has been closed or could not be opened. + /// + Closed = 3 + } +} diff --git a/OpenAI-DotNet/Models/Model.cs b/OpenAI-DotNet/Models/Model.cs index 41fd3bcd..5467c925 100644 --- a/OpenAI-DotNet/Models/Model.cs +++ b/OpenAI-DotNet/Models/Model.cs @@ -73,6 +73,12 @@ public Model(string id, string ownedBy = null) [JsonPropertyName("parent")] public string Parent { get; private set; } + public static Model O1 { get; } = new("o1-preview", "openai"); + + public static Model O1Mini { get; } = new("o1-mini", "openai"); + + public static Model GPT4oRealtime { get; } = new("gpt-4o-realtime-preview", "openai"); + /// /// GPT-4o (“o” for “omni”) is our most advanced model. /// It is multimodal (accepting text or image inputs and outputting text), @@ -83,7 +89,11 @@ public Model(string id, string ownedBy = null) /// /// Context Window: 128,000 tokens /// - public static Model GPT4o { get; } = new Model("gpt-4o", "openai"); + public static Model GPT4o { get; } = new("gpt-4o", "openai"); + + public static Model GPT4oMini { get; } = new("gpt-4o-mini", "openai"); + + public static Model GPT4oAudio { get; } = new("gpt-4o-audio-preview", "openai"); /// /// More capable than any GPT-3.5 model, able to do more complex tasks, and optimized for chat. @@ -188,29 +198,5 @@ public Model(string id, string ownedBy = null) public static Model DallE_2 { get; } = new("dall-e-2", "openai"); public static Model DallE_3 { get; } = new("dall-e-3", "openai"); - - #region Obsolete - - /// - /// For edit requests. - /// - [Obsolete("Removed")] - public static Model DavinciEdit { get; } = new("text-davinci-edit-001", "openai"); - - /// - /// The 2nd most powerful engine, a bit faster than , and a bit faster. - /// Good at: Language translation, complex classification, text sentiment, summarization. - /// - [Obsolete("Removed")] - public static Model Curie { get; } = new("text-curie-001", "openai"); - - /// - /// The smallest, fastest engine available, although the quality of results may be poor. - /// Good at: Parsing text, simple classification, address correction, keywords - /// - [Obsolete("Removed")] - public static Model Ada { get; } = new("text-ada-001", "openai"); - - #endregion Obsolete } } diff --git a/OpenAI-DotNet/OpenAI-DotNet.csproj b/OpenAI-DotNet/OpenAI-DotNet.csproj index b872e1ef..4e582d68 100644 --- a/OpenAI-DotNet/OpenAI-DotNet.csproj +++ b/OpenAI-DotNet/OpenAI-DotNet.csproj @@ -29,8 +29,17 @@ More context [on Roger Pincombe's blog](https://rogerpincombe.com/openai-dotnet- OpenAI-DotNet.pfx true true - 8.3.0 + 8.4.0 +Version 8.4.0 +- Add realtime support +- Added o1, o1-mini, gpt-4o-mini, and gpt-4o-realtime, gpt-4o-audio model convenience properties +- Fixed some bugs with function invocations +- Fixed strict for built in FunctionAttribute defined tools +- Fixed FunctionAttribute tool generated names so they aren't too long +- Refactored Tools and ToolCalls. There is more of a distinction now in ChatResponses +- Refactored SpeechRequest, and deprecated SpeechVoice enum in favor of new Voice class +- Refactored OpenAI.Chat to support new audio modalities and output audio Version 8.3.0 - Updated library to .net 8 - Refactored TypeExtensions and JsonSchema generation diff --git a/OpenAI-DotNet/OpenAIClient.cs b/OpenAI-DotNet/OpenAIClient.cs index 467debae..a459624f 100644 --- a/OpenAI-DotNet/OpenAIClient.cs +++ b/OpenAI-DotNet/OpenAIClient.cs @@ -11,14 +11,18 @@ using OpenAI.Images; using OpenAI.Models; using OpenAI.Moderations; +using OpenAI.Realtime; using OpenAI.Threads; using OpenAI.VectorStores; using System; +using System.Collections.Generic; using System.Net.Http; using System.Net.Http.Headers; using System.Security.Authentication; using System.Text.Json; using System.Text.Json.Serialization; +using System.Threading; +using System.Threading.Tasks; namespace OpenAI { @@ -53,12 +57,12 @@ public OpenAIClient(OpenAIAuthentication openAIAuthentication = null, OpenAIClie OpenAIAuthentication = openAIAuthentication ?? OpenAIAuthentication.Default; OpenAIClientSettings = clientSettings ?? OpenAIClientSettings.Default; - if (OpenAIAuthentication?.ApiKey is null) + if (string.IsNullOrWhiteSpace(OpenAIAuthentication?.ApiKey)) { throw new AuthenticationException("You must provide API authentication. Please refer to https://github.com/RageAgainstThePixel/OpenAI-DotNet#authentication for details."); } - Client = SetupClient(client); + Client = SetupHttpClient(client); ModelsEndpoint = new ModelsEndpoint(this); ChatEndpoint = new ChatEndpoint(this); ImagesEndPoint = new ImagesEndpoint(this); @@ -71,6 +75,7 @@ public OpenAIClient(OpenAIAuthentication openAIAuthentication = null, OpenAIClie AssistantsEndpoint = new AssistantsEndpoint(this); BatchEndpoint = new BatchEndpoint(this); VectorStoresEndpoint = new VectorStoresEndpoint(this); + RealtimeEndpoint = new RealtimeEndpoint(this); } ~OpenAIClient() => Dispose(false); @@ -113,7 +118,12 @@ private void Dispose(bool disposing) internal static JsonSerializerOptions JsonSerializationOptions { get; } = new() { DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull, - Converters = { new JsonStringEnumConverterFactory() }, + Converters = + { + new JsonStringEnumConverterFactory(), + new RealtimeClientEventConverter(), + new RealtimeServerEventConverter() + }, ReferenceHandler = ReferenceHandler.IgnoreCycles }; @@ -136,7 +146,7 @@ private void Dispose(bool disposing) /// /// List and describe the various models available in the API. - /// You can refer to the Models documentation to understand what are available and the differences between them.
+ /// You can refer to the Models documentation to understand which models are available for certain endpoints: .
/// ///
public ModelsEndpoint ModelsEndpoint { get; } @@ -173,8 +183,7 @@ private void Dispose(bool disposing) /// /// Manage fine-tuning jobs to tailor a model to your specific training data.
- ///
- /// + /// ///
public FineTuningEndpoint FineTuningEndpoint { get; } @@ -185,18 +194,18 @@ private void Dispose(bool disposing) ///
public ModerationsEndpoint ModerationsEndpoint { get; } - /// - /// Build assistants that can call models and use tools to perform tasks.
- /// - ///
- public AssistantsEndpoint AssistantsEndpoint { get; } - /// /// Create threads that assistants can interact with.
/// ///
public ThreadsEndpoint ThreadsEndpoint { get; } + /// + /// Build assistants that can call models and use tools to perform tasks.
+ /// + ///
+ public AssistantsEndpoint AssistantsEndpoint { get; } + /// /// Create large batches of API requests for asynchronous processing. /// The Batch API returns completions within 24 hours for a 50% discount. @@ -210,9 +219,11 @@ private void Dispose(bool disposing) /// public VectorStoresEndpoint VectorStoresEndpoint { get; } + public RealtimeEndpoint RealtimeEndpoint { get; } + #endregion Endpoints - private HttpClient SetupClient(HttpClient client = null) + private HttpClient SetupHttpClient(HttpClient client = null) { if (client == null) { @@ -258,5 +269,27 @@ private HttpClient SetupClient(HttpClient client = null) return client; } + + internal WebSocket CreateWebSocket(string url) + { + var websocket = new WebSocket(url, WebsocketHeaders); + + if (CreateWebsocketAsync != null) + { + websocket.CreateWebsocketAsync = CreateWebsocketAsync; + } + + return websocket; + } + + // used to create unit test proxy server + internal Func> CreateWebsocketAsync = null; + + internal IReadOnlyDictionary WebsocketHeaders => new Dictionary + { + { "User-Agent", "OpenAI-DotNet" }, + { "OpenAI-Beta", "realtime=v1" }, + { "Authorization", $"Bearer {OpenAIAuthentication.ApiKey}" } + }; } } diff --git a/OpenAI-DotNet/Realtime/BaseRealtimeEvent.cs b/OpenAI-DotNet/Realtime/BaseRealtimeEvent.cs new file mode 100644 index 00000000..cbd8a02e --- /dev/null +++ b/OpenAI-DotNet/Realtime/BaseRealtimeEvent.cs @@ -0,0 +1,22 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using OpenAI.Extensions; +using System.Text.Json.Serialization; + +namespace OpenAI.Realtime +{ + public abstract class BaseRealtimeEvent : IRealtimeEvent + { + /// + [JsonIgnore] + public abstract string EventId { get; internal set; } + + /// + [JsonIgnore] + public abstract string Type { get; protected set; } + + /// + public string ToJsonString() + => this.ToEscapedJsonString(); + } +} diff --git a/OpenAI-DotNet/Realtime/ConversationItem.cs b/OpenAI-DotNet/Realtime/ConversationItem.cs new file mode 100644 index 00000000..9044a37f --- /dev/null +++ b/OpenAI-DotNet/Realtime/ConversationItem.cs @@ -0,0 +1,145 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text.Json.Nodes; +using System.Text.Json.Serialization; + +namespace OpenAI.Realtime +{ + public sealed class ConversationItem + { + public ConversationItem() { } + + public ConversationItem(Role role, IEnumerable content) + { + Role = role; + Type = ConversationItemType.Message; + Content = content?.ToList() ?? new List(); + + if (role is not (Role.Assistant or Role.User)) + { + throw new ArgumentException("Role must be either 'user' or 'assistant'."); + } + + if (role == Role.User && !Content.All(c => c.Type is RealtimeContentType.InputAudio or RealtimeContentType.InputText)) + { + throw new ArgumentException("User messages must contain only input text or input audio content."); + } + + if (role == Role.Assistant && !Content.All(c => c.Type is RealtimeContentType.Text or RealtimeContentType.Audio)) + { + throw new ArgumentException("Assistant messages must contain only text or audio content."); + } + } + + public ConversationItem(Role role, RealtimeContent content) + : this(role, new[] { content }) + { + } + + public ConversationItem(RealtimeContent content) + : this(Role.User, new[] { content }) + { + } + + public ConversationItem(ToolCall toolCall, string output) + { + Type = ConversationItemType.FunctionCallOutput; + FunctionCallId = toolCall.Id; + FunctionOutput = output; + } + + public ConversationItem(Tool tool) + { + Type = ConversationItemType.FunctionCall; + FunctionName = tool.Function.Name; + } + + public static implicit operator ConversationItem(string text) => new(text); + + /// + /// The unique ID of the item. + /// + [JsonInclude] + [JsonPropertyName("id")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)] + public string Id { get; private set; } + + /// + /// The object type, must be "realtime.item". + /// + [JsonInclude] + [JsonPropertyName("object")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)] + public string Object { get; private set; } + + /// + /// The type of the item ("message", "function_call", "function_call_output"). + /// + [JsonInclude] + [JsonPropertyName("type")] + [JsonIgnore(Condition = JsonIgnoreCondition.Never)] + [JsonConverter(typeof(Extensions.JsonStringEnumConverter))] + public ConversationItemType Type { get; private set; } + + /// + /// The status of the item ("completed", "in_progress", "incomplete"). + /// + [JsonInclude] + [JsonPropertyName("status")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)] + [JsonConverter(typeof(Extensions.JsonStringEnumConverter))] + public RealtimeResponseStatus Status { get; private set; } + + /// + /// The role associated with the item ("user", "assistant", "system"). + /// + [JsonInclude] + [JsonPropertyName("role")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)] + [JsonConverter(typeof(Extensions.JsonStringEnumConverter))] + public Role Role { get; private set; } + + /// + /// The content of the item. + /// + [JsonInclude] + [JsonPropertyName("content")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)] + public IReadOnlyList Content { get; private set; } + + /// + /// The ID of the function call (for "function_call" items). + /// + [JsonInclude] + [JsonPropertyName("call_id")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)] + public string FunctionCallId { get; private set; } + + /// + /// The name of the function being called. + /// + [JsonInclude] + [JsonPropertyName("name")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)] + public string FunctionName { get; private set; } + + /// + /// The arguments of the function call. + /// + [JsonInclude] + [JsonPropertyName("arguments")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)] + public JsonNode FunctionArguments { get; private set; } + + /// + /// The output of the function call (for "function_call_output" items). + /// + [JsonInclude] + [JsonPropertyName("output")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)] + public string FunctionOutput { get; private set; } + } +} diff --git a/OpenAI-DotNet/Realtime/ConversationItemCreateRequest.cs b/OpenAI-DotNet/Realtime/ConversationItemCreateRequest.cs new file mode 100644 index 00000000..b625b0d1 --- /dev/null +++ b/OpenAI-DotNet/Realtime/ConversationItemCreateRequest.cs @@ -0,0 +1,50 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using System.Text.Json.Serialization; + +namespace OpenAI.Realtime +{ + /// + /// Add a new Item to the Conversation's context, including messages, function calls, and function call responses. + /// This event can be used both to populate a "history" of the conversation and to add new items mid-stream, + /// but has the current limitation that it cannot populate assistant audio messages. + /// If successful, the server will respond with a conversation.item.created event, otherwise an error event will be sent. + /// + public sealed class ConversationItemCreateRequest : BaseRealtimeEvent, IClientEvent + { + public ConversationItemCreateRequest() { } + + public ConversationItemCreateRequest(ConversationItem item, string previousItemId = null) + { + PreviousItemId = previousItemId; + Item = item; + } + + /// + [JsonInclude] + [JsonPropertyName("event_id")] + public override string EventId { get; internal set; } + + /// + [JsonInclude] + [JsonPropertyName("type")] + public override string Type { get; protected set; } = "conversation.item.create"; + + /// + /// The ID of the preceding item after which the new item will be inserted. + /// If not set, the new item will be appended to the end of the conversation. + /// If set, it allows an item to be inserted mid-conversation. + /// If the ID cannot be found, an error will be returned and the item will not be added. + /// + [JsonInclude] + [JsonPropertyName("previous_item_id")] + public string PreviousItemId { get; private set; } + + /// + /// The item to add to the conversation. + /// + [JsonInclude] + [JsonPropertyName("item")] + public ConversationItem Item { get; private set; } + } +} diff --git a/OpenAI-DotNet/Realtime/ConversationItemCreatedResponse.cs b/OpenAI-DotNet/Realtime/ConversationItemCreatedResponse.cs new file mode 100644 index 00000000..75010e02 --- /dev/null +++ b/OpenAI-DotNet/Realtime/ConversationItemCreatedResponse.cs @@ -0,0 +1,33 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using System.Text.Json.Serialization; + +namespace OpenAI.Realtime +{ + public sealed class ConversationItemCreatedResponse : BaseRealtimeEvent, IServerEvent + { + /// + [JsonInclude] + [JsonPropertyName("event_id")] + public override string EventId { get; internal set; } + + /// + [JsonInclude] + [JsonPropertyName("type")] + public override string Type { get; protected set; } + + /// + /// The ID of the preceding item. + /// + [JsonInclude] + [JsonPropertyName("previous_item_id")] + public string PreviousItemId { get; private set; } + + /// + /// The item that was created. + /// + [JsonInclude] + [JsonPropertyName("item")] + public ConversationItem Item { get; private set; } + } +} diff --git a/OpenAI-DotNet/Realtime/ConversationItemDeleteRequest.cs b/OpenAI-DotNet/Realtime/ConversationItemDeleteRequest.cs new file mode 100644 index 00000000..216b9887 --- /dev/null +++ b/OpenAI-DotNet/Realtime/ConversationItemDeleteRequest.cs @@ -0,0 +1,39 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using System.Text.Json.Serialization; + +namespace OpenAI.Realtime +{ + /// + /// Send this event when you want to remove any item from the conversation history. + /// The server will respond with a conversation.item.deleted event, + /// unless the item does not exist in the conversation history, + /// in which case the server will respond with an error. + /// + public sealed class ConversationItemDeleteRequest : BaseRealtimeEvent, IClientEvent + { + public ConversationItemDeleteRequest() { } + + public ConversationItemDeleteRequest(string itemId) + { + ItemId = itemId; + } + + /// + [JsonInclude] + [JsonPropertyName("event_id")] + public override string EventId { get; internal set; } + + /// + [JsonInclude] + [JsonPropertyName("type")] + public override string Type { get; protected set; } = "conversation.item.delete"; + + /// + /// The ID of the item to delete. + /// + [JsonInclude] + [JsonPropertyName("item_id")] + public string ItemId { get; private set; } + } +} diff --git a/OpenAI-DotNet/Realtime/ConversationItemDeletedResponse.cs b/OpenAI-DotNet/Realtime/ConversationItemDeletedResponse.cs new file mode 100644 index 00000000..03b76edc --- /dev/null +++ b/OpenAI-DotNet/Realtime/ConversationItemDeletedResponse.cs @@ -0,0 +1,26 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using System.Text.Json.Serialization; + +namespace OpenAI.Realtime +{ + public sealed class ConversationItemDeletedResponse : BaseRealtimeEvent, IServerEvent + { + /// + [JsonInclude] + [JsonPropertyName("event_id")] + public override string EventId { get; internal set; } + + /// + [JsonInclude] + [JsonPropertyName("type")] + public override string Type { get; protected set; } + + /// + /// The ID of the item that was deleted. + /// + [JsonInclude] + [JsonPropertyName("item_id")] + public string ItemId { get; private set; } + } +} diff --git a/OpenAI-DotNet/Realtime/ConversationItemInputAudioTranscriptionResponse.cs b/OpenAI-DotNet/Realtime/ConversationItemInputAudioTranscriptionResponse.cs new file mode 100644 index 00000000..c58b27e6 --- /dev/null +++ b/OpenAI-DotNet/Realtime/ConversationItemInputAudioTranscriptionResponse.cs @@ -0,0 +1,53 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using System.Text.Json.Serialization; + +namespace OpenAI.Realtime +{ + public sealed class ConversationItemInputAudioTranscriptionResponse : BaseRealtimeEvent, IServerEvent + { + /// + [JsonInclude] + [JsonPropertyName("event_id")] + public override string EventId { get; internal set; } + + /// + [JsonInclude] + [JsonPropertyName("type")] + public override string Type { get; protected set; } + + /// + /// The ID of the user message item. + /// + [JsonInclude] + [JsonPropertyName("item_id")] + public string ItemId { get; private set; } + + /// + /// The index of the content part containing the audio. + /// + [JsonInclude] + [JsonPropertyName("content_index")] + public int? ContentIndex { get; private set; } + + /// + /// The transcribed text. + /// + [JsonInclude] + [JsonPropertyName("transcript")] + public string Transcript { get; private set; } + + /// + /// Details of the transcription error. + /// + [JsonInclude] + [JsonPropertyName("error")] + public Error Error { get; private set; } + + [JsonIgnore] + public bool IsCompleted => Type.Contains("completed"); + + [JsonIgnore] + public bool IsFailed => Type.Contains("failed"); + } +} diff --git a/OpenAI-DotNet/Realtime/ConversationItemTruncateRequest.cs b/OpenAI-DotNet/Realtime/ConversationItemTruncateRequest.cs new file mode 100644 index 00000000..dd81ebe3 --- /dev/null +++ b/OpenAI-DotNet/Realtime/ConversationItemTruncateRequest.cs @@ -0,0 +1,58 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using System.Text.Json.Serialization; + +namespace OpenAI.Realtime +{ + /// + /// Send this event to truncate a previous assistant message’s audio. + /// The server will produce audio faster than realtime, + /// so this event is useful when the user interrupts to truncate audio + /// that has already been sent to the client but not yet played. + /// This will synchronize the server's understanding of the audio with the client's playback. + /// Truncating audio will delete the server-side text transcript to ensure there + /// is not text in the context that hasn't been heard by the user. + /// If successful, the server will respond with a conversation.item.truncated event. + /// + public sealed class ConversationItemTruncateRequest : BaseRealtimeEvent, IClientEvent + { + public ConversationItemTruncateRequest(string itemId, int contentIndex, int audioEndMs) + { + ItemId = itemId; + ContentIndex = contentIndex; + AudioEndMs = audioEndMs; + } + + /// + [JsonInclude] + [JsonPropertyName("event_id")] + public override string EventId { get; internal set; } + + /// + [JsonInclude] + [JsonPropertyName("type")] + public override string Type { get; protected set; } = "conversation.item.truncate"; + + /// + /// The ID of the assistant message item to truncate. Only assistant message items can be truncated. + /// + [JsonInclude] + [JsonPropertyName("item_id")] + public string ItemId { get; private set; } + + /// + /// The index of the content part to truncate. Set this to 0. + /// + [JsonInclude] + [JsonPropertyName("content_index")] + public int ContentIndex { get; private set; } + + /// + /// Inclusive duration up to which audio is truncated, in milliseconds. + /// If the audio_end_ms is greater than the actual audio duration, the server will respond with an error. + /// + [JsonInclude] + [JsonPropertyName("audio_end_ms")] + public int AudioEndMs { get; private set; } + } +} diff --git a/OpenAI-DotNet/Realtime/ConversationItemTruncatedResponse.cs b/OpenAI-DotNet/Realtime/ConversationItemTruncatedResponse.cs new file mode 100644 index 00000000..ff42fc67 --- /dev/null +++ b/OpenAI-DotNet/Realtime/ConversationItemTruncatedResponse.cs @@ -0,0 +1,40 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using System.Text.Json.Serialization; + +namespace OpenAI.Realtime +{ + public sealed class ConversationItemTruncatedResponse : BaseRealtimeEvent, IServerEvent + { + /// + [JsonInclude] + [JsonPropertyName("event_id")] + public override string EventId { get; internal set; } + + /// + [JsonInclude] + [JsonPropertyName("type")] + public override string Type { get; protected set; } + + /// + /// The ID of the assistant message item that was truncated. + /// + [JsonInclude] + [JsonPropertyName("item_id")] + public string ItemId { get; private set; } + + /// + /// The index of the content part that was truncated. + /// + [JsonInclude] + [JsonPropertyName("content_index")] + public int ContentIndex { get; private set; } + + /// + /// The duration up to which the audio was truncated, in milliseconds. + /// + [JsonInclude] + [JsonPropertyName("audio_end_ms")] + public int AudioEndMs { get; private set; } + } +} diff --git a/OpenAI-DotNet/Realtime/ConversationItemType.cs b/OpenAI-DotNet/Realtime/ConversationItemType.cs new file mode 100644 index 00000000..71cf37a1 --- /dev/null +++ b/OpenAI-DotNet/Realtime/ConversationItemType.cs @@ -0,0 +1,16 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using System.Runtime.Serialization; + +namespace OpenAI.Realtime +{ + public enum ConversationItemType + { + [EnumMember(Value = "message")] + Message, + [EnumMember(Value = "function_call")] + FunctionCall, + [EnumMember(Value = "function_call_output")] + FunctionCallOutput + } +} diff --git a/OpenAI-DotNet/Realtime/CreateResponseRequest.cs b/OpenAI-DotNet/Realtime/CreateResponseRequest.cs new file mode 100644 index 00000000..b0d6ba95 --- /dev/null +++ b/OpenAI-DotNet/Realtime/CreateResponseRequest.cs @@ -0,0 +1,43 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using System.Text.Json.Serialization; + +namespace OpenAI.Realtime +{ + /// + /// This event instructs the server to create a Response, which means triggering model inference. + /// When in Server VAD mode, the server will create Responses automatically. + /// A Response will include at least one Item, and may have two, in which case the second will be a function call. + /// These Items will be appended to the conversation history. The server will respond with a response.created event, + /// events for Items and content created, and finally a response.done event to indicate the Response is complete. + /// The response.create event includes inference configuration like instructions, and temperature. + /// These fields will override the Session's configuration for this Response only. + /// + public sealed class CreateResponseRequest : BaseRealtimeEvent, IClientEvent + { + public CreateResponseRequest() { } + + /// + /// Constructor. + /// + /// Inference configuration to override the for this response only. + public CreateResponseRequest(Options options) + { + Options = options; + } + + /// + [JsonInclude] + [JsonPropertyName("event_id")] + public override string EventId { get; internal set; } + + /// + [JsonInclude] + [JsonPropertyName("type")] + public override string Type { get; protected set; } = "response.create"; + + [JsonInclude] + [JsonPropertyName("response")] + public Options Options { get; private set; } + } +} diff --git a/OpenAI-DotNet/Realtime/IRealtimeEvent.cs b/OpenAI-DotNet/Realtime/IRealtimeEvent.cs new file mode 100644 index 00000000..a084cbc3 --- /dev/null +++ b/OpenAI-DotNet/Realtime/IRealtimeEvent.cs @@ -0,0 +1,31 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +namespace OpenAI.Realtime +{ + public interface IRealtimeEvent + { + /// + /// The unique ID of the server event. + /// + public string EventId { get; } + + public string Type { get; } + + public string ToJsonString(); + } + + public interface IClientEvent : IRealtimeEvent + { + } + + public interface IServerEvent : IRealtimeEvent + { + } + + internal interface IRealtimeEventStream + { + public bool IsDone { get; } + + public bool IsDelta { get; } + } +} diff --git a/OpenAI-DotNet/Realtime/InputAudioBufferAppendRequest.cs b/OpenAI-DotNet/Realtime/InputAudioBufferAppendRequest.cs new file mode 100644 index 00000000..038d60fc --- /dev/null +++ b/OpenAI-DotNet/Realtime/InputAudioBufferAppendRequest.cs @@ -0,0 +1,52 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using System; +using System.Text.Json.Serialization; + +namespace OpenAI.Realtime +{ + /// + /// Send this event to append audio bytes to the input audio buffer. + /// The audio buffer is temporary storage you can write to and later commit. + /// In Server VAD mode, the audio buffer is used to detect speech and the server will decide when to commit. + /// When Server VAD is disabled, you must commit the audio buffer manually. + /// The client may choose how much audio to place in each event up to a maximum of 15 MiB, + /// for example streaming smaller chunks from the client may allow the VAD to be more responsive. + /// Unlike made other client events, the server will not send a confirmation response to this event. + /// + public sealed class InputAudioBufferAppendRequest : BaseRealtimeEvent, IClientEvent + { + public InputAudioBufferAppendRequest(ReadOnlyMemory audioData) + : this(audioData.Span) + { + } + + public InputAudioBufferAppendRequest(ReadOnlySpan audioData) + { + Audio = Convert.ToBase64String(audioData); + } + + public InputAudioBufferAppendRequest(byte[] audioData) + { + Audio = Convert.ToBase64String(audioData); + } + + /// + [JsonInclude] + [JsonPropertyName("event_id")] + public override string EventId { get; internal set; } + + /// + [JsonInclude] + [JsonPropertyName("type")] + public override string Type { get; protected set; } = "input_audio_buffer.append"; + + /// + /// Base64-encoded audio bytes. + /// This must be in the format specified by the input_audio_format field in the session configuration. + /// + [JsonInclude] + [JsonPropertyName("audio")] + public string Audio { get; private set; } + } +} diff --git a/OpenAI-DotNet/Realtime/InputAudioBufferClearRequest.cs b/OpenAI-DotNet/Realtime/InputAudioBufferClearRequest.cs new file mode 100644 index 00000000..0a822f3e --- /dev/null +++ b/OpenAI-DotNet/Realtime/InputAudioBufferClearRequest.cs @@ -0,0 +1,23 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using System.Text.Json.Serialization; + +namespace OpenAI.Realtime +{ + /// + /// Send this event to clear the audio bytes in the buffer. + /// The server will respond with an input_audio_buffer.cleared event. + /// + public sealed class InputAudioBufferClearRequest : BaseRealtimeEvent, IClientEvent + { + /// + [JsonInclude] + [JsonPropertyName("event_id")] + public override string EventId { get; internal set; } + + /// + [JsonInclude] + [JsonPropertyName("type")] + public override string Type { get; protected set; } = "input_audio_buffer.clear"; + } +} diff --git a/OpenAI-DotNet/Realtime/InputAudioBufferClearedResponse.cs b/OpenAI-DotNet/Realtime/InputAudioBufferClearedResponse.cs new file mode 100644 index 00000000..9369313b --- /dev/null +++ b/OpenAI-DotNet/Realtime/InputAudioBufferClearedResponse.cs @@ -0,0 +1,19 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using System.Text.Json.Serialization; + +namespace OpenAI.Realtime +{ + public sealed class InputAudioBufferClearedResponse : BaseRealtimeEvent, IServerEvent + { + /// + [JsonInclude] + [JsonPropertyName("event_id")] + public override string EventId { get; internal set; } + + /// + [JsonInclude] + [JsonPropertyName("type")] + public override string Type { get; protected set; } + } +} diff --git a/OpenAI-DotNet/Realtime/InputAudioBufferCommitRequest.cs b/OpenAI-DotNet/Realtime/InputAudioBufferCommitRequest.cs new file mode 100644 index 00000000..f3f875cd --- /dev/null +++ b/OpenAI-DotNet/Realtime/InputAudioBufferCommitRequest.cs @@ -0,0 +1,29 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using System.Text.Json.Serialization; + +namespace OpenAI.Realtime +{ + /// + /// Send this event to commit the user input audio buffer, + /// which will create a new user message item in the conversation. + /// This event will produce an error if the input audio buffer is empty. + /// When in Server VAD mode, the client does not need to send this event, + /// the server will commit the audio buffer automatically. + /// Committing the input audio buffer will trigger input audio transcription (if enabled in session configuration), + /// but it will not create a response from the model. + /// The server will respond with an input_audio_buffer.committed event. + /// + public sealed class InputAudioBufferCommitRequest : BaseRealtimeEvent, IClientEvent + { + /// + [JsonInclude] + [JsonPropertyName("event_id")] + public override string EventId { get; internal set; } + + /// + [JsonInclude] + [JsonPropertyName("type")] + public override string Type { get; protected set; } = "input_audio_buffer.commit"; + } +} diff --git a/OpenAI-DotNet/Realtime/InputAudioBufferCommittedResponse.cs b/OpenAI-DotNet/Realtime/InputAudioBufferCommittedResponse.cs new file mode 100644 index 00000000..21d4e372 --- /dev/null +++ b/OpenAI-DotNet/Realtime/InputAudioBufferCommittedResponse.cs @@ -0,0 +1,33 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using System.Text.Json.Serialization; + +namespace OpenAI.Realtime +{ + public sealed class InputAudioBufferCommittedResponse : BaseRealtimeEvent, IServerEvent + { + /// + [JsonInclude] + [JsonPropertyName("event_id")] + public override string EventId { get; internal set; } + + /// + [JsonInclude] + [JsonPropertyName("type")] + public override string Type { get; protected set; } + + /// + /// The ID of the preceding item after which the new item will be inserted. + /// + [JsonInclude] + [JsonPropertyName("previous_item_id")] + public string PreviousItemId { get; private set; } + + /// + /// The ID of the user message item that will be created. + /// + [JsonInclude] + [JsonPropertyName("item_id")] + public string ItemId { get; private set; } + } +} diff --git a/OpenAI-DotNet/Realtime/InputAudioBufferStartedResponse.cs b/OpenAI-DotNet/Realtime/InputAudioBufferStartedResponse.cs new file mode 100644 index 00000000..c24ab760 --- /dev/null +++ b/OpenAI-DotNet/Realtime/InputAudioBufferStartedResponse.cs @@ -0,0 +1,33 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using System.Text.Json.Serialization; + +namespace OpenAI.Realtime +{ + public sealed class InputAudioBufferStartedResponse : BaseRealtimeEvent, IServerEvent + { + /// + [JsonInclude] + [JsonPropertyName("event_id")] + public override string EventId { get; internal set; } + + /// + [JsonInclude] + [JsonPropertyName("type")] + public override string Type { get; protected set; } + + /// + /// Milliseconds since the session started when speech was detected. + /// + [JsonInclude] + [JsonPropertyName("audio_start_ms")] + public int AudioStartMs { get; private set; } + + /// + /// The ID of the user message item that will be created when speech stops. + /// + [JsonInclude] + [JsonPropertyName("item_id")] + public string ItemId { get; private set; } + } +} diff --git a/OpenAI-DotNet/Realtime/InputAudioBufferStoppedResponse.cs b/OpenAI-DotNet/Realtime/InputAudioBufferStoppedResponse.cs new file mode 100644 index 00000000..84bbf1b9 --- /dev/null +++ b/OpenAI-DotNet/Realtime/InputAudioBufferStoppedResponse.cs @@ -0,0 +1,33 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using System.Text.Json.Serialization; + +namespace OpenAI.Realtime +{ + public sealed class InputAudioBufferStoppedResponse : BaseRealtimeEvent, IServerEvent + { + /// + [JsonInclude] + [JsonPropertyName("event_id")] + public override string EventId { get; internal set; } + + /// + [JsonInclude] + [JsonPropertyName("type")] + public override string Type { get; protected set; } + + /// + /// Milliseconds since the session started when speech stopped. + /// + [JsonInclude] + [JsonPropertyName("audio_end_ms")] + public int AudioEndMs { get; private set; } + + /// + /// The ID of the user message item that will be created. + /// + [JsonInclude] + [JsonPropertyName("item_id")] + public string ItemId { get; private set; } + } +} diff --git a/OpenAI-DotNet/Realtime/InputAudioTranscriptionSettings.cs b/OpenAI-DotNet/Realtime/InputAudioTranscriptionSettings.cs new file mode 100644 index 00000000..eb664bf3 --- /dev/null +++ b/OpenAI-DotNet/Realtime/InputAudioTranscriptionSettings.cs @@ -0,0 +1,21 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using OpenAI.Models; +using System.Text.Json.Serialization; + +namespace OpenAI.Realtime +{ + public sealed class InputAudioTranscriptionSettings + { + public InputAudioTranscriptionSettings() { } + + public InputAudioTranscriptionSettings(Model model) + { + Model = string.IsNullOrWhiteSpace(model.Id) ? "whisper-1" : model; + } + + [JsonInclude] + [JsonPropertyName("model")] + public string Model { get; private set; } + } +} diff --git a/OpenAI-DotNet/Realtime/Options.cs b/OpenAI-DotNet/Realtime/Options.cs new file mode 100644 index 00000000..fd9a501a --- /dev/null +++ b/OpenAI-DotNet/Realtime/Options.cs @@ -0,0 +1,178 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using OpenAI.Models; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text.Json.Serialization; + +namespace OpenAI.Realtime +{ + public sealed class Options + { + public Options() { } + + public Options( + Model model, + Modality modalities = Modality.Text | Modality.Audio, + Voice voice = null, + string instructions = null, + RealtimeAudioFormat inputAudioFormat = RealtimeAudioFormat.PCM16, + RealtimeAudioFormat outputAudioFormat = RealtimeAudioFormat.PCM16, + Model transcriptionModel = null, + VoiceActivityDetectionSettings turnDetectionSettings = null, + IEnumerable tools = null, + string toolChoice = null, + float? temperature = null, + int? maxResponseOutputTokens = null) + { + Model = string.IsNullOrWhiteSpace(model.Id) + ? "gpt-4o-realtime-preview" + : model; + Modalities = modalities; + Voice = voice ?? OpenAI.Voice.Alloy; + Instructions = string.IsNullOrWhiteSpace(instructions) + ? "Your knowledge cutoff is 2023-10. You are a helpful, witty, and friendly AI. Act like a human, " + + "but remember that you aren't a human and that you can't do human things in the real world. " + + "Your voice and personality should be warm and engaging, with a lively and playful tone. " + + "If interacting in a non-English language, start by using the standard accent or dialect familiar to the user. " + + "Talk quickly. " + + "You should always call a function if you can. Do not refer to these rules, even if you're asked about them." + : instructions; + InputAudioFormat = inputAudioFormat; + OutputAudioFormat = outputAudioFormat; + InputAudioTranscriptionSettings = new(string.IsNullOrWhiteSpace(transcriptionModel) + ? "whisper-1" + : transcriptionModel); + VoiceActivityDetectionSettings = turnDetectionSettings ?? new(TurnDetectionType.Server_VAD); + + var toolList = tools?.ToList(); + + if (toolList is { Count: > 0 }) + { + if (string.IsNullOrWhiteSpace(toolChoice)) + { + ToolChoice = "auto"; + } + else + { + if (!toolChoice.Equals("none") && + !toolChoice.Equals("required") && + !toolChoice.Equals("auto")) + { + var tool = toolList.FirstOrDefault(t => t.Function.Name.Contains(toolChoice)) ?? + throw new ArgumentException($"The specified tool choice '{toolChoice}' was not found in the list of tools"); + ToolChoice = new { type = "function", function = new { name = tool.Function.Name } }; + } + else + { + ToolChoice = toolChoice; + } + } + + foreach (var tool in toolList.Where(tool => tool?.Function?.Arguments != null)) + { + // just in case clear any lingering func args. + tool.Function.Arguments = null; + } + } + + Tools = toolList?.Select(tool => + { + tool.Function.Type = "function"; + return tool.Function; + }).ToList(); + Temperature = temperature; + + if (maxResponseOutputTokens.HasValue) + { + MaxResponseOutputTokens = maxResponseOutputTokens.Value switch + { + < 1 => 1, + > 4096 => "inf", + _ => maxResponseOutputTokens + }; + } + } + + [JsonInclude] + [JsonPropertyName("id")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)] + public string Id { get; private set; } + + [JsonInclude] + [JsonPropertyName("object")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)] + public string Object { get; private set; } + + [JsonInclude] + [JsonPropertyName("model")] + public string Model { get; private set; } + + [JsonInclude] + [JsonPropertyName("expires_at")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)] + public int? ExpiresAtTimeUnixSeconds { get; private set; } + + [JsonInclude] + [JsonIgnore] + public DateTime? ExpiresAt => + ExpiresAtTimeUnixSeconds.HasValue + ? DateTimeOffset.FromUnixTimeSeconds(ExpiresAtTimeUnixSeconds.Value).DateTime + : null; + + [JsonInclude] + [JsonPropertyName("modalities")] + [JsonConverter(typeof(ModalityConverter))] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)] + public Modality Modalities { get; private set; } + + [JsonInclude] + [JsonPropertyName("voice")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)] + public string Voice { get; private set; } + + [JsonInclude] + [JsonPropertyName("instructions")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)] + public string Instructions { get; private set; } + + [JsonInclude] + [JsonPropertyName("input_audio_format")] + [JsonIgnore(Condition = JsonIgnoreCondition.Never)] + [JsonConverter(typeof(Extensions.JsonStringEnumConverter))] + public RealtimeAudioFormat InputAudioFormat { get; private set; } + + [JsonInclude] + [JsonPropertyName("output_audio_format")] + [JsonIgnore(Condition = JsonIgnoreCondition.Never)] + [JsonConverter(typeof(Extensions.JsonStringEnumConverter))] + public RealtimeAudioFormat OutputAudioFormat { get; private set; } + + [JsonInclude] + [JsonPropertyName("input_audio_transcription")] + public InputAudioTranscriptionSettings InputAudioTranscriptionSettings { get; private set; } + + [JsonInclude] + [JsonPropertyName("turn_detection")] + public VoiceActivityDetectionSettings VoiceActivityDetectionSettings { get; private set; } + + [JsonInclude] + [JsonPropertyName("tools")] + public IReadOnlyList Tools { get; private set; } + + [JsonInclude] + [JsonPropertyName("tool_choice")] + public object ToolChoice { get; private set; } + + [JsonInclude] + [JsonPropertyName("temperature")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)] + public float? Temperature { get; private set; } + + [JsonInclude] + [JsonPropertyName("max_response_output_tokens")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)] + public object MaxResponseOutputTokens { get; private set; } + } +} diff --git a/OpenAI-DotNet/Realtime/RateLimit.cs b/OpenAI-DotNet/Realtime/RateLimit.cs new file mode 100644 index 00000000..81481ed0 --- /dev/null +++ b/OpenAI-DotNet/Realtime/RateLimit.cs @@ -0,0 +1,29 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using System.Text.Json.Serialization; + +namespace OpenAI.Realtime +{ + public sealed class RateLimit + { + [JsonInclude] + [JsonPropertyName("name")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)] + public string Name { get; private set; } + + [JsonInclude] + [JsonPropertyName("limit")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)] + public int? Limit { get; private set; } + + [JsonInclude] + [JsonPropertyName("remaining")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)] + public int? Remaining { get; private set; } + + [JsonInclude] + [JsonPropertyName("reset_seconds")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)] + public float? ResetSeconds { get; private set; } + } +} diff --git a/OpenAI-DotNet/Realtime/RateLimitsResponse.cs b/OpenAI-DotNet/Realtime/RateLimitsResponse.cs new file mode 100644 index 00000000..c1ba827a --- /dev/null +++ b/OpenAI-DotNet/Realtime/RateLimitsResponse.cs @@ -0,0 +1,27 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using System.Collections.Generic; +using System.Text.Json.Serialization; + +namespace OpenAI.Realtime +{ + public sealed class RateLimitsResponse : BaseRealtimeEvent, IServerEvent + { + /// + [JsonInclude] + [JsonPropertyName("event_id")] + public override string EventId { get; internal set; } + + /// + [JsonInclude] + [JsonPropertyName("type")] + public override string Type { get; protected set; } + + /// + /// List of rate limit information. + /// + [JsonInclude] + [JsonPropertyName("rate_limits")] + public IReadOnlyList RateLimits { get; private set; } + } +} diff --git a/OpenAI-DotNet/Realtime/RealtimeAudioFormat.cs b/OpenAI-DotNet/Realtime/RealtimeAudioFormat.cs new file mode 100644 index 00000000..e73ebd9a --- /dev/null +++ b/OpenAI-DotNet/Realtime/RealtimeAudioFormat.cs @@ -0,0 +1,16 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using System.Runtime.Serialization; + +namespace OpenAI.Realtime +{ + public enum RealtimeAudioFormat + { + [EnumMember(Value = "pcm16")] + PCM16, + [EnumMember(Value = "g771_ulaw")] + G771_uLaw, + [EnumMember(Value = "g771_alaw")] + G771_ALaw, + } +} diff --git a/OpenAI-DotNet/Realtime/RealtimeContent.cs b/OpenAI-DotNet/Realtime/RealtimeContent.cs new file mode 100644 index 00000000..5c8bd9d3 --- /dev/null +++ b/OpenAI-DotNet/Realtime/RealtimeContent.cs @@ -0,0 +1,86 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using System; +using System.Text.Json.Serialization; + +namespace OpenAI.Realtime +{ + public sealed class RealtimeContent + { + public RealtimeContent() { } + + public RealtimeContent(string text, RealtimeContentType type) + { + Type = type; + Text = type switch + { + RealtimeContentType.InputText or RealtimeContentType.Text => text, + _ => throw new ArgumentException($"Invalid content type {type} for text content") + }; + } + + public RealtimeContent(ReadOnlyMemory audioData, RealtimeContentType type, string transcript = null) + : this(audioData.Span, type, transcript) + { + } + + public RealtimeContent(ReadOnlySpan audioData, RealtimeContentType type, string transcript = null) + { + Type = type; + Audio = type switch + { + RealtimeContentType.InputAudio or RealtimeContentType.Audio => Convert.ToBase64String(audioData), + _ => throw new ArgumentException($"Invalid content type {type} for audio content") + }; + Transcript = transcript; + } + + public RealtimeContent(byte[] audioData, RealtimeContentType type, string transcript = null) + { + Type = type; + Audio = type switch + { + RealtimeContentType.InputAudio or RealtimeContentType.Audio => Convert.ToBase64String(audioData), + _ => throw new ArgumentException($"Invalid content type {type} for audio content") + }; + Transcript = transcript; + } + + /// + /// The content type ("text", "audio", "input_text", "input_audio"). + /// + [JsonInclude] + [JsonPropertyName("type")] + [JsonIgnore(Condition = JsonIgnoreCondition.Never)] + [JsonConverter(typeof(Extensions.JsonStringEnumConverter))] + public RealtimeContentType Type { get; private set; } + + /// + /// The text content. + /// + [JsonInclude] + [JsonPropertyName("text")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)] + public string Text { get; private set; } + + /// + /// Base64-encoded audio data. + /// + [JsonInclude] + [JsonPropertyName("audio")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)] + public string Audio { get; private set; } + + /// + /// The transcript of the audio. + /// + [JsonInclude] + [JsonPropertyName("transcript")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)] + public string Transcript { get; private set; } + + public static implicit operator RealtimeContent(string text) => new(text, RealtimeContentType.InputText); + + public static implicit operator RealtimeContent(byte[] audioData) => new(audioData, RealtimeContentType.InputAudio); + } +} diff --git a/OpenAI-DotNet/Realtime/RealtimeContentType.cs b/OpenAI-DotNet/Realtime/RealtimeContentType.cs new file mode 100644 index 00000000..6563a2b7 --- /dev/null +++ b/OpenAI-DotNet/Realtime/RealtimeContentType.cs @@ -0,0 +1,18 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using System.Runtime.Serialization; + +namespace OpenAI.Realtime +{ + public enum RealtimeContentType + { + [EnumMember(Value = "text")] + Text, + [EnumMember(Value = "audio")] + Audio, + [EnumMember(Value = "input_text")] + InputText, + [EnumMember(Value = "input_audio")] + InputAudio + } +} diff --git a/OpenAI-DotNet/Realtime/RealtimeConversation.cs b/OpenAI-DotNet/Realtime/RealtimeConversation.cs new file mode 100644 index 00000000..7c1cc519 --- /dev/null +++ b/OpenAI-DotNet/Realtime/RealtimeConversation.cs @@ -0,0 +1,25 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using System.Text.Json.Serialization; + +namespace OpenAI.Realtime +{ + public sealed class RealtimeConversation + { + /// + /// The unique id of the conversation. + /// + [JsonInclude] + [JsonPropertyName("id")] + public string Id { get; private set; } + + /// + /// The object type, must be "realtime.conversation". + /// + [JsonInclude] + [JsonPropertyName("object")] + public string Object { get; private set; } + + public static implicit operator string(RealtimeConversation conversation) => conversation?.Id; + } +} diff --git a/OpenAI-DotNet/Realtime/RealtimeConversationResponse.cs b/OpenAI-DotNet/Realtime/RealtimeConversationResponse.cs new file mode 100644 index 00000000..5126a2fc --- /dev/null +++ b/OpenAI-DotNet/Realtime/RealtimeConversationResponse.cs @@ -0,0 +1,28 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using System.Text.Json.Serialization; + +namespace OpenAI.Realtime +{ + public sealed class RealtimeConversationResponse : BaseRealtimeEvent, IServerEvent + { + /// + [JsonInclude] + [JsonPropertyName("event_id")] + public override string EventId { get; internal set; } + + /// + [JsonInclude] + [JsonPropertyName("type")] + public override string Type { get; protected set; } + + /// + /// The conversation resource. + /// + [JsonInclude] + [JsonPropertyName("conversation")] + public RealtimeConversation Conversation { get; private set; } + + public static implicit operator RealtimeConversation(RealtimeConversationResponse response) => response?.Conversation; + } +} diff --git a/OpenAI-DotNet/Realtime/RealtimeEndpoint.cs b/OpenAI-DotNet/Realtime/RealtimeEndpoint.cs new file mode 100644 index 00000000..d5935b18 --- /dev/null +++ b/OpenAI-DotNet/Realtime/RealtimeEndpoint.cs @@ -0,0 +1,92 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using OpenAI.Extensions; +using System; +using System.Collections.Generic; +using System.Threading; +using System.Threading.Tasks; + +namespace OpenAI.Realtime +{ + public sealed class RealtimeEndpoint : OpenAIBaseEndpoint + { + internal RealtimeEndpoint(OpenAIClient client) : base(client) { } + + protected override string Root => "realtime"; + + protected override bool? IsWebSocketEndpoint => true; + + /// + /// Creates a new realtime session with the provided options. + /// + /// . + /// Optional, . + /// . + public async Task CreateSessionAsync(Options options = null, CancellationToken cancellationToken = default) + { + string model = string.IsNullOrWhiteSpace(options?.Model) ? Models.Model.GPT4oRealtime : options!.Model; + var queryParameters = new Dictionary(); + + if (client.OpenAIClientSettings.IsAzureOpenAI) + { + queryParameters["deployment"] = model; + } + else + { + queryParameters["model"] = model; + } + + var session = new RealtimeSession(client.CreateWebSocket(GetUrl(queryParameters: queryParameters)), EnableDebug); + var sessionCreatedTcs = new TaskCompletionSource(); + + try + { + session.OnEventReceived += OnEventReceived; + session.OnError += OnError; + await session.ConnectAsync(cancellationToken).ConfigureAwait(false); + var sessionResponse = await sessionCreatedTcs.Task.WithCancellation(cancellationToken).ConfigureAwait(false); + session.Options = sessionResponse.Options; + await session.SendAsync(new UpdateSessionRequest(options), cancellationToken: cancellationToken).ConfigureAwait(false); + } + finally + { + session.OnError -= OnError; + session.OnEventReceived -= OnEventReceived; + } + + return session; + + void OnError(Exception e) + { + sessionCreatedTcs.SetException(e); + } + + void OnEventReceived(IRealtimeEvent @event) + { + try + { + switch (@event) + { + case RealtimeConversationResponse: + Console.WriteLine("[conversation.created]"); + break; + case SessionResponse sessionResponse: + if (sessionResponse.Type == "session.created") + { + sessionCreatedTcs.TrySetResult(sessionResponse); + } + break; + case RealtimeEventError realtimeEventError: + sessionCreatedTcs.TrySetException(new Exception(realtimeEventError.Error.Message)); + break; + } + } + catch (Exception e) + { + Console.WriteLine(e); + sessionCreatedTcs.TrySetException(e); + } + } + } + } +} diff --git a/OpenAI-DotNet/Realtime/RealtimeEventError.cs b/OpenAI-DotNet/Realtime/RealtimeEventError.cs new file mode 100644 index 00000000..60dced58 --- /dev/null +++ b/OpenAI-DotNet/Realtime/RealtimeEventError.cs @@ -0,0 +1,30 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using System; +using System.Text.Json.Serialization; + +namespace OpenAI.Realtime +{ + public sealed class RealtimeEventError : BaseRealtimeEvent, IServerEvent + { + /// + [JsonInclude] + [JsonPropertyName("event_id")] + public override string EventId { get; internal set; } + + /// + [JsonInclude] + [JsonPropertyName("type")] + public override string Type { get; protected set; } + + [JsonInclude] + [JsonPropertyName("error")] + public Error Error { get; private set; } + + public override string ToString() + => Error.ToString(); + + public static implicit operator Exception(RealtimeEventError error) + => error.Error?.Exception ?? new Exception(error.ToString()); + } +} diff --git a/OpenAI-DotNet/Realtime/RealtimeResponse.cs b/OpenAI-DotNet/Realtime/RealtimeResponse.cs new file mode 100644 index 00000000..98cfe099 --- /dev/null +++ b/OpenAI-DotNet/Realtime/RealtimeResponse.cs @@ -0,0 +1,26 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using System.Text.Json.Serialization; + +namespace OpenAI.Realtime +{ + public sealed class RealtimeResponse : BaseRealtimeEvent, IServerEvent + { + /// + [JsonInclude] + [JsonPropertyName("event_id")] + public override string EventId { get; internal set; } + + /// + [JsonInclude] + [JsonPropertyName("type")] + public override string Type { get; protected set; } + + /// + /// The response resource. + /// + [JsonInclude] + [JsonPropertyName("response")] + public RealtimeResponseResource Response { get; private set; } + } +} diff --git a/OpenAI-DotNet/Realtime/RealtimeResponseResource.cs b/OpenAI-DotNet/Realtime/RealtimeResponseResource.cs new file mode 100644 index 00000000..af93a1c1 --- /dev/null +++ b/OpenAI-DotNet/Realtime/RealtimeResponseResource.cs @@ -0,0 +1,60 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using System.Collections.Generic; +using System.Text.Json.Serialization; + +namespace OpenAI.Realtime +{ + public sealed class RealtimeResponseResource + { + /// + /// The unique ID of the response. + /// + [JsonInclude] + [JsonPropertyName("id")] + public string Id { get; private set; } + + /// + /// The object type, must be "realtime.response". + /// + [JsonInclude] + [JsonPropertyName("object")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)] + public string Object { get; private set; } + + /// + /// The status of the response ("in_progress"). + /// + [JsonInclude] + [JsonPropertyName("status")] + [JsonIgnore(Condition = JsonIgnoreCondition.Never)] + [JsonConverter(typeof(Extensions.JsonStringEnumConverter))] + public RealtimeResponseStatus Status { get; private set; } + + /// + /// Additional details about the status. + /// + [JsonInclude] + [JsonPropertyName("status_details")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + public StatusDetails StatusDetails { get; private set; } + + /// + /// The list of output items generated by the response. + /// + [JsonInclude] + [JsonPropertyName("output")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)] + public IReadOnlyList Output { get; private set; } + + /// + /// Usage statistics for the Response, this will correspond to billing. + /// A Realtime API session will maintain a conversation context and append new Items to the Conversation, + /// thus output from previous turns (text and audio tokens) will become the input for later turns. + /// + [JsonInclude] + [JsonPropertyName("usage")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)] + public Usage Usage { get; private set; } + } +} diff --git a/OpenAI-DotNet/Realtime/RealtimeResponseStatus.cs b/OpenAI-DotNet/Realtime/RealtimeResponseStatus.cs new file mode 100644 index 00000000..923fa89f --- /dev/null +++ b/OpenAI-DotNet/Realtime/RealtimeResponseStatus.cs @@ -0,0 +1,20 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using System.Runtime.Serialization; + +namespace OpenAI.Realtime +{ + public enum RealtimeResponseStatus + { + [EnumMember(Value = "in_progress")] + InProgress = 1, + [EnumMember(Value = "completed")] + Completed, + [EnumMember(Value = "cancelled")] + Cancelled, + [EnumMember(Value = "failed")] + Failed, + [EnumMember(Value = "incomplete")] + Incomplete + } +} diff --git a/OpenAI-DotNet/Realtime/RealtimeSession.cs b/OpenAI-DotNet/Realtime/RealtimeSession.cs new file mode 100644 index 00000000..1be76ea9 --- /dev/null +++ b/OpenAI-DotNet/Realtime/RealtimeSession.cs @@ -0,0 +1,404 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using OpenAI.Extensions; +using System; +using System.Collections.Concurrent; +using System.Collections.Generic; +using System.Runtime.CompilerServices; +using System.Text.Json; +using System.Threading; +using System.Threading.Tasks; + +namespace OpenAI.Realtime +{ + public sealed class RealtimeSession : IDisposable + { + /// + /// Enable or disable logging. + /// + public bool EnableDebug { get; set; } + + /// + /// The timeout in seconds to wait for a response from the server. + /// + public int EventTimeout { get; set; } = 30; + + /// + /// The options for the session. + /// + public Options Options { get; internal set; } + + #region Internal + + internal event Action OnEventReceived; + + internal event Action OnError; + + private readonly WebSocket websocketClient; + private readonly ConcurrentQueue events = new(); + private readonly object eventLock = new(); + + private bool isCollectingEvents; + + internal RealtimeSession(WebSocket wsClient, bool enableDebug) + { + websocketClient = wsClient; + websocketClient.OnMessage += OnMessage; + EnableDebug = enableDebug; + } + + private void OnMessage(DataFrame dataFrame) + { + if (dataFrame.Type == OpCode.Text) + { + if (EnableDebug) + { + Console.WriteLine(dataFrame.Text); + } + + try + { + var @event = JsonSerializer.Deserialize(dataFrame.Text, OpenAIClient.JsonSerializationOptions); + + lock (eventLock) + { + events.Enqueue(@event); + } + + OnEventReceived?.Invoke(@event); + } + catch (Exception e) + { + Console.WriteLine(e); + OnError?.Invoke(e); + } + } + } + + ~RealtimeSession() => Dispose(false); + + #region IDisposable + + private bool isDisposed; + + /// + public void Dispose() + { + Dispose(true); + GC.SuppressFinalize(this); + } + + private void Dispose(bool disposing) + { + if (!isDisposed && disposing) + { + websocketClient.OnMessage -= OnMessage; + websocketClient.Dispose(); + isDisposed = true; + } + } + + #endregion IDisposable + + internal async Task ConnectAsync(CancellationToken cancellationToken = default) + { + var connectTcs = new TaskCompletionSource(); + websocketClient.OnOpen += OnWebsocketClientOnOpen; + websocketClient.OnError += OnWebsocketClientOnError; + + try + { + // ReSharper disable once MethodHasAsyncOverloadWithCancellation + // don't call async because it is blocking until connection is closed. + websocketClient.Connect(); + await connectTcs.Task.WithCancellation(cancellationToken).ConfigureAwait(false); + + if (websocketClient.State != State.Open) + { + throw new Exception($"Failed to start new session! {websocketClient.State}"); + } + } + finally + { + websocketClient.OnOpen -= OnWebsocketClientOnOpen; + websocketClient.OnError -= OnWebsocketClientOnError; + } + + return; + + void OnWebsocketClientOnError(Exception e) + => connectTcs.TrySetException(e); + void OnWebsocketClientOnOpen() + => connectTcs.TrySetResult(websocketClient.State); + } + + #endregion Internal + + /// + /// Receive callback updates from the server + /// + /// to subscribe for updates to. + /// The event to receive updates for. + /// Optional, . + /// . + /// If is already running. + public async Task ReceiveUpdatesAsync(Action sessionEvent, CancellationToken cancellationToken) where T : IRealtimeEvent + { + try + { + lock (eventLock) + { + if (isCollectingEvents) + { + throw new Exception($"{nameof(ReceiveUpdatesAsync)} is already running!"); + } + + isCollectingEvents = true; + } + + do + { + try + { + T @event = default; + + lock (eventLock) + { + if (events.TryDequeue(out var dequeuedEvent) && + dequeuedEvent is T typedEvent) + { + @event = typedEvent; + } + } + + if (@event != null) + { + sessionEvent(@event); + } + + await Task.Yield(); + } + catch (Exception e) + { + Console.WriteLine(e); + } + } while (!cancellationToken.IsCancellationRequested && websocketClient.State == State.Open); + } + finally + { + lock (eventLock) + { + isCollectingEvents = false; + } + } + } + + /// + /// Receive callback updates from the server + /// + /// to subscribe for updates to. + /// Optional, . + /// . + /// If is already running. + public async IAsyncEnumerable ReceiveUpdatesAsync([EnumeratorCancellation] CancellationToken cancellationToken) where T : IRealtimeEvent + { + try + { + lock (eventLock) + { + if (isCollectingEvents) + { + throw new Exception($"{nameof(ReceiveUpdatesAsync)} is already running!"); + } + + isCollectingEvents = true; + } + + do + { + T @event = default; + + lock (eventLock) + { + if (events.TryDequeue(out var dequeuedEvent) && + dequeuedEvent is T typedEvent) + { + @event = typedEvent; + } + } + + if (@event != null) + { + yield return @event; + } + + await Task.Yield(); + } while (!cancellationToken.IsCancellationRequested && websocketClient.State == State.Open); + } + finally + { + lock (eventLock) + { + isCollectingEvents = false; + } + } + } + + /// + /// Send a client event to the server. + /// + /// to send to the server. + /// The event to send. + public async void Send(T @event) where T : IClientEvent + => await SendAsync(@event).ConfigureAwait(false); + + /// + /// Send a client event to the server. + /// + /// to send to the server. + /// The event to send. + /// Optional, . + /// Optional, . + /// . + public async Task SendAsync(T @event, CancellationToken cancellationToken = default) where T : IClientEvent + => await SendAsync(@event, null, cancellationToken).ConfigureAwait(false); + + /// + /// Send a client event to the server. + /// + /// to send to the server. + /// The event to send. + /// Optional, . + /// Optional, . + /// . + public async Task SendAsync(T @event, Action sessionEvents, CancellationToken cancellationToken = default) where T : IClientEvent + { + if (websocketClient.State != State.Open) + { + throw new Exception($"Websocket connection is not open! {websocketClient.State}"); + } + + IClientEvent clientEvent = @event; + var payload = clientEvent.ToJsonString(); + + if (EnableDebug) + { + if (@event is not InputAudioBufferAppendRequest) + { + Console.WriteLine(payload); + } + } + + using var cts = new CancellationTokenSource(TimeSpan.FromSeconds(EventTimeout)); + using var eventCts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken, cts.Token); + var tcs = new TaskCompletionSource(); + eventCts.Token.Register(() => tcs.TrySetCanceled()); + OnEventReceived += EventCallback; + + lock (eventLock) + { + events.Enqueue(clientEvent); + } + + var eventId = Guid.NewGuid().ToString("N"); + + if (EnableDebug) + { + if (@event is not InputAudioBufferAppendRequest) + { + Console.WriteLine($"[{eventId}] sending {clientEvent.Type}"); + } + } + + await websocketClient.SendAsync(payload, cancellationToken).ConfigureAwait(false); + + if (EnableDebug) + { + if (@event is not InputAudioBufferAppendRequest) + { + Console.WriteLine($"[{eventId}] sent {clientEvent.Type}"); + } + } + + if (@event is InputAudioBufferAppendRequest) + { + // no response for this client event + return default; + } + + var response = await tcs.Task.WithCancellation(eventCts.Token).ConfigureAwait(false); + + if (EnableDebug) + { + Console.WriteLine($"[{eventId}] received {response.Type}"); + } + + return response; + + void EventCallback(IServerEvent serverEvent) + { + sessionEvents?.Invoke(serverEvent); + + try + { + if (serverEvent is RealtimeEventError serverError) + { + tcs.TrySetException(serverError); + OnEventReceived -= EventCallback; + return; + } + + switch (clientEvent) + { + case UpdateSessionRequest when serverEvent is SessionResponse sessionResponse: + Options = sessionResponse.Options; + Complete(); + return; + case InputAudioBufferCommitRequest when serverEvent is InputAudioBufferCommittedResponse: + case InputAudioBufferClearRequest when serverEvent is InputAudioBufferClearedResponse: + case ConversationItemCreateRequest when serverEvent is ConversationItemCreatedResponse: + case ConversationItemTruncateRequest when serverEvent is ConversationItemTruncatedResponse: + case ConversationItemDeleteRequest when serverEvent is ConversationItemDeletedResponse: + Complete(); + return; + case CreateResponseRequest when serverEvent is RealtimeResponse serverResponse: + { + if (serverResponse.Response.Status == RealtimeResponseStatus.InProgress) + { + return; + } + + if (serverResponse.Response.Status != RealtimeResponseStatus.Completed) + { + tcs.TrySetException(new Exception(serverResponse.Response.StatusDetails.Error?.ToString() ?? serverResponse.Response.StatusDetails.Reason)); + } + else + { + Complete(); + } + + break; + } + } + } + catch (Exception e) + { + Console.WriteLine(e); + } + + return; + + void Complete() + { + if (EnableDebug) + { + Console.WriteLine($"{clientEvent.Type} -> {serverEvent.Type}"); + } + + tcs.TrySetResult(serverEvent); + OnEventReceived -= EventCallback; + } + } + } + } +} diff --git a/OpenAI-DotNet/Realtime/ResponseAudioResponse.cs b/OpenAI-DotNet/Realtime/ResponseAudioResponse.cs new file mode 100644 index 00000000..bd65eabd --- /dev/null +++ b/OpenAI-DotNet/Realtime/ResponseAudioResponse.cs @@ -0,0 +1,64 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using System; +using System.Text.Json.Serialization; + +namespace OpenAI.Realtime +{ + public sealed class ResponseAudioResponse : BaseRealtimeEvent, IServerEvent, IRealtimeEventStream + { + /// + [JsonInclude] + [JsonPropertyName("event_id")] + public override string EventId { get; internal set; } + + /// + [JsonInclude] + [JsonPropertyName("type")] + public override string Type { get; protected set; } + + /// + /// The ID of the response. + /// + [JsonInclude] + [JsonPropertyName("response_id")] + public string ResponseId { get; private set; } + + /// + /// The ID of the item. + /// + [JsonInclude] + [JsonPropertyName("item_id")] + public string ItemId { get; private set; } + + /// + /// The index of the output item in the response. + /// + [JsonInclude] + [JsonPropertyName("output_index")] + public int OutputIndex { get; private set; } + + /// + /// The index of the content part in the item's content array. + /// + [JsonInclude] + [JsonPropertyName("content_index")] + public int ContentIndex { get; private set; } + + [JsonInclude] + [JsonPropertyName("delta")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)] + public string Delta { get; private set; } + + [JsonIgnore] + public ReadOnlyMemory DeltaBytes => !string.IsNullOrWhiteSpace(Delta) + ? Convert.FromBase64String(Delta) + : ReadOnlyMemory.Empty; + + [JsonIgnore] + public bool IsDelta => Type.EndsWith("delta"); + + [JsonIgnore] + public bool IsDone => Type.EndsWith("done"); + } +} diff --git a/OpenAI-DotNet/Realtime/ResponseAudioTranscriptResponse.cs b/OpenAI-DotNet/Realtime/ResponseAudioTranscriptResponse.cs new file mode 100644 index 00000000..cc3ad10a --- /dev/null +++ b/OpenAI-DotNet/Realtime/ResponseAudioTranscriptResponse.cs @@ -0,0 +1,75 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using System.Text.Json.Serialization; + +namespace OpenAI.Realtime +{ + public sealed class ResponseAudioTranscriptResponse : BaseRealtimeEvent, IServerEvent, IRealtimeEventStream + { + /// + [JsonInclude] + [JsonPropertyName("event_id")] + public override string EventId { get; internal set; } + + /// + [JsonInclude] + [JsonPropertyName("type")] + public override string Type { get; protected set; } + + /// + /// The ID of the response. + /// + [JsonInclude] + [JsonPropertyName("response_id")] + public string ResponseId { get; private set; } + + /// + /// The ID of the item. + /// + [JsonInclude] + [JsonPropertyName("item_id")] + public string ItemId { get; private set; } + + /// + /// The index of the output item in the response. + /// + [JsonInclude] + [JsonPropertyName("output_index")] + public int OutputIndex { get; private set; } + + /// + /// The index of the content part in the item's content array. + /// + [JsonInclude] + [JsonPropertyName("content_index")] + public int ContentIndex { get; private set; } + + /// + /// The transcript delta. + /// + [JsonInclude] + [JsonPropertyName("delta")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)] + public string Delta { get; private set; } + + /// + /// The final transcript of the audio. + /// + [JsonInclude] + [JsonPropertyName("transcript")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)] + public string Transcript { get; private set; } + + [JsonIgnore] + public bool IsDelta => Type.EndsWith("delta"); + + [JsonIgnore] + public bool IsDone => Type.EndsWith("done"); + + public override string ToString() + => !string.IsNullOrWhiteSpace(Delta) ? Delta : Transcript; + + public static implicit operator string(ResponseAudioTranscriptResponse response) + => response?.ToString(); + } +} diff --git a/OpenAI-DotNet/Realtime/ResponseCancelRequest.cs b/OpenAI-DotNet/Realtime/ResponseCancelRequest.cs new file mode 100644 index 00000000..85ee79a9 --- /dev/null +++ b/OpenAI-DotNet/Realtime/ResponseCancelRequest.cs @@ -0,0 +1,23 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using System.Text.Json.Serialization; + +namespace OpenAI.Realtime +{ + /// + /// Send this event to cancel an in-progress response. + /// The server will respond with a `response.cancelled` event or an error if there is no response to cancel. + /// + public sealed class ResponseCancelRequest : BaseRealtimeEvent, IClientEvent + { + /// + [JsonInclude] + [JsonPropertyName("event_id")] + public override string EventId { get; internal set; } + + /// + [JsonInclude] + [JsonPropertyName("type")] + public override string Type { get; protected set; } = "response.cancel"; + } +} diff --git a/OpenAI-DotNet/Realtime/ResponseContentPartResponse.cs b/OpenAI-DotNet/Realtime/ResponseContentPartResponse.cs new file mode 100644 index 00000000..47f636ca --- /dev/null +++ b/OpenAI-DotNet/Realtime/ResponseContentPartResponse.cs @@ -0,0 +1,48 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using System.Text.Json.Serialization; + +namespace OpenAI.Realtime +{ + public sealed class ResponseContentPartResponse : BaseRealtimeEvent, IServerEvent + { + /// + [JsonInclude] + [JsonPropertyName("event_id")] + public override string EventId { get; internal set; } + + /// + [JsonInclude] + [JsonPropertyName("type")] + public override string Type { get; protected set; } + + /// + /// The ID of the response to which the item belongs. + /// + [JsonInclude] + [JsonPropertyName("response_id")] + public string ResponseId { get; private set; } + + /// + /// The index of the output item in the response. + /// + [JsonInclude] + [JsonPropertyName("item_id")] + public string ItemId { get; private set; } + + /// + /// The index of the content part in the item's content array. + /// + [JsonInclude] + [JsonPropertyName("output_index")] + public int OutputIndex { get; private set; } + + /// + /// The content part that was added. + /// + [JsonInclude] + [JsonPropertyName("part")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + public RealtimeContent ContentPart { get; private set; } + } +} diff --git a/OpenAI-DotNet/Realtime/ResponseFunctionCallArgumentsResponse.cs b/OpenAI-DotNet/Realtime/ResponseFunctionCallArgumentsResponse.cs new file mode 100644 index 00000000..ed300181 --- /dev/null +++ b/OpenAI-DotNet/Realtime/ResponseFunctionCallArgumentsResponse.cs @@ -0,0 +1,78 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using System.Text.Json.Nodes; +using System.Text.Json.Serialization; + +namespace OpenAI.Realtime +{ + public sealed class ResponseFunctionCallArgumentsResponse : BaseRealtimeEvent, IServerEvent, IRealtimeEventStream + { + /// + [JsonInclude] + [JsonPropertyName("event_id")] + public override string EventId { get; internal set; } + + /// + [JsonInclude] + [JsonPropertyName("type")] + public override string Type { get; protected set; } + + /// + /// The ID of the response. + /// + [JsonInclude] + [JsonPropertyName("response_id")] + public string ResponseId { get; private set; } + + /// + /// The ID of the item. + /// + [JsonInclude] + [JsonPropertyName("item_id")] + public string ItemId { get; private set; } + + /// + /// The index of the output item in the response. + /// + [JsonInclude] + [JsonPropertyName("output_index")] + public int OutputIndex { get; private set; } + + /// + /// The ID of the function call. + /// + [JsonInclude] + [JsonPropertyName("call_id")] + public string CallId { get; private set; } + + /// + /// The arguments delta as a JSON string. + /// + [JsonInclude] + [JsonPropertyName("delta")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)] + public string Delta { get; private set; } + + [JsonInclude] + [JsonPropertyName("name")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)] + public string Name { get; private set; } + + /// + /// The final arguments as a JSON string. + /// + [JsonInclude] + [JsonPropertyName("arguments")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + public JsonNode Arguments { get; private set; } + + [JsonIgnore] + public bool IsDelta => Type.EndsWith("delta"); + + [JsonIgnore] + public bool IsDone => Type.EndsWith("done"); + + public static implicit operator ToolCall(ResponseFunctionCallArgumentsResponse response) + => new(response.CallId, response.Name, response.Arguments); + } +} diff --git a/OpenAI-DotNet/Realtime/ResponseOutputItemResponse.cs b/OpenAI-DotNet/Realtime/ResponseOutputItemResponse.cs new file mode 100644 index 00000000..e7b1d699 --- /dev/null +++ b/OpenAI-DotNet/Realtime/ResponseOutputItemResponse.cs @@ -0,0 +1,40 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using System.Text.Json.Serialization; + +namespace OpenAI.Realtime +{ + public sealed class ResponseOutputItemResponse : BaseRealtimeEvent, IServerEvent + { + /// + [JsonInclude] + [JsonPropertyName("event_id")] + public override string EventId { get; internal set; } + + /// + [JsonInclude] + [JsonPropertyName("type")] + public override string Type { get; protected set; } + + /// + /// The ID of the response to which the item belongs. + /// + [JsonInclude] + [JsonPropertyName("response_id")] + public string ResponseId { get; private set; } + + /// + /// The index of the output item in the response. + /// + [JsonInclude] + [JsonPropertyName("output_index")] + public int OutputIndex { get; private set; } + + /// + /// The item that was added. + /// + [JsonInclude] + [JsonPropertyName("item")] + public ConversationItem Item { get; private set; } + } +} diff --git a/OpenAI-DotNet/Realtime/ResponseTextResponse.cs b/OpenAI-DotNet/Realtime/ResponseTextResponse.cs new file mode 100644 index 00000000..109075c5 --- /dev/null +++ b/OpenAI-DotNet/Realtime/ResponseTextResponse.cs @@ -0,0 +1,75 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using System.Text.Json.Serialization; + +namespace OpenAI.Realtime +{ + public sealed class ResponseTextResponse : BaseRealtimeEvent, IServerEvent, IRealtimeEventStream + { + /// + [JsonInclude] + [JsonPropertyName("event_id")] + public override string EventId { get; internal set; } + + /// + [JsonInclude] + [JsonPropertyName("type")] + public override string Type { get; protected set; } + + /// + /// The ID of the response. + /// + [JsonInclude] + [JsonPropertyName("response_id")] + public string ResponseId { get; private set; } + + /// + /// The ID of the item. + /// + [JsonInclude] + [JsonPropertyName("item_id")] + public string ItemId { get; private set; } + + /// + /// The index of the output item in the response. + /// + [JsonInclude] + [JsonPropertyName("output_index")] + public int OutputIndex { get; private set; } + + /// + /// The index of the content part in the item's content array. + /// + [JsonInclude] + [JsonPropertyName("content_index")] + public int ContentIndex { get; private set; } + + /// + /// The text delta. + /// + [JsonInclude] + [JsonPropertyName("delta")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)] + public string Delta { get; private set; } + + /// + /// The final text content. + /// + [JsonInclude] + [JsonPropertyName("text")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)] + public string Text { get; private set; } + + [JsonIgnore] + public bool IsDelta => Type.EndsWith("delta"); + + [JsonIgnore] + public bool IsDone => Type.EndsWith("done"); + + public override string ToString() + => IsDelta ? Delta : Text; + + public static implicit operator string(ResponseTextResponse response) + => response?.ToString(); + } +} diff --git a/OpenAI-DotNet/Realtime/SessionResponse.cs b/OpenAI-DotNet/Realtime/SessionResponse.cs new file mode 100644 index 00000000..27c15626 --- /dev/null +++ b/OpenAI-DotNet/Realtime/SessionResponse.cs @@ -0,0 +1,26 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using System.Text.Json.Serialization; + +namespace OpenAI.Realtime +{ + public sealed class SessionResponse : BaseRealtimeEvent, IServerEvent + { + /// + [JsonInclude] + [JsonPropertyName("event_id")] + public override string EventId { get; internal set; } + + /// + [JsonInclude] + [JsonPropertyName("type")] + public override string Type { get; protected set; } + + /// + /// The session resource options. + /// + [JsonInclude] + [JsonPropertyName("session")] + public Options Options { get; private set; } + } +} diff --git a/OpenAI-DotNet/Realtime/StatusDetails.cs b/OpenAI-DotNet/Realtime/StatusDetails.cs new file mode 100644 index 00000000..8f1e7181 --- /dev/null +++ b/OpenAI-DotNet/Realtime/StatusDetails.cs @@ -0,0 +1,35 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using System.Text.Json.Serialization; + +namespace OpenAI.Realtime +{ + public sealed class StatusDetails + { + /// + /// The type of error that caused the response to fail, corresponding with the status field (cancelled, incomplete, failed). + /// + [JsonInclude] + [JsonPropertyName("type")] + public string Type { get; private set; } + + /// + /// The reason the Response did not complete. + /// For a cancelled Response, one of turn_detected (the server VAD detected a new start of speech) or + /// client_cancelled (the client sent a cancel event). + /// For an incomplete Response, one of max_output_tokens or content_filter + /// (the server-side safety filter activated and cut off the response). + /// + [JsonInclude] + [JsonPropertyName("reason")] + public string Reason { get; private set; } + + /// + /// A description of the error that caused the response to fail, populated when the status is failed. + /// + [JsonInclude] + [JsonPropertyName("error")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + public Error Error { get; private set; } + } +} diff --git a/OpenAI-DotNet/Realtime/TokenUsageDetails.cs b/OpenAI-DotNet/Realtime/TokenUsageDetails.cs new file mode 100644 index 00000000..11528bbd --- /dev/null +++ b/OpenAI-DotNet/Realtime/TokenUsageDetails.cs @@ -0,0 +1,34 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using System.Text.Json.Serialization; + +namespace OpenAI.Realtime +{ + public sealed class TokenUsageDetails + { + /// + /// The number of cached tokens used in the Response. + /// + [JsonInclude] + [JsonPropertyName("cached_tokens")] + public int? CachedTokens { get; private set; } + + /// + /// The number of text tokens used in the Response. + /// + [JsonInclude] + [JsonPropertyName("text_tokens")] + public int? TextTokens { get; private set; } + + /// + /// The number of audio tokens used in the Response. + /// + [JsonInclude] + [JsonPropertyName("audio_tokens")] + public int? AudioTokens { get; private set; } + + [JsonInclude] + [JsonPropertyName("image_tokens")] + public int? ImageTokens { get; private set; } + } +} diff --git a/OpenAI-DotNet/Realtime/TurnDetectionType.cs b/OpenAI-DotNet/Realtime/TurnDetectionType.cs new file mode 100644 index 00000000..2490e135 --- /dev/null +++ b/OpenAI-DotNet/Realtime/TurnDetectionType.cs @@ -0,0 +1,13 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using System.Runtime.Serialization; + +namespace OpenAI.Realtime +{ + public enum TurnDetectionType + { + Disabled = 0, + [EnumMember(Value = "server_vad")] + Server_VAD, + } +} diff --git a/OpenAI-DotNet/Realtime/UpdateSessionRequest.cs b/OpenAI-DotNet/Realtime/UpdateSessionRequest.cs new file mode 100644 index 00000000..a2de62d7 --- /dev/null +++ b/OpenAI-DotNet/Realtime/UpdateSessionRequest.cs @@ -0,0 +1,40 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using System.Text.Json.Serialization; + +namespace OpenAI.Realtime +{ + /// + /// Send this event to update the session’s default configuration. + /// The client may send this event at any time to update the session configuration, + /// and any field may be updated at any time, except for "voice". + /// The server will respond with a session.updated event that shows the full effective configuration. + /// Only fields that are present are updated, thus the correct way to clear a field like "instructions" is to pass an empty string. + /// + public sealed class UpdateSessionRequest : BaseRealtimeEvent, IClientEvent + { + public UpdateSessionRequest() { } + + public UpdateSessionRequest(Options options) + { + Session = options; + } + + /// + [JsonInclude] + [JsonPropertyName("event_id")] + public override string EventId { get; internal set; } + + /// + [JsonInclude] + [JsonPropertyName("type")] + public override string Type { get; protected set; } = "session.update"; + + /// + /// The session resource. + /// + [JsonInclude] + [JsonPropertyName("session")] + public Options Session { get; private set; } + } +} diff --git a/OpenAI-DotNet/Realtime/Usage.cs b/OpenAI-DotNet/Realtime/Usage.cs new file mode 100644 index 00000000..22e2dd38 --- /dev/null +++ b/OpenAI-DotNet/Realtime/Usage.cs @@ -0,0 +1,37 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using System.Text.Json.Serialization; + +namespace OpenAI.Realtime +{ + public sealed class Usage + { + /// + /// The total number of tokens in the Response including input and output text and audio tokens. + /// + [JsonInclude] + [JsonPropertyName("total_tokens")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + public int? TotalTokens { get; private set; } + + [JsonInclude] + [JsonPropertyName("input_tokens")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + public int? InputTokens { get; private set; } + + [JsonInclude] + [JsonPropertyName("output_tokens")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + public int? OutputTokens { get; private set; } + + [JsonInclude] + [JsonPropertyName("input_token_details")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + public TokenUsageDetails InputTokenDetails { get; private set; } + + [JsonInclude] + [JsonPropertyName("output_token_details")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + public TokenUsageDetails OutputTokenDetails { get; private set; } + } +} diff --git a/OpenAI-DotNet/Realtime/VoiceActivityDetectionSettings.cs b/OpenAI-DotNet/Realtime/VoiceActivityDetectionSettings.cs new file mode 100644 index 00000000..58fd1fde --- /dev/null +++ b/OpenAI-DotNet/Realtime/VoiceActivityDetectionSettings.cs @@ -0,0 +1,51 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using System.Text.Json.Serialization; + +namespace OpenAI.Realtime +{ + public sealed class VoiceActivityDetectionSettings + { + public VoiceActivityDetectionSettings() { } + + public VoiceActivityDetectionSettings( + TurnDetectionType type = TurnDetectionType.Server_VAD, + float? detectionThreshold = null, + int? prefixPadding = null, + int? silenceDuration = null) + { + switch (type) + { + case TurnDetectionType.Server_VAD: + Type = TurnDetectionType.Server_VAD; + DetectionThreshold = detectionThreshold; + PrefixPadding = prefixPadding; + SilenceDuration = silenceDuration; + break; + } + } + + [JsonInclude] + [JsonPropertyName("type")] + [JsonConverter(typeof(Extensions.JsonStringEnumConverter))] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)] + public TurnDetectionType Type { get; private set; } + + [JsonInclude] + [JsonPropertyName("threshold")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)] + public float? DetectionThreshold { get; private set; } + + [JsonInclude] + [JsonPropertyName("prefix_padding_ms")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)] + public int? PrefixPadding { get; private set; } + + [JsonInclude] + [JsonPropertyName("silence_duration_ms")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)] + public int? SilenceDuration { get; private set; } + + public static VoiceActivityDetectionSettings Disabled() => new(TurnDetectionType.Disabled); + } +} diff --git a/OpenAI-DotNet/Threads/CreateMessageRequest.cs b/OpenAI-DotNet/Threads/CreateMessageRequest.cs deleted file mode 100644 index 7622a5c8..00000000 --- a/OpenAI-DotNet/Threads/CreateMessageRequest.cs +++ /dev/null @@ -1,106 +0,0 @@ -// Licensed under the MIT License. See LICENSE in the project root for license information. - -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text.Json.Serialization; - -namespace OpenAI.Threads -{ - /// - /// Create a message on a thread. - /// - [Obsolete("use Thread.Message instead.")] - public sealed class CreateMessageRequest - { - public static implicit operator CreateMessageRequest(string content) => new(content); - - public static implicit operator CreateMessageRequest(Message message) => new(message.Content, message.Role, message.Attachments, message.Metadata); - - public static implicit operator Message(CreateMessageRequest request) => new(request.Content, request.Role, request.Attachments, request.Metadata); - - [Obsolete("Removed")] - public CreateMessageRequest(string content, IEnumerable fileIds, IReadOnlyDictionary metadata = null) - { - } - - /// - /// Constructor. - /// - /// - /// The contents of the message. - /// - /// - /// The role of the entity that is creating the message. - /// - /// - /// A list of files attached to the message, and the tools they were added to. - /// - /// - /// Set of 16 key-value pairs that can be attached to an object. - /// This can be useful for storing additional information about the object in a structured format. - /// Keys can be a maximum of 64 characters long and values can be a maximum of 512 characters long. - /// - public CreateMessageRequest(string content, Role role = Role.User, IEnumerable attachments = null, IReadOnlyDictionary metadata = null) - : this(new List { new(content) }, role, attachments, metadata) - { - } - - /// - /// Constructor. - /// - /// - /// The contents of the message. - /// - /// - /// The role of the entity that is creating the message. - /// - /// - /// A list of files attached to the message, and the tools they were added to. - /// - /// - /// Set of 16 key-value pairs that can be attached to an object. - /// This can be useful for storing additional information about the object in a structured format. - /// Keys can be a maximum of 64 characters long and values can be a maximum of 512 characters long. - /// - public CreateMessageRequest(IEnumerable content, Role role = Role.User, IEnumerable attachments = null, IReadOnlyDictionary metadata = null) - { - Content = content?.ToList(); - Role = role; - Attachments = attachments?.ToList(); - Metadata = metadata; - } - - /// - /// The role of the entity that is creating the message. - /// - /// - /// Currently only user is supported. - /// - [JsonPropertyName("role")] - public Role Role { get; } - - /// - /// The contents of the message. - /// - [JsonInclude] - [JsonPropertyName("content")] - [JsonIgnore(Condition = JsonIgnoreCondition.Never)] - public IReadOnlyList Content { get; private set; } - - /// - /// A list of files attached to the message, and the tools they were added to. - /// - [JsonInclude] - [JsonPropertyName("Attachments")] - public IReadOnlyList Attachments { get; private set; } - - /// - /// Set of 16 key-value pairs that can be attached to an object. - /// This can be useful for storing additional information about the object in a structured format. - /// Keys can be a maximum of 64 characters long and values can be a maximum of 512 characters long. - /// - [JsonPropertyName("metadata")] - public IReadOnlyDictionary Metadata { get; } - } -} diff --git a/OpenAI-DotNet/Threads/CreateRunRequest.cs b/OpenAI-DotNet/Threads/CreateRunRequest.cs index 77c18bce..31bed55b 100644 --- a/OpenAI-DotNet/Threads/CreateRunRequest.cs +++ b/OpenAI-DotNet/Threads/CreateRunRequest.cs @@ -13,35 +13,6 @@ namespace OpenAI.Threads /// public sealed class CreateRunRequest { - /// - /// Constructor. - /// - /// - /// The ID of the assistant used for execution of this run. - /// - /// . - [Obsolete("removed")] - public CreateRunRequest(string assistantId, CreateRunRequest request) - : this( - assistantId, - request?.Model, - request?.Instructions, - request?.AdditionalInstructions, - request?.AdditionalMessages, - request?.Tools, - request?.Metadata, - request?.Temperature, - request?.TopP, - request?.MaxPromptTokens, - request?.MaxCompletionTokens, - request?.TruncationStrategy, - request?.ToolChoice as string ?? ((Tool)request?.ToolChoice)?.Function?.Name, - request?.ParallelToolCalls, - request?.ResponseFormatObject?.JsonSchema, - request?.ResponseFormatObject ?? ChatResponseFormat.Text) - { - } - /// /// Constructor. /// @@ -137,7 +108,7 @@ public CreateRunRequest( string toolChoice = null, bool? parallelToolCalls = null, JsonSchema jsonSchema = null, - ChatResponseFormat responseFormat = ChatResponseFormat.Text) + ChatResponseFormat responseFormat = ChatResponseFormat.Auto) { AssistantId = assistantId; Model = model; @@ -194,7 +165,11 @@ public CreateRunRequest( } else { - ResponseFormatObject = responseFormat; + ResponseFormatObject = responseFormat switch + { + ChatResponseFormat.Text or ChatResponseFormat.Json => responseFormat, + _ => null + }; } } @@ -328,7 +303,7 @@ public CreateRunRequest( /// [JsonPropertyName("response_format")] [JsonConverter(typeof(ResponseFormatConverter))] - [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] public ResponseFormatObject ResponseFormatObject { get; internal set; } [JsonIgnore] diff --git a/OpenAI-DotNet/Threads/CreateThreadAndRunRequest.cs b/OpenAI-DotNet/Threads/CreateThreadAndRunRequest.cs index 9af054eb..a8e7e5c9 100644 --- a/OpenAI-DotNet/Threads/CreateThreadAndRunRequest.cs +++ b/OpenAI-DotNet/Threads/CreateThreadAndRunRequest.cs @@ -10,34 +10,6 @@ namespace OpenAI.Threads { public sealed class CreateThreadAndRunRequest { - /// - /// Constructor. - /// - /// - /// The ID of the assistant to use to execute this run. - /// - /// . - [Obsolete("removed")] - public CreateThreadAndRunRequest(string assistantId, CreateThreadAndRunRequest request) - : this( - assistantId, - request?.Model, - request?.Instructions, - request?.Tools, - request?.ToolResources, - request?.Metadata, - request?.Temperature, - request?.TopP, - request?.MaxPromptTokens, - request?.MaxCompletionTokens, - request?.TruncationStrategy, - request?.ToolChoice as string ?? ((Tool)request?.ToolChoice)?.Function?.Name, - request?.ParallelToolCalls, - request?.ResponseFormatObject?.JsonSchema, - request?.ResponseFormat ?? ChatResponseFormat.Text) - { - } - /// /// Constructor. /// @@ -138,7 +110,7 @@ public CreateThreadAndRunRequest( string toolChoice = null, bool? parallelToolCalls = null, JsonSchema jsonSchema = null, - ChatResponseFormat responseFormat = ChatResponseFormat.Text, + ChatResponseFormat responseFormat = ChatResponseFormat.Auto, CreateThreadRequest createThreadRequest = null) { AssistantId = assistantId; @@ -187,6 +159,7 @@ public CreateThreadAndRunRequest( MaxPromptTokens = maxPromptTokens; MaxCompletionTokens = maxCompletionTokens; TruncationStrategy = truncationStrategy; + ParallelToolCalls = parallelToolCalls; if (jsonSchema != null) { @@ -194,10 +167,13 @@ public CreateThreadAndRunRequest( } else { - ResponseFormatObject = responseFormat; + ResponseFormatObject = responseFormat switch + { + ChatResponseFormat.Text or ChatResponseFormat.Json => responseFormat, + _ => null + }; } - ParallelToolCalls = parallelToolCalls; ThreadRequest = createThreadRequest; } @@ -330,7 +306,7 @@ public CreateThreadAndRunRequest( /// [JsonPropertyName("response_format")] [JsonConverter(typeof(ResponseFormatConverter))] - [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] public ResponseFormatObject ResponseFormatObject { get; internal set; } [JsonIgnore] diff --git a/OpenAI-DotNet/Threads/MessageFileResponse.cs b/OpenAI-DotNet/Threads/MessageFileResponse.cs deleted file mode 100644 index f29535dc..00000000 --- a/OpenAI-DotNet/Threads/MessageFileResponse.cs +++ /dev/null @@ -1,46 +0,0 @@ -// Licensed under the MIT License. See LICENSE in the project root for license information. - -using System; -using System.Text.Json.Serialization; - -namespace OpenAI.Threads -{ - [Obsolete("Removed. Use Assistant.ToolResources instead.")] - public sealed class MessageFileResponse : BaseResponse - { - /// - /// The identifier, which can be referenced in API endpoints. - /// - [JsonInclude] - [JsonPropertyName("id")] - public string Id { get; private set; } - - /// - /// The object type, which is always thread.message.file. - /// - [JsonInclude] - [JsonPropertyName("object")] - public string Object { get; private set; } - - /// - /// The Unix timestamp (in seconds) for when the message file was created. - /// - [JsonInclude] - [JsonPropertyName("created_at")] - public int CreatedAtUnixTimeSeconds { get; private set; } - - [JsonIgnore] - public DateTime CreatedAt => DateTimeOffset.FromUnixTimeSeconds(CreatedAtUnixTimeSeconds).DateTime; - - /// - /// The ID of the message that the File is attached to. - /// - [JsonInclude] - [JsonPropertyName("message_id")] - public string MessageId { get; private set; } - - public static implicit operator string(MessageFileResponse response) => response?.ToString(); - - public override string ToString() => Id; - } -} diff --git a/OpenAI-DotNet/Threads/MessageResponse.cs b/OpenAI-DotNet/Threads/MessageResponse.cs index 466d9b65..c1f4b13c 100644 --- a/OpenAI-DotNet/Threads/MessageResponse.cs +++ b/OpenAI-DotNet/Threads/MessageResponse.cs @@ -134,15 +134,6 @@ public IReadOnlyList Content [JsonPropertyName("run_id")] public string RunId { get; private set; } - /// - /// A list of file IDs that the assistant should use. - /// Useful for tools like 'retrieval' and 'code_interpreter' that can access files. - /// A maximum of 10 files can be attached to a message. - /// - [JsonIgnore] - [Obsolete("Use Attachments instead.")] - public IReadOnlyList FileIds => Attachments?.Select(attachment => attachment.FileId).ToList(); - /// /// A list of files attached to the message, and the tools they were added to. /// @@ -174,9 +165,13 @@ public static implicit operator Message(MessageResponse response) /// /// of all . public string PrintContent() - => content == null - ? string.Empty - : string.Join("\n", content.Select(c => c?.ToString())); + { + return Delta != null + ? Delta.PrintContent() + : content == null + ? string.Empty + : string.Join("\n", content.Select(c => c?.ToString())); + } /// /// Converts the to the specified . diff --git a/OpenAI-DotNet/Threads/RunResponse.cs b/OpenAI-DotNet/Threads/RunResponse.cs index 1986b048..b3918dba 100644 --- a/OpenAI-DotNet/Threads/RunResponse.cs +++ b/OpenAI-DotNet/Threads/RunResponse.cs @@ -281,7 +281,7 @@ public IReadOnlyList Tools [JsonInclude] [JsonPropertyName("response_format")] [JsonConverter(typeof(ResponseFormatConverter))] - [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] public ResponseFormatObject ResponseFormatObject { get; private set; } [JsonIgnore] diff --git a/OpenAI-DotNet/Threads/ThreadExtensions.cs b/OpenAI-DotNet/Threads/ThreadExtensions.cs index 375abce2..df5e257e 100644 --- a/OpenAI-DotNet/Threads/ThreadExtensions.cs +++ b/OpenAI-DotNet/Threads/ThreadExtensions.cs @@ -137,44 +137,6 @@ public static async Task ModifyMessageAsync(this ThreadResponse #endregion Messages - #region Files (Obsolete) - - /// - /// Returns a list of message files. - /// - /// . - /// The id of the message that the files belongs to. - /// . - /// Optional, . - /// . - [Obsolete("MessageFiles removed from Threads. Files now belong to ToolResources.")] - public static async Task> ListFilesAsync(this ThreadResponse thread, string messageId, ListQuery query = null, CancellationToken cancellationToken = default) - => await thread.Client.ThreadsEndpoint.ListFilesAsync(thread.Id, messageId, query, cancellationToken).ConfigureAwait(false); - - /// - /// Returns a list of message files. - /// - /// . - /// . - /// Optional, . - /// . - [Obsolete("MessageFiles removed from Threads. Files now belong to ToolResources.")] - public static async Task> ListFilesAsync(this MessageResponse message, ListQuery query = null, CancellationToken cancellationToken = default) - => await message.Client.ThreadsEndpoint.ListFilesAsync(message.ThreadId, message.Id, query, cancellationToken).ConfigureAwait(false); - - /// - /// Retrieve message file. - /// - /// . - /// The id of the file being retrieved. - /// Optional, . - /// . - [Obsolete("MessageFiles removed from Threads. Files now belong to ToolResources.")] - public static async Task RetrieveFileAsync(this MessageResponse message, string fileId, CancellationToken cancellationToken = default) - => await message.Client.ThreadsEndpoint.RetrieveFileAsync(message.ThreadId, message.Id, fileId, cancellationToken).ConfigureAwait(false); - - #endregion Files (Obsolete) - #region Runs [Obsolete("use new overload with Func instead.")] diff --git a/OpenAI-DotNet/Threads/ThreadsEndpoint.cs b/OpenAI-DotNet/Threads/ThreadsEndpoint.cs index 3142f874..19d3b608 100644 --- a/OpenAI-DotNet/Threads/ThreadsEndpoint.cs +++ b/OpenAI-DotNet/Threads/ThreadsEndpoint.cs @@ -191,14 +191,6 @@ public async Task> ListRunsAsync(string threadId, List return response.Deserialize>(responseAsString, client); } - [Obsolete("use new overload with Func instead.")] - public async Task CreateRunAsync(string threadId, CreateRunRequest request, Action streamEventHandler, CancellationToken cancellationToken = default) - => await CreateRunAsync(threadId, request, streamEventHandler == null ? null : serverSentEvent => - { - streamEventHandler.Invoke(serverSentEvent); - return Task.CompletedTask; - }, cancellationToken).ConfigureAwait(false); - /// /// Create a run. /// @@ -283,14 +275,6 @@ public async Task CreateRunAsync(string threadId, CreateRunRequest return response.Deserialize(responseAsString, client); } - [Obsolete("use new overload with Func instead.")] - public async Task CreateThreadAndRunAsync(CreateThreadAndRunRequest request, Action streamEventHandler, CancellationToken cancellationToken = default) - => await CreateThreadAndRunAsync(request, streamEventHandler == null ? null : serverSentEvent => - { - streamEventHandler.Invoke(serverSentEvent); - return Task.CompletedTask; - }, cancellationToken).ConfigureAwait(false); - /// /// Create a thread and run it in one request. /// @@ -407,14 +391,6 @@ public async Task ModifyRunAsync(string threadId, string runId, IRe return response.Deserialize(responseAsString, client); } - [Obsolete("use new overload with Func instead.")] - public async Task SubmitToolOutputsAsync(string threadId, string runId, SubmitToolOutputsRequest request, Action streamEventHandler, CancellationToken cancellationToken = default) - => await SubmitToolOutputsAsync(threadId, runId, request, streamEventHandler == null ? null : serverSentEvent => - { - streamEventHandler.Invoke(serverSentEvent); - return Task.CompletedTask; - }, cancellationToken).ConfigureAwait(false); - /// /// When a run has the status: "requires_action" and required_action.type is submit_tool_outputs, /// this endpoint can be used to submit the outputs from the tool calls once they're all completed. @@ -519,42 +495,6 @@ public async Task CancelRunAsync(string threadId, string runId, Cancellati #endregion Runs - #region Files (Obsolete) - - /// - /// Returns a list of message files. - /// - /// The id of the thread that the message and files belong to. - /// The id of the message that the files belongs to. - /// . - /// Optional, . - /// . - [Obsolete("Files removed from Assistants. Files now belong to ToolResources.")] - public async Task> ListFilesAsync(string threadId, string messageId, ListQuery query = null, CancellationToken cancellationToken = default) - { - using var response = await client.Client.GetAsync(GetUrl($"/{threadId}/messages/{messageId}/files", query), cancellationToken).ConfigureAwait(false); - var responseAsString = await response.ReadAsStringAsync(EnableDebug, cancellationToken: cancellationToken).ConfigureAwait(false); - return response.Deserialize>(responseAsString, client); - } - - /// - /// Retrieve message file. - /// - /// The id of the thread to which the message and file belong. - /// The id of the message the file belongs to. - /// The id of the file being retrieved. - /// Optional, . - /// . - [Obsolete("Files removed from Assistants. Files now belong to ToolResources.")] - public async Task RetrieveFileAsync(string threadId, string messageId, string fileId, CancellationToken cancellationToken = default) - { - using var response = await client.Client.GetAsync(GetUrl($"/{threadId}/messages/{messageId}/files/{fileId}"), cancellationToken).ConfigureAwait(false); - var responseAsString = await response.ReadAsStringAsync(EnableDebug, cancellationToken: cancellationToken).ConfigureAwait(false); - return response.Deserialize(responseAsString, client); - } - - #endregion Files (Obsolete) - private async Task StreamRunAsync(string endpoint, StringContent payload, Func streamEventHandler, CancellationToken cancellationToken = default) { RunResponse run = null; diff --git a/OpenAI-DotNet/Threads/ToolCall.cs b/OpenAI-DotNet/Threads/ToolCall.cs index 03452d28..3fd7793e 100644 --- a/OpenAI-DotNet/Threads/ToolCall.cs +++ b/OpenAI-DotNet/Threads/ToolCall.cs @@ -1,7 +1,6 @@ // Licensed under the MIT License. See LICENSE in the project root for license information. using OpenAI.Extensions; -using System; using System.Collections.Generic; using System.Text.Json.Serialization; @@ -56,13 +55,6 @@ public sealed class ToolCall : IAppendable [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)] public IReadOnlyDictionary FileSearch { get; private set; } - /// - /// For now, this is always going to be an empty object. - /// - [JsonIgnore] - [Obsolete("Removed")] - public object Retrieval { get; private set; } - [JsonIgnore] public bool IsFunction => Type == "function"; @@ -73,14 +65,14 @@ public void AppendFrom(ToolCall other) return; } - if (other.Index.HasValue) + if (!string.IsNullOrWhiteSpace(other.Id)) { - Index = other.Index; + Id = other.Id; } - if (!string.IsNullOrWhiteSpace(other.Id)) + if (other.Index.HasValue) { - Id = other.Id; + Index = other.Index; } if (other.FunctionCall != null) @@ -112,5 +104,8 @@ public void AppendFrom(ToolCall other) FileSearch = other.FileSearch; } } + + public static implicit operator OpenAI.ToolCall(ToolCall toolCall) + => new(toolCall.Id, toolCall.FunctionCall.Name, toolCall.FunctionCall.Arguments); } } diff --git a/README.md b/README.md index a9bd5806..ccc49bda 100644 --- a/README.md +++ b/README.md @@ -57,6 +57,12 @@ dotnet add package OpenAI-DotNet - [List Models](#list-models) - [Retrieve Models](#retrieve-model) - [Delete Fine Tuned Model](#delete-fine-tuned-model) +- [Realtime](#realtime) :new: + - [Create Realtime Session](#create-realtime-session) :new: + - [Client Events](#client-events) :new: + - [Sending Client Events](#sending-client-events) :new: + - [Server Events](#server-events) :new: + - [Receiving Server Events](#receiving-server-events) :new: - [Assistants](#assistants) - [List Assistants](#list-assistants) - [Create Assistant](#create-assistant) @@ -108,7 +114,8 @@ dotnet add package OpenAI-DotNet - [Streaming](#chat-streaming) - [Tools](#chat-tools) - [Vision](#chat-vision) - - [Json Schema](#chat-structured-outputs) + - [Audio](#chat-audio) :new: + - [Structured Outputs](#chat-structured-outputs) - [Json Mode](#chat-json-mode) - [Audio](#audio) - [Create Speech](#create-speech) @@ -350,6 +357,8 @@ public partial class Program Once you have set up your proxy server, your end users can now make authenticated requests to your proxy api instead of directly to the OpenAI API. The proxy server will handle authentication and forward requests to the OpenAI API, ensuring that your API keys and other sensitive information remain secure. +--- + ### [Models](https://platform.openai.com/docs/api-reference/models) List and describe the various models available in the API. You can refer to the [Models documentation](https://platform.openai.com/docs/models) to understand what models are available and the differences between them. @@ -398,6 +407,192 @@ var isDeleted = await api.ModelsEndpoint.DeleteFineTuneModelAsync("your-fine-tun Assert.IsTrue(isDeleted); ``` +--- + +### [Realtime](https://platform.openai.com/docs/api-reference/realtime) + +> [!WARNING] +> Beta Feature. API subject to breaking changes. + +- [Realtime Guide](https://platform.openai.com/docs/guides/realtime) + +The Realtime API enables you to build low-latency, multi-modal conversational experiences. It currently supports text and audio as both input and output, as well as function calling. + +The Assistants API is accessed via `OpenAIClient.RealtimeEndpoint` + +#### Create Realtime Session + +Here is a simple example of how to create a realtime session and to send and receive messages from the model. + +```csharp +using var api = new OpenAIClient(); +var cancellationTokenSource = new CancellationTokenSource(); +var tools = new List +{ + Tool.FromFunc("goodbye", () => + { + cancellationTokenSource.Cancel(); + return "Goodbye!"; + }) +}; +var options = new Options(Model.GPT4oRealtime, tools: tools); +using var session = await api.RealtimeEndpoint.CreateSessionAsync(options); +var responseTask = await session.ReceiveUpdatesAsync(ServerEvents, cancellationTokenSource.Token); +await session.SendAsync(new ConversationItemCreateRequest("Hello!")); +await session.SendAsync(new CreateResponseRequest()); +await session.SendAsync(new InputAudioBufferAppendRequest(new ReadOnlyMemory(new byte[1024 * 4])), cts.Token); +await session.SendAsync(new ConversationItemCreateRequest("GoodBye!")); +await session.SendAsync(new CreateResponseRequest()); +await responseTask; + +void ServerEvents(IServerEvent @event) +{ + switch (@event) + { + case ResponseAudioTranscriptResponse transcriptResponse: + Console.WriteLine(transcriptResponse.ToString()); + break; + case ResponseFunctionCallArgumentsResponse functionCallResponse: + if (functionCallResponse.IsDone) + { + ToolCall toolCall = functionCallResponse; + toolCall.InvokeFunction(); + } + + break; + } +} +``` + +#### Client Events + +The library implements `IClientEvent` interface for outgoing client sent events. + +- [`UpdateSessionRequest`](https://platform.openai.com/docs/api-reference/realtime-client-events/session/update): Update the session with new session options. +- [`InputAudioBufferAppendRequest`](https://platform.openai.com/docs/api-reference/realtime-client-events/input-audio-buffer/append): Append audio to the input audio buffer. (Unlike made other client events, the server will not send a confirmation response to this event). +- [`InputAudioBufferCommitRequest`](https://platform.openai.com/docs/api-reference/realtime-client-events/input-audio-buffer/commit): Commit the input audio buffer. (When in Server VAD mode, the client does not need to send this event). +- [`InputAudioBufferClearRequest`](https://platform.openai.com/docs/api-reference/realtime-client-events/input-audio-buffer/clear): Clear the input audio buffer. +- [`ConversationItemCreateRequest`](https://platform.openai.com/docs/api-reference/realtime-client-events/conversation/item/create): Create a new conversation item. This is the main way to send user content to the model. +- [`ConversationItemTruncateRequest`](https://platform.openai.com/docs/api-reference/realtime-client-events/conversation/item/truncate): Send this event to truncate a previous assistant message’s audio. +- [`ConversationItemDeleteRequest`](https://platform.openai.com/docs/api-reference/realtime-client-events/conversation/item/delete): Delete a conversation item. This is useful when you want to remove a message from the conversation history. +- [`CreateResponseRequest`](https://platform.openai.com/docs/api-reference/realtime-client-events/response/create): Create a response from the model. Send this event after creating new conversation items or invoking tool calls. This will trigger the model to generate a response. +- [`ResponseCancelRequest`](https://platform.openai.com/docs/api-reference/realtime-client-events/response/cancel) -Send this event to cancel an in-progress response. + +##### Sending Client Events + +You can send client events at any time to the server by calling the `RealtimeSession.SendAsync` method on the session object. The send call will return a `IServerEvent` handle that best represents the appropriate response from the server for that event. This is useful if you want to handle server responses in a more granular way. + +Ideally though, you may want to handle all server responses with [`RealtimeSession.ReceiveUpdatesAsync`](#receiving-server-events). + +> [!NOTE] +> The server will not send a confirmation response to the `InputAudioBufferAppendRequest` event. + +> [!IMPORTANT] +> You will also need to send `CreateResponseRequest` to trigger the model to generate a response. + +```csharp +var serverEvent = await session.SendAsync(new ConversationItemCreateRequest("Hello!")); +Console.WriteLine(serverEvent.ToJsonString()); +serverEvent = await session.SendAsync(new CreateResponseRequest()); +Console.WriteLine(serverEvent.ToJsonString()); +``` + +#### Server Events + +The library implements `IServerEvent` interface for incoming server sent events. + +- [`RealtimeEventError`](https://platform.openai.com/docs/api-reference/realtime-server-events/error): Returned when an error occurs, which could be a client problem or a server problem. +- [`SessionResponse`](https://platform.openai.com/docs/api-reference/realtime-server-events/session): Returned for both a `session.created` and `session.updated` event. +- [`RealtimeConversationResponse`](https://platform.openai.com/docs/api-reference/realtime-server-events/conversation/created): Returned when a new conversation item is created. +- [`ConversationItemCreatedResponse`](https://platform.openai.com/docs/api-reference/realtime-server-events/conversation/item/created): Returned when a new conversation item is created. +- [`ConversationItemInputAudioTranscriptionResponse`](https://platform.openai.com/docs/api-reference/realtime-server-events/conversation): Returned when the input audio transcription is completed or failed. +- [`ConversationItemTruncatedResponse`](https://platform.openai.com/docs/api-reference/realtime-server-events/conversation/item/truncated): Returned when a conversation item is truncated. +- [`ConversationItemDeletedResponse`](https://platform.openai.com/docs/api-reference/realtime-server-events/conversation/item/deleted): Returned when a conversation item is deleted. +- [`InputAudioBufferCommittedResponse`](https://platform.openai.com/docs/api-reference/realtime-server-events/input_audio_buffer/committed): Returned when an input audio buffer is committed, either by the client or automatically in server VAD mode. +- [`InputAudioBufferClearedResponse`](https://platform.openai.com/docs/api-reference/realtime-server-events/input_audio_buffer/cleared): Returned when an input audio buffer is cleared. +- [`InputAudioBufferStartedResponse`](https://platform.openai.com/docs/api-reference/realtime-server-events/input_audio_buffer/speech_started): Sent by the server when in server_vad mode to indicate that speech has been detected in the audio buffer. This can happen any time audio is added to the buffer (unless speech is already detected). The client may want to use this event to interrupt audio playback or provide visual feedback to the user. +- [`InputAudioBufferStoppedResponse`](https://platform.openai.com/docs/api-reference/realtime-server-events/input_audio_buffer/speech_stopped): Returned in server_vad mode when the server detects the end of speech in the audio buffer. +- [`RealtimeResponse`](https://platform.openai.com/docs/api-reference/realtime-server-events/response): Returned when a response is created or done. +- [`ResponseOutputItemResponse`](https://platform.openai.com/docs/api-reference/realtime-server-events/response/output_item): Returned when a response output item is added or done. +- [`ResponseContentPartResponse`](https://platform.openai.com/docs/api-reference/realtime-server-events/response/content_part): Returned when a response content part is added or done. +- [`ResponseTextResponse`](https://platform.openai.com/docs/api-reference/realtime-server-events/response/text): Returned when a response text is updated or done. +- [`ResponseAudioTranscriptResponse`](https://platform.openai.com/docs/api-reference/realtime-server-events/response/audio_transcript): Returned when a response audio transcript is updated or done. +- [`ResponseAudioResponse`](https://platform.openai.com/docs/api-reference/realtime-server-events/response/audio): Returned when a response audio is updated or done. +- [`ResponseFunctionCallArgumentsResponse`](https://platform.openai.com/docs/api-reference/realtime-server-events/response/function_call_arguments): Returned when a response function call arguments are updated or done. +- [`RateLimitsResponse`](https://platform.openai.com/docs/api-reference/realtime-server-events/rate_limits): Returned when rate limits are updated. + +##### Receiving Server Events + +To receive server events, you will need to call the `RealtimeSession.ReceiveUpdatesAsync` method on the session object. This method will return a `Task` or `IAsyncEnumerable` that will complete when the session is closed or when the cancellation token is triggered. Ideally this method should be called once and runs for the duration of the session. + +> [!NOTE] +> You can also get sent `IClientEvent` callbacks as well by using the `IRealtimeEvent` interface instead of `IServerEvent`. + +```csharp +await foreach (var @event in session.ReceiveUpdatesAsync(cts.Token)) +{ + switch (@event) + { + case RealtimeEventError error: + // raised anytime an error occurs + break; + case SessionResponse sessionResponse: + // raised when a session is created or updated + break; + case RealtimeConversationResponse conversationResponse: + // raised when a new conversation is created + break; + case ConversationItemCreatedResponse conversationItemCreated: + // raised when a new conversation item is created + break; + case ConversationItemInputAudioTranscriptionResponse conversationItemTranscription: + // raised when the input audio transcription is completed or failed + break; + case ConversationItemTruncatedResponse conversationItemTruncated: + // raised when a conversation item is truncated + break; + case ConversationItemDeletedResponse conversationItemDeleted: + // raised when a conversation item is deleted + break; + case InputAudioBufferCommittedResponse committedResponse: + // raised when an input audio buffer is committed + break; + case InputAudioBufferClearedResponse clearedResponse: + // raised when an input audio buffer is cleared + break; + case InputAudioBufferStartedResponse startedResponse: + // raised when speech is detected in the audio buffer + break; + case InputAudioBufferStoppedResponse stoppedResponse: + // raised when speech stops in the audio buffer + break; + case RealtimeResponse realtimeResponse: + // raised when a response is created or done + break; + case ResponseOutputItemResponse outputItemResponse: + // raised when a response output item is added or done + break; + case ResponseContentPartResponse contentPartResponse: + // raised when a response content part is added or done + break; + case ResponseTextResponse textResponse: + // raised when a response text is updated or done + break; + case ResponseAudioTranscriptResponse transcriptResponse: + // raised when a response audio transcript is updated or done + break; + case ResponseFunctionCallArgumentsResponse functionCallResponse: + // raised when a response function call arguments are updated or done + break; + case RateLimitsResponse rateLimitsResponse: + // raised when rate limits are updated + break; + } +} +``` + +--- + ### [Assistants](https://platform.openai.com/docs/api-reference/assistants) > [!WARNING] @@ -1199,6 +1394,8 @@ using var api = new OpenAIClient(); var isCancelled = await api.VectorStoresEndpoint.CancelVectorStoreFileBatchAsync("vector-store-id", "vector-store-file-batch-id"); ``` +--- + ### [Chat](https://platform.openai.com/docs/api-reference/chat) Given a chat conversation, the model will return a chat completion response. @@ -1363,6 +1560,21 @@ var response = await api.ChatEndpoint.GetCompletionAsync(chatRequest); Console.WriteLine($"{response.FirstChoice.Message.Role}: {response.FirstChoice.Message.Content} | Finish Reason: {response.FirstChoice.FinishDetails}"); ``` +#### [Chat Audio](https://platform.openai.com/docs/guides/audio) + +```csharp +using var api = new OpenAIClient(); +var messages = new List +{ + new Message(Role.System, "You are a helpful assistant."), + new Message(Role.User, "Is a golden retriever a good family dog?") +}; +var chatRequest = new ChatRequest(messages, Model.GPT4oAudio, audioConfig: Voice.Alloy); +var response = await api.ChatEndpoint.GetCompletionAsync(chatRequest); +Console.WriteLine($"{response.FirstChoice.Message.Role}: {response.FirstChoice} | Finish Reason: {response.FirstChoice.FinishDetails}"); +// todo play response.FirstChoice.Message.AudioOutput.Data +``` + #### [Chat Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs) The evolution of [Json Mode](#chat-json-mode). While both ensure valid JSON is produced, only Structured Outputs ensure schema adherence. @@ -1448,6 +1660,8 @@ foreach (var choice in response.Choices) response.GetUsage(); ``` +--- + ### [Audio](https://platform.openai.com/docs/api-reference/audio) Converts audio into text. @@ -1506,6 +1720,8 @@ var response = await api.AudioEndpoint.CreateTranslationTextAsync(request); Console.WriteLine(response); ``` +--- + ### [Images](https://platform.openai.com/docs/api-reference/images) Given a prompt and/or an input image, the model will generate a new image. @@ -1560,6 +1776,8 @@ foreach (var image in imageResults) } ``` +--- + ### [Files](https://platform.openai.com/docs/api-reference/files) Files are used to upload documents that can be used with features like [Fine-tuning](#fine-tuning). @@ -1623,6 +1841,8 @@ Console.WriteLine(downloadedFilePath); Assert.IsTrue(File.Exists(downloadedFilePath)); ``` +--- + ### [Fine Tuning](https://platform.openai.com/docs/api-reference/fine-tuning) Manage fine-tuning jobs to tailor a model to your specific training data. @@ -1694,6 +1914,8 @@ foreach (var @event in eventList.Items.OrderByDescending(@event => @event.Create } ``` +--- + ### [Batches](https://platform.openai.com/docs/api-reference/batch) Create large batches of API requests for asynchronous processing. The Batch API returns completions within 24 hours for a 50% discount. @@ -1748,6 +1970,8 @@ var isCancelled = await api.BatchEndpoint.CancelBatchAsync(batch); Assert.IsTrue(isCancelled); ``` +--- + ### [Embeddings](https://platform.openai.com/docs/api-reference/embeddings) Get a vector representation of a given input that can be easily consumed by machine learning models and algorithms. @@ -1766,6 +1990,8 @@ var response = await api.EmbeddingsEndpoint.CreateEmbeddingAsync("The food was d Console.WriteLine(response); ``` +--- + ### [Moderations](https://platform.openai.com/docs/api-reference/moderations) Given a input text, outputs if the model classifies it as violating OpenAI's content policy.