From 8616a63556abf99f40f4100dc52bd115e34f2926 Mon Sep 17 00:00:00 2001 From: Matt Gotteiner Date: Thu, 1 Aug 2024 14:35:18 -0700 Subject: [PATCH] Add embedding dimensions --- .../Services/ReadRetrieveReadChatService.cs | 12 ++++++-- app/functions/EmbedFunctions/Program.cs | 10 +++++++ app/map-env.ps1 | 3 +- app/prepdocs/PrepareDocs/AppOptions.cs | 1 + app/prepdocs/PrepareDocs/Program.Clients.cs | 2 ++ app/prepdocs/PrepareDocs/Program.Options.cs | 5 ++++ .../Services/AzureSearchEmbedService.cs | 5 ++-- .../AzureDocumentSearchServiceTest.cs | 28 +++++++++++++++++++ .../AzureSearchEmbedServiceTest.cs | 15 ++++++++++ infra/app/web.bicep | 7 +++++ infra/main.bicep | 13 +++++++-- infra/main.parameters.json | 16 +++++++++-- scripts/prepdocs.ps1 | 5 +++- 13 files changed, 111 insertions(+), 11 deletions(-) diff --git a/app/backend/Services/ReadRetrieveReadChatService.cs b/app/backend/Services/ReadRetrieveReadChatService.cs index 7c72dcd9..7e4b9b58 100644 --- a/app/backend/Services/ReadRetrieveReadChatService.cs +++ b/app/backend/Services/ReadRetrieveReadChatService.cs @@ -33,19 +33,27 @@ public ReadRetrieveReadChatService( kernelBuilder = kernelBuilder.AddOpenAIChatCompletion(deployment, client); var embeddingModelName = configuration["OpenAiEmbeddingDeployment"]; + int embeddingModelDimensions; + if (!int.TryParse(configuration["AzureOpenAiEmbeddingModelDimensions"], out embeddingModelDimensions)) { + embeddingModelDimensions = 1536; + } ArgumentNullException.ThrowIfNullOrWhiteSpace(embeddingModelName); - kernelBuilder = kernelBuilder.AddOpenAITextEmbeddingGeneration(embeddingModelName, client); + kernelBuilder = kernelBuilder.AddOpenAITextEmbeddingGeneration(embeddingModelName, client, dimensions: embeddingModelDimensions); } else { var deployedModelName = configuration["AzureOpenAiChatGptDeployment"]; ArgumentNullException.ThrowIfNullOrWhiteSpace(deployedModelName); var embeddingModelName = configuration["AzureOpenAiEmbeddingDeployment"]; + int embeddingModelDimensions; + if (!int.TryParse(configuration["AzureOpenAiEmbeddingModelDimensions"], out embeddingModelDimensions)) { + embeddingModelDimensions = 1536; + } if (!string.IsNullOrEmpty(embeddingModelName)) { var endpoint = configuration["AzureOpenAiServiceEndpoint"]; ArgumentNullException.ThrowIfNullOrWhiteSpace(endpoint); - kernelBuilder = kernelBuilder.AddAzureOpenAITextEmbeddingGeneration(embeddingModelName, endpoint, tokenCredential ?? new DefaultAzureCredential()); + kernelBuilder = kernelBuilder.AddAzureOpenAITextEmbeddingGeneration(embeddingModelName, endpoint, tokenCredential ?? new DefaultAzureCredential(), dimensions: embeddingModelDimensions); kernelBuilder = kernelBuilder.AddAzureOpenAIChatCompletion(deployedModelName, endpoint, tokenCredential ?? new DefaultAzureCredential()); } } diff --git a/app/functions/EmbedFunctions/Program.cs b/app/functions/EmbedFunctions/Program.cs index 9cc8a4a2..93de91d3 100644 --- a/app/functions/EmbedFunctions/Program.cs +++ b/app/functions/EmbedFunctions/Program.cs @@ -1,6 +1,7 @@ // Copyright (c) Microsoft. All rights reserved. using Azure.AI.OpenAI; +using Microsoft.Extensions.Logging.Abstractions; var host = new HostBuilder() .ConfigureServices(services => @@ -65,17 +66,24 @@ uri is not null OpenAIClient? openAIClient = null; string? embeddingModelName = null; + int embeddingModelDimensions = -1; if (useAOAI) { var openaiEndPoint = Environment.GetEnvironmentVariable("AZURE_OPENAI_ENDPOINT") ?? throw new ArgumentNullException("AZURE_OPENAI_ENDPOINT is null"); embeddingModelName = Environment.GetEnvironmentVariable("AZURE_OPENAI_EMBEDDING_DEPLOYMENT") ?? throw new ArgumentNullException("AZURE_OPENAI_EMBEDDING_DEPLOYMENT is null"); + if (!int.TryParse(Environment.GetEnvironmentVariable("AZURE_OPENAI_EMBEDDING_MODEL_DIMENSIONS"), out embeddingModelDimensions)) { + embeddingModelDimensions = 1536; + } openAIClient = new OpenAIClient(new Uri(openaiEndPoint), new DefaultAzureCredential()); } else { embeddingModelName = Environment.GetEnvironmentVariable("OPENAI_EMBEDDING_DEPLOYMENT") ?? throw new ArgumentNullException("OPENAI_EMBEDDING_DEPLOYMENT is null"); var openaiKey = Environment.GetEnvironmentVariable("OPENAI_API_KEY") ?? throw new ArgumentNullException("OPENAI_API_KEY is null"); + if (!int.TryParse(Environment.GetEnvironmentVariable("AZURE_OPENAI_EMBEDDING_MODEL_DIMENSIONS"), out embeddingModelDimensions)) { + embeddingModelDimensions = 1536; + } openAIClient = new OpenAIClient(openaiKey); } @@ -94,6 +102,7 @@ uri is not null return new AzureSearchEmbedService( openAIClient: openAIClient, embeddingModelName: embeddingModelName, + embeddingModelDimensions: embeddingModelDimensions, searchClient: searchClient, searchIndexName: searchIndexName, searchIndexClient: searchIndexClient, @@ -108,6 +117,7 @@ uri is not null return new AzureSearchEmbedService( openAIClient: openAIClient, embeddingModelName: embeddingModelName, + embeddingModelDimensions: embeddingModelDimensions, searchClient: searchClient, searchIndexName: searchIndexName, searchIndexClient: searchIndexClient, diff --git a/app/map-env.ps1 b/app/map-env.ps1 index aa9a3570..eb221a53 100644 --- a/app/map-env.ps1 +++ b/app/map-env.ps1 @@ -6,8 +6,7 @@ foreach ($line in (& azd env get-values)) { if ($line -match "([^=]+)=(.*)") { $key = $matches[1] $value = $matches[2] -replace '^"|"$' - [Environment]::SetEnvironmentVariable( - $key, $value, [System.EnvironmentVariableTarget]::User) + Set-Item "env:$key" $value } } diff --git a/app/prepdocs/PrepareDocs/AppOptions.cs b/app/prepdocs/PrepareDocs/AppOptions.cs index 721fa901..04e7fb9c 100644 --- a/app/prepdocs/PrepareDocs/AppOptions.cs +++ b/app/prepdocs/PrepareDocs/AppOptions.cs @@ -13,6 +13,7 @@ internal record class AppOptions( string? AzureOpenAIServiceEndpoint, string? SearchIndexName, string? EmbeddingModelName, + int EmbeddingModelDimensions, bool Remove, bool RemoveAll, string? FormRecognizerServiceEndpoint, diff --git a/app/prepdocs/PrepareDocs/Program.Clients.cs b/app/prepdocs/PrepareDocs/Program.Clients.cs index 06ec021c..f63c3cfd 100644 --- a/app/prepdocs/PrepareDocs/Program.Clients.cs +++ b/app/prepdocs/PrepareDocs/Program.Clients.cs @@ -26,12 +26,14 @@ private static Task GetAzureSearchEmbedService(AppOptio var blobContainerClient = await GetCorpusBlobContainerClientAsync(o); var openAIClient = await GetOpenAIClientAsync(o); var embeddingModelName = o.EmbeddingModelName ?? throw new ArgumentNullException(nameof(o.EmbeddingModelName)); + var embeddingModelDimensions = o.EmbeddingModelDimensions; var searchIndexName = o.SearchIndexName ?? throw new ArgumentNullException(nameof(o.SearchIndexName)); var computerVisionService = await GetComputerVisionServiceAsync(o); return new AzureSearchEmbedService( openAIClient: openAIClient, embeddingModelName: embeddingModelName, + embeddingModelDimensions: embeddingModelDimensions, searchClient: searchClient, searchIndexName: searchIndexName, searchIndexClient: searchIndexClient, diff --git a/app/prepdocs/PrepareDocs/Program.Options.cs b/app/prepdocs/PrepareDocs/Program.Options.cs index 71f775ed..f94d1301 100644 --- a/app/prepdocs/PrepareDocs/Program.Options.cs +++ b/app/prepdocs/PrepareDocs/Program.Options.cs @@ -32,6 +32,9 @@ internal static partial class Program private static readonly Option s_embeddingModelName = new(name: "--embeddingmodel", description: "Optional. Name of the Azure AI Search embedding model to use for embedding content in the search index (will be created if it doesn't exist)"); + private static readonly Option s_embeddingModelDimensions = + new(name: "--embeddingmodeldimensions", description: "Optional. Name of the Azure AI Search embedding model dimensions to use for embedding content in the search index"); + private static readonly Option s_remove = new(name: "--remove", description: "Remove references to this document from blob storage and the search index"); @@ -63,6 +66,7 @@ internal static partial class Program s_searchIndexName, s_azureOpenAIService, s_embeddingModelName, + s_embeddingModelDimensions, s_remove, s_removeAll, s_formRecognizerServiceEndpoint, @@ -81,6 +85,7 @@ internal static partial class Program SearchIndexName: context.ParseResult.GetValueForOption(s_searchIndexName), AzureOpenAIServiceEndpoint: context.ParseResult.GetValueForOption(s_azureOpenAIService), EmbeddingModelName: context.ParseResult.GetValueForOption(s_embeddingModelName), + EmbeddingModelDimensions: context.ParseResult.GetValueForOption(s_embeddingModelDimensions), Remove: context.ParseResult.GetValueForOption(s_remove), RemoveAll: context.ParseResult.GetValueForOption(s_removeAll), FormRecognizerServiceEndpoint: context.ParseResult.GetValueForOption(s_formRecognizerServiceEndpoint), diff --git a/app/shared/Shared/Services/AzureSearchEmbedService.cs b/app/shared/Shared/Services/AzureSearchEmbedService.cs index ab31d2b8..bc475e42 100644 --- a/app/shared/Shared/Services/AzureSearchEmbedService.cs +++ b/app/shared/Shared/Services/AzureSearchEmbedService.cs @@ -18,6 +18,7 @@ public sealed partial class AzureSearchEmbedService( OpenAIClient openAIClient, string embeddingModelName, + int embeddingModelDimensions, SearchClient searchClient, string searchIndexName, SearchIndexClient searchIndexClient, @@ -135,7 +136,7 @@ public async Task CreateSearchIndexAsync(string searchIndexName, CancellationTok new SimpleField("sourcefile", SearchFieldDataType.String) { IsFacetable = true }, new SearchField("embedding", SearchFieldDataType.Collection(SearchFieldDataType.Single)) { - VectorSearchDimensions = 1536, + VectorSearchDimensions = embeddingModelDimensions, IsSearchable = true, VectorSearchProfileName = vectorSearchProfile, } @@ -449,7 +450,7 @@ private async Task IndexSectionsAsync(IEnumerable
sections) var batch = new IndexDocumentsBatch(); foreach (var section in sections) { - var embeddings = await openAIClient.GetEmbeddingsAsync(new Azure.AI.OpenAI.EmbeddingsOptions(embeddingModelName, [section.Content.Replace('\r', ' ')])); + var embeddings = await openAIClient.GetEmbeddingsAsync(new Azure.AI.OpenAI.EmbeddingsOptions(embeddingModelName, [section.Content.Replace('\r', ' ')]) { Dimensions = embeddingModelDimensions }); var embedding = embeddings.Value.Data.FirstOrDefault()?.Embedding.ToArray() ?? []; batch.Actions.Add(new IndexDocumentsAction( IndexActionType.MergeOrUpload, diff --git a/app/tests/MinimalApi.Tests/AzureDocumentSearchServiceTest.cs b/app/tests/MinimalApi.Tests/AzureDocumentSearchServiceTest.cs index 21f2000d..8518f7f5 100644 --- a/app/tests/MinimalApi.Tests/AzureDocumentSearchServiceTest.cs +++ b/app/tests/MinimalApi.Tests/AzureDocumentSearchServiceTest.cs @@ -37,6 +37,34 @@ public async Task QueryDocumentsTestTextOnlyAsync() records.Count().Should().Be(3); } + [EnvironmentVariablesFact("AZURE_SEARCH_INDEX", "AZURE_SEARCH_SERVICE_ENDPOINT", "AZURE_OPENAI_ENDPOINT", "AZURE_OPENAI_EMBEDDING_DEPLOYMENT", "AZURE_OPENAI_EMBEDDING_MODEL_DIMENSIONS")] + public async Task QueryDocumentsTestEmbeddingOnlyDimensionsAsync() + { + var index = Environment.GetEnvironmentVariable("AZURE_SEARCH_INDEX") ?? throw new InvalidOperationException(); + var searchServceEndpoint = Environment.GetEnvironmentVariable("AZURE_SEARCH_SERVICE_ENDPOINT") ?? throw new InvalidOperationException(); + var openAiEndpoint = Environment.GetEnvironmentVariable("AZURE_OPENAI_ENDPOINT") ?? throw new InvalidOperationException(); + var openAiEmbeddingDeployment = Environment.GetEnvironmentVariable("AZURE_OPENAI_EMBEDDING_DEPLOYMENT") ?? throw new InvalidOperationException(); + var openAiEmbeddingModelDimensions = int.Parse(Environment.GetEnvironmentVariable("AZURE_OPENAI_EMBEDDING_MODEL_DIMENSIONS") ?? ""); + var openAIClient = new OpenAIClient(new Uri(openAiEndpoint), new DefaultAzureCredential()); + var query = "What is included in my Northwind Health Plus plan that is not in standard?"; + var embeddingResponse = await openAIClient.GetEmbeddingsAsync(new EmbeddingsOptions(openAiEmbeddingDeployment, [query]) { Dimensions = openAiEmbeddingModelDimensions }); + var embedding = embeddingResponse.Value.Data.First().Embedding; + var searchClient = new SearchClient(new Uri(searchServceEndpoint), index, new DefaultAzureCredential()); + var service = new AzureSearchService(searchClient); + + // query only + var option = new RequestOverrides + { + RetrievalMode = RetrievalMode.Vector, + Top = 3, + SemanticCaptions = true, + SemanticRanker = true, + }; + + var records = await service.QueryDocumentsAsync(query: query, embedding: embedding.ToArray(), overrides: option); + records.Count().Should().Be(3); + } + [EnvironmentVariablesFact("AZURE_SEARCH_INDEX", "AZURE_SEARCH_SERVICE_ENDPOINT", "AZURE_OPENAI_ENDPOINT", "AZURE_OPENAI_EMBEDDING_DEPLOYMENT")] public async Task QueryDocumentsTestEmbeddingOnlyAsync() { diff --git a/app/tests/MinimalApi.Tests/AzureSearchEmbedServiceTest.cs b/app/tests/MinimalApi.Tests/AzureSearchEmbedServiceTest.cs index af356f76..ffba33c8 100644 --- a/app/tests/MinimalApi.Tests/AzureSearchEmbedServiceTest.cs +++ b/app/tests/MinimalApi.Tests/AzureSearchEmbedServiceTest.cs @@ -25,12 +25,14 @@ public class AzureSearchEmbedServiceTest "AZURE_SEARCH_SERVICE_ENDPOINT", "AZURE_OPENAI_ENDPOINT", "AZURE_OPENAI_EMBEDDING_DEPLOYMENT", + "AZURE_OPENAI_EMBEDDING_MODEL_DIMENSIONS", "AZURE_STORAGE_BLOB_ENDPOINT")] public async Task EnsureSearchIndexWithoutImageEmbeddingsAsync() { var indexName = nameof(EnsureSearchIndexWithoutImageEmbeddingsAsync).ToLower(); var openAIEndpoint = Environment.GetEnvironmentVariable("AZURE_OPENAI_ENDPOINT") ?? throw new InvalidOperationException(); var embeddingDeployment = Environment.GetEnvironmentVariable("AZURE_OPENAI_EMBEDDING_DEPLOYMENT") ?? throw new InvalidOperationException(); + var embeddingModelDimensions = int.Parse(Environment.GetEnvironmentVariable("AZURE_OPENAI_EMBEDDING_MODEL_DIMENSIONS") ?? ""); var azureSearchEndpoint = Environment.GetEnvironmentVariable("AZURE_SEARCH_SERVICE_ENDPOINT") ?? throw new InvalidOperationException(); var blobEndpoint = Environment.GetEnvironmentVariable("AZURE_STORAGE_BLOB_ENDPOINT") ?? throw new InvalidOperationException(); var blobContainer = "test"; @@ -45,6 +47,7 @@ public async Task EnsureSearchIndexWithoutImageEmbeddingsAsync() var service = new AzureSearchEmbedService( openAIClient: openAIClient, embeddingModelName: embeddingDeployment, + embeddingModelDimensions: embeddingModelDimensions, searchClient: searchClient, searchIndexName: indexName, searchIndexClient: searchIndexClient, @@ -82,12 +85,14 @@ public async Task EnsureSearchIndexWithoutImageEmbeddingsAsync() "AZURE_SEARCH_SERVICE_ENDPOINT", "AZURE_OPENAI_ENDPOINT", "AZURE_OPENAI_EMBEDDING_DEPLOYMENT", + "AZURE_OPENAI_EMBEDDING_MODEL_DIMENSIONS", "AZURE_STORAGE_BLOB_ENDPOINT")] public async Task EnsureSearchIndexWithImageEmbeddingsAsync() { var indexName = nameof(EnsureSearchIndexWithImageEmbeddingsAsync).ToLower(); var openAIEndpoint = Environment.GetEnvironmentVariable("AZURE_OPENAI_ENDPOINT") ?? throw new InvalidOperationException(); var embeddingDeployment = Environment.GetEnvironmentVariable("AZURE_OPENAI_EMBEDDING_DEPLOYMENT") ?? throw new InvalidOperationException(); + var embeddingModelDimensions = int.Parse(Environment.GetEnvironmentVariable("AZURE_OPENAI_EMBEDDING_MODEL_DIMENSIONS") ?? ""); var azureSearchEndpoint = Environment.GetEnvironmentVariable("AZURE_SEARCH_SERVICE_ENDPOINT") ?? throw new InvalidOperationException(); var blobEndpoint = Environment.GetEnvironmentVariable("AZURE_STORAGE_BLOB_ENDPOINT") ?? throw new InvalidOperationException(); var blobContainer = "test"; @@ -104,6 +109,7 @@ public async Task EnsureSearchIndexWithImageEmbeddingsAsync() var service = new AzureSearchEmbedService( openAIClient: openAIClient, embeddingModelName: embeddingDeployment, + embeddingModelDimensions: embeddingModelDimensions, searchClient: searchClient, searchIndexName: indexName, searchIndexClient: searchIndexClient, @@ -141,6 +147,7 @@ public async Task EnsureSearchIndexWithImageEmbeddingsAsync() "AZURE_SEARCH_SERVICE_ENDPOINT", "AZURE_OPENAI_ENDPOINT", "AZURE_OPENAI_EMBEDDING_DEPLOYMENT", + "AZURE_OPENAI_EMBEDDING_MODEL_DIMENSIONS", "AZURE_FORMRECOGNIZER_SERVICE_ENDPOINT", "AZURE_STORAGE_BLOB_ENDPOINT")] public async Task GetDocumentTextTestAsync() @@ -148,6 +155,7 @@ public async Task GetDocumentTextTestAsync() var indexName = nameof(GetDocumentTextTestAsync).ToLower(); var openAIEndpoint = Environment.GetEnvironmentVariable("AZURE_OPENAI_ENDPOINT") ?? throw new InvalidOperationException(); var embeddingDeployment = Environment.GetEnvironmentVariable("AZURE_OPENAI_EMBEDDING_DEPLOYMENT") ?? throw new InvalidOperationException(); + var embeddingModelDimensions = int.Parse(Environment.GetEnvironmentVariable("AZURE_OPENAI_EMBEDDING_MODEL_DIMENSIONS") ?? ""); var azureSearchEndpoint = Environment.GetEnvironmentVariable("AZURE_SEARCH_SERVICE_ENDPOINT") ?? throw new InvalidOperationException(); var blobEndpoint = Environment.GetEnvironmentVariable("AZURE_STORAGE_BLOB_ENDPOINT") ?? throw new InvalidOperationException(); var azureFormRecognizerEndpoint = Environment.GetEnvironmentVariable("AZURE_FORMRECOGNIZER_SERVICE_ENDPOINT") ?? throw new InvalidOperationException(); @@ -163,6 +171,7 @@ public async Task GetDocumentTextTestAsync() var service = new AzureSearchEmbedService( openAIClient: openAIClient, embeddingModelName: embeddingDeployment, + embeddingModelDimensions: embeddingModelDimensions, searchClient: searchClient, searchIndexName: indexName, searchIndexClient: searchIndexClient, @@ -191,6 +200,7 @@ public async Task GetDocumentTextTestAsync() "AZURE_SEARCH_SERVICE_ENDPOINT", "AZURE_OPENAI_ENDPOINT", "AZURE_OPENAI_EMBEDDING_DEPLOYMENT", + "AZURE_OPENAI_EMBEDDING_MODEL_DIMENSIONS", "AZURE_FORMRECOGNIZER_SERVICE_ENDPOINT", "AZURE_STORAGE_BLOB_ENDPOINT")] public async Task EmbedBlobWithoutImageEmbeddingTestAsync() @@ -198,6 +208,7 @@ public async Task EmbedBlobWithoutImageEmbeddingTestAsync() var indexName = nameof(EmbedBlobWithoutImageEmbeddingTestAsync).ToLower(); var openAIEndpoint = Environment.GetEnvironmentVariable("AZURE_OPENAI_ENDPOINT") ?? throw new InvalidOperationException(); var embeddingDeployment = Environment.GetEnvironmentVariable("AZURE_OPENAI_EMBEDDING_DEPLOYMENT") ?? throw new InvalidOperationException(); + var embeddingModelDimensions = int.Parse(Environment.GetEnvironmentVariable("AZURE_OPENAI_EMBEDDING_MODEL_DIMENSIONS") ?? ""); var azureSearchEndpoint = Environment.GetEnvironmentVariable("AZURE_SEARCH_SERVICE_ENDPOINT") ?? throw new InvalidOperationException(); var blobEndpoint = Environment.GetEnvironmentVariable("AZURE_STORAGE_BLOB_ENDPOINT") ?? throw new InvalidOperationException(); var azureFormRecognizerEndpoint = Environment.GetEnvironmentVariable("AZURE_FORMRECOGNIZER_SERVICE_ENDPOINT") ?? throw new InvalidOperationException(); @@ -215,6 +226,7 @@ public async Task EmbedBlobWithoutImageEmbeddingTestAsync() var service = new AzureSearchEmbedService( openAIClient: openAIClient, embeddingModelName: embeddingDeployment, + embeddingModelDimensions: embeddingModelDimensions, searchClient: searchClient, searchIndexName: indexName, searchIndexClient: searchIndexClient, @@ -251,6 +263,7 @@ public async Task EmbedBlobWithoutImageEmbeddingTestAsync() "AZURE_SEARCH_SERVICE_ENDPOINT", "AZURE_OPENAI_ENDPOINT", "AZURE_OPENAI_EMBEDDING_DEPLOYMENT", + "AZURE_OPENAI_EMBEDDING_MODEL_DIMENSIONS", "AZURE_FORMRECOGNIZER_SERVICE_ENDPOINT", "AZURE_STORAGE_BLOB_ENDPOINT")] public async Task EmbedImageBlobTestAsync() @@ -258,6 +271,7 @@ public async Task EmbedImageBlobTestAsync() var indexName = nameof(EmbedImageBlobTestAsync).ToLower(); var openAIEndpoint = Environment.GetEnvironmentVariable("AZURE_OPENAI_ENDPOINT") ?? throw new InvalidOperationException(); var embeddingDeployment = Environment.GetEnvironmentVariable("AZURE_OPENAI_EMBEDDING_DEPLOYMENT") ?? throw new InvalidOperationException(); + var embeddingModelDimensions = int.Parse(Environment.GetEnvironmentVariable("AZURE_OPENAI_EMBEDDING_MODEL_DIMENSIONS") ?? ""); var azureSearchEndpoint = Environment.GetEnvironmentVariable("AZURE_SEARCH_SERVICE_ENDPOINT") ?? throw new InvalidOperationException(); var blobEndpoint = Environment.GetEnvironmentVariable("AZURE_STORAGE_BLOB_ENDPOINT") ?? throw new InvalidOperationException(); var azureFormRecognizerEndpoint = Environment.GetEnvironmentVariable("AZURE_FORMRECOGNIZER_SERVICE_ENDPOINT") ?? throw new InvalidOperationException(); @@ -278,6 +292,7 @@ public async Task EmbedImageBlobTestAsync() var service = new AzureSearchEmbedService( openAIClient: openAIClient, embeddingModelName: embeddingDeployment, + embeddingModelDimensions: embeddingModelDimensions, searchClient: searchClient, searchIndexName: indexName, searchIndexClient: searchIndexClient, diff --git a/infra/app/web.bicep b/infra/app/web.bicep index bd9cdf61..450b9ee5 100644 --- a/infra/app/web.bicep +++ b/infra/app/web.bicep @@ -56,6 +56,9 @@ param openAiChatGptDeployment string @description('The OpenAI Embedding deployment name') param openAiEmbeddingDeployment string +@description('The OpenAI Embedding model dimensions') +param openAiEmbeddingModelDimensions string + @description('use gpt-4v') param useVision bool = false @@ -137,6 +140,10 @@ module app '../core/host/container-app-upsert.bicep' = { name: 'AZURE_OPENAI_EMBEDDING_DEPLOYMENT' value: openAiEmbeddingDeployment } + { + name: 'AZURE_OPENAI_EMBEDDING_MODEL_DIMENSIONS' + value: openAiEmbeddingModelDimensions + } { name: 'AZURE_COMPUTER_VISION_ENDPOINT' value: computerVisionEndpoint diff --git a/infra/main.bicep b/infra/main.bicep index 9c31f9fd..6106bedb 100644 --- a/infra/main.bicep +++ b/infra/main.bicep @@ -61,6 +61,12 @@ param embeddingDeploymentCapacity int = 30 @description('Name of the embedding model. Default: text-embedding-ada-002') param azureEmbeddingModelName string = 'text-embedding-ada-002' +@description('Version of the embedding model') +param azureEmbeddingModelVersion string = '2' + +@description('Dimensions of the embedding model. Defaults: 1536') +param azureEmbeddingModelDimensions string = '1536' + @description('Name of the container apps environment') param containerAppsEnvironmentName string = '' @@ -122,7 +128,7 @@ param searchIndexName string = 'gptkbindex' param searchServiceName string = '' @description('Location of the resource group for the Azure AI Search service') -param searchServiceResourceGroupLocation string = location +param searchServiceResourceGroupLocation string = '' @description('Name of the resource group for the Azure AI Search service') param searchServiceResourceGroupName string = '' @@ -328,6 +334,7 @@ module web './app/web.bicep' = { openAiEndpoint: useAOAI ? azureOpenAi.outputs.endpoint : '' openAiChatGptDeployment: useAOAI ? azureChatGptDeploymentName : '' openAiEmbeddingDeployment: useAOAI ? azureEmbeddingDeploymentName : '' + openAiEmbeddingModelDimensions: azureEmbeddingModelDimensions serviceBinds: [] } } @@ -366,6 +373,7 @@ module function './app/function.bicep' = { AZURE_SEARCH_INDEX: searchIndexName AZURE_STORAGE_BLOB_ENDPOINT: storage.outputs.primaryEndpoints.blob AZURE_OPENAI_EMBEDDING_DEPLOYMENT: useAOAI ? azureEmbeddingDeploymentName : '' + AZURE_OPENAI_EMBEDDING_MODEL_DIMENSIONS: useAOAI ? azureEmbeddingModelDimensions : '' OPENAI_EMBEDDING_DEPLOYMENT: useAOAI ? '' : openAiEmbeddingDeployment AZURE_OPENAI_ENDPOINT: useAOAI ? azureOpenAi.outputs.endpoint : '' USE_VISION: string(useVision) @@ -407,7 +415,7 @@ module azureOpenAi 'core/ai/cognitiveservices.bicep' = if (useAOAI) { model: { format: 'OpenAI' name: azureEmbeddingModelName - version: '2' + version: azureEmbeddingModelVersion } sku: { name: 'Standard' @@ -748,6 +756,7 @@ output AZURE_LOCATION string = location output AZURE_OPENAI_RESOURCE_LOCATION string = openAiResourceGroupLocation output AZURE_OPENAI_CHATGPT_DEPLOYMENT string = azureChatGptDeploymentName output AZURE_OPENAI_EMBEDDING_DEPLOYMENT string = azureEmbeddingDeploymentName +output AZURE_OPENAI_EMBEDDING_MODEL_DIMENSIONS string = azureEmbeddingModelDimensions output AZURE_OPENAI_ENDPOINT string = useAOAI? azureOpenAi.outputs.endpoint : '' output AZURE_OPENAI_RESOURCE_GROUP string = useAOAI ? azureOpenAiResourceGroup.name : '' output AZURE_OPENAI_SERVICE string = useAOAI ? azureOpenAi.outputs.name : '' diff --git a/infra/main.parameters.json b/infra/main.parameters.json index acbd00ac..0e2c502e 100644 --- a/infra/main.parameters.json +++ b/infra/main.parameters.json @@ -35,7 +35,7 @@ "chatGptDeploymentName": { "value": "${AZURE_OPENAI_CHATGPT_DEPLOYMENT=chat}" }, - "embeddingDeploymentName": { + "azureEmbeddingDeploymentName": { "value": "${AZURE_OPENAI_EMBEDDING_DEPLOYMENT=embedding}" }, "openAiServiceName": { @@ -59,6 +59,9 @@ "searchServiceResourceGroupName": { "value": "${AZURE_SEARCH_SERVICE_RESOURCE_GROUP}" }, + "searchServiceResourceGroupLocation": { + "value": "${AZURE_SEARCH_SERVICE_RESOURCE_GROUP_LOCATION}" + }, "searchServiceSkuName": { "value": "standard" }, @@ -78,7 +81,7 @@ "value": "${AZURE_USE_APPLICATION_INSIGHTS=true}" }, "openAIApiKey": { - "value": "${OPENAI_API_KEY}" + "value": "${OPENAI_API_KEY=''}" }, "useAOAI": { "value": "${USE_AOAI=true}" @@ -103,6 +106,15 @@ }, "azureOpenAIChatGptDeploymentCapacity": { "value": "${AZURE_OPENAI_CHATGPT_DEPLOYMENT_CAPACITY=30}" + }, + "azureEmbeddingModelName": { + "value": "${AZURE_OPENAI_EMBEDDING_MODEL_NAME=text-embedding-ada-002}" + }, + "azureEmbeddingModelDimensions": { + "value": "${AZURE_OPENAI_EMBEDDING_MODEL_DIMENSIONS=1536}" + }, + "azureEmbeddingModelVersion": { + "value": "${AZURE_OPENAI_EMBEDDING_MODEL_VERSION=2}" } } } diff --git a/scripts/prepdocs.ps1 b/scripts/prepdocs.ps1 index 773a0d8e..be33b83d 100644 --- a/scripts/prepdocs.ps1 +++ b/scripts/prepdocs.ps1 @@ -28,6 +28,7 @@ function Invoke-ExternalCommand { $processStartInfo = New-Object System.Diagnostics.ProcessStartInfo $processStartInfo.FileName = $Command $processStartInfo.Arguments = $Arguments + $processStartInfo.WorkingDirectory = Get-Location | Select -ExpandProperty Path $processStartInfo.RedirectStandardOutput = $true $processStartInfo.RedirectStandardError = $true $processStartInfo.UseShellExecute = $false @@ -50,7 +51,8 @@ function Invoke-ExternalCommand { if ([string]::IsNullOrEmpty($env:AZD_PREPDOCS_RAN) -or $env:AZD_PREPDOCS_RAN -eq "false") { Write-Host 'Running "PrepareDocs.dll"' - Get-Location | Select-Object -ExpandProperty Path + # Set azd environment variables + ./app/map-env.ps1 $dotnetArguments = "run --project app/prepdocs/PrepareDocs/PrepareDocs.csproj ./data/**/* " + "--storageendpoint $($env:AZURE_STORAGE_BLOB_ENDPOINT) " + @@ -70,6 +72,7 @@ if ([string]::IsNullOrEmpty($env:AZD_PREPDOCS_RAN) -or $env:AZD_PREPDOCS_RAN -eq Write-Host "Using Azure OpenAI" $dotnetArguments += " --openaiendpoint $($env:AZURE_OPENAI_ENDPOINT) " $dotnetArguments += " --embeddingmodel $($env:AZURE_OPENAI_EMBEDDING_DEPLOYMENT) " + $dotnetArguments += " --embeddingmodeldimensions $($env:AZURE_OPENAI_EMBEDDING_MODEL_DIMENSIONS)" } else{ Write-Host "Using OpenAI"