From f51e6f57af4cf75a365b5c3e34c2b0d153d9668b Mon Sep 17 00:00:00 2001 From: John Oliver <1615532+johnoliver@users.noreply.github.com> Date: Wed, 18 Oct 2023 17:51:02 +0100 Subject: [PATCH 01/13] store wip --- .../openai/samples/rag/Application.java | 15 ++ .../RAGApproachFactorySpringBootImpl.java | 25 ++- .../JavaSemanticKernelChainsApproach.java | 3 - .../JavaSemanticKernelChainsChatApproach.java | 140 +++++++++++++++ ...aSemanticKernelWithMemoryChatApproach.java | 168 ++++++++++++++++++ .../rag/chat/controller/ChatController.java | 2 + .../samples/rag/common/ChatGPTUtils.java | 23 ++- app/frontend/src/pages/chat/Chat.tsx | 67 ++++++- 8 files changed, 420 insertions(+), 23 deletions(-) create mode 100644 app/backend/src/main/java/com/microsoft/openai/samples/rag/chat/approaches/semantickernel/JavaSemanticKernelChainsChatApproach.java create mode 100644 app/backend/src/main/java/com/microsoft/openai/samples/rag/chat/approaches/semantickernel/JavaSemanticKernelWithMemoryChatApproach.java diff --git a/app/backend/src/main/java/com/microsoft/openai/samples/rag/Application.java b/app/backend/src/main/java/com/microsoft/openai/samples/rag/Application.java index 775c6dd..7483b2b 100644 --- a/app/backend/src/main/java/com/microsoft/openai/samples/rag/Application.java +++ b/app/backend/src/main/java/com/microsoft/openai/samples/rag/Application.java @@ -4,6 +4,9 @@ import org.slf4j.LoggerFactory; import org.springframework.boot.SpringApplication; import org.springframework.boot.autoconfigure.SpringBootApplication; +import org.springframework.context.annotation.Bean; +import org.springframework.web.servlet.config.annotation.CorsRegistry; +import org.springframework.web.servlet.config.annotation.WebMvcConfigurer; @SpringBootApplication public class Application { @@ -14,4 +17,16 @@ public static void main(String[] args) { LOG.info("Application profile from system property is [{}]", System.getProperty("spring.profiles.active")); new 
SpringApplication(Application.class).run(args); } + + @Bean + public WebMvcConfigurer corsConfigurer() { + return new WebMvcConfigurer() { + @Override + public void addCorsMappings(CorsRegistry registry) { + registry + .addMapping("/api/**") + .allowedOrigins("http://localhost:8080"); + } + }; + } } diff --git a/app/backend/src/main/java/com/microsoft/openai/samples/rag/approaches/RAGApproachFactorySpringBootImpl.java b/app/backend/src/main/java/com/microsoft/openai/samples/rag/approaches/RAGApproachFactorySpringBootImpl.java index 5e9dd14..22d68bb 100644 --- a/app/backend/src/main/java/com/microsoft/openai/samples/rag/approaches/RAGApproachFactorySpringBootImpl.java +++ b/app/backend/src/main/java/com/microsoft/openai/samples/rag/approaches/RAGApproachFactorySpringBootImpl.java @@ -2,9 +2,11 @@ import com.microsoft.openai.samples.rag.ask.approaches.PlainJavaAskApproach; import com.microsoft.openai.samples.rag.ask.approaches.semantickernel.JavaSemanticKernelChainsApproach; -import com.microsoft.openai.samples.rag.ask.approaches.semantickernel.JavaSemanticKernelWithMemoryApproach; import com.microsoft.openai.samples.rag.ask.approaches.semantickernel.JavaSemanticKernelPlannerApproach; +import com.microsoft.openai.samples.rag.ask.approaches.semantickernel.JavaSemanticKernelWithMemoryApproach; import com.microsoft.openai.samples.rag.chat.approaches.PlainJavaChatApproach; +import com.microsoft.openai.samples.rag.chat.approaches.semantickernel.JavaSemanticKernelChainsChatApproach; +import com.microsoft.openai.samples.rag.chat.approaches.semantickernel.JavaSemanticKernelWithMemoryChatApproach; import org.springframework.context.ApplicationContext; import org.springframework.context.ApplicationContextAware; import org.springframework.stereotype.Component; @@ -27,18 +29,27 @@ public class RAGApproachFactorySpringBootImpl implements RAGApproachFactory, App @Override public RAGApproach createApproach(String approachName, RAGType ragType, RAGOptions ragOptions) { - if 
(ragType.equals(RAGType.CHAT) && JAVA_OPENAI_SDK.equals(approachName)) { - return applicationContext.getBean(PlainJavaChatApproach.class); - + if (ragType.equals(RAGType.CHAT)) { + if (JAVA_SEMANTIC_KERNEL.equals(approachName)) { + return applicationContext.getBean(JavaSemanticKernelWithMemoryChatApproach.class); + } else if ( + JAVA_SEMANTIC_KERNEL_PLANNER.equals(approachName) && + ragOptions != null && + ragOptions.getSemantickKernelMode() != null && + ragOptions.getSemantickKernelMode() == SemanticKernelMode.chains) { + return applicationContext.getBean(JavaSemanticKernelChainsChatApproach.class); + } else { + return applicationContext.getBean(PlainJavaChatApproach.class); + } } else if (ragType.equals(RAGType.ASK)) { if (JAVA_OPENAI_SDK.equals(approachName)) return applicationContext.getBean(PlainJavaAskApproach.class); else if (JAVA_SEMANTIC_KERNEL.equals(approachName)) return applicationContext.getBean(JavaSemanticKernelWithMemoryApproach.class); else if (JAVA_SEMANTIC_KERNEL_PLANNER.equals(approachName) && ragOptions.getSemantickKernelMode() != null && ragOptions.getSemantickKernelMode() == SemanticKernelMode.planner) - return applicationContext.getBean(JavaSemanticKernelPlannerApproach.class); - else if(JAVA_SEMANTIC_KERNEL_PLANNER.equals(approachName) && ragOptions != null && ragOptions.getSemantickKernelMode() != null && ragOptions.getSemantickKernelMode() == SemanticKernelMode.chains) - return applicationContext.getBean(JavaSemanticKernelChainsApproach.class); + return applicationContext.getBean(JavaSemanticKernelPlannerApproach.class); + else if (JAVA_SEMANTIC_KERNEL_PLANNER.equals(approachName) && ragOptions != null && ragOptions.getSemantickKernelMode() != null && ragOptions.getSemantickKernelMode() == SemanticKernelMode.chains) + return applicationContext.getBean(JavaSemanticKernelChainsApproach.class); } //if this point is reached then the combination of approach and rag type is not supported diff --git 
a/app/backend/src/main/java/com/microsoft/openai/samples/rag/ask/approaches/semantickernel/JavaSemanticKernelChainsApproach.java b/app/backend/src/main/java/com/microsoft/openai/samples/rag/ask/approaches/semantickernel/JavaSemanticKernelChainsApproach.java index 7891f71..f6d9258 100644 --- a/app/backend/src/main/java/com/microsoft/openai/samples/rag/ask/approaches/semantickernel/JavaSemanticKernelChainsApproach.java +++ b/app/backend/src/main/java/com/microsoft/openai/samples/rag/ask/approaches/semantickernel/JavaSemanticKernelChainsApproach.java @@ -127,7 +127,4 @@ private Kernel buildSemanticKernel( RAGOptions options) { return kernel; } - - - } diff --git a/app/backend/src/main/java/com/microsoft/openai/samples/rag/chat/approaches/semantickernel/JavaSemanticKernelChainsChatApproach.java b/app/backend/src/main/java/com/microsoft/openai/samples/rag/chat/approaches/semantickernel/JavaSemanticKernelChainsChatApproach.java new file mode 100644 index 0000000..2b9429c --- /dev/null +++ b/app/backend/src/main/java/com/microsoft/openai/samples/rag/chat/approaches/semantickernel/JavaSemanticKernelChainsChatApproach.java @@ -0,0 +1,140 @@ +package com.microsoft.openai.samples.rag.chat.approaches.semantickernel; + +import com.azure.ai.openai.OpenAIAsyncClient; +import com.microsoft.openai.samples.rag.approaches.ContentSource; +import com.microsoft.openai.samples.rag.approaches.RAGApproach; +import com.microsoft.openai.samples.rag.approaches.RAGOptions; +import com.microsoft.openai.samples.rag.approaches.RAGResponse; +import com.microsoft.openai.samples.rag.ask.approaches.semantickernel.CognitiveSearchPlugin; +import com.microsoft.openai.samples.rag.common.ChatGPTConversation; +import com.microsoft.openai.samples.rag.common.ChatGPTUtils; +import com.microsoft.openai.samples.rag.proxy.CognitiveSearchProxy; +import com.microsoft.openai.samples.rag.proxy.OpenAIProxy; +import com.microsoft.semantickernel.Kernel; +import com.microsoft.semantickernel.SKBuilders; +import 
com.microsoft.semantickernel.chatcompletion.ChatCompletion; +import com.microsoft.semantickernel.orchestration.SKContext; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.stereotype.Component; + +import java.io.OutputStream; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Objects; +import java.util.stream.Collectors; + +/** + * Simple chat-read-retrieve-read java implementation, using the Cognitive Search and OpenAI APIs directly. + * It uses the ChatGPT API to turn the user question into a good search query. + * It queries Azure Cognitive Search for search results for that query (optionally using the vector embeddings for that query). + * It then combines the search results and original user question, and asks ChatGPT API to answer the question based on the sources. It includes the last 4K of message history as well (or however many tokens are allowed by the deployed model). 
+ */ +@Component +public class JavaSemanticKernelChainsChatApproach implements RAGApproach { + + private static final Logger LOGGER = LoggerFactory.getLogger(JavaSemanticKernelChainsChatApproach.class); + private static final String PLAN_PROMPT = """ + Take the input as a question and answer it finding any information needed + """; + private final CognitiveSearchProxy cognitiveSearchProxy; + + private final OpenAIProxy openAIProxy; + + private final OpenAIAsyncClient openAIAsyncClient; + + @Value("${openai.chatgpt.deployment}") + private String gptChatDeploymentModelId; + + public JavaSemanticKernelChainsChatApproach(CognitiveSearchProxy cognitiveSearchProxy, OpenAIAsyncClient openAIAsyncClient, OpenAIProxy openAIProxy) { + this.cognitiveSearchProxy = cognitiveSearchProxy; + this.openAIAsyncClient = openAIAsyncClient; + this.openAIProxy = openAIProxy; + } + + /** + * @param questionOrConversation + * @param options + * @return + */ + @Override + public RAGResponse run(ChatGPTConversation questionOrConversation, RAGOptions options) { + + String question = ChatGPTUtils.getLastUserQuestion(questionOrConversation.getMessages()); + + Kernel semanticKernel = buildSemanticKernel(options); + + SKContext searchContext = + semanticKernel.runAsync( + question, + semanticKernel.getSkill("InformationFinder").getFunction("Search", null)).block(); + + var sources = formSourcesList(searchContext.getResult()); + + var answerVariables = SKBuilders.variables() + .withVariable("sources", searchContext.getResult()) + .withVariable("input", question) + .build(); + + SKContext answerExecutionContext = + semanticKernel.runAsync(answerVariables, + semanticKernel.getSkill("RAG").getFunction("AnswerQuestion", null)).block(); + return new RAGResponse.Builder() + .prompt("Prompt is managed by Semantic Kernel") + .answer(answerExecutionContext.getResult()) + .sources(sources) + .sourcesAsText(searchContext.getResult()) + .question(question) + .build(); + } + + @Override + public void 
runStreaming( + ChatGPTConversation questionOrConversation, + RAGOptions options, + OutputStream outputStream) { + } + + private List formSourcesList(String result) { + if (result == null) { + return Collections.emptyList(); + } + return Arrays.stream(result + .split("\n")) + .map(source -> { + String[] split = source.split(":", 2); + if (split.length >= 2) { + var sourceName = split[0].trim(); + var sourceContent = split[1].trim(); + return new ContentSource(sourceName, sourceContent); + } else { + return null; + } + }) + .filter(Objects::nonNull) + .collect(Collectors.toList()); + } + + private Kernel buildSemanticKernel(RAGOptions options) { + Kernel kernel = SKBuilders.kernel() + .withDefaultAIService(SKBuilders.chatCompletion() + .withModelId(gptChatDeploymentModelId) + .withOpenAIClient(this.openAIAsyncClient) + .build()) + .build(); + + kernel.importSkill(new CognitiveSearchPlugin(this.cognitiveSearchProxy, this.openAIProxy, options), "InformationFinder"); + + kernel.importSkillFromResources( + "semantickernel/Plugins", + "RAG", + "AnswerQuestion", + null + ); + + return kernel; + } + +} diff --git a/app/backend/src/main/java/com/microsoft/openai/samples/rag/chat/approaches/semantickernel/JavaSemanticKernelWithMemoryChatApproach.java b/app/backend/src/main/java/com/microsoft/openai/samples/rag/chat/approaches/semantickernel/JavaSemanticKernelWithMemoryChatApproach.java new file mode 100644 index 0000000..fc0a5e6 --- /dev/null +++ b/app/backend/src/main/java/com/microsoft/openai/samples/rag/chat/approaches/semantickernel/JavaSemanticKernelWithMemoryChatApproach.java @@ -0,0 +1,168 @@ +package com.microsoft.openai.samples.rag.chat.approaches.semantickernel; + +import com.azure.ai.openai.OpenAIAsyncClient; +import com.azure.core.credential.TokenCredential; +import com.azure.search.documents.SearchAsyncClient; +import com.azure.search.documents.SearchDocument; +import com.microsoft.openai.samples.rag.approaches.ContentSource; +import 
com.microsoft.openai.samples.rag.approaches.RAGApproach; +import com.microsoft.openai.samples.rag.approaches.RAGOptions; +import com.microsoft.openai.samples.rag.approaches.RAGResponse; +import com.microsoft.openai.samples.rag.ask.approaches.semantickernel.memory.CustomAzureCognitiveSearchMemoryStore; +import com.microsoft.openai.samples.rag.common.ChatGPTConversation; +import com.microsoft.openai.samples.rag.common.ChatGPTUtils; +import com.microsoft.semantickernel.Kernel; +import com.microsoft.semantickernel.SKBuilders; +import com.microsoft.semantickernel.ai.embeddings.Embedding; +import com.microsoft.semantickernel.memory.MemoryQueryResult; +import com.microsoft.semantickernel.memory.MemoryRecord; +import com.microsoft.semantickernel.orchestration.SKContext; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.stereotype.Component; +import reactor.core.publisher.Mono; + +import java.io.OutputStream; +import java.util.List; +import java.util.function.Function; +import java.util.stream.Collectors; + +/** + * Accomplish the same task as in the PlainJavaAskApproach approach but using Semantic Kernel framework: + * 1. Memory abstraction is used for vector search capability. It uses Azure Cognitive Search as memory store. + * 2. 
Semantic function has been defined to ask question using sources from memory search results + */ +@Component +public class JavaSemanticKernelWithMemoryChatApproach implements RAGApproach { + private static final Logger LOGGER = LoggerFactory.getLogger(JavaSemanticKernelWithMemoryChatApproach.class); + private final TokenCredential tokenCredential; + private final OpenAIAsyncClient openAIAsyncClient; + + private final SearchAsyncClient searchAsyncClient; + + private final String EMBEDDING_FIELD_NAME = "embedding"; + + @Value("${cognitive.search.service}") + String searchServiceName; + @Value("${cognitive.search.index}") + String indexName; + @Value("${openai.chatgpt.deployment}") + private String gptChatDeploymentModelId; + + @Value("${openai.embedding.deployment}") + private String embeddingDeploymentModelId; + + public JavaSemanticKernelWithMemoryChatApproach(TokenCredential tokenCredential, OpenAIAsyncClient openAIAsyncClient, SearchAsyncClient searchAsyncClient) { + this.tokenCredential = tokenCredential; + this.openAIAsyncClient = openAIAsyncClient; + this.searchAsyncClient = searchAsyncClient; + } + + @Override + public RAGResponse run(ChatGPTConversation questionOrConversation, RAGOptions options) { + + String question = ChatGPTUtils.getLastUserQuestion(questionOrConversation.getMessages()); + + //Build semantic kernel with Azure Cognitive Search as memory store. AnswerQuestion skill is imported from resources. + Kernel semanticKernel = buildSemanticKernel(options); + + /** + * Use semantic kernel built-in memory.searchAsync. It uses OpenAI to generate embeddings for the provided question. + * Question embeddings are provided to cognitive search via search options. 
+ */ + List memoryResult = semanticKernel.getMemory().searchAsync( + indexName, + question, + options.getTop(), + 0.5f, + false) + .block(); + + LOGGER.info("Total {} sources found in cognitive vector store for search query[{}]", memoryResult.size(), question); + + String sources = buildSourcesText(memoryResult); + List sourcesList = buildSources(memoryResult); + + SKContext skcontext = SKBuilders.context().build() + .setVariable("sources", sources) + .setVariable("input", question); + + + Mono result = semanticKernel.getFunction("RAG", "AnswerQuestion").invokeAsync(skcontext); + + return new RAGResponse.Builder() + //.prompt(plan.toPlanString()) + .prompt("placeholders for prompt") + .answer(result.block().getResult()) + .sources(sourcesList) + .sourcesAsText(sources) + .question(question) + .build(); + + } + + @Override + public void runStreaming(ChatGPTConversation questionOrConversation, RAGOptions options, OutputStream outputStream) { + throw new IllegalStateException("Streaming not supported for this approach"); + } + + private List buildSources(List memoryResult) { + return memoryResult + .stream() + .map(result -> { + return new ContentSource( + result.getMetadata().getId(), + result.getMetadata().getText() + ); + }) + .collect(Collectors.toList()); + } + + private String buildSourcesText(List memoryResult) { + StringBuilder sourcesContentBuffer = new StringBuilder(); + memoryResult.stream().forEach(memory -> { + sourcesContentBuffer.append(memory.getMetadata().getId()) + .append(": ") + .append(memory.getMetadata().getText().replace("\n", "")) + .append("\n"); + }); + return sourcesContentBuffer.toString(); + } + + private Kernel buildSemanticKernel(RAGOptions options) { + var kernelWithACS = SKBuilders.kernel() + .withMemoryStorage( + new CustomAzureCognitiveSearchMemoryStore("https://%s.search.windows.net".formatted(searchServiceName), + tokenCredential, + this.searchAsyncClient, + this.EMBEDDING_FIELD_NAME, + buildCustomMemoryMapper())) + 
.withDefaultAIService(SKBuilders.textEmbeddingGeneration() + .withOpenAIClient(openAIAsyncClient) + .withModelId(embeddingDeploymentModelId) + .build()) + .withDefaultAIService(SKBuilders.chatCompletion() + .withModelId(gptChatDeploymentModelId) + .withOpenAIClient(this.openAIAsyncClient) + .build()) + .build(); + + kernelWithACS.importSkillFromResources("semantickernel/Plugins", "RAG", "AnswerQuestion", null); + return kernelWithACS; + } + + private Function buildCustomMemoryMapper() { + return searchDocument -> { + return MemoryRecord.localRecord( + (String) searchDocument.get("sourcepage"), + (String) searchDocument.get("content"), + "chunked text from original source", + new Embedding((List) searchDocument.get(EMBEDDING_FIELD_NAME)), + (String) searchDocument.get("category"), + (String) searchDocument.get("id"), + null); + + }; + } +} diff --git a/app/backend/src/main/java/com/microsoft/openai/samples/rag/chat/controller/ChatController.java b/app/backend/src/main/java/com/microsoft/openai/samples/rag/chat/controller/ChatController.java index 22e029e..5d4af28 100644 --- a/app/backend/src/main/java/com/microsoft/openai/samples/rag/chat/controller/ChatController.java +++ b/app/backend/src/main/java/com/microsoft/openai/samples/rag/chat/controller/ChatController.java @@ -68,6 +68,7 @@ public ResponseEntity openAIAskStream( .excludeCategory(chatRequest.context().overrides().exclude_category()) .promptTemplate(chatRequest.context().overrides().prompt_template()) .top(chatRequest.context().overrides().top()) + .semanticKernelMode(chatRequest.context().overrides().semantic_kernel_mode()) .build(); RAGApproach ragApproach = ragApproachFactory.createApproach(chatRequest.approach(), RAGType.CHAT, ragOptions); @@ -118,6 +119,7 @@ public ResponseEntity openAIAsk(@RequestBody ChatAppRequest chatRe .excludeCategory(chatRequest.context().overrides().exclude_category()) .promptTemplate(chatRequest.context().overrides().prompt_template()) 
.top(chatRequest.context().overrides().top()) + .semanticKernelMode(chatRequest.context().overrides().semantic_kernel_mode()) .build(); RAGApproach ragApproach = ragApproachFactory.createApproach(chatRequest.approach(), RAGType.CHAT, ragOptions); diff --git a/app/backend/src/main/java/com/microsoft/openai/samples/rag/common/ChatGPTUtils.java b/app/backend/src/main/java/com/microsoft/openai/samples/rag/common/ChatGPTUtils.java index db775bf..5772ee8 100644 --- a/app/backend/src/main/java/com/microsoft/openai/samples/rag/common/ChatGPTUtils.java +++ b/app/backend/src/main/java/com/microsoft/openai/samples/rag/common/ChatGPTUtils.java @@ -4,7 +4,6 @@ import com.azure.ai.openai.models.ChatMessage; import com.azure.ai.openai.models.ChatRole; -import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -27,12 +26,12 @@ public static ChatCompletionsOptions buildDefaultChatCompletionsOptions(List messages) { + public static String formatAsChatML(List messages) { StringBuilder sb = new StringBuilder(); messages.forEach(message -> { - if(message.getRole() == ChatRole.USER){ + if (message.getRole() == ChatRole.USER) { sb.append("<|im_start|>user\n"); - } else if(message.getRole() == ChatRole.ASSISTANT) { + } else if (message.getRole() == ChatRole.ASSISTANT) { sb.append("<|im_start|>assistant\n"); } else { sb.append("<|im_start|>system\n"); @@ -42,10 +41,16 @@ public static String formatAsChatML(List messages) { return sb.toString(); } - public static String getLastUserQuestion(List messages){ - ChatGPTMessage message = messages.get(messages.size()-1); - if(message.role() != ChatGPTMessage.ChatRole.USER) - return message.content(); - return ""; + public static String getLastUserQuestion(List messages) { + List userMessages = messages + .stream() + .filter(message -> message.role() == ChatGPTMessage.ChatRole.USER) + .toList(); + + if (!userMessages.isEmpty()) { + return userMessages.get(userMessages.size() - 1).content(); + } else { + return ""; + } } } diff 
--git a/app/frontend/src/pages/chat/Chat.tsx b/app/frontend/src/pages/chat/Chat.tsx index bc86f7f..e4c2cbc 100644 --- a/app/frontend/src/pages/chat/Chat.tsx +++ b/app/frontend/src/pages/chat/Chat.tsx @@ -1,5 +1,5 @@ import { useRef, useState, useEffect } from "react"; -import { Checkbox, Panel, DefaultButton, TextField, SpinButton, Dropdown, IDropdownOption } from "@fluentui/react"; +import { Checkbox, ChoiceGroup, Panel, DefaultButton, TextField, SpinButton, Dropdown, IDropdownOption } from "@fluentui/react"; import { SparkleFilled } from "@fluentui/react-icons"; import readNDJSONStream from "ndjson-readablestream"; @@ -12,7 +12,8 @@ import { ChatAppResponseOrError, ChatAppRequest, ResponseMessage, - Approaches + Approaches, + SKMode } from "../../api"; import { Answer, AnswerError, AnswerLoading } from "../../components/Answer"; import { QuestionInput } from "../../components/QuestionInput"; @@ -27,6 +28,8 @@ import { TokenClaimsDisplay } from "../../components/TokenClaimsDisplay"; const Chat = () => { const [isConfigPanelOpen, setIsConfigPanelOpen] = useState(false); + const [approach, setApproach] = useState(Approaches.JAVA_OPENAI_SDK); + const [skMode, setSKMode] = useState(SKMode.Chains); const [promptTemplate, setPromptTemplate] = useState(""); const [retrieveCount, setRetrieveCount] = useState(3); const [retrievalMode, setRetrievalMode] = useState(RetrievalMode.Hybrid); @@ -129,10 +132,11 @@ const Chat = () => { semantic_captions: useSemanticCaptions, suggest_followup_questions: useSuggestFollowupQuestions, use_oid_security_filter: useOidSecurityFilter, - use_groups_security_filter: useGroupsSecurityFilter + use_groups_security_filter: useGroupsSecurityFilter, + semantic_kernel_mode: skMode } }, - approach: Approaches.JAVA_OPENAI_SDK, + approach: approach, }; const response = await chatApi(request, token?.accessToken); @@ -182,6 +186,14 @@ const Chat = () => { setRetrievalMode(option?.data || RetrievalMode.Hybrid); }; + const onSKModeChange = (_ev: 
React.FormEvent, option?: IDropdownOption | undefined, index?: number | undefined) => { + setSKMode(option?.data || SKMode.Chains); + }; + + const onApproachChange = (_ev?: React.FormEvent, option?: IChoiceGroupOption) => { + setApproach((option?.key as Approaches) || Approaches.JAVA_OPENAI_SDK); + }; + const onUseSemanticRankerChange = (_ev?: React.FormEvent, checked?: boolean) => { setUseSemanticRanker(!!checked); }; @@ -235,6 +247,21 @@ const Chat = () => { setSelectedAnswer(index); }; + const approaches: IChoiceGroupOption[] = [ + { + key: Approaches.JAVA_OPENAI_SDK, + text: "Java Azure Open AI SDK" + }, + { + key: Approaches.JAVA_SEMANTIC_KERNEL, + text: "Java Semantic Kernel - Memory" + }, + { + key: Approaches.JAVA_SEMANTIC_KERNEL_PLANNER, + text: "Java Semantic Kernel - Orchestration" + } + ]; + return (
@@ -412,6 +439,38 @@ const Chat = () => { label="Stream chat completion responses" onChange={onShouldStreamChange} /> + + + + + {(approach === Approaches.JAVA_OPENAI_SDK || approach === Approaches.JAVA_SEMANTIC_KERNEL) && ( + + )} + {(approach === Approaches.JAVA_SEMANTIC_KERNEL_PLANNER) && ( + + )} {useLogin && }
From 57272e566c988ba541a28e514038e568d597311f Mon Sep 17 00:00:00 2001 From: Milder Hernandez Cagua Date: Wed, 25 Oct 2023 01:37:34 -0700 Subject: [PATCH 02/13] Add SK chat support --- ...aSemanticKernelWithMemoryChatApproach.java | 55 ++++++++++++++++--- 1 file changed, 46 insertions(+), 9 deletions(-) diff --git a/app/backend/src/main/java/com/microsoft/openai/samples/rag/chat/approaches/semantickernel/JavaSemanticKernelWithMemoryChatApproach.java b/app/backend/src/main/java/com/microsoft/openai/samples/rag/chat/approaches/semantickernel/JavaSemanticKernelWithMemoryChatApproach.java index fc0a5e6..7b370e9 100644 --- a/app/backend/src/main/java/com/microsoft/openai/samples/rag/chat/approaches/semantickernel/JavaSemanticKernelWithMemoryChatApproach.java +++ b/app/backend/src/main/java/com/microsoft/openai/samples/rag/chat/approaches/semantickernel/JavaSemanticKernelWithMemoryChatApproach.java @@ -10,13 +10,16 @@ import com.microsoft.openai.samples.rag.approaches.RAGResponse; import com.microsoft.openai.samples.rag.ask.approaches.semantickernel.memory.CustomAzureCognitiveSearchMemoryStore; import com.microsoft.openai.samples.rag.common.ChatGPTConversation; +import com.microsoft.openai.samples.rag.common.ChatGPTMessage; import com.microsoft.openai.samples.rag.common.ChatGPTUtils; import com.microsoft.semantickernel.Kernel; import com.microsoft.semantickernel.SKBuilders; import com.microsoft.semantickernel.ai.embeddings.Embedding; +import com.microsoft.semantickernel.chatcompletion.ChatCompletion; +import com.microsoft.semantickernel.connectors.ai.openai.chatcompletion.OpenAIChatCompletion; +import com.microsoft.semantickernel.connectors.ai.openai.chatcompletion.OpenAIChatHistory; import com.microsoft.semantickernel.memory.MemoryQueryResult; import com.microsoft.semantickernel.memory.MemoryRecord; -import com.microsoft.semantickernel.orchestration.SKContext; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import 
org.springframework.beans.factory.annotation.Value; @@ -53,6 +56,24 @@ public class JavaSemanticKernelWithMemoryChatApproach implements RAGApproach>. + Try not to repeat questions that have already been asked. + Only generate questions and do not generate any text before or after the questions, such as 'Next Questions' + """; + private static final String SYSTEM_CHAT_MESSAGE_TEMPLATE = """ + Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers. + Answer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question. + For tabular information return it as an html table. Do not return markdown format. + Each source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, e.g. [info1.txt]. Don't combine sources, list each source separately, e.g. [info1.txt][info2.pdf]. + %s + + %s + Sources: + %s + """ ; + public JavaSemanticKernelWithMemoryChatApproach(TokenCredential tokenCredential, OpenAIAsyncClient openAIAsyncClient, SearchAsyncClient searchAsyncClient) { this.tokenCredential = tokenCredential; this.openAIAsyncClient = openAIAsyncClient; @@ -61,7 +82,6 @@ public JavaSemanticKernelWithMemoryChatApproach(TokenCredential tokenCredential, @Override public RAGResponse run(ChatGPTConversation questionOrConversation, RAGOptions options) { - String question = ChatGPTUtils.getLastUserQuestion(questionOrConversation.getMessages()); //Build semantic kernel with Azure Cognitive Search as memory store. AnswerQuestion skill is imported from resources. 
@@ -84,22 +104,20 @@ public RAGResponse run(ChatGPTConversation questionOrConversation, RAGOptions op String sources = buildSourcesText(memoryResult); List sourcesList = buildSources(memoryResult); - SKContext skcontext = SKBuilders.context().build() - .setVariable("sources", sources) - .setVariable("input", question); + // Use ChatCompletion Service to generate a reply + OpenAIChatCompletion chat = (OpenAIChatCompletion) semanticKernel.getService(null, ChatCompletion.class); + OpenAIChatHistory history = buildChatHistory(questionOrConversation, options, chat, sources); - - Mono result = semanticKernel.getFunction("RAG", "AnswerQuestion").invokeAsync(skcontext); + Mono reply = chat.generateMessageAsync(history, null); return new RAGResponse.Builder() //.prompt(plan.toPlanString()) .prompt("placeholders for prompt") - .answer(result.block().getResult()) + .answer(reply.block()) .sources(sourcesList) .sourcesAsText(sources) .question(question) .build(); - } @Override @@ -107,6 +125,25 @@ public void runStreaming(ChatGPTConversation questionOrConversation, RAGOptions throw new IllegalStateException("Streaming not supported for this approach"); } + private OpenAIChatHistory buildChatHistory(ChatGPTConversation conversation, RAGOptions options, OpenAIChatCompletion chat, + String sources) { + String systemMessage = SYSTEM_CHAT_MESSAGE_TEMPLATE.formatted( + options.isSuggestFollowupQuestions() ? FOLLOW_UP_QUESTIONS_TEMPLATE : "", + options.getPromptTemplate() != null ? 
options.getPromptTemplate() : "", + sources); + + OpenAIChatHistory chatHistory = chat.createNewChat(systemMessage); + conversation.getMessages().forEach(message -> { + if(message.role() == ChatGPTMessage.ChatRole.USER){ + chatHistory.addUserMessage(message.content()); + } else if(message.role() == ChatGPTMessage.ChatRole.ASSISTANT) { + chatHistory.addAssistantMessage(message.content()); + } + }); + + return chatHistory; + } + private List buildSources(List memoryResult) { return memoryResult .stream() From f14456f2d5f63bc49edf740b7725c68d6828d1a8 Mon Sep 17 00:00:00 2001 From: Milder Hernandez Cagua Date: Thu, 26 Oct 2023 23:24:18 -0700 Subject: [PATCH 03/13] Find sources based on conversation --- ...aSemanticKernelWithMemoryChatApproach.java | 70 +++++++++++++++---- 1 file changed, 56 insertions(+), 14 deletions(-) diff --git a/app/backend/src/main/java/com/microsoft/openai/samples/rag/chat/approaches/semantickernel/JavaSemanticKernelWithMemoryChatApproach.java b/app/backend/src/main/java/com/microsoft/openai/samples/rag/chat/approaches/semantickernel/JavaSemanticKernelWithMemoryChatApproach.java index 7b370e9..2916cd2 100644 --- a/app/backend/src/main/java/com/microsoft/openai/samples/rag/chat/approaches/semantickernel/JavaSemanticKernelWithMemoryChatApproach.java +++ b/app/backend/src/main/java/com/microsoft/openai/samples/rag/chat/approaches/semantickernel/JavaSemanticKernelWithMemoryChatApproach.java @@ -20,6 +20,9 @@ import com.microsoft.semantickernel.connectors.ai.openai.chatcompletion.OpenAIChatHistory; import com.microsoft.semantickernel.memory.MemoryQueryResult; import com.microsoft.semantickernel.memory.MemoryRecord; +import com.microsoft.semantickernel.orchestration.SKContext; +import com.microsoft.semantickernel.orchestration.SKFunction; +import com.microsoft.semantickernel.semanticfunctions.PromptTemplateConfig; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Value; @@ -87,22 +90,12 @@ 
public RAGResponse run(ChatGPTConversation questionOrConversation, RAGOptions op //Build semantic kernel with Azure Cognitive Search as memory store. AnswerQuestion skill is imported from resources. Kernel semanticKernel = buildSemanticKernel(options); - /** - * Use semantic kernel built-in memory.searchAsync. It uses OpenAI to generate embeddings for the provided question. - * Question embeddings are provided to cognitive search via search options. - */ - List memoryResult = semanticKernel.getMemory().searchAsync( - indexName, - question, - options.getTop(), - 0.5f, - false) - .block(); + List sourcesResult = getSourcesFromConversation(questionOrConversation, semanticKernel, options); - LOGGER.info("Total {} sources found in cognitive vector store for search query[{}]", memoryResult.size(), question); + LOGGER.info("Total {} sources found in cognitive vector store for search query[{}]", sourcesResult.size(), question); - String sources = buildSourcesText(memoryResult); - List sourcesList = buildSources(memoryResult); + String sources = buildSourcesText(sourcesResult); + List sourcesList = buildSources(sourcesResult); // Use ChatCompletion Service to generate a reply OpenAIChatCompletion chat = (OpenAIChatCompletion) semanticKernel.getService(null, ChatCompletion.class); @@ -125,6 +118,55 @@ public void runStreaming(ChatGPTConversation questionOrConversation, RAGOptions throw new IllegalStateException("Streaming not supported for this approach"); } + private List getSourcesFromConversation (ChatGPTConversation conversation, Kernel kernel, RAGOptions options) { + String searchQueryPrompt = """ + Generate a search query for the below conversation. + Do not include cited source filenames and document names e.g info.txt or doc.pdf in the search query terms. + Do not include any text inside [] or <<>> in the search query terms. + Do not enclose the search query in quotes or double quotes. 
+ conversation: + {{$conversation}} + """ ; + + SKContext skcontext = SKBuilders.context().build() + .setVariable("conversation", ChatGPTUtils.formatAsChatML(conversation.toOpenAIChatMessages())); + + SKFunction searchQuery = kernel + .getSemanticFunctionBuilder() + .withPromptTemplate(searchQueryPrompt) + .withFunctionName("searchQuery") + .withCompletionConfig( + new PromptTemplateConfig.CompletionConfig( + 0.2, + 1, + 0.0, + 0.0, + 1024 + ) + ) + .build(); + + Mono result = searchQuery.invokeAsync(skcontext); + String query = result.block().getResult(); + + LOGGER.info("SEARCH QUERY"); + LOGGER.info(query); + + /** + * Use semantic kernel built-in memory.searchAsync. It uses OpenAI to generate embeddings for the provided question. + * Question embeddings are provided to cognitive search via search options. + */ + List memoryResult = kernel.getMemory().searchAsync( + indexName, + query, + options.getTop(), + 0.5f, + false) + .block(); + + return memoryResult; + } + private OpenAIChatHistory buildChatHistory(ChatGPTConversation conversation, RAGOptions options, OpenAIChatCompletion chat, String sources) { String systemMessage = SYSTEM_CHAT_MESSAGE_TEMPLATE.formatted( From 26674870349ee7916c44d69036c707f906816cca Mon Sep 17 00:00:00 2001 From: Milder Hernandez Cagua Date: Thu, 26 Oct 2023 23:26:35 -0700 Subject: [PATCH 04/13] Remove unnecessary log --- .../JavaSemanticKernelWithMemoryChatApproach.java | 3 --- 1 file changed, 3 deletions(-) diff --git a/app/backend/src/main/java/com/microsoft/openai/samples/rag/chat/approaches/semantickernel/JavaSemanticKernelWithMemoryChatApproach.java b/app/backend/src/main/java/com/microsoft/openai/samples/rag/chat/approaches/semantickernel/JavaSemanticKernelWithMemoryChatApproach.java index 2916cd2..dc24519 100644 --- a/app/backend/src/main/java/com/microsoft/openai/samples/rag/chat/approaches/semantickernel/JavaSemanticKernelWithMemoryChatApproach.java +++ 
b/app/backend/src/main/java/com/microsoft/openai/samples/rag/chat/approaches/semantickernel/JavaSemanticKernelWithMemoryChatApproach.java @@ -149,9 +149,6 @@ private List getSourcesFromConversation (ChatGPTConversation Mono result = searchQuery.invokeAsync(skcontext); String query = result.block().getResult(); - LOGGER.info("SEARCH QUERY"); - LOGGER.info(query); - /** * Use semantic kernel built-in memory.searchAsync. It uses OpenAI to generate embeddings for the provided question. * Question embeddings are provided to cognitive search via search options. From d8d6b14eb558ad2eab87e72be76be24e11ae9695 Mon Sep 17 00:00:00 2001 From: Milder Hernandez Cagua Date: Fri, 27 Oct 2023 14:10:45 -0700 Subject: [PATCH 05/13] Convert to file plugins --- ...aSemanticKernelWithMemoryChatApproach.java | 97 ++++--------------- .../RAG/AnswerConversation/config.json | 28 ++++++ .../RAG/AnswerConversation/skprompt.txt | 38 ++++++++ .../Plugins/RAG/ExtractKeywords/config.json | 20 ++++ .../Plugins/RAG/ExtractKeywords/skprompt.txt | 7 ++ 5 files changed, 113 insertions(+), 77 deletions(-) create mode 100644 app/backend/src/main/resources/semantickernel/Plugins/RAG/AnswerConversation/config.json create mode 100644 app/backend/src/main/resources/semantickernel/Plugins/RAG/AnswerConversation/skprompt.txt create mode 100644 app/backend/src/main/resources/semantickernel/Plugins/RAG/ExtractKeywords/config.json create mode 100644 app/backend/src/main/resources/semantickernel/Plugins/RAG/ExtractKeywords/skprompt.txt diff --git a/app/backend/src/main/java/com/microsoft/openai/samples/rag/chat/approaches/semantickernel/JavaSemanticKernelWithMemoryChatApproach.java b/app/backend/src/main/java/com/microsoft/openai/samples/rag/chat/approaches/semantickernel/JavaSemanticKernelWithMemoryChatApproach.java index dc24519..1fc6b85 100644 --- a/app/backend/src/main/java/com/microsoft/openai/samples/rag/chat/approaches/semantickernel/JavaSemanticKernelWithMemoryChatApproach.java +++ 
b/app/backend/src/main/java/com/microsoft/openai/samples/rag/chat/approaches/semantickernel/JavaSemanticKernelWithMemoryChatApproach.java @@ -58,25 +58,6 @@ public class JavaSemanticKernelWithMemoryChatApproach implements RAGApproach>. - Try not to repeat questions that have already been asked. - Only generate questions and do not generate any text before or after the questions, such as 'Next Questions' - """; - private static final String SYSTEM_CHAT_MESSAGE_TEMPLATE = """ - Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers. - Answer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question. - For tabular information return it as an html table. Do not return markdown format. - Each source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, e.g. [info1.txt]. Don't combine sources, list each source separately, e.g. [info1.txt][info2.pdf]. - %s - - %s - Sources: - %s - """ ; - public JavaSemanticKernelWithMemoryChatApproach(TokenCredential tokenCredential, OpenAIAsyncClient openAIAsyncClient, SearchAsyncClient searchAsyncClient) { this.tokenCredential = tokenCredential; this.openAIAsyncClient = openAIAsyncClient; @@ -87,26 +68,30 @@ public JavaSemanticKernelWithMemoryChatApproach(TokenCredential tokenCredential, public RAGResponse run(ChatGPTConversation questionOrConversation, RAGOptions options) { String question = ChatGPTUtils.getLastUserQuestion(questionOrConversation.getMessages()); - //Build semantic kernel with Azure Cognitive Search as memory store. AnswerQuestion skill is imported from resources. 
+ // STEP 1: Build semantic kernel with Azure Cognitive Search as memory store. AnswerQuestion skill is imported from resources. Kernel semanticKernel = buildSemanticKernel(options); - List sourcesResult = getSourcesFromConversation(questionOrConversation, semanticKernel, options); + // STEP 2: Retrieve relevant documents using keywords extracted from the chat history + String conversation = ChatGPTUtils.formatAsChatML(questionOrConversation.toOpenAIChatMessages()); + List sourcesResult = getSourcesFromConversation(conversation, semanticKernel, options); LOGGER.info("Total {} sources found in cognitive vector store for search query[{}]", sourcesResult.size(), question); String sources = buildSourcesText(sourcesResult); List sourcesList = buildSources(sourcesResult); - // Use ChatCompletion Service to generate a reply - OpenAIChatCompletion chat = (OpenAIChatCompletion) semanticKernel.getService(null, ChatCompletion.class); - OpenAIChatHistory history = buildChatHistory(questionOrConversation, options, chat, sources); + // STEP 3: Generate a contextual and content specific answer using the search results and chat history + SKFunction answerConversation = semanticKernel.getFunction("RAG", "AnswerConversation"); + SKContext skcontext = SKBuilders.context().build() + .setVariable("sources", sources) + .setVariable("conversation", conversation) + .setVariable("input", question); - Mono reply = chat.generateMessageAsync(history, null); + Mono reply = answerConversation.invokeAsync(skcontext); return new RAGResponse.Builder() - //.prompt(plan.toPlanString()) .prompt("placeholders for prompt") - .answer(reply.block()) + .answer(reply.block().getResult()) .sources(sourcesList) .sourcesAsText(sources) .question(question) @@ -118,36 +103,12 @@ public void runStreaming(ChatGPTConversation questionOrConversation, RAGOptions throw new IllegalStateException("Streaming not supported for this approach"); } - private List getSourcesFromConversation (ChatGPTConversation conversation, 
Kernel kernel, RAGOptions options) { - String searchQueryPrompt = """ - Generate a search query for the below conversation. - Do not include cited source filenames and document names e.g info.txt or doc.pdf in the search query terms. - Do not include any text inside [] or <<>> in the search query terms. - Do not enclose the search query in quotes or double quotes. - conversation: - {{$conversation}} - """ ; - - SKContext skcontext = SKBuilders.context().build() - .setVariable("conversation", ChatGPTUtils.formatAsChatML(conversation.toOpenAIChatMessages())); - - SKFunction searchQuery = kernel - .getSemanticFunctionBuilder() - .withPromptTemplate(searchQueryPrompt) - .withFunctionName("searchQuery") - .withCompletionConfig( - new PromptTemplateConfig.CompletionConfig( - 0.2, - 1, - 0.0, - 0.0, - 1024 - ) - ) - .build(); + private List getSourcesFromConversation (String conversation, Kernel kernel, RAGOptions options) { + SKFunction extractKeywords = kernel.getFunction("RAG", "ExtractKeywords"); + SKContext skcontext = SKBuilders.context().build().setVariable("conversation", conversation); - Mono result = searchQuery.invokeAsync(skcontext); - String query = result.block().getResult(); + Mono result = extractKeywords.invokeAsync(skcontext); + String searchQuery = result.block().getResult(); /** * Use semantic kernel built-in memory.searchAsync. It uses OpenAI to generate embeddings for the provided question. @@ -155,7 +116,7 @@ private List getSourcesFromConversation (ChatGPTConversation */ List memoryResult = kernel.getMemory().searchAsync( indexName, - query, + searchQuery, options.getTop(), 0.5f, false) @@ -164,25 +125,6 @@ private List getSourcesFromConversation (ChatGPTConversation return memoryResult; } - private OpenAIChatHistory buildChatHistory(ChatGPTConversation conversation, RAGOptions options, OpenAIChatCompletion chat, - String sources) { - String systemMessage = SYSTEM_CHAT_MESSAGE_TEMPLATE.formatted( - options.isSuggestFollowupQuestions() ? 
FOLLOW_UP_QUESTIONS_TEMPLATE : "", - options.getPromptTemplate() != null ? options.getPromptTemplate() : "", - sources); - - OpenAIChatHistory chatHistory = chat.createNewChat(systemMessage); - conversation.getMessages().forEach(message -> { - if(message.role() == ChatGPTMessage.ChatRole.USER){ - chatHistory.addUserMessage(message.content()); - } else if(message.role() == ChatGPTMessage.ChatRole.ASSISTANT) { - chatHistory.addAssistantMessage(message.content()); - } - }); - - return chatHistory; - } - private List buildSources(List memoryResult) { return memoryResult .stream() @@ -224,7 +166,8 @@ private Kernel buildSemanticKernel(RAGOptions options) { .build()) .build(); - kernelWithACS.importSkillFromResources("semantickernel/Plugins", "RAG", "AnswerQuestion", null); + kernelWithACS.importSkillFromResources("semantickernel/Plugins", "RAG", "AnswerConversation", null); + kernelWithACS.importSkillFromResources("semantickernel/Plugins", "RAG", "ExtractKeywords", null); return kernelWithACS; } diff --git a/app/backend/src/main/resources/semantickernel/Plugins/RAG/AnswerConversation/config.json b/app/backend/src/main/resources/semantickernel/Plugins/RAG/AnswerConversation/config.json new file mode 100644 index 0000000..17e0b73 --- /dev/null +++ b/app/backend/src/main/resources/semantickernel/Plugins/RAG/AnswerConversation/config.json @@ -0,0 +1,28 @@ +{ + "schema": 1, + "description": "Answer a chat conversation question based on the provided sources", + "type": "completion", + "completion": { + "max_tokens": 1024, + "temperature": 0.2, + "top_p": 1, + "presence_penalty": 0.0, + "frequency_penalty": 0.0 + }, + "input": { + "parameters": [ + { + "name": "input", + "description": "Question to answer" + }, + { + "name": "sources", + "description": "Information used to answer the question" + }, + { + "name": "conversation", + "description": "Chat history" + } + ] + } +} \ No newline at end of file diff --git 
a/app/backend/src/main/resources/semantickernel/Plugins/RAG/AnswerConversation/skprompt.txt b/app/backend/src/main/resources/semantickernel/Plugins/RAG/AnswerConversation/skprompt.txt new file mode 100644 index 0000000..d18c711 --- /dev/null +++ b/app/backend/src/main/resources/semantickernel/Plugins/RAG/AnswerConversation/skprompt.txt @@ -0,0 +1,38 @@ +You are an intelligent assistant helping Contoso Inc employees with their healthcare plan questions and employee handbook questions. +Use 'you' to refer to the individual asking the questions even if they ask with 'I'. +Answer the following question using only the information below. +For tabular information return it as an html table. Do not return markdown format. +Each source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. +If you cannot answer using the sources below, say you don't know. + +[EXAMPLES] +[EXAMPLE 1] +[INFORMATION] +info1.txt: deductibles depend on whether you are in-network or out-of-network. In-network deductibles are $500 for employee and $1000 for family. Out-of-network deductibles are $1000 for employee and $2000 for family. +info2.pdf: Overlake is in-network for the employee plan. +info3.pdf: Overlake is the name of the area that includes a park and ride near Bellevue. +info4.pdf: In-network institutions include Overlake, Swedish and others in the region +[END INFORMATION] +Question: What is the deductible for the employee plan for a visit to Overlake in Bellevue? +Answer: In-network deductibles are $500 for employee and $1000 for family [info1.txt] and Overlake is in-network for the employee plan [info2.pdf][info4.pdf]. +[END EXAMPLE 1] + + +[EXAMPLE 2] +[INFORMATION] +info1.txt: deductibles depend on whether you are in-network or out-of-network. In-network deductibles are $500 for employee and $1000 for family. Out-of-network deductibles are $1000 for employee and $2000 for family. 
+info2.pdf: Overlake is in-network for the employee plan. +info3.pdf: Overlake is the name of the area that includes a park and ride near Bellevue. +info4.pdf: In-network institutions include Overlake, Swedish and others in the region +[END INFORMATION] +Question: what are the responsibilities of the product manager? +Answer: I do not have enough information to answer that. +[END EXAMPLE 2] +[END EXAMPLES] + +[INFORMATION] +{{$sources}} +{{$conversation}} +[END INFORMATION] + +Question: {{$input}} \ No newline at end of file diff --git a/app/backend/src/main/resources/semantickernel/Plugins/RAG/ExtractKeywords/config.json b/app/backend/src/main/resources/semantickernel/Plugins/RAG/ExtractKeywords/config.json new file mode 100644 index 0000000..eea1694 --- /dev/null +++ b/app/backend/src/main/resources/semantickernel/Plugins/RAG/ExtractKeywords/config.json @@ -0,0 +1,20 @@ +{ + "schema": 1, + "description": "Extract keywords from a conversation to form a search query", + "type": "completion", + "completion": { + "max_tokens": 1024, + "temperature": 0.2, + "top_p": 1, + "presence_penalty": 0.0, + "frequency_penalty": 0.0 + }, + "input": { + "parameters": [ + { + "name": "conversation", + "description": "Chat conversation" + } + ] + } +} \ No newline at end of file diff --git a/app/backend/src/main/resources/semantickernel/Plugins/RAG/ExtractKeywords/skprompt.txt b/app/backend/src/main/resources/semantickernel/Plugins/RAG/ExtractKeywords/skprompt.txt new file mode 100644 index 0000000..20ea624 --- /dev/null +++ b/app/backend/src/main/resources/semantickernel/Plugins/RAG/ExtractKeywords/skprompt.txt @@ -0,0 +1,7 @@ +Generate a search query for the below conversation. +Do not include cited source filenames and document names e.g info.txt or doc.pdf in the search query terms. +Do not include any text inside [] or <<>> in the search query terms. +Do not enclose the search query in quotes or double quotes. 
+ +conversation: +{{$conversation}} \ No newline at end of file From e81cbc6db4fb2cffb4d79cc66b22c3e8f58ec765 Mon Sep 17 00:00:00 2001 From: Milder Hernandez Cagua Date: Fri, 27 Oct 2023 14:32:22 -0700 Subject: [PATCH 06/13] Remove cors configuration --- .../microsoft/openai/samples/rag/Application.java | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/app/backend/src/main/java/com/microsoft/openai/samples/rag/Application.java b/app/backend/src/main/java/com/microsoft/openai/samples/rag/Application.java index 7483b2b..84bda37 100644 --- a/app/backend/src/main/java/com/microsoft/openai/samples/rag/Application.java +++ b/app/backend/src/main/java/com/microsoft/openai/samples/rag/Application.java @@ -17,16 +17,4 @@ public static void main(String[] args) { LOG.info("Application profile from system property is [{}]", System.getProperty("spring.profiles.active")); new SpringApplication(Application.class).run(args); } - - @Bean - public WebMvcConfigurer corsConfigurer() { - return new WebMvcConfigurer() { - @Override - public void addCorsMappings(CorsRegistry registry) { - registry - .addMapping("/api/**") - .allowedOrigins("http://localhost:8080"); - } - }; - } } From dfb4e9b499e601f64fa6fb5e525680ec65bd7da1 Mon Sep 17 00:00:00 2001 From: Milder Hernandez Cagua Date: Mon, 30 Oct 2023 10:28:38 -0700 Subject: [PATCH 07/13] Add suggestions variable to AnswerConversation skill --- ...aSemanticKernelWithMemoryChatApproach.java | 1 + .../RAG/AnswerConversation/config.json | 4 ++++ .../RAG/AnswerConversation/skprompt.txt | 22 ++++++++++++++++--- 3 files changed, 24 insertions(+), 3 deletions(-) diff --git a/app/backend/src/main/java/com/microsoft/openai/samples/rag/chat/approaches/semantickernel/JavaSemanticKernelWithMemoryChatApproach.java b/app/backend/src/main/java/com/microsoft/openai/samples/rag/chat/approaches/semantickernel/JavaSemanticKernelWithMemoryChatApproach.java index 1fc6b85..744dac4 100644 --- 
a/app/backend/src/main/java/com/microsoft/openai/samples/rag/chat/approaches/semantickernel/JavaSemanticKernelWithMemoryChatApproach.java +++ b/app/backend/src/main/java/com/microsoft/openai/samples/rag/chat/approaches/semantickernel/JavaSemanticKernelWithMemoryChatApproach.java @@ -85,6 +85,7 @@ public RAGResponse run(ChatGPTConversation questionOrConversation, RAGOptions op SKContext skcontext = SKBuilders.context().build() .setVariable("sources", sources) .setVariable("conversation", conversation) + .setVariable("suggestions", String.valueOf(options.isSuggestFollowupQuestions())) .setVariable("input", question); Mono reply = answerConversation.invokeAsync(skcontext); diff --git a/app/backend/src/main/resources/semantickernel/Plugins/RAG/AnswerConversation/config.json b/app/backend/src/main/resources/semantickernel/Plugins/RAG/AnswerConversation/config.json index 17e0b73..625e73f 100644 --- a/app/backend/src/main/resources/semantickernel/Plugins/RAG/AnswerConversation/config.json +++ b/app/backend/src/main/resources/semantickernel/Plugins/RAG/AnswerConversation/config.json @@ -22,6 +22,10 @@ { "name": "conversation", "description": "Chat history" + }, + { + "name": "suggestions", + "description": "Whether to suggest follow-up questions" } ] } diff --git a/app/backend/src/main/resources/semantickernel/Plugins/RAG/AnswerConversation/skprompt.txt b/app/backend/src/main/resources/semantickernel/Plugins/RAG/AnswerConversation/skprompt.txt index d18c711..89b720a 100644 --- a/app/backend/src/main/resources/semantickernel/Plugins/RAG/AnswerConversation/skprompt.txt +++ b/app/backend/src/main/resources/semantickernel/Plugins/RAG/AnswerConversation/skprompt.txt @@ -5,6 +5,13 @@ For tabular information return it as an html table. Do not return markdown forma Each source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. If you cannot answer using the sources below, say you don't know. 
+If SuggestFollowUpQuestions is true then after answering question, also generate three very brief follow-up questions that the user would likely ask next. +Use double angle brackets to reference the questions, e.g. <>. +Try not to repeat questions that have already been asked. +Only generate questions and do not generate any text before or after the questions, such as 'Next Questions'. + +If SuggestFollowUpQuestions is false, do not generate extra questions. + [EXAMPLES] [EXAMPLE 1] [INFORMATION] @@ -14,7 +21,12 @@ info3.pdf: Overlake is the name of the area that includes a park and ride near B info4.pdf: In-network institutions include Overlake, Swedish and others in the region [END INFORMATION] Question: What is the deductible for the employee plan for a visit to Overlake in Bellevue? -Answer: In-network deductibles are $500 for employee and $1000 for family [info1.txt] and Overlake is in-network for the employee plan [info2.pdf][info4.pdf]. +SuggestFollowUpQuestions: true +Answer: +In-network deductibles are $500 for employee and $1000 for family [info1.txt] and Overlake is in-network for the employee plan [info2.pdf][info4.pdf]. +<> +<> +<> [END EXAMPLE 1] @@ -26,7 +38,9 @@ info3.pdf: Overlake is the name of the area that includes a park and ride near B info4.pdf: In-network institutions include Overlake, Swedish and others in the region [END INFORMATION] Question: what are the responsibilities of the product manager? -Answer: I do not have enough information to answer that. +SuggestFollowUpQuestions: false +Answer: +I do not have enough information to answer that. [END EXAMPLE 2] [END EXAMPLES] @@ -35,4 +49,6 @@ Answer: I do not have enough information to answer that. 
{{$conversation}} [END INFORMATION] -Question: {{$input}} \ No newline at end of file +Question: {{$input}} +SuggestFollowUpQuestions: {{$suggestions}} +Answer: \ No newline at end of file From ee85ece006c3c2118f88c82d999fcea091a152e8 Mon Sep 17 00:00:00 2001 From: Milder Hernandez Cagua Date: Wed, 1 Nov 2023 12:04:09 -0700 Subject: [PATCH 08/13] Add streaming to sk chat --- ...aSemanticKernelWithMemoryChatApproach.java | 56 ++++++++++++++++--- 1 file changed, 49 insertions(+), 7 deletions(-) diff --git a/app/backend/src/main/java/com/microsoft/openai/samples/rag/chat/approaches/semantickernel/JavaSemanticKernelWithMemoryChatApproach.java b/app/backend/src/main/java/com/microsoft/openai/samples/rag/chat/approaches/semantickernel/JavaSemanticKernelWithMemoryChatApproach.java index 744dac4..1ab64fe 100644 --- a/app/backend/src/main/java/com/microsoft/openai/samples/rag/chat/approaches/semantickernel/JavaSemanticKernelWithMemoryChatApproach.java +++ b/app/backend/src/main/java/com/microsoft/openai/samples/rag/chat/approaches/semantickernel/JavaSemanticKernelWithMemoryChatApproach.java @@ -4,31 +4,29 @@ import com.azure.core.credential.TokenCredential; import com.azure.search.documents.SearchAsyncClient; import com.azure.search.documents.SearchDocument; +import com.fasterxml.jackson.databind.ObjectMapper; import com.microsoft.openai.samples.rag.approaches.ContentSource; import com.microsoft.openai.samples.rag.approaches.RAGApproach; import com.microsoft.openai.samples.rag.approaches.RAGOptions; import com.microsoft.openai.samples.rag.approaches.RAGResponse; import com.microsoft.openai.samples.rag.ask.approaches.semantickernel.memory.CustomAzureCognitiveSearchMemoryStore; import com.microsoft.openai.samples.rag.common.ChatGPTConversation; -import com.microsoft.openai.samples.rag.common.ChatGPTMessage; import com.microsoft.openai.samples.rag.common.ChatGPTUtils; +import com.microsoft.openai.samples.rag.controller.ChatResponse; import com.microsoft.semantickernel.Kernel; 
import com.microsoft.semantickernel.SKBuilders; import com.microsoft.semantickernel.ai.embeddings.Embedding; -import com.microsoft.semantickernel.chatcompletion.ChatCompletion; -import com.microsoft.semantickernel.connectors.ai.openai.chatcompletion.OpenAIChatCompletion; -import com.microsoft.semantickernel.connectors.ai.openai.chatcompletion.OpenAIChatHistory; import com.microsoft.semantickernel.memory.MemoryQueryResult; import com.microsoft.semantickernel.memory.MemoryRecord; import com.microsoft.semantickernel.orchestration.SKContext; import com.microsoft.semantickernel.orchestration.SKFunction; -import com.microsoft.semantickernel.semanticfunctions.PromptTemplateConfig; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Value; import org.springframework.stereotype.Component; import reactor.core.publisher.Mono; +import java.io.IOException; import java.io.OutputStream; import java.util.List; import java.util.function.Function; @@ -47,6 +45,8 @@ public class JavaSemanticKernelWithMemoryChatApproach implements RAGApproach sourcesResult = getSourcesFromConversation(conversation, semanticKernel, options); + + LOGGER.info("Total {} sources found in cognitive vector store for search query[{}]", sourcesResult.size(), question); + + String sources = buildSourcesText(sourcesResult); + List sourcesList = buildSources(sourcesResult); + + // STEP 3: Generate a contextual and content specific answer using the search results and chat history + SKFunction answerConversation = semanticKernel.getFunction("RAG", "AnswerConversation"); + SKContext skcontext = SKBuilders.context().build() + .setVariable("sources", sources) + .setVariable("conversation", conversation) + .setVariable("suggestions", String.valueOf(options.isSuggestFollowupQuestions())) + .setVariable("input", question); + + SKContext reply = (SKContext) answerConversation.invokeAsync(skcontext).block(); + + RAGResponse ragResponse = + new RAGResponse.Builder() + 
.question( + ChatGPTUtils.getLastUserQuestion( + questionOrConversation.getMessages())) + .prompt("placeholders for prompt") + .answer(reply.getResult()) + .sources(sourcesList) + .sourcesAsText(sources) + .build(); + + try { + String value = objectMapper.writeValueAsString(ChatResponse.buildChatResponse(ragResponse)) + "\n"; + outputStream.write(value.getBytes()); + outputStream.flush(); + } catch (IOException e) { + throw new RuntimeException(e); + } } private List getSourcesFromConversation (String conversation, Kernel kernel, RAGOptions options) { From 802b107dbdd25775887a4f56ac8ed17f83377128 Mon Sep 17 00:00:00 2001 From: Milder Hernandez Cagua Date: Thu, 2 Nov 2023 18:37:23 -0700 Subject: [PATCH 09/13] Fix Chat.tsx --- app/frontend/src/pages/chat/Chat.tsx | 67 ++++++++++++---------------- 1 file changed, 29 insertions(+), 38 deletions(-) diff --git a/app/frontend/src/pages/chat/Chat.tsx b/app/frontend/src/pages/chat/Chat.tsx index 2c72dc5..5ee09ca 100644 --- a/app/frontend/src/pages/chat/Chat.tsx +++ b/app/frontend/src/pages/chat/Chat.tsx @@ -1,5 +1,5 @@ import { useRef, useState, useEffect } from "react"; -import { Checkbox, ChoiceGroup, Panel, DefaultButton, TextField, SpinButton, Dropdown, IDropdownOption } from "@fluentui/react"; +import { Checkbox, ChoiceGroup, Panel, DefaultButton, TextField, SpinButton, Dropdown, IDropdownOption, IChoiceGroupOption } from "@fluentui/react"; import { SparkleFilled } from "@fluentui/react-icons"; import readNDJSONStream from "ndjson-readablestream"; @@ -362,15 +362,37 @@ const Chat = () => { onRenderFooterContent={() => setIsConfigPanelOpen(false)}>Close} isFooterAtBottom={true} > - + {(approach === Approaches.JAVA_OPENAI_SDK || approach === Approaches.JAVA_SEMANTIC_KERNEL) && ( + + )} + {(approach === Approaches.JAVA_SEMANTIC_KERNEL_PLANNER) && ( + + )} + { onChange={onShouldStreamChange} /> - - - - {(approach === Approaches.JAVA_OPENAI_SDK || approach === Approaches.JAVA_SEMANTIC_KERNEL) && ( - - )} - {(approach === 
Approaches.JAVA_SEMANTIC_KERNEL_PLANNER) && ( - - )} {useLogin && }
From 82190921441ab7c4c895fbd58d591bda338ebaa9 Mon Sep 17 00:00:00 2001 From: Milder Hernandez Cagua Date: Thu, 2 Nov 2023 18:38:16 -0700 Subject: [PATCH 10/13] Fix RAGApproachFactorySpringBootImpl.createApproach --- .../rag/approaches/RAGApproachFactorySpringBootImpl.java | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/app/backend/src/main/java/com/microsoft/openai/samples/rag/approaches/RAGApproachFactorySpringBootImpl.java b/app/backend/src/main/java/com/microsoft/openai/samples/rag/approaches/RAGApproachFactorySpringBootImpl.java index 22d68bb..fbc6917 100644 --- a/app/backend/src/main/java/com/microsoft/openai/samples/rag/approaches/RAGApproachFactorySpringBootImpl.java +++ b/app/backend/src/main/java/com/microsoft/openai/samples/rag/approaches/RAGApproachFactorySpringBootImpl.java @@ -30,7 +30,9 @@ public class RAGApproachFactorySpringBootImpl implements RAGApproachFactory, App public RAGApproach createApproach(String approachName, RAGType ragType, RAGOptions ragOptions) { if (ragType.equals(RAGType.CHAT)) { - if (JAVA_SEMANTIC_KERNEL.equals(approachName)) { + if (JAVA_OPENAI_SDK.equals(approachName)) { + return applicationContext.getBean(PlainJavaChatApproach.class); + } else if (JAVA_SEMANTIC_KERNEL.equals(approachName)) { return applicationContext.getBean(JavaSemanticKernelWithMemoryChatApproach.class); } else if ( JAVA_SEMANTIC_KERNEL_PLANNER.equals(approachName) && @@ -38,8 +40,6 @@ public RAGApproach createApproach(String approachName, RAGType ragType, RAGOptio ragOptions.getSemantickKernelMode() != null && ragOptions.getSemantickKernelMode() == SemanticKernelMode.chains) { return applicationContext.getBean(JavaSemanticKernelChainsChatApproach.class); - } else { - return applicationContext.getBean(PlainJavaChatApproach.class); } } else if (ragType.equals(RAGType.ASK)) { if (JAVA_OPENAI_SDK.equals(approachName)) @@ -50,7 +50,6 @@ else if (JAVA_SEMANTIC_KERNEL_PLANNER.equals(approachName) && ragOptions.getSema return 
applicationContext.getBean(JavaSemanticKernelPlannerApproach.class); else if (JAVA_SEMANTIC_KERNEL_PLANNER.equals(approachName) && ragOptions != null && ragOptions.getSemantickKernelMode() != null && ragOptions.getSemantickKernelMode() == SemanticKernelMode.chains) return applicationContext.getBean(JavaSemanticKernelChainsApproach.class); - } //if this point is reached then the combination of approach and rag type is not supported throw new IllegalArgumentException("Invalid combination for approach[%s] and rag type[%s]: ".formatted(approachName, ragType)); From 6866ede5636b8e09f5ec306d52c4f698226d6146 Mon Sep 17 00:00:00 2001 From: Milder Hernandez Cagua Date: Sun, 5 Nov 2023 22:51:09 -0800 Subject: [PATCH 11/13] Disable streaming for SK --- .../JavaSemanticKernelChainsChatApproach.java | 1 + ...aSemanticKernelWithMemoryChatApproach.java | 43 +------------------ app/frontend/src/pages/chat/Chat.tsx | 24 +++++++---- 3 files changed, 17 insertions(+), 51 deletions(-) diff --git a/app/backend/src/main/java/com/microsoft/openai/samples/rag/chat/approaches/semantickernel/JavaSemanticKernelChainsChatApproach.java b/app/backend/src/main/java/com/microsoft/openai/samples/rag/chat/approaches/semantickernel/JavaSemanticKernelChainsChatApproach.java index 2b9429c..9d21269 100644 --- a/app/backend/src/main/java/com/microsoft/openai/samples/rag/chat/approaches/semantickernel/JavaSemanticKernelChainsChatApproach.java +++ b/app/backend/src/main/java/com/microsoft/openai/samples/rag/chat/approaches/semantickernel/JavaSemanticKernelChainsChatApproach.java @@ -95,6 +95,7 @@ public void runStreaming( ChatGPTConversation questionOrConversation, RAGOptions options, OutputStream outputStream) { + throw new IllegalStateException("Streaming not supported for this approach"); } private List formSourcesList(String result) { diff --git a/app/backend/src/main/java/com/microsoft/openai/samples/rag/chat/approaches/semantickernel/JavaSemanticKernelWithMemoryChatApproach.java 
b/app/backend/src/main/java/com/microsoft/openai/samples/rag/chat/approaches/semantickernel/JavaSemanticKernelWithMemoryChatApproach.java index 1ab64fe..7cd2c75 100644 --- a/app/backend/src/main/java/com/microsoft/openai/samples/rag/chat/approaches/semantickernel/JavaSemanticKernelWithMemoryChatApproach.java +++ b/app/backend/src/main/java/com/microsoft/openai/samples/rag/chat/approaches/semantickernel/JavaSemanticKernelWithMemoryChatApproach.java @@ -102,48 +102,7 @@ public RAGResponse run(ChatGPTConversation questionOrConversation, RAGOptions op @Override public void runStreaming(ChatGPTConversation questionOrConversation, RAGOptions options, OutputStream outputStream) { - String question = ChatGPTUtils.getLastUserQuestion(questionOrConversation.getMessages()); - - // STEP 1: Build semantic kernel with Azure Cognitive Search as memory store. AnswerQuestion skill is imported from resources. - Kernel semanticKernel = buildSemanticKernel(options); - - // STEP 2: Retrieve relevant documents using keywords extracted from the chat history - String conversation = ChatGPTUtils.formatAsChatML(questionOrConversation.toOpenAIChatMessages()); - List sourcesResult = getSourcesFromConversation(conversation, semanticKernel, options); - - LOGGER.info("Total {} sources found in cognitive vector store for search query[{}]", sourcesResult.size(), question); - - String sources = buildSourcesText(sourcesResult); - List sourcesList = buildSources(sourcesResult); - - // STEP 3: Generate a contextual and content specific answer using the search results and chat history - SKFunction answerConversation = semanticKernel.getFunction("RAG", "AnswerConversation"); - SKContext skcontext = SKBuilders.context().build() - .setVariable("sources", sources) - .setVariable("conversation", conversation) - .setVariable("suggestions", String.valueOf(options.isSuggestFollowupQuestions())) - .setVariable("input", question); - - SKContext reply = (SKContext) 
answerConversation.invokeAsync(skcontext).block(); - - RAGResponse ragResponse = - new RAGResponse.Builder() - .question( - ChatGPTUtils.getLastUserQuestion( - questionOrConversation.getMessages())) - .prompt("placeholders for prompt") - .answer(reply.getResult()) - .sources(sourcesList) - .sourcesAsText(sources) - .build(); - - try { - String value = objectMapper.writeValueAsString(ChatResponse.buildChatResponse(ragResponse)) + "\n"; - outputStream.write(value.getBytes()); - outputStream.flush(); - } catch (IOException e) { - throw new RuntimeException(e); - } + throw new IllegalStateException("Streaming not supported for this approach"); } private List getSourcesFromConversation (String conversation, Kernel kernel, RAGOptions options) { diff --git a/app/frontend/src/pages/chat/Chat.tsx b/app/frontend/src/pages/chat/Chat.tsx index 5ee09ca..6162fe4 100644 --- a/app/frontend/src/pages/chat/Chat.tsx +++ b/app/frontend/src/pages/chat/Chat.tsx @@ -35,6 +35,7 @@ const Chat = () => { const [retrievalMode, setRetrievalMode] = useState(RetrievalMode.Hybrid); const [useSemanticRanker, setUseSemanticRanker] = useState(true); const [shouldStream, setShouldStream] = useState(true); + const [streamAvailable, setStreamAvailable] = useState(true); const [useSemanticCaptions, setUseSemanticCaptions] = useState(false); const [excludeCategory, setExcludeCategory] = useState(""); const [useSuggestFollowupQuestions, setUseSuggestFollowupQuestions] = useState(false); @@ -112,9 +113,10 @@ const Chat = () => { { content: a[1].choices[0].message.content, role: "assistant" } ]); + const stream = streamAvailable && shouldStream; const request: ChatAppRequest = { messages: [...messages, { content: question, role: "user" }], - stream: shouldStream, + stream: stream, context: { overrides: { prompt_template: promptTemplate.length === 0 ? 
undefined : promptTemplate, @@ -138,7 +140,7 @@ const Chat = () => { if (!response.body) { throw Error("No response body"); } - if (shouldStream) { + if (stream) { const parsedResponse: ChatAppResponse = await handleAsyncRequest(question, answers, setAnswers, response.body); setAnswers([...answers, [question, parsedResponse]]); } else { @@ -186,7 +188,9 @@ const Chat = () => { }; const onApproachChange = (_ev?: React.FormEvent, option?: IChoiceGroupOption) => { - setApproach((option?.key as Approaches) || Approaches.JAVA_OPENAI_SDK); + const newApproach = (option?.key as Approaches); + setApproach(newApproach || Approaches.JAVA_OPENAI_SDK); + setStreamAvailable(newApproach === Approaches.JAVA_OPENAI_SDK); }; const onUseSemanticRankerChange = (_ev?: React.FormEvent, checked?: boolean) => { @@ -450,12 +454,14 @@ const Chat = () => { required onChange={onRetrievalModeChange} /> - + {streamAvailable && + + } {useLogin && } From 3935ff1b6ce7648e8195387b7be5990745d26fa2 Mon Sep 17 00:00:00 2001 From: Milder Hernandez Cagua Date: Tue, 7 Nov 2023 01:18:37 -0800 Subject: [PATCH 12/13] Add keywords extraction through native function for SK Chaining approach --- .../JavaSemanticKernelChainsApproach.java | 12 ++-- .../JavaSemanticKernelPlannerApproach.java | 5 +- .../JavaSemanticKernelChainsChatApproach.java | 72 ++++++++++--------- ...aSemanticKernelWithMemoryChatApproach.java | 8 +-- .../samples/rag/common/ChatGPTUtils.java | 41 ++++++++++- .../semantickernel/CognitiveSearchPlugin.java | 39 ++++++++-- 6 files changed, 123 insertions(+), 54 deletions(-) rename app/backend/src/main/java/com/microsoft/openai/samples/rag/{ask/approaches => retrieval}/semantickernel/CognitiveSearchPlugin.java (60%) diff --git a/app/backend/src/main/java/com/microsoft/openai/samples/rag/ask/approaches/semantickernel/JavaSemanticKernelChainsApproach.java b/app/backend/src/main/java/com/microsoft/openai/samples/rag/ask/approaches/semantickernel/JavaSemanticKernelChainsApproach.java index 
4b8ab75..21ede85 100644 --- a/app/backend/src/main/java/com/microsoft/openai/samples/rag/ask/approaches/semantickernel/JavaSemanticKernelChainsApproach.java +++ b/app/backend/src/main/java/com/microsoft/openai/samples/rag/ask/approaches/semantickernel/JavaSemanticKernelChainsApproach.java @@ -6,6 +6,7 @@ import com.microsoft.openai.samples.rag.approaches.RAGApproach; import com.microsoft.openai.samples.rag.approaches.RAGOptions; import com.microsoft.openai.samples.rag.approaches.RAGResponse; +import com.microsoft.openai.samples.rag.retrieval.semantickernel.CognitiveSearchPlugin; import com.microsoft.openai.samples.rag.proxy.CognitiveSearchProxy; import com.microsoft.openai.samples.rag.proxy.OpenAIProxy; import com.microsoft.semantickernel.Kernel; @@ -25,7 +26,7 @@ /** * Use Java Semantic Kernel framework with semantic and native functions chaining. It uses an * imperative style for AI orchestration through semantic kernel functions chaining. - * InformationFinder.Search native function and RAG.AnswerQuestion semantic function are called + * InformationFinder.SearchFromQuestion native function and RAG.AnswerQuestion semantic function are called * sequentially. Several cognitive search retrieval options are available: Text, Vector, Hybrid. */ @Component @@ -74,7 +75,7 @@ public RAGResponse run(String question, RAGOptions options) { question, semanticKernel .getSkill("InformationFinder") - .getFunction("Search", null)) + .getFunction("SearchFromQuestion", null)) .block(); var sources = formSourcesList(searchContext.getResult()); @@ -135,9 +136,9 @@ private List formSourcesList(String result) { /** * Build semantic kernel context with AnswerQuestion semantic function and - * InformationFinder.Search native function. AnswerQuestion is imported from - * src/main/resources/semantickernel/Plugins. InformationFinder.Search is implemented in a - * traditional Java class method: CognitiveSearchPlugin.search + * InformationFinder.SearchFromQuestion native function. 
AnswerQuestion is imported from + * src/main/resources/semantickernel/Plugins. InformationFinder.SearchFromQuestion is implemented in a + * traditional Java class method: CognitiveSearchPlugin.searchFromConversation * * @param options * @return @@ -155,7 +156,6 @@ private Kernel buildSemanticKernel(RAGOptions options) { kernel.importSkill( new CognitiveSearchPlugin(this.cognitiveSearchProxy, this.openAIProxy, options), "InformationFinder"); - kernel.importSkillFromResources("semantickernel/Plugins", "RAG", "AnswerQuestion", null); return kernel; diff --git a/app/backend/src/main/java/com/microsoft/openai/samples/rag/ask/approaches/semantickernel/JavaSemanticKernelPlannerApproach.java b/app/backend/src/main/java/com/microsoft/openai/samples/rag/ask/approaches/semantickernel/JavaSemanticKernelPlannerApproach.java index f3e6417..000f16b 100644 --- a/app/backend/src/main/java/com/microsoft/openai/samples/rag/ask/approaches/semantickernel/JavaSemanticKernelPlannerApproach.java +++ b/app/backend/src/main/java/com/microsoft/openai/samples/rag/ask/approaches/semantickernel/JavaSemanticKernelPlannerApproach.java @@ -5,6 +5,7 @@ import com.microsoft.openai.samples.rag.approaches.RAGApproach; import com.microsoft.openai.samples.rag.approaches.RAGOptions; import com.microsoft.openai.samples.rag.approaches.RAGResponse; +import com.microsoft.openai.samples.rag.retrieval.semantickernel.CognitiveSearchPlugin; import com.microsoft.openai.samples.rag.proxy.CognitiveSearchProxy; import com.microsoft.openai.samples.rag.proxy.OpenAIProxy; import com.microsoft.semantickernel.Kernel; @@ -97,8 +98,8 @@ public void runStreaming( /** * Build semantic kernel context with AnswerQuestion semantic function and * InformationFinder.Search native function. AnswerQuestion is imported from - * src/main/resources/semantickernel/Plugins. InformationFinder.Search is implemented in a - * traditional Java class method: CognitiveSearchPlugin.search + * src/main/resources/semantickernel/Plugins. 
InformationFinder.SearchFromQuestion is implemented in a + * traditional Java class method: CognitiveSearchPlugin.searchFromQuestion * * @param options * @return diff --git a/app/backend/src/main/java/com/microsoft/openai/samples/rag/chat/approaches/semantickernel/JavaSemanticKernelChainsChatApproach.java b/app/backend/src/main/java/com/microsoft/openai/samples/rag/chat/approaches/semantickernel/JavaSemanticKernelChainsChatApproach.java index 9d21269..cd45813 100644 --- a/app/backend/src/main/java/com/microsoft/openai/samples/rag/chat/approaches/semantickernel/JavaSemanticKernelChainsChatApproach.java +++ b/app/backend/src/main/java/com/microsoft/openai/samples/rag/chat/approaches/semantickernel/JavaSemanticKernelChainsChatApproach.java @@ -5,17 +5,15 @@ import com.microsoft.openai.samples.rag.approaches.RAGApproach; import com.microsoft.openai.samples.rag.approaches.RAGOptions; import com.microsoft.openai.samples.rag.approaches.RAGResponse; -import com.microsoft.openai.samples.rag.ask.approaches.semantickernel.CognitiveSearchPlugin; +import com.microsoft.openai.samples.rag.retrieval.semantickernel.CognitiveSearchPlugin; import com.microsoft.openai.samples.rag.common.ChatGPTConversation; import com.microsoft.openai.samples.rag.common.ChatGPTUtils; import com.microsoft.openai.samples.rag.proxy.CognitiveSearchProxy; import com.microsoft.openai.samples.rag.proxy.OpenAIProxy; import com.microsoft.semantickernel.Kernel; import com.microsoft.semantickernel.SKBuilders; -import com.microsoft.semantickernel.chatcompletion.ChatCompletion; +import com.microsoft.semantickernel.orchestration.ContextVariables; import com.microsoft.semantickernel.orchestration.SKContext; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Value; import org.springframework.stereotype.Component; @@ -27,18 +25,13 @@ import java.util.stream.Collectors; /** - * Simple chat-read-retrieve-read java implementation, using the Cognitive Search and 
OpenAI APIs directly. - * It uses the ChatGPT API to turn the user question into a good search query. - * It queries Azure Cognitive Search for search results for that query (optionally using the vector embeddings for that query). - * It then combines the search results and original user question, and asks ChatGPT API to answer the question based on the sources. It includes the last 4K of message history as well (or however many tokens are allowed by the deployed model). + * Use Java Semantic Kernel framework with semantic and native functions chaining. It uses an + * imperative style for AI orchestration through semantic kernel functions chaining. + * InformationFinder.SearchFromConversation native function and RAG.AnswerConversation semantic function are called + * sequentially. Several cognitive search retrieval options are available: Text, Vector, Hybrid. */ @Component public class JavaSemanticKernelChainsChatApproach implements RAGApproach { - - private static final Logger LOGGER = LoggerFactory.getLogger(JavaSemanticKernelChainsChatApproach.class); - private static final String PLAN_PROMPT = """ - Take the input as a question and answer it finding any information needed - """; private final CognitiveSearchProxy cognitiveSearchProxy; private final OpenAIProxy openAIProxy; @@ -61,30 +54,38 @@ public JavaSemanticKernelChainsChatApproach(CognitiveSearchProxy cognitiveSearch */ @Override public RAGResponse run(ChatGPTConversation questionOrConversation, RAGOptions options) { - String question = ChatGPTUtils.getLastUserQuestion(questionOrConversation.getMessages()); + String conversation = ChatGPTUtils.formatAsChatML(questionOrConversation.toOpenAIChatMessages()); Kernel semanticKernel = buildSemanticKernel(options); + // STEP 1: Retrieve relevant documents using the current conversation. It reuses the + // CognitiveSearchRetriever approach through the CognitiveSearchPlugin native function.
SKContext searchContext = semanticKernel.runAsync( - question, - semanticKernel.getSkill("InformationFinder").getFunction("Search", null)).block(); - - var sources = formSourcesList(searchContext.getResult()); + conversation, + semanticKernel.getSkill("InformationFinder").getFunction("SearchFromConversation", null)).block(); - var answerVariables = SKBuilders.variables() + // STEP 2: Build a SK context with the sources retrieved from the memory store and conversation + ContextVariables variables = SKBuilders.variables() .withVariable("sources", searchContext.getResult()) - .withVariable("input", question) + .withVariable("conversation", conversation) + .withVariable("suggestions", String.valueOf(options.isSuggestFollowupQuestions())) + .withVariable("input", question) .build(); - SKContext answerExecutionContext = - semanticKernel.runAsync(answerVariables, - semanticKernel.getSkill("RAG").getFunction("AnswerQuestion", null)).block(); + /** + * STEP 3: Get a reference of the semantic function [AnswerConversation] of the [RAG] plugin + * (a.k.a. skill) from the SK skills registry and provide it with the pre-built context. + * Triggering Open AI to get a reply. + */ + SKContext reply = semanticKernel.runAsync(variables, + semanticKernel.getSkill("RAG").getFunction("AnswerConversation", null)).block(); + return new RAGResponse.Builder() .prompt("Prompt is managed by Semantic Kernel") - .answer(answerExecutionContext.getResult()) - .sources(sources) + .answer(reply.getResult()) + .sources(formSourcesList(searchContext.getResult())) .sourcesAsText(searchContext.getResult()) .question(question) .build(); @@ -118,6 +119,15 @@ private List formSourcesList(String result) { .collect(Collectors.toList()); } + /** + * Build semantic kernel context with AnswerConversation semantic function and + * InformationFinder.SearchFromConversation native function. AnswerConversation is imported from + * src/main/resources/semantickernel/Plugins. 
InformationFinder.SearchFromConversation is implemented in a + * traditional Java class method: CognitiveSearchPlugin.searchFromConversation + * + * @param options + * @return + */ private Kernel buildSemanticKernel(RAGOptions options) { Kernel kernel = SKBuilders.kernel() .withDefaultAIService(SKBuilders.chatCompletion() @@ -126,14 +136,10 @@ private Kernel buildSemanticKernel(RAGOptions options) { .build()) .build(); - kernel.importSkill(new CognitiveSearchPlugin(this.cognitiveSearchProxy, this.openAIProxy, options), "InformationFinder"); - - kernel.importSkillFromResources( - "semantickernel/Plugins", - "RAG", - "AnswerQuestion", - null - ); + kernel.importSkill( + new CognitiveSearchPlugin(this.cognitiveSearchProxy, this.openAIProxy, options), + "InformationFinder"); + kernel.importSkillFromResources("semantickernel/Plugins", "RAG", "AnswerConversation", null); return kernel; } diff --git a/app/backend/src/main/java/com/microsoft/openai/samples/rag/chat/approaches/semantickernel/JavaSemanticKernelWithMemoryChatApproach.java b/app/backend/src/main/java/com/microsoft/openai/samples/rag/chat/approaches/semantickernel/JavaSemanticKernelWithMemoryChatApproach.java index 7cd2c75..2acce7a 100644 --- a/app/backend/src/main/java/com/microsoft/openai/samples/rag/chat/approaches/semantickernel/JavaSemanticKernelWithMemoryChatApproach.java +++ b/app/backend/src/main/java/com/microsoft/openai/samples/rag/chat/approaches/semantickernel/JavaSemanticKernelWithMemoryChatApproach.java @@ -35,7 +35,7 @@ /** * Accomplish the same task as in the PlainJavaAskApproach approach but using Semantic Kernel framework: * 1. Memory abstraction is used for vector search capability. It uses Azure Cognitive Search as memory store. - * 2. Semantic function has been defined to ask question using sources from memory search results + * 2. 
Semantic functions have been defined to ask question using sources from memory search results */ @Component public class JavaSemanticKernelWithMemoryChatApproach implements RAGApproach { @@ -89,11 +89,11 @@ public RAGResponse run(ChatGPTConversation questionOrConversation, RAGOptions op .setVariable("suggestions", String.valueOf(options.isSuggestFollowupQuestions())) .setVariable("input", question); - Mono reply = answerConversation.invokeAsync(skcontext); + SKContext reply = (SKContext) answerConversation.invokeAsync(skcontext).block(); return new RAGResponse.Builder() - .prompt("placeholders for prompt") - .answer(reply.block().getResult()) + .prompt("Prompt is managed by Semantic Kernel") + .answer(reply.getResult()) .sources(sourcesList) .sourcesAsText(sources) .question(question) diff --git a/app/backend/src/main/java/com/microsoft/openai/samples/rag/common/ChatGPTUtils.java b/app/backend/src/main/java/com/microsoft/openai/samples/rag/common/ChatGPTUtils.java index 5772ee8..f4b879e 100644 --- a/app/backend/src/main/java/com/microsoft/openai/samples/rag/common/ChatGPTUtils.java +++ b/app/backend/src/main/java/com/microsoft/openai/samples/rag/common/ChatGPTUtils.java @@ -4,6 +4,7 @@ import com.azure.ai.openai.models.ChatMessage; import com.azure.ai.openai.models.ChatRole; +import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -26,21 +27,55 @@ public static ChatCompletionsOptions buildDefaultChatCompletionsOptions(List messages) { StringBuilder sb = new StringBuilder(); messages.forEach(message -> { if (message.getRole() == ChatRole.USER) { - sb.append("<|im_start|>user\n"); + sb.append(IM_START_USER).append("\n"); } else if (message.getRole() == ChatRole.ASSISTANT) { - sb.append("<|im_start|>assistant\n"); + sb.append(IM_START_ASSISTANT).append("\n"); } else { - sb.append("<|im_start|>system\n"); + sb.append(IM_START_SYSTEM).append("\n"); } sb.append(message.getContent()).append("\n").append("|im_end|").append("\n"); }); return 
sb.toString(); } + public static List parseChatML(String chatML) { + List messages = new ArrayList<>(); + String[] messageTokens = chatML.split("\\|im_end\\|\\n"); + + for (String messageToken : messageTokens) { + String[] lines = messageToken.trim().split("\n"); + + if (lines.length >= 2) { + ChatRole role = ChatRole.SYSTEM; + if (IM_START_USER.equals(lines[0])) { + role = ChatRole.USER; + } else if (IM_START_ASSISTANT.equals(lines[0])) { + role = ChatRole.ASSISTANT; + } + + StringBuilder content = new StringBuilder(); + for (int i = 1; i < lines.length; ++i) { + content.append(lines[i]); + if (i < lines.length - 1) { + content.append("\n"); + } + } + + messages.add(new ChatMessage(role).setContent(content.toString())); + } + } + + return messages; + } + public static String getLastUserQuestion(List messages) { List userMessages = messages .stream() diff --git a/app/backend/src/main/java/com/microsoft/openai/samples/rag/ask/approaches/semantickernel/CognitiveSearchPlugin.java b/app/backend/src/main/java/com/microsoft/openai/samples/rag/retrieval/semantickernel/CognitiveSearchPlugin.java similarity index 60% rename from app/backend/src/main/java/com/microsoft/openai/samples/rag/ask/approaches/semantickernel/CognitiveSearchPlugin.java rename to app/backend/src/main/java/com/microsoft/openai/samples/rag/retrieval/semantickernel/CognitiveSearchPlugin.java index 884db99..a4a2616 100644 --- a/app/backend/src/main/java/com/microsoft/openai/samples/rag/ask/approaches/semantickernel/CognitiveSearchPlugin.java +++ b/app/backend/src/main/java/com/microsoft/openai/samples/rag/retrieval/semantickernel/CognitiveSearchPlugin.java @@ -1,9 +1,11 @@ // Copyright (c) Microsoft. All rights reserved. 
-package com.microsoft.openai.samples.rag.ask.approaches.semantickernel; +package com.microsoft.openai.samples.rag.retrieval.semantickernel; -import com.azure.search.documents.models.*; import com.microsoft.openai.samples.rag.approaches.ContentSource; import com.microsoft.openai.samples.rag.approaches.RAGOptions; +import com.microsoft.openai.samples.rag.common.ChatGPTConversation; +import com.microsoft.openai.samples.rag.common.ChatGPTMessage; +import com.microsoft.openai.samples.rag.common.ChatGPTUtils; import com.microsoft.openai.samples.rag.proxy.CognitiveSearchProxy; import com.microsoft.openai.samples.rag.proxy.OpenAIProxy; import com.microsoft.openai.samples.rag.retrieval.CognitiveSearchRetriever; @@ -31,9 +33,9 @@ public CognitiveSearchPlugin( } @DefineSKFunction( - name = "Search", + name = "SearchFromQuestion", description = "Search information relevant to answering a given query") - public Mono search( + public Mono searchFromQuestion( @SKFunctionInputAttribute(description = "the query to answer") String query) { CognitiveSearchRetriever retriever = @@ -45,8 +47,33 @@ public Mono search( sources.size(), query); + return Mono.just(buildSources(sources)); + } + + @DefineSKFunction( + name = "SearchFromConversation", + description = "Search information relevant to a conversation") + public Mono searchFromConversation( + @SKFunctionInputAttribute(description = "the conversation to search the information from") String conversation) { + // Parse conversation + List chatMessages = ChatGPTUtils.parseChatML(conversation).stream().map(message -> + new ChatGPTMessage(ChatGPTMessage.ChatRole.fromString(message.getRole().toString()), message.getContent()) + ).toList(); + + CognitiveSearchRetriever retriever = + new CognitiveSearchRetriever(this.cognitiveSearchProxy, this.openAIProxy); + List sources = retriever.retrieveFromConversation(new ChatGPTConversation(chatMessages), this.options); + + LOGGER.info( + "Total {} sources found in cognitive search", + 
sources.size()); + + return Mono.just(buildSources(sources)); + } + + private String buildSources (List sources) { StringBuilder sourcesStringBuilder = new StringBuilder(); - // Build sources section + sources.iterator() .forEachRemaining( source -> @@ -55,6 +82,6 @@ public Mono search( .append(": ") .append(source.getSourceContent().replace("\n", "")) .append("\n")); - return Mono.just(sourcesStringBuilder.toString()); + return sourcesStringBuilder.toString(); } } From 659b5081284f382696f152cf0f9e300856e8db7e Mon Sep 17 00:00:00 2001 From: Milder Hernandez Cagua Date: Tue, 7 Nov 2023 10:46:25 -0800 Subject: [PATCH 13/13] Add new SK chat approaches to README.md --- README.md | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index ce97d7c..8b2cc7f 100644 --- a/README.md +++ b/README.md @@ -284,13 +284,15 @@ To then limit access to a specific set of users or groups, you can follow the st This repo is focused to showcase different options to implement semantic search on private documents using RAG patterns with Java, Azure OpenAI and Semantic Kernel. Below you can find the list of available implementations. 
-| Conversational Style | RAG Approach | Description | Java Open AI SDK | Java Semantic Kernel | -|:---------------------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------------------|:----------------------| -| One Shot Ask | [PlainJavaAskApproach](https://github.com/Azure-Samples/azure-search-openai-demo-java/blob/main/app/backend/src/main/java/com/microsoft/openai/samples/rag/ask/approaches/PlainJavaAskApproach.java) | Use Cognitive Search and Java OpenAI APIs. It first retrieves top documents from search and use them to build a prompt. Then, it uses OpenAI to generate an answer for the user question.Several cognitive search retrieval options are available: Text, Vector, Hybrid. When Hybrid and Vector are selected an additional call to OpenAI is required to generate embeddings vector for the question. 
| :white_check_mark: | :x: | -| Chat | [PlainJavaChatApproach](https://github.com/Azure-Samples/azure-search-openai-demo-java/blob/main/app/backend/src/main/java/com/microsoft/openai/samples/rag/chat/approaches/PlainJavaChatApproach.java) | Use Cognitive Search and Java OpenAI APIs. It first calls OpenAI to generate a search keyword for the chat history and then answer to the last chat question.Several cognitive search retrieval options are available: Text, Vector, Hybrid. When Hybrid and Vector are selected an additional call to OpenAI is required to generate embeddings vector for the chat extracted keywords. | :white_check_mark: | :x: | -| One Shot Ask | [JavaSemanticKernelWithMemoryApproach](https://github.com/Azure-Samples/azure-search-openai-demo-java/blob/main/app/backend/src/main/java/com/microsoft/openai/samples/rag/ask/approaches/semantickernel/JavaSemanticKernelWithMemoryApproach.java) | Use Java Semantic Kernel framework with built-in MemoryStore for embeddings similarity search. A semantic function [RAG.AnswerQuestion](https://github.com/Azure-Samples/azure-search-openai-demo-java/blob/main/app/backend/src/main/resources/semantickernel/Plugins/RAG/AnswerQuestion/config.json) is defined to build the prompt using Memory Store vector search results.A customized version of SK built-in [CognitiveSearchMemoryStore](https://github.com/Azure-Samples/azure-search-openai-demo-java/blob/main/app/backend/src/main/java/com/microsoft/openai/samples/rag/ask/approaches/semantickernel/memory/CustomAzureCognitiveSearchMemoryStore.java) is used to map index fields populated by the documents ingestion process. | :x: | :white_check_mark: | -| One Shot Ask | [JavaSemanticKernelChainsApproach](https://github.com/Azure-Samples/azure-search-openai-demo-java/blob/main/app/backend/src/main/java/com/microsoft/openai/samples/rag/ask/approaches/semantickernel/JavaSemanticKernelChainsApproach.java) | Use Java Semantic Kernel framework with semantic and native functions chaining. 
It uses an imperative style for AI orchestration through semantic kernel functions chaining. [InformationFinder.Search](https://github.com/Azure-Samples/azure-search-openai-demo-java/blob/main/app/backend/src/main/java/com/microsoft/openai/samples/rag/ask/approaches/semantickernel/CognitiveSearchPlugin.java) native function and [RAG.AnswerQuestion](https://github.com/Azure-Samples/azure-search-openai-demo-java/blob/main/app/backend/src/main/resources/semantickernel/Plugins/RAG/AnswerQuestion/config.json) semantic function are called sequentially. Several cognitive search retrieval options are available: Text, Vector, Hybrid. | :x: | :white_check_mark: | -| One Shot Ask | [JavaSemanticKernelPlannerApproach](https://github.com/Azure-Samples/azure-search-openai-demo-java/blob/main/app/backend/src/main/java/com/microsoft/openai/samples/rag/ask/approaches/semantickernel/JavaSemanticKernelPlannerApproach.java) | Use Java Semantic Kernel framework with built-in Planner for functions orchestration. It uses a declarative style for AI orchestration through the built-in SequentialPlanner. SequentialPlanner call OpenAI to generate a plan for answering a question using available skills/plugins: [InformationFinder](https://github.com/Azure-Samples/azure-search-openai-demo-java/blob/main/app/backend/src/main/java/com/microsoft/openai/samples/rag/ask/approaches/semantickernel/CognitiveSearchPlugin.java) and [RAG](https://github.com/Azure-Samples/azure-search-openai-demo-java/blob/main/app/backend/src/main/resources/semantickernel/Plugins/RAG/AnswerQuestion/config.json). Several cognitive search retrieval options are available: Text, Vector, Hybrid. ⚠️ This approach is currently disabled within the UI, pending fixes for this feature. 
| :x: | :white_check_mark: | +| Conversational Style | RAG Approach | Description | Java Open AI SDK | Java Semantic Kernel | +|:---------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------------------|:----------------------| +| One Shot Ask | [PlainJavaAskApproach](https://github.com/Azure-Samples/azure-search-openai-demo-java/blob/main/app/backend/src/main/java/com/microsoft/openai/samples/rag/ask/approaches/PlainJavaAskApproach.java) | Use Cognitive Search and Java OpenAI APIs. It first retrieves top documents from search and use them to build a prompt. Then, it uses OpenAI to generate an answer for the user question.Several cognitive search retrieval options are available: Text, Vector, Hybrid. When Hybrid and Vector are selected an additional call to OpenAI is required to generate embeddings vector for the question. 
| :white_check_mark: | :x: | +| Chat | [PlainJavaChatApproach](https://github.com/Azure-Samples/azure-search-openai-demo-java/blob/main/app/backend/src/main/java/com/microsoft/openai/samples/rag/chat/approaches/PlainJavaChatApproach.java) | Use Cognitive Search and Java OpenAI APIs. It first calls OpenAI to generate a search keyword for the chat history and then answer to the last chat question.Several cognitive search retrieval options are available: Text, Vector, Hybrid. When Hybrid and Vector are selected an additional call to OpenAI is required to generate embeddings vector for the chat extracted keywords. | :white_check_mark: | :x: | +| One Shot Ask | [JavaSemanticKernelWithMemoryApproach](https://github.com/Azure-Samples/azure-search-openai-demo-java/blob/main/app/backend/src/main/java/com/microsoft/openai/samples/rag/ask/approaches/semantickernel/JavaSemanticKernelWithMemoryApproach.java) | Use Java Semantic Kernel framework with built-in MemoryStore for embeddings similarity search. A semantic function [RAG.AnswerQuestion](https://github.com/Azure-Samples/azure-search-openai-demo-java/blob/main/app/backend/src/main/resources/semantickernel/Plugins/RAG/AnswerQuestion/config.json) is defined to build the prompt using Memory Store vector search results.A customized version of SK built-in [CognitiveSearchMemoryStore](https://github.com/Azure-Samples/azure-search-openai-demo-java/blob/main/app/backend/src/main/java/com/microsoft/openai/samples/rag/ask/approaches/semantickernel/memory/CustomAzureCognitiveSearchMemoryStore.java) is used to map index fields populated by the documents ingestion process. | :x: | :white_check_mark: | +| One Shot Ask | [JavaSemanticKernelChainsApproach](https://github.com/Azure-Samples/azure-search-openai-demo-java/blob/main/app/backend/src/main/java/com/microsoft/openai/samples/rag/ask/approaches/semantickernel/JavaSemanticKernelChainsApproach.java) | Use Java Semantic Kernel framework with semantic and native functions chaining. 
It uses an imperative style for AI orchestration through semantic kernel functions chaining. [InformationFinder.SearchFromQuestion](https://github.com/Azure-Samples/azure-search-openai-demo-java/blob/main/app/backend/src/main/java/com/microsoft/openai/samples/rag/retrieval/semantickernel/CognitiveSearchPlugin.java) native function and [RAG.AnswerQuestion](https://github.com/Azure-Samples/azure-search-openai-demo-java/blob/main/app/backend/src/main/resources/semantickernel/Plugins/RAG/AnswerQuestion/config.json) semantic function are called sequentially. Several cognitive search retrieval options are available: Text, Vector, Hybrid. | :x: | :white_check_mark: | +| One Shot Ask | [JavaSemanticKernelPlannerApproach](https://github.com/Azure-Samples/azure-search-openai-demo-java/blob/main/app/backend/src/main/java/com/microsoft/openai/samples/rag/ask/approaches/semantickernel/JavaSemanticKernelPlannerApproach.java) | Use Java Semantic Kernel framework with built-in Planner for functions orchestration. It uses a declarative style for AI orchestration through the built-in SequentialPlanner. SequentialPlanner calls OpenAI to generate a plan for answering a question using available skills/plugins: [InformationFinder](https://github.com/Azure-Samples/azure-search-openai-demo-java/blob/main/app/backend/src/main/java/com/microsoft/openai/samples/rag/ask/approaches/semantickernel/CognitiveSearchPlugin.java) and [RAG](https://github.com/Azure-Samples/azure-search-openai-demo-java/blob/main/app/backend/src/main/resources/semantickernel/Plugins/RAG/AnswerQuestion/config.json). Several cognitive search retrieval options are available: Text, Vector, Hybrid. ⚠️ This approach is currently disabled within the UI, pending fixes for this feature. 
| :x: | :white_check_mark: | +| Chat | [JavaSemanticKernelWithMemoryChatApproach](https://github.com/Azure-Samples/azure-search-openai-demo-java/blob/main/app/backend/src/main/java/com/microsoft/openai/samples/rag/chat/approaches/semantickernel/JavaSemanticKernelWithMemoryChatApproach.java) | Use Java Semantic Kernel framework with built-in MemoryStore for embeddings similarity search. A semantic function [RAG.AnswerConversation](https://github.com/Azure-Samples/azure-search-openai-demo-java/blob/main/app/backend/src/main/resources/semantickernel/Plugins/RAG/AnswerConversation/config.json) is defined to build the prompt using Memory Store vector search results. A customized version of SK built-in [CognitiveSearchMemoryStore](https://github.com/Azure-Samples/azure-search-openai-demo-java/blob/main/app/backend/src/main/java/com/microsoft/openai/samples/rag/ask/approaches/semantickernel/memory/CustomAzureCognitiveSearchMemoryStore.java) is used to map index fields populated by the documents ingestion process. | :x: | :white_check_mark: | +| Chat | [JavaSemanticKernelChainsChatApproach](https://github.com/Azure-Samples/azure-search-openai-demo-java/blob/main/app/backend/src/main/java/com/microsoft/openai/samples/rag/chat/approaches/semantickernel/JavaSemanticKernelChainsChatApproach.java) | Use Java Semantic Kernel framework with semantic and native functions chaining. It uses an imperative style for AI orchestration through semantic kernel functions chaining. [InformationFinder.SearchFromConversation](https://github.com/Azure-Samples/azure-search-openai-demo-java/blob/main/app/backend/src/main/java/com/microsoft/openai/samples/rag/retrieval/semantickernel/CognitiveSearchPlugin.java) native function and [RAG.AnswerConversation](https://github.com/Azure-Samples/azure-search-openai-demo-java/blob/main/app/backend/src/main/resources/semantickernel/Plugins/RAG/AnswerConversation/config.json) semantic function are called sequentially. 
Several cognitive search retrieval options are available: Text, Vector, Hybrid. | :x: | :white_check_mark: | The plain Java Open AI SDK based implementations are stable. Java Semantic Kernel based implementations are still experimental and will be consolidated as soon as the Java Semantic Kernel beta version is released. Below is a brief description of the SK integration status: