Merge pull request #44 from meysamhadeli/feat/add-calculation-cost-to-token-management

test: add token cost calculation to token management
meysamhadeli authored Nov 6, 2024
2 parents b409ddf + 1e7274d commit 92a1a20
Showing 24 changed files with 1,362 additions and 248 deletions.
15 changes: 8 additions & 7 deletions README.md
@@ -51,12 +51,13 @@ ai_provider_config:
chat_completion_url: "http://localhost:11434/v1/chat/completions"
chat_completion_model: "gpt-4o"
embedding_url: "http://localhost:11434/v1/embeddings" (Optional, If you want use RAG.)
embedding_model: "text-embedding-ada-002" (Optional, If you want use RAG.)
embedding_model: "text-embedding-3-small" (Optional, If you want use RAG.)
temperature: 0.2
max_tokens: 128000
theme: "dracula"
RAG: true (Optional, if you want, can disable RAG.)
```
+ > Note: We used the standard integration of [OpenAI APIs](https://platform.openai.com/docs/api-reference/introduction) and [Ollama APIs](https://github.com/ollama/ollama/blob/main/docs/api.md), and you can find more details in the documentation of each API.
If you wish to customize your configuration, you can create your own `config.yml` file and place it in the `root directory` of each project you want to analyze with codai. If no configuration file is provided, codai will use the default settings.

You can also specify a configuration file from any directory by using the following CLI command:
@@ -69,7 +70,7 @@ codai code --provider_name openapi --temperature 0.8
```
This flexibility allows you to customize config of codai on the fly.

- > Note: We use [Chroma](https://github.com/alecthomas/chroma) for `style` of our `text` and `code block`, and you can find more theme here in [Chroma Style Gallery](https://xyproto.github.io/splash/docs/) and use it as a `theme` in `codai`.
+ > Note: We used [Chroma](https://github.com/alecthomas/chroma) for `style` of our `text` and `code block`, and you can find more themes here in [Chroma Style Gallery](https://xyproto.github.io/splash/docs/) and use it as a `theme` in `codai`.

## 🔮 LLM Models
### ⚡ Best Models
@@ -79,12 +80,12 @@ The codai works well with advanced LLM models specifically designed for code generation
In addition to cloud-based models, codai is compatible with local models such as `Ollama`. To achieve the best results, it is recommended to utilize models like `DeepSeek-Coder-v2`, `CodeLlama`, and `Mistral`. These models have been optimized for coding tasks, ensuring that you can maximize the efficiency and effectiveness of your coding projects.

### 🌐 OpenAI Embedding Models
- The codai can utilize `OpenAI’s embedding models` to retrieve the `most relevant content`. The current recommended model for `code context` is `text-embedding-ada-002`, known for its high performance and capability in capturing semantic relationships, making it an excellent choice for accurate and efficient embedding retrieval.
+ The codai platform uses `OpenAI embedding models` to retrieve `relevant content` with high efficiency. Recommended models include **text-embedding-3-large**, **text-embedding-3-small**, and **text-embedding-ada-002**, all known for their `cost-effectiveness` and `accuracy` in `capturing semantic relationships`. These models are ideal for applications needing high-quality performance in `code context retrieval`.

### 🦙 Ollama Embedding Models
- The codai also supports `Ollama embedding models`, allowing `local embedding` generation and retrieval. A suitable option here is the `nomic-embed-text model`, which provides efficient embedding generation locally, aiding in effective RAG-based retrieval `for relevant code context`.
+ codai also supports `Ollama embedding models` for `local`, `cost-effective`, and `efficient` embedding generation and `retrieval of relevant content`. Models such as **mxbai-embed-large**, **all-minilm**, and **nomic-embed-text** provide **effective**, **private** embedding creation optimized for high-quality performance. These models are well-suited for `RAG-based retrieval in code contexts`, eliminating the need for external API calls.

- How to Run
+ ## ▶️ How to Run
To use `codai` as your code assistant, navigate to the directory where you want to apply codai and run the following command:

```bash
@@ -124,7 +125,7 @@ Allow users to customize settings through a config file (e.g., changing AI provider
📊 **Project Context Awareness:**
Maintain awareness of the entire project context to provide more accurate suggestions.

- 🌳 **Full Project Context Summarization:**
+ 🌳 **Full Project Context Summarization:**
Summarize the full context of your codebase using Tree-sitter for accurate and efficient code analysis.

🔍 **RAG System Implementation:**
47 changes: 22 additions & 25 deletions cmd/code.go
@@ -42,6 +42,8 @@ func handleCodeCommand(rootDependencies *RootDependencies) {

signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM)

+ loopNumber := 0

reader := bufio.NewReader(os.Stdin)

var requestedContext string
@@ -65,8 +67,16 @@ func handleCodeCommand(rootDependencies *RootDependencies) {

// Launch the user input handler in a goroutine
go func() {

+ startLoop: // Label for the start loop
for {

+ if loopNumber > 0 {
+ 	// Display token usage details in a boxed format after each AI request
+ 	rootDependencies.TokenManagement.DisplayTokens(rootDependencies.Config.AIProviderConfig.ProviderName, rootDependencies.Config.AIProviderConfig.ChatCompletionModel, rootDependencies.Config.AIProviderConfig.EmbeddingModel, rootDependencies.Config.RAG)
+ }
+
+ loopNumber++

err := utils.CleanupTempFiles(rootDependencies.Cwd)
if err != nil {
fmt.Println(lipgloss_color.Red.Render(fmt.Sprintf("failed to cleanup temp files: %v", err)))
@@ -91,15 +101,13 @@ func handleCodeCommand(rootDependencies *RootDependencies) {
go func(dataFile models.FileData) {
defer wg.Done() // Decrement the counter when the Goroutine completes
filesEmbeddingOperation := func() error {
- fileEmbeddingResponse, err := rootDependencies.CurrentProvider.EmbeddingRequest(ctx, dataFile.TreeSitterCode)
+ fileEmbedding, err := rootDependencies.CurrentProvider.EmbeddingRequest(ctx, dataFile.TreeSitterCode)
if err != nil {
return err
}

- fileEmbedding := fileEmbeddingResponse.Data[0].Embedding

// Save embeddings to the embedding store
- rootDependencies.Store.Save(dataFile.RelativePath, dataFile.Code, fileEmbedding)
+ rootDependencies.Store.Save(dataFile.RelativePath, dataFile.Code, fileEmbedding[0])
return nil
}
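Here and in the query path below, `EmbeddingRequest` now appears to return the embedding vectors directly (hence `fileEmbedding[0]`) instead of a provider response object that had to be unwrapped via `Data[0].Embedding` at every call site. A sketch of what the revised provider interface might look like; the declaration below is an assumption inferred from these call sites, not codai's actual code:

```go
package providers

import "context"

// Provider abstracts an AI backend (hypothetical declaration, inferred
// from the call sites in this diff).
type Provider interface {
	// EmbeddingRequest returns one embedding vector per input; callers
	// index [0] for a single input, as code.go now does.
	EmbeddingRequest(ctx context.Context, input string) ([][]float64, error)
}
```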

@@ -116,28 +124,26 @@ func handleCodeCommand(rootDependencies *RootDependencies) {
for err = range errorChan {
spinnerLoadContextEmbedding.Stop()
fmt.Println(lipgloss_color.Red.Render(fmt.Sprintf("%v", err)))
- continue
+ continue startLoop
}

queryEmbeddingOperation := func() error {
// Step 5: Generate embedding for the user query
- queryEmbeddingResponse, err := rootDependencies.CurrentProvider.EmbeddingRequest(ctx, userInput)
+ queryEmbedding, err := rootDependencies.CurrentProvider.EmbeddingRequest(ctx, userInput)
if err != nil {
return err
}

- queryEmbedding := queryEmbeddingResponse.Data[0].Embedding

// Ensure there's an embedding for the user query
- if len(queryEmbedding) == 0 {
+ if len(queryEmbedding[0]) == 0 {
return fmt.Errorf(lipgloss_color.Red.Render("no embeddings returned for user query"))
}

// Find relevant chunks with a similarity threshold of 0.3, no topN limit (-1 means all results and positive number only return this relevant results number)
topN := -1

// Step 6: Find relevant code chunks based on the user query embedding
- fullContextCodes = rootDependencies.Store.FindRelevantChunks(queryEmbedding, topN, rootDependencies.Config.AIProviderConfig.EmbeddingModel, rootDependencies.Config.AIProviderConfig.Threshold)
+ fullContextCodes = rootDependencies.Store.FindRelevantChunks(queryEmbedding[0], topN, rootDependencies.Config.AIProviderConfig.EmbeddingModel, rootDependencies.Config.AIProviderConfig.Threshold)
return nil
}
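`FindRelevantChunks` itself is not shown in this commit, but the call above implies it scores stored chunks against the query embedding, drops matches below the configured threshold, and keeps the top `topN` (with `-1` meaning keep all matches). A hedged sketch of such a RAG lookup using cosine similarity, which is a common implementation and not necessarily codai's:

```go
package main

import (
	"fmt"
	"math"
	"sort"
)

type chunk struct {
	path      string
	code      string
	embedding []float64
}

// cosine returns the cosine similarity of two equal-length vectors.
func cosine(a, b []float64) float64 {
	var dot, na, nb float64
	for i := range a {
		dot += a[i] * b[i]
		na += a[i] * a[i]
		nb += b[i] * b[i]
	}
	if na == 0 || nb == 0 {
		return 0
	}
	return dot / (math.Sqrt(na) * math.Sqrt(nb))
}

// findRelevantChunks mirrors the call in code.go: threshold filters weak
// matches, and topN == -1 returns every match above the threshold.
func findRelevantChunks(query []float64, chunks []chunk, topN int, threshold float64) []string {
	type scored struct {
		code  string
		score float64
	}
	var hits []scored
	for _, c := range chunks {
		if s := cosine(query, c.embedding); s >= threshold {
			hits = append(hits, scored{c.code, s})
		}
	}
	sort.Slice(hits, func(i, j int) bool { return hits[i].score > hits[j].score })
	if topN >= 0 && topN < len(hits) {
		hits = hits[:topN]
	}
	var out []string
	for _, h := range hits {
		out = append(out, h.code)
	}
	return out
}

func main() {
	chunks := []chunk{
		{"a.go", "func Add() {}", []float64{1, 0}},
		{"b.go", "func Sub() {}", []float64{0, 1}},
	}
	fmt.Println(findRelevantChunks([]float64{1, 0.1}, chunks, -1, 0.3))
}
```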

@@ -147,9 +153,10 @@ func handleCodeCommand(rootDependencies *RootDependencies) {
if err != nil {
spinnerLoadContextEmbedding.Stop()
fmt.Println(lipgloss_color.Red.Render(fmt.Sprintf("%v", err)))
- continue
+ continue startLoop
}

+ fmt.Println()
spinnerLoadContextEmbedding.Stop()
}

@@ -188,21 +195,14 @@ func handleCodeCommand(rootDependencies *RootDependencies) {

if err != nil {
fmt.Println(lipgloss_color.Red.Render(fmt.Sprintf("%v", err)))
- continue
+ continue startLoop
}

fmt.Print("\n\n")

if !rootDependencies.Config.RAG {
// Try to get full block code if block codes is summarized and incomplete
requestedContext, err = rootDependencies.Analyzer.TryGetInCompletedCodeBlocK(aiResponseBuilder.String())

- if err != nil {
- 	fmt.Println(lipgloss_color.Red.Render(fmt.Sprintf("%v", err)))
- 	continue
- }
-
- if requestedContext != "" {
+ if requestedContext != "" && err == nil {
aiResponseBuilder.Reset()

fmt.Println(lipgloss_color.BlueSky.Render("Trying to send above context files for getting code suggestion fromm AI...\n"))
@@ -220,7 +220,7 @@ func handleCodeCommand(rootDependencies *RootDependencies) {
changes, err := rootDependencies.Analyzer.ExtractCodeChanges(aiResponseBuilder.String())

if err != nil || changes == nil {
- fmt.Println(lipgloss_color.Gray.Render("no code blocks with a valid path detected to apply."))
+ fmt.Println(lipgloss_color.BlueSky.Render("\nno code blocks with a valid path detected to apply."))
continue
}
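`ExtractCodeChanges` evidently parses the AI response into per-file code changes and yields nothing when no block carries a valid path. A rough sketch of that kind of extraction, assuming fenced blocks annotated with a `file:` path on the fence line; that annotation format is a guess for illustration, not codai's actual convention:

```go
package main

import (
	"fmt"
	"regexp"
)

type codeChange struct {
	relativePath string
	code         string
}

// fence matches blocks of the form  ```go file: cmd/root.go ... ``` .
// The "file:" annotation is a hypothetical convention for this sketch.
var fence = regexp.MustCompile("(?s)```\\w*\\s+file:\\s*(\\S+)\\n(.*?)```")

func extractCodeChanges(response string) []codeChange {
	var changes []codeChange
	for _, m := range fence.FindAllStringSubmatch(response, -1) {
		changes = append(changes, codeChange{relativePath: m[1], code: m[2]})
	}
	return changes
}

func main() {
	resp := "```go file: main.go\npackage main\n```"
	fmt.Printf("%+v\n", extractCodeChanges(resp))
}
```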

@@ -265,9 +265,6 @@ func handleCodeCommand(rootDependencies *RootDependencies) {

}

- // Display token usage details in a boxed format after each AI request
- rootDependencies.TokenManagement.DisplayTokens(rootDependencies.Config.AIProviderConfig.ChatCompletionModel, rootDependencies.Config.AIProviderConfig.EmbeddingModel)

// If we need Update the context after apply changes
if updateContextNeeded {

2 changes: 1 addition & 1 deletion cmd/root.go
@@ -58,7 +58,7 @@ func handleRootCommand(cmd *cobra.Command) *RootDependencies {

rootDependencies.Config = config.LoadConfigs(cmd, rootDependencies.Cwd)

- rootDependencies.TokenManagement = providers.NewTokenManager(rootDependencies.Config.AIProviderConfig.MaxTokens)
+ rootDependencies.TokenManagement = providers.NewTokenManager()

rootDependencies.ChatHistory = providers.NewChatHistory()

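With `NewTokenManager()` no longer taking `max_tokens`, and `DisplayTokens` now receiving the provider name, both model names, and the RAG flag, the token manager has what it needs to price chat and embedding usage separately, presumably against the per-model details embedded below in `model_details.tmpl`. A hedged sketch of the usual token-cost arithmetic; the prices and names below are placeholders, not codai's real table:

```go
package main

import "fmt"

// pricePer1M holds illustrative USD prices per million tokens;
// real values would come from something like model_details.tmpl.
var pricePer1M = map[string]struct{ input, output float64 }{
	"gpt-4o":                 {2.50, 10.00},
	"text-embedding-3-small": {0.02, 0},
}

type tokenManager struct {
	inputTokens, outputTokens int
}

func newTokenManager() *tokenManager { return &tokenManager{} }

// add accumulates prompt and completion token counts from one request.
func (t *tokenManager) add(in, out int) {
	t.inputTokens += in
	t.outputTokens += out
}

// cost prices the accumulated usage for one model.
func (t *tokenManager) cost(model string) float64 {
	p := pricePer1M[model]
	return float64(t.inputTokens)/1e6*p.input + float64(t.outputTokens)/1e6*p.output
}

func main() {
	tm := newTokenManager()
	tm.add(12000, 3500) // e.g. one chat completion round-trip
	fmt.Printf("gpt-4o cost: $%.4f\n", tm.cost("gpt-4o"))
}
```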
3 changes: 3 additions & 0 deletions embed_data/embed.go
@@ -5,6 +5,9 @@ import _ "embed"
//go:embed prompts/code_block_prompt.tmpl
var CodeBlockTemplate []byte

+ //go:embed models_details/model_details.tmpl
+ var ModelDetails []byte

//go:embed tree-sitter/queries/csharp.scm
var CSharpQuery []byte

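The new `//go:embed` directive compiles `models_details/model_details.tmpl` into the binary as `ModelDetails`, so the model-details template ships with codai and needs no runtime file lookup. A small sketch of rendering such an embedded template with the standard `text/template` package; the template text and field names are invented for illustration:

```go
package main

import (
	"os"
	"text/template"
)

// In the real package this byte slice is populated by //go:embed;
// here it is inlined so the sketch runs standalone.
var modelDetails = []byte("Model: {{.Name}} | input ${{.InputPrice}}/1M tokens\n")

func main() {
	tmpl := template.Must(template.New("details").Parse(string(modelDetails)))
	// Field names are hypothetical, not codai's actual template data.
	_ = tmpl.Execute(os.Stdout, struct {
		Name       string
		InputPrice float64
	}{Name: "gpt-4o", InputPrice: 2.50})
}
```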