From c00e7d3f654965eef9fd26cecbb5750d21f10791 Mon Sep 17 00:00:00 2001 From: Novice <857526207@qq.com> Date: Tue, 11 Feb 2025 15:48:55 +0800 Subject: [PATCH 01/12] fix: retry log running error (#13472) Co-authored-by: Novice Lee --- api/core/workflow/graph_engine/graph_engine.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/api/core/workflow/graph_engine/graph_engine.py b/api/core/workflow/graph_engine/graph_engine.py index db1e01f14fda59..be7ccad4eb332d 100644 --- a/api/core/workflow/graph_engine/graph_engine.py +++ b/api/core/workflow/graph_engine/graph_engine.py @@ -648,7 +648,7 @@ def _run_node( retries += 1 route_node_state.node_run_result = run_result yield NodeRunRetryEvent( - id=node_instance.id, + id=str(uuid.uuid4()), node_id=node_instance.node_id, node_type=node_instance.node_type, node_data=node_instance.node_data, @@ -663,7 +663,7 @@ def _run_node( start_at=retry_start_at, ) time.sleep(retry_interval) - continue + break route_node_state.set_finished(run_result=run_result) if run_result.status == WorkflowNodeExecutionStatus.FAILED: From f96b4f287aa8bedd8a557290ec4bb38e21b48a36 Mon Sep 17 00:00:00 2001 From: Novice <857526207@qq.com> Date: Tue, 11 Feb 2025 16:35:21 +0800 Subject: [PATCH 02/12] fix: iteration node log time error (#13511) --- api/core/app/task_pipeline/workflow_cycle_manage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/core/app/task_pipeline/workflow_cycle_manage.py b/api/core/app/task_pipeline/workflow_cycle_manage.py index dcc364d22766e6..ad29201df384be 100644 --- a/api/core/app/task_pipeline/workflow_cycle_manage.py +++ b/api/core/app/task_pipeline/workflow_cycle_manage.py @@ -842,4 +842,4 @@ def _get_workflow_node_execution(self, session: Session, node_execution_id: str) if node_execution_id not in self._workflow_node_executions: raise ValueError(f"Workflow node execution not found: {node_execution_id}") cached_workflow_node_execution = self._workflow_node_executions[node_execution_id] - return cached_workflow_node_execution + return session.merge(cached_workflow_node_execution) From 423fb2d7bce20b0211a1631a29a2a38b20d19af9 Mon Sep 17 00:00:00 2001 From: Wu Jiayang <62842862+Wu-Jiayang@users.noreply.github.com> Date: Tue, 11 Feb 2025 18:44:56 +0800 Subject: [PATCH 03/12] Ensure the 'inputs' field in /chat-messages takes effect every time (#7955) Co-authored-by: Your Name Co-authored-by: -LAN- --- api/core/app/apps/advanced_chat/app_generator.py | 4 +--- api/core/app/apps/agent_chat/app_generator.py | 4 +--- api/core/app/apps/chat/app_generator.py | 4 +--- 3 files changed, 3 insertions(+), 9 deletions(-) diff --git a/api/core/app/apps/advanced_chat/app_generator.py b/api/core/app/apps/advanced_chat/app_generator.py index 36f71fd47879c9..930d4c80bcb465 100644 --- a/api/core/app/apps/advanced_chat/app_generator.py +++ b/api/core/app/apps/advanced_chat/app_generator.py @@ -140,9 +140,7 @@ def generate( app_config=app_config, file_upload_config=file_extra_config, conversation_id=conversation.id if conversation else None, - inputs=conversation.inputs - if conversation - else self._prepare_user_inputs( + inputs=self._prepare_user_inputs( user_inputs=inputs, variables=app_config.variables, tenant_id=app_model.tenant_id ), query=query, diff --git a/api/core/app/apps/agent_chat/app_generator.py b/api/core/app/apps/agent_chat/app_generator.py index e7f622263eb084..0b3704b018f78c 100644 --- a/api/core/app/apps/agent_chat/app_generator.py +++ b/api/core/app/apps/agent_chat/app_generator.py @@ -148,9 +148,7 @@ def generate( 
model_conf=ModelConfigConverter.convert(app_config),
             file_upload_config=file_extra_config,
             conversation_id=conversation.id if conversation else None,
-            inputs=conversation.inputs
-            if conversation
-            else self._prepare_user_inputs(
+            inputs=self._prepare_user_inputs(
                 user_inputs=inputs, variables=app_config.variables, tenant_id=app_model.tenant_id
             ),
             query=query,
diff --git a/api/core/app/apps/chat/app_generator.py b/api/core/app/apps/chat/app_generator.py
index dc7cf3667a0175..0f359f36d70534 100644
--- a/api/core/app/apps/chat/app_generator.py
+++ b/api/core/app/apps/chat/app_generator.py
@@ -141,9 +141,7 @@ def generate(
             model_conf=ModelConfigConverter.convert(app_config),
             file_upload_config=file_extra_config,
             conversation_id=conversation.id if conversation else None,
-            inputs=conversation.inputs
-            if conversation
-            else self._prepare_user_inputs(
+            inputs=self._prepare_user_inputs(
                 user_inputs=inputs, variables=app_config.variables, tenant_id=app_model.tenant_id
             ),
             query=query,

From bde756a1ab1bc2c8c71c91e5e3abc788b6a96f55 Mon Sep 17 00:00:00 2001
From: jiangbo721 <365065261@qq.com>
Date: Tue, 11 Feb 2025 22:05:29 +0800
Subject: [PATCH 04/12] chore: Remove useless brackets and format code (#13479)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: 刘江波
---
 api/core/provider_manager.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/api/core/provider_manager.py b/api/core/provider_manager.py
index 2430d598ffcb7d..479074e997442c 100644
--- a/api/core/provider_manager.py
+++ b/api/core/provider_manager.py
@@ -452,11 +452,9 @@ def _get_all_provider_load_balancing_configs(tenant_id: str) -> dict[str, list[L
         provider_name_to_provider_load_balancing_model_configs_dict = defaultdict(list)
         for provider_load_balancing_config in provider_load_balancing_configs:
-            (
-                provider_name_to_provider_load_balancing_model_configs_dict[
-                    provider_load_balancing_config.provider_name
-                ].append(provider_load_balancing_config)
-            )
+            provider_name_to_provider_load_balancing_model_configs_dict[
+                provider_load_balancing_config.provider_name
+            ].append(provider_load_balancing_config)

         return provider_name_to_provider_load_balancing_model_configs_dict

From 786550bdc9014ad45979ac5b69ada71613c482ac Mon Sep 17 00:00:00 2001
From: Riddhimaan-Senapati <114703025+Riddhimaan-Senapati@users.noreply.github.com>
Date: Tue, 11 Feb 2025 20:15:15 -0500
Subject: [PATCH 05/12] fix: changed topics/keywords to topic/keywords (#13544)

---
 web/hooks/use-metadata.ts             | 8 ++++----
 web/i18n/de-DE/dataset-documents.ts   | 4 ++--
 web/i18n/en-US/dataset-documents.ts   | 2 +-
 web/i18n/es-ES/dataset-documents.ts   | 2 +-
 web/i18n/fa-IR/dataset-documents.ts   | 2 +-
 web/i18n/fr-FR/dataset-documents.ts   | 2 +-
 web/i18n/hi-IN/dataset-documents.ts   | 2 +-
 web/i18n/it-IT/dataset-documents.ts   | 2 +-
 web/i18n/ja-JP/dataset-documents.ts   | 2 +-
 web/i18n/ko-KR/dataset-documents.ts   | 2 +-
 web/i18n/pl-PL/dataset-documents.ts   | 2 +-
 web/i18n/pt-BR/dataset-documents.ts   | 2 +-
 web/i18n/ro-RO/dataset-documents.ts   | 2 +-
 web/i18n/ru-RU/dataset-documents.ts   | 2 +-
 web/i18n/sl-SI/dataset-documents.ts   | 2 +-
 web/i18n/th-TH/dataset-documents.ts   | 2 +-
 web/i18n/tr-TR/dataset-documents.ts   | 2 +-
 web/i18n/uk-UA/dataset-documents.ts   | 2 +-
 web/i18n/vi-VN/dataset-documents.ts   | 2 +-
 web/i18n/zh-Hans/dataset-documents.ts | 2 +-
 web/i18n/zh-Hant/dataset-documents.ts | 2 +-
 21 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/web/hooks/use-metadata.ts b/web/hooks/use-metadata.ts
index
5d1d86c20e7250..92eb733e359aa2 100644 --- a/web/hooks/use-metadata.ts +++ b/web/hooks/use-metadata.ts @@ -65,7 +65,7 @@ export const useMetadataMap = (): MetadataMap => { }, 'author/publisher': { label: t(`${fieldPrefix}.webPage.authorPublisher`) }, 'publish_date': { label: t(`${fieldPrefix}.webPage.publishDate`) }, - 'topics/keywords': { label: t(`${fieldPrefix}.webPage.topicsKeywords`) }, + 'topic/keywords': { label: t(`${fieldPrefix}.webPage.topicKeywords`) }, 'description': { label: t(`${fieldPrefix}.webPage.description`) }, }, }, @@ -85,7 +85,7 @@ export const useMetadataMap = (): MetadataMap => { }, 'volume/issue/page_numbers': { label: t(`${fieldPrefix}.paper.volumeIssuePage`) }, 'doi': { label: t(`${fieldPrefix}.paper.DOI`) }, - 'topics/keywords': { label: t(`${fieldPrefix}.paper.topicsKeywords`) }, + 'topic/keywords': { label: t(`${fieldPrefix}.paper.topicKeywords`) }, 'abstract': { label: t(`${fieldPrefix}.paper.abstract`), inputType: 'textarea', @@ -158,8 +158,8 @@ export const useMetadataMap = (): MetadataMap => { 'start_date': { label: t(`${fieldPrefix}.IMChat.startDate`) }, 'end_date': { label: t(`${fieldPrefix}.IMChat.endDate`) }, 'participants': { label: t(`${fieldPrefix}.IMChat.participants`) }, - 'topicsKeywords': { - label: t(`${fieldPrefix}.IMChat.topicsKeywords`), + 'topicKeywords': { + label: t(`${fieldPrefix}.IMChat.topicKeywords`), inputType: 'textarea', }, 'fileType': { label: t(`${fieldPrefix}.IMChat.fileType`) }, diff --git a/web/i18n/de-DE/dataset-documents.ts b/web/i18n/de-DE/dataset-documents.ts index 16bb6349cf8f15..6cfc5147a36c3c 100644 --- a/web/i18n/de-DE/dataset-documents.ts +++ b/web/i18n/de-DE/dataset-documents.ts @@ -133,7 +133,7 @@ const translation = { language: 'Sprache', authorPublisher: 'Autor/Verlag', publishDate: 'Veröffentlichungsdatum', - topicsKeywords: 'Themen/Schlüsselwörter', + topicKeywords: 'Themen/Schlüsselwörter', description: 'Beschreibung', }, paper: { @@ -144,7 +144,7 @@ const translation = { journalConferenceName: 'Zeitschrift/Konferenzname', volumeIssuePage: 'Band/Ausgabe/Seite', DOI: 'DOI', - topicsKeywords: 'Themen/Schlüsselwörter', + topicKeywords: 'Themen/Schlüsselwörter', abstract: 'Zusammenfassung', }, socialMediaPost: { diff --git a/web/i18n/en-US/dataset-documents.ts b/web/i18n/en-US/dataset-documents.ts index d315261c361da0..d7fd70c089eb54 100644 --- a/web/i18n/en-US/dataset-documents.ts +++ b/web/i18n/en-US/dataset-documents.ts @@ -133,7 +133,7 @@ const translation = { language: 'Language', authorPublisher: 'Author/Publisher', publishDate: 'Publish Date', - topicsKeywords: 'Topics/Keywords', + topicKeywords: 'Topic/Keywords', description: 'Description', }, paper: { diff --git a/web/i18n/es-ES/dataset-documents.ts b/web/i18n/es-ES/dataset-documents.ts index ea4690c5f53a38..cd5bb361971883 100644 --- a/web/i18n/es-ES/dataset-documents.ts +++ b/web/i18n/es-ES/dataset-documents.ts @@ -133,7 +133,7 @@ const translation = { language: 'Idioma', authorPublisher: 'Autor/Editorial', publishDate: 'Fecha de publicación', - topicsKeywords: 'Temas/Palabras clave', + topicKeywords: 'Temas/Palabras clave', description: 'Descripción', }, paper: { diff --git a/web/i18n/fa-IR/dataset-documents.ts b/web/i18n/fa-IR/dataset-documents.ts index ff9e47f71aaf16..85e1e0a4aaa74e 100644 --- a/web/i18n/fa-IR/dataset-documents.ts +++ b/web/i18n/fa-IR/dataset-documents.ts @@ -132,7 +132,7 @@ const translation = { language: 'زبان', authorPublisher: 'نویسنده/ناشر', publishDate: 'تاریخ انتشار', - topicsKeywords: 'موضوعات/کلیدواژه‌ها', + topicKeywords: 
'موضوعات/کلیدواژه‌ها', description: 'توضیحات', }, paper: { diff --git a/web/i18n/fr-FR/dataset-documents.ts b/web/i18n/fr-FR/dataset-documents.ts index 614590de536036..7a795202ed01dd 100644 --- a/web/i18n/fr-FR/dataset-documents.ts +++ b/web/i18n/fr-FR/dataset-documents.ts @@ -133,7 +133,7 @@ const translation = { language: 'Langue', authorPublisher: 'Auteur/Éditeur', publishDate: 'Date de publication', - topicsKeywords: 'Sujets/Mots-clés', + topicKeywords: 'Sujets/Mots-clés', description: 'Description', }, paper: { diff --git a/web/i18n/hi-IN/dataset-documents.ts b/web/i18n/hi-IN/dataset-documents.ts index e01b3ebb1326fe..35bcb0aad27387 100644 --- a/web/i18n/hi-IN/dataset-documents.ts +++ b/web/i18n/hi-IN/dataset-documents.ts @@ -134,7 +134,7 @@ const translation = { language: 'भाषा', authorPublisher: 'लेखक/प्रकाशक', publishDate: 'प्रकाशन तिथि', - topicsKeywords: 'विषय/कीवर्ड्स', + topicKeywords: 'विषय/कीवर्ड्स', description: 'विवरण', }, paper: { diff --git a/web/i18n/it-IT/dataset-documents.ts b/web/i18n/it-IT/dataset-documents.ts index 06c5a2deedb9c1..b9afb1ea75828e 100644 --- a/web/i18n/it-IT/dataset-documents.ts +++ b/web/i18n/it-IT/dataset-documents.ts @@ -134,7 +134,7 @@ const translation = { language: 'Lingua', authorPublisher: 'Autore/Editore', publishDate: 'Data di Pubblicazione', - topicsKeywords: 'Argomenti/Parole Chiave', + topicKeywords: 'Argomenti/Parole Chiave', description: 'Descrizione', }, paper: { diff --git a/web/i18n/ja-JP/dataset-documents.ts b/web/i18n/ja-JP/dataset-documents.ts index 0ca93624332266..270c61911600e6 100644 --- a/web/i18n/ja-JP/dataset-documents.ts +++ b/web/i18n/ja-JP/dataset-documents.ts @@ -133,7 +133,7 @@ const translation = { language: '言語', authorPublisher: '著者/出版社', publishDate: '公開日', - topicsKeywords: 'トピック/キーワード', + topicKeywords: 'トピック/キーワード', description: '説明', }, paper: { diff --git a/web/i18n/ko-KR/dataset-documents.ts b/web/i18n/ko-KR/dataset-documents.ts index ec0b2bb62db42d..6f6cb451cd44c2 100644 --- a/web/i18n/ko-KR/dataset-documents.ts +++ b/web/i18n/ko-KR/dataset-documents.ts @@ -132,7 +132,7 @@ const translation = { language: '언어', authorPublisher: '저자/출판사', publishDate: '공개일', - topicsKeywords: '주제/키워드', + topicKeywords: '주제/키워드', description: '설명', }, paper: { diff --git a/web/i18n/pl-PL/dataset-documents.ts b/web/i18n/pl-PL/dataset-documents.ts index d5292fd2c41f2e..37f373ac93f339 100644 --- a/web/i18n/pl-PL/dataset-documents.ts +++ b/web/i18n/pl-PL/dataset-documents.ts @@ -134,7 +134,7 @@ const translation = { language: 'Język', authorPublisher: 'Autor/Wydawca', publishDate: 'Data publikacji', - topicsKeywords: 'Tematy/Słowa kluczowe', + topicKeywords: 'Tematy/Słowa kluczowe', description: 'Opis', }, paper: { diff --git a/web/i18n/pt-BR/dataset-documents.ts b/web/i18n/pt-BR/dataset-documents.ts index 9acfca302960b3..9a3d13bcab9b02 100644 --- a/web/i18n/pt-BR/dataset-documents.ts +++ b/web/i18n/pt-BR/dataset-documents.ts @@ -133,7 +133,7 @@ const translation = { language: 'Idioma', authorPublisher: 'Autor/Editor', publishDate: 'Data de Publicação', - topicsKeywords: 'Tópicos/Palavras-chave', + topicKeywords: 'Tópicos/Palavras-chave', description: 'Descrição', }, paper: { diff --git a/web/i18n/ro-RO/dataset-documents.ts b/web/i18n/ro-RO/dataset-documents.ts index acf40ec4aa4f89..e42be875020b18 100644 --- a/web/i18n/ro-RO/dataset-documents.ts +++ b/web/i18n/ro-RO/dataset-documents.ts @@ -133,7 +133,7 @@ const translation = { language: 'Limbă', authorPublisher: 'Autor/Editor', publishDate: 'Data publicării', - topicsKeywords: 
'Subiecte/Cuvinte cheie', + topicKeywords: 'Subiecte/Cuvinte cheie', description: 'Descriere', }, paper: { diff --git a/web/i18n/ru-RU/dataset-documents.ts b/web/i18n/ru-RU/dataset-documents.ts index f344a7e48c64c7..735266c0876ec1 100644 --- a/web/i18n/ru-RU/dataset-documents.ts +++ b/web/i18n/ru-RU/dataset-documents.ts @@ -133,7 +133,7 @@ const translation = { language: 'Язык', authorPublisher: 'Автор/Издатель', publishDate: 'Дата публикации', - topicsKeywords: 'Темы/Ключевые слова', + topicKeywords: 'Темы/Ключевые слова', description: 'Описание', }, paper: { diff --git a/web/i18n/sl-SI/dataset-documents.ts b/web/i18n/sl-SI/dataset-documents.ts index 3953e9a57453b7..78d63c9e29da2c 100644 --- a/web/i18n/sl-SI/dataset-documents.ts +++ b/web/i18n/sl-SI/dataset-documents.ts @@ -133,7 +133,7 @@ const translation = { language: 'Jezik', authorPublisher: 'Avtor/Založnik', publishDate: 'Datum objave', - topicsKeywords: 'Teme/Ključne besede', + topicKeywords: 'Teme/Ključne besede', description: 'Opis', }, paper: { diff --git a/web/i18n/th-TH/dataset-documents.ts b/web/i18n/th-TH/dataset-documents.ts index a7ea67c11b6e13..2f4c6d5c9cf5a6 100644 --- a/web/i18n/th-TH/dataset-documents.ts +++ b/web/i18n/th-TH/dataset-documents.ts @@ -132,7 +132,7 @@ const translation = { language: 'ภาษา', authorPublisher: 'ผู้เขียน/สํานักพิมพ์', publishDate: 'วันที่เผยแพร่', - topicsKeywords: 'หัวข้อ/คําสําคัญ', + topicKeywords: 'หัวข้อ/คําสําคัญ', description: 'คำอธิบาย', }, paper: { diff --git a/web/i18n/tr-TR/dataset-documents.ts b/web/i18n/tr-TR/dataset-documents.ts index 7a297d9093447b..f643375334d41f 100644 --- a/web/i18n/tr-TR/dataset-documents.ts +++ b/web/i18n/tr-TR/dataset-documents.ts @@ -132,7 +132,7 @@ const translation = { language: 'Dil', authorPublisher: 'Yazar/Yayıncı', publishDate: 'Yayın Tarihi', - topicsKeywords: 'Konular/Anahtar Kelimeler', + topicKeywords: 'Konular/Anahtar Kelimeler', description: 'Açıklama', }, paper: { diff --git a/web/i18n/uk-UA/dataset-documents.ts b/web/i18n/uk-UA/dataset-documents.ts index 192253b264064f..da012cbb570d07 100644 --- a/web/i18n/uk-UA/dataset-documents.ts +++ b/web/i18n/uk-UA/dataset-documents.ts @@ -132,7 +132,7 @@ const translation = { language: 'Мова', authorPublisher: 'Автор/видавець', publishDate: 'Дата публікації', - topicsKeywords: 'Теми/ключові слова', + topicKeywords: 'Теми/ключові слова', description: 'Опис', }, paper: { diff --git a/web/i18n/vi-VN/dataset-documents.ts b/web/i18n/vi-VN/dataset-documents.ts index 07e5c5c6e38462..6e13c1185f10c3 100644 --- a/web/i18n/vi-VN/dataset-documents.ts +++ b/web/i18n/vi-VN/dataset-documents.ts @@ -132,7 +132,7 @@ const translation = { language: 'Ngôn ngữ', authorPublisher: 'Tác giả/Nhà xuất bản', publishDate: 'Ngày xuất bản', - topicsKeywords: 'Chủ đề/Từ khóa', + topicKeywords: 'Chủ đề/Từ khóa', description: 'Mô tả', }, paper: { diff --git a/web/i18n/zh-Hans/dataset-documents.ts b/web/i18n/zh-Hans/dataset-documents.ts index 9949f33d8737d2..5ff1b50f8592b7 100644 --- a/web/i18n/zh-Hans/dataset-documents.ts +++ b/web/i18n/zh-Hans/dataset-documents.ts @@ -132,7 +132,7 @@ const translation = { language: '语言', authorPublisher: '作者/出版商', publishDate: '发布日期', - topicsKeywords: '主题/关键词', + topicKeywords: '主题/关键词', description: '描述', }, paper: { diff --git a/web/i18n/zh-Hant/dataset-documents.ts b/web/i18n/zh-Hant/dataset-documents.ts index 8a6c1f79242fd9..5ad2c8f61f77db 100644 --- a/web/i18n/zh-Hant/dataset-documents.ts +++ b/web/i18n/zh-Hant/dataset-documents.ts @@ -132,7 +132,7 @@ const translation = { language: '語言', 
authorPublisher: '作者/出版商', publishDate: '釋出日期', - topicsKeywords: '主題/關鍵詞', + topicKeywords: '主題/關鍵詞', description: '描述', }, paper: { From 0751ad1eeb901200b5c89a684e854fab10cc0300 Mon Sep 17 00:00:00 2001 From: Bowen Liang Date: Wed, 12 Feb 2025 13:53:51 +0800 Subject: [PATCH 06/12] feat(vdb): add HNSW vector index for TiDB vector store with TiFlash (#12043) --- .github/workflows/expose_service_ports.sh | 2 +- .github/workflows/vdb-tests.yml | 14 ++++- .gitignore | 1 + .../datasource/vdb/tidb_vector/tidb_vector.py | 62 ++++++++++++------- .../vdb/tidb_vector/check_tiflash_ready.py | 59 ++++++++++++++++++ docker/docker-compose-template.yaml | 10 --- docker/docker-compose.yaml | 10 --- docker/tidb/config/pd.toml | 4 ++ docker/tidb/config/tiflash-learner.toml | 13 ++++ docker/tidb/config/tiflash.toml | 19 ++++++ docker/tidb/docker-compose.yaml | 62 +++++++++++++++++++ 11 files changed, 211 insertions(+), 45 deletions(-) create mode 100644 api/tests/integration_tests/vdb/tidb_vector/check_tiflash_ready.py create mode 100644 docker/tidb/config/pd.toml create mode 100644 docker/tidb/config/tiflash-learner.toml create mode 100644 docker/tidb/config/tiflash.toml create mode 100644 docker/tidb/docker-compose.yaml diff --git a/.github/workflows/expose_service_ports.sh b/.github/workflows/expose_service_ports.sh index d3146cd90dc02b..16f24439e6e826 100755 --- a/.github/workflows/expose_service_ports.sh +++ b/.github/workflows/expose_service_ports.sh @@ -9,6 +9,6 @@ yq eval '.services["pgvecto-rs"].ports += ["5431:5432"]' -i docker/docker-compos yq eval '.services["elasticsearch"].ports += ["9200:9200"]' -i docker/docker-compose.yaml yq eval '.services.couchbase-server.ports += ["8091-8096:8091-8096"]' -i docker/docker-compose.yaml yq eval '.services.couchbase-server.ports += ["11210:11210"]' -i docker/docker-compose.yaml -yq eval '.services.tidb.ports += ["4000:4000"]' -i docker/docker-compose.yaml +yq eval '.services.tidb.ports += ["4000:4000"]' -i docker/tidb/docker-compose.yaml echo "Ports exposed for sandbox, weaviate, tidb, qdrant, chroma, milvus, pgvector, pgvecto-rs, elasticsearch, couchbase" diff --git a/.github/workflows/vdb-tests.yml b/.github/workflows/vdb-tests.yml index b18316bcd6a5b0..5e3f7a557aa6db 100644 --- a/.github/workflows/vdb-tests.yml +++ b/.github/workflows/vdb-tests.yml @@ -54,7 +54,15 @@ jobs: - name: Expose Service Ports run: sh .github/workflows/expose_service_ports.sh - - name: Set up Vector Stores (TiDB, Weaviate, Qdrant, PGVector, Milvus, PgVecto-RS, Chroma, MyScale, ElasticSearch, Couchbase) + - name: Set up Vector Store (TiDB) + uses: hoverkraft-tech/compose-action@v2.0.2 + with: + compose-file: docker/tidb/docker-compose.yaml + services: | + tidb + tiflash + + - name: Set up Vector Stores (Weaviate, Qdrant, PGVector, Milvus, PgVecto-RS, Chroma, MyScale, ElasticSearch, Couchbase) uses: hoverkraft-tech/compose-action@v2.0.2 with: compose-file: | @@ -70,7 +78,9 @@ jobs: pgvector chroma elasticsearch - tidb + + - name: Check TiDB Ready + run: poetry run -P api python api/tests/integration_tests/vdb/tidb_vector/check_tiflash_ready.py - name: Test Vector Stores run: poetry run -P api bash dev/pytest/pytest_vdb.sh diff --git a/.gitignore b/.gitignore index 1423bfee56e922..a6dad201e8affa 100644 --- a/.gitignore +++ b/.gitignore @@ -163,6 +163,7 @@ docker/volumes/db/data/* docker/volumes/redis/data/* docker/volumes/weaviate/* docker/volumes/qdrant/* +docker/tidb/volumes/* docker/volumes/etcd/* docker/volumes/minio/* docker/volumes/milvus/* diff --git 
a/api/core/rag/datasource/vdb/tidb_vector/tidb_vector.py b/api/core/rag/datasource/vdb/tidb_vector/tidb_vector.py index be3a417390e802..6dd4be65c8e64d 100644 --- a/api/core/rag/datasource/vdb/tidb_vector/tidb_vector.py +++ b/api/core/rag/datasource/vdb/tidb_vector/tidb_vector.py @@ -9,6 +9,7 @@ from sqlalchemy.orm import Session, declarative_base from configs import dify_config +from core.rag.datasource.vdb.field import Field from core.rag.datasource.vdb.vector_base import BaseVector from core.rag.datasource.vdb.vector_factory import AbstractVectorFactory from core.rag.datasource.vdb.vector_type import VectorType @@ -54,14 +55,13 @@ def _table(self, dim: int) -> Table: return Table( self._collection_name, self._orm_base.metadata, - Column("id", String(36), primary_key=True, nullable=False), + Column(Field.PRIMARY_KEY.value, String(36), primary_key=True, nullable=False), Column( - "vector", + Field.VECTOR.value, VectorType(dim), nullable=False, - comment="" if self._distance_func is None else f"hnsw(distance={self._distance_func})", ), - Column("text", TEXT, nullable=False), + Column(Field.TEXT_KEY.value, TEXT, nullable=False), Column("meta", JSON, nullable=False), Column("create_time", DateTime, server_default=sqlalchemy.text("CURRENT_TIMESTAMP")), Column( @@ -96,6 +96,7 @@ def _create_collection(self, dimension: int): collection_exist_cache_key = "vector_indexing_{}".format(self._collection_name) if redis_client.get(collection_exist_cache_key): return + tidb_dist_func = self._get_distance_func() with Session(self._engine) as session: session.begin() create_statement = sql_text(f""" @@ -104,14 +105,14 @@ def _create_collection(self, dimension: int): text TEXT NOT NULL, meta JSON NOT NULL, doc_id VARCHAR(64) AS (JSON_UNQUOTE(JSON_EXTRACT(meta, '$.doc_id'))) STORED, - KEY (doc_id), - vector VECTOR({dimension}) NOT NULL COMMENT "hnsw(distance={self._distance_func})", + vector VECTOR({dimension}) NOT NULL, create_time DATETIME DEFAULT CURRENT_TIMESTAMP, - update_time DATETIME DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP + update_time DATETIME DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + KEY (doc_id), + VECTOR INDEX idx_vector (({tidb_dist_func}(vector))) USING HNSW ); """) session.execute(create_statement) - # tidb vector not support 'CREATE/ADD INDEX' now session.commit() redis_client.set(collection_exist_cache_key, 1, ex=3600) @@ -194,23 +195,30 @@ def search_by_vector(self, query_vector: list[float], **kwargs: Any) -> list[Doc ) docs = [] - if self._distance_func == "l2": - tidb_func = "Vec_l2_distance" - elif self._distance_func == "cosine": - tidb_func = "Vec_Cosine_distance" - else: - tidb_func = "Vec_Cosine_distance" + tidb_dist_func = self._get_distance_func() with Session(self._engine) as session: - select_statement = sql_text( - f"""SELECT meta, text, distance FROM ( - SELECT meta, text, {tidb_func}(vector, "{query_vector_str}") as distance - FROM {self._collection_name} - ORDER BY distance - LIMIT {top_k} - ) t WHERE distance < {distance};""" + select_statement = sql_text(f""" + SELECT meta, text, distance + FROM ( + SELECT + meta, + text, + {tidb_dist_func}(vector, :query_vector_str) AS distance + FROM {self._collection_name} + ORDER BY distance ASC + LIMIT :top_k + ) t + WHERE distance <= :distance + """) + res = session.execute( + select_statement, + params={ + "query_vector_str": query_vector_str, + "distance": distance, + "top_k": top_k, + }, ) - res = session.execute(select_statement) results = [(row[0], row[1], row[2]) for row in res] for meta, text, 
distance in results:
                metadata = json.loads(meta)
@@ -227,6 +235,16 @@ def delete(self) -> None:
             session.execute(sql_text(f"""DROP TABLE IF EXISTS {self._collection_name};"""))
             session.commit()

+    def _get_distance_func(self) -> str:
+        match self._distance_func:
+            case "l2":
+                tidb_dist_func = "VEC_L2_DISTANCE"
+            case "cosine":
+                tidb_dist_func = "VEC_COSINE_DISTANCE"
+            case _:
+                tidb_dist_func = "VEC_COSINE_DISTANCE"
+        return tidb_dist_func
+

 class TiDBVectorFactory(AbstractVectorFactory):
     def init_vector(self, dataset: Dataset, attributes: list, embeddings: Embeddings) -> TiDBVector:
diff --git a/api/tests/integration_tests/vdb/tidb_vector/check_tiflash_ready.py b/api/tests/integration_tests/vdb/tidb_vector/check_tiflash_ready.py
new file mode 100644
index 00000000000000..294a168310e732
--- /dev/null
+++ b/api/tests/integration_tests/vdb/tidb_vector/check_tiflash_ready.py
@@ -0,0 +1,59 @@
+import time
+
+import pymysql
+
+
+def check_tiflash_ready() -> bool:
+    try:
+        connection = pymysql.connect(
+            host="localhost",
+            port=4000,
+            user="root",
+            password="",
+        )
+
+        with connection.cursor() as cursor:
+            # Doc reference:
+            # https://docs.pingcap.com/zh/tidb/stable/information-schema-cluster-hardware
+            select_tiflash_query = """
+                SELECT * FROM information_schema.cluster_hardware
+                WHERE TYPE='tiflash'
+                LIMIT 1;
+            """
+            cursor.execute(select_tiflash_query)
+            result = cursor.fetchall()
+            return result is not None and len(result) > 0
+    except Exception as e:
+        print(f"TiFlash is not ready. Exception: {e}")
+        return False
+    finally:
+        if connection:
+            connection.close()
+
+
+def main():
+    max_attempts = 30
+    retry_interval_seconds = 2
+    is_tiflash_ready = False
+    for attempt in range(max_attempts):
+        try:
+            is_tiflash_ready = check_tiflash_ready()
+        except Exception as e:
+            print(f"TiFlash is not ready. Exception: {e}")
+            is_tiflash_ready = False
+
+        if is_tiflash_ready:
+            break
+        else:
+            print(f"Attempt {attempt + 1} failed, retry in {retry_interval_seconds} seconds...")
+            time.sleep(retry_interval_seconds)
+
+    if is_tiflash_ready:
+        print("TiFlash is ready in TiDB.")
+    else:
+        print(f"TiFlash is not ready in TiDB after {max_attempts} attempts.")
+        exit(1)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/docker/docker-compose-template.yaml b/docker/docker-compose-template.yaml
index c10c4d80d85300..fc4e7d9c896720 100644
--- a/docker/docker-compose-template.yaml
+++ b/docker/docker-compose-template.yaml
@@ -199,16 +199,6 @@ services:
       - '${EXPOSE_NGINX_PORT:-80}:${NGINX_PORT:-80}'
       - '${EXPOSE_NGINX_SSL_PORT:-443}:${NGINX_SSL_PORT:-443}'

-  # The TiDB vector store.
-  # For production use, please refer to https://github.com/pingcap/tidb-docker-compose
-  tidb:
-    image: pingcap/tidb:v8.4.0
-    profiles:
-      - tidb
-    command:
-      - --store=unistore
-    restart: always
-
   # The Weaviate vector store.
   weaviate:
     image: semitechnologies/weaviate:1.19.0
diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml
index 33379e4b8c88c2..38e478380682fa 100644
--- a/docker/docker-compose.yaml
+++ b/docker/docker-compose.yaml
@@ -594,16 +594,6 @@ services:
       - '${EXPOSE_NGINX_PORT:-80}:${NGINX_PORT:-80}'
       - '${EXPOSE_NGINX_SSL_PORT:-443}:${NGINX_SSL_PORT:-443}'

-  # The TiDB vector store.
-  # For production use, please refer to https://github.com/pingcap/tidb-docker-compose
-  tidb:
-    image: pingcap/tidb:v8.4.0
-    profiles:
-      - tidb
-    command:
-      - --store=unistore
-    restart: always
-
   # The Weaviate vector store.
weaviate: image: semitechnologies/weaviate:1.19.0 diff --git a/docker/tidb/config/pd.toml b/docker/tidb/config/pd.toml new file mode 100644 index 00000000000000..042b251e464baa --- /dev/null +++ b/docker/tidb/config/pd.toml @@ -0,0 +1,4 @@ +# PD Configuration File reference: +# https://docs.pingcap.com/tidb/stable/pd-configuration-file#pd-configuration-file +[replication] +max-replicas = 1 \ No newline at end of file diff --git a/docker/tidb/config/tiflash-learner.toml b/docker/tidb/config/tiflash-learner.toml new file mode 100644 index 00000000000000..5098829aaa6dbb --- /dev/null +++ b/docker/tidb/config/tiflash-learner.toml @@ -0,0 +1,13 @@ +# TiFlash tiflash-learner.toml Configuration File reference: +# https://docs.pingcap.com/tidb/stable/tiflash-configuration#configure-the-tiflash-learnertoml-file + +log-file = "/logs/tiflash_tikv.log" + +[server] +engine-addr = "tiflash:4030" +addr = "0.0.0.0:20280" +advertise-addr = "tiflash:20280" +status-addr = "tiflash:20292" + +[storage] +data-dir = "/data/flash" diff --git a/docker/tidb/config/tiflash.toml b/docker/tidb/config/tiflash.toml new file mode 100644 index 00000000000000..30ac13efcbdd15 --- /dev/null +++ b/docker/tidb/config/tiflash.toml @@ -0,0 +1,19 @@ +# TiFlash tiflash.toml Configuration File reference: +# https://docs.pingcap.com/tidb/stable/tiflash-configuration#configure-the-tiflashtoml-file + +listen_host = "0.0.0.0" +path = "/data" + +[flash] +tidb_status_addr = "tidb:10080" +service_addr = "tiflash:4030" + +[flash.proxy] +config = "/tiflash-learner.toml" + +[logger] +errorlog = "/logs/tiflash_error.log" +log = "/logs/tiflash.log" + +[raft] +pd_addr = "pd0:2379" diff --git a/docker/tidb/docker-compose.yaml b/docker/tidb/docker-compose.yaml new file mode 100644 index 00000000000000..fa157701753978 --- /dev/null +++ b/docker/tidb/docker-compose.yaml @@ -0,0 +1,62 @@ +services: + pd0: + image: pingcap/pd:v8.5.1 + # ports: + # - "2379" + volumes: + - ./config/pd.toml:/pd.toml:ro + - ./volumes/data:/data + - ./volumes/logs:/logs + command: + - --name=pd0 + - --client-urls=http://0.0.0.0:2379 + - --peer-urls=http://0.0.0.0:2380 + - --advertise-client-urls=http://pd0:2379 + - --advertise-peer-urls=http://pd0:2380 + - --initial-cluster=pd0=http://pd0:2380 + - --data-dir=/data/pd + - --config=/pd.toml + - --log-file=/logs/pd.log + restart: on-failure + tikv: + image: pingcap/tikv:v8.5.1 + volumes: + - ./volumes/data:/data + - ./volumes/logs:/logs + command: + - --addr=0.0.0.0:20160 + - --advertise-addr=tikv:20160 + - --status-addr=tikv:20180 + - --data-dir=/data/tikv + - --pd=pd0:2379 + - --log-file=/logs/tikv.log + depends_on: + - "pd0" + restart: on-failure + tidb: + image: pingcap/tidb:v8.5.1 + # ports: + # - "4000:4000" + volumes: + - ./volumes/logs:/logs + command: + - --advertise-address=tidb + - --store=tikv + - --path=pd0:2379 + - --log-file=/logs/tidb.log + depends_on: + - "tikv" + restart: on-failure + tiflash: + image: pingcap/tiflash:v8.5.1 + volumes: + - ./config/tiflash.toml:/tiflash.toml:ro + - ./config/tiflash-learner.toml:/tiflash-learner.toml:ro + - ./volumes/data:/data + - ./volumes/logs:/logs + command: + - --config=/tiflash.toml + depends_on: + - "tikv" + - "tidb" + restart: on-failure From 6529240da69c6f293779ef36b35a48bfc283306d Mon Sep 17 00:00:00 2001 From: kimjion <45935338+kimjion@users.noreply.github.com> Date: Wed, 12 Feb 2025 15:02:11 +0800 Subject: [PATCH 07/12] fix: no longer using old app detail cover when switch pathname (#13585) --- .../(commonLayout)/app/(appDetailLayout)/[appId]/layout.tsx | 3 ++- 
1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/layout.tsx b/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/layout.tsx index 91b305dc8caadf..406a6642be9e6a 100644 --- a/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/layout.tsx +++ b/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/layout.tsx @@ -146,7 +146,8 @@ const AppDetailLayout: FC = (props) => { }) } } - }, [appDetailRes, appId, getNavigations, isCurrentWorkspaceEditor, isLoadingAppDetail, isLoadingCurrentWorkspace, pathname, router, setAppDetail, systemFeatures.enable_web_sso_switch_component]) + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [appDetailRes, appId, getNavigations, isCurrentWorkspaceEditor, isLoadingAppDetail, isLoadingCurrentWorkspace, router, setAppDetail, systemFeatures.enable_web_sso_switch_component]) useUnmount(() => { setAppDetail() From 2b86465d4ce10eb2e1f543682ff7a5d7573ec61f Mon Sep 17 00:00:00 2001 From: AugNSo Date: Wed, 12 Feb 2025 18:04:28 +0800 Subject: [PATCH 08/12] fix document extractor node incorrectly processing doc and ppt files (#12902) --- api/constants/__init__.py | 2 +- .../workflow/nodes/document_extractor/node.py | 61 ++++++++++++++++--- .../nodes/test_document_extractor_node.py | 8 +-- .../base/prompt-editor/constants.tsx | 2 +- 4 files changed, 59 insertions(+), 14 deletions(-) diff --git a/api/constants/__init__.py b/api/constants/__init__.py index 4500ef4306fc2a..b5dfd9cb1836f5 100644 --- a/api/constants/__init__.py +++ b/api/constants/__init__.py @@ -15,7 +15,7 @@ if dify_config.ETL_TYPE == "Unstructured": DOCUMENT_EXTENSIONS = ["txt", "markdown", "md", "mdx", "pdf", "html", "htm", "xlsx", "xls"] - DOCUMENT_EXTENSIONS.extend(("docx", "csv", "eml", "msg", "pptx", "xml", "epub")) + DOCUMENT_EXTENSIONS.extend(("doc", "docx", "csv", "eml", "msg", "pptx", "xml", "epub")) if dify_config.UNSTRUCTURED_API_URL: DOCUMENT_EXTENSIONS.append("ppt") DOCUMENT_EXTENSIONS.extend([ext.upper() for ext in DOCUMENT_EXTENSIONS]) diff --git a/api/core/workflow/nodes/document_extractor/node.py b/api/core/workflow/nodes/document_extractor/node.py index c0d8c6409982e6..07abe345dddbfe 100644 --- a/api/core/workflow/nodes/document_extractor/node.py +++ b/api/core/workflow/nodes/document_extractor/node.py @@ -107,8 +107,10 @@ def _extract_text_by_mime_type(*, file_content: bytes, mime_type: str) -> str: return _extract_text_from_plain_text(file_content) case "application/pdf": return _extract_text_from_pdf(file_content) - case "application/vnd.openxmlformats-officedocument.wordprocessingml.document" | "application/msword": + case "application/msword": return _extract_text_from_doc(file_content) + case "application/vnd.openxmlformats-officedocument.wordprocessingml.document": + return _extract_text_from_docx(file_content) case "text/csv": return _extract_text_from_csv(file_content) case "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" | "application/vnd.ms-excel": @@ -142,8 +144,10 @@ def _extract_text_by_file_extension(*, file_content: bytes, file_extension: str) return _extract_text_from_yaml(file_content) case ".pdf": return _extract_text_from_pdf(file_content) - case ".doc" | ".docx": + case ".doc": return _extract_text_from_doc(file_content) + case ".docx": + return _extract_text_from_docx(file_content) case ".csv": return _extract_text_from_csv(file_content) case ".xls" | ".xlsx": @@ -203,7 +207,33 @@ def _extract_text_from_pdf(file_content: bytes) -> str: def _extract_text_from_doc(file_content: 
bytes) -> str:
     """
-    Extract text from a DOC/DOCX file.
+    Extract text from a DOC file.
+    """
+    from unstructured.partition.api import partition_via_api
+
+    if not (dify_config.UNSTRUCTURED_API_URL and dify_config.UNSTRUCTURED_API_KEY):
+        raise TextExtractionError("UNSTRUCTURED_API_URL and UNSTRUCTURED_API_KEY must be set")
+
+    try:
+        with tempfile.NamedTemporaryFile(suffix=".doc", delete=False) as temp_file:
+            temp_file.write(file_content)
+            temp_file.flush()
+            with open(temp_file.name, "rb") as file:
+                elements = partition_via_api(
+                    file=file,
+                    metadata_filename=temp_file.name,
+                    api_url=dify_config.UNSTRUCTURED_API_URL,
+                    api_key=dify_config.UNSTRUCTURED_API_KEY,
+                )
+            os.unlink(temp_file.name)
+        return "\n".join([getattr(element, "text", "") for element in elements])
+    except Exception as e:
+        raise TextExtractionError(f"Failed to extract text from DOC: {str(e)}") from e
+
+
+def _extract_text_from_docx(file_content: bytes) -> str:
+    """
+    Extract text from a DOCX file.
     For now support only paragraph and table add more if needed
     """
     try:
@@ -255,13 +285,13 @@ def _extract_text_from_doc(file_content: bytes) -> str:
                     text.append(markdown_table)

                 except Exception as e:
-                    logger.warning(f"Failed to extract table from DOC/DOCX: {e}")
+                    logger.warning(f"Failed to extract table from DOCX: {e}")
                     continue

         return "\n".join(text)

     except Exception as e:
-        raise TextExtractionError(f"Failed to extract text from DOC/DOCX: {str(e)}") from e
+        raise TextExtractionError(f"Failed to extract text from DOCX: {str(e)}") from e


 def _download_file_content(file: File) -> bytes:
@@ -329,14 +359,29 @@ def _extract_text_from_excel(file_content: bytes) -> str:


 def _extract_text_from_ppt(file_content: bytes) -> str:
+    from unstructured.partition.api import partition_via_api
     from unstructured.partition.ppt import partition_ppt

     try:
-        with io.BytesIO(file_content) as file:
-            elements = partition_ppt(file=file)
+        if dify_config.UNSTRUCTURED_API_URL and dify_config.UNSTRUCTURED_API_KEY:
+            with tempfile.NamedTemporaryFile(suffix=".ppt", delete=False) as temp_file:
+                temp_file.write(file_content)
+                temp_file.flush()
+                with open(temp_file.name, "rb") as file:
+                    elements = partition_via_api(
+                        file=file,
+                        metadata_filename=temp_file.name,
+                        api_url=dify_config.UNSTRUCTURED_API_URL,
+                        api_key=dify_config.UNSTRUCTURED_API_KEY,
+                    )
+                os.unlink(temp_file.name)
+        else:
+            with io.BytesIO(file_content) as file:
+                elements = partition_ppt(file=file)
         return "\n".join([getattr(element, "text", "") for element in elements])
+
     except Exception as e:
         raise TextExtractionError(f"Failed to extract text from PPT: {str(e)}") from e


 def _extract_text_from_pptx(file_content: bytes) -> str:
diff --git a/api/tests/unit_tests/core/workflow/nodes/test_document_extractor_node.py b/api/tests/unit_tests/core/workflow/nodes/test_document_extractor_node.py
index 1a550ec5309aa3..5dfdfc0ebdac2f 100644
--- a/api/tests/unit_tests/core/workflow/nodes/test_document_extractor_node.py
+++ b/api/tests/unit_tests/core/workflow/nodes/test_document_extractor_node.py
@@ -8,7 +8,7 @@
 from core.workflow.entities.node_entities import NodeRunResult
 from core.workflow.nodes.document_extractor import DocumentExtractorNode, DocumentExtractorNodeData
 from core.workflow.nodes.document_extractor.node import (
-    _extract_text_from_doc,
+    _extract_text_from_docx,
     _extract_text_from_pdf,
     _extract_text_from_plain_text,
 )
@@ -120,7 +120,7 @@ def test_run_extract_text(
monkeypatch.setattr("core.workflow.nodes.document_extractor.node._extract_text_from_pdf", mock_pdf_extract)
     elif mime_type.startswith("application/vnd.openxmlformats"):
         mock_docx_extract = Mock(return_value=expected_text[0])
-        monkeypatch.setattr("core.workflow.nodes.document_extractor.node._extract_text_from_doc", mock_docx_extract)
+        monkeypatch.setattr("core.workflow.nodes.document_extractor.node._extract_text_from_docx", mock_docx_extract)

     result = document_extractor_node._run()

@@ -163,14 +163,14 @@ def test_extract_text_from_pdf(mock_pdf_document):

 @patch("docx.Document")
-def test_extract_text_from_doc(mock_document):
+def test_extract_text_from_docx(mock_document):
     mock_paragraph1 = Mock()
     mock_paragraph1.text = "Paragraph 1"
     mock_paragraph2 = Mock()
     mock_paragraph2.text = "Paragraph 2"
     mock_document.return_value.paragraphs = [mock_paragraph1, mock_paragraph2]

-    text = _extract_text_from_doc(b"PK\x03\x04")
+    text = _extract_text_from_docx(b"PK\x03\x04")

     assert text == "Paragraph 1\nParagraph 2"

diff --git a/web/app/components/base/prompt-editor/constants.tsx b/web/app/components/base/prompt-editor/constants.tsx
index c78b2fc50a4fa8..1288e1539e1c1f 100644
--- a/web/app/components/base/prompt-editor/constants.tsx
+++ b/web/app/components/base/prompt-editor/constants.tsx
@@ -52,7 +52,7 @@ export const getInputVars = (text: string): ValueSelector[] => {

 export const FILE_EXTS: Record<string, string[]> = {
   [SupportUploadFileTypes.image]: ['JPG', 'JPEG', 'PNG', 'GIF', 'WEBP', 'SVG'],
-  [SupportUploadFileTypes.document]: ['TXT', 'MD', 'MDX', 'MARKDOWN', 'PDF', 'HTML', 'XLSX', 'XLS', 'DOCX', 'CSV', 'EML', 'MSG', 'PPTX', 'PPT', 'XML', 'EPUB'],
+  [SupportUploadFileTypes.document]: ['TXT', 'MD', 'MDX', 'MARKDOWN', 'PDF', 'HTML', 'XLSX', 'XLS', 'DOC', 'DOCX', 'CSV', 'EML', 'MSG', 'PPTX', 'PPT', 'XML', 'EPUB'],
   [SupportUploadFileTypes.audio]: ['MP3', 'M4A', 'WAV', 'WEBM', 'AMR', 'MPGA'],
   [SupportUploadFileTypes.video]: ['MP4', 'MOV', 'MPEG', 'MPGA'],
 }

From a3d3e30e3a884b549fbfe3386f2a81562fb81ba5 Mon Sep 17 00:00:00 2001
From: Yingchun Lai
Date: Thu, 13 Feb 2025 10:24:05 +0800
Subject: [PATCH 09/12] fix: fix tongyi models blocking mode with incremental_output=stream (#13620)

---
 api/core/model_runtime/model_providers/tongyi/llm/llm.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/api/core/model_runtime/model_providers/tongyi/llm/llm.py b/api/core/model_runtime/model_providers/tongyi/llm/llm.py
index 1dce372bba07d4..bb987d499898b7 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/llm.py
+++ b/api/core/model_runtime/model_providers/tongyi/llm/llm.py
@@ -197,7 +197,7 @@ def _generate(
         else:
             # nothing different between chat model and completion model in tongyi
             params["messages"] = self._convert_prompt_messages_to_tongyi_messages(prompt_messages)
-        response = Generation.call(**params, result_format="message", stream=stream, incremental_output=True)
+        response = Generation.call(**params, result_format="message", stream=stream, incremental_output=stream)

         if stream:
             return self._handle_generate_stream_response(model, credentials, response, prompt_messages)

From c398c9cb6a6dc94272fe10041420799d856abeb5 Mon Sep 17 00:00:00 2001
From: jiangbo721 <365065261@qq.com>
Date: Thu, 13 Feb 2025 14:51:38 +0800
Subject: [PATCH 10/12] chore: Remove duplicate code, lines 8 to 27, same as lines 29 & 45 to 62.
(#13659)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: 刘江波
---
 api/fields/message_fields.py | 21 ---------------------
 1 file changed, 21 deletions(-)

diff --git a/api/fields/message_fields.py b/api/fields/message_fields.py
index 0571faab08c134..76e61f07079a5c 100644
--- a/api/fields/message_fields.py
+++ b/api/fields/message_fields.py
@@ -7,27 +7,6 @@
 feedback_fields = {"rating": fields.String}

-retriever_resource_fields = {
-    "id": fields.String,
-    "message_id": fields.String,
-    "position": fields.Integer,
-    "dataset_id": fields.String,
-    "dataset_name": fields.String,
-    "document_id": fields.String,
-    "document_name": fields.String,
-    "data_source_type": fields.String,
-    "segment_id": fields.String,
-    "score": fields.Float,
-    "hit_count": fields.Integer,
-    "word_count": fields.Integer,
-    "segment_position": fields.Integer,
-    "index_node_hash": fields.String,
-    "content": fields.String,
-    "created_at": TimestampField,
-}
-
-feedback_fields = {"rating": fields.String}
-
 agent_thought_fields = {
     "id": fields.String,
     "chain_id": fields.String,

From f0b9257387fc70012b0a2637b23a66861e4ec879 Mon Sep 17 00:00:00 2001
From: Novice <857526207@qq.com>
Date: Thu, 13 Feb 2025 18:00:28 +0800
Subject: [PATCH 11/12] fix: error in obtaining end_to_node_id during conditional parallel execution (#13673)

---
 api/core/workflow/graph_engine/entities/graph.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/api/core/workflow/graph_engine/entities/graph.py b/api/core/workflow/graph_engine/entities/graph.py
index 1c6b4b6618448f..5c672c985b6a1f 100644
--- a/api/core/workflow/graph_engine/entities/graph.py
+++ b/api/core/workflow/graph_engine/entities/graph.py
@@ -590,8 +590,6 @@ def _fetch_all_node_ids_in_parallels(
                     start_node_id=node_id,
                     routes_node_ids=routes_node_ids,
                 )
-                # Exclude conditional branch nodes
-                and all(edge.run_condition is None for edge in reverse_edge_mapping.get(node_id, []))
             ):
                 if node_id not in merge_branch_node_ids:
                     merge_branch_node_ids[node_id] = []

From 33a565a719036a99c95745944ebdff1432abddc9 Mon Sep 17 00:00:00 2001
From: llinvokerl <38915183+llinvokerl@users.noreply.github.com>
Date: Thu, 13 Feb 2025 19:35:03 +0800
Subject: [PATCH 12/12] perf: Implemented short-circuit evaluation for logical conditions (#13674)

Co-authored-by: liusurong.lsr
---
 api/core/workflow/utils/condition/processor.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/api/core/workflow/utils/condition/processor.py b/api/core/workflow/utils/condition/processor.py
index 19473f39d2299a..c61b3d1861cc57 100644
--- a/api/core/workflow/utils/condition/processor.py
+++ b/api/core/workflow/utils/condition/processor.py
@@ -64,6 +64,10 @@ def process_conditions(
                 expected=expected_value,
             )
             group_results.append(result)
+            # Short-circuit: the first False under "and" (or the first True under "or") already decides the result
+            if (operator == "and" and not result) or (operator == "or" and result):
+                final_result = result
+                return input_conditions, group_results, final_result

     final_result = all(group_results) if operator == "and" else any(group_results)
     return input_conditions, group_results, final_result
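
A note on PATCH 12: the early return is safe because under "and" the first False already fixes the group's final result, and under "or" the first True does. A minimal, self-contained sketch of the same pattern (illustrative only, not the Dify source; evaluate_group and its lazy "checks" callables are assumed names):

    def evaluate_group(operator, checks):
        # checks are zero-argument callables so that, as in process_conditions,
        # later conditions are never evaluated once the outcome is fixed.
        group_results = []
        for check in checks:
            result = check()
            group_results.append(result)
            # Same rule as the patch: first False under "and" / first True under "or" ends the walk.
            if (operator == "and" and not result) or (operator == "or" and result):
                return group_results, result
        return group_results, (all(group_results) if operator == "and" else any(group_results))

Passing callables rather than precomputed booleans is what makes the skip observable: evaluate_group("and", [lambda: True, lambda: False, expensive_check]) returns ([True, False], False) without ever calling expensive_check.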