Skip to content

Commit

Permalink
feat: Zero-shot NLU (#189)
Browse files Browse the repository at this point in the history
* Add zero shot NLU using LLMs

* add docker-compose for ollama

* switch between default NLU and LLM pipelines

* add synonym replacer NLU component

* refactor docker-compose

* update docs
  • Loading branch information
alfredfrancis authored Feb 2, 2025
1 parent c23c561 commit cf06697
Show file tree
Hide file tree
Showing 30 changed files with 663 additions and 179 deletions.
8 changes: 3 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ You don’t need to be an expert at artificial intelligence to create an awesome
- Spacy Word Embeddings
- Intent Recognition (ML)
- Entity Extraction (ML)
- One shot NLU using Large Language Models (Coming Soon)
- Zero shot NLU using Large Language Models (LLMs)
- Persistent Memory & Context Management
- API request fulfilment
- Channel Integrations
Expand Down Expand Up @@ -54,7 +54,7 @@ You don’t need to be an expert at artificial intelligence to create an awesome
docker-compose up -d
```

Open http://localhost:3000/
Open http://localhost:8080/

### Using Helm

Expand All @@ -75,8 +75,6 @@ Want to contribute? Check out our [contribution guidelines](CONTRIBUTING.md).

### Tutorial

Checkout this basic tutorial on youtube,

[![Coming Soon](https://www.wpcc.edu/wp-content/uploads/2021/04/YouTube-Stream-Coming-Soon.jpg)](https://www.youtube.com/watch?v=S1Fj7WinaBA)
Check out our [tutorial](docs/01-getting-started.md) to get started.

<hr></hr>
4 changes: 2 additions & 2 deletions app/admin/bots/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ async def set_config(name: str, config: Dict[str, Any]):
"""
Update bot config
"""
await store.update_config(name, config)
await store.update_nlu_config(name, config)
return {"message": "Config updated successfully"}


Expand All @@ -22,7 +22,7 @@ async def get_config(name: str):
"""
Get bot config
"""
return await store.get_config(name)
return await store.get_nlu_config(name)


@router.get("/{name}/export")
Expand Down
37 changes: 32 additions & 5 deletions app/admin/bots/schemas.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,40 @@
from pydantic import BaseModel, Field, ConfigDict
from typing import Dict, Any
from pydantic import BaseModel, Field
from typing import Optional
from app.database import ObjectIdField
from datetime import datetime


class TraditionalNLUSettings(BaseModel):
    """Tunable options for the traditional, ML-based NLU pipeline."""

    # Threshold for intent detection; the dialogue manager reads this value
    # as its confidence threshold.
    intent_detection_threshold: float = Field(default=0.75)
    # Threshold for entity detection.
    entity_detection_threshold: float = Field(default=0.65)
    # Whether spaCy is used; enabled by default.
    use_spacy: bool = Field(default=True)


class LLMSettings(BaseModel):
    """Settings for LLM-based NLU pipeline.

    The defaults point at a server on localhost port 11434 with the API key
    "ollama" -- presumably a local Ollama instance exposing an
    OpenAI-compatible ``/v1`` endpoint (TODO confirm against the client code).
    """

    # ``model_name`` begins with "model_", a prefix pydantic v2 treats as a
    # protected namespace; without this opt-out, affected pydantic versions
    # emit a UserWarning when the class is defined.
    model_config = {"protected_namespaces": ()}

    # Base URL of the LLM API.
    base_url: str = "http://127.0.0.1:11434/v1"
    # API key sent to the LLM API.
    api_key: str = "ollama"
    # Identifier of the model to query.
    model_name: str = "llama2:13b-chat"
    # Token limit passed to the model.
    max_tokens: int = 4096
    # Sampling temperature passed to the model.
    temperature: float = 0.7


class NLUConfiguration(BaseModel):
    """Top-level NLU configuration: the active pipeline plus per-pipeline settings."""

    # Either 'traditional' or 'llm'
    pipeline_type: str = Field(default="traditional")
    # Options for the traditional ML pipeline.
    traditional_settings: TraditionalNLUSettings = Field(
        default=TraditionalNLUSettings()
    )
    # Options for the LLM pipeline.
    llm_settings: LLMSettings = Field(default=LLMSettings())


class Bot(BaseModel):
"""Base schema for bot"""

id: ObjectIdField = Field(validation_alias="_id", default=None)
name: str
config: Dict[str, Any] = {}

model_config = ConfigDict(arbitrary_types_allowed=True)
nlu_config: NLUConfiguration = NLUConfiguration()
created_at: Optional[datetime] = None
updated_at: Optional[datetime] = None
28 changes: 21 additions & 7 deletions app/admin/bots/store.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,42 @@
from typing import Dict
from app.admin.bots.schemas import Bot
from app.admin.bots.schemas import Bot, NLUConfiguration
from app.admin.entities.store import list_entities, bulk_import_entities
from app.admin.intents.store import list_intents, bulk_import_intents
from app.database import database
from datetime import datetime

bot_collection = database.get_collection("bot")


async def add_bot(data: dict):
await bot_collection.insert_one(data)
async def ensure_default_bot():
    """Return the bot named "default", creating it first if it does not exist.

    :return: the ``Bot`` validated from the stored document when one is found,
        otherwise the newly built ``Bot`` that was just inserted.
    """
    existing = await bot_collection.find_one({"name": "default"})
    if existing is not None:
        return Bot.model_validate(existing)

    # Take the timestamp once so a freshly created record has identical
    # created_at and updated_at values.
    now = datetime.utcnow()
    default_bot = Bot(name="default")
    default_bot.created_at = now
    default_bot.updated_at = now

    # "id" is left out of the inserted document -- presumably so the database
    # assigns its own _id (TODO confirm).
    await bot_collection.insert_one(default_bot.model_dump(exclude={"id": True}))
    return default_bot


async def get_bot(name: str) -> Bot:
    """Look up the bot document with the given name and validate it as a ``Bot``.

    The result of the lookup is handed to ``Bot.model_validate`` unchanged,
    so a missing bot surfaces as whatever validation raises for that value.

    :param name: value matched against the document's "name" field
    :return: the validated ``Bot``
    """
    return Bot.model_validate(await bot_collection.find_one({"name": name}))


async def get_config(name: str) -> Dict:
async def get_nlu_config(name: str) -> NLUConfiguration:
bot = await get_bot(name)
return bot.config
return bot.nlu_config


async def update_config(name: str, entity_data: dict):
await bot_collection.update_one({"name": name}, {"$set": {"config": entity_data}})
async def update_nlu_config(name: str, nlu_config: dict):
await bot_collection.update_one(
{"name": name}, {"$set": {"nlu_config": nlu_config}}
)


async def export_bot(name) -> Dict:
Expand Down
2 changes: 1 addition & 1 deletion app/admin/train/routes.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from fastapi import APIRouter, HTTPException, BackgroundTasks
from app.admin.intents import store
from app.dependencies import reload_dialogue_manager
from app.bot.nlu.training import train_pipeline
from app.bot.nlu.pipeline_utils import train_pipeline

router = APIRouter(prefix="/train", tags=["train"])

Expand Down
23 changes: 7 additions & 16 deletions app/bot/dialogue_manager/dialogue_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,8 @@
from app.bot.memory.memory_saver_mongo import MemorySaverMongo
from app.bot.memory.models import State
from app.bot.nlu.pipeline import NLUPipeline
from app.bot.nlu.featurizers import SpacyFeaturizer
from app.bot.nlu.intent_classifiers import SklearnIntentClassifier
from app.bot.nlu.entity_extractors import CRFEntityExtractor
from app.bot.nlu.pipeline_utils import get_pipeline
from app.bot.dialogue_manager.utils import SilentUndefined, split_sentence
from app.admin.entities.store import list_synonyms
from app.bot.dialogue_manager.models import (
IntentModel,
ParameterModel,
Expand Down Expand Up @@ -48,27 +45,21 @@ async def from_config(cls):
Initialize DialogueManager with all required dependencies
"""

synonyms = await list_synonyms()

# Initialize pipeline with components
nlu_pipeline = NLUPipeline(
[
SpacyFeaturizer(app_config.SPACY_LANG_MODEL),
SklearnIntentClassifier(),
CRFEntityExtractor(synonyms),
]
)

# Load all intents and convert to domain models
db_intents = await list_intents()
intents = [IntentModel.from_db(intent) for intent in db_intents]

# Initialize pipeline with components
nlu_pipeline = await get_pipeline()

# Get configuration
fallback_intent_id = app_config.DEFAULT_FALLBACK_INTENT_NAME

# Get bot configuration
bot = await get_bot("default")
confidence_threshold = bot.config.get("confidence_threshold", 0.90)
confidence_threshold = (
bot.nlu_config.traditional_settings.intent_detection_threshold
)

memory_saver = MemorySaverMongo(client)

Expand Down
3 changes: 2 additions & 1 deletion app/bot/nlu/entity_extractors/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from .crf_entity_extractor import CRFEntityExtractor
from .synonym_replacer import SynonymReplacer

__all__ = ["CRFEntityExtractor"]
__all__ = ["CRFEntityExtractor", "SynonymReplacer"]
21 changes: 3 additions & 18 deletions app/bot/nlu/entity_extractors/crf_entity_extractor.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import pycrfsuite
import logging
from typing import Dict, Any, List, Optional
from typing import Dict, Any, List
from app.bot.nlu.pipeline import NLUComponent
import os

Expand All @@ -13,23 +13,9 @@ class CRFEntityExtractor(NLUComponent):
Performs NER training, prediction, model import/export
"""

def __init__(self, synonyms: Optional[Dict[str, str]] = None):
self.synonyms = synonyms or {}
def __init__(self):
self.tagger = None

def replace_synonyms(self, entities):
"""
replace extracted entity values with
root word by matching with synonyms dict.
:param entities:
:return:
"""
for entity in entities.keys():
entity_value = str(entities[entity])
if entity_value.lower() in self.synonyms:
entities[entity] = self.synonyms[entity_value.lower()]
return entities

def extract_features(self, sent, i):
"""
Extract features for a given sentence
Expand Down Expand Up @@ -178,8 +164,7 @@ def predict(self, message):
tagged_token = self.pos_tagger(spacy_doc)
words = [token.text for token in spacy_doc]
predicted_labels = self.tagger.tag(self.sent_to_features(tagged_token))
extracted_entities = self.crf2json(zip(words, predicted_labels))
return self.replace_synonyms(extracted_entities)
return self.crf2json(zip(words, predicted_labels))

def pos_tagger(self, spacy_doc):
"""
Expand Down
44 changes: 44 additions & 0 deletions app/bot/nlu/entity_extractors/synonym_replacer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import logging
from typing import Dict, Any, Optional
from app.bot.nlu.pipeline import NLUComponent

logger = logging.getLogger(__name__)


class SynonymReplacer(NLUComponent):
    """
    Replaces extracted entity values with their root words
    using a predefined synonyms dictionary.
    """

    def __init__(self, synonyms: Optional[Dict[str, str]] = None):
        """
        :param synonyms: mapping looked up by the lower-cased entity value;
            the mapped value replaces the entity value. Defaults to an empty
            mapping.
        """
        self.synonyms = synonyms or {}

    def replace_synonyms(self, entities: Dict[str, str]) -> Dict[str, str]:
        """
        Replace extracted entity values with root words by matching with synonyms dict.

        The lookup key is the lower-cased string form of each value. Values
        that are ``None`` are left untouched, so a missing entity is never
        stringified to "None" and matched against a "none" synonym.

        :param entities: Dictionary of entity name to entity value mappings
            (updated in place)
        :return: The same dictionary with replaced entity values where applicable
        """
        for name, value in entities.items():
            if value is None:
                continue
            key = str(value).lower()
            if key in self.synonyms:
                # Only existing keys are reassigned, so the dict size is
                # unchanged while it is being iterated.
                entities[name] = self.synonyms[key]
        return entities

    def train(self, training_data: Dict[str, Any], model_path: str) -> None:
        """Nothing to train for synonym replacement."""
        pass

    def load(self, model_path: str) -> bool:
        """Nothing to load for synonym replacement."""
        return True

    def process(self, message: Dict[str, Any]) -> Dict[str, Any]:
        """
        Process a message by replacing entity values with their synonyms.

        :param message: message dict; its "entities" entry, when present and
            non-empty, is rewritten via ``replace_synonyms``
        :return: the same message dict
        """
        entities = message.get("entities")
        if not entities:
            return message

        message["entities"] = self.replace_synonyms(entities)
        return message
3 changes: 3 additions & 0 deletions app/bot/nlu/llm/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from .zero_shot_nlu_openai import ZeroShotNLUOpenAI

__all__ = ["ZeroShotNLUOpenAI"]
27 changes: 27 additions & 0 deletions app/bot/nlu/llm/prompts/ZERO_SHOT_LEARNING_PROMPT.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
You are provided with a text input. Your task is to analyze the given input and extract specific details based on the following instructions:

1. **Identify the Intent**: Determine the intent of the input from the following options:
{% for intent in intents %}
- {{ intent }}
{% endfor %}
2. **Extract Entities**: Extract the following entities only if they are explicitly mentioned in the text:
{% for entity in entities %}
- {{ entity }}
{% endfor %}
3. **Strict Extraction Rules**:
- Do not infer or guess any values. If an entity is not mentioned, assign it a value of null.
- Ensure that the output is strictly in JSON format.
- Output only the JSON object. Do not include any additional text, explanations, or comments.
- Ensure that the JSON structure is valid and properly formatted.
4. **Output Format**: Provide the output in the following JSON structure:
{% raw %}
```json
{{
"intent": "<intent_value>" or null,
"entities": {{
"entity_name_1": "<value>" or null,
"entity_name_2": "<value>" or null
}}
}}
```
{% endraw %}
Loading

0 comments on commit cf06697

Please sign in to comment.