Skip to content

Commit

Permalink
dev: use ddg for websearch
Browse files Browse the repository at this point in the history
  • Loading branch information
zhudotexe committed Oct 21, 2024
1 parent 24f3e97 commit 98d8cbe
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 23 deletions.
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ bundled = [
"PyMuPDF>=1.24.3,<2.0.0",
"pymupdf4llm~=0.0.1",
"trafilatura>=1.9.0,<2.0.0",
"duckduckgo_search>=6.3.0,<7.0.0",
]

web = [
Expand Down
43 changes: 23 additions & 20 deletions redel/tools/browsing/impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import urllib.parse
from typing import Optional, TYPE_CHECKING

from duckduckgo_search import AsyncDDGS
from kani import ChatMessage, ChatRole, ai_function
from kani.engines import BaseEngine

Expand Down Expand Up @@ -106,26 +107,28 @@ async def close(self):
# ==== functions ====
@ai_function()
async def search(self, query: str):
    """Search for a query on a web search engine.

    Submits the query to DuckDuckGo via the duckduckgo_search async client
    and returns the raw text-search results.

    :param query: The search query string.
    :returns: A list of result dicts as produced by ``AsyncDDGS.atext``
        (each typically contains ``title``, ``href``, and ``body`` keys).
    """
    # Use the DuckDuckGo search API instead of scraping Google SERPs with a
    # browser page: no HTML parsing/markdownify step is needed, and it avoids
    # Google's bot detection. (The previous Playwright-based Google scraping
    # implementation was removed in this change.)
    results = await AsyncDDGS().atext(query)
    return results

@ai_function()
async def visit_page(self, href: str):
Expand Down
8 changes: 5 additions & 3 deletions server.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
- ANTHROPIC_API_KEY (optional)
Configuration:
- root engine: gpt-4
- delegate engine: gpt-4
- root engine: gpt-4o
- delegate engine: gpt-4o
- tools:
- Browsing (always included in delegates)
- long engine: claude-3-opus (for summarizing long webpages, if ANTHROPIC_API_KEY is set)
Expand All @@ -21,11 +21,12 @@
from kani.ext.ratelimits import RatelimitedEngine

from redel import AUTOGENERATE_TITLE, ReDel
from redel.delegation import DelegateOne
from redel.server import VizServer
from redel.tools.browsing import Browsing

# Define the engines
engine = OpenAIEngine(model="gpt-4", temperature=0.8, top_p=0.95)
engine = OpenAIEngine(model="gpt-4o", temperature=0.8, top_p=0.95)
if "ANTHROPIC_API_KEY" in os.environ:
long_engine = RatelimitedEngine(
AnthropicEngine(model="claude-3-5-sonnet-20240620", temperature=0.7, max_tokens=4096), max_concurrency=1
Expand All @@ -37,6 +38,7 @@
ai = ReDel(
root_engine=engine,
delegate_engine=engine,
delegation_scheme=DelegateOne,
title=AUTOGENERATE_TITLE,
tool_configs={
Browsing: {
Expand Down

0 comments on commit 98d8cbe

Please sign in to comment.