Skip to content

Commit

Permalink
dev: use ddg for websearch
Browse files Browse the repository at this point in the history
  • Loading branch information
zhudotexe committed Oct 21, 2024
1 parent 24f3e97 commit 98d8cbe
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 23 deletions.
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ bundled = [
"PyMuPDF>=1.24.3,<2.0.0",
"pymupdf4llm~=0.0.1",
"trafilatura>=1.9.0,<2.0.0",
"duckduckgo_search>=6.3.0,<7.0.0",
]

web = [
Expand Down
43 changes: 23 additions & 20 deletions redel/tools/browsing/impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import urllib.parse
from typing import Optional, TYPE_CHECKING

from duckduckgo_search import AsyncDDGS
from kani import ChatMessage, ChatRole, ai_function
from kani.engines import BaseEngine

Expand Down Expand Up @@ -106,26 +107,28 @@ async def close(self):
# ==== functions ====
@ai_function()
async def search(self, query: str):
    """Search for a query on a web search engine.

    Submits the query to DuckDuckGo via the duckduckgo_search async client
    and returns the raw text-search results.

    :param query: The search query string.
    :returns: A list of result dicts as produced by ``AsyncDDGS.atext``
        (each typically contains ``title``, ``href``, and ``body`` keys).
    """
    # Use the DuckDuckGo search API instead of scraping Google SERPs with a
    # browser page: no HTML parsing/markdownify step is needed, and it avoids
    # Google's bot detection. (The previous Playwright-based Google scraping
    # implementation was removed in this change.)
    results = await AsyncDDGS().atext(query)
    return results

@ai_function()
async def visit_page(self, href: str):
Expand Down
8 changes: 5 additions & 3 deletions server.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
- ANTHROPIC_API_KEY (optional)
Configuration:
- root engine: gpt-4
- delegate engine: gpt-4
- root engine: gpt-4o
- delegate engine: gpt-4o
- tools:
- Browsing (always included in delegates)
- long engine: claude-3-opus (for summarizing long webpages, if ANTHROPIC_API_KEY is set)
Expand All @@ -21,11 +21,12 @@
from kani.ext.ratelimits import RatelimitedEngine

from redel import AUTOGENERATE_TITLE, ReDel
from redel.delegation import DelegateOne
from redel.server import VizServer
from redel.tools.browsing import Browsing

# Define the engines
engine = OpenAIEngine(model="gpt-4", temperature=0.8, top_p=0.95)
engine = OpenAIEngine(model="gpt-4o", temperature=0.8, top_p=0.95)
if "ANTHROPIC_API_KEY" in os.environ:
long_engine = RatelimitedEngine(
AnthropicEngine(model="claude-3-5-sonnet-20240620", temperature=0.7, max_tokens=4096), max_concurrency=1
Expand All @@ -37,6 +38,7 @@
ai = ReDel(
root_engine=engine,
delegate_engine=engine,
delegation_scheme=DelegateOne,
title=AUTOGENERATE_TITLE,
tool_configs={
Browsing: {
Expand Down

0 comments on commit 98d8cbe

Please sign in to comment.