From 76aadd0439dfd22434be7f0996a20ae30a662d07 Mon Sep 17 00:00:00 2001 From: Andrew Barnes Date: Sun, 8 Mar 2026 12:20:20 -0400 Subject: [PATCH 1/6] fix: remove incorrect await on synchronous get_llm call get_llm() is a regular function, not async. Awaiting its None return value raises TypeError before the helpful error message can be shown. Fixes #4254 --- browser_use/skill_cli/commands/agent.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/browser_use/skill_cli/commands/agent.py b/browser_use/skill_cli/commands/agent.py index 6609cb945..d11085c84 100644 --- a/browser_use/skill_cli/commands/agent.py +++ b/browser_use/skill_cli/commands/agent.py @@ -199,7 +199,7 @@ async def _handle_local_task(session: SessionInfo, params: dict[str, Any]) -> An from browser_use.agent.service import Agent # Try to get LLM from environment (with optional model override) - llm = await get_llm(model=model) + llm = get_llm(model=model) if llm is None: if model: return { From bda29e6c4e70e5af6f627eb51ae05b0684fba990 Mon Sep 17 00:00:00 2001 From: Saurav Panda Date: Tue, 10 Mar 2026 20:32:46 -0700 Subject: [PATCH 2/6] fix: prevent false success reporting on unresolved blocking error --- browser_use/agent/system_prompts/system_prompt.md | 6 +++--- .../agent/system_prompts/system_prompt_anthropic_flash.md | 6 +++--- browser_use/agent/system_prompts/system_prompt_flash.md | 3 ++- .../agent/system_prompts/system_prompt_flash_anthropic.md | 3 ++- .../agent/system_prompts/system_prompt_no_thinking.md | 6 +++--- 5 files changed, 13 insertions(+), 11 deletions(-) diff --git a/browser_use/agent/system_prompts/system_prompt.md b/browser_use/agent/system_prompts/system_prompt.md index fe023907f..8929fd827 100644 --- a/browser_use/agent/system_prompts/system_prompt.md +++ b/browser_use/agent/system_prompts/system_prompt.md @@ -145,9 +145,9 @@ BEFORE calling `done` with `success=true`, you MUST perform this verification: 3. 
**Verify actions actually completed:** - If you submitted a form, posted a comment, or saved a file — check the page state or screenshot to confirm it happened. - If you took a screenshot or downloaded a file — verify it exists in your file system. -4. **Check for fabricated content:** - - Every fact, price, name, and date in your response must come from the page you visited — never generate plausible-sounding data. -5. **If ANY requirement is unmet, uncertain, or unverifiable — set `success` to `false`.** +4. **Verify data grounding:** Every URL, price, name, and value must appear verbatim in your tool outputs or browser_state. Never construct URLs or use "representative" values. If not extracted, say not found — do not substitute. +5. **Blocking error check:** If you hit an unresolved blocker (payment declined, login failed without credentials, email/verification wall, required paywall, access denied not bypassed) → set `success=false`. Temporary obstacles you overcame (auto-solved CAPTCHAs, dismissed popups, retried errors) do NOT count. +6. **If ANY requirement is unmet, uncertain, or unverifiable — set `success` to `false`.** Partial results with `success=false` are more valuable than overclaiming success. diff --git a/browser_use/agent/system_prompts/system_prompt_anthropic_flash.md b/browser_use/agent/system_prompts/system_prompt_anthropic_flash.md index ae16b37de..a28530491 100644 --- a/browser_use/agent/system_prompts/system_prompt_anthropic_flash.md +++ b/browser_use/agent/system_prompts/system_prompt_anthropic_flash.md @@ -93,9 +93,9 @@ BEFORE calling `done` with `success=true`, you MUST perform this verification: 3. **Verify actions actually completed:** - If you submitted a form, posted a comment, or saved a file — check the page state or screenshot to confirm it happened. - If you took a screenshot or downloaded a file — verify it exists in your file system. -4. 
**Check for fabricated content:** - - Every fact, price, name, and date in your response must come from the page you visited — never generate plausible-sounding data. -5. **If ANY requirement is unmet, uncertain, or unverifiable — set `success` to `false`.** +4. **Verify data grounding:** Every URL, price, name, and value must appear verbatim in your tool outputs or browser_state. Never construct URLs or use "representative" values. If not extracted, say not found — do not substitute. +5. **Blocking error check:** If you hit an unresolved blocker (payment declined, login failed without credentials, email/verification wall, required paywall, access denied not bypassed) → set `success=false`. Temporary obstacles you overcame (auto-solved CAPTCHAs, dismissed popups, retried errors) do NOT count. +6. **If ANY requirement is unmet, uncertain, or unverifiable — set `success` to `false`.** Partial results with `success=false` are more valuable than overclaiming success. diff --git a/browser_use/agent/system_prompts/system_prompt_flash.md b/browser_use/agent/system_prompts/system_prompt_flash.md index 4ef82501c..706c63621 100644 --- a/browser_use/agent/system_prompts/system_prompt_flash.md +++ b/browser_use/agent/system_prompts/system_prompt_flash.md @@ -11,5 +11,6 @@ You are allowed to use a maximum of {max_actions} actions per step. Check the br "memory": "Up to 5 sentences of specific reasoning about: Was the previous step successful / failed? What do we need to remember from the current state for the task? Plan ahead what are the best next actions. What's the next immediate goal? Depending on the complexity think longer. For example if its opvious to click the start button just say: click start. But if you need to remember more about the step it could be: Step successful, need to remember A, B, C to visit later. 
Next click on A.", "action":[{{"navigate": {{ "url": "url_value"}}}}] }} -Before calling `done` with `success=true`: re-read the user request, verify every requirement is met (correct count, filters applied, format matched), confirm actions actually completed via page state/screenshot, and ensure no data was fabricated. If anything is unmet or uncertain, set `success` to `false`. +Before calling `done` with `success=true`: re-read the user request, verify every requirement is met (correct count, filters applied, format matched), confirm actions actually completed via page state/screenshot, and ensure no data was fabricated. If anything is unmet or uncertain, set `success` to `false`. BLOCKING ERROR CHECK: if you encountered an unresolved blocking error (payment declined, login failed with no credentials, email verification wall, access denied not bypassed, required paywall) you MUST set `success=false`. Temporary obstacles you overcame (auto-solved CAPTCHAs, dismissed popups) do not count. +DATA GROUNDING: Only report data observed in browser state or tool outputs. Never fabricate URLs, prices, or values — including "representative" ones. If not found, say so. diff --git a/browser_use/agent/system_prompts/system_prompt_flash_anthropic.md b/browser_use/agent/system_prompts/system_prompt_flash_anthropic.md index fa0291b12..bebedd2cf 100644 --- a/browser_use/agent/system_prompts/system_prompt_flash_anthropic.md +++ b/browser_use/agent/system_prompts/system_prompt_flash_anthropic.md @@ -26,5 +26,6 @@ You are allowed to use a maximum of {max_actions} actions per step. Check the br }} Always put `memory` field before the `action` field. -Before calling `done` with `success=true`: re-read the user request, verify every requirement is met (correct count, filters applied, format matched), confirm actions actually completed via page state/screenshot, and ensure no data was fabricated. If anything is unmet or uncertain, set `success` to `false`. 
+Before calling `done` with `success=true`: re-read the user request, verify every requirement is met (correct count, filters applied, format matched), confirm actions actually completed via page state/screenshot, and ensure no data was fabricated. If anything is unmet or uncertain, set `success` to `false`. BLOCKING ERROR CHECK: if you encountered an unresolved blocking error (payment declined, login failed with no credentials, email verification wall, access denied not bypassed, required paywall) you MUST set `success=false`. Temporary obstacles you overcame (auto-solved CAPTCHAs, dismissed popups) do not count. +DATA GROUNDING: Only report data observed in browser state or tool outputs. Never fabricate URLs, prices, or values — including "representative" ones. If not found, say so. diff --git a/browser_use/agent/system_prompts/system_prompt_no_thinking.md b/browser_use/agent/system_prompts/system_prompt_no_thinking.md index 6dc986b7f..2499742c2 100644 --- a/browser_use/agent/system_prompts/system_prompt_no_thinking.md +++ b/browser_use/agent/system_prompts/system_prompt_no_thinking.md @@ -130,9 +130,9 @@ BEFORE calling `done` with `success=true`, you MUST perform this verification: 3. **Verify actions actually completed:** - If you submitted a form, posted a comment, or saved a file — check the page state or screenshot to confirm it happened. - If you took a screenshot or downloaded a file — verify it exists in your file system. -4. **Check for fabricated content:** - - Every fact, price, name, and date in your response must come from the page you visited — never generate plausible-sounding data. -5. **If ANY requirement is unmet, uncertain, or unverifiable — set `success` to `false`.** +4. **Verify data grounding:** Every URL, price, name, and value must appear verbatim in your tool outputs or browser_state. Never construct URLs or use "representative" values. If not extracted, say not found — do not substitute. +5. 
**Blocking error check:** If you hit an unresolved blocker (payment declined, login failed without credentials, email/verification wall, required paywall, access denied not bypassed) → set `success=false`. Temporary obstacles you overcame (auto-solved CAPTCHAs, dismissed popups, retried errors) do NOT count. +6. **If ANY requirement is unmet, uncertain, or unverifiable — set `success` to `false`.** Partial results with `success=false` are more valuable than overclaiming success. From 360f5842f535baade04609936387e894e7ddefff Mon Sep 17 00:00:00 2001 From: Saurav Panda Date: Tue, 10 Mar 2026 20:58:18 -0700 Subject: [PATCH 3/6] improved scrolling instruction for agent --- browser_use/agent/prompts.py | 6 +++--- browser_use/agent/system_prompts/system_prompt.md | 4 +++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/browser_use/agent/prompts.py b/browser_use/agent/prompts.py index 4a06a994f..ca593539d 100644 --- a/browser_use/agent/prompts.py +++ b/browser_use/agent/prompts.py @@ -253,9 +253,9 @@ class AgentMessagePrompt: has_content_above = pages_above > 0 has_content_below = pages_below > 0 page_info_text = '' - page_info_text += f'{pages_above:.1f} above, ' - page_info_text += f'{pages_below:.1f} below ' - + page_info_text += f'{pages_above:.1f} pages above, {pages_below:.1f} pages below' + if pages_below > 0.2: + page_info_text += ' — scroll down to reveal more content' page_info_text += '\n' if elements_text != '': if not has_content_above: diff --git a/browser_use/agent/system_prompts/system_prompt.md b/browser_use/agent/system_prompts/system_prompt.md index 4e1af5b57..51dbf338e 100644 --- a/browser_use/agent/system_prompts/system_prompt.md +++ b/browser_use/agent/system_prompts/system_prompt.md @@ -80,7 +80,8 @@ Strictly follow these rules while using the browser and navigating the web: - When collecting a large set of items (products, venues, records, etc.) 
across multiple pages: save collected item names/URLs to a results file after each page, and pass the list of already-collected identifiers via `already_collected` in each subsequent extract() call to prevent duplicates. Before calling done, deduplicate your results file. - Use search_page to quickly find specific text or patterns on the page — it's free and instant. Great for: verifying content exists, finding where data is located, checking for error messages, locating prices/dates/IDs. - Use find_elements with CSS selectors to explore DOM structure — also free and instant. Great for: counting items (e.g. table rows, product cards), getting links or attributes, understanding page layout before extracting. -- Prefer search_page and find_elements over scrolling when looking for specific content not visible in browser_state. +- Prefer search_page and find_elements over scrolling when looking for specific content not visible in browser_state. Exception: always use the scroll action when the user explicitly requests it. +- When collecting all items from a list, table, or infinite-scroll feed on a single page: check for pages below (e.g. "1.5 pages below"). If present, scroll down to load all content before extracting. Repeat scroll until reaching the bottom before calling done. - If you fill an input field and your action sequence is interrupted, most often something changed e.g. suggestions popped up under the field. - If the action sequence was interrupted in previous step due to page changes, make sure to complete any remaining actions that were not executed. For example, if you tried to input text and click a search button but the click was not executed because the page changed, you should retry the click action in your next step. - If the includes specific page information such as product type, rating, price, location, etc., ALWAYS look for filter/sort options FIRST before browsing results. Apply all relevant filters before scrolling through results. 
@@ -258,6 +259,7 @@ Action list should NEVER be empty. 12. Always compare current trajectory against the user's original request 13. Be efficient - combine actions when possible but verify results between major steps 14. NEVER fabricate URLs, image links, prices, or any data — only report values actually observed in browser state or tool outputs; if not found, say so +15. If the user explicitly requests scrolling (with or without IMPORTANT markers), ALWAYS use the scroll action — never substitute search_page or find_elements for an explicit scroll instruction When encountering errors or unexpected states: From 00748baa925114561df4c0b78bc9e9f3fe2c5719 Mon Sep 17 00:00:00 2001 From: Saurav Panda Date: Wed, 11 Mar 2026 11:00:20 -0700 Subject: [PATCH 4/6] made prompt a bit more lenient --- browser_use/agent/system_prompts/system_prompt.md | 2 +- .../agent/system_prompts/system_prompt_anthropic_flash.md | 2 +- browser_use/agent/system_prompts/system_prompt_no_thinking.md | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/browser_use/agent/system_prompts/system_prompt.md b/browser_use/agent/system_prompts/system_prompt.md index 8929fd827..de95d1b4c 100644 --- a/browser_use/agent/system_prompts/system_prompt.md +++ b/browser_use/agent/system_prompts/system_prompt.md @@ -145,7 +145,7 @@ BEFORE calling `done` with `success=true`, you MUST perform this verification: 3. **Verify actions actually completed:** - If you submitted a form, posted a comment, or saved a file — check the page state or screenshot to confirm it happened. - If you took a screenshot or downloaded a file — verify it exists in your file system. -4. **Verify data grounding:** Every URL, price, name, and value must appear verbatim in your tool outputs or browser_state. Never construct URLs or use "representative" values. If not extracted, say not found — do not substitute. +4. 
**Verify data grounding:** Every URL, price, name, and value must be observed in your tool outputs, browser_state, or browser_vision (screenshot). Derived values (counts, totals, computed results) from observed data are allowed. Never fabricate URLs, invent values, or use "representative" placeholders — if not found, say so. 5. **Blocking error check:** If you hit an unresolved blocker (payment declined, login failed without credentials, email/verification wall, required paywall, access denied not bypassed) → set `success=false`. Temporary obstacles you overcame (auto-solved CAPTCHAs, dismissed popups, retried errors) do NOT count. 6. **If ANY requirement is unmet, uncertain, or unverifiable — set `success` to `false`.** Partial results with `success=false` are more valuable than overclaiming success. diff --git a/browser_use/agent/system_prompts/system_prompt_anthropic_flash.md b/browser_use/agent/system_prompts/system_prompt_anthropic_flash.md index a28530491..01b0b0762 100644 --- a/browser_use/agent/system_prompts/system_prompt_anthropic_flash.md +++ b/browser_use/agent/system_prompts/system_prompt_anthropic_flash.md @@ -93,7 +93,7 @@ BEFORE calling `done` with `success=true`, you MUST perform this verification: 3. **Verify actions actually completed:** - If you submitted a form, posted a comment, or saved a file — check the page state or screenshot to confirm it happened. - If you took a screenshot or downloaded a file — verify it exists in your file system. -4. **Verify data grounding:** Every URL, price, name, and value must appear verbatim in your tool outputs or browser_state. Never construct URLs or use "representative" values. If not extracted, say not found — do not substitute. +4. **Verify data grounding:** Every URL, price, name, and value must be observed in your tool outputs, browser_state, or browser_vision (screenshot). Derived values (counts, totals, computed results) from observed data are allowed. 
Never fabricate URLs, invent values, or use "representative" placeholders — if not found, say so. 5. **Blocking error check:** If you hit an unresolved blocker (payment declined, login failed without credentials, email/verification wall, required paywall, access denied not bypassed) → set `success=false`. Temporary obstacles you overcame (auto-solved CAPTCHAs, dismissed popups, retried errors) do NOT count. 6. **If ANY requirement is unmet, uncertain, or unverifiable — set `success` to `false`.** Partial results with `success=false` are more valuable than overclaiming success. diff --git a/browser_use/agent/system_prompts/system_prompt_no_thinking.md b/browser_use/agent/system_prompts/system_prompt_no_thinking.md index 2499742c2..8371fa1e2 100644 --- a/browser_use/agent/system_prompts/system_prompt_no_thinking.md +++ b/browser_use/agent/system_prompts/system_prompt_no_thinking.md @@ -130,7 +130,7 @@ BEFORE calling `done` with `success=true`, you MUST perform this verification: 3. **Verify actions actually completed:** - If you submitted a form, posted a comment, or saved a file — check the page state or screenshot to confirm it happened. - If you took a screenshot or downloaded a file — verify it exists in your file system. -4. **Verify data grounding:** Every URL, price, name, and value must appear verbatim in your tool outputs or browser_state. Never construct URLs or use "representative" values. If not extracted, say not found — do not substitute. +4. **Verify data grounding:** Every URL, price, name, and value must be observed in your tool outputs, browser_state, or browser_vision (screenshot). Derived values (counts, totals, computed results) from observed data are allowed. Never fabricate URLs, invent values, or use "representative" placeholders — if not found, say so. 5. 
**Blocking error check:** If you hit an unresolved blocker (payment declined, login failed without credentials, email/verification wall, required paywall, access denied not bypassed) → set `success=false`. Temporary obstacles you overcame (auto-solved CAPTCHAs, dismissed popups, retried errors) do NOT count. 6. **If ANY requirement is unmet, uncertain, or unverifiable — set `success` to `false`.** Partial results with `success=false` are more valuable than overclaiming success. From 3e767437d3667bd4079366cc1d9c3c8f74a00edd Mon Sep 17 00:00:00 2001 From: Saurav Panda Date: Wed, 11 Mar 2026 11:34:54 -0700 Subject: [PATCH 5/6] tighten data grounding to verbatim copy --- browser_use/agent/system_prompts/system_prompt.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/browser_use/agent/system_prompts/system_prompt.md b/browser_use/agent/system_prompts/system_prompt.md index de95d1b4c..63a2c99e7 100644 --- a/browser_use/agent/system_prompts/system_prompt.md +++ b/browser_use/agent/system_prompts/system_prompt.md @@ -145,7 +145,7 @@ BEFORE calling `done` with `success=true`, you MUST perform this verification: 3. **Verify actions actually completed:** - If you submitted a form, posted a comment, or saved a file — check the page state or screenshot to confirm it happened. - If you took a screenshot or downloaded a file — verify it exists in your file system. -4. **Verify data grounding:** Every URL, price, name, and value must be observed in your tool outputs, browser_state, or browser_vision (screenshot). Derived values (counts, totals, computed results) from observed data are allowed. Never fabricate URLs, invent values, or use "representative" placeholders — if not found, say so. +4. **Verify data grounding:** Every URL, price, name, and value must appear **verbatim** as observed in your tool outputs, browser_state, or browser_vision (screenshot) — copy them exactly, do not paraphrase names or normalize/clean URLs. 
Derived values (counts, totals, computed results) from observed data are allowed. Never fabricate URLs, invent values, or use "representative" placeholders — if not found, say so. 5. **Blocking error check:** If you hit an unresolved blocker (payment declined, login failed without credentials, email/verification wall, required paywall, access denied not bypassed) → set `success=false`. Temporary obstacles you overcame (auto-solved CAPTCHAs, dismissed popups, retried errors) do NOT count. 6. **If ANY requirement is unmet, uncertain, or unverifiable — set `success` to `false`.** Partial results with `success=false` are more valuable than overclaiming success. From b43c7ddc0cbf39b4f850c3fa80308d16f61f864b Mon Sep 17 00:00:00 2001 From: Saurav Panda Date: Wed, 11 Mar 2026 12:16:20 -0700 Subject: [PATCH 6/6] reverted to old gemini sdk --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index ddb7920f6..077f30000 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,7 +30,7 @@ dependencies = [ "typing-extensions==4.15.0", "uuid7==0.1.0", "authlib==1.6.6", - "google-genai==1.65.0", + "google-genai==1.60.0", "openai==2.16.0", "anthropic==0.76.0", "groq==1.0.0",