diff --git a/apps/python-sdk/firecrawl/v2/client.py b/apps/python-sdk/firecrawl/v2/client.py index d61349be2..5d5edab6c 100644 --- a/apps/python-sdk/firecrawl/v2/client.py +++ b/apps/python-sdk/firecrawl/v2/client.py @@ -54,10 +54,14 @@ from .watcher import Watcher class FirecrawlClient: """ Main Firecrawl v2 API client. - + This client provides a clean, modular interface to all Firecrawl functionality. """ - + + @staticmethod + def _is_cloud_service(url: str) -> bool: + return "api.firecrawl.dev" in url.lower() + def __init__( self, api_key: Optional[str] = None, @@ -68,7 +72,7 @@ class FirecrawlClient: ): """ Initialize the Firecrawl client. - + Args: api_key: Firecrawl API key (or set FIRECRAWL_API_KEY env var) api_url: Base URL for the Firecrawl API @@ -78,13 +82,13 @@ class FirecrawlClient: """ if api_key is None: api_key = os.getenv("FIRECRAWL_API_KEY") - - if not api_key: + + if self._is_cloud_service(api_url) and not api_key: raise ValueError( - "API key is required. Set FIRECRAWL_API_KEY environment variable " + "API key is required for the cloud API. Set FIRECRAWL_API_KEY environment variable " "or pass api_key parameter." ) - + self.config = ClientConfig( api_key=api_key, api_url=api_url, @@ -92,7 +96,7 @@ class FirecrawlClient: max_retries=max_retries, backoff_factor=backoff_factor ) - + self.http_client = HttpClient(api_key, api_url) def scrape( diff --git a/apps/python-sdk/firecrawl/v2/client_async.py b/apps/python-sdk/firecrawl/v2/client_async.py index 9e8c081a2..0e78f2f61 100644 --- a/apps/python-sdk/firecrawl/v2/client_async.py +++ b/apps/python-sdk/firecrawl/v2/client_async.py @@ -47,11 +47,15 @@ from .methods.aio import extract as async_extract # type: ignore[attr-defined] from .watcher_async import AsyncWatcher class AsyncFirecrawlClient: + @staticmethod + def _is_cloud_service(url: str) -> bool: + return "api.firecrawl.dev" in url.lower() + def __init__(self, api_key: Optional[str] = None, api_url: str = "https://api.firecrawl.dev"): if api_key is None: api_key = os.getenv("FIRECRAWL_API_KEY") - if not api_key: - raise ValueError("API key is required. Set FIRECRAWL_API_KEY or pass api_key.") + if self._is_cloud_service(api_url) and not api_key: + raise ValueError("API key is required for the cloud API. Set FIRECRAWL_API_KEY or pass api_key.") self.http_client = HttpClient(api_key, api_url) self.async_http_client = AsyncHttpClient(api_key, api_url) diff --git a/apps/python-sdk/firecrawl/v2/types.py b/apps/python-sdk/firecrawl/v2/types.py index 6d41f3a41..bd38e20e6 100644 --- a/apps/python-sdk/firecrawl/v2/types.py +++ b/apps/python-sdk/firecrawl/v2/types.py @@ -768,7 +768,7 @@ class ActiveCrawlsRequest(BaseModel): # Configuration types class ClientConfig(BaseModel): """Configuration for the Firecrawl client.""" - api_key: str + api_key: Optional[str] api_url: str = "https://api.firecrawl.dev" timeout: Optional[float] = None max_retries: int = 3 diff --git a/apps/python-sdk/firecrawl/v2/utils/http_client.py b/apps/python-sdk/firecrawl/v2/utils/http_client.py index 507457b2f..65b84d995 100644 --- a/apps/python-sdk/firecrawl/v2/utils/http_client.py +++ b/apps/python-sdk/firecrawl/v2/utils/http_client.py @@ -12,8 +12,8 @@ version = get_version() class HttpClient: """HTTP client with retry logic and error handling.""" - - def __init__(self, api_key: str, api_url: str): + + def __init__(self, api_key: Optional[str], api_url: str): self.api_key = api_key self.api_url = api_url @@ -43,8 +43,10 @@ class HttpClient: """Prepare headers for API requests.""" headers = { 'Content-Type': 'application/json', - 'Authorization': f'Bearer {self.api_key}', } + + if self.api_key: + headers['Authorization'] = f'Bearer {self.api_key}' if idempotency_key: headers['x-idempotency-key'] = idempotency_key diff --git a/apps/python-sdk/firecrawl/v2/utils/http_client_async.py b/apps/python-sdk/firecrawl/v2/utils/http_client_async.py index 73e7e729f..639b9891a 100644 --- a/apps/python-sdk/firecrawl/v2/utils/http_client_async.py +++ b/apps/python-sdk/firecrawl/v2/utils/http_client_async.py @@ -6,15 +6,19 @@ version = get_version() class AsyncHttpClient: - def __init__(self, api_key: str, api_url: str): + def __init__(self, api_key: Optional[str], api_url: str): self.api_key = api_key self.api_url = api_url + headers = { + "Content-Type": "application/json", + } + + if api_key: + headers["Authorization"] = f"Bearer {api_key}" + self._client = httpx.AsyncClient( base_url=api_url, - headers={ - "Authorization": f"Bearer {api_key}", - "Content-Type": "application/json", - }, + headers=headers, limits=httpx.Limits(max_keepalive_connections=0), ) diff --git a/apps/python-sdk/tests/test_api_key_handling.py b/apps/python-sdk/tests/test_api_key_handling.py new file mode 100644 index 000000000..5cb845caf --- /dev/null +++ b/apps/python-sdk/tests/test_api_key_handling.py @@ -0,0 +1,44 @@ +import sys +from pathlib import Path + +import pytest + +ROOT = Path(__file__).resolve().parents[1] +if str(ROOT) not in sys.path: + sys.path.insert(0, str(ROOT)) + +from firecrawl.v2.client import FirecrawlClient +from firecrawl.v2.client_async import AsyncFirecrawlClient + + +@pytest.fixture(autouse=True) +def clear_firecrawl_api_key_env(monkeypatch): + monkeypatch.delenv("FIRECRAWL_API_KEY", raising=False) + yield + + +def test_cloud_requires_api_key(): + with pytest.raises(ValueError): + FirecrawlClient(api_url="https://api.firecrawl.dev") + + +def test_self_host_allows_missing_api_key(): + client = FirecrawlClient(api_url="http://localhost:3000") + assert client.http_client.api_key is None + + +def test_async_cloud_requires_api_key(): + with pytest.raises(ValueError): + AsyncFirecrawlClient(api_url="https://api.firecrawl.dev") + + +@pytest.mark.asyncio +async def test_async_self_host_allows_missing_api_key(): + client = AsyncFirecrawlClient(api_url="http://localhost:3000") + try: + assert client.http_client.api_key is None + await client.async_http_client.close() + finally: + # Ensure the underlying HTTPX client is closed even if assertions fail + if not client.async_http_client._client.is_closed: + await client.async_http_client.close()