diff options
author | S. Solomon Darnell | 2025-03-28 21:52:21 -0500 |
---|---|---|
committer | S. Solomon Darnell | 2025-03-28 21:52:21 -0500 |
commit | 4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch) | |
tree | ee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/firecrawl/__tests__/e2e_withAuth | |
parent | cc961e04ba734dd72309fb548a2f97d67d578813 (diff) | |
download | gn-ai-master.tar.gz |
Diffstat (limited to '.venv/lib/python3.12/site-packages/firecrawl/__tests__/e2e_withAuth')
-rw-r--r-- | .venv/lib/python3.12/site-packages/firecrawl/__tests__/e2e_withAuth/__init__.py | 0 | ||||
-rw-r--r-- | .venv/lib/python3.12/site-packages/firecrawl/__tests__/e2e_withAuth/test.py | 170 |
2 files changed, 170 insertions, 0 deletions
"""End-to-end tests for ``FirecrawlApp`` (``version='v0'``) run with credentials.

Recovered from a git diff that adds
``.venv/lib/python3.12/site-packages/firecrawl/__tests__/e2e_withAuth/test.py``
(new file, blob ``bbe5df68``, 170 lines, no trailing newline) together with an
empty ``__init__.py`` (blob ``e69de29b``) in the same directory.

The tests call the API at ``API_URL`` and read ``TEST_API_KEY`` and
``PREVIEW_TOKEN`` from the environment after ``load_dotenv()`` has run.
"""

import importlib.util
import os
import time
from uuid import uuid4

import pytest
from dotenv import load_dotenv

load_dotenv()

API_URL = "http://127.0.0.1:3002"
# NOTE: despite the name, this path is relative and therefore resolved against
# the current working directory of the test run.
ABSOLUTE_FIRECRAWL_PATH = "firecrawl/firecrawl.py"
TEST_API_KEY = os.getenv('TEST_API_KEY')

# Upper bound (seconds) on how long the crawl-status test waits for a job, and
# the pause between two consecutive status checks.
CRAWL_STATUS_TIMEOUT_SECONDS = 30
CRAWL_STATUS_POLL_SECONDS = 2

print(f"ABSOLUTE_FIRECRAWL_PATH: {ABSOLUTE_FIRECRAWL_PATH}")

# Load the module under test straight from its file path instead of importing
# an installed package.
spec = importlib.util.spec_from_file_location("FirecrawlApp", ABSOLUTE_FIRECRAWL_PATH)
firecrawl = importlib.util.module_from_spec(spec)
spec.loader.exec_module(firecrawl)
FirecrawlApp = firecrawl.FirecrawlApp


def _wait_for_crawl_status(app, job_id, timeout=CRAWL_STATUS_TIMEOUT_SECONDS,
                           poll_interval=CRAWL_STATUS_POLL_SECONDS):
    """Poll ``app.check_crawl_status(job_id)`` until it reports completion.

    Args:
        app: Object exposing ``check_crawl_status(job_id)``.
        job_id: Identifier returned when the crawl job was started.
        timeout: Maximum number of seconds to keep polling.
        poll_interval: Seconds to sleep between two status checks.

    Returns:
        The last status response obtained. It is returned as soon as its
        ``'status'`` is ``'completed'``; otherwise it is the response of a
        final check made once ``timeout`` seconds have elapsed.
    """
    deadline = time.monotonic() + timeout
    while True:
        status_response = app.check_crawl_status(job_id)
        if (
            status_response is not None
            and 'status' in status_response
            and status_response['status'] == 'completed'
        ):
            return status_response
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            # Budget exhausted: hand back whatever the last check reported so
            # the caller's assertions produce the failure message.
            return status_response
        # Never sleep past the deadline, so the last check happens right at it.
        time.sleep(min(poll_interval, remaining))


def test_no_api_key():
    """Constructing the app without ``api_key`` raises "No API key provided"."""
    with pytest.raises(Exception) as excinfo:
        FirecrawlApp(api_url=API_URL, version='v0')
    assert "No API key provided" in str(excinfo.value)


def test_scrape_url_invalid_api_key():
    """``scrape_url`` with a bogus key fails with the 401 error message."""
    invalid_app = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key", version='v0')
    with pytest.raises(Exception) as excinfo:
        invalid_app.scrape_url('https://firecrawl.dev')
    assert "Unexpected error during scrape URL: Status code 401. Unauthorized: Invalid token" in str(excinfo.value)


# Disabled test, kept commented out exactly as in the original file.
# def test_blocklisted_url():
#     blocklisted_url = "https://facebook.com/fake-test"
#     app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY, version='v0')
#     with pytest.raises(Exception) as excinfo:
#         app.scrape_url(blocklisted_url)
#     assert "Unexpected error during scrape URL: Status code 403. Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it." in str(excinfo.value)


def test_successful_response_with_valid_preview_token():
    """``scrape_url`` succeeds when the key comes from ``PREVIEW_TOKEN``."""
    app = FirecrawlApp(api_url=API_URL, api_key=os.getenv('PREVIEW_TOKEN'), version='v0')
    response = app.scrape_url('https://roastmywebsite.ai')
    assert response is not None
    assert 'content' in response
    assert "_Roast_" in response['content']


def test_scrape_url_e2e():
    """A default scrape returns content, markdown and metadata but no html."""
    app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY, version='v0')
    response = app.scrape_url('https://roastmywebsite.ai')
    print(response)

    assert response is not None
    assert 'content' in response
    assert 'markdown' in response
    assert 'metadata' in response
    assert 'html' not in response
    assert "_Roast_" in response['content']


def test_successful_response_with_valid_api_key_and_include_html():
    """With ``includeHtml`` set, the scrape response also carries ``html``."""
    app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY, version='v0')
    response = app.scrape_url('https://roastmywebsite.ai', {'pageOptions': {'includeHtml': True}})
    assert response is not None
    assert 'content' in response
    assert 'markdown' in response
    assert 'html' in response
    assert 'metadata' in response
    assert "_Roast_" in response['content']
    assert "_Roast_" in response['markdown']
    assert "<h1" in response['html']


def test_successful_response_for_valid_scrape_with_pdf_file():
    """Scraping a ``.pdf`` URL yields the expected text in ``content``."""
    app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY, version='v0')
    response = app.scrape_url('https://arxiv.org/pdf/astro-ph/9301001.pdf')
    assert response is not None
    assert 'content' in response
    assert 'metadata' in response
    assert 'We present spectrophotometric observations of the Broad Line Radio Galaxy' in response['content']


def test_successful_response_for_valid_scrape_with_pdf_file_without_explicit_extension():
    """Scraping the same PDF via a URL without ``.pdf`` yields the same text."""
    app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY, version='v0')
    response = app.scrape_url('https://arxiv.org/pdf/astro-ph/9301001')
    # NOTE(review): the response is already in hand here, so this pause cannot
    # influence the assertions below; presumably it throttles requests between
    # tests -- confirm before removing.
    time.sleep(6)  # wait for 6 seconds
    assert response is not None
    assert 'content' in response
    assert 'metadata' in response
    assert 'We present spectrophotometric observations of the Broad Line Radio Galaxy' in response['content']


def test_crawl_url_invalid_api_key():
    """``crawl_url`` with a bogus key fails with the 401 error message."""
    invalid_app = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key", version='v0')
    with pytest.raises(Exception) as excinfo:
        invalid_app.crawl_url('https://firecrawl.dev')
    assert "Unexpected error during start crawl job: Status code 401. Unauthorized: Invalid token" in str(excinfo.value)


# Disabled test, kept commented out exactly as in the original file.
# def test_should_return_error_for_blocklisted_url():
#     app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY, version='v0')
#     blocklisted_url = "https://twitter.com/fake-test"
#     with pytest.raises(Exception) as excinfo:
#         app.crawl_url(blocklisted_url)
#     assert "Unexpected error during start crawl job: Status code 403. Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it." in str(excinfo.value)


def test_crawl_url_wait_for_completion_e2e():
    """``crawl_url`` called with its third argument ``True`` returns page data."""
    app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY, version='v0')
    response = app.crawl_url('https://roastmywebsite.ai', {'crawlerOptions': {'excludes': ['blog/*']}}, True)
    assert response is not None
    assert len(response) > 0
    assert 'content' in response[0]
    assert "_Roast_" in response[0]['content']


def test_crawl_url_with_idempotency_key_e2e():
    """Reusing an idempotency key for a second crawl raises a conflict error."""
    app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY, version='v0')
    idempotency_key = str(uuid4())
    response = app.crawl_url('https://roastmywebsite.ai', {'crawlerOptions': {'excludes': ['blog/*']}}, True, 2, idempotency_key)
    assert response is not None
    assert len(response) > 0
    assert 'content' in response[0]
    assert "_Roast_" in response[0]['content']

    # Second request deliberately reuses the key that was just consumed.
    with pytest.raises(Exception) as excinfo:
        app.crawl_url('https://firecrawl.dev', {'crawlerOptions': {'excludes': ['blog/*']}}, True, 2, idempotency_key)
    assert "Conflict: Failed to start crawl job due to a conflict. Idempotency key already used" in str(excinfo.value)


def test_check_crawl_status_e2e():
    """A crawl started with third argument ``False`` completes within the budget."""
    app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY, version='v0')
    response = app.crawl_url('https://firecrawl.dev', {'crawlerOptions': {'excludes': ['blog/*']}}, False)
    assert response is not None
    assert 'jobId' in response

    # Poll instead of sleeping a fixed 30 seconds: the test finishes as soon as
    # the job is done, and still gets a final check once the budget is spent.
    status_response = _wait_for_crawl_status(app, response['jobId'])
    assert status_response is not None
    assert 'status' in status_response
    assert status_response['status'] == 'completed'
    assert 'data' in status_response
    assert len(status_response['data']) > 0


def test_search_e2e():
    """``search`` returns more than two results and the first has ``content``."""
    app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY, version='v0')
    response = app.search("test query")
    assert response is not None
    # Check the size before indexing so an empty result fails on the
    # assertion rather than with an IndexError.
    assert len(response) > 2
    assert 'content' in response[0]


def test_search_invalid_api_key():
    """``search`` with a bogus key fails with the 401 error message."""
    invalid_app = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key", version='v0')
    with pytest.raises(Exception) as excinfo:
        invalid_app.search("test query")
    assert "Unexpected error during search: Status code 401. Unauthorized: Invalid token" in str(excinfo.value)


def test_llm_extraction():
    """An ``llm-extraction`` scrape returns the fields required by the schema."""
    app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY, version='v0')
    response = app.scrape_url("https://firecrawl.dev", {
        'extractorOptions': {
            'mode': 'llm-extraction',
            'extractionPrompt': "Based on the information on the page, find what the company's mission is and whether it supports SSO, and whether it is open source",
            'extractionSchema': {
                'type': 'object',
                'properties': {
                    'company_mission': {'type': 'string'},
                    'supports_sso': {'type': 'boolean'},
                    'is_open_source': {'type': 'boolean'}
                },
                'required': ['company_mission', 'supports_sso', 'is_open_source']
            }
        }
    })
    assert response is not None
    assert 'llm_extraction' in response
    llm_extraction = response['llm_extraction']
    assert 'company_mission' in llm_extraction
    assert isinstance(llm_extraction['supports_sso'], bool)
    assert isinstance(llm_extraction['is_open_source'], bool)