test_integration.py (12.9 kB)
#!/usr/bin/env python3
import json
import os
import sys
import unittest
from unittest.mock import patch

import requests

# Add src to path for imports
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "src"))

from websearch.core.content import fetch_single_page_content
from websearch.engines.search import (search_bing, search_duckduckgo,
                                      search_startpage)
from websearch.utils.cache import content_cache, search_cache


class TestWebSearchIntegration(unittest.TestCase):
    """Integration tests for web search functionality"""

    def setUp(self):
        """Clear caches before each test"""
        search_cache.cache.clear()
        content_cache.cache.clear()

    def test_search_duckduckgo_real(self):
        """Test real DuckDuckGo search"""
        results = search_duckduckgo("python", 3)

        self.assertIsInstance(results, list)
        if results:  # If we got results
            result = results[0]
            self.assertIn("title", result)
            self.assertIn("url", result)
            self.assertIn("snippet", result)
            self.assertIn("source", result)
            self.assertIn("rank", result)
            self.assertEqual(result["source"], "DuckDuckGo")
            self.assertGreaterEqual(result["rank"], 1)

    def test_search_bing_real(self):
        """Test real Bing search"""
        results = search_bing("python", 3)

        self.assertIsInstance(results, list)
        if results:  # If we got results
            result = results[0]
            self.assertIn("title", result)
            self.assertIn("url", result)
            self.assertIn("snippet", result)
            self.assertIn("source", result)
            self.assertIn("rank", result)
            self.assertEqual(result["source"], "Bing")
            self.assertGreaterEqual(result["rank"], 1)

    def test_search_startpage_real(self):
        """Test real Startpage search"""
        results = search_startpage("python", 3)

        self.assertIsInstance(results, list)
        if results:  # If we got results
            result = results[0]
            self.assertIn("title", result)
            self.assertIn("url", result)
            self.assertIn("snippet", result)
            self.assertIn("source", result)
            self.assertIn("rank", result)
            self.assertEqual(result["source"], "Startpage")
            self.assertGreaterEqual(result["rank"], 1)

    def test_ranking_preserved(self):
        """Test that ranking is preserved per engine"""
        results = search_duckduckgo("python programming", 5)

        if len(results) >= 2:
            # Check that ranks are sequential
            for i, result in enumerate(results):
                self.assertEqual(result["rank"], i + 1)

    def test_single_page_fetch(self):
        """Test fetching a single web page"""
        url = "https://httpbin.org/html"
        result_json = fetch_single_page_content(url)
        result = json.loads(result_json)

        self.assertIn("success", result)
        self.assertIn("url", result)
        self.assertIn("timestamp", result)
        self.assertIn("cached", result)

        if result["success"]:
            self.assertIn("content", result)
            self.assertIn("content_length", result)
            self.assertGreater(result["content_length"], 0)

    def test_batch_page_fetch_single_url(self):
        """Test batch fetch with single URL (string input)"""
        # Test the internal function directly since the tool is wrapped
        url = "https://httpbin.org/html"
        result_json = fetch_single_page_content(url)
        result = json.loads(result_json)

        # Should return single page result, not batch format
        self.assertIn("success", result)
        self.assertIn("url", result)
        self.assertNotIn("batch_request", result)

    def test_batch_page_fetch_multiple_urls(self):
        """Test batch fetch logic with multiple URLs"""
        # We'll test the batch logic manually since the tool is wrapped
        import threading
        from datetime import datetime

        urls = ["https://httpbin.org/html", "https://httpbin.org/json"]
        results = []
        threads = []
        thread_results = {}

        def fetch_url_thread(url_to_fetch: str, index: int):
            try:
                result_json = fetch_single_page_content(url_to_fetch)
                thread_results[index] = json.loads(result_json)
            except Exception as e:
                thread_results[index] = {
                    "url": url_to_fetch,
                    "success": False,
                    "error": f"Thread error: {str(e)}",
                    "timestamp": datetime.utcnow().isoformat() + "Z",
                    "cached": False,
                }

        # Start threads for parallel fetching
        for i, url_to_fetch in enumerate(urls):
            thread = threading.Thread(target=fetch_url_thread, args=(url_to_fetch, i))
            thread.start()
            threads.append(thread)

        # Wait for all threads to complete
        for thread in threads:
            thread.join(timeout=25)

        # Collect results in order
        for i in range(len(urls)):
            if i in thread_results:
                results.append(thread_results[i])

        # Should have results for both URLs
        self.assertEqual(len(results), 2)
        for result in results:
            self.assertIn("success", result)
            self.assertIn("url", result)

    def test_content_caching(self):
        """Test that content caching works"""
        url = "https://httpbin.org/html"

        # First fetch
        result1_json = fetch_single_page_content(url)
        result1 = json.loads(result1_json)

        # Second fetch should be cached
        result2_json = fetch_single_page_content(url)
        result2 = json.loads(result2_json)

        # Both should succeed
        if result1["success"] and result2["success"]:
            # Content should be identical
            self.assertEqual(result1["content"], result2["content"])
            # Check cache was used (this is logged, but we can't easily test it here)


class TestCachingFunctionality(unittest.TestCase):
    """Test caching system"""

    def setUp(self):
        """Clear caches before each test"""
        search_cache.cache.clear()
        content_cache.cache.clear()

    def test_cache_basic_operations(self):
        """Test basic cache operations"""
        # Test set and get
        search_cache.set("test_key", {"data": "test_value"})
        result = search_cache.get("test_key")

        self.assertIsNotNone(result)
        self.assertEqual(result["data"], "test_value")

    def test_cache_expiration(self):
        """Test cache expiration"""
        import time

        # Create cache with very short TTL
        from websearch.utils.cache import SimpleCache

        short_cache = SimpleCache(ttl_seconds=1)
        short_cache.set("test_key", {"data": "test_value"})

        # Should be available immediately
        result = short_cache.get("test_key")
        self.assertIsNotNone(result)

        # Wait for expiration
        time.sleep(1.1)

        # Should be expired
        result = short_cache.get("test_key")
        self.assertIsNone(result)

    def test_cache_cleanup(self):
        """Test cache cleanup functionality"""
        from websearch.utils.cache import SimpleCache

        short_cache = SimpleCache(ttl_seconds=1)

        # Add some entries
        short_cache.set("key1", "value1")
        short_cache.set("key2", "value2")

        # Wait for expiration
        import time

        time.sleep(1.1)

        # Clear expired entries
        short_cache.clear_expired()

        # Cache should be empty
        self.assertEqual(len(short_cache.cache), 0)


class TestMockedFunctionality(unittest.TestCase):
    """Test core logic with mocked network calls"""

    def test_deduplication_logic(self):
        """Test URL deduplication works correctly"""
        # Mock all search functions to return overlapping results
        with (
            patch("websearch.engines.search.search_duckduckgo") as mock_ddg,
            patch("websearch.engines.search.search_bing") as mock_bing,
            patch("websearch.engines.search.search_startpage") as mock_sp,
        ):
            duplicate_url = "https://example.com"
            mock_ddg.return_value = [
                {
                    "title": "DDG Result",
                    "url": duplicate_url,
                    "snippet": "DDG snippet",
                    "source": "DuckDuckGo",
                    "rank": 1,
                }
            ]
            mock_bing.return_value = [
                {
                    "title": "Bing Result",
                    "url": duplicate_url,
                    "snippet": "Bing snippet",
                    "source": "Bing",
                    "rank": 1,
                }
            ]
            mock_sp.return_value = [
                {
                    "title": "SP Result",
                    "url": "https://different.com",
                    "snippet": "SP snippet",
                    "source": "Startpage",
                    "rank": 1,
                }
            ]

            # Test the deduplication logic manually
            all_results = (
                mock_ddg("test", 5) + mock_bing("test", 5) + mock_sp("test", 5)
            )

            seen_urls = set()
            unique_results = []
            for result in all_results:
                if result["url"] not in seen_urls:
                    seen_urls.add(result["url"])
                    unique_results.append(result)

            # Should have 2 unique URLs
            self.assertEqual(len(unique_results), 2)
            urls = [r["url"] for r in unique_results]
            self.assertIn(duplicate_url, urls)
            self.assertIn("https://different.com", urls)

    def test_error_handling(self):
        """Test error handling in page fetch"""
        from datetime import datetime
        from typing import Any, Dict

        result: Dict[str, Any] = {
            "url": "https://nonexistent.example",
            "timestamp": datetime.utcnow().isoformat() + "Z",
        }

        # Simulate request error
        try:
            raise requests.RequestException("Connection failed")
        except requests.RequestException as e:
            result.update(
                {
                    "success": False,
                    "content": None,
                    "content_length": 0,
                    "truncated": False,
                    "error": f"Request error: {str(e)}",
                }
            )

        self.assertFalse(result["success"])
        self.assertIn("Connection failed", result["error"])
        self.assertIsNone(result["content"])

    def test_batch_error_handling(self):
        """Test error handling in batch fetch logic"""
        # Test the batch logic manually since the tool is wrapped
        import threading
        from datetime import datetime

        urls = [
            "https://httpbin.org/html",
            "https://invalid-url-that-does-not-exist.com",
        ]
        results = []
        threads = []
        thread_results = {}

        def fetch_url_thread(url_to_fetch: str, index: int):
            try:
                result_json = fetch_single_page_content(url_to_fetch)
                thread_results[index] = json.loads(result_json)
            except Exception as e:
                thread_results[index] = {
                    "url": url_to_fetch,
                    "success": False,
                    "error": f"Thread error: {str(e)}",
                    "timestamp": datetime.utcnow().isoformat() + "Z",
                    "cached": False,
                }

        # Start threads for parallel fetching
        for i, url_to_fetch in enumerate(urls):
            thread = threading.Thread(target=fetch_url_thread, args=(url_to_fetch, i))
            thread.start()
            threads.append(thread)

        # Wait for all threads to complete
        for thread in threads:
            thread.join(timeout=25)

        # Collect results in order
        for i in range(len(urls)):
            if i in thread_results:
                results.append(thread_results[i])

        # Should have results for both URLs
        self.assertEqual(len(results), 2)

        # Tally outcomes; the assertions stay permissive (>= 0) because live
        # network results are nondeterministic
        success_count = sum(1 for r in results if r.get("success", False))
        failure_count = sum(1 for r in results if not r.get("success", False))
        self.assertGreaterEqual(success_count, 0)
        self.assertGreaterEqual(failure_count, 0)


if __name__ == "__main__":
    print("Running integration tests with real web calls...")
    unittest.main(verbosity=2)
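For context, the cache tests above pin down a small interface for SimpleCache: a ttl_seconds constructor argument, a plain dict exposed as .cache, and set/get/clear_expired methods. The following is a minimal sketch that satisfies exactly those assertions; it is inferred from the tests, not the actual websearch.utils.cache implementation, which may store entries differently.

# Hypothetical sketch only: the ttl_seconds argument, the public .cache dict,
# and the method names are taken from the tests; the storage format (a
# (value, stored_at) tuple per key) is an assumption.
import time
from typing import Any, Optional


class SimpleCache:
    def __init__(self, ttl_seconds: int = 3600):
        self.ttl_seconds = ttl_seconds
        self.cache: dict = {}  # key -> (value, stored_at)

    def set(self, key: str, value: Any) -> None:
        self.cache[key] = (value, time.time())

    def get(self, key: str) -> Optional[Any]:
        entry = self.cache.get(key)
        if entry is None:
            return None
        value, stored_at = entry
        if time.time() - stored_at > self.ttl_seconds:
            del self.cache[key]  # drop the expired entry on read
            return None
        return value

    def clear_expired(self) -> None:
        now = time.time()
        for key in [k for k, (_, t) in self.cache.items()
                    if now - t > self.ttl_seconds]:
            del self.cache[key]

The suite can be run directly with "python test_integration.py": the __main__ block prints a banner and invokes unittest at verbosity=2. Note that the httpbin.org and live search-engine cases require network access, which is why most of their assertions are guarded by "if results:"-style checks rather than asserting unconditionally.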
