Skip to main content
Glama
test_semantic_search.py•16.5 kB
""" Unit tests for semantic search functionality. Tests the semantic_search MCP tool with mocked dependencies. """ import pytest from unittest.mock import Mock, AsyncMock, patch from pathlib import Path # Mock implementation (replace with actual import when refactored) async def semantic_search_mock( query: str, max_results: int = 5, min_score: float = 0.7, auto_index: bool = False, qdrant_client=None, collection_name: str = "test_collection" ) -> str: """Mock semantic search for testing.""" if not qdrant_client: return "Error: Qdrant not available. Is it running?" # Check collection exists try: info = qdrant_client.get_collection(collection_name) if info.points_count == 0: return f"Workspace not indexed yet. Run: index_workspace()" except Exception: return "Error: Collection not ready" # Simulate search try: results = qdrant_client.query_points( collection_name=collection_name, query=[0.1] * 1024, # Fake query embedding limit=max_results ).points # Filter by score results = [r for r in results if r.score >= min_score] if not results: return f"No results found for '{query}' with score >= {min_score}" # Format results output = f"Found {len(results)} matches for '{query}':\n\n" for i, result in enumerate(results, 1): payload = result.payload or {} file_path = payload.get("path", "unknown") text = payload.get("text", "")[:200] output += f"{i}. 
{file_path} (score: {result.score:.2f})\n" output += f" {text.strip()}...\n\n" return output except Exception as e: return f"Error: {str(e)}" @pytest.mark.unit @pytest.mark.asyncio class TestSemanticSearch: """Test suite for semantic search.""" async def test_search_no_qdrant(self): """Test search fails gracefully when Qdrant unavailable.""" result = await semantic_search_mock( "test query", qdrant_client=None ) assert "Error: Qdrant not available" in result async def test_search_empty_collection(self, mock_qdrant_client): """Test search on unindexed workspace.""" # Mock empty collection mock_info = Mock() mock_info.points_count = 0 mock_qdrant_client.get_collection.return_value = mock_info result = await semantic_search_mock( "test query", qdrant_client=mock_qdrant_client ) assert "not indexed yet" in result.lower() async def test_search_with_results(self, mock_qdrant_client): """Test successful search with results.""" # Mock collection with data mock_info = Mock() mock_info.points_count = 100 mock_qdrant_client.get_collection.return_value = mock_info # Mock search results mock_point = Mock() mock_point.score = 0.92 mock_point.payload = { "path": "src/main.py", "text": "def handle_error(error): print(error)" } mock_search_result = Mock() mock_search_result.points = [mock_point] mock_qdrant_client.query_points.return_value = mock_search_result result = await semantic_search_mock( "error handling", qdrant_client=mock_qdrant_client ) assert "Found 1 matches" in result assert "src/main.py" in result assert "0.92" in result async def test_search_no_results_above_threshold(self, mock_qdrant_client): """Test search with no results above min_score.""" mock_info = Mock() mock_info.points_count = 100 mock_qdrant_client.get_collection.return_value = mock_info # Mock low-score result mock_point = Mock() mock_point.score = 0.5 # Below default 0.7 threshold mock_point.payload = {"path": "test.py", "text": "test"} mock_search_result = Mock() mock_search_result.points = 
[mock_point] mock_qdrant_client.query_points.return_value = mock_search_result result = await semantic_search_mock( "nonexistent pattern", qdrant_client=mock_qdrant_client, min_score=0.7 ) assert "No results found" in result async def test_search_respects_max_results(self, mock_qdrant_client): """Test max_results parameter is passed to query_points.""" mock_info = Mock() mock_info.points_count = 100 mock_qdrant_client.get_collection.return_value = mock_info # Create mock results (Qdrant would limit this, we simulate that) mock_points = [] for i in range(3): # Simulate Qdrant returning only 3 mock_point = Mock() mock_point.score = 0.9 - (i * 0.01) mock_point.payload = { "path": f"file{i}.py", "text": f"content {i}" } mock_points.append(mock_point) mock_search_result = Mock() mock_search_result.points = mock_points mock_qdrant_client.query_points.return_value = mock_search_result result = await semantic_search_mock( "test", max_results=3, qdrant_client=mock_qdrant_client ) # Verify max_results was passed to query_points call_args = mock_qdrant_client.query_points.call_args assert call_args[1]["limit"] == 3 # Should show exactly 3 results assert result.count("file") == 3 async def test_search_custom_min_score(self, mock_qdrant_client): """Test custom min_score filters results.""" mock_info = Mock() mock_info.points_count = 100 mock_qdrant_client.get_collection.return_value = mock_info # Create results with various scores mock_points = [ Mock(score=0.95, payload={"path": "high.py", "text": "high score"}), Mock(score=0.85, payload={"path": "medium.py", "text": "medium"}), Mock(score=0.75, payload={"path": "low.py", "text": "low score"}) ] mock_search_result = Mock() mock_search_result.points = mock_points mock_qdrant_client.query_points.return_value = mock_search_result result = await semantic_search_mock( "test", min_score=0.8, qdrant_client=mock_qdrant_client ) # Should only include high.py and medium.py assert "high.py" in result assert "medium.py" in result assert 
"low.py" not in result async def test_search_truncates_long_text(self, mock_qdrant_client): """Test that long result text is truncated.""" mock_info = Mock() mock_info.points_count = 100 mock_qdrant_client.get_collection.return_value = mock_info long_text = "x" * 500 # Very long text mock_point = Mock() mock_point.score = 0.9 mock_point.payload = {"path": "long.py", "text": long_text} mock_search_result = Mock() mock_search_result.points = [mock_point] mock_qdrant_client.query_points.return_value = mock_search_result result = await semantic_search_mock( "test", qdrant_client=mock_qdrant_client ) # Result should be truncated (default 200 chars) assert len(result.split("long.py")[1].split("\n")[0]) < 250 async def test_search_handles_missing_payload(self, mock_qdrant_client): """Test graceful handling of missing payload data.""" mock_info = Mock() mock_info.points_count = 100 mock_qdrant_client.get_collection.return_value = mock_info # Mock result with missing/None payload mock_point = Mock() mock_point.score = 0.9 mock_point.payload = None mock_search_result = Mock() mock_search_result.points = [mock_point] mock_qdrant_client.query_points.return_value = mock_search_result result = await semantic_search_mock( "test", qdrant_client=mock_qdrant_client ) # Should handle gracefully with "unknown" defaults assert "unknown" in result.lower() async def test_search_exception_handling(self, mock_qdrant_client): """Test error handling when search fails.""" mock_info = Mock() mock_info.points_count = 100 mock_qdrant_client.get_collection.return_value = mock_info # Mock search raising exception mock_qdrant_client.query_points.side_effect = Exception("Connection lost") result = await semantic_search_mock( "test", qdrant_client=mock_qdrant_client ) assert "Error" in result assert "Connection lost" in result @pytest.mark.unit @pytest.mark.asyncio class TestSearchEdgeCases: """Test edge cases for search functionality.""" @pytest.mark.parametrize("query,expected_in_result", [ ("error 
handling", "error handling"), ("async patterns", "async patterns"), ("regex: [a-z]+", "regex"), ("query with 'quotes'", "quotes"), ]) async def test_search_various_queries( self, mock_qdrant_client, query, expected_in_result ): """Parametrized test for various query formats.""" mock_info = Mock() mock_info.points_count = 100 mock_qdrant_client.get_collection.return_value = mock_info mock_point = Mock() mock_point.score = 0.9 mock_point.payload = {"path": "test.py", "text": "test content"} mock_search_result = Mock() mock_search_result.points = [mock_point] mock_qdrant_client.query_points.return_value = mock_search_result result = await semantic_search_mock( query, qdrant_client=mock_qdrant_client ) assert expected_in_result.lower() in result.lower() async def test_search_empty_query(self, mock_qdrant_client): """Test search with empty query string.""" mock_info = Mock() mock_info.points_count = 100 mock_qdrant_client.get_collection.return_value = mock_info mock_point = Mock() mock_point.score = 0.9 mock_point.payload = {"path": "test.py", "text": "content"} mock_search_result = Mock() mock_search_result.points = [mock_point] mock_qdrant_client.query_points.return_value = mock_search_result result = await semantic_search_mock( "", qdrant_client=mock_qdrant_client ) # Should still work (semantic search on empty = broad results) assert "Found" in result or "Error" not in result async def test_search_very_long_query(self, mock_qdrant_client): """Test search with very long query string.""" mock_info = Mock() mock_info.points_count = 100 mock_qdrant_client.get_collection.return_value = mock_info mock_point = Mock() mock_point.score = 0.9 mock_point.payload = {"path": "test.py", "text": "content"} mock_search_result = Mock() mock_search_result.points = [mock_point] mock_qdrant_client.query_points.return_value = mock_search_result long_query = "find all error handling patterns " * 100 # Very long result = await semantic_search_mock( long_query, 
qdrant_client=mock_qdrant_client ) # Should handle without error assert "Error" not in result or "Found" in result async def test_search_min_score_boundaries(self, mock_qdrant_client): """Test min_score at boundary values.""" mock_info = Mock() mock_info.points_count = 100 mock_qdrant_client.get_collection.return_value = mock_info mock_point = Mock() mock_point.score = 0.7 mock_point.payload = {"path": "test.py", "text": "content"} mock_search_result = Mock() mock_search_result.points = [mock_point] mock_qdrant_client.query_points.return_value = mock_search_result # Test exact boundary (score == min_score should be included) result = await semantic_search_mock( "test", min_score=0.7, qdrant_client=mock_qdrant_client ) assert "Found 1" in result # Test just above (score < min_score should be excluded) result = await semantic_search_mock( "test", min_score=0.71, qdrant_client=mock_qdrant_client ) assert "No results found" in result async def test_search_collection_not_exists(self, mock_qdrant_client): """Test search when collection doesn't exist.""" mock_qdrant_client.get_collection.side_effect = Exception("Collection not found") result = await semantic_search_mock( "test", qdrant_client=mock_qdrant_client ) assert "Error" in result or "not ready" in result.lower() @pytest.mark.unit @pytest.mark.asyncio class TestSearchResultFormatting: """Test search result formatting and output.""" async def test_result_includes_all_required_fields(self, mock_qdrant_client): """Test that results include path, score, and text preview.""" mock_info = Mock() mock_info.points_count = 100 mock_qdrant_client.get_collection.return_value = mock_info mock_point = Mock() mock_point.score = 0.88 mock_point.payload = { "path": "src/utils/helper.py", "text": "def helper_function(): return True" } mock_search_result = Mock() mock_search_result.points = [mock_point] mock_qdrant_client.query_points.return_value = mock_search_result result = await semantic_search_mock( "helper", 
qdrant_client=mock_qdrant_client ) assert "src/utils/helper.py" in result assert "0.88" in result assert "helper_function" in result async def test_multiple_results_numbered_correctly(self, mock_qdrant_client): """Test that multiple results are numbered sequentially.""" mock_info = Mock() mock_info.points_count = 100 mock_qdrant_client.get_collection.return_value = mock_info mock_points = [] for i in range(3): mock_point = Mock() mock_point.score = 0.9 - (i * 0.01) mock_point.payload = {"path": f"file{i}.py", "text": f"content{i}"} mock_points.append(mock_point) mock_search_result = Mock() mock_search_result.points = mock_points mock_qdrant_client.query_points.return_value = mock_search_result result = await semantic_search_mock( "test", qdrant_client=mock_qdrant_client ) assert "1. file0.py" in result assert "2. file1.py" in result assert "3. file2.py" in result async def test_result_header_includes_match_count(self, mock_qdrant_client): """Test that header shows correct number of matches.""" mock_info = Mock() mock_info.points_count = 100 mock_qdrant_client.get_collection.return_value = mock_info mock_points = [ Mock(score=0.9, payload={"path": "a.py", "text": "a"}), Mock(score=0.85, payload={"path": "b.py", "text": "b"}), ] mock_search_result = Mock() mock_search_result.points = mock_points mock_qdrant_client.query_points.return_value = mock_search_result result = await semantic_search_mock( "test query", qdrant_client=mock_qdrant_client ) assert "Found 2 matches" in result assert "'test query'" in result

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/mjdevaccount/AIStack-MCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.