Skip to main content
Glama
index.ts (42.4 kB)
#!/usr/bin/env node
/**
 * Entry point of the Qdrant MCP server.
 *
 * Reads configuration from environment variables, validates it, wires the
 * Qdrant client, the embedding provider and the code indexer together, and
 * exposes them as MCP tools, resources and (optionally) prompts over either
 * the stdio transport or a stateless HTTP transport.
 */
import { existsSync, readFileSync } from "node:fs";
import { dirname, join } from "node:path";
import { fileURLToPath } from "node:url";
import { Server } from "@modelcontextprotocol/sdk/server/index.js";
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js";
import {
  CallToolRequestSchema,
  GetPromptRequestSchema,
  ListPromptsRequestSchema,
  ListResourcesRequestSchema,
  ListToolsRequestSchema,
  ReadResourceRequestSchema,
} from "@modelcontextprotocol/sdk/types.js";
import Bottleneck from "bottleneck";
import express from "express";
import { z } from "zod";
import {
  DEFAULT_BATCH_SIZE,
  DEFAULT_CHUNK_OVERLAP,
  DEFAULT_CHUNK_SIZE,
  DEFAULT_CODE_EXTENSIONS,
  DEFAULT_IGNORE_PATTERNS,
  DEFAULT_SEARCH_LIMIT,
} from "./code/config.js";
import { CodeIndexer } from "./code/indexer.js";
import type { CodeConfig } from "./code/types.js";
import { EmbeddingProviderFactory } from "./embeddings/factory.js";
import { BM25SparseVectorGenerator } from "./embeddings/sparse.js";
import { getPrompt, listPrompts, loadPromptsConfig, type PromptsConfig } from "./prompts/index.js";
import { renderTemplate, validateArguments } from "./prompts/template.js";
import { QdrantManager } from "./qdrant/client.js";

// Read package.json for the server name and version
const __dirname = dirname(fileURLToPath(import.meta.url));
const pkg = JSON.parse(readFileSync(join(__dirname, "../package.json"), "utf-8"));

// Environment configuration
const QDRANT_URL = process.env.QDRANT_URL || "http://localhost:6333";
const EMBEDDING_PROVIDER = (process.env.EMBEDDING_PROVIDER || "ollama").toLowerCase();
const TRANSPORT_MODE = (process.env.TRANSPORT_MODE || "stdio").toLowerCase();
const HTTP_PORT = parseInt(process.env.HTTP_PORT || "3000", 10);
const PROMPTS_CONFIG_FILE = process.env.PROMPTS_CONFIG_FILE || join(__dirname, "../prompts.json");

// Validate HTTP_PORT when HTTP mode is selected
if (TRANSPORT_MODE === "http") {
  if (Number.isNaN(HTTP_PORT) || HTTP_PORT < 1 || HTTP_PORT > 65535) {
    console.error(
      `Error: Invalid HTTP_PORT "${process.env.HTTP_PORT}". Must be a number between 1 and 65535.`
    );
    process.exit(1);
  }
}

// Environment variable holding the API key of each hosted embedding provider.
// A Map (not a plain object) so that provider names such as "constructor"
// cannot resolve to inherited Object.prototype members.
const API_KEY_ENV_BY_PROVIDER = new Map<string, string>([
  ["openai", "OPENAI_API_KEY"],
  ["cohere", "COHERE_API_KEY"],
  ["voyage", "VOYAGE_API_KEY"],
]);

// Check for required API keys based on provider (Ollama needs none)
if (EMBEDDING_PROVIDER !== "ollama") {
  const requiredKeyName = API_KEY_ENV_BY_PROVIDER.get(EMBEDDING_PROVIDER);
  if (requiredKeyName === undefined) {
    console.error(
      `Error: Unknown embedding provider "${EMBEDDING_PROVIDER}". Supported providers: openai, cohere, voyage, ollama.`
    );
    process.exit(1);
  }
  if (!process.env[requiredKeyName]) {
    console.error(`Error: ${requiredKeyName} is required for ${EMBEDDING_PROVIDER} provider.`);
    process.exit(1);
  }
}

/**
 * Extracts the model names from a parsed `/api/tags` response body.
 *
 * @param tags - The parsed JSON body; its shape is not trusted.
 * @returns The `name` of every entry of `tags.models` that has a string name,
 *   or `undefined` when the body has no `models` array at all.
 */
function listOllamaModelNames(tags: unknown): string[] | undefined {
  if (typeof tags !== "object" || tags === null) {
    return undefined;
  }
  const { models } = tags as { models?: unknown };
  if (!Array.isArray(models)) {
    return undefined;
  }
  return models
    .map((model: unknown) =>
      typeof model === "object" && model !== null ? (model as { name?: unknown }).name : undefined
    )
    .filter((modelName): modelName is string => typeof modelName === "string");
}

/**
 * Verifies that Ollama is reachable and has the configured embedding model.
 *
 * Does nothing unless the embedding provider is "ollama". On any failure it
 * prints an error with setup hints to stderr and terminates the process with
 * exit code 1.
 */
async function checkOllamaAvailability() {
  if (EMBEDDING_PROVIDER !== "ollama") {
    return;
  }

  const baseUrl = process.env.EMBEDDING_BASE_URL || "http://localhost:11434";
  const isLocalhost = baseUrl.includes("localhost") || baseUrl.includes("127.0.0.1");

  try {
    const response = await fetch(`${baseUrl}/api/version`);
    if (!response.ok) {
      throw new Error(`Ollama returned status ${response.status}`);
    }

    // Check if the required embedding model exists. The body is validated
    // before use so that a malformed reply is reported as such instead of
    // surfacing as an unrelated TypeError.
    const tagsResponse = await fetch(`${baseUrl}/api/tags`);
    const availableModels = listOllamaModelNames(await tagsResponse.json());
    if (availableModels === undefined) {
      throw new Error(
        `Ollama returned an unexpected /api/tags response (status ${tagsResponse.status})`
      );
    }

    const modelName = process.env.EMBEDDING_MODEL || "nomic-embed-text";
    const modelExists = availableModels.some(
      (available) => available === modelName || available.startsWith(`${modelName}:`)
    );

    if (!modelExists) {
      let errorMessage = `Error: Model '${modelName}' not found in Ollama.\n`;
      if (isLocalhost) {
        errorMessage +=
          `Pull it with:\n` +
          ` - Using Docker: docker exec ollama ollama pull ${modelName}\n` +
          ` - Or locally: ollama pull ${modelName}`;
      } else {
        errorMessage +=
          `Please ensure the model is available on your Ollama instance:\n` +
          ` ollama pull ${modelName}`;
      }
      console.error(errorMessage);
      process.exit(1);
    }
  } catch (error) {
    const errorMessage =
      error instanceof Error
        ? `Error: ${error.message}`
        : `Error: Ollama is not running at ${baseUrl}.\n`;

    const helpText = isLocalhost
      ? `Please start Ollama:\n` +
        ` - Using Docker: docker compose up -d\n` +
        ` - Or install locally: curl -fsSL https://ollama.ai/install.sh | sh\n` +
        `\nThen pull the embedding model:\n` +
        ` ollama pull nomic-embed-text`
      : `Please ensure:\n` +
        ` - Ollama is running at the specified URL\n` +
        ` - The URL is accessible from this machine\n` +
        ` - The embedding model is available (e.g., nomic-embed-text)`;

    console.error(`${errorMessage}\n${helpText}`);
    process.exit(1);
  }
}

/**
 * Reads an integer setting from the environment.
 *
 * @param name - Environment variable name.
 * @param fallback - Value used when the variable is unset, empty, or not a
 *   parseable integer (a warning is logged in the last case).
 * @returns The parsed integer, or `fallback`.
 */
function readIntEnv(name: string, fallback: number): number {
  const raw = process.env[name];
  if (!raw) {
    return fallback;
  }
  const parsed = parseInt(raw, 10);
  if (Number.isNaN(parsed)) {
    // Previously NaN was handed to the indexer unchanged.
    console.error(`Warning: Invalid ${name} "${raw}"; using default ${fallback}.`);
    return fallback;
  }
  return parsed;
}

// Initialize clients
const qdrant = new QdrantManager(QDRANT_URL);
const embeddings = EmbeddingProviderFactory.createFromEnv();

// Initialize code indexer
const codeConfig: CodeConfig = {
  chunkSize: readIntEnv("CODE_CHUNK_SIZE", DEFAULT_CHUNK_SIZE),
  chunkOverlap: readIntEnv("CODE_CHUNK_OVERLAP", DEFAULT_CHUNK_OVERLAP),
  enableASTChunking: process.env.CODE_ENABLE_AST !== "false",
  supportedExtensions: DEFAULT_CODE_EXTENSIONS,
  ignorePatterns: DEFAULT_IGNORE_PATTERNS,
  batchSize: readIntEnv("CODE_BATCH_SIZE", DEFAULT_BATCH_SIZE),
  defaultSearchLimit: readIntEnv("CODE_SEARCH_LIMIT", DEFAULT_SEARCH_LIMIT),
  enableHybridSearch: process.env.CODE_ENABLE_HYBRID === "true",
};
const codeIndexer = new CodeIndexer(qdrant, embeddings, codeConfig);

// Load prompts configuration if file exists
let promptsConfig: PromptsConfig | null = null;
if (existsSync(PROMPTS_CONFIG_FILE)) {
  try {
    promptsConfig = loadPromptsConfig(PROMPTS_CONFIG_FILE);
    console.error(`Loaded ${promptsConfig.prompts.length} prompts from ${PROMPTS_CONFIG_FILE}`);
  } catch (error) {
    console.error(`Failed to load prompts configuration from ${PROMPTS_CONFIG_FILE}:`, error);
    process.exit(1);
  }
}

/**
 * Creates a new MCP server instance.
 *
 * Needed for the HTTP transport in stateless mode, where each request gets
 * its own server. The prompts capability is advertised only when a prompts
 * configuration was loaded.
 */
function createServer() {
  const capabilities: {
    tools: Record<string, never>;
    resources: Record<string, never>;
    prompts?: Record<string, never>;
  } = {
    tools: {},
    resources: {},
  };

  if (promptsConfig) {
    capabilities.prompts = {};
  }

  return new Server({ name: pkg.name, version: pkg.version }, { capabilities });
}

// Create a shared MCP server for stdio mode
const server = createServer();

// Tool argument schemas. Declared before the handlers are registered and
// built once, rather than on every tool call.
const CreateCollectionSchema = z.object({
  name: z.string().describe("Name of the collection"),
  distance: z
    .enum(["Cosine", "Euclid", "Dot"])
    .optional()
    .describe("Distance metric (default: Cosine)"),
  enableHybrid: z
    .boolean()
    .optional()
    .describe("Enable hybrid search with sparse vectors (default: false)"),
});

const AddDocumentsSchema = z.object({
  collection: z.string().describe("Name of the collection"),
  documents: z
    .array(
      z.object({
        id: z.union([z.string(), z.number()]).describe("Unique identifier for the document"),
        text: z.string().describe("Text content to embed and store"),
        metadata: z
          .record(z.any())
          .optional()
          .describe("Optional metadata to store with the document"),
      })
    )
    .describe("Array of documents to add"),
});

const SemanticSearchSchema = z.object({
  collection: z.string().describe("Name of the collection to search"),
  query: z.string().describe("Search query text"),
  limit: z.number().optional().describe("Maximum number of results (default: 5)"),
  filter: z.record(z.any()).optional().describe("Optional metadata filter"),
});

const DeleteCollectionSchema = z.object({
  name: z.string().describe("Name of the collection to delete"),
});

const GetCollectionInfoSchema = z.object({
  name: z.string().describe("Name of the collection"),
});

const DeleteDocumentsSchema = z.object({
  collection: z.string().describe("Name of the collection"),
  ids: z.array(z.union([z.string(), z.number()])).describe("Array of document IDs to delete"),
});

const HybridSearchSchema = z.object({
  collection: z.string().describe("Name of the collection to search"),
  query: z.string().describe("Search query text"),
  limit: z.number().optional().describe("Maximum number of results (default: 5)"),
  filter: z.record(z.any()).optional().describe("Optional metadata filter"),
});

const IndexCodebaseSchema = z.object({
  path: z.string(),
  forceReindex: z.boolean().optional(),
  extensions: z.array(z.string()).optional(),
  ignorePatterns: z.array(z.string()).optional(),
});

const SearchCodeSchema = z.object({
  path: z.string(),
  query: z.string(),
  limit: z.number().optional(),
  fileTypes: z.array(z.string()).optional(),
  pathPattern: z.string().optional(),
});

// Shared by get_index_status, reindex_changes and clear_index.
const CodebasePathSchema = z.object({
  path: z.string(),
});

/**
 * Builds a tool result holding a single text item.
 *
 * @param text - The text returned to the client.
 * @param isError - When given, included verbatim as the result's `isError`
 *   flag; when omitted, the result carries no `isError` key.
 */
function textResult(text: string, isError?: boolean) {
  const content = [{ type: "text" as const, text }];
  return isError === undefined ? { content } : { content, isError };
}

/** Registers the tool listing and tool call handlers on `server`. */
function registerToolHandlers(server: Server) {
  // List available tools
  server.setRequestHandler(ListToolsRequestSchema, async () => {
    return {
      tools: [
        {
          name: "create_collection",
          description:
            "Create a new vector collection in Qdrant. The collection will be configured with the embedding provider's dimensions automatically. Set enableHybrid to true to enable hybrid search combining semantic and keyword search.",
          inputSchema: {
            type: "object",
            properties: {
              name: { type: "string", description: "Name of the collection" },
              distance: {
                type: "string",
                enum: ["Cosine", "Euclid", "Dot"],
                description: "Distance metric (default: Cosine)",
              },
              enableHybrid: {
                type: "boolean",
                description: "Enable hybrid search with sparse vectors (default: false)",
              },
            },
            required: ["name"],
          },
        },
        {
          name: "add_documents",
          description:
            "Add documents to a collection. Documents will be automatically embedded using the configured embedding provider.",
          inputSchema: {
            type: "object",
            properties: {
              collection: { type: "string", description: "Name of the collection" },
              documents: {
                type: "array",
                description: "Array of documents to add",
                items: {
                  type: "object",
                  properties: {
                    id: {
                      type: ["string", "number"],
                      description: "Unique identifier for the document",
                    },
                    text: { type: "string", description: "Text content to embed and store" },
                    metadata: {
                      type: "object",
                      description: "Optional metadata to store with the document",
                    },
                  },
                  required: ["id", "text"],
                },
              },
            },
            required: ["collection", "documents"],
          },
        },
        {
          name: "semantic_search",
          description:
            "Search for documents using natural language queries. Returns the most semantically similar documents.",
          inputSchema: {
            type: "object",
            properties: {
              collection: { type: "string", description: "Name of the collection to search" },
              query: { type: "string", description: "Search query text" },
              limit: { type: "number", description: "Maximum number of results (default: 5)" },
              filter: { type: "object", description: "Optional metadata filter" },
            },
            required: ["collection", "query"],
          },
        },
        {
          name: "list_collections",
          description: "List all available collections in Qdrant.",
          inputSchema: {
            type: "object",
            properties: {},
          },
        },
        {
          name: "delete_collection",
          description: "Delete a collection and all its documents.",
          inputSchema: {
            type: "object",
            properties: {
              name: { type: "string", description: "Name of the collection to delete" },
            },
            required: ["name"],
          },
        },
        {
          name: "get_collection_info",
          description:
            "Get detailed information about a collection including vector size, point count, and distance metric.",
          inputSchema: {
            type: "object",
            properties: {
              name: { type: "string", description: "Name of the collection" },
            },
            required: ["name"],
          },
        },
        {
          name: "delete_documents",
          description: "Delete specific documents from a collection by their IDs.",
          inputSchema: {
            type: "object",
            properties: {
              collection: { type: "string", description: "Name of the collection" },
              ids: {
                type: "array",
                description: "Array of document IDs to delete",
                items: { type: ["string", "number"] },
              },
            },
            required: ["collection", "ids"],
          },
        },
        {
          name: "hybrid_search",
          description:
            "Perform hybrid search combining semantic vector search with keyword search using BM25. This provides better results by combining the strengths of both approaches. The collection must be created with enableHybrid set to true.",
          inputSchema: {
            type: "object",
            properties: {
              collection: { type: "string", description: "Name of the collection to search" },
              query: { type: "string", description: "Search query text" },
              limit: { type: "number", description: "Maximum number of results (default: 5)" },
              filter: { type: "object", description: "Optional metadata filter" },
            },
            required: ["collection", "query"],
          },
        },
        {
          name: "index_codebase",
          description:
            "Index a codebase for semantic code search. Automatically discovers files, chunks code intelligently using AST-aware parsing, and stores in vector database. Respects .gitignore and other ignore files.",
          inputSchema: {
            type: "object",
            properties: {
              path: {
                type: "string",
                description: "Absolute or relative path to codebase root directory",
              },
              forceReindex: {
                type: "boolean",
                description: "Force full re-index even if already indexed (default: false)",
              },
              extensions: {
                type: "array",
                items: { type: "string" },
                description: "Custom file extensions to index (e.g., ['.proto', '.graphql'])",
              },
              ignorePatterns: {
                type: "array",
                items: { type: "string" },
                description:
                  "Additional patterns to ignore (e.g., ['**/test/**', '**/*.test.ts'])",
              },
            },
            required: ["path"],
          },
        },
        {
          name: "search_code",
          description:
            "Search indexed codebase using natural language queries. Returns semantically relevant code chunks with file paths and line numbers.",
          inputSchema: {
            type: "object",
            properties: {
              path: { type: "string", description: "Path to codebase (must be indexed first)" },
              query: {
                type: "string",
                description: "Natural language search query (e.g., 'authentication logic')",
              },
              limit: {
                type: "number",
                description: "Maximum number of results (default: 5, max: 100)",
              },
              fileTypes: {
                type: "array",
                items: { type: "string" },
                description: "Filter by file extensions (e.g., ['.ts', '.py'])",
              },
              pathPattern: {
                type: "string",
                description: "Filter by path glob pattern (e.g., 'src/services/**')",
              },
            },
            required: ["path", "query"],
          },
        },
        {
          name: "reindex_changes",
          description:
            "Incrementally re-index only changed files. Detects added, modified, and deleted files since last index. Requires previous indexing with index_codebase.",
          inputSchema: {
            type: "object",
            properties: {
              path: { type: "string", description: "Path to codebase" },
            },
            required: ["path"],
          },
        },
        {
          name: "get_index_status",
          description: "Get indexing status and statistics for a codebase.",
          inputSchema: {
            type: "object",
            properties: {
              path: { type: "string", description: "Path to codebase" },
            },
            required: ["path"],
          },
        },
        {
          name: "clear_index",
          description:
            "Delete all indexed data for a codebase. This is irreversible and will remove the entire collection.",
          inputSchema: {
            type: "object",
            properties: {
              path: { type: "string", description: "Path to codebase" },
            },
            required: ["path"],
          },
        },
      ],
    };
  });

  // Handle tool calls. Argument validation failures and errors thrown by the
  // Qdrant / embedding / indexer layers are all converted into an
  // `isError: true` text result by the catch block at the end.
  server.setRequestHandler(CallToolRequestSchema, async (request) => {
    const { name: toolName, arguments: args } = request.params;

    try {
      switch (toolName) {
        case "create_collection": {
          const { name, distance, enableHybrid } = CreateCollectionSchema.parse(args);
          const vectorSize = embeddings.getDimensions();
          await qdrant.createCollection(name, vectorSize, distance, enableHybrid || false);

          let message = `Collection "${name}" created successfully with ${vectorSize} dimensions and ${distance || "Cosine"} distance metric.`;
          if (enableHybrid) {
            message += " Hybrid search is enabled for this collection.";
          }
          return textResult(message);
        }

        case "add_documents": {
          const { collection, documents } = AddDocumentsSchema.parse(args);

          if (!(await qdrant.collectionExists(collection))) {
            return textResult(
              `Error: Collection "${collection}" does not exist. Create it first using create_collection.`,
              true
            );
          }
          const collectionInfo = await qdrant.getCollectionInfo(collection);

          // Generate embeddings for all documents
          const embeddingResults = await embeddings.embedBatch(documents.map((doc) => doc.text));

          // `text` is written after the metadata spread so that a metadata key
          // named "text" cannot replace the text that was actually embedded.
          if (collectionInfo.hybridEnabled) {
            // Hybrid collections store a sparse vector next to the dense one
            const sparseGenerator = new BM25SparseVectorGenerator();
            const points = documents.map((doc, index) => ({
              id: doc.id,
              vector: embeddingResults[index].embedding,
              sparseVector: sparseGenerator.generate(doc.text),
              payload: { ...doc.metadata, text: doc.text },
            }));
            await qdrant.addPointsWithSparse(collection, points);
          } else {
            // Standard dense-only vectors
            const points = documents.map((doc, index) => ({
              id: doc.id,
              vector: embeddingResults[index].embedding,
              payload: { ...doc.metadata, text: doc.text },
            }));
            await qdrant.addPoints(collection, points);
          }

          return textResult(
            `Successfully added ${documents.length} document(s) to collection "${collection}".`
          );
        }

        case "semantic_search": {
          const { collection, query, limit, filter } = SemanticSearchSchema.parse(args);

          if (!(await qdrant.collectionExists(collection))) {
            return textResult(`Error: Collection "${collection}" does not exist.`, true);
          }

          const { embedding } = await embeddings.embed(query);
          // `||` (not `??`) is kept on purpose: a limit of 0 also means "use the default".
          const results = await qdrant.search(collection, embedding, limit || 5, filter);
          return textResult(JSON.stringify(results, null, 2));
        }

        case "list_collections": {
          const collections = await qdrant.listCollections();
          return textResult(JSON.stringify(collections, null, 2));
        }

        case "delete_collection": {
          const { name } = DeleteCollectionSchema.parse(args);
          await qdrant.deleteCollection(name);
          return textResult(`Collection "${name}" deleted successfully.`);
        }

        case "get_collection_info": {
          const { name } = GetCollectionInfoSchema.parse(args);
          const info = await qdrant.getCollectionInfo(name);
          return textResult(JSON.stringify(info, null, 2));
        }

        case "delete_documents": {
          const { collection, ids } = DeleteDocumentsSchema.parse(args);
          await qdrant.deletePoints(collection, ids);
          return textResult(
            `Successfully deleted ${ids.length} document(s) from collection "${collection}".`
          );
        }

        case "hybrid_search": {
          const { collection, query, limit, filter } = HybridSearchSchema.parse(args);

          if (!(await qdrant.collectionExists(collection))) {
            return textResult(`Error: Collection "${collection}" does not exist.`, true);
          }

          const collectionInfo = await qdrant.getCollectionInfo(collection);
          if (!collectionInfo.hybridEnabled) {
            return textResult(
              `Error: Collection "${collection}" does not have hybrid search enabled. Create a new collection with enableHybrid set to true.`,
              true
            );
          }

          // Dense embedding plus BM25 sparse vector for the same query
          const { embedding } = await embeddings.embed(query);
          const sparseVector = new BM25SparseVectorGenerator().generate(query);

          const results = await qdrant.hybridSearch(
            collection,
            embedding,
            sparseVector,
            limit || 5,
            filter
          );
          return textResult(JSON.stringify(results, null, 2));
        }

        case "index_codebase": {
          const { path, forceReindex, extensions, ignorePatterns } =
            IndexCodebaseSchema.parse(args);

          const stats = await codeIndexer.indexCodebase(
            path,
            { forceReindex, extensions, ignorePatterns },
            (progress) => {
              // Progress callback - could send progress updates via SSE in future
              console.error(`[${progress.phase}] ${progress.percentage}% - ${progress.message}`);
            }
          );

          let statusMessage = `Indexed ${stats.filesIndexed}/${stats.filesScanned} files (${stats.chunksCreated} chunks) in ${(stats.durationMs / 1000).toFixed(1)}s`;
          if (stats.status === "partial") {
            statusMessage += `\n\nWarnings:\n${stats.errors?.join("\n")}`;
          } else if (stats.status === "failed") {
            statusMessage = `Indexing failed:\n${stats.errors?.join("\n")}`;
          }

          return textResult(statusMessage, stats.status === "failed");
        }

        case "search_code": {
          const { path, query, limit, fileTypes, pathPattern } = SearchCodeSchema.parse(args);

          const results = await codeIndexer.searchCode(path, query, {
            limit,
            fileTypes,
            pathPattern,
          });

          if (results.length === 0) {
            return textResult(`No results found for query: "${query}"`);
          }

          // Format results with file references
          const formattedResults = results
            .map(
              (r, idx) =>
                `\n--- Result ${idx + 1} (score: ${r.score.toFixed(3)}) ---\n` +
                `File: ${r.filePath}:${r.startLine}-${r.endLine}\n` +
                `Language: ${r.language}\n\n` +
                `${r.content}\n`
            )
            .join("\n");

          return textResult(`Found ${results.length} result(s):\n${formattedResults}`);
        }

        case "get_index_status": {
          const { path } = CodebasePathSchema.parse(args);
          const status = await codeIndexer.getIndexStatus(path);

          if (!status.isIndexed) {
            return textResult(
              `Codebase at "${path}" is not indexed. Use index_codebase to index it first.`
            );
          }
          return textResult(JSON.stringify(status, null, 2));
        }

        case "reindex_changes": {
          const { path } = CodebasePathSchema.parse(args);

          const stats = await codeIndexer.reindexChanges(path, (progress) => {
            console.error(`[${progress.phase}] ${progress.percentage}% - ${progress.message}`);
          });

          const nothingChanged =
            stats.filesAdded === 0 && stats.filesModified === 0 && stats.filesDeleted === 0;
          const message = nothingChanged
            ? `No changes detected. Codebase is up to date.`
            : [
                `Incremental re-index complete:`,
                `- Files added: ${stats.filesAdded}`,
                `- Files modified: ${stats.filesModified}`,
                `- Files deleted: ${stats.filesDeleted}`,
                `- Chunks added: ${stats.chunksAdded}`,
                `- Duration: ${(stats.durationMs / 1000).toFixed(1)}s`,
              ].join("\n");

          return textResult(message);
        }

        case "clear_index": {
          const { path } = CodebasePathSchema.parse(args);
          await codeIndexer.clearIndex(path);
          return textResult(`Index cleared for codebase at "${path}".`);
        }

        default:
          return textResult(`Unknown tool: ${toolName}`, true);
      }
    } catch (error: unknown) {
      // Enhanced error details for debugging
      const errorDetails =
        error instanceof Error ? error.message : JSON.stringify(error, null, 2);
      const errorFields =
        typeof error === "object" && error !== null
          ? (error as { stack?: unknown; data?: unknown })
          : undefined;
      console.error("Tool execution error:", {
        tool: toolName,
        error: errorDetails,
        stack: errorFields?.stack,
        data: errorFields?.data,
      });

      return textResult(`Error: ${errorDetails}`, true);
    }
  });
}

/** Registers the resource listing and resource read handlers on `server`. */
function registerResourceHandlers(server: Server) {
  // List available resources: one fixed entry plus one per collection
  server.setRequestHandler(ListResourcesRequestSchema, async () => {
    const collections = await qdrant.listCollections();

    return {
      resources: [
        {
          uri: "qdrant://collections",
          name: "All Collections",
          description: "List of all vector collections in Qdrant",
          mimeType: "application/json",
        },
        ...collections.map((name) => ({
          uri: `qdrant://collection/${name}`,
          name: `Collection: ${name}`,
          description: `Details and statistics for collection "${name}"`,
          mimeType: "application/json",
        })),
      ],
    };
  });

  // Read resource content
  server.setRequestHandler(ReadResourceRequestSchema, async (request) => {
    const { uri } = request.params;

    if (uri === "qdrant://collections") {
      const collections = await qdrant.listCollections();
      return {
        contents: [
          { uri, mimeType: "application/json", text: JSON.stringify(collections, null, 2) },
        ],
      };
    }

    const collectionMatch = uri.match(/^qdrant:\/\/collection\/(.+)$/);
    if (collectionMatch) {
      const info = await qdrant.getCollectionInfo(collectionMatch[1]);
      return {
        contents: [{ uri, mimeType: "application/json", text: JSON.stringify(info, null, 2) }],
      };
    }

    // Unknown URIs are answered with a plain-text notice rather than an error
    return {
      contents: [{ uri, mimeType: "text/plain", text: `Unknown resource: ${uri}` }],
    };
  });
}

/**
 * Registers the prompt listing and prompt retrieval handlers on `server`.
 * Does nothing when no prompts configuration was loaded.
 */
function registerPromptHandlers(server: Server) {
  // Narrowed local copy, so the handlers need no non-null assertions
  const config = promptsConfig;
  if (!config) {
    return;
  }

  // List available prompts
  server.setRequestHandler(ListPromptsRequestSchema, async () => {
    return {
      prompts: listPrompts(config).map((prompt) => ({
        name: prompt.name,
        description: prompt.description,
        arguments: prompt.arguments.map((arg) => ({
          name: arg.name,
          description: arg.description,
          required: arg.required,
        })),
      })),
    };
  });

  // Get prompt content
  server.setRequestHandler(GetPromptRequestSchema, async (request) => {
    const { name, arguments: args } = request.params;

    const prompt = getPrompt(config, name);
    if (!prompt) {
      throw new Error(`Unknown prompt: ${name}`);
    }

    try {
      validateArguments(args || {}, prompt.arguments);
      const rendered = renderTemplate(prompt.template, args || {}, prompt.arguments);

      return {
        messages: [
          {
            role: "user",
            content: { type: "text", text: rendered.text },
          },
        ],
      };
    } catch (error) {
      throw new Error(
        `Failed to render prompt "${name}": ${error instanceof Error ? error.message : String(error)}`
      );
    }
  });
}

/**
 * Registers all request handlers (tools, resources and, when configured,
 * prompts) on a server instance.
 *
 * @param server - The MCP server to register the handlers on.
 */
function registerHandlers(server: Server) {
  registerToolHandlers(server);
  registerResourceHandlers(server);
  registerPromptHandlers(server);
}

// Register handlers on the shared server for stdio mode
registerHandlers(server);

/** Starts the shared server on the stdio transport. */
async function startStdioServer() {
  await checkOllamaAvailability();
  const transport = new StdioServerTransport();
  await server.connect(transport);
  console.error("Qdrant MCP server running on stdio");
}

// Constants for HTTP server configuration
const RATE_LIMIT_MAX_REQUESTS = 100; // Max requests per window
const RATE_LIMIT_WINDOW_MS = 15 * 60 * 1000; // 15 minutes
const RATE_LIMIT_MAX_CONCURRENT = 10; // Max concurrent requests per IP
const RATE_LIMITER_CLEANUP_INTERVAL_MS = 60 * 60 * 1000; // 1 hour
const REQUEST_TIMEOUT_MS = 30 * 1000; // 30 seconds per request
const SHUTDOWN_GRACE_PERIOD_MS = 10 * 1000; // 10 seconds

/**
 * Starts the HTTP transport: an Express app exposing `GET /health` and a
 * rate-limited, stateless `POST /mcp` endpoint, plus SIGTERM/SIGINT handlers
 * that close the HTTP server gracefully.
 */
async function startHttpServer() {
  await checkOllamaAvailability();

  const app = express();
  app.use(express.json({ limit: "10mb" }));

  // Configure Express to trust proxy for correct IP detection.
  // NOTE(review): `true` trusts every hop, so a client-supplied
  // X-Forwarded-For header presumably decides `req.ip` and therefore the
  // rate-limit key. Confirm the deployment always sits behind a proxy that
  // overwrites that header.
  app.set("trust proxy", true);

  // Rate limiter group: max 100 requests per 15 minutes per IP, max 10 concurrent per IP.
  // `highWater: 0` with the OVERFLOW strategy makes a job that cannot start
  // immediately reject with a BottleneckError. Without them the job is queued
  // until the reservoir refreshes, so the request would hang instead of
  // receiving a 429.
  const rateLimiterGroup = new Bottleneck.Group({
    reservoir: RATE_LIMIT_MAX_REQUESTS,
    reservoirRefreshAmount: RATE_LIMIT_MAX_REQUESTS,
    reservoirRefreshInterval: RATE_LIMIT_WINDOW_MS,
    maxConcurrent: RATE_LIMIT_MAX_CONCURRENT,
    highWater: 0,
    strategy: Bottleneck.strategy.OVERFLOW,
  });

  // Helper function to send JSON-RPC error responses. A no-op once the
  // response headers have been sent.
  const sendErrorResponse = (
    res: express.Response,
    code: number,
    message: string,
    httpStatus: number = 500
  ) => {
    if (!res.headersSent) {
      res.status(httpStatus).json({
        jsonrpc: "2.0",
        error: { code, message },
        id: null,
      });
    }
  };

  // Periodic cleanup of inactive rate limiters to prevent memory leaks.
  // Tracks the last access time of each IP.
  const ipLastAccess = new Map<string, number>();

  const cleanupIntervalId = setInterval(() => {
    const now = Date.now();
    let removed = 0;

    for (const [ip, lastAccess] of ipLastAccess) {
      if (now - lastAccess > RATE_LIMITER_CLEANUP_INTERVAL_MS) {
        rateLimiterGroup.deleteKey(ip);
        ipLastAccess.delete(ip);
        removed += 1;
      }
    }

    if (removed > 0) {
      console.error(`Cleaned up ${removed} inactive rate limiters`);
    }
  }, RATE_LIMITER_CLEANUP_INTERVAL_MS);

  // Rate limiting middleware
  const rateLimitMiddleware = async (
    req: express.Request,
    res: express.Response,
    next: express.NextFunction
  ) => {
    const clientIp = req.ip || req.socket.remoteAddress || "unknown";

    try {
      // Update last access time for this IP
      ipLastAccess.set(clientIp, Date.now());

      // Get or create a limiter for this specific IP and consume one slot
      const limiter = rateLimiterGroup.key(clientIp);
      await limiter.schedule(() => Promise.resolve());
    } catch (error) {
      // Differentiate between rate limit errors and unexpected errors
      if (error instanceof Bottleneck.BottleneckError) {
        console.error(`Rate limit exceeded for IP ${clientIp}:`, error.message);
        sendErrorResponse(res, -32000, "Too many requests", 429);
      } else {
        console.error("Unexpected rate limiting error:", error);
        sendErrorResponse(res, -32603, "Internal server error", 500);
      }
      return;
    }

    // Called outside the try block so that failures further down the chain
    // are never reported as rate-limit errors.
    next();
  };

  // Health check endpoint
  app.get("/health", (_req, res) => {
    res.json({
      status: "ok",
      mode: TRANSPORT_MODE,
      version: pkg.version,
      embedding_provider: EMBEDDING_PROVIDER,
    });
  });

  app.post("/mcp", rateLimitMiddleware, async (req, res) => {
    // Create a new server for each request
    const requestServer = createServer();
    registerHandlers(requestServer);

    // Create transport with enableJsonResponse
    const transport = new StreamableHTTPServerTransport({
      sessionIdGenerator: undefined,
      enableJsonResponse: true,
    });

    let timeoutId: ReturnType<typeof setTimeout> | undefined;

    // Track cleanup state to prevent double cleanup
    let cleanedUp = false;
    const cleanup = async () => {
      if (cleanedUp) return;
      cleanedUp = true;
      clearTimeout(timeoutId);
      await transport.close().catch(() => {});
      await requestServer.close().catch(() => {});
    };

    // Set a timeout for the request to prevent hanging
    timeoutId = setTimeout(() => {
      sendErrorResponse(res, -32000, "Request timeout", 504);
      cleanup().catch((err) => {
        console.error("Error during timeout cleanup:", err);
      });
    }, REQUEST_TIMEOUT_MS);

    // Clean up once the response is done. The listeners are attached BEFORE
    // the request is handled: if the response completes while handleRequest
    // is still awaited, listeners attached afterwards would miss the events
    // and cleanup would only happen when the timeout fires.
    const cleanupHandler = () => {
      cleanup().catch((err) => {
        console.error("Error during response cleanup:", err);
      });
    };
    res.on("finish", cleanupHandler);
    res.on("close", cleanupHandler);
    res.on("error", (err) => {
      console.error("Response stream error:", err);
      cleanupHandler();
    });

    try {
      // Connect server to transport
      await requestServer.connect(transport);

      // Handle the request - this triggers message processing.
      // The response will be sent asynchronously when the server calls transport.send()
      await transport.handleRequest(req, res, req.body);
    } catch (error) {
      console.error("Error handling MCP request:", error);
      sendErrorResponse(res, -32603, "Internal server error");
      await cleanup();
    }
  });

  const httpServer = app
    .listen(HTTP_PORT, () => {
      console.error(`Qdrant MCP server running on http://localhost:${HTTP_PORT}/mcp`);
    })
    .on("error", (error) => {
      console.error("HTTP server error:", error);
      process.exit(1);
    });

  // Graceful shutdown handling
  let isShuttingDown = false;

  const shutdown = () => {
    if (isShuttingDown) return;
    isShuttingDown = true;

    console.error("Shutdown signal received, closing HTTP server gracefully...");

    // Clear the cleanup interval to allow graceful shutdown
    clearInterval(cleanupIntervalId);

    // Force shutdown after grace period
    const forceTimeout = setTimeout(() => {
      console.error("Forcing shutdown after timeout");
      process.exit(1);
    }, SHUTDOWN_GRACE_PERIOD_MS);

    httpServer.close(() => {
      clearTimeout(forceTimeout);
      console.error("HTTP server closed");
      process.exit(0);
    });
  };

  process.on("SIGTERM", shutdown);
  process.on("SIGINT", shutdown);
}

/** Main entry point: starts the transport selected by TRANSPORT_MODE. */
async function main() {
  if (TRANSPORT_MODE === "http") {
    await startHttpServer();
  } else if (TRANSPORT_MODE === "stdio") {
    await startStdioServer();
  } else {
    console.error(
      `Error: Invalid TRANSPORT_MODE "${TRANSPORT_MODE}". Supported modes: stdio, http.`
    );
    process.exit(1);
  }
}

main().catch((error) => {
  console.error("Fatal error:", error);
  process.exit(1);
});

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/mhalder/qdrant-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.