
Enterprise Code Search MCP Server

CodeSearchEngine.ts (18.2 kB)
import { ChromaClient, OpenAIEmbeddingFunction, IncludeEnum } from 'chromadb';
import { Octokit } from '@octokit/rest';
import OpenAI from 'openai';
import * as fs from 'fs/promises';
import * as path from 'path';
import { createReadStream } from 'fs';
import { createInterface } from 'readline';

// Configuration interface
export interface Config {
  embedding_provider: 'openai' | 'ollama';
  openai_api_key?: string;
  openai_model: string;
  ollama_host: string;
  ollama_model: string;
  github_token?: string;
  chroma_host: string;
  chroma_port: number;
  collection_name: string;
  company_name: string;
}

// Default configuration
export const getConfig = (): Config => ({
  embedding_provider: (process.env.EMBEDDING_PROVIDER as 'openai' | 'ollama') || 'ollama',
  openai_api_key: process.env.OPENAI_API_KEY,
  openai_model: process.env.OPENAI_MODEL || 'text-embedding-3-small',
  ollama_host: process.env.OLLAMA_HOST || 'http://localhost:11434',
  ollama_model: process.env.OLLAMA_MODEL || 'nomic-embed-text',
  github_token: process.env.GITHUB_TOKEN,
  chroma_host: process.env.CHROMA_HOST || 'localhost',
  chroma_port: parseInt(process.env.CHROMA_PORT || '8000'),
  collection_name: process.env.COLLECTION_NAME || 'company_codebase_384d-new',
  company_name: process.env.COMPANY_NAME || 'MyCompany'
});

// Custom Ollama Embedding Function
export class OllamaEmbeddingFunction {
  private host: string;
  private model: string;

  constructor(host: string, model: string) {
    this.host = host;
    this.model = model;
  }

  async generate(texts: string[]): Promise<number[][]> {
    const embeddings: number[][] = [];

    for (const text of texts) {
      try {
        const response = await fetch(`${this.host}/api/embeddings`, {
          method: 'POST',
          headers: {
            'Content-Type': 'application/json',
          },
          body: JSON.stringify({
            model: this.model,
            prompt: text
          })
        });

        if (!response.ok) {
          throw new Error(`Ollama API error: ${response.statusText}`);
        }

        const data = await response.json();
        embeddings.push(data.embedding);
      } catch (error) {
        console.error(`Error generating embedding for text: ${text.substring(0, 50)}...`, error);
        embeddings.push(new Array(384).fill(0));
      }
    }

    return embeddings;
  }
}

// Shared code search engine
export class CodeSearchEngine {
  protected chromaClient: ChromaClient;
  protected openai?: OpenAI;
  protected octokit?: Octokit;
  protected embedder: any;
  protected config: Config;

  constructor(config?: Config) {
    this.config = config || getConfig();

    this.chromaClient = new ChromaClient({
      path: `http://${this.config.chroma_host}:${this.config.chroma_port}`
    });

    if (this.config.embedding_provider === 'openai') {
      if (!this.config.openai_api_key) {
        throw new Error('OpenAI API key required when using OpenAI embeddings');
      }
      this.openai = new OpenAI({ apiKey: this.config.openai_api_key! });
      this.embedder = new OpenAIEmbeddingFunction({
        openai_api_key: this.config.openai_api_key!,
        openai_model: this.config.openai_model
      });
    } else {
      this.embedder = new OllamaEmbeddingFunction(this.config.ollama_host, this.config.ollama_model);
    }

    if (this.config.github_token) {
      this.octokit = new Octokit({ auth: this.config.github_token });
    }
  }

  // Default file patterns
  private getDefaultIncludePatterns(): string[] {
    return [
      "*.php", "*.phtml", "*.php3", "*.php4", "*.php5", "*.php7", "*.php8",
      "*.py", "*.pyw", "*.pyi", "*.pyc", "*.pyo",
      "*.js", "*.jsx", "*.mjs", "*.cjs",
      "*.ts", "*.tsx", "*.mts", "*.cts",
      "*.java", "*.jav", "*.jsp", "*.jspx",
      "*.cpp", "*.cxx", "*.cc", "*.c++", "*.hpp", "*.hxx", "*.h++", "*.c", "*.h",
      "*.cs", "*.csx", "*.go", "*.gox", "*.rs", "*.rlib",
      "*.rb", "*.rbw", "*.rake", "*.gemspec",
      "*.swift", "*.swiftinterface", "*.kt", "*.kts", "*.scala", "*.sc",
      "*.clj", "*.cljs", "*.cljc", "*.edn",
      "*.hs", "*.lhs", "*.ml", "*.mli", "*.fs", "*.fsi", "*.fsx", "*.fsscript",
      "*.erl", "*.hrl", "*.ex", "*.exs", "*.lua", "*.luac",
      "*.r", "*.R", "*.Rmd", "*.rmd", "*.m", "*.mm", "*.M",
      "*.pl", "*.pm", "*.t", "*.pod",
      "*.sh", "*.bash", "*.zsh", "*.fish", "*.ps1", "*.psm1", "*.psd1",
      "*.sql", "*.psql", "*.mysql", "*.sqlite",
      "*.html", "*.htm", "*.xhtml", "*.xml", "*.svg", "*.vue", "*.svelte",
      "*.css", "*.scss", "*.sass", "*.less", "*.styl",
      "*.md", "*.markdown", "*.mdown", "*.mkdn", "*.mdx",
      "*.txt", "*.text", "*.rtf",
      "*.yml", "*.yaml", "*.json", "*.jsonc", "*.json5",
      "*.toml", "*.ini", "*.cfg", "*.conf", "*.config",
      "*.dockerfile", "*.Dockerfile", "*.tf", "*.tfvars",
      "*.proto", "*.thrift", "*.graphql", "*.gql",
      "*.asm", "*.s", "*.S", "*.dart", "*.dartx", "*.elm",
      "*.nim", "*.nims", "*.zig", "*.v", "*.vh", "*.sv", "*.svh",
      "*.tex", "*.ltx", "*.sty", "*.rst", "*.rest",
      "*.adoc", "*.asciidoc", "*.org", "*.org_archive"
    ];
  }

  private getDefaultExcludePatterns(): string[] {
    return [
      "node_modules", "node_modules/**", ".git", ".git/**",
      "dist", "dist/**", "build", "build/**", "out", "out/**",
      "coverage", "coverage/**", "__pycache__", "__pycache__/**",
      "venv", "venv/**", ".venv", ".venv/**",
      ".next", ".next/**", "target", "target/**",
      ".cache", ".cache/**", "*.log", "*.tmp"
    ];
  }

  async indexLocalProject(args: {
    project_path: string;
    project_name: string;
    include_patterns?: string[];
    exclude_patterns?: string[];
  }) {
    const {
      project_path,
      project_name,
      include_patterns = this.getDefaultIncludePatterns(),
      exclude_patterns = this.getDefaultExcludePatterns()
    } = args;

    try {
      const stats = await fs.stat(project_path);
      if (!stats.isDirectory()) {
        throw new Error(`Path is not a directory: ${project_path}`);
      }
    } catch (error) {
      throw new Error(`Cannot access project path: ${project_path}`);
    }

    const projectId = this.sanitizeProjectId(project_name);

    try {
      const collection = await this.getOrCreateCollection();
      const files = await this.getFilesToIndex(project_path, include_patterns, exclude_patterns);

      const chunks = [];
      let processedFiles = 0;

      for (const filePath of files) {
        try {
          const relativePath = path.relative(project_path, filePath);
          const fileExtension = path.extname(filePath).slice(1) || 'txt';
          const fileStats = await fs.stat(filePath);
          const fileSizeMB = fileStats.size / (1024 * 1024);

          console.error(`Processing file: ${relativePath} (${fileSizeMB.toFixed(2)} MB)`);

          const fileChunks = await this.processFileWithStreaming(filePath, relativePath, fileExtension);

          if (fileChunks.length > 0) {
            chunks.push(...fileChunks.map(chunk => ({
              ...chunk,
              project_id: projectId,
              project_name,
              project_path,
              source_type: 'local',
              indexed_at: new Date().toISOString()
            })));
          }
          processedFiles++;
        } catch (error) {
          console.error(`Error processing file ${filePath}:`, error);
        }
      }

      if (chunks.length > 0) {
        await this.storeChunksInBatches(collection, chunks, projectId);
      }

      return {
        content: [
          {
            type: "text",
            text: `Successfully indexed local project: ${project_name}\n` +
              `Project ID: ${projectId}\n` +
              `Project Path: ${project_path}\n` +
              `Files processed: ${processedFiles}\n` +
              `Chunks created: ${chunks.length}\n` +
              `Embedding provider: ${this.config.embedding_provider}`
          }
        ]
      };
    } catch (error) {
      throw error;
    }
  }

  async searchCodebase(args: { query: string; limit?: number; project_filter?: string; }) {
    const { query, limit = 10, project_filter } = args;

    const collection = await this.getOrCreateCollection();

    let whereClause: any = {};
    if (project_filter) {
      whereClause.project_id = { "$eq": project_filter };
    }

    const results = await collection.query({
      queryTexts: [query],
      nResults: limit,
      where: Object.keys(whereClause).length > 0 ? whereClause : undefined
    });

    if (!results.documents[0] || results.documents[0].length === 0) {
      return {
        content: [
          {
            type: "text",
            text: "No results found for your query."
          }
        ]
      };
    }

    const formattedResults = results.documents[0]
      .map((doc, i) => {
        const metadata = results.metadatas?.[0]?.[i] as any;
        const distance = results.distances?.[0]?.[i] || 0;
        const similarity = (1 - distance).toFixed(3);

        return `## Result ${i + 1} (Similarity: ${similarity})\n` +
          `**File:** ${metadata?.file_path}\n` +
          `**Project:** ${metadata?.project_name}\n\n` +
          `\`\`\`${metadata?.file_type}\n${doc}\n\`\`\`\n`;
      })
      .join('\n---\n\n');

    return {
      content: [
        {
          type: "text",
          text: `Found ${results.documents[0].length} results for: "${query}"\n\n${formattedResults}`
        }
      ]
    };
  }

  async listIndexedProjects() {
    const collection = await this.getOrCreateCollection();

    const results = await collection.get({
      limit: 100000,
      include: [IncludeEnum.Metadatas]
    });

    const metadatas = results.metadatas as any[] | undefined;

    if (!metadatas || metadatas.length === 0) {
      return {
        content: [
          {
            type: "text",
            text: "No projects indexed yet."
          }
        ]
      };
    }

    const projects = new Map<string, any>();

    metadatas.forEach((metadata: any) => {
      if (!metadata) return;
      const projectId = metadata.project_id;
      if (!projectId) return;

      if (!projects.has(projectId)) {
        projects.set(projectId, {
          project_id: projectId,
          project_name: metadata.project_name,
          project_path: metadata.project_path,
          source_type: metadata.source_type,
          indexed_at: metadata.indexed_at,
          chunk_count: 0
        });
      }
      projects.get(projectId).chunk_count++;
    });

    let output = `# Indexed Projects (${projects.size})\n\n`;
    Array.from(projects.values()).forEach(project => {
      output += `## ${project.project_name}\n`;
      output += `- **ID:** ${project.project_id}\n`;
      output += `- **Path:** ${project.project_path}\n`;
      output += `- **Source:** ${project.source_type}\n`;
      output += `- **Chunks:** ${project.chunk_count}\n\n`;
    });

    return {
      content: [
        {
          type: "text",
          text: output
        }
      ]
    };
  }

  async getEmbeddingProviderInfo() {
    let info = `# Embedding Provider Information\n\n`;
    info += `**Current Provider:** ${this.config.embedding_provider}\n\n`;

    if (this.config.embedding_provider === 'openai') {
      info += `## OpenAI Configuration\n`;
      info += `- **Model:** ${this.config.openai_model}\n`;
      info += `- **API Key:** ${this.config.openai_api_key ? 'Configured ✓' : 'Not configured ✗'}\n`;
    } else {
      info += `## Ollama Configuration\n`;
      info += `- **Host:** ${this.config.ollama_host}\n`;
      info += `- **Model:** ${this.config.ollama_model}\n`;

      try {
        const response = await fetch(`${this.config.ollama_host}/api/tags`);
        if (response.ok) {
          const data = await response.json();
          const models = data.models?.map((m: any) => m.name) || [];
          info += `- **Connection:** ✓ Connected\n`;
          info += `- **Available Models:** ${models.join(', ') || 'None'}\n`;
        } else {
          info += `- **Connection:** ✗ Failed to connect\n`;
        }
      } catch (error) {
        info += `- **Connection:** ✗ Error connecting\n`;
      }
    }

    return {
      content: [
        {
          type: "text",
          text: info
        }
      ]
    };
  }

  protected async getOrCreateCollection() {
    try {
      return await this.chromaClient.getCollection({
        name: this.config.collection_name,
        embeddingFunction: this.embedder
      });
    } catch {
      return await this.chromaClient.createCollection({
        name: this.config.collection_name,
        embeddingFunction: this.embedder
      });
    }
  }

  protected sanitizeProjectId(projectName: string): string {
    return projectName.toLowerCase().replace(/[^a-z0-9]/g, '_');
  }

  private async processFileWithStreaming(
    filePath: string,
    relativePath: string,
    fileType: string,
    maxChunkSize: number = 1500
  ): Promise<any[]> {
    return new Promise((resolve, reject) => {
      const chunks: any[] = [];
      let currentChunk = '';
      let chunkIndex = 0;
      let lineCount = 0;

      const fileStream = createReadStream(filePath, { encoding: 'utf8' });
      const rl = createInterface({
        input: fileStream,
        crlfDelay: Infinity
      });

      rl.on('line', (line) => {
        lineCount++;

        if (line.length > 10000) {
          console.error(`Skipping very long line ${lineCount} (${line.length} chars)`);
          return;
        }

        if (currentChunk.length + line.length > maxChunkSize && currentChunk.length > 0) {
          chunks.push({
            content: currentChunk.trim(),
            file_path: relativePath,
            file_type: fileType,
            chunk_index: chunkIndex++
          });
          currentChunk = line;
        } else {
          currentChunk += (currentChunk ? '\n' : '') + line;
        }

        if (chunks.length >= 100) {
          console.error(`Processed ${lineCount} lines, created ${chunks.length} chunks so far...`);
        }
      });

      rl.on('close', () => {
        if (currentChunk.trim()) {
          chunks.push({
            content: currentChunk.trim(),
            file_path: relativePath,
            file_type: fileType,
            chunk_index: chunkIndex
          });
        }
        console.error(`Finished processing ${relativePath}: ${lineCount} lines, ${chunks.length} chunks`);
        resolve(chunks);
      });

      rl.on('error', (error) => {
        console.error(`Error reading file ${filePath}:`, error);
        reject(error);
      });
    });
  }

  private async getFilesToIndex(
    dirPath: string,
    includePatterns: string[],
    excludePatterns: string[]
  ): Promise<string[]> {
    const allFiles: string[] = [];

    const traverse = async (currentPath: string) => {
      try {
        const entries = await fs.readdir(currentPath, { withFileTypes: true });

        for (const entry of entries) {
          const fullPath = path.join(currentPath, entry.name);

          if (entry.isDirectory()) {
            const shouldExclude = excludePatterns.some(pattern =>
              this.matchesPattern(path.relative(dirPath, fullPath), pattern)
            );
            if (!shouldExclude) {
              await traverse(fullPath);
            }
          } else {
            const relativePath = path.relative(dirPath, fullPath);
            const shouldInclude = includePatterns.length === 0 ||
              includePatterns.some(pattern => this.matchesPattern(relativePath, pattern));
            const shouldExclude = excludePatterns.some(pattern =>
              this.matchesPattern(relativePath, pattern)
            );

            if (shouldInclude && !shouldExclude) {
              allFiles.push(fullPath);
            }
          }
        }
      } catch (error) {
        console.error(`Error reading directory ${currentPath}:`, error);
      }
    };

    await traverse(dirPath);
    return allFiles;
  }

  private matchesPattern(filePath: string, pattern: string): boolean {
    if (pattern.endsWith('/**')) {
      const base = pattern.slice(0, -3);
      return filePath === base || filePath.startsWith(base + '/');
    }
    if (pattern.startsWith('*.')) {
      const extension = pattern.slice(1);
      return filePath.endsWith(extension);
    }

    const escaped = pattern
      .replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
      .replace(/\\\*\\\*/g, '.*')
      .replace(/\\\*/g, '.*')
      .replace(/\\\?/g, '.');

    const regex = new RegExp(`^${escaped}$`);
    return regex.test(filePath);
  }

  private async storeChunksInBatches(collection: any, chunks: any[], projectId: string, batchSize: number = 100) {
    console.error(`Storing ${chunks.length} chunks for project ${projectId}...`);

    for (let i = 0; i < chunks.length; i += batchSize) {
      const batch = chunks.slice(i, i + batchSize);

      const ids = batch.map((_, idx) => `${projectId}_chunk_${i + idx}`);
      const documents = batch.map(chunk => chunk.content);
      const metadatas = batch.map(chunk => ({
        file_path: chunk.file_path,
        project_id: chunk.project_id,
        project_name: chunk.project_name,
        project_path: chunk.project_path,
        source_type: chunk.source_type,
        file_type: chunk.file_type,
        chunk_index: chunk.chunk_index,
        indexed_at: chunk.indexed_at
      }));

      try {
        await collection.add({ ids, documents, metadatas });
        console.error(`Stored batch ${Math.floor(i / batchSize) + 1}/${Math.ceil(chunks.length / batchSize)}`);
      } catch (error) {
        console.error(`Error storing batch ${Math.floor(i / batchSize) + 1}:`, error);
        throw error;
      }
    }
  }
}
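
The class above can also be driven directly from a small Node.js script rather than through the MCP server. The sketch below is a minimal, illustrative example only: it assumes ChromaDB is reachable at the configured host/port and that an Ollama instance is serving the default nomic-embed-text model; the script name, project path, and query are made up for the example.

// example-usage.ts (hypothetical driver script, not part of the server)
import { CodeSearchEngine, getConfig } from './CodeSearchEngine';

async function main() {
  // Build the engine from environment-driven defaults (Ollama + local ChromaDB unless overridden)
  const engine = new CodeSearchEngine(getConfig());

  // Index a local checkout; include/exclude patterns fall back to the built-in defaults
  await engine.indexLocalProject({
    project_path: '/path/to/my-repo',   // illustrative path
    project_name: 'my-repo'
  });

  // Semantic search scoped to that project; project_filter expects the sanitized project id
  const result = await engine.searchCodebase({
    query: 'where do we validate JWT tokens?',
    limit: 5,
    project_filter: 'my_repo'
  });

  console.log(result.content[0].text);
}

main().catch(err => {
  console.error(err);
  process.exit(1);
});

Note that project_filter takes the sanitized project id produced by sanitizeProjectId (lowercased, with non-alphanumeric characters replaced by underscores), so a project named "my-repo" is filtered as "my_repo".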
