// advanced-gemini-proxy.ts
/**
* Advanced Gemini Proxy with Gen AI SDK Integration
* Production-ready implementation with streaming, safety, and advanced features
*/
import { GoogleGenerativeAI, HarmCategory, HarmBlockThreshold, GenerativeModel } from '@google/generative-ai';
import { createClient } from '@supabase/supabase-js';
import { v4 as uuidv4 } from 'uuid';
import dotenv from 'dotenv';
dotenv.config();
export interface GeminiConfig {
temperature?: number;
topP?: number;
topK?: number;
maxOutputTokens?: number;
streaming?: boolean;
safetyThreshold?: 'low' | 'medium' | 'high';
}
export interface GeminiResponse {
text: string;
model: string;
success: boolean;
metadata: {
tokensUsed?: number;
processingTime: number;
requestId: string;
safetyRatings?: any[];
finishReason?: string;
};
}
export interface ModelCapabilities {
maxTokens: number;
rateLimit: { rpm: number; tpm: number };
costTier: 'low' | 'medium' | 'high';
features: string[];
bestFor: string[];
}
export interface TaskRequest {
type: 'text' | 'embedding' | 'flash' | 'preview' | 'audio' | 'multimodal';
complexity: 'simple' | 'medium' | 'complex';
priority: 'low' | 'medium' | 'high';
estimatedTokens?: number;
requiresStructuredOutput?: boolean;
}
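// Per-model usage counters over a rolling 60-second window. canMakeRequest()
// resets them lazily once resetTime passes, rather than on a timer.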
interface UsageTracker {
model: string;
requestCount: number;
tokenCount: number;
lastRequest: number;
resetTime: number;
}
export class AdvancedGeminiProxy {
private genAI: GoogleGenerativeAI;
private supabase: ReturnType<typeof createClient>;
private models: Map<string, ModelCapabilities> = new Map();
private rateLimitCache = new Map<string, { count: number; resetTime: number }>();
private usageTrackers: Map<string, UsageTracker> = new Map();
constructor() {
if (!process.env.GEMINI_API_KEY) {
throw new Error('GEMINI_API_KEY environment variable is required');
}
this.genAI = new GoogleGenerativeAI(process.env.GEMINI_API_KEY);
this.supabase = createClient(
process.env.SUPABASE_URL!,
process.env.SUPABASE_SERVICE_ROLE_KEY!
);
this.initializeModels();
}
private initializeModels() {
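// The rateLimit values below are client-side budgets enforced by
// canMakeRequest() and checkRateLimit(); they roughly mirror published
// per-model quotas and should be tuned to the account's actual limits.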
this.models = new Map([
// Latest 2.5 series models for enhanced reasoning
['gemini-2.5-pro', {
maxTokens: 8192,
rateLimit: { rpm: 2, tpm: 250000 },
costTier: 'high',
features: ['advanced-reasoning', 'long-context', 'structured-output'],
bestFor: ['complex-analysis', 'business-insights', 'detailed-reports', 'strategic-planning']
}],
['gemini-2.5-flash', {
maxTokens: 8192,
rateLimit: { rpm: 10, tpm: 250000 },
costTier: 'medium',
features: ['fast-response', 'structured-output', 'json-mode'],
bestFor: ['structured-parsing', 'note-analysis', 'pattern-recognition', 'chicken-business-ops']
}],
['gemini-2.0-flash-exp', {
maxTokens: 8192,
rateLimit: { rpm: 30, tpm: 1000000 },
costTier: 'low',
features: ['text', 'code', 'multimodal', 'fast', 'experimental'],
bestFor: ['general', 'coding', 'analysis', 'development']
}],
['gemini-2.0-flash-thinking-exp', {
maxTokens: 32768,
rateLimit: { rpm: 15, tpm: 1000000 },
costTier: 'high',
features: ['reasoning', 'complex-analysis', 'step-by-step'],
bestFor: ['complex-reasoning', 'detailed-analysis', 'problem-solving']
}],
['gemini-2.0-flash', {
maxTokens: 8192,
rateLimit: { rpm: 15, tpm: 1000000 },
costTier: 'medium',
features: ['general-parsing', 'conversation', 'moderate-complexity'],
bestFor: ['general-parsing', 'conversation', 'moderate-complexity']
}],
['gemini-2.0-flash-lite', {
maxTokens: 4096,
rateLimit: { rpm: 15, tpm: 1000000 },
costTier: 'low',
features: ['fast', 'lightweight'],
bestFor: ['simple-parsing', 'quick-classification', 'lightweight-tasks']
}],
['gemini-1.5-pro', {
maxTokens: 8192,
rateLimit: { rpm: 5, tpm: 300000 },
costTier: 'high',
features: ['text', 'code', 'long-context', 'multimodal'],
bestFor: ['complex-tasks', 'long-documents', 'detailed-analysis']
}],
['gemini-1.5-flash', {
maxTokens: 8192,
rateLimit: { rpm: 15, tpm: 1000000 },
costTier: 'low',
features: ['text', 'code', 'fast', 'efficient'],
bestFor: ['quick-tasks', 'parsing', 'basic-analysis']
}],
['text-embedding-004', {
maxTokens: 2048,
rateLimit: { rpm: 100, tpm: 30000 },
costTier: 'low',
features: ['embedding', 'semantic-search'],
bestFor: ['embeddings', 'similarity', 'search']
}]
]);
}
/**
* Intelligent model selection based on task requirements
*/
selectBestModel(taskType: {
complexity: 'simple' | 'medium' | 'complex';
type: 'text' | 'code' | 'analysis' | 'reasoning' | 'embedding';
priority: 'low' | 'medium' | 'high';
contextLength?: 'short' | 'medium' | 'long';
}): string {
const { complexity, type, priority, contextLength = 'medium' } = taskType;
// Handle embeddings
if (type === 'embedding') {
return 'text-embedding-004';
}
// Handle complex reasoning tasks
if (complexity === 'complex' && type === 'reasoning') {
return 'gemini-2.0-flash-thinking-exp';
}
// Handle long context requirements
if (contextLength === 'long' || complexity === 'complex') {
return priority === 'high' ? 'gemini-1.5-pro' : 'gemini-2.0-flash-exp';
}
// Handle simple/fast tasks
if (complexity === 'simple' || priority === 'low') {
return 'gemini-1.5-flash';
}
// Default for medium complexity
return 'gemini-2.0-flash-exp';
}
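// Worked examples of the routing above (derived from the branches, not measured):
// selectBestModel({ complexity: 'complex', type: 'reasoning', priority: 'high' }) → 'gemini-2.0-flash-thinking-exp'
// selectBestModel({ complexity: 'complex', type: 'analysis', priority: 'high' })  → 'gemini-1.5-pro' (long-context branch)
// selectBestModel({ complexity: 'simple', type: 'text', priority: 'low' })        → 'gemini-1.5-flash'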
/**
* Initialize usage tracker for a model
*/
private initializeUsageTracker(modelId: string): void {
this.usageTrackers.set(modelId, {
model: modelId,
requestCount: 0,
tokenCount: 0,
lastRequest: Date.now(),
resetTime: Date.now() + 60000 // Reset in 1 minute
});
}
/**
* Update usage tracker after a request (window resets are applied lazily in canMakeRequest)
*/
private updateUsageTracker(modelId: string, tokensUsed: number = 0): void {
let tracker = this.usageTrackers.get(modelId);
if (!tracker) {
this.initializeUsageTracker(modelId);
tracker = this.usageTrackers.get(modelId)!;
}
tracker.requestCount++;
tracker.tokenCount += tokensUsed;
tracker.lastRequest = Date.now();
}
/**
* Enhanced rate limit checking with detailed feedback
*/
canMakeRequest(modelId: string): { allowed: boolean; waitTime?: number; reason?: string } {
const model = this.models.get(modelId);
if (!model) {
return { allowed: false, reason: `Unknown model: ${modelId}` };
}
const tracker = this.usageTrackers.get(modelId);
if (!tracker) {
// First request for this model
this.initializeUsageTracker(modelId);
return { allowed: true };
}
const now = Date.now();
// Reset tracker if minute has passed
if (now >= tracker.resetTime) {
tracker.requestCount = 0;
tracker.tokenCount = 0;
tracker.resetTime = now + 60000; // Reset in 1 minute
}
// Check RPM limit
if (tracker.requestCount >= model.rateLimit.rpm) {
const waitTime = tracker.resetTime - now;
return {
allowed: false,
waitTime,
reason: `Rate limit exceeded: ${tracker.requestCount}/${model.rateLimit.rpm} RPM`
};
}
// Check TPM limit (approximate)
if (tracker.tokenCount >= model.rateLimit.tpm) {
const waitTime = tracker.resetTime - now;
return {
allowed: false,
waitTime,
reason: `Token limit exceeded: ${tracker.tokenCount}/${model.rateLimit.tpm} TPM`
};
}
return { allowed: true };
}
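// Example: with gemini-2.5-pro capped at rpm: 2 above, a third call inside the
// same minute returns { allowed: false, waitTime: <ms until reset>,
// reason: 'Rate limit exceeded: 2/2 RPM' }.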
/**
* Smart model selection based on task requirements (enhanced version)
*/
selectOptimalModel(task: TaskRequest): string {
const { type, complexity, priority, estimatedTokens } = task;
// Handle embeddings
if (type === 'embedding') {
return 'text-embedding-004';
}
// Handle complex reasoning tasks
if (complexity === 'complex' && type === 'text') {
return priority === 'high' ? 'gemini-2.5-pro' : 'gemini-2.0-flash-thinking-exp';
}
// Handle structured output requirements
if (task.requiresStructuredOutput) {
return complexity === 'complex' ? 'gemini-2.5-pro' : 'gemini-2.5-flash';
}
// Handle simple tasks efficiently
if (complexity === 'simple') {
return 'gemini-2.0-flash-lite';
}
// Handle medium complexity with cost considerations
if (complexity === 'medium') {
return priority === 'high' ? 'gemini-2.5-flash' : 'gemini-2.0-flash';
}
// Default fallback
return 'gemini-2.0-flash-exp';
}
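// Worked examples of the routing above (derived from the branches, not measured):
// { type: 'text', complexity: 'complex', priority: 'high' }   → 'gemini-2.5-pro'
// { type: 'text', complexity: 'medium', priority: 'low' }     → 'gemini-2.0-flash'
// { type: 'text', complexity: 'medium', priority: 'medium', requiresStructuredOutput: true } → 'gemini-2.5-flash'
// { type: 'multimodal', complexity: 'simple', priority: 'low' } → 'gemini-2.0-flash-lite'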
/**
* Enhanced request method with automatic model selection and rate limiting
*/
async makeIntelligentRequest(
task: TaskRequest,
prompt: string,
config: GeminiConfig = {}
): Promise<GeminiResponse> {
const requestId = uuidv4();
try {
// Select optimal model
const modelId = this.selectOptimalModel(task);
console.log(`🧠 Selected ${modelId} for ${task.type} task (${task.complexity} complexity)`);
// Check rate limits
const rateLimitCheck = this.canMakeRequest(modelId);
if (!rateLimitCheck.allowed) {
if (rateLimitCheck.waitTime) {
console.log(`⏳ Rate limit reached. Waiting ${rateLimitCheck.waitTime}ms...`);
await new Promise(resolve => setTimeout(resolve, rateLimitCheck.waitTime));
return this.makeIntelligentRequest(task, prompt, config);
} else {
throw new Error(`Rate limit exceeded: ${rateLimitCheck.reason}`);
}
}
// Make the actual request using existing generateText method
const response = await this.generateText(prompt, {
...config,
taskType: {
complexity: task.complexity,
type: task.type as any,
priority: task.priority
},
requestId
});
// Update usage tracking
this.updateUsageTracker(modelId, response.metadata.tokensUsed || 0);
return response;
} catch (error) {
console.error(`❌ Error in intelligent request:`, error);
// Try fallback with simpler model if this was a complex task
if (task.complexity !== 'simple') {
console.log('🔄 Trying fallback with simpler model...');
const fallbackTask: TaskRequest = { ...task, complexity: 'simple' };
return this.makeIntelligentRequest(fallbackTask, prompt, config);
}
throw error;
}
}
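// Example (hypothetical caller):
// const res = await proxy.makeIntelligentRequest(
//   { type: 'text', complexity: 'simple', priority: 'low' },
//   'Classify this note: "Delivered 20kg feed to coop 3"'
// );
// // Routed to gemini-2.0-flash-lite; waits out rate limits and falls back to
// // a simpler model on failure, as implemented above.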
/**
* Enhanced text generation with streaming support
*/
async generateText(
prompt: string,
config: GeminiConfig & {
model?: string;
taskType?: {
complexity: 'simple' | 'medium' | 'complex';
type: 'text' | 'code' | 'analysis' | 'reasoning';
priority: 'low' | 'medium' | 'high';
contextLength?: 'short' | 'medium' | 'long';
};
userId?: string;
requestId?: string;
} = {}
): Promise<GeminiResponse> {
const startTime = Date.now();
const requestId = config.requestId || uuidv4();
// Select optimal model
const selectedModel = config.model ||
(config.taskType ? this.selectBestModel(config.taskType) : 'gemini-2.0-flash-exp');
try {
// Check rate limits
this.checkRateLimit(selectedModel);
const model = this.getModel(selectedModel, config);
const result = config.streaming
? await this.generateStreamingText(model, prompt, requestId)
: await this.generateNonStreamingText(model, prompt, requestId);
const processingTime = Date.now() - startTime;
// Log successful request
await this.logRequest(
selectedModel,
prompt,
result,
true,
undefined,
processingTime,
config.userId,
requestId
);
return {
text: result.text,
model: selectedModel,
success: true,
metadata: {
tokensUsed: result.tokensUsed,
processingTime,
requestId,
safetyRatings: result.safetyRatings,
finishReason: result.finishReason
}
};
} catch (error) {
const processingTime = Date.now() - startTime;
const errorMessage = error instanceof Error ? error.message : String(error);
await this.logRequest(
selectedModel,
prompt,
null,
false,
errorMessage,
processingTime,
config.userId,
requestId
);
throw new Error(`Gemini generation failed: ${errorMessage}`);
}
}
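// Example (hypothetical caller): a streamed call with strict safety and a hard token cap.
// const res = await proxy.generateText('Draft a weekly operations summary', {
//   streaming: true,
//   safetyThreshold: 'high',
//   maxOutputTokens: 512
// });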
/**
* Generate embeddings with batch support
*/
async generateEmbeddings(
texts: string[],
options: {
batchSize?: number;
userId?: string;
requestId?: string;
} = {}
): Promise<{
embeddings: number[][];
dimensions: number;
model: string;
metadata: { processingTime: number; requestId: string };
}> {
const startTime = Date.now();
const requestId = options.requestId || uuidv4();
const batchSize = options.batchSize || 10;
const model = this.genAI.getGenerativeModel({ model: 'text-embedding-004' });
try {
const embeddings: number[][] = [];
// Process in batches to respect rate limits
for (let i = 0; i < texts.length; i += batchSize) {
const batch = texts.slice(i, i + batchSize);
const batchPromises = batch.map(async (text) => {
const result = await model.embedContent(text);
return result.embedding.values;
});
const batchResults = await Promise.all(batchPromises);
embeddings.push(...batchResults);
// Rate limiting delay between batches
if (i + batchSize < texts.length) {
await new Promise(resolve => setTimeout(resolve, 100));
}
}
const processingTime = Date.now() - startTime;
// Log embedding generation
await this.logRequest(
'text-embedding-004',
`Embedding ${texts.length} texts`,
{ embeddings: embeddings.length },
true,
undefined,
processingTime,
options.userId,
requestId
);
return {
embeddings,
dimensions: embeddings[0]?.length || 0,
model: 'text-embedding-004',
metadata: { processingTime, requestId }
};
} catch (error) {
const errorMessage = error instanceof Error ? error.message : String(error);
throw new Error(`Embedding generation failed: ${errorMessage}`);
}
}
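// Example (hypothetical caller):
// const { embeddings, dimensions } = await proxy.generateEmbeddings(
//   ['feed delivery note', 'broiler weight log'],
//   { batchSize: 5 }
// );
// // dimensions is 768 for text-embedding-004; the vectors can feed a pgvector
// // similarity search on the Supabase side.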
/**
* Get configured model instance
*/
private getModel(modelName: string, config: GeminiConfig): GenerativeModel {
const safetySettings = this.getSafetySettings(config.safetyThreshold || 'medium');
return this.genAI.getGenerativeModel({
model: modelName,
generationConfig: {
// ?? (not ||) so explicit zero values like temperature: 0 are honored
temperature: config.temperature ?? 0.3,
topP: config.topP ?? 0.8,
topK: config.topK ?? 40,
maxOutputTokens: config.maxOutputTokens ?? 2048,
},
safetySettings
});
}
/**
* Generate text without streaming
*/
private async generateNonStreamingText(
model: GenerativeModel,
prompt: string,
requestId: string
): Promise<{
text: string;
tokensUsed?: number;
safetyRatings?: any[];
finishReason?: string;
}> {
const result = await model.generateContent(prompt);
const response = result.response;
const text = response.text();
if (!text) {
throw new Error('No text generated by model');
}
return {
text,
tokensUsed: response.usageMetadata?.totalTokenCount,
safetyRatings: response.candidates?.[0]?.safetyRatings,
finishReason: response.candidates?.[0]?.finishReason
};
}
/**
* Generate text with streaming
*/
private async generateStreamingText(
model: GenerativeModel,
prompt: string,
requestId: string
): Promise<{
text: string;
tokensUsed?: number;
safetyRatings?: any[];
finishReason?: string;
}> {
const result = await model.generateContentStream(prompt);
let fullText = '';
let safetyRatings;
let finishReason;
let tokensUsed;
for await (const chunk of result.stream) {
const chunkText = chunk.text();
fullText += chunkText;
// Store metadata from the last chunk
if (chunk.candidates?.[0]) {
safetyRatings = chunk.candidates[0].safetyRatings;
finishReason = chunk.candidates[0].finishReason;
}
}
// Get final response for usage metadata
const finalResponse = await result.response;
tokensUsed = finalResponse.usageMetadata?.totalTokenCount;
return {
text: fullText,
tokensUsed,
safetyRatings,
finishReason
};
}
/**
* Get safety settings based on threshold
*/
private getSafetySettings(threshold: 'low' | 'medium' | 'high') {
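// Note the inversion: a 'high' safety threshold maps to the strictest
// blocking (BLOCK_LOW_AND_ABOVE), while 'low' is the most permissive.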
const thresholds = {
low: HarmBlockThreshold.BLOCK_ONLY_HIGH,
medium: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
high: HarmBlockThreshold.BLOCK_LOW_AND_ABOVE
};
const selectedThreshold = thresholds[threshold];
return [
{
category: HarmCategory.HARM_CATEGORY_HARASSMENT,
threshold: selectedThreshold,
},
{
category: HarmCategory.HARM_CATEGORY_HATE_SPEECH,
threshold: selectedThreshold,
},
{
category: HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
threshold: selectedThreshold,
},
{
category: HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
threshold: selectedThreshold,
},
];
}
/**
* Per-model RPM check over a one-minute window. Note this cache is independent
* of the usageTrackers consulted by canMakeRequest(), so requests routed
* through makeIntelligentRequest() pass both gates.
*/
private checkRateLimit(model: string): void {
const modelConfig = this.models.get(model);
if (!modelConfig) return;
const now = Date.now();
const windowMs = 60 * 1000; // 1 minute
const key = `${model}_${Math.floor(now / windowMs)}`;
const currentLimit = this.rateLimitCache.get(key) || { count: 0, resetTime: now + windowMs };
if (currentLimit.count >= modelConfig.rateLimit.rpm) {
throw new Error(`Rate limit exceeded for model ${model}. Try again in ${Math.ceil((currentLimit.resetTime - now) / 1000)} seconds.`);
}
currentLimit.count++;
this.rateLimitCache.set(key, currentLimit);
// Clean up expired entries (renamed loop variable avoids shadowing the outer `key`)
if (this.rateLimitCache.size > 100) {
for (const [cacheKey, entry] of this.rateLimitCache.entries()) {
if (entry.resetTime < now) {
this.rateLimitCache.delete(cacheKey);
}
}
}
}
/**
* Request logging, opt-in via ENABLE_AI_AUDIT_LOGS=true; writes to the
* Supabase ai_audit_logs table and never throws on logging failure.
*/
private async logRequest(
model: string,
prompt: string,
response: any,
success: boolean,
errorMessage?: string,
processingTime?: number,
userId?: string,
requestId?: string
): Promise<void> {
if (process.env.ENABLE_AI_AUDIT_LOGS !== 'true') return;
try {
await this.supabase.from('ai_audit_logs').insert({
id: uuidv4(),
operation_type: 'genai_sdk_call',
input_data: {
model,
prompt: prompt.substring(0, 500),
prompt_length: prompt.length
},
output_data: success ? {
text: response?.text?.substring(0, 500),
tokensUsed: response?.tokensUsed,
finishReason: response?.finishReason
} : null,
model_used: model,
tokens_used: response?.tokensUsed || 0,
success,
error_message: errorMessage,
processing_time_ms: processingTime,
user_id: userId,
request_id: requestId,
metadata: {
sdk_version: '@google/generative-ai',
safety_ratings: response?.safetyRatings,
model_capabilities: this.models.get(model),
timestamp: new Date().toISOString()
}
});
} catch (logError) {
console.warn('Failed to log request:', logError);
}
}
/**
* Health check with model-specific testing
*/
async healthCheck(): Promise<{
overall: 'healthy' | 'degraded' | 'unhealthy';
models: Record<string, { status: 'healthy' | 'unhealthy'; latency?: number; error?: string }>;
}> {
// Sample the first 3 registered models; the embedding branch below only runs
// if 'text-embedding-004' falls within this sample.
const modelTests = Array.from(this.models.keys()).slice(0, 3);
const results: Record<string, any> = {};
await Promise.allSettled(
modelTests.map(async (modelName) => {
if (modelName === 'text-embedding-004') {
// Test embedding model
try {
const startTime = Date.now();
await this.generateEmbeddings(['health check test'], { batchSize: 1 });
results[modelName] = { status: 'healthy', latency: Date.now() - startTime };
} catch (error) {
results[modelName] = {
status: 'unhealthy',
error: error instanceof Error ? error.message : String(error)
};
}
} else {
// Test text generation model
try {
const startTime = Date.now();
await this.generateText('Health check', {
model: modelName,
maxOutputTokens: 10,
temperature: 0
});
results[modelName] = { status: 'healthy', latency: Date.now() - startTime };
} catch (error) {
results[modelName] = {
status: 'unhealthy',
error: error instanceof Error ? error.message : String(error)
};
}
}
})
);
const healthyCount = Object.values(results).filter((r: any) => r.status === 'healthy').length;
const overall = healthyCount === modelTests.length ? 'healthy' :
healthyCount > 0 ? 'degraded' : 'unhealthy';
return { overall, models: results };
}
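// Example (hypothetical caller):
// const { overall, models } = await proxy.healthCheck();
// // overall === 'degraded' when only some sampled models respond.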
/**
* Get model capabilities and status
*/
getModelInfo(): Record<string, ModelCapabilities & { available: boolean }> {
const info: Record<string, ModelCapabilities & { available: boolean }> = {};
for (const [name, capabilities] of this.models.entries()) {
info[name] = { ...capabilities, available: true };
}
return info;
}
/**
* Batch processing with intelligent load balancing
*/
async batchProcess(
requests: Array<{
prompt: string;
config?: GeminiConfig & { taskType?: any };
priority?: 'low' | 'medium' | 'high';
}>,
options: {
maxConcurrency?: number;
userId?: string;
requestId?: string;
} = {}
): Promise<Array<{ success: boolean; result?: GeminiResponse; error?: string; index: number }>> {
const { maxConcurrency = 3, userId, requestId = uuidv4() } = options;
const results: Array<{ success: boolean; result?: GeminiResponse; error?: string; index: number }> = [];
// Sort by priority
const sortedRequests = requests.map((req, index) => ({ ...req, originalIndex: index }))
.sort((a, b) => {
const priorityOrder = { high: 3, medium: 2, low: 1 };
return (priorityOrder[b.priority || 'medium'] || 2) - (priorityOrder[a.priority || 'medium'] || 2);
});
// Process in batches
for (let i = 0; i < sortedRequests.length; i += maxConcurrency) {
const batch = sortedRequests.slice(i, i + maxConcurrency);
const batchPromises = batch.map(async (req) => {
try {
const result = await this.generateText(req.prompt, {
...req.config,
userId,
requestId: `${requestId}_${req.originalIndex}`
});
return { success: true, result, index: req.originalIndex };
} catch (error) {
return {
success: false,
error: error instanceof Error ? error.message : String(error),
index: req.originalIndex
};
}
});
const batchResults = await Promise.allSettled(batchPromises);
for (const result of batchResults) {
if (result.status === 'fulfilled') {
results.push(result.value);
} else {
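// Defensive: each batch promise catches its own errors above, so this
// rejected branch is not expected to run.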
results.push({
success: false,
error: result.reason instanceof Error ? result.reason.message : String(result.reason),
index: -1
});
}
}
// Rate limiting delay between batches
if (i + maxConcurrency < sortedRequests.length) {
await new Promise(resolve => setTimeout(resolve, 200));
}
}
// Sort results back to original order
return results.sort((a, b) => a.index - b.index);
}
}
export default AdvancedGeminiProxy;
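/*
 * Usage sketch (hypothetical caller; assumes GEMINI_API_KEY, SUPABASE_URL and
 * SUPABASE_SERVICE_ROLE_KEY are set in the environment):
 *
 * import AdvancedGeminiProxy from './advanced-gemini-proxy';
 *
 * const proxy = new AdvancedGeminiProxy();
 *
 * const report = await proxy.makeIntelligentRequest(
 *   { type: 'text', complexity: 'complex', priority: 'high' },
 *   'Summarize this week\'s sales and flag anomalies.'
 * );
 * console.log(report.model, report.metadata.processingTime, report.text);
 *
 * const batch = await proxy.batchProcess(
 *   [{ prompt: 'Parse: "sold 30 eggs"' }, { prompt: 'Parse: "bought feed"', priority: 'high' }],
 *   { maxConcurrency: 2 }
 * );
 */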