Skip to main content
Glama
chaos-tests.js • 13.8 kB
#!/usr/bin/env node

/**
 * Azure Chaos Studio Integration for Azure AI MCP Server
 *
 * This script orchestrates chaos engineering experiments using Azure Chaos Studio
 * to test the resilience and reliability of the Azure AI MCP Server.
 */

import { pathToFileURL } from 'node:url';

import { DefaultAzureCredential } from '@azure/identity';
import { ChaosManagementClient } from '@azure/arm-chaos';
import axios from 'axios';
import winston from 'winston';

/** Pause inserted between consecutive experiments, in milliseconds. */
const INTER_EXPERIMENT_DELAY_MS = 30000;

/** Recovery taking longer than this (5 minutes) triggers an RTO recommendation. */
const SLOW_RECOVERY_THRESHOLD_MS = 300000;

/**
 * Parse a strictly positive integer (base 10) from an environment value.
 *
 * @param {string | undefined} rawValue - Raw environment variable value.
 * @param {number} fallback - Value used when rawValue is missing, not a
 *   number, or not greater than zero.
 * @returns {number} The parsed integer, or the fallback.
 */
function parsePositiveInt(rawValue, fallback) {
  const parsed = Number.parseInt(rawValue ?? '', 10);
  return Number.isInteger(parsed) && parsed > 0 ? parsed : fallback;
}

// Configuration
const config = {
  subscriptionId: process.env.AZURE_SUBSCRIPTION_ID,
  resourceGroupName: process.env.AZURE_RESOURCE_GROUP_NAME || 'rg-azure-ai-mcp-server-prod',
  chaosStudioEndpoint: process.env.AZURE_CHAOS_STUDIO_ENDPOINT,
  containerAppName: process.env.CONTAINER_APP_NAME || 'ca-azure-ai-mcp-server-prod',
  healthCheckUrl: process.env.HEALTH_CHECK_URL,
  // Region written into each experiment definition; overridable via AZURE_LOCATION.
  location: process.env.AZURE_LOCATION || 'East US',
  // A malformed value would otherwise produce "PTNaNM" durations and a
  // monitoring loop that never runs, so invalid input falls back to the default.
  testDurationMinutes: parsePositiveInt(process.env.CHAOS_TEST_DURATION, 5),
  recoveryTimeoutMinutes: parsePositiveInt(process.env.CHAOS_RECOVERY_TIMEOUT, 10),
};

// Logger setup
const logger = winston.createLogger({
  level: 'info',
  format: winston.format.combine(
    winston.format.timestamp(),
    winston.format.json()
  ),
  transports: [
    new winston.transports.Console(),
    new winston.transports.File({ filename: 'chaos-tests.log' })
  ]
});

/**
 * Build the `steps` array for an experiment made of one step, one branch and
 * one continuous action bound to `selector1`.
 *
 * @param {string} faultUrn - Fault identifier placed in the action's `name`.
 * @param {Array<{key: string, value: string}>} parameters - Fault parameters.
 * @returns {Array<object>} The `steps` array for the experiment definition.
 */
function buildSingleActionSteps(faultUrn, parameters) {
  return [
    {
      name: 'Step1',
      branches: [
        {
          name: 'Branch1',
          actions: [
            {
              type: 'continuous',
              name: faultUrn,
              duration: `PT${config.testDurationMinutes}M`,
              parameters,
              selectorId: 'selector1'
            }
          ]
        }
      ]
    }
  ];
}

/**
 * Runs a fixed list of chaos experiments against the configured container app
 * and collects one result record per experiment.
 */
class ChaosTestRunner {
  constructor() {
    this.credential = new DefaultAzureCredential();
    this.chaosClient = new ChaosManagementClient(this.credential, config.subscriptionId);
    this.testResults = [];
  }

  /**
   * Run all chaos engineering experiments sequentially.
   *
   * A failing experiment is recorded with status 'failed' and does not stop
   * the remaining experiments.
   *
   * @returns {Promise<object>} The report produced by generateReport().
   */
  async runAllTests() {
    logger.info('Starting chaos engineering test suite');

    const experiments = [
      {
        name: 'container-app-stop',
        description: 'Stop container app instances to test auto-recovery',
        type: 'service-disruption'
      },
      {
        name: 'network-latency',
        description: 'Introduce network latency to test timeout handling',
        type: 'network-chaos'
      },
      {
        name: 'cpu-pressure',
        description: 'Apply CPU pressure to test performance under load',
        type: 'resource-exhaustion'
      },
      {
        name: 'memory-pressure',
        description: 'Apply memory pressure to test memory management',
        type: 'resource-exhaustion'
      },
      // NOTE(review): createExperimentDefinition() has no case for
      // 'dependency-chaos', so this experiment is always recorded as failed
      // with "Unknown experiment type". TODO: add a definition or remove it.
      {
        name: 'dependency-failure',
        description: 'Simulate Azure service failures',
        type: 'dependency-chaos'
      }
    ];

    for (const [index, experiment] of experiments.entries()) {
      // Wait between experiments (never after the last one) so the system
      // can settle whether the previous experiment passed or failed.
      if (index > 0) {
        await this.sleep(INTER_EXPERIMENT_DELAY_MS);
      }

      try {
        logger.info(`Running experiment: ${experiment.name}`);
        const result = await this.runExperiment(experiment);
        this.testResults.push(result);
      } catch (error) {
        logger.error(`Experiment ${experiment.name} failed:`, error);
        this.testResults.push({
          name: experiment.name,
          status: 'failed',
          error: error.message
        });
      }
    }

    return this.generateReport();
  }

  /**
   * Run a specific chaos experiment: pre-check health, start the fault,
   * monitor it, cancel it, then wait for the system to recover.
   *
   * @param {{name: string, description: string, type: string}} experiment
   * @returns {Promise<object>} Result record; `duration` is the total wall
   *   time in ms and `recoveryDuration` is the time spent waiting for
   *   recovery in ms.
   * @throws {Error} If the system is unhealthy beforehand or the experiment
   *   cannot be started.
   */
  async runExperiment(experiment) {
    const startTime = Date.now();

    // Pre-experiment health check
    const preHealthy = await this.checkHealth();
    if (!preHealthy) {
      throw new Error('System not healthy before experiment');
    }

    // Start experiment
    logger.info(`Starting experiment: ${experiment.name}`);
    const experimentId = await this.startChaosExperiment(experiment);

    // Monitor during experiment; always cancel the fault afterwards, even if
    // monitoring throws, so a started experiment is never left running.
    let monitoringResults;
    try {
      monitoringResults = await this.monitorExperiment(experiment, experimentId);
    } finally {
      await this.stopChaosExperiment(experimentId);
    }

    // Wait for recovery, timed separately from the fault window
    const recoveryStart = Date.now();
    const recovered = await this.waitForRecovery();
    const endTime = Date.now();

    return {
      name: experiment.name,
      description: experiment.description,
      type: experiment.type,
      status: recovered ? 'passed' : 'failed',
      duration: endTime - startTime,
      recoveryDuration: endTime - recoveryStart,
      preHealthy: preHealthy,
      recovered: recovered,
      monitoring: monitoringResults,
      timestamp: new Date().toISOString()
    };
  }

  /**
   * Create and start a chaos experiment using Azure Chaos Studio.
   *
   * @param {{name: string, type: string}} experiment
   * @returns {Promise<string>} The generated experiment name, used as its id.
   * @throws {Error} If the type is unknown or the Chaos Studio calls fail.
   */
  async startChaosExperiment(experiment) {
    const experimentDefinition = this.createExperimentDefinition(experiment);

    try {
      // Timestamp suffix gives every run its own experiment resource
      const experimentName = `chaos-${experiment.name}-${Date.now()}`;

      await this.chaosClient.experiments.createOrUpdate(
        config.resourceGroupName,
        experimentName,
        experimentDefinition
      );

      // Start the experiment
      await this.chaosClient.experiments.start(
        config.resourceGroupName,
        experimentName
      );

      logger.info(`Chaos experiment ${experimentName} started`);
      return experimentName;
    } catch (error) {
      logger.error('Failed to start chaos experiment:', error);
      throw error;
    }
  }

  /**
   * Create experiment definition based on type.
   *
   * NOTE(review): the fault URNs and parameter keys below are carried over
   * unchanged; confirm them against the Azure Chaos Studio fault library.
   *
   * @param {{name: string, type: string}} experiment
   * @returns {object} Definition passed to experiments.createOrUpdate().
   * @throws {Error} If experiment.type is not a supported type.
   */
  createExperimentDefinition(experiment) {
    const baseDefinition = {
      location: config.location,
      identity: {
        type: 'SystemAssigned'
      },
      selectors: [
        {
          type: 'List',
          id: 'selector1',
          targets: [
            {
              type: 'ChaosTarget',
              id: `/subscriptions/${config.subscriptionId}/resourceGroups/${config.resourceGroupName}/providers/Microsoft.App/containerApps/${config.containerAppName}/providers/Microsoft.Chaos/targets/Microsoft-ContainerApp`
            }
          ]
        }
      ]
    };

    switch (experiment.type) {
      case 'service-disruption':
        return {
          ...baseDefinition,
          steps: buildSingleActionSteps(
            'urn:csci:microsoft:containerApp:stop/1.0',
            [{ key: 'abruptShutdown', value: 'false' }]
          )
        };

      case 'network-chaos':
        return {
          ...baseDefinition,
          steps: buildSingleActionSteps(
            'urn:csci:microsoft:containerApp:networkLatency/1.0',
            [
              { key: 'latencyMs', value: '1000' },
              { key: 'jitterMs', value: '200' }
            ]
          )
        };

      case 'resource-exhaustion':
        return {
          ...baseDefinition,
          // The experiment name decides between CPU and memory pressure
          steps: buildSingleActionSteps(
            experiment.name.includes('cpu')
              ? 'urn:csci:microsoft:containerApp:cpuPressure/1.0'
              : 'urn:csci:microsoft:containerApp:memoryPressure/1.0',
            [{ key: 'pressureLevel', value: '80' }]
          )
        };

      default:
        throw new Error(`Unknown experiment type: ${experiment.type}`);
    }
  }

  /**
   * Monitor experiment progress and system behavior for the configured test
   * duration, sampling roughly every 30 seconds.
   *
   * @param {object} experiment - Experiment being monitored (not read here).
   * @param {string} experimentId - Running experiment's id (not read here).
   * @returns {Promise<object>} Collected samples: healthChecks, responseTime,
   *   errorRate, and resourceUtilization (currently never populated).
   */
  async monitorExperiment(experiment, experimentId) {
    const monitoringResults = {
      healthChecks: [],
      responseTime: [],
      errorRate: [],
      resourceUtilization: []
    };

    const monitoringDuration = config.testDurationMinutes * 60 * 1000;
    const checkInterval = 30000; // 30 seconds
    const startTime = Date.now();

    while (Date.now() - startTime < monitoringDuration) {
      try {
        // Health check
        const healthResult = await this.checkHealth();
        monitoringResults.healthChecks.push({
          timestamp: new Date().toISOString(),
          healthy: healthResult
        });

        // Response time check
        const responseTime = await this.measureResponseTime();
        monitoringResults.responseTime.push({
          timestamp: new Date().toISOString(),
          responseTime: responseTime
        });

        // Error rate check (would need to query Application Insights)
        const errorRate = await this.getErrorRate();
        monitoringResults.errorRate.push({
          timestamp: new Date().toISOString(),
          errorRate: errorRate
        });

        logger.info(`Monitoring - Health: ${healthResult}, Response Time: ${responseTime}ms, Error Rate: ${errorRate}%`);
      } catch (error) {
        // Interpolated because winston drops a bare string passed as meta
        logger.warn(`Monitoring check failed: ${error.message}`);
      }

      // Never sleep past the end of the monitoring window
      const remaining = monitoringDuration - (Date.now() - startTime);
      if (remaining <= 0) {
        break;
      }
      await this.sleep(Math.min(checkInterval, remaining));
    }

    return monitoringResults;
  }

  /**
   * Stop chaos experiment. Best effort: a failure is logged, not thrown.
   *
   * @param {string} experimentId - Name returned by startChaosExperiment().
   * @returns {Promise<void>}
   */
  async stopChaosExperiment(experimentId) {
    try {
      await this.chaosClient.experiments.cancel(
        config.resourceGroupName,
        experimentId
      );
      logger.info(`Chaos experiment ${experimentId} stopped`);
    } catch (error) {
      logger.error('Failed to stop chaos experiment:', error);
    }
  }

  /**
   * Check system health by requesting `<healthCheckUrl>/health`.
   *
   * @returns {Promise<boolean>} true when the response status is 200; false
   *   on any other status or when the request fails.
   */
  async checkHealth() {
    try {
      const response = await axios.get(`${config.healthCheckUrl}/health`, {
        timeout: 10000
      });
      return response.status === 200;
    } catch (error) {
      // Interpolated because winston drops a bare string passed as meta
      logger.warn(`Health check failed: ${error.message}`);
      return false;
    }
  }

  /**
   * Measure response time of the health endpoint.
   *
   * @returns {Promise<number>} Elapsed milliseconds, or -1 if the request failed.
   */
  async measureResponseTime() {
    const startTime = Date.now();
    try {
      await axios.get(`${config.healthCheckUrl}/health`, {
        timeout: 30000
      });
      return Date.now() - startTime;
    } catch (error) {
      return -1; // Indicate failure
    }
  }

  /**
   * Get error rate from monitoring.
   *
   * @returns {Promise<number>} A simulated percentage in the range [0, 5).
   */
  async getErrorRate() {
    // In a real implementation, this would query Application Insights
    // For now, return a simulated value
    return Math.random() * 5; // 0-5% error rate
  }

  /**
   * Wait for system recovery after experiment, polling health every 10
   * seconds until the configured recovery timeout elapses.
   *
   * @returns {Promise<boolean>} true once a health check succeeds, false on timeout.
   */
  async waitForRecovery() {
    const maxWaitTime = config.recoveryTimeoutMinutes * 60 * 1000;
    const checkInterval = 10000; // 10 seconds
    const startTime = Date.now();

    while (Date.now() - startTime < maxWaitTime) {
      const healthy = await this.checkHealth();
      if (healthy) {
        logger.info('System recovered successfully');
        return true;
      }

      logger.info('Waiting for system recovery...');
      await this.sleep(checkInterval);
    }

    logger.error('System failed to recover within timeout');
    return false;
  }

  /**
   * Generate comprehensive test report from the collected results and log it.
   *
   * @returns {{summary: object, results: Array<object>, recommendations: Array<string>}}
   */
  generateReport() {
    const report = {
      summary: {
        totalTests: this.testResults.length,
        passed: this.testResults.filter(r => r.status === 'passed').length,
        failed: this.testResults.filter(r => r.status === 'failed').length,
        timestamp: new Date().toISOString()
      },
      results: this.testResults,
      recommendations: this.generateRecommendations()
    };

    logger.info('Chaos engineering test report:', report);
    return report;
  }

  /**
   * Generate recommendations based on test results.
   *
   * @returns {Array<string>} Human-readable recommendations (possibly empty).
   */
  generateRecommendations() {
    const recommendations = [];

    const failedTests = this.testResults.filter(r => r.status === 'failed');
    if (failedTests.length > 0) {
      recommendations.push('Review failed experiments and improve system resilience');
    }

    // Compare the recovery time, not the total duration: the total includes
    // the fault window itself (5 minutes by default), which made every
    // completed experiment look slow. Records without recoveryDuration fall
    // back to duration.
    const slowRecovery = this.testResults.filter(
      r => (r.recoveryDuration ?? r.duration) > SLOW_RECOVERY_THRESHOLD_MS
    );
    if (slowRecovery.length > 0) {
      recommendations.push('Optimize recovery time for better RTO');
    }

    if (this.testResults.length === 0) {
      recommendations.push('No tests executed - verify chaos studio configuration');
    }

    return recommendations;
  }

  /**
   * Utility function for delays.
   *
   * @param {number} ms - Delay in milliseconds.
   * @returns {Promise<void>} Resolves after the delay.
   */
  sleep(ms) {
    return new Promise(resolve => setTimeout(resolve, ms));
  }
}

/**
 * Main execution: run the suite, print the report, and exit with code 1 if
 * any experiment failed or the run itself threw, otherwise 0.
 */
async function main() {
  try {
    const runner = new ChaosTestRunner();
    const report = await runner.runAllTests();

    console.log('\n=== CHAOS ENGINEERING TEST REPORT ===');
    console.log(JSON.stringify(report, null, 2));

    // Exit with appropriate code
    process.exit(report.summary.failed > 0 ? 1 : 0);
  } catch (error) {
    logger.error('Chaos test execution failed:', error);
    process.exit(1);
  }
}

// Run if called directly. pathToFileURL() yields the same URL form as
// import.meta.url, including percent-encoding and Windows drive letters,
// which a hand-built `file://${path}` string does not.
const invokedPath = process.argv[1];
if (invokedPath && import.meta.url === pathToFileURL(invokedPath).href) {
  main();
}

export { ChaosTestRunner };

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/caiotk/nexguideai-azure-ai-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server