import { encode } from 'gpt-tokenizer';
/**
* Token measurement engine for analyzing MCP tool schema token consumption
*/
export class TokenMeasurementEngine {
constructor() {
this.tokenizer = null;
this.initializeTokenizer();
}
/**
* Initialize the tokenizer using gpt-tokenizer (free, offline BPE tokenization)
*/
initializeTokenizer() {
// gpt-tokenizer is a JavaScript port of OpenAI's tiktoken BPE tokenizer (runs offline, no API calls).
// Note: despite its name, this wrapper's encode() returns a token count (a number),
// not an array of token IDs; countToolTokens() below relies on that behavior.
this.tokenizer = {
encode: (text) => this.countTokens(text)
};
}
/**
* Count tokens using real BPE tokenization (gpt-tokenizer)
* @param {string} text Text to count tokens for
* @returns {number} Actual token count
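* @example
* // Illustrative sketch; the exact count depends on the gpt-tokenizer encoding in use:
* const engine = new TokenMeasurementEngine();
* engine.countTokens('list the files in this repository'); // => a small positive integer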
*/
countTokens(text) {
if (!text || typeof text !== 'string') return 0;
try {
// Use gpt-tokenizer's encode function
const tokens = encode(text);
return tokens.length;
} catch (error) {
console.error('Error tokenizing text:', error);
return 0;
}
}
/**
* Count tokens in a tool definition (handles both real MCP and simulated formats)
* @param {Object} tool Tool definition object
* @returns {Object} Token breakdown for the tool
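* @example
* // Illustrative sketch using a hypothetical minimal tool definition:
* const breakdown = engine.countToolTokens({
*   name: 'echo',
*   description: 'Echo a message back to the caller',
*   inputSchema: { type: 'object', properties: { message: { type: 'string' } } }
* });
* // breakdown.total is the sum of the per-field counts (name, description, schemas, etc.)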
*/
countToolTokens(tool) {
const breakdown = {
name: this.tokenizer.encode(tool.name || ''),
title: this.tokenizer.encode(tool.title || ''), // optional MCP display title
description: this.tokenizer.encode(tool.description || ''),
// Schemas are measured as pretty-printed JSON (2-space indent), which uses more tokens than compact JSON
inputSchema: this.tokenizer.encode(JSON.stringify(tool.inputSchema || {}, null, 2)),
outputSchema: this.tokenizer.encode(JSON.stringify(tool.outputSchema || {}, null, 2)), // optional MCP output schema
annotations: this.tokenizer.encode(JSON.stringify(tool.annotations || {}, null, 2)),
additionalFields: 0, // tokens from any non-standard fields, counted below
total: 0
};
// Count additional fields beyond the standard ones
const standardFields = new Set(['name', 'title', 'description', 'inputSchema', 'outputSchema', 'annotations']);
Object.keys(tool).forEach(field => {
if (!standardFields.has(field)) {
const fieldValue = tool[field];
if (fieldValue !== undefined && fieldValue !== null) {
const fieldContent = typeof fieldValue === 'object'
? JSON.stringify(fieldValue, null, 2)
: String(fieldValue);
breakdown.additionalFields += this.tokenizer.encode(fieldContent);
}
}
});
breakdown.total = breakdown.name + breakdown.title + breakdown.description +
breakdown.inputSchema + breakdown.outputSchema + breakdown.annotations +
breakdown.additionalFields;
return breakdown;
}
/**
* Count tokens for an entire server's tool collection
* @param {Object} serverResult Server extraction result
* @returns {Object} Server token analysis
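* @example
* // Illustrative sketch; assumes a serverResult shaped like the extractor output
* // ({ serverName, success, tools, error }):
* const analysis = engine.countServerTokens({
*   serverName: 'example-server',
*   success: true,
*   tools: [{ name: 'echo', description: 'Echo a message', inputSchema: { type: 'object' } }]
* });
* // analysis.totalTokens, analysis.averageTokensPerTool, analysis.summary.heaviestTool, ...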
*/
countServerTokens(serverResult) {
if (!serverResult.success || !serverResult.tools) {
return {
serverName: serverResult.serverName,
success: false,
error: serverResult.error,
totalTokens: 0,
tools: []
};
}
const toolAnalysis = serverResult.tools.map(tool => ({
name: tool.name,
tokens: this.countToolTokens(tool),
complexity: this.analyzeSchemaComplexity(tool.inputSchema)
}));
const totalTokens = toolAnalysis.reduce((sum, tool) => sum + tool.tokens.total, 0);
return {
serverName: serverResult.serverName,
success: true,
totalTokens,
toolCount: toolAnalysis.length,
averageTokensPerTool: toolAnalysis.length ? Math.round(totalTokens / toolAnalysis.length) : 0,
tools: toolAnalysis,
summary: this.generateServerTokenSummary(toolAnalysis)
};
}
/**
* Analyze schema complexity metrics
* @param {Object} schema JSON schema object
* @returns {Object} Complexity metrics
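* @example
* // Deterministic illustration: a single nested object property
* engine.analyzeSchemaComplexity({
*   type: 'object',
*   properties: { filter: { type: 'object', properties: { query: { type: 'string' } } } }
* });
* // => { depth: 2, properties: 2, complexity: 'simple', hasArrays: false,
* //      hasNestedObjects: true, hasEnums: false }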
*/
analyzeSchemaComplexity(schema) {
if (!schema || typeof schema !== 'object') {
return { depth: 0, properties: 0, complexity: 'simple' };
}
const depth = this.calculateSchemaDepth(schema);
const properties = this.countSchemaProperties(schema);
let complexity = 'simple';
if (depth > 3 || properties > 10) {
complexity = 'complex';
} else if (depth > 2 || properties > 5) {
complexity = 'moderate';
}
return {
depth,
properties,
complexity,
hasArrays: this.hasArrayTypes(schema),
hasNestedObjects: this.hasNestedObjects(schema),
hasEnums: this.hasEnumTypes(schema)
};
}
/**
* Calculate the maximum depth of a JSON schema
* @param {Object} schema JSON schema object
* @param {number} currentDepth Current depth level
* @returns {number} Maximum depth
*/
calculateSchemaDepth(schema, currentDepth = 0) {
if (!schema || typeof schema !== 'object') return currentDepth;
let maxDepth = currentDepth;
if (schema.properties) {
for (const prop of Object.values(schema.properties)) {
const depth = this.calculateSchemaDepth(prop, currentDepth + 1);
maxDepth = Math.max(maxDepth, depth);
}
}
if (schema.items) {
const depth = this.calculateSchemaDepth(schema.items, currentDepth + 1);
maxDepth = Math.max(maxDepth, depth);
}
return maxDepth;
}
/**
* Count total properties in a schema (including nested)
* @param {Object} schema JSON schema object
* @returns {number} Total property count
*/
countSchemaProperties(schema) {
if (!schema || typeof schema !== 'object') return 0;
let count = 0;
if (schema.properties) {
count += Object.keys(schema.properties).length;
for (const prop of Object.values(schema.properties)) {
count += this.countSchemaProperties(prop);
}
}
if (schema.items) {
count += this.countSchemaProperties(schema.items);
}
return count;
}
/**
* Check if schema contains array types
* @param {Object} schema JSON schema object
* @returns {boolean} Whether schema has arrays
*/
hasArrayTypes(schema) {
if (!schema || typeof schema !== 'object') return false;
if (schema.type === 'array') return true;
if (schema.properties) {
return Object.values(schema.properties).some(prop => this.hasArrayTypes(prop));
}
return false;
}
/**
* Check if schema has nested objects
* @param {Object} schema JSON schema object
* @returns {boolean} Whether schema has nested objects
*/
hasNestedObjects(schema) {
if (!schema || typeof schema !== 'object') return false;
if (schema.properties && Object.values(schema.properties).some(prop =>
prop.type === 'object' || this.hasNestedObjects(prop)
)) {
return true;
}
// Also check array item schemas so arrays of objects are detected
if (schema.items) {
return schema.items.type === 'object' || this.hasNestedObjects(schema.items);
}
return false;
}
/**
* Check if schema contains enum types
* @param {Object} schema JSON schema object
* @returns {boolean} Whether schema has enums
*/
hasEnumTypes(schema) {
if (!schema || typeof schema !== 'object') return false;
if (schema.enum) return true;
if (schema.properties && Object.values(schema.properties).some(prop => this.hasEnumTypes(prop))) {
return true;
}
// Also check array item schemas so enums nested inside arrays are detected
if (schema.items) return this.hasEnumTypes(schema.items);
return false;
}
/**
* Generate summary statistics for server token usage
* @param {Array} toolAnalysis Array of tool token analyses
* @returns {Object} Summary statistics
*/
generateServerTokenSummary(toolAnalysis) {
if (toolAnalysis.length === 0) {
return { heaviestTool: null, lightestTool: null, complexityDistribution: {} };
}
// Copy before sorting so the caller's tool order is not mutated
const sortedByTokens = [...toolAnalysis].sort((a, b) => b.tokens.total - a.tokens.total);
const complexityDistribution = toolAnalysis.reduce((acc, tool) => {
const complexity = tool.complexity.complexity;
acc[complexity] = (acc[complexity] || 0) + 1;
return acc;
}, {});
return {
heaviestTool: {
name: sortedByTokens[0].name,
tokens: sortedByTokens[0].tokens.total
},
lightestTool: {
name: sortedByTokens[sortedByTokens.length - 1].name,
tokens: sortedByTokens[sortedByTokens.length - 1].tokens.total
},
complexityDistribution,
averageComplexity: this.calculateAverageComplexity(toolAnalysis)
};
}
/**
* Calculate average complexity score
* @param {Array} toolAnalysis Array of tool analyses
* @returns {number} Average complexity score
*/
calculateAverageComplexity(toolAnalysis) {
const complexityScores = toolAnalysis.map(tool => {
switch (tool.complexity.complexity) {
case 'simple': return 1;
case 'moderate': return 2;
case 'complex': return 3;
default: return 1;
}
});
return complexityScores.reduce((sum, score) => sum + score, 0) / complexityScores.length;
}
/**
* Measure baseline token usage (built-in Claude Code tools)
* Note: Built-in tool token counts are not directly measurable.
* This method tracks the tool list but does not estimate token counts.
* @returns {Object} Baseline tool information
*/
measureBaselineTokens() {
const builtInTools = [
'Task', 'Bash', 'Grep', 'Read', 'Edit', 'MultiEdit', 'Write',
'WebFetch', 'WebSearch', 'TodoWrite', 'NotebookRead', 'NotebookEdit',
'LS', 'Glob', 'ExitPlanMode'
];
return {
builtInTools: builtInTools.length,
toolNames: builtInTools,
note: 'Built-in tool token counts are not directly measurable via this tool'
};
}
/**
* Calculate total MCP overhead
* @param {Array} serverResults Array of server token analyses
* @returns {Object} Total overhead calculation
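* @example
* // Illustrative sketch; takes the analyses produced by countServerTokens():
* engine.calculateTotalOverhead([{ totalTokens: 12000 }, { totalTokens: 8000 }]);
* // => mcpTokens: 20000, overheadPercentage: 10, effectiveContext: 180000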
*/
calculateTotalOverhead(serverResults) {
const baseline = this.measureBaselineTokens();
const mcpTokens = serverResults.reduce((sum, server) => sum + (server.totalTokens || 0), 0);
const contextWindow = 200000; // assumed 200k-token context window
const overheadPercentage = (mcpTokens / contextWindow) * 100;
return {
builtInToolCount: baseline.builtInTools,
builtInToolNote: baseline.note,
mcpTokens,
contextWindow,
overheadPercentage: Math.round(overheadPercentage * 100) / 100,
remainingContext: contextWindow - mcpTokens,
effectiveContext: Math.max(0, contextWindow - mcpTokens)
};
}
}
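// Example usage (illustrative sketch; assumes serverResult objects shaped like
// { serverName, success, tools } from an upstream schema-extraction step):
//
//   const engine = new TokenMeasurementEngine();
//   const serverAnalysis = engine.countServerTokens(serverResult);
//   const overhead = engine.calculateTotalOverhead([serverAnalysis]);
//   console.log(`MCP tools consume ${overhead.mcpTokens} tokens ` +
//     `(${overhead.overheadPercentage}% of a ${overhead.contextWindow}-token context window)`);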