Skip to main content
Glama
parser.ts (22.6 kB)
import { createReadStream } from 'fs';
import { createInterface } from 'readline';
import { join } from 'path';
import { ClaudeMessage, CompactMessage, ConversationSession } from './types.js';
import {
  getClaudeProjectsPath,
  decodeProjectPath,
  extractContentFromMessage,
  calculateRelevanceScore,
  formatTimestamp,
} from './utils.js';

/**
 * Reads conversation logs stored as JSONL files, converts every usable line
 * into a `CompactMessage`, and keeps per-session bookkeeping (start/end time,
 * message count) for all messages it has seen.
 */
export class ConversationParser {
  /** Sessions seen so far, keyed by session id. */
  private sessions: Map<string, ConversationSession> = new Map();

  /**
   * Parses one JSONL file line by line.
   *
   * Blank lines, lines rejected by `timeFilter`, and lines without extractable
   * content are skipped. A line that throws while being parsed or processed is
   * logged with `console.warn` and skipped; a failure to read the file is
   * logged with `console.error` and whatever was collected so far is returned.
   *
   * @param projectDir - Directory name below the Claude projects path.
   * @param filename - JSONL file name inside `projectDir`.
   * @param query - Optional query; when given, each message gets a relevance score, otherwise 0.
   * @param timeFilter - Optional predicate on the raw message timestamp; `false` drops the message.
   * @returns The compact messages in file order.
   */
  async parseJsonlFile(
    projectDir: string,
    filename: string,
    query?: string,
    timeFilter?: (timestamp: string) => boolean
  ): Promise<CompactMessage[]> {
    const messages: CompactMessage[] = [];
    const filePath = join(getClaudeProjectsPath(), projectDir, filename);

    try {
      const fileStream = createReadStream(filePath, { encoding: 'utf8' });
      const rl = createInterface({
        input: fileStream,
        crlfDelay: Infinity,
      });

      for await (const line of rl) {
        if (!line.trim()) continue;

        try {
          const claudeMessage: ClaudeMessage = JSON.parse(line);

          // Apply time filter if provided
          if (timeFilter && !timeFilter(claudeMessage.timestamp)) {
            continue;
          }

          const content = extractContentFromMessage(claudeMessage.message || {});
          if (!content) continue;

          const compactMessage: CompactMessage = {
            uuid: claudeMessage.uuid,
            timestamp: formatTimestamp(claudeMessage.timestamp),
            type: claudeMessage.type,
            content: this.smartContentPreservation(content, 3000),
            sessionId: claudeMessage.sessionId,
            projectPath: decodeProjectPath(projectDir),
            relevanceScore: query ? calculateRelevanceScore(claudeMessage, query, projectDir) : 0,
            context: this.extractContext(claudeMessage, content),
          };

          messages.push(compactMessage);

          // Track session info
          this.updateSessionInfo(claudeMessage, projectDir);
        } catch (parseError) {
          // Gracefully handle corrupted JSONL lines. Note that this also
          // catches errors thrown while building the compact message.
          console.warn(`Skipping malformed line in ${filename}:`, parseError);
        }
      }
    } catch (error) {
      console.error(`Error reading file ${filename}:`, error);
    }

    return messages;
  }

  /**
   * Collects structured hints about a message: referenced files, tools used,
   * error lines, assistant insights, code snippets and action items.
   *
   * @param message - The raw message; its `type` and `message.content` are inspected.
   * @param content - The already extracted text content of the message.
   * @returns The populated context, or `undefined` when nothing was found.
   */
  private extractContext(message: ClaudeMessage, content: string): CompactMessage['context'] {
    const context: CompactMessage['context'] = {};

    const filesReferenced = this.extractFileReferences(content);
    if (filesReferenced.length > 0) {
      context.filesReferenced = filesReferenced;
    }

    const toolsUsed = this.extractToolsUsed(message, content);
    if (toolsUsed.length > 0) {
      context.toolsUsed = toolsUsed;
    }

    const errorPatterns = this.extractErrorPatterns(content);
    if (errorPatterns.length > 0) {
      context.errorPatterns = errorPatterns;
    }

    // Insights are only extracted from assistant messages.
    if (message.type === 'assistant') {
      const insights = this.extractClaudeInsights(content);
      if (insights.length > 0) {
        context.claudeInsights = insights;
      }
    }

    const codeSnippets = this.extractCodeSnippets(content);
    if (codeSnippets.length > 0) {
      context.codeSnippets = codeSnippets;
    }

    const actionItems = this.extractActionItems(content);
    if (actionItems.length > 0) {
      context.actionItems = actionItems;
    }

    return Object.keys(context).length > 0 ? context : undefined;
  }

  /** Returns the distinct file-like strings matched in `content`, in first-seen order. */
  private extractFileReferences(content: string): string[] {
    const filePatterns = [
      // Paths ending in a known file extension
      /[\w\-/\\.]+\.(ts|tsx|js|jsx|json|md|py|java|cpp|c|h|css|html|yml|yaml|toml|rs|go|txt|log|env|config|gitignore|lock|sql|sh|bat|php|rb|swift|kt|scala|fs|clj|ex|elm|vue|svelte|astro)(?:\b|$)/gi,
      // File paths in git status output
      // NOTE(review): `String.match` with a global regex yields the full match,
      // so entries from this pattern keep the "modified: " style prefix — confirm
      // whether the captured path alone was intended.
      /(?:modified|added|deleted|new file|renamed):\s+([^\n\r\t]+)/gi,
      // File paths with common prefixes
      /(?:src\/|\.\/|\.\.\/|~\/|\/)[^\s]+\.(ts|tsx|js|jsx|json|md|py|java|cpp|c|h|css|html|yml|yaml|toml|rs|go|txt|log|env|config|gitignore|lock|sql|sh|bat|php|rb|swift|kt|scala|fs|clj|ex|elm|vue|svelte|astro)/gi,
      // Standalone common files like CLAUDE.md, README.md, package.json
      /\b(CLAUDE\.md|README\.md|package\.json|tsconfig\.json|next\.config\.js|tailwind\.config\.js|vite\.config\.js|webpack\.config\.js|babel\.config\.js|eslint\.config\.js|prettier\.config\.js|\.env|\.gitignore|Dockerfile|docker-compose\.yml)\b/gi,
      /src\/[\w\-/\\.]+/gi,
      /\.\/[\w\-/\\.]+/gi,
    ];

    const files = new Set<string>();
    for (const pattern of filePatterns) {
      for (const found of content.match(pattern) ?? []) {
        files.add(found);
      }
    }
    return Array.from(files);
  }

  /** Strips a leading `mcp__<server>__` prefix and removes all `_` and `-` characters. */
  private normalizeToolName(rawName: string): string {
    return rawName.replace(/^mcp__.*?__/, '').replace(/[_-]/g, '');
  }

  /**
   * Returns the distinct, normalized tool names found either as `tool_use`
   * items in the message structure or as textual tool mentions in `content`.
   */
  private extractToolsUsed(message: ClaudeMessage, content: string): string[] {
    const tools = new Set<string>();

    // Source 1: tool_use items in the structured message content
    if (message.message?.content) {
      const items = Array.isArray(message.message.content)
        ? message.message.content
        : [message.message.content];

      items
        .filter((item) => item && item.type === 'tool_use' && item.name)
        .forEach((item) => {
          const cleanName = this.normalizeToolName(item.name);
          if (cleanName) tools.add(cleanName);
        });
    }

    // Source 2: tool usage patterns in the content text
    const toolPatterns = [
      /\[Tool:\s*(\w+)\]/gi, // Matches [Tool: Read], [Tool: Edit], etc.
      /Called the (\w+) tool/gi, // Matches "Called the Read tool"
      /\bmcp__[\w-]+__([\w-]+)/gi, // MCP tool calls
      /Result of calling the (\w+) tool/gi, // Tool results
      /tool_use.*?"name":\s*"([^"]+)"/gi, // JSON tool_use name extraction
    ];

    for (const pattern of toolPatterns) {
      // Reset the regex to ensure we start from the beginning
      pattern.lastIndex = 0;
      let match: RegExpExecArray | null;
      while ((match = pattern.exec(content)) !== null) {
        if (match[1]) {
          const cleanName = this.normalizeToolName(match[1]);
          if (cleanName) tools.add(cleanName);
        }
        // Prevent infinite loop on zero-length matches
        if (match.index === pattern.lastIndex) {
          pattern.lastIndex++;
        }
      }
    }

    return Array.from(tools);
  }

  /** Returns the distinct error-looking fragments of `content`, each cut to 100 characters. */
  private extractErrorPatterns(content: string): string[] {
    const errorPatterns = [
      /error[:\s]+([^\n]+)/gi,
      /failed[:\s]+([^\n]+)/gi,
      /exception[:\s]+([^\n]+)/gi,
      /cannot[:\s]+([^\n]+)/gi,
      /unable to[:\s]+([^\n]+)/gi,
    ];

    const errors = new Set<string>();
    for (const pattern of errorPatterns) {
      for (const found of content.match(pattern) ?? []) {
        errors.add(found.substring(0, 100));
      }
    }
    return Array.from(errors);
  }

  /**
   * Shortens `content` to at most `maxLength` characters while trying to keep
   * its most informative parts.
   *
   * Content that already fits is returned unchanged. Otherwise a value-based
   * extraction is tried first; if that yields nothing usable, a strategy is
   * chosen from the detected content type.
   *
   * @param content - Text to shorten.
   * @param maxLength - Upper bound for the returned string's length.
   * @returns The shortened text.
   */
  public smartContentPreservation(content: string, maxLength: number): string {
    if (content.length <= maxLength) return content;

    // First, extract the most valuable sentences/paragraphs
    const valuableContent = this.extractMostValuableContent(content, maxLength);

    // The structured extraction returns '' when none of its section patterns
    // match (e.g. a long fenced code block without a function, error or
    // solution section). An empty string is not a usable summary, so only a
    // non-empty result is accepted here.
    if (valuableContent.length > 0 && valuableContent.length <= maxLength) {
      return valuableContent;
    }

    // Detect content type and apply appropriate strategy
    let preserved: string;
    switch (this.detectContentType(content)) {
      case 'code':
        preserved = this.preserveCodeBlocks(content, maxLength);
        break;
      case 'error':
        preserved = this.preserveErrorMessages(content, maxLength);
        break;
      case 'technical':
        preserved = this.preserveTechnicalContent(content, maxLength);
        break;
      default:
        preserved = this.intelligentTruncation(content, maxLength);
    }

    // Final safety net so the length contract holds for every strategy.
    return preserved.length <= maxLength
      ? preserved
      : this.intelligentTruncation(preserved, maxLength);
  }

  /** Classifies `content` by checking code, error and technical markers, in that order. */
  private detectContentType(content: string): 'code' | 'error' | 'technical' | 'conversational' {
    // Code block detection
    if (
      content.includes('```') ||
      content.includes('function ') ||
      content.includes('const ') ||
      content.includes('import ') ||
      content.includes('export ') ||
      content.match(/\{\s*\n.*\}\s*$/s)
    ) {
      return 'code';
    }

    // Error message detection: an error keyword plus a location-like marker
    if (
      content.match(/(error|exception|failed|cannot|unable to|stack trace)/i) &&
      content.match(/at \w+|line \d+|:\d+:\d+/)
    ) {
      return 'error';
    }

    // Technical content detection
    if (
      content.match(/\.(ts|js|json|md|py|java|cpp|rs|go|yml|yaml)\b/) ||
      content.includes('src/') ||
      content.includes('./') ||
      content.match(/\w+:\d+/) ||
      content.includes('tool_use')
    ) {
      return 'technical';
    }

    return 'conversational';
  }

  /**
   * Keeps as many complete fenced code blocks as fit into `maxLength`. The
   * first block that does not fit is added truncated, preceded by up to 100
   * characters of the text before it, when there is room for that.
   * Content without fenced blocks is delegated to `preserveTechnicalContent`.
   */
  private preserveCodeBlocks(content: string, maxLength: number): string {
    const codeBlocks = content.match(/```[\s\S]*?```/g) || [];
    if (codeBlocks.length === 0) {
      // No code blocks, preserve function definitions and imports
      return this.preserveTechnicalContent(content, maxLength);
    }

    let preserved = '';
    let remainingLength = maxLength;

    for (const block of codeBlocks) {
      if (block.length <= remainingLength) {
        preserved += block + '\n';
        remainingLength -= block.length + 1;
        continue;
      }

      // The whole block does not fit: include context and truncate, but only
      // when the budget left for the block itself is positive. A negative
      // length passed to substring would be treated as 0 and the context plus
      // ellipsis alone could push the result past maxLength.
      const contextBefore = content.substring(0, content.indexOf(block)).slice(-100);
      const blockBudget = remainingLength - contextBefore.length - 3;
      if (blockBudget > 0) {
        preserved += contextBefore + block.substring(0, blockBudget) + '...';
      }
      break;
    }

    const trimmed = preserved.trim();
    return trimmed.length > 0 ? trimmed : this.intelligentTruncation(content, maxLength);
  }

  /**
   * Keeps the first error section if it fits; otherwise keeps the beginning of
   * the content together with the trailing stack-trace-like part, and falls
   * back to plain truncation when neither fits into `maxLength`.
   */
  private preserveErrorMessages(content: string, maxLength: number): string {
    const errorRegex = /(error|exception|failed)[\s\S]*?(\n\n|\n(?=[A-Z])|$)/gi;
    const errors = content.match(errorRegex) || [];
    const mainError = errors[0];

    if (mainError && mainError.length <= maxLength) {
      const suffix = errors.length > 1 ? '\n... (additional errors truncated)' : '';
      // Only append the hint when it still fits into the limit.
      return mainError.length + suffix.length <= maxLength ? mainError + suffix : mainError;
    }

    // If error is too long, preserve the beginning and any stack trace
    const stackTrace = content.match(/at [\s\S]*$/);
    if (stackTrace) {
      const headLength = maxLength - stackTrace[0].length - 10;
      // A non-positive head length means the trace alone exceeds the budget.
      if (headLength > 0) {
        return content.substring(0, headLength) + '\n...\n' + stackTrace[0];
      }
    }

    return this.intelligentTruncation(content, maxLength);
  }

  /**
   * Builds "<leading context>\n--- Key elements: <elements>" from file paths,
   * definitions, tool_use fragments and backtick spans found in `content`.
   * Falls back to plain truncation when nothing is found or it does not fit.
   */
  private preserveTechnicalContent(content: string, maxLength: number): string {
    const technicalElements: string[] = [];

    // File paths and line numbers
    const filePaths =
      content.match(/[\w\-/\\.]+\.(ts|js|json|md|py|java|cpp|rs|go|yml|yaml)(?::\d+)?/g) || [];
    technicalElements.push(...filePaths);

    // Function definitions
    const functions = content.match(/(function \w+|const \w+ =|export \w+|class \w+)/g) || [];
    technicalElements.push(...functions);

    // Tool usage
    const tools = content.match(/tool_use.*?"name":\s*"([^"]+)"/g) || [];
    technicalElements.push(...tools);

    // Commands
    const commands = content.match(/`[^`]+`/g) || [];
    technicalElements.push(...commands);

    if (technicalElements.length > 0) {
      const preserved = technicalElements.join(' | ');
      // 20 characters are reserved for the separator line between context and elements.
      const contextLength = maxLength - preserved.length - 20;
      if (contextLength >= 0) {
        const context = content.substring(0, contextLength);
        return context + '\n--- Key elements: ' + preserved;
      }
    }

    return this.intelligentTruncation(content, maxLength);
  }

  /**
   * Cuts `content` to at most `maxLength` characters, preferring a natural
   * boundary (paragraph, sentence, line, comma, space) located in the last
   * 30% of the allowed length, and appends an ellipsis.
   */
  private intelligentTruncation(content: string, maxLength: number): string {
    if (content.length <= maxLength) return content;

    // An ellipsis needs three characters; below that just hard-cut.
    if (maxLength < 3) return content.substring(0, Math.max(0, maxLength));

    // Try to truncate at natural boundaries
    const boundaries = ['\n\n', '. ', '! ', '? ', '\n', ', ', ' '];

    for (const boundary of boundaries) {
      const lastBoundary = content.lastIndexOf(boundary, maxLength - 3);
      if (lastBoundary > maxLength * 0.7) {
        // Don't truncate too early
        return content.substring(0, lastBoundary) + '...';
      }
    }

    // Fallback to character limit with ellipsis
    return content.substring(0, maxLength - 3) + '...';
  }

  /**
   * Extracts up to three "Solution: …" / "Explanation: …" fragments that
   * follow typical solution or explanation phrases in `content`.
   */
  private extractClaudeInsights(content: string): string[] {
    const insights: string[] = [];

    // Solution patterns
    const solutionPatterns = [
      /(?:solution|fix|resolve|answer)[:\s]*([^\n.]{20,200})/gi,
      /(?:here's how|to fix this|you can)[:\s]*([^\n.]{20,200})/gi,
      /(?:the issue is|problem is|cause is)[:\s]*([^\n.]{20,200})/gi,
      /(?:✅|✓|fixed|solved|resolved)[:\s]*([^\n.]{15,150})/gi,
    ];

    for (const pattern of solutionPatterns) {
      let match: RegExpExecArray | null;
      while ((match = pattern.exec(content)) !== null) {
        if (match[1] && match[1].trim().length > 15) {
          insights.push(`Solution: ${match[1].trim()}`);
        }
      }
    }

    // Explanation patterns
    const explanationPatterns = [
      /(?:this means|this is because|the reason)[:\s]*([^\n.]{25,250})/gi,
      /(?:explanation|basically|in other words)[:\s]*([^\n.]{25,200})/gi,
    ];

    for (const pattern of explanationPatterns) {
      let match: RegExpExecArray | null;
      while ((match = pattern.exec(content)) !== null) {
        if (match[1] && match[1].trim().length > 20) {
          insights.push(`Explanation: ${match[1].trim()}`);
        }
      }
    }

    return insights.slice(0, 3); // Keep the first three
  }

  /**
   * Extracts up to three code snippets: fenced blocks first (cut to 100
   * characters plus an ellipsis), then inline backtick spans of 10-80
   * characters that are not already contained in a collected snippet.
   */
  private extractCodeSnippets(content: string): string[] {
    const snippets: string[] = [];

    // Extract code blocks
    const codeBlockRegex = /```[\w]*\n([\s\S]*?)\n```/g;
    let match: RegExpExecArray | null;
    while ((match = codeBlockRegex.exec(content)) !== null) {
      if (match[1] && match[1].trim().length > 10) {
        const snippet = match[1].trim();
        snippets.push(snippet.length > 100 ? snippet.substring(0, 100) + '...' : snippet);
      }
    }

    // Extract inline code
    const inlineCodeRegex = /`([^`]{10,80})`/g;
    let inlineMatch: RegExpExecArray | null;
    while ((inlineMatch = inlineCodeRegex.exec(content)) !== null) {
      const inlineCode = inlineMatch[1];
      if (inlineCode && !snippets.some((existing) => existing.includes(inlineCode))) {
        snippets.push(inlineCode);
      }
    }

    return snippets.slice(0, 3); // Keep the first three
  }

  /**
   * Extracts up to four action-like fragments (next steps, commands,
   * recommendations, numbered and bulleted list items). A fragment is skipped
   * when its first 20 characters already occur in a collected one.
   */
  private extractActionItems(content: string): string[] {
    const actions: string[] = [];

    const actionPatterns = [
      /(?:next step|now|then|first|finally|to do)[:\s]*([^\n.]{15,150})/gi,
      /(?:run|execute|install|update|create|add|remove)[:\s]*([^\n.]{10,100})/gi,
      /(?:you should|you need to|you can)[:\s]*([^\n.]{15,150})/gi,
      /\d+\.\s+([^\n.]{15,150})/g, // Numbered lists
      /[-*]\s+([^\n.]{15,150})/g, // Bullet points
    ];

    for (const pattern of actionPatterns) {
      let match: RegExpExecArray | null;
      while ((match = pattern.exec(content)) !== null) {
        if (match[1] && match[1].trim().length > 10) {
          const action = match[1].trim();
          const prefix = action.substring(0, 20);
          if (!actions.some((existing) => existing.includes(prefix))) {
            actions.push(action);
          }
        }
      }
    }

    return actions.slice(0, 4); // Keep the first four
  }

  /**
   * Picks the highest-scoring sentences of `content` until `maxLength` is
   * reached. Structured content (code, errors, solutions) is delegated to
   * `preserveStructuredContent` instead, which may return an empty string.
   */
  private extractMostValuableContent(content: string, maxLength: number): string {
    // For structured content (code, errors), preserve original structure
    if (this.hasStructuredContent(content)) {
      return this.preserveStructuredContent(content, maxLength);
    }

    // High value keywords; each occurrence class adds 2 points
    const highValueTerms = [
      'solution', 'fix', 'error', 'problem', 'resolved', 'working', 'success',
      'function', 'class', 'import', 'export', 'const', 'let', 'var',
      'install', 'update', 'create', 'build', 'test', 'deploy',
      'file', 'path', 'directory', 'config', 'settings',
    ];

    // Markers of boilerplate that should never be selected
    const noiseMarkers = [
      'this session is being continued',
      'caveat:',
      'command-name>',
      'generated by the user while running',
      'local-command-stdout',
      'analysis:',
      'command-message>',
      'system-reminder',
    ];

    // NOTE(review): this tests the length of the whole content, not of the
    // individual sentence — confirm whether the sentence length was intended.
    const contentIsTiny = content.length < 50;

    // For conversational content, use sentence-based extraction
    const sentences = content.split(/[.!?]+/).filter((s) => s.trim().length > 20);

    const scoredSentences = sentences.map((sentence) => {
      let score = 0;
      const lowerSentence = sentence.toLowerCase();

      for (const term of highValueTerms) {
        if (lowerSentence.includes(term)) score += 2;
      }

      // Boost sentences with code or technical references
      if (
        sentence.includes('`') ||
        sentence.includes('/') ||
        sentence.includes('.ts') ||
        sentence.includes('.js')
      ) {
        score += 3;
      }

      // Boost sentences that explain outcomes or provide answers
      if (
        lowerSentence.includes('now') ||
        lowerSentence.includes('result') ||
        lowerSentence.includes('this will')
      ) {
        score += 2;
      }

      // Penalize short sentences
      if (sentence.length < 40) score -= 1;

      // Aggressively eliminate noise and short content
      if (contentIsTiny || noiseMarkers.some((marker) => lowerSentence.includes(marker))) {
        score -= 50;
      }

      return { sentence: sentence.trim(), score };
    });

    // Sort by score and build result
    const sortedSentences = scoredSentences
      .filter((s) => s.score > 0)
      .sort((a, b) => b.score - a.score);

    let result = '';
    for (const { sentence } of sortedSentences) {
      if (result.length + sentence.length + 2 > maxLength) break;
      result += sentence + '. ';
    }

    return result.trim() || content.substring(0, maxLength - 3) + '...';
  }

  /** True when `content` contains a marker of code, an error, a stack frame or a solution section. */
  private hasStructuredContent(content: string): boolean {
    return (
      content.includes('function ') ||
      content.includes('Error:') ||
      content.includes('Exception:') ||
      content.includes('```') ||
      content.match(/at \w+.*:\d+:\d+/) !== null ||
      content.includes('Solution:') ||
      content.includes('TypeError:')
    );
  }

  /**
   * Keeps the first function definition, error section and solution section
   * found in `content`, ordered by priority, within `maxLength`.
   *
   * @returns The joined sections, or an empty string when none was found.
   */
  private preserveStructuredContent(content: string, maxLength: number): string {
    const sections: Array<{ content: string; priority: number; type: string }> = [];

    // Extract function definitions
    const functionMatch = content.match(/function\s+\w+[^}]*\}/);
    if (functionMatch) {
      sections.push({ content: functionMatch[0], priority: 3, type: 'function' });
    }

    // Extract error messages
    const errorMatch = content.match(
      /(Error|Exception|TypeError):[^\n]*(\n[^\n]*)*?(?=\n\n|\n[A-Z]|$)/
    );
    if (errorMatch) {
      sections.push({ content: errorMatch[0], priority: 3, type: 'error' });
    }

    // Extract solutions
    const solutionMatch = content.match(/Solution:[^\n]*(\n[^\n]*)*?(?=\n\n|\n[A-Z]|$)/);
    if (solutionMatch) {
      sections.push({ content: solutionMatch[0], priority: 2, type: 'solution' });
    }

    // Sort by priority and fit within limit
    sections.sort((a, b) => b.priority - a.priority);

    let result = '';
    for (const section of sections) {
      if (result.length + section.content.length + 2 <= maxLength) {
        result += section.content + '\n\n';
        continue;
      }

      // Try to fit a truncated version, but only if a meaningful amount remains
      const remaining = maxLength - result.length - 5;
      if (remaining > 50) {
        result += section.content.substring(0, remaining) + '...';
      }
      break;
    }

    return result.trim();
  }

  /**
   * Creates or updates the bookkeeping entry of the message's session:
   * increments the message count, moves the end time to this message's
   * timestamp when that is valid, and moves the start time back when this
   * message is earlier than the recorded start.
   */
  private updateSessionInfo(message: ClaudeMessage, projectDir: string): void {
    const sessionId = message.sessionId;
    const hasValidTimestamp = this.isValidTimestamp(message.timestamp);

    let session = this.sessions.get(sessionId);
    if (!session) {
      // Without a usable timestamp the current time stands in for both bounds.
      const initialTime = hasValidTimestamp ? message.timestamp : new Date().toISOString();
      session = {
        sessionId,
        projectPath: decodeProjectPath(projectDir),
        startTime: initialTime,
        endTime: initialTime,
        messageCount: 0,
      };
      this.sessions.set(sessionId, session);
    }

    if (hasValidTimestamp) {
      session.endTime = message.timestamp;
    }
    session.messageCount++;

    // Update start time if this message is earlier (with timestamp validation)
    if (
      hasValidTimestamp &&
      this.isValidTimestamp(session.startTime) &&
      new Date(message.timestamp) < new Date(session.startTime)
    ) {
      session.startTime = message.timestamp;
    }
  }

  /** Returns the tracked session with the given id, if any. */
  getSession(sessionId: string): ConversationSession | undefined {
    return this.sessions.get(sessionId);
  }

  /** Returns all tracked sessions, most recently ended first. */
  getAllSessions(): ConversationSession[] {
    return Array.from(this.sessions.values()).sort(
      (a, b) => new Date(b.endTime).getTime() - new Date(a.endTime).getTime()
    );
  }

  /** True for a non-empty string that parses to a date whose year is after 2020. */
  private isValidTimestamp(timestamp: string): boolean {
    if (!timestamp || typeof timestamp !== 'string') return false;
    const date = new Date(timestamp);
    return !isNaN(date.getTime()) && date.getFullYear() > 2020;
  }
}

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Vvkmnn/claude-historian'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.