Skip to main content
Glama
ContentExtractor.ts (9.38 kB)
/**
 * ContentExtractor - Extracts components from DollhouseMCP skills for Anthropic conversion
 *
 * Identifies and extracts:
 * - Code blocks (bash, python, etc.) → scripts/
 * - Documentation sections → reference/
 * - Examples → examples/
 * - Main instructions (preserved in SKILL.md)
 *
 * SECURITY MODEL:
 * - This is a FORMAT ANALYSIS tool, not a security boundary
 * - Preserves content exactly as-is for mechanical transformation
 * - No modification, sanitization, or validation
 * - Used by converters which are format transformers, not security gates
 */

/** One extractable piece of a skill document, as reported by {@link ContentExtractor}. */
export interface ExtractedSection {
  type: 'code' | 'documentation' | 'example' | 'main';
  /** Fence info string of a code block (text following the opening backticks). */
  language?: string;
  title: string;
  /** Raw lines between the fences, joined with `\n`; never modified. */
  content: string;
  /** Zero-based index of the opening fence line. */
  startLine: number;
  /** Zero-based index of the closing fence line. */
  endLine: number;
  filename?: string; // Suggested filename for extraction
}

/** Mutable parser state threaded through the line-by-line scan in `extractSections`. */
interface ExtractionState {
  inCodeBlock: boolean;
  codeBlockStart: number;
  codeBlockLanguage: string;
  codeBlockContent: string[];
  currentSection: string;
  sectionStart: number;
}

export class ContentExtractor {
  /**
   * Fence languages that are extracted as scripts, mapped to the file
   * extension used for the suggested filename.
   */
  private static readonly SCRIPT_EXTENSIONS: ReadonlyMap<string, string> = new Map([
    ['bash', 'sh'],
    ['sh', 'sh'],
    ['python', 'py'],
    ['py', 'py'],
    ['javascript', 'js'],
    ['js', 'js'],
    ['typescript', 'ts'],
    ['ts', 'ts']
  ]);

  /**
   * Parse DollhouseMCP markdown content and identify extractable sections.
   *
   * Currently only fenced code blocks are reported; `##`-style headings are
   * tracked solely to give code blocks a title and filename.
   *
   * @param content - Full markdown text of the skill.
   * @returns Extractable code blocks in document order.
   */
  extractSections(content: string): ExtractedSection[] {
    // NOTE: No Unicode normalization - preserves content fidelity for conversion
    const sections: ExtractedSection[] = [];
    const state: ExtractionState = {
      inCodeBlock: false,
      codeBlockStart: 0,
      codeBlockLanguage: '',
      codeBlockContent: [],
      currentSection: '',
      sectionStart: 0
    };

    content.split('\n').forEach((line, index) => {
      this.processLine(line, index, state, sections);
    });

    return sections;
  }

  /**
   * Classify a single line (fence, code-block body, or heading) and update state.
   */
  private processLine(
    line: string,
    lineIndex: number,
    state: ExtractionState,
    sections: ExtractedSection[]
  ): void {
    if (line.startsWith('```')) {
      this.handleCodeBlockBoundary(line, lineIndex, state, sections);
    } else if (state.inCodeBlock) {
      state.codeBlockContent.push(line);
    } else if (line.startsWith('##')) {
      this.handleSectionHeader(line, lineIndex, state);
    }
  }

  /**
   * Toggle code-block state on a fence line; a closing fence may emit a section.
   */
  private handleCodeBlockBoundary(
    line: string,
    lineIndex: number,
    state: ExtractionState,
    sections: ExtractedSection[]
  ): void {
    if (state.inCodeBlock) {
      // End of code block
      state.inCodeBlock = false;
      this.addCodeBlockIfExtractable(lineIndex, state, sections);
      return;
    }

    // Start of code block: everything after the three backticks is the language.
    state.inCodeBlock = true;
    state.codeBlockStart = lineIndex;
    state.codeBlockLanguage = line.substring(3).trim();
    state.codeBlockContent = [];
  }

  /**
   * Append the just-closed code block to `sections` when it qualifies for extraction.
   */
  private addCodeBlockIfExtractable(
    endLineIndex: number,
    state: ExtractionState,
    sections: ExtractedSection[]
  ): void {
    const { codeBlockLanguage, codeBlockContent, codeBlockStart, currentSection } = state;
    if (!this.shouldExtractCodeBlock(codeBlockLanguage, codeBlockContent)) {
      return;
    }

    sections.push({
      type: 'code',
      language: codeBlockLanguage,
      title: this.inferCodeBlockTitle(codeBlockContent, currentSection),
      content: codeBlockContent.join('\n'),
      startLine: codeBlockStart,
      endLine: endLineIndex,
      filename: this.generateScriptFilename(codeBlockLanguage, codeBlockContent, currentSection)
    });
  }

  /**
   * Record the heading that subsequent code blocks belong to.
   */
  private handleSectionHeader(
    line: string,
    lineIndex: number,
    state: { currentSection: string; sectionStart: number }
  ): void {
    // Strip every leading '#', not just two, so '### Title' yields 'Title'
    // rather than '# Title'.
    state.currentSection = line.replace(/^#+/, '').trim();
    state.sectionStart = lineIndex;
    // Check if this section should be extracted
    // (Currently just tracking for context - full extraction logic not implemented)
    // Future enhancement: use shouldExtractSection result for section extraction
  }

  /**
   * Determine if a code block should be extracted to a separate file:
   * it must use a known script language and be more than 3 lines long.
   */
  private shouldExtractCodeBlock(language: string, content: string[]): boolean {
    return ContentExtractor.SCRIPT_EXTENSIONS.has(language.toLowerCase()) && content.length > 3;
  }

  /**
   * Determine if a documentation section should be extracted
   * (case-insensitive substring match against a fixed list of titles).
   */
  private shouldExtractSection(sectionTitle: string): boolean {
    const extractableSections = [
      'input formats',
      'error handling',
      'supported clients',
      'command building',
      'configuration',
      'api reference',
      'troubleshooting'
    ];
    const normalizedTitle = sectionTitle.toLowerCase();
    return extractableSections.some(pattern => normalizedTitle.includes(pattern));
  }

  /**
   * Generate a filename for an extracted script.
   *
   * Preference order: leading `#` comment of the block, then the enclosing
   * section title, then the generic `script`. A candidate whose slug comes out
   * empty (e.g. only punctuation or non-ASCII text) is skipped so the result
   * is never an extension-only name such as `.sh`.
   */
  private generateScriptFilename(language: string, content: string[], section: string): string {
    const extension = this.getExtension(language);

    // Common patterns: "# Pre-execution checks", "# Install server", etc.
    const candidate = this.firstDescriptiveLine(content);
    if (candidate.startsWith('#')) {
      const commentSlug = this.slugify(/^#\s*(.+)/.exec(candidate)?.[1] ?? '');
      if (commentSlug) {
        return `${commentSlug}.${extension}`;
      }
    }

    const sectionSlug = this.slugify(section);
    if (sectionSlug) {
      return `${sectionSlug}.${extension}`;
    }

    return `script.${extension}`;
  }

  /**
   * Infer title for code block from its leading comment, falling back to the
   * enclosing section title and finally to `'Script'`.
   */
  private inferCodeBlockTitle(content: string[], section: string): string {
    const candidate = this.firstDescriptiveLine(content);
    if (candidate.startsWith('#') || candidate.startsWith('//')) {
      const commentText = candidate.replace(/^[#/\s]+/, '').trim();
      // A bare comment marker carries no title; fall through to the section.
      if (commentText) {
        return commentText;
      }
    }
    return section || 'Script';
  }

  /**
   * Return the line to inspect for a descriptive comment: the first line of
   * the block, or the second when the first is a shebang (`#!...`), which
   * names an interpreter rather than describing the script.
   */
  private firstDescriptiveLine(content: string[]): string {
    const first = content[0] ?? '';
    return first.startsWith('#!') ? content[1] ?? '' : first;
  }

  /**
   * Lower-case `text`, drop everything except ASCII letters, digits,
   * whitespace and hyphens, turn whitespace runs into single hyphens, and trim
   * hyphens from both ends. May return an empty string.
   */
  private slugify(text: string): string {
    return text
      .toLowerCase()
      .replace(/[^a-z0-9\s-]/g, '')
      .replace(/\s+/g, '-')
      .replace(/^-+|-+$/g, '');
  }

  /**
   * Get file extension for language; unknown languages map to `txt`.
   */
  private getExtension(language: string): string {
    return ContentExtractor.SCRIPT_EXTENSIONS.get(language.toLowerCase()) ?? 'txt';
  }

  /**
   * Number of leading `#` characters if `line` is treated as a heading, else 0.
   * A single `#` only counts when followed by whitespace, so `#hashtag` is not
   * a heading; two or more hashes always count.
   */
  private headingLevel(line: string): number {
    const hashes = /^#+/.exec(line)?.[0].length ?? 0;
    if (hashes === 1 && !/^#\s/.test(line)) {
      return 0;
    }
    return hashes;
  }

  /**
   * Extract complete documentation section (including subsections).
   *
   * Capture starts at the first heading of level 2 or deeper whose title
   * contains `sectionTitle` (case-insensitive) and stops before the next
   * heading of the same or a shallower level. Lines inside fenced code blocks
   * are never interpreted as headings.
   *
   * @param content - Full markdown text.
   * @param sectionTitle - Substring to look for in heading titles.
   * @returns The section text including its heading line, or `null` if no heading matched.
   */
  extractDocumentationSection(content: string, sectionTitle: string): string | null {
    // NOTE: No Unicode normalization - preserves content fidelity
    const wanted = sectionTitle.toLowerCase();
    const captured: string[] = [];
    let capturing = false;
    let sectionLevel = 0;
    let inCodeBlock = false;

    for (const line of content.split('\n')) {
      if (line.startsWith('```')) {
        inCodeBlock = !inCodeBlock;
      } else if (!inCodeBlock) {
        const level = this.headingLevel(line);
        if (capturing && level > 0 && level <= sectionLevel) {
          // End of section
          break;
        }
        if (!capturing && level >= 2 && line.substring(level).trim().toLowerCase().includes(wanted)) {
          capturing = true;
          sectionLevel = level;
        }
      }

      if (capturing) {
        captured.push(line);
      }
    }

    return captured.length > 0 ? captured.join('\n') : null;
  }
}

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/DollhouseMCP/mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.