Skip to main content
Glama

Django Styleguide MCP Server

by dmmeteo
main.py•7.76 kB
"""MCP markdown splitter - Split large markdown files into smaller documents.""" import os import re import shutil from pathlib import Path from typing import List, Dict, Optional from markdown_it import MarkdownIt def generate_docs( input_file: str, output_dir: str = "docs", url_prefix: str = "https://example.com", base_path: str = "/docs", max_level: int = 6, toc_file: str = "llms.txt", ) -> None: """ Split a large markdown file into smaller files and generate TOC with absolute URLs. Uses a single-pass algorithm through the AST for maximum efficiency. Args: input_file: Path to the large markdown file to split output_dir: Directory to save the split files (default: "docs") url_prefix: URL prefix for absolute links (e.g., "https://example.com") base_path: Base path for docs (e.g., "/docs") max_level: Maximum header level to split at (1=H1, 2=H2, 3=H3, etc.) toc_file: Path to the TOC file to generate (default: "llms.txt") Raises: FileNotFoundError: If input file doesn't exist ValueError: If max_level is invalid """ if not os.path.exists(input_file): raise FileNotFoundError(f"Input file not found: {input_file}") if max_level < 1 or max_level > 6: raise ValueError("max_level must be between 1 and 6") # Clean up existing docs directory if os.path.exists(output_dir): shutil.rmtree(output_dir) print(f"Cleaned up existing directory: {output_dir}") # Ensure output directory exists Path(output_dir).mkdir(parents=True, exist_ok=True) # Read the input file try: with open(input_file, "r", encoding="utf-8") as f: content = f.read() except UnicodeDecodeError as e: raise ValueError(f"Unable to read file {input_file}: {e}") # Parse markdown to AST md = MarkdownIt("commonmark") tokens = md.parse(content) # Single-pass algorithm: parse AST and generate files + TOC simultaneously sections_generated = 0 # Ensure TOC directory exists toc_dir = os.path.dirname(toc_file) if toc_dir and not os.path.exists(toc_dir): Path(toc_dir).mkdir(parents=True, exist_ok=True) try: with open(toc_file, "w", encoding="utf-8") as toc_file_handle: toc_file_handle.write("# Table of Contents\n\n") current_section = None content_lines = content.split('\n') section_starts = [] # Store all section start positions # First pass: collect all section starts for i, token in enumerate(tokens): if token.type == "heading_open" and int(token.tag[1]) <= max_level: header_text = extract_heading_text(tokens, i) if header_text and hasattr(token, 'map') and token.map: section_starts.append({ "line": token.map[0], "header": header_text, "level": int(token.tag[1]), "filename": generate_filename(header_text) }) # Second pass: generate sections and TOC for idx, section_info in enumerate(section_starts): # Write to TOC level = section_info["level"] header_text = section_info["header"] filename = section_info["filename"] url = f"{url_prefix.rstrip('/')}{base_path.rstrip('/')}/{filename}" indent = " " * (level - 1) toc_file_handle.write(f"{indent}- [{header_text}]({url})\n") # Determine section boundaries start_line = section_info["line"] if idx + 1 < len(section_starts): end_line = section_starts[idx + 1]["line"] else: end_line = len(content_lines) # Save section section = { "filename": filename, "header": header_text, "level": level, "start_line": start_line, "end_line": end_line } save_section_by_lines(section, content_lines, output_dir) sections_generated += 1 except Exception as e: print(f"Warning: Failed to write TOC file {toc_file}: {e}") print(f"Documentation generated successfully!") print(f"Files saved to: {output_dir}") print(f"TOC saved to: {toc_file}") print(f"Generated {sections_generated} files (filtered by max_level={max_level})") def extract_heading_text(tokens: List, heading_open_idx: int) -> Optional[str]: """ Extract heading text from tokens starting at heading_open token. Args: tokens: List of all tokens heading_open_idx: Index of heading_open token Returns: Heading text or None if not found """ # Look for the inline token that contains the heading text for i in range(heading_open_idx + 1, min(heading_open_idx + 3, len(tokens))): token = tokens[i] if token.type == "inline": return token.content return None def save_section_by_lines(section: Dict, content_lines: List[str], output_dir: str) -> None: """ Save a section to file using line-based approach for perfect reconstruction. Args: section: Section dictionary with line positions content_lines: All lines from the original content output_dir: Output directory path """ filepath = os.path.join(output_dir, section["filename"]) try: start_line = section["start_line"] end_line = section.get("end_line", len(content_lines)) # Extract section content directly from original lines section_lines = content_lines[start_line:end_line] content = "\n".join(section_lines).strip() with open(filepath, "w", encoding="utf-8") as f: f.write(content) except Exception as e: print(f"Warning: Failed to write file {filepath}: {e}") def generate_filename(header_text: str) -> str: """ Generate a URL-safe filename from header text. This follows similar logic to md_toc's anchor generation but for filenames. Args: header_text: The header text to convert to filename Returns: A safe filename with .md extension """ # Convert to lowercase filename = header_text.lower() # Replace spaces and special characters with hyphens filename = re.sub(r"[^\w\s-]", "", filename) filename = re.sub(r"[-\s]+", "-", filename) # Remove leading/trailing hyphens filename = filename.strip("-") # Ensure we have something if not filename: filename = "untitled" # Add .md extension return f"{filename}.md" def generate_anchor_link(header_text: str) -> str: """ Generate anchor link from header text using GitHub-style formatting. This mimics md_toc's anchor generation logic. Args: header_text: The header text to convert to anchor Returns: GitHub-style anchor link starting with # """ # Convert to lowercase anchor = header_text.lower() # Replace spaces with hyphens anchor = re.sub(r"\s+", "-", anchor) # Remove special characters except hyphens anchor = re.sub(r"[^\w\-]", "", anchor) # Remove multiple consecutive hyphens anchor = re.sub(r"-+", "-", anchor) # Remove leading/trailing hyphens anchor = anchor.strip("-") return f"#{anchor}"

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/dmmeteo/django-styleguide-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server