Skip to main content
Glama
sir_schema.py (4.52 kB)
"""SIR (Structured Intermediate Representation) schema.

SIR implementation based on the article "How Agents Can Efficiently Process
Markdown: Structured Parsing and Semantic Editing".

SIR is a structured intermediate representation that converts a Markdown
document into a semantic tree structure, so that agents can perform
semantic-level editing operations instead of manipulating the text directly.
"""

from dataclasses import dataclass
from datetime import datetime
from enum import Enum
from typing import Any, Dict, List, Literal, Optional, TypedDict


class NodeType(str, Enum):
    """Enumeration of SIR node types.

    Members are also ``str`` instances, so they compare equal to their
    string values (e.g. ``NodeType.HEADING == "heading"``).
    """

    DOCUMENT = "document"
    SECTION = "section"
    HEADING = "heading"
    PARAGRAPH = "paragraph"
    CODE_BLOCK = "code_block"
    LIST = "list"
    LIST_ITEM = "list_item"
    TABLE = "table"
    TABLE_ROW = "table_row"
    TABLE_CELL = "table_cell"
    BLOCKQUOTE = "blockquote"
    HR = "hr"
    HTML_BLOCK = "html_block"
    INLINE = "inline"


class HeadingLevel(int, Enum):
    """Enumeration of heading levels (H1 through H6).

    Members are also ``int`` instances, so ``HeadingLevel.H3 == 3``.
    """

    H1 = 1
    H2 = 2
    H3 = 3
    H4 = 4
    H5 = 5
    H6 = 6


class SourcePosition(TypedDict):
    """A single position in the source text."""

    # NOTE(review): whether line/column/offset are 0- or 1-based is not
    # established by this module -- confirm against the parser that fills them.
    line: int
    column: int
    offset: int


class SourceLocation(TypedDict):
    """A start/end range in the source text, with an optional file name."""

    start: SourcePosition
    end: SourcePosition
    filename: Optional[str]


class SIRMetadata(TypedDict):
    """SIR document metadata (see ``create_sir_metadata`` for the defaults)."""

    version: str
    generator: str
    created_at: str
    source_file: Optional[str]
    stats: Dict[str, Any]


class SIRNode(TypedDict):
    """Base shape shared by every SIR node."""

    id: str
    type: NodeType
    content: Optional[str]
    children: List['SIRNode']
    attributes: Dict[str, Any]
    source_location: Optional[SourceLocation]
    parent_id: Optional[str]


class HeadingNode(SIRNode):
    """Heading node."""

    type: Literal[NodeType.HEADING]
    level: HeadingLevel
    title: str
    anchor: Optional[str]
    auto_number: Optional[str]


class ParagraphNode(SIRNode):
    """Paragraph node."""

    type: Literal[NodeType.PARAGRAPH]


class CodeBlockNode(SIRNode):
    """Code block node."""

    type: Literal[NodeType.CODE_BLOCK]
    language: Optional[str]
    info: Optional[str]


class ListNode(SIRNode):
    """List node."""

    type: Literal[NodeType.LIST]
    ordered: bool
    start: Optional[int]
    tight: bool


class ListItemNode(SIRNode):
    """List item node."""

    type: Literal[NodeType.LIST_ITEM]
    checked: Optional[bool]
    spread: bool


class TableNode(SIRNode):
    """Table node."""

    type: Literal[NodeType.TABLE]
    header: bool
    # One entry per alignment slot; ``None`` means no explicit alignment.
    align: List[Optional[Literal['left', 'right', 'center']]]


class TableRowNode(SIRNode):
    """Table row node."""

    type: Literal[NodeType.TABLE_ROW]
    is_header: bool


class TableCellNode(SIRNode):
    """Table cell node."""

    type: Literal[NodeType.TABLE_CELL]


class BlockquoteNode(SIRNode):
    """Blockquote node."""

    type: Literal[NodeType.BLOCKQUOTE]


class SIRDocument(TypedDict):
    """Complete SIR representation of a document."""

    metadata: SIRMetadata
    ast: SIRNode
    source_map: Dict[str, SourceLocation]
    errors: List[Dict[str, Any]]
    warnings: List[Dict[str, Any]]


# Default value for ``SIRConfig.allowed_html_tags``. Kept immutable here and
# copied into a fresh list per instance so configs never share mutable state.
_DEFAULT_ALLOWED_HTML_TAGS = (
    'div', 'span', 'p', 'br', 'hr', 'a', 'img',
    'strong', 'em', 'code', 'pre', 'blockquote',
    'ul', 'ol', 'li',
)


@dataclass
class SIRConfig:
    """SIR configuration options.

    ``allowed_html_tags`` may be omitted or passed as ``None``; in both cases
    ``__post_init__`` replaces it with a fresh copy of the default tag list.
    """

    # Whether to preserve source location information.
    preserve_source_locations: bool = True

    # Whether to generate anchors.
    generate_anchors: bool = True

    # Whether to number headings automatically.
    auto_number_headings: bool = False

    # Whether to parse inline formatting.
    parse_inline_formats: bool = True

    # Whether to validate the document structure.
    validate_structure: bool = True

    # Maximum nesting depth.
    max_nesting_depth: int = 20

    # Allowed HTML tags. ``None`` is a sentinel meaning "use the defaults";
    # the annotation is Optional so it matches the ``None`` default.
    allowed_html_tags: Optional[List[str]] = None

    def __post_init__(self):
        """Fill in the default HTML tag list when none was supplied."""
        if self.allowed_html_tags is None:
            self.allowed_html_tags = list(_DEFAULT_ALLOWED_HTML_TAGS)


# Type aliases, for convenience.
SIRNodeType = SIRNode
SIRTree = List[SIRNode]


def create_sir_metadata(source_file: Optional[str] = None) -> SIRMetadata:
    """Create the default SIR metadata.

    Args:
        source_file: Value stored under ``"source_file"``; defaults to ``None``.

    Returns:
        A new metadata dict with version ``"1.0.0"``, the generator name, the
        current timestamp in ISO 8601 format, and all stats counters at zero.
    """
    return {
        "version": "1.0.0",
        "generator": "markdown-toc-mcp-sir",
        # ``datetime.now()`` without a tz yields a naive local-time timestamp.
        "created_at": datetime.now().isoformat(),
        "source_file": source_file,
        "stats": {
            "node_count": 0,
            "heading_count": 0,
            "paragraph_count": 0,
            "code_block_count": 0,
            "list_count": 0,
            "table_count": 0
        }
    }

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/ForceInjection/markdown-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.