Skip to main content
Glama
general_parser.py (6.41 kB)
"""Parser for general PrestaShop documentation files.""" from pathlib import Path from typing import Dict, Optional from .base_parser import BaseParser from ..config import DOCS_PATH, DOC_TYPES class GeneralParser(BaseParser): """Fallback parser for general PrestaShop documentation. This parser handles all documentation files that don't match more specialized parsers (hooks, guides, etc.). It extracts basic frontmatter and content, making all documentation searchable. Handles: - Concept documentation - Architecture documentation - Reference documentation - Contributing guides - Project information - FAQ entries - Any other markdown documentation """ def can_parse(self, file_path: Path, frontmatter: Dict) -> bool: """Check if this parser can handle the file. The GeneralParser accepts any markdown file as a fallback, so it always returns True. It should be registered last in the ParserRegistry. Args: file_path: Path to the file frontmatter: Extracted YAML frontmatter Returns: True (always accepts as fallback parser) """ # This is the fallback parser - it accepts everything # It should be registered last in the parser registry return True def parse(self, file_path: Path) -> Optional[Dict]: """Parse a general PrestaShop documentation file. 
Args: file_path: Path to the documentation file Returns: Parsed documentation data or None """ if self.should_skip_file(file_path): return None try: content = file_path.read_text(encoding="utf-8") except Exception as e: print(f"Error reading {file_path}: {e}") return None # Extract frontmatter frontmatter, markdown_content = self.extract_frontmatter(content) # Skip empty files if not markdown_content.strip(): return None # Extract basic metadata title = frontmatter.get("title", "") if not title: title = frontmatter.get("menuTitle", "") if not title: title = file_path.stem.replace("-", " ").replace("_", " ").title() menu_title = frontmatter.get("menuTitle", "") weight = frontmatter.get("weight") description = frontmatter.get("description", "") # If no description in frontmatter, try to extract from content if not description: description = self._extract_description(markdown_content) # Extract code examples code_examples = self.extract_code_examples(markdown_content) # Determine doc_type based on category/path doc_type = self._infer_doc_type(file_path, frontmatter) # Get category and subcategory category = self.get_category_from_path(file_path, DOCS_PATH) subcategory = self.get_subcategory_from_path(file_path, DOCS_PATH) # Build relative path try: relative_path = str(file_path.relative_to(DOCS_PATH)) except ValueError: relative_path = file_path.name # Clean title clean_title = self.clean_title(title) return { "name": file_path.stem, "title": clean_title, "category": category, "subcategory": subcategory, "doc_type": doc_type, "path": relative_path, "origin": "", "location": "", "content": markdown_content, "version": self.extract_version(frontmatter, markdown_content), "metadata": { "menu_title": menu_title, "weight": weight, "description": description, "code_examples": code_examples, "frontmatter": frontmatter, # Store full frontmatter for flexibility }, } def _extract_description(self, markdown: str) -> str: """Extract the main description from markdown content. 
Args: markdown: Markdown content Returns: Main description text (max 500 chars) """ # Get first paragraph before any headers or code blocks lines = markdown.split("\n") description_lines = [] for line in lines: stripped = line.strip() # Skip Hugo shortcodes if stripped.startswith("{{"): continue # Stop at headers or code blocks (unless we haven't found content yet) if stripped.startswith("#") or stripped.startswith("```"): if description_lines: break else: continue # Collect non-empty lines if stripped: description_lines.append(stripped) elif description_lines: # Empty line after content - we're done break description = " ".join(description_lines) # Limit to 500 characters if len(description) > 500: description = description[:497] + "..." return description def _infer_doc_type(self, file_path: Path, frontmatter: Dict) -> str: """Infer the document type based on path and frontmatter. Args: file_path: Path to the file frontmatter: Extracted frontmatter Returns: Document type string """ path_str = str(file_path).lower() category = self.get_category_from_path(file_path, DOCS_PATH) # FAQ category if category == "faq": return DOC_TYPES["faq"] # API categories if category in ["webservice", "admin-api"]: return DOC_TYPES["api"] # Component documentation if "components" in path_str: return DOC_TYPES["component"] # Reference documentation if "reference" in path_str or "references" in path_str: return DOC_TYPES["reference"] # Check frontmatter for type hints if "type" in frontmatter: doc_type_hint = str(frontmatter["type"]).lower() if doc_type_hint in DOC_TYPES.values(): return doc_type_hint # Default to general return DOC_TYPES["general"]

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/florinel-chis/prestashop-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.