guide_parser.py (9.13 kB)
"""Parser for PrestaShop guide and tutorial documentation files.""" import re from pathlib import Path from typing import Dict, List, Optional from .base_parser import BaseParser from ..config import DOCS_PATH, DOC_TYPES class GuideParser(BaseParser): """Parser for PrestaShop guide and tutorial documentation. Handles: - Installation guides (basics/) - Deployment documentation (basics/, scale/) - Configuration guides - Testing documentation (testing/) - Scale documentation (scale/) """ # Keywords that indicate a guide or tutorial GUIDE_KEYWORDS = [ "installation", "deploy", "deployment", "setup", "configuration", "getting started", "quick start", "prerequisites", ] TUTORIAL_KEYWORDS = [ "tutorial", "how to", "step by step", "walkthrough", "example", ] # Categories that typically contain guides GUIDE_CATEGORIES = ["basics", "scale", "testing"] def can_parse(self, file_path: Path, frontmatter: Dict) -> bool: """Check if this is a guide or tutorial file. Args: file_path: Path to the file frontmatter: Extracted YAML frontmatter Returns: True if this is a guide/tutorial documentation file """ # Check if file is in guide categories path_str = str(file_path) category = self.get_category_from_path(file_path, DOCS_PATH) if category in self.GUIDE_CATEGORIES: # Not all files in these categories are guides, but it's a good indicator # We'll also check content/frontmatter # Check frontmatter for guide indicators title = frontmatter.get("title", "").lower() menu_title = frontmatter.get("menuTitle", "").lower() # Check if it's a tutorial for keyword in self.TUTORIAL_KEYWORDS: if keyword in title or keyword in menu_title: return True # Check if it's a guide for keyword in self.GUIDE_KEYWORDS: if keyword in title or keyword in menu_title or keyword in path_str.lower(): return True # Files in basics/installation/ are almost always guides if "basics/installation" in path_str: return True # Files in scale/deployment/ are almost always guides if "scale/deployment" in path_str or "scale/performance" in path_str: return True # Files in testing/ about setup are guides if "testing" in path_str and any(kw in path_str.lower() for kw in ["setup", "installation", "configuration"]): return True return False def parse(self, file_path: Path) -> Optional[Dict]: """Parse a PrestaShop guide/tutorial file. 
Args: file_path: Path to the guide file Returns: Parsed guide data or None """ if self.should_skip_file(file_path): return None try: content = file_path.read_text(encoding="utf-8") except Exception as e: print(f"Error reading {file_path}: {e}") return None # Extract frontmatter frontmatter, markdown_content = self.extract_frontmatter(content) if not frontmatter and not markdown_content: return None # Determine if this is a tutorial or guide title = frontmatter.get("title", file_path.stem) title_lower = title.lower() path_lower = str(file_path).lower() doc_type = DOC_TYPES["guide"] # Default to guide # Check if it's a tutorial for keyword in self.TUTORIAL_KEYWORDS: if keyword in title_lower or keyword in path_lower: doc_type = DOC_TYPES["tutorial"] break # Extract metadata menu_title = frontmatter.get("menuTitle", "") weight = frontmatter.get("weight") description = frontmatter.get("description", "") # If no description in frontmatter, try to extract from content if not description: description = self._extract_description(markdown_content) # Extract code examples code_examples = self.extract_code_examples(markdown_content) # Extract steps if this is a tutorial steps = [] if doc_type == DOC_TYPES["tutorial"]: steps = self._extract_steps(markdown_content) # Extract prerequisites prerequisites = self._extract_prerequisites(markdown_content) # Get category and subcategory category = self.get_category_from_path(file_path, DOCS_PATH) subcategory = self.get_subcategory_from_path(file_path, DOCS_PATH) # Build relative path try: relative_path = str(file_path.relative_to(DOCS_PATH)) except ValueError: relative_path = file_path.name # Clean title clean_title = self.clean_title(title) return { "name": file_path.stem, "title": clean_title, "category": category, "subcategory": subcategory, "doc_type": doc_type, "path": relative_path, "origin": "", # Guides don't have origin like hooks "location": "", # Guides don't have location like hooks "content": markdown_content, "version": self.extract_version(frontmatter, markdown_content), "metadata": { "menu_title": menu_title, "weight": weight, "description": description, "code_examples": code_examples, "steps": steps, "prerequisites": prerequisites, }, } def _extract_description(self, markdown: str) -> str: """Extract the main description from markdown content. Args: markdown: Markdown content Returns: Main description text """ # Get first paragraph before any headers or code blocks lines = markdown.split("\n") description_lines = [] for line in lines: stripped = line.strip() # Stop at headers, code blocks, or lists if stripped.startswith("#") or stripped.startswith("```") or stripped.startswith("-") or stripped.startswith("*"): if description_lines: # Only break if we already have content break else: continue # Skip leading headers/lists if stripped and not stripped.startswith("{{"): # Skip Hugo shortcodes description_lines.append(stripped) elif description_lines and not stripped: # Empty line after content - we're done break return " ".join(description_lines)[:500] # Limit to 500 chars def _extract_steps(self, markdown: str) -> List[str]: """Extract numbered steps from tutorial content. Args: markdown: Markdown content Returns: List of step descriptions """ steps = [] # Look for numbered lists (1. 2. 3. 
or 1) 2) 3)) step_pattern = r"^(?:\d+[\.\)])\s+(.+)$" lines = markdown.split("\n") for line in lines: match = re.match(step_pattern, line.strip()) if match: steps.append(match.group(1)) # Also look for headers like "## Step 1:" or "### Step 1" step_header_pattern = r"^#{2,4}\s+Step\s+\d+[:\s]+(.+)$" for line in lines: match = re.match(step_header_pattern, line.strip(), re.IGNORECASE) if match: steps.append(match.group(1)) return steps[:20] # Limit to 20 steps def _extract_prerequisites(self, markdown: str) -> List[str]: """Extract prerequisites from content. Args: markdown: Markdown content Returns: List of prerequisites """ prerequisites = [] # Look for prerequisite sections lines = markdown.split("\n") in_prereq_section = False for i, line in enumerate(lines): stripped = line.strip() # Check for prerequisite headers if re.match(r"^#{2,4}\s+(prerequisite|requirements|before you begin)", stripped, re.IGNORECASE): in_prereq_section = True continue # If we're in the section, collect list items if in_prereq_section: # Stop at next header if stripped.startswith("#"): break # Collect list items if stripped.startswith("-") or stripped.startswith("*"): prereq = stripped.lstrip("-*").strip() if prereq: prerequisites.append(prereq) return prerequisites[:10] # Limit to 10 prerequisites
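
A rough usage sketch follows. It assumes BaseParser can be constructed without arguments and that the package exposes GuideParser and DOCS_PATH roughly as the relative imports above suggest; the import paths, the collect_guides helper, and the sample markdown are illustrative and not taken from the actual project.

# Usage sketch (illustrative, not from the project itself): walk the docs tree,
# let GuideParser pick out guide/tutorial pages, and collect the parsed records.
from pathlib import Path

from prestashop_docs.config import DOCS_PATH              # hypothetical package layout
from prestashop_docs.parsers.guide_parser import GuideParser


def collect_guides() -> list:
    parser = GuideParser()  # assumes BaseParser needs no constructor arguments
    guides = []

    for md_file in Path(DOCS_PATH).rglob("*.md"):
        raw = md_file.read_text(encoding="utf-8")
        frontmatter, _ = parser.extract_frontmatter(raw)

        if parser.can_parse(md_file, frontmatter):
            parsed = parser.parse(md_file)
            if parsed:
                guides.append(parsed)

    return guides


# The regex-based helpers can also be exercised directly on a small snippet:
if __name__ == "__main__":
    sample = """## Prerequisites
- PHP 8.1
- Composer

## Step 1: Download PrestaShop
1. Download the zip archive
2. Extract it into your web root
"""
    parser = GuideParser()
    print(parser._extract_prerequisites(sample))
    # ['PHP 8.1', 'Composer']
    print(parser._extract_steps(sample))
    # ['Download the zip archive', 'Extract it into your web root', 'Download PrestaShop']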
