"""Parser for PrestaShop guide and tutorial documentation files."""
import re
from pathlib import Path
from typing import Dict, List, Optional
from .base_parser import BaseParser
from ..config import DOCS_PATH, DOC_TYPES
class GuideParser(BaseParser):
    """Parser for PrestaShop guide and tutorial documentation.

    Handles:
    - Installation guides (basics/)
    - Deployment documentation (basics/, scale/)
    - Configuration guides
    - Testing documentation (testing/)
    - Scale documentation (scale/)
    """

    # Keywords that indicate a guide or tutorial
    GUIDE_KEYWORDS = [
        "installation",
        "deploy",
        "deployment",
        "setup",
        "configuration",
        "getting started",
        "quick start",
        "prerequisites",
    ]
    TUTORIAL_KEYWORDS = [
        "tutorial",
        "how to",
        "step by step",
        "walkthrough",
        "example",
    ]
    # Categories that typically contain guides
    GUIDE_CATEGORIES = ["basics", "scale", "testing"]

    # Upper bounds on what the extractors return.
    _MAX_DESCRIPTION_CHARS = 500
    _MAX_STEPS = 20
    _MAX_PREREQUISITES = 10

    # Opening run of a fenced code block: three or more backticks or tildes.
    _FENCE_RE = re.compile(r"^(`{3,}|~{3,})")
    # Bullet list item: the marker must be followed by whitespace, so that
    # "**bold**" / "*emphasis*" paragraphs are not mistaken for list items.
    _LIST_ITEM_RE = re.compile(r"^[-*+]\s+(.*)$")
    # Horizontal rule such as "---", "***", "___" or "- - -".
    _THEMATIC_BREAK_RE = re.compile(r"^([-*_])(?:\s*\1){2,}$")
    # Numbered list item: "1. text" or "1) text".
    _STEP_ITEM_RE = re.compile(r"^(?:\d+[\.\)])\s+(.+)$")
    # Step heading that carries a title, e.g. "## Step 1: Install".
    _STEP_HEADER_RE = re.compile(
        r"^#{2,4}\s+Step\s+\d+[:\s]+(.+)$", re.IGNORECASE
    )
    # Level 2-4 heading that opens a prerequisites section.
    _PREREQ_HEADER_RE = re.compile(
        r"^#{2,4}\s+(prerequisite|requirements|before you begin)",
        re.IGNORECASE,
    )

    @staticmethod
    def _as_lower_text(value) -> str:
        """Return ``value`` as a lower-cased string, or "" when it is None.

        Frontmatter values may be missing, null or non-string (for example a
        numeric title), so they are coerced before any substring test.
        """
        return "" if value is None else str(value).lower()

    @staticmethod
    def _docs_relative_path(file_path: Path) -> str:
        """Return ``file_path`` relative to DOCS_PATH with "/" separators.

        Matching against the docs-relative path keeps directories above the
        docs root from influencing classification, and the POSIX form makes
        fragments such as "basics/installation" match on Windows as well.
        Falls back to the full path when the file is not under DOCS_PATH.
        """
        try:
            return file_path.relative_to(DOCS_PATH).as_posix()
        except ValueError:
            return file_path.as_posix()

    @classmethod
    def _prose_lines(cls, markdown: str) -> List[str]:
        """Return the stripped lines of ``markdown`` without code contents.

        Lines inside fenced code blocks are dropped so that shell comments,
        YAML bullets or numbered output inside examples are not read as
        headings, list items or steps. The fence delimiter lines themselves
        are kept, so callers can still treat them as block boundaries.

        Args:
            markdown: Markdown content (None is treated as empty)
        Returns:
            Stripped lines that lie outside fenced code blocks
        """
        kept: List[str] = []
        open_fence = ""  # the opening fence run while inside a block
        for raw_line in (markdown or "").split("\n"):
            stripped = raw_line.strip()
            if open_fence:
                # A closing fence uses only the opening character and is at
                # least as long as the opening run.
                closes = (
                    len(stripped) >= len(open_fence)
                    and set(stripped) == {open_fence[0]}
                )
                if closes:
                    open_fence = ""
                    kept.append(stripped)
                continue
            opener = cls._FENCE_RE.match(stripped)
            if opener:
                run = opener.group(1)
                # "```code``` text" on one line is inline code, not a fence.
                is_inline = run[0] == "`" and "`" in stripped[opener.end():]
                if not is_inline:
                    open_fence = run
            kept.append(stripped)
        return kept

    @classmethod
    def _is_block_boundary(cls, stripped: str) -> bool:
        """Tell whether a stripped line starts a non-paragraph block.

        Boundaries are headings, code fences, horizontal rules and bullet
        list items.
        """
        return bool(
            stripped.startswith(("#", "```", "~~~"))
            or cls._THEMATIC_BREAK_RE.match(stripped)
            or cls._LIST_ITEM_RE.match(stripped)
        )

    def can_parse(self, file_path: Path, frontmatter: Dict) -> bool:
        """Check if this is a guide or tutorial file.

        Only files whose category is one of GUIDE_CATEGORIES are considered.
        Within those, a file qualifies when its title or menu title contains
        a tutorial or guide keyword, or when its docs-relative path contains
        a guide keyword or one of the known guide directories.

        Args:
            file_path: Path to the file
            frontmatter: Extracted YAML frontmatter
        Returns:
            True if this is a guide/tutorial documentation file
        """
        category = self.get_category_from_path(file_path, DOCS_PATH)
        if category not in self.GUIDE_CATEGORIES:
            return False

        # Not every file in these categories is a guide, so the frontmatter
        # and the path are inspected as well.
        frontmatter = frontmatter or {}
        title = self._as_lower_text(frontmatter.get("title"))
        menu_title = self._as_lower_text(frontmatter.get("menuTitle"))
        path_str = self._docs_relative_path(file_path)
        path_lower = path_str.lower()

        # Tutorials are recognised from the titles only.
        if any(
            keyword in title or keyword in menu_title
            for keyword in self.TUTORIAL_KEYWORDS
        ):
            return True

        # Guides are recognised from the titles or from the path.
        if any(
            keyword in title or keyword in menu_title or keyword in path_lower
            for keyword in self.GUIDE_KEYWORDS
        ):
            return True

        # Known guide directories. With the current GUIDE_KEYWORDS most of
        # these are already implied by the path test above; they stay
        # explicit so they keep working if the keyword list is edited.
        if "basics/installation" in path_str:
            return True
        if "scale/deployment" in path_str or "scale/performance" in path_str:
            return True
        if "testing" in path_str and any(
            keyword in path_lower
            for keyword in ("setup", "installation", "configuration")
        ):
            return True
        return False

    def parse(self, file_path: Path) -> Optional[Dict]:
        """Parse a PrestaShop guide/tutorial file.

        Args:
            file_path: Path to the guide file
        Returns:
            Parsed guide data (name, title, category, subcategory, doc_type,
            path, origin, location, content, version and a metadata dict),
            or None when the file is skipped, cannot be read, or yields
            neither frontmatter nor content
        """
        if self.should_skip_file(file_path):
            return None

        try:
            content = file_path.read_text(encoding="utf-8")
        except (OSError, UnicodeError) as e:
            # Unreadable or non-UTF-8 files are reported and skipped.
            print(f"Error reading {file_path}: {e}")
            return None

        # Extract frontmatter
        frontmatter, markdown_content = self.extract_frontmatter(content)
        if not frontmatter and not markdown_content:
            return None
        # Only one of the two may be present; normalise the missing one so
        # the lookups and extractors below do not fail on it.
        frontmatter = frontmatter or {}
        markdown_content = markdown_content or ""

        # A missing, null or empty title falls back to the file stem, and a
        # non-string title (e.g. a number) is converted to text.
        raw_title = frontmatter.get("title")
        title = file_path.stem if raw_title in (None, "") else str(raw_title)
        title_lower = title.lower()
        path_lower = self._docs_relative_path(file_path).lower()

        # Default to guide; switch to tutorial when a tutorial keyword shows
        # up in the title or in the docs-relative path.
        doc_type = DOC_TYPES["guide"]
        if any(
            keyword in title_lower or keyword in path_lower
            for keyword in self.TUTORIAL_KEYWORDS
        ):
            doc_type = DOC_TYPES["tutorial"]

        # Extract metadata
        menu_title = frontmatter.get("menuTitle", "")
        weight = frontmatter.get("weight")
        # If no description in frontmatter, try to extract from content
        description = frontmatter.get("description", "")
        if not description:
            description = self._extract_description(markdown_content)

        code_examples = self.extract_code_examples(markdown_content)

        # Steps are only extracted for tutorials
        steps: List[str] = []
        if doc_type == DOC_TYPES["tutorial"]:
            steps = self._extract_steps(markdown_content)

        prerequisites = self._extract_prerequisites(markdown_content)

        # Get category and subcategory
        category = self.get_category_from_path(file_path, DOCS_PATH)
        subcategory = self.get_subcategory_from_path(file_path, DOCS_PATH)

        # Build relative path; files outside DOCS_PATH keep only their name
        try:
            relative_path = str(file_path.relative_to(DOCS_PATH))
        except ValueError:
            relative_path = file_path.name

        return {
            "name": file_path.stem,
            "title": self.clean_title(title),
            "category": category,
            "subcategory": subcategory,
            "doc_type": doc_type,
            "path": relative_path,
            "origin": "",  # Guides don't have origin like hooks
            "location": "",  # Guides don't have location like hooks
            "content": markdown_content,
            "version": self.extract_version(frontmatter, markdown_content),
            "metadata": {
                "menu_title": menu_title,
                "weight": weight,
                "description": description,
                "code_examples": code_examples,
                "steps": steps,
                "prerequisites": prerequisites,
            },
        }

    def _extract_description(self, markdown: str) -> str:
        """Extract the main description from markdown content.

        The description is the first plain paragraph: leading headings, code
        blocks, horizontal rules, bullet lists, blank lines and Hugo
        shortcodes are skipped, and collection stops at the first blank line
        or block boundary after some text has been gathered.

        Args:
            markdown: Markdown content
        Returns:
            Main description text, limited to 500 characters
        """
        description_lines: List[str] = []
        for stripped in self._prose_lines(markdown):
            if not stripped or self._is_block_boundary(stripped):
                if description_lines:
                    break  # the first paragraph has ended
                continue  # still skipping leading non-paragraph content
            if stripped.startswith("{{"):
                continue  # Skip Hugo shortcodes
            description_lines.append(stripped)
        return " ".join(description_lines)[: self._MAX_DESCRIPTION_CHARS]

    def _extract_steps(self, markdown: str) -> List[str]:
        """Extract numbered steps from tutorial content.

        Recognises numbered list items ("1. text" / "1) text") and titled
        step headings ("## Step 1: text") outside fenced code blocks, and
        returns them in the order they appear in the document.

        Args:
            markdown: Markdown content
        Returns:
            List of step descriptions, limited to 20 entries
        """
        steps: List[str] = []
        for stripped in self._prose_lines(markdown):
            # A line starts with either a digit or "#", so at most one of the
            # two patterns can match it.
            match = self._STEP_ITEM_RE.match(
                stripped
            ) or self._STEP_HEADER_RE.match(stripped)
            if not match:
                continue
            steps.append(match.group(1))
            if len(steps) >= self._MAX_STEPS:
                break
        return steps

    def _extract_prerequisites(self, markdown: str) -> List[str]:
        """Extract prerequisites from content.

        Collects the bullet items that follow a level 2-4 heading starting
        with "prerequisite", "requirements" or "before you begin", up to the
        next heading that is not itself such a heading. Fenced code block
        contents are ignored.

        Args:
            markdown: Markdown content
        Returns:
            List of prerequisites, limited to 10 entries
        """
        prerequisites: List[str] = []
        in_prereq_section = False
        for stripped in self._prose_lines(markdown):
            # A prerequisite heading opens the section, or keeps it open
            # when it appears as a sub-heading inside it.
            if self._PREREQ_HEADER_RE.match(stripped):
                in_prereq_section = True
                continue
            if not in_prereq_section:
                continue
            # Stop at next header
            if stripped.startswith("#"):
                break
            item = self._LIST_ITEM_RE.match(stripped)
            if item:
                prereq = item.group(1).strip()
                if prereq:
                    prerequisites.append(prereq)
        return prerequisites[: self._MAX_PREREQUISITES]