"""Parser for PrestaShop hook documentation files."""
import re
from pathlib import Path
from typing import Dict, List, Optional
from .base_parser import BaseParser
from ..config import DOCS_PATH, DOC_TYPES
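
# Hook docs live under a "hooks/list-of-hooks" path and/or carry frontmatter shaped
# roughly like the sketch below (illustrative values, not a verbatim file; these are
# the keys this parser looks at):
#
#     ---
#     Title: displayHeader
#     hookTitle: displayHeader
#     type: display
#     origin: core
#     description: Hook executed when rendering the page header.
#     hookAliases: []
#     locations:
#         - front office
#     files:
#         - url: https://github.com/PrestaShop/PrestaShop/...
#     ---
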
class HookParser(BaseParser):
"""Parser for PrestaShop hook documentation."""
def can_parse(self, file_path: Path, frontmatter: Dict) -> bool:
"""Check if this is a hook file.
Args:
file_path: Path to the file
frontmatter: Extracted YAML frontmatter
Returns:
True if this is a hook documentation file
"""
# Check if file is in hooks directory
path_str = str(file_path)
if "hooks/list-of-hooks" in path_str:
return True
# Check frontmatter for hook-specific fields
hook_markers = ["hookTitle", "hookAliases", "Title"]
if any(marker in frontmatter for marker in hook_markers):
# Additional check: has type field (even if null)
if "type" in frontmatter:
return True
return False
def parse(self, file_path: Path) -> Optional[Dict]:
"""Parse a PrestaShop hook file.
Args:
file_path: Path to the hook file
Returns:
            Parsed hook data, or None if the file is skipped, unreadable, or has no frontmatter
"""
if self.should_skip_file(file_path):
return None
try:
content = file_path.read_text(encoding="utf-8")
except Exception as e:
print(f"Error reading {file_path}: {e}")
return None
# Extract frontmatter
frontmatter, markdown_content = self.extract_frontmatter(content)
if not frontmatter:
return None
# Extract hook-specific data
        hook_name = frontmatter.get("Title") or file_path.stem
        hook_type = frontmatter.get("type") or "unknown"
        origin = frontmatter.get("origin") or "unknown"
        locations = frontmatter.get("locations") or []
        description = frontmatter.get("description") or ""
        files = frontmatter.get("files") or []
        hook_aliases = frontmatter.get("hookAliases") or []
# Convert locations to string
        if isinstance(locations, list):
            # Coerce entries to str so a null or numeric YAML value cannot break the join
            locations_str = ", ".join(str(loc) for loc in locations if loc is not None)
        else:
            locations_str = str(locations) if locations else ""
# Extract code examples
code_examples = self.extract_code_examples(markdown_content)
# Build GitHub references
github_refs = []
if files:
for file_info in files:
if isinstance(file_info, dict) and "url" in file_info:
github_refs.append(file_info["url"])
# Get category and subcategory
category = self.get_category_from_path(file_path, DOCS_PATH)
subcategory = self.get_subcategory_from_path(file_path, DOCS_PATH)
# Build relative path
try:
relative_path = str(file_path.relative_to(DOCS_PATH))
except ValueError:
relative_path = file_path.name
return {
"name": hook_name,
"title": hook_name,
"category": category,
"subcategory": subcategory,
"doc_type": DOC_TYPES["hook"],
"path": relative_path,
"origin": origin,
"location": locations_str,
"content": markdown_content,
"version": self.extract_version(frontmatter, markdown_content),
"metadata": {
"type": hook_type,
"aliases": hook_aliases if hook_aliases else [],
"github_refs": github_refs,
"code_examples": code_examples,
"description": description,
},
}
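
# Typical use of HookParser (an illustrative sketch; `path` stands for any hook
# markdown file under the docs tree; extract_frontmatter is the BaseParser helper
# used in parse() above):
#
#     parser = HookParser()
#     raw = path.read_text(encoding="utf-8")
#     frontmatter, _ = parser.extract_frontmatter(raw)
#     if parser.can_parse(path, frontmatter):
#         hook_doc = parser.parse(path)
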
# Backward compatibility: keep the old function interface
def parse_hook_file(file_path: Path) -> Optional[Dict]:
"""Parse a PrestaShop hook markdown file with YAML frontmatter.
    This function is kept for backward compatibility; new code should use the
    HookParser class instead.
Args:
file_path: Path to the hook .md file
Returns:
Dictionary with parsed hook data, or None if parsing fails
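
    Example:
        Illustrative only; the path below is a placeholder for a real hook file::

            hook = parse_hook_file(Path("hooks/list-of-hooks/displayHeader.md"))
            if hook:
                print(hook["name"], hook["type"], hook["origin"])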
"""
parser = HookParser()
result = parser.parse(file_path)
if not result:
return None
# Convert to old format for backward compatibility
metadata = result.get("metadata", {})
return {
"name": result["name"],
"type": metadata.get("type", "unknown"),
"origin": result.get("origin", "unknown"),
"locations": result.get("location", ""),
"description": metadata.get("description", ""),
"aliases": metadata.get("aliases", []),
"github_refs": metadata.get("github_refs", []),
"code_examples": metadata.get("code_examples", []),
"content": result["content"],
"path": result["path"],
}
def extract_code_examples(markdown: str) -> List[str]:
"""Extract code blocks from markdown content.
Args:
markdown: Markdown content
Returns:
List of code examples
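
    Example:
        Given markdown containing a fenced block such as (illustrative)::

            ```php
            echo $id;
            ```

        the returned list is ["echo $id;"], with the language tag and fences stripped.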
"""
# Match code blocks with triple backticks
pattern = r"```(?:\w+)?\n(.*?)```"
matches = re.findall(pattern, markdown, re.DOTALL)
return [match.strip() for match in matches if match.strip()]
def extract_description(markdown: str) -> str:
"""Extract the main description from markdown content.
Args:
markdown: Markdown content
Returns:
Main description text
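
    Example:
        Illustrative input; the shortcode name is made up::

            {{< hookInfo >}}
            This hook is triggered before the cart is saved.

            ## Parameters

        returns "This hook is triggered before the cart is saved.": the shortcode
        line is skipped and reading stops at the first blank line after content
        (or at a heading or code fence).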
"""
# Get first paragraph before any headers or code blocks
lines = markdown.split("\n")
description_lines = []
for line in lines:
stripped = line.strip()
# Stop at headers, code blocks, or empty lines after content
if stripped.startswith("#") or stripped.startswith("```"):
break
if stripped and not stripped.startswith("{{"): # Skip Hugo shortcodes
description_lines.append(stripped)
elif description_lines and not stripped:
break
return " ".join(description_lines)