"""Parser for general PrestaShop documentation files."""
from pathlib import Path
from typing import Dict, Optional
from .base_parser import BaseParser
from ..config import DOCS_PATH, DOC_TYPES
class GeneralParser(BaseParser):
    """Fallback parser for general PrestaShop documentation.

    This parser handles all documentation files that don't match
    more specialized parsers (hooks, guides, etc.). It extracts
    basic frontmatter and content, making all documentation searchable.

    Handles:
        - Concept documentation
        - Architecture documentation
        - Reference documentation
        - Contributing guides
        - Project information
        - FAQ entries
        - Any other markdown documentation
    """

    # Upper bound (in characters) for descriptions derived from the body text.
    _DESCRIPTION_LIMIT = 500

    def can_parse(self, file_path: Path, frontmatter: Dict) -> bool:
        """Check if this parser can handle the file.

        The GeneralParser accepts any markdown file as a fallback,
        so it always returns True. It should be registered last
        in the ParserRegistry.

        Args:
            file_path: Path to the file
            frontmatter: Extracted YAML frontmatter

        Returns:
            True (always accepts as fallback parser)
        """
        # Neither argument is inspected: this is the catch-all parser.
        return True

    def parse(self, file_path: Path) -> Optional[Dict]:
        """Parse a general PrestaShop documentation file.

        Args:
            file_path: Path to the documentation file

        Returns:
            Parsed documentation data, or None when the file is skipped,
            cannot be read, or has no content after the frontmatter.
        """
        if self.should_skip_file(file_path):
            return None

        try:
            content = file_path.read_text(encoding="utf-8")
        except (OSError, ValueError) as e:
            # OSError covers I/O failures; ValueError covers UnicodeDecodeError
            # (its subclass) for files that are not valid UTF-8.
            print(f"Error reading {file_path}: {e}")
            return None

        frontmatter, markdown_content = self.extract_frontmatter(content)

        # Skip files with nothing but frontmatter / whitespace
        if not markdown_content.strip():
            return None

        # Title preference: frontmatter "title", then "menuTitle", then a
        # humanised version of the file name.
        title = (
            frontmatter.get("title")
            or frontmatter.get("menuTitle")
            or file_path.stem.replace("-", " ").replace("_", " ").title()
        )
        menu_title = frontmatter.get("menuTitle", "")
        weight = frontmatter.get("weight")

        # Fall back to the first paragraph of the body when the frontmatter
        # carries no (or an empty) description.
        description = frontmatter.get("description") or self._extract_description(
            markdown_content
        )

        code_examples = self.extract_code_examples(markdown_content)
        doc_type = self._infer_doc_type(file_path, frontmatter)
        category = self.get_category_from_path(file_path, DOCS_PATH)
        subcategory = self.get_subcategory_from_path(file_path, DOCS_PATH)

        # Path relative to the docs root; files outside it keep only their name
        try:
            relative_path = str(file_path.relative_to(DOCS_PATH))
        except ValueError:
            relative_path = file_path.name

        return {
            "name": file_path.stem,
            "title": self.clean_title(title),
            "category": category,
            "subcategory": subcategory,
            "doc_type": doc_type,
            "path": relative_path,
            "origin": "",
            "location": "",
            "content": markdown_content,
            "version": self.extract_version(frontmatter, markdown_content),
            "metadata": {
                "menu_title": menu_title,
                "weight": weight,
                "description": description,
                "code_examples": code_examples,
                "frontmatter": frontmatter,  # Store full frontmatter for flexibility
            },
        }

    def _extract_description(self, markdown: str) -> str:
        """Extract the main description from markdown content.

        The description is the first paragraph of plain text. Headers,
        Hugo shortcode lines and fenced code blocks (including the code
        inside them) that appear before that paragraph are ignored; the
        paragraph ends at the first blank line, header or code fence
        that follows it.

        Args:
            markdown: Markdown content

        Returns:
            Main description text (max 500 chars), or an empty string when
            no paragraph is found.
        """
        paragraph = []
        inside_fence = False

        for raw_line in markdown.split("\n"):
            line = raw_line.strip()

            if line.startswith("```"):
                if paragraph:
                    # A code block after the paragraph terminates it
                    break
                # Leading code block: remember whether we are inside it so
                # its contents are not mistaken for prose.
                inside_fence = not inside_fence
                continue

            if inside_fence:
                continue

            # Skip Hugo shortcodes
            if line.startswith("{{"):
                continue

            if line.startswith("#"):
                if paragraph:
                    break
                continue

            if line:
                paragraph.append(line)
            elif paragraph:
                # Blank line after content - the paragraph is complete
                break

        description = " ".join(paragraph)

        limit = self._DESCRIPTION_LIMIT
        if len(description) > limit:
            # Reserve three characters for the ellipsis
            description = description[: limit - 3] + "..."

        return description

    @staticmethod
    def _path_words(path: Path) -> set:
        """Return the lowercase alphanumeric words that make up a path.

        Every non-alphanumeric character (path separators, dots, hyphens,
        underscores, ...) acts as a word boundary, so "page-reference"
        yields {"page", "reference"} while "preferences" stays one word.
        """
        lowered = str(path).lower()
        spaced = "".join(ch if ch.isalnum() else " " for ch in lowered)
        return set(spaced.split())

    def _infer_doc_type(self, file_path: Path, frontmatter: Dict) -> str:
        """Infer the document type based on path and frontmatter.

        Checks are applied in order: FAQ category, API categories,
        "components" in the path, "reference"/"references" in the path,
        then a "type" hint in the frontmatter.

        Args:
            file_path: Path to the file
            frontmatter: Extracted frontmatter

        Returns:
            Document type string
        """
        category = self.get_category_from_path(file_path, DOCS_PATH)

        # FAQ category
        if category == "faq":
            return DOC_TYPES["faq"]

        # API categories
        if category in ("webservice", "admin-api"):
            return DOC_TYPES["api"]

        # Only look at the part of the path inside the docs tree so that
        # directory names above DOCS_PATH cannot influence the result.
        try:
            scoped_path = file_path.relative_to(DOCS_PATH)
        except ValueError:
            scoped_path = file_path

        # Whole-word matching avoids false hits such as "preferences"
        # being treated as "reference".
        words = self._path_words(scoped_path)

        # Component documentation
        if "components" in words:
            return DOC_TYPES["component"]

        # Reference documentation
        if "reference" in words or "references" in words:
            return DOC_TYPES["reference"]

        # Check frontmatter for type hints
        if "type" in frontmatter:
            doc_type_hint = str(frontmatter["type"]).lower()
            if doc_type_hint in DOC_TYPES.values():
                return doc_type_hint

        # Default to general
        return DOC_TYPES["general"]