Coverage for src/docstring_format_checker/core.py: 100%
235 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-04 12:45 +0000
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-04 12:45 +0000
1# ============================================================================ #
2# #
3# Title: Title #
4# Purpose: Purpose #
5# Notes: Notes #
6# Author: chrimaho #
7# Created: Created #
8# References: References #
9# Sources: Sources #
10# Edited: Edited #
11# #
12# ============================================================================ #
15# ---------------------------------------------------------------------------- #
16# #
17# Overview ####
18# #
19# ---------------------------------------------------------------------------- #
22# ---------------------------------------------------------------------------- #
23# Description ####
24# ---------------------------------------------------------------------------- #
27"""
28!!! note "Summary"
29 Core docstring checking functionality.
30"""
33# ---------------------------------------------------------------------------- #
34# #
35# Setup ####
36# #
37# ---------------------------------------------------------------------------- #
40## --------------------------------------------------------------------------- #
41## Imports ####
42## --------------------------------------------------------------------------- #
45# ## Python StdLib Imports ----
46import ast
47import fnmatch
48import re
49from pathlib import Path
50from typing import Literal, NamedTuple, Optional, Union
52# ## Local First Party Imports ----
53from docstring_format_checker.config import SectionConfig
54from docstring_format_checker.utils.exceptions import (
55 DirectoryNotFoundError,
56 DocstringError,
57 InvalidFileError,
58)
61## --------------------------------------------------------------------------- #
62## Exports ####
63## --------------------------------------------------------------------------- #
# Public API of this module. `SectionConfig` and `DocstringError` are imported
# above from sibling modules and re-exported here.
__all__: list[str] = [
    "DocstringChecker",
    "FunctionAndClassDetails",
    "SectionConfig",
    "DocstringError",
]
74# ---------------------------------------------------------------------------- #
75# #
76# Main Section ####
77# #
78# ---------------------------------------------------------------------------- #
class FunctionAndClassDetails(NamedTuple):
    """
    !!! note "Summary"
        Details about a function, method or class found in the AST.
    """

    # Kind of definition this record describes.
    item_type: Literal["function", "class", "method"]
    # Name of the definition as written in the source.
    name: str
    # The AST node itself; its docstring is read from this node.
    node: Union[ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef]
    # Line number of the definition, used when reporting errors.
    lineno: int
    # Name of the enclosing class for methods; `None` otherwise.
    parent_class: Optional[str] = None
class DocstringChecker:
    """
    !!! note "Summary"
        Main class for checking docstring format and completeness.

    ???+ info "Notes"
        - Only public functions, methods and classes (names not starting with `_`) are checked.
        - Functions decorated with `@overload` are skipped.
        - Structured sections (Params, Returns, Raises, Yields and simple list sections) are only
          enforced on functions and methods; free text sections are also validated on classes
          that have a docstring.
    """

    def __init__(self, sections_config: list[SectionConfig]) -> None:
        """
        !!! note "Summary"
            Initialize the docstring checker.

        Params:
            sections_config (list[SectionConfig]):
                List of section configurations to check against.
        """
        self.sections_config: list[SectionConfig] = sections_config
        self.required_sections: list[SectionConfig] = [s for s in sections_config if s.required]
        self.optional_sections: list[SectionConfig] = [s for s in sections_config if not s.required]

    # ------------------------------------------------------------------ #
    #  Public API                                                        #
    # ------------------------------------------------------------------ #

    def check_file(self, file_path: Union[str, Path]) -> list[DocstringError]:
        """
        !!! note "Summary"
            Check docstrings in a Python file.

        Params:
            file_path (Union[str, Path]):
                Path to the Python file to check.

        Raises:
            (FileNotFoundError):
                If the file doesn't exist.
            (InvalidFileError):
                If the file is not a Python file.
            (UnicodeError):
                If the file can't be decoded.
            (SyntaxError):
                If the file contains invalid Python syntax.

        Returns:
            (list[DocstringError]):
                List of DocstringError objects for any validation failures.
        """
        file_path = Path(file_path)
        if not file_path.exists():
            raise FileNotFoundError(f"File not found: {file_path}")
        if file_path.suffix != ".py":
            raise InvalidFileError(f"File must be a Python file (.py): {file_path}")

        # Read and parse the file
        try:
            with open(file_path, encoding="utf-8") as f:
                content: str = f.read()
        except UnicodeDecodeError as e:
            raise UnicodeError(f"Cannot decode file {file_path}: {e}") from e

        try:
            tree: ast.Module = ast.parse(content)
        except SyntaxError as e:
            raise SyntaxError(f"Invalid Python syntax in {file_path}: {e}") from e

        # Check every extracted item, collecting (not propagating) the failures
        errors: list[DocstringError] = []
        for item in self._extract_items(tree):
            try:
                self._check_single_docstring(item, str(file_path))
            except DocstringError as e:
                errors.append(e)
        return errors

    def check_directory(
        self,
        directory_path: Union[str, Path],
        recursive: bool = True,
        exclude_patterns: Optional[list[str]] = None,
    ) -> dict[str, list[DocstringError]]:
        """
        !!! note "Summary"
            Check docstrings in all Python files in a directory.

        Params:
            directory_path (Union[str, Path]):
                Path to the directory to check.
            recursive (bool):
                Whether to check subdirectories recursively.
            exclude_patterns (Optional[list[str]]):
                List of glob patterns to exclude. Patterns are matched with `fnmatch` against
                each file's path relative to `directory_path`.

        Raises:
            (FileNotFoundError):
                If the directory doesn't exist.
            (DirectoryNotFoundError):
                If the path is not a directory.

        Returns:
            (dict[str, list[DocstringError]]):
                Dictionary mapping file paths to lists of DocstringError objects. Only files with
                at least one error are included. A file that cannot be read or parsed is reported
                as a single error with `item_type="file"` and `line_number=0`.
        """
        directory_path = Path(directory_path)
        if not directory_path.exists():
            raise FileNotFoundError(f"Directory not found: {directory_path}")
        if not directory_path.is_dir():
            raise DirectoryNotFoundError(f"Path is not a directory: {directory_path}")

        # Find all Python files. A directory whose name ends in `.py` also matches the glob,
        # so directories are dropped here instead of failing later in `open()`.
        glob_pattern: str = "**/*.py" if recursive else "*.py"
        python_files: list[Path] = [p for p in directory_path.glob(glob_pattern) if not p.is_dir()]

        # Filter out excluded patterns
        if exclude_patterns:
            python_files = [
                p
                for p in python_files
                if not any(
                    fnmatch.fnmatch(str(p.relative_to(directory_path)), exclude)
                    for exclude in exclude_patterns
                )
            ]

        # Check each file
        results: dict[str, list[DocstringError]] = {}
        for file_path in python_files:
            try:
                errors: list[DocstringError] = self.check_file(file_path)
            except (OSError, ValueError, SyntaxError) as e:
                # `OSError` covers FileNotFoundError as well as PermissionError and friends;
                # `ValueError` covers the UnicodeError raised for undecodable files. One bad
                # file must not abort the scan of the whole directory.
                results[str(file_path)] = [
                    DocstringError(
                        message=str(e),
                        file_path=str(file_path),
                        line_number=0,
                        item_name="",
                        item_type="file",
                    )
                ]
            else:
                if errors:  # Only include files with errors
                    results[str(file_path)] = errors

        return results

    # ------------------------------------------------------------------ #
    #  AST extraction                                                    #
    # ------------------------------------------------------------------ #

    def _is_overload_function(self, node: Union[ast.FunctionDef, ast.AsyncFunctionDef]) -> bool:
        """
        !!! note "Summary"
            Check if a function definition is decorated with @overload.

        Params:
            node (Union[ast.FunctionDef, ast.AsyncFunctionDef]):
                The function node to check for @overload decorator.

        Returns:
            (bool):
                True if the function has @overload decorator, False otherwise.
        """
        for decorator in node.decorator_list:
            # Direct name reference: @overload
            if isinstance(decorator, ast.Name) and decorator.id == "overload":
                return True
            # Attribute reference: @typing.overload
            if isinstance(decorator, ast.Attribute) and decorator.attr == "overload":
                return True
        return False

    def _extract_items(self, tree: ast.AST) -> list[FunctionAndClassDetails]:
        """
        !!! note "Summary"
            Extract all functions and classes from the AST.

        Params:
            tree (ast.AST):
                The Abstract Syntax Tree (AST) to extract items from.

        Returns:
            (list[FunctionAndClassDetails]):
                A list of extracted function and class details, in visiting order.
        """
        items: list[FunctionAndClassDetails] = []

        class ItemVisitor(ast.NodeVisitor):
            """Collect public classes and functions while tracking the enclosing classes."""

            def __init__(self, checker: DocstringChecker) -> None:
                self.class_stack: list[str] = []
                self.checker: DocstringChecker = checker

            def visit_ClassDef(self, node: ast.ClassDef) -> None:
                if not node.name.startswith("_"):  # Skip private classes
                    items.append(
                        FunctionAndClassDetails(
                            item_type="class",
                            name=node.name,
                            node=node,
                            lineno=node.lineno,
                            parent_class=None,
                        )
                    )

                # Members are visited even when the class itself is private
                self.class_stack.append(node.name)
                self.generic_visit(node)
                self.class_stack.pop()

            def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
                self._visit_function(node)

            def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None:
                self._visit_function(node)

            def _visit_function(self, node: Union[ast.FunctionDef, ast.AsyncFunctionDef]) -> None:
                """Visit function definition node (sync or async)."""
                # Skip private functions, and @overload stubs which don't need docstrings
                if not node.name.startswith("_") and not self.checker._is_overload_function(node):
                    in_class: bool = bool(self.class_stack)
                    items.append(
                        FunctionAndClassDetails(
                            item_type="method" if in_class else "function",
                            name=node.name,
                            node=node,
                            lineno=node.lineno,
                            parent_class=self.class_stack[-1] if in_class else None,
                        )
                    )

                # Keep descending so nested definitions are found as well
                self.generic_visit(node)

        ItemVisitor(self).visit(tree)
        return items

    # ------------------------------------------------------------------ #
    #  Docstring validation                                              #
    # ------------------------------------------------------------------ #

    @staticmethod
    def _canonical_section_name(name: str) -> str:
        """
        !!! note "Summary"
            Map a configured section name to the label used for ordering and error messages.

        Params:
            name (str):
                The section name as written in the configuration.

        Returns:
            (str):
                `Params`, `Returns`, `Yields` or `Raises` for those sections and their singular
                aliases (matched case-insensitively); the title-cased name otherwise.
        """
        aliases: dict[str, str] = {
            "params": "Params",
            "return": "Returns",
            "returns": "Returns",
            "yield": "Yields",
            "yields": "Yields",
            "raise": "Raises",
            "raises": "Raises",
        }
        return aliases.get(name.lower(), name.title())

    @staticmethod
    def _admonition_pattern(section: SectionConfig) -> str:
        """
        !!! note "Summary"
            Build the regex matching a section's admonition header, e.g. `!!! note "Summary"`.

        Params:
            section (SectionConfig):
                A section with both `prefix` and `admonition` set.

        Returns:
            (str):
                The regular expression source for the section header.
        """
        return rf'{re.escape(section.prefix)}\s+{re.escape(section.admonition)}\s+".*{re.escape(section.name)}"'

    @staticmethod
    def _section_demands_docstring(section: SectionConfig) -> bool:
        """
        !!! note "Summary"
            Tell whether a required section forces functions and methods to have a docstring.

        Params:
            section (SectionConfig):
                The required section to inspect.

        Returns:
            (bool):
                `True` for free text sections, for Params/Returns sections of type
                `list_name_and_type`, and for `list_type`/`list_name` sections.
        """
        if section.type == "free_text":
            return True
        if section.type == "list_name_and_type":
            return section.name.lower() in ("params", "returns", "return")
        return section.type in ("list_type", "list_name")

    def _check_single_docstring(self, item: FunctionAndClassDetails, file_path: str) -> None:
        """
        !!! note "Summary"
            Check a single function or class docstring.

        Params:
            item (FunctionAndClassDetails):
                The function or class to check.
            file_path (str):
                The path to the file containing the item.

        Raises:
            (DocstringError):
                If a required docstring is missing, or the docstring fails section validation.

        Returns:
            (None):
                Nothing is returned.
        """
        docstring: Optional[str] = ast.get_docstring(item.node)

        if docstring:
            self._validate_docstring_sections(docstring, item, file_path)
            return

        # A missing (or empty) docstring is only an error for functions and methods, and only
        # when at least one required section applies to them. Classes never require one.
        is_function: bool = isinstance(item.node, (ast.FunctionDef, ast.AsyncFunctionDef))
        requires_docstring: bool = is_function and any(
            section.required and self._section_demands_docstring(section) for section in self.sections_config
        )
        if requires_docstring:
            raise DocstringError(
                message=f"Missing docstring for {item.item_type}",
                file_path=file_path,
                line_number=item.lineno,
                item_name=item.name,
                item_type=item.item_type,
            )

    def _validate_docstring_sections(
        self,
        docstring: str,
        item: FunctionAndClassDetails,
        file_path: str,
    ) -> None:
        """
        !!! note "Summary"
            Validate the sections within a docstring.

        Params:
            docstring (str):
                The docstring to validate.
            item (FunctionAndClassDetails):
                The function or class to check.
            file_path (str):
                The path to the file containing the item.

        Raises:
            (DocstringError):
                If any check fails; the individual messages are joined with `"; "`.

        Returns:
            (None):
                Nothing is returned.
        """
        errors: list[str] = []
        is_function: bool = isinstance(item.node, (ast.FunctionDef, ast.AsyncFunctionDef))

        # Check each required section
        for section in self.required_sections:
            name: str = section.name.lower()

            if section.type == "free_text":
                if not self._check_free_text_section(docstring, section):
                    errors.append(f"Missing required section: {section.name}")
                continue

            # Structured sections describe callables; a class docstring must not be
            # rejected for lacking Params/Returns/Raises/Yields.
            if not is_function:
                continue

            if section.type == "list_name_and_type":
                if name == "params":
                    if not self._check_params_section(docstring, item.node):
                        errors.append("Missing or invalid Params section")
                elif name in ("returns", "return"):
                    if not self._check_returns_section(docstring):
                        errors.append("Missing or invalid Returns section")

            elif section.type == "list_type":
                if name in ("raises", "raise"):
                    if not self._check_raises_section(docstring):
                        errors.append("Missing or invalid Raises section")
                elif name in ("yields", "yield"):
                    if not self._check_yields_section(docstring):
                        errors.append("Missing or invalid Yields section")

            elif section.type == "list_name":
                # Simple name sections - check if they exist
                if not self._check_simple_section(docstring, section.name):
                    errors.append(f"Missing required section: {section.name}")

        # Check section order
        errors.extend(self._check_section_order(docstring))

        # Check for mutual exclusivity (returns vs yields)
        if self._has_both_returns_and_yields(docstring):
            errors.append("Docstring cannot have both Returns and Yields sections")

        if errors:
            raise DocstringError(
                message="; ".join(errors),
                file_path=file_path,
                line_number=item.lineno,
                item_name=item.name,
                item_type=item.item_type,
            )

    def _check_free_text_section(self, docstring: str, section: SectionConfig) -> bool:
        """
        !!! note "Summary"
            Check if a free text section exists in the docstring.

        Params:
            docstring (str):
                The docstring to check.
            section (SectionConfig):
                The section configuration to validate.

        Returns:
            (bool):
                `True` if the section exists, `False` otherwise. Free text sections that have no
                admonition/prefix and are not a summary or examples section always pass.
        """
        if section.admonition and section.prefix:
            # Format like: !!! note "Summary"
            return bool(re.search(self._admonition_pattern(section), docstring, re.IGNORECASE))

        name: str = section.name.lower()
        if name == "summary":
            # Without a configured admonition any non-empty docstring counts as a summary
            # (which includes the formal `!!! note "Summary"` form).
            return bool(docstring.strip())
        if name in ("examples", "example"):
            return bool(re.search(r'\?\?\?\+ example "Examples"', docstring, re.IGNORECASE))

        return True  # Default to true for unknown free text sections

    def _check_params_section(self, docstring: str, node: Union[ast.FunctionDef, ast.AsyncFunctionDef]) -> bool:
        """
        !!! note "Summary"
            Check if the Params section exists and documents all parameters.

        Params:
            docstring (str):
                The docstring to check.
            node (Union[ast.FunctionDef, ast.AsyncFunctionDef]):
                The function node to check.

        Returns:
            (bool):
                `True` if the section exists and is valid, `False` otherwise.

        ???+ info "Notes"
            Positional-only, regular and keyword-only parameters are checked; the implicit
            `self`/`cls` receivers and the `*args`/`**kwargs` catch-alls are not.
        """
        arguments: ast.arguments = node.args
        params: list[str] = [
            arg.arg
            for arg in (*arguments.posonlyargs, *arguments.args, *arguments.kwonlyargs)
            if arg.arg not in ("self", "cls")
        ]

        if not params:
            return True  # No parameters to document

        # Check if Params section exists
        if not re.search(r"Params:", docstring):
            return False

        # Each parameter must appear as `name (type):`. The look-behind stops `path`
        # from being satisfied by an entry for `file_path`.
        return all(re.search(rf"(?<!\w){re.escape(param)}\s*\([^)]+\):", docstring) for param in params)

    def _check_returns_section(self, docstring: str) -> bool:
        """
        !!! note "Summary"
            Check if the Returns section exists.

        Params:
            docstring (str):
                The docstring to check.

        Returns:
            (bool):
                `True` if the section exists, `False` otherwise.
        """
        return bool(re.search(r"Returns:", docstring))

    def _check_raises_section(self, docstring: str) -> bool:
        """
        !!! note "Summary"
            Check if the Raises section exists.

        Params:
            docstring (str):
                The docstring to check.

        Returns:
            (bool):
                `True` if the section exists, `False` otherwise.
        """
        return bool(re.search(r"Raises:", docstring))

    def _check_yields_section(self, docstring: str) -> bool:
        """
        !!! note "Summary"
            Check if the Yields section exists.

        Params:
            docstring (str):
                The docstring to check.

        Returns:
            (bool):
                `True` if the section exists, `False` otherwise.
        """
        return bool(re.search(r"Yields:", docstring))

    def _check_simple_section(self, docstring: str, section_name: str) -> bool:
        """
        !!! note "Summary"
            Check if a simple named section exists.

        Params:
            docstring (str):
                The docstring to check.
            section_name (str):
                The name of the section to check for (matched case-insensitively).

        Returns:
            (bool):
                `True` if the section exists, `False` otherwise.
        """
        return bool(re.search(rf"{re.escape(section_name)}:", docstring, re.IGNORECASE))

    def _has_both_returns_and_yields(self, docstring: str) -> bool:
        """
        !!! note "Summary"
            Check if docstring has both Returns and Yields sections.

        Params:
            docstring (str):
                The docstring to check.

        Returns:
            (bool):
                `True` if both sections are present, `False` otherwise.
        """
        return self._check_returns_section(docstring) and self._check_yields_section(docstring)

    def _check_section_order(self, docstring: str) -> list[str]:
        """
        !!! note "Summary"
            Check that sections appear in the correct order.

        Params:
            docstring (str):
                The docstring to check.

        Returns:
            (list[str]):
                A list of error messages, if any.

        ???+ info "Notes"
            The expected order is the configured sections sorted by their `order`, followed by a
            built-in list of common admonition sections. Found and expected sections are compared
            through the same canonical label, so a section configured as `return` or `summary` is
            ordered exactly like `Returns` or `Summary`.
        """
        configured: list[SectionConfig] = sorted(self.sections_config, key=lambda s: s.order)

        # Header patterns for the configured sections
        section_patterns: list[tuple[str, str]] = []
        for section in configured:
            label: str = self._canonical_section_name(section.name)
            if section.type == "free_text" and section.admonition and section.prefix:
                section_patterns.append((self._admonition_pattern(section), label))
            elif label in ("Params", "Returns", "Yields", "Raises"):
                section_patterns.append((rf"{label}:", label))

        # Default patterns for common sections
        default_patterns: list[tuple[str, str]] = [
            (r'!!! note "Summary"', "Summary"),
            (r'!!! details "Details"', "Details"),
            (r'\?\?\?\+ example "Examples"', "Examples"),
            (r'\?\?\?\+ success "Credit"', "Credit"),
            (r'\?\?\?\+ calculation "Equation"', "Equation"),
            (r'\?\?\?\+ info "Notes"', "Notes"),
            (r'\?\?\? question "References"', "References"),
            (r'\?\?\? tip "See Also"', "See Also"),
        ]

        # Locate every known section header and sort by position in the docstring
        found_sections: list[tuple[int, str]] = []
        for pattern, section_name in section_patterns + default_patterns:
            match = re.search(pattern, docstring, re.IGNORECASE)
            if match:
                found_sections.append((match.start(), section_name))
        found_sections.sort(key=lambda found: found[0])

        # Rank of each label in the expected order; the first occurrence wins, so the
        # configured position takes precedence over the built-in default position.
        expected_order: list[str] = [self._canonical_section_name(s.name) for s in configured]
        expected_order.extend(name for _, name in default_patterns)
        rank: dict[str, int] = {}
        for index, label in enumerate(expected_order):
            rank.setdefault(label, index)

        # Walk the sections in document order; a rank lower than the previous one is misplaced
        errors: list[str] = []
        last_expected_index: int = -1
        for _, section_name in found_sections:
            current_index: int = rank[section_name]
            if current_index < last_expected_index:
                errors.append(f"Section '{section_name}' appears out of order")
            last_expected_index = current_index

        return errors