Coverage for src/docstring_format_checker/core.py: 100%

235 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-09-04 12:45 +0000

1# ============================================================================ # 

2# # 

3# Title: Title # 

4# Purpose: Core docstring checking functionality. # 

5# Notes: Notes # 

6# Author: chrimaho # 

7# Created: Created # 

8# References: References # 

9# Sources: Sources # 

10# Edited: Edited # 

11# # 

12# ============================================================================ # 

13 

14 

15# ---------------------------------------------------------------------------- # 

16# # 

17# Overview #### 

18# # 

19# ---------------------------------------------------------------------------- # 

20 

21 

22# ---------------------------------------------------------------------------- # 

23# Description #### 

24# ---------------------------------------------------------------------------- # 

25 

26 

27""" 

28!!! note "Summary" 

29 Core docstring checking functionality. 

30""" 

31 

32 

33# ---------------------------------------------------------------------------- # 

34# # 

35# Setup #### 

36# # 

37# ---------------------------------------------------------------------------- # 

38 

39 

40## --------------------------------------------------------------------------- # 

41## Imports #### 

42## --------------------------------------------------------------------------- # 

43 

44 

45# ## Python StdLib Imports ---- 

46import ast 

47import fnmatch 

48import re 

49from pathlib import Path 

50from typing import Literal, NamedTuple, Optional, Union 

51 

52# ## Local First Party Imports ---- 

53from docstring_format_checker.config import SectionConfig 

54from docstring_format_checker.utils.exceptions import ( 

55 DirectoryNotFoundError, 

56 DocstringError, 

57 InvalidFileError, 

58) 

59 

60 

61## --------------------------------------------------------------------------- # 

62## Exports #### 

63## --------------------------------------------------------------------------- # 

64 

65 

# Public names of this module. `SectionConfig` and `DocstringError` are
# imported above from sibling modules and re-exported here alongside the two
# classes defined in this file.
__all__: list[str] = [
    "DocstringChecker",
    "FunctionAndClassDetails",
    "SectionConfig",
    "DocstringError",
]

72 

73 

74# ---------------------------------------------------------------------------- # 

75# # 

76# Main Section #### 

77# # 

78# ---------------------------------------------------------------------------- # 

79 

80 

class FunctionAndClassDetails(NamedTuple):
    """
    Details about a function or class found in the AST.

    One record is produced per definition collected by
    `DocstringChecker._extract_items()` and later consumed by the docstring
    checks, which read `node`, `name`, `lineno` and `item_type`.
    """

    # Kind of definition this record describes.
    item_type: Literal["function", "class", "method"]
    # Name of the definition, taken from `node.name`.
    name: str
    # The AST node of the definition itself; its docstring is read from here.
    node: Union[ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef]
    # Line number of the definition, taken from `node.lineno`.
    lineno: int
    # Name of the innermost enclosing class for functions found inside a class;
    # `None` for module-level functions and for every class record.
    parent_class: Optional[str] = None

91 

92 

93class DocstringChecker: 

94 """ 

95 Main class for checking docstring format and completeness. 

96 """ 

97 

98 def __init__(self, sections_config: list[SectionConfig]) -> None: 

99 """ 

100 !!! note "Summary" 

101 Initialize the docstring checker. 

102 

103 Params: 

104 sections_config (list[SectionConfig]): 

105 List of section configurations to check against. 

106 """ 

107 self.sections_config: list[SectionConfig] = sections_config 

108 self.required_sections: list[SectionConfig] = [s for s in sections_config if s.required] 

109 self.optional_sections: list[SectionConfig] = [s for s in sections_config if not s.required] 

110 

def check_file(self, file_path: Union[str, Path]) -> list[DocstringError]:
    """
    !!! note "Summary"
        Validate the docstrings of every collected definition in one Python file.

    Params:
        file_path (Union[str, Path]):
            Location of the Python source file to inspect.

    Returns:
        (list[DocstringError]):
            One entry per definition whose docstring failed validation; empty when the file is clean.

    Raises:
        (FileNotFoundError):
            If nothing exists at `file_path`.
        (InvalidFileError):
            If the path does not end in `.py`.
        (UnicodeError):
            If the file content is not valid UTF-8.
        (SyntaxError):
            If the file content cannot be parsed as Python.
    """

    source_path = Path(file_path)

    # Guard clauses: reject paths we cannot or should not parse.
    if not source_path.exists():
        raise FileNotFoundError(f"File not found: {source_path}")
    if source_path.suffix != ".py":
        raise InvalidFileError(f"File must be a Python file (.py): {source_path}")

    # Load the source text; decoding problems are re-raised with the file name attached.
    try:
        content: str = source_path.read_text(encoding="utf-8")
    except UnicodeDecodeError as e:
        raise UnicodeError(f"Cannot decode file {source_path}: {e}") from e

    # Parse it; syntax problems are likewise re-raised with the file name attached.
    try:
        tree: ast.Module = ast.parse(content)
    except SyntaxError as e:
        raise SyntaxError(f"Invalid Python syntax in {source_path}: {e}") from e

    # Each definition is checked independently so one failure does not hide the others.
    reported_path = str(source_path)
    failures: list[DocstringError] = []
    for definition in self._extract_items(tree):
        try:
            self._check_single_docstring(definition, reported_path)
        except DocstringError as e:
            failures.append(e)

    return failures

166 

def check_directory(
    self,
    directory_path: Union[str, Path],
    recursive: bool = True,
    exclude_patterns: Optional[list[str]] = None,
) -> dict[str, list[DocstringError]]:
    """
    !!! note "Summary"
        Run the docstring check over every `.py` file found in a directory.

    Params:
        directory_path (Union[str, Path]):
            Directory whose Python files should be checked.
        recursive (bool):
            When `True`, files in subdirectories are included as well.
        exclude_patterns (Optional[list[str]]):
            `fnmatch`-style patterns, matched against each file's path relative to `directory_path`; matching files are skipped.

    Raises:
        (FileNotFoundError):
            If `directory_path` does not exist.
        (DirectoryNotFoundError):
            If `directory_path` exists but is not a directory.

    Returns:
        (dict[str, list[DocstringError]]):
            Mapping of file path to the problems found in that file. Files without problems are left out.
    """

    root = Path(directory_path)
    if not root.exists():
        raise FileNotFoundError(f"Directory not found: {root}")
    if not root.is_dir():
        raise DirectoryNotFoundError(f"Path is not a directory: {root}")

    # Collect candidate files.
    glob_pattern = "**/*.py" if recursive else "*.py"
    python_files: list[Path] = list(root.glob(glob_pattern))

    # Drop every file whose root-relative path matches at least one exclusion pattern.
    if exclude_patterns:

        def is_excluded(candidate: Path) -> bool:
            relative = str(candidate.relative_to(root))
            return any(fnmatch.fnmatch(relative, excluded) for excluded in exclude_patterns)

        python_files = [candidate for candidate in python_files if not is_excluded(candidate)]

    results: dict[str, list[DocstringError]] = {}
    for candidate in python_files:
        key = str(candidate)
        try:
            file_errors: list[DocstringError] = self.check_file(candidate)
        except (FileNotFoundError, ValueError, SyntaxError) as e:
            # A file that cannot be read or parsed is reported as a single
            # file-level entry instead of aborting the whole directory run.
            results[key] = [
                DocstringError(
                    message=str(e),
                    file_path=key,
                    line_number=0,
                    item_name="",
                    item_type="file",
                )
            ]
        else:
            if file_errors:  # Only include files with errors
                results[key] = file_errors

    return results

244 

245 def _is_overload_function(self, node: Union[ast.FunctionDef, ast.AsyncFunctionDef]) -> bool: 

246 """ 

247 !!! note "Summary" 

248 Check if a function definition is decorated with @overload. 

249 

250 Params: 

251 node (Union[ast.FunctionDef, ast.AsyncFunctionDef]): 

252 The function node to check for @overload decorator. 

253 

254 Returns: 

255 (bool): 

256 True if the function has @overload decorator, False otherwise. 

257 """ 

258 for decorator in node.decorator_list: 

259 # Handle direct name reference: @overload 

260 if isinstance(decorator, ast.Name) and decorator.id == "overload": 

261 return True 

262 # Handle attribute reference: @typing.overload 

263 elif isinstance(decorator, ast.Attribute) and decorator.attr == "overload": 

264 return True 

265 return False 

266 

267 def _extract_items(self, tree: ast.AST) -> list[FunctionAndClassDetails]: 

268 """ 

269 !!! note "Summary" 

270 Extract all functions and classes from the AST. 

271 

272 Params: 

273 tree (ast.AST): 

274 The Abstract Syntax Tree (AST) to extract items from. 

275 

276 Returns: 

277 (list[FunctionAndClassDetails]): 

278 A list of extracted function and class details. 

279 """ 

280 

281 items: list[FunctionAndClassDetails] = [] 

282 

283 class ItemVisitor(ast.NodeVisitor): 

284 

285 def __init__(self, checker: DocstringChecker) -> None: 

286 self.class_stack: list[str] = [] 

287 self.checker: DocstringChecker = checker 

288 

289 def visit_ClassDef(self, node: ast.ClassDef) -> None: 

290 if not node.name.startswith("_"): # Skip private classes 

291 items.append( 

292 FunctionAndClassDetails( 

293 item_type="class", 

294 name=node.name, 

295 node=node, 

296 lineno=node.lineno, 

297 parent_class=None, 

298 ) 

299 ) 

300 

301 # Visit methods in this class 

302 self.class_stack.append(node.name) 

303 self.generic_visit(node) 

304 self.class_stack.pop() 

305 

306 def visit_FunctionDef(self, node: ast.FunctionDef) -> None: 

307 self._visit_function(node) 

308 

309 def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None: 

310 self._visit_function(node) 

311 

312 def _visit_function(self, node: Union[ast.FunctionDef, ast.AsyncFunctionDef]) -> None: 

313 """Visit function definition node (sync or async).""" 

314 

315 if not node.name.startswith("_"): # Skip private functions 

316 # Skip @overload functions - they don't need docstrings 

317 

318 if not self.checker._is_overload_function(node): 

319 item_type: Literal["function", "method"] = "method" if self.class_stack else "function" 

320 parent_class: Optional[str] = self.class_stack[-1] if self.class_stack else None 

321 

322 items.append( 

323 FunctionAndClassDetails( 

324 item_type=item_type, 

325 name=node.name, 

326 node=node, 

327 lineno=node.lineno, 

328 parent_class=parent_class, 

329 ) 

330 ) 

331 

332 self.generic_visit(node) 

333 

334 visitor = ItemVisitor(self) 

335 visitor.visit(tree) 

336 

337 return items 

338 

339 def _check_single_docstring(self, item: FunctionAndClassDetails, file_path: str) -> None: 

340 """ 

341 !!! note "Summary" 

342 Check a single function or class docstring. 

343 

344 Params: 

345 item (FunctionAndClassDetails): 

346 The function or class to check. 

347 file_path (str): 

348 The path to the file containing the item. 

349 

350 Returns: 

351 (None): 

352 Nothing is returned. 

353 """ 

354 

355 docstring: Optional[str] = ast.get_docstring(item.node) 

356 

357 # Check if any required sections apply to this item type 

358 requires_docstring = False 

359 applicable_sections: list[SectionConfig] = [] 

360 

361 for section in self.sections_config: 

362 if section.required: 

363 # Check if this section applies to this item type 

364 if section.type == "free_text": 

365 # Free text sections apply only to functions and methods, not classes 

366 if isinstance(item.node, (ast.FunctionDef, ast.AsyncFunctionDef)): 

367 requires_docstring = True 

368 applicable_sections.append(section) 

369 elif section.type == "list_name_and_type": 

370 if section.name.lower() == "params" and isinstance( 

371 item.node, (ast.FunctionDef, ast.AsyncFunctionDef) 

372 ): 

373 # Params only apply to functions/methods 

374 requires_docstring = True 

375 applicable_sections.append(section) 

376 elif section.name.lower() in ["returns", "return"] and isinstance( 

377 item.node, (ast.FunctionDef, ast.AsyncFunctionDef) 

378 ): 

379 # Returns only apply to functions/methods 

380 requires_docstring = True 

381 applicable_sections.append(section) 

382 elif section.type in ["list_type", "list_name"]: 

383 # These sections apply to functions/methods that might have them 

384 if isinstance(item.node, (ast.FunctionDef, ast.AsyncFunctionDef)): 

385 requires_docstring = True 

386 applicable_sections.append(section) 

387 

388 if not docstring: 

389 if requires_docstring: 

390 message: str = f"Missing docstring for {item.item_type}" 

391 raise DocstringError( 

392 message=message, 

393 file_path=file_path, 

394 line_number=item.lineno, 

395 item_name=item.name, 

396 item_type=item.item_type, 

397 ) 

398 return # No docstring required 

399 

400 # Validate docstring sections if docstring exists 

401 self._validate_docstring_sections(docstring, item, file_path) 

402 

403 def _validate_docstring_sections( 

404 self, 

405 docstring: str, 

406 item: FunctionAndClassDetails, 

407 file_path: str, 

408 ) -> None: 

409 """ 

410 !!! note "Summary" 

411 Validate the sections within a docstring. 

412 

413 Params: 

414 docstring (str): 

415 The docstring to validate. 

416 item (FunctionAndClassDetails): 

417 The function or class to check. 

418 file_path (str): 

419 The path to the file containing the item. 

420 

421 Returns: 

422 (None): 

423 Nothing is returned. 

424 """ 

425 errors: list[str] = [] 

426 

427 # Check each required section 

428 for section in self.required_sections: 

429 if section.type == "free_text": 

430 if not self._check_free_text_section(docstring, section): 

431 errors.append(f"Missing required section: {section.name}") 

432 

433 elif section.type == "list_name_and_type": 

434 if section.name.lower() == "params" and isinstance(item.node, (ast.FunctionDef, ast.AsyncFunctionDef)): 

435 if not self._check_params_section(docstring, item.node): 

436 errors.append("Missing or invalid Params section") 

437 elif section.name.lower() in ["returns", "return"]: 

438 if not self._check_returns_section(docstring): 

439 errors.append("Missing or invalid Returns section") 

440 

441 elif section.type == "list_type": 

442 if section.name.lower() in ["raises", "raise"]: 

443 if not self._check_raises_section(docstring): 

444 errors.append("Missing or invalid Raises section") 

445 elif section.name.lower() in ["yields", "yield"]: 

446 if not self._check_yields_section(docstring): 

447 errors.append("Missing or invalid Yields section") 

448 

449 elif section.type == "list_name": 

450 # Simple name sections - check if they exist 

451 if not self._check_simple_section(docstring, section.name): 

452 errors.append(f"Missing required section: {section.name}") 

453 

454 # Check section order 

455 order_errors: list[str] = self._check_section_order(docstring) 

456 errors.extend(order_errors) 

457 

458 # Check for mutual exclusivity (returns vs yields) 

459 if self._has_both_returns_and_yields(docstring): 

460 errors.append("Docstring cannot have both Returns and Yields sections") 

461 

462 if errors: 

463 combined_message: str = "; ".join(errors) 

464 raise DocstringError( 

465 message=combined_message, 

466 file_path=file_path, 

467 line_number=item.lineno, 

468 item_name=item.name, 

469 item_type=item.item_type, 

470 ) 

471 

472 def _check_free_text_section(self, docstring: str, section: SectionConfig) -> bool: 

473 """ 

474 !!! note "Summary" 

475 Check if a free text section exists in the docstring. 

476 

477 Params: 

478 docstring (str): 

479 The docstring to check. 

480 section (SectionConfig): 

481 The section configuration to validate. 

482 

483 Returns: 

484 (bool): 

485 `True` if the section exists, `False` otherwise. 

486 """ 

487 if section.admonition and section.prefix: 

488 # Format like: !!! note "Summary" 

489 pattern = rf'{re.escape(section.prefix)}\s+{re.escape(section.admonition)}\s+".*{re.escape(section.name)}"' 

490 return bool(re.search(pattern, docstring, re.IGNORECASE)) 

491 elif section.name.lower() in ["summary"]: 

492 # For summary, accept either formal format or simple docstring 

493 formal_pattern = r'!!! note "Summary"' 

494 if re.search(formal_pattern, docstring, re.IGNORECASE): 

495 return True 

496 # Accept any non-empty docstring as summary 

497 return len(docstring.strip()) > 0 

498 elif section.name.lower() in ["examples", "example"]: 

499 # Look for examples section 

500 return bool(re.search(r'\?\?\?\+ example "Examples"', docstring, re.IGNORECASE)) 

501 

502 return True # Default to true for unknown free text sections 

503 

504 def _check_params_section(self, docstring: str, node: Union[ast.FunctionDef, ast.AsyncFunctionDef]) -> bool: 

505 """ 

506 !!! note "Summary" 

507 Check if the Params section exists and documents all parameters. 

508 

509 Params: 

510 docstring (str): 

511 The docstring to check. 

512 node (Union[ast.FunctionDef, ast.AsyncFunctionDef]): 

513 The function node to check. 

514 

515 Returns: 

516 (bool): 

517 `True` if the section exists and is valid, `False` otherwise. 

518 """ 

519 # Get function parameters (excluding 'self' for methods) 

520 params: list[str] = [arg.arg for arg in node.args.args if arg.arg != "self"] 

521 

522 if not params: 

523 return True # No parameters to document 

524 

525 # Check if Params section exists 

526 if not re.search(r"Params:", docstring): 

527 return False 

528 

529 # Check each parameter is documented 

530 for param in params: 

531 param_pattern: str = rf"{re.escape(param)}\s*\([^)]+\):" 

532 if not re.search(param_pattern, docstring): 

533 return False 

534 

535 return True 

536 

537 def _check_returns_section(self, docstring: str) -> bool: 

538 """ 

539 !!! note "Summary" 

540 Check if the Returns section exists. 

541 

542 Params: 

543 docstring (str): 

544 The docstring to check. 

545 

546 Returns: 

547 (bool): 

548 `True` if the section exists, `False` otherwise. 

549 """ 

550 return bool(re.search(r"Returns:", docstring)) 

551 

552 def _check_raises_section(self, docstring: str) -> bool: 

553 """ 

554 !!! note "Summary" 

555 Check if the Raises section exists. 

556 

557 Params: 

558 docstring (str): 

559 The docstring to check. 

560 

561 Returns: 

562 (bool): 

563 `True` if the section exists, `False` otherwise. 

564 """ 

565 return bool(re.search(r"Raises:", docstring)) 

566 

567 def _has_both_returns_and_yields(self, docstring: str) -> bool: 

568 """ 

569 !!! note "Summary" 

570 Check if docstring has both Returns and Yields sections. 

571 

572 Params: 

573 docstring (str): 

574 The docstring to check. 

575 

576 Returns: 

577 (bool): 

578 `True` if the section exists, `False` otherwise. 

579 """ 

580 has_returns = bool(re.search(r"Returns:", docstring)) 

581 has_yields = bool(re.search(r"Yields:", docstring)) 

582 return has_returns and has_yields 

583 

584 def _check_section_order(self, docstring: str) -> list[str]: 

585 """ 

586 !!! note "Summary" 

587 Check that sections appear in the correct order. 

588 

589 Params: 

590 docstring (str): 

591 The docstring to check. 

592 

593 Returns: 

594 (list[str]): 

595 A list of error messages, if any. 

596 """ 

597 # Build expected order from configuration 

598 section_patterns: list[tuple[str, str]] = [] 

599 for section in sorted(self.sections_config, key=lambda x: x.order): 

600 if section.type == "free_text" and section.admonition and section.prefix: 

601 pattern: str = ( 

602 rf'{re.escape(section.prefix)}\s+{re.escape(section.admonition)}\s+".*{re.escape(section.name)}"' 

603 ) 

604 section_patterns.append((pattern, section.name)) 

605 elif section.name.lower() == "params": 

606 section_patterns.append((r"Params:", "Params")) 

607 elif section.name.lower() in ["returns", "return"]: 

608 section_patterns.append((r"Returns:", "Returns")) 

609 elif section.name.lower() in ["yields", "yield"]: 

610 section_patterns.append((r"Yields:", "Yields")) 

611 elif section.name.lower() in ["raises", "raise"]: 

612 section_patterns.append((r"Raises:", "Raises")) 

613 

614 # Add some default patterns for common sections 

615 default_patterns: list[tuple[str, str]] = [ 

616 (r'!!! note "Summary"', "Summary"), 

617 (r'!!! details "Details"', "Details"), 

618 (r'\?\?\?\+ example "Examples"', "Examples"), 

619 (r'\?\?\?\+ success "Credit"', "Credit"), 

620 (r'\?\?\?\+ calculation "Equation"', "Equation"), 

621 (r'\?\?\?\+ info "Notes"', "Notes"), 

622 (r'\?\?\? question "References"', "References"), 

623 (r'\?\?\? tip "See Also"', "See Also"), 

624 ] 

625 

626 all_patterns: list[tuple[str, str]] = section_patterns + default_patterns 

627 

628 found_sections: list[tuple[int, str]] = [] 

629 for pattern, section_name in all_patterns: 

630 match: Optional[re.Match[str]] = re.search(pattern, docstring, re.IGNORECASE) 

631 if match: 

632 found_sections.append((match.start(), section_name)) 

633 

634 # Sort by position in docstring 

635 found_sections.sort(key=lambda x: x[0]) 

636 

637 # Build expected order 

638 expected_order: list[str] = [s.name.title() for s in sorted(self.sections_config, key=lambda x: x.order)] 

639 expected_order.extend( 

640 [ 

641 "Summary", 

642 "Details", 

643 "Examples", 

644 "Credit", 

645 "Equation", 

646 "Notes", 

647 "References", 

648 "See Also", 

649 ] 

650 ) 

651 

652 # Check order matches expected order 

653 errors: list[str] = [] 

654 last_expected_index = -1 

655 for _, section_name in found_sections: 

656 try: 

657 current_index: int = expected_order.index(section_name) 

658 if current_index < last_expected_index: 

659 errors.append(f"Section '{section_name}' appears out of order") 

660 last_expected_index: int = current_index 

661 except ValueError: 

662 # Section not in expected order list - might be OK 

663 pass 

664 

665 return errors 

666 

667 def _check_yields_section(self, docstring: str) -> bool: 

668 """ 

669 !!! note "Summary" 

670 Check if the Yields section exists. 

671 

672 Params: 

673 docstring (str): 

674 The docstring to check. 

675 

676 Returns: 

677 (bool): 

678 `True` if the section exists, `False` otherwise. 

679 """ 

680 return bool(re.search(r"Yields:", docstring)) 

681 

682 def _check_simple_section(self, docstring: str, section_name: str) -> bool: 

683 """ 

684 !!! note "Summary" 

685 Check if a simple named section exists. 

686 

687 Params: 

688 docstring (str): 

689 The docstring to check. 

690 section_name (str): 

691 The name of the section to check for. 

692 

693 Returns: 

694 (bool): 

695 `True` if the section exists, `False` otherwise. 

696 """ 

697 pattern = rf"{re.escape(section_name)}:" 

698 return bool(re.search(pattern, docstring, re.IGNORECASE))