#!/usr/bin/env python3
"""
Static analysis tool to check Python binding functions for missing py_retval() assignments.
This tool checks whether Python binding functions properly set return values before returning true.
According to pocketpy conventions, when a binding function returns true, it MUST either:
1. Assign a value to py_retval() using py_new* functions, py_assign, etc.
2. Set the return value to None using py_newnone(py_retval())
3. Call a function that sets py_retval() internally (like py_import, py_call, py_iter, etc.)
Usage:
python scripts/check_binding_retval.py [--verbose]
Exit codes:
0: No issues found
1: Issues found
2: Script error
"""
  16. import os
  17. import re
  18. import sys
  19. import argparse
  20. from typing import List, Dict, Tuple, Set
  21. # Functions that set py_retval() internally
  22. RETVAL_SETTING_FUNCTIONS = {
  23. 'py_import', # Sets py_retval() on success
  24. 'py_call', # Sets py_retval() with result
  25. 'py_iter', # Sets py_retval() with iterator
  26. 'py_str', # Sets py_retval() with string representation
  27. 'py_repr', # Sets py_retval() with repr string
  28. 'py_getattr', # Sets py_retval() with attribute value
  29. 'py_next', # Sets py_retval() with next value
  30. 'py_getitem', # Sets py_retval() with item
  31. 'py_vectorcall', # Sets py_retval() with call result
  32. }
  33. # Patterns that indicate py_retval() is being set
  34. RETVAL_PATTERNS = [
  35. r'py_retval\(\)', # Direct py_retval() usage
  36. r'py_new\w+\s*\(\s*py_retval\(\)', # py_newint(py_retval(), ...)
  37. r'py_assign\s*\(\s*py_retval\(\)', # py_assign(py_retval(), ...)
  38. r'\*py_retval\(\)\s*=', # *py_retval() = ...
  39. ]
  40. class BindingChecker:
  41. """Checker for Python binding functions."""
  42. def __init__(self, verbose: bool = False):
  43. self.verbose = verbose
  44. self.issues: List[Dict] = []
  45. def log(self, message: str):
  46. """Log message if verbose mode is enabled."""
  47. if self.verbose:
  48. print(f"[DEBUG] {message}")
  49. def find_c_files(self, *directories: str) -> List[str]:
  50. """Find all .c files in the given directories."""
  51. c_files = []
  52. for directory in directories:
  53. if not os.path.exists(directory):
  54. self.log(f"Directory not found: {directory}")
  55. continue
  56. for root, _, files in os.walk(directory):
  57. for file in files:
  58. if file.endswith('.c'):
  59. c_files.append(os.path.join(root, file))
  60. return c_files
  61. def extract_functions(self, content: str) -> Dict[str, Dict]:
  62. """Extract all bool-returning functions from C code."""
  63. # Pattern to match function declarations (start of bool functions)
  64. pattern = r'(?:static\s+)?bool\s+(\w+)\s*\(([^)]*)\)\s*\{'
  65. functions = {}
  66. for match in re.finditer(pattern, content):
  67. func_name = match.group(1)
  68. func_params = match.group(2)
  69. start_pos = match.end() # Position after the opening brace
  70. # Find matching closing brace using brace counting
  71. brace_count = 1
  72. pos = start_pos
  73. while pos < len(content) and brace_count > 0:
  74. if content[pos] == '{':
  75. brace_count += 1
  76. elif content[pos] == '}':
  77. brace_count -= 1
  78. pos += 1
  79. if brace_count == 0:
  80. # Successfully found matching brace
  81. func_body = content[start_pos:pos-1] # Exclude closing brace
  82. full_func = content[match.start():pos]
  83. functions[func_name] = {
  84. 'params': func_params,
  85. 'body': func_body,
  86. 'full': full_func,
  87. 'start_pos': match.start(),
  88. }
  89. return functions
  90. def get_bound_functions(self, content: str) -> Set[str]:
  91. """Find functions that are bound as Python callables."""
  92. bound_funcs = set()
  93. # Binding patterns used in pocketpy
  94. patterns = [
  95. r'py_bindfunc\s*\([^,]+,\s*"[^"]+",\s*(\w+)\)',
  96. r'py_bind\s*\([^,]+,\s*"[^"]*",\s*(\w+)\)',
  97. r'py_bindmagic\s*\([^,]+,\s*\w+,\s*(\w+)\)',
  98. r'py_bindmethod\s*\([^,]+,\s*"[^"]+",\s*(\w+)\)',
  99. r'py_bindproperty\s*\([^,]+,\s*"[^"]+",\s*(\w+)(?:,|\))',
  100. ]
  101. for pattern in patterns:
  102. for match in re.finditer(pattern, content):
  103. func_name = match.group(1)
  104. bound_funcs.add(func_name)
  105. self.log(f"Found bound function: {func_name}")
  106. return bound_funcs
  107. def remove_comments(self, code: str) -> str:
  108. """Remove C-style comments from code."""
  109. # Remove single-line comments
  110. code = re.sub(r'//.*?$', '', code, flags=re.MULTILINE)
  111. # Remove multi-line comments
  112. code = re.sub(r'/\*.*?\*/', '', code, flags=re.DOTALL)
  113. return code
  114. def has_retval_usage(self, func_body: str) -> bool:
  115. """Check if function body uses py_retval() in any form."""
  116. # Remove comments to avoid false positives
  117. code_without_comments = self.remove_comments(func_body)
  118. # Check for direct patterns
  119. for pattern in RETVAL_PATTERNS:
  120. if re.search(pattern, code_without_comments):
  121. return True
  122. # Check for functions that set py_retval internally
  123. # Use word boundaries to avoid matching substrings in comments or other identifiers
  124. for func in RETVAL_SETTING_FUNCTIONS:
  125. pattern = r'\b' + re.escape(func) + r'\s*\('
  126. if re.search(pattern, code_without_comments):
  127. return True
  128. return False
  129. def analyze_return_statements(self, func_body: str, func_name: str) -> List[Dict]:
  130. """Analyze return true statements in the function."""
  131. lines = func_body.split('\n')
  132. suspicious_returns = []
  133. for i, line in enumerate(lines):
  134. # Look for "return true" statements
  135. if re.search(r'\breturn\s+true\b', line):
  136. # Get context (10 lines before the return)
  137. start = max(0, i - 10)
  138. context_lines = lines[start:i+1]
  139. context = '\n'.join(context_lines)
  140. suspicious_returns.append({
  141. 'line_num': i + 1,
  142. 'line': line.strip(),
  143. 'context': context,
  144. })
  145. return suspicious_returns
  146. def check_function(self, func_name: str, func_info: Dict, filepath: str) -> bool:
  147. """
  148. Check if a bound function properly sets py_retval() before returning true.
  149. Returns True if there's an issue, False otherwise.
  150. """
  151. func_body = func_info['body']
  152. # Skip if function doesn't return true
  153. if 'return true' not in func_body:
  154. self.log(f"Function {func_name} doesn't return true, skipping")
  155. return False
  156. # Check if function has any py_retval usage
  157. if self.has_retval_usage(func_body):
  158. self.log(f"Function {func_name} uses py_retval(), OK")
  159. return False
  160. # Found a potential issue
  161. self.log(f"Function {func_name} returns true without py_retval()!")
  162. suspicious_returns = self.analyze_return_statements(func_body, func_name)
  163. issue = {
  164. 'file': filepath,
  165. 'function': func_name,
  166. 'full_code': func_info['full'],
  167. 'suspicious_returns': suspicious_returns,
  168. }
  169. self.issues.append(issue)
  170. return True
  171. def check_file(self, filepath: str) -> int:
  172. """Check all bound functions in a file."""
  173. self.log(f"Checking file: {filepath}")
  174. try:
  175. with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
  176. content = f.read()
  177. except Exception as e:
  178. print(f"Error reading {filepath}: {e}", file=sys.stderr)
  179. return 0
  180. # Extract functions and find bound ones
  181. functions = self.extract_functions(content)
  182. bound_funcs = self.get_bound_functions(content)
  183. if not bound_funcs:
  184. self.log(f"No bound functions found in {filepath}")
  185. return 0
  186. issues_count = 0
  187. for func_name in bound_funcs:
  188. if func_name not in functions:
  189. self.log(f"Bound function {func_name} not found in extracted functions")
  190. continue
  191. if self.check_function(func_name, functions[func_name], filepath):
  192. issues_count += 1
  193. return issues_count
  194. def check_directories(self, *directories: str) -> int:
  195. """Check all C files in the given directories."""
  196. c_files = self.find_c_files(*directories)
  197. if not c_files:
  198. print("No C files found to check", file=sys.stderr)
  199. return 0
  200. self.log(f"Found {len(c_files)} C files to check")
  201. total_issues = 0
  202. for filepath in c_files:
  203. issues = self.check_file(filepath)
  204. total_issues += issues
  205. return total_issues
  206. def print_report(self):
  207. """Print a detailed report of all issues found."""
  208. if not self.issues:
  209. print("✓ No issues found! All Python binding functions properly set py_retval().")
  210. return
  211. print(f"\n{'='*80}")
  212. print(f"Found {len(self.issues)} function(s) with potential issues:")
  213. print(f"{'='*80}\n")
  214. for i, issue in enumerate(self.issues, 1):
  215. print(f"Issue #{i}:")
  216. print(f" File: {issue['file']}")
  217. print(f" Function: {issue['function']}")
  218. print(f" Problem: Function returns true but doesn't set py_retval()")
  219. print(f"\n Function code:")
  220. print(" " + "-" * 76)
  221. for line in issue['full_code'].split('\n'):
  222. print(f" {line}")
  223. print(" " + "-" * 76)
  224. if issue['suspicious_returns']:
  225. print(f"\n Found {len(issue['suspicious_returns'])} 'return true' statement(s):")
  226. for ret in issue['suspicious_returns']:
  227. print(f" Line {ret['line_num']}: {ret['line']}")
  228. print(f"\n{'='*80}\n")
  229. def main():
  230. parser = argparse.ArgumentParser(
  231. description='Check Python binding functions for missing py_retval() assignments',
  232. formatter_class=argparse.RawDescriptionHelpFormatter,
  233. epilog=__doc__
  234. )
  235. parser.add_argument(
  236. '--verbose', '-v',
  237. action='store_true',
  238. help='Enable verbose output for debugging'
  239. )
  240. parser.add_argument(
  241. '--dirs',
  242. nargs='+',
  243. default=['src/bindings', 'src/modules'],
  244. help='Directories to check (default: src/bindings src/modules)'
  245. )
  246. args = parser.parse_args()
  247. # Create checker and run analysis
  248. checker = BindingChecker(verbose=args.verbose)
  249. print("Checking Python binding functions for missing py_retval() assignments...")
  250. print(f"Target directories: {', '.join(args.dirs)}")
  251. print()
  252. try:
  253. total_issues = checker.check_directories(*args.dirs)
  254. checker.print_report()
  255. # Exit with appropriate code
  256. sys.exit(1 if total_issues > 0 else 0)
  257. except Exception as e:
  258. print(f"\nError during analysis: {e}", file=sys.stderr)
  259. if args.verbose:
  260. import traceback
  261. traceback.print_exc()
  262. sys.exit(2)
  263. if __name__ == '__main__':
  264. main()