【AI】用 Python 输出指定目录下 C/C++ 源文件中每个函数调用了哪些函数
HappyOJ  post at 2 months ago 12.2k 0 0
AI

用 Python 输出指定目录下 C/C++ 源文件中每个函数调用了哪些函数

  1. import re
  2. from pathlib import Path
  3. from collections import defaultdict
  4. TRACE_FUNCTION = {'cmd_main_entry', 'cmd_match_unique_string'}
  5. # C/C++ keywords
  6. C_KEYWORDS = {
  7. 'auto', 'break', 'case', 'char', 'const', 'continue', 'default', 'do',
  8. 'double', 'else', 'enum', 'extern', 'float', 'for', 'goto', 'if',
  9. 'int', 'long', 'register', 'return', 'short', 'signed', 'sizeof', 'static',
  10. 'struct', 'switch', 'typedef', 'union', 'unsigned', 'void', 'volatile', 'while',
  11. '_Alignas', '_Alignof', '_Atomic', '_Bool', '_Complex', '_Generic', '_Imaginary',
  12. '_Noreturn', '_Static_assert', '_Thread_local'
  13. }
  14. # C++ additional keywords
  15. CPP_KEYWORDS = {
  16. 'alignas', 'alignof', 'and', 'and_eq', 'asm', 'atomic_cancel', 'atomic_commit',
  17. 'atomic_noexcept', 'auto', 'bitand', 'bitor', 'bool', 'break', 'case', 'catch',
  18. 'char', 'char8_t', 'char16_t', 'char32_t', 'class', 'compl', 'concept', 'const',
  19. 'consteval', 'constexpr', 'constinit', 'const_cast', 'continue', 'co_await', 'co_return',
  20. 'co_yield', 'decltype', 'default', 'delete', 'do', 'double', 'dynamic_cast', 'else',
  21. 'enum', 'explicit', 'export', 'extern', 'false', 'float', 'for', 'friend', 'goto', 'if',
  22. 'inline', 'int', 'long', 'mutable', 'namespace', 'new', 'noexcept', 'not', 'not_eq', 'nullptr',
  23. 'operator', 'or', 'or_eq', 'private', 'protected', 'public', 'reflexpr', 'register', 'reinterpret_cast',
  24. 'requires', 'return', 'short', 'signed', 'sizeof', 'static', 'static_assert', 'static_cast', 'struct',
  25. 'switch', 'synchronized', 'template', 'this', 'thread_local', 'throw', 'true', 'try', 'typedef', 'typeid',
  26. 'typename', 'union', 'unsigned', 'using', 'virtual', 'void', 'volatile', 'wchar_t', 'while', 'xor', 'xor_eq'
  27. }
  28. # Combine C and C++ keywords
  29. ALL_KEYWORDS = C_KEYWORDS.union(CPP_KEYWORDS)
  30. # Control flow statements that take parentheses
  31. CONTROL_FLOW_STATEMENTS = {
  32. 'if', 'for', 'while', 'switch', 'catch'
  33. }
  34. # C++ cast operators
  35. CPP_CAST_OPERATORS = {
  36. 'static_cast', 'dynamic_cast', 'reinterpret_cast', 'const_cast'
  37. }
  38. def find_functions_in_code(source_code: str, is_cpp: bool) -> list[tuple[str, str, int, str]]:
  39. """Find all function definitions in C/C++ code
  40. Args:
  41. source_code (str): Source code
  42. is_cpp (bool): True for C++, False for C
  43. Returns:
  44. list[tuple[str, str, int, str]]: List of functions (name, return_type, line_number, body)
  45. """
  46. code = remove_comments(source_code)
  47. # Improved pattern to match function signature more accurately
  48. if is_cpp:
  49. signature_pattern = re.compile(r'''
  50. (?:template\s*<[^>]*>\s*)? # Optional template declaration
  51. ([\w\:\*&\s]+?) # Return type
  52. \s*
  53. ([\w\:]+) # Function name
  54. \s*
  55. \(([^)]*)\) # Parameter list
  56. \s*
  57. (?:const)?\s* # Optional const qualifier
  58. \{ # Function body start
  59. ''', re.VERBOSE)
  60. else:
  61. signature_pattern = re.compile(r'''
  62. (\w+\s*[\*\s]*?) # Return type
  63. \s+
  64. (\w+) # Function name
  65. \s*
  66. \(([^)]*)\) # Parameter list
  67. \s*
  68. \{ # Function body start
  69. ''', re.VERBOSE)
  70. functions = []
  71. for match in signature_pattern.finditer(code):
  72. return_type = match.group(1).strip()
  73. function_name = match.group(2).strip()
  74. start_pos = match.end() - 1 # Position of the opening brace {
  75. # Find the matching closing brace using a stack
  76. body, end_pos = extract_balanced_block(code, start_pos)
  77. if body is None:
  78. continue
  79. line_number = code[:start_pos].count('\n') + 1
  80. if function_name not in ALL_KEYWORDS:
  81. functions.append((function_name, return_type, line_number, body))
  82. return functions
  83. def extract_balanced_block(code: str, start_pos: int) -> tuple[str, int]:
  84. """Extract a balanced block of code starting at start_pos, handling nested braces"""
  85. if start_pos >= len(code) or code[start_pos] != '{':
  86. return None, start_pos
  87. stack = ['{']
  88. end_pos = start_pos + 1
  89. while end_pos < len(code):
  90. char = code[end_pos]
  91. if char == '{':
  92. stack.append('{')
  93. elif char == '}':
  94. stack.pop()
  95. if not stack:
  96. # Found matching closing brace
  97. return code[start_pos+1:end_pos], end_pos + 1
  98. end_pos += 1
  99. # If we reach here, there's an unmatched opening brace
  100. return code[start_pos+1:], end_pos
  101. def extract_function_calls(function_name:str, function_body: str, is_cpp: bool) -> list[str]:
  102. """Extract function calls from a function body
  103. Args:
  104. function_body (str): Function body code
  105. is_cpp (bool): True for C++, False for C
  106. Returns:
  107. list[str]: List of called function names
  108. """
  109. # Remove strings and character literals to avoid false matches
  110. code = re.sub(r'"[^"]*"', '', function_body)
  111. code = re.sub(r"'[^']*'", '', code)
  112. # Remove comments
  113. code = remove_comments(code)
  114. # Split code into statements to better identify control flow
  115. statements = split_into_statements(code)
  116. if function_name in TRACE_FUNCTION:
  117. print(f'{function_name}{statements}')
  118. calls = []
  119. for stmt in statements:
  120. stmt = stmt.strip()
  121. if not stmt:
  122. continue
  123. # Basic function call pattern
  124. if is_cpp:
  125. call_pattern = re.compile(r'''
  126. ([\w:]+) # Function name (allow namespaces)
  127. \s*
  128. \(([^)]*)\) # Argument list
  129. (?:\s*->\s*[\w:]+)? # Optional member access after call
  130. ''', re.VERBOSE)
  131. else:
  132. call_pattern = re.compile(r'''
  133. (\w+) # Function name
  134. \s*
  135. \(([^)]*)\) # Argument list
  136. ''', re.VERBOSE)
  137. if function_name in TRACE_FUNCTION:
  138. print(f'{stmt}')
  139. for match in call_pattern.finditer(stmt):
  140. func_name = match.group(1)
  141. # Split namespaced name
  142. components = func_name.split('::')
  143. last_component = components[-1]
  144. if function_name in TRACE_FUNCTION:
  145. print(f'{func_name}, {match.group(2)}, {last_component}')
  146. # Filter out keywords
  147. if is_cpp:
  148. if all(comp not in ALL_KEYWORDS for comp in components):
  149. calls.append(func_name)
  150. else:
  151. if func_name not in C_KEYWORDS:
  152. calls.append(func_name)
  153. # Argument list have function calls
  154. arg_list = match.group(2)
  155. arg_pattern = re.compile(r'([\w:]+)\s*\(', re.VERBOSE)
  156. if function_name in TRACE_FUNCTION:
  157. print(f'arg_list: {arg_list}')
  158. for arg_match in arg_pattern.finditer(arg_list):
  159. arg_func_name = arg_match.group(1)
  160. if arg_func_name not in ALL_KEYWORDS and arg_func_name not in CONTROL_FLOW_STATEMENTS:
  161. if function_name in TRACE_FUNCTION:
  162. print(f'{function_name} arg-> {arg_func_name}')
  163. calls.append(arg_func_name)
  164. seen = set()
  165. unique_calls = []
  166. for call in calls:
  167. if call not in seen:
  168. seen.add(call)
  169. unique_calls.append(call)
  170. return unique_calls
  171. def split_into_statements(code: str) -> list[str]:
  172. """Split code into individual statements
  173. This is a simplified approach that handles basic cases.
  174. Does not handle nested blocks perfectly but works for most common cases.
  175. """
  176. statements = []
  177. current = ""
  178. paren_count = 0
  179. brace_count = 0
  180. bracket_count = 0
  181. for char in code:
  182. current += char
  183. # Track nested parentheses, braces, and brackets
  184. if char == '(':
  185. paren_count += 1
  186. elif char == ')':
  187. paren_count -= 1
  188. elif char == '{':
  189. brace_count += 1
  190. elif char == '}':
  191. brace_count -= 1
  192. elif char == '[':
  193. bracket_count += 1
  194. elif char == ']':
  195. bracket_count -= 1
  196. # End of statement when all brackets are closed and a semicolon is encountered
  197. if char == ';' and paren_count == 0 and brace_count == 0 and bracket_count == 0:
  198. statements.append(current.strip())
  199. current = ""
  200. # Add any remaining code (last statement without trailing semicolon)
  201. if current.strip():
  202. statements.append(current.strip())
  203. return statements
  204. def remove_comments(source_code: str) -> str:
  205. """Remove comments from C/C++ code"""
  206. # Remove line comments
  207. code = re.sub(r'//.*?$', '', source_code, flags=re.MULTILINE)
  208. # Remove block comments
  209. code = re.sub(r'/\*.*?\*/', '', code, flags=re.DOTALL)
  210. return code
  211. def process_source_file(file_path: Path) -> list[tuple[str, str, int, Path, list[str]]]:
  212. """Process a source file and return functions with call information
  213. Returns:
  214. list[tuple[str, str, int, Path, list[str]]]:
  215. List of functions (name, return_type, line_number, file_path, called_functions)
  216. """
  217. try:
  218. with open(file_path, 'r', encoding='utf-8') as file:
  219. content = file.read()
  220. is_cpp = file_path.suffix.lower() in {'.cpp', '.cxx', '.cc', '.c++', '.hpp', '.hxx', '.hh', '.h++'}
  221. functions = find_functions_in_code(content, is_cpp)
  222. result = []
  223. for name, ret_type, line, body in functions:
  224. called_functions = extract_function_calls(name, body, is_cpp)
  225. result.append((name, ret_type, line, file_path, called_functions))
  226. return result
  227. except UnicodeDecodeError:
  228. print(f"Error: File '{file_path}' is not UTF-8 encoded")
  229. return []
  230. except Exception as e:
  231. print(f"Error processing file '{file_path}': {e}")
  232. return []
  233. def find_source_files(directory: Path) -> list[Path]:
  234. """Find all C/C++ source files in a directory recursively"""
  235. c_extensions = {'*.c', '*.h'}
  236. cpp_extensions = {'*.cpp', '*.cxx', '*.cc', '*.c++', '.hpp', '*.hxx', '*.hh', '*.h++'}
  237. all_files = []
  238. for ext in c_extensions.union(cpp_extensions):
  239. all_files.extend(directory.rglob(ext))
  240. return all_files
  241. def main():
  242. """Main function for command-line usage"""
  243. import argparse
  244. parser = argparse.ArgumentParser(description='Analyze C/C++ functions and their calls')
  245. parser.add_argument('path', help='C/C++ source file or directory path')
  246. parser.add_argument('--calls', action='store_true', help='Show functions called by each function')
  247. args = parser.parse_args()
  248. path = Path(args.path)
  249. if not path.exists():
  250. print(f"Error: Path '{path}' does not exist")
  251. return
  252. all_functions = []
  253. if path.is_file():
  254. if path.suffix.lower() not in {'.c', '.h', '.cpp', '.cxx', '.cc', '.c++', '.hpp', '.hxx', '.hh', '.h++'}:
  255. print(f"Warning: File '{path}' may not be a C/C++ source file")
  256. all_functions = process_source_file(path)
  257. else:
  258. source_files = find_source_files(path)
  259. if not source_files:
  260. print(f"Error: No C/C++ files found in directory '{path}'")
  261. return
  262. print(f"Found {len(source_files)} source files in directory '{path}', analyzing...")
  263. for file in source_files:
  264. file_functions = process_source_file(file)
  265. all_functions.extend(file_functions)
  266. if not all_functions:
  267. print("No function definitions found")
  268. return
  269. if args.calls:
  270. print(f"Total {len(all_functions)} functions found with call information:")
  271. print("=" * 120)
  272. print(f"{'Function Name':<30} {'Return Type':<25} {'Line':<8} {'File Path':<40} {'Called Functions'}")
  273. print("-" * 120)
  274. for name, ret_type, line, file_path, called in all_functions:
  275. called_str = ', '.join(called) if called else '-'
  276. print(f"{name:<30} {ret_type:<25} {line:<8} {str(file_path):<40} {called_str}")
  277. else:
  278. print(f"Total {len(all_functions)} functions found:")
  279. print("=" * 90)
  280. print(f"{'Function Name':<30} {'Return Type':<25} {'Line':<8} {'File Path'}")
  281. print("-" * 90)
  282. for name, ret_type, line, file_path, _ in all_functions:
  283. print(f"{name:<30} {ret_type:<25} {line:<8} {file_path}")
  284. if __name__ == "__main__":
  285. main()

1747846820249.png