# Python script that reports, for the C/C++ files under a given directory, which functions each defined function calls.
import re
from pathlib import Path
from collections import defaultdict
# Names of analysed functions for which verbose trace output is printed
# while their calls are being extracted.
TRACE_FUNCTION = {'cmd_main_entry', 'cmd_match_unique_string'}

# Reserved words of C.
C_KEYWORDS = set("""
    auto break case char const continue default do
    double else enum extern float for goto if
    int long register return short signed sizeof static
    struct switch typedef union unsigned void volatile while
    _Alignas _Alignof _Atomic _Bool _Complex _Generic _Imaginary
    _Noreturn _Static_assert _Thread_local
""".split())

# Reserved words of C++ (overlaps with the C list where both languages agree).
CPP_KEYWORDS = set("""
    alignas alignof and and_eq asm atomic_cancel atomic_commit
    atomic_noexcept auto bitand bitor bool break case catch
    char char8_t char16_t char32_t class compl concept const
    consteval constexpr constinit const_cast continue co_await co_return
    co_yield decltype default delete do double dynamic_cast else
    enum explicit export extern false float for friend goto if
    inline int long mutable namespace new noexcept not not_eq nullptr
    operator or or_eq private protected public reflexpr register reinterpret_cast
    requires return short signed sizeof static static_assert static_cast struct
    switch synchronized template this thread_local throw true try typedef typeid
    typename union unsigned using virtual void volatile wchar_t while xor xor_eq
""".split())

# Every word that is reserved in either language.
ALL_KEYWORDS = C_KEYWORDS | CPP_KEYWORDS

# Statements whose keyword is followed by a parenthesised expression.
CONTROL_FLOW_STATEMENTS = set('if for while switch catch'.split())

# Named cast operators of C++.
CPP_CAST_OPERATORS = set(
    'static_cast dynamic_cast reinterpret_cast const_cast'.split()
)
def find_functions_in_code(source_code: str, is_cpp: bool) -> list[tuple[str, str, int, str]]:
    """Find all function definitions in C/C++ code.

    Comments are stripped with ``remove_comments`` first; a regular
    expression then looks for ``<return type> <name>(<params>) {`` and the
    body is cut out with ``extract_balanced_block``.  Matches whose name is
    a language keyword (for example ``else if (...) {``) are discarded.

    Args:
        source_code (str): Source code
        is_cpp (bool): True for C++, False for C

    Returns:
        list[tuple[str, str, int, str]]: List of functions
        (name, return_type, line_number, body).  ``line_number`` is the
        1-based line on which the function name appears, counted in the
        comment-stripped text; ``body`` excludes the outer braces.
    """
    code = remove_comments(source_code)

    if is_cpp:
        signature_pattern = re.compile(r'''
            (?:template\s*<[^>]*>\s*)?   # Optional template declaration
            ([\w\:\*&\s]+?)              # Return type
            \s*
            ([\w\:]+)                    # Function name
            \s*
            \(([^)]*)\)                  # Parameter list
            \s*
            (?:const)?\s*                # Optional const qualifier
            \{                           # Function body start
        ''', re.VERBOSE)
    else:
        signature_pattern = re.compile(r'''
            (\w+(?:[ \t]+\w+)*?          # Return type words, e.g. "static unsigned long"
             \s*(?:\*\s*)*)              # Optional pointer stars, e.g. "char *"
            (?<=[\s*])                   # Name must follow whitespace or a '*'
            (\w+)                        # Function name
            \s*
            \(([^)]*)\)                  # Parameter list
            \s*
            \{                           # Function body start
        ''', re.VERBOSE)

    functions = []
    for match in signature_pattern.finditer(code):
        function_name = match.group(2).strip()
        # Control-flow statements such as "else if (...) {" look like a
        # definition to the regex; drop them by name.
        if function_name in ALL_KEYWORDS:
            continue

        # match.end() - 1 is the position of the opening brace.
        body, _ = extract_balanced_block(code, match.end() - 1)
        if body is None:
            continue

        return_type = match.group(1).strip()
        # Count lines up to the name so the result points at the signature
        # even when the opening brace sits on a later line.
        line_number = code.count('\n', 0, match.start(2)) + 1
        functions.append((function_name, return_type, line_number, body))
    return functions
def _skip_quoted_literal(code: str, quote_pos: int) -> int:
    """Return the index just past the quoted literal that opens at quote_pos.

    Backslash escapes are honoured.  If no closing quote is found before the
    end of the line (or of the text), the quote is treated as an ordinary
    character and ``quote_pos + 1`` is returned.
    """
    quote = code[quote_pos]
    length = len(code)
    pos = quote_pos + 1
    while pos < length:
        char = code[pos]
        if char == '\\':
            pos += 2  # Skip the escaped character as well
            continue
        if char == quote:
            return pos + 1
        if char == '\n':
            break
        pos += 1
    return quote_pos + 1


def extract_balanced_block(code: str, start_pos: int) -> 'tuple[str | None, int]':
    """Extract a balanced block of code starting at start_pos, handling nested braces.

    Braces that occur inside string or character literals are ignored, so
    code such as ``printf("}")`` does not end the block early.

    Args:
        code (str): Text to scan
        start_pos (int): Index that must hold the opening ``{``

    Returns:
        tuple: ``(body, end_pos)`` where ``body`` is the text between the
        outer braces and ``end_pos`` is the index just past the closing
        brace.  ``(None, start_pos)`` if ``start_pos`` is out of range or
        does not point at ``{``.  If the block is never closed, the rest of
        the text and ``len(code)`` are returned.
    """
    length = len(code)
    if start_pos >= length or code[start_pos] != '{':
        return None, start_pos

    depth = 0
    pos = start_pos
    while pos < length:
        char = code[pos]
        if char == '"' or char == "'":
            pos = _skip_quoted_literal(code, pos)
            continue
        if char == '{':
            depth += 1
        elif char == '}':
            depth -= 1
            if depth == 0:
                # Found the brace matching the one at start_pos
                return code[start_pos + 1:pos], pos + 1
        pos += 1

    # Unmatched opening brace: hand back everything after it
    return code[start_pos + 1:], length
# String and character literals, honouring backslash escapes.  Matching both
# kinds in one pass keeps a '"' character literal from opening a string.
_QUOTED_LITERAL = re.compile(
    r'"(?:\\.|[^"\\\n])*"'
    r"|'(?:\\.|[^'\\\n])*'",
    re.DOTALL,
)

_CPP_CALL_PATTERN = re.compile(r'''
    ([\w:]+)                # Function name (allow namespaces)
    \s*
    \(([^)]*)\)             # Argument list
    (?:\s*->\s*[\w:]+)?     # Optional member access after call
''', re.VERBOSE)

_C_CALL_PATTERN = re.compile(r'''
    (\w+)                   # Function name
    \s*
    \(([^)]*)\)             # Argument list
''', re.VERBOSE)

# A name directly followed by "(" inside an argument list.
_NESTED_CALL_PATTERN = re.compile(r'([\w:]+)\s*\(')


def extract_function_calls(function_name: str, function_body: str, is_cpp: bool) -> list[str]:
    """Extract function calls from a function body.

    String and character literals are removed first so their contents are
    not mistaken for calls, then comments are removed and the code is split
    into statements.  Each statement is scanned for ``name(...)``; names
    that are keywords are skipped.  Because the argument-list pattern stops
    at the first ``)``, the captured argument text is scanned again for
    nested ``name(`` occurrences.

    Args:
        function_name (str): Name of the function being analysed; only used
            to decide whether trace output is printed (see TRACE_FUNCTION)
        function_body (str): Function body code
        is_cpp (bool): True for C++, False for C

    Returns:
        list[str]: Called function names, without duplicates, in order of
        first appearance
    """
    tracing = function_name in TRACE_FUNCTION

    # Remove strings and character literals to avoid false matches
    code = _QUOTED_LITERAL.sub('', function_body)
    # Remove comments
    code = remove_comments(code)
    # Split code into statements to better identify control flow
    statements = split_into_statements(code)
    if tracing:
        print(f'{function_name}{statements}')

    call_pattern = _CPP_CALL_PATTERN if is_cpp else _C_CALL_PATTERN
    calls = []
    for stmt in statements:
        stmt = stmt.strip()
        if not stmt:
            continue
        if tracing:
            print(f'{stmt}')

        for match in call_pattern.finditer(stmt):
            func_name = match.group(1)
            arg_list = match.group(2)
            # Split namespaced name
            components = func_name.split('::')
            last_component = components[-1]
            if tracing:
                print(f'{func_name}, {arg_list}, {last_component}')

            # Filter out keywords
            if is_cpp:
                if all(comp not in ALL_KEYWORDS for comp in components):
                    calls.append(func_name)
            elif func_name not in C_KEYWORDS:
                calls.append(func_name)

            # The argument list may itself contain function calls
            if tracing:
                print(f'arg_list: {arg_list}')
            for arg_match in _NESTED_CALL_PATTERN.finditer(arg_list):
                arg_func_name = arg_match.group(1)
                if arg_func_name not in ALL_KEYWORDS and arg_func_name not in CONTROL_FLOW_STATEMENTS:
                    if tracing:
                        print(f'{function_name} arg-> {arg_func_name}')
                    calls.append(arg_func_name)

    # dict preserves insertion order, so this keeps the first occurrence of
    # every name while dropping repeats.
    return list(dict.fromkeys(calls))
def split_into_statements(code: str) -> list[str]:
    """Cut code into statements at top-level semicolons.

    A semicolon ends a statement only while the running counts of open
    parentheses, braces and square brackets are all zero, so a whole
    ``{ ... }`` block or a ``for (...;...;...)`` header stays in one piece.
    Each piece is stripped of surrounding whitespace; trailing text without
    a closing semicolon is kept as a final statement when it is not blank.
    This is a simplified approach and is not a real parser.
    """
    opening = {'(': 0, '{': 1, '[': 2}
    closing = {')': 0, '}': 1, ']': 2}
    depth = [0, 0, 0]  # parentheses, braces, square brackets

    pieces = []
    begin = 0
    for index, symbol in enumerate(code):
        if symbol in opening:
            depth[opening[symbol]] += 1
        elif symbol in closing:
            depth[closing[symbol]] -= 1
        elif symbol == ';' and not any(depth):
            # Every nesting counter is zero: the statement ends here
            pieces.append(code[begin:index + 1].strip())
            begin = index + 1

    # Whatever follows the last top-level semicolon
    tail = code[begin:].strip()
    if tail:
        pieces.append(tail)
    return pieces
# One alternation for literals and comments: whichever construct starts first
# in the text wins, so "//" inside a string is not a comment and a quote
# inside a comment does not open a string.
_COMMENT_OR_LITERAL = re.compile(
    r'"(?:\\.|[^"\\\n])*"'      # string literal
    r"|'(?:\\.|[^'\\\n])*'"     # character literal
    r'|//[^\n]*'                # line comment (newline is kept)
    r'|/\*.*?\*/',              # block comment
    re.DOTALL,
)


def _comment_replacement(match: 're.Match') -> str:
    """Return the text that should stand in for one matched literal or comment."""
    text = match.group(0)
    if text.startswith('//'):
        return ''
    if text.startswith('/*'):
        # Keep the line count intact; a single-line comment becomes one
        # space so the tokens around it do not get glued together.
        newlines = text.count('\n')
        return '\n' * newlines if newlines else ' '
    return text  # String and character literals are left untouched


def remove_comments(source_code: str) -> str:
    """Remove comments from C/C++ code.

    Line comments are deleted up to (not including) the end of the line.
    A block comment is replaced by a single space, or by as many newlines
    as it contained, so the number of lines in the result equals that of
    the input.  Comment markers inside string or character literals are
    left alone.

    Args:
        source_code (str): C/C++ source text

    Returns:
        str: The text without comments
    """
    return _COMMENT_OR_LITERAL.sub(_comment_replacement, source_code)
def process_source_file(file_path: Path) -> list[tuple[str, str, int, Path, list[str]]]:
    """Analyse one source file and pair every function with its callees.

    The file is read as UTF-8 and treated as C++ when its lower-cased
    suffix is one of the C++ extensions listed below, otherwise as C.
    Any failure is reported on standard output and yields an empty list.

    Returns:
        list[tuple[str, str, int, Path, list[str]]]:
        One entry per function: (name, return_type, line_number, file_path, called_functions)
    """
    cpp_suffixes = {'.cpp', '.cxx', '.cc', '.c++', '.hpp', '.hxx', '.hh', '.h++'}
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            text = handle.read()
        treat_as_cpp = file_path.suffix.lower() in cpp_suffixes
        definitions = find_functions_in_code(text, treat_as_cpp)
        return [
            (name, ret_type, line, file_path, extract_function_calls(name, body, treat_as_cpp))
            for name, ret_type, line, body in definitions
        ]
    except UnicodeDecodeError:
        print(f"Error: File '{file_path}' is not UTF-8 encoded")
    except Exception as e:
        print(f"Error processing file '{file_path}': {e}")
    # Reached only after one of the handlers above has reported a problem
    return []
def find_source_files(directory: Path) -> list[Path]:
    """Find all C/C++ source files in a directory recursively.

    A file qualifies when its lower-cased suffix is one of the C or C++
    extensions below, so ``.hpp`` headers and upper-case suffixes are
    included.  Directories are skipped even if their name carries such a
    suffix.

    Args:
        directory (Path): Root directory to search

    Returns:
        list[Path]: Matching files in sorted order
    """
    c_extensions = {'.c', '.h'}
    cpp_extensions = {'.cpp', '.cxx', '.cc', '.c++', '.hpp', '.hxx', '.hh', '.h++'}
    wanted = c_extensions | cpp_extensions
    # One traversal with a suffix filter; sorting makes the order
    # reproducible between runs.
    return sorted(
        entry
        for entry in directory.rglob('*')
        if entry.suffix.lower() in wanted and entry.is_file()
    )
def main():
    """Command-line entry point.

    Takes a positional ``path`` (file or directory) and an optional
    ``--calls`` flag, analyses the file(s) and prints a table of the
    functions found; with ``--calls`` the table also lists what each
    function calls.  Problems are reported with a printed message followed
    by a plain return.
    """
    import argparse

    cli = argparse.ArgumentParser(description='Analyze C/C++ functions and their calls')
    cli.add_argument('path', help='C/C++ source file or directory path')
    cli.add_argument('--calls', action='store_true', help='Show functions called by each function')
    options = cli.parse_args()

    target = Path(options.path)
    if not target.exists():
        print(f"Error: Path '{target}' does not exist")
        return

    if target.is_file():
        known_suffixes = {'.c', '.h', '.cpp', '.cxx', '.cc', '.c++', '.hpp', '.hxx', '.hh', '.h++'}
        if target.suffix.lower() not in known_suffixes:
            # Only a warning: the file is analysed regardless
            print(f"Warning: File '{target}' may not be a C/C++ source file")
        collected = process_source_file(target)
    else:
        candidates = find_source_files(target)
        if not candidates:
            print(f"Error: No C/C++ files found in directory '{target}'")
            return
        print(f"Found {len(candidates)} source files in directory '{target}', analyzing...")
        collected = []
        for candidate in candidates:
            collected.extend(process_source_file(candidate))

    if not collected:
        print("No function definitions found")
        return

    total = len(collected)
    if not args_calls_requested(options):
        # Short table: no callee column
        print(f"Total {total} functions found:")
        print("=" * 90)
        print(f"{'Function Name':<30} {'Return Type':<25} {'Line':<8} {'File Path'}")
        print("-" * 90)
        for name, ret_type, line, file_path, _ in collected:
            print(f"{name:<30} {ret_type:<25} {line:<8} {file_path}")
        return

    # Wide table including the callees of every function
    print(f"Total {total} functions found with call information:")
    print("=" * 120)
    print(f"{'Function Name':<30} {'Return Type':<25} {'Line':<8} {'File Path':<40} {'Called Functions'}")
    print("-" * 120)
    for name, ret_type, line, file_path, called in collected:
        called_str = ', '.join(called) if called else '-'
        print(f"{name:<30} {ret_type:<25} {line:<8} {str(file_path):<40} {called_str}")


def args_calls_requested(options) -> bool:
    """Tell whether the parsed command line asked for the callee column."""
    return bool(options.calls)
if __name__ == "__main__":
main()
![]() |
![]() |
2023 |
![]() |
![]() |
1970 |
![]() |
![]() |
1763 |
4 |
![]() |
1718 |
5 |
![]() |
1710 |
6 |
![]() |
1701 |
7 |
![]() |
1684 |
8 |
|
1678 |
9 |
![]() |
1664 |
10 |
![]() |
1653 |