# 29.4 程式碼理解模組
## 29.4.1 程式碼理解概述
程式碼理解模組是程式設計 Agent 的另一個核心能力,它能夠分析、解釋和理解現有程式碼的功能、結構和設計。程式碼理解涉及程式碼解析、語義分析、依賴分析等多個環節。
### 程式碼理解流程
輸入程式碼 ↓ 程式碼解析 ↓ 結構分析 ↓ 語義分析 ↓ 依賴分析 ↓ 功能推斷 ↓ 生成解釋
## 29.4.2 代码解析
### 代码解析器
```python
class CodeParser:
    """Dispatch source code to the parser registered for its language.

    Attributes:
        parsers: Maps a lowercase language name ('python', 'javascript',
            'java', 'cpp') to the parser instance that handles it.
    """

    def __init__(self):
        """Create one parser instance per supported language."""
        self.parsers = {
            'python': PythonParser(),
            'javascript': JavaScriptParser(),
            'java': JavaParser(),
            'cpp': CppParser()
        }

    def parse(self, code: str, language: str) -> ParsedCode:
        """Parse ``code`` with the parser registered for ``language``.

        Args:
            code: Source text to parse.
            language: Language name. It is matched case-insensitively and
                surrounding whitespace is ignored.

        Returns:
            Whatever the selected parser's ``parse`` method returns.

        Raises:
            ValueError: If ``language`` is not a string or no parser is
                registered for it.
        """
        # A non-string language (e.g. None) is reported as unsupported
        # instead of failing with AttributeError on ``.lower()``.
        key = language.strip().lower() if isinstance(language, str) else None
        parser = self.parsers.get(key)
        if parser is None:
            raise ValueError(f"Unsupported language: {language}")
        return parser.parse(code)
class PythonParser:
    """Summarise Python source code using the standard ``ast`` module."""

    # Both synchronous and ``async def`` definitions count as functions/methods.
    _FUNCTION_NODES = (ast.FunctionDef, ast.AsyncFunctionDef)

    # AST node classes whose runtime type is known without evaluating them.
    _LITERAL_NODE_TYPES = (
        (ast.List, "list"),
        (ast.ListComp, "list"),
        (ast.Dict, "dict"),
        (ast.DictComp, "dict"),
        (ast.Tuple, "tuple"),
        (ast.Set, "set"),
        (ast.SetComp, "set"),
        (ast.JoinedStr, "str"),
    )

    def parse(self, code: str) -> ParsedCode:
        """Parse Python source and collect its classes, functions, imports and globals.

        Args:
            code: Python source text.

        Returns:
            A ``ParsedCode`` carrying the original text, the AST and the
            extracted classes, functions, imports and module-level variables.

        Raises:
            ValueError: If ``code`` is not syntactically valid Python.
        """
        try:
            tree = ast.parse(code)
        except SyntaxError as e:
            # Chain the cause so the original traceback stays available.
            raise ValueError(f"Invalid Python code: {e}") from e

        parsed_code = ParsedCode(
            language='python',
            original_code=code,
            ast=tree
        )
        parsed_code.classes = self._extract_classes(tree)
        parsed_code.functions = self._extract_functions(tree)
        parsed_code.imports = self._extract_imports(tree)
        parsed_code.global_variables = self._extract_global_variables(tree)
        return parsed_code

    def _extract_classes(self, tree: ast.AST) -> List[ClassInfo]:
        """Build a ``ClassInfo`` for every class definition in ``tree``.

        Nested classes are included because the whole tree is walked. Only
        statements directly in the class body become methods or attributes.
        """
        classes = []
        for node in ast.walk(tree):
            if not isinstance(node, ast.ClassDef):
                continue
            attributes = [
                self._extract_attribute(stmt)
                for stmt in node.body
                if isinstance(stmt, ast.Assign)
            ]
            classes.append(ClassInfo(
                name=node.name,
                bases=[self._get_name(base) for base in node.bases],
                methods=[self._extract_method(stmt) for stmt in node.body
                         if isinstance(stmt, self._FUNCTION_NODES)],
                # _extract_attribute returns None for targets that are not a
                # plain name; those are dropped rather than stored as None.
                attributes=[attr for attr in attributes if attr is not None],
                docstring=ast.get_docstring(node)
            ))
        return classes

    def _extract_functions(self, tree: ast.AST) -> List[FunctionInfo]:
        """Build a ``FunctionInfo`` for every function that is not a method.

        A function counts as a method when it sits directly in a class body.
        Functions nested inside other functions are included.
        """
        # Collect the class-body statements once so the membership test below
        # is O(1) instead of re-walking the tree for every function.
        method_ids = {
            id(stmt)
            for node in ast.walk(tree)
            if isinstance(node, ast.ClassDef)
            for stmt in node.body
        }
        functions = []
        for node in ast.walk(tree):
            if not isinstance(node, self._FUNCTION_NODES):
                continue
            if id(node) in method_ids:
                continue
            functions.append(FunctionInfo(
                name=node.name,
                arguments=self._get_argument_names(node),
                return_type=self._get_return_type(node),
                docstring=ast.get_docstring(node),
                decorators=[self._get_name(d) for d in node.decorator_list]
            ))
        return functions

    def _extract_imports(self, tree: ast.AST) -> List[ImportInfo]:
        """Build an ``ImportInfo`` for every imported name anywhere in ``tree``."""
        imports = []
        for node in ast.walk(tree):
            if isinstance(node, ast.Import):
                for alias in node.names:
                    imports.append(ImportInfo(
                        module=alias.name,
                        alias=alias.asname,
                        type='import'
                    ))
            elif isinstance(node, ast.ImportFrom):
                # ``from . import x`` has module=None and level=1. Render the
                # leading dots so the module is never None.
                module = "." * (node.level or 0) + (node.module or "")
                for alias in node.names:
                    imports.append(ImportInfo(
                        module=module,
                        name=alias.name,
                        alias=alias.asname,
                        type='from'
                    ))
        return imports

    def _module_level_assignments(self, tree: ast.AST) -> List[ast.Assign]:
        """Return the ``ast.Assign`` statements located directly in the module body."""
        if not isinstance(tree, ast.Module):
            return []
        return [stmt for stmt in tree.body if isinstance(stmt, ast.Assign)]

    def _extract_global_variables(self, tree: ast.AST) -> List[VariableInfo]:
        """Build a ``VariableInfo`` for every plain-name assignment at module level.

        Assignments inside functions or classes are ignored, as are targets
        that are not a simple name (attributes, subscripts, tuples).
        """
        variables = []
        for stmt in self._module_level_assignments(tree):
            for target in stmt.targets:
                if isinstance(target, ast.Name):
                    variables.append(VariableInfo(
                        name=target.id,
                        type=self._infer_type(stmt.value),
                        value=self._get_value(stmt.value)
                    ))
        return variables

    def _get_argument_names(self, node: ast.FunctionDef) -> List[str]:
        """List parameter names of a function definition in signature order.

        Positional-only, regular and keyword-only parameters are returned by
        name. ``*args`` and ``**kwargs`` style parameters keep their stars.
        """
        spec = node.args
        names = [arg.arg for arg in getattr(spec, "posonlyargs", [])]
        names.extend(arg.arg for arg in spec.args)
        if spec.vararg is not None:
            names.append(f"*{spec.vararg.arg}")
        names.extend(arg.arg for arg in spec.kwonlyargs)
        if spec.kwarg is not None:
            names.append(f"**{spec.kwarg.arg}")
        return names

    def _extract_method(self, node: ast.FunctionDef) -> MethodInfo:
        """Build a ``MethodInfo`` from a function definition in a class body."""
        decorator_names = {
            d.id for d in node.decorator_list if isinstance(d, ast.Name)
        }
        return MethodInfo(
            name=node.name,
            arguments=self._get_argument_names(node),
            return_type=self._get_return_type(node),
            docstring=ast.get_docstring(node),
            is_static='staticmethod' in decorator_names,
            is_classmethod='classmethod' in decorator_names
        )

    def _extract_attribute(self, node: ast.Assign) -> Optional[AttributeInfo]:
        """Build an ``AttributeInfo`` from a class-body assignment.

        Only the first target is inspected. Returns ``None`` when that target
        is not a plain name.
        """
        target = node.targets[0]
        if not isinstance(target, ast.Name):
            return None
        return AttributeInfo(
            name=target.id,
            type=self._infer_type(node.value),
            value=self._get_value(node.value)
        )

    def _get_name(self, node: ast.AST) -> str:
        """Render an expression node as source-like text.

        Names and dotted attribute chains are rendered directly. Any other
        node (subscripts such as ``List[int]``, calls, constants) is rendered
        with ``ast.unparse`` when the running Python provides it.
        """
        if isinstance(node, ast.Name):
            return node.id
        if isinstance(node, ast.Attribute):
            return f"{self._get_name(node.value)}.{node.attr}"
        if hasattr(ast, "unparse"):
            return ast.unparse(node)
        # Interpreters without ast.unparse keep the previous fallback.
        return str(node)

    def _get_return_type(self, node: ast.FunctionDef) -> str:
        """Return the return annotation as text, or ``"None"`` when there is none."""
        if node.returns is not None:
            return self._get_name(node.returns)
        return "None"

    def _infer_type(self, node: ast.AST) -> str:
        """Guess the type name of the value produced by an expression node.

        Returns the constant's type name, a built-in container name, the
        callee name for a call, or ``"Any"`` when nothing can be inferred.
        """
        if isinstance(node, ast.Constant):
            return type(node.value).__name__
        for node_type, type_name in self._LITERAL_NODE_TYPES:
            if isinstance(node, node_type):
                return type_name
        if isinstance(node, ast.Call):
            return self._get_name(node.func)
        try:
            # Covers literal forms that are not a single Constant, e.g. ``-1``.
            return type(ast.literal_eval(node)).__name__
        except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
            return "Any"

    def _get_value(self, node: ast.AST) -> Any:
        """Return the Python value of a literal expression, else ``None``.

        ``ast.literal_eval`` only accepts literals and containers of literals,
        so no code from the analysed source is executed.
        """
        if isinstance(node, ast.Constant):
            return node.value
        try:
            return ast.literal_eval(node)
        except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
            return None
```
## 29.4.3 結構分析
### 结构分析器
```python
class StructureAnalyzer:
    """Derive structural facts (hierarchy, calls, dependencies, complexity) from parsed code."""

    # Both synchronous and ``async def`` definitions are searched for calls.
    _FUNCTION_NODES = (ast.FunctionDef, ast.AsyncFunctionDef)

    def analyze(self, parsed_code: ParsedCode) -> StructureAnalysis:
        """Run every structural analysis and collect the results.

        Args:
            parsed_code: Parser output; its ``classes``, ``functions``,
                ``imports``, ``language``, ``ast`` and ``original_code``
                attributes are read.

        Returns:
            A ``StructureAnalysis`` with ``class_hierarchy``, ``call_graph``,
            ``dependencies`` and ``complexity`` filled in.
        """
        analysis = StructureAnalysis()
        analysis.class_hierarchy = self._analyze_class_hierarchy(
            parsed_code.classes
        )
        analysis.call_graph = self._analyze_call_graph(parsed_code)
        analysis.dependencies = self._analyze_dependencies(parsed_code)
        analysis.complexity = self._analyze_complexity(parsed_code)
        return analysis

    def _analyze_class_hierarchy(self,
                                 classes: List[ClassInfo]) -> Dict[str, List[str]]:
        """Map each class name to the list of its base-class names."""
        return {cls.name: cls.bases for cls in classes}

    def _analyze_call_graph(self,
                            parsed_code: ParsedCode) -> Dict[str, List[str]]:
        """Map each function and ``Class.method`` name to the names it calls."""
        call_graph = {}
        for func in parsed_code.functions:
            call_graph[func.name] = self._extract_function_calls(func, parsed_code)
        for cls in parsed_code.classes:
            for method in cls.methods:
                calls = self._extract_method_calls(method, cls, parsed_code)
                call_graph[f"{cls.name}.{method.name}"] = calls
        return call_graph

    def _callee_name(self, node: ast.AST) -> Optional[str]:
        """Render a call target as a dotted name, or ``None`` if it has no name.

        When the base of an attribute access is itself unnamed (for example
        ``make()().run``), only the attribute name is returned.
        """
        if isinstance(node, ast.Name):
            return node.id
        if isinstance(node, ast.Attribute):
            base = self._callee_name(node.value)
            return f"{base}.{node.attr}" if base else node.attr
        return None

    def _collect_calls(self, statements: List[ast.stmt]) -> List[str]:
        """Return the distinct named call targets found in ``statements``.

        Order is the traversal order of ``ast.walk`` over each statement.
        Calls inside nested definitions are included.
        """
        seen = {}
        for stmt in statements:
            for node in ast.walk(stmt):
                if isinstance(node, ast.Call):
                    name = self._callee_name(node.func)
                    if name is not None:
                        seen.setdefault(name, None)
        return list(seen)

    def _extract_function_calls(self, func: FunctionInfo,
                                parsed_code: ParsedCode) -> List[str]:
        """List the names called in the body of the function named ``func.name``.

        The first matching definition outside a class body is used. Returns
        an empty list when ``parsed_code.ast`` is not a Python AST or no such
        function is found.
        """
        tree = getattr(parsed_code, 'ast', None)
        if not isinstance(tree, ast.AST):
            return []
        # Statements directly inside a class body are methods, not functions.
        method_ids = {
            id(stmt)
            for node in ast.walk(tree)
            if isinstance(node, ast.ClassDef)
            for stmt in node.body
        }
        for node in ast.walk(tree):
            if (isinstance(node, self._FUNCTION_NODES)
                    and node.name == func.name
                    and id(node) not in method_ids):
                return self._collect_calls(node.body)
        return []

    def _extract_method_calls(self, method: MethodInfo,
                              cls: ClassInfo,
                              parsed_code: ParsedCode) -> List[str]:
        """List the names called in the body of ``cls.name``'s method ``method.name``.

        Returns an empty list when ``parsed_code.ast`` is not a Python AST or
        the method cannot be located.
        """
        tree = getattr(parsed_code, 'ast', None)
        if not isinstance(tree, ast.AST):
            return []
        for node in ast.walk(tree):
            if not (isinstance(node, ast.ClassDef) and node.name == cls.name):
                continue
            for stmt in node.body:
                if isinstance(stmt, self._FUNCTION_NODES) and stmt.name == method.name:
                    return self._collect_calls(stmt.body)
        return []

    def _analyze_dependencies(self,
                              parsed_code: ParsedCode) -> List[Dependency]:
        """Build ``Dependency`` records for imports and for class inheritance."""
        dependencies = []
        # Import dependencies: the source is recorded as the code's language.
        for imp in parsed_code.imports:
            dependencies.append(Dependency(
                type='import',
                source=parsed_code.language,
                target=imp.module,
                strength='external'
            ))
        # Inheritance dependencies: one record per (class, base) pair.
        for cls in parsed_code.classes:
            for base in cls.bases:
                dependencies.append(Dependency(
                    type='inheritance',
                    source=cls.name,
                    target=base,
                    strength='strong'
                ))
        return dependencies

    def _analyze_complexity(self,
                            parsed_code: ParsedCode) -> ComplexityMetrics:
        """Compute cyclomatic, cognitive and maintainability metrics."""
        metrics = ComplexityMetrics()
        metrics.cyclomatic_complexity = self._calculate_cyclomatic_complexity(
            parsed_code
        )
        metrics.cognitive_complexity = self._calculate_cognitive_complexity(
            parsed_code
        )
        metrics.maintainability_index = self._calculate_maintainability_index(
            parsed_code
        )
        return metrics

    def _calculate_cyclomatic_complexity(self,
                                         parsed_code: ParsedCode) -> float:
        """Count decision points over the whole AST, starting from 1.

        ``if``, ``while``, ``for`` and ``except`` each add 1. A boolean
        expression adds one per extra operand.
        """
        complexity = 1  # a single straight-line path
        for node in ast.walk(parsed_code.ast):
            if isinstance(node, (ast.If, ast.While, ast.For, ast.ExceptHandler)):
                complexity += 1
            elif isinstance(node, ast.BoolOp):
                complexity += len(node.values) - 1
        return complexity

    def _calculate_cognitive_complexity(self,
                                        parsed_code: ParsedCode) -> float:
        """Approximate cognitive complexity as 1.5 times the cyclomatic complexity."""
        return self._calculate_cyclomatic_complexity(parsed_code) * 1.5

    def _calculate_maintainability_index(self,
                                         parsed_code: ParsedCode) -> float:
        """Compute a simplified maintainability index clamped to [0, 100].

        Based on MI = 171 - 5.2 * ln(V) - 0.23 * G - 16.2 * ln(LOC), where
        V is Halstead volume, G is cyclomatic complexity and LOC is the line
        count. Halstead volume is not computed here.
        """
        # Count real lines; a trailing newline must not add a phantom line.
        # Floor at 1 so the logarithm stays defined for empty source.
        loc = max(1, len(parsed_code.original_code.splitlines()))
        complexity = self._calculate_cyclomatic_complexity(parsed_code)
        # NOTE(review): cyclomatic complexity stands in for Halstead volume in
        # the ln(V) term; replace it if a Halstead metric becomes available.
        mi = (171
              - 5.2 * math.log(complexity)
              - 0.23 * complexity
              - 16.2 * math.log(loc))
        return max(0, min(100, mi))
```
## 29.4.4 语义分析
### 语义分析器
```python
class SemanticAnalyzer:
    """Infer purpose, algorithms, design patterns and data flow of parsed code.

    Purpose, algorithm and design-pattern analysis are delegated to an LLM
    client; data-flow analysis is computed locally from the parser output.
    """

    def __init__(self, llm_client: LLMClient):
        """Store the client whose awaitable ``complete(prompt)`` answers prompts."""
        self.llm_client = llm_client

    async def analyze(self, parsed_code: ParsedCode,
                      structure: StructureAnalysis) -> SemanticAnalysis:
        """Run every semantic analysis, one after another.

        Args:
            parsed_code: Parser output for the code under analysis.
            structure: Structural analysis of the same code; its
                ``class_hierarchy`` is used for design-pattern detection.

        Returns:
            A ``SemanticAnalysis`` with ``purpose``, ``algorithms``,
            ``design_patterns`` and ``data_flow`` filled in.
        """
        analysis = SemanticAnalysis()
        analysis.purpose = await self._analyze_purpose(parsed_code)
        analysis.algorithms = await self._analyze_algorithms(parsed_code)
        analysis.design_patterns = await self._analyze_design_patterns(
            parsed_code,
            structure
        )
        analysis.data_flow = await self._analyze_data_flow(parsed_code)
        return analysis

    # NOTE: prompt bodies below are kept flush-left inside the triple quotes
    # so the text sent to the LLM carries no indentation.

    async def _analyze_purpose(self,
                               parsed_code: ParsedCode) -> str:
        """Ask the LLM for a concise description of what the code does."""
        prompt = f"""
分析以下程式碼的主要目的和功能:
{parsed_code.original_code}
請用簡潔的語言描述這段程式碼的主要功能。
"""
        return await self.llm_client.complete(prompt)

    async def _analyze_algorithms(self,
                                  parsed_code: ParsedCode) -> List[AlgorithmInfo]:
        """Ask the LLM which algorithms the code uses and parse its JSON reply."""
        prompt = f"""
識別以下程式碼中使用的演算法:
{parsed_code.original_code}
請識別:
1. 使用的主要演算法(排序、搜尋、圖演算法等)
2. 演算法的時間複雜度
3. 演算法的空間複雜度
以 JSON 格式返回結果。
"""
        response = await self.llm_client.complete(prompt)
        return self._parse_algorithms(response)

    async def _analyze_design_patterns(self,
                                       parsed_code: ParsedCode,
                                       structure: StructureAnalysis) -> List[str]:
        """Ask the LLM which design patterns the code uses and parse its reply."""
        prompt = f"""
識別以下程式碼中使用的設計模式:
類:{parsed_code.classes}
函式:{parsed_code.functions}
類層次結構:{structure.class_hierarchy}
請識別使用的設計模式。
"""
        response = await self.llm_client.complete(prompt)
        return self._parse_design_patterns(response)

    @staticmethod
    def _extract_json(response: str) -> Any:
        """Decode the JSON value contained in an LLM reply, or return ``None``.

        The whole reply is tried first. If that fails, the span from the
        first opening bracket or brace to the last matching closer is tried,
        which tolerates prose or code fences around the JSON.
        """
        import json

        text = (response or "").strip()
        candidates = [text]
        starts = sorted(
            (text.find(opener), closer)
            for opener, closer in (("[", "]"), ("{", "}"))
            if opener in text
        )
        for start, closer in starts:
            end = text.rfind(closer)
            if end > start:
                candidates.append(text[start:end + 1])
        for candidate in candidates:
            try:
                return json.loads(candidate)
            except ValueError:  # json.JSONDecodeError is a ValueError
                continue
        return None

    def _parse_algorithms(self, response: str) -> List[AlgorithmInfo]:
        """Turn the LLM's JSON reply into ``AlgorithmInfo`` objects.

        Accepts a JSON list of objects, a single object, or an object with
        an ``"algorithms"`` list. Returns an empty list when no usable JSON
        is found.
        """
        payload = self._extract_json(response)
        if isinstance(payload, dict):
            payload = payload.get("algorithms", [payload])
        if not isinstance(payload, list):
            return []
        algorithms = []
        for item in payload:
            if not isinstance(item, dict):
                continue
            # NOTE(review): the prompt does not fix a JSON schema; the keys
            # below mirror the AlgorithmInfo attributes read elsewhere in this
            # file, and AlgorithmInfo is assumed to accept them as keyword
            # arguments - TODO confirm against its definition.
            algorithms.append(AlgorithmInfo(
                name=item.get("name"),
                time_complexity=item.get("time_complexity"),
                space_complexity=item.get("space_complexity"),
            ))
        return algorithms

    def _parse_design_patterns(self, response: str) -> List[str]:
        """Split the LLM reply into one entry per non-empty line.

        Leading list markers (``-``, ``*``, ``1.``, ``2)``) are removed.
        """
        import re

        marker = re.compile(r"^\s*(?:[-*•]+|\d+[.)、])\s*")
        patterns = []
        for line in (response or "").splitlines():
            item = marker.sub("", line).strip()
            if item:
                patterns.append(item)
        return patterns

    async def _analyze_data_flow(self,
                                 parsed_code: ParsedCode) -> DataFlowAnalysis:
        """Collect inputs, outputs and transformations into a ``DataFlowAnalysis``."""
        analysis = DataFlowAnalysis()
        analysis.inputs = self._identify_inputs(parsed_code)
        analysis.outputs = self._identify_outputs(parsed_code)
        analysis.transformations = self._identify_transformations(parsed_code)
        return analysis

    def _identify_inputs(self, parsed_code: ParsedCode) -> List[str]:
        """Return the distinct argument names of all functions and methods.

        Names appear in first-seen order (functions first, then methods), so
        the result is the same on every run.
        """
        inputs = []
        for func in parsed_code.functions:
            inputs.extend(func.arguments)
        for cls in parsed_code.classes:
            for method in cls.methods:
                inputs.extend(method.arguments)
        # dict.fromkeys de-duplicates while preserving insertion order.
        return list(dict.fromkeys(inputs))

    def _identify_outputs(self, parsed_code: ParsedCode) -> List[str]:
        """Describe every function or method whose return type is not ``"None"``."""
        outputs = [
            f"{func.name}() -> {func.return_type}"
            for func in parsed_code.functions
            if func.return_type != "None"
        ]
        for cls in parsed_code.classes:
            for method in cls.methods:
                if method.return_type != "None":
                    outputs.append(f"{cls.name}.{method.name}() -> {method.return_type}")
        return outputs

    def _identify_transformations(self,
                                  parsed_code: ParsedCode) -> List[str]:
        """Return functions whose name suggests a data transformation.

        Simplified heuristic: the lowercase name contains one of
        'transform', 'convert', 'process' or 'compute'.
        """
        keywords = ['transform', 'convert', 'process', 'compute']
        return [
            func.name
            for func in parsed_code.functions
            if any(keyword in func.name.lower() for keyword in keywords)
        ]
```
## 29.4.5 代码解释生成
### 解释生成器
```python
class ExplanationGenerator:
    """Produce natural-language explanations of analysed code via an LLM.

    Attributes:
        llm_client: Client whose awaitable ``complete(prompt)`` answers prompts.
        max_function_examples: How many leading functions get usage examples;
            ``None`` means no limit.
    """

    def __init__(self, llm_client: LLMClient, max_function_examples: int = 3):
        """Store the LLM client and the usage-example limit.

        Args:
            llm_client: Client used for every prompt.
            max_function_examples: Number of functions, taken from the start
                of ``parsed_code.functions``, for which usage examples are
                generated. Defaults to 3.
        """
        self.llm_client = llm_client
        self.max_function_examples = max_function_examples

    async def generate_explanation(self,
                                   parsed_code: ParsedCode,
                                   structure: StructureAnalysis,
                                   semantic: SemanticAnalysis) -> CodeExplanation:
        """Generate every section of the explanation, one after another.

        Args:
            parsed_code: Parser output for the code being explained.
            structure: Structural analysis, passed through to the class and
                function explanation steps.
            semantic: Semantic analysis; its ``purpose`` and ``algorithms``
                are used.

        Returns:
            A ``CodeExplanation`` with ``overview``, ``class_explanations``,
            ``function_explanations``, ``algorithm_explanations`` and
            ``usage_examples`` filled in.
        """
        explanation = CodeExplanation()
        explanation.overview = await self._generate_overview(
            parsed_code,
            semantic
        )
        explanation.class_explanations = await self._generate_class_explanations(
            parsed_code.classes,
            structure
        )
        explanation.function_explanations = await self._generate_function_explanations(
            parsed_code.functions,
            structure
        )
        explanation.algorithm_explanations = await self._generate_algorithm_explanations(
            semantic.algorithms
        )
        explanation.usage_examples = await self._generate_usage_examples(
            parsed_code
        )
        return explanation

    # NOTE: prompt bodies below are kept flush-left inside the triple quotes
    # so the text sent to the LLM carries no indentation.

    async def _generate_overview(self,
                                 parsed_code: ParsedCode,
                                 semantic: SemanticAnalysis) -> str:
        """Ask the LLM for an overall summary of the code."""
        prompt = f"""
为以下代码生成总体概述:
代码目的:{semantic.purpose}
类:{[c.name for c in parsed_code.classes]}
函数:{[f.name for f in parsed_code.functions]}
请生成一个清晰的总体概述,包括:
1. 代码的主要功能
2. 主要组件
3. 整体架构
"""
        return await self.llm_client.complete(prompt)

    async def _generate_class_explanations(self,
                                           classes: List[ClassInfo],
                                           structure: StructureAnalysis) -> Dict[str, str]:
        """Ask the LLM to explain each class; returns a name-to-explanation mapping.

        ``structure`` is accepted for interface symmetry and is not used.
        Classes sharing a name overwrite each other in the result.
        """
        explanations = {}
        for cls in classes:
            prompt = f"""
为以下类生成详细解释:
类名:{cls.name}
父类:{cls.bases}
方法:{[m.name for m in cls.methods]}
属性:{[a.name for a in cls.attributes if a]}
文档字符串:{cls.docstring}
请生成详细的类解释,包括:
1. 类的职责
2. 主要方法的功能
3. 使用场景
"""
            explanations[cls.name] = await self.llm_client.complete(prompt)
        return explanations

    async def _generate_function_explanations(self,
                                              functions: List[FunctionInfo],
                                              structure: StructureAnalysis) -> Dict[str, str]:
        """Ask the LLM to explain each function; returns a name-to-explanation mapping.

        ``structure`` is accepted for interface symmetry and is not used.
        Functions sharing a name overwrite each other in the result.
        """
        explanations = {}
        for func in functions:
            prompt = f"""
为以下函数生成详细解释:
函数名:{func.name}
参数:{func.arguments}
返回类型:{func.return_type}
文档字符串:{func.docstring}
请生成详细的函数解释,包括:
1. 函数的功能
2. 参数说明
3. 返回值说明
4. 使用示例
"""
            explanations[func.name] = await self.llm_client.complete(prompt)
        return explanations

    async def _generate_algorithm_explanations(self,
                                               algorithms: List[AlgorithmInfo]) -> Dict[str, str]:
        """Ask the LLM to explain each algorithm; returns a name-to-explanation mapping."""
        explanations = {}
        for algo in algorithms:
            prompt = f"""
为以下算法生成详细解释:
算法名称:{algo.name}
时间复杂度:{algo.time_complexity}
空间复杂度:{algo.space_complexity}
请生成详细的算法解释,包括:
1. 算法原理
2. 实现细节
3. 优缺点分析
4. 适用场景
"""
            explanations[algo.name] = await self.llm_client.complete(prompt)
        return explanations

    async def _generate_usage_examples(self,
                                       parsed_code: ParsedCode) -> List[str]:
        """Ask the LLM for usage examples of the classes and leading functions.

        Returns:
            One LLM reply per class, followed by one reply for each of the
            first ``max_function_examples`` functions.
        """
        examples = []
        for cls in parsed_code.classes:
            prompt = f"""
为以下类生成使用示例:
类名:{cls.name}
方法:{[m.name for m in cls.methods]}
请生成 2-3 个实用的使用示例。
"""
            examples.append(await self.llm_client.complete(prompt))
        # Only the leading functions get examples, to bound the LLM calls.
        for func in parsed_code.functions[:self.max_function_examples]:
            prompt = f"""
为以下函数生成使用示例:
函数名:{func.name}
参数:{func.arguments}
请生成 1-2 个实用的使用示例。
"""
            examples.append(await self.llm_client.complete(prompt))
        return examples