Skip to content

29.4 程式碼理解模組

29.4.1 程式碼理解概述

程式碼理解模組是程式設計 Agent 的另一個核心能力,它能夠分析、解釋和理解現有程式碼的功能、結構和設計。程式碼理解涉及程式碼解析、語義分析、依賴分析等多個環節。

程式碼理解流程

輸入程式碼 ↓ 程式碼解析 ↓ 結構分析 ↓ 語義分析 ↓ 依賴分析 ↓ 功能推斷 ↓ 生成解釋

## 29.4.2 程式碼解析

### 程式碼解析器

    class CodeParser:
        """Front door that routes source text to a language-specific parser.

        Each supported language name (lower-case) maps to one parser instance
        that is created when the ``CodeParser`` itself is constructed.
        """

        def __init__(self):
            # One parser object per supported language, keyed by lower-case name.
            self.parsers = dict(
                python=PythonParser(),
                javascript=JavaScriptParser(),
                java=JavaParser(),
                cpp=CppParser(),
            )

        def parse(self, code: str, language: str) -> ParsedCode:
            """Parse ``code`` with the parser registered for ``language``.

            Args:
                code: Source text to parse.
                language: Language name; it is lower-cased before the lookup.

            Returns:
                Whatever the selected parser's ``parse`` method returns.

            Raises:
                ValueError: If no parser is registered for ``language``.
            """
            handler = self.parsers.get(language.lower())
            if handler:
                return handler.parse(code)
            raise ValueError(f"Unsupported language: {language}")

    class PythonParser:
        """Parser that summarises Python source using the standard ``ast`` module."""

        def parse(self, code: str) -> ParsedCode:
            """Parse Python source and collect its classes, functions, imports and globals.

            Args:
                code: Python source text.

            Returns:
                A ``ParsedCode`` whose ``classes``, ``functions``, ``imports`` and
                ``global_variables`` attributes have been filled in.

            Raises:
                ValueError: If ``code`` is not syntactically valid Python. The
                    original ``SyntaxError`` is attached as the cause.
            """
            # The extraction steps below work on an already-built tree, so only
            # the parsing step is guarded.
            try:
                tree = ast.parse(code)
            except SyntaxError as e:
                raise ValueError(f"Invalid Python code: {e}") from e

            parsed_code = ParsedCode(
                language='python',
                original_code=code,
                ast=tree
            )

            parsed_code.classes = self._extract_classes(tree)
            parsed_code.functions = self._extract_functions(tree)
            parsed_code.imports = self._extract_imports(tree)
            parsed_code.global_variables = self._extract_global_variables(tree)

            return parsed_code

        def _extract_classes(self, tree: ast.AST) -> List[ClassInfo]:
            """Build a ``ClassInfo`` for every class definition found in ``tree``.

            The whole tree is walked, so nested classes are included. Only
            plain ``def`` methods and plain assignments in the class body are
            recorded. An attribute entry is ``None`` when the assignment target
            is not a simple name (see ``_extract_attribute``).
            """
            classes = []

            for node in ast.walk(tree):
                if isinstance(node, ast.ClassDef):
                    class_info = ClassInfo(
                        name=node.name,
                        bases=[self._get_name(base) for base in node.bases],
                        methods=[self._extract_method(m) for m in node.body
                                if isinstance(m, ast.FunctionDef)],
                        attributes=[self._extract_attribute(a) for a in node.body
                                  if isinstance(a, ast.Assign)],
                        docstring=ast.get_docstring(node)
                    )
                    classes.append(class_info)

            return classes

        def _extract_functions(self, tree: ast.AST) -> List[FunctionInfo]:
            """Build a ``FunctionInfo`` for every ``def`` that is not a direct class member.

            Functions nested inside other functions (or inside methods) are still
            reported; only definitions sitting directly in a class body are skipped.
            """
            # Collect the direct children of every class once, rather than
            # re-walking the whole tree for each function definition.
            class_members = set()
            for parent in ast.walk(tree):
                if isinstance(parent, ast.ClassDef):
                    class_members.update(
                        id(child) for child in ast.iter_child_nodes(parent)
                    )

            functions = []
            for node in ast.walk(tree):
                if not isinstance(node, ast.FunctionDef):
                    continue
                if id(node) in class_members:
                    # Methods are reported through _extract_classes instead.
                    continue
                functions.append(FunctionInfo(
                    name=node.name,
                    arguments=[arg.arg for arg in node.args.args],
                    return_type=self._get_return_type(node),
                    docstring=ast.get_docstring(node),
                    decorators=[self._get_name(d) for d in node.decorator_list]
                ))

            return functions

        def _extract_imports(self, tree: ast.AST) -> List[ImportInfo]:
            """Build an ``ImportInfo`` for every imported name anywhere in ``tree``.

            ``import x`` statements are recorded with ``type='import'`` and
            ``from x import y`` statements with ``type='from'``.
            """
            imports = []

            for node in ast.walk(tree):
                if isinstance(node, ast.Import):
                    for alias in node.names:
                        import_info = ImportInfo(
                            module=alias.name,
                            alias=alias.asname,
                            type='import'
                        )
                        imports.append(import_info)
                elif isinstance(node, ast.ImportFrom):
                    for alias in node.names:
                        import_info = ImportInfo(
                            module=node.module,
                            name=alias.name,
                            alias=alias.asname,
                            type='from'
                        )
                        imports.append(import_info)

            return imports

        def _extract_global_variables(self, tree: ast.AST) -> List[VariableInfo]:
            """Build a ``VariableInfo`` for every simple name assigned at module level.

            Only the direct children of ``tree`` are inspected, so assignments
            inside functions and classes are ignored. (The previous version
            required a node to be both an ``Assign`` and a ``Module`` and so
            never returned anything.)
            """
            variables = []

            for node in ast.iter_child_nodes(tree):
                if not isinstance(node, ast.Assign):
                    continue
                for target in node.targets:
                    # Tuple unpacking, attribute and subscript targets are skipped.
                    if isinstance(target, ast.Name):
                        var_info = VariableInfo(
                            name=target.id,
                            type=self._infer_type(node.value),
                            value=self._get_value(node.value)
                        )
                        variables.append(var_info)

            return variables

        def _extract_method(self, node: ast.FunctionDef) -> MethodInfo:
            """Build a ``MethodInfo`` from a function definition inside a class body.

            ``is_static`` / ``is_classmethod`` are true only when the decorator is
            written as the bare name ``staticmethod`` / ``classmethod``.
            """
            return MethodInfo(
                name=node.name,
                arguments=[arg.arg for arg in node.args.args],
                return_type=self._get_return_type(node),
                docstring=ast.get_docstring(node),
                is_static=any(isinstance(d, ast.Name) and d.id == 'staticmethod'
                              for d in node.decorator_list),
                is_classmethod=any(isinstance(d, ast.Name) and d.id == 'classmethod'
                                  for d in node.decorator_list)
            )

        def _extract_attribute(self, node: ast.Assign) -> AttributeInfo:
            """Build an ``AttributeInfo`` from a class-body assignment.

            Only the first target is considered. Returns ``None`` when that
            target is not a simple name.
            """
            target = node.targets[0]
            if isinstance(target, ast.Name):
                return AttributeInfo(
                    name=target.id,
                    type=self._infer_type(node.value),
                    value=self._get_value(node.value)
                )
            return None

        def _get_name(self, node: ast.AST) -> str:
            """Return a readable, source-like name for ``node``.

            Plain names and dotted attribute chains are rebuilt directly. Any
            other expression (subscript, call, constant, ...) is rendered with
            ``ast.unparse`` when the running Python provides it.
            """
            if isinstance(node, ast.Name):
                return node.id
            if isinstance(node, ast.Attribute):
                return f"{self._get_name(node.value)}.{node.attr}"

            # ast.unparse exists on Python 3.9+; older interpreters keep the
            # previous behaviour of returning the node's default string form.
            unparse = getattr(ast, "unparse", None)
            if unparse is not None:
                return unparse(node)
            return str(node)

        def _get_return_type(self, node: ast.FunctionDef) -> str:
            """Return the function's return annotation as text, or ``"None"`` if absent."""
            if node.returns:
                return self._get_name(node.returns)
            return "None"

        def _infer_type(self, node: ast.AST) -> str:
            """Guess a type name for the value expression ``node``.

            Constants give the name of their Python type, list and dict
            displays give ``"list"`` / ``"dict"``, calls give the called name,
            and anything else gives ``"Any"``.
            """
            if isinstance(node, ast.Constant):
                return type(node.value).__name__
            elif isinstance(node, ast.List):
                return "list"
            elif isinstance(node, ast.Dict):
                return "dict"
            elif isinstance(node, ast.Call):
                return self._get_name(node.func)
            return "Any"

        def _get_value(self, node: ast.AST) -> Any:
            """Return the literal value of a constant node, or ``None`` otherwise."""
            if isinstance(node, ast.Constant):
                return node.value
            return None

## 29.4.3 結構分析

### 結構分析器

    class StructureAnalyzer:
        """Derives structural facts (hierarchy, call graph, dependencies, complexity)
        from an already parsed piece of code."""

        def analyze(self, parsed_code: ParsedCode) -> StructureAnalysis:
            """Run every structural analysis and collect the results.

            Args:
                parsed_code: Parser output; its ``classes``, ``functions``,
                    ``imports``, ``language``, ``ast`` and ``original_code``
                    attributes are read by the helpers below.

            Returns:
                A ``StructureAnalysis`` with ``class_hierarchy``, ``call_graph``,
                ``dependencies`` and ``complexity`` filled in.
            """
            analysis = StructureAnalysis()

            analysis.class_hierarchy = self._analyze_class_hierarchy(
                parsed_code.classes
            )
            analysis.call_graph = self._analyze_call_graph(parsed_code)
            analysis.dependencies = self._analyze_dependencies(parsed_code)
            analysis.complexity = self._analyze_complexity(parsed_code)

            return analysis

        def _analyze_class_hierarchy(self,
                                     classes: List[ClassInfo]) -> Dict[str, List[str]]:
            """Map each class name to the list of its base-class names."""
            hierarchy = {}
            for cls in classes:
                hierarchy[cls.name] = cls.bases
            return hierarchy

        def _analyze_call_graph(self,
                                parsed_code: ParsedCode) -> Dict[str, List[str]]:
            """Map every function and ``Class.method`` name to the names it calls."""
            call_graph = {}

            # Free functions are keyed by their bare name.
            for func in parsed_code.functions:
                calls = self._extract_function_calls(func, parsed_code)
                call_graph[func.name] = calls

            # Methods are keyed as "ClassName.method_name".
            for cls in parsed_code.classes:
                for method in cls.methods:
                    calls = self._extract_method_calls(method, cls, parsed_code)
                    call_graph[f"{cls.name}.{method.name}"] = calls

            return call_graph

        def _extract_function_calls(self, func: FunctionInfo,
                                    parsed_code: ParsedCode) -> List[str]:
            """Return the calls made by ``func``.

            Placeholder: a real implementation needs to inspect the function
            body in the AST. It currently always returns an empty list.
            """
            calls = []
            return calls

        def _extract_method_calls(self, method: MethodInfo,
                                  cls: ClassInfo,
                                  parsed_code: ParsedCode) -> List[str]:
            """Return the calls made by ``method`` of ``cls``.

            Placeholder: a real implementation needs to inspect the method
            body in the AST. It currently always returns an empty list.
            """
            calls = []
            return calls

        def _analyze_dependencies(self,
                                  parsed_code: ParsedCode) -> List[Dependency]:
            """List import dependencies followed by class-inheritance dependencies."""
            dependencies = []

            # One 'import' dependency per imported name; the source is recorded
            # as the parsed code's language string.
            for imp in parsed_code.imports:
                dependency = Dependency(
                    type='import',
                    source=parsed_code.language,
                    target=imp.module,
                    strength='external'
                )
                dependencies.append(dependency)

            # One 'inheritance' dependency per (class, base) pair.
            for cls in parsed_code.classes:
                for base in cls.bases:
                    dependency = Dependency(
                        type='inheritance',
                        source=cls.name,
                        target=base,
                        strength='strong'
                    )
                    dependencies.append(dependency)

            return dependencies

        def _analyze_complexity(self,
                                parsed_code: ParsedCode) -> ComplexityMetrics:
            """Compute cyclomatic, cognitive and maintainability metrics."""
            metrics = ComplexityMetrics()

            metrics.cyclomatic_complexity = self._calculate_cyclomatic_complexity(
                parsed_code
            )
            metrics.cognitive_complexity = self._calculate_cognitive_complexity(
                parsed_code
            )
            metrics.maintainability_index = self._calculate_maintainability_index(
                parsed_code
            )

            return metrics

        def _calculate_cyclomatic_complexity(self,
                                             parsed_code: ParsedCode) -> float:
            """Count decision points in ``parsed_code.ast``, starting from 1.

            Each ``if``, ``while``, ``for`` and ``except`` handler adds one;
            each boolean operation adds one per extra operand.
            """
            complexity = 1  # base path through the code

            for node in ast.walk(parsed_code.ast):
                if isinstance(node, (ast.If, ast.While, ast.For, ast.ExceptHandler)):
                    complexity += 1
                elif isinstance(node, ast.BoolOp):
                    # "a and b and c" has three values, i.e. two extra branches.
                    complexity += len(node.values) - 1

            return complexity

        def _calculate_cognitive_complexity(self,
                                            parsed_code: ParsedCode) -> float:
            """Approximate cognitive complexity as 1.5 x cyclomatic complexity."""
            return self._calculate_cyclomatic_complexity(parsed_code) * 1.5

        def _calculate_maintainability_index(self,
                                             parsed_code: ParsedCode) -> float:
            """Return a simplified maintainability index clamped to ``[0, 100]``.

            Lines are counted by splitting ``original_code`` on newlines, so the
            count is always at least 1 and both logarithms are defined.
            """
            loc = len(parsed_code.original_code.split('\n'))
            complexity = self._calculate_cyclomatic_complexity(parsed_code)

            # NOTE(review): the commonly cited formula is
            #   MI = 171 - 5.2 * ln(V) - 0.23 * G - 16.2 * ln(LOC)
            # with V = Halstead volume and G = cyclomatic complexity. This
            # simplified version feeds cyclomatic complexity into the V term
            # and the line count into the G term - confirm that is intended.
            mi = 171 - 5.2 * math.log(complexity) - 0.23 * loc - 16.2 * math.log(loc)

            return max(0.0, min(100.0, mi))

## 29.4.4 語義分析

### 語義分析器

    class SemanticAnalyzer:
        """Semantic analyzer that combines LLM prompts with simple static heuristics."""

        def __init__(self, llm_client: LLMClient):
            # Client whose awaitable ``complete(prompt)`` method is used for every
            # LLM-backed analysis below.
            self.llm_client = llm_client

        async def analyze(self, parsed_code: ParsedCode,
                         structure: StructureAnalysis) -> SemanticAnalysis:
            """Run every semantic analysis and collect the results.

            Args:
                parsed_code: Parser output for the code under analysis.
                structure: Structural analysis of the same code; its
                    ``class_hierarchy`` is used for design-pattern detection.

            Returns:
                A ``SemanticAnalysis`` with ``purpose``, ``algorithms``,
                ``design_patterns`` and ``data_flow`` filled in.
            """
            analysis = SemanticAnalysis()

            # The steps are awaited one after another, in this order.
            analysis.purpose = await self._analyze_purpose(parsed_code)
            analysis.algorithms = await self._analyze_algorithms(parsed_code)
            analysis.design_patterns = await self._analyze_design_patterns(
                parsed_code,
                structure
            )
            analysis.data_flow = await self._analyze_data_flow(parsed_code)

            return analysis

        async def _analyze_purpose(self,
                                  parsed_code: ParsedCode) -> str:
            """Ask the LLM to describe what the code is for; returns its raw reply."""
            prompt = f"""
            分析以下程式碼的主要目的和功能:

            {parsed_code.original_code}

            請用簡潔的語言描述這段程式碼的主要功能。
            """

            return await self.llm_client.complete(prompt)

        async def _analyze_algorithms(self,
                                     parsed_code: ParsedCode) -> List[AlgorithmInfo]:
            """Ask the LLM which algorithms the code uses and parse its reply."""
            prompt = f"""
            識別以下程式碼中使用的演算法:

            {parsed_code.original_code}

            請識別:
            1. 使用的主要演算法(排序、搜尋、圖演算法等)
            2. 演算法的時間複雜度
            3. 演算法的空間複雜度

            以 JSON 格式返回結果。
            """

            response = await self.llm_client.complete(prompt)
            # NOTE(review): _parse_algorithms is not defined in the visible part of
            # this class - confirm it exists elsewhere.
            return self._parse_algorithms(response)

        async def _analyze_design_patterns(self,
                                          parsed_code: ParsedCode,
                                          structure: StructureAnalysis) -> List[str]:
            """Ask the LLM which design patterns the code uses and parse its reply."""
            prompt = f"""
            識別以下程式碼中使用的設計模式:

            類:{parsed_code.classes}
            函式:{parsed_code.functions}
            類層次結構:{structure.class_hierarchy}

            請識別使用的設計模式。
            """

            response = await self.llm_client.complete(prompt)
            # NOTE(review): _parse_design_patterns is not defined in the visible part
            # of this class - confirm it exists elsewhere.
            return self._parse_design_patterns(response)

        async def _analyze_data_flow(self,
                                    parsed_code: ParsedCode) -> DataFlowAnalysis:
            """Collect inputs, outputs and transformations using the static helpers."""
            analysis = DataFlowAnalysis()

            analysis.inputs = self._identify_inputs(parsed_code)
            analysis.outputs = self._identify_outputs(parsed_code)
            analysis.transformations = self._identify_transformations(parsed_code)

            return analysis

        def _identify_inputs(self, parsed_code: ParsedCode) -> List[str]:
            """Return the distinct argument names of all functions and methods.

            Names appear in first-seen order: function arguments first, then
            method arguments class by class.
            """
            inputs = []

            for func in parsed_code.functions:
                inputs.extend(func.arguments)

            for cls in parsed_code.classes:
                for method in cls.methods:
                    inputs.extend(method.arguments)

            # dict.fromkeys de-duplicates while keeping insertion order, so the
            # result is stable across runs (a set's order varies with string hashing).
            return list(dict.fromkeys(inputs))

        def _identify_outputs(self, parsed_code: ParsedCode) -> List[str]:
            """Describe every function and method whose return type is not ``"None"``."""
            outputs = []

            for func in parsed_code.functions:
                if func.return_type != "None":
                    outputs.append(f"{func.name}() -> {func.return_type}")

            for cls in parsed_code.classes:
                for method in cls.methods:
                    if method.return_type != "None":
                        outputs.append(f"{cls.name}.{method.name}() -> {method.return_type}")

            return outputs

        def _identify_transformations(self,
                                     parsed_code: ParsedCode) -> List[str]:
            """Return names of functions that look like data transformations.

            Simplified heuristic: a function qualifies when its lower-cased name
            contains one of a few keywords. Methods are not considered.
            """
            transformations = []

            for func in parsed_code.functions:
                if any(keyword in func.name.lower()
                      for keyword in ['transform', 'convert', 'process', 'compute']):
                    transformations.append(func.name)

            return transformations

## 29.4.5 程式碼解釋生成

### 解釋生成器

    class ExplanationGenerator:
        """Builds human-readable explanations of analysed code by prompting an LLM."""

        def __init__(self, llm_client: LLMClient):
            # Client whose awaitable ``complete(prompt)`` method produces each explanation.
            self.llm_client = llm_client

        async def generate_explanation(self,
                                       parsed_code: ParsedCode,
                                       structure: StructureAnalysis,
                                       semantic: SemanticAnalysis) -> CodeExplanation:
            """Generate every section of the explanation.

            Args:
                parsed_code: Parser output for the code being explained.
                structure: Structural analysis of the same code.
                semantic: Semantic analysis; ``purpose`` and ``algorithms`` are read.

            Returns:
                A ``CodeExplanation`` with ``overview``, ``class_explanations``,
                ``function_explanations``, ``algorithm_explanations`` and
                ``usage_examples`` filled in.
            """
            explanation = CodeExplanation()

            # The sections are awaited one after another, in this order.
            explanation.overview = await self._generate_overview(
                parsed_code,
                semantic
            )
            explanation.class_explanations = await self._generate_class_explanations(
                parsed_code.classes,
                structure
            )
            explanation.function_explanations = await self._generate_function_explanations(
                parsed_code.functions,
                structure
            )
            explanation.algorithm_explanations = await self._generate_algorithm_explanations(
                semantic.algorithms
            )
            explanation.usage_examples = await self._generate_usage_examples(
                parsed_code
            )

            return explanation

        async def _generate_overview(self,
                                     parsed_code: ParsedCode,
                                     semantic: SemanticAnalysis) -> str:
            """Ask the LLM for an overall summary; returns its raw reply."""
            prompt = f"""
            为以下代码生成总体概述:
            代码目的:{semantic.purpose}
            类:{[c.name for c in parsed_code.classes]}
            函数:{[f.name for f in parsed_code.functions]}
            请生成一个清晰的总体概述,包括:
            1. 代码的主要功能
            2. 主要组件
            3. 整体架构
            """

            return await self.llm_client.complete(prompt)

        async def _generate_class_explanations(self,
                                               classes: List[ClassInfo],
                                               structure: StructureAnalysis) -> Dict[str, str]:
            """Ask the LLM to explain each class; returns ``{class name: reply}``."""
            explanations = {}

            for cls in classes:
                # Attribute entries may be None, hence the "if a" filter.
                prompt = f"""
                为以下类生成详细解释:
                类名:{cls.name}
                父类:{cls.bases}
                方法:{[m.name for m in cls.methods]}
                属性:{[a.name for a in cls.attributes if a]}
                文档字符串:{cls.docstring}
                请生成详细的类解释,包括:
                1. 类的职责
                2. 主要方法的功能
                3. 使用场景
                """
                explanation = await self.llm_client.complete(prompt)
                explanations[cls.name] = explanation

            return explanations

        async def _generate_function_explanations(self,
                                                  functions: List[FunctionInfo],
                                                  structure: StructureAnalysis) -> Dict[str, str]:
            """Ask the LLM to explain each function; returns ``{function name: reply}``."""
            explanations = {}

            for func in functions:
                prompt = f"""
                为以下函数生成详细解释:
                函数名:{func.name}
                参数:{func.arguments}
                返回类型:{func.return_type}
                文档字符串:{func.docstring}
                请生成详细的函数解释,包括:
                1. 函数的功能
                2. 参数说明
                3. 返回值说明
                4. 使用示例
                """
                explanation = await self.llm_client.complete(prompt)
                explanations[func.name] = explanation

            return explanations

        async def _generate_algorithm_explanations(self,
                                                   algorithms: List[AlgorithmInfo]) -> Dict[str, str]:
            """Ask the LLM to explain each algorithm; returns ``{algorithm name: reply}``."""
            explanations = {}

            for algo in algorithms:
                prompt = f"""
                为以下算法生成详细解释:
                算法名称:{algo.name}
                时间复杂度:{algo.time_complexity}
                空间复杂度:{algo.space_complexity}
                请生成详细的算法解释,包括:
                1. 算法原理
                2. 实现细节
                3. 优缺点分析
                4. 适用场景
                """
                explanation = await self.llm_client.complete(prompt)
                explanations[algo.name] = explanation

            return explanations

        async def _generate_usage_examples(self,
                                           parsed_code: ParsedCode) -> List[str]:
            """Ask the LLM for usage examples.

            One reply is collected for every class, followed by one reply for
            each of the first three functions.
            """
            examples = []

            for cls in parsed_code.classes:
                prompt = f"""
                为以下类生成使用示例:
                类名:{cls.name}
                方法:{[m.name for m in cls.methods]}
                请生成 2-3 个实用的使用示例。
                """
                example = await self.llm_client.complete(prompt)
                examples.append(example)

            # Only the first three functions get examples.
            for func in parsed_code.functions[:3]:
                prompt = f"""
                为以下函数生成使用示例:
                函数名:{func.name}
                参数:{func.arguments}
                请生成 1-2 个实用的使用示例。
                """
                example = await self.llm_client.complete(prompt)
                examples.append(example)

            return examples

基于 MIT 许可发布 | 永久导航