{
"file_path": "travel-algorithms/travel_algorithms/content_generation/content_judger.py",
"file_size": 12662,
"line_count": 380,
"functions": [
{
"name": "__init__",
"line_start": 26,
"line_end": 51,
"args": [
{
"name": "self"
},
{
"name": "config",
"type_hint": "AlgorithmConfig"
}
],
"return_type": null,
"docstring": "初始化内容审核器\n\nArgs:\n config: 算法配置",
"is_async": false,
"decorators": [],
"code": " def __init__(self, config: AlgorithmConfig):\n \"\"\"\n 初始化内容审核器\n\n Args:\n config: 算法配置\n \"\"\"\n self.config = config\n self.ai_service = AIService(config.ai_model)\n self.output_manager = OutputManager(config.output)\n self.prompt_manager = PromptManager(config.prompts, config.resources)\n \n # 初始化JSON处理器\n self.json_processor = JSONProcessor(\n enable_repair=config.content_generation.enable_json_repair,\n max_repair_attempts=config.content_generation.json_repair_attempts\n )\n \n # 获取任务特定的模型配置和字段配置\n self.task_model_config = config.ai_model.get_task_config(\"content_judging\")\n self.field_config = config.content_generation.result_field_mapping.get(\"content_judging\", {})\n \n # 审核阈值配置\n self.judge_threshold = config.content_generation.judge_threshold\n \n logger.info(f\"内容审核器初始化完成,使用模型参数: {self.task_model_config}\")",
"code_hash": "c99b924d21358871a472386e4961f637"
},
{
"name": "_parse_judge_result",
"line_start": 149,
"line_end": 182,
"args": [
{
"name": "self"
},
{
"name": "raw_output",
"type_hint": "str"
}
],
"return_type": "Tuple[bool, str, float, Dict[str, Any]]",
"docstring": "解析审核结果\n\nArgs:\n raw_output: AI原始输出\n\nReturns:\n Tuple[是否通过, 错误信息, 置信度, 完整结果]",
"is_async": false,
"decorators": [],
"code": " def _parse_judge_result(self, raw_output: str) -> Tuple[bool, str, float, Dict[str, Any]]:\n \"\"\"\n 解析审核结果\n\n Args:\n raw_output: AI原始输出\n\n Returns:\n Tuple[是否通过, 错误信息, 置信度, 完整结果]\n \"\"\"\n try:\n # 使用JSON处理器解析\n parsed_data = self.json_processor.parse_llm_output(\n raw_output=raw_output,\n expected_fields=self.field_config.get(\"expected_fields\", [\"analysis\", \"title\", \"content\"]),\n required_fields=self.field_config.get(\"required_fields\", [\"title\", \"content\"])\n )\n\n # 根据实际judgeContent/system.txt的输出格式进行解析\n if isinstance(parsed_data, dict):\n judge_result = self._normalize_judge_data(parsed_data)\n else:\n # 回退处理\n judge_result = self._create_fallback_judge_data(raw_output)\n\n # 判断是否通过\n is_passed, error_message, confidence = self._evaluate_pass_status(judge_result)\n\n return is_passed, error_message, confidence, judge_result\n\n except Exception as e:\n logger.error(f\"审核结果解析失败: {e}\")\n # 返回保守的失败结果\n return False, f\"解析失败: {str(e)}\", 0.0, {\"error\": str(e), \"raw_output\": raw_output}",
"code_hash": "14c8a5de7e3df0bd0989c7936afaa699"
},
{
"name": "_normalize_judge_data",
"line_start": 184,
"line_end": 215,
"args": [
{
"name": "self"
},
{
"name": "parsed_data",
"type_hint": "Dict[str, Any]"
}
],
"return_type": "Dict[str, Any]",
"docstring": "标准化审核数据格式\n\nArgs:\n parsed_data: 解析后的数据\n\nReturns:\n 标准化后的审核结果字典",
"is_async": false,
"decorators": [],
"code": " def _normalize_judge_data(self, parsed_data: Dict[str, Any]) -> Dict[str, Any]:\n \"\"\"\n 标准化审核数据格式\n\n Args:\n parsed_data: 解析后的数据\n\n Returns:\n 标准化后的审核结果字典\n \"\"\"\n # 根据实际judgeContent/system.txt的输出格式进行标准化\n normalized_data = {\n # 核心字段根据实际system.txt的输出格式\n \"analysis\": parsed_data.get(\"analysis\", \"\"),\n \"title\": parsed_data.get(\"title\", \"\"),\n \"content\": parsed_data.get(\"content\", \"\"),\n \n # 计算字段\n \"has_analysis\": bool(parsed_data.get(\"analysis\", \"\").strip()),\n \"title_changed\": parsed_data.get(\"title\", \"\") != \"\",\n \"content_changed\": parsed_data.get(\"content\", \"\") != \"\",\n \"analysis_length\": len(parsed_data.get(\"analysis\", \"\")),\n \n # 元数据\n \"metadata\": {\n \"original_parsed_data\": parsed_data,\n \"normalization_version\": \"v1\",\n \"generated_at\": self.output_manager.run_id\n }\n }\n\n return normalized_data",
"code_hash": "01e6875325d6a9af0a71cb6df1dbd987"
},
{
"name": "_create_fallback_judge_data",
"line_start": 217,
"line_end": 241,
"args": [
{
"name": "self"
},
{
"name": "raw_output",
"type_hint": "str"
}
],
"return_type": "Dict[str, Any]",
"docstring": "创建回退的审核数据当JSON解析失败时\n\nArgs:\n raw_output: 原始输出\n\nReturns:\n 回退的审核结果字典",
"is_async": false,
"decorators": [],
"code": " def _create_fallback_judge_data(self, raw_output: str) -> Dict[str, Any]:\n \"\"\"\n 创建回退的审核数据当JSON解析失败时\n\n Args:\n raw_output: 原始输出\n\n Returns:\n 回退的审核结果字典\n \"\"\"\n # 尝试从原始文本中提取信息\n return {\n \"analysis\": raw_output,\n \"title\": \"\",\n \"content\": \"\",\n \"has_analysis\": bool(raw_output.strip()),\n \"title_changed\": False,\n \"content_changed\": False,\n \"analysis_length\": len(raw_output),\n \"metadata\": {\n \"parsing_method\": \"fallback_text_extraction\",\n \"generated_at\": self.output_manager.run_id,\n \"original_raw_output\": raw_output\n }\n }",
"code_hash": "20800a0738db2e9a451f0a81aec58d45"
},
{
"name": "_evaluate_pass_status",
"line_start": 243,
"line_end": 306,
"args": [
{
"name": "self"
},
{
"name": "judge_result",
"type_hint": "Dict[str, Any]"
}
],
"return_type": "Tuple[bool, str, float]",
"docstring": "评估是否通过审核\n\nArgs:\n judge_result: 审核结果\n\nReturns:\n Tuple[是否通过, 错误信息, 置信度]",
"is_async": false,
"decorators": [],
"code": " def _evaluate_pass_status(self, judge_result: Dict[str, Any]) -> Tuple[bool, str, float]:\n \"\"\"\n 评估是否通过审核\n\n Args:\n judge_result: 审核结果\n\n Returns:\n Tuple[是否通过, 错误信息, 置信度]\n \"\"\"\n try:\n analysis = judge_result.get(\"analysis\", \"\")\n title = judge_result.get(\"title\", \"\")\n content = judge_result.get(\"content\", \"\")\n\n # 基础检查:是否有分析内容\n if not analysis:\n return False, \"缺少审核分析\", 0.1\n\n # 检查是否有修改后的内容\n has_corrections = bool(title) or bool(content)\n \n # 根据分析内容进行判断(简单的关键词判断)\n analysis_lower = analysis.lower()\n negative_keywords = [\n \"不符\", \"错误\", \"虚构\", \"不合理\", \"不准确\", \"缺少\", \"遗漏\", \n \"超过\", \"违反\", \"不当\", \"problem\", \"error\", \"incorrect\", \"missing\"\n ]\n \n positive_keywords = [\n \"符合\", \"正确\", \"准确\", \"合理\", \"完整\", \"适当\", \"良好\",\n \"correct\", \"accurate\", \"appropriate\", \"good\", \"proper\"\n ]\n\n negative_score = sum(1 for keyword in negative_keywords if keyword in analysis_lower)\n positive_score = sum(1 for keyword in positive_keywords if keyword in analysis_lower)\n\n # 计算置信度\n if has_corrections:\n # 如果有修改,说明原内容有问题,但修改后应该是通过的\n confidence = max(0.7, min(0.95, 0.7 + positive_score * 0.1))\n is_passed = True\n error_message = \"\"\n elif negative_score > positive_score:\n # 负面评价较多\n confidence = max(0.3, min(0.9, 0.5 + negative_score * 0.1))\n is_passed = False\n error_message = \"内容质量不符合要求\"\n else:\n # 正面评价较多或无明显问题\n confidence = max(0.6, min(0.95, 0.8 + positive_score * 0.05))\n is_passed = True\n error_message = \"\"\n\n # 应用阈值\n if confidence < self.judge_threshold:\n is_passed = False\n error_message = error_message or \"审核置信度不足\"\n\n return is_passed, error_message, confidence\n\n except Exception as e:\n logger.error(f\"审核状态评估失败: {e}\")\n return False, f\"评估失败: {str(e)}\", 0.0",
"code_hash": "8ecdc33854003596b83b1f520ca25f64"
},
{
"name": "get_judge_stats",
"line_start": 366,
"line_end": 381,
"args": [
{
"name": "self"
}
],
"return_type": "Dict[str, Any]",
"docstring": "获取审核统计信息\n\nReturns:\n 统计信息字典",
"is_async": false,
"decorators": [],
"code": " def get_judge_stats(self) -> Dict[str, Any]:\n \"\"\"\n 获取审核统计信息\n\n Returns:\n 统计信息字典\n \"\"\"\n return {\n \"task_model_config\": self.task_model_config,\n \"field_config\": self.field_config,\n \"judge_threshold\": self.judge_threshold,\n \"output_directory\": str(self.output_manager.run_output_dir),\n \"ai_model_info\": self.ai_service.get_model_info(),\n \"prompt_templates\": self.prompt_manager.get_available_templates().get(\"content_judging\", {}),\n \"json_processor_enabled\": self.json_processor.enable_repair\n } ",
"code_hash": "fb9714107dd5e3a1a489cc517f147913"
}
],
"classes": [
{
"name": "ContentJudger",
"line_start": 20,
"line_end": 381,
"bases": [],
"methods": [
{
"name": "__init__",
"line_start": 26,
"line_end": 51,
"args": [
{
"name": "self"
},
{
"name": "config",
"type_hint": "AlgorithmConfig"
}
],
"return_type": null,
"docstring": "初始化内容审核器\n\nArgs:\n config: 算法配置",
"is_async": false,
"decorators": [],
"code": " def __init__(self, config: AlgorithmConfig):\n \"\"\"\n 初始化内容审核器\n\n Args:\n config: 算法配置\n \"\"\"\n self.config = config\n self.ai_service = AIService(config.ai_model)\n self.output_manager = OutputManager(config.output)\n self.prompt_manager = PromptManager(config.prompts, config.resources)\n \n # 初始化JSON处理器\n self.json_processor = JSONProcessor(\n enable_repair=config.content_generation.enable_json_repair,\n max_repair_attempts=config.content_generation.json_repair_attempts\n )\n \n # 获取任务特定的模型配置和字段配置\n self.task_model_config = config.ai_model.get_task_config(\"content_judging\")\n self.field_config = config.content_generation.result_field_mapping.get(\"content_judging\", {})\n \n # 审核阈值配置\n self.judge_threshold = config.content_generation.judge_threshold\n \n logger.info(f\"内容审核器初始化完成,使用模型参数: {self.task_model_config}\")",
"code_hash": "c99b924d21358871a472386e4961f637"
},
{
"name": "judge_content",
"line_start": 53,
"line_end": 147,
"args": [
{
"name": "self"
},
{
"name": "tweet_content",
"type_hint": "str"
},
{
"name": "object_content",
"type_hint": "str"
},
{
"name": "product_content",
"type_hint": "str"
},
{
"name": "refer_content",
"type_hint": "str"
},
{
"name": "content_metadata",
"type_hint": "Optional[Dict[str, Any]]"
}
],
"return_type": "Tuple[bool, str, float, Dict[str, Any]]",
"docstring": "审核内容质量\n\nArgs:\n tweet_content: 待审核的文案内容\n object_content: 景区/对象信息\n product_content: 产品信息\n refer_content: 参考内容\n content_metadata: 内容元数据\n **kwargs: 其他参数\n\nReturns:\n Tuple[是否通过, 错误信息, 置信度, 审核结果详情]\n\nRaises:\n ContentGenerationError: 审核失败时抛出",
"is_async": true,
"decorators": [],
"code": " async def judge_content(\n self,\n tweet_content: str,\n object_content: str,\n product_content: str,\n refer_content: str = \"\",\n content_metadata: Optional[Dict[str, Any]] = None,\n **kwargs\n ) -> Tuple[bool, str, float, Dict[str, Any]]:\n \"\"\"\n 审核内容质量\n\n Args:\n tweet_content: 待审核的文案内容\n object_content: 景区/对象信息\n product_content: 产品信息\n refer_content: 参考内容\n content_metadata: 内容元数据\n **kwargs: 其他参数\n\n Returns:\n Tuple[是否通过, 错误信息, 置信度, 审核结果详情]\n\n Raises:\n ContentGenerationError: 审核失败时抛出\n \"\"\"\n try:\n logger.info(\"开始执行内容审核流程\")\n\n # 1. 构建提示词\n system_prompt = self.prompt_manager.get_prompt(\"content_judging\", \"system\")\n user_prompt_template = self.prompt_manager.get_prompt(\"content_judging\", \"user\")\n \n # 格式化用户提示词根据实际judgeContent/user.txt格式\n user_prompt = self.prompt_manager.format_prompt(\n user_prompt_template,\n object_content=object_content,\n product_content=product_content,\n refer_content=refer_content,\n tweet_content=tweet_content,\n **kwargs\n )\n\n # 保存提示词(如果配置允许)\n if self.config.output.save_prompts:\n self.output_manager.save_text(system_prompt, \"system_prompt\", \"content_judging\")\n self.output_manager.save_text(user_prompt, \"user_prompt\", \"content_judging\")\n\n # 2. 调用AI审核\n content, input_tokens, output_tokens, elapsed_time = await self.ai_service.generate_text(\n system_prompt=system_prompt,\n user_prompt=user_prompt,\n stage=\"内容审核\",\n **self.task_model_config\n )\n\n # 保存原始响应(如果配置允许)\n if self.config.output.save_raw_responses:\n self.output_manager.save_text(content, \"raw_response\", \"content_judging\")\n\n # 3. 解析审核结果\n is_passed, error_message, confidence, judge_result = self._parse_judge_result(content)\n\n # 4. 
保存审核结果\n complete_result = {\n \"original_content\": tweet_content,\n \"judge_result\": judge_result,\n \"is_passed\": is_passed,\n \"error_message\": error_message,\n \"confidence\": confidence,\n \"metadata\": {\n \"content_metadata\": content_metadata,\n \"judge_threshold\": self.judge_threshold,\n \"model_config\": self.task_model_config,\n \"field_config\": self.field_config,\n \"tokens\": {\n \"input\": input_tokens,\n \"output\": output_tokens\n },\n \"elapsed_time\": elapsed_time\n }\n }\n \n self.output_manager.save_json(complete_result, \"judged_content\")\n\n # 5. 保存元数据\n self.output_manager.save_metadata(complete_result[\"metadata\"], \"content_judging\")\n\n logger.info(f\"内容审核完成,结果: {'通过' if is_passed else '不通过'}, 置信度: {confidence:.2f}\")\n return is_passed, error_message, confidence, judge_result\n\n except Exception as e:\n error_msg = f\"内容审核失败: {str(e)}\"\n logger.error(error_msg, exc_info=True)\n raise ContentGenerationError(error_msg)",
"code_hash": "b3ca81ff4c689fb92d930f725138b80a"
},
{
"name": "_parse_judge_result",
"line_start": 149,
"line_end": 182,
"args": [
{
"name": "self"
},
{
"name": "raw_output",
"type_hint": "str"
}
],
"return_type": "Tuple[bool, str, float, Dict[str, Any]]",
"docstring": "解析审核结果\n\nArgs:\n raw_output: AI原始输出\n\nReturns:\n Tuple[是否通过, 错误信息, 置信度, 完整结果]",
"is_async": false,
"decorators": [],
"code": " def _parse_judge_result(self, raw_output: str) -> Tuple[bool, str, float, Dict[str, Any]]:\n \"\"\"\n 解析审核结果\n\n Args:\n raw_output: AI原始输出\n\n Returns:\n Tuple[是否通过, 错误信息, 置信度, 完整结果]\n \"\"\"\n try:\n # 使用JSON处理器解析\n parsed_data = self.json_processor.parse_llm_output(\n raw_output=raw_output,\n expected_fields=self.field_config.get(\"expected_fields\", [\"analysis\", \"title\", \"content\"]),\n required_fields=self.field_config.get(\"required_fields\", [\"title\", \"content\"])\n )\n\n # 根据实际judgeContent/system.txt的输出格式进行解析\n if isinstance(parsed_data, dict):\n judge_result = self._normalize_judge_data(parsed_data)\n else:\n # 回退处理\n judge_result = self._create_fallback_judge_data(raw_output)\n\n # 判断是否通过\n is_passed, error_message, confidence = self._evaluate_pass_status(judge_result)\n\n return is_passed, error_message, confidence, judge_result\n\n except Exception as e:\n logger.error(f\"审核结果解析失败: {e}\")\n # 返回保守的失败结果\n return False, f\"解析失败: {str(e)}\", 0.0, {\"error\": str(e), \"raw_output\": raw_output}",
"code_hash": "14c8a5de7e3df0bd0989c7936afaa699"
},
{
"name": "_normalize_judge_data",
"line_start": 184,
"line_end": 215,
"args": [
{
"name": "self"
},
{
"name": "parsed_data",
"type_hint": "Dict[str, Any]"
}
],
"return_type": "Dict[str, Any]",
"docstring": "标准化审核数据格式\n\nArgs:\n parsed_data: 解析后的数据\n\nReturns:\n 标准化后的审核结果字典",
"is_async": false,
"decorators": [],
"code": " def _normalize_judge_data(self, parsed_data: Dict[str, Any]) -> Dict[str, Any]:\n \"\"\"\n 标准化审核数据格式\n\n Args:\n parsed_data: 解析后的数据\n\n Returns:\n 标准化后的审核结果字典\n \"\"\"\n # 根据实际judgeContent/system.txt的输出格式进行标准化\n normalized_data = {\n # 核心字段根据实际system.txt的输出格式\n \"analysis\": parsed_data.get(\"analysis\", \"\"),\n \"title\": parsed_data.get(\"title\", \"\"),\n \"content\": parsed_data.get(\"content\", \"\"),\n \n # 计算字段\n \"has_analysis\": bool(parsed_data.get(\"analysis\", \"\").strip()),\n \"title_changed\": parsed_data.get(\"title\", \"\") != \"\",\n \"content_changed\": parsed_data.get(\"content\", \"\") != \"\",\n \"analysis_length\": len(parsed_data.get(\"analysis\", \"\")),\n \n # 元数据\n \"metadata\": {\n \"original_parsed_data\": parsed_data,\n \"normalization_version\": \"v1\",\n \"generated_at\": self.output_manager.run_id\n }\n }\n\n return normalized_data",
"code_hash": "01e6875325d6a9af0a71cb6df1dbd987"
},
{
"name": "_create_fallback_judge_data",
"line_start": 217,
"line_end": 241,
"args": [
{
"name": "self"
},
{
"name": "raw_output",
"type_hint": "str"
}
],
"return_type": "Dict[str, Any]",
"docstring": "退JSON\n\nArgs:\n raw_output: \n\nReturns:\n 退",
"is_async": false,
"decorators": [],
"code": " def _create_fallback_judge_data(self, raw_output: str) -> Dict[str, Any]:\n \"\"\"\n 创建回退的审核数据当JSON解析失败时\n\n Args:\n raw_output: 原始输出\n\n Returns:\n 回退的审核结果字典\n \"\"\"\n # 尝试从原始文本中提取信息\n return {\n \"analysis\": raw_output,\n \"title\": \"\",\n \"content\": \"\",\n \"has_analysis\": bool(raw_output.strip()),\n \"title_changed\": False,\n \"content_changed\": False,\n \"analysis_length\": len(raw_output),\n \"metadata\": {\n \"parsing_method\": \"fallback_text_extraction\",\n \"generated_at\": self.output_manager.run_id,\n \"original_raw_output\": raw_output\n }\n }",
"code_hash": "20800a0738db2e9a451f0a81aec58d45"
},
{
"name": "_evaluate_pass_status",
"line_start": 243,
"line_end": 306,
"args": [
{
"name": "self"
},
{
"name": "judge_result",
"type_hint": "Dict[str, Any]"
}
],
"return_type": "Tuple[bool, str, float]",
"docstring": "评估是否通过审核\n\nArgs:\n judge_result: 审核结果\n\nReturns:\n Tuple[是否通过, 错误信息, 置信度]",
"is_async": false,
"decorators": [],
"code": " def _evaluate_pass_status(self, judge_result: Dict[str, Any]) -> Tuple[bool, str, float]:\n \"\"\"\n 评估是否通过审核\n\n Args:\n judge_result: 审核结果\n\n Returns:\n Tuple[是否通过, 错误信息, 置信度]\n \"\"\"\n try:\n analysis = judge_result.get(\"analysis\", \"\")\n title = judge_result.get(\"title\", \"\")\n content = judge_result.get(\"content\", \"\")\n\n # 基础检查:是否有分析内容\n if not analysis:\n return False, \"缺少审核分析\", 0.1\n\n # 检查是否有修改后的内容\n has_corrections = bool(title) or bool(content)\n \n # 根据分析内容进行判断(简单的关键词判断)\n analysis_lower = analysis.lower()\n negative_keywords = [\n \"不符\", \"错误\", \"虚构\", \"不合理\", \"不准确\", \"缺少\", \"遗漏\", \n \"超过\", \"违反\", \"不当\", \"problem\", \"error\", \"incorrect\", \"missing\"\n ]\n \n positive_keywords = [\n \"符合\", \"正确\", \"准确\", \"合理\", \"完整\", \"适当\", \"良好\",\n \"correct\", \"accurate\", \"appropriate\", \"good\", \"proper\"\n ]\n\n negative_score = sum(1 for keyword in negative_keywords if keyword in analysis_lower)\n positive_score = sum(1 for keyword in positive_keywords if keyword in analysis_lower)\n\n # 计算置信度\n if has_corrections:\n # 如果有修改,说明原内容有问题,但修改后应该是通过的\n confidence = max(0.7, min(0.95, 0.7 + positive_score * 0.1))\n is_passed = True\n error_message = \"\"\n elif negative_score > positive_score:\n # 负面评价较多\n confidence = max(0.3, min(0.9, 0.5 + negative_score * 0.1))\n is_passed = False\n error_message = \"内容质量不符合要求\"\n else:\n # 正面评价较多或无明显问题\n confidence = max(0.6, min(0.95, 0.8 + positive_score * 0.05))\n is_passed = True\n error_message = \"\"\n\n # 应用阈值\n if confidence < self.judge_threshold:\n is_passed = False\n error_message = error_message or \"审核置信度不足\"\n\n return is_passed, error_message, confidence\n\n except Exception as e:\n logger.error(f\"审核状态评估失败: {e}\")\n return False, f\"评估失败: {str(e)}\", 0.0",
"code_hash": "8ecdc33854003596b83b1f520ca25f64"
},
{
"name": "judge_content_batch",
"line_start": 308,
"line_end": 336,
"args": [
{
"name": "self"
},
{
"name": "content_list",
"type_hint": "List[Dict[str, Any]]"
}
],
"return_type": "Dict[str, Tuple[bool, str, float, Dict[str, Any]]]",
"docstring": "批量审核内容\n\nArgs:\n content_list: 内容列表,每个元素包含审核所需的参数\n\nReturns:\n 内容ID->审核结果的字典",
"is_async": true,
"decorators": [],
"code": " async def judge_content_batch(\n self,\n content_list: List[Dict[str, Any]]\n ) -> Dict[str, Tuple[bool, str, float, Dict[str, Any]]]:\n \"\"\"\n 批量审核内容\n\n Args:\n content_list: 内容列表,每个元素包含审核所需的参数\n\n Returns:\n 内容ID->审核结果的字典\n \"\"\"\n results = {}\n \n for i, content_item in enumerate(content_list):\n try:\n content_id = content_item.get('id', f'content_{i+1}')\n logger.info(f\"批量审核内容 {i+1}/{len(content_list)}: {content_id}\")\n \n result = await self.judge_content(**content_item)\n results[content_id] = result\n \n except Exception as e:\n logger.error(f\"批量审核第 {i+1} 项失败: {e}\")\n content_id = content_item.get('id', f'content_{i+1}')\n results[content_id] = (False, f\"审核失败: {str(e)}\", 0.0, {\"error\": str(e)})\n \n return results",
"code_hash": "7f1e5e36028cd10495ed935ab7b7b2fc"
},
{
"name": "test_judging",
"line_start": 338,
"line_end": 364,
"args": [
{
"name": "self"
}
],
"return_type": "bool",
"docstring": "测试内容审核功能\n\nReturns:\n 测试是否成功",
"is_async": true,
"decorators": [],
"code": " async def test_judging(self) -> bool:\n \"\"\"\n 测试内容审核功能\n\n Returns:\n 测试是否成功\n \"\"\"\n try:\n test_content = \"\"\"\n {\n \"title\": \"上海外滩一日游\",\n \"content\": \"上海外滩是著名的旅游景点,历史悠久,值得一游。\",\n \"tag\": \"#上海 #外滩 #一日游\"\n }\n \"\"\"\n \n is_passed, error_message, confidence, judge_result = await self.judge_content(\n tweet_content=test_content,\n object_content=\"上海外滩景区信息\",\n product_content=\"外滩一日游产品\"\n )\n \n return confidence > 0.0\n \n except Exception as e:\n logger.error(f\"内容审核测试失败: {e}\")\n return False",
"code_hash": "5af1394ec49309afde76259c211a6646"
},
{
"name": "get_judge_stats",
"line_start": 366,
"line_end": 381,
"args": [
{
"name": "self"
}
],
"return_type": "Dict[str, Any]",
"docstring": "获取审核统计信息\n\nReturns:\n 统计信息字典",
"is_async": false,
"decorators": [],
"code": " def get_judge_stats(self) -> Dict[str, Any]:\n \"\"\"\n 获取审核统计信息\n\n Returns:\n 统计信息字典\n \"\"\"\n return {\n \"task_model_config\": self.task_model_config,\n \"field_config\": self.field_config,\n \"judge_threshold\": self.judge_threshold,\n \"output_directory\": str(self.output_manager.run_output_dir),\n \"ai_model_info\": self.ai_service.get_model_info(),\n \"prompt_templates\": self.prompt_manager.get_available_templates().get(\"content_judging\", {}),\n \"json_processor_enabled\": self.json_processor.enable_repair\n } ",
"code_hash": "fb9714107dd5e3a1a489cc517f147913"
}
],
"docstring": "内容审核器 - 重构版本\n负责审核生成的内容质量支持配置化参数和动态提示词",
"decorators": [],
"code": "class ContentJudger:\n \"\"\"\n 内容审核器 - 重构版本\n 负责审核生成的内容质量,支持配置化参数和动态提示词\n \"\"\"\n\n def __init__(self, config: AlgorithmConfig):\n \"\"\"\n 初始化内容审核器\n\n Args:\n config: 算法配置\n \"\"\"\n self.config = config\n self.ai_service = AIService(config.ai_model)\n self.output_manager = OutputManager(config.output)\n self.prompt_manager = PromptManager(config.prompts, config.resources)\n \n # 初始化JSON处理器\n self.json_processor = JSONProcessor(\n enable_repair=config.content_generation.enable_json_repair,\n max_repair_attempts=config.content_generation.json_repair_attempts\n )\n \n # 获取任务特定的模型配置和字段配置\n self.task_model_config = config.ai_model.get_task_config(\"content_judging\")\n self.field_config = config.content_generation.result_field_mapping.get(\"content_judging\", {})\n \n # 审核阈值配置\n self.judge_threshold = config.content_generation.judge_threshold\n \n logger.info(f\"内容审核器初始化完成,使用模型参数: {self.task_model_config}\")\n\n async def judge_content(\n self,\n tweet_content: str,\n object_content: str,\n product_content: str,\n refer_content: str = \"\",\n content_metadata: Optional[Dict[str, Any]] = None,\n **kwargs\n ) -> Tuple[bool, str, float, Dict[str, Any]]:\n \"\"\"\n 审核内容质量\n\n Args:\n tweet_content: 待审核的文案内容\n object_content: 景区/对象信息\n product_content: 产品信息\n refer_content: 参考内容\n content_metadata: 内容元数据\n **kwargs: 其他参数\n\n Returns:\n Tuple[是否通过, 错误信息, 置信度, 审核结果详情]\n\n Raises:\n ContentGenerationError: 审核失败时抛出\n \"\"\"\n try:\n logger.info(\"开始执行内容审核流程\")\n\n # 1. 
构建提示词\n system_prompt = self.prompt_manager.get_prompt(\"content_judging\", \"system\")\n user_prompt_template = self.prompt_manager.get_prompt(\"content_judging\", \"user\")\n \n # 格式化用户提示词根据实际judgeContent/user.txt格式\n user_prompt = self.prompt_manager.format_prompt(\n user_prompt_template,\n object_content=object_content,\n product_content=product_content,\n refer_content=refer_content,\n tweet_content=tweet_content,\n **kwargs\n )\n\n # 保存提示词(如果配置允许)\n if self.config.output.save_prompts:\n self.output_manager.save_text(system_prompt, \"system_prompt\", \"content_judging\")\n self.output_manager.save_text(user_prompt, \"user_prompt\", \"content_judging\")\n\n # 2. 调用AI审核\n content, input_tokens, output_tokens, elapsed_time = await self.ai_service.generate_text(\n system_prompt=system_prompt,\n user_prompt=user_prompt,\n stage=\"内容审核\",\n **self.task_model_config\n )\n\n # 保存原始响应(如果配置允许)\n if self.config.output.save_raw_responses:\n self.output_manager.save_text(content, \"raw_response\", \"content_judging\")\n\n # 3. 解析审核结果\n is_passed, error_message, confidence, judge_result = self._parse_judge_result(content)\n\n # 4. 保存审核结果\n complete_result = {\n \"original_content\": tweet_content,\n \"judge_result\": judge_result,\n \"is_passed\": is_passed,\n \"error_message\": error_message,\n \"confidence\": confidence,\n \"metadata\": {\n \"content_metadata\": content_metadata,\n \"judge_threshold\": self.judge_threshold,\n \"model_config\": self.task_model_config,\n \"field_config\": self.field_config,\n \"tokens\": {\n \"input\": input_tokens,\n \"output\": output_tokens\n },\n \"elapsed_time\": elapsed_time\n }\n }\n \n self.output_manager.save_json(complete_result, \"judged_content\")\n\n # 5. 
保存元数据\n self.output_manager.save_metadata(complete_result[\"metadata\"], \"content_judging\")\n\n logger.info(f\"内容审核完成,结果: {'通过' if is_passed else '不通过'}, 置信度: {confidence:.2f}\")\n return is_passed, error_message, confidence, judge_result\n\n except Exception as e:\n error_msg = f\"内容审核失败: {str(e)}\"\n logger.error(error_msg, exc_info=True)\n raise ContentGenerationError(error_msg)\n\n def _parse_judge_result(self, raw_output: str) -> Tuple[bool, str, float, Dict[str, Any]]:\n \"\"\"\n 解析审核结果\n\n Args:\n raw_output: AI原始输出\n\n Returns:\n Tuple[是否通过, 错误信息, 置信度, 完整结果]\n \"\"\"\n try:\n # 使用JSON处理器解析\n parsed_data = self.json_processor.parse_llm_output(\n raw_output=raw_output,\n expected_fields=self.field_config.get(\"expected_fields\", [\"analysis\", \"title\", \"content\"]),\n required_fields=self.field_config.get(\"required_fields\", [\"title\", \"content\"])\n )\n\n # 根据实际judgeContent/system.txt的输出格式进行解析\n if isinstance(parsed_data, dict):\n judge_result = self._normalize_judge_data(parsed_data)\n else:\n # 回退处理\n judge_result = self._create_fallback_judge_data(raw_output)\n\n # 判断是否通过\n is_passed, error_message, confidence = self._evaluate_pass_status(judge_result)\n\n return is_passed, error_message, confidence, judge_result\n\n except Exception as e:\n logger.error(f\"审核结果解析失败: {e}\")\n # 返回保守的失败结果\n return False, f\"解析失败: {str(e)}\", 0.0, {\"error\": str(e), \"raw_output\": raw_output}\n\n def _normalize_judge_data(self, parsed_data: Dict[str, Any]) -> Dict[str, Any]:\n \"\"\"\n 标准化审核数据格式\n\n Args:\n parsed_data: 解析后的数据\n\n Returns:\n 标准化后的审核结果字典\n \"\"\"\n # 根据实际judgeContent/system.txt的输出格式进行标准化\n normalized_data = {\n # 核心字段根据实际system.txt的输出格式\n \"analysis\": parsed_data.get(\"analysis\", \"\"),\n \"title\": parsed_data.get(\"title\", \"\"),\n \"content\": parsed_data.get(\"content\", \"\"),\n \n # 计算字段\n \"has_analysis\": bool(parsed_data.get(\"analysis\", \"\").strip()),\n \"title_changed\": parsed_data.get(\"title\", \"\") != \"\",\n 
\"content_changed\": parsed_data.get(\"content\", \"\") != \"\",\n \"analysis_length\": len(parsed_data.get(\"analysis\", \"\")),\n \n # 元数据\n \"metadata\": {\n \"original_parsed_data\": parsed_data,\n \"normalization_version\": \"v1\",\n \"generated_at\": self.output_manager.run_id\n }\n }\n\n return normalized_data\n\n def _create_fallback_judge_data(self, raw_output: str) -> Dict[str, Any]:\n \"\"\"\n 创建回退的审核数据当JSON解析失败时\n\n Args:\n raw_output: 原始输出\n\n Returns:\n 回退的审核结果字典\n \"\"\"\n # 尝试从原始文本中提取信息\n return {\n \"analysis\": raw_output,\n \"title\": \"\",\n \"content\": \"\",\n \"has_analysis\": bool(raw_output.strip()),\n \"title_changed\": False,\n \"content_changed\": False,\n \"analysis_length\": len(raw_output),\n \"metadata\": {\n \"parsing_method\": \"fallback_text_extraction\",\n \"generated_at\": self.output_manager.run_id,\n \"original_raw_output\": raw_output\n }\n }\n\n def _evaluate_pass_status(self, judge_result: Dict[str, Any]) -> Tuple[bool, str, float]:\n \"\"\"\n 评估是否通过审核\n\n Args:\n judge_result: 审核结果\n\n Returns:\n Tuple[是否通过, 错误信息, 置信度]\n \"\"\"\n try:\n analysis = judge_result.get(\"analysis\", \"\")\n title = judge_result.get(\"title\", \"\")\n content = judge_result.get(\"content\", \"\")\n\n # 基础检查:是否有分析内容\n if not analysis:\n return False, \"缺少审核分析\", 0.1\n\n # 检查是否有修改后的内容\n has_corrections = bool(title) or bool(content)\n \n # 根据分析内容进行判断(简单的关键词判断)\n analysis_lower = analysis.lower()\n negative_keywords = [\n \"不符\", \"错误\", \"虚构\", \"不合理\", \"不准确\", \"缺少\", \"遗漏\", \n \"超过\", \"违反\", \"不当\", \"problem\", \"error\", \"incorrect\", \"missing\"\n ]\n \n positive_keywords = [\n \"符合\", \"正确\", \"准确\", \"合理\", \"完整\", \"适当\", \"良好\",\n \"correct\", \"accurate\", \"appropriate\", \"good\", \"proper\"\n ]\n\n negative_score = sum(1 for keyword in negative_keywords if keyword in analysis_lower)\n positive_score = sum(1 for keyword in positive_keywords if keyword in analysis_lower)\n\n # 计算置信度\n if has_corrections:\n # 
如果有修改,说明原内容有问题,但修改后应该是通过的\n confidence = max(0.7, min(0.95, 0.7 + positive_score * 0.1))\n is_passed = True\n error_message = \"\"\n elif negative_score > positive_score:\n # 负面评价较多\n confidence = max(0.3, min(0.9, 0.5 + negative_score * 0.1))\n is_passed = False\n error_message = \"内容质量不符合要求\"\n else:\n # 正面评价较多或无明显问题\n confidence = max(0.6, min(0.95, 0.8 + positive_score * 0.05))\n is_passed = True\n error_message = \"\"\n\n # 应用阈值\n if confidence < self.judge_threshold:\n is_passed = False\n error_message = error_message or \"审核置信度不足\"\n\n return is_passed, error_message, confidence\n\n except Exception as e:\n logger.error(f\"审核状态评估失败: {e}\")\n return False, f\"评估失败: {str(e)}\", 0.0\n\n async def judge_content_batch(\n self,\n content_list: List[Dict[str, Any]]\n ) -> Dict[str, Tuple[bool, str, float, Dict[str, Any]]]:\n \"\"\"\n 批量审核内容\n\n Args:\n content_list: 内容列表,每个元素包含审核所需的参数\n\n Returns:\n 内容ID->审核结果的字典\n \"\"\"\n results = {}\n \n for i, content_item in enumerate(content_list):\n try:\n content_id = content_item.get('id', f'content_{i+1}')\n logger.info(f\"批量审核内容 {i+1}/{len(content_list)}: {content_id}\")\n \n result = await self.judge_content(**content_item)\n results[content_id] = result\n \n except Exception as e:\n logger.error(f\"批量审核第 {i+1} 项失败: {e}\")\n content_id = content_item.get('id', f'content_{i+1}')\n results[content_id] = (False, f\"审核失败: {str(e)}\", 0.0, {\"error\": str(e)})\n \n return results\n\n async def test_judging(self) -> bool:\n \"\"\"\n 测试内容审核功能\n\n Returns:\n 测试是否成功\n \"\"\"\n try:\n test_content = \"\"\"\n {\n \"title\": \"上海外滩一日游\",\n \"content\": \"上海外滩是著名的旅游景点,历史悠久,值得一游。\",\n \"tag\": \"#上海 #外滩 #一日游\"\n }\n \"\"\"\n \n is_passed, error_message, confidence, judge_result = await self.judge_content(\n tweet_content=test_content,\n object_content=\"上海外滩景区信息\",\n product_content=\"外滩一日游产品\"\n )\n \n return confidence > 0.0\n \n except Exception as e:\n logger.error(f\"内容审核测试失败: {e}\")\n return False\n\n def get_judge_stats(self) 
-> Dict[str, Any]:\n \"\"\"\n 获取审核统计信息\n\n Returns:\n 统计信息字典\n \"\"\"\n return {\n \"task_model_config\": self.task_model_config,\n \"field_config\": self.field_config,\n \"judge_threshold\": self.judge_threshold,\n \"output_directory\": str(self.output_manager.run_output_dir),\n \"ai_model_info\": self.ai_service.get_model_info(),\n \"prompt_templates\": self.prompt_manager.get_available_templates().get(\"content_judging\", {}),\n \"json_processor_enabled\": self.json_processor.enable_repair\n } ",
"code_hash": "ed26ab81ebedb858282d7effc034dbaf"
}
],
"imports": [
{
"type": "import",
"modules": [
"logging"
],
"aliases": []
},
{
"type": "from_import",
"module": "typing",
"names": [
"Dict",
"Any",
"Tuple",
"Optional",
"List"
],
"aliases": [],
"level": 0
},
{
"type": "import",
"modules": [
"json"
],
"aliases": []
},
{
"type": "from_import",
"module": "config",
"names": [
"AlgorithmConfig"
],
"aliases": [],
"level": 2
},
{
"type": "from_import",
"module": "core",
"names": [
"AIService",
"OutputManager",
"PromptManager",
"JSONProcessor"
],
"aliases": [],
"level": 2
},
{
"type": "from_import",
"module": "exceptions",
"names": [
"ContentGenerationError"
],
"aliases": [],
"level": 2
}
],
"constants": [],
"docstring": "Content Judger\n内容审核器 - 重构版本使用动态提示词和JSON处理支持真实的审核格式",
"content_hash": "4d4b8b85bae5018e0fbe59f6cf144a10"
}