{
"file_path": "travel-algorithms/travel_algorithms/content_generation/content_judger.py",
"file_size": 12662,
"line_count": 380,
"functions": [
{
"name": "__init__",
"line_start": 26,
"line_end": 51,
"args": [
{
"name": "self"
},
{
"name": "config",
"type_hint": "AlgorithmConfig"
}
],
"return_type": null,
"docstring": "初始化内容审核器\n\nArgs:\n config: 算法配置",
"is_async": false,
"decorators": [],
"code": " def __init__(self, config: AlgorithmConfig):\n \"\"\"\n 初始化内容审核器\n\n Args:\n config: 算法配置\n \"\"\"\n self.config = config\n self.ai_service = AIService(config.ai_model)\n self.output_manager = OutputManager(config.output)\n self.prompt_manager = PromptManager(config.prompts, config.resources)\n \n # 初始化JSON处理器\n self.json_processor = JSONProcessor(\n enable_repair=config.content_generation.enable_json_repair,\n max_repair_attempts=config.content_generation.json_repair_attempts\n )\n \n # 获取任务特定的模型配置和字段配置\n self.task_model_config = config.ai_model.get_task_config(\"content_judging\")\n self.field_config = config.content_generation.result_field_mapping.get(\"content_judging\", {})\n \n # 审核阈值配置\n self.judge_threshold = config.content_generation.judge_threshold\n \n logger.info(f\"内容审核器初始化完成,使用模型参数: {self.task_model_config}\")",
"code_hash": "c99b924d21358871a472386e4961f637"
},
{
"name": "_parse_judge_result",
"line_start": 149,
"line_end": 182,
"args": [
{
"name": "self"
},
{
"name": "raw_output",
"type_hint": "str"
}
],
"return_type": "Tuple[bool, str, float, Dict[str, Any]]",
"docstring": "解析审核结果\n\nArgs:\n raw_output: AI原始输出\n\nReturns:\n Tuple[是否通过, 错误信息, 置信度, 完整结果]",
"is_async": false,
"decorators": [],
"code": " def _parse_judge_result(self, raw_output: str) -> Tuple[bool, str, float, Dict[str, Any]]:\n \"\"\"\n 解析审核结果\n\n Args:\n raw_output: AI原始输出\n\n Returns:\n Tuple[是否通过, 错误信息, 置信度, 完整结果]\n \"\"\"\n try:\n # 使用JSON处理器解析\n parsed_data = self.json_processor.parse_llm_output(\n raw_output=raw_output,\n expected_fields=self.field_config.get(\"expected_fields\", [\"analysis\", \"title\", \"content\"]),\n required_fields=self.field_config.get(\"required_fields\", [\"title\", \"content\"])\n )\n\n # 根据实际judgeContent/system.txt的输出格式进行解析\n if isinstance(parsed_data, dict):\n judge_result = self._normalize_judge_data(parsed_data)\n else:\n # 回退处理\n judge_result = self._create_fallback_judge_data(raw_output)\n\n # 判断是否通过\n is_passed, error_message, confidence = self._evaluate_pass_status(judge_result)\n\n return is_passed, error_message, confidence, judge_result\n\n except Exception as e:\n logger.error(f\"审核结果解析失败: {e}\")\n # 返回保守的失败结果\n return False, f\"解析失败: {str(e)}\", 0.0, {\"error\": str(e), \"raw_output\": raw_output}",
"code_hash": "14c8a5de7e3df0bd0989c7936afaa699"
},
{
"name": "_normalize_judge_data",
"line_start": 184,
"line_end": 215,
"args": [
{
"name": "self"
},
{
"name": "parsed_data",
"type_hint": "Dict[str, Any]"
}
],
"return_type": "Dict[str, Any]",
"docstring": "标准化审核数据格式\n\nArgs:\n parsed_data: 解析后的数据\n\nReturns:\n 标准化后的审核结果字典",
"is_async": false,
"decorators": [],
"code": " def _normalize_judge_data(self, parsed_data: Dict[str, Any]) -> Dict[str, Any]:\n \"\"\"\n 标准化审核数据格式\n\n Args:\n parsed_data: 解析后的数据\n\n Returns:\n 标准化后的审核结果字典\n \"\"\"\n # 根据实际judgeContent/system.txt的输出格式进行标准化\n normalized_data = {\n # 核心字段根据实际system.txt的输出格式\n \"analysis\": parsed_data.get(\"analysis\", \"\"),\n \"title\": parsed_data.get(\"title\", \"\"),\n \"content\": parsed_data.get(\"content\", \"\"),\n \n # 计算字段\n \"has_analysis\": bool(parsed_data.get(\"analysis\", \"\").strip()),\n \"title_changed\": parsed_data.get(\"title\", \"\") != \"\",\n \"content_changed\": parsed_data.get(\"content\", \"\") != \"\",\n \"analysis_length\": len(parsed_data.get(\"analysis\", \"\")),\n \n # 元数据\n \"metadata\": {\n \"original_parsed_data\": parsed_data,\n \"normalization_version\": \"v1\",\n \"generated_at\": self.output_manager.run_id\n }\n }\n\n return normalized_data",
"code_hash": "01e6875325d6a9af0a71cb6df1dbd987"
},
{
"name": "_create_fallback_judge_data",
"line_start": 217,
"line_end": 241,
"args": [
{
"name": "self"
},
{
"name": "raw_output",
"type_hint": "str"
}
],
"return_type": "Dict[str, Any]",
"docstring": "创建回退的审核数据当JSON解析失败时\n\nArgs:\n raw_output: 原始输出\n\nReturns:\n 回退的审核结果字典",
"is_async": false,
"decorators": [],
"code": " def _create_fallback_judge_data(self, raw_output: str) -> Dict[str, Any]:\n \"\"\"\n 创建回退的审核数据当JSON解析失败时\n\n Args:\n raw_output: 原始输出\n\n Returns:\n 回退的审核结果字典\n \"\"\"\n # 尝试从原始文本中提取信息\n return {\n \"analysis\": raw_output,\n \"title\": \"\",\n \"content\": \"\",\n \"has_analysis\": bool(raw_output.strip()),\n \"title_changed\": False,\n \"content_changed\": False,\n \"analysis_length\": len(raw_output),\n \"metadata\": {\n \"parsing_method\": \"fallback_text_extraction\",\n \"generated_at\": self.output_manager.run_id,\n \"original_raw_output\": raw_output\n }\n }",
"code_hash": "20800a0738db2e9a451f0a81aec58d45"
},
{
"name": "_evaluate_pass_status",
"line_start": 243,
"line_end": 306,
"args": [
{
"name": "self"
},
{
"name": "judge_result",
"type_hint": "Dict[str, Any]"
}
],
"return_type": "Tuple[bool, str, float]",
"docstring": "评估是否通过审核\n\nArgs:\n judge_result: 审核结果\n\nReturns:\n Tuple[是否通过, 错误信息, 置信度]",
"is_async": false,
"decorators": [],
"code": " def _evaluate_pass_status(self, judge_result: Dict[str, Any]) -> Tuple[bool, str, float]:\n \"\"\"\n 评估是否通过审核\n\n Args:\n judge_result: 审核结果\n\n Returns:\n Tuple[是否通过, 错误信息, 置信度]\n \"\"\"\n try:\n analysis = judge_result.get(\"analysis\", \"\")\n title = judge_result.get(\"title\", \"\")\n content = judge_result.get(\"content\", \"\")\n\n # 基础检查:是否有分析内容\n if not analysis:\n return False, \"缺少审核分析\", 0.1\n\n # 检查是否有修改后的内容\n has_corrections = bool(title) or bool(content)\n \n # 根据分析内容进行判断(简单的关键词判断)\n analysis_lower = analysis.lower()\n negative_keywords = [\n \"不符\", \"错误\", \"虚构\", \"不合理\", \"不准确\", \"缺少\", \"遗漏\", \n \"超过\", \"违反\", \"不当\", \"problem\", \"error\", \"incorrect\", \"missing\"\n ]\n \n positive_keywords = [\n \"符合\", \"正确\", \"准确\", \"合理\", \"完整\", \"适当\", \"良好\",\n \"correct\", \"accurate\", \"appropriate\", \"good\", \"proper\"\n ]\n\n negative_score = sum(1 for keyword in negative_keywords if keyword in analysis_lower)\n positive_score = sum(1 for keyword in positive_keywords if keyword in analysis_lower)\n\n # 计算置信度\n if has_corrections:\n # 如果有修改,说明原内容有问题,但修改后应该是通过的\n confidence = max(0.7, min(0.95, 0.7 + positive_score * 0.1))\n is_passed = True\n error_message = \"\"\n elif negative_score > positive_score:\n # 负面评价较多\n confidence = max(0.3, min(0.9, 0.5 + negative_score * 0.1))\n is_passed = False\n error_message = \"内容质量不符合要求\"\n else:\n # 正面评价较多或无明显问题\n confidence = max(0.6, min(0.95, 0.8 + positive_score * 0.05))\n is_passed = True\n error_message = \"\"\n\n # 应用阈值\n if confidence < self.judge_threshold:\n is_passed = False\n error_message = error_message or \"审核置信度不足\"\n\n return is_passed, error_message, confidence\n\n except Exception as e:\n logger.error(f\"审核状态评估失败: {e}\")\n return False, f\"评估失败: {str(e)}\", 0.0",
"code_hash": "8ecdc33854003596b83b1f520ca25f64"
},
{
"name": "get_judge_stats",
"line_start": 366,
"line_end": 381,
"args": [
{
"name": "self"
}
],
"return_type": "Dict[str, Any]",
"docstring": "获取审核统计信息\n\nReturns:\n 统计信息字典",
"is_async": false,
"decorators": [],
"code": " def get_judge_stats(self) -> Dict[str, Any]:\n \"\"\"\n 获取审核统计信息\n\n Returns:\n 统计信息字典\n \"\"\"\n return {\n \"task_model_config\": self.task_model_config,\n \"field_config\": self.field_config,\n \"judge_threshold\": self.judge_threshold,\n \"output_directory\": str(self.output_manager.run_output_dir),\n \"ai_model_info\": self.ai_service.get_model_info(),\n \"prompt_templates\": self.prompt_manager.get_available_templates().get(\"content_judging\", {}),\n \"json_processor_enabled\": self.json_processor.enable_repair\n } ",
"code_hash": "fb9714107dd5e3a1a489cc517f147913"
}
],
"classes": [
{
"name": "ContentJudger",
"line_start": 20,
"line_end": 381,
"bases": [],
"methods": [
{
"name": "__init__",
"line_start": 26,
"line_end": 51,
"args": [
{
"name": "self"
},
{
"name": "config",
"type_hint": "AlgorithmConfig"
}
],
"return_type": null,
"docstring": "初始化内容审核器\n\nArgs:\n config: 算法配置",
"is_async": false,
"decorators": [],
"code": " def __init__(self, config: AlgorithmConfig):\n \"\"\"\n 初始化内容审核器\n\n Args:\n config: 算法配置\n \"\"\"\n self.config = config\n self.ai_service = AIService(config.ai_model)\n self.output_manager = OutputManager(config.output)\n self.prompt_manager = PromptManager(config.prompts, config.resources)\n \n # 初始化JSON处理器\n self.json_processor = JSONProcessor(\n enable_repair=config.content_generation.enable_json_repair,\n max_repair_attempts=config.content_generation.json_repair_attempts\n )\n \n # 获取任务特定的模型配置和字段配置\n self.task_model_config = config.ai_model.get_task_config(\"content_judging\")\n self.field_config = config.content_generation.result_field_mapping.get(\"content_judging\", {})\n \n # 审核阈值配置\n self.judge_threshold = config.content_generation.judge_threshold\n \n logger.info(f\"内容审核器初始化完成,使用模型参数: {self.task_model_config}\")",
"code_hash": "c99b924d21358871a472386e4961f637"
},
{
"name": "judge_content",
"line_start": 53,
"line_end": 147,
"args": [
{
"name": "self"
},
{
"name": "tweet_content",
"type_hint": "str"
},
{
"name": "object_content",
"type_hint": "str"
},
{
"name": "product_content",
"type_hint": "str"
},
{
"name": "refer_content",
"type_hint": "str"
},
{
"name": "content_metadata",
"type_hint": "Optional[Dict[str, Any]]"
}
],
"return_type": "Tuple[bool, str, float, Dict[str, Any]]",
"docstring": "审核内容质量\n\nArgs:\n tweet_content: 待审核的文案内容\n object_content: 景区/对象信息\n product_content: 产品信息\n refer_content: 参考内容\n content_metadata: 内容元数据\n **kwargs: 其他参数\n\nReturns:\n Tuple[是否通过, 错误信息, 置信度, 审核结果详情]\n\nRaises:\n ContentGenerationError: 审核失败时抛出",
"is_async": true,
"decorators": [],
"code": " async def judge_content(\n self,\n tweet_content: str,\n object_content: str,\n product_content: str,\n refer_content: str = \"\",\n content_metadata: Optional[Dict[str, Any]] = None,\n **kwargs\n ) -> Tuple[bool, str, float, Dict[str, Any]]:\n \"\"\"\n 审核内容质量\n\n Args:\n tweet_content: 待审核的文案内容\n object_content: 景区/对象信息\n product_content: 产品信息\n refer_content: 参考内容\n content_metadata: 内容元数据\n **kwargs: 其他参数\n\n Returns:\n Tuple[是否通过, 错误信息, 置信度, 审核结果详情]\n\n Raises:\n ContentGenerationError: 审核失败时抛出\n \"\"\"\n try:\n logger.info(\"开始执行内容审核流程\")\n\n # 1. 构建提示词\n system_prompt = self.prompt_manager.get_prompt(\"content_judging\", \"system\")\n user_prompt_template = self.prompt_manager.get_prompt(\"content_judging\", \"user\")\n \n # 格式化用户提示词根据实际judgeContent/user.txt格式\n user_prompt = self.prompt_manager.format_prompt(\n user_prompt_template,\n object_content=object_content,\n product_content=product_content,\n refer_content=refer_content,\n tweet_content=tweet_content,\n **kwargs\n )\n\n # 保存提示词(如果配置允许)\n if self.config.output.save_prompts:\n self.output_manager.save_text(system_prompt, \"system_prompt\", \"content_judging\")\n self.output_manager.save_text(user_prompt, \"user_prompt\", \"content_judging\")\n\n # 2. 调用AI审核\n content, input_tokens, output_tokens, elapsed_time = await self.ai_service.generate_text(\n system_prompt=system_prompt,\n user_prompt=user_prompt,\n stage=\"内容审核\",\n **self.task_model_config\n )\n\n # 保存原始响应(如果配置允许)\n if self.config.output.save_raw_responses:\n self.output_manager.save_text(content, \"raw_response\", \"content_judging\")\n\n # 3. 解析审核结果\n is_passed, error_message, confidence, judge_result = self._parse_judge_result(content)\n\n # 4. 
保存审核结果\n complete_result = {\n \"original_content\": tweet_content,\n \"judge_result\": judge_result,\n \"is_passed\": is_passed,\n \"error_message\": error_message,\n \"confidence\": confidence,\n \"metadata\": {\n \"content_metadata\": content_metadata,\n \"judge_threshold\": self.judge_threshold,\n \"model_config\": self.task_model_config,\n \"field_config\": self.field_config,\n \"tokens\": {\n \"input\": input_tokens,\n \"output\": output_tokens\n },\n \"elapsed_time\": elapsed_time\n }\n }\n \n self.output_manager.save_json(complete_result, \"judged_content\")\n\n # 5. 保存元数据\n self.output_manager.save_metadata(complete_result[\"metadata\"], \"content_judging\")\n\n logger.info(f\"内容审核完成,结果: {'通过' if is_passed else '不通过'}, 置信度: {confidence:.2f}\")\n return is_passed, error_message, confidence, judge_result\n\n except Exception as e:\n error_msg = f\"内容审核失败: {str(e)}\"\n logger.error(error_msg, exc_info=True)\n raise ContentGenerationError(error_msg)",
"code_hash": "b3ca81ff4c689fb92d930f725138b80a"
},
{
"name": "_parse_judge_result",
"line_start": 149,
"line_end": 182,
"args": [
{
"name": "self"
},
{
"name": "raw_output",
"type_hint": "str"
}
],
"return_type": "Tuple[bool, str, float, Dict[str, Any]]",
"docstring": "解析审核结果\n\nArgs:\n raw_output: AI原始输出\n\nReturns:\n Tuple[是否通过, 错误信息, 置信度, 完整结果]",
"is_async": false,
"decorators": [],
"code": " def _parse_judge_result(self, raw_output: str) -> Tuple[bool, str, float, Dict[str, Any]]:\n \"\"\"\n 解析审核结果\n\n Args:\n raw_output: AI原始输出\n\n Returns:\n Tuple[是否通过, 错误信息, 置信度, 完整结果]\n \"\"\"\n try:\n # 使用JSON处理器解析\n parsed_data = self.json_processor.parse_llm_output(\n raw_output=raw_output,\n expected_fields=self.field_config.get(\"expected_fields\", [\"analysis\", \"title\", \"content\"]),\n required_fields=self.field_config.get(\"required_fields\", [\"title\", \"content\"])\n )\n\n # 根据实际judgeContent/system.txt的输出格式进行解析\n if isinstance(parsed_data, dict):\n judge_result = self._normalize_judge_data(parsed_data)\n else:\n # 回退处理\n judge_result = self._create_fallback_judge_data(raw_output)\n\n # 判断是否通过\n is_passed, error_message, confidence = self._evaluate_pass_status(judge_result)\n\n return is_passed, error_message, confidence, judge_result\n\n except Exception as e:\n logger.error(f\"审核结果解析失败: {e}\")\n # 返回保守的失败结果\n return False, f\"解析失败: {str(e)}\", 0.0, {\"error\": str(e), \"raw_output\": raw_output}",
"code_hash": "14c8a5de7e3df0bd0989c7936afaa699"
},
{
"name": "_normalize_judge_data",
"line_start": 184,
"line_end": 215,
"args": [
{
"name": "self"
},
{
"name": "parsed_data",
"type_hint": "Dict[str, Any]"
}
],
"return_type": "Dict[str, Any]",
"docstring": "标准化审核数据格式\n\nArgs:\n parsed_data: 解析后的数据\n\nReturns:\n 标准化后的审核结果字典",
"is_async": false,
"decorators": [],
"code": " def _normalize_judge_data(self, parsed_data: Dict[str, Any]) -> Dict[str, Any]:\n \"\"\"\n 标准化审核数据格式\n\n Args:\n parsed_data: 解析后的数据\n\n Returns:\n 标准化后的审核结果字典\n \"\"\"\n # 根据实际judgeContent/system.txt的输出格式进行标准化\n normalized_data = {\n # 核心字段根据实际system.txt的输出格式\n \"analysis\": parsed_data.get(\"analysis\", \"\"),\n \"title\": parsed_data.get(\"title\", \"\"),\n \"content\": parsed_data.get(\"content\", \"\"),\n \n # 计算字段\n \"has_analysis\": bool(parsed_data.get(\"analysis\", \"\").strip()),\n \"title_changed\": parsed_data.get(\"title\", \"\") != \"\",\n \"content_changed\": parsed_data.get(\"content\", \"\") != \"\",\n \"analysis_length\": len(parsed_data.get(\"analysis\", \"\")),\n \n # 元数据\n \"metadata\": {\n \"original_parsed_data\": parsed_data,\n \"normalization_version\": \"v1\",\n \"generated_at\": self.output_manager.run_id\n }\n }\n\n return normalized_data",
"code_hash": "01e6875325d6a9af0a71cb6df1dbd987"
},
{
"name": "_create_fallback_judge_data",
"line_start": 217,
"line_end": 241,
"args": [
{
"name": "self"
},
{
"name": "raw_output",
"type_hint": "str"
}
],
"return_type": "Dict[str, Any]",
"docstring": "退JSON\n\nArgs:\n raw_output: \n\nReturns:\n 退",
"is_async": false,
"decorators": [],
"code": " def _create_fallback_judge_data(self, raw_output: str) -> Dict[str, Any]:\n \"\"\"\n 创建回退的审核数据当JSON解析失败时\n\n Args:\n raw_output: 原始输出\n\n Returns:\n 回退的审核结果字典\n \"\"\"\n # 尝试从原始文本中提取信息\n return {\n \"analysis\": raw_output,\n \"title\": \"\",\n \"content\": \"\",\n \"has_analysis\": bool(raw_output.strip()),\n \"title_changed\": False,\n \"content_changed\": False,\n \"analysis_length\": len(raw_output),\n \"metadata\": {\n \"parsing_method\": \"fallback_text_extraction\",\n \"generated_at\": self.output_manager.run_id,\n \"original_raw_output\": raw_output\n }\n }",
"code_hash": "20800a0738db2e9a451f0a81aec58d45"
},
{
"name": "_evaluate_pass_status",
"line_start": 243,
"line_end": 306,
"args": [
{
"name": "self"
},
{
"name": "judge_result",
"type_hint": "Dict[str, Any]"
}
],
"return_type": "Tuple[bool, str, float]",
"docstring": "评估是否通过审核\n\nArgs:\n judge_result: 审核结果\n\nReturns:\n Tuple[是否通过, 错误信息, 置信度]",
"is_async": false,
"decorators": [],
"code": " def _evaluate_pass_status(self, judge_result: Dict[str, Any]) -> Tuple[bool, str, float]:\n \"\"\"\n 评估是否通过审核\n\n Args:\n judge_result: 审核结果\n\n Returns:\n Tuple[是否通过, 错误信息, 置信度]\n \"\"\"\n try:\n analysis = judge_result.get(\"analysis\", \"\")\n title = judge_result.get(\"title\", \"\")\n content = judge_result.get(\"content\", \"\")\n\n # 基础检查:是否有分析内容\n if not analysis:\n return False, \"缺少审核分析\", 0.1\n\n # 检查是否有修改后的内容\n has_corrections = bool(title) or bool(content)\n \n # 根据分析内容进行判断(简单的关键词判断)\n analysis_lower = analysis.lower()\n negative_keywords = [\n \"不符\", \"错误\", \"虚构\", \"不合理\", \"不准确\", \"缺少\", \"遗漏\", \n \"超过\", \"违反\", \"不当\", \"problem\", \"error\", \"incorrect\", \"missing\"\n ]\n \n positive_keywords = [\n \"符合\", \"正确\", \"准确\", \"合理\", \"完整\", \"适当\", \"良好\",\n \"correct\", \"accurate\", \"appropriate\", \"good\", \"proper\"\n ]\n\n negative_score = sum(1 for keyword in negative_keywords if keyword in analysis_lower)\n positive_score = sum(1 for keyword in positive_keywords if keyword in analysis_lower)\n\n # 计算置信度\n if has_corrections:\n # 如果有修改,说明原内容有问题,但修改后应该是通过的\n confidence = max(0.7, min(0.95, 0.7 + positive_score * 0.1))\n is_passed = True\n error_message = \"\"\n elif negative_score > positive_score:\n # 负面评价较多\n confidence = max(0.3, min(0.9, 0.5 + negative_score * 0.1))\n is_passed = False\n error_message = \"内容质量不符合要求\"\n else:\n # 正面评价较多或无明显问题\n confidence = max(0.6, min(0.95, 0.8 + positive_score * 0.05))\n is_passed = True\n error_message = \"\"\n\n # 应用阈值\n if confidence < self.judge_threshold:\n is_passed = False\n error_message = error_message or \"审核置信度不足\"\n\n return is_passed, error_message, confidence\n\n except Exception as e:\n logger.error(f\"审核状态评估失败: {e}\")\n return False, f\"评估失败: {str(e)}\", 0.0",
"code_hash": "8ecdc33854003596b83b1f520ca25f64"
},
{
"name": "judge_content_batch",
"line_start": 308,
"line_end": 336,
"args": [
{
"name": "self"
},
{
"name": "content_list",
"type_hint": "List[Dict[str, Any]]"
}
],
"return_type": "Dict[str, Tuple[bool, str, float, Dict[str, Any]]]",
"docstring": "批量审核内容\n\nArgs:\n content_list: 内容列表,每个元素包含审核所需的参数\n\nReturns:\n 内容ID->审核结果的字典",
"is_async": true,
"decorators": [],
"code": " async def judge_content_batch(\n self,\n content_list: List[Dict[str, Any]]\n ) -> Dict[str, Tuple[bool, str, float, Dict[str, Any]]]:\n \"\"\"\n 批量审核内容\n\n Args:\n content_list: 内容列表,每个元素包含审核所需的参数\n\n Returns:\n 内容ID->审核结果的字典\n \"\"\"\n results = {}\n \n for i, content_item in enumerate(content_list):\n try:\n content_id = content_item.get('id', f'content_{i+1}')\n logger.info(f\"批量审核内容 {i+1}/{len(content_list)}: {content_id}\")\n \n result = await self.judge_content(**content_item)\n results[content_id] = result\n \n except Exception as e:\n logger.error(f\"批量审核第 {i+1} 项失败: {e}\")\n content_id = content_item.get('id', f'content_{i+1}')\n results[content_id] = (False, f\"审核失败: {str(e)}\", 0.0, {\"error\": str(e)})\n \n return results",
"code_hash": "7f1e5e36028cd10495ed935ab7b7b2fc"
},
{
"name": "test_judging",
"line_start": 338,
"line_end": 364,
"args": [
{
"name": "self"
}
],
"return_type": "bool",
"docstring": "测试内容审核功能\n\nReturns:\n 测试是否成功",
"is_async": true,
"decorators": [],
"code": " async def test_judging(self) -> bool:\n \"\"\"\n 测试内容审核功能\n\n Returns:\n 测试是否成功\n \"\"\"\n try:\n test_content = \"\"\"\n {\n \"title\": \"上海外滩一日游\",\n \"content\": \"上海外滩是著名的旅游景点,历史悠久,值得一游。\",\n \"tag\": \"#上海 #外滩 #一日游\"\n }\n \"\"\"\n \n is_passed, error_message, confidence, judge_result = await self.judge_content(\n tweet_content=test_content,\n object_content=\"上海外滩景区信息\",\n product_content=\"外滩一日游产品\"\n )\n \n return confidence > 0.0\n \n except Exception as e:\n logger.error(f\"内容审核测试失败: {e}\")\n return False",
"code_hash": "5af1394ec49309afde76259c211a6646"
},
{
"name": "get_judge_stats",
"line_start": 366,
"line_end": 381,
"args": [
{
"name": "self"
}
],
"return_type": "Dict[str, Any]",
"docstring": "获取审核统计信息\n\nReturns:\n 统计信息字典",
"is_async": false,
"decorators": [],
"code": " def get_judge_stats(self) -> Dict[str, Any]:\n \"\"\"\n 获取审核统计信息\n\n Returns:\n 统计信息字典\n \"\"\"\n return {\n \"task_model_config\": self.task_model_config,\n \"field_config\": self.field_config,\n \"judge_threshold\": self.judge_threshold,\n \"output_directory\": str(self.output_manager.run_output_dir),\n \"ai_model_info\": self.ai_service.get_model_info(),\n \"prompt_templates\": self.prompt_manager.get_available_templates().get(\"content_judging\", {}),\n \"json_processor_enabled\": self.json_processor.enable_repair\n } ",
"code_hash": "fb9714107dd5e3a1a489cc517f147913"
}
],
"docstring": "内容审核器 - 重构版本\n负责审核生成的内容质量支持配置化参数和动态提示词",
"decorators": [],
"code": "class ContentJudger:\n \"\"\"\n 内容审核器 - 重构版本\n 负责审核生成的内容质量,支持配置化参数和动态提示词\n \"\"\"\n\n def __init__(self, config: AlgorithmConfig):\n \"\"\"\n 初始化内容审核器\n\n Args:\n config: 算法配置\n \"\"\"\n self.config = config\n self.ai_service = AIService(config.ai_model)\n self.output_manager = OutputManager(config.output)\n self.prompt_manager = PromptManager(config.prompts, config.resources)\n \n # 初始化JSON处理器\n self.json_processor = JSONProcessor(\n enable_repair=config.content_generation.enable_json_repair,\n max_repair_attempts=config.content_generation.json_repair_attempts\n )\n \n # 获取任务特定的模型配置和字段配置\n self.task_model_config = config.ai_model.get_task_config(\"content_judging\")\n self.field_config = config.content_generation.result_field_mapping.get(\"content_judging\", {})\n \n # 审核阈值配置\n self.judge_threshold = config.content_generation.judge_threshold\n \n logger.info(f\"内容审核器初始化完成,使用模型参数: {self.task_model_config}\")\n\n async def judge_content(\n self,\n tweet_content: str,\n object_content: str,\n product_content: str,\n refer_content: str = \"\",\n content_metadata: Optional[Dict[str, Any]] = None,\n **kwargs\n ) -> Tuple[bool, str, float, Dict[str, Any]]:\n \"\"\"\n 审核内容质量\n\n Args:\n tweet_content: 待审核的文案内容\n object_content: 景区/对象信息\n product_content: 产品信息\n refer_content: 参考内容\n content_metadata: 内容元数据\n **kwargs: 其他参数\n\n Returns:\n Tuple[是否通过, 错误信息, 置信度, 审核结果详情]\n\n Raises:\n ContentGenerationError: 审核失败时抛出\n \"\"\"\n try:\n logger.info(\"开始执行内容审核流程\")\n\n # 1. 
构建提示词\n system_prompt = self.prompt_manager.get_prompt(\"content_judging\", \"system\")\n user_prompt_template = self.prompt_manager.get_prompt(\"content_judging\", \"user\")\n \n # 格式化用户提示词根据实际judgeContent/user.txt格式\n user_prompt = self.prompt_manager.format_prompt(\n user_prompt_template,\n object_content=object_content,\n product_content=product_content,\n refer_content=refer_content,\n tweet_content=tweet_content,\n **kwargs\n )\n\n # 保存提示词(如果配置允许)\n if self.config.output.save_prompts:\n self.output_manager.save_text(system_prompt, \"system_prompt\", \"content_judging\")\n self.output_manager.save_text(user_prompt, \"user_prompt\", \"content_judging\")\n\n # 2. 调用AI审核\n content, input_tokens, output_tokens, elapsed_time = await self.ai_service.generate_text(\n system_prompt=system_prompt,\n user_prompt=user_prompt,\n stage=\"内容审核\",\n **self.task_model_config\n )\n\n # 保存原始响应(如果配置允许)\n if self.config.output.save_raw_responses:\n self.output_manager.save_text(content, \"raw_response\", \"content_judging\")\n\n # 3. 解析审核结果\n is_passed, error_message, confidence, judge_result = self._parse_judge_result(content)\n\n # 4. 保存审核结果\n complete_result = {\n \"original_content\": tweet_content,\n \"judge_result\": judge_result,\n \"is_passed\": is_passed,\n \"error_message\": error_message,\n \"confidence\": confidence,\n \"metadata\": {\n \"content_metadata\": content_metadata,\n \"judge_threshold\": self.judge_threshold,\n \"model_config\": self.task_model_config,\n \"field_config\": self.field_config,\n \"tokens\": {\n \"input\": input_tokens,\n \"output\": output_tokens\n },\n \"elapsed_time\": elapsed_time\n }\n }\n \n self.output_manager.save_json(complete_result, \"judged_content\")\n\n # 5. 
保存元数据\n self.output_manager.save_metadata(complete_result[\"metadata\"], \"content_judging\")\n\n logger.info(f\"内容审核完成,结果: {'通过' if is_passed else '不通过'}, 置信度: {confidence:.2f}\")\n return is_passed, error_message, confidence, judge_result\n\n except Exception as e:\n error_msg = f\"内容审核失败: {str(e)}\"\n logger.error(error_msg, exc_info=True)\n raise ContentGenerationError(error_msg)\n\n def _parse_judge_result(self, raw_output: str) -> Tuple[bool, str, float, Dict[str, Any]]:\n \"\"\"\n 解析审核结果\n\n Args:\n raw_output: AI原始输出\n\n Returns:\n Tuple[是否通过, 错误信息, 置信度, 完整结果]\n \"\"\"\n try:\n # 使用JSON处理器解析\n parsed_data = self.json_processor.parse_llm_output(\n raw_output=raw_output,\n expected_fields=self.field_config.get(\"expected_fields\", [\"analysis\", \"title\", \"content\"]),\n required_fields=self.field_config.get(\"required_fields\", [\"title\", \"content\"])\n )\n\n # 根据实际judgeContent/system.txt的输出格式进行解析\n if isinstance(parsed_data, dict):\n judge_result = self._normalize_judge_data(parsed_data)\n else:\n # 回退处理\n judge_result = self._create_fallback_judge_data(raw_output)\n\n # 判断是否通过\n is_passed, error_message, confidence = self._evaluate_pass_status(judge_result)\n\n return is_passed, error_message, confidence, judge_result\n\n except Exception as e:\n logger.error(f\"审核结果解析失败: {e}\")\n # 返回保守的失败结果\n return False, f\"解析失败: {str(e)}\", 0.0, {\"error\": str(e), \"raw_output\": raw_output}\n\n def _normalize_judge_data(self, parsed_data: Dict[str, Any]) -> Dict[str, Any]:\n \"\"\"\n 标准化审核数据格式\n\n Args:\n parsed_data: 解析后的数据\n\n Returns:\n 标准化后的审核结果字典\n \"\"\"\n # 根据实际judgeContent/system.txt的输出格式进行标准化\n normalized_data = {\n # 核心字段根据实际system.txt的输出格式\n \"analysis\": parsed_data.get(\"analysis\", \"\"),\n \"title\": parsed_data.get(\"title\", \"\"),\n \"content\": parsed_data.get(\"content\", \"\"),\n \n # 计算字段\n \"has_analysis\": bool(parsed_data.get(\"analysis\", \"\").strip()),\n \"title_changed\": parsed_data.get(\"title\", \"\") != \"\",\n 
\"content_changed\": parsed_data.get(\"content\", \"\") != \"\",\n \"analysis_length\": len(parsed_data.get(\"analysis\", \"\")),\n \n # 元数据\n \"metadata\": {\n \"original_parsed_data\": parsed_data,\n \"normalization_version\": \"v1\",\n \"generated_at\": self.output_manager.run_id\n }\n }\n\n return normalized_data\n\n def _create_fallback_judge_data(self, raw_output: str) -> Dict[str, Any]:\n \"\"\"\n 创建回退的审核数据当JSON解析失败时\n\n Args:\n raw_output: 原始输出\n\n Returns:\n 回退的审核结果字典\n \"\"\"\n # 尝试从原始文本中提取信息\n return {\n \"analysis\": raw_output,\n \"title\": \"\",\n \"content\": \"\",\n \"has_analysis\": bool(raw_output.strip()),\n \"title_changed\": False,\n \"content_changed\": False,\n \"analysis_length\": len(raw_output),\n \"metadata\": {\n \"parsing_method\": \"fallback_text_extraction\",\n \"generated_at\": self.output_manager.run_id,\n \"original_raw_output\": raw_output\n }\n }\n\n def _evaluate_pass_status(self, judge_result: Dict[str, Any]) -> Tuple[bool, str, float]:\n \"\"\"\n 评估是否通过审核\n\n Args:\n judge_result: 审核结果\n\n Returns:\n Tuple[是否通过, 错误信息, 置信度]\n \"\"\"\n try:\n analysis = judge_result.get(\"analysis\", \"\")\n title = judge_result.get(\"title\", \"\")\n content = judge_result.get(\"content\", \"\")\n\n # 基础检查:是否有分析内容\n if not analysis:\n return False, \"缺少审核分析\", 0.1\n\n # 检查是否有修改后的内容\n has_corrections = bool(title) or bool(content)\n \n # 根据分析内容进行判断(简单的关键词判断)\n analysis_lower = analysis.lower()\n negative_keywords = [\n \"不符\", \"错误\", \"虚构\", \"不合理\", \"不准确\", \"缺少\", \"遗漏\", \n \"超过\", \"违反\", \"不当\", \"problem\", \"error\", \"incorrect\", \"missing\"\n ]\n \n positive_keywords = [\n \"符合\", \"正确\", \"准确\", \"合理\", \"完整\", \"适当\", \"良好\",\n \"correct\", \"accurate\", \"appropriate\", \"good\", \"proper\"\n ]\n\n negative_score = sum(1 for keyword in negative_keywords if keyword in analysis_lower)\n positive_score = sum(1 for keyword in positive_keywords if keyword in analysis_lower)\n\n # 计算置信度\n if has_corrections:\n # 
如果有修改,说明原内容有问题,但修改后应该是通过的\n confidence = max(0.7, min(0.95, 0.7 + positive_score * 0.1))\n is_passed = True\n error_message = \"\"\n elif negative_score > positive_score:\n # 负面评价较多\n confidence = max(0.3, min(0.9, 0.5 + negative_score * 0.1))\n is_passed = False\n error_message = \"内容质量不符合要求\"\n else:\n # 正面评价较多或无明显问题\n confidence = max(0.6, min(0.95, 0.8 + positive_score * 0.05))\n is_passed = True\n error_message = \"\"\n\n # 应用阈值\n if confidence < self.judge_threshold:\n is_passed = False\n error_message = error_message or \"审核置信度不足\"\n\n return is_passed, error_message, confidence\n\n except Exception as e:\n logger.error(f\"审核状态评估失败: {e}\")\n return False, f\"评估失败: {str(e)}\", 0.0\n\n async def judge_content_batch(\n self,\n content_list: List[Dict[str, Any]]\n ) -> Dict[str, Tuple[bool, str, float, Dict[str, Any]]]:\n \"\"\"\n 批量审核内容\n\n Args:\n content_list: 内容列表,每个元素包含审核所需的参数\n\n Returns:\n 内容ID->审核结果的字典\n \"\"\"\n results = {}\n \n for i, content_item in enumerate(content_list):\n try:\n content_id = content_item.get('id', f'content_{i+1}')\n logger.info(f\"批量审核内容 {i+1}/{len(content_list)}: {content_id}\")\n \n result = await self.judge_content(**content_item)\n results[content_id] = result\n \n except Exception as e:\n logger.error(f\"批量审核第 {i+1} 项失败: {e}\")\n content_id = content_item.get('id', f'content_{i+1}')\n results[content_id] = (False, f\"审核失败: {str(e)}\", 0.0, {\"error\": str(e)})\n \n return results\n\n async def test_judging(self) -> bool:\n \"\"\"\n 测试内容审核功能\n\n Returns:\n 测试是否成功\n \"\"\"\n try:\n test_content = \"\"\"\n {\n \"title\": \"上海外滩一日游\",\n \"content\": \"上海外滩是著名的旅游景点,历史悠久,值得一游。\",\n \"tag\": \"#上海 #外滩 #一日游\"\n }\n \"\"\"\n \n is_passed, error_message, confidence, judge_result = await self.judge_content(\n tweet_content=test_content,\n object_content=\"上海外滩景区信息\",\n product_content=\"外滩一日游产品\"\n )\n \n return confidence > 0.0\n \n except Exception as e:\n logger.error(f\"内容审核测试失败: {e}\")\n return False\n\n def get_judge_stats(self) 
-> Dict[str, Any]:\n \"\"\"\n 获取审核统计信息\n\n Returns:\n 统计信息字典\n \"\"\"\n return {\n \"task_model_config\": self.task_model_config,\n \"field_config\": self.field_config,\n \"judge_threshold\": self.judge_threshold,\n \"output_directory\": str(self.output_manager.run_output_dir),\n \"ai_model_info\": self.ai_service.get_model_info(),\n \"prompt_templates\": self.prompt_manager.get_available_templates().get(\"content_judging\", {}),\n \"json_processor_enabled\": self.json_processor.enable_repair\n } ",
"code_hash": "ed26ab81ebedb858282d7effc034dbaf"
}
],
"imports": [
{
"type": "import",
"modules": [
"logging"
],
"aliases": []
},
{
"type": "from_import",
"module": "typing",
"names": [
"Dict",
"Any",
"Tuple",
"Optional",
"List"
],
"aliases": [],
"level": 0
},
{
"type": "import",
"modules": [
"json"
],
"aliases": []
},
{
"type": "from_import",
"module": "config",
"names": [
"AlgorithmConfig"
],
"aliases": [],
"level": 2
},
{
"type": "from_import",
"module": "core",
"names": [
"AIService",
"OutputManager",
"PromptManager",
"JSONProcessor"
],
"aliases": [],
"level": 2
},
{
"type": "from_import",
"module": "exceptions",
"names": [
"ContentGenerationError"
],
"aliases": [],
"level": 2
}
],
"constants": [],
"docstring": "Content Judger\n内容审核器 - 重构版本使用动态提示词和JSON处理支持真实的审核格式",
"content_hash": "4d4b8b85bae5018e0fbe59f6cf144a10"
}