{
"file_path": "travel-algorithms/travel_algorithms/content_generation/content_judger.py",
"file_size": 12662,
"line_count": 380,
"functions": [
{
"name": "__init__",
"line_start": 26,
"line_end": 51,
"args": [
{
"name": "self"
},
{
"name": "config",
"type_hint": "AlgorithmConfig"
}
],
"return_type": null,
"docstring": "初始化内容审核器\n\nArgs:\n config: 算法配置",
"is_async": false,
"decorators": [],
"code": " def __init__(self, config: AlgorithmConfig):\n \"\"\"\n 初始化内容审核器\n\n Args:\n config: 算法配置\n \"\"\"\n self.config = config\n self.ai_service = AIService(config.ai_model)\n self.output_manager = OutputManager(config.output)\n self.prompt_manager = PromptManager(config.prompts, config.resources)\n \n # 初始化JSON处理器\n self.json_processor = JSONProcessor(\n enable_repair=config.content_generation.enable_json_repair,\n max_repair_attempts=config.content_generation.json_repair_attempts\n )\n \n # 获取任务特定的模型配置和字段配置\n self.task_model_config = config.ai_model.get_task_config(\"content_judging\")\n self.field_config = config.content_generation.result_field_mapping.get(\"content_judging\", {})\n \n # 审核阈值配置\n self.judge_threshold = config.content_generation.judge_threshold\n \n logger.info(f\"内容审核器初始化完成,使用模型参数: {self.task_model_config}\")",
"code_hash": "c99b924d21358871a472386e4961f637"
},
{
"name": "_parse_judge_result",
"line_start": 149,
"line_end": 182,
"args": [
{
"name": "self"
},
{
"name": "raw_output",
"type_hint": "str"
}
],
"return_type": "Tuple[bool, str, float, Dict[str, Any]]",
"docstring": "解析审核结果\n\nArgs:\n raw_output: AI原始输出\n\nReturns:\n Tuple[是否通过, 错误信息, 置信度, 完整结果]",
"is_async": false,
"decorators": [],
"code": " def _parse_judge_result(self, raw_output: str) -> Tuple[bool, str, float, Dict[str, Any]]:\n \"\"\"\n 解析审核结果\n\n Args:\n raw_output: AI原始输出\n\n Returns:\n Tuple[是否通过, 错误信息, 置信度, 完整结果]\n \"\"\"\n try:\n # 使用JSON处理器解析\n parsed_data = self.json_processor.parse_llm_output(\n raw_output=raw_output,\n expected_fields=self.field_config.get(\"expected_fields\", [\"analysis\", \"title\", \"content\"]),\n required_fields=self.field_config.get(\"required_fields\", [\"title\", \"content\"])\n )\n\n # 根据实际judgeContent/system.txt的输出格式进行解析\n if isinstance(parsed_data, dict):\n judge_result = self._normalize_judge_data(parsed_data)\n else:\n # 回退处理\n judge_result = self._create_fallback_judge_data(raw_output)\n\n # 判断是否通过\n is_passed, error_message, confidence = self._evaluate_pass_status(judge_result)\n\n return is_passed, error_message, confidence, judge_result\n\n except Exception as e:\n logger.error(f\"审核结果解析失败: {e}\")\n # 返回保守的失败结果\n return False, f\"解析失败: {str(e)}\", 0.0, {\"error\": str(e), \"raw_output\": raw_output}",
"code_hash": "14c8a5de7e3df0bd0989c7936afaa699"
},
{
"name": "_normalize_judge_data",
"line_start": 184,
"line_end": 215,
"args": [
{
"name": "self"
},
{
"name": "parsed_data",
"type_hint": "Dict[str, Any]"
}
],
"return_type": "Dict[str, Any]",
"docstring": "标准化审核数据格式\n\nArgs:\n parsed_data: 解析后的数据\n\nReturns:\n 标准化后的审核结果字典",
"is_async": false,
"decorators": [],
"code": " def _normalize_judge_data(self, parsed_data: Dict[str, Any]) -> Dict[str, Any]:\n \"\"\"\n 标准化审核数据格式\n\n Args:\n parsed_data: 解析后的数据\n\n Returns:\n 标准化后的审核结果字典\n \"\"\"\n # 根据实际judgeContent/system.txt的输出格式进行标准化\n normalized_data = {\n # 核心字段根据实际system.txt的输出格式\n \"analysis\": parsed_data.get(\"analysis\", \"\"),\n \"title\": parsed_data.get(\"title\", \"\"),\n \"content\": parsed_data.get(\"content\", \"\"),\n \n # 计算字段\n \"has_analysis\": bool(parsed_data.get(\"analysis\", \"\").strip()),\n \"title_changed\": parsed_data.get(\"title\", \"\") != \"\",\n \"content_changed\": parsed_data.get(\"content\", \"\") != \"\",\n \"analysis_length\": len(parsed_data.get(\"analysis\", \"\")),\n \n # 元数据\n \"metadata\": {\n \"original_parsed_data\": parsed_data,\n \"normalization_version\": \"v1\",\n \"generated_at\": self.output_manager.run_id\n }\n }\n\n return normalized_data",
"code_hash": "01e6875325d6a9af0a71cb6df1dbd987"
},
{
"name": "_create_fallback_judge_data",
"line_start": 217,
"line_end": 241,
"args": [
{
"name": "self"
},
{
"name": "raw_output",
"type_hint": "str"
}
],
"return_type": "Dict[str, Any]",
"docstring": "创建回退的审核数据当JSON解析失败时\n\nArgs:\n raw_output: 原始输出\n\nReturns:\n 回退的审核结果字典",
"is_async": false,
"decorators": [],
"code": " def _create_fallback_judge_data(self, raw_output: str) -> Dict[str, Any]:\n \"\"\"\n 创建回退的审核数据当JSON解析失败时\n\n Args:\n raw_output: 原始输出\n\n Returns:\n 回退的审核结果字典\n \"\"\"\n # 尝试从原始文本中提取信息\n return {\n \"analysis\": raw_output,\n \"title\": \"\",\n \"content\": \"\",\n \"has_analysis\": bool(raw_output.strip()),\n \"title_changed\": False,\n \"content_changed\": False,\n \"analysis_length\": len(raw_output),\n \"metadata\": {\n \"parsing_method\": \"fallback_text_extraction\",\n \"generated_at\": self.output_manager.run_id,\n \"original_raw_output\": raw_output\n }\n }",
"code_hash": "20800a0738db2e9a451f0a81aec58d45"
},
{
"name": "_evaluate_pass_status",
"line_start": 243,
"line_end": 306,
"args": [
{
"name": "self"
},
{
"name": "judge_result",
"type_hint": "Dict[str, Any]"
}
],
"return_type": "Tuple[bool, str, float]",
"docstring": "评估是否通过审核\n\nArgs:\n judge_result: 审核结果\n\nReturns:\n Tuple[是否通过, 错误信息, 置信度]",
"is_async": false,
"decorators": [],
"code": " def _evaluate_pass_status(self, judge_result: Dict[str, Any]) -> Tuple[bool, str, float]:\n \"\"\"\n 评估是否通过审核\n\n Args:\n judge_result: 审核结果\n\n Returns:\n Tuple[是否通过, 错误信息, 置信度]\n \"\"\"\n try:\n analysis = judge_result.get(\"analysis\", \"\")\n title = judge_result.get(\"title\", \"\")\n content = judge_result.get(\"content\", \"\")\n\n # 基础检查:是否有分析内容\n if not analysis:\n return False, \"缺少审核分析\", 0.1\n\n # 检查是否有修改后的内容\n has_corrections = bool(title) or bool(content)\n \n # 根据分析内容进行判断(简单的关键词判断)\n analysis_lower = analysis.lower()\n negative_keywords = [\n \"不符\", \"错误\", \"虚构\", \"不合理\", \"不准确\", \"缺少\", \"遗漏\", \n \"超过\", \"违反\", \"不当\", \"problem\", \"error\", \"incorrect\", \"missing\"\n ]\n \n positive_keywords = [\n \"符合\", \"正确\", \"准确\", \"合理\", \"完整\", \"适当\", \"良好\",\n \"correct\", \"accurate\", \"appropriate\", \"good\", \"proper\"\n ]\n\n negative_score = sum(1 for keyword in negative_keywords if keyword in analysis_lower)\n positive_score = sum(1 for keyword in positive_keywords if keyword in analysis_lower)\n\n # 计算置信度\n if has_corrections:\n # 如果有修改,说明原内容有问题,但修改后应该是通过的\n confidence = max(0.7, min(0.95, 0.7 + positive_score * 0.1))\n is_passed = True\n error_message = \"\"\n elif negative_score > positive_score:\n # 负面评价较多\n confidence = max(0.3, min(0.9, 0.5 + negative_score * 0.1))\n is_passed = False\n error_message = \"内容质量不符合要求\"\n else:\n # 正面评价较多或无明显问题\n confidence = max(0.6, min(0.95, 0.8 + positive_score * 0.05))\n is_passed = True\n error_message = \"\"\n\n # 应用阈值\n if confidence < self.judge_threshold:\n is_passed = False\n error_message = error_message or \"审核置信度不足\"\n\n return is_passed, error_message, confidence\n\n except Exception as e:\n logger.error(f\"审核状态评估失败: {e}\")\n return False, f\"评估失败: {str(e)}\", 0.0",
"code_hash": "8ecdc33854003596b83b1f520ca25f64"
},
{
"name": "get_judge_stats",
"line_start": 366,
"line_end": 381,
"args": [
{
"name": "self"
}
],
"return_type": "Dict[str, Any]",
"docstring": "获取审核统计信息\n\nReturns:\n 统计信息字典",
"is_async": false,
"decorators": [],
"code": " def get_judge_stats(self) -> Dict[str, Any]:\n \"\"\"\n 获取审核统计信息\n\n Returns:\n 统计信息字典\n \"\"\"\n return {\n \"task_model_config\": self.task_model_config,\n \"field_config\": self.field_config,\n \"judge_threshold\": self.judge_threshold,\n \"output_directory\": str(self.output_manager.run_output_dir),\n \"ai_model_info\": self.ai_service.get_model_info(),\n \"prompt_templates\": self.prompt_manager.get_available_templates().get(\"content_judging\", {}),\n \"json_processor_enabled\": self.json_processor.enable_repair\n } ",
"code_hash": "fb9714107dd5e3a1a489cc517f147913"
}
],
"classes": [
{
"name": "ContentJudger",
"line_start": 20,
"line_end": 381,
"bases": [],
"methods": [
{
"name": "__init__",
"line_start": 26,
"line_end": 51,
"args": [
{
"name": "self"
},
{
"name": "config",
"type_hint": "AlgorithmConfig"
}
],
"return_type": null,
"docstring": "初始化内容审核器\n\nArgs:\n config: 算法配置",
"is_async": false,
"decorators": [],
"code": " def __init__(self, config: AlgorithmConfig):\n \"\"\"\n 初始化内容审核器\n\n Args:\n config: 算法配置\n \"\"\"\n self.config = config\n self.ai_service = AIService(config.ai_model)\n self.output_manager = OutputManager(config.output)\n self.prompt_manager = PromptManager(config.prompts, config.resources)\n \n # 初始化JSON处理器\n self.json_processor = JSONProcessor(\n enable_repair=config.content_generation.enable_json_repair,\n max_repair_attempts=config.content_generation.json_repair_attempts\n )\n \n # 获取任务特定的模型配置和字段配置\n self.task_model_config = config.ai_model.get_task_config(\"content_judging\")\n self.field_config = config.content_generation.result_field_mapping.get(\"content_judging\", {})\n \n # 审核阈值配置\n self.judge_threshold = config.content_generation.judge_threshold\n \n logger.info(f\"内容审核器初始化完成,使用模型参数: {self.task_model_config}\")",
"code_hash": "c99b924d21358871a472386e4961f637"
},
{
"name": "judge_content",
"line_start": 53,
"line_end": 147,
"args": [
{
"name": "self"
},
{
"name": "tweet_content",
"type_hint": "str"
},
{
"name": "object_content",
"type_hint": "str"
},
{
"name": "product_content",
"type_hint": "str"
},
{
"name": "refer_content",
"type_hint": "str"
},
{
"name": "content_metadata",
"type_hint": "Optional[Dict[str, Any]]"
}
],
"return_type": "Tuple[bool, str, float, Dict[str, Any]]",
"docstring": "审核内容质量\n\nArgs:\n tweet_content: 待审核的文案内容\n object_content: 景区/对象信息\n product_content: 产品信息\n refer_content: 参考内容\n content_metadata: 内容元数据\n **kwargs: 其他参数\n\nReturns:\n Tuple[是否通过, 错误信息, 置信度, 审核结果详情]\n\nRaises:\n ContentGenerationError: 审核失败时抛出",
"is_async": true,
"decorators": [],
"code": " async def judge_content(\n self,\n tweet_content: str,\n object_content: str,\n product_content: str,\n refer_content: str = \"\",\n content_metadata: Optional[Dict[str, Any]] = None,\n **kwargs\n ) -> Tuple[bool, str, float, Dict[str, Any]]:\n \"\"\"\n 审核内容质量\n\n Args:\n tweet_content: 待审核的文案内容\n object_content: 景区/对象信息\n product_content: 产品信息\n refer_content: 参考内容\n content_metadata: 内容元数据\n **kwargs: 其他参数\n\n Returns:\n Tuple[是否通过, 错误信息, 置信度, 审核结果详情]\n\n Raises:\n ContentGenerationError: 审核失败时抛出\n \"\"\"\n try:\n logger.info(\"开始执行内容审核流程\")\n\n # 1. 构建提示词\n system_prompt = self.prompt_manager.get_prompt(\"content_judging\", \"system\")\n user_prompt_template = self.prompt_manager.get_prompt(\"content_judging\", \"user\")\n \n # 格式化用户提示词根据实际judgeContent/user.txt格式\n user_prompt = self.prompt_manager.format_prompt(\n user_prompt_template,\n object_content=object_content,\n product_content=product_content,\n refer_content=refer_content,\n tweet_content=tweet_content,\n **kwargs\n )\n\n # 保存提示词(如果配置允许)\n if self.config.output.save_prompts:\n self.output_manager.save_text(system_prompt, \"system_prompt\", \"content_judging\")\n self.output_manager.save_text(user_prompt, \"user_prompt\", \"content_judging\")\n\n # 2. 调用AI审核\n content, input_tokens, output_tokens, elapsed_time = await self.ai_service.generate_text(\n system_prompt=system_prompt,\n user_prompt=user_prompt,\n stage=\"内容审核\",\n **self.task_model_config\n )\n\n # 保存原始响应(如果配置允许)\n if self.config.output.save_raw_responses:\n self.output_manager.save_text(content, \"raw_response\", \"content_judging\")\n\n # 3. 解析审核结果\n is_passed, error_message, confidence, judge_result = self._parse_judge_result(content)\n\n # 4. 保存审核结果\n complete_result = {\n \"original_content\": tweet_content,\n \"judge_result\": judge_result,\n \"is_passed\": is_passed,\n \"error_message\": error_message,\n \"confidence\": confidence,\n \"metadata\": {\n \"content_metadata\": content_metadata,\n \"judge_threshold\": self.judge_threshold,\n \"model_config\": self.task_model_config,\n \"field_config\": self.field_config,\n \"tokens\": {\n \"input\": input_tokens,\n \"output\": output_tokens\n },\n \"elapsed_time\": elapsed_time\n }\n }\n \n self.output_manager.save_json(complete_result, \"judged_content\")\n\n # 5. 保存元数据\n self.output_manager.save_metadata(complete_result[\"metadata\"], \"content_judging\")\n\n logger.info(f\"内容审核完成,结果: {'通过' if is_passed else '不通过'}, 置信度: {confidence:.2f}\")\n return is_passed, error_message, confidence, judge_result\n\n except Exception as e:\n error_msg = f\"内容审核失败: {str(e)}\"\n logger.error(error_msg, exc_info=True)\n raise ContentGenerationError(error_msg)",
"code_hash": "b3ca81ff4c689fb92d930f725138b80a"
},
{
"name": "_parse_judge_result",
"line_start": 149,
"line_end": 182,
"args": [
{
"name": "self"
},
{
"name": "raw_output",
"type_hint": "str"
}
],
"return_type": "Tuple[bool, str, float, Dict[str, Any]]",
"docstring": "解析审核结果\n\nArgs:\n raw_output: AI原始输出\n\nReturns:\n Tuple[是否通过, 错误信息, 置信度, 完整结果]",
"is_async": false,
"decorators": [],
"code": " def _parse_judge_result(self, raw_output: str) -> Tuple[bool, str, float, Dict[str, Any]]:\n \"\"\"\n 解析审核结果\n\n Args:\n raw_output: AI原始输出\n\n Returns:\n Tuple[是否通过, 错误信息, 置信度, 完整结果]\n \"\"\"\n try:\n # 使用JSON处理器解析\n parsed_data = self.json_processor.parse_llm_output(\n raw_output=raw_output,\n expected_fields=self.field_config.get(\"expected_fields\", [\"analysis\", \"title\", \"content\"]),\n required_fields=self.field_config.get(\"required_fields\", [\"title\", \"content\"])\n )\n\n # 根据实际judgeContent/system.txt的输出格式进行解析\n if isinstance(parsed_data, dict):\n judge_result = self._normalize_judge_data(parsed_data)\n else:\n # 回退处理\n judge_result = self._create_fallback_judge_data(raw_output)\n\n # 判断是否通过\n is_passed, error_message, confidence = self._evaluate_pass_status(judge_result)\n\n return is_passed, error_message, confidence, judge_result\n\n except Exception as e:\n logger.error(f\"审核结果解析失败: {e}\")\n # 返回保守的失败结果\n return False, f\"解析失败: {str(e)}\", 0.0, {\"error\": str(e), \"raw_output\": raw_output}",
"code_hash": "14c8a5de7e3df0bd0989c7936afaa699"
},
{
"name": "_normalize_judge_data",
"line_start": 184,
"line_end": 215,
"args": [
{
"name": "self"
},
{
"name": "parsed_data",
"type_hint": "Dict[str, Any]"
}
],
"return_type": "Dict[str, Any]",
"docstring": "标准化审核数据格式\n\nArgs:\n parsed_data: 解析后的数据\n\nReturns:\n 标准化后的审核结果字典",
"is_async": false,
"decorators": [],
"code": " def _normalize_judge_data(self, parsed_data: Dict[str, Any]) -> Dict[str, Any]:\n \"\"\"\n 标准化审核数据格式\n\n Args:\n parsed_data: 解析后的数据\n\n Returns:\n 标准化后的审核结果字典\n \"\"\"\n # 根据实际judgeContent/system.txt的输出格式进行标准化\n normalized_data = {\n # 核心字段根据实际system.txt的输出格式\n \"analysis\": parsed_data.get(\"analysis\", \"\"),\n \"title\": parsed_data.get(\"title\", \"\"),\n \"content\": parsed_data.get(\"content\", \"\"),\n \n # 计算字段\n \"has_analysis\": bool(parsed_data.get(\"analysis\", \"\").strip()),\n \"title_changed\": parsed_data.get(\"title\", \"\") != \"\",\n \"content_changed\": parsed_data.get(\"content\", \"\") != \"\",\n \"analysis_length\": len(parsed_data.get(\"analysis\", \"\")),\n \n # 元数据\n \"metadata\": {\n \"original_parsed_data\": parsed_data,\n \"normalization_version\": \"v1\",\n \"generated_at\": self.output_manager.run_id\n }\n }\n\n return normalized_data",
"code_hash": "01e6875325d6a9af0a71cb6df1dbd987"
},
{
"name": "_create_fallback_judge_data",
"line_start": 217,
"line_end": 241,
"args": [
{
"name": "self"
},
{
"name": "raw_output",
"type_hint": "str"
}
],
"return_type": "Dict[str, Any]",
"docstring": "创建回退的审核数据当JSON解析失败时\n\nArgs:\n raw_output: 原始输出\n\nReturns:\n 回退的审核结果字典",
"is_async": false,
"decorators": [],
"code": " def _create_fallback_judge_data(self, raw_output: str) -> Dict[str, Any]:\n \"\"\"\n 创建回退的审核数据当JSON解析失败时\n\n Args:\n raw_output: 原始输出\n\n Returns:\n 回退的审核结果字典\n \"\"\"\n # 尝试从原始文本中提取信息\n return {\n \"analysis\": raw_output,\n \"title\": \"\",\n \"content\": \"\",\n \"has_analysis\": bool(raw_output.strip()),\n \"title_changed\": False,\n \"content_changed\": False,\n \"analysis_length\": len(raw_output),\n \"metadata\": {\n \"parsing_method\": \"fallback_text_extraction\",\n \"generated_at\": self.output_manager.run_id,\n \"original_raw_output\": raw_output\n }\n }",
"code_hash": "20800a0738db2e9a451f0a81aec58d45"
},
{
"name": "_evaluate_pass_status",
"line_start": 243,
"line_end": 306,
"args": [
{
"name": "self"
},
{
"name": "judge_result",
"type_hint": "Dict[str, Any]"
}
],
"return_type": "Tuple[bool, str, float]",
"docstring": "评估是否通过审核\n\nArgs:\n judge_result: 审核结果\n\nReturns:\n Tuple[是否通过, 错误信息, 置信度]",
"is_async": false,
"decorators": [],
"code": " def _evaluate_pass_status(self, judge_result: Dict[str, Any]) -> Tuple[bool, str, float]:\n \"\"\"\n 评估是否通过审核\n\n Args:\n judge_result: 审核结果\n\n Returns:\n Tuple[是否通过, 错误信息, 置信度]\n \"\"\"\n try:\n analysis = judge_result.get(\"analysis\", \"\")\n title = judge_result.get(\"title\", \"\")\n content = judge_result.get(\"content\", \"\")\n\n # 基础检查:是否有分析内容\n if not analysis:\n return False, \"缺少审核分析\", 0.1\n\n # 检查是否有修改后的内容\n has_corrections = bool(title) or bool(content)\n \n # 根据分析内容进行判断(简单的关键词判断)\n analysis_lower = analysis.lower()\n negative_keywords = [\n \"不符\", \"错误\", \"虚构\", \"不合理\", \"不准确\", \"缺少\", \"遗漏\", \n \"超过\", \"违反\", \"不当\", \"problem\", \"error\", \"incorrect\", \"missing\"\n ]\n \n positive_keywords = [\n \"符合\", \"正确\", \"准确\", \"合理\", \"完整\", \"适当\", \"良好\",\n \"correct\", \"accurate\", \"appropriate\", \"good\", \"proper\"\n ]\n\n negative_score = sum(1 for keyword in negative_keywords if keyword in analysis_lower)\n positive_score = sum(1 for keyword in positive_keywords if keyword in analysis_lower)\n\n # 计算置信度\n if has_corrections:\n # 如果有修改,说明原内容有问题,但修改后应该是通过的\n confidence = max(0.7, min(0.95, 0.7 + positive_score * 0.1))\n is_passed = True\n error_message = \"\"\n elif negative_score > positive_score:\n # 负面评价较多\n confidence = max(0.3, min(0.9, 0.5 + negative_score * 0.1))\n is_passed = False\n error_message = \"内容质量不符合要求\"\n else:\n # 正面评价较多或无明显问题\n confidence = max(0.6, min(0.95, 0.8 + positive_score * 0.05))\n is_passed = True\n error_message = \"\"\n\n # 应用阈值\n if confidence < self.judge_threshold:\n is_passed = False\n error_message = error_message or \"审核置信度不足\"\n\n return is_passed, error_message, confidence\n\n except Exception as e:\n logger.error(f\"审核状态评估失败: {e}\")\n return False, f\"评估失败: {str(e)}\", 0.0",
"code_hash": "8ecdc33854003596b83b1f520ca25f64"
},
{
"name": "judge_content_batch",
"line_start": 308,
"line_end": 336,
"args": [
{
"name": "self"
},
{
"name": "content_list",
"type_hint": "List[Dict[str, Any]]"
}
],
"return_type": "Dict[str, Tuple[bool, str, float, Dict[str, Any]]]",
"docstring": "批量审核内容\n\nArgs:\n content_list: 内容列表,每个元素包含审核所需的参数\n\nReturns:\n 内容ID->审核结果的字典",
"is_async": true,
"decorators": [],
"code": " async def judge_content_batch(\n self,\n content_list: List[Dict[str, Any]]\n ) -> Dict[str, Tuple[bool, str, float, Dict[str, Any]]]:\n \"\"\"\n 批量审核内容\n\n Args:\n content_list: 内容列表,每个元素包含审核所需的参数\n\n Returns:\n 内容ID->审核结果的字典\n \"\"\"\n results = {}\n \n for i, content_item in enumerate(content_list):\n try:\n content_id = content_item.get('id', f'content_{i+1}')\n logger.info(f\"批量审核内容 {i+1}/{len(content_list)}: {content_id}\")\n \n result = await self.judge_content(**content_item)\n results[content_id] = result\n \n except Exception as e:\n logger.error(f\"批量审核第 {i+1} 项失败: {e}\")\n content_id = content_item.get('id', f'content_{i+1}')\n results[content_id] = (False, f\"审核失败: {str(e)}\", 0.0, {\"error\": str(e)})\n \n return results",
"code_hash": "7f1e5e36028cd10495ed935ab7b7b2fc"
},
{
"name": "test_judging",
"line_start": 338,
"line_end": 364,
"args": [
{
"name": "self"
}
],
"return_type": "bool",
"docstring": "测试内容审核功能\n\nReturns:\n 测试是否成功",
"is_async": true,
"decorators": [],
"code": " async def test_judging(self) -> bool:\n \"\"\"\n 测试内容审核功能\n\n Returns:\n 测试是否成功\n \"\"\"\n try:\n test_content = \"\"\"\n {\n \"title\": \"上海外滩一日游\",\n \"content\": \"上海外滩是著名的旅游景点,历史悠久,值得一游。\",\n \"tag\": \"#上海 #外滩 #一日游\"\n }\n \"\"\"\n \n is_passed, error_message, confidence, judge_result = await self.judge_content(\n tweet_content=test_content,\n object_content=\"上海外滩景区信息\",\n product_content=\"外滩一日游产品\"\n )\n \n return confidence > 0.0\n \n except Exception as e:\n logger.error(f\"内容审核测试失败: {e}\")\n return False",
"code_hash": "5af1394ec49309afde76259c211a6646"
},
{
"name": "get_judge_stats",
"line_start": 366,
"line_end": 381,
"args": [
{
"name": "self"
}
],
"return_type": "Dict[str, Any]",
"docstring": "获取审核统计信息\n\nReturns:\n 统计信息字典",
"is_async": false,
"decorators": [],
"code": " def get_judge_stats(self) -> Dict[str, Any]:\n \"\"\"\n 获取审核统计信息\n\n Returns:\n 统计信息字典\n \"\"\"\n return {\n \"task_model_config\": self.task_model_config,\n \"field_config\": self.field_config,\n \"judge_threshold\": self.judge_threshold,\n \"output_directory\": str(self.output_manager.run_output_dir),\n \"ai_model_info\": self.ai_service.get_model_info(),\n \"prompt_templates\": self.prompt_manager.get_available_templates().get(\"content_judging\", {}),\n \"json_processor_enabled\": self.json_processor.enable_repair\n } ",
"code_hash": "fb9714107dd5e3a1a489cc517f147913"
}
],
"docstring": "内容审核器 - 重构版本\n负责审核生成的内容质量支持配置化参数和动态提示词",
"decorators": [],
"code": "class ContentJudger:\n \"\"\"\n 内容审核器 - 重构版本\n 负责审核生成的内容质量,支持配置化参数和动态提示词\n \"\"\"\n\n def __init__(self, config: AlgorithmConfig):\n \"\"\"\n 初始化内容审核器\n\n Args:\n config: 算法配置\n \"\"\"\n self.config = config\n self.ai_service = AIService(config.ai_model)\n self.output_manager = OutputManager(config.output)\n self.prompt_manager = PromptManager(config.prompts, config.resources)\n \n # 初始化JSON处理器\n self.json_processor = JSONProcessor(\n enable_repair=config.content_generation.enable_json_repair,\n max_repair_attempts=config.content_generation.json_repair_attempts\n )\n \n # 获取任务特定的模型配置和字段配置\n self.task_model_config = config.ai_model.get_task_config(\"content_judging\")\n self.field_config = config.content_generation.result_field_mapping.get(\"content_judging\", {})\n \n # 审核阈值配置\n self.judge_threshold = config.content_generation.judge_threshold\n \n logger.info(f\"内容审核器初始化完成,使用模型参数: {self.task_model_config}\")\n\n async def judge_content(\n self,\n tweet_content: str,\n object_content: str,\n product_content: str,\n refer_content: str = \"\",\n content_metadata: Optional[Dict[str, Any]] = None,\n **kwargs\n ) -> Tuple[bool, str, float, Dict[str, Any]]:\n \"\"\"\n 审核内容质量\n\n Args:\n tweet_content: 待审核的文案内容\n object_content: 景区/对象信息\n product_content: 产品信息\n refer_content: 参考内容\n content_metadata: 内容元数据\n **kwargs: 其他参数\n\n Returns:\n Tuple[是否通过, 错误信息, 置信度, 审核结果详情]\n\n Raises:\n ContentGenerationError: 审核失败时抛出\n \"\"\"\n try:\n logger.info(\"开始执行内容审核流程\")\n\n # 1. 构建提示词\n system_prompt = self.prompt_manager.get_prompt(\"content_judging\", \"system\")\n user_prompt_template = self.prompt_manager.get_prompt(\"content_judging\", \"user\")\n \n # 格式化用户提示词根据实际judgeContent/user.txt格式\n user_prompt = self.prompt_manager.format_prompt(\n user_prompt_template,\n object_content=object_content,\n product_content=product_content,\n refer_content=refer_content,\n tweet_content=tweet_content,\n **kwargs\n )\n\n # 保存提示词(如果配置允许)\n if self.config.output.save_prompts:\n self.output_manager.save_text(system_prompt, \"system_prompt\", \"content_judging\")\n self.output_manager.save_text(user_prompt, \"user_prompt\", \"content_judging\")\n\n # 2. 调用AI审核\n content, input_tokens, output_tokens, elapsed_time = await self.ai_service.generate_text(\n system_prompt=system_prompt,\n user_prompt=user_prompt,\n stage=\"内容审核\",\n **self.task_model_config\n )\n\n # 保存原始响应(如果配置允许)\n if self.config.output.save_raw_responses:\n self.output_manager.save_text(content, \"raw_response\", \"content_judging\")\n\n # 3. 解析审核结果\n is_passed, error_message, confidence, judge_result = self._parse_judge_result(content)\n\n # 4. 保存审核结果\n complete_result = {\n \"original_content\": tweet_content,\n \"judge_result\": judge_result,\n \"is_passed\": is_passed,\n \"error_me",
"code_hash": "ed26ab81ebedb858282d7effc034dbaf"
}
],
"imports": [
{
"type": "import",
"modules": [
"logging"
],
"aliases": []
},
{
"type": "from_import",
"module": "typing",
"names": [
"Dict",
"Any",
"Tuple",
"Optional",
"List"
],
"aliases": [],
"level": 0
},
{
"type": "import",
"modules": [
"json"
],
"aliases": []
},
{
"type": "from_import",
"module": "config",
"names": [
"AlgorithmConfig"
],
"aliases": [],
"level": 2
},
{
"type": "from_import",
"module": "core",
"names": [
"AIService",
"OutputManager",
"PromptManager",
"JSONProcessor"
],
"aliases": [],
"level": 2
},
{
"type": "from_import",
"module": "exceptions",
"names": [
"ContentGenerationError"
],
"aliases": [],
"level": 2
}
],
"constants": [],
"docstring": "Content Judger\n内容审核器 - 重构版本使用动态提示词和JSON处理支持真实的审核格式",
"content_hash": "4d4b8b85bae5018e0fbe59f6cf144a10"
}