Compare commits
3 Commits
b5ddb84cdd
...
157d3348a6
| Author | SHA1 | Date | |
|---|---|---|---|
| 157d3348a6 | |||
| ae6801a7d5 | |||
| 8feefeb053 |
Binary file not shown.
BIN
core/__pycache__/__init__.cpython-38.pyc
Normal file
BIN
core/__pycache__/__init__.cpython-38.pyc
Normal file
Binary file not shown.
Binary file not shown.
BIN
core/ai/__pycache__/__init__.cpython-38.pyc
Normal file
BIN
core/ai/__pycache__/__init__.cpython-38.pyc
Normal file
Binary file not shown.
Binary file not shown.
BIN
core/ai/__pycache__/ai_agent.cpython-38.pyc
Normal file
BIN
core/ai/__pycache__/ai_agent.cpython-38.pyc
Normal file
Binary file not shown.
@ -99,11 +99,8 @@ class AIAgent:
|
|||||||
time_cost = time.time() - start_time
|
time_cost = time.time() - start_time
|
||||||
|
|
||||||
if use_stream:
|
if use_stream:
|
||||||
# 流式处理暂时不返回token计数和时间,需要更复杂的实现
|
# 流式处理需要异步迭代
|
||||||
# 这里返回一个空的生成器,但实际逻辑在 _process_stream 中
|
full_text = await self._process_stream(response)
|
||||||
# 为了统一返回类型,我们可能需要重新设计这里
|
|
||||||
# 简化处理:流式模式下,我们返回拼接后的完整文本
|
|
||||||
full_text = "".join([chunk for chunk in self._process_stream(response)])
|
|
||||||
output_tokens = self.count_tokens(full_text)
|
output_tokens = self.count_tokens(full_text)
|
||||||
logger.info(f"{stage_info} 任务完成,耗时 {time_cost:.2f} 秒. 输出token数: {output_tokens}")
|
logger.info(f"{stage_info} 任务完成,耗时 {time_cost:.2f} 秒. 输出token数: {output_tokens}")
|
||||||
return full_text, input_tokens, output_tokens, time_cost
|
return full_text, input_tokens, output_tokens, time_cost
|
||||||
@ -130,16 +127,18 @@ class AIAgent:
|
|||||||
|
|
||||||
raise AIModelError(f"AI模型调用在 {self.config.max_retries} 次重试后失败") from last_exception
|
raise AIModelError(f"AI模型调用在 {self.config.max_retries} 次重试后失败") from last_exception
|
||||||
|
|
||||||
def _process_stream(self, response):
|
async def _process_stream(self, response):
|
||||||
"""处理流式响应"""
|
"""异步处理流式响应"""
|
||||||
full_response = []
|
full_response = []
|
||||||
for chunk in response:
|
async for chunk in response:
|
||||||
content = chunk.choices[0].delta.content
|
content = chunk.choices[0].delta.content
|
||||||
if content:
|
if content:
|
||||||
full_response.append(content)
|
full_response.append(content)
|
||||||
yield content
|
# 如果需要在这里实现真正的流式处理,可以使用回调函数或其他方式
|
||||||
|
|
||||||
logger.info(f"流式响应接收完成,总长度: {len(''.join(full_response))}")
|
full_text = "".join(full_response)
|
||||||
|
logger.info(f"流式响应接收完成,总长度: {len(full_text)}")
|
||||||
|
return full_text
|
||||||
|
|
||||||
def count_tokens(self, text: str) -> int:
|
def count_tokens(self, text: str) -> int:
|
||||||
"""
|
"""
|
||||||
|
|||||||
Binary file not shown.
BIN
core/config/__pycache__/__init__.cpython-38.pyc
Normal file
BIN
core/config/__pycache__/__init__.cpython-38.pyc
Normal file
Binary file not shown.
Binary file not shown.
BIN
core/config/__pycache__/manager.cpython-38.pyc
Normal file
BIN
core/config/__pycache__/manager.cpython-38.pyc
Normal file
Binary file not shown.
Binary file not shown.
BIN
core/config/__pycache__/models.cpython-38.pyc
Normal file
BIN
core/config/__pycache__/models.cpython-38.pyc
Normal file
Binary file not shown.
Binary file not shown.
BIN
core/exception/__pycache__/__init__.cpython-38.pyc
Normal file
BIN
core/exception/__pycache__/__init__.cpython-38.pyc
Normal file
Binary file not shown.
Binary file not shown.
BIN
core/exception/__pycache__/exceptions.cpython-38.pyc
Normal file
BIN
core/exception/__pycache__/exceptions.cpython-38.pyc
Normal file
Binary file not shown.
@ -222,7 +222,7 @@ async def generate_content_for_topic(self, topic: Dict[str, Any]) -> Dict[str, A
|
|||||||
raw_result = await self.ai_agent.generate_text(...)
|
raw_result = await self.ai_agent.generate_text(...)
|
||||||
|
|
||||||
# 解析和保存结果
|
# 解析和保存结果
|
||||||
content_data = json.loads(raw_result)
|
content_data = json_repair.loads(raw_result)
|
||||||
self.output_manager.save_json(content_data, "article.json", ...)
|
self.output_manager.save_json(content_data, "article.json", ...)
|
||||||
|
|
||||||
return content_data
|
return content_data
|
||||||
|
|||||||
@ -1,7 +1,12 @@
|
|||||||
fastapi==0.115.12
|
json_repair==0.47.6
|
||||||
numpy==2.2.5
|
numpy==2.3.1
|
||||||
openai==1.77.0
|
openai==1.93.3
|
||||||
Pillow==11.2.1
|
opencv_python==4.11.0.86
|
||||||
|
opencv_python_headless==4.11.0.86
|
||||||
|
Pillow==11.3.0
|
||||||
|
psutil==6.1.0
|
||||||
|
pydantic==2.11.7
|
||||||
|
scikit_learn==1.7.0
|
||||||
|
scipy==1.16.0
|
||||||
|
simplejson==3.20.1
|
||||||
tiktoken==0.9.0
|
tiktoken==0.9.0
|
||||||
uvicorn==0.34.2
|
|
||||||
opencv-python
|
|
||||||
|
|||||||
@ -8,13 +8,13 @@
|
|||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
import json
|
import json
|
||||||
|
import json_repair
|
||||||
import random
|
import random
|
||||||
import asyncio
|
import asyncio
|
||||||
import argparse
|
import argparse
|
||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
# 将项目根目录添加到Python路径中
|
# 将项目根目录添加到Python路径中
|
||||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
@ -34,7 +34,7 @@ def read_data_file(file_path: str):
|
|||||||
|
|
||||||
# 尝试解析为标准JSON
|
# 尝试解析为标准JSON
|
||||||
try:
|
try:
|
||||||
return json.loads(content)
|
return json_repair.loads(content)
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
# 如果不是标准JSON,尝试解析为类JSON格式
|
# 如果不是标准JSON,尝试解析为类JSON格式
|
||||||
logger.info(f"文件 {file_path} 不是标准JSON格式,尝试其他解析方式...")
|
logger.info(f"文件 {file_path} 不是标准JSON格式,尝试其他解析方式...")
|
||||||
@ -220,7 +220,7 @@ async def generate_info_with_llm(ai_agent: AIAgent, scenic_info: dict, product_i
|
|||||||
|
|
||||||
if json_start >= 0 and json_end > json_start:
|
if json_start >= 0 and json_end > json_start:
|
||||||
json_str = response[json_start:json_end]
|
json_str = response[json_start:json_end]
|
||||||
content_dict = json.loads(json_str)
|
content_dict = json_repair.loads(json_str)
|
||||||
logger.info(f"LLM成功生成内容: {content_dict}")
|
logger.info(f"LLM成功生成内容: {content_dict}")
|
||||||
|
|
||||||
# 添加默认的按钮文本和分页信息
|
# 添加默认的按钮文本和分页信息
|
||||||
|
|||||||
Binary file not shown.
BIN
utils/__pycache__/__init__.cpython-38.pyc
Normal file
BIN
utils/__pycache__/__init__.cpython-38.pyc
Normal file
Binary file not shown.
Binary file not shown.
BIN
utils/__pycache__/file_io.cpython-38.pyc
Normal file
BIN
utils/__pycache__/file_io.cpython-38.pyc
Normal file
Binary file not shown.
Binary file not shown.
BIN
utils/__pycache__/pipeline.cpython-38.pyc
Normal file
BIN
utils/__pycache__/pipeline.cpython-38.pyc
Normal file
Binary file not shown.
Binary file not shown.
BIN
utils/__pycache__/prompts.cpython-38.pyc
Normal file
BIN
utils/__pycache__/prompts.cpython-38.pyc
Normal file
Binary file not shown.
@ -8,6 +8,7 @@
|
|||||||
import os
|
import os
|
||||||
import random
|
import random
|
||||||
import json
|
import json
|
||||||
|
import json_repair
|
||||||
import logging
|
import logging
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Optional, List, Dict, Any
|
from typing import Optional, List, Dict, Any
|
||||||
@ -38,7 +39,7 @@ class ResourceLoader:
|
|||||||
if content is None:
|
if content is None:
|
||||||
return None
|
return None
|
||||||
try:
|
try:
|
||||||
return json.loads(content)
|
return json_repair.loads(content)
|
||||||
except json.JSONDecodeError as e:
|
except json.JSONDecodeError as e:
|
||||||
logger.error(f"解析JSON文件 '{file_path}' 失败: {e}")
|
logger.error(f"解析JSON文件 '{file_path}' 失败: {e}")
|
||||||
return None
|
return None
|
||||||
@ -133,4 +134,87 @@ class OutputManager:
|
|||||||
"""完成运行的最终操作"""
|
"""完成运行的最终操作"""
|
||||||
logger.info(f"Finalizing run: {self.run_id}")
|
logger.info(f"Finalizing run: {self.run_id}")
|
||||||
# 目前没有特殊操作,但可以用于未来的扩展,如创建清单文件
|
# 目前没有特殊操作,但可以用于未来的扩展,如创建清单文件
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def process_llm_json_text(text: Any) -> Optional[Dict[str, Any]]:
    """
    Extract and parse the JSON payload from a raw LLM response.

    Candidate snippets are collected from most to least specific and tried
    in that order:

    1. the text following the first ``</think>`` marker,
    2. the contents of every ``` / ```json fenced block,
    3. the whole (stripped) text.

    Each candidate is parsed with the strict ``json.loads`` first and, if
    that fails, with the lenient ``json_repair.loads``.  Only a JSON
    container (dict or list) counts as a successful parse; a scalar result
    such as an empty string is treated as "nothing found" and the next
    candidate is tried.

    Args:
        text: Raw LLM output, or an already-parsed object.  A dict is
            returned unchanged; a list is rejected with a warning; any other
            non-string value is converted with ``str()`` before parsing.

    Returns:
        The parsed JSON value, or ``None`` when no candidate could be parsed.
        NOTE(review): the annotation says dict, but a JSON array in the
        response is returned as a list, and at least one caller checks for
        a list result - confirm before tightening the annotation.
    """
    # Kept local because a module-level ``import re`` is not visible here.
    # ``json`` and ``json_repair`` are deliberately NOT imported inside this
    # function: a function-scope import makes the name local to the whole
    # body, so any use before that import raises UnboundLocalError.
    import re

    # Already parsed: nothing to do.
    if isinstance(text, dict):
        return text

    # A list input is rejected (behaviour kept from the original contract).
    if isinstance(text, list):
        logger.warning("输入是列表类型,但期望返回字典类型")
        return None

    # Coerce any other non-string input to text.
    if not isinstance(text, str):
        try:
            text = str(text)
        except Exception as e:  # a custom __str__ may raise anything
            logger.error(f"无法将输入转换为字符串: {e}")
            return None

    if not text or not text.strip():
        logger.warning("收到空的LLM响应")
        return None

    raw_candidates = []

    # 1. Everything after the model's reasoning section.
    if "</think>" in text:
        raw_candidates.append(text.split("</think>", 1)[1])

    # 2. Contents of fenced code blocks, with or without a "json" tag.
    raw_candidates.extend(re.findall(r"```(?:json)?\s*([\s\S]*?)```", text))

    # 3. The whole response as the last resort.
    raw_candidates.append(text)

    # Drop empty and duplicate snippets while keeping the priority order.
    json_candidates = []
    seen = set()
    for raw in raw_candidates:
        snippet = raw.strip()
        if snippet and snippet not in seen:
            seen.add(snippet)
            json_candidates.append(snippet)

    for candidate in json_candidates:
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            try:
                parsed = json_repair.loads(candidate)
            except Exception as exc:
                # json_repair's failure modes are not a single documented
                # exception type; record the failure and try the next one.
                logger.debug(f"json_repair 解析候选文本失败: {exc}")
                continue

        # The lenient parser can hand back a bare scalar (for example an
        # empty string) when it finds no JSON; that is not a usable result.
        if isinstance(parsed, (dict, list)):
            return parsed

    # Every candidate failed: log the failure and give up.
    logger.error(f"无法解析LLM返回的JSON,尝试了{len(json_candidates)}种提取方式")
    logger.debug(f"原始响应: {text[:200]}...")  # first 200 chars only, to keep logs small
    return None
|
||||||
|
|||||||
@ -9,7 +9,7 @@ import json
|
|||||||
from typing import Dict, Any, Optional, List
|
from typing import Dict, Any, Optional, List
|
||||||
|
|
||||||
from core.ai import AIAgent
|
from core.ai import AIAgent
|
||||||
|
from utils.file_io import ResourceLoader, process_llm_json_text
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
class PosterContentGenerator:
|
class PosterContentGenerator:
|
||||||
@ -73,28 +73,9 @@ class PosterContentGenerator:
|
|||||||
self.logger.error("AI未能返回任何内容。")
|
self.logger.error("AI未能返回任何内容。")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# 预处理并解析JSON
|
# 使用通用JSON解析函数处理响应
|
||||||
return self._parse_json_response(raw_response)
|
return process_llm_json_text(raw_response)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.error(f"调用AI生成文案时发生严重错误: {e}", exc_info=True)
|
self.logger.error(f"调用AI生成文案时发生严重错误: {e}", exc_info=True)
|
||||||
return None
|
|
||||||
|
|
||||||
def _parse_json_response(self, text: str) -> Optional[Dict[str, Any]]:
|
|
||||||
"""
|
|
||||||
从AI返回的文本中提取并解析JSON。
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
# 找到第一个 '{' 和最后一个 '}' 来提取JSON字符串
|
|
||||||
start_index = text.find('{')
|
|
||||||
end_index = text.rfind('}')
|
|
||||||
if start_index != -1 and end_index != -1:
|
|
||||||
json_str = text[start_index : end_index + 1]
|
|
||||||
return json.loads(json_str)
|
|
||||||
else:
|
|
||||||
self.logger.error("在AI响应中未找到有效的JSON对象。")
|
|
||||||
return None
|
|
||||||
except json.JSONDecodeError as e:
|
|
||||||
self.logger.error(f"解析AI返回的JSON失败: {e}")
|
|
||||||
self.logger.debug(f"原始响应文本: {text}")
|
|
||||||
return None
|
return None
|
||||||
Binary file not shown.
BIN
utils/tweet/__pycache__/__init__.cpython-38.pyc
Normal file
BIN
utils/tweet/__pycache__/__init__.cpython-38.pyc
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
utils/tweet/__pycache__/topic_generator.cpython-38.pyc
Normal file
BIN
utils/tweet/__pycache__/topic_generator.cpython-38.pyc
Normal file
Binary file not shown.
Binary file not shown.
@ -12,7 +12,7 @@ from typing import Dict, Any
|
|||||||
from core.ai import AIAgent
|
from core.ai import AIAgent
|
||||||
from core.config import ConfigManager, GenerateTopicConfig, GenerateContentConfig
|
from core.config import ConfigManager, GenerateTopicConfig, GenerateContentConfig
|
||||||
from utils.prompts import ContentPromptBuilder
|
from utils.prompts import ContentPromptBuilder
|
||||||
from utils.file_io import OutputManager
|
from utils.file_io import OutputManager, process_llm_json_text
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@ -67,7 +67,7 @@ class ContentGenerator:
|
|||||||
raw_result, _, _, _ = await self.ai_agent.generate_text(
|
raw_result, _, _, _ = await self.ai_agent.generate_text(
|
||||||
system_prompt=system_prompt,
|
system_prompt=system_prompt,
|
||||||
user_prompt=user_prompt,
|
user_prompt=user_prompt,
|
||||||
use_stream=False,
|
use_stream=True,
|
||||||
stage="内容生成",
|
stage="内容生成",
|
||||||
**model_params
|
**model_params
|
||||||
)
|
)
|
||||||
@ -77,12 +77,11 @@ class ContentGenerator:
|
|||||||
return {"error": str(e)}
|
return {"error": str(e)}
|
||||||
|
|
||||||
# 3. 解析和保存结果
|
# 3. 解析和保存结果
|
||||||
try:
|
content_data = process_llm_json_text(raw_result)
|
||||||
# 假设结果是一个JSON字符串
|
if content_data:
|
||||||
content_data = json.loads(raw_result)
|
|
||||||
self.output_manager.save_json(content_data, "article.json", subdir=output_dir.name)
|
self.output_manager.save_json(content_data, "article.json", subdir=output_dir.name)
|
||||||
logger.info(f"成功为选题 {topic_index} 生成并保存内容。")
|
logger.info(f"成功为选题 {topic_index} 生成并保存内容。")
|
||||||
return content_data
|
return content_data
|
||||||
except json.JSONDecodeError as e:
|
else:
|
||||||
logger.error(f"解析内容JSON失败 for {topic_index}: {e}")
|
logger.error(f"解析内容JSON失败 for {topic_index}")
|
||||||
return {"error": "JSONDecodeError", "raw_content": raw_result}
|
return {"error": "JSONDecodeError", "raw_content": raw_result}
|
||||||
@ -7,12 +7,12 @@
|
|||||||
|
|
||||||
import logging
|
import logging
|
||||||
import json
|
import json
|
||||||
from json_repair import loads as json_repair_loads
|
from typing import Dict, Any, Union
|
||||||
from typing import Dict, Any
|
|
||||||
|
|
||||||
from core.ai import AIAgent
|
from core.ai import AIAgent
|
||||||
from core.config import ConfigManager, GenerateTopicConfig, GenerateContentConfig
|
from core.config import ConfigManager, GenerateTopicConfig, GenerateContentConfig
|
||||||
from utils.prompts import JudgerPromptBuilder
|
from utils.prompts import JudgerPromptBuilder
|
||||||
|
from utils.file_io import process_llm_json_text
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@ -36,12 +36,12 @@ class ContentJudger:
|
|||||||
self.prompt_builder = JudgerPromptBuilder(config_manager)
|
self.prompt_builder = JudgerPromptBuilder(config_manager)
|
||||||
self.output_manager = output_manager
|
self.output_manager = output_manager
|
||||||
|
|
||||||
async def judge_content(self, generated_content: str, topic: Dict[str, Any]) -> Dict[str, Any]:
|
async def judge_content(self, generated_content: Union[str, Dict[str, Any]], topic: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
调用AI审核生成的内容
|
调用AI审核生成的内容
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
generated_content: 已生成的原始内容(JSON字符串格式)
|
generated_content: 已生成的原始内容(JSON字符串或字典对象)
|
||||||
topic: 与内容相关的原始选题字典
|
topic: 与内容相关的原始选题字典
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
@ -55,18 +55,23 @@ class ContentJudger:
|
|||||||
|
|
||||||
# 从原始内容中提取tags
|
# 从原始内容中提取tags
|
||||||
original_tags = []
|
original_tags = []
|
||||||
try:
|
original_content = process_llm_json_text(generated_content)
|
||||||
original_content = json_repair_loads(generated_content)
|
if original_content and isinstance(original_content, dict) and "tags" in original_content:
|
||||||
if isinstance(original_content, dict) and "tags" in original_content:
|
original_tags = original_content.get("tags", [])
|
||||||
original_tags = original_content.get("tags", [])
|
logger.info(f"从原始内容中提取到标签: {original_tags}")
|
||||||
logger.info(f"从原始内容中提取到标签: {original_tags}")
|
else:
|
||||||
except Exception as e:
|
logger.warning("从原始内容提取标签失败")
|
||||||
logger.warning(f"从原始内容提取标签失败: {e}")
|
|
||||||
|
# 将字典转换为JSON字符串,以便在提示中使用
|
||||||
|
if isinstance(generated_content, dict):
|
||||||
|
generated_content_str = json.dumps(generated_content, ensure_ascii=False, indent=2)
|
||||||
|
else:
|
||||||
|
generated_content_str = str(generated_content)
|
||||||
|
|
||||||
# 1. 构建提示
|
# 1. 构建提示
|
||||||
system_prompt = self.prompt_builder.get_system_prompt()
|
system_prompt = self.prompt_builder.get_system_prompt()
|
||||||
user_prompt = self.prompt_builder.build_user_prompt(
|
user_prompt = self.prompt_builder.build_user_prompt(
|
||||||
generated_content=generated_content,
|
generated_content=generated_content_str,
|
||||||
topic=topic
|
topic=topic
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -91,7 +96,7 @@ class ContentJudger:
|
|||||||
raw_result, _, _, _ = await self.ai_agent.generate_text(
|
raw_result, _, _, _ = await self.ai_agent.generate_text(
|
||||||
system_prompt=system_prompt,
|
system_prompt=system_prompt,
|
||||||
user_prompt=user_prompt,
|
user_prompt=user_prompt,
|
||||||
use_stream=False,
|
use_stream=True,
|
||||||
stage="内容审核",
|
stage="内容审核",
|
||||||
**model_params
|
**model_params
|
||||||
)
|
)
|
||||||
@ -105,25 +110,21 @@ class ContentJudger:
|
|||||||
return {"judge_success": False, "error": str(e)}
|
return {"judge_success": False, "error": str(e)}
|
||||||
|
|
||||||
# 3. 解析结果
|
# 3. 解析结果
|
||||||
try:
|
judged_data = process_llm_json_text(raw_result)
|
||||||
judged_data = json_repair_loads(raw_result)
|
if judged_data and isinstance(judged_data, dict) and "title" in judged_data and "content" in judged_data:
|
||||||
if isinstance(judged_data, dict) and "title" in judged_data and "content" in judged_data:
|
judged_data["judge_success"] = True
|
||||||
judged_data["judge_success"] = True
|
|
||||||
|
# 直接使用原始内容中的标签
|
||||||
|
if original_tags:
|
||||||
|
judged_data["tags"] = original_tags
|
||||||
|
# 如果原始内容中没有标签,则使用默认标签
|
||||||
|
logger.info(f"内容审核成功完成,使用标签: {judged_data.get('tags', [])}")
|
||||||
|
|
||||||
|
# 保存审核后的内容
|
||||||
|
if self.output_manager:
|
||||||
|
self.output_manager.save_json(judged_data, f"{topic_dir}/article_judged.json")
|
||||||
|
|
||||||
# 直接使用原始内容中的标签
|
return judged_data
|
||||||
if original_tags:
|
else:
|
||||||
judged_data["tags"] = original_tags
|
logger.warning(f"审核响应JSON格式不正确或缺少键")
|
||||||
# 如果原始内容中没有标签,则使用默认标签
|
return {"judge_success": False, "error": "Invalid JSON response", "raw_response": raw_result}
|
||||||
logger.info(f"内容审核成功完成,使用标签: {judged_data.get('tags', [])}")
|
|
||||||
|
|
||||||
# 保存审核后的内容
|
|
||||||
if self.output_manager:
|
|
||||||
self.output_manager.save_json(judged_data, f"{topic_dir}/article_judged.json")
|
|
||||||
|
|
||||||
return judged_data
|
|
||||||
else:
|
|
||||||
logger.warning(f"审核响应JSON格式不正确或缺少键: {judged_data}")
|
|
||||||
return {"judge_success": False, "error": "Invalid JSON response", "raw_response": raw_result}
|
|
||||||
except (json.JSONDecodeError, ValueError) as e:
|
|
||||||
logger.error(f"解析审核响应JSON失败: {e}")
|
|
||||||
return {"judge_success": False, "error": "JSONDecodeError", "raw_response": raw_result}
|
|
||||||
@ -71,7 +71,7 @@ class TopicGenerator:
|
|||||||
raw_result, _, _, _ = await self.ai_agent.generate_text(
|
raw_result, _, _, _ = await self.ai_agent.generate_text(
|
||||||
system_prompt=system_prompt,
|
system_prompt=system_prompt,
|
||||||
user_prompt=user_prompt,
|
user_prompt=user_prompt,
|
||||||
use_stream=False, # 选题生成通常不需要流式输出
|
use_stream=True, # Receive the response as a stream; the agent joins the chunks and returns the full text
|
||||||
stage="选题生成",
|
stage="选题生成",
|
||||||
**model_params
|
**model_params
|
||||||
)
|
)
|
||||||
|
|||||||
@ -7,9 +7,8 @@ AI响应解析器模块
|
|||||||
|
|
||||||
import logging
|
import logging
|
||||||
import json
|
import json
|
||||||
import re
|
|
||||||
from typing import List, Dict, Any
|
from typing import List, Dict, Any
|
||||||
from json_repair import loads as json_repair_loads
|
from utils.file_io import process_llm_json_text
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@ -32,40 +31,28 @@ class TopicParser:
|
|||||||
"""
|
"""
|
||||||
logger.info("开始解析AI生成的选题...")
|
logger.info("开始解析AI生成的选题...")
|
||||||
|
|
||||||
# 1. 移除AI思考过程的 <think> 块
|
# 使用通用JSON解析函数解析原始文本
|
||||||
if "</think>" in raw_text:
|
parsed_json = process_llm_json_text(raw_text)
|
||||||
raw_text = raw_text.split("</think>", 1)[-1]
|
|
||||||
|
|
||||||
# 2. 移除Markdown代码块标记 (e.g., ```json ... ```)
|
if not parsed_json:
|
||||||
cleaned_text = re.sub(r'```json\s*|\s*```', '', raw_text.strip(), flags=re.MULTILINE)
|
logger.error("解析AI响应失败,无法获取JSON数据")
|
||||||
|
|
||||||
if not cleaned_text:
|
|
||||||
logger.error("移除元数据后,解析内容为空")
|
|
||||||
return []
|
return []
|
||||||
|
|
||||||
try:
|
|
||||||
# 3. 使用json_repair修复可能不规范的JSON并解析
|
|
||||||
parsed_json = json_repair_loads(cleaned_text)
|
|
||||||
|
|
||||||
if not isinstance(parsed_json, list):
|
if not isinstance(parsed_json, list):
|
||||||
logger.error(f"解析结果不是列表,而是 {type(parsed_json)}")
|
logger.error(f"解析结果不是列表,而是 {type(parsed_json)}")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
logger.info(f"成功解析 {len(parsed_json)} 个选题对象。开始验证...")
|
logger.info(f"成功解析 {len(parsed_json)} 个选题对象。开始验证...")
|
||||||
|
|
||||||
# 4. 验证每个选题是否包含所有必需的键
|
# 验证每个选题是否包含所有必需的键
|
||||||
valid_topics = []
|
valid_topics = []
|
||||||
required_keys = {"index", "date", "logic", "object", "product", "style", "target_audience"}
|
required_keys = {"index", "date", "logic", "object", "product", "style", "target_audience"}
|
||||||
|
|
||||||
for i, item in enumerate(parsed_json):
|
for i, item in enumerate(parsed_json):
|
||||||
if isinstance(item, dict) and required_keys.issubset(item.keys()):
|
if isinstance(item, dict) and required_keys.issubset(item.keys()):
|
||||||
valid_topics.append(item)
|
valid_topics.append(item)
|
||||||
else:
|
else:
|
||||||
logger.warning(f"第 {i+1} 个选题缺少必需键或格式不正确: {item}")
|
logger.warning(f"第 {i+1} 个选题缺少必需键或格式不正确: {item}")
|
||||||
|
|
||||||
logger.info(f"验证完成,获得 {len(valid_topics)} 个有效选题。")
|
logger.info(f"验证完成,获得 {len(valid_topics)} 个有效选题。")
|
||||||
return valid_topics
|
return valid_topics
|
||||||
|
|
||||||
except (json.JSONDecodeError, ValueError) as e:
|
|
||||||
logger.error(f"解析JSON失败: {e}", exc_info=True)
|
|
||||||
return []
|
|
||||||
Loading…
x
Reference in New Issue
Block a user