修改了文件的读取模式
This commit is contained in:
parent
44c79ec8e5
commit
674082e7d7
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -12,6 +12,7 @@ import traceback
|
|||||||
import sys
|
import sys
|
||||||
import base64
|
import base64
|
||||||
import re
|
import re
|
||||||
|
import random
|
||||||
sys.path.append('/root/autodl-tmp/TravelContentCreator') # 添加项目根目录
|
sys.path.append('/root/autodl-tmp/TravelContentCreator') # 添加项目根目录
|
||||||
from core.ai_agent import AI_Agent
|
from core.ai_agent import AI_Agent
|
||||||
|
|
||||||
@ -66,8 +67,8 @@ class ContentJudger:
|
|||||||
6. 特征语句保留:请保留文案中原本的引流语句,不要修改或删除。请保留文案中的换行符 \\n,不要修改或删除换行符。
|
6. 特征语句保留:请保留文案中原本的引流语句,不要修改或删除。请保留文案中的换行符 \\n,不要修改或删除换行符。
|
||||||
7. 面向人群保留:请尽量保留文案原本的面向人群和风格,这是同一产品面向多种人群营销的策略。例如产品资料中写明亲子游时,文案写"为情侣定制的山水秘境"是可以接受的。
|
7. 面向人群保留:请尽量保留文案原本的面向人群和风格,这是同一产品面向多种人群营销的策略。例如产品资料中写明亲子游时,文案写"为情侣定制的山水秘境"是可以接受的。
|
||||||
8. 案例如下,请参考案例评判真假信息的尺度,逐行逐句仔细分析不符点和修改思路,并按照分析思路落实对每一处不符的修改措施,严格审查每一篇文案:
|
8. 案例如下,请参考案例评判真假信息的尺度,逐行逐句仔细分析不符点和修改思路,并按照分析思路落实对每一处不符的修改措施,严格审查每一篇文案:
|
||||||
{
|
[
|
||||||
"产品资料":
|
"产品资料":
|
||||||
"周末不加收【南沙越秀喜来登】1088元/套,豪华客房1间1晚+双人自助早餐+自助晚餐+2大1小水鸟世界门票,免费儿童乐园,户外泳池+健身房~
|
"周末不加收【南沙越秀喜来登】1088元/套,豪华客房1间1晚+双人自助早餐+自助晚餐+2大1小水鸟世界门票,免费儿童乐园,户外泳池+健身房~
|
||||||
不想待在家,又想带娃出去玩?更不想开长途车、人挤人?为你推荐路程短、不塞车、景点多、坐地铁就能直达的溜娃地!
|
不想待在家,又想带娃出去玩?更不想开长途车、人挤人?为你推荐路程短、不塞车、景点多、坐地铁就能直达的溜娃地!
|
||||||
南沙越秀喜来登是广州南沙区首家国际品牌酒店,坐拥广州南大门,拥有得天独厚的中心位置,可俯瞰蕉门河美景,车程短,不出广州也能玩!
|
南沙越秀喜来登是广州南沙区首家国际品牌酒店,坐拥广州南大门,拥有得天独厚的中心位置,可俯瞰蕉门河美景,车程短,不出广州也能玩!
|
||||||
@ -97,15 +98,10 @@ class ContentJudger:
|
|||||||
|
|
||||||
酒店地址:广东省广州市南沙区海熙大街79-80号
|
酒店地址:广东省广州市南沙区海熙大街79-80号
|
||||||
导航关键词:广州南沙越秀喜来登酒店"
|
导航关键词:广州南沙越秀喜来登酒店"
|
||||||
|
|
||||||
|
"生成文案":
|
||||||
"生成文案":"[
|
|
||||||
"title": "五一遛娃👶必囤!南沙喜来登1088元住景观房+双早+门票",
|
"title": "五一遛娃👶必囤!南沙喜来登1088元住景观房+双早+门票",
|
||||||
|
"content": "五一不想挤人潮?南沙这家酒店直接承包遛娃+度假双重快乐‼️\n地铁直达!2大1小1088元住景观房,含双早+自助晚餐+水鸟世界门票,儿童乐园/泳池/健身房全开放!\n🌟【遛娃刚需全配齐】\n✅ 儿童乐园:10:00-20:00全程开放,滑梯/积木/绘本一应俱全\n✅ 户外泳池:9:00-18:00恒温开放(五一期间每日消毒3次)\n✅ 健身房:8:00-22:00配备亲子瑜伽课程(需提前预约)\n\n📍【1小时玩转南沙】\n① 南沙天后宫(车程20分钟):穿汉服拍大片,听妈祖传说涨知识\n② 南沙湿地公园(40分钟):5月芦苇摇曳,带娃认鸟类+乘船探秘\n③ 十九涌海鲜街(45分钟):现捞现煮生猛海鲜,人均50元吃到撑\n\n🍽️【家长友好细节】\n• 自助晚餐隐藏彩蛋:儿童餐区设独立洗手台+热食保温柜\n• 房内配置:加厚床垫/卡通洗漱杯/尿布台(无需额外购买)\n• 安全保障:全区域监控+24小时安保巡逻\n\n🎁【五一专属加码】\n5月1-5日期间入住,凭房卡可免费领取儿童防晒冰袖+湿巾礼包\n\n📌Tips:\n1. 周一至周四仅限双床房型,周五起可选大床房\n2. 水鸟世界门票需提前1小时至前台领取纸质票\n3. 地铁四号线金洲站下车,打车15分钟直达酒店\n\n这个五一,南沙喜来登让你躺着遛娃!不用长途跋涉,家门口就能玩出仪式感~"
|
||||||
"content": "
|
|
||||||
五一不想挤人潮?南沙这家酒店直接承包遛娃+度假双重快乐‼️\n地铁直达!2大1小1088元住景观房,含双早+自助晚餐+水鸟世界门票,儿童乐园/泳池/健身房全开放!\n🌟【遛娃刚需全配齐】\n✅ 儿童乐园:10:00-20:00全程开放,滑梯/积木/绘本一应俱全\n✅ 户外泳池:9:00-18:00恒温开放(五一期间每日消毒3次)\n✅ 健身房:8:00-22:00配备亲子瑜伽课程(需提前预约)\n\n📍【1小时玩转南沙】\n① 南沙天后宫(车程20分钟):穿汉服拍大片,听妈祖传说涨知识\n② 南沙湿地公园(40分钟):5月芦苇摇曳,带娃认鸟类+乘船探秘\n③ 十九涌海鲜街(45分钟):现捞现煮生猛海鲜,人均50元吃到撑\n\n🍽️【家长友好细节】\n• 自助晚餐隐藏彩蛋:儿童餐区设独立洗手台+热食保温柜\n• 房内配置:加厚床垫/卡通洗漱杯/尿布台(无需额外购买)\n• 安全保障:全区域监控+24小时安保巡逻\n\n🎁【五一专属加码】\n5月1-5日期间入住,凭房卡可免费领取儿童防晒冰袖+湿巾礼包\n\n📌Tips:\n1. 周一至周四仅限双床房型,周五起可选大床房\n2. 水鸟世界门票需提前1小时至前台领取纸质票\n3. 地铁四号线金洲站下车,打车15分钟直达酒店\n\n这个五一,南沙喜来登让你躺着遛娃!不用长途跋涉,家门口就能玩出仪式感~" "
|
|
||||||
]"
|
|
||||||
}
|
|
||||||
|
|
||||||
输出结果:
|
输出结果:
|
||||||
{
|
{
|
||||||
@ -113,8 +109,8 @@ class ContentJudger:
|
|||||||
"title": "五一遛娃👶必囤!喜来登1088景观房",
|
"title": "五一遛娃👶必囤!喜来登1088景观房",
|
||||||
"content": "五一不想挤人潮?南沙这家酒店直接承包遛娃+度假双重快乐‼️\n地铁直达!2大1小1088r住景观房,含双早+自助晚餐+水鸟世界门票,儿童乐园/泳池/健身房全开放!\n🌟【遛娃刚需全配齐】\n✅ 儿童乐园:酒店设有免费儿童乐园,提供丰富的游乐设施,让孩子们尽情玩耍\n✅ 户外泳池:酒店配有户外无边泳池,供大人小孩一同享受清凉时光 \n✅ 健身房:酒店提供免费健身中心,适合家庭成员共同锻炼。\n\n📍【1小时玩转南沙】\n① 南沙天后宫(车程20分钟):穿汉服拍大片,听妈祖传说涨知识\n② 南沙湿地公园(40分钟):5月芦苇摇曳,带娃认鸟类+乘船探秘\n③ 十九涌海鲜街(45分钟):现捞现煮生猛海鲜,人均50r吃到撑 \n\n🍽️【家长友好细节】 \n• 自助餐厅:供应鲜美海鲜、精美甜品等任君选择,大人小孩都爱吃 \n• 房内配置:55英寸超大纯平电视+独立的浴缸+超大的落地玻璃窗,尽览蕉门河风景,尽享亲子度假时光 \n• 安全保障:酒店设有完善的监控系统和安保措施,全力保障您与家人的安全 \n\n🎁【套餐专属福利】\n1、豪华客房一间一晚(周一至四只开放双床房) \n2、2大1小自助早晚餐 \n3、赠送2大1小水鸟世界门票(酒店前台领取),无需额外购买 \n\n📌Tips: \n1. 周一至周四仅限双床房型,周五起可选大床房 \n2. 酒店前台领取水鸟世界纸质门票 \n3. 地铁四号线金洲站下车,打车15分钟直达酒店 \n\n这个五一,南沙喜来登让你躺着遛娃!不用长途跋涉,家门口就能玩出仪式感~\n"
|
"content": "五一不想挤人潮?南沙这家酒店直接承包遛娃+度假双重快乐‼️\n地铁直达!2大1小1088r住景观房,含双早+自助晚餐+水鸟世界门票,儿童乐园/泳池/健身房全开放!\n🌟【遛娃刚需全配齐】\n✅ 儿童乐园:酒店设有免费儿童乐园,提供丰富的游乐设施,让孩子们尽情玩耍\n✅ 户外泳池:酒店配有户外无边泳池,供大人小孩一同享受清凉时光 \n✅ 健身房:酒店提供免费健身中心,适合家庭成员共同锻炼。\n\n📍【1小时玩转南沙】\n① 南沙天后宫(车程20分钟):穿汉服拍大片,听妈祖传说涨知识\n② 南沙湿地公园(40分钟):5月芦苇摇曳,带娃认鸟类+乘船探秘\n③ 十九涌海鲜街(45分钟):现捞现煮生猛海鲜,人均50r吃到撑 \n\n🍽️【家长友好细节】 \n• 自助餐厅:供应鲜美海鲜、精美甜品等任君选择,大人小孩都爱吃 \n• 房内配置:55英寸超大纯平电视+独立的浴缸+超大的落地玻璃窗,尽览蕉门河风景,尽享亲子度假时光 \n• 安全保障:酒店设有完善的监控系统和安保措施,全力保障您与家人的安全 \n\n🎁【套餐专属福利】\n1、豪华客房一间一晚(周一至四只开放双床房) \n2、2大1小自助早晚餐 \n3、赠送2大1小水鸟世界门票(酒店前台领取),无需额外购买 \n\n📌Tips: \n1. 周一至周四仅限双床房型,周五起可选大床房 \n2. 酒店前台领取水鸟世界纸质门票 \n3. 地铁四号线金洲站下车,打车15分钟直达酒店 \n\n这个五一,南沙喜来登让你躺着遛娃!不用长途跋涉,家门口就能玩出仪式感~\n"
|
||||||
}
|
}
|
||||||
|
]
|
||||||
8. 必须按照以下格式输出修改后内容,不需要输出无关内容
|
9. 必须按照以下格式输出修改后内容,不需要输出无关内容
|
||||||
{
|
{
|
||||||
"analysis" : "分析过程",
|
"analysis" : "分析过程",
|
||||||
"title": "修改后的标题",
|
"title": "修改后的标题",
|
||||||
@ -154,58 +150,133 @@ class ContentJudger:
|
|||||||
logging.error(f"从PromptManager获取系统提示词失败: {e}")
|
logging.error(f"从PromptManager获取系统提示词失败: {e}")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def _split_content(self, result):
|
def _preprocess_for_json(self, text):
|
||||||
"""
|
"""预处理文本,处理JSON结构中的问题字符"""
|
||||||
参考tweet_generator的处理方式,解析AI返回的内容
|
if not isinstance(text, str):
|
||||||
|
return text
|
||||||
|
|
||||||
Args:
|
|
||||||
result: AI返回的原始结果
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
dict: 解析后的JSON数据
|
|
||||||
"""
|
|
||||||
try:
|
try:
|
||||||
# 处理AI可能返回的思考部分
|
# 1. 处理特殊Unicode字符和标点符号
|
||||||
processed_result = result
|
char_map = {
|
||||||
if "</think>" in result:
|
'"': '"', # 特殊Unicode引号替换为标准双引号
|
||||||
processed_result = result.split("</think>")[1] # 取</think>标签后的内容
|
'"': '"', # 特殊Unicode引号替换为标准双引号
|
||||||
|
''': "'", # 特殊Unicode单引号替换为标准单引号
|
||||||
|
''': "'", # 特殊Unicode单引号替换为标准单引号
|
||||||
|
',': ',', # 中文逗号替换为英文逗号
|
||||||
|
':': ':', # 中文冒号替换为英文冒号
|
||||||
|
'(': '(', # 中文括号替换为英文括号
|
||||||
|
')': ')', # 中文括号替换为英文括号
|
||||||
|
'\u200b': '', # 零宽空格直接移除
|
||||||
|
'\u200c': '', # 零宽不连字直接移除
|
||||||
|
'\u200d': '', # 零宽连字直接移除
|
||||||
|
'\u2028': ' ', # 行分隔符替换为空格
|
||||||
|
'\u2029': ' ' # 段落分隔符替换为空格
|
||||||
|
}
|
||||||
|
|
||||||
# 直接尝试解析JSON
|
# 应用字符替换
|
||||||
json_data = json.loads(processed_result)
|
for char, replacement in char_map.items():
|
||||||
json_data["error"] = False
|
text = text.replace(char, replacement)
|
||||||
json_data["judge_success"] = True
|
|
||||||
return json_data
|
|
||||||
|
|
||||||
except json.JSONDecodeError as json_err:
|
# 2. 处理控制字符 (ASCII < 32)
|
||||||
# JSON解析失败,记录错误并尝试更基本的处理方法
|
cleaned_text = ""
|
||||||
logging.warning(f"解析内容时出错: {json_err}, 尝试提取JSON部分")
|
for i, char in enumerate(text):
|
||||||
|
if ord(char) < 32: # ASCII 32以下是控制字符
|
||||||
|
if char in ['\n', '\r', '\t']: # 保留这些常用控制字符
|
||||||
|
cleaned_text += char
|
||||||
|
else: # 删除其他控制字符
|
||||||
|
logging.debug(f"移除位置{i}的无效控制字符(ASCII: {ord(char)})")
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
cleaned_text += char
|
||||||
|
|
||||||
try:
|
# 3. 处理JSON结构特定问题
|
||||||
# 尝试找到JSON部分(从第一个{到最后一个})
|
# 处理大括号附近的换行符和空白
|
||||||
json_start = processed_result.find('{')
|
if cleaned_text.startswith('{\n'):
|
||||||
json_end = processed_result.rfind('}') + 1
|
cleaned_text = '{' + cleaned_text[2:]
|
||||||
|
|
||||||
|
if cleaned_text.startswith('{ '):
|
||||||
|
cleaned_text = '{' + cleaned_text[2:]
|
||||||
|
|
||||||
if json_start >= 0 and json_end > json_start:
|
if '\n}' in cleaned_text:
|
||||||
json_str = processed_result[json_start:json_end]
|
cleaned_text = cleaned_text.replace('\n}', '}')
|
||||||
json_data = json.loads(json_str)
|
|
||||||
json_data["error"] = False
|
if ' }' in cleaned_text:
|
||||||
json_data["judge_success"] = True
|
cleaned_text = cleaned_text.replace(' }', '}')
|
||||||
return json_data
|
|
||||||
except Exception as e:
|
# 4. 处理转义序列 - 保留\n、\r、\t的转义,移除其他转义
|
||||||
logging.error(f"尝试提取JSON部分失败: {e}")
|
import re
|
||||||
|
|
||||||
|
# 第一步:将要保留的转义序列临时替换为安全标记
|
||||||
|
safe_replacements = {
|
||||||
|
r'\\n': '@NEWLINE@', # 保留换行转义
|
||||||
|
r'\\r': '@RETURN@', # 保留回车转义
|
||||||
|
r'\\t': '@TAB@', # 保留制表符转义
|
||||||
|
}
|
||||||
|
|
||||||
|
# 应用安全替换
|
||||||
|
for pattern, replacement in safe_replacements.items():
|
||||||
|
cleaned_text = re.sub(pattern, replacement, cleaned_text)
|
||||||
|
|
||||||
|
# 第二步:移除除JSON必要转义外的所有反斜杠转义
|
||||||
|
# 处理常见的多余转义情况
|
||||||
|
cleaned_text = re.sub(r'\\([^\\/"bfnrtu])', r'\1', cleaned_text) # 移除非特殊字符前的反斜杠
|
||||||
|
cleaned_text = cleaned_text.replace('\\"', '"') # 将转义的双引号还原为普通双引号
|
||||||
|
cleaned_text = cleaned_text.replace('\\\'', '\'') # 将转义的单引号还原为普通单引号
|
||||||
|
cleaned_text = cleaned_text.replace('\\\\', '\\') # 将双反斜杠替换为单反斜杠
|
||||||
|
|
||||||
|
# 第三步:将安全标记替换回原始转义序列
|
||||||
|
reverse_replacements = {
|
||||||
|
'@NEWLINE@': '\\n', # 还原换行转义
|
||||||
|
'@RETURN@': '\\r', # 还原回车转义
|
||||||
|
'@TAB@': '\\t', # 还原制表符转义
|
||||||
|
}
|
||||||
|
|
||||||
|
# 应用反向替换
|
||||||
|
for marker, escape_seq in reverse_replacements.items():
|
||||||
|
cleaned_text = cleaned_text.replace(marker, escape_seq)
|
||||||
|
|
||||||
|
# 第四步:再次检查并修复字符串内的换行符(确保100%处理)
|
||||||
|
# 这个额外的步骤确保没有任何字符串值中包含实际的换行符
|
||||||
|
pattern = r'"([^"\\]*(\\.[^"\\]*)*)"' # 匹配所有JSON字符串(包括已经有转义字符的)
|
||||||
|
|
||||||
|
def fix_remaining_newlines(match):
|
||||||
|
string_value = match.group(1)
|
||||||
|
# 确保所有实际换行符都被转义
|
||||||
|
fixed_value = string_value.replace('\n', '\\n').replace('\r', '\\r')
|
||||||
|
return f'"{fixed_value}"'
|
||||||
|
|
||||||
|
cleaned_text = re.sub(pattern, fix_remaining_newlines, cleaned_text)
|
||||||
|
|
||||||
|
# 5. 确保逗号后换行不会导致问题
|
||||||
|
cleaned_text = cleaned_text.replace(',\n', ', ') # 替换逗号后的换行为空格
|
||||||
|
|
||||||
|
# 6. 尝试解析检验
|
||||||
|
try:
|
||||||
|
# 尝试进行轻度解析验证
|
||||||
|
json.loads(cleaned_text)
|
||||||
|
# 如果能成功解析,直接返回
|
||||||
|
return cleaned_text
|
||||||
|
except json.JSONDecodeError as e:
|
||||||
|
logging.debug(f"预处理后JSON仍有问题:{e},尝试最后的修复...")
|
||||||
|
# 最后的处理:使用simplejson替代内置json库尝试修复
|
||||||
|
try:
|
||||||
|
import simplejson
|
||||||
|
# 加载后再保存,让simplejson自己处理一些小问题
|
||||||
|
fixed_json = simplejson.loads(cleaned_text, strict=False)
|
||||||
|
return simplejson.dumps(fixed_json)
|
||||||
|
except:
|
||||||
|
# simplejson也失败了,继续后续流程
|
||||||
|
pass
|
||||||
|
|
||||||
|
# 7. 记录处理后的文本,以便调试
|
||||||
|
logging.debug(f"JSON预处理后的文本长度: {len(cleaned_text)}")
|
||||||
|
return cleaned_text
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error(f"解析内容时出错: {e}")
|
logging.exception(f"JSON预处理过程中出错: {e}")
|
||||||
|
# 发生异常时,返回原始文本,不做修改
|
||||||
# 所有解析方法都失败,返回一个默认结果
|
return text
|
||||||
return {
|
|
||||||
"title": "",
|
|
||||||
"content": "",
|
|
||||||
"error": True,
|
|
||||||
"judge_success": False,
|
|
||||||
"analysis": f"内容解析失败,错误信息: {str(e)}"
|
|
||||||
}
|
|
||||||
|
|
||||||
def judge_content(self, product_info, content, temperature=0.2, top_p=0.5, presence_penalty=0.0):
|
def judge_content(self, product_info, content, temperature=0.2, top_p=0.5, presence_penalty=0.0):
|
||||||
"""审核内容"""
|
"""审核内容"""
|
||||||
logging.info("开始内容审核流程")
|
logging.info("开始内容审核流程")
|
||||||
@ -220,48 +291,159 @@ class ContentJudger:
|
|||||||
system_prompt=self._system_prompt,
|
system_prompt=self._system_prompt,
|
||||||
user_prompt=user_prompt,
|
user_prompt=user_prompt,
|
||||||
file_folder=None,
|
file_folder=None,
|
||||||
temperature=self._temperature,
|
temperature=temperature, # 使用传入的参数
|
||||||
top_p=self._topp,
|
top_p=top_p, # 使用传入的参数
|
||||||
presence_penalty=self._presence_penatly,
|
presence_penalty=presence_penalty, # 使用传入的参数
|
||||||
)
|
)
|
||||||
|
|
||||||
# 保存原始响应以便调试
|
# 保存原始响应以便调试
|
||||||
self._save_response(result, response_id)
|
self._save_response(result, response_id)
|
||||||
|
logging.info(f"AI响应长度: {len(result)} 字符")
|
||||||
|
|
||||||
# 使用简化的解析方法处理响应
|
# 尝试多种方法提取JSON
|
||||||
content_json = self._split_content(result)
|
json_obj = None
|
||||||
|
error_msg = None
|
||||||
|
|
||||||
# 检查解析结果是否有错误
|
# 方法1: 提取{...}的JSON部分
|
||||||
if content_json.get("error", False):
|
try:
|
||||||
logging.warning(f"内容解析失败,使用原内容")
|
# 移除思考部分
|
||||||
return self._create_fallback_result(content)
|
processed_result = result.split("</think>", 1)[-1].strip() if "</think>" in result else result
|
||||||
|
|
||||||
|
# 找到最外层的大括号
|
||||||
|
json_start = processed_result.find('{')
|
||||||
|
json_end = processed_result.rfind('}') + 1
|
||||||
|
|
||||||
|
if json_start >= 0 and json_end > json_start:
|
||||||
|
# 提取JSON字符串
|
||||||
|
json_str = processed_result[json_start:json_end]
|
||||||
|
|
||||||
|
# 预处理JSON字符串
|
||||||
|
json_str = self._preprocess_for_json(json_str)
|
||||||
|
|
||||||
|
# 尝试解析JSON
|
||||||
|
json_obj = json.loads(json_str)
|
||||||
|
logging.info("方法1成功解析JSON")
|
||||||
|
except Exception as e:
|
||||||
|
error_msg = f"方法1解析JSON失败: {e}"
|
||||||
|
logging.debug(error_msg)
|
||||||
|
# 继续尝试其他方法
|
||||||
|
|
||||||
# 检查必要字段是否存在
|
# 方法2: 尝试多行解析,逐行检查是否有合法JSON
|
||||||
if "title" not in content_json or "content" not in content_json:
|
if not json_obj:
|
||||||
logging.warning(f"解析结果缺少必要字段 'title' 或 'content'")
|
try:
|
||||||
content_json["judge_success"] = False
|
lines = result.split('\n')
|
||||||
return self._create_fallback_result(content)
|
for i, line in enumerate(lines):
|
||||||
|
line = line.strip()
|
||||||
|
if line.startswith('{') and line.endswith('}'):
|
||||||
|
try:
|
||||||
|
# 尝试处理和解析这一行
|
||||||
|
processed_line = self._preprocess_for_json(line)
|
||||||
|
json_obj = json.loads(processed_line)
|
||||||
|
logging.info(f"方法2在第{i+1}行成功解析JSON")
|
||||||
|
break
|
||||||
|
except:
|
||||||
|
# 继续尝试下一行
|
||||||
|
pass
|
||||||
|
except Exception as e:
|
||||||
|
if not error_msg:
|
||||||
|
error_msg = f"方法2解析JSON失败: {e}"
|
||||||
|
logging.debug(error_msg)
|
||||||
|
|
||||||
# 添加Base64编码内容
|
# 方法3: 尝试使用正则表达式匹配最可能的JSON部分
|
||||||
result_dict = {
|
if not json_obj:
|
||||||
"judge_success": content_json.get("judge_success", True),
|
try:
|
||||||
|
import re
|
||||||
|
# 尝试匹配 {..."title":...,"content":...}
|
||||||
|
json_pattern = r'\{[^{}]*"title"[^{}]*"content"[^{}]*\}'
|
||||||
|
matches = re.findall(json_pattern, result, re.DOTALL)
|
||||||
|
|
||||||
|
if matches:
|
||||||
|
for match in matches:
|
||||||
|
try:
|
||||||
|
processed_match = self._preprocess_for_json(match)
|
||||||
|
json_obj = json.loads(processed_match)
|
||||||
|
logging.info("方法3成功解析JSON")
|
||||||
|
break
|
||||||
|
except:
|
||||||
|
# 继续尝试下一个匹配
|
||||||
|
pass
|
||||||
|
except Exception as e:
|
||||||
|
if not error_msg:
|
||||||
|
error_msg = f"方法3解析JSON失败: {e}"
|
||||||
|
logging.debug(error_msg)
|
||||||
|
|
||||||
|
# 处理解析结果
|
||||||
|
if json_obj and isinstance(json_obj, dict):
|
||||||
|
# 验证关键字段
|
||||||
|
if "title" in json_obj and "content" in json_obj:
|
||||||
|
# 构建结果字典
|
||||||
|
result_dict = {
|
||||||
|
"judge_success": True,
|
||||||
|
"judged": True,
|
||||||
|
"title": json_obj["title"],
|
||||||
|
"content": json_obj["content"],
|
||||||
|
"title_base64": base64.b64encode(json_obj["title"].encode('utf-8')).decode('utf-8'),
|
||||||
|
"content_base64": base64.b64encode(json_obj["content"].encode('utf-8')).decode('utf-8')
|
||||||
|
}
|
||||||
|
|
||||||
|
# 添加分析字段(如果存在)
|
||||||
|
if "analysis" in json_obj:
|
||||||
|
result_dict["analysis"] = json_obj["analysis"]
|
||||||
|
result_dict["analysis_base64"] = base64.b64encode(json_obj["analysis"].encode('utf-8')).decode('utf-8')
|
||||||
|
|
||||||
|
logging.info(f"成功提取内容: 标题({len(json_obj['title'])}字符), 内容({len(json_obj['content'])}字符)")
|
||||||
|
return result_dict
|
||||||
|
else:
|
||||||
|
# JSON对象缺少必要字段
|
||||||
|
logging.warning("解析的JSON缺少必要字段'title'或'content'")
|
||||||
|
error_msg = "缺少必要字段'title'或'content'"
|
||||||
|
# 保存错误日志
|
||||||
|
self._save_error_json(json.dumps(json_obj), error_msg, response_id)
|
||||||
|
else:
|
||||||
|
# 未找到有效的JSON
|
||||||
|
if error_msg:
|
||||||
|
logging.warning(f"JSON解析失败: {error_msg}")
|
||||||
|
else:
|
||||||
|
logging.warning("找不到有效的JSON结构")
|
||||||
|
|
||||||
|
# 保存可能的JSON字符串以供调试
|
||||||
|
if json_start >= 0 and json_end > json_start:
|
||||||
|
json_str = processed_result[json_start:json_end]
|
||||||
|
self._save_error_json(json_str, error_msg or "解析失败", response_id)
|
||||||
|
|
||||||
|
# 所有方法都失败,返回空内容
|
||||||
|
logging.info("内容审核过程未能产生有效结果,返回空内容")
|
||||||
|
empty_result = {
|
||||||
|
"judge_success": False,
|
||||||
"judged": True,
|
"judged": True,
|
||||||
"title": content_json["title"],
|
"title": "",
|
||||||
"content": content_json["content"],
|
"content": "",
|
||||||
"title_base64": base64.b64encode(content_json["title"].encode('utf-8')).decode('utf-8'),
|
"title_base64": base64.b64encode("".encode('utf-8')).decode('utf-8'),
|
||||||
"content_base64": base64.b64encode(content_json["content"].encode('utf-8')).decode('utf-8')
|
"content_base64": base64.b64encode("".encode('utf-8')).decode('utf-8')
|
||||||
}
|
}
|
||||||
|
|
||||||
# 如果有analysis字段,也包含
|
if error_msg:
|
||||||
if "analysis" in content_json:
|
empty_result["analysis"] = f"内容审核失败: {error_msg}"
|
||||||
result_dict["analysis"] = content_json["analysis"]
|
empty_result["analysis_base64"] = base64.b64encode(f"内容审核失败: {error_msg}".encode('utf-8')).decode('utf-8')
|
||||||
result_dict["analysis_base64"] = base64.b64encode(content_json["analysis"].encode('utf-8')).decode('utf-8')
|
|
||||||
|
|
||||||
return result_dict
|
|
||||||
|
|
||||||
|
return empty_result
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
# 捕获所有异常
|
||||||
|
error_traceback = traceback.format_exc()
|
||||||
logging.exception(f"审核过程中出错: {e}")
|
logging.exception(f"审核过程中出错: {e}")
|
||||||
return self._create_fallback_result(content, error_msg=str(e))
|
logging.debug(f"详细错误: {error_traceback}")
|
||||||
|
|
||||||
|
return {
|
||||||
|
"judge_success": False,
|
||||||
|
"judged": True,
|
||||||
|
"title": "",
|
||||||
|
"content": "",
|
||||||
|
"title_base64": base64.b64encode("".encode('utf-8')).decode('utf-8'),
|
||||||
|
"content_base64": base64.b64encode("".encode('utf-8')).decode('utf-8'),
|
||||||
|
"analysis": f"内容审核过程出错: {e}",
|
||||||
|
"analysis_base64": base64.b64encode(f"内容审核过程出错: {e}".encode('utf-8')).decode('utf-8')
|
||||||
|
}
|
||||||
|
|
||||||
def _save_response(self, response, response_id):
|
def _save_response(self, response, response_id):
|
||||||
"""保存原始响应"""
|
"""保存原始响应"""
|
||||||
@ -273,6 +455,29 @@ class ContentJudger:
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error(f"保存原始响应失败: {e}")
|
logging.error(f"保存原始响应失败: {e}")
|
||||||
|
|
||||||
|
def _save_error_json(self, json_str, error, response_id):
|
||||||
|
"""保存错误的JSON字符串以供调试"""
|
||||||
|
try:
|
||||||
|
error_log_dir = "/root/autodl-tmp/TravelContentCreator/log/json_errors"
|
||||||
|
os.makedirs(error_log_dir, exist_ok=True)
|
||||||
|
|
||||||
|
# 创建包含错误信息和原始JSON的日志
|
||||||
|
error_info = {
|
||||||
|
"error_message": str(error),
|
||||||
|
"error_type": error.__class__.__name__ if hasattr(error, "__class__") else "Unknown",
|
||||||
|
"timestamp": int(time.time()),
|
||||||
|
"response_id": response_id,
|
||||||
|
"json_string": json_str
|
||||||
|
}
|
||||||
|
|
||||||
|
# 保存到文件
|
||||||
|
with open(f"{error_log_dir}/error_{response_id}.json", "w", encoding="utf-8") as f:
|
||||||
|
json.dump(error_info, f, ensure_ascii=False, indent=2)
|
||||||
|
|
||||||
|
logging.info(f"已保存错误JSON到 {error_log_dir}/error_{response_id}.json")
|
||||||
|
except Exception as e:
|
||||||
|
logging.error(f"保存错误JSON失败: {e}")
|
||||||
|
|
||||||
def _create_fallback_result(self, content, error_msg="解析失败"):
|
def _create_fallback_result(self, content, error_msg="解析失败"):
|
||||||
"""创建回退结果"""
|
"""创建回退结果"""
|
||||||
if isinstance(content, str):
|
if isinstance(content, str):
|
||||||
@ -328,4 +533,86 @@ class ContentJudger:
|
|||||||
|
|
||||||
## 运营生成的文案(需要审核的内容):
|
## 运营生成的文案(需要审核的内容):
|
||||||
{content_str}
|
{content_str}
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
def judge_content_with_retry(self, product_info, content, max_retries=3, temperature=0.2, top_p=0.5, presence_penalty=0.0):
    """Review content, retrying automatically when the result is empty.

    Calls ``self.judge_content`` up to ``max_retries + 1`` times. An attempt
    counts as successful when the result has a truthy ``judge_success``,
    ``title`` and ``content``. Each retry raises the temperature by 0.1
    (capped at 0.9) and waits a random 1-3 seconds first.

    Args:
        product_info: Product reference material passed to ``judge_content``.
        content: The content to be reviewed.
        max_retries: Maximum number of retries after the first attempt.
            Negative values are treated as 0 so at least one attempt runs.
        temperature: Sampling temperature for the first attempt.
        top_p: Forwarded unchanged to ``judge_content``.
        presence_penalty: Forwarded unchanged to ``judge_content``.

    Returns:
        dict: The first successful result, or the result of the last attempt
        when every attempt failed.
    """
    def _text_len(result, key):
        # A key may be present with a None value; treat that as empty text.
        return len(result.get(key) or "")

    # Guarantee at least one attempt so last_result is never None below.
    max_retries = max(0, max_retries)
    last_result = None

    logging.info(f"开始内容审核流程,最大重试次数: {max_retries},初始温度参数: {temperature}")

    for retry_count in range(max_retries + 1):
        current_attempt = retry_count + 1

        if retry_count > 0:
            # Raise the temperature on every retry to get more varied output.
            adjusted_temperature = min(temperature + (retry_count * 0.1), 0.9)
            logging.info(f"🔄 内容审核重试 ({current_attempt}/{max_retries+1}),调整温度参数为: {adjusted_temperature:.2f}")
        else:
            adjusted_temperature = temperature
            logging.info(f"⏳ 内容审核首次尝试 (1/{max_retries+1}),使用默认温度: {adjusted_temperature:.2f}")

        result = self.judge_content(
            product_info,
            content,
            temperature=adjusted_temperature,
            top_p=top_p,
            presence_penalty=presence_penalty
        )
        last_result = result

        title_len = _text_len(result, "title")
        content_len = _text_len(result, "content")

        if result.get("judge_success", False) and result.get("title") and result.get("content"):
            if retry_count > 0:
                logging.info(f"✅ 成功!在第{retry_count}次重试后获取有效内容(共尝试{current_attempt}次)")
            else:
                logging.info(f"✅ 成功!首次尝试已获取有效内容")
            logging.info(f"📊 审核结果统计:标题长度={title_len}字符,内容长度={content_len}字符")
            return result

        logging.warning(f"❌ 审核尝试 {current_attempt}/{max_retries+1} 失败,judge_success={result.get('judge_success')},标题长度={title_len},内容长度={content_len}")

        next_retry = retry_count + 1
        if next_retry <= max_retries:
            # Short random pause before retrying to avoid hammering the API.
            delay = 1 + random.random() * 2
            remaining = max_retries - next_retry + 1
            logging.info(f"⏱️ 等待{delay:.1f}秒后进行第{next_retry+1}次尝试,剩余{remaining}次尝试机会")
            time.sleep(delay)
        else:
            logging.warning(f"⛔ 已达到最大重试次数,共尝试{current_attempt}次均未获取满意结果")

    # Every attempt failed: hand back the last result that was produced.
    logging.warning(f"⚠️ {max_retries+1}次尝试后仍未获取有效内容,将返回最后一次结果")

    title_len = _text_len(last_result, "title")
    content_len = _text_len(last_result, "content")
    logging.info(f"📄 最终返回内容:judge_success={last_result.get('judge_success')},标题长度={title_len}字符,内容长度={content_len}字符")

    return last_result
|
||||||
@ -164,10 +164,19 @@ class FileSystemOutputHandler(OutputHandler):
|
|||||||
if "tags" in input_data and "original_tags" not in input_data:
|
if "tags" in input_data and "original_tags" not in input_data:
|
||||||
input_data["original_tags"] = input_data["tags"]
|
input_data["original_tags"] = input_data["tags"]
|
||||||
|
|
||||||
|
# 统一审核分析字段,优先使用judge_analysis,其次使用不良内容分析
|
||||||
|
if "judge_analysis" not in input_data and "不良内容分析" in input_data:
|
||||||
|
input_data["judge_analysis"] = input_data["不良内容分析"]
|
||||||
|
elif "不良内容分析" not in input_data and "judge_analysis" in input_data:
|
||||||
|
input_data["不良内容分析"] = input_data["judge_analysis"]
|
||||||
|
|
||||||
# 保存原始值用于txt文件生成和调试
|
# 保存原始值用于txt文件生成和调试
|
||||||
original_title = input_data.get("title", "")
|
original_title = input_data.get("original_title", input_data.get("title", ""))
|
||||||
original_content = input_data.get("content", "")
|
original_content = input_data.get("original_content", input_data.get("content", ""))
|
||||||
original_tags = input_data.get("tags", "")
|
original_tags = input_data.get("original_tags", input_data.get("tags", ""))
|
||||||
|
judge_title = input_data.get("title", "")
|
||||||
|
judge_content = input_data.get("content", "")
|
||||||
|
judge_tags = input_data.get("tags", "")
|
||||||
original_judge_analysis = input_data.get("judge_analysis", "")
|
original_judge_analysis = input_data.get("judge_analysis", "")
|
||||||
|
|
||||||
# 创建一个只包含元数据和base64编码的输出数据对象
|
# 创建一个只包含元数据和base64编码的输出数据对象
|
||||||
@ -201,9 +210,10 @@ class FileSystemOutputHandler(OutputHandler):
|
|||||||
if "original_tags" in input_data and input_data["original_tags"]:
|
if "original_tags" in input_data and input_data["original_tags"]:
|
||||||
output_data["original_tags_base64"] = base64.b64encode(input_data["original_tags"].encode('utf-8')).decode('ascii')
|
output_data["original_tags_base64"] = base64.b64encode(input_data["original_tags"].encode('utf-8')).decode('ascii')
|
||||||
|
|
||||||
# 5. 审核分析
|
# 5. 审核分析 - 检查judge_analysis和不良内容分析两个字段
|
||||||
if "judge_analysis" in input_data and input_data["judge_analysis"]:
|
judge_analysis = input_data.get("judge_analysis", input_data.get("不良内容分析", ""))
|
||||||
output_data["judge_analysis_base64"] = base64.b64encode(input_data["judge_analysis"].encode('utf-8')).decode('ascii')
|
if judge_analysis:
|
||||||
|
output_data["judge_analysis_base64"] = base64.b64encode(judge_analysis.encode('utf-8')).decode('ascii')
|
||||||
|
|
||||||
logging.info("成功添加Base64编码内容")
|
logging.info("成功添加Base64编码内容")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@ -226,20 +236,47 @@ class FileSystemOutputHandler(OutputHandler):
|
|||||||
# 创建一份article.txt文件以便直接查看
|
# 创建一份article.txt文件以便直接查看
|
||||||
txt_path = os.path.join(variant_dir, "article.txt")
|
txt_path = os.path.join(variant_dir, "article.txt")
|
||||||
try:
|
try:
|
||||||
# 使用原始内容,保留所有换行符
|
# 重新组织内容显示,明确区分原始内容和审核后内容
|
||||||
with open(txt_path, "w", encoding="utf-8") as f:
|
with open(txt_path, "w", encoding="utf-8") as f:
|
||||||
if original_title:
|
# 根据审核状态决定显示哪些内容
|
||||||
|
is_judged = input_data.get("judged", False)
|
||||||
|
is_judge_success = input_data.get("judge_success", False)
|
||||||
|
|
||||||
|
if is_judged and is_judge_success:
|
||||||
|
# 显示审核后的内容
|
||||||
|
f.write(f"{judge_title}\n\n")
|
||||||
|
if judge_content:
|
||||||
|
f.write(judge_content)
|
||||||
|
if judge_tags:
|
||||||
|
f.write(f"\n\n{judge_tags}")
|
||||||
|
|
||||||
|
# 在最后添加原始内容作为参考
|
||||||
|
if original_title != judge_title or original_content != judge_content:
|
||||||
|
f.write("\n\n=== 原始内容 ===\n")
|
||||||
|
f.write(f"{original_title}\n\n")
|
||||||
|
if original_content:
|
||||||
|
f.write(original_content)
|
||||||
|
if original_tags and original_tags != judge_tags:
|
||||||
|
f.write(f"\n\n{original_tags}")
|
||||||
|
elif is_judged and not is_judge_success:
|
||||||
|
# 审核失败,显示审核失败信息和原始内容
|
||||||
|
f.write("审核失败\n\n")
|
||||||
f.write(f"{original_title}\n\n")
|
f.write(f"{original_title}\n\n")
|
||||||
|
if original_content:
|
||||||
|
f.write(original_content)
|
||||||
|
if original_tags:
|
||||||
|
f.write(f"\n\n{original_tags}")
|
||||||
|
else:
|
||||||
|
# 未审核,直接显示原始内容
|
||||||
|
f.write(f"{original_title}\n\n")
|
||||||
|
if original_content:
|
||||||
|
f.write(original_content)
|
||||||
|
if original_tags:
|
||||||
|
f.write(f"\n\n{original_tags}")
|
||||||
|
|
||||||
# 保持原始内容的所有换行符
|
# 添加审核分析信息(如果有)
|
||||||
if original_content:
|
|
||||||
f.write(original_content)
|
|
||||||
|
|
||||||
if original_tags:
|
|
||||||
f.write(f"\n\n{original_tags}")
|
|
||||||
|
|
||||||
if original_judge_analysis:
|
if original_judge_analysis:
|
||||||
f.write(f"\n\n审核分析:\n{original_judge_analysis}")
|
f.write(f"\n\n=== 审核分析 ===\n{original_judge_analysis}")
|
||||||
|
|
||||||
logging.info(f"Article text saved to: {txt_path}")
|
logging.info(f"Article text saved to: {txt_path}")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@ -253,8 +290,16 @@ class FileSystemOutputHandler(OutputHandler):
|
|||||||
f.write(f"原始内容: {original_content}\n\n")
|
f.write(f"原始内容: {original_content}\n\n")
|
||||||
if original_tags:
|
if original_tags:
|
||||||
f.write(f"原始标签: {original_tags}\n\n")
|
f.write(f"原始标签: {original_tags}\n\n")
|
||||||
|
|
||||||
|
if is_judged:
|
||||||
|
f.write(f"审核状态: {'成功' if is_judge_success else '失败'}\n")
|
||||||
|
if is_judge_success:
|
||||||
|
f.write(f"审核后标题: {judge_title}\n\n")
|
||||||
|
f.write(f"审核后内容: {judge_content}\n\n")
|
||||||
|
|
||||||
if original_judge_analysis:
|
if original_judge_analysis:
|
||||||
f.write(f"审核分析: {original_judge_analysis}\n\n")
|
f.write(f"审核分析: {original_judge_analysis}\n\n")
|
||||||
|
|
||||||
f.write("---处理后---\n\n")
|
f.write("---处理后---\n\n")
|
||||||
for key, value in output_data.items():
|
for key, value in output_data.items():
|
||||||
if isinstance(value, str):
|
if isinstance(value, str):
|
||||||
@ -335,7 +380,7 @@ class FileSystemOutputHandler(OutputHandler):
|
|||||||
# 保存配置到JSON文件
|
# 保存配置到JSON文件
|
||||||
config_file_path = os.path.join(variant_dir, f"topic_{topic_index}_poster_configs.json")
|
config_file_path = os.path.join(variant_dir, f"topic_{topic_index}_poster_configs.json")
|
||||||
with open(config_file_path, 'w', encoding='utf-8') as f:
|
with open(config_file_path, 'w', encoding='utf-8') as f:
|
||||||
json.dump(processed_configs, f, ensure_ascii=False, indent=4, cls=self.SafeJSONEncoder)
|
json.dump(processed_configs, f, ensure_ascii=False, indent=4)
|
||||||
logging.info(f"Successfully saved poster configs to {config_file_path}")
|
logging.info(f"Successfully saved poster configs to {config_file_path}")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error(f"Error saving poster configs: {e}")
|
logging.error(f"Error saving poster configs: {e}")
|
||||||
|
|||||||
@ -133,52 +133,94 @@ def generate_topics(ai_agent, system_prompt, user_prompt, run_id, temperature=0.
|
|||||||
|
|
||||||
|
|
||||||
def generate_single_content(ai_agent, system_prompt, user_prompt, item, run_id,
|
def generate_single_content(ai_agent, system_prompt, user_prompt, item, run_id,
|
||||||
article_index, variant_index, temperature=0.3, top_p=0.4, presence_penalty=1.5):
|
article_index, variant_index, temperature=0.3, top_p=0.4, presence_penalty=1.5,
|
||||||
|
max_retries=3):
|
||||||
"""Generates single content variant data. Returns (content_json, user_prompt) or (None, None)."""
|
"""Generates single content variant data. Returns (content_json, user_prompt) or (None, None)."""
|
||||||
logging.info(f"Generating content for topic {article_index}, variant {variant_index}")
|
logging.info(f"Generating content for topic {article_index}, variant {variant_index}")
|
||||||
try:
|
|
||||||
if not system_prompt or not user_prompt:
|
if not system_prompt or not user_prompt:
|
||||||
logging.error("System or User prompt is empty. Cannot generate content.")
|
logging.error("System or User prompt is empty. Cannot generate content.")
|
||||||
return None, None
|
return None, None
|
||||||
|
|
||||||
|
logging.debug(f"Using pre-constructed prompts. User prompt length: {len(user_prompt)}")
|
||||||
|
|
||||||
|
# 实现重试逻辑
|
||||||
|
retry_count = 0
|
||||||
|
last_result = None
|
||||||
|
last_tokens = None
|
||||||
|
last_time_cost = None
|
||||||
|
|
||||||
|
while retry_count <= max_retries:
|
||||||
|
try:
|
||||||
|
# 只有重试时增加延迟和调整参数
|
||||||
|
if retry_count > 0:
|
||||||
|
# 添加随机延迟避免频繁请求
|
||||||
|
delay = 1 + random.random() * 2 # 1-3秒随机延迟
|
||||||
|
logging.info(f"内容生成重试 ({retry_count}/{max_retries}),等待{delay:.1f}秒后尝试...")
|
||||||
|
time.sleep(delay)
|
||||||
|
|
||||||
|
# 调整温度参数,增加多样性
|
||||||
|
adjusted_temperature = min(temperature + (retry_count * 0.1), 0.9)
|
||||||
|
logging.info(f"调整温度参数为: {adjusted_temperature}")
|
||||||
|
else:
|
||||||
|
adjusted_temperature = temperature
|
||||||
|
|
||||||
logging.debug(f"Using pre-constructed prompts. User prompt length: {len(user_prompt)}")
|
# Generate content (non-streaming work returns result, tokens, time_cost)
|
||||||
|
result, tokens, time_cost = ai_agent.work(
|
||||||
time.sleep(random.random() * 0.5)
|
system_prompt, user_prompt, "", adjusted_temperature, top_p, presence_penalty
|
||||||
|
)
|
||||||
# Generate content (non-streaming work returns result, tokens, time_cost)
|
|
||||||
result, tokens, time_cost = ai_agent.work(
|
|
||||||
system_prompt, user_prompt, "", temperature, top_p, presence_penalty
|
|
||||||
)
|
|
||||||
|
|
||||||
if result is None: # Check if AI call failed
|
|
||||||
logging.error(f"AI agent work failed for {article_index}_{variant_index}. No result returned.")
|
|
||||||
return {"title": "", "content": "", "error": True, "judge_success": False}, user_prompt # 添加judge_success字段
|
|
||||||
|
|
||||||
logging.info(f"Content generation for {article_index}_{variant_index} completed in {time_cost:.2f}s. Estimated tokens: {tokens}")
|
last_result = result
|
||||||
|
last_tokens = tokens
|
||||||
|
last_time_cost = time_cost
|
||||||
|
|
||||||
|
if result is None: # Check if AI call failed completely
|
||||||
|
logging.error(f"AI agent work failed for {article_index}_{variant_index}. No result returned.")
|
||||||
|
retry_count += 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
logging.info(f"Content generation for {article_index}_{variant_index} completed in {time_cost:.2f}s. Estimated tokens: {tokens}")
|
||||||
|
|
||||||
# --- Create tweetContent object (handles parsing) ---
|
# --- Create tweetContent object (handles parsing) ---
|
||||||
# Pass user_prompt instead of full prompt? Yes, user_prompt is what we need later.
|
tweet_content = tweetContent(result, user_prompt, run_id, article_index, variant_index)
|
||||||
tweet_content = tweetContent(result, user_prompt, run_id, article_index, variant_index)
|
content_json = tweet_content.get_json_data()
|
||||||
|
|
||||||
# --- Remove Saving Logic ---
|
# 检查是否成功解析到有效内容
|
||||||
# run_specific_output_dir = os.path.join(output_dir, run_id) # output_dir no longer available
|
if not content_json.get("error", False) and content_json.get("title") and content_json.get("content"):
|
||||||
# variant_result_dir = os.path.join(run_specific_output_dir, f"{article_index}_{variant_index}")
|
# 成功获取有效内容
|
||||||
# os.makedirs(variant_result_dir, exist_ok=True)
|
if retry_count > 0:
|
||||||
# content_save_path = os.path.join(variant_result_dir, "article.json")
|
logging.info(f"在第{retry_count}次重试后成功获取有效内容")
|
||||||
# prompt_save_path = os.path.join(variant_result_dir, "tweet_prompt.txt")
|
# 返回成功结果
|
||||||
# tweet_content.save_content(content_save_path) # Method removed
|
return content_json, user_prompt
|
||||||
# tweet_content.save_prompt(prompt_save_path) # Method removed
|
else:
|
||||||
# --- End Remove Saving Logic ---
|
logging.warning(f"内容解析失败或内容不完整,结果: {content_json.get('error')}, 标题长度: {len(content_json.get('title', ''))}, 内容长度: {len(content_json.get('content', ''))}")
|
||||||
|
|
||||||
# Return the data needed by the output handler
|
# 如果到这里,说明内容生成或解析有问题,需要重试
|
||||||
content_json = tweet_content.get_json_data()
|
retry_count += 1
|
||||||
prompt_data = tweet_content.get_prompt() # Get the stored user prompt
|
|
||||||
|
except Exception as e:
|
||||||
return content_json, prompt_data # Return data pair
|
logging.exception(f"Error during content generation attempt {retry_count+1} for {article_index}_{variant_index}:")
|
||||||
|
retry_count += 1
|
||||||
except Exception as e:
|
|
||||||
logging.exception(f"Error generating single content for {article_index}_{variant_index}:")
|
if retry_count <= max_retries:
|
||||||
return {"title": "", "content": "", "error": True, "judge_success": False}, user_prompt # 添加judge_success字段
|
logging.info(f"将尝试第{retry_count}次重试...")
|
||||||
|
else:
|
||||||
|
logging.error(f"达到最大重试次数({max_retries}),无法生成有效内容")
|
||||||
|
|
||||||
|
# 所有重试都失败,返回最后一次的结果(即使不完整)
|
||||||
|
logging.warning(f"在{max_retries}次尝试后仍未生成有效内容,返回最后一次结果")
|
||||||
|
|
||||||
|
# 如果有最后一次结果,尝试使用它
|
||||||
|
if last_result:
|
||||||
|
try:
|
||||||
|
tweet_content = tweetContent(last_result, user_prompt, run_id, article_index, variant_index)
|
||||||
|
content_json = tweet_content.get_json_data()
|
||||||
|
return content_json, user_prompt
|
||||||
|
except Exception as e:
|
||||||
|
logging.exception(f"Error processing last result: {e}")
|
||||||
|
|
||||||
|
# 完全失败的情况,返回空内容
|
||||||
|
return {"title": "", "content": "", "error": True, "judge_success": False}, user_prompt
|
||||||
|
|
||||||
def generate_content(ai_agent, system_prompt, topics, output_dir, run_id, prompts_dir, resource_dir,
|
def generate_content(ai_agent, system_prompt, topics, output_dir, run_id, prompts_dir, resource_dir,
|
||||||
variants=2, temperature=0.3, start_index=0, end_index=None):
|
variants=2, temperature=0.3, start_index=0, end_index=None):
|
||||||
@ -457,8 +499,8 @@ def generate_content_for_topic(ai_agent: AI_Agent,
|
|||||||
logging.info("成功获取产品资料,初始化ContentJudger...")
|
logging.info("成功获取产品资料,初始化ContentJudger...")
|
||||||
# 从配置中读取系统提示词路径(脚本级别无法直接获取,需要传递)
|
# 从配置中读取系统提示词路径(脚本级别无法直接获取,需要传递)
|
||||||
# 使用ai_agent的model_name或api_url判断是否使用主AI模型,避免额外资源占用
|
# 使用ai_agent的model_name或api_url判断是否使用主AI模型,避免额外资源占用
|
||||||
content_judger_system_prompt_path = prompt_manager._system_prompt_cache.get("judger_system_prompt")
|
content_judger_system_prompt = prompt_manager._system_prompt_cache.get("judger_system_prompt")
|
||||||
content_judger = ContentJudger(ai_agent, system_prompt_path=content_judger_system_prompt_path)
|
content_judger = ContentJudger(ai_agent, system_prompt=content_judger_system_prompt)
|
||||||
else:
|
else:
|
||||||
logging.warning("未能获取产品资料,内容审核功能将被跳过")
|
logging.warning("未能获取产品资料,内容审核功能将被跳过")
|
||||||
enable_content_judge = False
|
enable_content_judge = False
|
||||||
@ -521,9 +563,9 @@ def generate_content_for_topic(ai_agent: AI_Agent,
|
|||||||
content_json["judged"] = True
|
content_json["judged"] = True
|
||||||
# 添加judge_success状态
|
# 添加judge_success状态
|
||||||
content_json["judge_success"] = judged_result.get("judge_success", False)
|
content_json["judge_success"] = judged_result.get("judge_success", False)
|
||||||
# 可选:保存审核分析结果
|
# 处理分析结果,优先使用"analysis"字段,兼容"不良内容分析"字段
|
||||||
if "不良内容分析" in judged_result:
|
if "analysis" in judged_result:
|
||||||
content_json["judge_analysis"] = judged_result["不良内容分析"]
|
content_json["judge_analysis"] = judged_result["analysis"]
|
||||||
else:
|
else:
|
||||||
logging.warning(f" 审核结果缺少title或content字段,保留原内容")
|
logging.warning(f" 审核结果缺少title或content字段,保留原内容")
|
||||||
content_json["judge_success"] = False
|
content_json["judge_success"] = False
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user