修复了曾导致提取失败的审核器问题
This commit is contained in:
parent
cd44bbda6f
commit
8085a9a723
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -51,9 +51,9 @@ class ContentJudger:
|
||||
3. 重点审查对象:请你着重检查以下关键字词前后的内容是否符合产品资料,如不符必须严格按照资料修改;如产品资料中未提及,必须修改为符合上下文情境、资料中明确提及的内容。
|
||||
关键字词:价、元、r、人民币、rmb、优惠、活动、福利、赠、免费、折、DIY、跟拍、送、摄影、兑、服务、¥、包、课、提供、选、专业、补、差
|
||||
4. 字数控制:每个文案的标题字数都必须少于19个字(计数包括文字、符号、数字和emoji)。如果标题超过19个字,请在符合文案风格和背景资料的前提下修改标题到19个字以内,尽量保留emoji,必须保证标题流畅通顺。
|
||||
5. 敏感字词替换:请删去标题中的数字后面的“元”和“r”,并将正文中数字后面的“元”字修改为“r”。例如:标题中的399元修改为399,正文中的399元修改为399r
|
||||
6. 特征语句保留:请保留文案中原本的引流语句,不要修改或删除,例如“先关zhu+留下99看到会回复”
|
||||
7. 面向人群保留:请尽量保留文案原本的面向人群和风格,这是同一产品面向多种人群营销的策略。例如产品资料中写明亲子游时,文案写“为情侣定制的山水秘境”是可以接受的。
|
||||
5. 敏感字词替换:请删去标题中的数字后面的"元"和"r",并将正文中数字后面的"元"字修改为"r"。例如:标题中的399元修改为399,正文中的399元修改为399r
|
||||
6. 特征语句保留:请保留文案中原本的引流语句,不要修改或删除,例如"先关zhu+留下99看到会回复"
|
||||
7. 面向人群保留:请尽量保留文案原本的面向人群和风格,这是同一产品面向多种人群营销的策略。例如产品资料中写明亲子游时,文案写"为情侣定制的山水秘境"是可以接受的。
|
||||
8. 案例如下,请参考案例评判真假信息的尺度,逐行逐句仔细分析不符点和修改思路,并按照分析思路落实对每一处不符的修改措施,严格审查每一篇文案:
|
||||
{
|
||||
"产品资料":
|
||||
@ -126,7 +126,7 @@ class ContentJudger:
|
||||
输出结果:
|
||||
{ "不良内容分析" : "
|
||||
1、观察文案标题和内容,可以看出此文案主要面向亲子出游人群,因此修改后的文案也应该围绕亲子出游这一主题。
|
||||
2、文章标题字数为28个字,超过19个字,因此属于不符内容。由于要求中提到尽量保留emoji,并且标题中数字后面的“元”字应删去,所以修改为:五一遛娃👶必囤!喜来登1088景观房
|
||||
2、文章标题字数为28个字,超过19个字,因此属于不符内容。由于要求中提到尽量保留emoji,并且标题中数字后面的"元"字应删去,所以修改为:五一遛娃👶必囤!喜来登1088景观房
|
||||
3、产品资料中未提及儿童乐园开放时间和儿童乐园配置,但文案中提到儿童乐园10:00-20:00全程开放,滑梯/积木/绘本一应俱全,因此属于不符内容。应修改为:儿童乐园:免费儿童乐园和丰富的游乐设施,让孩子们可以尽情玩耍。
|
||||
4、产品材料中未提及户外泳池开放时间和消毒频次,但文案中提到户外泳池:9:00-18:00恒温开放(五一期间每日消毒3次),因此属于不符内容。应修改为:户外泳池:酒店配有户外无边泳池,供大人小孩一同享受清凉时光。
|
||||
5、产品材料中未提及健身房开放时间与具体细节,但文案中提到健身房:8:00-22:00配备亲子瑜伽课程(需提前预约),因此属于不符内容。应修改为:健身房:酒店提供免费健身中心,方便您和家人一起强身健体。
|
||||
@ -174,7 +174,7 @@ class ContentJudger:
|
||||
presence_penalty: 存在惩罚参数
|
||||
|
||||
Returns:
|
||||
dict: 审核后的结果JSON,包含修改后的title和content
|
||||
dict: 审核后的结果JSON,包含修改后的title和content以及judge_success状态
|
||||
"""
|
||||
logging.info("开始内容审核流程")
|
||||
# 构建用户提示词
|
||||
@ -198,16 +198,43 @@ class ContentJudger:
|
||||
end_time = time.time()
|
||||
logging.info(f"AI模型响应完成,耗时:{end_time - start_time:.2f}秒")
|
||||
|
||||
# 保存原始响应用于调试
|
||||
response_log_dir = "/root/autodl-tmp/TravelContentCreator/log/judge_responses"
|
||||
os.makedirs(response_log_dir, exist_ok=True)
|
||||
response_log_file = f"{response_log_dir}/response_{int(time.time())}.txt"
|
||||
with open(response_log_file, "w", encoding="utf-8") as f:
|
||||
f.write(result)
|
||||
logging.info(f"原始响应已保存到: {response_log_file}")
|
||||
|
||||
# 提取修改后的内容
|
||||
modified_content = self._extract_modified_content(result)
|
||||
if modified_content:
|
||||
logging.info("成功提取修改后的内容")
|
||||
# 添加judge_success字段
|
||||
modified_content["judge_success"] = True
|
||||
return modified_content
|
||||
else:
|
||||
return {"title": "提取失败", "content": "无法从响应中提取有效内容"}
|
||||
logging.error("无法从响应中提取有效内容")
|
||||
# 尝试使用原始内容并标记审核失败
|
||||
if isinstance(content, dict) and "title" in content and "content" in content:
|
||||
return {
|
||||
"title": content.get("title", "提取失败"),
|
||||
"content": content.get("content", "无法从响应中提取有效内容"),
|
||||
"judge_success": False
|
||||
}
|
||||
return {
|
||||
"title": "提取失败",
|
||||
"content": "无法从响应中提取有效内容",
|
||||
"judge_success": False
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
return {"title": "审核失败", "content": f"审核过程中出错: {str(e)}"}
|
||||
logging.exception(f"审核过程中出错: {e}")
|
||||
return {
|
||||
"title": "审核失败",
|
||||
"content": f"审核过程中出错: {str(e)}",
|
||||
"judge_success": False
|
||||
}
|
||||
|
||||
def _build_user_prompt(self, product_info, content_gen):
|
||||
"""
|
||||
@ -229,21 +256,106 @@ class ContentJudger:
|
||||
"""
|
||||
|
||||
def _extract_modified_content(self, result_text):
    """Extract the revised title and body text from the judge model's reply.

    The reply is expected to contain a JSON object with "title" and
    "content" keys, optionally preceded by a reasoning section that ends
    with "</think>". Extraction strategies are tried in this order:

    1. Parse the span from the first "{" to the last "}" as JSON.
    2. Regex for double-quoted "title"/"content" values (escape-aware).
    3. Regex for single-quoted values.
    4. Loose key/value matching that tolerates unquoted keys, full-width
       colons and curly quotes.

    Args:
        result_text: Raw text returned by the model.

    Returns:
        A dict with stripped "title" and "content" strings, or None when
        no strategy succeeds (including non-string or blank input).
    """

    def _decode_json_escapes(raw):
        """Decode JSON escape sequences in a captured value; keep raw text on failure."""
        try:
            decoded = json.loads(f'"{raw}"', strict=False)
        except ValueError:
            return raw
        return decoded if isinstance(decoded, str) else raw

    def _first_group(match):
        """Return the first capture group that actually participated in the match."""
        return next(g for g in match.groups() if g is not None)

    if not isinstance(result_text, str) or not result_text.strip():
        logging.warning("Empty or non-string response passed to content extraction.")
        return None

    try:
        processed_text = result_text
        # Log the head of the raw response to help debug extraction failures.
        logging.debug(f"原始响应文本前100字符: {result_text[:100]}")

        # Reasoning models emit their chain of thought before "</think>";
        # only the text after the first closing tag is the actual answer.
        if "</think>" in processed_text:
            processed_text = processed_text.split("</think>", 1)[1].strip()
            logging.debug("检测到</think>标签并分离内容")

        # Attempt 1: parse the outermost {...} span as JSON.
        json_start = processed_text.find('{')
        json_end = processed_text.rfind('}') + 1
        if json_start >= 0 and json_end > json_start:
            json_str = processed_text[json_start:json_end]
            logging.debug(f"找到JSON字符串,长度: {len(json_str)},前100字符: {json_str[:100]}")

            # strict=False lets raw newlines/tabs inside string values parse
            # AND survive, so paragraph breaks in the copy are kept. The
            # control-character-stripped variant is only a fallback, because
            # stripping silently glues paragraphs together.
            json_str_cleaned = re.sub(r'[\x00-\x1F\x7F]', '', json_str)
            candidates = [json_str]
            if json_str_cleaned != json_str:
                candidates.append(json_str_cleaned)

            for candidate in candidates:
                try:
                    content_json = json.loads(candidate, strict=False)
                except json.JSONDecodeError as e:
                    logging.warning(f"JSON parsing failed for substring: '{candidate[:100]}...'. Error: {e}. Will attempt regex extraction.")
                    continue
                if (
                    isinstance(content_json, dict)
                    and isinstance(content_json.get("title"), str)
                    and isinstance(content_json.get("content"), str)
                ):
                    logging.info("Successfully parsed JSON content from AI response.")
                    return {
                        "title": content_json["title"].strip(),
                        "content": content_json["content"].strip()
                    }
                # Valid JSON without usable string fields: stripping control
                # characters cannot change that, so go on to the regex passes.
                break

        # Attempt 2: double-quoted values. The value pattern skips over
        # backslash escapes so an embedded \" does not end the capture early.
        logging.debug("尝试使用正则表达式提取")
        dq_value = r'"((?:\\.|[^"\\])*)"'
        title_match = re.search(r'"title"\s*:\s*' + dq_value, processed_text, re.DOTALL)
        content_match = re.search(r'"content"\s*:\s*' + dq_value, processed_text, re.DOTALL)

        if title_match and content_match:
            logging.info("Successfully extracted title/content using regex.")
            return {
                "title": _decode_json_escapes(title_match.group(1)).strip(),
                "content": _decode_json_escapes(content_match.group(1)).strip()
            }

        # Attempt 3: single-quoted values (Python-dict style output).
        logging.debug("尝试查找使用单引号的内容")
        title_match = re.search(r'''["']title["']\s*:\s*'([^']*)\'''', processed_text)
        content_match = re.search(r'''["']content["']\s*:\s*'([^']*)\'''', processed_text)

        if title_match and content_match:
            logging.info("Successfully extracted title/content using single-quote regex.")
            return {
                "title": title_match.group(1).strip(),
                "content": content_match.group(1).strip()
            }

        # Final attempt: loosely formatted key/value pairs. Keys may be
        # unquoted or wrapped in straight/curly quotes, the separator may be
        # an ASCII or full-width colon, and the value may be delimited by
        # straight quotes or by a matching pair of curly quotes.
        logging.debug("尝试非标准格式提取")
        loose_value = r'(?:"([^"]+)"|“([^”]+)”)'
        title_pattern = re.compile(
            r'''["“”']?title["“”']?\s*[::]\s*''' + loose_value, re.IGNORECASE
        )
        content_pattern = re.compile(
            r'''["“”']?content["“”']?\s*[::]\s*''' + loose_value, re.IGNORECASE
        )

        title_match = title_pattern.search(processed_text)
        content_match = content_pattern.search(processed_text)

        if title_match and content_match:
            logging.info("提取到标题和内容(使用灵活模式匹配)")
            return {
                "title": _first_group(title_match).strip(),
                "content": _first_group(content_match).strip()
            }

        logging.warning(f"所有提取方法失败,响应前300字符: {processed_text[:300]}...")
        return None  # Every extraction strategy failed.

    except Exception as e:
        # Extraction is best-effort: report the failure and let the caller
        # fall back to the un-judged content instead of crashing the run.
        logging.error(f"Unexpected error during content extraction: {e}\n{traceback.format_exc()}")
        return None
|
||||
|
||||
def test_extraction_from_file(self, response_file_path):
|
||||
"""
|
||||
从检测结果文本中提取修改后的文案内容
|
||||
从文件中读取响应并测试提取功能
|
||||
|
||||
Args:
|
||||
result_text: AI响应的文本
|
||||
response_file_path: 响应文件路径
|
||||
|
||||
Returns:
|
||||
dict or None: 提取的内容JSON,提取失败则返回None
|
||||
dict: 提取结果
|
||||
"""
|
||||
try:
|
||||
result_text = result_text.split("</think>")[1]
|
||||
logging.info(f"从文件测试提取: {response_file_path}")
|
||||
with open(response_file_path, 'r', encoding='utf-8') as f:
|
||||
response_text = f.read()
|
||||
|
||||
## 舍弃
|
||||
|
||||
return json.loads(result_text)
|
||||
result = self._extract_modified_content(response_text)
|
||||
if result:
|
||||
logging.info(f"成功从文件提取内容: {result.get('title', '')[:30]}...")
|
||||
return {"success": True, "result": result}
|
||||
else:
|
||||
logging.error(f"从文件中提取内容失败")
|
||||
return {"success": False, "error": "提取失败"}
|
||||
|
||||
except Exception as e:
|
||||
logging.error(f"提取内容时发生错误: {e}")
|
||||
return None
|
||||
logging.exception(f"测试提取时发生错误: {e}")
|
||||
return {"success": False, "error": str(e)}
|
||||
@ -1,6 +1,7 @@
|
||||
import os
|
||||
import random
|
||||
import json
|
||||
import logging
|
||||
class ResourceLoader:
|
||||
"""资源加载器,用于加载提示词和参考资料"""
|
||||
|
||||
@ -13,11 +14,11 @@ class ResourceLoader:
|
||||
content = f.read()
|
||||
return content
|
||||
else:
|
||||
print(f"文件不存在: {file_path}")
|
||||
logging.warning(f"文件不存在: {file_path}")
|
||||
# Return None for non-existent file to distinguish from empty file
|
||||
return None
|
||||
except Exception as e:
|
||||
print(f"加载文件 '{file_path}' 内容失败: {e}")
|
||||
logging.warning(f"加载文件 '{file_path}' 内容失败: {e}")
|
||||
# Return None on error as well
|
||||
return None
|
||||
|
||||
@ -26,10 +27,10 @@ class ResourceLoader:
|
||||
"""加载Refer目录下的指定文件内容"""
|
||||
refer_content = ""
|
||||
if not file_path or not os.path.isfile(file_path):
|
||||
print(f"Warning: Refer directory '{file_path}' not found or invalid.")
|
||||
logging.warning(f"Warning: Refer directory '{file_path}' not found or invalid.")
|
||||
return ""
|
||||
try:
|
||||
if True: # print(file_path)
|
||||
if True:
|
||||
if os.path.isfile(file_path) and file_path.endswith(".txt"):
|
||||
# Use the updated load_file_content
|
||||
content = ResourceLoader.load_file_content(file_path)
|
||||
@ -49,7 +50,7 @@ class ResourceLoader:
|
||||
|
||||
# 检查必要的键是否存在
|
||||
if "title" not in file_content or "description" not in file_content or "examples" not in file_content:
|
||||
print(f"Warning: JSON文件 '{file_path}' 缺少必要的键(title/description/examples)")
|
||||
logging.warning(f"Warning: JSON文件 '{file_path}' 缺少必要的键(title/description/examples)")
|
||||
|
||||
title_content = file_content["title"]
|
||||
description_content = file_content["description"]
|
||||
@ -66,12 +67,12 @@ class ResourceLoader:
|
||||
|
||||
refer_content += f"## {file_path}\n{content}\n\n"
|
||||
else:
|
||||
print(f"Warning: JSON文件 '{file_path}' 的examples不是有效列表")
|
||||
logging.warning(f"Warning: JSON文件 '{file_path}' 的examples不是有效列表")
|
||||
except Exception as json_err:
|
||||
print(f"处理JSON文件 '{file_path}' 失败: {json_err}")
|
||||
logging.warning(f"处理JSON文件 '{file_path}' 失败: {json_err}")
|
||||
return refer_content
|
||||
except Exception as e:
|
||||
print(f"加载Refer目录文件失败: {e}")
|
||||
logging.warning(f"加载Refer目录文件失败: {e}")
|
||||
return ""
|
||||
|
||||
@staticmethod
|
||||
@ -98,7 +99,7 @@ class ResourceLoader:
|
||||
|
||||
return None
|
||||
except Exception as e:
|
||||
print(f"查找文件 '{file_name}' 在 '{directory}' 失败: {e}")
|
||||
logging.warning(f"查找文件 '{file_name}' 在 '{directory}' 失败: {e}")
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
@ -125,7 +126,7 @@ class ResourceLoader:
|
||||
f.write(f"```\n{result}\n```\n\n")
|
||||
f.write("--------------------------------\n\n")
|
||||
except Exception as e:
|
||||
print(f"更新汇总文件时出错: {e}")
|
||||
logging.warning(f"更新汇总文件时出错: {e}")
|
||||
|
||||
@staticmethod
|
||||
def save_article(result, prompt, output_dir, run_id, article_index, variant_index):
|
||||
@ -145,5 +146,5 @@ class ResourceLoader:
|
||||
|
||||
return filepath
|
||||
except Exception as e:
|
||||
print(f"保存文章时出错: {e}")
|
||||
logging.warning(f"保存文章时出错: {e}")
|
||||
return None
|
||||
@ -88,14 +88,22 @@ class tweetContent:
|
||||
json_data = json.loads(processed_result)
|
||||
json_data["error"] = False
|
||||
json_data["raw_result"] = None
|
||||
# 确保judge_success字段存在
|
||||
if "judge_success" not in json_data:
|
||||
json_data["judge_success"] = None
|
||||
return json_data
|
||||
# --- End Existing Logic ---
|
||||
|
||||
except Exception as e:
|
||||
logging.warning(f"解析内容时出错: {e}, 返回空字符串")
|
||||
json_data["error"] = True
|
||||
json_data["raw_result"] = e
|
||||
return json_data
|
||||
logging.warning(f"解析内容时出错: {e}, 使用默认空内容")
|
||||
# 创建一个新的json_data而不是使用未定义的变量
|
||||
return {
|
||||
"title": "",
|
||||
"content": "",
|
||||
"error": True,
|
||||
"raw_result": str(e),
|
||||
"judge_success": False
|
||||
}
|
||||
|
||||
def get_json_data(self):
|
||||
"""Returns the generated JSON data dictionary."""
|
||||
@ -159,7 +167,7 @@ def generate_single_content(ai_agent, system_prompt, user_prompt, item, run_id,
|
||||
|
||||
if result is None: # Check if AI call failed
|
||||
logging.error(f"AI agent work failed for {article_index}_{variant_index}. No result returned.")
|
||||
return {"title": "", "content": "", "error": True}, user_prompt # 返回空字段而不是None
|
||||
return {"title": "", "content": "", "error": True, "judge_success": False}, user_prompt # 添加judge_success字段
|
||||
|
||||
logging.info(f"Content generation for {article_index}_{variant_index} completed in {time_cost:.2f}s. Estimated tokens: {tokens}")
|
||||
|
||||
@ -185,13 +193,13 @@ def generate_single_content(ai_agent, system_prompt, user_prompt, item, run_id,
|
||||
|
||||
except Exception as e:
|
||||
logging.exception(f"Error generating single content for {article_index}_{variant_index}:")
|
||||
return {"title": "", "content": "", "error": True}, user_prompt # 返回空字段而不是None
|
||||
return {"title": "", "content": "", "error": True, "judge_success": False}, user_prompt # 添加judge_success字段
|
||||
|
||||
def generate_content(ai_agent, system_prompt, topics, output_dir, run_id, prompts_dir, resource_dir,
|
||||
variants=2, temperature=0.3, start_index=0, end_index=None):
|
||||
"""根据选题生成内容"""
|
||||
if not topics:
|
||||
print("没有选题,无法生成内容")
|
||||
logging.warning("没有选题,无法生成内容")
|
||||
return
|
||||
|
||||
# 确定处理范围
|
||||
@ -199,7 +207,7 @@ def generate_content(ai_agent, system_prompt, topics, output_dir, run_id, prompt
|
||||
end_index = len(topics)
|
||||
|
||||
topics_to_process = topics[start_index:end_index]
|
||||
print(f"准备处理{len(topics_to_process)}个选题...")
|
||||
logging.info(f"准备处理{len(topics_to_process)}个选题...")
|
||||
|
||||
# 创建汇总文件
|
||||
# summary_file = ResourceLoader.create_summary_file(output_dir, run_id, len(topics_to_process))
|
||||
@ -207,11 +215,11 @@ def generate_content(ai_agent, system_prompt, topics, output_dir, run_id, prompt
|
||||
# 处理每个选题
|
||||
processed_results = []
|
||||
for i, item in enumerate(topics_to_process):
|
||||
print(f"处理第 {i+1}/{len(topics_to_process)} 篇文章")
|
||||
logging.info(f"处理第 {i+1}/{len(topics_to_process)} 篇文章")
|
||||
|
||||
# 为每个选题生成多个变体
|
||||
for j in range(variants):
|
||||
print(f"正在生成变体 {j+1}/{variants}")
|
||||
logging.info(f"正在生成变体 {j+1}/{variants}")
|
||||
|
||||
# 调用单篇文章生成函数
|
||||
tweet_content, result = generate_single_content(
|
||||
@ -225,7 +233,7 @@ def generate_content(ai_agent, system_prompt, topics, output_dir, run_id, prompt
|
||||
# if j == 0:
|
||||
# ResourceLoader.update_summary(summary_file, i+1, user_prompt, result)
|
||||
|
||||
print(f"完成{len(processed_results)}篇文章生成")
|
||||
logging.info(f"完成{len(processed_results)}篇文章生成")
|
||||
return processed_results
|
||||
|
||||
|
||||
@ -520,15 +528,24 @@ content: {content_json.get('content', '')}
|
||||
content_json["content"] = judged_result["content"]
|
||||
# 添加审核标记
|
||||
content_json["judged"] = True
|
||||
# 添加judge_success状态
|
||||
content_json["judge_success"] = judged_result.get("judge_success", False)
|
||||
# 可选:保存审核分析结果
|
||||
if "不良内容分析" in judged_result:
|
||||
content_json["judge_analysis"] = judged_result["不良内容分析"]
|
||||
else:
|
||||
logging.warning(f" 审核结果缺少title或content字段,保留原内容")
|
||||
content_json["judge_success"] = False
|
||||
else:
|
||||
logging.warning(f" 内容审核返回无效结果,保留原内容")
|
||||
content_json["judge_success"] = False
|
||||
except Exception as judge_err:
|
||||
logging.exception(f" 内容审核过程出错: {judge_err},保留原内容")
|
||||
content_json["judge_success"] = False
|
||||
else:
|
||||
# 未启用内容审核时,添加相应标记
|
||||
content_json["judged"] = False
|
||||
content_json["judge_success"] = None
|
||||
|
||||
# Use the output handler to process/save the result
|
||||
output_handler.handle_content_variant(
|
||||
@ -859,7 +876,7 @@ def generate_posters_for_topic(topic_item: dict,
|
||||
collage_img = collage_images[0] # 获取第一个 PIL Image
|
||||
used_image_files = used_image_filenames[0] if used_image_filenames else [] # 获取使用的图片文件名
|
||||
logging.info(f"Collage image generated successfully (in memory). Used images: {used_image_files}")
|
||||
print(f"拼贴图使用的图片文件: {used_image_files}")
|
||||
logging.info(f"拼贴图使用的图片文件: {used_image_files}")
|
||||
|
||||
# --- 使用 Handler 保存 Collage 图片和使用的图片文件信息 ---
|
||||
output_handler.handle_generated_image(
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user