diff --git a/utils/__pycache__/content_judger.cpython-312.pyc b/utils/__pycache__/content_judger.cpython-312.pyc index 1820624..3262b11 100644 Binary files a/utils/__pycache__/content_judger.cpython-312.pyc and b/utils/__pycache__/content_judger.cpython-312.pyc differ diff --git a/utils/__pycache__/output_handler.cpython-312.pyc b/utils/__pycache__/output_handler.cpython-312.pyc index 1d58ef4..15e3ad7 100644 Binary files a/utils/__pycache__/output_handler.cpython-312.pyc and b/utils/__pycache__/output_handler.cpython-312.pyc differ diff --git a/utils/__pycache__/tweet_generator.cpython-312.pyc b/utils/__pycache__/tweet_generator.cpython-312.pyc index 6d44fd5..982c6ac 100644 Binary files a/utils/__pycache__/tweet_generator.cpython-312.pyc and b/utils/__pycache__/tweet_generator.cpython-312.pyc differ diff --git a/utils/content_judger.py b/utils/content_judger.py index 436f4df..86ead85 100644 --- a/utils/content_judger.py +++ b/utils/content_judger.py @@ -51,9 +51,9 @@ class ContentJudger: 3. 重点审查对象:请你着重检查以下关键字词前后的内容是否符合产品资料,如不符必须严格按照资料修改;如产品资料中未提及,必须修改为符合上下文情境、资料中明确提及的内容。 关键字词:价、元、r、人民币、rmb、优惠、活动、福利、赠、免费、折、DIY、跟拍、送、摄影、兑、服务、¥、包、课、提供、选、专业、补、差 4. 字数控制:每个文案的标题字数都必须少于19个字(计数包括文字、符号、数字和emoji)。如果标题超过19个字,请在符合文案风格和背景资料的前提下修改标题到19个字以内,尽量保留emoji,必须保证标题流畅通顺。 -5. 敏感字词替换:请删去标题中的数字后面的“元”和“r”,并将正文中数字后面的“元”字修改为“r”。例如:标题中的399元修改为399,正文中的399元修改为399r -6. 特征语句保留:请保留文案中原本的引流语句,不要修改或删除,例如“先关zhu+留下99看到会回复” -7. 面向人群保留:请尽量保留文案原本的面向人群和风格,这是同一产品面向多种人群营销的策略。例如产品资料中写明亲子游时,文案写“为情侣定制的山水秘境”是可以接受的。 +5. 敏感字词替换:请删去标题中的数字后面的"元"和"r",并将正文中数字后面的"元"字修改为"r"。例如:标题中的399元修改为399,正文中的399元修改为399r +6. 特征语句保留:请保留文案中原本的引流语句,不要修改或删除,例如"先关zhu+留下99看到会回复" +7. 面向人群保留:请尽量保留文案原本的面向人群和风格,这是同一产品面向多种人群营销的策略。例如产品资料中写明亲子游时,文案写"为情侣定制的山水秘境"是可以接受的。 8. 案例如下,请参考案例评判真假信息的尺度,逐行逐句仔细分析不符点和修改思路,并按照分析思路落实对每一处不符的修改措施,严格审查每一篇文案: { "产品资料": @@ -126,7 +126,7 @@ class ContentJudger: 输出结果: { "不良内容分析" : " 1、观察文案标题和内容,可以看出此文案主要面向亲子出游人群,因此修改后的文案也应该围绕亲子出游这一主题。 - 2、文章标题字数为28个字,超过19个字,因此属于不符内容。由于要求中提到尽量保留emoji,并且标题中数字后面的“元”字应删去,所以修改为:五一遛娃👶必囤!喜来登1088景观房 + 2、文章标题字数为28个字,超过19个字,因此属于不符内容。由于要求中提到尽量保留emoji,并且标题中数字后面的"元"字应删去,所以修改为:五一遛娃👶必囤!喜来登1088景观房 3、产品资料中未提及儿童乐园开放时间和儿童乐园配置,但文案中提到儿童乐园10:00-20:00全程开放,滑梯/积木/绘本一应俱全,因此属于不符内容。应修改为:儿童乐园:免费儿童乐园和丰富的游乐设施,让孩子们可以尽情玩耍。 4、产品材料中未提及户外泳池开放时间和消毒频次,但文案中提到户外泳池:9:00-18:00恒温开放(五一期间每日消毒3次),因此属于不符内容。应修改为:户外泳池:酒店配有户外无边泳池,供大人小孩一同享受清凉时光。 5、产品材料中未提及健身房开放时间与具体细节,但文案中提到健身房:8:00-22:00配备亲子瑜伽课程(需提前预约),因此属于不符内容。应修改为:健身房:酒店提供免费健身中心,方便您和家人一起强身健体。 @@ -174,7 +174,7 @@ class ContentJudger: presence_penalty: 存在惩罚参数 Returns: - dict: 审核后的结果JSON,包含修改后的title和content + dict: 审核后的结果JSON,包含修改后的title和content以及judge_success状态 """ logging.info("开始内容审核流程") # 构建用户提示词 @@ -198,16 +198,43 @@ class ContentJudger: end_time = time.time() logging.info(f"AI模型响应完成,耗时:{end_time - start_time:.2f}秒") + # 保存原始响应用于调试 + response_log_dir = "/root/autodl-tmp/TravelContentCreator/log/judge_responses" + os.makedirs(response_log_dir, exist_ok=True) + response_log_file = f"{response_log_dir}/response_{int(time.time())}.txt" + with open(response_log_file, "w", encoding="utf-8") as f: + f.write(result) + logging.info(f"原始响应已保存到: {response_log_file}") + # 提取修改后的内容 modified_content = self._extract_modified_content(result) if modified_content: logging.info("成功提取修改后的内容") + # 添加judge_success字段 + modified_content["judge_success"] = True return modified_content else: - return {"title": "提取失败", "content": "无法从响应中提取有效内容"} + logging.error("无法从响应中提取有效内容") + # 尝试使用原始内容并标记审核失败 + if isinstance(content, dict) and "title" in content and "content" in content: + return { + "title": content.get("title", "提取失败"), + "content": content.get("content", "无法从响应中提取有效内容"), + "judge_success": False + } + return { + "title": "提取失败", + "content": "无法从响应中提取有效内容", + "judge_success": False + } except Exception as e: - return {"title": "审核失败", "content": f"审核过程中出错: {str(e)}"} + logging.exception(f"审核过程中出错: {e}") + return { + "title": "审核失败", + "content": f"审核过程中出错: {str(e)}", + "judge_success": False + } def _build_user_prompt(self, product_info, content_gen): """ @@ -229,21 +256,106 @@ class ContentJudger: """ def _extract_modified_content(self, result_text): + """从检测结果文本中提取修改后的文案内容""" + try: + processed_text = result_text # Work on a copy of the input text + # 记录原始文本前100个字符用于调试 + logging.debug(f"原始响应文本前100字符: {result_text[:100]}") + + if "" in processed_text: + processed_text = processed_text.split("", 1)[1].strip() + logging.debug("检测到标签并分离内容") + + # Attempt 1: Parse as JSON from the processed text + json_start = processed_text.find('{') + json_end = processed_text.rfind('}') + 1 + if json_start >= 0 and json_end > json_start: + json_str = processed_text[json_start:json_end] + logging.debug(f"找到JSON字符串,长度: {len(json_str)},前100字符: {json_str[:100]}") + + # Clean control characters that might break JSON parsing + json_str_cleaned = re.sub(r'[\x00-\x1F\x7F]', '', json_str) + try: + content_json = json.loads(json_str_cleaned) + if "title" in content_json and "content" in content_json: + logging.info("Successfully parsed JSON content from AI response.") + return { + "title": content_json["title"].strip(), + "content": content_json["content"].strip() + } + except json.JSONDecodeError as e: + logging.warning(f"JSON parsing failed for substring: '{json_str_cleaned[:100]}...'. Error: {e}. Will attempt regex extraction.") + + # Attempt 2: Regex on the processed_text (which might have had stripped) + # 修复正则表达式,移除多余的反斜杠 + logging.debug("尝试使用正则表达式提取") + title_match = re.search(r'"title":\s*"([^"]*)"', processed_text) + content_match = re.search(r'"content":\s*"([^"]*)"', processed_text) + + if title_match and content_match: + logging.info("Successfully extracted title/content using regex.") + return { + "title": title_match.group(1).strip(), + "content": content_match.group(1).strip() + } + + # Attempt 3: Try finding content with single quotes + logging.debug("尝试查找使用单引号的内容") + title_match = re.search(r'"title":\s*\'([^\']*)\'', processed_text) + content_match = re.search(r'"content":\s*\'([^\']*)\'', processed_text) + + if title_match and content_match: + logging.info("Successfully extracted title/content using single-quote regex.") + return { + "title": title_match.group(1).strip(), + "content": content_match.group(1).strip() + } + + # Final attempt: Look for key-value pairs without standard JSON formatting + logging.debug("尝试非标准格式提取") + title_pattern = re.compile(r'["""]?title["""]?[::]\s*["""]([^"""]+)["""]', re.IGNORECASE) + content_pattern = re.compile(r'["""]?content["""]?[::]\s*["""]([^"""]+)["""]', re.IGNORECASE) + + title_match = title_pattern.search(processed_text) + content_match = content_pattern.search(processed_text) + + if title_match and content_match: + logging.info("提取到标题和内容(使用灵活模式匹配)") + return { + "title": title_match.group(1).strip(), + "content": content_match.group(1).strip() + } + + logging.warning(f"所有提取方法失败,响应前300字符: {processed_text[:300]}...") + return None # Fallback if all extraction methods fail + + except Exception as e: + logging.error(f"Unexpected error during content extraction: {e}\n{traceback.format_exc()}") + return None + + def test_extraction_from_file(self, response_file_path): """ - 从检测结果文本中提取修改后的文案内容 + 从文件中读取响应并测试提取功能 Args: - result_text: AI响应的文本 + response_file_path: 响应文件路径 Returns: - dict or None: 提取的内容JSON,提取失败则返回None + dict: 提取结果 """ try: - result_text = result_text.split("")[1] + logging.info(f"从文件测试提取: {response_file_path}") + with open(response_file_path, 'r', encoding='utf-8') as f: + response_text = f.read() - ## 舍弃 - - return json.loads(result_text) + result = self._extract_modified_content(response_text) + if result: + logging.info(f"成功从文件提取内容: {result.get('title', '')[:30]}...") + return {"success": True, "result": result} + else: + logging.error(f"从文件中提取内容失败") + return {"success": False, "error": "提取失败"} + except Exception as e: - logging.error(f"提取内容时发生错误: {e}") - return None \ No newline at end of file + logging.exception(f"测试提取时发生错误: {e}") + return {"success": False, "error": str(e)} \ No newline at end of file diff --git a/utils/resource_loader.py b/utils/resource_loader.py index 4cb4ef3..0decd0c 100644 --- a/utils/resource_loader.py +++ b/utils/resource_loader.py @@ -1,6 +1,7 @@ import os import random import json +import logging class ResourceLoader: """资源加载器,用于加载提示词和参考资料""" @@ -13,11 +14,11 @@ class ResourceLoader: content = f.read() return content else: - print(f"文件不存在: {file_path}") + logging.warning(f"文件不存在: {file_path}") # Return None for non-existent file to distinguish from empty file return None except Exception as e: - print(f"加载文件 '{file_path}' 内容失败: {e}") + logging.warning(f"加载文件 '{file_path}' 内容失败: {e}") # Return None on error as well return None @@ -26,10 +27,10 @@ class ResourceLoader: """加载Refer目录下的指定文件内容""" refer_content = "" if not file_path or not os.path.isfile(file_path): - print(f"Warning: Refer directory '{file_path}' not found or invalid.") + logging.warning(f"Warning: Refer directory '{file_path}' not found or invalid.") return "" try: - if True: # print(file_path) + if True: if os.path.isfile(file_path) and file_path.endswith(".txt"): # Use the updated load_file_content content = ResourceLoader.load_file_content(file_path) @@ -49,7 +50,7 @@ class ResourceLoader: # 检查必要的键是否存在 if "title" not in file_content or "description" not in file_content or "examples" not in file_content: - print(f"Warning: JSON文件 '{file_path}' 缺少必要的键(title/description/examples)") + logging.warning(f"Warning: JSON文件 '{file_path}' 缺少必要的键(title/description/examples)") title_content = file_content["title"] description_content = file_content["description"] @@ -66,12 +67,12 @@ class ResourceLoader: refer_content += f"## {file_path}\n{content}\n\n" else: - print(f"Warning: JSON文件 '{file_path}' 的examples不是有效列表") + logging.warning(f"Warning: JSON文件 '{file_path}' 的examples不是有效列表") except Exception as json_err: - print(f"处理JSON文件 '{file_path}' 失败: {json_err}") + logging.warning(f"处理JSON文件 '{file_path}' 失败: {json_err}") return refer_content except Exception as e: - print(f"加载Refer目录文件失败: {e}") + logging.warning(f"加载Refer目录文件失败: {e}") return "" @staticmethod @@ -98,7 +99,7 @@ class ResourceLoader: return None except Exception as e: - print(f"查找文件 '{file_name}' 在 '{directory}' 失败: {e}") + logging.warning(f"查找文件 '{file_name}' 在 '{directory}' 失败: {e}") return None @staticmethod @@ -125,7 +126,7 @@ class ResourceLoader: f.write(f"```\n{result}\n```\n\n") f.write("--------------------------------\n\n") except Exception as e: - print(f"更新汇总文件时出错: {e}") + logging.warning(f"更新汇总文件时出错: {e}") @staticmethod def save_article(result, prompt, output_dir, run_id, article_index, variant_index): @@ -145,5 +146,5 @@ class ResourceLoader: return filepath except Exception as e: - print(f"保存文章时出错: {e}") + logging.warning(f"保存文章时出错: {e}") return None \ No newline at end of file diff --git a/utils/tweet_generator.py b/utils/tweet_generator.py index cd1a28b..85146ed 100644 --- a/utils/tweet_generator.py +++ b/utils/tweet_generator.py @@ -88,14 +88,22 @@ class tweetContent: json_data = json.loads(processed_result) json_data["error"] = False json_data["raw_result"] = None + # 确保judge_success字段存在 + if "judge_success" not in json_data: + json_data["judge_success"] = None return json_data # --- End Existing Logic --- except Exception as e: - logging.warning(f"解析内容时出错: {e}, 返回空字符串") - json_data["error"] = True - json_data["raw_result"] = e - return json_data + logging.warning(f"解析内容时出错: {e}, 使用默认空内容") + # 创建一个新的json_data而不是使用未定义的变量 + return { + "title": "", + "content": "", + "error": True, + "raw_result": str(e), + "judge_success": False + } def get_json_data(self): """Returns the generated JSON data dictionary.""" @@ -159,7 +167,7 @@ def generate_single_content(ai_agent, system_prompt, user_prompt, item, run_id, if result is None: # Check if AI call failed logging.error(f"AI agent work failed for {article_index}_{variant_index}. No result returned.") - return {"title": "", "content": "", "error": True}, user_prompt # 返回空字段而不是None + return {"title": "", "content": "", "error": True, "judge_success": False}, user_prompt # 添加judge_success字段 logging.info(f"Content generation for {article_index}_{variant_index} completed in {time_cost:.2f}s. Estimated tokens: {tokens}") @@ -185,13 +193,13 @@ def generate_single_content(ai_agent, system_prompt, user_prompt, item, run_id, except Exception as e: logging.exception(f"Error generating single content for {article_index}_{variant_index}:") - return {"title": "", "content": "", "error": True}, user_prompt # 返回空字段而不是None + return {"title": "", "content": "", "error": True, "judge_success": False}, user_prompt # 添加judge_success字段 def generate_content(ai_agent, system_prompt, topics, output_dir, run_id, prompts_dir, resource_dir, variants=2, temperature=0.3, start_index=0, end_index=None): """根据选题生成内容""" if not topics: - print("没有选题,无法生成内容") + logging.warning("没有选题,无法生成内容") return # 确定处理范围 @@ -199,7 +207,7 @@ def generate_content(ai_agent, system_prompt, topics, output_dir, run_id, prompt end_index = len(topics) topics_to_process = topics[start_index:end_index] - print(f"准备处理{len(topics_to_process)}个选题...") + logging.info(f"准备处理{len(topics_to_process)}个选题...") # 创建汇总文件 # summary_file = ResourceLoader.create_summary_file(output_dir, run_id, len(topics_to_process)) @@ -207,11 +215,11 @@ def generate_content(ai_agent, system_prompt, topics, output_dir, run_id, prompt # 处理每个选题 processed_results = [] for i, item in enumerate(topics_to_process): - print(f"处理第 {i+1}/{len(topics_to_process)} 篇文章") + logging.info(f"处理第 {i+1}/{len(topics_to_process)} 篇文章") # 为每个选题生成多个变体 for j in range(variants): - print(f"正在生成变体 {j+1}/{variants}") + logging.info(f"正在生成变体 {j+1}/{variants}") # 调用单篇文章生成函数 tweet_content, result = generate_single_content( @@ -225,7 +233,7 @@ def generate_content(ai_agent, system_prompt, topics, output_dir, run_id, prompt # if j == 0: # ResourceLoader.update_summary(summary_file, i+1, user_prompt, result) - print(f"完成{len(processed_results)}篇文章生成") + logging.info(f"完成{len(processed_results)}篇文章生成") return processed_results @@ -520,15 +528,24 @@ content: {content_json.get('content', '')} content_json["content"] = judged_result["content"] # 添加审核标记 content_json["judged"] = True + # 添加judge_success状态 + content_json["judge_success"] = judged_result.get("judge_success", False) # 可选:保存审核分析结果 if "不良内容分析" in judged_result: content_json["judge_analysis"] = judged_result["不良内容分析"] else: logging.warning(f" 审核结果缺少title或content字段,保留原内容") + content_json["judge_success"] = False else: logging.warning(f" 内容审核返回无效结果,保留原内容") + content_json["judge_success"] = False except Exception as judge_err: logging.exception(f" 内容审核过程出错: {judge_err},保留原内容") + content_json["judge_success"] = False + else: + # 未启用内容审核时,添加相应标记 + content_json["judged"] = False + content_json["judge_success"] = None # Use the output handler to process/save the result output_handler.handle_content_variant( @@ -859,7 +876,7 @@ def generate_posters_for_topic(topic_item: dict, collage_img = collage_images[0] # 获取第一个 PIL Image used_image_files = used_image_filenames[0] if used_image_filenames else [] # 获取使用的图片文件名 logging.info(f"Collage image generated successfully (in memory). Used images: {used_image_files}") - print(f"拼贴图使用的图片文件: {used_image_files}") + logging.info(f"拼贴图使用的图片文件: {used_image_files}") # --- 使用 Handler 保存 Collage 图片和使用的图片文件信息 --- output_handler.handle_generated_image(