修改了文件的读取模式

2025-05-21 09:49:21 +08:00 · 2025-05-21 09:49:21 +08:00 · 674082e7d7
commit 674082e7d7
parent 44c79ec8e5
6 changed files with 523 additions and 149 deletions
--- a/utils/pycache/content_judger.cpython-312.pyc
+++ b/utils/pycache/content_judger.cpython-312.pyc
--- a/utils/pycache/output_handler.cpython-312.pyc
+++ b/utils/pycache/output_handler.cpython-312.pyc
--- a/utils/pycache/tweet_generator.cpython-312.pyc
+++ b/utils/pycache/tweet_generator.cpython-312.pyc
--- a/utils/content_judger.py
+++ b/utils/content_judger.py
@ -12,6 +12,7 @@ import traceback
 import sys
 import base64
 import re
 import random
 sys.path.append('/root/autodl-tmp/TravelContentCreator') # 添加项目根目录
 from core.ai_agent import AI_Agent
@ -66,8 +67,8 @@ class ContentJudger:
 6. 特征语句保留：请保留文案中原本的引流语句，不要修改或删除。请保留文案中的换行符 \\n，不要修改或删除换行符。
 7. 面向人群保留：请尽量保留文案原本的面向人群和风格，这是同一产品面向多种人群营销的策略。例如产品资料中写明亲子游时，文案写"为情侣定制的山水秘境"是可以接受的。
 8. 案例如下，请参考案例评判真假信息的尺度，逐行逐句仔细分析不符点和修改思路，并按照分析思路落实对每一处不符的修改措施，严格审查每一篇文案：
-{  
+[  
-"产品资料"：         
+    "产品资料"：         
    "周末不加收【南沙越秀喜来登】1088元/套，豪华客房1间1晚+双人自助早餐+自助晚餐+2大1小水鸟世界门票，免费儿童乐园，户外泳池+健身房~
    不想待在家，又想带娃出去玩?更不想开长途车、人挤人？为你推荐路程短、不塞车、景点多、坐地铁就能直达的溜娃地!
    南沙越秀喜来登是广州南沙区首家国际品牌酒店，坐拥广州南大门，拥有得天独厚的中心位置，可俯瞰蕉门河美景，车程短，不出广州也能玩！
@ -97,15 +98,10 @@ class ContentJudger:
    酒店地址:广东省广州市南沙区海熙大街79-80号
    导航关键词:广州南沙越秀喜来登酒店"
-
+   
-                 
+    "生成文案":         
 "生成文案"："[         
    "title": "五一遛娃👶必囤！南沙喜来登1088元住景观房+双早+门票",
-
+    "content": "五一不想挤人潮？南沙这家酒店直接承包遛娃+度假双重快乐‼️\n地铁直达！2大1小1088元住景观房，含双早+自助晚餐+水鸟世界门票，儿童乐园/泳池/健身房全开放！\n🌟【遛娃刚需全配齐】\n✅ 儿童乐园：10:00-20:00全程开放，滑梯/积木/绘本一应俱全\n✅ 户外泳池：9:00-18:00恒温开放（五一期间每日消毒3次）\n✅ 健身房：8:00-22:00配备亲子瑜伽课程（需提前预约）\n\n📍【1小时玩转南沙】\n① 南沙天后宫（车程20分钟）：穿汉服拍大片，听妈祖传说涨知识\n② 南沙湿地公园（40分钟）：5月芦苇摇曳，带娃认鸟类+乘船探秘\n③ 十九涌海鲜街（45分钟）：现捞现煮生猛海鲜，人均50元吃到撑\n\n🍽️【家长友好细节】\n• 自助晚餐隐藏彩蛋：儿童餐区设独立洗手台+热食保温柜\n• 房内配置：加厚床垫/卡通洗漱杯/尿布台（无需额外购买）\n• 安全保障：全区域监控+24小时安保巡逻\n\n🎁【五一专属加码】\n5月1-5日期间入住，凭房卡可免费领取儿童防晒冰袖+湿巾礼包\n\n📌Tips：\n1. 周一至周四仅限双床房型，周五起可选大床房\n2. 水鸟世界门票需提前1小时至前台领取纸质票\n3. 地铁四号线金洲站下车，打车15分钟直达酒店\n\n这个五一，南沙喜来登让你躺着遛娃！不用长途跋涉，家门口就能玩出仪式感～"
    "content": "
    五一不想挤人潮？南沙这家酒店直接承包遛娃+度假双重快乐‼️\n地铁直达！2大1小1088元住景观房，含双早+自助晚餐+水鸟世界门票，儿童乐园/泳池/健身房全开放！\n🌟【遛娃刚需全配齐】\n✅ 儿童乐园：10:00-20:00全程开放，滑梯/积木/绘本一应俱全\n✅ 户外泳池：9:00-18:00恒温开放（五一期间每日消毒3次）\n✅ 健身房：8:00-22:00配备亲子瑜伽课程（需提前预约）\n\n📍【1小时玩转南沙】\n① 南沙天后宫（车程20分钟）：穿汉服拍大片，听妈祖传说涨知识\n② 南沙湿地公园（40分钟）：5月芦苇摇曳，带娃认鸟类+乘船探秘\n③ 十九涌海鲜街（45分钟）：现捞现煮生猛海鲜，人均50元吃到撑\n\n🍽️【家长友好细节】\n• 自助晚餐隐藏彩蛋：儿童餐区设独立洗手台+热食保温柜\n• 房内配置：加厚床垫/卡通洗漱杯/尿布台（无需额外购买）\n• 安全保障：全区域监控+24小时安保巡逻\n\n🎁【五一专属加码】\n5月1-5日期间入住，凭房卡可免费领取儿童防晒冰袖+湿巾礼包\n\n📌Tips：\n1. 周一至周四仅限双床房型，周五起可选大床房\n2. 水鸟世界门票需提前1小时至前台领取纸质票\n3. 地铁四号线金洲站下车，打车15分钟直达酒店\n\n这个五一，南沙喜来登让你躺着遛娃！不用长途跋涉，家门口就能玩出仪式感～"    " 
 ]"
 }
 输出结果:
 {
@ -113,8 +109,8 @@ class ContentJudger:
    "title": "五一遛娃👶必囤！喜来登1088景观房",
    "content": "五一不想挤人潮？南沙这家酒店直接承包遛娃+度假双重快乐‼️\n地铁直达！2大1小1088r住景观房，含双早+自助晚餐+水鸟世界门票，儿童乐园/泳池/健身房全开放！\n🌟【遛娃刚需全配齐】\n✅ 儿童乐园：酒店设有免费儿童乐园，提供丰富的游乐设施，让孩子们尽情玩耍\n✅ 户外泳池：酒店配有户外无边泳池，供大人小孩一同享受清凉时光  \n✅ 健身房：酒店提供免费健身中心，适合家庭成员共同锻炼。\n\n📍【1小时玩转南沙】\n① 南沙天后宫（车程20分钟）：穿汉服拍大片，听妈祖传说涨知识\n② 南沙湿地公园（40分钟）：5月芦苇摇曳，带娃认鸟类+乘船探秘\n③ 十九涌海鲜街（45分钟）：现捞现煮生猛海鲜，人均50r吃到撑  \n\n🍽️【家长友好细节】  \n• 自助餐厅：供应鲜美海鲜、精美甜品等任君选择，大人小孩都爱吃  \n• 房内配置：55英寸超大纯平电视+独立的浴缸+超大的落地玻璃窗，尽览蕉门河风景，尽享亲子度假时光  \n• 安全保障：酒店设有完善的监控系统和安保措施，全力保障您与家人的安全  \n\n🎁【套餐专属福利】\n1、豪华客房一间一晚(周一至四只开放双床房) \n2、2大1小自助早晚餐 \n3、赠送2大1小水鸟世界门票（酒店前台领取），无需额外购买  \n\n📌Tips：  \n1. 周一至周四仅限双床房型，周五起可选大床房  \n2. 酒店前台领取水鸟世界纸质门票  \n3. 地铁四号线金洲站下车，打车15分钟直达酒店  \n\n这个五一，南沙喜来登让你躺着遛娃！不用长途跋涉，家门口就能玩出仪式感～\n"
 }
-
+]
-8. 必须按照以下格式输出修改后内容，不需要输出无关内容
+9. 必须按照以下格式输出修改后内容，不需要输出无关内容
 {
    "analysis" : "分析过程",
    "title": "修改后的标题",
@ -154,58 +150,133 @@ class ContentJudger:
            logging.error(f"从PromptManager获取系统提示词失败: {e}")
            return False
-    def _split_content(self, result):
+    def _preprocess_for_json(self, text):
-        """
+        """预处理文本，处理JSON结构中的问题字符"""
-        参考tweet_generator的处理方式，解析AI返回的内容
+        if not isinstance(text, str):
            return text
        Args:
            result: AI返回的原始结果
        Returns:
            dict: 解析后的JSON数据
        """
        try:
-            # 处理AI可能返回的思考部分
+            # 1. 处理特殊Unicode字符和标点符号
-            processed_result = result
+            char_map = {
-            if "</think>" in result:
+                '"': '"',  # 特殊Unicode引号替换为标准双引号
-                processed_result = result.split("</think>")[1] # 取</think>标签后的内容
+                '"': '"',  # 特殊Unicode引号替换为标准双引号
                ''': "'",  # 特殊Unicode单引号替换为标准单引号
                ''': "'",  # 特殊Unicode单引号替换为标准单引号
                '，': ',',  # 中文逗号替换为英文逗号
                '：': ':',  # 中文冒号替换为英文冒号
                '（': '(',  # 中文括号替换为英文括号
                '）': ')',  # 中文括号替换为英文括号
                '\u200b': '',  # 零宽空格直接移除
                '\u200c': '',  # 零宽不连字直接移除
                '\u200d': '',  # 零宽连字直接移除
                '\u2028': ' ',  # 行分隔符替换为空格
                '\u2029': ' '   # 段落分隔符替换为空格
            }
-            # 直接尝试解析JSON
+            # 应用字符替换
-            json_data = json.loads(processed_result)
+            for char, replacement in char_map.items():
-            json_data["error"] = False
+                text = text.replace(char, replacement)
            json_data["judge_success"] = True
            return json_data
-        except json.JSONDecodeError as json_err:
+            # 2. 处理控制字符 (ASCII < 32)
-            # JSON解析失败，记录错误并尝试更基本的处理方法
+            cleaned_text = ""
-            logging.warning(f"解析内容时出错: {json_err}, 尝试提取JSON部分")
+            for i, char in enumerate(text):
                if ord(char) < 32:  # ASCII 32以下是控制字符
                    if char in ['\n', '\r', '\t']:  # 保留这些常用控制字符
                        cleaned_text += char
                    else:  # 删除其他控制字符
                        logging.debug(f"移除位置{i}的无效控制字符(ASCII: {ord(char)})")
                        continue
                else:
                    cleaned_text += char
-            try:
+            # 3. 处理JSON结构特定问题
-                # 尝试找到JSON部分（从第一个{到最后一个}）
+            # 处理大括号附近的换行符和空白
-                json_start = processed_result.find('{')
+            if cleaned_text.startswith('{\n'):
-                json_end = processed_result.rfind('}') + 1
+                cleaned_text = '{' + cleaned_text[2:]
            if cleaned_text.startswith('{ '):
                cleaned_text = '{' + cleaned_text[2:]
-                if json_start >= 0 and json_end > json_start:
+            if '\n}' in cleaned_text:
-                    json_str = processed_result[json_start:json_end]
+                cleaned_text = cleaned_text.replace('\n}', '}')
-                    json_data = json.loads(json_str)
+                
-                    json_data["error"] = False
+            if ' }' in cleaned_text:
-                    json_data["judge_success"] = True
+                cleaned_text = cleaned_text.replace(' }', '}')
-                    return json_data
+            
-            except Exception as e:
+            # 4. 处理转义序列 - 保留\n、\r、\t的转义，移除其他转义
-                logging.error(f"尝试提取JSON部分失败: {e}")
+            import re
-        
+            
            # 第一步：将要保留的转义序列临时替换为安全标记
            safe_replacements = {
                r'\\n': '@NEWLINE@',  # 保留换行转义
                r'\\r': '@RETURN@',    # 保留回车转义
                r'\\t': '@TAB@',       # 保留制表符转义
            }
            # 应用安全替换
            for pattern, replacement in safe_replacements.items():
                cleaned_text = re.sub(pattern, replacement, cleaned_text)
            # 第二步：移除除JSON必要转义外的所有反斜杠转义
            # 处理常见的多余转义情况
            cleaned_text = re.sub(r'\\([^\\/"bfnrtu])', r'\1', cleaned_text)  # 移除非特殊字符前的反斜杠
            cleaned_text = cleaned_text.replace('\\"', '"')    # 将转义的双引号还原为普通双引号
            cleaned_text = cleaned_text.replace('\\\'', '\'')  # 将转义的单引号还原为普通单引号
            cleaned_text = cleaned_text.replace('\\\\', '\\')  # 将双反斜杠替换为单反斜杠
            # 第三步：将安全标记替换回原始转义序列
            reverse_replacements = {
                '@NEWLINE@': '\\n',  # 还原换行转义
                '@RETURN@': '\\r',   # 还原回车转义
                '@TAB@': '\\t',      # 还原制表符转义
            }
            # 应用反向替换
            for marker, escape_seq in reverse_replacements.items():
                cleaned_text = cleaned_text.replace(marker, escape_seq)
            # 第四步：再次检查并修复字符串内的换行符（确保100%处理）
            # 这个额外的步骤确保没有任何字符串值中包含实际的换行符
            pattern = r'"([^"\\]*(\\.[^"\\]*)*)"'  # 匹配所有JSON字符串（包括已经有转义字符的）
            def fix_remaining_newlines(match):
                string_value = match.group(1)
                # 确保所有实际换行符都被转义
                fixed_value = string_value.replace('\n', '\\n').replace('\r', '\\r')
                return f'"{fixed_value}"'
            cleaned_text = re.sub(pattern, fix_remaining_newlines, cleaned_text)
            # 5. 确保逗号后换行不会导致问题
            cleaned_text = cleaned_text.replace(',\n', ', ')  # 替换逗号后的换行为空格
            # 6. 尝试解析检验
            try:
                # 尝试进行轻度解析验证
                json.loads(cleaned_text)
                # 如果能成功解析，直接返回
                return cleaned_text
            except json.JSONDecodeError as e:
                logging.debug(f"预处理后JSON仍有问题：{e}，尝试最后的修复...")
                # 最后的处理：使用simplejson替代内置json库尝试修复
                try:
                    import simplejson
                    # 加载后再保存，让simplejson自己处理一些小问题
                    fixed_json = simplejson.loads(cleaned_text, strict=False)
                    return simplejson.dumps(fixed_json)
                except:
                    # simplejson也失败了，继续后续流程
                    pass
            # 7. 记录处理后的文本，以便调试
            logging.debug(f"JSON预处理后的文本长度: {len(cleaned_text)}")
            return cleaned_text
        except Exception as e:
-            logging.error(f"解析内容时出错: {e}")
+            logging.exception(f"JSON预处理过程中出错: {e}")
-        
+            # 发生异常时，返回原始文本，不做修改
-        # 所有解析方法都失败，返回一个默认结果
+            return text
-        return {
+
            "title": "", 
            "content": "", 
            "error": True, 
            "judge_success": False,
            "analysis": f"内容解析失败，错误信息: {str(e)}"
        }
    def judge_content(self, product_info, content, temperature=0.2, top_p=0.5, presence_penalty=0.0):
        """审核内容"""
        logging.info("开始内容审核流程")
@ -220,48 +291,159 @@ class ContentJudger:
                system_prompt=self._system_prompt,
                user_prompt=user_prompt,
                file_folder=None,
-                temperature=self._temperature,
+                temperature=temperature,  # 使用传入的参数
-                top_p=self._topp,
+                top_p=top_p,  # 使用传入的参数
-                presence_penalty=self._presence_penatly,
+                presence_penalty=presence_penalty,  # 使用传入的参数
            )
            # 保存原始响应以便调试
            self._save_response(result, response_id)
            logging.info(f"AI响应长度: {len(result)} 字符")
-            # 使用简化的解析方法处理响应
+            # 尝试多种方法提取JSON
-            content_json = self._split_content(result)
+            json_obj = None
            error_msg = None
-            # 检查解析结果是否有错误
+            # 方法1: 提取{...}的JSON部分
-            if content_json.get("error", False):
+            try:
-                logging.warning(f"内容解析失败，使用原内容")
+                # 移除思考部分
-                return self._create_fallback_result(content)
+                processed_result = result.split("</think>", 1)[-1].strip() if "</think>" in result else result
                # 找到最外层的大括号
                json_start = processed_result.find('{')
                json_end = processed_result.rfind('}') + 1
                if json_start >= 0 and json_end > json_start:
                    # 提取JSON字符串
                    json_str = processed_result[json_start:json_end]
                    # 预处理JSON字符串
                    json_str = self._preprocess_for_json(json_str)
                    # 尝试解析JSON
                    json_obj = json.loads(json_str)
                    logging.info("方法1成功解析JSON")
            except Exception as e:
                error_msg = f"方法1解析JSON失败: {e}"
                logging.debug(error_msg)
                # 继续尝试其他方法
-            # 检查必要字段是否存在
+            # 方法2: 尝试多行解析，逐行检查是否有合法JSON
-            if "title" not in content_json or "content" not in content_json:
+            if not json_obj:
-                logging.warning(f"解析结果缺少必要字段 'title' 或 'content'")
+                try:
-                content_json["judge_success"] = False
+                    lines = result.split('\n')
-                return self._create_fallback_result(content)
+                    for i, line in enumerate(lines):
                        line = line.strip()
                        if line.startswith('{') and line.endswith('}'):
                            try:
                                # 尝试处理和解析这一行
                                processed_line = self._preprocess_for_json(line)
                                json_obj = json.loads(processed_line)
                                logging.info(f"方法2在第{i+1}行成功解析JSON")
                                break
                            except:
                                # 继续尝试下一行
                                pass
                except Exception as e:
                    if not error_msg:
                        error_msg = f"方法2解析JSON失败: {e}"
                        logging.debug(error_msg)
-            # 添加Base64编码内容
+            # 方法3: 尝试使用正则表达式匹配最可能的JSON部分
-            result_dict = {
+            if not json_obj:
-                "judge_success": content_json.get("judge_success", True),
+                try:
                    import re
                    # 尝试匹配 {..."title":...,"content":...}
                    json_pattern = r'\{[^{}]*"title"[^{}]*"content"[^{}]*\}'
                    matches = re.findall(json_pattern, result, re.DOTALL)
                    if matches:
                        for match in matches:
                            try:
                                processed_match = self._preprocess_for_json(match)
                                json_obj = json.loads(processed_match)
                                logging.info("方法3成功解析JSON")
                                break
                            except:
                                # 继续尝试下一个匹配
                                pass
                except Exception as e:
                    if not error_msg:
                        error_msg = f"方法3解析JSON失败: {e}"
                        logging.debug(error_msg)
            # 处理解析结果
            if json_obj and isinstance(json_obj, dict):
                # 验证关键字段
                if "title" in json_obj and "content" in json_obj:
                    # 构建结果字典
                    result_dict = {
                        "judge_success": True,
                        "judged": True,
                        "title": json_obj["title"],
                        "content": json_obj["content"],
                        "title_base64": base64.b64encode(json_obj["title"].encode('utf-8')).decode('utf-8'),
                        "content_base64": base64.b64encode(json_obj["content"].encode('utf-8')).decode('utf-8')
                    }
                    # 添加分析字段(如果存在)
                    if "analysis" in json_obj:
                        result_dict["analysis"] = json_obj["analysis"]
                        result_dict["analysis_base64"] = base64.b64encode(json_obj["analysis"].encode('utf-8')).decode('utf-8')
                    logging.info(f"成功提取内容: 标题({len(json_obj['title'])}字符), 内容({len(json_obj['content'])}字符)")
                    return result_dict
                else:
                    # JSON对象缺少必要字段
                    logging.warning("解析的JSON缺少必要字段'title'或'content'")
                    error_msg = "缺少必要字段'title'或'content'"
                    # 保存错误日志
                    self._save_error_json(json.dumps(json_obj), error_msg, response_id)
            else:
                # 未找到有效的JSON
                if error_msg:
                    logging.warning(f"JSON解析失败: {error_msg}")
                else:
                    logging.warning("找不到有效的JSON结构")
                # 保存可能的JSON字符串以供调试
                if json_start >= 0 and json_end > json_start:
                    json_str = processed_result[json_start:json_end]
                    self._save_error_json(json_str, error_msg or "解析失败", response_id)
            # 所有方法都失败，返回空内容
            logging.info("内容审核过程未能产生有效结果，返回空内容")
            empty_result = {
                "judge_success": False,
                "judged": True,
-                "title": content_json["title"],
+                "title": "",
-                "content": content_json["content"],
+                "content": "",
-                "title_base64": base64.b64encode(content_json["title"].encode('utf-8')).decode('utf-8'),
+                "title_base64": base64.b64encode("".encode('utf-8')).decode('utf-8'),
-                "content_base64": base64.b64encode(content_json["content"].encode('utf-8')).decode('utf-8')
+                "content_base64": base64.b64encode("".encode('utf-8')).decode('utf-8')
            }
-            # 如果有analysis字段，也包含
+            if error_msg:
-            if "analysis" in content_json:
+                empty_result["analysis"] = f"内容审核失败: {error_msg}"
-                result_dict["analysis"] = content_json["analysis"]
+                empty_result["analysis_base64"] = base64.b64encode(f"内容审核失败: {error_msg}".encode('utf-8')).decode('utf-8')
                result_dict["analysis_base64"] = base64.b64encode(content_json["analysis"].encode('utf-8')).decode('utf-8')
            return result_dict
            return empty_result
        except Exception as e:
            # 捕获所有异常
            error_traceback = traceback.format_exc()
            logging.exception(f"审核过程中出错: {e}")
-            return self._create_fallback_result(content, error_msg=str(e))
+            logging.debug(f"详细错误: {error_traceback}")
            return {
                "judge_success": False,
                "judged": True,
                "title": "",
                "content": "",
                "title_base64": base64.b64encode("".encode('utf-8')).decode('utf-8'),
                "content_base64": base64.b64encode("".encode('utf-8')).decode('utf-8'),
                "analysis": f"内容审核过程出错: {e}",
                "analysis_base64": base64.b64encode(f"内容审核过程出错: {e}".encode('utf-8')).decode('utf-8')
            }
    def _save_response(self, response, response_id):
        """保存原始响应"""
@ -273,6 +455,29 @@ class ContentJudger:
        except Exception as e:
            logging.error(f"保存原始响应失败: {e}")
    def _save_error_json(self, json_str, error, response_id):
        """Persist a JSON string that failed to parse, plus error details, for debugging.

        The record is written to ``error_<response_id>.json`` under the
        hard-coded ``json_errors`` log directory, which is created on demand.
        This is best-effort: any exception raised while building or writing
        the record is logged and swallowed, so the method never raises.

        Args:
            json_str: The raw JSON text that could not be parsed.
            error: The error (or error message) describing the failure.
            response_id: Identifier of the AI response; used in the file name.
        """
        log_root = "/root/autodl-tmp/TravelContentCreator/log/json_errors"
        try:
            os.makedirs(log_root, exist_ok=True)

            # Gather the individual fields first, then assemble the record.
            message = str(error)
            if hasattr(error, "__class__"):
                error_kind = error.__class__.__name__
            else:
                error_kind = "Unknown"
            stamp = int(time.time())

            record = {
                "error_message": message,
                "error_type": error_kind,
                "timestamp": stamp,
                "response_id": response_id,
                "json_string": json_str
            }

            # Write the record as pretty-printed UTF-8 JSON.
            target = f"{log_root}/error_{response_id}.json"
            with open(target, "w", encoding="utf-8") as handle:
                json.dump(record, handle, ensure_ascii=False, indent=2)
            logging.info(f"已保存错误JSON到 {log_root}/error_{response_id}.json")
        except Exception as exc:
            logging.error(f"保存错误JSON失败: {exc}")
    def _create_fallback_result(self, content, error_msg="解析失败"):
        """创建回退结果"""
        if isinstance(content, str):
@ -328,4 +533,86 @@ class ContentJudger:
 ## 运营生成的文案（需要审核的内容）:
 {content_str}
-"""
+"""
    def judge_content_with_retry(self, product_info, content, max_retries=3, temperature=0.2, top_p=0.5, presence_penalty=0.0):
        """Review content via ``judge_content``, retrying while the result is empty.

        An attempt counts as successful when the result reports
        ``judge_success`` and carries a non-empty ``title`` and ``content``.
        Each retry raises the temperature by 0.1 (capped at 0.9) and is
        preceded by a random 1-3 second pause to avoid hammering the backend.

        Args:
            product_info: Product reference material passed to ``judge_content``.
            content: The content to review, passed to ``judge_content``.
            max_retries: Maximum number of retries after the first attempt.
                Negative values are treated as 0, so at least one attempt
                is always made.
            temperature: Sampling temperature for the first attempt.
            top_p: Forwarded unchanged to ``judge_content``.
            presence_penalty: Forwarded unchanged to ``judge_content``.

        Returns:
            dict: The first successful result, or the result of the last
            attempt when every attempt failed.
        """
        # A negative budget previously skipped the loop entirely and then
        # crashed on ``last_result.get`` because no attempt had been made.
        max_retries = max(0, max_retries)
        total_attempts = max_retries + 1
        last_result = None

        logging.info(f"开始内容审核流程，最大重试次数: {max_retries}，初始温度参数: {temperature}")

        for retry_count in range(total_attempts):
            current_attempt = retry_count + 1

            if retry_count > 0:
                # Raise the temperature on every retry to get more varied output.
                adjusted_temperature = min(temperature + (retry_count * 0.1), 0.9)
                logging.info(f"🔄 内容审核重试 ({current_attempt}/{total_attempts})，调整温度参数为: {adjusted_temperature:.2f}")
            else:
                adjusted_temperature = temperature
                logging.info(f"⏳ 内容审核首次尝试 (1/{total_attempts})，使用默认温度: {adjusted_temperature:.2f}")

            result = self.judge_content(
                product_info,
                content,
                temperature=adjusted_temperature,
                top_p=top_p,
                presence_penalty=presence_penalty
            )
            last_result = result

            # ``or ""`` keeps the statistics from crashing on a None field.
            title_len = len(result.get("title") or "")
            content_len = len(result.get("content") or "")

            if result.get("judge_success", False) and result.get("title") and result.get("content"):
                if retry_count > 0:
                    logging.info(f"✅ 成功！在第{retry_count}次重试后获取有效内容（共尝试{current_attempt}次）")
                else:
                    logging.info(f"✅ 成功！首次尝试已获取有效内容")
                logging.info(f"📊 审核结果统计：标题长度={title_len}字符，内容长度={content_len}字符")
                return result

            logging.warning(f"❌ 审核尝试 {current_attempt}/{total_attempts} 失败，judge_success={result.get('judge_success')}，标题长度={title_len}，内容长度={content_len}")

            if current_attempt < total_attempts:
                # Short random pause (1-3 s) before the next request.
                delay = 1 + random.random() * 2
                remaining = total_attempts - current_attempt
                logging.info(f"⏱️ 等待{delay:.1f}秒后进行第{current_attempt+1}次尝试，剩余{remaining}次尝试机会")
                time.sleep(delay)
            else:
                logging.warning(f"⛔ 已达到最大重试次数，共尝试{current_attempt}次均未获取满意结果")

        # Every attempt failed: hand back whatever the last attempt produced.
        logging.warning(f"⚠️ {total_attempts}次尝试后仍未获取有效内容，将返回最后一次结果")

        title_len = len(last_result.get("title") or "")
        content_len = len(last_result.get("content") or "")
        logging.info(f"📄 最终返回内容：judge_success={last_result.get('judge_success')}，标题长度={title_len}字符，内容长度={content_len}字符")

        return last_result
--- a/utils/output_handler.py
+++ b/utils/output_handler.py
@ -164,10 +164,19 @@ class FileSystemOutputHandler(OutputHandler):
            if "tags" in input_data and "original_tags" not in input_data:
                input_data["original_tags"] = input_data["tags"]
        # 统一审核分析字段，优先使用judge_analysis，其次使用不良内容分析
        if "judge_analysis" not in input_data and "不良内容分析" in input_data:
            input_data["judge_analysis"] = input_data["不良内容分析"]
        elif "不良内容分析" not in input_data and "judge_analysis" in input_data:
            input_data["不良内容分析"] = input_data["judge_analysis"]
        # 保存原始值用于txt文件生成和调试
-        original_title = input_data.get("title", "")
+        original_title = input_data.get("original_title", input_data.get("title", ""))
-        original_content = input_data.get("content", "")
+        original_content = input_data.get("original_content", input_data.get("content", ""))
-        original_tags = input_data.get("tags", "")
+        original_tags = input_data.get("original_tags", input_data.get("tags", ""))
        judge_title = input_data.get("title", "")
        judge_content = input_data.get("content", "")
        judge_tags = input_data.get("tags", "")
        original_judge_analysis = input_data.get("judge_analysis", "")
        # 创建一个只包含元数据和base64编码的输出数据对象
@ -201,9 +210,10 @@ class FileSystemOutputHandler(OutputHandler):
            if "original_tags" in input_data and input_data["original_tags"]:
                output_data["original_tags_base64"] = base64.b64encode(input_data["original_tags"].encode('utf-8')).decode('ascii')
-            # 5. 审核分析
+            # 5. 审核分析 - 检查judge_analysis和不良内容分析两个字段
-            if "judge_analysis" in input_data and input_data["judge_analysis"]:
+            judge_analysis = input_data.get("judge_analysis", input_data.get("不良内容分析", ""))
-                output_data["judge_analysis_base64"] = base64.b64encode(input_data["judge_analysis"].encode('utf-8')).decode('ascii')
+            if judge_analysis:
                output_data["judge_analysis_base64"] = base64.b64encode(judge_analysis.encode('utf-8')).decode('ascii')
            logging.info("成功添加Base64编码内容")
        except Exception as e:
@ -226,20 +236,47 @@ class FileSystemOutputHandler(OutputHandler):
        # 创建一份article.txt文件以便直接查看
        txt_path = os.path.join(variant_dir, "article.txt")
        try:
-            # 使用原始内容，保留所有换行符
+            # 重新组织内容显示，明确区分原始内容和审核后内容
            with open(txt_path, "w", encoding="utf-8") as f:
-                if original_title:
+                # 根据审核状态决定显示哪些内容
                is_judged = input_data.get("judged", False)
                is_judge_success = input_data.get("judge_success", False)
                if is_judged and is_judge_success:
                    # 显示审核后的内容
                    f.write(f"{judge_title}\n\n")
                    if judge_content:
                        f.write(judge_content)
                    if judge_tags:
                        f.write(f"\n\n{judge_tags}")
                    # 在最后添加原始内容作为参考
                    if original_title != judge_title or original_content != judge_content:
                        f.write("\n\n=== 原始内容 ===\n")
                        f.write(f"{original_title}\n\n")
                        if original_content:
                            f.write(original_content)
                        if original_tags and original_tags != judge_tags:
                            f.write(f"\n\n{original_tags}")
                elif is_judged and not is_judge_success:
                    # 审核失败，显示审核失败信息和原始内容
                    f.write("审核失败\n\n")
                    f.write(f"{original_title}\n\n")
                    if original_content:
                        f.write(original_content)
                    if original_tags:
                        f.write(f"\n\n{original_tags}")
                else:
                    # 未审核，直接显示原始内容
                    f.write(f"{original_title}\n\n")
                    if original_content:
                        f.write(original_content)
                    if original_tags:
                        f.write(f"\n\n{original_tags}")
-                # 保持原始内容的所有换行符
+                # 添加审核分析信息（如果有）
                if original_content:
                    f.write(original_content)
                if original_tags:
                    f.write(f"\n\n{original_tags}")
                if original_judge_analysis:
-                    f.write(f"\n\n审核分析：\n{original_judge_analysis}")
+                    f.write(f"\n\n=== 审核分析 ===\n{original_judge_analysis}")
            logging.info(f"Article text saved to: {txt_path}")
        except Exception as e:
@ -253,8 +290,16 @@ class FileSystemOutputHandler(OutputHandler):
                f.write(f"原始内容: {original_content}\n\n")
                if original_tags:
                    f.write(f"原始标签: {original_tags}\n\n")
                if is_judged:
                    f.write(f"审核状态: {'成功' if is_judge_success else '失败'}\n")
                    if is_judge_success:
                        f.write(f"审核后标题: {judge_title}\n\n")
                        f.write(f"审核后内容: {judge_content}\n\n")
                if original_judge_analysis:
                    f.write(f"审核分析: {original_judge_analysis}\n\n")
                f.write("---处理后---\n\n")
                for key, value in output_data.items():
                    if isinstance(value, str):
@ -335,7 +380,7 @@ class FileSystemOutputHandler(OutputHandler):
            # 保存配置到JSON文件
            config_file_path = os.path.join(variant_dir, f"topic_{topic_index}_poster_configs.json")
            with open(config_file_path, 'w', encoding='utf-8') as f:
-                json.dump(processed_configs, f, ensure_ascii=False, indent=4, cls=self.SafeJSONEncoder)
+                json.dump(processed_configs, f, ensure_ascii=False, indent=4)
            logging.info(f"Successfully saved poster configs to {config_file_path}")
        except Exception as e:
            logging.error(f"Error saving poster configs: {e}")
--- a/utils/tweet_generator.py
+++ b/utils/tweet_generator.py
@ -133,52 +133,94 @@ def generate_topics(ai_agent, system_prompt, user_prompt, run_id, temperature=0.
 def generate_single_content(ai_agent, system_prompt, user_prompt, item, run_id, 
-                           article_index, variant_index, temperature=0.3, top_p=0.4, presence_penalty=1.5):
+                           article_index, variant_index, temperature=0.3, top_p=0.4, presence_penalty=1.5, 
                           max_retries=3):
    """Generates single content variant data. Returns (content_json, user_prompt) or (None, None)."""
    logging.info(f"Generating content for topic {article_index}, variant {variant_index}")
-    try:
+    
-        if not system_prompt or not user_prompt:
+    if not system_prompt or not user_prompt:
-            logging.error("System or User prompt is empty. Cannot generate content.")
+        logging.error("System or User prompt is empty. Cannot generate content.")
-            return None, None
+        return None, None
    logging.debug(f"Using pre-constructed prompts. User prompt length: {len(user_prompt)}")
    # 实现重试逻辑
    retry_count = 0
    last_result = None
    last_tokens = None
    last_time_cost = None
    while retry_count <= max_retries:
        try:
            # 只有重试时增加延迟和调整参数
            if retry_count > 0:
                # 添加随机延迟避免频繁请求
                delay = 1 + random.random() * 2  # 1-3秒随机延迟
                logging.info(f"内容生成重试 ({retry_count}/{max_retries})，等待{delay:.1f}秒后尝试...")
                time.sleep(delay)
                # 调整温度参数，增加多样性
                adjusted_temperature = min(temperature + (retry_count * 0.1), 0.9)
                logging.info(f"调整温度参数为: {adjusted_temperature}")
            else:
                adjusted_temperature = temperature
-        logging.debug(f"Using pre-constructed prompts. User prompt length: {len(user_prompt)}")
+            # Generate content (non-streaming work returns result, tokens, time_cost)
-        
+            result, tokens, time_cost = ai_agent.work(
-        time.sleep(random.random() * 0.5)
+                system_prompt, user_prompt, "", adjusted_temperature, top_p, presence_penalty
-        
+            )
        # Generate content (non-streaming work returns result, tokens, time_cost)
        result, tokens, time_cost = ai_agent.work(
            system_prompt, user_prompt, "", temperature, top_p, presence_penalty
        )
        if result is None: # Check if AI call failed
            logging.error(f"AI agent work failed for {article_index}_{variant_index}. No result returned.")
            return {"title": "", "content": "", "error": True, "judge_success": False}, user_prompt  # 添加judge_success字段
-        logging.info(f"Content generation for {article_index}_{variant_index} completed in {time_cost:.2f}s. Estimated tokens: {tokens}")
+            last_result = result
            last_tokens = tokens
            last_time_cost = time_cost
            if result is None: # Check if AI call failed completely
                logging.error(f"AI agent work failed for {article_index}_{variant_index}. No result returned.")
                retry_count += 1
                continue
            logging.info(f"Content generation for {article_index}_{variant_index} completed in {time_cost:.2f}s. Estimated tokens: {tokens}")
-        # --- Create tweetContent object (handles parsing) --- 
+            # --- Create tweetContent object (handles parsing) --- 
-        # Pass user_prompt instead of full prompt? Yes, user_prompt is what we need later.
+            tweet_content = tweetContent(result, user_prompt, run_id, article_index, variant_index)
-        tweet_content = tweetContent(result, user_prompt, run_id, article_index, variant_index)
+            content_json = tweet_content.get_json_data()
-
+            
-        # --- Remove Saving Logic --- 
+            # 检查是否成功解析到有效内容
-        # run_specific_output_dir = os.path.join(output_dir, run_id) # output_dir no longer available
+            if not content_json.get("error", False) and content_json.get("title") and content_json.get("content"):
-        # variant_result_dir = os.path.join(run_specific_output_dir, f"{article_index}_{variant_index}")
+                # 成功获取有效内容
-        # os.makedirs(variant_result_dir, exist_ok=True)
+                if retry_count > 0:
-        # content_save_path = os.path.join(variant_result_dir, "article.json")
+                    logging.info(f"在第{retry_count}次重试后成功获取有效内容")
-        # prompt_save_path = os.path.join(variant_result_dir, "tweet_prompt.txt")
+                # 返回成功结果
-        # tweet_content.save_content(content_save_path) # Method removed
+                return content_json, user_prompt
-        # tweet_content.save_prompt(prompt_save_path) # Method removed
+            else:
-        # --- End Remove Saving Logic --- 
+                logging.warning(f"内容解析失败或内容不完整，结果: {content_json.get('error')}, 标题长度: {len(content_json.get('title', ''))}, 内容长度: {len(content_json.get('content', ''))}")
-        
+            
-        # Return the data needed by the output handler
+            # 如果到这里，说明内容生成或解析有问题，需要重试
-        content_json = tweet_content.get_json_data()
+            retry_count += 1
-        prompt_data = tweet_content.get_prompt() # Get the stored user prompt
+            
-        
+        except Exception as e:
-        return content_json, prompt_data # Return data pair
+            logging.exception(f"Error during content generation attempt {retry_count+1} for {article_index}_{variant_index}:")
-        
+            retry_count += 1
-    except Exception as e:
+            
-        logging.exception(f"Error generating single content for {article_index}_{variant_index}:")
+            if retry_count <= max_retries:
-        return {"title": "", "content": "", "error": True, "judge_success": False}, user_prompt  # 添加judge_success字段
+                logging.info(f"将尝试第{retry_count}次重试...")
            else:
                logging.error(f"达到最大重试次数({max_retries})，无法生成有效内容")
    # 所有重试都失败，返回最后一次的结果（即使不完整）
    logging.warning(f"在{max_retries}次尝试后仍未生成有效内容，返回最后一次结果")
    # 如果有最后一次结果，尝试使用它
    if last_result:
        try:
            tweet_content = tweetContent(last_result, user_prompt, run_id, article_index, variant_index)
            content_json = tweet_content.get_json_data()
            return content_json, user_prompt
        except Exception as e:
            logging.exception(f"Error processing last result: {e}")
    # 完全失败的情况，返回空内容
    return {"title": "", "content": "", "error": True, "judge_success": False}, user_prompt
 def generate_content(ai_agent, system_prompt, topics, output_dir, run_id, prompts_dir, resource_dir,
                    variants=2, temperature=0.3, start_index=0, end_index=None):
@ -457,8 +499,8 @@ def generate_content_for_topic(ai_agent: AI_Agent,
            logging.info("成功获取产品资料，初始化ContentJudger...")
            # 从配置中读取系统提示词路径（脚本级别无法直接获取，需要传递）
            # 使用ai_agent的model_name或api_url判断是否使用主AI模型，避免额外资源占用
-            content_judger_system_prompt_path = prompt_manager._system_prompt_cache.get("judger_system_prompt")
+            content_judger_system_prompt = prompt_manager._system_prompt_cache.get("judger_system_prompt")
-            content_judger = ContentJudger(ai_agent, system_prompt_path=content_judger_system_prompt_path)
+            content_judger = ContentJudger(ai_agent, system_prompt=content_judger_system_prompt)
        else:
            logging.warning("未能获取产品资料，内容审核功能将被跳过")
            enable_content_judge = False
@ -521,9 +563,9 @@ def generate_content_for_topic(ai_agent: AI_Agent,
                                content_json["judged"] = True
                                # 添加judge_success状态
                                content_json["judge_success"] = judged_result.get("judge_success", False)
-                                # 可选：保存审核分析结果
+                                # 处理分析结果，优先使用"analysis"字段，兼容"不良内容分析"字段
-                                if "不良内容分析" in judged_result:
+                                if "analysis" in judged_result:
-                                    content_json["judge_analysis"] = judged_result["不良内容分析"]
+                                    content_json["judge_analysis"] = judged_result["analysis"]
                            else:
                                logging.warning(f"  审核结果缺少title或content字段，保留原内容")
                                content_json["judge_success"] = False