From 30e8f402d9f212d0712e3c0d739adc82824db9b6 Mon Sep 17 00:00:00 2001
From: jinye_huang <jinye_huang@foxmail.com>
Date: Mon, 12 May 2025 14:00:42 +0800
Subject: [PATCH] =?UTF-8?q?=E6=9B=B4=E6=96=B0=E4=BA=86=E4=B8=80=E4=B8=8B?=
 =?UTF-8?q?=E6=B8=B2=E6=9F=93=E6=A8=A1=E5=9D=97?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 scripts/extract_and_render.py | 111 ++++++++++++++++++++++++++++++----
 1 file changed, 99 insertions(+), 12 deletions(-)

diff --git a/scripts/extract_and_render.py b/scripts/extract_and_render.py
index 7366dae..9f34134 100644
--- a/scripts/extract_and_render.py
+++ b/scripts/extract_and_render.py
@@ -41,6 +41,39 @@ def convert_json_to_txt_content(json_path):
         print(f"    - 错误: 处理 JSON 时出错: {e}")
         return None, f"处理 JSON 时出错: {e}"
 
+def load_topic_data(source_dir, run_id):
+    """
+    Load the topic-selection records of a run from ``tweet_topic_{run_id}.json``.
+
+    Args:
+        source_dir: Directory expected to contain the topic file.
+        run_id: Run identifier used to build the topic file name.
+
+    Returns:
+        dict: Topic records keyed by ``str(index)``; empty when the file is
+        missing or cannot be loaded.
+    """
+    topic_file_path = os.path.join(source_dir, f"tweet_topic_{run_id}.json")
+    topic_data = {}
+
+    if not os.path.exists(topic_file_path):
+        print(f"警告: 未找到选题文件: {topic_file_path}")
+        return topic_data
+
+    try:
+        with open(topic_file_path, 'r', encoding='utf-8') as f:
+            topics = json.load(f)
+        for topic in topics:
+            index = topic.get("index")
+            # Keep index 0 (only skip missing/empty) and key by str so the
+            # digits parsed from "<topic>_<variant>" directory names match.
+            if index is not None and index != "":
+                topic_data[str(index)] = topic
+        print(f"成功加载选题数据，共{len(topic_data)}条")
+    except Exception as e:
+        print(f"加载选题数据时出错: {e}")
+    return topic_data
+
 def process_result_directory(source_dir, output_dir, run_id=None):
     """
     处理指定的结果目录，提取内容并渲染到输出目录。
@@ -62,28 +95,44 @@ def process_result_directory(source_dir, output_dir, run_id=None):
     if not run_id:
         run_id = os.path.basename(source_dir)
     
-    # 创建CSV清单
+    # 加载选题数据
+    topic_data = load_topic_data(source_dir, run_id)
+    
+    # 创建CSV清单，添加选题相关字段
     csv_path = os.path.join(output_dir, f"manifest_{run_id}.csv")
     csv_data = [
         [
             "EntryID",
+            "TopicIndex",
+            "VariantIndex",
+            "Date",
+            "Logic",
+            "Object",
+            "Product",
+            "ProductLogic",
+            "Style",
+            "StyleLogic", 
+            "TargetAudience",
+            "TargetAudienceLogic",
             "SourcePath",
             "ArticleJsonPath",
             "OutputTxtPath",
             "PosterPath",
             "AdditionalImagesCount",
             "Status",
-            "Details"
+            "Details",
+            "JudgeStatus"
         ]
     ]
     
     # 查找所有i_j目录
-    entry_pattern = re.compile(r"^\d+_\d+$")
+    entry_pattern = re.compile(r"^(\d+)_(\d+)$")
     entries = []
     
     for item in os.listdir(source_dir):
         item_path = os.path.join(source_dir, item)
-        if os.path.isdir(item_path) and entry_pattern.match(item):
+        match = entry_pattern.match(item)
+        if os.path.isdir(item_path) and match:
             entries.append(item)
     
     if not entries:
@@ -99,16 +148,36 @@ def process_result_directory(source_dir, output_dir, run_id=None):
         
         print(f"\n处理条目: {entry}")
         
+        # 解析topic_index和variant_index
+        match = entry_pattern.match(entry)
+        topic_index = match.group(1)
+        variant_index = match.group(2)
+        
+        # 获取该话题的选题信息
+        topic_info = topic_data.get(topic_index, {})
+        
         # 创建记录
         record = {
             "EntryID": entry,
+            "TopicIndex": topic_index,
+            "VariantIndex": variant_index,
+            "Date": topic_info.get("date", ""),
+            "Logic": topic_info.get("logic", ""),
+            "Object": topic_info.get("object", ""),
+            "Product": topic_info.get("product", ""),
+            "ProductLogic": topic_info.get("product_logic", ""),
+            "Style": topic_info.get("style", ""),
+            "StyleLogic": topic_info.get("style_logic", ""),
+            "TargetAudience": topic_info.get("target_audience", ""),
+            "TargetAudienceLogic": topic_info.get("target_audience_logic", ""),
             "SourcePath": entry_path,
             "ArticleJsonPath": "",
             "OutputTxtPath": "",
             "PosterPath": "",
             "AdditionalImagesCount": 0,
             "Status": "Processing",
-            "Details": ""
+            "Details": "",
+            "JudgeStatus": ""
         }
         
         # 创建输出条目目录
@@ -128,6 +197,18 @@ def process_result_directory(source_dir, output_dir, run_id=None):
         record["OutputTxtPath"] = txt_path
         
         if os.path.exists(json_path):
+            # 读取article.json
+            try:
+                with open(json_path, 'r', encoding='utf-8') as f_json:
+                    article_data = json.load(f_json)
+                    # 提取judge_success状态
+                    if "judge_success" in article_data:
+                        record["JudgeStatus"] = str(article_data["judge_success"])
+                    elif "judged" in article_data:
+                        record["JudgeStatus"] = "已审核" if article_data["judged"] else "未审核"
+            except Exception as e:
+                print(f"  - 错误: 读取article.json失败: {e}")
+                
             txt_content, error = convert_json_to_txt_content(json_path)
             if error:
                 record["Status"] = "Partial"
@@ -218,20 +299,26 @@ def process_result_directory(source_dir, output_dir, run_id=None):
 
 def main():
     # parser = argparse.ArgumentParser(description="从TravelContentCreator结果目录提取内容并渲染到指定目录")
-    # parser.add_argument("--source", type=str, required=True, help="源目录路径")
-    # parser.add_argument("--output", type=str, required=True, help="输出目录路径")
+    # parser.add_argument("--source", type=str, help="源目录路径")
+    # parser.add_argument("--output", type=str, help="输出目录路径")
     # parser.add_argument("--run-id", type=str, help="自定义运行ID")
     
     # args = parser.parse_args()
-    source = ""
-    output = ""
-    run_id = datetime.now().strftime("%Y%m%d_%H%M%S")
+    
+    # # 默认值设置
+    # source = args.source if args.source else "/root/autodl-tmp/TravelContentCreator/result/2025-05-11_00-26-30"
+    # output = args.output if args.output else "/root/autodl-tmp/TravelContentCreator/output/2025-05-11_00-26-30"
+    # run_id = args.run_id if args.run_id else os.path.basename(source)
+    
+    source = "/root/autodl-tmp/TravelContentCreator/result/2025-05-12_09-33-12"
+    output = "/root/autodl-tmp/TravelContentCreator/output/2025-05-12_09-33-12"
+    run_id = os.path.basename(source)
+    
     print("-" * 60)
     print(f"开始提取和渲染流程")
     print(f"源目录: {source}")
     print(f"输出目录: {output}")
-    if run_id:
-        print(f"运行ID: {run_id}")
+    print(f"运行ID: {run_id}")
     print("-" * 60)
     
     process_result_directory(source, output, run_id)