From 1018fd53bc553ae21aa76be32ebcdc2afc0bfa1e Mon Sep 17 00:00:00 2001
From: yujie_jiang <2621675592@qq.com>
Date: Fri, 9 May 2025 16:55:12 +0800
Subject: [PATCH] add_tags_to_detectedarticles

---
 add_tags.py | 81 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 81 insertions(+)
 create mode 100644 add_tags.py

diff --git a/add_tags.py b/add_tags.py
new file mode 100644
index 0000000..4d961ac
--- /dev/null
+++ b/add_tags.py
@@ -0,0 +1,81 @@
+import json
+import os
+import glob
+import re
+
+def copy_tags_for_all_folders(base_dir="Content_detector"):
+    """Copy the ``tag`` field of each article.json into its article_detect.json.
+
+    Walks ``base_dir/四季梦幻<N>/<date dir>/<N>_*``. For every sub-folder holding
+    both ``article.json`` and ``article_detect.json``, the top-level ``tag``
+    value of the former is stored in the latter, which is rewritten as UTF-8
+    JSON with 4-space indentation. Sub-folders missing either file or the
+    ``tag`` key are skipped; progress and errors are printed to stdout.
+
+    Args:
+        base_dir: Directory containing the "四季梦幻*" folders. Defaults to
+            "Content_detector", relative to the current working directory.
+
+    Raises:
+        OSError: If ``base_dir`` cannot be listed (e.g. it does not exist).
+    """
+    # Only directories named "四季梦幻*" take part; sorting makes the
+    # processing order (and therefore the log) reproducible.
+    dream_folders = sorted(
+        folder for folder in os.listdir(base_dir)
+        if folder.startswith("四季梦幻")
+        and os.path.isdir(os.path.join(base_dir, folder))
+    )
+    print(f"找到以下'四季梦幻'文件夹：{dream_folders}")
+
+    total_processed = 0
+    for dream_folder in dream_folders:
+        dream_path = os.path.join(base_dir, dream_folder)
+
+        # "四季梦幻1" -> sub-folders named "1_*"; when no number follows the
+        # name, fall back to the glob wildcard "*_".
+        folder_number = re.search(r'四季梦幻(\d+)', dream_folder)
+        subfolder_prefix = f"{folder_number.group(1)}_" if folder_number else "*_"
+        print(f"处理 {dream_folder}，子文件夹前缀: {subfolder_prefix}")
+
+        # glob.escape keeps "[", "?" or "*" in a real directory name from
+        # being read as pattern syntax; only the last component is a pattern.
+        date_pattern = os.path.join(glob.escape(dream_path), "*")
+        for date_dir in sorted(glob.glob(date_pattern)):
+            if not os.path.isdir(date_dir):
+                continue
+
+            sub_pattern = os.path.join(glob.escape(date_dir), f"{subfolder_prefix}*")
+            folder_processed = 0
+            for sub_dir in sorted(glob.glob(sub_pattern)):
+                article_path = os.path.join(sub_dir, "article.json")
+                article_detect_path = os.path.join(sub_dir, "article_detect.json")
+                if not (os.path.exists(article_path) and os.path.exists(article_detect_path)):
+                    print(f"跳过 {sub_dir}：文件不存在")
+                    continue
+
+                try:
+                    with open(article_path, 'r', encoding='utf-8') as f:
+                        article_data = json.load(f)
+                    with open(article_detect_path, 'r', encoding='utf-8') as f:
+                        article_detect_data = json.load(f)
+                    if 'tag' not in article_data:
+                        print(f"跳过 {sub_dir}：没有tag字段")
+                        continue
+
+                    article_detect_data['tag'] = article_data['tag']
+                    with open(article_detect_path, 'w', encoding='utf-8') as f:
+                        json.dump(article_detect_data, f, ensure_ascii=False, indent=4)
+                except Exception as e:
+                    # Best effort: one bad folder must not abort the batch.
+                    print(f"处理 {sub_dir} 时出错: {e}")
+                else:
+                    folder_processed += 1
+                    total_processed += 1
+
+            print(f"在文件夹 {date_dir} 中处理了 {folder_processed} 个文件")
+
+    print(f"任务完成！总共处理了 {total_processed} 个文件。")
+
+if __name__ == "__main__":  # Run the tag copy only when executed as a script.
+    copy_tags_for_all_folders()