add_tags_to_detectedarticles

This commit is contained in:
yujie_jiang 2025-05-09 16:55:12 +08:00
parent 0863126f74
commit 1018fd53bc

81
add_tags.py Normal file
View File

@ -0,0 +1,81 @@
import json
import os
import glob
import re
def _write_json_atomically(path, data):
    """Write *data* as JSON to *path* without risking a truncated target file.

    The JSON is first written to ``path + ".tmp"`` and then moved over *path*
    with ``os.replace``, so *path* keeps its previous content if serialization
    or the write itself fails part-way.

    Raises:
        Exception: whatever ``open``/``json.dump``/``os.replace`` raised; the
            temporary file is removed before re-raising.
    """
    tmp_path = path + ".tmp"
    try:
        with open(tmp_path, 'w', encoding='utf-8') as f:
            json.dump(data, f, ensure_ascii=False, indent=4)
        os.replace(tmp_path, path)
    except Exception:
        # Best-effort cleanup of the partial temp file; the original error is
        # the one worth reporting, so a failed cleanup is ignored on purpose.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise


def _copy_tag(sub_dir):
    """Copy the ``tag`` field of one article folder into its detect file.

    Reads ``article.json`` and ``article_detect.json`` inside *sub_dir* and, if
    the former contains a ``tag`` key, stores that value under ``tag`` in the
    latter and rewrites it.

    Returns:
        True when ``article_detect.json`` was rewritten, False when the folder
        was skipped (missing file, no ``tag`` key) or an error occurred. Every
        skip or error is reported with ``print``.
    """
    article_path = os.path.join(sub_dir, "article.json")
    article_detect_path = os.path.join(sub_dir, "article_detect.json")

    # Both files must exist; otherwise there is nothing to copy from or to.
    if not (os.path.exists(article_path) and os.path.exists(article_detect_path)):
        print(f"跳过 {sub_dir}:文件不存在")
        return False

    try:
        with open(article_path, 'r', encoding='utf-8') as f:
            article_data = json.load(f)
        with open(article_detect_path, 'r', encoding='utf-8') as f:
            article_detect_data = json.load(f)

        if 'tag' not in article_data:
            print(f"跳过 {sub_dir}没有tag字段")
            return False

        article_detect_data['tag'] = article_data['tag']
        _write_json_atomically(article_detect_path, article_detect_data)
        return True
    except Exception as e:
        # Deliberately broad: one unreadable or malformed folder must not
        # abort the whole batch, so the error is reported and the run goes on.
        print(f"处理 {sub_dir} 时出错: {e}")
        return False


def copy_tags_for_all_folders(base_dir: str = "Content_detector") -> int:
    """Copy ``tag`` from ``article.json`` to ``article_detect.json`` in bulk.

    Walks ``<base_dir>/四季梦幻*/<any dir>/<prefix>*``, where ``<prefix>`` is
    the number found in the "四季梦幻" folder name followed by ``_`` (for
    example ``四季梦幻1`` -> ``1_``), or ``*_`` when the name has no number.
    Progress, skips and errors are reported with ``print``.

    Args:
        base_dir: Directory that contains the "四季梦幻" folders. Defaults to
            ``"Content_detector"`` (relative to the working directory).

    Returns:
        The number of ``article_detect.json`` files that were rewritten.

    Raises:
        OSError: if *base_dir* cannot be listed (e.g. it does not exist).
    """
    # Sorted so that the processing order and the log output are reproducible.
    dream_folders = sorted(
        folder for folder in os.listdir(base_dir)
        if folder.startswith("四季梦幻")
        and os.path.isdir(os.path.join(base_dir, folder))
    )
    print(f"找到以下'四季梦幻'文件夹:{dream_folders}")

    total_processed = 0
    for dream_folder in dream_folders:
        dream_path = os.path.join(base_dir, dream_folder)

        # Extract the folder number (e.g. "四季梦幻1" -> "1").
        folder_number = re.search(r'四季梦幻(\d+)', dream_folder)
        if folder_number:
            subfolder_prefix = folder_number.group(1) + "_"
        else:
            # No number in the name: fall back to a wildcard prefix.
            subfolder_prefix = "*_"
        print(f"处理 {dream_folder},子文件夹前缀: {subfolder_prefix}")

        # glob.escape keeps characters such as "[" or "*" in a directory name
        # from being interpreted as wildcards; only the last path component is
        # meant to be a pattern.
        date_dirs = sorted(glob.glob(os.path.join(glob.escape(dream_path), "*")))
        for date_dir in date_dirs:
            if not os.path.isdir(date_dir):
                continue

            # Article folders carry the extracted prefix (e.g. "1_*", "2_*").
            sub_dirs = sorted(glob.glob(
                os.path.join(glob.escape(date_dir), f"{subfolder_prefix}*")
            ))
            folder_processed = 0
            for sub_dir in sub_dirs:
                if _copy_tag(sub_dir):
                    folder_processed += 1
            total_processed += folder_processed
            print(f"在文件夹 {date_dir} 中处理了 {folder_processed} 个文件")

    print(f"任务完成!总共处理了 {total_processed} 个文件。")
    return total_processed
# Script entry point: run the tag-copy job only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    copy_tags_for_all_folders()