add_tags_to_detectedarticles
This commit is contained in:
parent
0863126f74
commit
1018fd53bc
81
add_tags.py
Normal file
81
add_tags.py
Normal file
@ -0,0 +1,81 @@
|
||||
import json
|
||||
import os
|
||||
import glob
|
||||
import re
|
||||
|
||||
def _copy_tag(sub_dir):
    """Copy the ``tag`` field from article.json into article_detect.json.

    Both files are looked up directly inside *sub_dir*. Every skip or
    failure is reported on stdout rather than raised, so one bad folder
    never aborts a batch run.

    Args:
        sub_dir: Path of the folder expected to contain both JSON files.

    Returns:
        True if article_detect.json was rewritten with the tag, False if the
        folder was skipped (missing file, no ``tag`` key) or an error occurred.
    """
    article_path = os.path.join(sub_dir, "article.json")
    article_detect_path = os.path.join(sub_dir, "article_detect.json")

    # Both files must exist; otherwise there is nothing to copy from/to.
    if not (os.path.exists(article_path) and os.path.exists(article_detect_path)):
        print(f"跳过 {sub_dir}：文件不存在")
        return False

    try:
        with open(article_path, 'r', encoding='utf-8') as f:
            article_data = json.load(f)

        with open(article_detect_path, 'r', encoding='utf-8') as f:
            article_detect_data = json.load(f)

        if 'tag' not in article_data:
            print(f"跳过 {sub_dir}：没有tag字段")
            return False

        article_detect_data['tag'] = article_data['tag']

        # Serialize BEFORE opening the target for writing: opening with 'w'
        # truncates the file, so a failure during encoding must not be able
        # to leave article_detect.json empty or half-written.
        serialized = json.dumps(article_detect_data, ensure_ascii=False, indent=4)
        with open(article_detect_path, 'w', encoding='utf-8') as f:
            f.write(serialized)
    except Exception as e:
        # Deliberately broad: this is the per-folder boundary of a
        # best-effort batch job; report the problem and keep going.
        print(f"处理 {sub_dir} 时出错: {e}")
        return False

    return True


def copy_tags_for_all_folders(base_dir="Content_detector"):
    """Copy ``tag`` from article.json to article_detect.json in bulk.

    Walks ``<base_dir>/四季梦幻*/<date dir>/<N>_*/`` where ``N`` is the number
    embedded in the "四季梦幻N" folder name (any ``*_*`` subfolder is accepted
    when the name carries no number). For each matching subfolder that holds
    both JSON files, the ``tag`` value of article.json is written into
    article_detect.json. Progress is printed to stdout.

    Args:
        base_dir: Root directory to scan. Defaults to "Content_detector",
            resolved relative to the current working directory.

    Returns:
        The total number of article_detect.json files that were updated.
        Returns 0 if *base_dir* does not exist.
    """
    if not os.path.isdir(base_dir):
        print(f"基础目录不存在：{base_dir}")
        return 0

    # Find every directory whose name starts with "四季梦幻" (sorted so that
    # the processing order and the log output are deterministic).
    dream_folders = sorted(
        folder for folder in os.listdir(base_dir)
        if folder.startswith("四季梦幻")
        and os.path.isdir(os.path.join(base_dir, folder))
    )

    print(f"找到以下'四季梦幻'文件夹：{dream_folders}")

    total_processed = 0

    for dream_folder in dream_folders:
        dream_path = os.path.join(base_dir, dream_folder)

        # Extract the folder number (e.g. "四季梦幻1" -> "1").
        folder_number = re.search(r'四季梦幻(\d+)', dream_folder)
        if folder_number:
            subfolder_prefix = folder_number.group(1) + "_"
        else:
            # No number in the name: fall back to a wildcard prefix.
            subfolder_prefix = "*_"

        print(f"处理 {dream_folder}，子文件夹前缀: {subfolder_prefix}")

        # Escape the literal part of the path so that glob metacharacters in
        # directory names ("[", "]", "*", "?") are not treated as patterns.
        date_dirs = sorted(glob.glob(os.path.join(glob.escape(dream_path), "*")))

        for date_dir in date_dirs:
            if not os.path.isdir(date_dir):
                continue

            # Subfolders matching the extracted prefix, e.g. "1_*" or "2_*".
            # The prefix itself is intentionally left unescaped because the
            # fallback value contains a real wildcard.
            sub_dirs = sorted(
                glob.glob(os.path.join(glob.escape(date_dir), f"{subfolder_prefix}*"))
            )

            folder_processed = sum(1 for sub_dir in sub_dirs if _copy_tag(sub_dir))
            total_processed += folder_processed

            print(f"在文件夹 {date_dir} 中处理了 {folder_processed} 个文件")

    print(f"任务完成！总共处理了 {total_processed} 个文件。")
    return total_processed
|
||||
|
||||
# Script entry point: run the bulk tag copy only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    copy_tags_for_all_folders()
|
Loading…
x
Reference in New Issue
Block a user