# scripts/json_to_txt.py
"""Convert every ``topic_*/article_judged.json`` file into a plain-text file."""
import glob
import json
import os


def _field_as_text(data, key):
    """Return ``data[key]`` as text; a missing or ``null`` value becomes ``""``."""
    value = data.get(key)
    if value is None:
        # A JSON ``null`` would otherwise reach string concatenation as
        # ``None`` and raise TypeError.
        return ''
    # Non-string JSON values (numbers, booleans, lists, ...) are rendered
    # with str() so a single odd field does not abort the conversion.
    return value if isinstance(value, str) else str(value)


def convert_json_to_txt(json_file_path):
    """Convert one JSON article file to a TXT file, preserving line breaks.

    The output is written next to the input, with the extension replaced by
    ``.txt``. Its layout is: title, a blank line, content, a newline, tag.

    Args:
        json_file_path: Path to a UTF-8 JSON file whose top-level value is an
            object; the ``title``, ``content`` and ``tag`` keys are read.
            Missing or ``null`` keys are written as empty strings.

    Raises:
        OSError: If the input cannot be read or the output cannot be written.
        json.JSONDecodeError: If the input is not valid JSON.
    """
    with open(json_file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Two newlines separate title from content; one separates content from tag.
    txt_content = ''.join((
        _field_as_text(data, 'title'),
        '\n\n',
        _field_as_text(data, 'content'),
        '\n',
        _field_as_text(data, 'tag'),
    ))

    # Same directory and base name as the input, with a .txt extension.
    txt_file_path = os.path.splitext(json_file_path)[0] + '.txt'

    with open(txt_file_path, 'w', encoding='utf-8') as f:
        f.write(txt_content)

    print(f"已转换: {json_file_path} -> {txt_file_path}")


def main():
    """Find and convert all ``topic_*/article_judged.json`` files.

    The pattern is resolved relative to the current working directory.
    """
    # glob.glob returns matches in arbitrary order; sort them so the
    # processing order (and the progress output) is reproducible.
    json_files = sorted(glob.glob('topic_*/article_judged.json'))

    if not json_files:
        print("未找到符合条件的JSON文件")
        return

    for json_file in json_files:
        convert_json_to_txt(json_file)

    print(f"转换完成,共处理了 {len(json_files)} 个文件")


if __name__ == "__main__":
    main()