From 75bba5fdb367785d7ee6c09e8002c86644e8d9b5 Mon Sep 17 00:00:00 2001 From: jinye_huang Date: Tue, 29 Apr 2025 15:23:37 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E4=BA=86refer=E7=9A=84?= =?UTF-8?q?=E5=8A=A0=E8=BD=BD=E6=9C=BA=E5=88=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- poster_gen_config.json | 4 +- scripts/test_refer.py | 6 +++ .../prompt_manager.cpython-312.pyc | Bin 26480 -> 26539 bytes .../resource_loader.cpython-312.pyc | Bin 7439 -> 8341 bytes utils/prompt_manager.py | 9 +++- utils/resource_loader.py | 50 ++++++++++++------ 6 files changed, 50 insertions(+), 19 deletions(-) create mode 100644 scripts/test_refer.py diff --git a/poster_gen_config.json b/poster_gen_config.json index ea96449..551edbd 100644 --- a/poster_gen_config.json +++ b/poster_gen_config.json @@ -33,8 +33,8 @@ { "type": "Refer", "file_path": [ - "./genPrompts/Refer/标题参考格式.txt", - "./genPrompts/Refer/正文开头引入段落参考.txt" + "./genPrompts/Refer/标题参考格式.json", + "./genPrompts/Refer/正文开头引入段落参考.json" ] } ], diff --git a/scripts/test_refer.py b/scripts/test_refer.py new file mode 100644 index 0000000..8c8d0d3 --- /dev/null +++ b/scripts/test_refer.py @@ -0,0 +1,6 @@ +import json + +with open("genPrompts/Refer/标题参考格式.json", "r", encoding="utf-8") as f: + data = json.load(f) + +print(data) diff --git a/utils/__pycache__/prompt_manager.cpython-312.pyc b/utils/__pycache__/prompt_manager.cpython-312.pyc index e75407e752cbc3d1ee3c9d9f12693d5c3eb9e732..09861d83ed4753d7f9030050d0da0cf11313f475 100644 GIT binary patch delta 1246 zcmZXTUrd`-6u`d&v|phu{Rfk-ErqXTmcf7m^3O2`qob?_RQ$&fI<|hLj6&hI*^t#4 z;-BbjCiC#X%ZLxgjSw~SdvQ@GG11JB%&?h2nmrKZ*-VxgA6(`=A10W1AAULa{JZzu zbMAvXu<|n$t{II50$<^a=9qQ*MqyBs^SKU>HahS_u?g2aHhk2l!!6PCubhu=T`AUz zUL~OYJ~#i7AiR;6&O2L0E!CbUqlJ1(MO@npI;ofl6tUGER3Le~Vy=^S8gkZkwevy6AyAZ-b ztJktE)onr&@x<2@<3DE^8r+Yv`GIX);6tges6Uo1~QB8MmzROPjQPHIA&T=p zm%f`@SS8^Ee&uV2Q~0OvFr3DQ%0Bo>d8@KPEQygdhcL#G)pp4fBa;y}j(6(3Psh1x zR#&^LtL?3{s4vWF$bEuPK{!CDBTNz!g#VxalD|UF$dn|U!gu_V{v3(t38(Q(|MT#- zl2?CB<9&fNG_n85Urk%Jo*+p*INt2rN6*7t9NGNcUPs0l8^Gn}3s9$2w7gmfbIQfz zHYnW-RP*#uh}hR3U@XD-+kDY2=|hyOpYS5y@9rpmio_;DGeIq~1?z)N_5r@nT1Mr` z@Dxi-Or?^$BgSN$3wCH5$h#452WOy1>FD{(441I&q`&k7(w7k=j#*LdS6dm&kZMkO z|6~rp7-j~Kz;R`5Fb09kWXL1%A0e#Iv%f|n#E0*HQQ5YE-6yvu3pdhG4+#4R0T7<41JZipO>tVFzIbL@v(i*)9Tp;L9{o2mFF*cR2?Z; zNPJW8)jm6?OfZI1)xSU^wtIHKAKJL*61W=pD0xxs&N3x$PV}Y}IhEb7Fc!gn->;CJ z@Gmmiw&^)ratBk&Wnf!zb#NHs_-D{#S;HeNM2KP`xEx}*FtiMIVLG%4ZfiF~^~TBs zMT-bYj=a>Oq<3fHtQSq;TIj?1VYjcJqO%CI2@44+LYnY>rxW2>mH`UwA;j=jIAF<< zxtBoQt5B+oz_660D$q!8{!KVx}Y%CdYdj>%bS0 z!w}NsCEKkqq}^ES1n-xFdYx7}V%)T44P#lx|HP}-fTfLUwG%dDMRZkpJ(-ILiwXKg z8!#1ZaCLA$Z%-;2d4Ofp136WuHK?o`C!?#RFlEkNB;BHd32 za2zt~+ga*oKa(`1-RUd>NaEd{P0*r=KlXrcgaQTvf2so8M~D)R5QcH2%P+;rY{!YN zIyiwZyK2faT*#NIw0XHZo5?Ae96N}f*v_(3lsc0zm(YR7V*$SY@mQ<%3<=#FYBdLf z?8cRCMhwUi_+9Ii&rAFYAC>xTmne_+oS)Xq>$=FTAn0T01O7W3X=e%735|p*LM1`^ zAEn$-5sxKO(h9K|-ZtaAM7VsC!us2~NS6Lq*WuEnuj<=6pG?cr+4$a+(!{1vO@?f$ L3A_b?s5kou$Uhwt diff --git a/utils/__pycache__/resource_loader.cpython-312.pyc b/utils/__pycache__/resource_loader.cpython-312.pyc index 5ecade9c35a4e3f9a37747c3c38d3ca39be48c6c..815842692f4e8c0d130367eeda53f8b1ec5f2947 100644 GIT binary patch delta 2500 zcmai0ZEO?C8J_ie*Pm-Ac5EEm;SA1~^JR{&ViS&*9)uhukmTCHp`OppZeT;=aI>3( zaBJLF+(D)G?$Gv%#Wm7V(PBh&n1;4ids0&!5|%1eYbDi%B|nf#_)!1&j#N~oQaj_Y ziK(vYT7Ku5cV?b>=Xw3k`u*sK-?06OVaxzMwWlhMRQayk#`UX=s7s>nb!Zu9~GK@DX4siA2^gL7ij(KnK+IEuUzybfSOJ{uKPH35dRBBEXth^Q7e} z76(0>K#~?~mM{QAt2MOJO7!wU46GG@v|tIqGAYAbgkWTC5^nXZ9k)#>ObFRa$Sj%R32}`^AeqIr8rC+3N5neK$2z_;7iMj@)bq*E zDE@C!%wl$_Q4KB=<Qw*YdhK>`* z!B_HmlD0CwYRQI}bXd|-E}l`@PT9r@M2XrJqL5eqhoOf0(V2x$F34Wva7v{_UL59B z3(E-ukUu8!qY2f>orsSd8|H*DTpImxnBWtKqk&iY_I2O?>6v@K|4pD_Y#kbD2t4@o zcMI8b3x9a$;V-}c;KJ!gKl!Aw#1}533u|nlN4R`=~J^%1xX3T-+EL=MM;DdL|WBNP3W*Pzu zm##m&@r!6ce%Dy%aY5{a7G%&4!XQ7)K|AWmw~aL^J|U{)Fqcqi7*DXHBPuPF&aLW) zM)?FpWeX4#kRtlw(KxG`d4W#|VmvXxArCL0)g$o}D#ebC91~y#njW%4eD|mh$PIEZ zHZYnH(V1c}E^;dKoDVvIeAE<7sa715HdAFv@-26rWjbn3h_3;)VkuNMrpcr_G!z>g zg(GoMCiX;AGu%iOkmF6k~CE*dV|F4{8V zvw_Gw<4|1TtSR%h675l(y|YZ;T$Q`1YcaX!>`r-~xwZE8s?~QJ!`GNAOqQK#+?rHH;aI%-ZK5> zd~^6(8rDiJLa2O zGw~bF8+|#?)OVDf{e?~aGcO*PX*!tRGaue`EqNuGJ23tBOt??+w=B|N{pR0#uX~mD zy~?3C6nQ8oUv!2675nV-TCh-Jp;;qR$&K~`ormtyZ%r{$VRfwx!!3* zZalA7TDuDVu5?$?{7feUs9C#whyi6S`xifll2;P(gKbF3CmXN`Se>VfZNOjmB_NP= z^PU=|F0j5t{aW5QeNtK7`?;r2@$}6(18CLLB88_tU8Lo1OE4v%-^#Cd)b0*~&%Eo9 zy=k#^Yy~%~-5p`_W_U*v>fEX((7-L99pmN_ZY|j`)v?8JYoh`Cx0-eb$h^LCw~x%b zC^SFsqp&AHVjMJe>dAbIZTEIEznwyO;MuEhYT8mqz666PijW237s^T{!)DyIVpEHZ zO7Q~AL)8GeW5aP&n+9B)$Rjt{_t2^y7npq9zK7_V`pn)y63z189BJan)M@7zI!6~~ z!KGWe7D6=ak@qgXE zo^n4X0qrR25Go##U-R$h2JtZZ2L(dO7(V}@Fhf!$~zQ_M*Y#$kA$A-C=Al?+; f#ex@^B1sU$9bmcxm^;AoMAxGuJYNEAOKblFT&$qa delta 1508 zcmZ8hZA@EL7(S=H_qM(5rL5l|{ot$Pnpqs68w^=ND7p_vHY4)GA#%%BEbVf7NyW6X z#QkuSMBrorL^76XR)f(JG~o}1m_cIv17VBi%8&g3Hvd&InIAEF4qZ#;xw+4I-}k&9 z&pG!!_j>b34Fx|40uRXG+Y2wQSmz6d7`uk3YVz*BMQ{R0FsVuaO^?kYvVLH}(};1dw3nR9U|yON}xyvU87v(fbJ&kFr)fleZwb zFzZedn@M7GDDR!*5?l<9gcDr9HRrQGOZ$Er*`Bi}pj?!fHMuHzN?Q4;+-iRbt!C6D zwE0zy#A3hI;!WfO2?dqJC75X-u?dc#W`=!NARAVVtUlhIG0%f=-O26`&u}^)QU8$L zxpE0s(nWWIP9wI=M6}ldYFgGQ+wY;Up&+=#!%c!9I`SCwPp)ks5}qo&)9J zFo?mp3W6>G$um;?j=BrPVL6a1a&;;IawK_~e(Sv<2?>50%&4+;TL1~`0+J!gzsC>3 z>>P)2kWC~O42mRb``YZorKPjMz94!q{pI%3^{vGZAAEde=j!F1Pp;yCE{sR`N}n0g z219xR3LA<^A%u)n)`UYbnP>{r9}PtmE)?qvg@gEe-Z?x#bagmLjHo9fMF$iv)-!M^ z97N>60r8JTBMK9a_DG1fjHp+1h?3~0*(_PHHyV+H5xJXWxgcLugeR;DOH0KRhEC`J z$$`u?vyhF#QGF4QEW{F{tnokk4%HQi&ls#);iixZ-Sr&BVT0pYht9TXu}z#9J25%5 zVG%!Q`^n+l1ty^xjxw7it{d%B?Ke(Mok|s@TQ^GTM|Jp`;h9p!?z&%9{gv>!kd`*8 zj!g<%UUBW|raS88(;NP?8{TvHXM=ZGPW7c*((!a?QC<^U*Bz}>!lu)0s+2JL3hG(lvwUkwHh@X`a7-=kcwA^1zGa zAiil)1qATj(dK$G-KjZ1aD^$LXtD($yHe#K@+xElyl$1#P+8CgeCkzOK|oZmifq8Q z&}j6jTLx)E9Q15|I{)zN&l)_U9(jmizfF*fss;-5)}RssiUtKQg?oB~$VoJuGM!}6 zAu9c8Itl9++RO*l(1izVlkn9A&i<>)^a72f?XlNwB8Qr>uJ~>E=E7j{ZIz>*EGXI@ zB!_y#!EOb1Bl@#I$8epqqoI+C^vh?7(56HGOUI*jY7J0mqHvr-8~)ZAg%Rv?{mcGR zLoE1tcfd-1ivS&=K&}-)FHtyy$J~CSi?!{l0cXmnh?Mc;`|gg0PP)tx5Si?W&?}3) mIV*aNN_$T9SNNBbMF(x5&{01oQIsFF!_I8W_ diff --git a/utils/prompt_manager.py b/utils/prompt_manager.py index affb5c2..e3075eb 100644 --- a/utils/prompt_manager.py +++ b/utils/prompt_manager.py @@ -71,6 +71,7 @@ class PromptManager: if os.path.exists(dateline_path): self._dateline_cache = ResourceLoader.load_file_content(dateline_path) logging.info(f"预加载日期线文件: {dateline_path}") + # 加载prompts_config配置的文件 if not self.prompts_config: @@ -92,9 +93,12 @@ class PromptManager: elif prompt_type == "demand": for path in file_paths: + # print(path) if os.path.exists(path): filename = os.path.basename(path) + # print(filename) content = ResourceLoader.load_file_content(path) + # print(content) if content: self._demand_cache[filename] = content name_without_ext = os.path.splitext(filename)[0] @@ -102,11 +106,14 @@ class PromptManager: elif prompt_type == "refer": for path in file_paths: + # print(path) if os.path.exists(path): filename = os.path.basename(path) - content = ResourceLoader.load_file_content(path) + # print(filename) + content = ResourceLoader.load_all_refer_files(path) if content: self._refer_cache[filename] = content + # print(content) def find_directory_fuzzy_match(self, name, directory=None, files=None): """ diff --git a/utils/resource_loader.py b/utils/resource_loader.py index fb7175b..4cb4ef3 100644 --- a/utils/resource_loader.py +++ b/utils/resource_loader.py @@ -22,16 +22,14 @@ class ResourceLoader: return None @staticmethod - def load_all_refer_files(refer_dir, refer_content_length=50): - """加载Refer目录下的所有文件内容""" + def load_all_refer_files(file_path, refer_content_rate=0.5): + """加载Refer目录下的指定文件内容""" refer_content = "" - if not refer_dir or not os.path.isdir(refer_dir): - print(f"Warning: Refer directory '{refer_dir}' not found or invalid.") + if not file_path or not os.path.isfile(file_path): + print(f"Warning: Refer directory '{file_path}' not found or invalid.") return "" try: - files = os.listdir(refer_dir) - for file in files: - file_path = os.path.join(refer_dir, file) + if True: # print(file_path) if os.path.isfile(file_path) and file_path.endswith(".txt"): # Use the updated load_file_content content = ResourceLoader.load_file_content(file_path) @@ -39,18 +37,38 @@ class ResourceLoader: # 用\n分割content,取前length条 content_lines = content.split("\n") # Ensure refer_content_length doesn't exceed available lines - sample_size = min(refer_content_length, len(content_lines)) + sample_size = int(len(content_lines) * refer_content_rate) content_lines = random.sample(content_lines, sample_size) content = "\n".join(content_lines) - refer_content += f"## {file}\n{content}\n\n" + refer_content += f"## {file_path}\n{content}\n\n" elif os.path.isfile(file_path) and file_path.endswith(".json"): - # 读取json文件 - with open(file_path, 'r', encoding='utf-8') as f: - content = json.load(f) - - ## 随机进行多次抽样 - - refer_content += f"## {file}\n{content}\n\n" + try: + # 读取json文件 + with open(file_path, 'r', encoding='utf-8') as f: + file_content = json.load(f) + + # 检查必要的键是否存在 + if "title" not in file_content or "description" not in file_content or "examples" not in file_content: + print(f"Warning: JSON文件 '{file_path}' 缺少必要的键(title/description/examples)") + + title_content = file_content["title"] + description_content = file_content["description"] + examples = file_content["examples"] + + # 对examples进行采样 + if examples and isinstance(examples, list): + sample_size = max(1, int(len(examples) * refer_content_rate)) + sampled_examples = random.sample(examples, sample_size) + + # 格式化内容 + examples_formatted = json.dumps(sampled_examples, ensure_ascii=False, indent=2) + content = f"{title_content}\n{description_content}\n{examples_formatted}\n" + + refer_content += f"## {file_path}\n{content}\n\n" + else: + print(f"Warning: JSON文件 '{file_path}' 的examples不是有效列表") + except Exception as json_err: + print(f"处理JSON文件 '{file_path}' 失败: {json_err}") return refer_content except Exception as e: print(f"加载Refer目录文件失败: {e}")