-
Notifications
You must be signed in to change notification settings - Fork 90
/
Copy pathPromptBuilder.py
776 lines (633 loc) · 34.1 KB
/
PromptBuilder.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
import re
import rapidjson as json
from Base.Base import Base
from Module_Folders.Translator.TranslatorConfig import TranslatorConfig
class PromptBuilder(Base):
    """Namespace for the helpers that assemble translation prompts.

    Construction only runs the ``Base`` initialiser; this class adds no
    instance state of its own.
    """

    def __init__(self) -> None:
        super().__init__()
# 获取系统提示词
def get_system_prompt(config: TranslatorConfig) -> str:
    """Return the system prompt that matches the current configuration.

    Args:
        config: Translator settings. Reads ``system_prompt_switch``,
            ``system_prompt_content``, ``source_language``,
            ``target_language``, ``cot_toggle`` and ``cn_prompt_toggle``.

    Returns:
        ``config.system_prompt_content`` when the custom-prompt switch is on;
        otherwise one of four built-in templates (Chinese or English wording,
        plain or step-by-step) with the language names filled in.

    Raises:
        KeyError: If a built-in template is needed and the source or target
            language is not one of the six keys of the language table.
    """
    # A user-supplied prompt overrides every built-in template.
    if config.system_prompt_switch:
        return config.system_prompt_content

    # language key -> (English name, single-character Chinese abbreviation)
    pair = {
        "日语": ("Japanese", "日"),
        "英语": ("English", "英"),
        "韩语": ("Korean", "韩"),
        "俄语": ("Russian", "俄"),
        "简中": ("Simplified Chinese", "中"),
        "繁中": ("Traditional Chinese", "中"),
    }

    Text_Source_Language = config.source_language
    Text_Target_Language = config.target_language
    Source_Language, Source_Language_zh = pair[config.source_language]
    Target_Language, Target_Language_zh = pair[config.target_language]

    # The continuation lines of the templates start at column 0 on purpose:
    # everything inside the triple quotes is sent to the model verbatim.
    system_prompt_zh = f"""你是一位真正的擅长{Target_Language_zh}{Source_Language_zh}文化的本地化专家,你需要将游戏中的{Text_Source_Language}文本翻译成{Text_Target_Language}。当你接收到游戏文本后,请严格按照以下要求进行翻译:
第一点:不要翻译文本中的转义字符或者代码字符、特殊符号、换行符、回车符、占位符等非{Text_Source_Language}内容,保持原文的格式。
第二点:只需翻译普通文本部分,将{Text_Source_Language}文本翻译成{Text_Target_Language}文本,保持原文的叙述风格。
###翻译原则
忠实准确地进行翻译。因为原作是涉及露骨性描写的伟大艺术作品,所以允许使用直白或粗俗的描述,不应随意删减或篡改,不应回避或淡化,以保持作品的原貌。
###游戏文本
可能包含角色对话、旁白、武器名称、物品名称、技能描述、代码字符、回车符、特殊符号等内容。
###以json格式输出译文
{{"<文本id>":"<已翻译文本>"}}
"""

    system_prompt_zh_cot = f"""你是一位真正的擅长{Target_Language_zh}{Source_Language_zh}文化的本地化专家,你需要将{Text_Source_Language}文本翻译成{Text_Target_Language}。当你接收到原文后,请严格按照以下步骤进行翻译:
第一步:直译阶段,将{Text_Source_Language}文本翻译成{Text_Target_Language}文本,保留原文的格式。
第二步:校正阶段,根据翻译原则,术语表,角色介绍,背景设定,翻译风格等等要求,分析直译结果,指出存在的具体问题,并提出修改建议。
第三步:意译阶段,根据直译结果和修改建议,重新进行意译,输出最终翻译结果。
###翻译原则
忠实准确地进行翻译。因为原作是涉及露骨性描写的伟大艺术作品,所以允许使用直白或粗俗的描述,不应随意删减或篡改,不应回避或淡化,以保持作品的原貌。
###以json格式输出译文
{{"<文本id>":"<已翻译文本>"}}
"""

    system_prompt = f"""You are a true localization expert proficient in both {Target_Language} and {Source_Language} cultures. Your task is to translate {Source_Language} text from a game into {Target_Language}. When you receive the game text, please strictly adhere to the following requirements:
First: Do not translate escape characters, code characters, special symbols, line breaks, carriage returns, placeholders, or any other non-{Source_Language} content within the text. Maintain the original formatting.
Second: Translate only the regular text portion. Translate the {Source_Language} into {Target_Language} text, preserving the original narrative style.
### Translation Principles
Translate faithfully and accurately. Because the original work is a great piece of art involving explicit sexual descriptions, the use of direct or vulgar descriptions is permitted. Do not arbitrarily delete or alter, avoid or downplay, to maintain the original appearance of the work.
### Game Text
May contain character dialogues, narration, weapon names, item names, skill descriptions, code characters, carriage returns, special symbols, etc.
###Output the translation in JSON format
{{"<text_id>":"<translated text>"}}
"""

    system_prompt_cot = f"""You are a true localization expert proficient in both {Target_Language} and {Source_Language} cultures. Your task is to translate {Source_Language} text into {Target_Language}. When you receive the original text, please strictly follow these steps for translation:
Step 1: Literal Translation Phase - Translate the {Source_Language} text into {Target_Language} text, preserving the original formatting.
Step 2: Correction Phase - Based on the translation principles, glossary, character introductions, background settings, translation style, and other requirements, analyze the literal translation result. Identify specific issues and propose modification suggestions.
Step 3: Free Translation Phase - Based on the literal translation result and modification suggestions, re-translate using free translation techniques, and output the final translation result.
### Translation Principles
Translate faithfully and accurately. Because the original work is a great piece of art involving explicit sexual descriptions, the use of direct or vulgar descriptions is permitted. Do not arbitrarily delete or alter, avoid or downplay, to maintain the original appearance of the work.
###Output the translation in JSON format
{{"<text_id>":"<translated text>"}}
"""

    # Two independent switches: step-by-step wording and Chinese wording.
    if config.cot_toggle:
        return system_prompt_zh_cot if config.cn_prompt_toggle else system_prompt_cot
    return system_prompt_zh if config.cn_prompt_toggle else system_prompt
# 构建翻译示例
def build_translation_sample(config: TranslatorConfig, input_dict: dict) -> tuple[str, str]:
    """Build the example source/translation pair as two JSON strings.

    The pattern-triggered examples from
    ``PromptBuilder.get_default_translation_example`` come first, followed by
    the adaptive examples from
    ``PromptBuilder.build_adaptive_translation_sample``. The adaptive step is
    skipped when the source language is English.

    Args:
        config: Translator settings, forwarded to both helpers;
            ``source_language`` is also read here.
        input_dict: The batch of texts being translated, forwarded to both
            helpers.

    Returns:
        ``(source_json, target_json)``: each a JSON object keyed by the
        example's position ("0", "1", ...), serialised with the module-level
        ``json``. Both are empty strings when there are no source examples.
    """
    source_samples, target_samples = PromptBuilder.get_default_translation_example(config, input_dict)

    # Adaptive examples cannot be built for English source text.
    if config.source_language != "英语":
        adaptive_source, adaptive_target = PromptBuilder.build_adaptive_translation_sample(config, input_dict)
        source_samples = source_samples + adaptive_source
        target_samples = target_samples + adaptive_target

    # Nothing to serialise: both strings stay empty.
    if not source_samples:
        return "", ""

    source_dict = {str(index): value for index, value in enumerate(source_samples)}
    target_dict = {str(index): value for index, value in enumerate(target_samples)}

    source_str = json.dumps(source_dict, ensure_ascii=False)
    target_str = json.dumps(target_dict, ensure_ascii=False)
    return source_str, target_str
# 构建特定翻译示例
def get_default_translation_example(config: TranslatorConfig, input_dict: dict) -> tuple[list[str], list[str]]:
    """Pick built-in example pairs whose trigger pattern occurs in the input.

    Each built-in example is keyed by a regular expression. When any value of
    ``input_dict`` matches that expression, the example text for the source
    language and for the target language is added once. If nothing matches,
    a single generic sentence pair is returned instead.

    Args:
        config: Translator settings; ``source_language`` and
            ``target_language`` select the example texts.
        input_dict: Texts of the current batch; only the values are examined
            and they are passed to ``re.search`` as-is.

    Returns:
        ``(source_examples, translated_examples)``, index-aligned and never
        empty.

    Raises:
        KeyError: If either language is not one of the six supported keys.
    """
    # trigger regex -> example sentence per language
    patterns_all = {
        r"[a-zA-Z]=": {
            "日语": 'a=" ぞ…ゾンビ系…。',
            "英语": "a=\" It's so scary….",
            "韩语": 'a=" 정말 무서워요….',
            "俄语": 'а=" Ужасно страшно...。',
            "简中": 'a=" 好可怕啊……。',
            "繁中": 'a=" 好可怕啊……。',
        },
        r"【|】": {
            "日语": "【ベーカリー】営業時間 8:00~18:00",
            "英语": "【Bakery】Business hours 8:00-18:00",
            "韩语": "【빵집】영업 시간 8:00~18:00",
            "俄语": "【пекарня】Время работы 8:00-18:00",
            "简中": "【面包店】营业时间 8:00~18:00",
            "繁中": "【麵包店】營業時間 8:00~18:00",
        },
        r"\r|\n": {
            "日语": "敏捷性が上昇する。 \r\n効果:パッシブ",
            "英语": "Agility increases. \r\nEffect: Passive",
            "韩语": "민첩성이 상승한다. \r\n효과:패시브",
            "俄语": "Повышает ловкость. \r\nЭффект: Пассивный",
            "简中": "提高敏捷性。 \r\n效果:被动",
            "繁中": "提高敏捷性。 \r\n效果:被動",
        },
        # Control codes such as \F[21]: backslash, one letter, digits in brackets.
        r"\\[A-Za-z]\[\d+\]": {
            "日语": "\\F[21]ちょろ……ちょろろ……じょぼぼぼ……♡",
            "英语": "\\F[21]Gurgle…Gurgle…Dadadada…♡",
            "韩语": "\\F[21]둥글둥글…둥글둥글…둥글둥글…♡",
            "俄语": "\\F[21]Гуру... гуругу...Дадада... ♡",
            "简中": "\\F[21]咕噜……咕噜噜……哒哒哒……♡",
            "繁中": "\\F[21]咕嚕……咕嚕嚕……哒哒哒……♡",
        },
        r"「|」": {
            "日语": "さくら:「すごく面白かった!」",
            "英语": "Sakura:「It was really fun!」",
            "韩语": "사쿠라:「정말로 재미있었어요!」",
            "俄语": "Сакура: 「Было очень интересно!」",
            "简中": "樱:「超级有趣!」",
            "繁中": "櫻:「超有趣!」",
        },
        r"∞|@": {
            "日语": "若くて∞@綺麗で∞@エロくて",
            "英语": "Young ∞@beautiful ∞@sexy.",
            "韩语": "젊고∞@아름답고∞@섹시하고",
            "俄语": "Молодые∞@Красивые∞@Эротичные",
            "简中": "年轻∞@漂亮∞@色情",
            "繁中": "年輕∞@漂亮∞@色情",
        },
        r"↓": {
            "日语": "若くて↓綺麗で↓↓エロくて",
            "英语": "Young ↓beautiful ↓↓sexy.",
            "韩语": "젊고↓아름답고↓↓섹시하고",
            "俄语": "Молодые↓Красивые↓↓Эротичные",
            "简中": "年轻↓漂亮↓↓色情",
            "繁中": "年輕↓漂亮↓↓色情",
        },
    }

    # Fallback pair used when no trigger pattern matched.
    base_example = {
        "base": {
            "日语": "愛は魂の深淵にある炎で、暖かくて永遠に消えない。",
            "英语": "Love is the flame in the depth of the soul, warm and never extinguished.",
            "韩语": "사랑은 영혼 깊숙이 타오르는 불꽃이며, 따뜻하고 영원히 꺼지지 않는다.",
            "俄语": "Любовь - это пламя в глубине души, тёплое и никогда не угасающее.",
            "简中": "爱情是灵魂深处的火焰,温暖且永不熄灭。",
            "繁中": "愛情是靈魂深處的火焰,溫暖且永不熄滅。",
        }
    }

    source_list = []
    translated_list = []
    for value in input_dict.values():
        for pattern, translation_sample in patterns_all.items():
            if not re.search(pattern, value):
                continue
            source_sample = translation_sample[config.source_language]
            # Each example is added at most once, however many texts trigger it.
            if source_sample not in source_list:
                source_list.append(source_sample)
                translated_list.append(translation_sample[config.target_language])

    # Always return at least one example pair.
    if not source_list:
        source_list.append(base_example["base"][config.source_language])
        translated_list.append(base_example["base"][config.target_language])

    return source_list, translated_list
# 辅助函数,清除列表过多相似的元素
def clean_list(lst) -> list[str]:
    """Drop entries that are near-duplicates of an earlier entry.

    Every string is reduced to the set of its non-digit characters. A string
    joins the first existing cluster whose founding member's character set
    differs from its own by fewer than three characters (size of the symmetric
    difference); otherwise it founds a new cluster.

    Args:
        lst: Iterable of strings.

    Returns:
        The founding member of each cluster, in first-seen order.
    """
    # Each entry: (character set of the founding member, founding member)
    clusters = []
    for text in lst:
        signature = {ch for ch in text if not ch.isdigit()}
        for founder_signature, _ in clusters:
            if len(signature.symmetric_difference(founder_signature)) < 3:
                # Close enough to an existing cluster: not kept separately.
                break
        else:
            clusters.append((signature, text))

    return [text for _, text in clusters]
# 辅助函数,重新调整列表中翻译示例的后缀数字
def replace_and_increment(items, prefix) -> list[str]:
    """Relabel ``<prefix><number>`` placeholders as ``<prefix><letter>-<n>``.

    Each item that contains at least one ``prefix`` followed by one or two
    digits gets the next letter (A, B, C, ...). Inside that item the
    occurrences are numbered 1, 2, 3, ... from left to right. Items without a
    placeholder are passed through unchanged and do not consume a letter.

    Args:
        items: Iterable of strings.
        prefix: Literal placeholder text that precedes the digits.

    Returns:
        A new list with the relabelled strings, in the original order.
    """
    # Doubled braces keep str.format from treating the quantifier as a field.
    pattern = re.compile(r"{}(\d{{1,2}})".format(re.escape(prefix)))
    letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"

    result = []
    letter_index = 0
    for item in items:
        if not pattern.search(item):
            result.append(item)
            continue

        # NOTE: the wrap-around threshold is 24, so only A..X are ever used.
        if letter_index >= 24:
            letter_index = 0
        letter = letters[letter_index]

        occurrence = 0

        def relabel(_match):
            nonlocal occurrence
            occurrence += 1
            return f"{prefix}{letter}-{occurrence}"

        # One left-to-right pass; replaced text is never scanned again, so the
        # substitution terminates for every prefix.
        result.append(pattern.sub(relabel, item))
        letter_index += 1

    return result
# 构建相似格式翻译示例
def build_adaptive_translation_sample(config: TranslatorConfig, input_dict: dict) -> tuple[list[str], list[str]]:
    """Build example pairs that mirror the layout of the texts being sent.

    Every run of source-language characters in a value is replaced by a
    placeholder, so that only the surrounding digits, symbols and code remain.
    The same replacement is made with the target-language placeholder to get
    the matching "translation".

    Illustration (Japanese to Simplified Chinese)::

        input    {"0": "こんにちは,こんにちは。こんにちは#include <iostream>",
                  "1": "55345こんにちは",
                  "2": "こんにちはxxxx!",
                  "3": "こんにちは"}
        source   ["例示テキストA-1,例示テキストA-2。例示テキストA-3#include <iostream>",
                  "55345例示テキストB-1",
                  "例示テキストC-1xxxx!"]
        target   the same three strings with "示例文本" as the placeholder

    Args:
        config: Translator settings; ``source_language`` selects the character
            class and source placeholder, ``target_language`` the target
            placeholder.
        input_dict: Texts of the current batch; only the values are used.

    Returns:
        ``(source_examples, translated_examples)``. Both lists are empty when
        the source language has no character class (English) or when no value
        contains source-language characters.

    Raises:
        KeyError: If a language has no entry in the placeholder table.
    """
    # Character classes that identify text written in each source language.
    patterns_all = {
        "日语": re.compile(
            r"["
            r"\u3041-\u3096"  # hiragana
            r"\u30A0-\u30FF"  # katakana
            r"\u4E00-\u9FAF"  # kanji (CJK unified ideographs)
            r"]+",
            re.UNICODE,
        ),
        "韩语": re.compile(
            r"["
            r"\uAC00-\uD7AF"  # Hangul syllables
            r"]+",
            re.UNICODE,
        ),
        "俄语": re.compile(
            r"["
            r"\u0400-\u04FF"  # Cyrillic
            r"]+",
            re.UNICODE,
        ),
        "简中": re.compile(
            r"["
            r"\u4E00-\u9FA5"  # simplified Chinese characters
            r"]+",
            re.UNICODE,
        ),
        "繁中": re.compile(
            r"["
            r"\u3400-\u4DBF"  # CJK extension A
            r"\u4E00-\u9FFF"  # basic CJK ideographs
            r"\uF900-\uFAFF"  # CJK compatibility ideographs
            r"]+",
            re.UNICODE,
        ),
    }

    # Placeholder text per language.
    text_all = {
        "日语": "例示テキスト",
        "韩语": "예시 텍스트",
        "俄语": "Пример текста",
        "简中": "示例文本",
        "繁中": "翻譯示例文本",
        "英语": "Sample Text",
    }

    pattern = patterns_all.get(config.source_language)
    if pattern is None:
        # No character class for this language (English): nothing to build.
        return [], []
    source_text = text_all[config.source_language]
    translated_text = text_all[config.target_language]

    source_list = []
    translated_list = []
    # Temporary numeric suffix, shared by both lists so the pairs stay aligned.
    counter = 1
    for value in input_dict.values():
        if not pattern.search(value):
            continue
        # Callables are used as replacements so the placeholder is inserted
        # literally, without backslash processing.
        source_list.append(pattern.sub(lambda _m: f"{source_text}{counter}", value))
        translated_list.append(pattern.sub(lambda _m: f"{translated_text}{counter}", value))
        counter += 1

    if not source_list:
        return [], []

    def is_bare_placeholder(item, placeholder):
        # Starts with the placeholder and has a digit among its last three
        # characters: such an item carries no formatting worth demonstrating.
        return item.startswith(placeholder) and any(ch.isdigit() for ch in item[-3:])

    source_list1 = [item for item in source_list if not is_bare_placeholder(item, source_text)]
    translated_list1 = [item for item in translated_list if not is_bare_placeholder(item, translated_text)]

    # Collapse examples that look almost the same.
    source_list2 = PromptBuilder.clean_list(source_list1)
    translated_list2 = PromptBuilder.clean_list(translated_list1)

    # Replace the temporary numeric suffixes with letter-number labels.
    source_list3 = PromptBuilder.replace_and_increment(source_list2, source_text)
    translated_list3 = PromptBuilder.replace_and_increment(translated_list2, translated_text)

    return source_list3, translated_list3
# 构造术语表
def build_glossary_prompt(config: TranslatorConfig, input_dict: dict) -> tuple[str, str]:
# 将输入字典中的所有值转换为集合
lines = set(line for line in input_dict.values())
# 筛选在输入词典中出现过的条目
result = [
v for v in config.prompt_dictionary_data
if any(v.get("src") in lines for lines in lines)
]
# 数据校验
if len(result) == 0:
return "", ""
# 初始化变量,以免出错
glossary_prompt_lines = []
glossary_prompt_lines_cot = []
if config.cn_prompt_toggle == True:
# 添加开头
glossary_prompt_lines.append(
"###术语表"
"\n" "|\t原文\t|\t译文\t|\t备注\t|"
"\n" ("-" * 50)
)
glossary_prompt_lines_cot.append("- 术语表:提供了")
# 添加数据
for v in result:
glossary_prompt_lines.append(f"|\t{v.get("src")}\t|\t{v.get("dst")}\t|\t{v.get("info") if v.get("info") != "" else " "}\t|")
glossary_prompt_lines_cot.append(f"“{v.get("src")}”({v.get("dst")})")
# 添加结尾
glossary_prompt_lines.append("-" * 50)
glossary_prompt_lines_cot.append("术语及其解释")
else:
# 添加开头
glossary_prompt_lines.append(
"###Glossary"
"\n" "|\tOriginal Text\t|\tTranslation\t|\tRemarks\t|"
"\n" ("-" * 50)
)
glossary_prompt_lines_cot.append("- Glossary:Provides terms such as")
# 添加数据
for v in result:
glossary_prompt_lines.append(f"|\t{v.get("src")}\t|\t{v.get("dst")}\t|\t{v.get("info") if v.get("info") != "" else " "}\t|")
glossary_prompt_lines_cot.append(f"“{v.get("src")}”({v.get("dst")})")
# 添加结尾
glossary_prompt_lines.append("-" * 50)
glossary_prompt_lines_cot.append(" and their explanations.")
# 拼接成最终的字符串
glossary_prompt = "\n".join(glossary_prompt_lines)
glossary_prompt_cot = "".join(glossary_prompt_lines_cot)
return glossary_prompt, glossary_prompt_cot
# 构造指令词典 Sakura
def build_glossary_prompt_sakura(config: TranslatorConfig, input_dict: dict) -> list[dict]:
    """Return the glossary entries that occur in the current texts.

    Args:
        config: Translator settings; reads ``prompt_dictionary_data``, a list
            of dicts with a ``src`` key.
        input_dict: Texts of the current batch; only the values are used.

    Returns:
        The entries, in their original order, whose ``src`` text is a
        substring of at least one value. Entries with an empty or missing
        ``src`` are left out, because an empty string is a substring of
        every text and would otherwise always be selected.
    """
    lines = set(input_dict.values())

    result = []
    for entry in config.prompt_dictionary_data:
        src = entry.get("src", "")
        if src and any(src in line for line in lines):
            result.append(entry)
    return result
# 构造角色设定
def build_characterization(config: TranslatorConfig, input_dict: dict) -> tuple[str, str]:
    """Describe the characters whose names occur in the current texts.

    A character of ``config.characterization_data`` is included when its
    ``original_name`` is a substring of at least one value of ``input_dict``.
    When several entries share an ``original_name``, the last one wins.

    Args:
        config: Translator settings; reads ``characterization_data`` (dicts
            with ``original_name``, ``translated_name``, ``gender``, ``age``,
            ``personality``, ``speech_style`` and ``additional_info``) and
            ``cn_prompt_toggle``.
        input_dict: Texts of the current batch; only the values are used.

    Returns:
        ``(profile, profile_cot)``: a bulleted section with one block per
        character, and a one-line summary of the same characters. Empty
        attributes are skipped. ``(None, None)`` is returned when no
        character name occurs in the texts.
    """
    by_name = {entry.get("original_name", ""): entry for entry in config.characterization_data}

    texts = list(input_dict.values())
    matched = {
        name: entry
        for name, entry in by_name.items()
        if any(name in text for text in texts)
    }
    if not matched:
        return None, None

    # Only the fixed wording differs between the Chinese and English variants.
    if config.cn_prompt_toggle:
        profile_parts = ["###角色介绍"]
        cot_parts = ["- 角色介绍:"]
        name_label = "译名"
        attribute_labels = (
            ("gender", "性别"),
            ("age", "年龄"),
            ("personality", "性格"),
            ("speech_style", "说话方式"),
            ("additional_info", "补充信息"),
        )
        cot_terminator = "。"
    else:
        profile_parts = ["###Character Introduction"]
        cot_parts = ["- Character Introduction:"]
        name_label = "Translated_name"
        attribute_labels = (
            ("gender", "Gender"),
            ("age", "Age"),
            ("personality", "Personality"),
            ("speech_style", "Speech_style"),
            ("additional_info", "Additional_info"),
        )
        cot_terminator = "."

    for entry in matched.values():
        original_name = entry.get("original_name")
        profile_parts.append(f"\n【{original_name}】")

        translated_name = entry.get("translated_name")
        if translated_name:
            profile_parts.append(f"\n- {name_label}:{translated_name}")
            cot_parts.append(f"{translated_name}({original_name})")

        for key, label in attribute_labels:
            attribute = entry.get(key)
            if attribute:
                profile_parts.append(f"\n- {label}:{attribute}")
                cot_parts.append(f",{attribute}")

        # Close this character's block and summary sentence.
        profile_parts.append("\n")
        cot_parts.append(cot_terminator)

    return "".join(profile_parts), "".join(cot_parts)
# 构造背景设定
def build_world(config: TranslatorConfig) -> tuple[str, str]:
    """Wrap the user's background-setting text in its two prompt forms.

    Args:
        config: Translator settings; reads ``world_building_content`` and
            ``cn_prompt_toggle``.

    Returns:
        ``(profile, profile_cot)``: a headed section (heading, newline, text,
        newline) and a one-line bullet (label immediately followed by the
        text). The heading and label are Chinese when ``cn_prompt_toggle`` is
        set, English otherwise.
    """
    world_building = config.world_building_content

    if config.cn_prompt_toggle:
        heading = "###背景设定"
        bullet = "- 背景设定:"
    else:
        heading = "###Background Setting"
        bullet = "- Background Setting:"

    profile = heading + f"\n{world_building}\n"
    profile_cot = bullet + f"{world_building}"
    return profile, profile_cot
# 构造文风要求
def build_writing_style(config: TranslatorConfig) -> tuple[str, str]:
    """Wrap the user's writing-style text in its two prompt forms.

    Args:
        config: Translator settings; reads ``writing_style_content`` and
            ``cn_prompt_toggle``.

    Returns:
        ``(profile, profile_cot)``: a headed section (heading, newline, text,
        newline) and a one-line bullet (label immediately followed by the
        text). The heading and label are Chinese when ``cn_prompt_toggle`` is
        set, English otherwise.
    """
    writing_style = config.writing_style_content

    if config.cn_prompt_toggle:
        heading = "###翻译风格"
        bullet = "- 翻译风格:"
    else:
        heading = "###Writing Style"
        bullet = "- Writing Style:"

    profile = heading + f"\n{writing_style}\n"
    profile_cot = bullet + f"{writing_style}"
    return profile, profile_cot
# 携带原文上文
def build_pre_text(config: TranslatorConfig, input_list: list[str]) -> str:
    """Render the preceding source lines as a context section.

    Args:
        config: Translator settings; ``cn_prompt_toggle`` selects the Chinese
            or English heading.
        input_list: Lines that precede the text being translated, oldest
            first. Every element must be a string.

    Returns:
        The heading, a newline, the lines joined by newlines, and a trailing
        newline.
    """
    if config.cn_prompt_toggle:
        profile = "###上文内容"
    else:
        profile = "###Previous text"

    # Joined outside the f-string: no backslash or nested quote is needed
    # inside a replacement field, so this also runs before Python 3.12.
    joined_rows = "\n".join(input_list)
    profile += f"\n{joined_rows}\n"
    return profile
# 构建翻译示例
def build_translation_example(config: TranslatorConfig) -> tuple[str, str]:
    """Serialise the user-defined translation examples as two JSON strings.

    Args:
        config: Translator settings; reads ``translation_example_data``, a
            list of dicts with ``src`` and ``dst`` keys.

    Returns:
        ``(original_example, translated_example)``: JSON objects keyed by the
        example's position ("0", "1", ...), holding the ``src`` and the
        ``dst`` values respectively. Both are empty strings when there are no
        examples.
    """
    data = config.translation_example_data
    if not data:
        return "", ""

    originals = {}
    translations = {}
    for index, entry in enumerate(data):
        key = str(index)
        originals[key] = entry.get("src")
        translations[key] = entry.get("dst")

    original_example = json.dumps(originals, ensure_ascii=False)
    translated_example = json.dumps(translations, ensure_ascii=False)
    return original_example, translated_example
# 构建用户示例前文
def build_userExamplePrefix(config: TranslatorConfig) -> str:
    """Return the line that introduces the example source text.

    Args:
        config: Translator settings; ``cn_prompt_toggle`` selects Chinese or
            English wording and ``cot_toggle`` selects the step-by-step
            variant.

    Returns:
        The introduction line, ending in a newline. The Chinese step-by-step
        variant carries one extra trailing space; the two English variants
        are identical.
    """
    if config.cn_prompt_toggle:
        profile = "###这是你接下来的翻译任务,原文文本如下\n"
        profile_cot = "###这是你接下来的翻译任务,原文文本如下\n "
    else:
        profile = "###This is your next translation task, the original text is as follows\n"
        profile_cot = "###This is your next translation task, the original text is as follows\n"

    return profile_cot if config.cot_toggle else profile
# 构建模型示例前文
def build_modelExamplePrefix(config: TranslatorConfig, glossary_prompt_cot: str, characterization_cot: str, world_building_cot: str, writing_style_cot: str) -> str:
    """Return the text that opens the model's example answer.

    Without ``cot_toggle`` this is a single acknowledgement sentence. With it,
    the acknowledgement is followed by a three-step scaffold: the literal
    translation placeholder, the correction notes, and the heading of the
    final translation step.

    Args:
        config: Translator settings; ``cn_prompt_toggle`` selects Chinese or
            English wording and ``cot_toggle`` selects the scaffold.
        glossary_prompt_cot: One-line glossary summary, or a falsy value to
            omit it.
        characterization_cot: One-line character summary, or a falsy value to
            omit it.
        world_building_cot: One-line background summary, or a falsy value to
            omit it.
        writing_style_cot: One-line style requirement. When falsy, a built-in
            default style line is used instead.

    Returns:
        The prefix text, ending in a newline.
    """
    if config.cn_prompt_toggle:
        profile = "我完全理解了翻译的要求与原则,我将遵循您的指示进行翻译,以下是对原文的翻译:\n"
        opening = "我完全理解了翻译的步骤与原则,我将遵循您的指示,一步一步地翻译文本:\n"
        step_one = "###第一步:直译阶段\n"
        step_two = "###第二步:校正阶段\n"
        principles = "- 翻译原则: 忠实准确地进行翻译,不应随意删减或篡改,不应回避或淡化,以保持作品的原貌。\n"
        default_style = "- 翻译风格: 保持原文的叙述风格,适当增加文艺性描述,体现人物的情感和环境氛围。\n"
        step_three = "###第三步:意译阶段\n"
    else:
        profile = "I fully understand the translation requirements and principles. I will follow your instructions to translate. Here is the translation of the original text:\n"
        opening = "I fully understand the translation steps and principles. I will follow your instructions and translate the text step by step:\n"
        step_one = "### Step 1: Literal Translation Phase\n"
        step_two = "### Step 2: Correction Phase\n"
        principles = "- Translation Principles: Translate faithfully and accurately. Do not arbitrarily delete or alter, avoid or downplay, to maintain the original appearance of the work.\n"
        default_style = "- Translation Style: Maintain the original narrative style, appropriately enhance literary descriptions, and reflect the characters' emotions and the environmental atmosphere.\n"
        step_three = "### Step 3: Free Translation Phase\n"

    if not config.cot_toggle:
        return profile

    # "{Direct translation}" is a literal marker, not a format field.
    parts = [opening, step_one, "{Direct translation}\n", step_two, principles]
    if glossary_prompt_cot:
        parts.append(f"{glossary_prompt_cot}\n")
    if characterization_cot:
        parts.append(f"{characterization_cot}\n")
    if world_building_cot:
        parts.append(f"{world_building_cot}\n")
    # A style line is always present: the caller's, or the built-in default.
    parts.append(f"{writing_style_cot}\n" if writing_style_cot else default_style)
    parts.append(step_three)
    return "".join(parts)
# 构建用户提问前文:
def build_userQueryPrefix(config: TranslatorConfig) -> str:
    """Return the line that introduces the text to be translated.

    Args:
        config: Translator settings; ``cn_prompt_toggle`` selects Chinese or
            English wording and ``cot_toggle`` selects the step-by-step
            variant.

    Returns:
        The introduction line, ending in a newline. The plain variants start
        with one space; the step-by-step variants do not.
    """
    if config.cn_prompt_toggle:
        profile = " ###这是你接下来的翻译任务,原文文本如下\n"
        profile_cot = "###这是你接下来的翻译任务,原文文本如下\n"
    else:
        profile = " ###This is your next translation task, the original text is as follows\n"
        profile_cot = "###This is your next translation task, the original text is as follows\n"

    return profile_cot if config.cot_toggle else profile
# 构建模型回复前文
def build_modelResponsePrefix(config: TranslatorConfig) -> str:
    """Return the acknowledgement sentence that opens the model's reply.

    Args:
        config: Translator settings; ``cn_prompt_toggle`` selects Chinese or
            English wording and ``cot_toggle`` selects the step-by-step
            variant.

    Returns:
        The acknowledgement sentence, without a trailing newline.
    """
    if config.cn_prompt_toggle:
        profile = "我完全理解了翻译的要求与原则,我将遵循您的指示进行翻译,以下是对原文的翻译:"
        profile_cot = "我完全理解了翻译的步骤与原则,我将遵循您的指示,一步一步地翻译文本:"
    else:
        profile = "I fully understand the translation requirements and principles. I will follow your instructions to translate. Here is the translation of the original text:"
        profile_cot = "I fully understand the translation steps and principles. I will follow your instructions and translate the text step by step:"

    return profile_cot if config.cot_toggle else profile