2024-01-15 09:14:38 +00:00
|
|
|
import asyncio
|
|
|
|
import os
|
|
|
|
|
2024-01-16 13:15:39 +00:00
|
|
|
from tqdm import tqdm
|
|
|
|
|
2024-01-17 03:08:39 +00:00
|
|
|
from .const import RED, GREEN, RESET
|
2024-01-16 13:15:39 +00:00
|
|
|
|
2024-01-15 09:14:38 +00:00
|
|
|
|
|
|
|
class BaseTranslateManager:
    """Translate dictionaries of strings in batches through a translator object.

    The translator passed to ``__init__`` must expose an awaitable
    ``translate_text(text, target_lang)``; its result is split with
    ``str.split``, so it is presumably a ``str`` (confirm against the
    translator implementation).
    """

    # Number of dictionary entries sent to the translator per request.
    bulk_size = 15
    # Token placed between values when a batch is joined into one text and
    # used again to split the translated text back into individual values.
    SEPARATOR = "<SEP>"
    # Language code -> language name. Not referenced by the methods below.
    LANG_MAPPER = {
        'ja': 'Japanese',
        # 'zh_hant': 'Taiwan',
        # 'en': 'English',
    }

    def __init__(self, dir_path, oai_trans_instance):
        """Store the translator and working directory, creating the latter.

        Args:
            dir_path: Path of the working directory; created (including
                parents) when it does not exist yet.
            oai_trans_instance: Object providing an awaitable
                ``translate_text(text, target_lang)``.
        """
        self.oai_trans = oai_trans_instance
        self._dir = dir_path
        self.dir_name = os.path.basename(self._dir)
        if not os.path.exists(self._dir):
            os.makedirs(self._dir)

    @staticmethod
    def split_dict_into_chunks(input_dict, chunk_size=20):
        """Split ``input_dict`` into a list of dicts of ``chunk_size`` items.

        Iteration order of ``input_dict`` is preserved; the final chunk holds
        the remainder and may be smaller. An empty dict yields ``[]``.

        Args:
            input_dict: Mapping to split.
            chunk_size: Maximum number of items per chunk.

        Returns:
            List of dicts that together contain every item of ``input_dict``.
        """
        temp = {}
        result = []
        last_index = len(input_dict) - 1

        for i, (k, v) in enumerate(input_dict.items()):
            temp[k] = v
            # Flush when the chunk is full or the input is exhausted.
            if (i + 1) % chunk_size == 0 or i == last_index:
                result.append(temp)
                temp = {}

        return result

    async def create_translate_task(self, data, target_lang):
        """Translate all values of ``data`` with a single translator call.

        The values are joined with ``SEPARATOR``, translated as one text, then
        split on ``SEPARATOR`` and paired with the original keys in order.

        Args:
            data: Mapping whose values are strings (they are joined with
                ``str.join``).
            target_lang: Passed through unchanged to
                ``oai_trans.translate_text``.

        Returns:
            Dict mapping the original keys to translated segments. When the
            translator returns a different number of segments than there are
            keys, the pairs are truncated to the shorter side. Returns ``{}``
            when any exception is raised; the error is printed, not re-raised.
        """
        try:
            keys = list(data.keys())
            values = list(data.values())
            combined_text = self.SEPARATOR.join(values)
            translated_text = await self.oai_trans.translate_text(combined_text, target_lang)
            translated_texts = translated_text.split(self.SEPARATOR)
            if len(translated_texts) != len(keys):
                # zip() below truncates silently, which can misalign keys and
                # translations; surface that instead of hiding it.
                print(
                    f"{RED}Translation segment count mismatch: "
                    f"expected {len(keys)}, got {len(translated_texts)}{RESET}"
                )
            return dict(zip(keys, translated_texts))
        except Exception as e:
            # Best-effort: a failed batch contributes nothing to the result.
            # End with RESET so the red colour does not leak into later output.
            print(f"{RED}Error during translation task: {e}{RESET}")
            return {}

    async def bulk_translate(self, need_trans_dict, target_lang):
        """Translate ``need_trans_dict`` in batches of ``bulk_size`` entries.

        One ``create_translate_task`` coroutine is created per batch and the
        results are merged as each one completes, while a tqdm progress bar
        counts finished batches.

        Args:
            need_trans_dict: Mapping of keys to the strings to translate.
            target_lang: Passed through to ``create_translate_task``; also
                shown in the progress-bar description.

        Returns:
            Dict merged from all batch results. Batches that failed contribute
            no entries (``create_translate_task`` returns ``{}`` for them).
        """
        split_data = self.split_dict_into_chunks(need_trans_dict, self.bulk_size)

        tasks = [self.create_translate_task(batch, target_lang) for batch in split_data]
        number_of_tasks = len(tasks)
        translated_dict = {}
        # Wrap only the bar itself in colour codes.
        bar_format = "{l_bar}%s{bar}%s{r_bar}" % (GREEN, RESET)
        desc = f"{target_lang} translate"
        with tqdm(
            total=number_of_tasks, ncols=100,
            desc=desc, bar_format=bar_format
        ) as pbar:
            # as_completed yields awaitables in completion order, not in
            # submission order.
            for task in asyncio.as_completed(tasks):
                pbar.set_description_str(f"{GREEN}{desc}{RESET}")
                result = await task
                translated_dict.update(result)
                pbar.update(1)

        return translated_dict
|