In [6]:
import pandas
import os
from yandex_translate import YandexTranslate, YandexTranslateException
def get_translated_column(
    yandex_token: str,
    table: pandas.DataFrame,
    column_name: str,
    lang_to_from: str = 'ru-en',
):
    """Translate every value of one table column with Yandex Translate.

    Args:
        yandex_token: API key used to construct the ``YandexTranslate`` client.
        table: Table that holds the column to translate.
        column_name: Label of the column whose values are translated.
        lang_to_from: Translation direction passed to the client as ``lang``
            (default ``'ru-en'``).

    Returns:
        The result of applying the translation to each cell of the column;
        for every cell the first element of the response's ``'text'`` list
        is taken.
    """
    client = YandexTranslate(yandex_token)

    def translate_cell(value):
        # One request per cell; only the first returned text is kept.
        response = client.translate(value, lang=lang_to_from)
        return response['text'][0]

    source_values = table[column_name]
    return source_values.apply(translate_cell)
def get_supported_langs(yandex_token: str, ui: str = 'ru', timeout: float = 10.0):
    """Fetch the Yandex Translate ``getLangs`` response and return it parsed.

    Args:
        yandex_token: API key sent as the ``key`` query parameter.
        ui: Value of the ``ui`` query parameter (default ``'ru'``, the value
            that was previously hard-coded in the URL).
        timeout: Seconds to wait for the server before ``requests`` raises a
            timeout error. Pass ``None`` to wait indefinitely.

    Returns:
        The response body decoded from JSON (the body itself is returned
        regardless of the HTTP status code).

    Raises:
        requests.exceptions.RequestException: On connection problems or timeout.
        json.JSONDecodeError: If the response body is not valid JSON.
    """
    import json
    import requests

    # Let requests build the query string so the token is URL-encoded properly,
    # and always bound the wait so a stalled connection cannot hang the kernel.
    response = requests.get(
        'https://translate.yandex.net/api/v1.5/tr.json/getLangs',
        params={'ui': ui, 'key': yandex_token},
        timeout=timeout,
    )
    return json.loads(response.text)
def translate_column(
    yandex_token_list: list,
    file_path: str,
    original_column_name: str,
    translated_column_name: str,
    save_to_file_path: str = None,
    lang_to_from: str = 'ru-en',
    start_row: int = 0,
    finish_row: int = None,
    batch: int = 10,
):
    """Translate one column of a CSV/Excel table in batches and save the result.

    The table is read from ``file_path``, rows ``start_row`` (inclusive) to
    ``finish_row`` (exclusive) of ``original_column_name`` are translated
    ``batch`` rows at a time, and the translations are written into
    ``translated_column_name``. When a token raises
    ``YandexTranslateException`` the next token from ``yandex_token_list`` is
    tried, until one succeeds or all of them are exhausted.

    Args:
        yandex_token_list: API keys, tried in order.
        file_path: Input file; ``.csv`` (read without a header row), ``.xlsx``
            or ``.xls``.
        original_column_name: Label of the column to translate.
        translated_column_name: Label of the column receiving the
            translations; created if it does not exist.
        save_to_file_path: Where to write the result; falls back to
            ``file_path`` when empty.
        lang_to_from: Translation direction, e.g. ``'ru-en'``.
        start_row: Position of the first row to translate.
        finish_row: Position one past the last row to translate; ``None``
            (or ``0``) means "through the end of the table".
        batch: Number of rows translated per step; must be at least 1.

    Returns:
        The table including the translated column.

    Raises:
        ValueError: For an unsupported file extension or ``batch < 1``.
        YandexTranslateException: When every token in ``yandex_token_list``
            has failed.
    """
    if batch < 1:
        raise ValueError(f"Размер пакета должен быть положительным: {batch}")

    # Load the table from the file.
    ext = os.path.splitext(file_path)[1]
    if ext == ".csv":
        table = pandas.read_csv(file_path, header=None)
    elif ext in [".xlsx", ".xls"]:
        table = pandas.read_excel(file_path)
    else:
        raise ValueError(f"Неподдерживаемый формат: {ext}")

    if translated_column_name not in table.columns:
        table[translated_column_name] = pandas.Series([], dtype=str)
    elif pandas.api.types.is_numeric_dtype(table[translated_column_name]):
        # An existing but empty column is parsed as float NaN; make it able to
        # hold strings before translations are written into it.
        table[translated_column_name] = table[translated_column_name].astype(object)

    if not finish_row:
        finish_row = len(table)
    # Never run past the end of the table.
    finish_row = min(finish_row, len(table))

    target_col_pos = table.columns.get_loc(translated_column_name)
    yandex_token_pos = 0
    for row_from in range(start_row, finish_row, batch):
        # Clamp the last batch so rows beyond finish_row are not translated.
        row_to = min(row_from + batch, finish_row)
        print(f"Перевод строк {row_from}-{row_to}.")

        # Keep switching to the next token until one works or none are left.
        while True:
            try:
                translated_column = get_translated_column(
                    yandex_token=yandex_token_list[yandex_token_pos],
                    table=table.iloc[row_from:row_to],
                    column_name=original_column_name,
                    lang_to_from=lang_to_from,
                )
                break
            except YandexTranslateException as exc:
                yandex_token_pos += 1
                if yandex_token_pos >= len(yandex_token_list):
                    raise YandexTranslateException(
                        "Лимиты на перевод достигнуты для всех переданных токенов."
                    ) from exc

        # Single positional assignment instead of chained indexing, which may
        # silently write into a temporary copy.
        table.iloc[row_from:row_to, target_col_pos] = translated_column.to_numpy()
        print(f"Перевод строк {row_from}-{row_to} сохранен.")

    # Save the table with the translated column.
    if not save_to_file_path:
        save_to_file_path = file_path
    # NOTE(review): the output is always written as CSV (with a header row),
    # even when the target path has an Excel extension — confirm this is intended.
    table.to_csv(save_to_file_path, index=False)
    return table
In [14]:
# A token can be obtained here: https://translate.yandex.ru/developers/keys
# Tokens are read from the YANDEX_TRANSLATE_TOKENS environment variable
# (comma-separated) so that credentials are never stored in the notebook.
yandex_token_list = [
    token.strip()
    for token in os.environ.get('YANDEX_TRANSLATE_TOKENS', '').split(',')
    if token.strip()
]
if not yandex_token_list:
    raise RuntimeError("Переменная окружения YANDEX_TRANSLATE_TOKENS не задана.")

# Translate the product names stored in column 0 of translate.csv.
# NOTE(review): the original comment described a Russian-to-English translation,
# but the direction passed below is 'en-ja' — confirm which one is intended.
translate_column(
    yandex_token_list=yandex_token_list,
    file_path='translate.csv',
    save_to_file_path='translate.csv',
    original_column_name=0,
    translated_column_name='1',
    lang_to_from='en-ja',
    start_row=0,
    finish_row=None,
)
Out[14]:
In [ ]:
In [10]:
# Use the first token from the YANDEX_TRANSLATE_TOKENS environment variable
# (comma-separated) instead of embedding a credential in the notebook.
resp = get_supported_langs(
    yandex_token=os.environ['YANDEX_TRANSLATE_TOKENS'].split(',')[0].strip()
)
In [13]:
# get_supported_langs returns the already-parsed JSON body, which has no
# `.text` attribute; display the parsed object itself.
resp
Out[13]:
In [ ]: