25개 이상의 토픽을 선택하실 수 없습니다. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

translate.py 12 KiB

12 년 전
12 년 전
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409
  1. # Copyright (c) 2013, Web Notes Technologies Pvt. Ltd.
  2. # MIT License. See license.txt
  3. from __future__ import unicode_literals
  4. """
  5. Contributing:
  6. 1. Add the .csv file
  7. 2. Run import
  8. 3. Then run translate
  9. """
  10. import webnotes
  11. import os
  12. import codecs
  13. import json
  14. import re
  15. from csv import reader
  16. from webnotes.modules import get_doc_path
  17. from webnotes.utils import get_base_path, cstr
  18. def translate(lang=None):
  19. languages = [lang]
  20. if lang=="all" or lang==None:
  21. languages = get_all_languages()
  22. print "Extracting / updating translatable strings..."
  23. build_message_files()
  24. print "Compiling messages in one file..."
  25. export_messages(lang, '_lang_tmp.csv')
  26. for lang in languages:
  27. if lang != "en":
  28. filename = 'app/translations/'+lang+'.csv'
  29. print "For " + lang + ":"
  30. print "Translating via Google Translate..."
  31. google_translate(lang, '_lang_tmp.csv', filename)
  32. print "Updating language files..."
  33. import_messages(lang, filename)
  34. print "Deleting temp file..."
  35. os.remove('_lang_tmp.csv')
  36. def get_all_languages():
  37. try:
  38. return [f[:-4] for f in os.listdir("app/translations") if f.endswith(".csv")]
  39. except OSError, e:
  40. if e.args[0]==2:
  41. return []
  42. else:
  43. raise e
  44. def get_lang_dict():
  45. languages_path = os.path.join(get_base_path(), "app", "translations", "languages.json")
  46. if os.path.exists(languages_path):
  47. with open(languages_path, "r") as langfile:
  48. return json.loads(langfile.read())
  49. else: return {}
  50. def update_translations():
  51. """
  52. compare language file timestamps with last updated timestamps in `.wnf-lang-status`
  53. if timestamps are missing / changed, build new `.json` files in the `lang folders`
  54. """
  55. langstatus = {}
  56. languages = get_all_languages()
  57. message_updated = False
  58. status_file_path = "app/.wnf-lang-status"
  59. if not os.path.exists(os.path.join('app', 'translations')):
  60. return
  61. if os.path.exists(status_file_path):
  62. with open(status_file_path, "r") as langstatusfile:
  63. langstatus = eval(langstatusfile.read())
  64. for lang in languages:
  65. filename = os.path.join('app', 'translations', lang + '.csv')
  66. if langstatus.get(lang, None)!=os.path.getmtime(filename):
  67. print "Setting up lang files for " + lang + "..."
  68. if not message_updated:
  69. print "Extracting / updating translatable strings..."
  70. build_message_files()
  71. message_updated = True
  72. print "Writing translations..."
  73. import_messages(lang, filename)
  74. langstatus[lang] = os.path.getmtime(filename)
  75. with open(status_file_path, "w") as langstatusfile:
  76. langstatus = langstatusfile.write(str(langstatus))
  77. def build_message_files():
  78. """build from doctypes, pages, database and framework"""
  79. if not webnotes.conn:
  80. webnotes.connect()
  81. build_for_pages('lib/core')
  82. build_for_pages('app')
  83. build_from_doctype_code('lib/core')
  84. build_from_doctype_code('app')
  85. # doctype
  86. build_from_database()
  87. build_for_framework('lib/webnotes', 'py', with_doctype_names=True)
  88. build_for_framework('lib/public/js/wn', 'js')
  89. build_for_framework('app/public/js', 'js', with_doctype_names=True)
  90. def build_for_pages(path):
  91. """make locale files for framework py and js (all)"""
  92. messages = []
  93. for (basepath, folders, files) in os.walk(path):
  94. if os.path.basename(os.path.dirname(basepath))=="page":
  95. messages_js, messages_py = [], []
  96. for fname in files:
  97. fname = cstr(fname)
  98. if fname.endswith('.js'):
  99. messages_js += get_message_list(os.path.join(basepath, fname))
  100. if fname.endswith('.py'):
  101. messages_py += get_message_list(os.path.join(basepath, fname))
  102. if messages_js:
  103. write_messages_file(basepath, messages_js, "js")
  104. if messages_py:
  105. write_messages_file(basepath, messages_py, "py")
  106. def build_from_database():
  107. """make doctype labels, names, options, descriptions"""
  108. def get_select_options(doc):
  109. if doc.doctype=="DocField" and doc.fieldtype=='Select' and doc.options \
  110. and not doc.options.startswith("link:") \
  111. and not doc.options.startswith("attach_files:"):
  112. return doc.options.split('\n')
  113. else:
  114. return []
  115. build_for_doc_from_database(webnotes._dict({
  116. "doctype": "DocType",
  117. "module_field": "module",
  118. "DocType": ["name", "description", "module"],
  119. "DocField": ["label", "description"],
  120. "custom": get_select_options
  121. }))
  122. def build_for_doc_from_database(fields):
  123. for item in webnotes.conn.sql("""select name from `tab%s`""" % fields.doctype, as_dict=1):
  124. messages = []
  125. doclist = webnotes.bean(fields.doctype, item.name).doclist
  126. for doc in doclist:
  127. if doc.doctype in fields:
  128. messages += map(lambda x: x in fields[doc.doctype] and doc.fields.get(x) or None,
  129. doc.fields.keys())
  130. if fields.custom:
  131. messages += fields.custom(doc)
  132. doc = doclist[0]
  133. if doc.fields.get(fields.module_field):
  134. doctype_path = get_doc_path(doc.fields[fields.module_field],
  135. doc.doctype, doc.name)
  136. write_messages_file(doctype_path, messages, 'doc')
  137. def build_for_framework(path, mtype, with_doctype_names = False):
  138. """make locale files for framework py and js (all)"""
  139. messages = []
  140. for (basepath, folders, files) in os.walk(path):
  141. for fname in files:
  142. fname = cstr(fname)
  143. if fname.endswith('.' + mtype):
  144. messages += get_message_list(os.path.join(basepath, fname))
  145. # append module & doctype names
  146. if with_doctype_names:
  147. for m in webnotes.conn.sql("""select name, module from `tabDocType`"""):
  148. messages.append(m[0])
  149. messages.append(m[1])
  150. # append labels from config.json
  151. config = webnotes.get_config()
  152. for moduleinfo in config["modules"].values():
  153. if moduleinfo.get("label"):
  154. messages.append(moduleinfo["label"])
  155. if messages:
  156. write_messages_file(path, messages, mtype)
  157. def build_from_doctype_code(path):
  158. """walk and make locale files in all folders"""
  159. for (basepath, folders, files) in os.walk(path):
  160. messagespy = []
  161. messagesjs = []
  162. for fname in files:
  163. fname = cstr(fname)
  164. if fname.endswith('py'):
  165. messagespy += get_message_list(os.path.join(basepath, fname))
  166. if fname.endswith('js'):
  167. messagesjs += get_message_list(os.path.join(basepath, fname))
  168. if messagespy:
  169. write_messages_file(basepath, messagespy, 'py')
  170. if messagespy:
  171. write_messages_file(basepath, messagesjs, 'js')
  172. def get_message_list(path):
  173. """get list of messages from a code file"""
  174. import re
  175. messages = []
  176. with open(path, 'r') as sourcefile:
  177. txt = sourcefile.read()
  178. messages += re.findall('_\("([^"]*)"\)', txt)
  179. messages += re.findall("_\('([^']*)'\)", txt)
  180. messages += re.findall('_\("{3}([^"]*)"{3}\)', txt, re.S)
  181. return messages
  182. def write_messages_file(path, messages, mtype):
  183. """write messages to translation file"""
  184. if not os.path.exists(path):
  185. return
  186. if not os.path.exists(os.path.join(path, 'locale')):
  187. os.makedirs(os.path.join(path, 'locale'))
  188. fname = os.path.join(path, 'locale', '_messages_' + mtype + '.json')
  189. messages = list(set(messages))
  190. filtered = []
  191. for m in messages:
  192. if m and re.search('[a-zA-Z]+', m):
  193. filtered.append(m)
  194. with open(fname, 'w') as msgfile:
  195. msgfile.write(json.dumps(filtered, indent=1))
  196. def export_messages(lang, outfile):
  197. """get list of all messages"""
  198. messages = {}
  199. # extract messages
  200. for (basepath, folders, files) in os.walk('.'):
  201. def _get_messages(messages, basepath, mtype):
  202. mlist = get_messages(basepath, mtype)
  203. if not mlist:
  204. return
  205. # update messages with already existing translations
  206. langdata = get_lang_data(basepath, lang, mtype)
  207. for m in mlist:
  208. if not messages.get(m):
  209. messages[m] = langdata.get(m, "")
  210. if os.path.basename(basepath)=='locale':
  211. _get_messages(messages, basepath, 'doc')
  212. _get_messages(messages, basepath, 'py')
  213. _get_messages(messages, basepath, 'js')
  214. # remove duplicates
  215. if outfile:
  216. from csv import writer
  217. with open(outfile, 'w') as msgfile:
  218. w = writer(msgfile)
  219. keys = messages.keys()
  220. keys.sort()
  221. for m in keys:
  222. w.writerow([m.encode('utf-8'), messages.get(m, '').encode('utf-8')])
  223. def import_messages(lang, infile):
  224. """make individual message files for each language"""
  225. data = dict(get_all_messages_from_file(infile))
  226. for (basepath, folders, files) in os.walk('.'):
  227. def _update_lang_file(mtype):
  228. """create a langauge file for the given message type"""
  229. messages = get_messages(basepath, mtype)
  230. if not messages: return
  231. # read existing
  232. langdata = get_lang_data(basepath, lang, mtype)
  233. # update fresh
  234. for m in messages:
  235. if data.get(m):
  236. langdata[m] = data.get(m)
  237. if langdata:
  238. # write new langfile
  239. langfilename = os.path.join(basepath, lang + '-' + mtype + '.json')
  240. with open(langfilename, 'w') as langfile:
  241. langfile.write(json.dumps(langdata, indent=1, sort_keys=True).encode('utf-8'))
  242. #print 'wrote ' + langfilename
  243. if os.path.basename(basepath)=='locale':
  244. # make / update lang files for each type of message file (doc, js, py)
  245. # example: hi-doc.json, hi-js.json, hi-py.json
  246. _update_lang_file('doc')
  247. _update_lang_file('js')
  248. _update_lang_file('py')
  249. def load_doc_messages(module, doctype, name):
  250. if webnotes.lang=="en":
  251. return {}
  252. if not webnotes.local.translated_docs:
  253. webnotes.local.translated_docs = []
  254. doc_path = get_doc_path(module, doctype, name)
  255. # don't repload the same doc again
  256. if (webnotes.lang + ":" + doc_path) in webnotes.local.translated_docs:
  257. return
  258. if not docs_loaded:
  259. webnotes.local.translate_docs_loaded = []
  260. webnotes.local.translated_docs.append(webnotes.lang + ":" + doc_path)
  261. webnotes.local.translations.update(get_lang_data(doc_path, None, 'doc'))
  262. def get_lang_data(basepath, lang, mtype):
  263. """get language dict from langfile"""
  264. # add "locale" folder if reqd
  265. if os.path.basename(basepath) != 'locale':
  266. basepath = os.path.join(basepath, 'locale')
  267. if not lang: lang = webnotes.local.lang
  268. path = os.path.join(basepath, lang + '-' + mtype + '.json')
  269. langdata = {}
  270. if os.path.exists(path):
  271. with codecs.open(path, 'r', 'utf-8') as langfile:
  272. langdata = json.loads(langfile.read())
  273. return langdata
  274. def get_messages(basepath, mtype):
  275. """load list of messages from _message files"""
  276. # get message list
  277. path = os.path.join(basepath, '_messages_' + mtype + '.json')
  278. messages = []
  279. if os.path.exists(path):
  280. with open(path, 'r') as msgfile:
  281. messages = json.loads(msgfile.read())
  282. return messages
  283. def update_lang_js(jscode, path):
  284. return jscode + "\n\n$.extend(wn._messages, %s)" % \
  285. json.dumps(get_lang_data(path, webnotes.lang, 'js'))
  286. def get_all_messages_from_file(path):
  287. with codecs.open(path, 'r', 'utf-8') as msgfile:
  288. data = msgfile.read()
  289. data = reader([r.encode('utf-8') for r in data.splitlines()])
  290. newdata = []
  291. for row in data:
  292. newrow = []
  293. for val in row:
  294. newrow.append(unicode(val, 'utf-8'))
  295. newdata.append(newrow)
  296. return newdata
  297. def google_translate(lang, infile, outfile):
  298. """translate objects using Google API. Add you own API key for translation"""
  299. data = get_all_messages_from_file(infile)
  300. import requests
  301. from webnotes import conf
  302. old_translations = {}
  303. # update existing translations
  304. if os.path.exists(outfile):
  305. with codecs.open(outfile, "r", "utf-8") as oldfile:
  306. old_data = oldfile.read()
  307. old_translations = dict(reader([r.encode('utf-8').strip() for r in old_data.splitlines()]))
  308. with open(outfile, 'w') as msgfile:
  309. from csv import writer
  310. w = writer(msgfile)
  311. for row in data:
  312. if row[0] and row[0].strip():
  313. if old_translations.get(row[0].strip()):
  314. row[1] = old_translations[row[0].strip()]
  315. else:
  316. print 'translating: ' + row[0]
  317. response = requests.get("""https://www.googleapis.com/language/translate/v2""",
  318. params = {
  319. "key": conf.google_api_key,
  320. "source": "en",
  321. "target": lang,
  322. "q": row[0]
  323. })
  324. if "error" in response.json:
  325. print response.json
  326. continue
  327. row[1] = response.json["data"]["translations"][0]["translatedText"]
  328. if not row[1]:
  329. row[1] = row[0] # google unable to translate!
  330. row[1] = row[1].encode('utf-8')
  331. row[0] = row[0].encode('utf-8')
  332. w.writerow(row)