# -*- coding: utf-8 -*-
import wikipedia, time, random, wikipedia as pywikibot
import re, sys
"""
This file is part of Bot-Jagwar
Bot-Jagwar is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Bot-Jagwar is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Bot-Jagwar. If not, see <http://www.gnu.org/licenses/>.
* This file reads translations from a text file and creates the corresponding pages on a given wiki.
* To function properly, the settings below must be set first.
* I give no guarantee that this script is free of failures.
----------
Format of each line of the input text file:
word -> translation -> word type -> language code (new line)
"""
# Edit summary passed to Page.put() when putpage() creates a brand-new page.
summary = "teny iditra vaovao"
# Target wiki handle, requested with the arguments 'mg' and 'wiktionary'
# (presumably language code and wiki family -- confirm against getSite()).
site = wikipedia.getSite('mg','wiktionary')
# Override the configured put throttle with 1, then call setDelay() on the
# throttle object.
# NOTE(review): setDelay() is called without arguments -- confirm that it
# actually picks up the value assigned on the previous line.
(wikipedia.config).put_throttle = int(1)
wikipedia.put_throttle.setDelay()
# Log of processed entries, written by append(). Append mode also creates the
# file when it is missing, so the read of the same file further down does not
# fail on a first run.
exf = file('existingpages.txt','a')
def trydecode(s, decs):
    """Decode ``s`` with the first codec in ``decs`` that accepts it.

    The codecs are tried in the order given.  When every one of them raises
    ``UnicodeError`` (or ``decs`` is empty), ``s`` is returned unchanged.
    """
    for codec in decs:
        try:
            decoded = s.decode(codec)
        except UnicodeError:
            # This codec cannot read the data; fall through to the next one.
            pass
        else:
            return decoded
    return s
def tryencode(s, encs):
    """Encode ``s`` with the first codec in ``encs`` that can represent it.

    The codecs are tried in the order given.  When every one of them raises
    ``UnicodeError`` (or ``encs`` is empty), ``s`` is returned unchanged.
    """
    # The loop must walk the ``encs`` parameter; the previous code iterated
    # over an undefined name and raised NameError on every call.
    for codec in encs:
        try:
            return s.encode(codec)
        except UnicodeError:
            continue
    return s
# (word, language) pairs already recorded in the log file; consulted by
# exists() so that processed entries are not uploaded again.
existings = []
print("Maka ny lisitry ny pejy efa misy")
# A log line carries four "->"-separated fields: word, translation, POS, language.
# Compiled once instead of once per line.
_existing_line = re.compile('(.*)[ ]?->[ ]?(.*)[ ]?->[ ]?(.*)[ ]?->[ ]?(.*)')
# The 'with' block closes the read handle as soon as the list is loaded; the
# separate append handle on the same file stays open for later writes.
with open('existingpages.txt', 'r') as _existing_file:
    for line in _existing_file:
        items = _existing_line.match(line)
        if items is None:
            print('Tsy ampy fampahalalana')
            continue
        items = list(items.groups())
        # Try UTF-8 first, then Latin-1, for the two fields that are kept.
        items[0] = trydecode(items[0], ['utf8', 'latin1'])
        items[3] = trydecode(items[3], ['utf8', 'latin1'])
        # Strip both fields: stray whitespace (e.g. a trailing '\r') must not
        # prevent a later match on either the word or the language.
        existings.append((items[0].strip(), items[3].strip()))
def exists(lang, ent, known=None):
    """Return True when the (entry, language) pair has already been recorded.

    lang  -- language code of the entry; newline characters at either end are
             ignored.
    ent   -- the entry (page title) to look up.
    known -- optional collection of (entry, language) pairs to search; when
             omitted, the module-level ``existings`` list is used.

    The recorded pairs are stored as decoded text, so the lookup is done on
    text as well.  Encoding the arguments to UTF-8 bytes before comparing, as
    the previous code did, meant that entries containing non-ASCII characters
    were never found.
    """
    def as_text(value):
        # Byte strings are decoded as UTF-8; text passes through untouched.
        if isinstance(value, bytes):
            return value.decode('utf8')
        return value

    if known is None:
        known = existings
    try:
        key = (as_text(ent), as_text(lang).strip('\n'))
    except UnicodeError:
        # Input that is not valid UTF-8 cannot equal any stored text.
        return False
    return key in known
def append(infos):
    """Record one processed entry in the log file and return True.

    infos -- mapping with at least the keys 'word', 'translation', 'POS' and
             'lang'; extra keys are ignored.

    The record is written as "word -> translation -> POS -> lang", encoded as
    UTF-8, through the module-level ``exf`` handle.
    """
    # The earlier loop that decoded each dict *key* into a throwaway variable
    # changed nothing and has been dropped.
    record = u"%(word)s -> %(translation)s -> %(POS)s -> %(lang)s\n" % infos
    exf.write(record.encode('utf8'))
    # Flush right away so the record is not left in the buffer if the run is
    # interrupted.
    exf.flush()
    return True
def structtime(i):
    """Break a duration given in seconds into (days, hours, minutes, seconds).

    The sign of ``i`` is discarded.  All four values are floats that keep
    their fractional part: the first is the whole duration expressed in days,
    and each following value is the fractional part of the previous one
    rescaled to the next smaller unit.  Callers truncate them when
    formatting (e.g. with ``%d``).
    """
    def leftover(value, factor):
        # Fractional part of ``value`` expressed in the next smaller unit.
        return float((value - int(value)) * factor)

    days = abs(float(i) / 86400)
    hours = leftover(days, 24)
    minutes = leftover(hours, 60)
    seconds = leftover(minutes, 60)
    return (days, hours, minutes, seconds)
def main():
    """Upload every entry of the input file, printing a time estimate per entry.

    The input file name is taken from ``sys.argv[1]``.  After each entry two
    lines are printed: the estimate derived from the entry just processed
    (its duration multiplied by the number of lines not yet consumed), and a
    smoothed value mixing the running mean of those estimates (weight 5) with
    the latest one (weight 1).
    """
    count = 0
    som = 0
    for item in getfilecontent(sys.argv[1]):
        count += 1
        chrono = time.time()
        putpage(item)
        # Read the clock once so the accumulated, smoothed and printed figures
        # are all based on the same elapsed time.
        elapsed = time.time() - chrono
        estimate = abs(float(elapsed * (item['nitems'] - count)))
        som += estimate
        moy = (5 * (som / count) + estimate) / 6
        print("fotoana mety ilaina: %dd %dh %2dm %2ds" % structtime(estimate))
        print("elanelana voakajy : %dd %dh %2dm %2ds" % structtime(moy))
def getfilecontent(filename):
    """Yield one entry dict for every usable line of the translation file.

    filename -- path of a UTF-8 text file whose lines look like
                "word -> translation -> POS -> lang".

    Each yielded dict has the keys 'word', 'translation', 'POS' and 'lang'
    (text with surrounding whitespace removed) plus 'nitems', the total
    number of lines in the file.  Lines that do not match the pattern are
    reported and skipped; lines with an empty word, lines whose language is
    'en', and lines that are not valid UTF-8 are skipped silently.
    """
    entry_line = re.compile(b'(.*)[ ]?->[ ]?(.*)[ ]?->[ ]?(.*)[ ]?->[ ]?(.*)')
    # Read raw bytes and decode explicitly below; the 'with' block closes the
    # handle as soon as the lines are in memory.
    with open(filename, 'rb') as source:
        lines = source.readlines()
    nitems = len(lines)
    for line in lines:
        found = entry_line.match(line)
        if found is None:
            print('Tsy ampy fampahalalana')
            continue
        try:
            # All four fields are decoded as UTF-8.  POS and language used to
            # go through an implicit ASCII conversion, which silently dropped
            # any line where they held a non-ASCII character.
            title, trans, pos, lang = [
                field.decode('utf8') for field in found.groups()
            ]
        except UnicodeError:
            # Best effort: an undecodable line is skipped, not fatal.
            continue
        if len(title) < 1:
            continue
        # Can be adapted as needed: when translating Malagasy words into a
        # foreign language, swap trans and title.
        if lang.strip() == 'en':
            continue
        yield {'word': title.strip(),  # same remark as above
               'translation': trans.strip(),
               'POS': pos.strip(),
               'lang': lang.strip(),
               'nitems': nitems}
def putpage(infos):  # infos = dict holding word, translation, POS and lang
    """Create, or extend, the wiki page for one dictionary entry.

    infos -- dict with the keys 'word', 'translation', 'POS' and 'lang'.  An
             'origin' key is added to it (see below).

    Nothing is uploaded when the word is blank or when exists() reports the
    entry as already recorded.  Otherwise:
      * if the page does not exist it is created with the generated text;
      * if it exists and already mentions the language, it is left alone;
      * if it exists without the language, the generated text is put in
        front of the current content.
    Every path that finds or creates the entry on the wiki records it through
    append().  Failures are retried after a 5 second pause, up to the limits
    coded below.
    """
    # 'origin' is referenced by the template below, so it must always be set.
    # Previously it was left undefined on one of the argv paths, which raised
    # KeyError while formatting.
    origin = ""
    if len(sys.argv) >= 3 and sys.argv[2]:
        origin = "{{fiaviana dikanteny|%s}}" % (sys.argv[2])
    infos['origin'] = origin
    c = u"""
=={{=%(lang)s=}}==
{{-%(POS)s-|%(lang)s}}
'''{{subst:BASEPAGENAME}}''' {{pron X-SAMPA||%(lang)s}} {{pron||%(lang)s}}
# %(translation)s %(origin)s
""" % infos
    wikipedia.output(c)
    if len(infos['word'].strip()) == 0:
        return
    p = wikipedia.Page(site, infos['word'])
    # One counter is shared by both retry limits (3 for failures while
    # updating an existing page, 54 for anything else).
    i = 0
    while 1:
        wikipedia.output('>>> \03{lightgreen}%(word)s \03{default} <<<' % infos)
        try:
            if exists(infos['lang'], infos['word']):
                print("Efa misy ilay teny iditra amin'ilay fiteny")
                return
            if not p.exists():
                p.put(c, summary)
                append(infos)
                return
            try:
                p_cont = p.get()
                # The language counts as present when the page text contains
                # either "=<lang>=" or "|<lang>}}".
                if (p_cont.find('=%s=' % infos['lang']) != -1
                        or p_cont.find('|%s}}' % infos['lang']) != -1):
                    print('efa misy ilay pejy ary ilay fizarana')
                    append(infos)
                    return
                print('efa misy ilay pejy fa mbola tsy misy ilay fizarana')
                p.put(c + "\n" + p_cont, 'fizarana vaovao')
                append(infos)
                return
            except Exception:
                print('nahitana hadisoana')
                if i >= 3:
                    return
                time.sleep(5)
                i += 1
        except Exception as e:
            print("Nahitana hadisoana: %s\nFamerenana ny tao afaka 5 segondra..." % e.message)
            if i >= 54:
                return
            time.sleep(5)
            i += 1
# Script entry point: run the upload and always release resources afterwards.
if __name__ == '__main__':
    try:
        main()
    finally:
        try:
            wikipedia.stopme()
        finally:
            # The log handle opened at import time was never closed; close it
            # here even if stopme() itself fails.
            exf.close()