Mpikambana:Bot-Jagwar/mpampiditeny.py

# -*- coding: utf-8 -*-
import wikipedia, time, random, wikipedia as pywikibot
import re, sys
"""
This file is part of Bot-Jagwar

    Bot-Jagwar is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    Bot-Jagwar is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with Bot-Jagwar.  If not, see <http://www.gnu.org/licenses/>.

* This script reads translations from a text file and creates the corresponding pages on a given wiki.
* To function properly, the settings below must be set first.
* No guarantee is given that this script is free of errors.
----------
Format of each line of the input text file (the separator is "->"):
word -> translation -> word type -> language code (new line)
"""
    
# Text passed as the second argument to Page.put() when a new page is created.
summary = "teny iditra vaovao"
# Target wiki object, requested with code 'mg' and family 'wiktionary'.
site = wikipedia.getSite('mg', 'wiktionary')

# Set the framework's put throttle to 1 and call setDelay() afterwards
# (presumably so the new value takes effect -- confirm against the framework).
wikipedia.config.put_throttle = 1
wikipedia.put_throttle.setDelay()

# Append-mode handle on the log of processed entries; append() writes to it.
# NOTE(review): nothing in the visible code closes this handle.
exf = open('existingpages.txt', 'a')


def trydecode(s, decs):
    """Decode *s* with the first codec in *decs* that accepts it.

    The codecs are tried in the order given.  The result of the first
    ``s.decode(codec)`` call that does not raise ``UnicodeError`` is
    returned.  If every codec fails, or *decs* is empty, *s* itself is
    returned unchanged.
    """
    result = s
    for codec in decs:
        try:
            result = s.decode(codec)
        except UnicodeError:
            # This codec cannot handle the data; move on to the next one.
            continue
        else:
            break
    return result

def tryencode(s, encs):
    """Encode *s* with the first codec in *encs* that accepts it.

    The codecs are tried in the order given.  The result of the first
    ``s.encode(codec)`` call that does not raise ``UnicodeError`` is
    returned.  If every codec fails, or *encs* is empty, *s* itself is
    returned unchanged.
    """
    # Iterate over the ``encs`` parameter; the previous code looped over an
    # undefined name and raised NameError on every call.
    for codec in encs:
        try:
            return s.encode(codec)
        except UnicodeError:
            continue
    return s

# (word, language) pairs read back from existingpages.txt; exists() looks
# entries up in this list.
existings = []
print("Maka ny lisitry ny pejy efa misy")
# The with-block closes the read handle as soon as the log has been parsed.
with open('existingpages.txt', 'r') as known_pages:
    for line in known_pages:
        # Expected layout: word -> translation -> POS -> language
        items = re.match(r'(.*)[ ]?->[ ]?(.*)[ ]?->[ ]?(.*)[ ]?->[ ]?(.*)', line)
        if items is None:
            print('Tsy ampy fampahalalana')
            continue
        items = list(items.groups())
        items[0] = trydecode(items[0], ['utf8', 'latin1'])
        items[3] = trydecode(items[3], ['utf8', 'latin1'])

        # Strip both fields so stray whitespace (for example a trailing
        # carriage return) cannot make a later lookup miss.
        existings.append((items[0].strip(), items[3].strip()))

def exists(lang, ent):
    """Return True if the (entry, language) pair is listed in ``existings``.

    Parameters:
        lang -- language code; newline characters at either end are ignored.
        ent  -- the entry (word) to look up.

    The pairs in ``existings`` are stored as decoded text, so the lookup is
    done on text as well.  The previous implementation UTF-8 encoded both
    arguments first, and an encoded byte string never compares equal to
    decoded text once it contains a non-ASCII character, so such entries
    were never found.

    Byte-string arguments are still accepted and are decoded as UTF-8; if
    that decoding fails the pair is reported as absent.
    """
    try:
        if isinstance(ent, bytes):
            ent = ent.decode('utf8')
        if isinstance(lang, bytes):
            lang = lang.decode('utf8')
    except UnicodeDecodeError:
        return False
    return (ent, lang.strip('\n')) in existings

def append(infos):
    """Record one processed entry in the log file behind ``exf``.

    Parameters:
        infos -- mapping with at least the keys 'word', 'translation',
                 'POS' and 'lang'; any other keys are ignored.

    The entry is written as a single UTF-8 encoded line of the form
    ``word -> translation -> POS -> lang``.  Always returns True.
    """
    # The former loop over ``infos`` only rebound its loop variable and had
    # no effect on the written record, so it has been removed.
    record = u"%(word)s -> %(translation)s -> %(POS)s -> %(lang)s\n" % infos
    exf.write(record.encode('utf8'))
    # Flush immediately so the record is not left in the write buffer if
    # the run is interrupted.
    exf.flush()
    return True


def structtime(i):
    """Split a duration in seconds into a 4-tuple of floats.

    The first element is ``abs(float(i) / 86400)``.  Each following element
    is the fractional part of the previous one multiplied by 24, 60 and 60
    in turn.  Every element therefore keeps its own fractional part.
    """
    parts = [abs(float(i) / 86400)]
    for factor in (24, 60, 60):
        previous = parts[-1]
        parts.append(float((previous - int(previous)) * factor))
    return tuple(parts)
    

def main():
    """Publish every entry of the file named by ``sys.argv[1]``.

    Each entry yielded by getfilecontent() is handed to putpage().  After
    each one, two progress lines are printed: the time taken for that
    entry multiplied by the number of lines not yet counted, and a blended
    value built from the running mean of those products.
    """
    processed = 0
    total = 0
    average = 0
    for entry in getfilecontent(sys.argv[1]):
        processed += 1
        remaining = entry['nitems'] - processed
        started = time.time()

        putpage(entry)
        total += abs(float((started - time.time()) * remaining))
        average = total / processed
        # Weight the running mean 5:1 against the newest sample.
        average = (5 * average + abs(float((started - time.time()) * remaining))) / 6
        print("fotoana mety ilaina: %dd %dh %2dm %2ds" % (structtime(abs(float((started - time.time()) * remaining)))))
        print("elanelana voakajy : %dd %dh %2dm %2ds" % structtime(average))


        
def getfilecontent(filename):
    """Yield one entry dict for every usable line of the translation file.

    Each line is expected to look like
    ``word -> translation -> POS -> language``.

    Parameters:
        filename -- path of the UTF-8 encoded input file.

    Yields dicts with the keys 'word', 'translation', 'POS' and 'lang'
    (stripped text) plus 'nitems', the total number of lines in the file.

    Lines are skipped when they are not valid UTF-8, do not match the
    expected layout, have an empty word, or have the language 'en'.
    """
    pattern = re.compile(r'(.*)[ ]?->[ ]?(.*)[ ]?->[ ]?(.*)[ ]?->[ ]?(.*)')
    # Read everything up front and close the handle right away; the total
    # line count is needed before the first entry is yielded.
    with open(filename, 'rb') as source:
        lines = source.readlines()
    nitems = len(lines)

    for raw in lines:
        # Decode the whole line as UTF-8 so that all four fields are handled
        # the same way.  Previously POS and language went through an implicit
        # ASCII conversion, and lines with non-ASCII values there were
        # silently dropped.
        try:
            line = raw.decode('utf8')
        except UnicodeDecodeError as err:
            print('Nahitana hadisoana: %s' % err)
            continue

        items = pattern.match(line)
        if items is None:
            print('Tsy ampy fampahalalana')
            continue

        title, trans, pos, lang = [field.strip() for field in items.groups()]
        # Checked after stripping so whitespace-only words are skipped too.
        if not title:
            continue
        # Can be adapted as needed (when translating Malagasy words into a
        # foreign language, swap trans and title).
        if lang == 'en':
            continue

        yield {'word': title,  # same remark as above
               'translation': trans,
               'POS': pos,
               'lang': lang,
               'nitems': nitems}

def putpage(infos):
    """Create or extend the wiki page for one dictionary entry.

    Parameters:
        infos -- dict containing 'word', 'translation', 'POS' and 'lang'.
                 The key 'origin' is set on it as a side effect.

    If a second command-line argument is given and non-empty, it is
    embedded in a ``{{fiaviana dikanteny|...}}`` template appended to the
    definition line; otherwise that slot is left empty.

    Nothing is written when the word is empty, when exists() already lists
    the (language, word) pair, or when the existing page text already
    contains a section marker for the language.  Otherwise the page is
    created, or the new section is put in front of the existing text, and
    the entry is recorded through append().

    Errors while reading or updating an existing page are retried up to
    3 times, and any other error up to 54 times, sleeping 5 seconds before
    each retry.  Both kinds of retry share one counter.
    """
    # Always define 'origin'.  Previously an empty second argument left the
    # key unset and the template below failed with KeyError.
    source = sys.argv[2] if len(sys.argv) >= 3 else ""
    if source:
        infos['origin'] = "{{fiaviana dikanteny|%s}}" % source
    else:
        infos['origin'] = ""
    c = u"""
=={{=%(lang)s=}}==

{{-%(POS)s-|%(lang)s}}
'''{{subst:BASEPAGENAME}}''' {{pron X-SAMPA||%(lang)s}} {{pron||%(lang)s}}
# %(translation)s %(origin)s

"""%infos
    wikipedia.output(c)
    if len(infos['word'].strip()) == 0:
        return
    p = wikipedia.Page(site, infos['word'])
    attempts = 0
    while True:
        wikipedia.output('>>> \03{lightgreen}%(word)s \03{default} <<<' % infos)
        try:
            if exists(infos['lang'], infos['word']):
                print("Efa misy ilay teny iditra amin'ilay fiteny")
                return
            elif not p.exists():
                # Brand-new page: the section is the whole content.
                p.put(c, summary)
                append(infos)
                break
            else:
                try:
                    p_cont = p.get()
                    # Either marker means the language section is present.
                    if p_cont.find('=%s=' % infos['lang']) != -1 or p_cont.find('|%s}}' % infos['lang']) != -1:
                        print('efa misy ilay pejy ary ilay fizarana')
                        append(infos)
                        return
                    else:
                        print('efa misy ilay pejy fa mbola tsy misy ilay fizarana')
                        # Put the new section in front of the existing text.
                        p.put(c + "\n" + p_cont, 'fizarana vaovao')
                        append(infos)
                        break
                except Exception:
                    print('nahitana hadisoana')
                    if attempts < 3:
                        time.sleep(5)
                        attempts += 1
                        continue
                    else:
                        break
        except Exception as e:
            print("Nahitana hadisoana: %s\nFamerenana ny tao afaka 5 segondra..." % e.message)
            if attempts < 54:
                time.sleep(5)
                attempts += 1
            else:
                break

# Run the import only when this file is executed as a script.
if __name__ == '__main__':
    try:
        main()
    finally:
        # Always call the framework's stopme(), even if main() raised.
        wikipedia.stopme()