వాడుకరి:Mpradeepbot/mpc.villageCreate.py

వికీపీడియా నుండి
Jump to navigation Jump to search

ఈ ప్రోగ్రాముకు సహాకారిగా ఈ ఫైలును కూడా వాడండి.

import wikipedia, pagegenerators, catlib, config, codecs, time

def getCatList(catTitle):
    """Return the articles of the category named *catTitle*.

    Pass only the bare category name; the u'Category:' namespace
    prefix is prepended here.  The value returned is whatever
    catlib.Category.articles() produces for the current site.

    Requires both the 'wikipedia' and 'catlib' modules to be imported
    by the enclosing script.
    """
    site = wikipedia.getSite()
    category = catlib.Category(site, u'Category:' + catTitle)
    return category.articles()

def doReplacements(oText, en_list, te_list):
    """Substitute every placeholder in *oText* with its paired value.

    en_list holds the placeholder strings and te_list the replacement
    for the placeholder at the same index.  The pairs are applied one
    after another in list order, so a later pair also sees text that
    an earlier pair inserted.

    If the two lists differ in length, a message is emitted through
    wikipedia.output and oText is returned unchanged.
    """
    if len(en_list) != len(te_list):
        wikipedia.output(u'length of the lists do not match.')
        return oText

    replaced = oText
    for placeholder, value in zip(en_list, te_list):
        replaced = replaced.replace(placeholder, value)
    return replaced

def _linkTarget(rawLink):
    """Reduce the inside of a [[...]] link to a bare name.

    Everything from the first '(' or ',' onwards is dropped (together
    with a single space directly in front of it).  If a '|' is still
    present afterwards, the piece following the first '|' is used.
    """
    name = rawLink.split(' (')[0].split('(')[0].split(' ,')[0].split(',')[0]
    pieces = name.split('|')
    if len(pieces) > 1:
        name = pieces[1]
    return name


def _stubBlocker(page):
    """Return why *page* must not be written, or None if it may be created.

    The returned text is the leading part of the log message.  A page
    that does not exist, or that exists with empty text and can be
    edited, is not blocked.
    """
    try:
        current = page.get()
        if not page.canBeEdited():
            return u'page exists and locked'
    except wikipedia.IsRedirectPage:
        return u'page exists and is redirect'
    except wikipedia.NoPage:
        return None
    if current != '':
        return u'page exists'
    return None


def createSubPages(otext, hDes, mainContent, disambigTemplate, reviewTemplate, comments, mTitle):
    """Split one multi-entry page into per-entry stub pages.

    otext is expected to consist of 3*k + 2 lines: entry 0 occupies
    lines 0-1, and every later entry i occupies lines 3*i-1 and 3*i.
    It must also contain reviewTemplate exactly k times.  If either
    condition fails, nothing is written and otext is returned as is.

    For each entry three values are extracted and substituted for the
    placeholders u'gggg', u'mmmm' and u'jjjj' in the templates
    (presumably village, mandal and district -- confirm against the
    settings file):
      * gggg - text of line 0 from offset 3 up to the next "'''"
               (taken once, shared by all entries);
      * mmmm - cleaned target of the last [[...]] link on the entry's
               first line;
      * jjjj - cleaned target of the first [[...]] link on that line.

    A page titled u'gggg (mmmm)' holding the entry's two lines is then
    saved with edit summary `comments`, unless the page already exists
    with text, is locked, or is a redirect; that case is only logged.

    Parameters:
      otext            - current text of the page being split.
      hDes             - header template, expanded once using the
                         values of entry 0.
      mainContent      - per-entry template, expanded for every entry.
      disambigTemplate - text appended at the very end of the result.
      reviewTemplate   - marker counted in otext for validation.
      comments         - edit summary for the created stub pages.
      mTitle           - title of the page being split (log only).

    Returns the replacement text for the original page: the expanded
    header, one expanded mainContent per entry, then disambigTemplate.

    Progress is written to the module-level `logfile`.  Exceptions
    from page.put other than EditConflict and SpamfilterError
    propagate to the caller.
    """
    placeholders = [u'gggg', u'mmmm', u'jjjj']

    lines = otext.splitlines()

    # number of lines did not match the pattern
    if (len(lines) - 2) % 3 != 0:
        return otext

    # Floor division keeps this an int on every Python version.  The
    # check above guarantees len(lines) >= 2, so newpages >= 1.
    newpages = (len(lines) + 1) // 3

    # number of reviewTemplates did not match the pattern
    if newpages - 1 != otext.count(reviewTemplate):
        return otext

    print('%d new pages are being created' % newpages)
    logfile.write(u'* [[' + mTitle + u']]\r\n')

    site = wikipedia.getSite()

    # The shared name sits between the opening ''' and the next '''
    # on the first line; it is the same for every entry.
    firstLine = lines[0]
    gggg = firstLine[3:firstLine.find("'''", 3)]

    sections = []
    for i in range(newpages):
        # Entry 0 starts on line 0; every later entry starts one line
        # before its multiple of three.
        if i == 0:
            rowIndex = 0
        else:
            rowIndex = i * 3 - 1
        row = lines[rowIndex]

        mmmm = _linkTarget(row[row.rfind('[[') + 2:row.rfind(']]')])
        jjjj = _linkTarget(row[row.find('[[') + 2:row.find(']]')])
        values = [gggg, mmmm, jjjj]

        if i == 0:
            sections.append(doReplacements(hDes, placeholders, values) + u'\r\n')
        sections.append(doReplacements(mainContent, placeholders, values))

        gPageTitle = u'' + gggg + u' (' + mmmm + u')'
        logfile.write(u'** [[' + gPageTitle + u']]')
        gPageContents = u'' + row + u'\r\n' + lines[rowIndex + 1] + u'\r\n'

        # creating stub pages.
        page = wikipedia.Page(site, gPageTitle)
        blocker = _stubBlocker(page)
        if blocker is not None:
            # Exactly one log entry per skipped page; locked and
            # redirect pages used to be reported a second time as a
            # plain "page exists".
            logfile.write(u' - ' + blocker + u', should contain \r\n' + gPageContents + u'\r\n')
            continue

        logfile.write(u'\r\n')
        try:
            page.put(gPageContents, comment=comments)
        except wikipedia.EditConflict:
            logfile.write(u'Skipping [[%s]] because of edit conflict\r\n' % (page.title()))
        except wikipedia.SpamfilterError as url:
            logfile.write(u'Cannot change [[%s]] because of blacklist entry %s\r\n' % (page.title(), url))

    return u'\r\n'.join(sections) + u'\r\n\r\n' + disambigTemplate


def _readSetting(handle):
    """Read the next line of *handle* as UTF-8 with every CR and LF removed."""
    text = handle.readline().decode('utf8')
    return text.replace(u'\n', u'').replace(u'\r', u'')


# Log of everything the run did or skipped (wiki markup, UTF-8).
logfile = codecs.open('mpc.villageCreate.log', encoding='utf-8', mode='wb')
# Settings file: one value per line, in the order read below.
replaceFile = open('mpc.villageCreate.txt', 'rb')

# Skip the UTF-8 byte-order mark, but only when the file really starts
# with one.  Discarding three bytes unconditionally would eat the
# beginning of the first setting in a file saved without a BOM.
if replaceFile.read(len(codecs.BOM_UTF8)) != codecs.BOM_UTF8:
    replaceFile.seek(0)

# Edit summaries: for the rewritten source page and for the created
# pages.  They are kept exactly as read, line terminator included.
commentDis = replaceFile.readline().decode('utf8')
commentVil = replaceFile.readline().decode('utf8')

# read category
villageCategory = _readSetting(replaceFile)

# read header description
headerDescription = _readSetting(replaceFile)

# read main content line
mainContent = _readSetting(replaceFile)

# read Disambiguation Template line
disambigTemplate = _readSetting(replaceFile)

# read Review Template line
reviewTemplate = _readSetting(replaceFile)

# Pause after each successful rewrite of a source page.  The progress
# message is built from this value so the two cannot disagree (it used
# to announce 20 seconds while sleeping 10).
pauseSeconds = 10

try:
    pageList = getCatList(villageCategory)

    for page in pageList:

        # get the page from wikipedia
        try:
            # Load the page's text from the wiki
            original_text = page.get()
            if not page.canBeEdited():
                logfile.write(u'* Skipping locked page [[%s]]\r\n' % page.title())
                continue
        except wikipedia.NoPage:
            logfile.write(u'* Page [[%s]] not found\r\n' % page.title())
            continue
        except wikipedia.IsRedirectPage:
            # Redirects are not skipped: their raw text goes through
            # the same pattern check as any other page.
            original_text = page.get(get_redirect=True)

        new_text = createSubPages(original_text, headerDescription, mainContent, disambigTemplate, reviewTemplate, commentVil, page.title())

        # createSubPages hands back its input untouched when the page
        # does not match the expected layout.
        if new_text == original_text:
            logfile.write(u'* No changes were necessary in %s\r\n' % page.title())
            continue

        # Show the title of the page that is being rewritten.
        # Highlight the title in purple.
        colors = [None] * 5 + [13] * len(page.title()) + [None] * 4
        wikipedia.output(u'\n>>> %s <<<' % page.title(), colors = colors)
        wikipedia.showDiff(original_text, new_text)
        try:
            page.put(new_text, comment = commentDis)
        except wikipedia.EditConflict:
            logfile.write(u'Skipping [[%s]] because of edit conflict\r\n' % (page.title()))
        except wikipedia.SpamfilterError as url:
            logfile.write(u'Cannot change [[%s]] because of blacklist entry %s\r\n' % (page.title(), url))

        print('Completed modifing,  Sleeping for %d seconds...' % pauseSeconds)
        time.sleep(pauseSeconds)
finally:
    # Close both files even when the run is aborted by an error, so
    # that everything logged so far reaches the disk.
    logfile.close()
    replaceFile.close()