User:Mpradeepbot/mpc.wikiStats.py


Use this file as a helper for this program.
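The script reads its settings from mpc.wikiStats.txt, a UTF-8 file (a leading byte-order mark is tolerated) with four lines: the disambiguation template, the review template, the translate template, and the title of the first page to scan. A start title of '!' makes the bot scan from the beginning of the wiki and overwrite its logs. A hypothetical sketch of the file, with placeholder template names:

{{Disambiguation-template}}
{{Review-template}}
{{Translation-template}}
!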

import wikipedia, pagegenerators, codecs

# Replace the contents of the page 'pageTitle' with 'pageData',
# saving with the edit summary 'comment'.
def writeData(pageTitle, pageData, comment):
  page = wikipedia.Page(wikipedia.getSite(), pageTitle)
  try:
    page.put(pageData, comment = comment)
  except wikipedia.EditConflict:
    wikipedia.output(u'Skipping %s because of edit conflict' % (page.title()))
  except wikipedia.SpamfilterError, url:
    wikipedia.output(u'Cannot change %s because of blacklist entry %s' % (page.title(), url))


inputFile = open('mpc.wikiStats.txt', 'rb')

# Skip the 3-byte UTF-8 byte-order mark if the file starts with one
if inputFile.read(3) != codecs.BOM_UTF8:
  inputFile.seek(0)

comment = u'Bot: Updating links'

# Read one settings line from the input file: decode UTF-8 and strip the newline
def readSettingsLine(f):
  line = unicode(f.readline(), 'utf8')
  return line.replace(u'\n', u'').replace(u'\r', u'')

disambig       = readSettingsLine(inputFile)  # Disambiguation template
review         = readSettingsLine(inputFile)  # Review template
translate      = readSettingsLine(inputFile)  # Translate template
firstPageTitle = readSettingsLine(inputFile)  # First page title to scan

# '!' sorts before any real page title, so starting at '!' means a fresh full
# scan: open the log files in write mode; any other start title resumes an
# earlier scan, so append instead.
if firstPageTitle == '!':
  logfilegt2lt5 = codecs.open('mpc.wikiStats.gt2lt5.log', encoding='utf-8', mode='wb')
  logfilegt5lt10 = codecs.open('mpc.wikiStats.gt5lt10.log', encoding='utf-8', mode='wb')
  logfilegt10 = codecs.open('mpc.wikiStats.gt10.log', encoding='utf-8', mode='wb')
else:
  logfilegt2lt5 = codecs.open('mpc.wikiStats.gt2lt5.log', encoding='utf-8', mode='ab')
  logfilegt5lt10 = codecs.open('mpc.wikiStats.gt5lt10.log', encoding='utf-8', mode='ab')
  logfilegt10 = codecs.open('mpc.wikiStats.gt10.log', encoding='utf-8', mode='ab')

namespace = wikipedia.Page(wikipedia.getSite(), firstPageTitle).namespace()
firstPageTitle = wikipedia.Page(wikipedia.getSite(), firstPageTitle).titleWithoutNamespace()
gen = pagegenerators.AllpagesPageGenerator(firstPageTitle, namespace)

# Fetch pages in batches of 500 to cut down on round trips to the server
preloadingGen = pagegenerators.PreloadingGenerator(gen, pageNumber = 500)

# Counters for the three size buckets
gt2lt5  = 0
gt5lt10 = 0
gt10    = 0

for page in preloadingGen:
    try:
      # Load the page's text from the wiki
      pageData = page.get()
      if not page.canBeEdited():
         wikipedia.output(u'Skipping locked page %s' % page.title())
         continue
    except wikipedia.NoPage:
       wikipedia.output(u'Page %s not found' % page.title())
       continue
    except wikipedia.IsRedirectPage:
       wikipedia.output(u'Page %s is a redirect page' % page.title())
       continue

    if disambig in pageData:
       wikipedia.output(u'Page %s is a disambiguation page' % page.title())
       continue
    if review in pageData:
       wikipedia.output(u'Page %s is still to be reviewed (villages)' % page.title())
       continue

    tFlag = 0

    if translate in pageData:
       wikipedia.output(u'Page %s is marked for translation' % page.title())
       tFlag = 1

    # Assess each page by the length of its text
    if len(pageData) <= 5120 and len(pageData) > 2048:
        # less than 5KB but greater than 2KB
        print u'less than 5KB but greater than 2KB'
        if tFlag == 0:
           logfilegt2lt5.write(u'# [[' + page.title() + u']] - ' + str(len(pageData)/1024) + u'KB\r\n')
        else:
           logfilegt2lt5.write(u'# \'\'\'[[' + page.title() + u']]\'\'\' - \'\'' + str(len(pageData)/1024) + u'KB, Translation needed!\'\'\r\n')
        gt2lt5 = gt2lt5 +1
    elif len(pageData) <= 10240 and len(pageData) > 5120:
        # less than 10KB but greater than 5KB
        print u'less than 10KB but greater than 5KB'
        if tFlag == 0:
           logfilegt5lt10.write(u'# [[' + page.title() + u']] - ' + str(len(pageData)/1024) + u'KB\r\n')
        else:
           logfilegt5lt10.write(u'# \'\'\'[[' + page.title() + u']]\'\'\' - \'\'' + str(len(pageData)/1024) + u'KB, Translation needed!\'\'\r\n')
        gt5lt10 = gt5lt10 + 1
    elif len(pageData) > 10240:
        # greater than 10KB
        print u'greater than 10KB'
        if tFlag == 0:
           logfilegt10.write(u'# [[' + page.title() + u']] - ' + str(len(pageData)/1024) + u'KB\r\n')
        else:
           logfilegt10.write(u'# \'\'\'[[' + page.title() + u']]\'\'\' - \'\'' + str(len(pageData)/1024) + u'KB, Translation needed!\'\'\r\n')
        gt10 = gt10 + 1
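    # Each log entry above is a numbered wikitext list item, e.g. (illustrative)
    #   # [[SomePage]] - 3KB
    # with a bolded title and an italic note when the page still needs translation.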


# close all the open handles
logfilegt2lt5.close()
logfilegt5lt10.close()
logfilegt10.close()
inputFile.close()

# start uploading the calculated data to tewiki

# pages with more than 2KB and at most 5KB of text
logfilegt2lt5 = codecs.open('mpc.wikiStats.gt2lt5.log', encoding='utf-8', mode='rb')
writeData(u'User:Mpradeep/gt2lt5', logfilegt2lt5.read(), comment)
logfilegt2lt5.close()

# pages with more than 5KB and at most 10KB of text
logfilegt5lt10 = codecs.open('mpc.wikiStats.gt5lt10.log', encoding='utf-8', mode='rb')
writeData(u'User:Mpradeep/gt5lt10', logfilegt5lt10.read(), comment)
logfilegt5lt10.close()

# pages with more than 10KB of text
logfilegt10 = codecs.open('mpc.wikiStats.gt10.log', encoding='utf-8', mode='rb')
writeData(u'User:Mpradeep/gt10', logfilegt10.read(), comment)
logfilegt10.close()

# Now update the counts
# pages with more than 2KB and at most 5KB of text
writeData(u'User:Mpradeep/gt2lt5ct', str(gt2lt5), comment)

# pages with more than 5KB and at most 10KB of text
writeData(u'User:Mpradeep/gt5lt10ct', str(gt5lt10), comment)

# pages with more than 10KB of text
writeData(u'User:Mpradeep/gt10ct', str(gt10), comment)
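
A hypothetical invocation, assuming the old pywikipedia framework (which provides wikipedia.py and pagegenerators.py) is on the Python path and user-config.py points the bot at the Telugu Wikipedia:

python mpc.wikiStats.py

The bot scans all pages starting from the configured title, writes the three size-bucket lists to local log files, and then uploads the lists and their counts to the User:Mpradeep subpages named above.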