వాడుకరి:Mpradeepbot/mpc.wikiStats.py
Use this file as a helper for this program.
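The script below walks the wiki's pages, sorts them into size buckets (2–5 KB, 5–10 KB, over 10 KB), and uploads the resulting lists and counts to subpages of User:Mpradeep.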
import wikipedia, pagegenerators, catlib, config, codecs
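# Note: this script is written for the old Python 2 pywikipedia ("compat")
# framework, where wikipedia, pagegenerators and catlib are top-level
# modules; it will not run unchanged on the modern pywikibot package.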
# Replace the contents of the page 'pageTitle' with 'pageData'
# and save the edit with the summary 'comment'.
def writeData(pageTitle, pageData, comment):
    page = wikipedia.Page(wikipedia.getSite(), pageTitle)
    try:
        # Fetch the page's text from the wiki; it is discarded below,
        # so this only checks that the page is reachable
        data = page.get()
    except wikipedia.NoPage:
        data = u''
    data = pageData
    try:
        page.put(data, comment = comment)
    except wikipedia.EditConflict:
        wikipedia.output(u'Skipping %s because of edit conflict' % page.title())
    except wikipedia.SpamfilterError, url:
        wikipedia.output(u'Cannot change %s because of blacklist entry %s' % (page.title(), url))
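
# mpc.wikiStats.txt is expected to hold, after a 3-byte UTF-8 BOM, four
# lines: the disambiguation template, the review template, the translate
# template, and the title of the first page to scan ('!' starts a fresh
# run from the beginning).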
inputFile = open('mpc.wikiStats.txt', 'rb')
# Skip the 3-byte UTF-8 byte-order mark at the start of the file
inputFile.read(3)
comment = u'Bot: Updating links'
# Disambiguation template
line = unicode(inputFile.readline(), 'utf8')
line = line.replace(u'\n', u'')
line = line.replace(u'\r', u'')
disambig = line
# Review template
line = unicode(inputFile.readline(), 'utf8')
line = line.replace(u'\n', u'')
line = line.replace(u'\r', u'')
review = line
# Translate template
line = unicode(inputFile.readline(), 'utf8')
line = line.replace(u'\n', u'')
line = line.replace(u'\r', u'')
translate = line
# First page title
line = unicode(inputFile.readline(), 'utf8')
line = line.replace(u'\n', u'')
line = line.replace(u'\r', u'')
firstPageTitle = line
if firstPageTitle == '!':
    # Fresh run from the beginning: overwrite the log files
    logfilegt2lt5 = codecs.open('mpc.wikiStats.gt2lt5.log', encoding='utf-8', mode='wb')
    logfilegt5lt10 = codecs.open('mpc.wikiStats.gt5lt10.log', encoding='utf-8', mode='wb')
    logfilegt10 = codecs.open('mpc.wikiStats.gt10.log', encoding='utf-8', mode='wb')
else:
    # Resuming from a given page: append to the existing log files
    logfilegt2lt5 = codecs.open('mpc.wikiStats.gt2lt5.log', encoding='utf-8', mode='ab')
    logfilegt5lt10 = codecs.open('mpc.wikiStats.gt5lt10.log', encoding='utf-8', mode='ab')
    logfilegt10 = codecs.open('mpc.wikiStats.gt10.log', encoding='utf-8', mode='ab')
namespace = wikipedia.Page(wikipedia.getSite(), firstPageTitle).namespace()
firstPageTitle = wikipedia.Page(wikipedia.getSite(), firstPageTitle).titleWithoutNamespace()
gen = pagegenerators.AllpagesPageGenerator(firstPageTitle, namespace)
preloadingGen = pagegenerators.PreloadingGenerator(gen, pageNumber = 500)
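# AllpagesPageGenerator walks every page of the namespace starting at
# firstPageTitle; PreloadingGenerator wraps it so that page texts are
# fetched in batches of 500 rather than one request per page.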
gt2lt5 = 0
gt5lt10 = 0
gt10 = 0
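# Size buckets tallied by the loop below: gt2lt5 counts pages over 2KB
# up to 5KB, gt5lt10 pages over 5KB up to 10KB, gt10 pages over 10KB.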
for page in preloadingGen:
    try:
        # Load the page's text from the wiki
        pageData = page.get()
        if not page.canBeEdited():
            wikipedia.output(u'Skipping locked page %s' % page.title())
            continue
    except wikipedia.NoPage:
        wikipedia.output(u'Page %s not found' % page.title())
        continue
    except wikipedia.IsRedirectPage:
        wikipedia.output(u'Page %s is a redirect page' % page.title())
        continue
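    # Skip disambiguation pages and village pages still awaiting review;
    # pages tagged for translation are counted but flagged in the logs.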
    if pageData.find(disambig) >= 0:
        wikipedia.output(u'Page %s is a disambiguation page' % page.title())
        continue
    if pageData.find(review) >= 0:
        wikipedia.output(u'Page %s is a village page to be reviewed' % page.title())
        continue
    tFlag = 0
    if pageData.find(translate) >= 0:
        wikipedia.output(u'Page %s is a page to be translated' % page.title())
        tFlag = 1
    # Assess the page by its size
    if len(pageData) <= 5120 and len(pageData) > 2048:
        # greater than 2KB but not more than 5KB
        print u'less than 5KB but greater than 2KB'
        if tFlag == 0:
            logfilegt2lt5.write(u'# [[' + page.title() + u']] - ' + str(len(pageData)/1024) + u'KB\r\n')
        else:
            logfilegt2lt5.write(u'# \'\'\'[[' + page.title() + u']]\'\'\' - \'\'' + str(len(pageData)/1024) + u'KB, Translation needed!\'\'\r\n')
        gt2lt5 = gt2lt5 + 1
    elif len(pageData) <= 10240 and len(pageData) > 5120:
        # greater than 5KB but not more than 10KB
        print u'less than 10KB but greater than 5KB'
        if tFlag == 0:
            logfilegt5lt10.write(u'# [[' + page.title() + u']] - ' + str(len(pageData)/1024) + u'KB\r\n')
        else:
            logfilegt5lt10.write(u'# \'\'\'[[' + page.title() + u']]\'\'\' - \'\'' + str(len(pageData)/1024) + u'KB, Translation needed!\'\'\r\n')
        gt5lt10 = gt5lt10 + 1
    elif len(pageData) > 10240:
        # greater than 10KB
        print u'greater than 10KB'
        if tFlag == 0:
            logfilegt10.write(u'# [[' + page.title() + u']] - ' + str(len(pageData)/1024) + u'KB\r\n')
        else:
            logfilegt10.write(u'# \'\'\'[[' + page.title() + u']]\'\'\' - \'\'' + str(len(pageData)/1024) + u'KB, Translation needed!\'\'\r\n')
        gt10 = gt10 + 1
# close all the open handles
logfilegt2lt5.close()
logfilegt5lt10.close()
logfilegt10.close()
inputFile.close()
# start uploading the collected data to tewiki
# pages greater than 2KB and up to 5KB
logfilegt2lt5 = codecs.open('mpc.wikiStats.gt2lt5.log', encoding='utf-8', mode='rb')
writeData(u'User:Mpradeep/gt2lt5', logfilegt2lt5.read(), comment)
logfilegt2lt5.close()
# pages greater than 5KB and up to 10KB
logfilegt5lt10 = codecs.open('mpc.wikiStats.gt5lt10.log', encoding='utf-8', mode='rb')
writeData(u'User:Mpradeep/gt5lt10', logfilegt5lt10.read(), comment)
logfilegt5lt10.close()
# pages greater than 10KB
logfilegt10 = codecs.open('mpc.wikiStats.gt10.log', encoding='utf-8', mode='rb')
writeData(u'User:Mpradeep/gt10', logfilegt10.read(), comment)
logfilegt10.close()
# Now upload the counts
# count of pages greater than 2KB and up to 5KB
writeData(u'User:Mpradeep/gt2lt5ct', str(gt2lt5), comment)
# count of pages greater than 5KB and up to 10KB
writeData(u'User:Mpradeep/gt5lt10ct', str(gt5lt10), comment)
# count of pages greater than 10KB
writeData(u'User:Mpradeep/gt10ct', str(gt10), comment)