User:Mpradeepbot/mpc.wikiStats.py
Use this file as a helper for this program.
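The script reads its settings from mpc.wikiStats.txt, a UTF-8 file (with BOM) containing four lines: the disambiguation template marker, the review template marker, the translation template marker, and the title of the page to start scanning from. A title of ! means "start from the beginning and overwrite the logs"; any other title resumes from that page and appends to the existing logs. A hypothetical example of the file's contents (the template names below are placeholders, not the actual tewiki templates):

    {{disambig}}
    {{review}}
    {{translate}}
    !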
import wikipedia, pagegenerators, catlib, config, codecs

# Replace the contents of the page 'pageTitle' with data 'pageData'
# and save it with the edit summary 'comment'.
def writeData(pageTitle, pageData, comment):
    page = wikipedia.Page(wikipedia.getSite(), pageTitle)
    try:
        # Load the page's text from the wiki (also checks that it exists)
        data = page.get()
    except wikipedia.NoPage:
        data = u''
    data = pageData
    try:
        page.put(data, comment = comment)
    except wikipedia.EditConflict:
        wikipedia.output(u'Skipping %s because of edit conflict' % (page.title()))
    except wikipedia.SpamfilterError, url:
        wikipedia.output(u'Cannot change %s because of blacklist entry %s' % (page.title(), url))

inputFile = open('mpc.wikiStats.txt', 'rb')
# Skip the 3-byte UTF-8 BOM at the start of the file
inputFile.read(3)

comment = u'Bot: Updating links'

# Disambiguation Template
line = u"" + unicode(inputFile.readline(), 'utf8')
line = line.replace(u'\n', u'')
line = line.replace(u'\r', u'')
disambig = line

# Review Template
line = u"" + unicode(inputFile.readline(), 'utf8')
line = line.replace(u'\n', u'')
line = line.replace(u'\r', u'')
review = line

# Translate Template
line = u"" + unicode(inputFile.readline(), 'utf8')
line = line.replace(u'\n', u'')
line = line.replace(u'\r', u'')
translate = line

# First Page Title
line = u"" + unicode(inputFile.readline(), 'utf8')
line = line.replace(u'\n', u'')
line = line.replace(u'\r', u'')
firstPageTitle = line

# '!' means start from the beginning: truncate the log files.
# Anything else means resume, so append to the existing logs.
if firstPageTitle == '!':
    logfilegt2lt5 = codecs.open('mpc.wikiStats.gt2lt5.log', encoding='utf-8', mode='wb')
    logfilegt5lt10 = codecs.open('mpc.wikiStats.gt5lt10.log', encoding='utf-8', mode='wb')
    logfilegt10 = codecs.open('mpc.wikiStats.gt10.log', encoding='utf-8', mode='wb')
else:
    logfilegt2lt5 = codecs.open('mpc.wikiStats.gt2lt5.log', encoding='utf-8', mode='ab')
    logfilegt5lt10 = codecs.open('mpc.wikiStats.gt5lt10.log', encoding='utf-8', mode='ab')
    logfilegt10 = codecs.open('mpc.wikiStats.gt10.log', encoding='utf-8', mode='ab')

namespace = wikipedia.Page(wikipedia.getSite(), firstPageTitle).namespace()
firstPageTitle = wikipedia.Page(wikipedia.getSite(), firstPageTitle).titleWithoutNamespace()
gen = pagegenerators.AllpagesPageGenerator(firstPageTitle, namespace)
preloadingGen = pagegenerators.PreloadingGenerator(gen, pageNumber = 500)

# Counters for the three size buckets
gt2lt5 = 0
gt5lt10 = 0
gt10 = 0

for page in preloadingGen:
    try:
        # Load the page's text from the wiki
        pageData = page.get()
        if not page.canBeEdited():
            wikipedia.output(u'Skipping locked page %s' % page.title())
            continue
    except wikipedia.NoPage:
        wikipedia.output(u'Page %s not found' % page.title())
        continue
    except wikipedia.IsRedirectPage:
        wikipedia.output(u'Page %s is redirect page' % page.title())
        continue

    if pageData.find(disambig) >= 0:
        wikipedia.output(u'Page %s is Disambiguation Page' % page.title())
        continue
    if pageData.find(review) >= 0:
        wikipedia.output(u'Page %s is Page to be reviewed for villages' % page.title())
        continue

    tFlag = 0
    if pageData.find(translate) >= 0:
        wikipedia.output(u'Page %s is Page to be Translated' % page.title())
        tFlag = 1

    # Do the assessment of the pages: bucket by raw wikitext size.
    # Pages that carry the translation template are highlighted in the log.
    if len(pageData) <= 5120 and len(pageData) > 2048:
        # greater than 2KB but not more than 5KB
        print u'greater than 2KB but not more than 5KB'
        if tFlag == 0:
            logfilegt2lt5.write(u'# [[' + page.title() + u']] - ' + str(len(pageData)/1024) + u'KB\r\n')
        else:
            logfilegt2lt5.write(u'# \'\'\'[[' + page.title() + u']]\'\'\' - \'\'' + str(len(pageData)/1024) + u'KB, Translation needed!\'\'\r\n')
        gt2lt5 = gt2lt5 + 1
    elif len(pageData) <= 10240 and len(pageData) > 5120:
        # greater than 5KB but not more than 10KB
        print u'greater than 5KB but not more than 10KB'
        if tFlag == 0:
            logfilegt5lt10.write(u'# [[' + page.title() + u']] - ' + str(len(pageData)/1024) + u'KB\r\n')
        else:
            logfilegt5lt10.write(u'# \'\'\'[[' + page.title() + u']]\'\'\' - \'\'' + str(len(pageData)/1024) + u'KB, Translation needed!\'\'\r\n')
        gt5lt10 = gt5lt10 + 1
    elif len(pageData) > 10240:
        # greater than 10KB
        print u'greater than 10KB'
        if tFlag == 0:
            logfilegt10.write(u'# [[' + page.title() + u']] - ' + str(len(pageData)/1024) + u'KB\r\n')
        else:
            logfilegt10.write(u'# \'\'\'[[' + page.title() + u']]\'\'\' - \'\'' + str(len(pageData)/1024) + u'KB, Translation needed!\'\'\r\n')
        gt10 = gt10 + 1

# close all the open handles
logfilegt2lt5.close()
logfilegt5lt10.close()
logfilegt10.close()
inputFile.close()

# start uploading the calculated data to tewiki
# pages greater than 2KB but not more than 5KB
logfilegt2lt5 = codecs.open('mpc.wikiStats.gt2lt5.log', encoding='utf-8', mode='rb')
writeData(u'User:Mpradeep/gt2lt5', logfilegt2lt5.read(), comment)
logfilegt2lt5.close()

# pages greater than 5KB but not more than 10KB
logfilegt5lt10 = codecs.open('mpc.wikiStats.gt5lt10.log', encoding='utf-8', mode='rb')
writeData(u'User:Mpradeep/gt5lt10', logfilegt5lt10.read(), comment)
logfilegt5lt10.close()

# pages greater than 10KB
logfilegt10 = codecs.open('mpc.wikiStats.gt10.log', encoding='utf-8', mode='rb')
writeData(u'User:Mpradeep/gt10', logfilegt10.read(), comment)
logfilegt10.close()

# Now update the counts
writeData(u'User:Mpradeep/gt2lt5ct', str(gt2lt5), comment)
writeData(u'User:Mpradeep/gt5lt10ct', str(gt5lt10), comment)
writeData(u'User:Mpradeep/gt10ct', str(gt10), comment)
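This script targets the old pywikipedia (python-wikipediabot) framework and Python 2; the module names wikipedia, pagegenerators, and catlib come from that framework, not from the later Pywikibot. Assuming a user-config.py set up for tewiki and the helper file in the same directory, it would presumably be run from the framework directory as:

    python mpc.wikiStats.py

If a run is interrupted, putting the last processed page title on the fourth line of mpc.wikiStats.txt (instead of !) resumes the scan from that page and appends to the logs already on disk.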