వాడుకరి:Mpradeepbot/mpc.cinemaStats.py
Jump to navigation
Jump to search
ఈ ప్రోగ్రాముకు సహాయకారిగా ఈ ఫైలును ఉపయోగించండి.
# Statistics bot for movie pages: walks all pages starting at a configured
# title, counts the pages that contain the configured marker text, lists the
# ones without wiki links or with more than one marker, and uploads the report
# to a wiki page.
import codecs

import wikipedia
import pagegenerators
import catlib
import config

# Input file: five UTF-8 lines (marker text, "no links" label, "multiple"
# label, "all pages" label, title of the first page to scan).
SETTINGS_FILE = 'mpc.cinemaStats.txt'
SETTINGS_LINE_COUNT = 5

# Output files.
STATS_LOG = 'mpc.cinemaStats.log'
NAMES_LOG = 'mpc.cinemaStatsNames.log'
NO_LINKS_LOG = 'mpc.cinemaStats.no.log'
MULTIPLE_LOG = 'mpc.cinemaStats.ml.log'

# Wiki page that receives the report.
REPORT_PAGE = u'User:Mpradeep/movstat'


def writeData(pageTitle, pageData, comment):
    """Replace the contents of the page 'pageTitle' with 'pageData'.

    pageTitle -- title of the wiki page to overwrite
    pageData  -- new page text
    comment   -- edit summary passed to page.put()

    Edit conflicts and spam-blacklist rejections are reported through
    wikipedia.output() and otherwise ignored. Any other exception raised by
    page.get() or page.put() propagates to the caller.
    """
    page = wikipedia.Page(wikipedia.getSite(), pageTitle)
    try:
        # Load the page's text from the wiki. The text itself is not needed
        # (the page is overwritten), but the call is kept so that whatever
        # page.get() does or raises before the write stays the same.
        page.get()
    except wikipedia.NoPage:
        # A missing page is fine: put() below creates it.
        pass
    try:
        page.put(pageData, comment = comment)
    except wikipedia.EditConflict:
        wikipedia.output(u'Skipping %s because of edit conflict' % (page.title()))
    except wikipedia.SpamfilterError as url:
        wikipedia.output(u'Cannot change %s because of blacklist entry %s' % (page.title(), url))


def getLinkListCount(pageText):
    """Return the number of wiki links ('[[' ... ']]' pairs) in pageText.

    The text is scanned left to right; after each closed link the search
    resumes right behind its ']]'. If a '[[' has no closing ']]', a notice is
    printed and the links counted so far are returned.
    """
    count = 0
    pos = 0
    while True:
        beg = pageText.find(u'[[', pos)
        if beg < 0:
            break
        end = pageText.find(u']]', beg + 2)
        if end == -1:
            print(u'Page is not normal')
            break
        count += 1
        pos = end + 2
    return count


def _readSettings(path, lineCount):
    """Return the first lineCount lines of the UTF-8 file 'path' as unicode.

    A leading UTF-8 byte order mark is removed only when it is really there,
    so a file saved without one does not lose the first three bytes of its
    first line. Line terminators ('\\n', '\\r') are stripped; missing lines
    are returned as empty strings.
    """
    with open(path, 'rb') as datafile:
        raw = datafile.read()
    if raw.startswith(codecs.BOM_UTF8):
        raw = raw[len(codecs.BOM_UTF8):]
    # Split on the byte level and decode only the lines that are used, so
    # anything after them cannot cause a decoding error.
    rawLines = raw.split(b'\n')[:lineCount]
    lines = [chunk.decode('utf8').replace(u'\r', u'') for chunk in rawLines]
    lines.extend([u''] * (lineCount - len(lines)))
    return lines


def main():
    """Scan the wiki, write the log files and upload the statistics page."""
    settings = _readSettings(SETTINGS_FILE, SETTINGS_LINE_COUNT)
    sinimAmUsa, liMkulEdu, ekkuva, allPages, firstPageTitle = settings

    startPage = wikipedia.Page(wikipedia.getSite(), firstPageTitle)
    namespace = startPage.namespace()
    firstPageTitle = startPage.titleWithoutNamespace()

    gen = pagegenerators.AllpagesPageGenerator(firstPageTitle, namespace)
    preloadingGen = pagegenerators.PreloadingGenerator(gen, pageNumber = 500)

    nolinks = 0
    movieCount = 0
    # The list entries are kept in memory as well as written to the log
    # files, so the report can be assembled without re-reading the files.
    noLinkEntries = []
    multipleEntries = []

    with codecs.open(NAMES_LOG, encoding='utf-8', mode='wb') as logfileNames, \
            codecs.open(NO_LINKS_LOG, encoding='utf-8', mode='wb') as logfileNO, \
            codecs.open(MULTIPLE_LOG, encoding='utf-8', mode='wb') as logfileML:
        for page in preloadingGen:
            try:
                # Load the page's text from the wiki
                pageData = page.get()
                if not page.canBeEdited():
                    wikipedia.output(u'Skipping locked page %s' % page.title())
                    continue
            except wikipedia.NoPage:
                wikipedia.output(u'Page %s not found' % page.title())
                continue
            except wikipedia.IsRedirectPage:
                wikipedia.output(u'Page %s is redirect page' % page.title())
                continue

            entry = u'* [[' + page.title() + u']]\r\n'
            logfileNames.write(entry)

            tCount = pageData.count(sinimAmUsa)
            if tCount == 0:
                continue  # skip if it is not a sinimA page
            if tCount > 1:
                multipleEntries.append(entry)
                logfileML.write(entry)

            if getLinkListCount(pageData) == 0:
                nolinks += 1
                noLinkEntries.append(entry)
                logfileNO.write(entry)

            movieCount += 1

    # write the results to a file
    print(u'Total Movie Pages = ' + str(movieCount))
    print(u'Movie Pages without Links = ' + str(nolinks))
    summary = (u'* ' + allPages + u' = ' + str(movieCount) + u'\n'
               + u'* ' + liMkulEdu + u' - ' + str(nolinks) + u'\n')
    # Store the counts before the upload so they are on disk even if the
    # upload fails.
    with codecs.open(STATS_LOG, encoding='utf-8', mode='wb') as logfile:
        logfile.write(summary)

    totalData = u''.join([
        summary, u'\n',
        u'== ', liMkulEdu, u' ==\n', u''.join(noLinkEntries), u'\n\n',
        u'== ', ekkuva, u' ==\n', u''.join(multipleEntries), u'\n\n',
    ])

    writeData(REPORT_PAGE, totalData, 'Robot: Updating statistics')

    # Finally keep a local copy of exactly what was uploaded.
    with codecs.open(STATS_LOG, encoding='utf-8', mode='wb') as logfile:
        logfile.write(totalData)


if __name__ == '__main__':
    main()