వాడుకరి:Mpradeepbot/mpc.cinemaStats.py

వికీపీడియా నుండి
Jump to navigation Jump to search

ఈ ప్రోగ్రాముకు సహాయకారిగా ఈ ఫైలును ఉపయోగించండి.

import wikipedia, pagegenerators, catlib, config, codecs

# Replace the contents in the page 'pageTitle' with data 'pageData' 
# and add the comment 'comment'
def writeData(pageTitle, pageData, comment):
    """Overwrite the wiki page 'pageTitle' with 'pageData'.

    pageTitle -- title of the page, resolved on wikipedia.getSite()
    pageData  -- text that is saved as the page content
    comment   -- edit summary handed to page.put()

    Returns nothing.  An edit conflict or a spam-blacklist rejection is
    reported through wikipedia.output() and then ignored; any other
    exception propagates to the caller.
    """
    page = wikipedia.Page(wikipedia.getSite(), pageTitle)
    try:
        # The text fetched here is never used: the page is replaced
        # wholesale with pageData below.
        # NOTE(review): the load is kept because this function has always
        # fetched the page before saving it; presumably that primes the
        # framework's state for the save -- confirm before removing it.
        page.get()
    except wikipedia.NoPage:
        # A missing page is not an error here; carry on and save.
        pass
    try:
        page.put(pageData, comment = comment)
    except wikipedia.EditConflict:
        wikipedia.output(u'Skipping %s because of edit conflict' % (page.title()))
    # 'except X as name' is accepted by Python 2.6+ and Python 3, unlike
    # the old 'except X, name' spelling.
    except wikipedia.SpamfilterError as url:
        wikipedia.output(u'Cannot change %s because of blacklist entry %s' % (page.title(), url))


# get all the wiki links count in a given page
def getLinkListCount(pageText):
    """Return the number of '[[ ... ]]' pairs found in pageText.

    The text is scanned left to right.  Each '[[' is paired with the first
    ']]' that follows it, and the scan resumes right after that ']]'.

    If a '[[' has no ']]' after it, 'Page is not normal' is printed, the
    scan stops, and the count collected so far is returned.
    """
    count = 0
    pos = 0
    while True:
        opener = pageText.find(u'[[', pos)
        if opener < 0:
            # no further link openers
            break
        closer = pageText.find(u']]', opener + 2)
        if closer < 0:
            # A single parenthesised argument prints the same text under
            # Python 2 and is also valid Python 3 syntax.
            print(u'Page is not normal')
            break
        count += 1
        # continue scanning just past the closing brackets
        pos = closer + 2
    return count



# Output logs, all written as UTF-8:
#   logfile      -- summary counts
#   logfileNames -- every page that was scanned
#   logfileNO    -- matching pages that contain no wiki links
#   logfileML    -- pages where the marker text occurs more than once
logfile = codecs.open('mpc.cinemaStats.log', encoding='utf-8', mode='wb')
logfileNames = codecs.open('mpc.cinemaStatsNames.log', encoding='utf-8', mode='wb')
logfileNO = codecs.open('mpc.cinemaStats.no.log', encoding='utf-8', mode='wb')
logfileML = codecs.open('mpc.cinemaStats.ml.log', encoding='utf-8', mode='wb')
# Settings file, read as raw bytes and decoded line by line further down.
datafile = open('mpc.cinemaStats.txt', 'rb' )

# Skip the UTF-8 byte-order mark, but only when one is really present.
# Dropping three bytes unconditionally would eat the start of the first
# setting whenever the file was saved without a BOM.
if datafile.read(len(codecs.BOM_UTF8)) != codecs.BOM_UTF8:
    datafile.seek(0)

def _readSetting(handle):
    """Return the next line of 'handle', decoded from UTF-8, with every
    line-feed and carriage-return character removed."""
    text = unicode(handle.readline(), 'utf8')
    return text.replace(u'\n', u'').replace(u'\r', u'')


# The settings file holds one value per line, in this fixed order.
sinimAmUsa     = _readSetting(datafile)  # text whose occurrences are counted in each page
liMkulEdu      = _readSetting(datafile)  # label/heading for the pages without links
ekkuva         = _readSetting(datafile)  # heading for the pages with several occurrences
allPages       = _readSetting(datafile)  # label for the total page count
firstPageTitle = _readSetting(datafile)  # title at which the page scan starts

# Split the configured start title into its namespace and its bare title;
# the all-pages generator takes the two separately.
startPage = wikipedia.Page(wikipedia.getSite(), firstPageTitle)
namespace = startPage.namespace()
firstPageTitle = startPage.titleWithoutNamespace()

# Walk the pages from the start title onwards, wrapped in a preloading
# generator configured with pageNumber = 500.
gen = pagegenerators.AllpagesPageGenerator(firstPageTitle, namespace)
preloadingGen = pagegenerators.PreloadingGenerator(gen, pageNumber = 500)

# Running totals maintained by the scan loop.
nolinks = multiple = movieCount = extraInfo = 0

# Scan every page: log its title, then classify it by how often the
# marker text occurs and by whether it contains any wiki link.
for page in preloadingGen:
    try:
        # Fetch the page text; locked, missing and redirect pages are
        # reported and skipped.
        pageData = page.get()
        if not page.canBeEdited():
            wikipedia.output(u'Skipping locked page %s' % page.title())
            continue
    except wikipedia.NoPage:
        wikipedia.output(u'Page %s not found' % page.title())
        continue
    except wikipedia.IsRedirectPage:
        wikipedia.output(u'Page %s is redirect page' % page.title())
        continue

    # One bullet line per page, reused for every log this page lands in.
    listEntry = u'* [[' + page.title() + u']]\r\n'
    logfileNames.write(listEntry)

    markerHits = pageData.count(sinimAmUsa)
    if markerHits == 0:
        # the marker text is absent: not a page of interest
        continue
    if markerHits > 1:
        multiple += 1
        logfileML.write(listEntry)

    if getLinkListCount(pageData) == 0:
        nolinks += 1
        logfileNO.write(listEntry)

    movieCount += 1

# Report the totals on the console and in the summary log.
# print(...) with a single parenthesised argument produces the same output
# under Python 2 and is also valid Python 3 syntax.
# NOTE(review): the 'multiple' counter is maintained by the scan loop but
# is not reported here -- confirm whether that is intended.
print(u'Total Movie Pages = ' + str(movieCount))
logfile.write(u'* ' + allPages + u' = ' + str(movieCount) + u'\n')
print(u'Movie Pages without Links = ' + str(nolinks))
logfile.write(u'* ' + liMkulEdu + u' - ' + str(nolinks) + u'\n')


# Close every handle so the logs are flushed to disk before they are
# opened again for reading.
logfile.close()
logfileNames.close()
logfileNO.close()
logfileML.close()
datafile.close()

# Re-open the summary log and the two page lists for reading.
logfile   = codecs.open('mpc.cinemaStats.log', encoding='utf-8', mode='rb')
logfileNO = codecs.open('mpc.cinemaStats.no.log', encoding='utf-8', mode='rb')
logfileML = codecs.open('mpc.cinemaStats.ml.log', encoding='utf-8', mode='rb')

# Report layout: the summary counts first, then one headed section per
# page list, each section followed by a blank line.
totalData = u''.join([
    logfile.read(), u'\n',
    u'== ', liMkulEdu, u' ==\n', logfileNO.read(), u'\n\n',
    u'== ', ekkuva, u' ==\n', logfileML.read(), u'\n\n',
])


# Publish the report on the wiki.
writeData(u'User:Mpradeep/movstat', totalData, 'Robot: Updating statistics')

for handle in (logfile, logfileNO, logfileML):
    handle.close()


# Replace the summary log on disk with the full report just published.
logfile = codecs.open('mpc.cinemaStats.log', encoding='utf-8', mode='wb')
logfile.write(totalData)
logfile.close()