వాడుకరి:Mpradeepbot/mpc.movieListGenerator.py

వికీపీడియా నుండి
Jump to navigation Jump to search

తెలుగు సినిమాల జాబితాను తయారు చేయడానికి ఈ ప్రోగ్రాముకు సహాయకారిగా ఈ ఫైలును ఉపయోగించండి.

import wikipedia, catlib, config, codecs

####################################################################################################
# Overwrite the wiki page named 'pageTitle' with the text 'pageData',
# using 'comment' as the edit summary.  An edit conflict or a spam
# blacklist rejection is reported through wikipedia.output() and the
# function then returns normally; other exceptions propagate to the caller.
def writeData(pageTitle, pageData, comment):
    target = wikipedia.Page(wikipedia.getSite(), pageTitle)
    try:
        # The current text is loaded but its value is not used below;
        # a page that does not exist yet (NoPage) is tolerated.
        target.get()
    except wikipedia.NoPage:
        pass
    try:
        target.put(pageData, comment=comment)
    except wikipedia.EditConflict:
        wikipedia.output(u'Skipping %s because of edit conflict' % target.title())
    except wikipedia.SpamfilterError as blockedUrl:
        wikipedia.output(u'Cannot change %s because of blacklist entry %s' % (target.title(), blockedUrl))
####################################################################################################


####################################################################################################
# Return the articles of a category as a list object.  Pass only the bare
# category name in 'catTitle'; the 'Category:' namespace prefix is added
# automatically.
# --function requires both 'wikipedia' and 'catlib' to be imported
def getCatList(catTitle):
    site = wikipedia.getSite()
    fullTitle = u'Category:' + catTitle
    return catlib.Category(site, fullTitle).articlesList()
####################################################################################################


####################################################################################################
# Return the year of a movie category: the part of 'catTitle' that comes
# before its first space (the whole title when it contains no space).
def getYear(catTitle):
    return catTitle.split(' ', 1)[0]
####################################################################################################


####################################################################################################
# The main program starts from here.
#
# Input file 'mpc.movieListGenerator.txt' (UTF-8, with or without a BOM):
#   line 1   - title of the article that holds the movies grouped by year
#   line 2   - title of the article that holds the movies sorted alphabetically
#   line 3.. - one category name per line; blank lines are ignored
# Both articles are rebuilt from the category members and saved via writeData().


# Decode one raw UTF-8 line of the input file and drop its CR/LF characters.
def _decodeLine(rawLine):
    return unicode(rawLine, 'utf8').replace(u'\n', u'').replace(u'\r', u'')


# NOTE: the log file is created (or truncated) here, but nothing in this
# section writes to it; it is closed again once the run is over.
logfile = codecs.open('mpc.movieListGenerator.log', encoding='utf-8', mode='wb')
try:
    inputFile = open('mpc.movieListGenerator.txt', 'rb')
    try:
        # first line of input file should be the name of the article,
        # to hold the movie names list according to the year.
        firstLine = inputFile.readline()
        # Skip the UTF-8 byte order mark only when the file really starts
        # with one, so that a BOM-less file does not lose three bytes of
        # the first title.
        if firstLine.startswith(codecs.BOM_UTF8):
            firstLine = firstLine[len(codecs.BOM_UTF8):]
        yearListName = _decodeLine(firstLine)

        # second line of input file should be the name of the article,
        # to hold the movie names list alphabetically.
        alphabetListName = _decodeLine(inputFile.readline())

        # from third line onwards we should have the names of the categories,
        # from where we get the names for article list
        categoryNames = []
        for line in inputFile:
            categoryName = _decodeLine(line)
            # A blank line (e.g. a trailing newline at the end of the file)
            # is not a category name, so it is skipped.
            if categoryName.strip():
                categoryNames.append(categoryName)
    finally:
        inputFile.close()

    # initialize the alphabetic names list, content
    alphabeticListOfNames = []
    yearListParts = []

    for categoryName in categoryNames:
        articleNamesList = getCatList(categoryName)

        # parse the name of the category and obtain the year of the movies
        year = getYear(categoryName)

        # fill the year list with the obtained movie names, and collect the
        # same names for the alphabetic list so that all movies are sorted
        # in one go afterwards
        yearListParts.append(u'== ' + year + u' ==\r\n')
        for pageName in articleNamesList:
            title = pageName.title()
            yearListParts.append(u'# [[' + title + u']]\r\n')
            alphabeticListOfNames.append(title)

    # Join once instead of growing the page text by repeated concatenation.
    yearListPageContent = u''.join(yearListParts)

    # sort the gathered list of movie names
    alphabeticListOfNames.sort()

    # write the sorted movies into the alphabetic list, starting a new
    # section heading whenever the first character of the title changes
    alphabetListParts = []
    prevFirstLetter = u''
    for pageName in alphabeticListOfNames:
        if prevFirstLetter != pageName[0]:
            alphabetListParts.append(u'== ' + pageName[0] + u' ==\r\n')
            prevFirstLetter = pageName[0]
        alphabetListParts.append(u'# [[' + pageName + u']]\r\n')
    alphabetListPageContent = u''.join(alphabetListParts)

    # Wrap both lists in the header/footer templates of their target article.
    yearListPageContent = u'__NOTOC__\r\n{{:' + yearListName + '\\header}}\r\n' + yearListPageContent + u'\r\n\r\n{{:' + yearListName + '\\footer}}'
    alphabetListPageContent = u'__NOTOC__\r\n{{:' + alphabetListName + '\\header}}\r\n' + alphabetListPageContent + u'\r\n\r\n{{:' + alphabetListName + '\\footer}}'

    writeData(yearListName, yearListPageContent, u'Bot: Updating the list of movies according to year.')
    writeData(alphabetListName, alphabetListPageContent, u'Bot: Updating the list of movies sorted alphabetically.')
finally:
    logfile.close()