వాడుకరి:Mpradeepbot/mpc.movieListGenerator.py
Jump to navigation
Jump to search
తెలుగు సినిమాల జాబితాను తయారు చేయడానికి ఈ ప్రోగ్రాముకు సహాయకారిగా ఈ ఫైలును ఉపయోగించండి.
import wikipedia, catlib, config, codecs

####################################################################################################
# Replace the contents in the page 'pageTitle' with data 'pageData'
# and add the comment 'comment'
def writeData(pageTitle, pageData, comment):
    """Save ``pageData`` as the text of the wiki page ``pageTitle``.

    pageTitle -- title of the page to write.
    pageData  -- text passed to ``page.put``.
    comment   -- edit summary passed to ``page.put``.

    An edit conflict or a spam-filter rejection is reported through
    ``wikipedia.output`` and the page is skipped; any other exception
    propagates to the caller.
    """
    page = wikipedia.Page(wikipedia.getSite(), pageTitle)
    try:
        # Load the page's text from the wiki. The returned text is not used,
        # because the page is overwritten below.
        # NOTE(review): the call is kept because the framework may record
        # edit state during get() that put() relies on -- confirm before
        # removing it.
        page.get()
    except wikipedia.NoPage:
        # A missing page is fine: put() below is still attempted.
        pass

    try:
        page.put(pageData, comment = comment)
    except wikipedia.EditConflict:
        wikipedia.output(u'Skipping %s because of edit conflict' % (page.title()))
    except wikipedia.SpamfilterError as url:
        # 'url' is the exception object itself; it is formatted into the message as-is.
        wikipedia.output(u'Cannot change %s because of blacklist entry %s' % (page.title(), url))
####################################################################################################

####################################################################################################
# This function returns the list of articles as a list object
# in given category. Please give only the Category Name,
# namespace will be added automatically.
# --function requires both 'wikipedia' and 'catlib' to be imported
def getCatList(catTitle):
    """Return the result of ``articlesList()`` for the category ``catTitle``.

    catTitle -- category name without the namespace; ``u'Category:'`` is
                prepended here.
    """
    cat = catlib.Category(wikipedia.getSite(), u'Category:'+catTitle)
    return cat.articlesList()
####################################################################################################

####################################################################################################
# This function returns the year of the movies list, if
# the name of the category is passed as input.
def getYear(catTitle):
    """Return the text of ``catTitle`` that precedes its first space.

    The whole title is returned when it contains no space.
    """
    # Only the first field is needed, so split at most once.
    return catTitle.split(' ', 1)[0]
####################################################################################################

####################################################################################################
# The main program starts from here.
def _decodeLine(rawLine):
    """Decode one UTF-8 encoded input line and remove its CR/LF characters."""
    return unicode(rawLine, 'utf8').replace(u'\n', u'').replace(u'\r', u'')


# Opening in 'wb' mode creates/truncates the log file. Nothing in this part of
# the script writes to it; it is closed again when the script finishes.
logfile = codecs.open('mpc.movieListGenerator.log', encoding='utf-8', mode='wb')
try:
    inputFile = open('mpc.movieListGenerator.txt', 'rb')
    try:
        # Skip the UTF-8 byte-order mark only when it is really present.
        # Dropping three bytes unconditionally would eat the start of the
        # first title in a file saved without a BOM.
        if inputFile.read(len(codecs.BOM_UTF8)) != codecs.BOM_UTF8:
            inputFile.seek(0)

        # first line of input file should be the name of the article,
        # to hold the movie names list according to the year.
        yearListName = _decodeLine(inputFile.readline())

        # second line of input file should be the name of the article,
        # to hold the movie names list alphabetically.
        alphabetListName = _decodeLine(inputFile.readline())

        # from third line onwards we should have the names of the categories,
        # from where we get the names for article list
        categoryNames = [_decodeLine(rawLine) for rawLine in inputFile]
    finally:
        inputFile.close()

    # initialize the alphabetic names list and the per-year content pieces
    alphabeticListOfNames = []
    yearListParts = []

    for categoryName in categoryNames:
        # A blank line (for example a trailing one) names no category.
        if not categoryName:
            continue

        articleNamesList = getCatList(categoryName)

        # parse the name of the category and obtain the year of the movies
        year = getYear(categoryName)

        # fill the year list with the obtained movie names, and collect
        # those movie names so that we can sort all the movies at a time
        yearListParts.append(u'== ' + year + u' ==\r\n')
        for page in articleNamesList:
            title = page.title()
            yearListParts.append(u'# [[' + title + u']]\r\n')
            alphabeticListOfNames.append(title)

    # sort the gathered list of movie names
    alphabeticListOfNames.sort()

    # write the sorted movies into the alphabetic list, starting a new
    # section whenever the first character of the title changes
    prevFirstLetter = u''
    alphabetListParts = []
    for pageName in alphabeticListOfNames:
        firstLetter = pageName[:1]
        if prevFirstLetter != firstLetter:
            alphabetListParts.append(u'== ' + firstLetter + u' ==\r\n')
            prevFirstLetter = firstLetter
        alphabetListParts.append(u'# [[' + pageName + u']]\r\n')

    # Wrap each list in the header/footer sub-pages of its target article.
    yearListPageContent = u'__NOTOC__\r\n{{:' + yearListName + '\\header}}\r\n' + u''.join(yearListParts) + u'\r\n\r\n{{:' + yearListName + '\\footer}}'
    alphabetListPageContent = u'__NOTOC__\r\n{{:' + alphabetListName + '\\header}}\r\n' + u''.join(alphabetListParts) + u'\r\n\r\n{{:' + alphabetListName + '\\footer}}'

    writeData(yearListName, yearListPageContent, u'Bot: Updating the list of movies according to year.')
    writeData(alphabetListName, alphabetListPageContent, u'Bot: Updating the list of movies sorted alphabetically.')
finally:
    logfile.close()