వికీపీడియా:వికీప్రాజెక్టు/అనాథాశ్రమం/అనాథ వ్యాసాల ప్రస్తావనలు/స్క్రిప్టు
Jump to navigation
Jump to search
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Report where orphan pages of Telugu Wikipedia are mentioned.

The script takes the pages yielded by pywikibot's LonelyPagesPageGenerator,
keeps those that have no backlinks from the main namespace, searches the
main namespace for each remaining page title, and builds one report entry
per page.  The report is written to a local text file and also overwrites
the project page named in REPORT_PAGE_TITLE.
"""
import warnings
from concurrent.futures import ThreadPoolExecutor

# Suppress UserWarnings from pywikibot
warnings.filterwarnings("ignore", category=UserWarning, module='pywikibot')

# Wiki page that receives the report (its content is overwritten).
REPORT_PAGE_TITLE = 'వికీపీడియా:వికీప్రాజెక్టు/అనాథాశ్రమం/అనాథ వ్యాసాల ప్రస్తావనలు'
# Edit summary used when saving the report page.
EDIT_SUMMARY = 'అనాథ వ్యాసాల ప్రస్తావనలు తాజాకరించా'
# Local copy of the report (optional offline log).
LOG_FILE_NAME = 'orphaned_page_mentions_tewiki.txt'
# Number of worker threads used for the wiki lookups.
MAX_WORKERS = 5


def is_orphan_page(page):
    """Return True if *page* has no backlinks from the main namespace.

    Only the first backlink is requested (``total=1``): a single hit already
    proves the page is not an orphan, so the full backlink list is never
    downloaded.
    """
    first_backlink = next(iter(page.backlinks(namespaces=0, total=1)), None)
    return first_backlink is None


def search_for_mentions(site, page_title):
    """Return the titles of main-namespace search hits for *page_title*.

    The search is limited to 10 hits; a hit whose title equals *page_title*
    is dropped, so the returned list may be shorter than 10.
    """
    hits = site.search(page_title, total=10, namespaces=0)
    hit_titles = (hit.title() for hit in hits)  # title() once per hit
    return [title for title in hit_titles if title != page_title]


def format_entry(page_num, page_title, mentioned_in):
    """Return the wikitext report entry for one orphan page.

    *mentioned_in* is the list of article titles mentioning the page; an
    empty list produces the "not mentioned anywhere" variant.  Every entry
    starts with "\\r\\n", which together with the "\\n" used to join entries
    leaves an empty line between consecutive entries.
    """
    heading = f"\r\n{page_num}. అనాథ పేజీ: [[{page_title}]]"
    if not mentioned_in:
        return f"{heading} - ఎక్కడా ప్రస్తావించబడలేదు."
    mentions_str = "\n".join(f": [[{mention}]]" for mention in mentioned_in)
    return f"{heading} - ఈ వ్యాసాలలో ప్రస్తావించబడింది:\n{mentions_str}"


def _build_entry(orphaned_page, page_num):
    """Search for mentions of an already verified orphan and format them."""
    page_title = orphaned_page.title()
    print(f"Processing orphaned page #{page_num}: {page_title}")
    # Search on the page's own site instead of relying on a module global.
    mentioned_in = search_for_mentions(orphaned_page.site, page_title)
    return format_entry(page_num, page_title, mentioned_in)


def process_orphan_page(orphaned_page, page_num):
    """Return the report entry for *orphaned_page*, or None if it has backlinks.

    Performs the orphan check itself, so it can be called on any page.
    main() does not use it: it verifies all pages first (to number only the
    real orphans) and then calls _build_entry, avoiding a second check.
    """
    if not is_orphan_page(orphaned_page):
        print(f"Skipping page '{orphaned_page.title()}' as it is not an orphan page.")
        return None  # Return None for non-orphan pages to skip them
    return _build_entry(orphaned_page, page_num)


def main():
    """Build the orphan-mention report and publish it to file and wiki."""
    # Imported here rather than at module level so that the helper functions
    # above can be imported (e.g. for testing) without pywikibot available.
    import pywikibot
    from pywikibot import pagegenerators

    # Connect to Telugu Wikipedia
    site = pywikibot.Site('te', 'wikipedia')

    # Only titles, backlinks and search are used below, so the page text is
    # not preloaded.
    orphaned_pages = list(
        pagegenerators.LonelyPagesPageGenerator(total=None, site=site)
    )

    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        # Phase 1: verify every candidate once, in parallel.  map() yields
        # results in input order, so the report order stays deterministic.
        orphan_flags = executor.map(is_orphan_page, orphaned_pages)
        true_orphans = [
            page for page, is_orphan in zip(orphaned_pages, orphan_flags)
            if is_orphan
        ]
        # Phase 2: number only the verified orphans (1, 2, 3, ...) and look
        # up their mentions in parallel.
        entry_numbers = range(1, len(true_orphans) + 1)
        log_data = list(executor.map(_build_entry, true_orphans, entry_numbers))

    new_content = "\n".join(log_data)

    # Write the log data to an offline text file (optional logging)
    with open(LOG_FILE_NAME, 'w', encoding='utf-8') as logfile:
        logfile.write(new_content)

    # Overwrite the Telugu Wikipedia report page with the same content
    tewiki_page = pywikibot.Page(site, REPORT_PAGE_TITLE)
    tewiki_page.text = new_content
    tewiki_page.save(summary=EDIT_SUMMARY, minor=False)

    print(
        "Script completed. Results written to both the Telugu Wikipedia page "
        f"and '{LOG_FILE_NAME}'."
    )


if __name__ == "__main__":
    main()