Jump to content

వికీపీడియా:వికీప్రాజెక్టు/అనాథాశ్రమం/అనాథ వ్యాసాల ప్రస్తావనలు/స్క్రిప్టు

వికీపీడియా నుండి
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Optimized Script to generate mentions of orphan pages and update Telugu Wikipedia page + optional offline logging

import pywikibot
from pywikibot import pagegenerators
import warnings
from concurrent.futures import ThreadPoolExecutor

# Silence UserWarning messages attributed to pywikibot modules so they do not
# clutter the progress lines this script prints.
warnings.filterwarnings("ignore", category=UserWarning, module='pywikibot')

# Site object for Telugu Wikipedia (language code 'te'); it is used for the
# title searches and for the report page that is saved at the end.
site = pywikibot.Site('te', 'wikipedia')

# Function to check if a page is truly an orphan page
def is_orphan_page(page):
    """Return True when no main-namespace page links to *page*.

    Args:
        page: A page object exposing ``backlinks(namespaces=..., total=...)``.

    Returns:
        bool: True if the backlink query yields nothing, False otherwise.
    """
    # A single backlink is enough to disprove orphan status, so cap the query
    # at one result and stop at the first hit instead of downloading and
    # storing the complete backlink list.
    for _ in page.backlinks(namespaces=0, total=1):
        return False
    return True

# Function to search for mentions of a given page, restricting to main namespace (namespace 0)
def search_for_mentions(site, page_title):
    """Return the titles of search hits for *page_title*.

    The search is limited to namespace 0 and to at most 10 results; a hit
    whose title equals *page_title* itself is left out.

    Args:
        site: Site object providing ``search()``.
        page_title: Title text to search for.

    Returns:
        list: Titles of the matching pages, in the order the search gave them.
    """
    mention_titles = []
    for hit in site.search(page_title, total=10, namespaces=0):
        hit_title = hit.title()
        if hit_title != page_title:
            mention_titles.append(hit_title)
    return mention_titles

# Build a generator over the wiki's "lonely pages" special-page listing;
# total=None places no cap on how many pages are requested.
orphaned_pages_gen = pagegenerators.LonelyPagesPageGenerator(total=None, site=site)
# Wrap it in a preloading generator (the second argument, 500, is presumably
# the batch size -- confirm against the pywikibot docs) and materialise every
# page into a list so the loop further down can iterate over it.
orphaned_pages = list(pagegenerators.PreloadingGenerator(orphaned_pages_gen, 500))  # whole result is held in memory

# Function to process each orphaned page
def process_orphan_page(orphaned_page, page_num):
    """Build the report entry for one orphaned page.

    Args:
        orphaned_page: Page object to report on.
        page_num: Sequence number printed at the start of the entry.

    Returns:
        The entry text as a string, or None when the page turns out to have
        backlinks and is therefore skipped.
    """
    page_title = orphaned_page.title()

    # Re-check orphan status so the function is safe to call on any page
    if not is_orphan_page(orphaned_page):
        print(f"Skipping page '{page_title}' as it is not an orphan page.")
        return None  # Callers drop None results

    print(f"Processing orphaned page #{page_num}: {page_title}")

    # Search for pages that mention the orphaned page title
    mentioned_in = search_for_mentions(site, page_title)

    # Entries begin with a plain "\n" (not "\r\n"): every other separator in
    # the report is "\n", and a text-mode file write already translates "\n"
    # to the platform line ending, so an explicit "\r" would leave stray
    # carriage returns in the output.
    if mentioned_in:
        mentions_str = "\n".join(f": [[{mention}]]" for mention in mentioned_in)
        return f"\n{page_num}. అనాథ పేజీ: [[{page_title}]] - ఈ వ్యాసాలలో ప్రస్తావించబడింది:\n{mentions_str}"
    return f"\n{page_num}. అనాథ పేజీ: [[{page_title}]] - ఎక్కడా ప్రస్తావించబడలేదు."

# Run both the backlink checks and the mention searches on a small thread pool
# so that the waits on the individual requests can overlap.
with ThreadPoolExecutor(max_workers=5) as executor:
    log_data = []
    orphaned_page_count = 0  # Running number, assigned to confirmed orphans only

    # Check orphan status through the pool instead of one page at a time in
    # the main thread. executor.map() yields results in input order, so the
    # numbering below is the same as with a sequential check.
    orphan_flags = executor.map(is_orphan_page, orphaned_pages)

    futures = []
    for page, is_orphan in zip(orphaned_pages, orphan_flags):
        if is_orphan:
            orphaned_page_count += 1  # Increment only for orphan pages
            futures.append(
                executor.submit(process_orphan_page, page, orphaned_page_count)
            )

    # Collect results in submission order so entries stay sorted by number
    for future in futures:
        result = future.result()
        if result:  # None marks a page that was skipped by the worker
            log_data.append(result)

# Render the report once; the same text goes to the local file and to the wiki
new_content = "\n".join(log_data)

# Keep an offline copy of the report in the current working directory
with open('orphaned_page_mentions_tewiki.txt', 'w', encoding='utf-8') as logfile:
    logfile.write(new_content)

# Replace the entire text of the wiki report page with the fresh report
tewiki_page = pywikibot.Page(site, 'వికీపీడియా:వికీప్రాజెక్టు/అనాథాశ్రమం/అనాథ వ్యాసాల ప్రస్తావనలు')
tewiki_page.text = new_content
tewiki_page.save(summary='అనాథ వ్యాసాల ప్రస్తావనలు తాజాకరించా', minor=False)

print("Script completed. Results written to both the Telugu Wikipedia page and 'orphaned_page_mentions_tewiki.txt'.")