User:Sakura emad/kcat.js
Note: After publishing, you may have to bypass your browser's cache to see the changes.
- Firefox / Safari: Hold Shift while clicking Reload, or press either Ctrl-F5 or Ctrl-R (⌘-R on a Mac)
- Google Chrome: Press Ctrl-Shift-R (⌘-Shift-R on a Mac)
- Edge: Hold Ctrl while clicking Refresh, or press Ctrl-F5.
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
This bot finds the English Wikipedia counterpart of a non-English Wikipedia
page and fetches its categories. If any of those categories has a counterpart
in the origin Wikipedia, the bot then adds the page to those categories.
"""
#
# (C) User:Huji, 2021
# The original version can be found at https://github.com/PersianWikipedia/fawikibot/blob/master/categorize.py
# Distributed under the terms of the MIT license.
#
import pywikibot
from pywikibot import pagegenerators
#import fa_cosmetic_changes_core as fccc
from functools import lru_cache
from pywikibot.bot import (
SingleSiteBot,
ExistingPageBot,
NoRedirectPageBot,
AutomaticTWSummaryBot,
)
import re
# Show help with the parameter -help.
# The key is the "&params;" placeholder used by pywikibot scripts for the
# page-generator help text (it had been mangled into a pilcrow by an HTML
# entity mis-decoding, so it could never match).
docuReplacements = {"&params;": pagegenerators.parameterHelp}
class CategorizeBot(
    SingleSiteBot,
    ExistingPageBot,
    NoRedirectPageBot,
    AutomaticTWSummaryBot,
):
    """Add categories to local pages based on their enwiki counterpart.

    For each page, the bot follows the interwiki link to the English
    Wikipedia, reads the categories of the English page, maps each of them
    back to a local ("ku") category through the category's own interwiki
    links, and appends the eligible ones that the page does not have yet.
    """

    update_options = {
        "cosmetic": False,  # Whether to run cosmetic changes script
    }

    def __init__(self, generator, **kwargs):
        """
        @param generator: the page generator that determines which pages
            to work on
        @type generator: generator
        """
        super(CategorizeBot, self).__init__(site=True, **kwargs)
        self.generator = generator
        # Pages carrying any of these categories opt out of this bot.
        self.skip_categories = [
            "Rûpelên ku heman kategoriyê qebûl nakin",
        ]
        self.summary = (
            "[[Wikipedia:Classifying balanced articles|bot]]: Kategoriyên kêm ji en.wîkiyê lê hatin zêdekirin"
        )
        self.allowednamespaces = [0, 4, 6, 10, 12, 14, 16]
        # .get() so that building the bot without the option does not raise
        # KeyError; the default mirrors update_options.
        self.cosmetic_changes = kwargs.get("cosmetic", False)
        # Local wiki; the attribute name is kept from the script this one
        # was adapted from.
        self.site_fa = pywikibot.Site("ku")
        self.site_en = pywikibot.Site("en")
        # When True, parents of a newly added category are stripped from
        # the page text.
        self.remove_parent = False

    def list_intersection(self, list1, list2):
        """Return the items of list1 that also occur in list2.

        Order and duplicates of list1 are preserved.
        """
        return [value for value in list1 if value in list2]

    @lru_cache(maxsize=None)
    def get_existing_cats(self, page):
        """Get a list() of categories the page is in.

        Titles are returned without the namespace prefix. Results are
        memoized per page for the lifetime of the process.
        """
        return [c.title(with_ns=False) for c in page.categories()]

    @lru_cache(maxsize=None)
    def check_eligibility(self, candidate):
        """Determine if the local category is addable.

        @param candidate: local category title without namespace prefix
        @return: False if the category page does not exist or if it sits
            directly in one of the ineligible parent categories
        """
        cat = pywikibot.Page(self.site_fa, "Kategorî:%s" % candidate)
        if not cat.exists():
            return False
        cat_cats = self.get_existing_cats(cat)
        ineligible_parents = [
            "Kategoriyên veşartî",
            # Was "Tracking_categories"; get_existing_cats() compares
            # against titles from title(), which use spaces, so the
            # underscore form could never match.
            "Tracking categories",
            "Kategoriyên şitilan",
            "Beralîkirinên kategoriyan",
            "Infobox mapframe without OSM relation ID on Wikidata",
            "Kesên ji Tirkiyeyê",
            "Bajarên Tirkiyeyê",
            "Aktorên tirk",
            "Aktrîsên tirk",
            "Bêjerên tirk",
            "Derhênerên fîlman ên tirk",
            "Derhênerên tirk",
            "Dozgêrên tirk",
            "Felsefevanên tirk",
            "Fizîknasên tirk",
            "Fîlozofên tirk",
            "Helbestvanên tirk",
            "Jinên tirk",
            "Matematîknasên tirk",
            "Muzîkvanên tirk",
            "Nivîskarên tirk",
            "Parêzerên tirk",
            "Popvanên tirk",
            "Rojnamevanên tirk",
            "Senaryonivîsên tirk",
            "Siyasetmedarên tirk",
            "Stranbêjên tirk",
            "Stêrnasên tirk",
            "Wênesazên tirk",
            "Zanyarên tirk",
            "Şanogerên tirk",
            "Şoreşgerên tirk",
            "Bakurê Kurdistanê",
            "Başûrê Kurdistanê",
            "Rojavaya Kurdistanê",
            "Rojhilata Kurdistanê",
        ]
        return not self.list_intersection(ineligible_parents, cat_cats)

    @lru_cache(maxsize=None)
    def check_eligibility_en(self, candidate):
        """Determine if the English category may be used as a source.

        @param candidate: enwiki category title without namespace prefix
        @return: False if the category sits directly in one of the
            ineligible parent categories
        """
        cat = pywikibot.Page(self.site_en, "Category:%s" % candidate)
        cat_cats = self.get_existing_cats(cat)
        ineligible_parents = [
            "Hidden categories",
            "Tracking categories",
            "Stub categories",
            "Turkish people by occupation",
            "Cities in Turkey",
            "Turkish Kurdistan",
            "Iraqi Kurdistan",
            "Syrian Kurdistan",
            "Iranian Kurdistan",
            "Turkish people",
        ]
        return not self.list_intersection(ineligible_parents, cat_cats)

    @lru_cache(maxsize=None)
    def is_child_category_of(self, child, parent):
        """Return True if local category `child` is directly in `parent`.

        Both arguments are category titles without namespace prefix.
        """
        child_cat = pywikibot.Page(self.site_fa, "Kategorî:%s" % child)
        return parent in self.get_existing_cats(child_cat)

    def treat_page(self):
        """Process the current page that the bot is working on.

        @return: False when the page is skipped, otherwise None
        """
        page = self.current_page
        if page.namespace() not in self.allowednamespaces:
            pywikibot.output("Namespace not allowed!")
            return False

        # Locate the English counterpart through the page's interwiki links.
        remote_page = None
        for ll in page.langlinks():
            if ll.site.code == "en":
                remote_page = pywikibot.Page(ll)
                break
        if remote_page is None:
            pywikibot.output("No interwiki link to enwiki; skipped.")
            return False
        if remote_page.isRedirectPage():
            pywikibot.output("Target page is a redirect; skipped.")
            return False

        current_categories = self.get_existing_cats(page)
        if set(self.skip_categories) & set(current_categories):
            pywikibot.output("Page disallows this bot; skipped.")
            # The original fell through here and edited the page anyway.
            return False

        added_categories = []
        removed_categories = []
        for rc in remote_page.categories():
            if self.check_eligibility_en(rc.title(with_ns=False)) is False:
                continue

            # Map the English category to its local counterpart, if any.
            candidate = None
            for ll in rc.langlinks():
                if ll.site.code == "ku":
                    candidate = ll.title
                    break
            if candidate is None:
                continue
            if candidate in current_categories:
                continue
            if candidate in added_categories:
                # Two English categories mapped to the same local one.
                continue
            if not self.check_eligibility(candidate):
                continue

            # If a child of this category is already used, don't add it
            if any(
                self.is_child_category_of(cc, candidate)
                for cc in current_categories
            ):
                pywikibot.output("More specific category already used.")
                continue

            # Otherwise add this category
            added_categories.append(candidate)

            # If a parent of what you just added is used, remove it
            if self.remove_parent is True:
                candidate_page = pywikibot.Page(
                    self.site_fa,
                    "Kategorî:%s" % candidate,
                )
                candidate_parents = self.get_existing_cats(candidate_page)
                intersection = self.list_intersection(
                    candidate_parents,
                    current_categories,
                )
                if intersection:
                    pywikibot.output("Removing less specific parent.")
                    removed_categories.extend(intersection)

        if not added_categories:
            return None

        text = page.text
        for ac in added_categories:
            # Append to the existing text; plain assignment would replace
            # the whole page with a single category link.
            text += "\n[[Kategorî:%s]]" % ac
        for parent in removed_categories:
            # Escape the title so characters such as parentheses are
            # matched literally rather than as regex syntax.
            parent_pattern = (
                r"\n\[\[Kategorî:" + re.escape(parent) + r"(\|[^\]]*)?\]\]"
            )
            text = re.sub(parent_pattern, "", text)
        if self.cosmetic_changes is True:
            # The cosmetic-changes helper module is only usable when the
            # file imports it under the name "fccc"; look it up instead of
            # assuming it is there, so a missing import does not crash the
            # run with NameError.
            cosmetic_module = globals().get("fccc")
            if cosmetic_module is None:
                pywikibot.output(
                    "Cosmetic changes requested but the fccc module is "
                    "not imported; skipped."
                )
            else:
                text, _ver, _msg = cosmetic_module.fa_cosmetic_changes(
                    text, page
                )
        self.put_current(text, summary=self.summary)
        return None
def main(*args):
    """
    Process command line arguments and invoke bot.

    @param args: command line arguments
    @type args: list of unicode
    @return: True if the bot was run, False if no page generator was given
    """
    # Default value for "cosmetic" option
    options = {"cosmetic": False}
    # Process global arguments to determine desired site
    local_args = pywikibot.handle_args(args)
    # Process pagegenerators arguments
    gen_factory = pagegenerators.GeneratorFactory()
    local_args = gen_factory.handle_args(local_args)
    # Parse command line arguments
    for arg in local_args:
        arg, sep, value = arg.partition(":")
        option = arg[1:]  # drop the leading "-"
        if option in ("summary", "text"):
            if not value:
                # Keep the answer; the original discarded it and stored
                # the empty string instead.
                value = pywikibot.input("Please enter a value for " + arg)
            options[option] = value
        # Take the remaining options as booleans.
        else:
            options[option] = True
    gen = gen_factory.getCombinedGenerator(preload=True)
    if gen:
        bot = CategorizeBot(gen, **options)
        bot.run()
        return True
    else:
        pywikibot.bot.suggest_help(missing_generator=True)
        return False
# Run the bot only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()