Page MenuHomePhabricator
Paste P49299

Simple python script to migrate spam blacklist
ActivePublic

Authored by Ladsgroup on Jun 8 2023, 9:00 PM.
Tags
None
Referenced Files
F37108085: Simple python script to migrate spam blacklist
Jun 17 2023, 10:16 PM
F37106884: Simple python script to migrate spam blacklist
Jun 16 2023, 7:59 AM
F37100383: Simple python script to migrate spam blacklist
Jun 11 2023, 12:46 AM
F37098495: Simple python script to migrate spam blacklist
Jun 8 2023, 9:10 PM
F37098490: Simple python script to migrate spam blacklist
Jun 8 2023, 9:03 PM
F37098488: Simple python script to migrate spam blacklist
Jun 8 2023, 9:00 PM
Subscribers
import requests
import json
import re
# Wiki whose MediaWiki:Spam-blacklist page is migrated.
domain = 'https://en.wikipedia.beta.wmflabs.org'

# Seconds to wait for each HTTP response. requests applies no timeout by
# default, so a stalled connection would otherwise hang the script forever.
REQUEST_TIMEOUT = 30

# Body of a blacklist entry (the part between the surrounding \b anchors) that
# names exactly one literal domain: a non-empty run of escaped dots ("\.") and
# characters that are neither a backslash, a slash, whitespace, nor one of the
# regex metacharacters . $ ^ { [ ] ( ) | * + ?
_PLAIN_DOMAIN_RE = re.compile(r'(?:\\\.|[^\\/.$^{\[\]()|*+?\s])+')

# Text stored in the 'notes' field of every migrated entry; {} is replaced by
# the revision ID of the blacklist page the entry was taken from.
_NOTE_TEMPLATE = 'Moved from [[Special:PermaLink/{}|MediaWiki:Spam-blacklist]]'


def extract_plain_domain(line):
    """Return the literal domain blocked by a blacklist line, or None.

    A line qualifies only when, after stripping surrounding whitespace, it
    has the form ``\\b<body>\\b`` and <body> consists solely of ordinary
    characters and escaped dots. Comment lines (starting with '#'), blank
    lines, entries containing a path ('/'), and entries using any regex
    construct other than ``\\.`` never qualify, so None is returned for them.

    :param line: one line of the spam blacklist page.
    :returns: the domain with the escaping backslashes removed, or None when
        the line cannot be expressed as a plain domain.
    """
    entry = line.strip()
    if not (entry.startswith('\\b') and entry.endswith('\\b')):
        return None
    body = entry[2:-2]
    # fullmatch also rejects an empty body (a bare "\b" or "\b\b" line), a
    # metacharacter in first position and a dangling trailing backslash.
    if not _PLAIN_DOMAIN_RE.fullmatch(body):
        return None
    return body.replace('\\', '')


def migrate_blacklist(blacklist_text, blocked_domains, rev_id):
    """Move plain-domain entries from the blacklist text to the domain list.

    :param blacklist_text: raw text of the spam blacklist page.
    :param blocked_domains: existing blocked-domain entries; assumed to be a
        list of dicts with a 'domain' key, the shape this script appends.
        The list is not modified.
    :param rev_id: revision ID of the blacklist page, recorded in the
        'notes' field of each new entry.
    :returns: a ``(remaining_lines, merged_domains)`` tuple. remaining_lines
        holds every blacklist line that was not migrated, in original order;
        merged_domains is blocked_domains followed by the new entries.
    """
    remaining_lines = []
    merged_domains = list(blocked_domains)
    known = {entry.get('domain') for entry in merged_domains}
    note = _NOTE_TEMPLATE.format(rev_id)
    # split('\n') rather than splitlines() so that joining the remaining
    # lines with '\n' reproduces untouched parts of the page exactly.
    for line in blacklist_text.split('\n'):
        plain_domain = extract_plain_domain(line)
        if plain_domain is None:
            remaining_lines.append(line)
            continue
        # A domain that is listed twice, or is already blocked, is dropped
        # from the blacklist without adding a duplicate entry.
        if plain_domain not in known:
            known.add(plain_domain)
            merged_domains.append({'domain': plain_domain, 'notes': note})
    return remaining_lines, merged_domains


def main():
    """Fetch both pages, migrate the entries and print the results.

    Prints the remaining blacklist text first, then the merged
    blocked-domain list as tab-indented JSON.

    :raises requests.HTTPError: when a request fails, except for a 404 on
        the blocked-domains page, which is treated as an empty list.
    """
    blacklist_response = requests.get(
        domain + '/wiki/MediaWiki:Spam-blacklist?action=raw',
        timeout=REQUEST_TIMEOUT,
    )
    # Without this check an error page would be parsed as blacklist content.
    blacklist_response.raise_for_status()

    blocked_response = requests.get(
        domain + '/wiki/MediaWiki:BlockedExternalDomains.json?action=raw',
        timeout=REQUEST_TIMEOUT,
    )

    api_response = requests.get(
        domain + '/w/api.php',
        params={
            'action': 'query',
            'format': 'json',
            'prop': 'revisions',
            'titles': 'MediaWiki:Spam-blacklist',
            'rvslots': 'main',
            'rvprop': 'ids',
        },
        timeout=REQUEST_TIMEOUT,
    )
    api_response.raise_for_status()
    pages = api_response.json()['query']['pages']
    # Only one title was requested, so the mapping holds a single page.
    spam_blacklist_rev_id = next(iter(pages.values()))['revisions'][0]['revid']

    if blocked_response.status_code == 404:
        # The JSON page does not exist yet: start from an empty list.
        current_blocked_domains = []
    else:
        blocked_response.raise_for_status()
        current_blocked_domains = blocked_response.json()

    new_spam_blacklist, current_blocked_domains = migrate_blacklist(
        blacklist_response.text,
        current_blocked_domains,
        spam_blacklist_rev_id,
    )
    print('\n'.join(new_spam_blacklist))
    print(json.dumps(current_blocked_domains, ensure_ascii=False, indent='\t'))


if __name__ == '__main__':
    main()