User:SlaungerBot/Fir0002
Jump to navigation
Jump to search
Tasks
[edit]Action | Responsible | Status | Comment |
---|---|---|---|
List all files | Slaunger | Done | |
Direct edit links | Slaunger | Done | |
Find file pages containing Swifts Creek | Slaunger | Done | |
Omit already geolocated file pages from Swifts Creek | Slaunger | Done | Confirmed by Fir |
Add {{Location dec|37|15|47.31|S|147|43|17.72|E|...}} to 354 non-located files containing Swifts Creek | Slaunger | Done | Initial Location template error on 220 files corrected. Remaining approx. 100 files geocoded also. |
Add annotations in file lists | Slaunger | Done |
|
Add Location templates to relevant Swifts Creek files | Fir0002 | Done | Progress updated regularly by Slaunger using the annotation codes above in existing lists on this page. |
Create new credits license templates | Fir0002 | Done | Make new sub-pages containing only equipment and credits info. For instance, create User:Fir0002/200 credits based on User:Fir0002/200. |
Create new user license templates for substing | Fir0002 and/or Slaunger? | Done | Make new sub-pages aggregating a credits template with a license template. For instance, create User:Fir0002/200 license based on User:Fir0002/200 credits and {{GFDL 1.2 or cc-by-nc 3.0}} for later substing. |
Replace transcluded user templates with new subst'ed license templates on all of Fir's own works | Slaunger | Done | Approved by Fir |
Ignored subpages on User:Fir0002
[edit]Files uploaded by User:Fir0002 with no transcluded license templates
[edit]Subpages on User:Fir0002 transcluded on file pages (excluding Swifts Creek)
[edit]Only files from Swifts Creek
List of 3 files |
---|
* File:Brown rice.jpg (C) ( )
|
List of 1 files |
---|
* File:Ramesh Mashelkar Apr09.jpg (I) ( ) |
List of 1 files |
---|
* File:Brown&black goat.jpg (I) ( ) |
List of 21 files |
---|
* File:Bairnsdale court house.jpg (L) ( )
|
List of 16 files |
---|
* File:Fir0002 banner 01.jpg (I) ( )
|
Only files from Swifts Creek
List of 1 files |
---|
* File:Emperor penguin in water Feb09.jpg (L) ( ) |
List of 22 files |
---|
* File:Australian painted lady feeding closeup.jpg () ( )
|
List of 4 files |
---|
* File:Engine coolant.jpg () ( ) |
File pages containing 'Swifts Creek'
[edit]File pages containing Swifts Creek with no Location template
[edit]List of 2 files |
---|
* File:Hooded Robin Sept09 02.jpg ( ) |
Already located file pages containing Swifts Creek
[edit]Script
[edit]'''
Created on 12/02/2012
@author: Kim
'''
import logging
logging.basicConfig(level=logging.DEBUG)
import os
import pickle
import re
import wikipedia as wiki
import pagegenerators as pg
from userlib import User
from add_text import add_text
from replace import ReplaceRobot
from template import TemplateRobot
def transcluded_file_page_gen(page):
    """Yield the file pages on which *page* is transcluded.

    The pages referring to *page* by template inclusion are taken from
    ``pg.ReferringPageGenerator`` and then restricted to the "File"
    namespace with ``pg.NamespaceFilterPageGenerator``.
    """
    referring_pages = pg.ReferringPageGenerator(page, onlyTemplateInclusion=True)
    file_pages = pg.NamespaceFilterPageGenerator(referring_pages, ["File"])
    for file_page in file_pages:
        yield file_page
class UserData(object):
    """Data about one user's uploads and the user subpages used as templates.

    Everything is gathered in ``__init__``.  Each piece of data is cached in
    a pickle file in the current working directory and re-used on later runs
    unless ``refresh`` is true.

    NOTE: the cache files are read with ``pickle.load``, which is only safe
    for trusted files.  Never run this against cache files obtained from
    somewhere else.
    """

    # A file page using one of these templates counts as "located".
    _LOCATION_TEMPLATES = frozenset([
        u"Template:Location dec",
        u"Template:Location",
    ])

    # Templates whose usage is recorded by find_templates_of_interest().
    _TEMPLATES_OF_INTEREST = frozenset([
        u'Template:Information',
        u'Template:Check categories',
        u'Template:Uncategorized',
        u'Template:Location',
        u'Template:Location dec',
        u'Template:Valued image',
        u'Template:QualityImage',
        u'Template:Quality image',
        u'Template:Assessments',
        u'Template:Delisted picture',
    ])

    # (mark letter, templates that trigger it), in the order the letters are
    # emitted.  'Template:Uncategorized' and 'Template:Delisted picture' are
    # recorded but deliberately have no mark letter.
    _MARKS = (
        ('I', (u'Template:Information',)),
        ('C', (u'Template:Check categories',)),
        ('L', (u'Template:Location', u'Template:Location dec')),
        ('V', (u'Template:Valued image',)),
        ('Q', (u'Template:QualityImage', u'Template:Quality image')),
        ('F', (u'Template:Assessments',)),
    )

    def __init__(self, user_name, refresh=False):
        """Collect (or load from the cache files) all data for *user_name*.

        user_name -- name of the user, e.g. u"Fir0002"
        refresh   -- if true, ignore existing cache files and rebuild them
        """
        self.user_name = user_name
        logging.debug("Getting site info...")
        self.site = wiki.getSite()
        # Namespace number used for the user's page and subpages.
        self.user_ns = 2
        self.user = User(self.site, user_name)
        self.user_page = wiki.Page(self.site, user_name, defaultNamespace=self.user_ns).title()
        self.uploaded_files = self.get_uploaded_files(refresh)
        self.sub_pages = self.get_sub_pages(refresh)
        self.file_pages_using_subpage, self.unused_subpages = self.get_file_pages_using_sub_page(refresh)
        # Union of all file pages transcluding any of the user's subpages.
        all_transcluded_file_pages = set()
        for file_pages in self.file_pages_using_subpage.values():
            all_transcluded_file_pages.update(file_pages)
        self.all_transcluded_file_pages = all_transcluded_file_pages
        self.up_not_trans = self.get_uploaded_not_transcluded(refresh)
        self.trans_not_up = self.all_transcluded_file_pages.difference(self.uploaded_files)
        self.files_containing_swifts_creek = self.get_files_containing("Swifts Creek", self.all_transcluded_file_pages, refresh)
        self.located_files_from_swifts_creek = self.mark_pages_as_located(self.files_containing_swifts_creek, refresh)
        self.templates_of_interest = self.find_templates_of_interest(self.all_transcluded_file_pages, refresh)
        logging.debug("Done collecting data")

    def _cached(self, fn, refresh, build, load_msg, save_msg):
        """Return the result cached in file *fn*, building it when needed.

        fn       -- name of the pickle file holding the cached result
        refresh  -- if true, rebuild the result even if *fn* exists
        build    -- callable without arguments that produces the result
        load_msg -- debug message logged before loading from *fn*
        save_msg -- debug message logged before saving to *fn*
        """
        if os.path.exists(fn) and not refresh:
            logging.debug(load_msg)
            # 'with' closes the file; the cache must be a trusted local file.
            with open(fn, 'rb') as cache_file:
                return pickle.load(cache_file)
        result = build()
        logging.debug(save_msg)
        with open(fn, 'wb') as cache_file:
            pickle.dump(result, cache_file)
        return result

    def get_uploaded_files(self, refresh):
        """Return the set of file pages uploaded by the user."""
        def build():
            logging.debug("Retrieving files uploaded...")
            # Only the first element of each returned tuple is kept.
            # NOTE(review): 10000 is presumably an upper limit on the number
            # of uploads retrieved -- confirm against userlib.
            return set(t[0] for t in self.user.uploadedImages(10000))
        return self._cached(
            "uploaded_files.pickle", refresh, build,
            "Loading previously retrieved list of uploaded file pages...",
            "Saving files uploaded for future use...")

    def get_sub_pages(self, refresh):
        """Return the frozenset of pages found by prefix search on the user name."""
        def build():
            logging.debug("Retrieving User subpages...")
            return frozenset(pg.PrefixingPageGenerator(self.user_name, namespace=self.user_ns, site=self.site))
        return self._cached(
            "sub_pages.pickle", refresh, build,
            "Loading previously retrieved list of user subpages...",
            "Saving user subpages for future use...")

    def get_file_pages_using_sub_page(self, refresh):
        """Return ``(file_pages_using_subpage, unused_subpages)``.

        The first element maps each subpage transcluded on at least one file
        page to the set of those file pages; the second is the list of
        subpages not transcluded on any file page.
        """
        def build():
            logging.debug("Retrieving file pages using subpages...")
            file_pages_using_subpage = {}
            unused_subpages = []
            for sub_page in self.sub_pages:
                file_pages = set(transcluded_file_page_gen(sub_page))
                if file_pages:
                    file_pages_using_subpage[sub_page] = file_pages
                else:
                    unused_subpages.append(sub_page)
            return file_pages_using_subpage, unused_subpages
        return self._cached(
            "file_pages_using_sub_page.pickle", refresh, build,
            "Loading previously retrieved dictionary of files_pages using subpage...",
            "Saving file pages using subpages for future use...")

    def get_uploaded_not_transcluded(self, refresh):
        """Return the existing uploaded file pages that transclude no subpage.

        Requires ``uploaded_files`` and ``all_transcluded_file_pages`` to be
        set already.
        """
        def build():
            logging.debug("Filtering out deleted uploads...")
            candidates = self.uploaded_files.difference(self.all_transcluded_file_pages)
            return frozenset(f for f in candidates if f.exists())
        return self._cached(
            "up_not_trans.pickle", refresh, build,
            "Loading previously retrieved list of uploaded, not transcluded file pages...",
            "Saving list of uploaded, not transcluded filepages for future use...")

    def get_files_containing(self, s, file_pages, refresh):
        """Return the frozenset of pages in *file_pages* whose text contains *s*.

        Redirect pages are skipped with a warning.

        NOTE(review): the cache file name does not depend on *s*, so a cached
        result built for another search string would be returned unchanged.
        Only one search string is used in this file.
        """
        def build():
            found = []
            logging.debug("Finding file pages containing '%s'..." % s)
            for f in file_pages:
                if f.isRedirectPage():
                    logging.warning("%s is a redirect page" % f.title())
                elif s in f.get():
                    found.append(f)
                    logging.debug("%s contains '%s'." % (f.title(), s))
            logging.debug('Convert to set')
            return frozenset(found)
        return self._cached(
            "files_containing.pickle", refresh, build,
            "Loading previously retrieved list of files containing '%s'..." % s,
            "Saving list of file pages containing '%s' for future use..." % s)

    def mark_pages_as_located(self, file_pages, refresh):
        """Return a dict mapping each page in *file_pages* to a located flag.

        The flag is true when the page uses one of the Location templates.
        """
        def build():
            located = {}
            logging.debug("Finding located files...")
            for f in file_pages:
                logging.debug("Retrieving templates used on %s" % f.title())
                located[f] = any(t.title() in self._LOCATION_TEMPLATES for t in f.getTemplates())
                if located[f]:
                    # Logged instead of printed so the report written to
                    # stdout is not polluted.
                    logging.debug("%s already has a Location template" % f.title())
            return located
        return self._cached(
            "files_as_located.pickle", refresh, build,
            "Loading previously retrieved list of located files",
            "Saving list of located file pages...")

    def find_templates_of_interest(self, file_pages, refresh):
        """Return a dict mapping template title to the set of pages using it.

        Only templates of interest that are used on at least one page in
        *file_pages* get a key.  Every other template title is logged once.
        """
        def build():
            usage = {}
            logging.debug("Getting templates of interest on file pages...")
            templates_of_no_interest = set()
            for f in file_pages:
                for t in f.getTemplates():
                    ttl = t.title()
                    if ttl in self._TEMPLATES_OF_INTEREST:
                        usage.setdefault(ttl, set()).add(f)
                    elif ttl not in templates_of_no_interest:
                        logging.debug(ttl)
                        templates_of_no_interest.add(ttl)
            return usage
        return self._cached(
            "templates_used.pickle", refresh, build,
            "Loading previously retrieved dictionary of templates used...",
            "Saving dictionary of templates of interest...")

    def marks_for_file_page(self, file_page):
        """Return the mark letters (a subset of "ICLVQF") for *file_page*.

        A letter is included when the page uses one of the templates listed
        for it in ``_MARKS``.
        """
        toi = self.templates_of_interest
        # .get() is needed: a template used on no page has no key at all.
        return "".join(
            letter for letter, names in self._MARKS
            if any(file_page in toi.get(name, ()) for name in names))

    def _edit_list(self, header, file_pages):
        """Return report lines for a collapsed list of pages with edit links."""
        lines = [header]
        for file_page in file_pages:
            lines.append("* [[:%s]] ({{Custom edit|%s|text=edit}})" % (file_page.title(), file_page.title()))
        lines.append("}}")
        return lines

    def make_report(self):
        """Return the complete report as wikitext."""
        logging.debug('Making report...')
        report = []
        report.append("==Ignored subpages on [[%s]]==" % self.user_page)
        report.append("")
        report.append("{{collapse|title=List of ignored subpages|1=")
        for unused in self.unused_subpages:
            report.append("* [[%s]]" % unused.title())
        report.append("}}")
        report.append("")
        report.append("==Files uploaded by [[%s]] with no transcluded license templates==" % self.user_page)
        report.append("")
        report.extend(self._edit_list(
            "{{collapse|title=List of %d file pages|1=" % len(self.up_not_trans),
            self.up_not_trans))
        report.append("")
        report.append("==Subpages on [[%s]] transcluded on file pages (excluding Swifts Creek)==" % self.user_page)
        for sub_page, file_pages in self.file_pages_using_subpage.items():
            files_no_swifts = file_pages.difference(self.files_containing_swifts_creek)
            report.append("")
            report.append("===[[%s]]===" % sub_page.title())
            if files_no_swifts:
                report.append("{{collapse|title=List of %d files|1=" % len(files_no_swifts))
                for file_page in sorted(files_no_swifts):
                    report.append("* [[:%s]] (%s) ({{Custom edit|%s|text=edit}})" % (file_page.title(), self.marks_for_file_page(file_page), file_page.title()))
                report.append("}}")
            else:
                report.append("Only files from Swifts Creek")
        report.append("")
        report.append("==Files pages containing 'Swifts Creek'==")
        report.append("")
        report.append("===File pages containing Swifts Creek with no Location template===")
        not_located = sorted(f for f, located in self.located_files_from_swifts_creek.items() if not located)
        report.extend(self._edit_list(
            "{{collapse|title=List of %d files|1=" % len(not_located),
            not_located))
        report.append("")
        report.append("===Already located file pages containing Swifts Creek ===")
        already_located = sorted(f for f, located in self.located_files_from_swifts_creek.items() if located)
        report.extend(self._edit_list(
            "{{collapse|title=List of %d files|1=" % len(already_located),
            already_located))
        report.append("")
        return "\n".join(report)
def wrong_location_page_generator(data):
    """Yield the Swifts Creek file pages whose Location template is to be fixed.

    Every page in ``data.located_files_from_swifts_creek`` is yielded except
    those whose title is in the exclusion list below.
    """
    # Already geocoded before bot error
    already_geocoded = frozenset([
        u"File:Aurora australis panorama edit.jpg",
        u"File:Aurora australis panorama.jpg",
        u"File:Australian toad.jpg",
        u"File:Mouse spider scale.jpg",
        u"File:Mouse spider.jpg",
        u"File:Supernumerary rainbow04.jpg",
        u"File:Swifts creek township and surrounding hills.jpg",
    ])
    # The located flag stored as the dict value is not needed here.
    for page in data.located_files_from_swifts_creek:
        title = page.title()
        if title in already_geocoded:
            logging.debug("Ignoring %s..." % title)
            continue
        logging.debug("Fixing wrong Location on %s..." % title)
        yield page
def fix_wrong_location_templ(data):
    """Run a ReplaceRobot turning the wrong {{Location dec}} into {{Location}}.

    The robot works on the pages from ``wrong_location_page_generator(data)``.
    ``wiki.stopme()`` is called afterwards, whether or not the run raised.
    """
    page_gen = wrong_location_page_generator(data)
    # The old text is escaped so it can be used literally in a pattern.
    wrong_templ = re.escape(u"{{Location dec|37|15|47.31|S|147|43|17.72|E|region:AU}}")
    right_templ = u"{{Location|37|15|47.31|S|147|43|17.72|E|region:AU}}"
    summary = u"Fix {{Location dec}} -> {{Location}} template error introduced by [[User:SlaungerBot|SlaungerBot]]"
    bot = ReplaceRobot(page_gen, [(wrong_templ, right_templ)], editSummary=summary)
    try:
        bot.run()
    finally:
        wiki.stopme()
def add_location_templates_on_sc(data):
    """Add the default Swifts Creek Location template to non-located pages.

    Pages flagged as located in ``data.located_files_from_swifts_creek`` are
    left alone; ``add_text`` is called for all the others.
    """
    location_templ = u"{{Location|37|15|47.31|S|147|43|17.72|E|region:AU}}"
    edit_summary = u"Adding default Location for Swifts Creek on request from creator"
    for page, located in data.located_files_from_swifts_creek.iteritems():
        if located:
            continue
        add_text(page, addText=location_templ, summary=edit_summary, always=True)
def replace_template_in_text(templ_name, repl, text):
    """Replace parameterless transclusions of a template in *text*.

    A transclusion is matched as ``{{Name}}``, optionally with a leading
    colon and/or a ``Template:``/``template:`` prefix, and with the first
    character of the name in either case.  Transclusions with parameters
    (``{{Name|...}}``) are not matched.

    templ_name -- template title; a leading "Template:" prefix is ignored
    repl       -- replacement, passed to ``re.sub`` (so backslash escapes
                  and group references in a string are interpreted)
    text       -- the text to search

    Returns the text with all matches replaced.  *text* is returned unchanged
    when the template name is empty.
    """
    prefix = "Template:"
    # str.lstrip(prefix) would strip any leading run of the *characters* in
    # the prefix (turning "Template:Test" into "st"), so cut it off explicitly.
    if templ_name.startswith(prefix):
        stripped = templ_name[len(prefix):]
    else:
        stripped = templ_name
    if not stripped:
        return text
    first, rest = stripped[0], stripped[1:]
    if first.upper() != first.lower():
        first_pattern = "[" + re.escape(first.upper()) + re.escape(first.lower()) + "]"
    else:
        # No case variants: match the character itself, escaped.
        first_pattern = re.escape(first)
    # The name is escaped so that e.g. the "." in "200 1.4" only matches a dot.
    regex = r"\{\{:?([Tt]emplate:)?" + first_pattern + re.escape(rest) + r"\}\}"
    return re.sub(regex, repl, text)
def replace_templ_on_file_pages(old_template_name, new_subst, comment, max_files=1000):
    """Replace a template with *new_subst* on the file pages transcluding it.

    old_template_name -- title of the template page to look up and replace
    new_subst         -- text put in place of each matched transclusion
    comment           -- edit summary used when saving a page
    max_files         -- stop after this many file pages have been visited

    Pages that cannot be edited are skipped and listed at the end.
    """
    file_ns = 6
    site = wiki.getSite()
    old_templ_page = wiki.Page(site, old_template_name)
    referring_pages = pg.ReferringPageGenerator(old_templ_page, onlyTemplateInclusion=True)
    file_pages = pg.NamespaceFilterPageGenerator(referring_pages, [file_ns])
    skipped = []
    for index, page in enumerate(file_pages):
        if index >= max_files:
            break
        if not page.canBeEdited():
            link = page.aslink()
            print("%s could not be edited." % link)
            skipped.append(link)
            continue
        old_text = page.get()
        new_text = replace_template_in_text(old_template_name, new_subst, old_text)
        if new_text == old_text:
            print("%s could not be found in %s" % (old_template_name, repr(old_text)))
            continue
        page.put(new_text, comment=comment, watchArticle=None, minorEdit=False, sysop=False)
        # NOTE(review): the original indentation was lost; progress is assumed
        # to be reported only after a page has been saved -- confirm.
        visited = index + 1
        print("%d of %d processed (%5.1f%%)" % (visited, max_files, 100.0 * visited / max_files))
    if skipped:
        print("Skipped:")
        print(skipped)
# User subpages transcluded on file pages as license templates.  Each one is
# replaced by a subst of the page named "<subpage> license".
old_templates = [
    u"User:Fir0002/200 1.4",
    u"User:Fir0002/85-5D",
    u"User:Fir0002/17",
    u"User:Fir0002/400",
    u"User:Fir0002/400-5D",
    u"User:Fir0002/20D",
    u"User:Fir0002/150",
    u"User:Fir0002/200",
    u"User:Fir0002/150MT",
]

# Other old templates, mapped to the license page that is subst'ed instead.
other_old_to_new = {
    u"Template:Fir0002 17": u"User:Fir0002/17 license",
    u"Template:Fir0002 200": u"User:Fir0002/200 license",
    u"Template:Fir0002 200 1.4": u"User:Fir0002/200 1.4 license",
    u"Template:Fir0002 400": u"User:Fir0002/400 license",
    u"Template:Fir0002 150": u"User:Fir0002/150 license",
    u"Template:Fir0002 150MT": u"User:Fir0002/150MT license",
}


def main():
    """Print the report, add Location templates and replace license templates.

    All cached data is rebuilt (``refresh=True``).  ``wiki.stopme()`` is
    called at the end even when one of the steps raises.
    """
    # One edit summary shared by both replacement passes.  The existing
    # license is the GFDL 1.2 (the summary previously said "GDL 1.2").
    summary = (
        u"Add CC-BY-NC license to existing GFDL 1.2 license on "
        u"[[User:SlaungerBot/Fir0002|request]] from [[User:Fir0002|the creator]]. "
        u"Align with "
        u"[[Commons:User-specific galleries, templates and categories policy|user license template policy]].")
    try:
        data = UserData(u"Fir0002", refresh=True)
        print(data.make_report())
        add_location_templates_on_sc(data)
        for old_template in old_templates:
            new_subst = u"{{subst:%s license}}" % old_template
            replace_templ_on_file_pages(old_template, new_subst, summary, max_files=1000)
        for old_template, new_template in other_old_to_new.items():
            new_subst = u"{{subst:%s}}" % new_template
            replace_templ_on_file_pages(old_template, new_subst, summary, max_files=1000)
    finally:
        wiki.stopme()


if __name__ == '__main__':
    main()