Note: You may have to bypass your browser’s cache to see the changes. In addition, after saving a sitewide CSS file such as MediaWiki:Common.css, it will take 5-10 minutes before the changes take effect, even if you clear your cache.
- Mozilla / Firefox / Safari: hold Shift while clicking Reload, or press either Ctrl-F5 or Ctrl-R (Command-R on a Macintosh);
- Konqueror and Chrome: click Reload or press F5;
- Opera: clear the cache in Tools → Preferences;
- Internet Explorer: hold Ctrl while clicking Refresh, or press Ctrl-F5.
- This script lacks a documentation subpage. Please create it.
- Useful links: root page • root page’s subpages • links • redirects • your own
/**
* VERSION 1.0α <- You have been warned.
*
* First divides the page by language (~100% accurately)
* Then tries to reconnect homonym specific information. (~80% at the moment)
*
* Should present this nicely and very customisably to the user
* May eventually also output information in a computer readable format
*
* Things that need doing,
* Split categories across languages
* Recognise (senses) manually marked up (i.e. with italics instead of {{sense}})
*
* Basic file layout intentions (i.e. probably not stuck to)
* Init section
* Functions for splitting the dom
* Functions for language tabbing
* Generic functions for homonym parsing/tabbing
* User preference functions
* Site preference settings [section headers and handlers]
* DOM extensions
* Hacked in stylesheets
*
* Paper View
* Requested FEATURES 'alternative spellings'
* 'noun gender'
* 'turn off languages'
*
**/
// Shared parser state. Every function in this file reads or writes this object.
var wtp = {
  // Information about the headings that may be found (originally meant to
  // hold lists of part-of-speech headings and of homonym-separating headings).
  header: {},
  // Handlers for specific kinds of heading, keyed by handler name,
  // e.g. wtp.handler['translations'] handles any translation section.
  handler: {},
  // Named object of arrays: each array lists, in document order, the keys of
  // the sections found directly under the named section. Keys are of the
  // form "h1-h2-h3", e.g. "English-Noun-Translations".
  layout: {},
  // The removed DOM nodes, referenced by layout key; the heading element of
  // a section is stored under '-Head-' + key (e.g. "-Head-English-Noun").
  page: {},
  // People's preferences, from wherever they come.
  prefs: {},
  // The current heading structure, e.g. ['English','Noun','Translation'].
  pos: [],
  // The current layout/page key, normally pos.join('-').
  cPos: '-Top'
};
// Container for everything that appears before the first heading.
wtp.page[wtp.cPos] = document.createElement("div");
// An array of {}s, one per definition line.
wtp.homonym = [];
// Abbreviations used by the paper dictionary view.
wtp.ppAbbrevs = {'noun' : 'n.', 'verb' : 'vb.', 'countable' : '[C]', 'uncountable' : '[U]','transitive':'[T]', 'intransitive':'[I]', 'adjective':'adj.', 'adverb':'adv.'};
// An array of {}s, one per glossed sub-section waiting to be matched.
wtp.gloss = [];
// Sections registered through wtp_registerSet.
wtp.set = [];
// Sections registered through wtp_registerSetBit.
wtp.bit = [];
/**
 * Initialise the parser and see what people want us to do.
 *
 * Loads the preferences, takes the content node handed out by
 * parsers_getContentNode, splits it into sections and registers the
 * "Toggle Sections" and "Paper Dictionary" views.
 *
 * Any exception raised while parsing is caught so the untouched page stays
 * usable: the failure is flagged in the document title, reported on the
 * console when one exists, and the "Unchanged" view is re-selected.
 *
 * @returns {boolean|undefined} false when no content node is available,
 *          otherwise nothing.
 **/
function wtp_init(){
  wtp_loadPrefs();
  //Read the entry
  var content = parsers_getContentNode();
  if (!content) return false;
  //Just in case
  try {
    //Parse
    wtps_splitDom(content);
    wtp_createLanguageTabs();
    //Allow language view
    parsers_registerView(wtp.languageOutput,
      "Toggle Sections", wtp_showLanguageView);
    //Create other sections
    wtp_matchHomonyms();
    parsers_registerView(document.createElement('div'), "Paper Dictionary",
      wtp_createDictionaryView);
  } catch (err) {
    document.title += " (Parser Failed: Notify [[User:Conrad.Irwin]] if necessary.)";
    // Keep the cause visible for whoever has to debug the failure.
    if (typeof console != "undefined" && console.error) {
      console.error("parser.js failed", err);
    }
    var unchangedTab = document.getElementById('ParserTab-Unchanged');
    if (unchangedTab) parsers_tabParser(unchangedTab);
  }
}
/**
 * Split the DOM into sections by heading, assuming that each <hX> element
 * is a direct child of the given node.
 *
 * Children are detached from `div` one at a time and either start a new
 * section (headings), are stored as the category box, are added to the
 * current section, or are dropped.
 *
 * The ids tested here end in "C" because wtp_init passes in the copy made by
 * cloneNode, which appends "C" to every id it copies.
 *
 * @param div the container whose children are consumed (it ends up empty).
 * @returns {boolean} always true.
 **/
function wtps_splitDom(div){
  var nod;
  while ((nod = div.childNodes[0])) {
    // Detaching is all that is needed to drop a node; nothing else refers to it.
    div.removeChild(nod);
    var nn = nod.nodeName.toUpperCase();
    if (/^H[0-9]$/.test(nn)) {
      // Every heading except the site subtitle opens a new section.
      if (nod.getAttribute('id') != 'siteSubC') {
        wtps_changeHead(nod);
      }
    } else if (nn == 'DIV') {
      var nid = nod.getAttribute('id');
      if (nid == "catlinksC") {
        nod.setAttribute('id','catlinks'); //TODO\\ get the style out of the main page
        wtp.page['-Categories'] = nod; //TODO\\ Deal with these properly
      } else if (nod.className != "printfooter"
          && nid != "siteNoticeC"
          && nid != 'contentSubC'
          && nid != 'jump-to-navC') {
        wtps_includeElement(nod);
      }
      // else: page furniture, dropped
    } else if (!(nn == 'TABLE' && nod.getAttribute('id') == "tocC")
        && containsText(nod)) {
      wtps_includeElement(nod);
    }
    // else: the table of contents or a node without text, dropped
  }
  return true;
}
/**
 * Change the active section for the parser.
 *
 * The heading level (H2 -> 0, H3 -> 1, ...) decides how much of wtp.pos is
 * kept; the heading text (whitespace runs turned into "_") becomes the new
 * last element. A fresh <div> is created for the section's content and the
 * heading element itself is stored under '-Head-' + key.
 *
 * A heading without an "mw-headline" child is treated as ordinary content.
 *
 * @param hd the heading element.
 **/
function wtps_changeHead(hd){
  var depth = Number(hd.nodeName.replace(/^H/,"")) - 2;
  var headline = getChildByClass(hd,"mw-headline");
  if (!headline) {
    //Doubt this ever happens. but you never know :)
    wtps_includeElement(hd);
    return;
  }
  //Forget every heading at this depth or deeper
  while (wtp.pos.length > 0 && wtp.pos.length > depth) {
    wtp.pos.pop();
  }
  //Key of the parent section, whose layout list receives the new child
  var parentKey = wtp.pos.join('-') || '-Top';
  if (!wtp.layout[parentKey]) {
    wtp.layout[parentKey] = [];
  }
  //Move into the child heading
  wtp.pos[depth] = extractText(headline).replace(/[ \s\t\r\n]+/g,'_');
  wtp.cPos = wtp.pos.join('-');
  wtp.layout[parentKey].push(wtp.cPos);
  //Set up the container that will collect the child's content
  var section = document.createElement("div");
  wtp.page[wtp.cPos] = section;
  section.setAttribute("id", wtp.cPos);
  section.className = wtp.pos[depth];
  wtp.page['-Head-' + wtp.cPos] = hd;
}
/**
 * Include an element in the current parser section, i.e. append it to the
 * container stored under the key wtp.cPos.
 *
 * @param el the node to add.
 **/
function wtps_includeElement(el){
  var currentSection = wtp.page[wtp.cPos];
  currentSection.appendChild(el);
}
/**
 * Build the language view: an output <div> holding the pre-heading content,
 * a bar of tabs, one hidden section per top-level heading and finally the
 * category box when one was found.
 *
 * The result is stored in wtp.languageOutput and wtp.tabs. Clicking a tab
 * calls wtp_tabHandle with the clicked element and the type "Language".
 *
 * @returns {boolean} always true.
 **/
function wtp_createLanguageTabs(){
  //Create the display nodes
  var output = document.createElement("div");
  wtp.languageOutput = output;
  output.setAttribute("id","LanguageOutput");
  var tabBar = document.createElement("div");
  wtp.tabs = tabBar;
  tabBar.className = "LanguageTabs";
  output.appendChild(wtp.page['-Top']);
  output.appendChild(tabBar);
  //For each language...
  var languages = wtp.layout['-Top'] || [];
  for (var n = 0; n < languages.length; n++) {
    var lng = languages[n];
    var tab = document.createElement('span');
    var sect = document.createElement('div');
    //Language section: heading, own content, then every sub-section
    sect.setAttribute('id','-Language-' + lng);
    sect.className = "LanguageSectionHidden";
    sect.appendChild(wtp.page['-Head-' + lng]);
    sect.appendChild(wtp.page[lng]);
    wtp_recursiveAppendChild(sect, wtp.layout[lng]);
    output.appendChild(sect);
    //Language tab, labelled with the heading text
    tab.setAttribute('id','-LanguageTab-' + lng);
    tab.className = "LanguageTabHidden";
    tab.appendChild(document.createTextNode(lng.replace(/_/g,' ')));
    try {
      tab.addEventListener('click', function(evt){
        wtp_tabHandle(evt.target,"Language");
      }, false);
    } catch (err) {
      //Fall back to the old IE event model
      tab.attachEvent('onclick', function(){
        wtp_tabHandle(window.event.srcElement,"Language");
      });
    }
    tabBar.appendChild(tab);
  }
  if (wtp.page['-Categories']) {
    output.appendChild(wtp.page['-Categories']);
  }
  return true;
}
/**
 * A hack to try and notice when people click on #Language links on this page.
 *
 * Reads the part of the address after the first "#"; when it names a known
 * section, differs from the last one acted on, and a matching language tab
 * exists, that tab is activated. The function then schedules itself to run
 * again 100ms later.
 **/
function wtp_checkLanguageLinks(){
  var anchor = document.location.href.replace(/[^#]+#?/,'');
  var worthChecking = anchor.length
    && anchor != wtp.currentLinkLanguage
    && wtp.page[anchor];
  if (worthChecking) {
    var linkedTab = document.getElementById('-LanguageTab-' + anchor);
    if (linkedTab) {
      wtp_tabHandle(linkedTab,"Language");
      wtp.currentLinkLanguage = anchor;
    }
  }
  window.setTimeout(wtp_checkLanguageLinks,100);
}
/**
 * Used by wtp_createLanguageTabs to append every sub-section of a language
 * into one element, depth first: for each key its heading, then its content,
 * then its own sub-sections.
 *
 * @param dest the element that receives the nodes.
 * @param lay  array of layout keys; a missing array means nothing to do.
 **/
function wtp_recursiveAppendChild(dest,lay){
  if (!lay) return;
  for (var idx = 0; idx < lay.length; idx++) {
    var key = lay[idx];
    dest.appendChild(wtp.page['-Head-' + key]);
    dest.appendChild(wtp.page[key]);
    //Leaf sections have no layout entry; the guard above handles them
    wtp_recursiveAppendChild(dest, wtp.layout[key]);
  }
}
/**
 * An attempt to get the categories to split by language too.
 *
 * Unfinished: it looks up the "catlinks" child of the given node and walks
 * over that child's nodes without doing anything with them yet.
 *
 * @param div the node expected to contain a child with class "catlinks".
 **/
function wtp_handleCategories(div){
  var links = getChildByClass(div,"catlinks");
  var idx = 0;
  while (idx < links.childNodes.length) {
    //TODO: assign each category to a language
    idx++;
  }
}
/**
 * Handle a click on a tab: hide the section belonging to the tab currently
 * shown in the same tab bar and show the section belonging to `el`.
 *
 * A tab with id "-<type>Tab-<key>" controls the element with id
 * "-<type>-<key>". Class names "<type>TabShown/Hidden" and
 * "<type>SectionShown/Hidden" carry the state.
 *
 * @param el       the tab element that was activated.
 * @param type     tab family, e.g. "Language", "Homonym" or "Set".
 * @param showonly when truthy nothing is ever hidden: the tab's section is
 *                 shown (or stays shown). Without it, activating the tab
 *                 that is already shown hides its section.
 **/
function wtp_tabHandle(el,type,showonly){
  var tabPrefix = "-" + type + "Tab-";
  var sectPrefix = "-" + type + "-";
  var id = el.getAttribute('id').replace(tabPrefix, sectPrefix);
  var sect = document.getElementById(id);
  //Check to see if we are already displaying a section
  if (el.parentNode) {
    var otab = getChildByClass(el.parentNode, type + "TabShown");
    if (otab) {
      var oid = otab.getAttribute('id').replace(tabPrefix, sectPrefix);
      //A second click on the shown tab toggles it off. With showonly the
      //section element is kept, so it is simply (re)shown below.
      if (oid == id && !showonly) sect = null;
      var osect = document.getElementById(oid);
      if (osect && !showonly) {
        otab.className = type + "TabHidden";
        osect.className = type + "SectionHidden";
      }
    }
  }
  //Display the new section
  if (sect) {
    wtp['current' + type + 'Tab'] = el;
    sect.className = type + "SectionShown";
    el.className = type + "TabShown";
  }
}
/**
 * Called when the language view is selected; it ensures at least one
 * language is showing and starts watching the address for #Language links.
 *
 * When no language tab exists (a page without headings) nothing is shown
 * instead of failing. The link watcher reschedules itself, so it is started
 * only on the first call; starting it on every call would stack up timers.
 *
 * @param parserNode the view's node (unused).
 * @param parserTab  the view's tab (unused).
 **/
function wtp_showLanguageView(parserNode,parserTab){
  if (!wtp.currentLanguageTab) {
    wtp.currentLanguageTab = wtp.tabs.firstChild;
  }
  if (wtp.currentLanguageTab) {
    wtp_tabHandle(wtp.currentLanguageTab,"Language",true);
  }
  wtp.currentLinkLanguage = document.location.href.replace(/[^#]+#?/,'');
  if (!wtp.languageLinkWatcherStarted) {
    wtp.languageLinkWatcherStarted = true;
    window.setTimeout(wtp_checkLanguageLinks,100);
  }
}
/**
 * Build a <span class=className> containing `text` as its only text node.
 **/
function wtp_dictSpan(className, text){
  var span = document.createElement('span');
  span.className = className;
  span.appendChild(document.createTextNode(text));
  return span;
}
/**
 * Build the italic part-of-speech marker for a homonym: the last segment of
 * its PoS section key, lower-cased, abbreviated through wtp.ppAbbrevs when an
 * abbreviation exists (the full name then goes in the title attribute).
 **/
function wtp_dictPosSpan(hnym){
  var name = hnym.pos.title.replace(/_/g,' ').replace(/.+-/,'').toLowerCase();
  var abbrev = wtp.ppAbbrevs[name];
  if (abbrev) {
    var span = wtp_dictSpan("dictPos", abbrev);
    span.setAttribute('title', name);
    return span;
  }
  return wtp_dictSpan("dictPos", name);
}
/**
 * Turn the comma-separated contents of an "ib-content" span into output:
 * entries with an abbreviation become grammar markers, the remaining
 * non-empty entries are written afterwards as one bracketed context label.
 **/
function wtp_dictAppendContext(p, contextSpan){
  var entries = extractText(contextSpan).split(/ *, */);
  var kept = [];
  for (var k = 0; k < entries.length; k++) {
    var entry = entries[k];
    if (entry == '') continue;
    if (wtp.ppAbbrevs[entry]) {
      var grammar = wtp_dictSpan('dictGrammar', wtp.ppAbbrevs[entry]);
      grammar.setAttribute('title', entry);
      p.appendChild(grammar);
    } else {
      kept.push(entry); //Just a context tag.
    }
  }
  if (kept.length > 0) {
    p.appendChild(wtp_dictSpan('ib-bracket', '('));
    p.appendChild(wtp_dictSpan('ib-contents', kept.join(',')));
    p.appendChild(wtp_dictSpan('ib-bracket', ')'));
  }
}
/**
 * Copy the body of one definition into the paragraph: text nodes and most
 * elements are cloned; lists and <div>s are left out; <span>s are dropped
 * except "ib-content" ones, which go through wtp_dictAppendContext.
 **/
function wtp_dictAppendDefinition(p, defNode){
  for (var j = 0; j < defNode.childNodes.length; j++) {
    var child = defNode.childNodes[j];
    var nn = child.nodeName.toUpperCase();
    if (nn.indexOf('#') == 0) {
      p.appendChild(cloneNode(child)); //Include Text Nodes
    } else if (nn == 'DL' || nn == 'OL' || nn == 'UL') {
      //don't include example sentences (however they have been put in)
    } else if (nn == 'DIV') {
      //don't include parsed stuff or other floaty boxy thingys
    } else if (nn == 'SPAN') {
      if (child.className == 'ib-content') {
        wtp_dictAppendContext(p, child);
      }
    } else {
      p.appendChild(cloneNode(child));
    }
  }
}
/**
 * Tries to convert the matched up homonyms to a standard dictionary like
 * entry; much less feature-full, but hopefully nice.
 * IS EN.WIKT specific as it needs to parse the PoS section in more detail.
 *
 * Walks wtp.homonym in order. A new language starts a heading and a
 * paragraph; a new etymology starts a numbered paragraph (and reveals the
 * number of the first one); a new part of speech adds a separator and PoS
 * marker; otherwise the sense is appended after "; ".
 * Does nothing when `node` already has content, so it only runs once.
 *
 * @param node      the (empty) element to fill.
 * @param parserTab the view's tab (unused).
 **/
function wtp_createDictionaryView(node,parserTab){
  if (node.childNodes[0]) return; //We have already run.
  node.className = "DictionaryView";
  var lng; var pos; var ety; var p; var etyc = 1; var fhn = false;
  for (var i = 0; i < wtp.homonym.length; i++) {
    var hnym = wtp.homonym[i];
    if (lng != hnym.language) {
      //New language: heading plus first paragraph
      lng = hnym.language;
      var h = document.createElement('h3');
      h.appendChild(document.createTextNode(lng.replace(/_/g,' ')));
      node.appendChild(h);
      p = document.createElement('p');
      node.appendChild(p);
      //Write the bold word at the start
      p.appendChild(wtp_dictSpan("dictHomonym", wgTitle));
      //Add a hidden etymology count (will be shown if necessary)
      ety = hnym.etyTitle;
      etyc = 1;
      var firstNumber = wtp_dictSpan("dictOnlyHomonymNumber", etyc);
      fhn = '-HiddenFhn-' + lng + etyc;
      firstNumber.setAttribute('id', fhn);
      p.appendChild(firstNumber);
      //Add the PoS in italics
      p.appendChild(wtp_dictPosSpan(hnym));
      pos = hnym.pos.title;
    } else if (hnym.etyTitle != ety) {
      //New etymology: start a new numbered paragraph
      p = document.createElement('p');
      node.appendChild(p);
      etyc += 1;
      ety = hnym.etyTitle;
      p.appendChild(wtp_dictSpan("dictHomonym", wgTitle));
      p.appendChild(wtp_dictSpan("dictHomonymNumber", etyc));
      p.appendChild(wtp_dictPosSpan(hnym));
      //Remember the PoS just written, otherwise the next sense of the same
      //PoS would print the marker a second time.
      pos = hnym.pos.title;
      //Show the first number now that there is more than one etymology
      if (fhn && document.getElementById(fhn)) {
        document.getElementById(fhn).className = "dictHomonymNumber";
        fhn = false;
      }
    } else if (hnym.pos.title != pos) {
      //New part of speech inside the same etymology
      wtp_addPreviousFullstop(node.childNodes[node.childNodes.length-1]);
      pos = hnym.pos.title;
      p.appendChild(wtp_dictSpan("dictPosSep", ' ● '));
      p.appendChild(wtp_dictPosSpan(hnym));
    } else {
      //Another sense: put a sense separator in, could be something better
      wtp_removeLastPunctuation(node.childNodes[node.childNodes.length-1]);
      p.appendChild(wtp_dictSpan("dictSenseSep", '; '));
    }
    wtp_dictAppendDefinition(p, hnym.node);
  }
}
/**
 * Attempts to remove a last [.,;:!?] (plus any whitespace after it) from
 * within or before the given node.
 *
 * For a node with children the search goes into the last child and, when
 * nothing was removed there, continues with the node's previous sibling.
 * A childless node whose value holds no text (see containsText) is removed
 * from its parent after the search has moved on to its previous sibling.
 *
 * @param node the node to start from; may be null/undefined.
 * @returns {boolean} true when a punctuation mark was removed.
 **/
function wtp_removeLastPunctuation(node){
  if (!node) return false;
  var kids = node.childNodes;
  if (kids && kids.length) {
    return wtp_removeLastPunctuation(kids[kids.length-1])
      ? true
      : wtp_removeLastPunctuation(node.previousSibling);
  }
  if (!node.nodeValue) return false;
  if (node.nodeValue.match(/[\.,;:!\?][ \n\r\s]*$/g)) {
    node.nodeValue = node.nodeValue.replace(/[\.,;:!\?][ \n\r\s]*$/g,'');
    return true;
  }
  //Real text that does not end in punctuation stops the search here
  if (containsText(node)) return false;
  var removedEarlier = wtp_removeLastPunctuation(node.previousSibling);
  node.parentNode.removeChild(node);
  return removedEarlier;
}
/**
 * Attempts to add a fullstop to the end of the text in or before the given
 * node, provided no punctuation is already there.
 *
 * The search order matches wtp_removeLastPunctuation: last child first, then
 * the previous sibling; childless nodes without text are removed on the way.
 * The fullstop replaces trailing whitespace and is placed before a closing
 * double quote.
 *
 * @param node the node to start from; may be null/undefined.
 * @returns {boolean} true when the text now ends in punctuation (already
 *          there or just added), false when no text was found.
 **/
function wtp_addPreviousFullstop(node){
  if (!node) return false;
  if (node.childNodes && node.childNodes.length) {
    if (wtp_addPreviousFullstop(node.childNodes[node.childNodes.length-1])) {
      return true;
    }
    return wtp_addPreviousFullstop(node.previousSibling);
  }
  if (!node.nodeValue) return false;
  if (node.nodeValue.match(/([\.,;:!\?])[ \n\r\s]*$/)) {
    return true;
  }
  if (containsText(node)) {
    //No "g" flag: a global replace matches a second time on the empty
    //string at the very end and would write two fullstops.
    node.nodeValue = node.nodeValue.replace(/(^[ \n\r\s\"])?(\")?[ \n\r\s]*$/,'$1.$2'); //"
    return true;
  }
  var ret = wtp_addPreviousFullstop(node.previousSibling);
  node.parentNode.removeChild(node);
  return ret;
}
/**
 * Extract user preferences from the environment; this should eventually
 * be able to get cookies and predefined js variables, and we may even get
 * a nice preference setter.
 *
 * Preferences are only filled in when the global wtpNoParser is defined and
 * false, or when an article in namespace 0 is being viewed. Defining the
 * globals wtpNoSplitLanguages / wtpNoJoinHomonyms turns the matching
 * preference off.
 *
 * Also installs the stylesheet used by the parser views. The <style>
 * element is built with DOM calls rather than by rewriting the head's HTML
 * or by document.write (which would replace the page once it has loaded).
 **/
function wtp_loadPrefs(){
  //i.e. turn off completely
  if ( ( typeof(wtpNoParser) != "undefined"
         && wtpNoParser == false )
       || (wgIsArticle == true
         && wgNamespaceNumber == 0
         && wgAction == "view" ) ){
    wtp.prefs['SplitDom'] = true;
    //Whether to display the language tabs at the top
    wtp.prefs['TabLanguages'] =
      (typeof(wtpNoSplitLanguages) == "undefined") ? true : false;
    //Whether to try and put related homonyms together
    wtp.prefs['MatchHomonyms'] =
      (typeof(wtpNoJoinHomonyms) == "undefined") ? true : false;
  }
  //ewwww
  var tabBox = '{ line-height: 31px; margin: 10px }';
  var tabSpan = '{ padding:0px; padding-left: 3px; padding-right: 3px; border: 1px solid #000; white-space:pre; margin-left: -1px; }';
  var style = [
    '#LanguageOutput { background-color: #F8F8F8 }',
    '.LanguageSectionHidden { display:none; }',
    '.LanguageSectionShown { display:block; }',
    '.LanguageTabs ' + tabBox,
    '.LanguageTabs>span ' + tabSpan,
    '.LanguageTabHidden{ background-color: #BBB }',
    '.LanguageTabShown{ background-color: #EEE }',
    '',
    '.SetSectionHidden { display:none; }',
    '.SetSectionShown { display:block; }',
    '.SetTabHidden{ background-color: #BBB }',
    '.SetTabShown{ background-color: #EEE }',
    '.SetTabs ' + tabBox,
    '.SetTabs>span ' + tabSpan,
    '',
    '.HomonymSectionHidden { display:none; }',
    '.HomonymSectionShown { display:block; }',
    '.HomonymTabHidden{ background-color: #BBB }',
    '.HomonymTabShown{ background-color: #EEE }',
    '.HomonymTabs ' + tabBox,
    '.HomonymTabs> span ' + tabSpan,
    '',
    '.ParserViewHidden{ display: none; }',
    '.ParserViewShown{ display: block; }',
    '.ParserTabHidden { color: #0000FF }',
    '.ParserTabShown{ color: #000000; font-weight: bold; }',
    '.UnrecognisedSection{ background-color: #FFFFFF }',
    '.UnrecognisedSectionTitle{ color: #FF0000}',
    '.Unparsable{ color: #EE3333 }',
    '',
    '.DictionaryView{ }',
    '.dictHomonym{ font-weight: bold; margin-right: 2px; }',
    '.dictPos{ font-style: italic; margin-right: 2px; }',
    '.dictPosSep{ font-weight: bold; /*font-style: italic;*/ }',
    '.dictHomonymNumber{ font-weight: bold; display:inline; margin-right: 2px; }',
    '.dictOnlyHomonymNumber{ display: none; }',
    '.dictGrammar{ font-style: italic; margin-right: 2px; font-size:90%; }'
  ].join('\n');
  try {
    var sheet = document.createElement('style');
    sheet.setAttribute('type','text/css');
    if (sheet.styleSheet) {
      //Old Internet Explorer exposes the rules through styleSheet.cssText
      sheet.styleSheet.cssText = style;
    } else {
      sheet.appendChild(document.createTextNode(style));
    }
    document.getElementsByTagName('head')[0].appendChild(sheet);
  } catch (err) {
    //The parser still works unstyled, so just report the problem
    if (typeof console != "undefined" && console.error) {
      console.error("parser.js could not install its stylesheet", err);
    }
  }
}
/*************** Homonym Matching ***************/
/**
 * Parse the page and try and match the scrambled homonyms together.
 *
 * For every language, each section is passed to the handler that
 * wtp.section assigns to its heading (heading text lower-cased, with a
 * trailing " 2" / "_II" style counter removed). Sections without a handler
 * get the "UnrecognisedSection" classes. After each language
 * wtp_matchCurrent is called to settle what was collected.
 *
 * Does nothing on a page without headings.
 **/
function wtp_matchHomonyms(){
  var languages = wtp.layout['-Top'];
  if (!languages) return;
  for (var l = 0; l < languages.length; l++) {
    var lang = languages[l];
    if (!wtp.layout[lang]) continue;
    var flat = wtp_flattenLayout(lang);
    for (var i = 0; i < flat.length; i++) {
      var key = flat[i];
      //The name is lower-cased first, so roman numerals are matched in
      //lower case as well.
      var sect = key.replace(/.+-/,'').toLowerCase().replace(/[ _]([0-9]*|[ivx]*)$/,'');
      var handler = sect ? wtp.handler[wtp.section[sect]] : undefined;
      if (typeof handler == 'function') {
        wtp.handler[wtp.section[sect]](wtp.page[key], key);
      } else {
        wtp.page[key].className = "UnrecognisedSection";
        wtp.page['-Head-' + key].className = "UnrecognisedSectionTitle";
      }
    }
    wtp_matchCurrent();
  }
}
/**
 * Converts the tree structure of wtp.layout into a flat array for looping:
 * every key below `lang`, each followed directly by its own descendants.
 *
 * @param lang a layout key.
 * @returns {Array} the descendant keys, empty when `lang` has none.
 **/
function wtp_flattenLayout(lang){
  var flat = [];
  var children = wtp.layout[lang];
  if (!children) return flat;
  for (var n = 0; n < children.length; n++) {
    flat.push(children[n]);
    Array.prototype.push.apply(flat, wtp_flattenLayout(children[n]));
  }
  return flat;
}
/**
 * Tries to match homonyms to glosses and otherwise deal with sections.
 * Sections can either be attached to "homonyms", "part of speech" or
 * "language".
 *
 * wtp_registerHomonym records one definition. It is attached to the most
 * recently registered set (see wtp_registerSet), whose title also supplies
 * the language (everything before the first "-").
 *
 * @param node       the node holding the definition.
 * @param definition the definition text; its lower-cased, sorted words are
 *                   stored for matching.
 * @returns the record that was appended to wtp.homonym.
 **/
function wtp_registerHomonym(node,definition){
  var words = definition.toLowerCase().split(/\W+/).sort();
  var owner = wtp.set[wtp.set.length-1];
  var record = {
    'match' : words,
    'node' : node,
    'text' : definition,
    'pos' : owner,
    'language' : owner.title.replace(/-.+/,'')
  };
  wtp.homonym.push(record);
  return record;
}
/**
 * Record a glossed sub-section (e.g. one translation table) so that
 * wtp_matchCurrent can later attach it to the best matching definition.
 *
 * @param node  the node holding the sub-section.
 * @param gloss the gloss text; its lower-cased, sorted words are stored for
 *              matching.
 * @param title layout key of the section it came from; the language is
 *              everything before the first "-".
 **/
function wtp_registerHomonymSection(node,gloss,title){
  var record = {
    'match' : gloss.toLowerCase().split(/\W+/).sort(),
    'node' : node,
    'gloss' : gloss,
    'title' : title
  };
  record.language = title.replace(/-.+/,'');
  wtp.gloss.push(record);
}
/**
 * Record a section as a "set": appends {node, title} to wtp.set. The most
 * recently registered set is the one wtp_registerHomonym attaches new
 * definitions to.
 *
 * @param node  the section's node.
 * @param title the section's layout key.
 **/
function wtp_registerSet(node,title){
  var entry = {'node':node,'title':title};
  wtp.set.push(entry);
}
/**
 * Record a section as a "set bit": appends {node, title} to wtp.bit, the
 * list wtp_matchCurrent distributes over the registered sets and then
 * empties.
 *
 * @param node  the section's node.
 * @param title the section's layout key.
 **/
function wtp_registerSetBit(node,title){
  var entry = {'node':node,'title':title};
  wtp.bit.push(entry);
}
/**
 * Attach a click handler that forwards the clicked tab to wtp_tabHandle,
 * falling back to the old IE event model when addEventListener fails.
 **/
function wtp_bindTabClick(tab, type){
  try {
    tab.addEventListener('click', function(evt){
      wtp_tabHandle(evt.target, type);
    }, false);
  } catch (err) {
    tab.attachEvent('onclick', function(){
      wtp_tabHandle(window.event.srcElement, type);
    });
  }
}
/**
 * Remove a node from its parent when it has one.
 **/
function wtp_detachNode(node){
  if (node && node.parentNode) node.parentNode.removeChild(node);
}
/**
 * Empty wtp.gloss, giving each gloss to the definition it scores highest
 * against (see wtp_glossScore). A definition keeps, per section title, only
 * its best-scoring gloss; a gloss that gets displaced is queued again so it
 * can find another home. Glosses matching nothing become set bits.
 **/
function wtp_attachGlosses(){
  var glo;
  while ((glo = wtp.gloss.pop())) {
    var best = -1; var highest = 0;
    //Find the definition this gloss scores highest against
    for (var i = 0; i < wtp.homonym.length; i++) {
      var candidate = wtp.homonym[i];
      var score = wtp_glossScore(glo.match, candidate.match);
      var held = (candidate.score && candidate.score[glo.title]) || 0;
      if (score > highest && score > held) {
        highest = score; best = i;
      }
    }
    if (best < 0) {
      //Add it to general sections
      wtp_registerSetBit(glo.node, glo.title);
      continue;
    }
    var def = wtp.homonym[best];
    if (!def.score || typeof def.score != 'object') def.score = {};
    if (!def.gloss) def.gloss = {};
    var displaced = def.gloss[glo.title];
    //Try again with the gloss we are displacing. Scores only ever go up,
    //so this cannot loop forever.
    if (displaced && displaced !== glo) wtp.gloss.push(displaced);
    def.score[glo.title] = highest;
    def.gloss[glo.title] = glo;
  }
}
/**
 * Give every definition not handled before a "HomonymTabs" box with one tab
 * per attached gloss; the gloss nodes move into the box and their original
 * headings are removed from the page.
 * NOTE(review): definitions already marked done are skipped, so a gloss
 * attached to them later never gets a tab - confirm whether that is intended.
 **/
function wtp_buildHomonymTabs(){
  for (var i = 0; i < wtp.homonym.length; i++) {
    var def = wtp.homonym[i];
    if (def.done) continue;
    def.done = true;
    var tabs = def.tabs;
    var div = def.div;
    if (!tabs) {
      tabs = document.createElement('div');
      tabs.className = "HomonymTabs";
      div = document.createElement('div');
      tabs.appendChild(div);
      def.node.appendChild(tabs);
      def.tabs = tabs;
      def.div = div;
    }
    for (var gc in def.gloss) {
      if (!Object.prototype.hasOwnProperty.call(def.gloss, gc)) continue;
      var glo = def.gloss[gc];
      var tab = document.createElement('span');
      tab.className = "HomonymTabHidden";
      tab.appendChild(document.createTextNode(glo.title.replace(/.+-/,'').replace(/_/g,' ')));
      tab.setAttribute('id','-HomonymTab-' + gc + '-' + i + '-' + glo.title);
      tabs.insertBefore(tab, div);
      glo.node.setAttribute('id','-Homonym-' + gc + '-' + i + '-' + glo.title);
      glo.node.className = "HomonymSectionHidden";
      wtp_detachNode(glo.node);
      wtp_detachNode(wtp.page['-Head-' + glo.title]);
      div.appendChild(glo.node);
      wtp_bindTabClick(tab, "Homonym");
    }
  }
}
/**
 * Give every set not handled before a "SetTabs" box at its top, with one tab
 * per applicable set bit (Etymology to Noun etc.). A bit is skipped only
 * when its key is deeper than the set's and does not contain the set's key.
 * Repeated titles inside one set get a "_<n>" suffix to keep ids unique.
 * A bit's node can only live in one place, so it ends up in the last set
 * that accepted it.
 **/
function wtp_buildSetTabs(){
  for (var i = 0; i < wtp.set.length; i++) {
    var set = wtp.set[i];
    if (set.done) continue;
    set.done = true;
    var tabs = document.createElement('div');
    tabs.className = "SetTabs";
    var div = document.createElement('div');
    tabs.appendChild(div);
    set.node.insertBefore(tabs, set.node.childNodes[0]);
    var setDepth = set.title.replace(/[^-]/g,'').length;
    for (var b = 0; b < wtp.bit.length; b++) {
      var bit = wtp.bit[b];
      if (bit.title.replace(/[^-]/g,'').length > setDepth
          && bit.title.indexOf(set.title) < 0) {
        continue; //shouldn't match
      }
      var title = bit.title;
      if (document.getElementById('-SetTab-' + i + '-' + bit.title)) {
        //Probe with the same "_<n>" suffix the ids are built with
        var n = 1;
        while (document.getElementById('-SetTab-' + i + '-' + bit.title + '_' + n)) {
          n = n + 1;
        }
        title += '_' + n;
      }
      var tab = document.createElement('span');
      tab.className = "SetTabHidden";
      tab.appendChild(document.createTextNode(title.replace(/.+-/,'').replace(/_/g,' ')));
      tab.setAttribute('id','-SetTab-' + i + '-' + title);
      tabs.insertBefore(tab, div);
      bit.node.setAttribute('id','-Set-' + i + '-' + title);
      bit.node.className = "SetSectionHidden";
      wtp_detachNode(bit.node);
      div.appendChild(bit.node);
      wtp_detachNode(wtp.page['-Head-' + bit.title]);
      wtp_bindTabClick(tab, "Set");
    }
  }
}
/**
 * This is called when it is obvious that all sections associated with a
 * set of homonyms have been found, on en.wikt when a new "Etymology" section
 * starts, for example.
 *
 * It matches the pending glosses to definitions, builds the tabs for
 * definitions and sets, and finally empties wtp.bit.
 **/
function wtp_matchCurrent(){
  //First match the homonyms to the glosses
  wtp_attachGlosses();
  //Then create the gloss tabs
  wtp_buildHomonymTabs();
  //Then add the set bits to each set
  wtp_buildSetTabs();
  wtp.bit = [];
}
/**
 * This function tries to work out a score for the closeness of match between
 * two probably very different sorted arrays of strings.
 *
 * Both arrays are walked in step. A word found in both scores
 * 1 + (half its length), provided it is longer than two characters.
 * The arrays are edited on the way: adjacent duplicates are dropped, and a
 * shared word of one or two characters is removed from both.
 *
 * @param glo sorted words of the gloss (modified in place).
 * @param def sorted words of the definition (modified in place).
 * @returns {number} the score, 0 when nothing matches.
 **/
function wtp_glossScore(glo,def){
  var g = 0;
  var d = 0;
  var total = 0.0;
  while (g < glo.length && d < def.length) {
    //Remove duplicates
    while (glo[g+1] && glo[g+1] == glo[g]) { glo.splice(g+1,1); }
    while (def[d+1] && def[d+1] == def[d]) { def.splice(d+1,1); }
    if (glo[g] != def[d]) {
      //Advance whichever side is behind in sort order
      if (glo[g] < def[d]) {
        g += 1;
      } else {
        d += 1;
      }
      continue;
    }
    if (def[d].length > 2) {
      //Letter modifier should be tweaked
      total += 1.0 + (glo[g].length / 2);
      g += 1;
      d += 1;
    } else {
      //Remove short words
      def.splice(d,1);
      glo.splice(g,1);
    }
  }
  return total;
}
/****************** DOM Helpers *****************/
/**
 * containsText(node) is a W3C DOM compliant cross-browser way of asking
 * whether a node holds any text, though it may well be slower than innerText
 * (no benchmarking); it is almost always faster than extractText(node)!="".
 *
 * "Text" means at least one letter a-z or A-Z, found in a text node anywhere
 * below `el` or in el's own value. Digits, punctuation and letters of other
 * alphabets do not count.
 *
 * @param el the node to inspect.
 * @returns {boolean} true when a letter was found.
 **/
function containsText(el){
  function hasLetters(value){
    return value.replace(/[^a-zA-Z]/g,'').length > 0;
  }
  var kids = el.childNodes;
  for (var k = 0; k < kids.length; k++) {
    var kid = kids[k];
    if (kid.nodeName.toUpperCase() == '#TEXT') {
      if (hasLetters(kid.nodeValue)) return true;
    } else if (kid.nodeName.indexOf('#') != 0 && containsText(kid)) {
      //Only elements are searched; comments and the like are skipped
      return true;
    }
  }
  if (el.nodeValue && hasLetters(el.nodeValue)) {
    return true;
  }
  return false;
}
/**
 * extractText(node) is a W3C DOM compliant cross-browser way of
 * saying (node.innerText) though it may well be slower (no benchmarking).
 *
 * Concatenates, in document order, the values of all text nodes below `el`.
 * Only elements are descended into.
 *
 * @param el the node whose text is wanted.
 * @returns {string} the collected text, "" when there is none.
 **/
function extractText(el){
  var collected = "";
  var kids = el.childNodes;
  for (var k = 0; k < kids.length; k++) {
    var kid = kids[k];
    var isText = kid.nodeName.toUpperCase() == '#TEXT';
    if (isText) {
      collected += kid.nodeValue;
      continue;
    }
    if (kid.nodeName.indexOf('#') != 0) {
      collected += extractText(kid);
    }
  }
  return collected;
}
/**
 * getChildByClass is essentially document.getElementById but locally and by
 * class: it returns the first direct child of `node` whose className equals
 * `clsname` exactly.
 *
 * @param node    the parent to search; may be null/undefined.
 * @param clsname the complete className to look for.
 * @returns the matching child, or undefined when there is none.
 **/
function getChildByClass(node,clsname){
  if (!node) return;
  var kids = node.childNodes;
  var idx = 0;
  while (idx < kids.length) {
    if (kids[idx].className == clsname) {
      return kids[idx];
    }
    idx++;
  }
}
/**
 * Copies a node and everything below it into fresh nodes, so the copy
 * carries no event handlers added through script.
 *
 * Text nodes are copied as text, elements are rebuilt with their
 * attributes, and any other node type (a comment, say) becomes an empty
 * text node. Two attributes are adjusted to keep the copy from clashing
 * with the original: ids get a "C" appended, and every display/visibility
 * declaration is dropped from inline styles.
 *
 * @param onode the node to copy.
 * @returns the copy.
 **/
function cloneNode(onode){
  if (onode.nodeType == 3) {
    return document.createTextNode(onode.nodeValue);
  }
  if (onode.nodeType != 1) {
    //A comment.
    return document.createTextNode('');
  }
  var ret = document.createElement(onode.nodeName);
  if (onode.className) {
    ret.className = onode.className;
  }
  for (var a = 0; a < onode.attributes.length; a++) {
    var an = onode.attributes[a].nodeName;
    if (an == 'id') {
      ret.setAttribute(an, onode.attributes[a].nodeValue + 'C');
    } else if (an == 'style') {
      //Global, so that both display and visibility go when both are set
      ret.style.cssText = onode.style.cssText.replace(/(display|visibility):[^;]+/g,'');
    } else {
      ret.setAttribute(an, onode.attributes[a].nodeValue);
    }
  }
  for (var c = 0; c < onode.childNodes.length; c++) {
    ret.appendChild(cloneNode(onode.childNodes[c]));
  }
  return ret;
}
/**************** en.wikt specifics **************/
/**
 * Each possible title (at any level) and which handler it should use.
 * Keys are heading texts in lower case with "_" for spaces, as produced by
 * wtp_matchHomonyms; values name an entry of wtp.handler.
 **/
wtp.section = { //From [[User:AutoFormat/Headers]] thanks Ullman!!
  //English POS
  'noun':'pos','noun_form':'pos','noun_phrase':'pos','proper_noun':'pos',
  'prenoun':'pos',
  'verb':'pos','verb_form':'pos','verb_phrase':'pos', 'preverb':'pos',
  'transitive_verb':'pos','intransitive_verb':'pos',
  'adjective':'pos','adjective_form':'pos','adjective_phrase':'pos',
  'adverb':'pos','adverb_phrase':'pos',
  'pronoun':'pos','conjunction':'pos','contraction':'pos',
  'interjection':'pos','article':'pos','preposition':'pos',
  'prefix':'pos','suffix':'pos','affix':'pos','infix':'pos',
  'idiom':'pos','phrase':'pos',
  'acronym':'pos','abbreviation':'pos','initialism':'pos',
  'symbol':'pos','letter':'pos',
  'numeral':'pos','ordinal_numeral':'pos','cardinal_numeral':'pos',
  'number':'pos','ordinal_number':'pos','cardinal_number':'pos',
  //Other POS
  'particle':'pos','proverb':'pos','han_character':'pos',
  'kanji':'pos','hanzi':'pos','hanja':'pos',
  'pinyin':'pos','pinyin_syllable':'pos','syllable':'pos',
  'katakana_character':'pos','hiragana_letter':'pos',
  'hiragana_character':'pos',
  'counter':'pos','classifier':'pos','adnominal':'pos','determiner':'pos',
  'expression':'pos','postposition':'pos','root':'pos','participle':'pos',
  //More interesting stuff
  'synonyms':'thesaurus',
  'antonyms':'thesaurus',
  'translations':'translations',
  'translations_to_be_checked':'trivia', //this puts them by PoS not Homonym
  'etymology':'etymology',
  'conjugation':'trivia', 'inflection':'trivia','declension':'trivia',
  'participles':'trivia','infinitives':'trivia',
  'alternative_forms':'trivia', 'alternative_spellings':'trivia',
  'pronunciation':'trivia',
  'derived_terms':'trivia', 'related_terms':'trivia',
  'descendants':'trivia', 'mutation':'trivia', 'compounds':'trivia',
  'abbreviations':'trivia','forms':'trivia',
  'hypernyms':'trivia','hyponyms':'trivia','meronyms':'trivia',
  'homonyms':'trivia','holonyms':'trivia','troponyms':'trivia',
  'homophones':'trivia','hyphenation':'trivia',
  'devanagari_spelling':'trivia','urdu_spelling':'trivia',
  'cyrillic_spelling':'trivia','roman_spelling':'trivia',
  'kanji_reading':'trivia',
  'scientific_names':'trivia',
  'scientic_names':'trivia', //misspelt key, kept in case anything relies on it
  'proverbs':'trivia','expressions':'trivia','coordinate_terms':'trivia',
  'see_also':'trivia', 'external_links':'trivia','references':'trivia',
  'names_in_other_languages':'trivia','variants_and_pet_forms':'trivia',
  'anagrams':'trivia','trivia':'trivia','shorthand':'trivia',
  'usage_notes':'trivia','dictionary_notes':'trivia','quotations':'trivia'
  //There are a few missing, I got bored ;)
};
/**
 * Handler for part-of-speech sections.
 *
 * Registers the section as a set, then walks its children: every <li> of an
 * <ol> is registered as a definition (tagged with the current etymology,
 * wtp.curEty). Before the first list or inflection element, other elements
 * are guessed to be the inflection line; afterwards, elements containing
 * text are marked "Unparsable".
 *
 * @param el    the section's container.
 * @param title the section's layout key.
 **/
wtp.handler['pos'] = function(el,title){
  var beforeDefinitions = true;
  wtp_registerSet(el,title);
  for (var i = 0; i < el.childNodes.length; i++) {
    var child = el.childNodes[i];
    if (child.nodeName.toUpperCase() == 'OL') {
      beforeDefinitions = false;
      for (var j = 0; j < child.childNodes.length; j++) {
        var item = child.childNodes[j];
        if (item.nodeName.toUpperCase() == 'LI') {
          var hnym = wtp_registerHomonym(item, extractText(item));
          hnym.etyTitle = wtp.curEty;
        } else if (containsText(item) && item.nodeName.indexOf('#')) {
          item.className = "Unparsable";
        }
      }
      continue;
    }
    //Text nodes, comments and the like are left alone
    if (!child.nodeName.indexOf('#')) continue;
    var isInflection = child.className == 'infl-table'
      || child.className == 'infl-inline'
      || (child.nodeName == 'P' && child.firstChild
          && child.firstChild.className == 'infl-inline');
    if (isInflection) {
      beforeDefinitions = false;
      //Skip element
    } else if (beforeDefinitions) {
      child.className = "infl-inline"; //Guess first section is inflection
    } else if (containsText(child)) {
      child.className = "Unparsable";
    }
  }
};
// Layout key of the etymology section most recently seen; '' before the first.
wtp.curEty = '';
/**
 * Handler for etymology sections. A new etymology closes the previous group
 * of homonyms, so everything collected so far is matched first; the section
 * is then remembered as the current etymology and registered as a set bit.
 *
 * @param el    the section's container.
 * @param title the section's layout key.
 **/
wtp.handler['etymology'] = function(el,title){
  wtp_matchCurrent();
  //This is to let paperView differentiate between homonyms
  wtp.curEty = title;
  wtp_registerSetBit(el,title);
};
/**
 * Handler for sections that are not analysed further: the section is
 * registered as a set bit as it stands.
 *
 * @param el    the section's container.
 * @param title the section's layout key.
 **/
wtp.handler['trivia'] = function(el,title){
  wtp_registerSetBit(el, title);
};
/**
 * Handler for translation sections.
 *
 * Every "NavFrame" <div> is passed to the navframe handler. When all
 * children could be handled that way the now redundant section is removed;
 * otherwise the section is registered as a set bit.
 *
 * The navframe handler detaches the frames it accepts, which shifts the
 * remaining children down. The index therefore only moves on when the child
 * just looked at is still in place; a frame that was rejected (and so not
 * detached) is stepped over instead of being processed again forever.
 *
 * @param el    the section's container.
 * @param title the section's layout key.
 **/
wtp.handler['translations'] = function(el,title){
  var success = 1;
  var i = 0;
  while (i < el.childNodes.length) {
    var div = el.childNodes[i];
    if (div.nodeName.indexOf('#')) {
      if (div.nodeName == "DIV" && div.className == 'NavFrame') {
        success *= wtp.handler['navframe'](div,title);
      } else {
        success = 0;
      }
    } else if (containsText(div)) {
      success = 0;
    }
    if (el.childNodes[i] === div) i++;
  }
  if (success) {
    if (el.parentNode) el.parentNode.removeChild(el);
  } else {
    wtp_registerSetBit(el,title);
  }
};
/**
 * Handler for one collapsible "NavFrame" box.
 *
 * Looks for the first "NavHead" child and the first "NavContent" child after
 * it. When the content does not mention 'to be checked', the frame is
 * removed from its parent and the content is registered as a glossed
 * sub-section, with the head's text as the gloss. Any other child containing
 * text makes the parse fail (elements are marked "Unparsable").
 *
 * @param el    the NavFrame element.
 * @param title layout key of the section the frame belongs to.
 * @returns {number} 1 when head and content were found and nothing
 *          unexpected was seen, otherwise 0.
 **/
wtp.handler['navframe'] = function(el,title){
  //To collect NavHead and NavContent
  var head = false;
  var body = false;
  var success = 1; //If parse was successful [1 on success, 0 on failure]
  for (var i = 0; i < el.childNodes.length; i++) {
    var div = el.childNodes[i];
    var isElement = div.nodeName.indexOf('#');
    if (isElement && div.className == 'NavHead' && !head) {
      head = div;
      head.style.cssText = "";
      head.id = "=" + head.id;
    } else if (isElement && div.className == 'NavContent' && head && !body) {
      if (extractText(div).indexOf('to be checked') > -1) {
        success = 0;
      } else {
        body = div;
        body.style.cssText = "";
        body.id = "=" + body.id;
        el.parentNode.removeChild(el);
        wtp_registerHomonymSection(body, extractText(head), title);
      }
    } else if (containsText(div)) {
      success = 0;
      if (isElement) {
        div.className = "Unparsable";
      }
    }
  }
  if (head && body) {
    return success;
  }
  return 0;
};
/**
 * Handler for thesaurus-like sections (synonyms, antonyms).
 *
 * Each <li> of a <ul> that carries a sense label ("ib-content" or
 * "qualifier-content") loses everything up to and including the colon that
 * follows the label and is registered as a glossed sub-section, with the
 * label's text as the gloss. "NavFrame" boxes go to the navframe handler.
 * When everything was understood the section is removed; otherwise it is
 * marked and registered as a set bit.
 *
 * @param el    the section's container.
 * @param title the section's layout key.
 **/
wtp.handler['thesaurus'] = function(el,title){
  var success = true;
  var i = 0;
  while (i < el.childNodes.length) {
    var child = el.childNodes[i];
    var nn = child.nodeName.toUpperCase();
    if (nn == 'UL') {
      for (var j = 0; j < child.childNodes.length; j++) {
        var li = child.childNodes[j];
        //Whitespace between the items is not an item
        if (li.nodeName.indexOf('#') == 0 && !containsText(li)) continue;
        var senseTag = getChildByClass(li,'ib-content')
          || getChildByClass(li,'qualifier-content');
        //Find the colon that ends the sense label
        var colonAt = -1;
        for (var k = 0; senseTag && k < li.childNodes.length; k++) {
          var part = li.childNodes[k];
          if (part.nodeName.indexOf('#') &&
              (part.className == 'ib-colon' || part.className == 'sense-qualifier-colon')) {
            colonAt = k;
            break;
          }
        }
        if (colonAt < 0) {
          //No label, or a label without its colon: leave the item untouched
          success = false;
          el.className = "Unparsable";
          continue;
        }
        //Strip the label, colon included
        for (var r = 0; r <= colonAt; r++) {
          li.removeChild(li.childNodes[0]);
        }
        if (containsText(li)) {
          wtp_registerHomonymSection(li, extractText(senseTag), title);
        }
      }
    } else if (nn == 'DIV' && child.className == 'NavFrame') {
      if (!wtp.handler['navframe'](child,title)) success = false;
    } else if (containsText(child)) { //Something we don't understand
      success = false;
      el.className = "Unparsable";
    }
    //An accepted NavFrame has been detached; only move on when the child
    //just looked at is still in place.
    if (el.childNodes[i] === child) i++;
  }
  if (success) {
    if (el.parentNode) el.parentNode.removeChild(el);
  } else {
    wtp_registerSetBit(el,title);
  }
};
/**************** General Parser Functions **************/
// State shared by the view-tab functions below.
var parsers = {};
/**
 * This section will be split to a different file if necessary.
 * Parsers should use getContentNode to get the page content; it hands out a
 * copy (made with cloneNode) of the element with id "bodyContent", so the
 * content node itself remains available for others to use.
 *
 * @returns the copy, or false when the page has no "bodyContent" element.
 **/
function parsers_getContentNode(){
  var original = document.getElementById('bodyContent');
  return original ? cloneNode(original) : false;
}
/**
 * registerView allows parsers to add a view tab at the top of the page;
 * it should be called instead of manually appending an output to the DOM.
 *
 * On first use the tab bar is created in front of "bodyContent", the
 * original content is wrapped and registered as the "Unchanged" view.
 * Each view gets a tab with id "ParserTab-<title>" and its node the id
 * "ParserView-<title>", where every run of whitespace in the title is
 * replaced by "_". New views are inserted in front of the wrapped original.
 *
 * @param node     the element holding the view's output.
 * @param title    the label shown on the tab.
 * @param funct    optional function, run as funct(node, tab) before the
 *                 output is made visible.
 * @param selected when truthy, and no view is remembered in the
 *                 "wtParserView" cookie, the view is shown straight away.
 *                 A view named by the cookie is always shown.
 * @param first    internal: set for the "Unchanged" view, which needs no
 *                 separator and is already in the document.
 **/
function parsers_registerView(node,title,funct,selected,first){
  if (!parsers.view) {
    var bc = document.getElementById('bodyContent');
    //Create tabs at top of page
    parsers.tabs = document.createElement("div");
    parsers.tabs.className = "ParserTabs";
    bc.parentNode.insertBefore(parsers.tabs, bc);
    //Wrap the original content so it can be hidden like any other view
    var obc = document.createElement("div");
    bc.parentNode.insertBefore(obc, bc);
    bc.parentNode.removeChild(bc);
    obc.appendChild(bc);
    parsers.view = {};
    parsers.insertPoint = obc;
    parsers_registerView(obc,"Unchanged",false,false,true);
  }
  //Create tab for new view.
  var tab = document.createElement("span");
  tab.className = "ParserTabHidden";
  tab.appendChild(document.createTextNode(title));
  //Global, so that titles of three or more words give a valid id too
  title = title.replace(/[ \s\r\n]+/g,'_');
  tab.setAttribute("id","ParserTab-" + title);
  node.className = "ParserViewHidden";
  node.setAttribute("id","ParserView-" + title);
  try {
    tab.addEventListener('click',
      function(evt){ parsers_tabParser(evt.target); }, false);
  } catch (err) {
    //Fall back to the old IE event model
    tab.attachEvent('onclick',
      function(){ parsers_tabParser(window.event.srcElement); });
  }
  if (!first) {
    parsers.tabs.appendChild(document.createTextNode(' • '));
    parsers.insertPoint.parentNode.insertBefore(node, parsers.insertPoint);
  }
  parsers.tabs.appendChild(tab);
  parsers.view[title] = [node,tab,funct];
  var remembered = getCookie('wtParserView');
  if ((selected && !remembered) || remembered == tab.id) {
    parsers_tabParser(tab);
  }
}
/**
 * Essentially the onclick handler for parser view tabs; this switches between
 * the different displays (using hidden/shown class names on tabs and views)
 * and runs the view's function, if it has one, before the view is shown.
 * The chosen tab's id is remembered in the "wtParserView" cookie.
 * Tabs that do not belong to a registered view are ignored.
 *
 * @param tab the tab element, whose id is "ParserTab-<view name>".
 **/
function parsers_tabParser(tab){
  var name = tab.getAttribute("id").replace(/^ParserTab-/,"");
  var chosen = parsers.view[name];
  if (!chosen) return;
  var previous = parsers.currentView;
  if (previous) {
    previous[1].className = "ParserTabHidden";
    previous[0].className = "ParserViewHidden";
  }
  parsers.currentView = chosen;
  setCookie('wtParserView', parsers.currentView[1].getAttribute('id'));
  if (typeof(parsers.currentView[2]) == 'function') {
    //Give the view the chance to prepare itself: funct(node, tab)
    parsers.currentView[2](
      parsers.currentView[0], parsers.currentView[1]);
  }
  parsers.currentView[1].className = "ParserTabShown";
  parsers.currentView[0].className = "ParserViewShown";
}
//Get this show on the road: the parser is hooked into page load only when
//an existing page in namespace 0 is being viewed.
if (wgAction == 'view') {
  if (wgNamespaceNumber == 0) {
    if (wgArticleId != 0) {
      addOnloadHook(wtp_init);
    }
  }
}