User:Conrad.Irwin/parser.js

Note: You may have to bypass your browser’s cache to see the changes. In addition, after saving a sitewide CSS file such as MediaWiki:Common.css, it will take 5-10 minutes before the changes take effect, even if you clear your cache.
Mozilla / Firefox / Safari: hold Shift while clicking Reload, or press either Ctrl-F5 or Ctrl-R (Command-R on a Macintosh);
Konqueror and Chrome: click Reload or press F5;
Opera: clear the cache in Tools → Preferences;
Internet Explorer: hold Ctrl while clicking Refresh, or press Ctrl-F5.
This script lacks a documentation subpage. Please create it.
Useful links: root page • root page’s subpages • links • redirects • your own
/**
 * VERSION 1.0α <- You have been warned.
 *
 * First divides the page by language (~100% accurately)
 * Then tries to reconnect homonym specific information. (~80% at the moment)
 *
 * Should present this nicely and very customisably to the user
 * May eventually also output information in a computer readable format
 *
 * Things that need doing, 
 *    Split categories across languages
 *    Recognise (senses) manually marked up (i.e. with italics instead of {{sense}})
 *
 * Basic file layout intentions (i.e. probably not stuck to)
 *  Init section
 *  Functions for splitting the dom
 *  Functions for language tabbing
 *  Generic functions for homonym parsing/tabbing
 *  User preference functions
 *  Site preference settings [section headers and handlers]
 *  DOM extensions
 *  Hacked in stylesheets
 * 
 * Paper View
 *  Requested FEATURES 'alternative spellings' 
 *   'noun gender'
 *   'turn off languages'
 *
**/
// Shared parser state: everything the parser remembers lives on this object.
var wtp = {
    // Information about the headings that may be found (per the original
    // notes: a 'pos' list of part-of-speech headings and a 'breaks' list of
    // headings that separate homonyms).
    header: {},
    // Handlers for specific headings, e.g. one that deals with any
    // 'Translations' section.
    handler: {},
    // Named object of arrays: each array lists, in document order, the keys
    // of the sections found directly below the section it is named after.
    // Keys are of the form "h1-h2-h3", e.g. "English-Noun-Translations".
    layout: {},
    // The DOM nodes removed from the page, referencable by their layout key;
    // the heading elements themselves are kept under '-Head-' + key.
    page: {},
    // People's preferences, from wherever they come.
    prefs: {},
    // The current heading structure, i.e. ['English','Noun','Translation'].
    pos: [],
    // The current layout/page key, normally pos.join('-').
    cPos: '-Top',
    // An array of {}s, one per registered homonym.
    homonym: [],
    // Abbreviations used for parts of speech and grammar tags.
    ppAbbrevs: {'noun' : 'n.', 'verb' : 'vb.', 'countable' : '[C]', 'uncountable' : '[U]','transitive':'[T]', 'intransitive':'[I]', 'adjective':'adj.', 'adverb':'adv.'},
    // An array of {}s, one per registered gloss section.
    gloss: [],
    // Registered sets and set bits (see wtp_registerSet / wtp_registerSetBit).
    set: [],
    bit: []
};

// Anything that appears before the first heading is collected here.
wtp.page[wtp.cPos] = document.createElement("div");

/**
 * Entry point of the parser: loads the preferences, splits the entry into
 * sections and registers the "Toggle Sections" and "Paper Dictionary" views.
 *
 * Returns false when no content node is available, nothing otherwise.
 * An exception raised while parsing is swallowed: a failure note is appended
 * to the page title and, if it exists, the 'ParserTab-Unchanged' tab is
 * handed to parsers_tabParser.
**/
function wtp_init(){
    wtp_loadPrefs();

    //Read the entry
    var content = parsers_getContentNode();
    if(!content){
        return false;
    }

    //Just in case
    try{
        //Parse
        wtps_splitDom(content);

        //Allow language view
        wtp_createLanguageTabs();
        parsers_registerView(wtp.languageOutput, "Toggle Sections", wtp_showLanguageView);

        //Create other sections
        wtp_matchHomonyms();
        parsers_registerView(document.createElement('div'), "Paper Dictionary", wtp_createDictionaryView);
    }catch(err){
        document.title += " (Parser Failed: Notify [[User:Conrad.Irwin]] if nescessary.)";
        var fallbackTab = document.getElementById('ParserTab-Unchanged');
        if(fallbackTab){
            parsers_tabParser(fallbackTab);
        }
    }
}
/**
 * Split the DOM into sections by heading, assuming that each <hX> element
 *  is a direct child of the given content node.
 *
 * Every child is detached from `div` in document order and then either
 *  - handed to wtps_changeHead (headings, except the one with id 'siteSubC'),
 *  - stored as wtp.page['-Categories'] (the <div id="catlinksC">, whose id
 *    is renamed to 'catlinks'),
 *  - dropped (print footer, site notice, content sub-title, jump-to-nav,
 *    the table of contents and any non-heading, non-div node for which
 *    containsText() is false), or
 *  - appended to the current section with wtps_includeElement.
 *
 * NOTE(review): the ids tested here all carry a trailing 'C'; presumably the
 *  content node is a copy whose ids were renamed by the caller - confirm.
 *
 * Dropping a node simply means not storing it anywhere after it has been
 *  detached; the former `delete nod;` statements did nothing (a `var` cannot
 *  be deleted) and are a SyntaxError in strict-mode code.
 *
 * @param div the node whose children are to be split up; it is left empty
 * @return true
**/
function wtps_splitDom(div){

    var nod;
    while((nod = div.childNodes[0])){
        div.removeChild(nod);
        var nn = nod.nodeName.toUpperCase();

        if( nn.replace(/^H[0-9]/,'') == '' ){
            //Headings start a new section, apart from the site sub-title
            if( nod.getAttribute('id') != 'siteSubC' ){
                wtps_changeHead(nod);
            }
        }else if( nn == 'DIV' ){
            var nid = nod.getAttribute('id');
            if( nid == "catlinksC" ){
                nod.setAttribute('id','catlinks'); //TODO\\ get the style out of the main page
                wtp.page['-Categories'] = nod; //TODO\\ Deal with these properly
            }else if( nod.className == "printfooter"
                   || nid == "siteNoticeC"
                   || nid == 'contentSubC' || nid == 'jump-to-navC' ){
                //Page furniture: dropped
            }else{
                wtps_includeElement(nod);
            }
        }else if( ( nn == 'TABLE' && nod.getAttribute('id') == "tocC" )
                || !containsText(nod) ){
            //Table of contents or a node without text: dropped
        }else{
            wtps_includeElement(nod);
        }
    }
    return true;
}
/**
 * Change the active section for the parser.
 *
 * The heading's text (taken from its "mw-headline" child, runs of whitespace
 *  replaced by '_') becomes the last component of the new section key.
 *  <h2> is the top level, so H2 -> level 0, H3 -> level 1, and so on.
 *  The new key is appended to the layout of its parent (or of '-Top'), an
 *  empty <div> is created to collect the section's content, and the heading
 *  element itself is stored under '-Head-' + key.
 *
 * The level is parsed case-insensitively (wtps_splitDom already treats node
 *  names that way) and an <h1> is treated like an <h2>; a negative level
 *  used to produce an empty section key.
 *
 * A heading without a "mw-headline" child is included in the current
 *  section like any other element.
 *
 * @param hd the heading element that starts the new section
**/
function wtps_changeHead(hd){
    var level = parseInt(hd.nodeName.replace(/^H/i,""),10)-2;
    if( level<0 )level=0;

    //A node name that is not "H<digit>" gives NaN: treat it as plain content
    var span = isNaN(level) ? null : getChildByClass(hd,"mw-headline");

    if( span ){
        //Remove current status
        while ( wtp.pos.length>level
             &&    wtp.pos.length>0 ){
             wtp.pos.pop();
        }
        //Get parent heading
        var oPos=wtp.pos.join('-');
        if(oPos=='')oPos='-Top';

        //Ensure parent can be layed out
        if(!wtp.layout[oPos])wtp.layout[oPos]=[];

        //Move into child heading
        wtp.pos[level]=extractText(span).replace(/\s+/g,'_');
        wtp.cPos=wtp.pos.join('-');
        wtp.layout[oPos].push(wtp.cPos);

        //Set up everything for parsing the child heading
        wtp.page[wtp.cPos]=document.createElement("div");
        wtp.page[wtp.cPos].setAttribute("id",wtp.cPos);
        wtp.page[wtp.cPos].className=wtp.pos[level];
        wtp.page['-Head-'+wtp.cPos]=hd;

    }else{ //Doubt this ever happens. but you never know :)
        wtps_includeElement(hd);
    }
}
/**
 * Append an element to the container of the section the parser is
 *  currently in (wtp.page[wtp.cPos]).
**/
function wtps_includeElement(el){
    var currentSection = wtp.page[wtp.cPos];
    currentSection.appendChild(el);
}
/**
 * Build the language view: one container (wtp.languageOutput) holding the
 *  pre-heading content, a bar of tabs (wtp.tabs), one hidden section per
 *  top-level heading and finally the categories, if any were found.
 * Clicking a tab passes the clicked element to wtp_tabHandle.
 *
 * Always returns true.
**/
function wtp_createLanguageTabs(){
    //Create the display nodes
    var output = document.createElement("div");
    wtp.languageOutput = output;
    output.setAttribute("id","LanguageOutput");

    var tabBar = document.createElement("div");
    wtp.tabs = tabBar;
    tabBar.className = "LanguageTabs";

    output.appendChild(wtp.page['-Top']);
    output.appendChild(tabBar);

    //Click handlers, W3C style and old IE style
    var onClickW3C = function(evt){ wtp_tabHandle(evt.target,"Language"); };
    var onClickIE = function(){ wtp_tabHandle(window.event.srcElement,"Language"); };

    //For each language...
    var languages = wtp.layout['-Top'] || [];
    for(var n=0; n<languages.length; n++){
        var key = languages[n];
        var tab = document.createElement('span');
        var section = document.createElement('div');

        //Create language section: heading, own content, then all subsections
        section.setAttribute('id','-Language-'+key);
        section.className = "LanguageSectionHidden";
        section.appendChild(wtp.page['-Head-'+key]);
        section.appendChild(wtp.page[key]);
        wtp_recursiveAppendChild(section,wtp.layout[key]);
        output.appendChild(section);

        //Create language tab
        tab.setAttribute('id','-LanguageTab-'+key);
        tab.className = "LanguageTabHidden";
        tab.appendChild(document.createTextNode(key.replace(/_/g,' ')));
        try{
            tab.addEventListener('click',onClickW3C,false);
        }catch(err){
            tab.attachEvent('onclick',onClickIE);
        }
        tabBar.appendChild(tab);
    }

    var categories = wtp.page['-Categories'];
    if(categories){
        output.appendChild(categories);
    }

    return true;
}
/**
 * A hack to notice when people follow #Language links on this page.
 *
 * Takes whatever follows the first '#' of the current address; if that names
 *  a parsed section other than the one last recorded in
 *  wtp.currentLinkLanguage and a matching language tab exists, the tab is
 *  activated and the name recorded.
 * Re-schedules itself every 100ms, whatever the outcome.
**/
function wtp_checkLanguageLinks(){
    var anchor = document.location.href.replace(/[^#]+#?/,'');
    var isNew = anchor.length && anchor != wtp.currentLinkLanguage;

    if( isNew && wtp.page[anchor] ){
        var tab = document.getElementById('-LanguageTab-'+anchor);
        if(tab){
            wtp_tabHandle(tab,"Language");
            wtp.currentLinkLanguage = anchor;
        }
    }
    window.setTimeout(wtp_checkLanguageLinks,100);
}
/**
 * Used when building a language section: appends, depth first and in layout
 *  order, the heading and the content of every section key in `lay` (and of
 *  all their subsections) to `dest`.
 * Does nothing when `lay` is missing.
**/
function wtp_recursiveAppendChild(dest,lay){
    if(!lay){
        return;
    }
    var idx = 0;
    while(idx < lay.length){
        var key = lay[idx];
        dest.appendChild(wtp.page['-Head-'+key]);
        dest.appendChild(wtp.page[key]);

        var below = wtp.layout[key];
        if(below){
            wtp_recursiveAppendChild(dest,below);
        }
        idx += 1;
    }
}
/**
 * Unfinished: meant to get the categories split by language too.
 * For now it only looks up the "catlinks" child of `div` and walks over its
 *  child nodes without doing anything with them.
**/
function wtp_handleCategories(div){
    var links = getChildByClass(div,"catlinks");
    var idx = 0;
    while(idx < links.childNodes.length){
        //TODO: work out which language each category belongs to
        idx++;
    }
}
/**
 * Handles a click on (or a programmatic activation of) a tab.
 *
 * The section belonging to a tab is found by id: the tab's id with
 *  "-<type>Tab-" replaced by "-<type>-". Tabs and sections are shown and
 *  hidden purely by switching their class names between
 *  <type>TabShown / <type>TabHidden and
 *  <type>SectionShown / <type>SectionHidden.
 *
 *  - Normally the tab that is currently shown (if any) is hidden and the
 *    requested one is shown.
 *  - Activating the tab that is already shown hides it again (toggle).
 *  - With `showonly` set nothing is ever hidden: the requested tab is shown
 *    (or stays shown).
 *
 * The section variable used to be overwritten with `showonly` (a boolean)
 *  when the shown tab was activated again; setting className on that boolean
 *  was silently ignored in sloppy mode and throws in strict mode. A separate
 *  flag is used instead.
 *
 * @param el       the tab element that was activated
 * @param type     "Language", "Homonym" or "Set": the id / class name prefix
 * @param showonly when truthy, only show and never hide
**/
function wtp_tabHandle(el,type,showonly){
    var tabPrefix="-"+type+"Tab-";
    var sectPrefix="-"+type+"-";
    var id=el.getAttribute('id').replace(tabPrefix,sectPrefix);
    var sect=document.getElementById(id);
    var show=true;

    //Check to see if we are already displaying a tab of this type
    if(el.parentNode){
        var otab = getChildByClass(el.parentNode,type+"TabShown");
        if(otab){
            var oid=otab.getAttribute('id').replace(tabPrefix,sectPrefix);
            //What to do on a dbl click: toggle off, unless we may only show
            if(oid==id && !showonly)show=false;
            //Hide old section
            var osect=document.getElementById(oid);
            if(osect && !showonly){
                otab.className=type+"TabHidden";
                osect.className=type+"SectionHidden";
            }
        }
    }
    //Display the new section
    if(sect && show){
        wtp['current'+type+'Tab']=el;
        sect.className=type+"SectionShown";
        el.className=type+"TabShown";
    }
}
/**
 * Called when the language view is selected; it ensures at least one
 *  language is showing and starts watching the address for #Language links.
 *
 * The tab shown is the one last activated (wtp.currentLanguageTab) or, the
 *  first time round, the first tab. When the entry has no language tabs at
 *  all there is nothing to show and the tab handler is not called (it used
 *  to be called with null, which threw).
 *
 * wtp_checkLanguageLinks re-schedules itself forever, so it is started only
 *  once; starting it on every activation of the view piled up one more
 *  100ms polling chain each time.
 *
 * @param parserNode unused
 * @param parserTab  unused
**/
function wtp_showLanguageView(parserNode,parserTab){
    if(!wtp.currentLanguageTab)wtp.currentLanguageTab=wtp.tabs.firstChild;
    if(wtp.currentLanguageTab){
        wtp_tabHandle(wtp.currentLanguageTab,"Language",true);
    }
    //Remember the anchor we arrived with, so that only later changes count
    wtp.currentLinkLanguage=document.location.href.replace(/[^#]+#?/,'');
    if(!wtp.linkWatcherStarted){
        wtp.linkWatcherStarted=true;
        window.setTimeout(wtp_checkLanguageLinks,100);
    }
}
/**
 * Tries to convert the matched up homonyms to a standard dictionary like entry
 *  much less feature-full! But hopefully nice??
 * IS EN.WIKT specific as it needs to parse the PoS section in more detail
 *
 * Walks wtp.homonym in order and writes into `node`:
 *  - a new language      -> <h3> with the language name and a new paragraph
 *                           starting with the headword, a (hidden) homonym
 *                           number 1 and the part of speech;
 *  - a new etymology     -> a new paragraph with headword, homonym number and
 *                           part of speech; the hidden number 1 of the
 *                           language is revealed;
 *  - a new part of speech-> a full stop, a ' ● ' separator and the part of
 *                           speech, in the same paragraph;
 *  - otherwise           -> a '; ' sense separator.
 * The definition itself follows in every case (see wtp_dictAppendDefinition).
 *
 * Does nothing if `node` already has content (the view is built only once).
 *
 * The locals `span` and `j` used to be undeclared (implicit globals), and the
 *  new-etymology branch did not record the part of speech it had just
 *  written, so the second sense under a new etymology repeated it.
 *
 * @param node      the (empty) element to build the view in
 * @param parserTab unused
**/
function wtp_createDictionaryView(node,parserTab){
    if(node.childNodes[0])return; //We have already run.

    node.className="DictionaryView";

    var lng; var pos; var ety; var p;
    var etyc=1;     //number of the current etymology within the language
    var fhn=false;  //id of the still hidden "1" of the current language
    for(var i=0;i<wtp.homonym.length;i++){
        var hnym = wtp.homonym[i];
        //Check we are adding it to the right language
        if(lng != hnym.language){
            lng = hnym.language;
            var h = document.createElement('h3');
            h.appendChild(document.createTextNode(lng.replace(/_/g,' ')));
            node.appendChild(h);
            p = document.createElement('p');
            node.appendChild(p);
            //Write the bold word at the start
            wtp_dictAppendSpan(p,"dictHomonym",wgTitle);
            //Add a hidden etymology count (will be shown if necessary)
            ety = hnym.etyTitle;
            etyc=1;
            fhn='-HiddenFhn-'+lng+etyc;
            wtp_dictAppendSpan(p,"dictOnlyHomonymNumber",etyc).setAttribute('id',fhn);
            //Add the PoS in italics
            wtp_dictAppendPos(p,hnym);
            pos = hnym.pos.title;
        }else if(hnym.etyTitle!=ety){
            //start a new paragraph
            p = document.createElement('p');
            node.appendChild(p);
            etyc+=1;
            ety = hnym.etyTitle;
            //The PoS is written below, remember it so it is not repeated
            pos = hnym.pos.title;
            //Write the bold word at the start
            wtp_dictAppendSpan(p,"dictHomonym",wgTitle);
            //Add homonym number
            wtp_dictAppendSpan(p,"dictHomonymNumber",etyc);
            //Add the PoS in italics
            wtp_dictAppendPos(p,hnym);
            //Show the first number
            if(fhn && document.getElementById(fhn)){
                document.getElementById(fhn).className="dictHomonymNumber";
                fhn=false;
            }
        }else if(hnym.pos.title!=pos){
            wtp_addPreviousFullstop(node.childNodes[node.childNodes.length-1]);
            pos = hnym.pos.title;
            //Add the PoS Seperator
            wtp_dictAppendSpan(p,"dictPosSep",' ● ');
            //Add the PoS in italics
            wtp_dictAppendPos(p,hnym);
        }else{
            wtp_removeLastPunctuation(node.childNodes[node.childNodes.length-1]);
            //put a sense seperator in, could be something better
            wtp_dictAppendSpan(p,"dictSenseSep",'; ');
        }
        wtp_dictAppendDefinition(p,hnym.node);
    }
}
/**
 * Helper: appends <span class="className">text</span> to `parent` and
 *  returns the new span.
**/
function wtp_dictAppendSpan(parent,className,text){
    var span=document.createElement('span');
    span.className=className;
    span.appendChild(document.createTextNode(text));
    parent.appendChild(span);
    return span;
}
/**
 * Helper: returns the entry of wtp.ppAbbrevs for `word`, or null when there
 *  is none (inherited properties such as 'constructor' do not count).
**/
function wtp_dictAbbrev(word){
    if(Object.prototype.hasOwnProperty.call(wtp.ppAbbrevs,word)){
        return wtp.ppAbbrevs[word];
    }
    return null;
}
/**
 * Helper: appends the part of speech of `hnym` (the last component of its
 *  section key, lower-cased) to `parent`, abbreviated where an abbreviation
 *  is known; the full name then goes into the title attribute.
**/
function wtp_dictAppendPos(parent,hnym){
    var ps = hnym.pos.title.replace(/_/g,' ').replace(/.+-/,'').toLowerCase();
    var abbrev = wtp_dictAbbrev(ps);
    if(abbrev){
        wtp_dictAppendSpan(parent,"dictPos",abbrev).setAttribute('title',ps);
    }else{
        wtp_dictAppendSpan(parent,"dictPos",ps);
    }
}
/**
 * Helper: copies the definition held in `defNode` into the paragraph `p`.
 *  Text nodes (and other '#...' nodes) are cloned; lists (example sentences)
 *  and <div>s (parsed stuff and other floaty boxes) are left out; a
 *  <span class="ib-content"> is converted by wtp_dictAppendContext and any
 *  other <span> is left out; everything else is cloned.
 * cloneNode() is the global helper the original code already relied on; it
 *  is not defined in this part of the file.
**/
function wtp_dictAppendDefinition(p,defNode){
    for(var j=0;j<defNode.childNodes.length;j++){
        var on = defNode.childNodes[j];
        var nn = on.nodeName.toUpperCase();
        if(nn.indexOf('#')==0){
            p.appendChild(cloneNode(on)); //Include Text Nodes
        }else if(nn=='DL' || nn=='OL' || nn=='UL' || nn=='DIV'){
            //don't include example sentances or boxes
        }else if(nn=='SPAN'){
            if(on.className=='ib-content'){
                wtp_dictAppendContext(p,extractText(on));
            }
            //Will handle sense vs. grammar
        }else{
            p.appendChild(cloneNode(on));
        }
    }
}
/**
 * Helper: `txt` is a comma separated list of context tags. Tags with a known
 *  abbreviation are written as grammar markers, the remaining non-empty tags
 *  are written together in brackets.
**/
function wtp_dictAppendContext(p,txt){
    var sns=txt.split(/ *, */);
    var k=0;
    while(k<sns.length){
        var abbrev = sns[k]=='' ? null : wtp_dictAbbrev(sns[k]);
        if(sns[k]==''){
            sns.splice(k,1);
        }else if(abbrev){
            wtp_dictAppendSpan(p,'dictGrammar',abbrev).setAttribute('title',sns[k]);
            sns.splice(k,1);
        }else{
            k+=1;//Just a context tag.
        }
    }
    if(sns.length>0){
        wtp_dictAppendSpan(p,'ib-bracket','(');
        wtp_dictAppendSpan(p,'ib-contents',sns.join(','));
        wtp_dictAppendSpan(p,'ib-bracket',')');
    }
}
/**
 * Attempts to remove a last [.,;:!?] (and any whitespace after it) from
 *  within or before the given node.
 *
 * Elements are searched through their last child first and, failing that,
 *  through their previous sibling. A text node ending in punctuation is
 *  trimmed; a text node for which containsText() is false is removed after
 *  the search has continued with its previous sibling; a text node with text
 *  but without trailing punctuation ends the search.
 *
 * Returns true when something was trimmed, false otherwise.
**/
function wtp_removeLastPunctuation(node){
    if(!node){
        return false;
    }

    var kids = node.childNodes;
    if(kids && kids.length){
        return wtp_removeLastPunctuation(kids[kids.length-1])
            ? true
            : wtp_removeLastPunctuation(node.previousSibling);
    }

    var text = node.nodeValue;
    if(!text){
        return false;
    }
    if(text.match(/[\.,;:!\?][ \n\r\s]*$/g)){
        node.nodeValue = text.replace(/[\.,;:!\?][ \n\r\s]*$/g,'');
        return true;
    }
    if(containsText(node)){
        return false;
    }

    //Nothing but filler in here: look further back, then drop the node
    var found = wtp_removeLastPunctuation(node.previousSibling);
    node.parentNode.removeChild(node);
    return found;
}
/**
 * Attempts to add a fullstop to the last piece of text within or before the
 *  given node, providing no punctuation is already there.
 *
 * Elements are searched through their last child first and, failing that,
 *  through their previous sibling. A text node that already ends in
 *  [.,;:!?] is left alone; a text node with text gets a '.' in place of its
 *  trailing whitespace (and before a closing '"'); a text node for which
 *  containsText() is false is removed after the search has continued with
 *  its previous sibling.
 *
 * The replacement used to carry the /g flag. Because the pattern can also
 *  match the empty string at the very end, text ending in whitespace or a
 *  quote was matched twice and ended up with two full stops
 *  ('abc ' -> 'abc..'). Only the first match is replaced now.
 *
 * Returns true when the text ends in punctuation afterwards, false when no
 *  suitable text was found (this used to be undefined in some cases).
**/
function wtp_addPreviousFullstop(node){
    if(!node){
        return false;
    }
    if(node.childNodes && node.childNodes.length){
        if(wtp_addPreviousFullstop(node.childNodes[node.childNodes.length-1])){
            return true;
        }
        return wtp_addPreviousFullstop(node.previousSibling) ? true : false;
    }
    if(!node.nodeValue){
        return false;
    }
    if(node.nodeValue.match(/([\.,;:!\?])[ \n\r\s]*$/)){
        return true;
    }
    if(containsText(node)){
        node.nodeValue=node.nodeValue.replace(/(^[ \n\r\s\"])?(\")?[ \n\r\s]*$/,'$1.$2'); //"
        return true;
    }
    //Nothing but filler in here: look further back, then drop the node
    var ret=wtp_addPreviousFullstop(node.previousSibling) ? true : false;
    node.parentNode.removeChild(node);
    return ret;
}
/**
 * Extract user preferences from the environment, this should eventually
 *    be able to get cookies, and predefined js variables, and we may even get
 *  a nice preference setter
 *
 * Preferences are only filled in when the global wtpNoParser is defined and
 *  false, or when the page is a main namespace article being viewed
 *  (wgIsArticle, wgNamespaceNumber == 0, wgAction == "view"):
 *    SplitDom      always true
 *    TabLanguages  true unless a global wtpNoSplitLanguages is defined
 *    MatchHomonyms true unless a global wtpNoJoinHomonyms is defined
 *
 * In every case the parser's stylesheet is added to the document head.
 *  This is done by building a <style> element; the previous approach
 *  (head.innerHTML += ..., falling back to document.write) re-parsed the
 *  whole head and, where innerHTML of the head cannot be written, replaced
 *  an already loaded page through document.write.
 *  Adding the stylesheet stays best effort: a failure is ignored.
 *
 * Two slips in the stylesheet are corrected: the colour '#FFFFF' (five
 *  digits, not a valid colour) is now '#FFFFFF', and the stray ';' after the
 *  last rule is gone.
**/
function wtp_loadPrefs(){
        //i.e. turn off completely
    if ( ( typeof(wtpNoParser) != "undefined"
        && wtpNoParser == false )
      || (wgIsArticle == true
        && wgNamespaceNumber ==0
        && wgAction == "view" ) ){

        wtp.prefs['SplitDom']=true;

        //Whether to display the language tabs at the top
        wtp.prefs['TabLanguages']=
            (typeof(wtpNoSplitLanguages)=="undefined")?true:false;
        //Whether to try and put related homonyms together
        wtp.prefs['MatchHomonyms']=
            (typeof(wtpNoJoinHomonyms)=="undefined")?true:false;
    }
    //Hacked in stylesheet, one rule per entry
    var tabSpan='{ padding:0px; padding-left: 3px; padding-right: 3px; border: 1px solid #000; white-space:pre; margin-left: -1px; }';
    var style=[
        '#LanguageOutput { background-color: #F8F8F8 }',
        '.LanguageSectionHidden { display:none; }',
        '.LanguageSectionShown { display:block; }',
        '.LanguageTabs { line-height: 31px; margin: 10px }',
        '.LanguageTabs>span '+tabSpan,
        '.LanguageTabHidden{ background-color: #BBB }',
        '.LanguageTabShown{ background-color: #EEE }',

        '.SetSectionHidden { display:none; }',
        '.SetSectionShown { display:block; }',
        '.SetTabHidden{ background-color: #BBB }',
        '.SetTabShown{ background-color: #EEE }',
        '.SetTabs { line-height: 31px; margin: 10px }',
        '.SetTabs>span '+tabSpan,

        '.HomonymSectionHidden { display:none; }',
        '.HomonymSectionShown { display:block; }',
        '.HomonymTabHidden{ background-color: #BBB }',
        '.HomonymTabShown{ background-color: #EEE }',
        '.HomonymTabs { line-height: 31px; margin: 10px }',
        '.HomonymTabs> span '+tabSpan,

        '.ParserViewHidden{ display: none; }',
        '.ParserViewShown{ display: block; }',
        '.ParserTabHidden { color: #0000FF }',
        '.ParserTabShown{ color: #000000; font-weight: bold; }',
        '.UnrecognisedSection{ background-color: #FFFFFF }',
        '.UnrecognisedSectionTitle{ color: #FF0000}',
        '.Unparsable{ color: #EE3333 }',

        '.DictionaryView{ }',
        '.dictHomonym{ font-weight: bold; margin-right: 2px; }',
        '.dictPos{ font-style: italic; margin-right: 2px; }',
        '.dictPosSep{ font-weight: bold; /*font-style: italic;*/ }',
        '.dictHomonymNumber{ font-weight: bold; display:inline; margin-right: 2px; }',
        '.dictOnlyHomonymNumber{ display: none; }',
        '.dictGrammar{ font-style: italic; margin-right: 2px; font-size:90%; }'
    ].join('\n');
    try{
        var styleNode=document.createElement('style');
        styleNode.setAttribute('type','text/css');
        if(styleNode.styleSheet){
            //Old Internet Explorer: <style> elements take no text children
            styleNode.styleSheet.cssText=style;
        }else{
            styleNode.appendChild(document.createTextNode(style));
        }
        document.getElementsByTagName('head')[0].appendChild(styleNode);
    }catch(e){
        //Best effort: the parser still works without its stylesheet
    }
}
/*************** Homonym Matching ***************/
/**
 * Parse the page and try and match the scrambled homonyms together
 * Does it by parsing all the sections according to the handlers defined by
 * their headings
 *
 * For every language that has subsections, each subsection key is reduced to
 *  a section name: its last component, without a trailing " 2" / "_II" style
 *  number, in lower case. wtp.section maps that name to the name of the
 *  handler in wtp.handler, which is called with the section's node and key.
 *  Sections without a handler get the "UnrecognisedSection" classes.
 *  wtp_matchCurrent is called once per language, after its sections.
 *
 * Does nothing when the entry has no top-level headings at all
 *  (wtp.layout['-Top'] missing), as wtp_createLanguageTabs already does.
 *
 * The number is stripped before lower-casing: the pattern names the roman
 *  numerals in upper case ([IVX]) and could never match once the name had
 *  been lower-cased.
**/
function wtp_matchHomonyms(){
    var languages=wtp.layout['-Top'];
    if(!languages)return;

    for(var l=0;l<languages.length;l++){
        var lang=languages[l];
        if(!wtp.layout[lang]) continue;

        var flat = wtp_flattenLayout(lang);
        for(var i=0; i<flat.length; i++){
            var key=flat[i];
            var sect=key.replace(/.+-/,'').replace(/[ _]([0-9]*|[IVX]*)$/,'').toLowerCase();
            if(sect && wtp.handler[wtp.section[sect]]){
                wtp.handler[wtp.section[sect]](wtp.page[key],key);
            }else{
                wtp.page[key].className="UnrecognisedSection";
                wtp.page['-Head-'+key].className="UnrecognisedSectionTitle";
            }
        }
        wtp_matchCurrent();
    }
}
/**
 * Converts the tree below `lang` in wtp.layout into a flat array for
 *  looping: every child key, each followed directly by its own descendants.
 * Returns an empty array for a key without children.
**/
function wtp_flattenLayout(lang){
    var flat = [];
    var children = wtp.layout[lang];
    if(!children){
        return flat;
    }
    for(var c=0; c<children.length; c++){
        flat.push(children[c]);
        var below = wtp_flattenLayout(children[c]);
        for(var b=0; b<below.length; b++){
            flat.push(below[b]);
        }
    }
    return flat;
}
/**
 * Records one homonym (a definition) so that glosses can be matched to it
 *  later on.
 *
 * The definition text is lower-cased, split on non-word characters and
 *  sorted to give the word list used for matching. The homonym belongs to
 *  the set registered last (wtp.set); its language is the first component of
 *  that set's title.
 *
 * Returns the record that was added to wtp.homonym.
**/
function wtp_registerHomonym(node,definition){
    var words = definition.toLowerCase().split(/\W+/).sort();
    var currentSet = wtp.set[wtp.set.length-1];
    var record = {
        'match' : words,
        'node' : node,
        'text' : definition,
        'pos'  : currentSet,
        'language' : currentSet.title.replace(/-.+/,'')
    };
    wtp.homonym.push(record);
    return record;
}
/**
 * Records a section that belongs to one particular homonym and is labelled
 *  with a gloss, so that wtp_matchCurrent can attach it to the definition it
 *  fits best. The gloss is turned into a sorted list of lower-case words;
 *  the language is the first component of the section title.
**/
function wtp_registerHomonymSection(node,gloss,title){
    var record = {
        'match' : gloss.toLowerCase().split(/\W+/).sort(),
        'node' : node,
        'gloss' : gloss,
        'title' : title
    };
    record.language = title.replace(/-.+/,'');
    wtp.gloss.push(record);
}
/**
 * Records a "set": a section, given by its node and its title (section key),
 *  that homonyms and set bits are attached to. wtp_registerHomonym files new
 *  homonyms under the set registered last.
**/
function wtp_registerSet(node,title){
    var record = {};
    record.node = node;
    record.title = title;
    wtp.set.push(record);
}
/**
 * Records a "set bit": a section (node and title) that is to be shown as a
 *  tab of the sets it belongs to; wtp_matchCurrent distributes them.
**/
function wtp_registerSetBit(node,title){
    var record = {};
    record.node = node;
    record.title = title;
    wtp.bit.push(record);
}
/**
 * This is called when it is obvious that all sections associated with a
 * set of homonyms have been found, on en.wikt when a new "Etymology" section
 * starts, for example
 *
 * It works in three steps:
 *  1. Every registered gloss section (wtp.gloss) is given to the homonym
 *     whose definition it scores best against (wtp_glossScore). A homonym
 *     keeps one gloss per section title; a better scoring gloss displaces
 *     the one it held, which is then matched again. Glosses that score
 *     nothing anywhere become set bits.
 *  2. Every homonym not handled before gets a "HomonymTabs" bar with one tab
 *     per gloss section it was given; the sections are moved below the bar
 *     and their original headings are taken out of the page.
 *  3. Every set not handled before gets a "SetTabs" bar with one tab per
 *     set bit that belongs to it; wtp.bit is emptied afterwards.
 *
 * Corrections made here:
 *  - the best score is stored per section title (def.score[title]); it used
 *    to replace the whole score object with a number, so the "is this
 *    better than what it already has" test never worked;
 *  - when a gloss is displaced it is the displaced gloss that is matched
 *    again (the new one used to be queued a second time); this is only done
 *    for homonyms whose tabs have not been built yet;
 *  - duplicate set tab titles are numbered using a local record of the
 *    titles already used; the former lookup searched for ids ending in
 *    '-'+n while the ids are built with '_'+n, so a third duplicate got the
 *    id of the second;
 *  - all underscores of a homonym tab title are replaced, not just the first.
 *
 * NOTE(review): a set bit that belongs to several sets gets a tab in each,
 *  but its node is moved every time and so ends up in the last one only.
**/
function wtp_matchCurrent(){
    var i;
//First match the homonyms to the glosses
    var glo;
    while((glo=wtp.gloss.pop())) {
        var best=-1;var highest=0;
        //Find the highest definition score for each gloss
        for(i=0;i<wtp.homonym.length;i++){
            var cand=wtp.homonym[i];
            var score = wtp_glossScore(glo.match,cand.match);
            if(score>highest &&
                 (!cand.score || !cand.score[glo.title] || score>cand.score[glo.title])){
                highest=score;best=i;
            }
        }
        if(best>-1){
            var def=wtp.homonym[best];
            if(!def.score)def.score={};
            if(!def.gloss)def.gloss={};
            //Try again with the gloss we are displacing
            if(!def.done && def.score[glo.title] && def.gloss[glo.title]){
                wtp.gloss.push(def.gloss[glo.title]);
            }
            def.score[glo.title]=highest;
            def.gloss[glo.title]=glo;
        }else{ //Add it to general sections
            wtp_registerSetBit(glo.node,glo.title);
        }
    }
//Then create the gloss tabs
    for(i=0;i<wtp.homonym.length;i++){
        var hom=wtp.homonym[i];
        if(hom.done)continue;
        hom.done=true;

        var tabs=hom.tabs;
        var div=hom.div;
        if(!tabs){
            tabs=document.createElement('div');
            tabs.className="HomonymTabs";
            div=document.createElement('div');
            tabs.appendChild(div);
            hom.node.appendChild(tabs);
            hom.tabs=tabs;
            hom.div=div;
        }
        for(var gc in hom.gloss){
            var g=hom.gloss[gc];
            var tab=document.createElement('span');
            tab.className="HomonymTabHidden";
            tab.appendChild(document.createTextNode(g.title.replace(/.+-/,'').replace(/_/g,' ')));
            tab.setAttribute('id','-HomonymTab-'+gc+'-'+i+'-'+g.title);
            tabs.insertBefore(tab,div);
            g.node.setAttribute('id','-Homonym-'+gc+'-'+i+'-'+g.title);
            g.node.className="HomonymSectionHidden";
            wtp_matchCurrentDetach(g.node);
            wtp_matchCurrentDetach(wtp.page['-Head-'+g.title]);

            div.appendChild(g.node);
            wtp_matchCurrentBindTab(tab,"Homonym");
        }
    }
//Then add the set bits to each set (Etymology to Noun etc.)
    for(i=0;i<wtp.set.length;i++){
        var set=wtp.set[i];
        if(set.done)continue;
        set.done=true;

        var setTabs=document.createElement('div');
        setTabs.className="SetTabs";
        var setDiv=document.createElement('div');
        setTabs.appendChild(setDiv);
        set.node.insertBefore(setTabs,set.node.childNodes[0]);

        var used={}; //titles that already have a tab in this set
        for(var b=0;b<wtp.bit.length;b++){
            var bit = wtp.bit[b];
            //A bit nested deeper than the set only matches if its key
            // contains the key of the set
            if( bit.title.replace(/[^-]/g,'').length >
                set.title.replace(/[^-]/g,'').length
              && bit.title.indexOf(set.title)<0){
                continue; //shouldn't match
            }

            var title=bit.title;
            if(Object.prototype.hasOwnProperty.call(used,title)){
                var n=1;
                while(Object.prototype.hasOwnProperty.call(used,bit.title+'_'+n)){
                    n=n+1;
                }
                title=bit.title+'_'+n;
            }
            used[title]=true;

            var setTab=document.createElement('span');
            setTab.className="SetTabHidden";
            setTab.appendChild(document.createTextNode(title.replace(/.+-/,'').replace(/_/g,' ')));
            setTab.setAttribute('id','-SetTab-'+i+'-'+title);
            setTabs.insertBefore(setTab,setDiv);

            bit.node.setAttribute('id','-Set-'+i+'-'+title);
            bit.node.className="SetSectionHidden";
            wtp_matchCurrentDetach(bit.node);
            setDiv.appendChild(bit.node);
            wtp_matchCurrentDetach(wtp.page['-Head-'+bit.title]);

            wtp_matchCurrentBindTab(setTab,"Set");
        }
    }
    wtp.bit=[];
}
/**
 * Helper: takes `node` out of its parent, if it exists and has one.
**/
function wtp_matchCurrentDetach(node){
    if(node && node.parentNode){
        node.parentNode.removeChild(node);
    }
}
/**
 * Helper: makes a click on `tab` call wtp_tabHandle for tabs of the given
 *  type, using attachEvent where addEventListener is not available.
**/
function wtp_matchCurrentBindTab(tab,type){
    try{
        tab.addEventListener('click',
            function(e){wtp_tabHandle(e.target,type);},false);
    }catch(err){
        tab.attachEvent('onclick',
            function(){
                wtp_tabHandle(window.event.srcElement,type);
            }
        );
    }
}
/**
 * Works out a score for how closely two sorted arrays of words match:
 *  every word of more than two letters found in both adds 1 plus half its
 *  length.
 * Both arrays are edited on the way: adjacent duplicates are dropped as they
 *  are reached and words of one or two letters found in both are removed.
**/
function wtp_glossScore(glo,def){
    var g = 0;
    var d = 0;
    var total = 0.0;

    while(g < glo.length && d < def.length){
        //Remove duplicates
        while(glo[g+1] && glo[g+1] == glo[g]){
            glo.splice(g+1,1);
        }
        while(def[d+1] && def[d+1] == def[d]){
            def.splice(d+1,1);
        }

        if(glo[g] != def[d]){
            //Advance whichever side is behind in the sort order
            if(glo[g] < def[d]){
                g += 1;
            }else{
                d += 1;
            }
            continue;
        }

        if(def[d].length > 2){
            //Letter modifier should be tweaked
            total += 1.0 + (glo[g].length/2);
            g += 1;
            d += 1;
        }else{
            //Remove short words
            def.splice(d,1);
            glo.splice(g,1);
        }
    }
    return total;
}
/****************** DOM Helpers *****************/
/**
 * containsText(node) is a W3C DOM compliant cross-browser way of asking
 *  whether a node holds any visible wording, without building the whole
 *  string first the way (extractText(node)=="") would.
 * Only the ASCII letters a-z and A-Z count as text; digits, punctuation,
 *  whitespace and any other characters are ignored.
 * Child nodes whose nodeName starts with '#' (other than text nodes) are
 *  skipped; everything else is searched recursively.
 *
 * returns true or false
**/
function containsText(el){
    //True when the string has at least one ASCII letter in it
    function hasLetters(str){
        return str.replace(/[^a-zA-Z]/g,'').length>0;
    }

    var kids=el.childNodes;
    for(var k=0;k<kids.length;k++){
        var child=kids[k];
        var isTextNode=(child.nodeName.toUpperCase()=='#TEXT');
        if( isTextNode
            ? hasLetters(child.nodeValue)
            : (child.nodeName.indexOf('#')!=0 && containsText(child)) ){
            return true;
        }
    }
    //The node itself may be a text node (no children, but a nodeValue)
    return !!el.nodeValue && hasLetters(el.nodeValue);
}
/**
 * extractText(node) is a W3C DOM compliant cross-browser way of
 *  saying (node.innerText) though it may well be slower (no benchmarking)
 * Concatenates, in document order, the value of every text node below
 *  the given node. Other children whose nodeName starts with '#'
 *  (comments and the like) contribute nothing.
 *
 * returns the collected string ("" when the node has no children)
**/
function extractText(el){
    var text="";
    var kids=el.childNodes;
    for( var k=0;k<kids.length;k++ ){
        var child=kids[k];
        var name=child.nodeName;
        if( name.toUpperCase()=='#TEXT' ){
            text+=child.nodeValue;
            continue;
        }
        if( name.indexOf('#')==0 ){
            continue; //comment or similar, nothing to read
        }
        text+=extractText(child);
    }
    return text;
}
/**
 * getChildByClass(node,clsname) is essentially document.getElementById
 *  but local: it looks only at the direct children of node and returns
 *  the first one whose className is exactly clsname.
 *
 * returns the matching child, or undefined when node is missing or no
 *  direct child matches
**/
function getChildByClass(node,clsname){
    if(!node) return;
    var kids=node.childNodes;
    var k=0;
    while(k<kids.length){
        var kid=kids[k];
        if( kid.className==clsname ){
            return kid;
        }
        k++;
    }
}
/**
 * Builds a fresh deep copy of a node out of newly created elements, so
 *  that the copy carries none of the original's event handlers.
 *  - text nodes become new text nodes with the same value
 *  - elements are recreated with the same nodeName, className and
 *    attributes, except that an id gets a 'C' appended (to avoid id
 *    repetition) and an inline style has its first display/visibility
 *    declaration stripped
 *  - anything else (a comment, say) becomes an empty text node
 *
 * returns the newly created node
**/
function cloneNode(onode){
    var kind=onode.nodeType;
    if(kind==3){
        return document.createTextNode(onode.nodeValue);
    }
    if(kind!=1){ //A comment.
        return document.createTextNode('');
    }

    var copy = document.createElement(onode.nodeName);
    if(onode.className){
        copy.className=onode.className;
    }

    var attrs=onode.attributes;
    for(var a=0;a<attrs.length;a++){
        var attr=attrs[a];
        var name=attr.nodeName;
        if(name == 'id'){
            copy.setAttribute(name,attr.nodeValue+'C');
        }else if(name == 'style'){
            copy.style.cssText = onode.style.cssText.replace(/(display|visibility):[^;]+/,'');
        }else{
            copy.setAttribute(name, attr.nodeValue);
        }
    }

    var kids=onode.childNodes;
    for(var c=0;c<kids.length;c++){
        copy.appendChild(cloneNode(kids[c]));
    }
    return copy;
}
/**************** en.wikt specifics **************/
/**
 * Each possible title (at any level) and which handler it should use.
 *  Keys are section titles in lower case with underscores for spaces;
 *  values name an entry of wtp.handler ('pos', 'thesaurus',
 *  'translations', 'etymology' or 'trivia').
**/
wtp.section = { //From [[User:AutoFormat/Headers]] thanks Ullman!!
    //English POS
    'noun':'pos','noun_form':'pos','noun_phrase':'pos','proper_noun':'pos',
    'prenoun':'pos',
    
    'verb':'pos','verb_form':'pos','verb_phrase':'pos', 'preverb':'pos',
    'transitive_verb':'pos','intransitive_verb':'pos',
    
    'adjective':'pos','adjective_form':'pos','adjective_phrase':'pos',
    'adverb':'pos','adverb_phrase':'pos',
    
    'pronoun':'pos','conjunction':'pos','contraction':'pos',
    'interjection':'pos','article':'pos','preposition':'pos',
    
    'prefix':'pos','suffix':'pos','affix':'pos','infix':'pos',
    
    'idiom':'pos','phrase':'pos',
    
    'acronym':'pos','abbreviation':'pos','initialism':'pos',
    
    'symbol':'pos','letter':'pos',
    
    'numeral':'pos','ordinal_numeral':'pos','cardinal_numeral':'pos',
    
    'number':'pos','ordinal_number':'pos','cardinal_number':'pos',
    //Other POS
    'particle':'pos','proverb':'pos','han_character':'pos',
    
    'kanji':'pos','hanzi':'pos','hanja':'pos',
    
    'pinyin':'pos','pinyin_syllable':'pos','syllable':'pos',
    'katakana_character':'pos','hiragana_letter':'pos',
    'hiragana_character':'pos',
    
    'counter':'pos','classifier':'pos','adnominal':'pos','determiner':'pos',
    'expression':'pos','postposition':'pos','root':'pos','participle':'pos',
    //More interesting stuff
    'synonyms':'thesaurus',
    'antonyms':'thesaurus',
    
    'translations':'translations',
    'translations_to_be_checked':'trivia', //this puts them by PoS not Homonym
    
    'etymology':'etymology',
    
    'conjugation':'trivia', 'inflection':'trivia','declension':'trivia',
    'participles':'trivia','infinitives':'trivia',
    
    'alternative_forms':'trivia', 'alternative_spellings':'trivia',
    
    'pronunciation':'trivia',
    
    'derived_terms':'trivia',  'related_terms':'trivia', 
    'descendants':'trivia', 'mutation':'trivia', 'compounds':'trivia',
    'abbreviations':'trivia','forms':'trivia',
    
    'hypernyms':'trivia','hyponyms':'trivia','meronyms':'trivia',
    'homonyms':'trivia','holonyms':'trivia','troponyms':'trivia',
    'homophones':'trivia','hyphenation':'trivia',

    'devanagari_spelling':'trivia','urdu_spelling':'trivia',
    'cyrillic_spelling':'trivia','roman_spelling':'trivia',

    'kanji_reading':'trivia',
    
    //'scientic_names' was a misspelling that could never match a correctly
    // spelled heading; it is kept only so existing lookups still resolve
    'scientific_names':'trivia','scientic_names':'trivia',
    
    'proverbs':'trivia','expressions':'trivia','coordinate_terms':'trivia',
        
    'see_also':'trivia', 'external_links':'trivia','references':'trivia',
    
    'names_in_other_languages':'trivia','variants_and_pet_forms':'trivia',
    
    'anagrams':'trivia','trivia':'trivia','shorthand':'trivia',
    
    'usage_notes':'trivia','dictionary_notes':'trivia','quotations':'trivia'
    //There are a few missing, I got bored ;)
};

/**
 * Handler for part of speech sections.
 *  Registers the section as a set, then walks its direct children:
 *  - every LI of an OL is registered as a homonym, keyed by its text, and
 *    tagged with the etymology title current at that moment (wtp.curEty)
 *  - other OL children that contain text are flagged "Unparsable"
 *  - elements already marked infl-table / infl-inline (or a P whose first
 *    child is infl-inline) are left alone
 *  - any other element seen before the first of those or the first OL is
 *    guessed to be the inflection line; afterwards, elements that
 *    contain text are flagged "Unparsable"
**/
wtp.handler['pos'] = function(el,title){
    var beforeDefs=true; //nothing recognisable seen yet
    wtp_registerSet(el,title);

    for(var i=0;i<el.childNodes.length;i++){
        var child=el.childNodes[i];

        if(child.nodeName.toUpperCase()=='OL'){
            beforeDefs=false;
            for(var j=0;j<child.childNodes.length;j++){
                var item=child.childNodes[j];
                if(item.nodeName.toUpperCase()=='LI'){
                    var sense=wtp_registerHomonym(item,extractText(item));
                    sense.etyTitle=wtp.curEty;
                }else if(containsText(item)&&item.nodeName.indexOf('#')){
                    item.className="Unparsable";
                }
            }
            continue;
        }

        //indexOf gives 0 for '#text', '#comment' etc. - skip those
        if(!child.nodeName.indexOf('#')){
            continue;
        }

        var isInflection = child.className=='infl-table'
            || child.className=='infl-inline'
            || child.nodeName == 'P' && child.firstChild
                && child.firstChild.className=='infl-inline';

        if(isInflection){
            beforeDefs=false;
            //Skip element
        }else if(beforeDefs){
            child.className="infl-inline"; //Guess first section is inflection
        }else if(containsText(child)){
            child.className="Unparsable";
        }
    }
};
//Title of the etymology section most recently handled ('' before any)
wtp.curEty = '';
/**
 * Handler for etymology sections.
 *  Calls wtp_matchCurrent() first, then records this section's title as
 *  the current etymology (this is to let paperView differentiate between
 *  homonyms) and registers the section as a set bit.
**/
wtp.handler.etymology = function(el,title){
    wtp_matchCurrent();
    wtp.curEty = title;
    wtp_registerSetBit(el,title);
};
/**
 * Handler for sections needing no special parsing: the section is simply
 *  registered as a set bit under its title.
**/
wtp.handler.trivia = function(el,title){
    wtp_registerSetBit(el,title);
};
/**
 * Handler for translation sections.
 *  Every NavFrame div directly inside the section is passed to the
 *  navframe handler. If every frame parsed and nothing else in the
 *  section held content, the (now redundant) section is removed from the
 *  page; otherwise the section is registered as a set bit so that the
 *  leftovers stay visible.
**/
wtp.handler['translations'] = function(el,title){

    var success=1;

    for(var i=0;i<el.childNodes.length;i++){
        var div=el.childNodes[i];
        if(div.nodeName.indexOf('#')){
            if(div.nodeName=="DIV" && div.className=='NavFrame'){
                success*=wtp.handler['navframe'](div,title);
                //The navframe handler detaches the frame only when it
                // managed to parse it. Step back only in that case; doing
                // so for a frame that is still in place would hand the
                // same frame to the handler again and never terminate.
                if(div.parentNode!==el){
                    i--;
                }
            }else{
                success=0;
            }
        }else if(containsText(div)){
            success=0;
        }
    }
    if(success){
        el.parentNode.removeChild(el);
    }else{
        wtp_registerSetBit(el,title);
    }
};
/**
 * Handler for a NavFrame (collapsible box) inside a section.
 *  Looks among the frame's direct children for the first NavHead and,
 *  after it, the first NavContent. When both are found, and the content
 *  does not mention 'to be checked', the frame is detached from its
 *  parent and the content is registered as a homonym section keyed by
 *  the text of the head.
 *  Both nodes have their inline style cleared and their id prefixed
 *  with '='.
 *  Any other element containing text is flagged "Unparsable".
 *
 * returns 1 when head and content were both taken and nothing
 *  unexpected was found, 0 otherwise
**/
wtp.handler['navframe'] = function(el,title){

    var header=false;  //the NavHead, once found
    var content=false; //the NavContent, once accepted
    var ok=1;

    for(var i=0;i<el.childNodes.length;i++){
        var child=el.childNodes[i];

        //indexOf gives 0 for '#text', '#comment' etc.
        if(!child.nodeName.indexOf('#')){
            if(containsText(child)){
                ok=0;
            }
            continue;
        }

        if(child.className=='NavHead'&&!header){
            header=child;
            header.style.cssText="";
            header.id="="+header.id;
        }else if(child.className=='NavContent'&&header&&!content){
            if(extractText(child).indexOf('to be checked')>-1){
                ok=0;
                continue;
            }
            content=child;
            content.style.cssText="";
            content.id="="+content.id;
            el.parentNode.removeChild(el);
            wtp_registerHomonymSection(content,extractText(header),title);
        }else if(containsText(child)){
            ok=0;
            child.className="Unparsable";
        }
    }

    if(header&&content){
        return ok;
    }
    return 0;
};
/**
 * Handler for thesaurus-like sections (synonyms, antonyms).
 *  - In every UL, each item carrying a sense tag (a direct child of class
 *    'ib-content' or 'qualifier-content') has its leading nodes, up to
 *    and including the colon that closes the qualifier, stripped. What
 *    remains is registered as a homonym section keyed by the text of the
 *    sense tag. Items without a sense tag, or without the closing colon,
 *    mark the section "Unparsable".
 *  - NavFrame divs are passed on to the navframe handler.
 *  - Anything else that contains text marks the section "Unparsable".
 *  If everything was understood the section is removed from the page,
 *  otherwise it is registered as a set bit.
**/
wtp.handler['thesaurus'] = function(el,title){

    //Index of the child of li that is the colon ending the qualifier,
    // or -1 if li has no such child
    function colonIndex(li){
        for(var n=0;n<li.childNodes.length;n++){
            var c=li.childNodes[n];
            if(c.nodeName.indexOf('#') &&
                (c.className=='ib-colon' || c.className=='sense-qualifier-colon')){
                return n;
            }
        }
        return -1;
    }

    var success=true;
    for(var i=0;i<el.childNodes.length;i++){
        var ol=el.childNodes[i];
        if(ol.nodeName.toUpperCase()=='UL'){
            for(var j=0;j<ol.childNodes.length;j++){
                var li=ol.childNodes[j];
                var senseTag=getChildByClass(li,'ib-content');
                if(!senseTag)senseTag=getChildByClass(li,'qualifier-content');
                //Locate the colon before removing anything: stripping
                // blindly would run off the end of the item (and throw in
                // removeChild) when the colon is missing
                var colonAt=senseTag?colonIndex(li):-1;
                if(colonAt>=0){
                    for(var k=0;k<=colonAt;k++){
                        li.removeChild(li.childNodes[0]);
                    }
                    if(containsText(li))
                        wtp_registerHomonymSection(li,extractText(senseTag),title);
                }else{
                    success=false;
                    el.className="Unparsable";
                }
            }
        }else if(ol.nodeName.toUpperCase()=='DIV' && ol.className=='NavFrame'){
            if(!wtp.handler['navframe'](ol,title)) success=false;
            //A parsed frame has been detached by the navframe handler, so
            // the next sibling now sits at index i: revisit that index
            if(ol.parentNode!==el) i--;
        }else if(containsText(ol)){ //Something we dont understand
            success=false;
            el.className="Unparsable";
        }
    }
    if(success){
        el.parentNode.removeChild(el);
    }else{
        wtp_registerSetBit(el,title);
    }
};
/**************** General Parser Functions **************/
var parsers = {};
/**
 * This section will be split to a different file if necessary.
 * Parsers should use parsers_getContentNode to get the page content:
 *  it hands back a copy (made with cloneNode) of the element with id
 *  'bodyContent', which ensures that the content node itself remains
 *  available for others to use.
 *
 * returns the copy, or false when the page has no such element
**/
function parsers_getContentNode(){
    var content = document.getElementById('bodyContent');
    //No warning is shown when this doesn't look like a wiktionary page,
    // the caller just gets false.
    return content ? cloneNode(content) : false;
}
/**
 * registerView allows parsers to add a view tab at the top of the page
 *  it should be called instead of manually appending an output to the DOM
 *
 * node     - element holding the view's output
 * title    - label for the tab; with its first run of whitespace turned
 *            into '_' it also forms the ids 'ParserTab-…'/'ParserView-…'
 * funct    - optional function, run as funct(node,tab) each time the view
 *            is about to be shown
 * selected - show this view straight away when no 'wtParserView' cookie
 *            is set
 * first    - internal: set for the "Unchanged" view, which is already in
 *            place and needs neither a separator nor inserting
 *
 * On the first call the tab bar is created above 'bodyContent', the
 *  content is wrapped in a new div, and that wrapper is registered as
 *  the "Unchanged" view.
**/
function parsers_registerView(node,title,funct,selected,first){

    if(!parsers.view){
        var content = document.getElementById('bodyContent');
        //Create tabs at top of page
        parsers.tabs = document.createElement("div");
        parsers.tabs.className="ParserTabs";
        content.parentNode.insertBefore(parsers.tabs,content);
        //Wrap the untouched content so it can be hidden like any view
        var wrapper = document.createElement("div");
        content.parentNode.insertBefore(wrapper,content);
        content.parentNode.removeChild(content);
        wrapper.appendChild(content);
        parsers.view={};
        parsers.insertPoint=wrapper;
        parsers_registerView(wrapper,"Unchanged",false,false,true);
    }

    //Create tab for new view.
    var tab = document.createElement("span");
    tab.className="ParserTabHidden";
    tab.appendChild(document.createTextNode(title));

    var key = title.replace(/[ \s\r\n]+/,'_');
    tab.setAttribute("id","ParserTab-"+key);
    node.className="ParserViewHidden";
    node.setAttribute("id","ParserView-"+key);

    try{
        tab.addEventListener('click',function(e){
            parsers_tabParser(e.target);
        },false);
    }catch(e){
        //Older Internet Explorer event model
        tab.attachEvent('onclick',function(){
            parsers_tabParser(window.event.srcElement);
        });
    }

    if(!first){
        parsers.tabs.appendChild(document.createTextNode(' • '));
        parsers.insertPoint.parentNode.insertBefore(node,parsers.insertPoint);
    }
    parsers.tabs.appendChild(tab);
    parsers.view[key] = [node,tab,funct];

    //Show now if asked to (and nothing was remembered), or if this is the
    // view remembered in the cookie
    if( (selected && !getCookie('wtParserView'))
      ||(getCookie('wtParserView')==tab.id) ){
        parsers_tabParser(tab);
    }
}
/**
 * Essentially the onclick handler for parser view tabs: given a tab it
 *  looks up the view registered under the tab's id, marks the view that
 *  was showing as hidden, remembers the new choice in the 'wtParserView'
 *  cookie, runs the view's optional function as funct(node,tab) and then
 *  marks the new tab and view as shown.
 *  Showing and hiding is done purely by switching class names.
 *  Tabs with no registered view are ignored.
**/
function parsers_tabParser(tab){
    var key = tab.getAttribute("id").replace(/^ParserTab-/,"");
    var next = parsers.view[key];
    if(!next){
        return;
    }

    var previous = parsers.currentView;
    if(previous){
        previous[1].className="ParserTabHidden";
        previous[0].className="ParserViewHidden";
    }

    parsers.currentView = next;
    setCookie('wtParserView',next[1].getAttribute('id'));

    if(typeof(next[2])=='function'){
        next[2](next[0],next[1]);
    }

    //Read the current view afresh: it is whatever is selected once the
    // view's own function has run
    var shown = parsers.currentView;
    shown[1].className="ParserTabShown";
    shown[0].className="ParserViewShown";
}

//Get this show on the road: the parser is hooked in only when viewing an
// existing page in the main namespace
if(wgAction=='view' && wgNamespaceNumber==0 && wgArticleId!=0){
    addOnloadHook(wtp_init);
}