MediaWiki:Gadget-Transkripsjon-kyrillisk-wikitext.js

Fra Wikipedia, den frie encyklopedi

Merk: Etter publisering vil det kanskje være nødvendig å slette mellomlageret i nettleseren din for å se endringene.

  • Firefox / Safari: Hold Shift mens du klikker på Oppdater, eller trykk enten Ctrl+F5 eller Ctrl+R (⌘+R på Mac)
  • Google Chrome: Trykk Ctrl+Shift+R (⌘+Shift+R på Mac)
  • Internet Explorer / Edge: Hold Ctrl mens du trykker på Oppdater eller trykk Ctrl+F5
  • Opera: Trykk Ctrl+F5.
/*jslint browser: true*/
/*global $, jQuery, alert*/
/* See https://github.com/kjetilree-WMNO/kyrillisk for test cases   */
$(function() {

	/* Delay, in milliseconds, passed to setTimeout before the dropdown markup is appended to the toolbar. */
	var WAIT_TIME_BEFORE_TRYING_TO_ADD_BUTTON = 3500;

    /*
     * Plain letter-for-letter table used by transcribeFromRussian: each Russian
     * letter (upper and lower case) maps to its default Latin spelling.
     * Context-dependent exceptions are applied afterwards by the transcriber.
     *
     * The hard sign (Ъ/ъ) is listed explicitly with an empty replacement.
     * isRussianCharacter accepts it (its code lies inside 1040–1103), so without
     * an entry the lookup produced `undefined`, which only vanished because
     * Array.prototype.join renders undefined as an empty string.
     */
    var russianMapping = {
        'А': 'A', 'а': 'a',
        'Б': 'B', 'б': 'b',
        'В': 'V', 'в': 'v',
        'Г': 'G', 'г': 'g',
        'Д': 'D', 'д': 'd',
        'Е': 'E', 'е': 'e',
        'Ё': 'Jo', 'ё': 'jo',
        'Ж': 'Zj', 'ж': 'zj',
        'З': 'Z', 'з': 'z',
        'И': 'I', 'и': 'i',
        'Й': 'J', 'й': 'j',
        'К': 'K', 'к': 'k',
        'Л': 'L', 'л': 'l',
        'М': 'M', 'м': 'm',
        'Н': 'N', 'н': 'n',
        'О': 'O', 'о': 'o',
        'П': 'P', 'п': 'p',
        'Р': 'R', 'р': 'r',
        'С': 'S', 'с': 's',
        'Т': 'T', 'т': 't',
        'У': 'U', 'у': 'u',
        'Ф': 'F', 'ф': 'f',
        'Х': 'Kh', 'х': 'kh',
        'Ц': 'Ts', 'ц': 'ts',
        'Ч': 'Tsj', 'ч': 'tsj',
        'Ш': 'Sj', 'ш': 'sj',
        'Щ': 'Sjtsj', 'щ': 'sjtsj',
        'Ъ': '', 'ъ': '',
        'Ы': 'Y', 'ы': 'y',
        'Ь': 'J', 'ь': 'j',
        'Э': 'E', 'э': 'e',
        'Ю': 'Ju', 'ю': 'ju',
        'Я': 'Ja', 'я': 'ja'
    };

    /*
     * Plain letter-for-letter table used by transcribeFromArmenian.
     * Each row holds one letter: capital form first, small form second.
     * Digraphs, the և ligature and position-dependent spellings are handled
     * by the transcriber after this lookup.
     */
    var armenianMapping = {
        'Ա': 'A', 'ա': 'a',
        'Բ': 'B', 'բ': 'b',
        'Գ': 'G', 'գ': 'g',
        'Դ': 'D', 'դ': 'd',
        'Ե': 'E', 'ե': 'e',
        'Զ': 'Z', 'զ': 'z',
        'Է': 'E', 'է': 'e',
        'Ը': 'E', 'ը': 'e',
        'Թ': 'T', 'թ': 't',
        'Ժ': 'Zj', 'ժ': 'zj',
        'Ի': 'I', 'ի': 'i',
        'Լ': 'L', 'լ': 'l',
        'Խ': 'Kh', 'խ': 'kh',
        'Ծ': 'Ts', 'ծ': 'ts',
        'Կ': 'K', 'կ': 'k',
        'Հ': 'H', 'հ': 'h',
        'Ձ': 'Dz', 'ձ': 'dz',
        'Ղ': 'Gh', 'ղ': 'gh',
        'Ճ': 'Tsj', 'ճ': 'tsj',
        'Մ': 'M', 'մ': 'm',
        'Յ': 'J', 'յ': 'j',
        'Ն': 'N', 'ն': 'n',
        'Շ': 'Sj', 'շ': 'sj',
        'Ո': 'O', 'ո': 'o',
        'Չ': 'Tsj', 'չ': 'tsj',
        'Պ': 'P', 'պ': 'p',
        'Ջ': 'Dzj', 'ջ': 'dzj',
        'Ռ': 'R', 'ռ': 'r',
        'Ս': 'S', 'ս': 's',
        'Վ': 'V', 'վ': 'v',
        'Տ': 'T', 'տ': 't',
        'Ր': 'R', 'ր': 'r',
        'Ց': 'Ts', 'ց': 'ts',
        'Ւ': 'W', 'ւ': 'w',
        'Փ': 'P', 'փ': 'p',
        'Ք': 'K', 'ք': 'k',
        'Օ': 'O', 'օ': 'o',
        'Ֆ': 'F', 'ֆ': 'f'
    };

    /*
     * Plain letter-for-letter table used by transcribeFromUkrainian.
     * Each row holds one letter: capital form first, small form second.
     * The soft sign maps to nothing by default, and the apostrophe maps to
     * itself; transcribeFromUkrainian inspects the apostrophe in its context
     * rules and strips it after joining.
     */
    var ukrainianMapping = {
        'А': 'A', 'а': 'a',
        'Б': 'B', 'б': 'b',
        'В': 'V', 'в': 'v',
        'Г': 'H', 'г': 'h',
        'Ґ': 'G', 'ґ': 'g',
        'Д': 'D', 'д': 'd',
        'Е': 'E', 'е': 'e',
        'Є': 'Je', 'є': 'je',
        'Ж': 'Zj', 'ж': 'zj',
        'З': 'Z', 'з': 'z',
        'И': 'Y', 'и': 'y',
        'І': 'I', 'і': 'i',
        'Ї': 'Ji', 'ї': 'ji',
        'Й': 'J', 'й': 'j',
        'К': 'K', 'к': 'k',
        'Л': 'L', 'л': 'l',
        'М': 'M', 'м': 'm',
        'Н': 'N', 'н': 'n',
        'О': 'O', 'о': 'o',
        'П': 'P', 'п': 'p',
        'Р': 'R', 'р': 'r',
        'С': 'S', 'с': 's',
        'Т': 'T', 'т': 't',
        'У': 'U', 'у': 'u',
        'Ф': 'F', 'ф': 'f',
        'Х': 'Kh', 'х': 'kh',
        'Ц': 'Ts', 'ц': 'ts',
        'Ч': 'Tsj', 'ч': 'tsj',
        'Ш': 'Sj', 'ш': 'sj',
        'Щ': 'Sjtsj', 'щ': 'sjtsj',
        'Ь': '', 'ь': '',
        'Ю': 'Ju', 'ю': 'ju',
        'Я': 'Ja', 'я': 'ja',
        '’': '’'
    };

    /**
     * Tells whether a character belongs to the Russian alphabet.
     * Only the first UTF-16 code unit of `char` is inspected.
     *
     * @param {string} char Character to classify.
     * @returns {boolean} true for codes 1040–1103 (А–я) and for 1025 / 1105 (Ё / ё).
     */
    var isRussianCharacter = function(char) {
        var code = char.charCodeAt(0);
        var inBasicRange = code >= 1040 && code <= 1103;
        var isYo = code === 1025 || code === 1105;
        return inBasicRange || isYo;
    };

    /**
     * Tells whether a character is an Armenian letter that armenianMapping can
     * translate. Only the first UTF-16 code unit of `char` is inspected.
     *
     * The capital (1329–1366) and small (1377–1414) ranges are tested
     * separately. The codes between them (1367–1376) have no entry in
     * armenianMapping, so treating them as letters made the transcriber look
     * them up, get `undefined`, and silently drop them from the output.
     * They now pass through unchanged.
     *
     * @param {string} char Character to classify.
     * @returns {boolean} true when the code is in one of the two letter ranges.
     */
    var isArmenianCharacter = function(char) {
        var code = char.charCodeAt(0);
        var isCapitalLetter = code >= 1329 && code <= 1366;
        var isSmallLetter = code >= 1377 && code <= 1414;
        return isCapitalLetter || isSmallLetter;
    };

    /**
     * Tells whether a character is one that ukrainianMapping can translate:
     * a letter of the Ukrainian alphabet or the typographic apostrophe (’).
     * Letters are recognised by the first UTF-16 code unit of `char`; the
     * apostrophe is recognised by comparing the whole argument.
     *
     * @param {string} char Character to classify.
     * @returns {boolean} true for a Ukrainian letter or ’.
     */
    var isUkrainianCharacter = function(char) {
        var code = char.charCodeAt(0);

        switch (code) {
            case 1028: // Є
            case 1030: // І
            case 1031: // Ї
            case 1068: // Ь
            case 1100: // ь
            case 1102: // ю
            case 1103: // я
            case 1108: // є
            case 1110: // і
            case 1111: // ї
            case 1168: // Ґ
            case 1169: // ґ
                return true;
        }

        // А–Щ, then Ю–щ (the second range spans the end of the capitals and the start of the small letters).
        if ((code >= 1040 && code <= 1065) || (code >= 1070 && code <= 1097)) {
            return true;
        }

        return char === '’';
    };

    /**
     * Tells whether the argument contains one of the vowel letters listed in
     * the pattern below. The argument is coerced to a string by RegExp#test,
     * so `undefined` (a missing neighbour) yields false.
     *
     * @param {string} char Character to classify.
     * @returns {boolean} true when a listed vowel is found.
     */
    var isCyrillicVowel = function(char) {
        var vowelPattern = /[АаЭэЫыУуОоЯяЕеЁёЮюИи]/;
        return vowelPattern.test(char);
    };

    /**
     * Tells whether the argument contains one of the consonant letters listed
     * in the pattern below. The argument is coerced to a string by
     * RegExp#test, so `undefined` (a missing neighbour) yields false.
     *
     * @param {string} char Character to classify.
     * @returns {boolean} true when a listed consonant is found.
     */
    var isCyrillicConsonant = function(char) {
        var consonantPattern = /[БбВвГгДдЖжЗзЙйКкЛлМмНнПпРрСсТтФфХхЦцЧчШшЩщ]/;
        return consonantPattern.test(char);
    };

    /**
     * Tells whether the argument is exactly one of the letters ч, ш or щ,
     * in either case.
     *
     * @param {string} char Character to classify.
     * @returns {boolean} true for Ч, ч, Ш, ш, Щ or щ.
     */
    var isCheShaShcha = function(char) {
        switch (char) {
            case 'Ч':
            case 'ч':
            case 'Ш':
            case 'ш':
            case 'Щ':
            case 'щ':
                return true;
            default:
                return false;
        }
    };

    /**
     * Tells whether the argument is exactly one of the letters с or з,
     * in either case.
     *
     * @param {string} char Character to classify.
     * @returns {boolean} true for С, с, З or з.
     */
    var isEsZe = function(char) {
        var sibilants = ['С', 'с', 'З', 'з'];
        return sibilants.indexOf(char) !== -1;
    };

    /**
     * Collapses every run of consecutive lower-case 'j' characters into a
     * single 'j'. Upper-case 'J' is left untouched.
     *
     * The previous loop assigned to an undeclared counter `i` (an implicit
     * global, and a ReferenceError in strict mode). It also skipped the
     * character that slid into place after a deletion, so a run of three or
     * more still left a double 'j' behind.
     *
     * @param {string} translatedString Text produced by a transcriber.
     * @returns {string} The text with no two adjacent 'j' characters.
     */
    var removeDualJ = function(translatedString) {
        return translatedString.replace(/j{2,}/g, 'j');
    };

    // Data and rules are from http://www.sprakradet.no/upload/Rettskriving%20og%20ordlister/russ.pdf
    /**
     * Transcribes Russian text to the Latin alphabet.
     *
     * Every character is first translated through russianMapping (characters
     * rejected by isRussianCharacter are copied unchanged). The
     * context-dependent rules for е/ё/ю/я and for the soft sign are applied
     * on top, and finally doubled 'j' is collapsed by removeDualJ.
     *
     * Changes compared with the earlier version:
     *  - A word boundary is the string edge or ANY character that is not a
     *    Russian letter. Previously only a literal space counted, so words
     *    after a newline, '(' or '«', or before a comma, were mis-transcribed.
     *  - 'е' directly after the hard sign becomes 'je'. The hard sign itself
     *    produces no output, so the 'j' was lost.
     *  - 'Ё' after с/з yields 'Io', matching the neighbouring 'Iu' and 'Ia'
     *    (it used to be 'IO').
     *
     * @param {string} cyrillicString Text to transcribe.
     * @returns {string} The transcription.
     */
    var transcribeFromRussian = function(cyrillicString) {
        var latinString = [];
        var i, previous, current, next;

        /* A missing neighbour or a non-Russian character delimits a word. */
        var isWordBoundary = function(neighbour) {
            return neighbour === undefined || !isRussianCharacter(neighbour);
        };

        /* Do the 'plain' transformations first */
        for (i = 0; i < cyrillicString.length; i++) {
            current = cyrillicString[i];
            latinString[i] = isRussianCharacter(current) ? russianMapping[current] : current;
        }

        /* Context rules for е, ё, ю and я. Later rules overwrite earlier ones. */
        for (i = 0; i < cyrillicString.length; i++) {
            previous = cyrillicString[i - 1];
            current = cyrillicString[i];

            /* 'е' → 'je' at the beginning of a word (Елена → Jelena) */
            if (isWordBoundary(previous)) {
                if (current === 'Е') {
                    latinString[i] = 'Je';
                }
                if (current === 'е') {
                    latinString[i] = 'je';
                }
            }

            /* 'е' → 'je' after a vowel (Николаев → Nikolajev) and after the hard sign (подъезд → podjezd) */
            if (current === 'е' && (isCyrillicVowel(previous) || previous === 'ъ' || previous === 'Ъ')) {
                latinString[i] = 'je';
            }

            /* 'ё' → 'o' after 'ч', 'ш' and 'щ' */
            if (isCheShaShcha(previous)) {
                if (current === 'ё') {
                    latinString[i] = 'o';
                }
                if (current === 'Ё') {
                    latinString[i] = 'O';
                }
            }

            /* 'ё' → 'io', 'ю' → 'iu', 'я' → 'ia' after 'с' or 'з' */
            if (isEsZe(previous)) {
                if (current === 'ё') {
                    latinString[i] = 'io';
                }
                if (current === 'Ё') {
                    latinString[i] = 'Io';
                }
                if (current === 'ю') {
                    latinString[i] = 'iu';
                }
                if (current === 'Ю') {
                    latinString[i] = 'Iu';
                }
                if (current === 'я') {
                    latinString[i] = 'ia';
                }
                if (current === 'Я') {
                    latinString[i] = 'Ia';
                }
            }
        }

        /* Soft sign: by default it is 'j' (from the table); the cases below override that. */
        for (i = 0; i < cyrillicString.length; i++) {
            current = cyrillicString[i];
            if (current !== 'Ь' && current !== 'ь') {
                continue;
            }
            previous = cyrillicString[i - 1];
            next = cyrillicString[i + 1];

            // Dropped at the end of a word
            if (isWordBoundary(next)) {
                latinString[i] = '';
            }

            // The remaining two rules apply to the lower-case sign only.
            if (current === 'ь') {
                // Dropped between consonants
                if (isCyrillicConsonant(previous) && isCyrillicConsonant(next)) {
                    latinString[i] = '';
                }

                // 'i' between ('с' or 'з') and a vowel
                if (isEsZe(previous) && isCyrillicVowel(next)) {
                    latinString[i] = 'i';
                }
            }
        }

        /* join() renders any undefined slot as an empty string. */
        latinString = latinString.join('');
        /* Special rule: Do not allow two consecutive 'j' characters */
        latinString = removeDualJ(latinString);
        return latinString;
    };

    /* Rules are from https://no.wikipedia.org/w/index.php?title=Wikipedia:Transkripsjon_fra_armensk&oldid=16967698  */
    /**
     * Transcribes Armenian text to the Latin alphabet.
     *
     * Every character is first translated through armenianMapping (characters
     * rejected by isArmenianCharacter are copied unchanged). The digraphs
     * ու / իւ, the ligature և and the position-dependent spellings of
     * Ե, Ո and յ are then applied on top.
     *
     * Change compared with the earlier version: a word is considered to start
     * at the string edge or after ANY character that is neither an Armenian
     * letter nor the ligature և. Previously only a literal space counted, so
     * the word-initial 'Je' / 'Vo' spellings were missed after a newline,
     * '(' and similar.
     *
     * @param {string} armenianString Text to transcribe.
     * @returns {string} The transcription.
     */
    var transcribeFromArmenian = function(armenianString) {
        var latinString = [];
        var i, previous, current;

        /* Letters after which 'յ' is written 'i' instead of 'j'. */
        var lettersBeforeI = ['Ծ', 'ծ', 'Ց', 'ց', 'Ս', 'ս', 'Զ', 'զ', 'Ձ', 'ձ'];

        /* True when the character before the current one does not belong to a word. */
        var startsWord = function(before) {
            if (before === undefined) {
                return true;
            }
            // 'և' is outside the range accepted by isArmenianCharacter but is still part of a word.
            return !(isArmenianCharacter(before) || before === 'և');
        };

        /* Do the 'plain' transformations first */
        for (i = 0; i < armenianString.length; i++) {
            current = armenianString[i];
            latinString[i] = isArmenianCharacter(current) ? armenianMapping[current] : current;
        }

        /* Special cases: */
        for (i = 0; i < armenianString.length; i++) {
            previous = armenianString[i - 1];
            current = armenianString[i];

            /* Digraphs ending in 'ւ': the pair is rewritten as a whole. */
            if (current === 'ւ') {
                if (previous === 'Ո') {
                    /* ու → u; the first half is blanked (this also discards a word-initial 'Vo') */
                    latinString[i - 1] = '';
                    latinString[i] = 'U';
                } else if (previous === 'ո') {
                    latinString[i - 1] = '';
                    latinString[i] = 'u';
                } else if (previous === 'Ի') {
                    /* իւ → ju */
                    latinString[i - 1] = 'J';
                    latinString[i] = 'u';
                } else if (previous === 'ի') {
                    latinString[i - 1] = 'j';
                    latinString[i] = 'u';
                }
            }

            /* Ligature և → ev */
            /* NOTE(review): the original comment also mentions 'jev'; no rule for it is implemented — confirm whether a word-initial 'jev' is wanted. */
            if (current === 'և') {
                latinString[i] = 'ev';
            }

            /* Word-initial 'Ե' / 'ե' and 'Ո' / 'ո' */
            if (startsWord(previous)) {
                if (current === 'Ե') {
                    latinString[i] = 'Je';
                }
                if (current === 'ե') {
                    latinString[i] = 'je';
                }
                if (current === 'Ո') {
                    latinString[i] = 'Vo';
                }
                if (current === 'ո') {
                    latinString[i] = 'vo';
                }
            }

            /* 'յ' → 'i' after ծ, ց, ս, զ and ձ (either case) */
            if (current === 'յ' && lettersBeforeI.indexOf(previous) !== -1) {
                latinString[i] = 'i';
            }
        }

        /* join() renders any undefined slot as an empty string. */
        return latinString.join('');
    };

    /**
     * Transcribes Ukrainian text to the Latin alphabet.
     *
     * Every character is first translated through ukrainianMapping
     * (characters rejected by isUkrainianCharacter are copied unchanged).
     * The context rules for є, ь, ю and я after з/с/ц are applied on top.
     * Finally the apostrophes are removed, doubled 'j' is collapsed by
     * removeDualJ, and the result is trimmed.
     *
     * Changes compared with the earlier version:
     *  - ALL apostrophes (’) are removed. String#replace with a string pattern
     *    only removed the first one, so every later word kept its apostrophe.
     *  - The з/с/ц + ь rule now accepts a capital 'О' after the soft sign,
     *    like the general consonant + ь rule already did.
     *
     * @param {string} ukrainianString Text to transcribe.
     * @returns {string} The transcription, without leading/trailing whitespace.
     */
    var transcribeFromUkrainian = function(ukrainianString) {
        var latinString = [];
        var i, current, previous, next, result;

        /* True for з, с and ц in either case. */
        var isZeEsTse = function(letter) {
            return letter === 'З' || letter === 'з' ||
                letter === 'С' || letter === 'с' ||
                letter === 'Ц' || letter === 'ц';
        };

        /*
         * True when position `index` comes directly after з/с/ц, or, if
         * `acrossApostrophe` is set, after з/с/ц followed by an apostrophe.
         */
        var followsZeEsTse = function(index, acrossApostrophe) {
            var before = ukrainianString[index - 1];
            if (isZeEsTse(before)) {
                return true;
            }
            return acrossApostrophe === true && before === '’' && isZeEsTse(ukrainianString[index - 2]);
        };

        /* Do the 'plain' transformations first */
        for (i = 0; i < ukrainianString.length; i++) {
            current = ukrainianString[i];
            latinString[i] = isUkrainianCharacter(current) ? ukrainianMapping[current] : current;
        }

        /* Context rules. Each rule concerns a different letter, so they cannot interfere with each other. */
        for (i = 0; i < ukrainianString.length; i++) {
            current = ukrainianString[i];
            previous = ukrainianString[i - 1];
            next = ukrainianString[i + 1];

            switch (current) {
                /* 'Є' and 'є' rule: 'ie' after з/с/ц, also across an apostrophe */
                case 'Є':
                    if (followsZeEsTse(i, true)) {
                        latinString[i] = 'Ie';
                    }
                    break;
                case 'є':
                    if (followsZeEsTse(i, true)) {
                        latinString[i] = 'ie';
                    }
                    break;

                /* 'Ь' and 'ь' rule: only before 'о'; 'j' after a consonant, but 'i' after з/с/ц */
                case 'Ь':
                case 'ь':
                    if (next === 'О' || next === 'о') {
                        if (isCyrillicConsonant(previous)) {
                            latinString[i] = current === 'Ь' ? 'J' : 'j';
                        }
                        if (isZeEsTse(previous)) {
                            latinString[i] = current === 'Ь' ? 'I' : 'i';
                        }
                    }
                    break;

                /* 'Ю' and 'ю' rule: 'iu' directly after з/с/ц (no apostrophe variant) */
                case 'Ю':
                    if (followsZeEsTse(i, false)) {
                        latinString[i] = 'Iu';
                    }
                    break;
                case 'ю':
                    if (followsZeEsTse(i, false)) {
                        latinString[i] = 'iu';
                    }
                    break;

                /* 'Я' and 'я' rule: 'ia' after з/с/ц, also across an apostrophe */
                case 'Я':
                    if (followsZeEsTse(i, true)) {
                        latinString[i] = 'Ia';
                    }
                    break;
                case 'я':
                    if (followsZeEsTse(i, true)) {
                        latinString[i] = 'ia';
                    }
                    break;
            }
        }

        result = latinString.join('');
        /* The apostrophe has done its job in the rules above; drop every occurrence. */
        result = result.replace(/’/g, '');
        result = removeDualJ(result);
        return result.trim();
    };

    /**
     * Transcribes `sourceText` with the transcriber for `sourceLanguage` and
     * writes the result over the current window selection.
     *
     * Changes compared with the earlier version:
     *  - A selection made backwards (focus before anchor) is normalised.
     *    Previously the negative length was handed to replaceData.
     *  - An unknown language is ignored instead of inserting "undefined".
     *  - Nothing is done when there is no selection, when the selection
     *    spans more than one node, or when the node has no replaceData
     *    method. In the two latter cases the anchor/focus offsets do not
     *    describe a range inside one text node.
     *
     * @param {string} sourceText Text to transcribe (the selected text).
     * @param {string} sourceLanguage 'Russian', 'Armenian' or 'Ukrainian'.
     * @returns {undefined}
     */
    var processAndUpdateText = function(sourceText, sourceLanguage) {
        var transcribedText;

        if (sourceLanguage === 'Russian') {
            transcribedText = transcribeFromRussian(sourceText);
        } else if (sourceLanguage === 'Armenian') {
            transcribedText = transcribeFromArmenian(sourceText);
        } else if (sourceLanguage === 'Ukrainian') {
            transcribedText = transcribeFromUkrainian(sourceText);
        } else {
            return;
        }

        var selection = window.getSelection();
        if (!selection || !selection.anchorNode) {
            return;
        }

        var node = selection.anchorNode;
        if (node !== selection.focusNode || typeof node.replaceData !== 'function') {
            return;
        }

        // anchor is where the drag started, focus where it ended; either may come first.
        var start = Math.min(selection.anchorOffset, selection.focusOffset);
        var end = Math.max(selection.anchorOffset, selection.focusOffset);

        node.replaceData(start, end - start, transcribedText);
    };

    /**
     * Returns the text of the document's current selection.
     *
     * @returns {string} Result of calling toString() on document.getSelection().
     */
    var getSelectedText = function() {
        var currentSelection = document.getSelection();
        return currentSelection.toString();
    };

    /**
     * Schedules, after WAIT_TIME_BEFORE_TRYING_TO_ADD_BUTTON milliseconds, the
     * insertion of the transcription dropdown into the first
     * '.oo-ui-toolbar-tools' element: a handle button, a menu with one entry
     * per language, and the mousedown handlers that drive them.
     *
     * @returns {undefined}
     */
    var addTranscriptionDropdown = function() {

        /* Shows or hides the language menu. */
        var toggleMenu = function() {
            $('#transcription-group-menu').toggle();
        };

        /* Wires a menu entry: on mousedown, close the menu and transcribe the selected text. */
        var bindLanguageEntry = function(entrySelector, languageName) {
            $(entrySelector).mousedown(function(event) {
                // preventDefault keeps the browser's default mousedown handling from running.
                event.preventDefault();
                toggleMenu();
                processAndUpdateText(getSelectedText(), languageName);
            });
        };

        var insertDropdown = function() {
            var $toolbar = $('.oo-ui-toolbar-tools').first();

            /* The handle: a toolbar group containing the clickable button. */
            $toolbar.append('<div id="transcription-group" class="ve-ui-toolbar-group-style oo-ui-widget oo-ui-toolGroup oo-ui-iconElement oo-ui-indicatorElement oo-ui-popupToolGroup oo-ui-listToolGroup oo-ui-widget-enabled"></div>');
            $('#transcription-group').append('<span tabindex="0" id="cyrillic" aria-disabled="false" class="oo-ui-popupToolGroup-handle" role="button" aria-expanded="false"><span class="oo-ui-iconElement-icon oo-ui-icon-language"></span><span class="oo-ui-labelElement-label oo-ui-labelElement-invisible">Tekststil</span><span class="oo-ui-indicatorElement-indicator oo-ui-indicator-down"></span></span>');

            /* The menu, a sibling of the handle, with its three language entries. */
            $toolbar.append('<div id="transcription-group-menu" class="oo-ui-toolGroup-tools oo-ui-clippableElement-clippable oo-ui-floatableElement-floatable oo-ui-popupToolGroup-tools oo-ui-listToolGroup-tools oo-ui-toolGroup-enabled-tools" style="left: 550px; top: 43px; width: 120px; max-height: 604px;"></div>');

            $('#transcription-group-menu')
                .append('<span id="transcription-russian" class="oo-ui-widget oo-ui-iconElement oo-ui-tool-with-icon oo-ui-tool oo-ui-tool-name-bold oo-ui-widget-enabled" aria-disabled="false"><a style="padding-left: 15px;" tabindex="0" aria-disabled="false" class="oo-ui-tool-link" role="button"><span class="oo-ui-tool-title">Russisk</span></a></span>')
                .append('<span id="transcription-ukranian" class="oo-ui-widget oo-ui-iconElement oo-ui-tool-with-icon oo-ui-tool oo-ui-tool-name-bold oo-ui-widget-enabled" aria-disabled="false"><a style="padding-left: 15px;" tabindex="0" aria-disabled="false" class="oo-ui-tool-link" role="button"><span class="oo-ui-tool-title">Ukrainsk</span></a></span>')
                .append('<span id="transcription-armenian" class="oo-ui-widget oo-ui-iconElement oo-ui-tool-with-icon oo-ui-tool oo-ui-tool-name-bold oo-ui-widget-enabled" aria-disabled="false"><a style="padding-left: 15px;" tabindex="0" aria-disabled="false" class="oo-ui-tool-link" role="button"><span class="oo-ui-tool-title">Armensk</span></a></span>');

            /* Clicking the handle only opens or closes the menu. */
            $('#transcription-group').mousedown(function(event) {
                event.preventDefault();
                toggleMenu();
            });

            bindLanguageEntry('#transcription-ukranian', 'Ukrainian');
            bindLanguageEntry('#transcription-armenian', 'Armenian');
            bindLanguageEntry('#transcription-russian', 'Russian');
        };

        setTimeout(insertDropdown, WAIT_TIME_BEFORE_TRYING_TO_ADD_BUTTON);
    };

    $.when(mw.loader.using('ext.visualEditor.core'), $.ready).then(addTranscriptionDropdown);

});