MediaWiki:Gadget-Transkripsjon-kyrillisk.js

Fra Wikipedia, den frie encyklopedi

Merk: Etter publisering vil det kanskje være nødvendig å slette mellomlageret i nettleseren din for å se endringene.

  • Firefox / Safari: Hold Shift mens du klikker på Oppdater, eller trykk enten Ctrl+F5 eller Ctrl+R (⌘+R på Mac)
  • Google Chrome: Trykk Ctrl+Shift+R (⌘+Shift+R på Mac)
  • Internet Explorer / Edge: Hold Ctrl mens du trykker på Oppdater eller trykk Ctrl+F5
  • Opera: Trykk Ctrl+F5.
/*jslint browser: true*/
/*global $, jQuery, alert*/
/* See https://github.com/kjetilree-WMNO/kyrillisk for test cases   */
$(function() {

  /**
   * Plain letter-for-letter transcription table for Russian Cyrillic.
   *
   * Keys are single Cyrillic letters, values are the Latin replacement used
   * when no context rule applies. Upper- and lower-case forms are listed side
   * by side.
   *
   * The hard sign (Ъ/ъ) is listed explicitly as an empty replacement: the
   * code-point range accepted by isRussianCharacter() includes it, so without
   * an entry the lookup yields `undefined`, which only disappears from the
   * output because Array.prototype.join() renders it as an empty string.
   */
  var russianMapping = {
    'А': 'A', 'а': 'a',
    'Б': 'B', 'б': 'b',
    'В': 'V', 'в': 'v',
    'Г': 'G', 'г': 'g',
    'Д': 'D', 'д': 'd',
    'Е': 'E', 'е': 'e',
    'Ё': 'Jo', 'ё': 'jo',
    'Ж': 'Zj', 'ж': 'zj',
    'З': 'Z', 'з': 'z',
    'И': 'I', 'и': 'i',
    'Й': 'J', 'й': 'j',
    'К': 'K', 'к': 'k',
    'Л': 'L', 'л': 'l',
    'М': 'M', 'м': 'm',
    'Н': 'N', 'н': 'n',
    'О': 'O', 'о': 'o',
    'П': 'P', 'п': 'p',
    'Р': 'R', 'р': 'r',
    'С': 'S', 'с': 's',
    'Т': 'T', 'т': 't',
    'У': 'U', 'у': 'u',
    'Ф': 'F', 'ф': 'f',
    'Х': 'Kh', 'х': 'kh',
    'Ц': 'Ts', 'ц': 'ts',
    'Ч': 'Tsj', 'ч': 'tsj',
    'Ш': 'Sj', 'ш': 'sj',
    'Щ': 'Sjtsj', 'щ': 'sjtsj',
    'Ъ': '', 'ъ': '',
    'Ы': 'Y', 'ы': 'y',
    'Ь': 'J', 'ь': 'j',
    'Э': 'E', 'э': 'e',
    'Ю': 'Ju', 'ю': 'ju',
    'Я': 'Ja', 'я': 'ja'
  };

  /**
   * Plain letter-for-letter transcription table for Armenian.
   *
   * Keys are single Armenian letters, values are the Latin replacement used
   * when no context rule applies. Upper- and lower-case forms are listed side
   * by side.
   */
  var armenianMapping = {
    'Ա': 'A', 'ա': 'a',
    'Բ': 'B', 'բ': 'b',
    'Գ': 'G', 'գ': 'g',
    'Դ': 'D', 'դ': 'd',
    'Ե': 'E', 'ե': 'e',
    'Զ': 'Z', 'զ': 'z',
    'Է': 'E', 'է': 'e',
    'Ը': 'E', 'ը': 'e',
    'Թ': 'T', 'թ': 't',
    'Ժ': 'Zj', 'ժ': 'zj',
    'Ի': 'I', 'ի': 'i',
    'Լ': 'L', 'լ': 'l',
    'Խ': 'Kh', 'խ': 'kh',
    'Ծ': 'Ts', 'ծ': 'ts',
    'Կ': 'K', 'կ': 'k',
    'Հ': 'H', 'հ': 'h',
    'Ձ': 'Dz', 'ձ': 'dz',
    'Ղ': 'Gh', 'ղ': 'gh',
    'Ճ': 'Tsj', 'ճ': 'tsj',
    'Մ': 'M', 'մ': 'm',
    'Յ': 'J', 'յ': 'j',
    'Ն': 'N', 'ն': 'n',
    'Շ': 'Sj', 'շ': 'sj',
    'Ո': 'O', 'ո': 'o',
    'Չ': 'Tsj', 'չ': 'tsj',
    'Պ': 'P', 'պ': 'p',
    'Ջ': 'Dzj', 'ջ': 'dzj',
    'Ռ': 'R', 'ռ': 'r',
    'Ս': 'S', 'ս': 's',
    'Վ': 'V', 'վ': 'v',
    'Տ': 'T', 'տ': 't',
    'Ր': 'R', 'ր': 'r',
    'Ց': 'Ts', 'ց': 'ts',
    'Ւ': 'W', 'ւ': 'w',
    'Փ': 'P', 'փ': 'p',
    'Ք': 'K', 'ք': 'k',
    'Օ': 'O', 'օ': 'o',
    'Ֆ': 'F', 'ֆ': 'f'
  };

  /**
   * Plain letter-for-letter transcription table for Ukrainian Cyrillic.
   *
   * Keys are single characters, values are the Latin replacement used when no
   * context rule applies. Upper- and lower-case forms are listed side by side.
   * The soft sign maps to an empty string, and the typographic apostrophe maps
   * to itself.
   */
  var ukrainianMapping = {
    'А': 'A', 'а': 'a',
    'Б': 'B', 'б': 'b',
    'В': 'V', 'в': 'v',
    'Г': 'H', 'г': 'h',
    'Ґ': 'G', 'ґ': 'g',
    'Д': 'D', 'д': 'd',
    'Е': 'E', 'е': 'e',
    'Є': 'Je', 'є': 'je',
    'Ж': 'Zj', 'ж': 'zj',
    'З': 'Z', 'з': 'z',
    'И': 'Y', 'и': 'y',
    'І': 'I', 'і': 'i',
    'Ї': 'Ji', 'ї': 'ji',
    'Й': 'J', 'й': 'j',
    'К': 'K', 'к': 'k',
    'Л': 'L', 'л': 'l',
    'М': 'M', 'м': 'm',
    'Н': 'N', 'н': 'n',
    'О': 'O', 'о': 'o',
    'П': 'P', 'п': 'p',
    'Р': 'R', 'р': 'r',
    'С': 'S', 'с': 's',
    'Т': 'T', 'т': 't',
    'У': 'U', 'у': 'u',
    'Ф': 'F', 'ф': 'f',
    'Х': 'Kh', 'х': 'kh',
    'Ц': 'Ts', 'ц': 'ts',
    'Ч': 'Tsj', 'ч': 'tsj',
    'Ш': 'Sj', 'ш': 'sj',
    'Щ': 'Sjtsj', 'щ': 'sjtsj',
    'Ь': '', 'ь': '',
    'Ю': 'Ju', 'ю': 'ju',
    'Я': 'Ja', 'я': 'ja',
    '’': '’'
  };

  /**
   * Tells whether the first UTF-16 code unit of `char` is a Russian letter.
   *
   * Accepts the contiguous range 1040-1103 (U+0410-U+044F) plus the two
   * code points 1025 and 1105 (Ё and ё), which lie outside that range.
   *
   * @param {string} char Character to classify.
   * @returns {boolean} True when the character is treated as Russian.
   */
  var isRussianCharacter = function(char) {
    var code = char.charCodeAt(0);
    var inMainRange = code >= 1040 && code <= 1103;
    var isYo = code === 1025 || code === 1105;
    return inMainRange || isYo;
  };

  /**
   * Tells whether the first UTF-16 code unit of `char` lies in the range
   * 1329-1414 (U+0531-U+0586), i.e. from the first upper-case to the last
   * lower-case letter of armenianMapping.
   *
   * @param {string} char Character to classify.
   * @returns {boolean} True when the character is treated as Armenian.
   */
  var isArmenianCharacter = function(char) {
    var code = char.charCodeAt(0);
    return code >= 1329 && code <= 1414;
  };

  /**
   * Tells whether `char` is handled by the Ukrainian transcription table.
   *
   * Accepted are the code-point ranges 1040-1065 and 1070-1097, a list of
   * individual code points outside those ranges, and the typographic
   * apostrophe.
   *
   * @param {string} char Character to classify.
   * @returns {boolean} True when the character is treated as Ukrainian.
   */
  var isUkrainianCharacter = function(char) {
    var code = char.charCodeAt(0);
    var singleCodes = [
      1028, 1030, 1031, 1068,
      1100, 1102, 1103, 1108,
      1110, 1111, 1168, 1169
    ];

    if (code >= 1040 && code <= 1065) {
      return true;
    }
    if (code >= 1070 && code <= 1097) {
      return true;
    }
    if (singleCodes.indexOf(code) !== -1) {
      return true;
    }
    return char === '’';
  };

  /**
   * Tells whether `char` contains one of the Cyrillic vowel letters listed in
   * the pattern below (either case).
   *
   * @param {string|undefined} char Character to classify; `undefined` (a
   *     neighbour outside the string) yields false.
   * @returns {boolean} True for a listed vowel.
   */
  var isCyrillicVowel = function(char) {
    var vowelPattern = /[АаЭэЫыУуОоЯяЕеЁёЮюИи]/;
    return vowelPattern.test(char);
  };

  /**
   * Tells whether `char` contains one of the Cyrillic consonant letters listed
   * in the pattern below (either case). The soft and hard signs are not
   * listed.
   *
   * @param {string|undefined} char Character to classify; `undefined` (a
   *     neighbour outside the string) yields false.
   * @returns {boolean} True for a listed consonant.
   */
  var isCyrillicConsonant = function(char) {
    var consonantPattern = /[БбВвГгДдЖжЗзЙйКкЛлМмНнПпРрСсТтФфХхЦцЧчШшЩщ]/;
    return consonantPattern.test(char);
  };

  /**
   * Tells whether `char` is exactly one of Ч, Ш or Щ (either case).
   *
   * @param {string|undefined} char Character to classify.
   * @returns {boolean} True for che, sha or shcha.
   */
  var isCheShaShcha = function(char) {
    var hushingLetters = ['Ч', 'ч', 'Ш', 'ш', 'Щ', 'щ'];
    return hushingLetters.indexOf(char) !== -1;
  };

  /**
   * Tells whether `char` is exactly one of С or З (either case).
   *
   * @param {string|undefined} char Character to classify.
   * @returns {boolean} True for es or ze.
   */
  var isEsZe = function(char) {
    switch (char) {
      case 'С':
      case 'с':
      case 'З':
      case 'з':
        return true;
      default:
        return false;
    }
  };

  /**
   * Collapses every run of consecutive lower-case 'j' characters into a
   * single 'j'.
   *
   * Context rules and the plain tables can each contribute a 'j' (for example
   * a soft sign followed by 'ja'), which would otherwise yield 'jj'.
   * Upper-case 'J' is left untouched.
   *
   * Implemented with a single regular-expression replace so that no loop
   * counter leaks into the global scope and runs longer than two are fully
   * collapsed.
   *
   * @param {string} translatedString Transcribed Latin text.
   * @returns {string} The text without repeated 'j'.
   */
  var removeDualJ = function(translatedString) {
    return translatedString.replace(/j{2,}/g, 'j');
  };

  // Data and rules are from http://www.sprakradet.no/upload/Rettskriving%20og%20ordlister/russ.pdf
  /**
   * Transcribes Russian Cyrillic text into Latin script.
   *
   * Works in three passes over the input, one output slot per input
   * character: a plain table lookup, the context rules for е/ё/ю/я, and the
   * context rules for the soft sign. The slots are then joined and repeated
   * 'j' characters are collapsed.
   *
   * A word starts or ends wherever the neighbouring character is missing or
   * is not a Russian letter, so newlines, punctuation and brackets delimit
   * words just like spaces do.
   *
   * @param {string} cyrillicString Text to transcribe.
   * @returns {string} The transcribed text; non-Russian characters are copied
   *     through unchanged.
   */
  var transcribeFromRussian = function(cyrillicString) {
    var latinString = [];
    var i, previous, current, next;

    // True when there is no neighbouring character or it is not a Russian letter.
    var isWordBoundary = function(neighbour) {
      return neighbour === undefined || !isRussianCharacter(neighbour);
    };

    /* Do the 'plain' transformations first */
    for (i = 0; i < cyrillicString.length; i++) {
      current = cyrillicString[i];
      latinString[i] = isRussianCharacter(current) ? russianMapping[current] : current;
    }

    /* Funky e/ë rules.  */
    for (i = 0; i < cyrillicString.length; i++) {
      previous = cyrillicString[i - 1];
      current = cyrillicString[i];

      /* 'e' → 'je' in beginning of word (Елена → Jelena) */
      if (isWordBoundary(previous)) {
        if (current === 'Е') {
          latinString[i] = 'Je';
        }
        if (current === 'е') {
          latinString[i] = 'je';
        }
      }

      /* 'e' → 'je' after vowel (Николаев → Nikolajev) */
      if (isCyrillicVowel(previous) && current === 'е') {
        latinString[i] = 'je';
      }

      /* 'ё' → 'o' after 'ч', 'ш' and 'щ'  */
      if (isCheShaShcha(previous)) {
        if (current === 'ё') {
          latinString[i] = 'o';
        }
        if (current === 'Ё') {
          latinString[i] = 'O';
        }
      }

      /* 'ё' → 'io' after 'с' or 'з'  */
      /* 'ю' → 'iu' after 'с' or 'з' */
      /* 'я' → 'ia' after 'с' or 'з' */
      if (isEsZe(previous)) {
        if (current === 'ё') {
          latinString[i] = 'io';
        }
        if (current === 'Ё') {
          // Title case, like 'Iu'/'Ia' below and 'Jo' in the plain table.
          latinString[i] = 'Io';
        }
        if (current === 'ю') {
          latinString[i] = 'iu';
        }
        if (current === 'Ю') {
          latinString[i] = 'Iu';
        }
        if (current === 'я') {
          latinString[i] = 'ia';
        }
        if (current === 'Я') {
          latinString[i] = 'Ia';
        }
      }
    }

    /* Fixing the 'ь' mess  */
    for (i = 0; i < cyrillicString.length; i++) {
      previous = cyrillicString[i - 1];
      current = cyrillicString[i];
      next = cyrillicString[i + 1];

      // Remove in end of words
      if (isWordBoundary(next) && (current === 'Ь' || current === 'ь')) {
        latinString[i] = '';
      }

      // Remove between consonants
      if (isCyrillicConsonant(previous) && current === 'ь' && isCyrillicConsonant(next)) {
        latinString[i] = '';
      }

      // Change to 'i' between ('с' or 'з') and vowel
      if (isEsZe(previous) && current === 'ь' && isCyrillicVowel(next)) {
        latinString[i] = 'i';
      }
    }

    /* Special rule: Do not allow two consecutive 'j' characters  */
    return removeDualJ(latinString.join(''));
  };

  /* Rules are from https://no.wikipedia.org/w/index.php?title=Wikipedia:Transkripsjon_fra_armensk&oldid=16967698  */
  /**
   * Transcribes Armenian text into Latin script.
   *
   * Works in two passes over the input, one output slot per input character:
   * a plain table lookup, then the digraph, ligature and context rules. The
   * slots are finally joined into one string.
   *
   * A word starts wherever the preceding character is missing or is not
   * Armenian, so newlines, punctuation and brackets delimit words just like
   * spaces do.
   *
   * @param {string} armenianString Text to transcribe.
   * @returns {string} The transcribed text; non-Armenian characters are
   *     copied through unchanged.
   */
  var transcribeFromArmenian = function(armenianString) {
    var latinString = [];
    var i, previous, current;

    // True when the character at `index` is the first one of a word.
    var startsWord = function(index) {
      var before = armenianString[index - 1];
      if (before === undefined) {
        return true;
      }
      // The ligature 'և' lies outside the range checked by isArmenianCharacter().
      return !(isArmenianCharacter(before) || before === 'և');
    };

    /* Do the 'plain' transformations first */
    for (i = 0; i < armenianString.length; i++) {
      current = armenianString[i];
      latinString[i] = isArmenianCharacter(current) ? armenianMapping[current] : current;
    }

    /* Special cases:  */
    for (i = 0; i < armenianString.length; i++) {
      previous = armenianString[i - 1];
      current = armenianString[i];

      /* Digraphs: the second letter decides, the first slot is rewritten too. */
      if (current === 'ւ') {
        /* ու  */
        if (previous === 'Ո') {
          latinString[i - 1] = '';
          latinString[i] = 'U';
        }
        if (previous === 'ո') {
          latinString[i - 1] = '';
          latinString[i] = 'u';
        }

        /* իւ */
        if (previous === 'Ի') {
          latinString[i - 1] = 'J';
          latinString[i] = 'u';
        }
        if (previous === 'ի') {
          latinString[i - 1] = 'j';
          latinString[i] = 'u';
        }
      }

      /* Ligature */
      /* և -> ev, jev */
      // NOTE(review): only 'ev' is produced; confirm against the rules page
      // whether 'jev' is expected in some position.
      if (current === 'և') {
        latinString[i] = 'ev';
      }

      if (startsWord(i)) {
        /* 'Ե' and 'ե' special casing  */
        if (current === 'Ե') {
          latinString[i] = 'Je';
        }
        if (current === 'ե') {
          latinString[i] = 'je';
        }

        /* 'Ո' and 'ո' special casing  */
        // A following 'ւ' overwrites this slot again in the next iteration.
        if (current === 'Ո') {
          latinString[i] = 'Vo';
        }
        if (current === 'ո') {
          latinString[i] = 'vo';
        }
      }

      /* 'յ' special casing: 'i' after Ծ, Ց, Ս, Զ or Ձ (either case)  */
      if (current === 'յ' && previous !== undefined && 'ԾծՑցՍսԶզՁձ'.indexOf(previous) !== -1) {
        latinString[i] = 'i';
      }
    }

    return latinString.join('');
  };

  /**
   * Transcribes Ukrainian Cyrillic text into Latin script.
   *
   * Works in two passes over the input, one output slot per input character:
   * a plain table lookup, then the context rules for Є, Ь, Ю and Я after
   * З, С or Ц. The slots are then joined, every typographic apostrophe is
   * removed, repeated 'j' characters are collapsed and surrounding whitespace
   * is trimmed.
   *
   * @param {string} ukrainianString Text to transcribe.
   * @returns {string} The transcribed text; non-Ukrainian characters are
   *     copied through unchanged.
   */
  var transcribeFromUkrainian = function(ukrainianString) {
    var latinString = [];
    var i, previous, current, next;

    // True for З, С or Ц in either case.
    var isZeEsTse = function(letter) {
      return letter === 'З' || letter === 'з' ||
          letter === 'С' || letter === 'с' ||
          letter === 'Ц' || letter === 'ц';
    };

    // True when the character at `index` directly follows З/С/Ц, or, when
    // `allowApostrophe` is set, follows З/С/Ц plus an apostrophe.
    var followsZeEsTse = function(index, allowApostrophe) {
      var before = ukrainianString[index - 1];
      if (isZeEsTse(before)) {
        return true;
      }
      return allowApostrophe && before === '’' && isZeEsTse(ukrainianString[index - 2]);
    };

    /* Do the 'plain' transformations first */
    for (i = 0; i < ukrainianString.length; i++) {
      current = ukrainianString[i];
      latinString[i] = isUkrainianCharacter(current) ? ukrainianMapping[current] : current;
    }

    /* Context rules; each one only rewrites the slot of the current letter. */
    for (i = 0; i < ukrainianString.length; i++) {
      previous = ukrainianString[i - 1];
      current = ukrainianString[i];
      next = ukrainianString[i + 1];

      /* 'Є'/'є' and 'Я'/'я' rules (an apostrophe in between is allowed)  */
      if (followsZeEsTse(i, true)) {
        if (current === 'Є') {
          latinString[i] = 'Ie';
        }
        if (current === 'є') {
          latinString[i] = 'ie';
        }
        if (current === 'Я') {
          latinString[i] = 'Ia';
        }
        if (current === 'я') {
          latinString[i] = 'ia';
        }
      }

      /* 'Ю' and 'ю' rule (directly after the consonant only) */
      if (followsZeEsTse(i, false)) {
        if (current === 'Ю') {
          latinString[i] = 'Iu';
        }
        if (current === 'ю') {
          latinString[i] = 'iu';
        }
      }

      /* 'Ь' and 'ь' rule: only before 'О'/'о'  */
      if ((current === 'Ь' || current === 'ь') && (next === 'О' || next === 'о')) {
        if (isCyrillicConsonant(previous)) {
          latinString[i] = current === 'Ь' ? 'J' : 'j';
        }
        // The more specific З/С/Ц rule is applied last so that it wins.
        if (isZeEsTse(previous)) {
          latinString[i] = current === 'Ь' ? 'I' : 'i';
        }
      }
    }

    latinString = latinString.join('');
    // A string pattern would only replace the first apostrophe.
    latinString = latinString.replace(/’/g, '');
    latinString = removeDualJ(latinString);
    return latinString.trim();
  };

  /**
   * Transcribes `sourceText` and writes the result over the current selection
   * of the '#wpTextbox1' textarea.
   *
   * Nothing is written when `sourceText` is not a string (no selection could
   * be read) or when `sourceLanguage` is not one of the supported names;
   * otherwise the literal text "undefined" would end up in the textarea.
   *
   * @param {string} sourceText Text to transcribe.
   * @param {string} sourceLanguage 'Russian', 'Armenian' or 'Ukrainian'.
   */
  var processAndUpdateText = function(sourceText, sourceLanguage) {
    var transcribedText;

    if (typeof sourceText !== 'string') {
      return;
    }

    switch (sourceLanguage) {
      case 'Russian':
        transcribedText = transcribeFromRussian(sourceText);
        break;
      case 'Armenian':
        transcribedText = transcribeFromArmenian(sourceText);
        break;
      case 'Ukrainian':
        transcribedText = transcribeFromUkrainian(sourceText);
        break;
      default:
        return;
    }

    $('#wpTextbox1').val(function(i, text) {
      // Splice the transcription in between the text before and after the selection.
      var textbox = document.getElementById('wpTextbox1');
      return text.slice(0, textbox.selectionStart) + transcribedText + text.slice(textbox.selectionEnd);
    });
  };

  /**
   * Returns the text currently selected in the '#wpTextbox1' textarea.
   *
   * Uses `document.selection` when the browser exposes it (legacy Internet
   * Explorer API), otherwise the textarea's `selectionStart`/`selectionEnd`
   * offsets.
   *
   * @returns {string|undefined} The selected text, or `undefined` when
   *     neither selection API is available.
   */
  var getSelectedText = function() {
    var textbox = document.getElementById('wpTextbox1');

    // IE version
    if (document.selection !== undefined) {
      textbox.focus();
      var legacyRange = document.selection.createRange();
      return legacyRange.text;
    }

    // Not IE
    if (textbox.selectionStart === undefined) {
      return undefined;
    }
    var from = textbox.selectionStart;
    var to = textbox.selectionEnd;
    return textbox.value.substring(from, to);
  };

  /**
   * Builds the "Transkripsjon" dropdown inside '#wikiEditor-section-advanced'
   * and wires up its mouse handlers.
   *
   * The dropdown holds one entry per supported language. Pressing the mouse
   * button on an entry transcribes the text currently selected in the edit
   * box with that language's rules.
   */
  var addTranscriptionDropdown = function() {
    // Registers the mousedown handler of one language entry.
    var bindLanguageOption = function(optionSelector, sourceLanguage) {
      $(optionSelector).mousedown(function(event) {
        event.preventDefault();
        $('#transcription-items').toggle(true);
        processAndUpdateText(getSelectedText(), sourceLanguage);
      });
    };

    /* Markup, built from the outside in */
    $('#wikiEditor-section-advanced').append('<div class="group" id="transcription-group"></div>');
    $('#transcription-group').append('<div class="tool tool-select" id="transcription-select"></div>');
    $('#transcription-select').append('<div class="menu" id="transcription-menu"></div>');
    $('#transcription-menu')
      .append('<a class="label" href="#/">Transkripsjon</a>')
      .append('<div class="options" id="transcription-items"></div>');
    $('#transcription-items')
      .append('<a class="option" id="transcription-russian" href="#/">Russisk</a>')
      .append('<a class="option" id="transcription-armenian" href="#/">Armensk</a>')
      .append('<a class="option" id="transcription-ukrainian" href="#/">Ukrainsk</a>');

    /* Pressing anywhere on the menu shows or hides the list of languages */
    $('#transcription-menu').mousedown(function(event) {
      event.preventDefault();
      $('#transcription-items').toggle();
    });

    bindLanguageOption('#transcription-russian', 'Russian');
    bindLanguageOption('#transcription-armenian', 'Armenian');
    bindLanguageOption('#transcription-ukrainian', 'Ukrainian');
  };

  /* Add the dropdown once the 'ext.wikiEditor' module has loaded and the DOM is ready. */
  var wikiEditorLoaded = mw.loader.using('ext.wikiEditor');
  $.when(wikiEditorLoaded, $.ready).then(addTranscriptionDropdown);

});