Speech Recognition does not recognize special characters

Asked

Viewed 229 times

4

I tried to use result.replace("arroba", "@") in JavaScript to change the word "arroba" to "@", as in the code below, but it didn't work. I want to do the same with period, comma, underline, underscore, and so on, because speech recognition spells out the special characters instead of typing them. Does anyone have any idea how I can do this?

HTML

<!DOCTYPE html>

<html>

    <head>
        <meta charset="UTF-8">
        <title></title>
        <!-- jQuery is loaded here, but the JavaScript listed in this post uses only native DOM APIs. -->
        <script type="text/javascript" src="js/jquery-3.2.1.min.js"></script>
    </head>
    <body>
        <!-- Button that starts/stops recording; the script looks it up by this id. -->
        <button id="btn-gravar-audio">Gravar</button><br/><br/>
        <!-- Loaded before the textarea is parsed. Presumably this is the script listed below, which waits for DOMContentLoaded before touching the DOM (TODO confirm). -->
        <script type="text/javascript" src="js/teste.js"></script>
        <!-- Element that receives the transcript. -->
        <textarea id="textarea" cols="60" rows="5"></textarea>
    </body>
</html>

JAVASCRIPT

// Once the DOM is ready, wires the record button to the Web Speech API and
// copies the recognized text into #textarea.
window.addEventListener('DOMContentLoaded', function () {
var btn_gravacao = document.querySelector('#btn-gravar-audio');

// Text of all results already marked final, accumulated across result events.
var transcricao_audio = '';
// Toggled by onstart/onend; read by the button's click handler.
var esta_gravando = false;

// Feature detection: use the standard constructor or the webkit-prefixed one.
if (window.SpeechRecognition || window.webkitSpeechRecognition) {

    var speech_api = window.SpeechRecognition || window.webkitSpeechRecognition;
    var receber_audio = new speech_api();

    receber_audio.continuous = true;
    receber_audio.interimResults = true;
    receber_audio.lang = "pt-BR";

    receber_audio.onstart = function () {
        esta_gravando = true;
        btn_gravacao.innerHTML = 'Gravando! Parar gravação';
    };

    receber_audio.onend = function () {
        esta_gravando = false;
        btn_gravacao.innerHTML = 'Iniciar Gravação';
    };
    // Builds the text to display from the results reported by this event.
    receber_audio.onresult = function (event) {
                var interim_transcript = '';

               for(var i = event.resultIndex; i < event.results.length; i++){
                   if(event.results[i].isFinal){
                       transcricao_audio += event.results[i][0].transcript; 
                   }else{
                       interim_transcript += event.results[i][0].transcript; 
                   }

                   // `var` is function-scoped, so `resultado` is still visible after the loop
                   // (and is undefined if the loop body never runs). Because of `||`, the
                   // interim text is used only while transcricao_audio is still empty.
                   var resultado = transcricao_audio || interim_transcript;

                   console.log(resultado);
               }



               // The textarea is written here, before any replacement has been applied.
               document.getElementById("textarea").innerHTML = resultado;

    // replace() returns a new string and does not modify `resultado`; the returned
    // value is discarded here. With a string pattern only the first match is replaced.
    resultado.replace("arroba", "@");

    };

    // The same button starts recognition or stops it, depending on the current state.
    btn_gravacao.addEventListener('click', function (e) {
        if (esta_gravando) {
            receber_audio.stop();
            return;
        }
        receber_audio.start();
    }, false);

} else {
    console.log("navegador não apresenta suporte a web speech api");
}
}, false);
  • Close receber_audio.onresult = function (event) { with }.

  • Use a RegExp with the g flag to replace all occurrences instead of only the first. Example: "um dois um".replace(/um/g, "1").replace(/dois/g, "2");.

  • Thanks for the help!

1 answer

5


There are two errors. The first is the order of the statements:

// As written in the question: the textarea is updated first...
document.getElementById("textarea").innerHTML = resultado;

// ...and the string returned by replace() is then discarded.
resultado.replace("arroba", "@");

You are doing the replace afterwards; it has to come before the assignment to the textarea. The second error is that you need to store the value returned by replace back in the variable, like this:

// Store the string returned by replace() back in the variable...
resultado = resultado.replace("arroba", "@");

// ...and only then write it to the textarea.
document.getElementById("textarea").innerHTML = resultado;

However, if you pass a string, as in replace("arroba", "@"), only the first "arroba" will be replaced. The ideal is to use a regular expression with the g modifier, which matches every occurrence of the word "arroba", together with the i modifier (in case there are upper-case letters). You can also use the metacharacter \b to prevent a word like "arrobas" (which probably refers to a description of the use of @, or to the weight of something) from being converted into:

@s

Note that the result would be an @ followed by an "s". So write the pattern so that it matches the word only when it is delimited by spaces or sits at the beginning or end of the string:

// g: every occurrence; i: case-insensitive; \b: whole word only, so "arrobas" is left untouched.
resultado = resultado.replace(/\barroba\b/gi, "@");

document.getElementById("textarea").innerHTML = resultado;

For words with accents, you can do this:

// [íi] accepts the word with or without the accent ("vírgula" or "virgula").
resultado = resultado.replace(/\bv[íi]rgula\b/gi, ",");

document.getElementById("textarea").innerHTML = resultado;

Organizing the words with iteration

For better organization of words you can use a simple object and then iterate with for, something like:

// Maps each replacement symbol (key) to the regex that matches its spoken name (value).
// "ponto e vírgula" is listed before "vírgula": when the entries are applied in listing
// order, the longer phrase becomes ";" before the shorter pattern can turn it into "ponto e ,".
var dicionario = {
    "@": /\barroba\b/gi,
    ";": /\bponto e v[íi]rgula\b/gi,
    ",": /\bv[íi]rgula\b/gi,
    "!": /\bexclamação\b/gi,
    "?": /\binterrogação\b/gi,
};

And then do this:

// Apply every dictionary entry in turn: the key is the replacement symbol,
// the value is the regex to search for.
for (var substituto in dicionario) {
    resultado = resultado.replace(dicionario[substituto], substituto);
}

// Write the fully substituted text to the textarea.
document.getElementById("textarea").innerHTML = resultado;

Very important note: for accented characters to work properly, the .html file must be saved with the same encoding that the page declares, and the page must declare it with <meta charset="<your charset>"> or send the charset via an HTTP header.

Online example in codepen (only works in Chrome and Safari): https://codepen.io/brcontainer/pen/YYrbNX

It should look like this (I used UTF-8 because it is the charset I use on my websites, but you can change it to iso-8859-1 if you use latin1/ansi/etc.):

<!DOCTYPE html>
<html>
<head>
    <title></title>
    <meta charset="utf-8">
</head>
<body>
<textarea id="textarea"></textarea>
<button id="btn-gravar-audio">Gravar</button>

<script>
// Voice dictation demo: transcribes pt-BR speech into #textarea and converts
// spoken punctuation names ("arroba", "vírgula", ...) into their symbols.

// Text of all results already marked final, accumulated across result events.
var transcricao_audio = '';
// Toggled by onstart/onend; read by the button's click handler.
var esta_gravando = false;

// Maps each replacement symbol (key) to the regex matching its spoken name (value).
// Entries are applied in listing order, so "ponto e vírgula" must stay before
// "vírgula"; otherwise the phrase would become "ponto e ,".
var dicionario = {
    "@": /\barroba\b/gi,
    ";": /\bponto e v[íi]rgula\b/gi,
    ",": /\bv[íi]rgula\b/gi,
    "!": /\bexclamação\b/gi,
    "?": /\binterrogação\b/gi,
};

/**
 * Replaces every spoken punctuation word listed in `dicionario` with its symbol.
 *
 * @param {string} texto - Transcript to process; a missing value is treated as ''.
 * @returns {string} The transcript with all dictionary words substituted.
 */
function aplicarDicionario(texto) {
    var resultado = texto || '';

    for (var substituto in dicionario) {
        if (Object.prototype.hasOwnProperty.call(dicionario, substituto)) {
            resultado = resultado.replace(dicionario[substituto], substituto);
        }
    }

    return resultado;
}

// Feature detection: standard constructor or the webkit-prefixed one. The typeof
// guard keeps the check from throwing where `window` does not exist.
var speech_api = typeof window !== 'undefined' &&
    (window.SpeechRecognition || window.webkitSpeechRecognition);

if (speech_api) {

    var btn_gravacao = document.querySelector('#btn-gravar-audio');
    var receber_audio = new speech_api();

    receber_audio.continuous = true;
    receber_audio.interimResults = true;
    receber_audio.lang = "pt-BR";

    receber_audio.onstart = function () {
        esta_gravando = true;
        btn_gravacao.innerHTML = 'Gravando! Parar gravação';
    };

    receber_audio.onend = function () {
        esta_gravando = false;
        btn_gravacao.innerHTML = 'Iniciar Gravação';
    };

    // Rebuilds the displayed text from the results reported by this event.
    receber_audio.onresult = function (event) {
        var interim_transcript = '';

        for (var i = event.resultIndex; i < event.results.length; i++) {
            var trecho = event.results[i][0].transcript;

            if (event.results[i].isFinal) {
                transcricao_audio += trecho;
            } else {
                interim_transcript += trecho;
            }
        }

        // Show the confirmed text followed by whatever is still being recognized,
        // so interim results remain visible after the first final result.
        // A textarea's current text is its `value`; innerHTML only sets the default
        // contents, which stop being displayed once the user has edited the field.
        document.getElementById("textarea").value =
            aplicarDicionario(transcricao_audio + interim_transcript);
    };

    // The same button starts recognition or stops it, depending on the current state.
    btn_gravacao.addEventListener('click', function (e) {
        if (esta_gravando) {
            receber_audio.stop();
            return;
        }

        receber_audio.start();
    }, false);

} else {
    console.log("navegador não apresenta suporte a web speech api");
}
</script>

</body>
</html>
  • 1

    Thanks for the help! It worked perfectly here. I had done it like this: document.getElementById("textarea").innerHTML = result.replace(/arroba/g, "@"); and it worked, but the way you did it is much better and more organized.

Browse other questions tagged

You are not signed in. Login or sign up in order to post.