3
I am developing an HTML5 application that uses the Wikipedia API to show the definition of whatever term the user asks for. I used code that was shared on another-language Stack Exchange site, and I noticed that it uses regular expressions to remove certain parts of the extracted text (links, references, tags, etc.).
However, I would also like to remove any text in parentheses, as in the example below:
The text currently looks like this:
Application software (application or application) is a computer program that aims to help its user perform...
It should look like this:
Application software is a computer program that aims to help its user perform...
I have the code below, and I would like help with the regular-expression part (I haven't learned regular expressions yet, but I'm trying) to solve my problem:
// If the 'q' input is not empty, use its current value as the term to define.
// NOTE(review): neither `d` nor `algo` is declared in this snippet — presumably
// `d` is an alias for `document` and `algo` is declared elsewhere; confirm.
if(d.getElementById('q').value !== "") {
algo = d.getElementById('q').value
}
// Starts a definition lookup for `algo`: kicks off the fetch, then updates the
// "feedback", "q" and "resposta" fields with progress text.
// NOTE(review): search() and voz() are defined outside this snippet; voz()
// presumably reads the "resposta" text aloud — confirm.
var definir = function(algo) {
startFetch(algo, 1, 1000); // minimum 1 character accepted, at most 1000 requested
d.getElementById("feedback").value = "Definir > " + algo; // Word or sentence to be defined
d.getElementById('q').value = algo;
search();
d.getElementById("resposta").value = "Só um momento...";
voz();
}
// Field that displays the fetched definition.
var textbox = d.getElementById("resposta");

// <script> element of the JSONP request currently in flight; null when idle.
var tempscript = null;

// Length bounds of the current request and the counter used by the
// completion handler; all three are assigned in startFetch.
var minchars;
var maxchars;
var attempts;
/**
 * Requests the extract of a pt.wikipedia.org article via JSONP by appending a
 * <script> element to the document body. The response is delivered to the
 * global onFetchComplete callback named in the URL.
 *
 * Does nothing while a previous request is still in flight.
 *
 * @param {string} algo Word or sentence to be defined (article title).
 * @param {number} minimumCharacters Stored in `minchars` for the completion handler.
 * @param {number} maximumCharacters Stored in `maxchars` and sent as `exchars`.
 * @param {boolean} [isRetry] When truthy, keeps the limits and the attempt
 *     counter from the previous call instead of resetting them.
 */
function startFetch(algo, minimumCharacters, maximumCharacters, isRetry) {
    // Only one JSONP request may be pending at a time.
    if (tempscript) return;
    if (!isRetry) {
        attempts = 0;
        minchars = minimumCharacters;
        maxchars = maximumCharacters;
    }
    tempscript = d.createElement("script");
    tempscript.type = "text/javascript";
    tempscript.id = "tempscript";
    tempscript.src = "https://pt.wikipedia.org/w/api.php?action=query&titles="
        // Word or sentence to be defined. It must be escaped: a raw "&", "#",
        // "+" or space in the title would otherwise corrupt the query string.
        + encodeURIComponent(algo)
        + "&redirects="
        + "&prop=extracts"
        + "&exchars="
        + maxchars // Maximum number of characters to fetch
        + "&exintro"
        + "&format=json"
        + "&callback=onFetchComplete"
        + "&requestid="
        + Math.floor(Math.random() * 999999).toString();
    d.body.appendChild(tempscript);
}
/**
 * JSONP callback for the request issued by startFetch. Removes the temporary
 * <script> element, extracts and cleans the article text, then either shows
 * and logs the definition or reports that none was found.
 *
 * @param {Object} data Parsed API response; the text is read from the first
 *     entry of `data.query.pages`.
 * @param {string} [algo] Word or sentence that was defined. A JSONP response
 *     invokes this callback with `data` only, so when it is omitted the term
 *     is read from the 'q' input (which `definir` fills with the term).
 */
function onFetchComplete(data, algo) {
    // Guard against being called when no request element is registered;
    // removeChild(null) would throw.
    if (tempscript) {
        d.body.removeChild(tempscript);
        tempscript = null;
    }

    var term = algo !== undefined ? algo : d.getElementById('q').value;

    // A missing article or an error response carries no `extract`; treat that
    // as an empty definition so the "not found" branch below is reached
    // instead of throwing inside stripTags.
    var pages = data && data.query ? data.query.pages : undefined;
    var page = pages ? getFirstProp(pages) : undefined;
    var extract = page ? page.extract : undefined;
    var s = typeof extract === "string" ? (htmlDecode(stripTags(extract)) || "") : "";

    // `attempts` is incremented whenever the text is too short.
    if (s.length > minchars || attempts++ > 5) {
        d.getElementById("resposta").value = s;
        d.getElementById("feedback").value = "Definindo...";
        voz(); // Reads the definition of the word or sentence aloud
        espera(); // Delay to roughly match the audio loading time
        d.getElementById("log").value += "Definir > "
            + term // Word or sentence that was defined
            + "\n"
            + s // Loaded definition
            + "\n\n";
        saveHist(); // Saves the definition in the conversation log
    }
    else {
        d.getElementById('resposta').value = "Não encontrei a definição, " + nomeDoUsuario + ".";
        voz();
    }
}
/**
 * Returns the value of the first enumerable property found on `obj`
 * (inherited ones included), or undefined when there is none.
 */
function getFirstProp(obj) {
    var key;
    for (key in obj) {
        return obj[key];
    }
    return undefined;
}
/**
 * Removes HTML opening tags (with or without attributes) and closing tags
 * from `s`, leaving the text between them untouched.
 */
function stripTags(s) {
    // This is the part I need help with ******************************************
    // First alternative: "<name" optionally followed by whitespace and attribute
    // text (quoted values may contain ">"), then ">". Second alternative: "</name>".
    var tagPattern = /<\w+(\s+("[^"]*"|'[^']*'|[^>])+)?>|<\/\w+>/gi;
    var withoutTags = s.replace(tagPattern, "");
    return withoutTags;
    // End of the part I need help with *******************************************
}
/**
 * Decodes HTML entities in `input` by letting the browser parse it inside a
 * detached <div> and reading back the resulting text.
 *
 * Returns the text of the whole parsed fragment rather than only the first
 * child node, so markup that survived tag stripping (e.g. "<br/>") no longer
 * yields null or a truncated string.
 *
 * @param {string} input Text that may contain HTML entities.
 * @returns {string} Decoded text; "" for empty input.
 */
function htmlDecode(input){
    var e = document.createElement("div");
    // NOTE(review): innerHTML parses `input` as markup; callers should only
    // pass text that has already been stripped of tags or is otherwise trusted.
    e.innerHTML = input;
    return e.textContent || "";
}
This is exactly what I needed. A very well explained answer, I learned here what I have been trying to learn for a long time. Thank you very much, @Sergio.
– Victor Ribeiro
@Victor Great! I'm glad I could help. I also voted +1 on your question for giving an example of what you were looking for. – Sergio