1
I created a process that takes the content of a given web page, filters what is needed, and generates a *.csv file
from this content. It works correctly, but words that contain accented characters cause errors when the file is generated. Is it possible to set the encoding of the created file to UTF-8? Does anyone know how?
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.logging.Logger;
// Scrapes a web page line by line, extracts the company fields
// (name, phone, e-mail, branch, product, site) and writes them to a
// semicolon-separated CSV file encoded as UTF-8.
Logger logger = Logger.getLogger("org.bonitasoft");

String outputPath = "C:\\TESTE\\teste.csv"
// Explicit output encoding: FileWriter would use the platform default
// charset, which is what corrupted accented characters in the CSV.
String outputCharset = "UTF-8"
// Used to decode the page when the server does not declare a usable charset.
String fallbackPageCharset = "UTF-8"

// Compiled once instead of once per line read.
Pattern ramoPattern = Pattern.compile("Ramo:\\s<.strong>\\s.*")
Pattern nomePattern = Pattern.compile("consulta-associados-item-nome-fantasia")
Pattern fonePattern = Pattern.compile("<strong>Fone: <.strong>")
Pattern emailPattern = Pattern.compile("<strong>Email: <.strong> <a href=\"mailto:.*\" class=\"link\">")
Pattern produtoPattern = Pattern.compile("<span class=\"float-left\">")
Pattern sitePattern = Pattern.compile("<strong>Site: <.strong>\\s<a href=.* target=\"_blank\">")
// Extracts the charset parameter from a Content-Type header value.
Pattern charsetPattern = Pattern.compile("(?i)charset=\\s*\"?([^\\s;\"]+)")

// Each element is one CSV fragment: a field followed by ';', or the last
// field of a row followed by '\n'.
def lista = []
lista.add("EMPRESA;TELEFONE;EMAIL;RAMO;PRODUTO;SITE\n")

// NOTE(review): the URL does not depend on y, so the same page is fetched
// twice, exactly as before — presumably the real URL carries a page number; confirm.
for (int y = 1; y <= 2; y++) {
    URL url = new URL("http://site");
    URLConnection connection = url.openConnection()

    // Decode the page with the charset the server declares, if any.
    String pageCharset = fallbackPageCharset
    String contentType = connection.getContentType()
    if (contentType != null) {
        Matcher charsetMatcher = charsetPattern.matcher(contentType)
        if (charsetMatcher.find()) {
            try {
                if (java.nio.charset.Charset.isSupported(charsetMatcher.group(1))) {
                    pageCharset = charsetMatcher.group(1)
                }
            } catch (IllegalArgumentException ignored) {
                // Malformed charset name in the header: keep the fallback.
            }
        }
    }

    BufferedReader inFile = new BufferedReader(new InputStreamReader(connection.getInputStream(), pageCharset))
    try {
        String inLine
        while ((inLine = inFile.readLine()) != null) {
            // The checks run in this fixed order so that fragments are
            // appended in the same order as the CSV header columns.
            if (nomePattern.matcher(inLine).find()) {
                lista.add(inLine.replace("<h3 class=\"consulta-associados-item-nome-fantasia\">", "").replace("</h3>", "").trim() + ";")
            }
            if (fonePattern.matcher(inLine).find()) {
                lista.add(inLine.replace("<strong>Fone: </strong>", "").trim() + ";")
            }
            if (emailPattern.matcher(inLine).find()) {
                lista.add(emailPattern.matcher(inLine).replaceFirst("").replace("</a>", "").trim() + ";")
            }
            if (ramoPattern.matcher(inLine).find()) {
                lista.add(inLine.replace("Ramo: </strong> ", "").replace("<strong>", "").trim() + ";")
            }
            if (produtoPattern.matcher(inLine).find()) {
                lista.add(inLine.replace("<span class=\"float-left\">", "").replace("</span>", "").replace("<br>", " | ").trim() + ";")
            }
            if (sitePattern.matcher(inLine).find()) {
                // The site is the last column, so it terminates the row.
                lista.add(sitePattern.matcher(inLine).replaceFirst("").replace("</a>", "").trim() + "\n")
            }
        }
    } finally {
        // Close the reader even when reading fails.
        inFile.close()
    }
}

BufferedWriter strW = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outputPath), outputCharset))
try {
    // Concatenate the fragments directly. The previous array-toString plus
    // replace("[" / "]" / ",") approach also removed those characters from the
    // scraped data and left a stray space before every field.
    strW.write(lista.join(""))
} finally {
    strW.close()
}