Generating a *.csv file with UTF-8 encoding

Asked

Viewed 954 times

1

I created a process that takes the content of a given web page, filters out what is needed, and generates a *.csv file from that content. It works correctly, but words that contain accented characters end up wrong when the file is generated. Is it possible to set the encoding of the created file to UTF-8? Does anyone know how?

import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.logging.Logger;
Logger logger= Logger.getLogger("org.bonitasoft");

// Reads a web page line by line, extracts company fields from the HTML lines
// that carry them, and writes everything to a semicolon-separated CSV file:
// a header row first, then the extracted fields. Every field ends with ";"
// except the site field, which ends the row with a newline.

// Each pattern recognises the HTML line that holds one CSV field. They are
// compiled once here rather than once per input line.
Pattern padraoNome = Pattern.compile("consulta-associados-item-nome-fantasia")
Pattern padraoFone = Pattern.compile("<strong>Fone: <.strong>")
Pattern padraoEmail = Pattern.compile("<strong>Email: <.strong> <a href=\"mailto:.*\" class=\"link\">")
Pattern padraoRamo = Pattern.compile("Ramo:\\s<.strong>\\s.*")
Pattern padraoProduto = Pattern.compile("<span class=\"float-left\">")
Pattern padraoSite = Pattern.compile("<strong>Site: <.strong>\\s<a href=.* target=\"_blank\">")

URL url = null;
def y = 1
def lista = []
// NOTE: FileWriter encodes text with the JVM's default charset, so how accented
// characters end up in the file depends on the machine running the script.
// Wrap a FileOutputStream in an OutputStreamWriter to choose the charset explicitly.
BufferedWriter strW = new BufferedWriter(new FileWriter("C:\\TESTE\\teste.csv"))
try {
    lista.add("EMPRESA;TELEFONE;EMAIL;RAMO;PRODUTO;SITE\n")

    // Fetches the page twice (y = 1 and y = 2).
    // NOTE(review): the URL does not depend on y, so both passes read the same
    // page; presumably a placeholder for a per-page URL. Confirm.
    while (y <= 2) {
        url = new URL("http://site");
        // NOTE(review): the page bytes are decoded with the JVM's default charset;
        // if the site serves a different encoding, pass it to InputStreamReader.
        BufferedReader inFile = new BufferedReader(new InputStreamReader(url.openStream()));
        try {
            String inLine;
            while ((inLine = inFile.readLine()) != null) {
                if (padraoNome.matcher(inLine).find()) {
                    lista.add(inLine.replace("<h3 class=\"consulta-associados-item-nome-fantasia\">", "").replace("</h3>", "").trim() + ";")
                }
                if (padraoFone.matcher(inLine).find()) {
                    lista.add(inLine.replace("<strong>Fone: </strong>", "").trim() + ";")
                }
                if (padraoEmail.matcher(inLine).find()) {
                    lista.add(padraoEmail.matcher(inLine).replaceFirst("").replace("</a>", "").trim() + ";")
                }
                if (padraoRamo.matcher(inLine).find()) {
                    lista.add(inLine.replace("Ramo: </strong> ", "").replace("<strong>", "").trim() + ";")
                }
                if (padraoProduto.matcher(inLine).find()) {
                    // Several products on one line are separated by <br>; keep them in one field.
                    lista.add(inLine.replace("<span class=\"float-left\">", "").replace("</span>", "").replace("<br>", " | ").trim() + ";")
                }
                if (padraoSite.matcher(inLine).find()) {
                    // The site is the last column, so it terminates the CSV row.
                    lista.add(padraoSite.matcher(inLine).replaceFirst("").replace("</a>", "").trim() + "\n")
                }
            }
        } finally {
            // Release the connection even when reading fails part-way through.
            inFile.close();
        }
        y++
    }

    // Concatenate the fragments exactly as collected. Rendering the list as
    // "[a, b, c]" and stripping "[", "]" and "," afterwards would also delete
    // those characters from the scraped data and leave a space before each field.
    strW.write(lista.join(""))
} finally {
    // Flush and close the file whether or not the scrape succeeded.
    strW.close();
}

1 answer

3


I managed to solve my problem by changing the following line:

// FileWriter encodes text with the JVM's default charset; no charset is chosen here.
BufferedWriter strW = new BufferedWriter(new FileWriter("C:\\TESTE\\teste.csv"))

To this:

// OutputStreamWriter takes the charset explicitly, so the file is always written as ISO-8859-1.
// StandardCharsets lives in java.nio.charset and needs an import (or the fully qualified name).
BufferedWriter strW = new BufferedWriter(new OutputStreamWriter(new FileOutputStream("C:\\TESTE\\teste.csv"), StandardCharsets.ISO_8859_1));

I found the solution here

Browse other questions tagged

You are not signed in. Login or sign up in order to post.