How to fix CORS error in Node.JS? (Web Crawler)

Asked

Viewed 525 times

1

My final-year project (TCC) group and I are building a website whose purpose is to fetch products from various websites specializing in hardware and show which site has the lowest price. We are building it with HTML, PHP, JavaScript and Node.js (using the request and Cheerio modules for the crawler, and Browserify to run those modules in the browser), running only on USBWebserver. It may seem strange, but it works.

This is the "root" code, which does not render anything to HTML and successfully retrieves the results from all the sites (the console output shows only the number of items found on each site):


var request = require('request');
var cheerio = require('cheerio');

// Search term shared by every crawler in this script.
// All of these are declared explicitly: assigning to undeclared names creates implicit
// globals and throws a ReferenceError in strict mode / ES modules.
const busca = "processador";

// One search URL per store, read by the crawler function of the same name.
const urlKabum = "https://www.kabum.com.br/cgi-local/site/listagem/listagem.cgi?string=" + busca;
const urlTera = "https://www.terabyteshop.com.br/busca?str=" + busca;
const urlChip = "https://www.chipart.com.br/produtos/" + busca;
const urlPichau = "https://www.pichau.com.br/catalogsearch/result/?q=" + busca;
const urlSubmarino = "https://www.submarino.com.br/busca/" + busca;

// Print the URLs that are about to be requested, one per line.
console.log(urlTera + '\n' + urlKabum + '\n' + urlChip + '\n' + urlPichau + '\n' + urlSubmarino);

/**
 * Requests the Kabum listing at `urlKabum`, scrapes its products and logs how many were found.
 *
 * Each product is collected as `[title, price, imageSrc]`. Nothing is returned: either the
 * request error or the product count is written to the console.
 */
function Kabum() {
    request(urlKabum, function (err, res, html) {
        if (err) {
            // Do not go on to parse the body of a failed request.
            console.log('Erro:' + err);
            return;
        }

        const $ = cheerio.load(html);

        // These selectors match against the whole page, so each list is built once up front
        // instead of being rebuilt for every product box.
        const titulo = $(' .H-titulo a').toArray().map((el) => $(el).text());
        const preco = $(' .listagem-preco').toArray().map((el) => $(el).text());
        const imagem = $(' .listagem-img img').toArray().map((el) => $(el).attr('src'));

        // NOTE(review): fields are paired with boxes by position only; this assumes every box
        // holds exactly one title, price and image — confirm against the site's markup.
        const produtoKabum = $(' .listagem-box')
            .toArray()
            .map((box, i) => [titulo[i], preco[i], imagem[i]]);

        console.log(produtoKabum.length);
    });
}

/**
 * Requests the Terabyte listing at `urlTera`, scrapes its products and logs how many were found.
 *
 * Each product is collected as `[title, price, imageSrc]`; a product with no scraped price gets
 * the placeholder 'INDISPONÍVEL'. Nothing is returned: either the request error or the product
 * count is written to the console.
 */
function Tera() {
    request(urlTera, function (err, res, html) {
        if (err) {
            // Do not go on to parse the body of a failed request.
            console.log('Erro:' + err);
            return;
        }

        const $ = cheerio.load(html);

        // These selectors match against the whole page, so each list is built once up front
        // instead of being rebuilt for every product box.
        const titulo = $(' .prod-name strong').toArray().map((el) => $(el).text());
        const preco = $(' .prod-new-price span').toArray().map((el) => $(el).text());
        const imagem = $(' .commerce_columns_item_image img').toArray().map((el) => $(el).attr('src'));

        // NOTE(review): fields are paired with boxes by position only, so a box without a price
        // shifts the prices of the boxes after it — confirm against the site's markup.
        const produtoTera = $(' .pbox.col-xs-12.col-sm-6.col-md-3')
            .toArray()
            .map((box, i) => {
                // Positions past the last scraped price are reported as unavailable.
                const precoItem = preco[i] == null ? 'INDISPONÍVEL' : preco[i];
                return [titulo[i], precoItem, imagem[i]];
            });

        console.log(produtoTera.length);
    });
}

/**
 * Requests the ChipArt listing at `urlChip`, scrapes its products and logs how many were found.
 *
 * Each product is collected as `[title, price, imageSrc]`. Nothing is returned: either the
 * request error or the product count is written to the console.
 */
function ChipArt() {
    request(urlChip, function (err, res, html) {
        if (err) {
            // Do not go on to parse the body of a failed request.
            console.log('Erro:' + err);
            return;
        }

        const $ = cheerio.load(html);

        // These selectors match against the whole page, so each list is built once up front
        // instead of being rebuilt for every product item.
        const titulo = $(' .product-card__title').toArray().map((el) => $(el).text());
        const preco = $(' .price').toArray().map((el) => $(el).text());
        const imagem = $(' .product-card__figure img').toArray().map((el) => $(el).attr('src'));

        // NOTE(review): fields are paired with items by position only; this assumes every item
        // holds exactly one title, price and image — confirm against the site's markup.
        const produtoChip = $(' .products__list__item')
            .toArray()
            .map((item, i) => [titulo[i], preco[i], imagem[i]]);

        console.log(produtoChip.length);
    });
}

/**
 * Requests the Pichau search page at `urlPichau`, scrapes its products and logs how many were
 * found.
 *
 * Each product is collected as `[title, price, imageSrc]`. Nothing is returned: either the
 * request error or the product count is written to the console.
 */
function Pichau() {
    request(urlPichau, function (err, res, html) {
        if (err) {
            // Do not go on to parse the body of a failed request.
            console.log('Erro:' + err);
            return;
        }

        const $ = cheerio.load(html);

        // These selectors match against the whole page, so each list is built once up front
        // instead of being rebuilt for every product item.
        const titulo = $(' .product-item-link').toArray().map((el) => $(el).text());
        const preco = $(' .price-boleto span').toArray().map((el) => $(el).text());
        const imagem = $(' .product-image-photo').toArray().map((el) => $(el).attr('src'));

        // NOTE(review): fields are paired with items by position only; this assumes every item
        // holds exactly one title, price and image — confirm against the site's markup.
        const produtoPichau = $(' .item.product.product-item')
            .toArray()
            .map((item, i) => [titulo[i], preco[i], imagem[i]]);

        console.log(produtoPichau.length);
    });
}

/**
 * Requests the Submarino search page at `urlSubmarino`, scrapes its products and logs how many
 * were found.
 *
 * Each product is collected as `[title, price, imageSrc]`. Nothing is returned: either the
 * request error or the product count is written to the console.
 */
function Submarino() {
    request(urlSubmarino, function (err, res, html) {
        if (err) {
            // Do not go on to parse the body of a failed request.
            console.log('Erro:' + err);
            return;
        }

        const $ = cheerio.load(html);

        // These selectors match against the whole page, so each list is built once up front
        // instead of being rebuilt for every product item.
        const titulo = $(' .TitleWrapper-c6mv26-7.cXTBDO.ViewUI-oocyw8-6.kvewNe h1')
            .toArray()
            .map((el) => $(el).text());
        const preco = $(' .PriceUI-sc-1m3ayw0-10.pYYOk.PriceUI-pftkg3-0.hDaJyt.TextUI-sc-1hrwx40-0.hbVZKK')
            .toArray()
            .map((el) => $(el).text());
        const imagem = $(' .ImageUI-sc-1m3ayw0-13.bqjwSN.PictureUI-alry8x-1.fyrQZb img')
            .toArray()
            .map((el) => $(el).attr('src'));

        // NOTE(review): fields are paired with items by position only; this assumes every item
        // holds exactly one title, price and image — confirm against the site's markup.
        const produtoSubmarino = $(' .product-grid-item.ColUI-sc-1ey7nd2-0.fUgyk.ViewUI-oocyw8-6.kvewNe')
            .toArray()
            .map((item, i) => [titulo[i], preco[i], imagem[i]]);

        console.log(produtoSubmarino.length);
    });
}

// Kick off every crawler, in the same order as before; each one issues its own request.
[Tera, Kabum, ChipArt, Pichau, Submarino].forEach(function (crawl) {
    crawl();
});

However, when I move the code into the HTML page, only the Kabum! and Terabyte websites can be scraped and shown on our website; the rest fail with the following error:

Access to fetch at 'https://www.chipart.com.br/produtos/processador' from origin 'http://localhost:8080' has been blocked by CORS policy: No 'Access-Control-Allow-Origin' header is present on the requested resource. If an opaque response serves your needs, set the request's mode to 'no-cors' to fetch the resource with CORS disabled.

The code below is the JavaScript that shows the results on the site (variables such as htmlTera appear here with empty strings; in the real code those strings hold the markup: divs, classes, images, etc.):


var unique = require('uniq');
var request = require('request');
var cheerio = require('cheerio');

/**
 * Empties the "result" element and clears the browser console, so a new search starts
 * from a blank slate.
 */
function limpar() {
    const resultBox = document.getElementById("result");
    resultBox.innerHTML = "";

    console.clear();
}

// Helpers for cleaning up the scraped text:

/**
 * Removes whitespace found at the start or at the end of a line of the given text.
 *
 * The pattern runs with the `g` and `m` flags, so it applies to every line rather than only
 * to the ends of the whole string. `\s` also matches line breaks, so a match may extend
 * across blank lines.
 *
 * @param {string|null|undefined} trim - Text to clean; `null`/`undefined` (for example a
 *   missing HTML attribute) is treated as empty text instead of throwing.
 * @returns {string} The cleaned text, or '' when no text was given.
 */
function myTrim(trim) {
    if (trim == null) {
        return '';
    }

    return trim.replace(/^\s+|\s+$/gm, '');
}

/**
 * Replaces every occurrence of the character matched by the pattern below with 'í'.
 *
 * NOTE(review): the matched character looks like the Unicode replacement character produced
 * when a response is decoded with the wrong charset — confirm against the real pages.
 *
 * @param {string} trim - Text to repair.
 * @returns {string} The text with each matched character turned into 'í'.
 */
function TrimAcent(trim) {
    const caractereQuebrado = /�/g;
    const corrigido = trim.replace(caractereQuebrado, 'í');

    return corrigido;
}

// URLs of the sites to be requested. They are declared explicitly (instead of being created
// as implicit globals on first assignment) and are filled in by urls() before each search.
let resultSub;
let urlChip;
let urlTera;
let urlKabum;

/**
 * Builds the search URLs from the text typed into the `inputProduto` field.
 *
 * Spaces in the term become "-" for `resultSub` and "+" for the store URLs. Kabum uses a
 * dedicated category page for "ssd", "hd" and "cooler"; every other term goes through its
 * generic search listing, so `urlKabum` is always assigned and never left over from a
 * previous search. The Terabyte and Kabum URLs are logged to the console.
 */
function urls() {
    const termo = document.getElementById("inputProduto").value;

    resultSub = termo.replace(/ /g, "-");
    const busca = termo.replace(/ /g, "+");

    urlChip = "https://www.chipart.com.br/produtos/" + busca;

    urlTera = "https://www.terabyteshop.com.br/busca?str=" + busca;
    console.log(urlTera);

    if (busca === "ssd") {
        urlKabum = "https://www.kabum.com.br/hardware/ssd-2-5/" + busca;
    } else if (busca === "hd") {
        urlKabum = "https://www.kabum.com.br/hardware/disco-rigido-hd/" + busca;
    } else if (busca === "cooler") {
        urlKabum = "https://www.kabum.com.br/hardware/coolers/" + busca;
    } else {
        // Includes "processador" and "placa+de+video", which already used this listing.
        urlKabum = "https://www.kabum.com.br/cgi-local/site/listagem/listagem.cgi?string=" + busca;
    }

    console.log(urlKabum);
}

/**
 * Runs one complete search: rebuilds the URLs, clears the previous output and then starts
 * the Kabum, Terabyte and ChipArt crawlers, in that order.
 */
let main = function () {
    urls();
    limpar();

    [Kabum, Tera, ChipArt].forEach(function (crawl) {
        crawl();
    });
};

/**
 * Requests the Kabum listing at `urlKabum`, appends one entry per product to the "result"
 * element and logs how many products were found.
 *
 * Each product is also collected as `[title, price, imageSrc]`. Nothing is returned.
 */
function Kabum() {
    request(urlKabum, function (err, res, html) {
        if (err) {
            // Do not go on to parse the body of a failed request.
            console.log('Erro:' + err);
            return;
        }

        const $ = cheerio.load(html);

        // These selectors match against the whole page, so each list is built once up front
        // instead of being rebuilt for every product box.
        const titulo = $(' .H-titulo a').toArray().map((el) => myTrim(TrimAcent($(el).text())));
        const preco = $(' .listagem-preco').toArray().map((el) => myTrim($(el).text()));
        // An <img> without `src` yields undefined; fall back to '' so trimming cannot throw.
        const imagem = $(' .listagem-img img').toArray().map((el) => myTrim($(el).attr('src') || ''));

        const produtoKabum = [];
        let htmlKabum = '';

        // NOTE(review): fields are paired with boxes by position only; this assumes every box
        // holds exactly one title, price and image — confirm against the site's markup.
        $(' .listagem-box').toArray().forEach(function (box, i) {
            htmlKabum += '' + titulo[i] + '' + preco[i] + 'Selecionar';
            produtoKabum[i] = [titulo[i], preco[i], imagem[i]];
        });

        if (htmlKabum !== '') {
            // A single write: every `innerHTML +=` makes the browser re-parse the container.
            // NOTE(review): the scraped text is inserted as HTML without escaping.
            document.getElementById("result").innerHTML += htmlKabum;
        }

        console.log(produtoKabum.length);
    });
}

/**
 * Requests the Terabyte listing at `urlTera`, appends one entry per product to the "result"
 * element and logs how many products were found.
 *
 * Each product is also collected as `[title, price, imageSrc]`; a product with no scraped
 * price gets the placeholder 'INDISPONÍVEL'. Nothing is returned.
 */
function Tera() {
    request(urlTera, function (err, res, html) {
        if (err) {
            // Do not go on to parse the body of a failed request.
            console.log('Erro:' + err);
            return;
        }

        const $ = cheerio.load(html);

        // These selectors match against the whole page, so each list is built once up front
        // instead of being rebuilt for every product box.
        const titulo = $(' .prod-name strong').toArray().map((el) => myTrim($(el).text()));
        const preco = $(' .prod-new-price span').toArray().map((el) => myTrim($(el).text()));
        // An <img> without `src` yields undefined; fall back to '' so trimming cannot throw.
        const imagem = $(' .commerce_columns_item_image img')
            .toArray()
            .map((el) => myTrim($(el).attr('src') || ''));

        const produtoTera = [];
        let htmlTera = '';

        // NOTE(review): fields are paired with boxes by position only, so a box without a price
        // shifts the prices of the boxes after it — confirm against the site's markup.
        $(' .pbox.col-xs-12.col-sm-6.col-md-3').toArray().forEach(function (box, i) {
            // Positions past the last scraped price are reported as unavailable.
            const precoItem = preco[i] == null ? 'INDISPONÍVEL' : preco[i];

            htmlTera += '' + titulo[i] + '' + precoItem + 'Selecionar';
            produtoTera[i] = [titulo[i], precoItem, imagem[i]];
        });

        if (htmlTera !== '') {
            // A single write: every `innerHTML +=` makes the browser re-parse the container.
            // NOTE(review): the scraped text is inserted as HTML without escaping.
            document.getElementById("result").innerHTML += htmlTera;
        }

        console.log(produtoTera.length);
    });
}

/**
 * Requests the ChipArt listing at `urlChip`, appends one entry per product to the "result"
 * element and logs how many products were found.
 *
 * Each product is also collected as `[title, price, imageSrc]`. Nothing is returned.
 */
function ChipArt() {
    request(urlChip, function (err, res, html) {
        if (err) {
            // Do not go on to parse the body of a failed request.
            console.log('Erro:' + err);
            return;
        }

        const $ = cheerio.load(html);

        // These selectors match against the whole page, so each list is built once up front
        // instead of being rebuilt for every product item.
        const titulo = $(' .product-card__title').toArray().map((el) => $(el).text());
        const preco = $(' .price').toArray().map((el) => $(el).text());
        const imagem = $(' .product-card__figure img').toArray().map((el) => $(el).attr('src'));

        const produtoChip = [];
        let htmlChip = '';

        // NOTE(review): fields are paired with items by position only; this assumes every item
        // holds exactly one title, price and image — confirm against the site's markup.
        $(' .products__list__item').toArray().forEach(function (item, i) {
            htmlChip += '' + titulo[i] + '' + preco[i] + 'Selecionar';
            produtoChip[i] = [titulo[i], preco[i], imagem[i]];
        });

        if (htmlChip !== '') {
            // A single write: every `innerHTML +=` makes the browser re-parse the container.
            // NOTE(review): the scraped text is inserted as HTML without escaping.
            document.getElementById("result").innerHTML += htmlChip;
        }

        console.log(produtoChip.length);
    });
}

// Run a new search every time the "search" button is clicked (listener on the bubbling phase).
let button = document.getElementById("search");
button.addEventListener("click", main, { capture: false });

What could be causing this error?

inserir a descrição da imagem aqui

2 answers

0

A CORS error occurs when a page requests a domain that is different from the one the page itself was loaded from.

The problem is caused by the back end of the sites you are sending requests to: they do not return a header such as "Access-Control-Allow-Origin": "*" (which would allow requests from any origin).

Maybe if you were making the requests from a domain with SSL (https), or even from plain http, it would already work, as long as the domain is something other than "localhost".

Since you are using localhost, I recommend the following:

Use the CORS Anywhere proxy hosted on Heroku: it works around CORS, so you can make these requests from your site.

For example, for your request to Kabum the url would look like this:

https://cors-anywhere.herokuapp.com/https://www.kabum.com.br/cgi-local/site/listagem/listagem.cgi?string=

Important

  • Do not use Heroku in production (if you are going to deploy this project); instead, send the website the headers it needs, and the cookies too if the site sets any.

Another alternative, which I think is better, is to use the Axios library to make your requests instead of request.

Personally, I find it easier to implement and to find answers about.

I believe that's about it, my friend. First I would try Heroku; if that doesn't solve it, you will have to send the headers and cookies the site needs. If it still doesn't work, I would try spinning up a machine on AWS (a t2.micro, which is in the free tier) and test directly from that machine's IP to see whether it can access the site (I believe it can).

Another alternative is to use a tunnel to make the connection. I've used this one a lot: https://ngrok.com/

The only problem is that the free version limits the number of connections per minute, but it is enough to test and see whether it solves the problem.

I believe that's it. If you have any questions, post them here and I'll try to help.

-1

You can run npm install cors --save; then, in your main Node file (usually index.js or server.js), add const cors = require('cors') and call app.use(cors()), where app is your server instance.

  • We didn't create this app part in the project; we just call the request and Cheerio modules. We already tried setting some headers in PHP and in .htaccess, and nothing worked...

Browse other questions tagged

You are not signed in. Login or sign up in order to post.