Adjust columns csv with Scrapy

Question

Adjust columns csv with Scrapy

Asked 7 years, 6 months ago

Viewed 91 times

0

I’m having a problem: by default, when Python generates the CSV file, it separates the columns with commas, but I need each field of the created items to end up in its own column, and I have not been able to achieve that. Can anyone help me? The code and a sample of the CSV output follow.

`# -*- coding: utf-8 -*-
    from scrapy import *
    from projeto_iruan.items import *
    import csv

    class imprensaNacional(scrapy.Spider):
        """Spider that follows "AVISO DE LICITA..." links and exports each notice.

        ``parse`` collects the notice links from the listing page in
        ``start_urls``; ``parseAviso`` extracts the fields of every notice,
        appends them as one row to ``output`` and yields them as a dict so
        Scrapy's feed export can pick them up as well.

        NOTE(review): text fields are encoded to UTF-8 bytes, which is what the
        Python 2 ``csv`` module expects. On Python 3 the encode step should be
        dropped and the file opened with an explicit encoding -- confirm the
        target interpreter.
        """

        name = 'imprensaNacional'
        start_urls = ['http://www.imprensanacional.gov.br/leiturajornal?data=11-09-2018&secao=dou3']
        output = "output.csv"
        custom_settings = {
            # The setting takes the exporter name as a string, not the csv module.
            'FEED_FORMAT': 'csv'
        }

        # XPath (relative to an article node) of the heading line containing "Nº".
        _NUMBERED_HEADING = u'.//*[@class="identifica"]/text()[contains(.,"N\xba")]'

        def __init__(self, *args, **kwargs):
            """Initialise the spider and truncate the output file.

            Positional and keyword arguments are forwarded to ``scrapy.Spider``
            so that spider arguments keep working.
            """
            super(imprensaNacional, self).__init__(*args, **kwargs)
            # Start every run from an empty output file.
            with open(self.output, "w"):
                pass

        @staticmethod
        def _first_text(node, query):
            """Return the first match of ``query`` under ``node`` as UTF-8 bytes.

            Returns an empty byte string when the query matches nothing, so a
            missing element produces an empty column instead of an exception.
            """
            value = node.xpath(query).extract_first()
            return b'' if value is None else value.encode('utf-8')

        def parse(self, response):
            """Yield one request per link whose text contains "AVISO DE LICITA"."""
            anchors = response.xpath('//*[@class="span8 hierarchy-wrapper"]//*[contains(text(),"AVISO DE LICITA")]')
            for anchor in anchors:
                href = anchor.xpath('.//@href').extract_first()
                if href is None:
                    # Matching node carries no link; nothing to follow.
                    continue
                # urljoin resolves both relative and absolute hrefs against the page URL.
                yield Request(url=response.urljoin(href), callback=self.parseAviso)

        def parseAviso(self, response):
            """Extract every notice on the page, append it to the CSV and yield it.

            Yields one dict per ``journal-content-article`` node. Fields whose
            element is missing are emitted as empty values.
            """
            with open(self.output, "a") as f:
                writer = csv.writer(f)
                for info in response.xpath('//*[@class="journal-content-article"]'):
                    # The numbered heading is queried once and reused for three fields.
                    heading = info.xpath(self._NUMBERED_HEADING).extract_first() or u''
                    tokens = heading.split()
                    titulo = heading.encode('utf-8')
                    # Fourth and seventh whitespace-separated tokens of the heading.
                    pregao = tokens[3].encode('utf-8') if len(tokens) > 3 else b''
                    uasg = tokens[6].encode('utf-8') if len(tokens) > 6 else b''

                    tipo = self._first_text(info, './/*[@class="identifica"]/text()[contains(.,"AVISO")]')
                    pregoeiro = self._first_text(info, './/*[@class="assina"]/text()')
                    descricao = self._first_text(info, './/*[@class="dou-paragraph"]/text()')
                    dou = self._first_text(info, './/*[@class="dou-paragraph"]/text()[contains(.,"(")]')
                    orgao = self._first_text(info, './/*[@class="orgao-dou-data"]/text()')
                    data_publicacao = self._first_text(info, './/*[@class="publicado-dou-data"]/text()')
                    edicao_dou = self._first_text(info, './/*[@class="edicao-dou-data"]/text()')
                    secao = self._first_text(info, './/*[@class="secao-dou"]/text()')
                    pagina = self._first_text(info, './/*[@class="secao-dou-data"]/text()')

                    writer.writerow([
                        titulo, tipo, pregao, uasg, dou, data_publicacao, edicao_dou,
                        secao, pagina, orgao, pregoeiro, response.url, descricao,
                    ])

                    yield {
                        'Titulo': titulo,
                        'Tipo': tipo,
                        'Pregao': pregao,
                        'UASG': uasg,
                        'DOU': dou,
                        'DataPublicacao': data_publicacao,
                        'Edicao': edicao_dou,
                        'Secao': secao,
                        'Pagina': pagina,
                        'Orgao': orgao,
                        'Pregoeiro': pregoeiro,
                        'Url': response.url,
                        'Descricao': descricao,
                    }









`

1 answer

Browse other questions tagged python web-scraping scrapy

You are not signed in. Login or sign up in order to post.

by Lucas Miranda • **1,314** points · Answer 1 · 2018-09-17T12:34:00+00:00

1

When creating your writer, change the delimiter to ";". Example:

csv.writer(f, delimiter =';')

Hey, thanks for your help, it helped me a lot, hehe. But is there a way to handle the special-character problems that show up?

– Jonathan Igor Bockorny Pereira

2018/09/17 at 23:43