Adjusting CSV columns with Scrapy

Asked

Viewed 91 times

0

I'm having a problem: by default, Python separates the columns with commas when it generates the CSV file. I need each field of the created items to end up in its own column, but I haven't been able to make that work. Can anyone help me? The code and a sample of the CSV output follow.

`# -*- coding: utf-8 -*-
    from scrapy import *
    from projeto_iruan.items import *
    import csv

    class imprensaNacional(scrapy.Spider):
        """Spider that collects "AVISO DE LICITA..." notices from imprensanacional.gov.br.

        ``parse`` follows every link on the start page whose text contains
        "AVISO DE LICITA"; ``parseAviso`` extracts the notice fields from each
        linked page, appends them as one row to the CSV file named by
        ``output`` and also yields them as a dict for Scrapy's feed export.

        NOTE(review): the ``.encode('utf-8')`` calls before string
        concatenation and CSV writing suggest this targets Python 2 -- confirm
        before running on Python 3.
        """

        name = 'imprensaNacional'
        start_urls = ['http://www.imprensanacional.gov.br/leiturajornal?data=11-09-2018&secao=dou3']
        # Path of the CSV file written by hand in parseAviso.
        output = "output.csv"
        custom_settings = {
            # The feed format is selected by its string identifier; the
            # original passed the ``csv`` module object here by mistake.
            'FEED_FORMAT': 'csv'
        }

        def __init__(self, *args, **kwargs):
            """Initialise the spider and truncate the CSV output file.

            Positional and keyword arguments are forwarded to the base
            spider so that spider arguments keep working.
            """
            super(imprensaNacional, self).__init__(*args, **kwargs)
            # Start every run with an empty output file; parseAviso appends.
            open(self.output, "w").close()

        @staticmethod
        def _first_text(selector, query):
            """Return the first match of *query* under *selector*, or u'' if none."""
            value = selector.xpath(query).extract_first()
            return u'' if value is None else value

        def parse(self, response):
            """Yield one request per "AVISO DE LICITA..." link on the listing page."""
            url_base = 'http://www.imprensanacional.gov.br/'
            script = response.xpath('//*[@class="span8 hierarchy-wrapper"]//*[contains(text(),"AVISO DE LICITA")]')
            for urls in script:
                href = urls.xpath('.//@href').extract_first()
                if href is None:
                    # Matching node without a link: nothing to follow.
                    continue
                link_completo = url_base + href.encode('utf-8')
                yield Request(url=link_completo, callback=self.parseAviso)

        def parseAviso(self, response):
            """Extract the notice fields, append a CSV row and yield them as a dict.

            Missing nodes produce empty fields instead of aborting the whole
            page with an AttributeError/IndexError.
            """
            with open(self.output, "a") as f:
                writer = csv.writer(f)
                conteudo = response.xpath('//*[@class="journal-content-article"]')
                for info in conteudo:
                    # The numbered title line is queried once and reused for
                    # the title itself and for its 4th and 7th tokens.
                    numero = self._first_text(info, u'.//*[@class="identifica"]/text()[contains(.,"N\xba")]')
                    partes = numero.split()
                    titulo = numero.encode('utf-8')
                    pregao = (partes[3] if len(partes) > 3 else u'').encode('utf-8')
                    uasg = (partes[6] if len(partes) > 6 else u'').encode('utf-8')
                    tipo = self._first_text(info, './/*[@class="identifica"]/text()[contains(.,"AVISO")]').encode('utf-8')
                    pregoeiro = self._first_text(info, './/*[@class="assina"]/text()').encode('utf-8')
                    descricao = self._first_text(info, './/*[@class="dou-paragraph"]/text()').encode('utf-8')
                    dou = self._first_text(info, './/*[@class="dou-paragraph"]/text()[contains(.,"(")]').encode('utf-8')
                    orgao = self._first_text(info, './/*[@class="orgao-dou-data"]/text()').encode('utf-8')
                    data_publicacao = self._first_text(info, './/*[@class="publicado-dou-data"]/text()').encode('utf-8')
                    edicao_dou = self._first_text(info, './/*[@class="edicao-dou-data"]/text()').encode('utf-8')
                    secao = self._first_text(info, './/*[@class="secao-dou"]/text()').encode('utf-8')
                    pagina = self._first_text(info, './/*[@class="secao-dou-data"]/text()').encode('utf-8')

                    # NOTE(review): this item is populated but never yielded;
                    # the dict below is what reaches the feed export. Kept
                    # as in the original -- confirm whether it is still needed.
                    item = ImprensaNacional()
                    item.set_all()

                    item['Titulo'] = titulo
                    item['Tipo'] = tipo
                    item['Pregao'] = pregao
                    item['UASG'] = uasg
                    item['DOU'] = dou
                    item['DataPublicacao'] = data_publicacao
                    item['EdicaoDou'] = edicao_dou
                    item['SecaoDou'] = secao
                    item['Pagina'] = pagina
                    item['Orgao'] = orgao
                    item['Pregoeiro'] = pregoeiro
                    item['Url'] = response.url
                    item['Descricao'] = descricao
                    writer.writerow([
                        titulo, tipo, pregao, uasg, dou, data_publicacao,
                        edicao_dou, secao, pagina, orgao, pregoeiro,
                        response.url, descricao,
                    ])

                    # 'Pregoeiro' now carries the signer; the original put
                    # the pregao number there by mistake.
                    yield {'Titulo': titulo, 'Tipo': tipo, 'Pregao': pregao, 'UASG': uasg,
                           'DOU': dou, 'DataPublicacao': data_publicacao, 'Edicao': edicao_dou,
                           'Secao': secao, 'Pagina': pagina, 'Orgao': orgao, 'Pregoeiro': pregoeiro,
                           'Url': response.url, 'Descricao': descricao}









`

inserir a descrição da imagem aqui

1 answer

1

When creating your writer, set the delimiter to ";". Example:

csv.writer(f, delimiter =';')
  • Hey, thanks for your help, it helped me a lot, hehe. But is there a way to handle the special-character problems that appear?

Browse other questions tagged

You are not signed in. Login or sign up in order to post.