How do I convert a .csv file generated by Python to .xlsx?
I have two problems:

The first is that I couldn't figure out how to do this conversion.

The second is that even when I run the command

crawl <nome> -o <nome>.csv -s CSV_DELIMITER=";"

for the Scrapy spider, the file that gets generated does not have its columns split when I open it directly in Excel; everything stays separated by commas.
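For the conversion itself, a minimal sketch of one possible approach, assuming pandas and openpyxl are installed (both libraries and the file name saida.csv below are my own choices, not part of the project):

import pandas as pd

# Read the CSV produced by the spider; adjust sep and encoding to match the export
df = pd.read_csv('saida.csv', sep=',', encoding='iso-8859-1')

# Write it back out as .xlsx (pandas uses openpyxl as the Excel writer engine)
df.to_excel('saida.xlsx', index=False)

Running this once after the crawl finishes produces a real .xlsx file, so Excel no longer has to guess the delimiter.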
My code:
# -*- coding: utf-8 -*-
import scrapy
from scrapy import Request

from Aranhas.items import ImprensaNacional
from Aranhas.settings import *
from Aranhas.pipelines import *


class imprensa_Nacional(scrapy.Spider):
    name = 'imprensa_Nacional'
    start_urls = ['http://www.imprensanacional.gov.br/leiturajornal?data=11-09-2018&secao=dou3']

    custom_settings = {
        'FEED_EXPORT_FIELDS': [
            'Titulo', 'Tipo', 'Pregao', 'UASG', 'DOU', 'DataPublicacao', 'EdicaoDou',
            'SecaoDou', 'Pagina', 'Orgao', 'Pregoeiro', 'Url', 'Descricao'
        ],
        'FEED_FORMAT': 'csv'
    }

    def parse(self, response):
        url_base = 'http://www.imprensanacional.gov.br/'
        # Collect the link of every "AVISO DE LICITA..." entry on the listing page
        script = response.xpath('//*[@class="span8 hierarchy-wrapper"]//*[contains(text(),"AVISO DE LICITA")]')
        for urls in script:
            links = urls.xpath('.//@href').extract_first()
            link_completo = url_base + str(links)
            yield Request(url=link_completo, callback=self.parseAviso)

    def parseAviso(self, response):
        # Extract the fields of each notice from the article body
        conteudo = response.xpath('//*[@class="journal-content-article"]')
        for info in conteudo:
            titulo = info.xpath('.//*[@class="identifica"]/text()').extract_first().strip()
            pregao = info.xpath(u'.//*[@class="identifica"]/text()[contains(.,"N\xba")]').extract_first().split()[3].strip()
            uasg = info.xpath(u'.//*[@class="identifica"]/text()[contains(.,"N\xba")]').extract_first().split()[6].strip()
            tipo = info.xpath('.//*[@class="identifica"]/text()[contains(.,"AVISO")]').extract_first().strip()
            pregoeiro = info.xpath('.//*[@class="assina"]/text()').extract_first().strip()
            descricao = info.xpath('.//*[@class="dou-paragraph"]/text()').extract_first().strip()
            dou = info.xpath('.//*[@class="dou-paragraph"]/text()[contains(.,"(")]').extract_first().strip()
            orgao = info.xpath('.//*[@class="orgao-dou-data"]/text()').extract_first().strip()
            data_publicacao = info.xpath('.//*[@class="publicado-dou-data"]/text()').extract_first().strip()
            edicao_dou = info.xpath('.//*[@class="edicao-dou-data"]/text()').extract_first().strip()
            secao = info.xpath('.//*[@class="secao-dou"]/text()').extract_first().strip()
            pagina = info.xpath('.//*[@class="secao-dou-data"]/text()').extract_first().strip()

            item = ImprensaNacional()
            item.set_all()
            item['Titulo'] = titulo.encode('iso-8859-1')
            item['Tipo'] = tipo.encode('iso-8859-1')
            item['Pregao'] = pregao.encode('iso-8859-1')
            item['UASG'] = uasg.encode('iso-8859-1')
            item['DOU'] = dou.encode('iso-8859-1')
            item['DataPublicacao'] = data_publicacao.encode('iso-8859-1')
            item['EdicaoDou'] = edicao_dou.encode('iso-8859-1')
            item['SecaoDou'] = secao.encode('iso-8859-1')
            item['Pagina'] = pagina.encode('iso-8859-1')
            item['Orgao'] = orgao.encode('iso-8859-1')
            item['Pregoeiro'] = pregoeiro.encode('iso-8859-1')
            item['Url'] = response.url.encode('iso-8859-1')
            item['Descricao'] = descricao.encode('iso-8859-1')
            yield item
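For the delimiter problem: as far as I can tell Scrapy has no built-in CSV_DELIMITER setting, but CsvItemExporter forwards extra keyword arguments to csv.writer, so one possible sketch is a small custom exporter that forces ';' (the module path Aranhas.exporters and the class name SemicolonCsvItemExporter are my own, hypothetical names):

from scrapy.exporters import CsvItemExporter

class SemicolonCsvItemExporter(CsvItemExporter):
    # Same exporter Scrapy already uses for CSV output, but with ';' as the field separator
    def __init__(self, *args, **kwargs):
        kwargs['delimiter'] = ';'
        super(SemicolonCsvItemExporter, self).__init__(*args, **kwargs)

and register it in settings.py (or in custom_settings):

FEED_EXPORTERS = {
    'csv': 'Aranhas.exporters.SemicolonCsvItemExporter',
}

With ';' as the separator, an Excel install configured for a pt-BR locale will usually split the columns on its own when the .csv is double-clicked.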