Problems with CSV file (python)

Asked

Viewed 90 times

-1

I am using some basic code from a channel, but I would like to extract the quarterly data instead, so I changed only the link in the original code. When I try to run the program, however, I get an error. I am a beginner in Python and could not fully understand the program, so I would like to know what I am doing wrong. Thank you in advance.

Original Program: https://gist.github.com/Vido/cbc33862dd27a22790df633f1d113ae6

import zipfile

def download():
    """Download and unpack the yearly CVM ITR archives for 2010-2019.

    For each year, fetches ``itr_cia_aberta_<year>.zip`` from the CVM
    open-data server, saves it in the current working directory and
    extracts its contents there.

    Raises:
        requests.RequestException: on an HTTP error status, a timeout or a
            connection failure.
        zipfile.BadZipFile: if a downloaded file is not a valid archive.
    """
    # Imported locally: this snippet only imports ``zipfile`` at module
    # level, so without this line ``requests`` would be an undefined name.
    import requests

    cvmzip_list = [
        'itr_cia_aberta_%d.zip' % y for y in range(2010, 2020)
    ]

    base_url = 'http://dados.cvm.gov.br/dados/CIA_ABERTA/DOC/ITR/DADOS/'

    for cvmzip in cvmzip_list:
        print('Arquivo:', cvmzip)
        # A timeout keeps a stalled connection from hanging the script.
        response = requests.get(base_url + cvmzip, timeout=60)
        # Fail fast on 4xx/5xx instead of saving an error page as a .zip,
        # which would only surface later as a confusing BadZipFile.
        response.raise_for_status()
        with open(cvmzip, 'wb') as fp:
            print('Download...')
            fp.write(response.content)

        with zipfile.ZipFile(cvmzip, 'r') as zip_ref:
            print('Unzip...')
            zip_ref.extractall()
            
# Fetch and extract every yearly archive before the CSV files are read.
download()

import csv
from collections import defaultdict
from dateutil import parser as du_parser


def carrega_dados():
    """Load the ``itr_cia_aberta_con_<year>.csv`` files into a nested mapping.

    Files for 2010-2019 are read from the current working directory as
    ISO-8859-1 text with ``;`` as the delimiter; the first line of each file
    is skipped as a header.

    Returns:
        A nested ``defaultdict`` laid out as
        ``result[company][category][ref] = value`` where ``company`` is
        column 3, ``category`` is the pair (column 11, column 12), ``ref`` is
        the pair of parsed dates from columns 9 and 10, and ``value`` is
        column 13 converted to ``float``.
    """
    # Layout: company -> category -> reference dates
    result = defaultdict(lambda: defaultdict(dict))

    filenames = []
    for year in range(2010, 2020):
        filenames.append('itr_cia_aberta_con_%d.csv' % year)

    for filename in filenames:
        print('Processando:', filename)
        with open(filename, encoding='iso-8859-1') as handle:
            # Drop the header line before handing the file to the CSV reader.
            next(handle)

            for fields in csv.reader(handle, delimiter=';'):
                company = fields[3]
                ref_dates = tuple(
                    [du_parser.parse(fields[9]), du_parser.parse(fields[10])]
                )
                category = (fields[11], fields[12])
                result[company][category][ref_dates] = float(fields[13])

    return result

# Build the nested company -> category -> reference-dates mapping.
dados = carrega_dados()

import pandas as pd

# A dict of dicts becomes a DataFrame whose columns are the outer keys,
# so the printed column labels are the keys of ``dados``.
tabela=pd.DataFrame(dados)
print(tabela.columns.values)

1 answer

0


Hi. Looking at the CSV you chose, the file name your code searches for does not exist in the extracted data. I therefore modified the code to use the name of the unzipped CSV, "itr_cia_aberta_%d.csv". I also changed the column indices so that you can reuse the code: if you look, you will see that the DRE CSV has a different number of columns than the ITR one.

import zipfile
import requests

def download():
    """Download and unpack the yearly CVM ITR archives for 2010-2019.

    For each year, fetches ``itr_cia_aberta_<year>.zip`` from the CVM
    open-data server, saves it in the current working directory and
    extracts its contents there.

    Raises:
        requests.RequestException: on an HTTP error status, a timeout or a
            connection failure.
        zipfile.BadZipFile: if a downloaded file is not a valid archive.
    """
    cvmzip_list = [
        'itr_cia_aberta_%d.zip' % y for y in range(2010, 2020)
    ]

    base_url = 'http://dados.cvm.gov.br/dados/CIA_ABERTA/DOC/ITR/DADOS/'

    for cvmzip in cvmzip_list:
        print('Arquivo:', cvmzip)
        # A timeout keeps a stalled connection from hanging the script.
        response = requests.get(base_url + cvmzip, timeout=60)
        # Fail fast on 4xx/5xx instead of saving an error page as a .zip,
        # which would only surface later as a confusing BadZipFile.
        response.raise_for_status()
        with open(cvmzip, 'wb') as fp:
            print('Download...')
            fp.write(response.content)

        with zipfile.ZipFile(cvmzip, 'r') as zip_ref:
            print('Unzip...')
            zip_ref.extractall()


# Fetch and extract every yearly archive before the CSV files are read.
download()

import csv
from collections import defaultdict
from dateutil import parser as du_parser




def carrega_dados():
    """Load the ``itr_cia_aberta_<year>.csv`` files into a nested mapping.

    Files for 2010-2019 are read from the current working directory as
    ISO-8859-1 text with ``;`` as the delimiter. The first record of each
    file is skipped as a header and blank rows are ignored.

    Returns:
        A nested ``defaultdict`` laid out as
        ``dados[empresa][categoria][ref] = valor`` where ``empresa`` is
        column 3, ``categoria`` is column 5, ``ref`` is the pair of parsed
        dates from columns 1 and 7, and ``valor`` is column 6 converted to
        ``float``.

    Raises:
        FileNotFoundError: if one of the yearly CSV files is missing.
        IndexError: if a non-blank row has fewer columns than expected.
        ValueError: if a date or numeric field cannot be parsed.
    """
    # Layout: company -> category -> reference dates
    dados = defaultdict(lambda: defaultdict(dict))

    cvm_csv_list = [
        'itr_cia_aberta_%d.csv' % y for y in range(2010, 2020)
    ]

    print(cvm_csv_list)

    for cvm_csv in cvm_csv_list:

        print('Processando:', cvm_csv)
        # newline='' leaves line-ending handling to the csv module, as its
        # documentation requires, so quoted fields with embedded newlines
        # are parsed correctly.
        with open(cvm_csv, encoding='iso-8859-1', newline='') as fp:

            csv_reader = csv.reader(fp, delimiter=';')
            # Skip the header; the default avoids StopIteration on an
            # empty file.
            next(csv_reader, None)

            for row in csv_reader:
                if not row:
                    # csv.reader yields [] for blank lines; nothing to load.
                    continue

                empresa = row[3]
                ref = (du_parser.parse(row[1]), du_parser.parse(row[7]))
                categoria = row[5]
                valor = float(row[6])

                dados[empresa][categoria][ref] = valor

    return dados


# Build the nested company -> category -> reference-dates mapping.
dados = carrega_dados()

import pandas as pd

# A dict of dicts becomes a DataFrame whose columns are the outer keys,
# so the printed column labels are the keys of ``dados``.
tabela = pd.DataFrame(dados)
print(tabela.columns.values)

Browse other questions tagged

You are not signed in. Login or sign up in order to post.