Creating a CSV from 3 CSV tables

Asked

Viewed 119 times

1

I have two CSV tables and need to merge with the unusual columns in the two tables creating a single table.

However, when I write the code, it is a very long time of processing and not complete. How could I write differently the code below?

import pandas as pd
import numpy as np

file_path = "./exercise_data/"

data_a = pd.read_csv(
    file_path + "features.csv",
    delimiter="|", dtype=str
)
data_b = pd.read_csv(
    file_path + "historic_sales.csv",
    delimiter="|", dtype=str
)
data_c = pd.read_csv(
    file_path + "store_info.csv",
    delimiter=";", dtype=str
)

# Columns data_a ("features.csv")
data_b["Temperature"] = ""
data_b["Fuel_Price"] = ""
data_b["MarkDown1"] = ""
data_b["MarkDown2"] = ""
data_b["MarkDown3"] = ""
data_b["MarkDown4"] = ""
data_b["MarkDown5"] = ""
data_b["CPI"] = ""
data_b["Unemployment"] = ""

# Columns data_c ("store_info.csv")
data_b["Type"] = ""
data_b["Size"] = ""


for index, row in data_b.iterrows():
    store = row["Store"]
    date = row["Date"]
    try:
        df_feature = data_a.loc[
            (data_a["Store"] == store) & (data_a["Date"] == date)
        ].iloc[0]
        data_b.at[index, "Temperature"] = df_feature["Temperature"]
        data_b.at[index, "Fuel_Price"] = df_feature["Fuel_Price"]
        data_b.at[index, "MarkDown1"] = df_feature["MarkDown1"]
        data_b.at[index, "MarkDown2"] = df_feature["MarkDown2"]
        data_b.at[index, "MarkDown3"] = df_feature["MarkDown3"]
        data_b.at[index, "MarkDown4"] = df_feature["MarkDown4"]
        data_b.at[index, "MarkDown5"] = df_feature["MarkDown5"]
        data_b.at[index, "CPI"] = df_feature["CPI"]
        data_b.at[index, "Unemployment"] = df_feature["Unemployment"]
    except:
        pass
    try:
        df_info = data_c.loc[data_c["Store"] == store].iloc[0]
        data_b.at[index, "Type"] = df_info["Type"]
        data_b.at[index, "Size"] = df_info["Size"]
    except:
        pass


data_b.to_csv('a_and_c_to_b.csv')

1 answer

1

If I understand correctly you have two dataframes A and B and you want to create a new dataframe with the columns of A and B, you can do this on pandas using pd.concat

import pandas as pd

dataA = pd.read_csv(...)
dataB = pd.read_csv(...)

#Usamos axis = 1 para dizer pro pandas juntar os DF's nas colunas
dataC = pd.concat([dataA, dataB], axis=1)

For more information check out here

Browser other questions tagged

You are not signed in. Login or sign up in order to post.