1
I have two CSV tables and need to merge with the unusual columns in the two tables creating a single table.
However, when I write the code, it is a very long time of processing and not complete. How could I write differently the code below?
import pandas as pd
import numpy as np
file_path = "./exercise_data/"
data_a = pd.read_csv(
file_path + "features.csv",
delimiter="|", dtype=str
)
data_b = pd.read_csv(
file_path + "historic_sales.csv",
delimiter="|", dtype=str
)
data_c = pd.read_csv(
file_path + "store_info.csv",
delimiter=";", dtype=str
)
# Columns data_a ("features.csv")
data_b["Temperature"] = ""
data_b["Fuel_Price"] = ""
data_b["MarkDown1"] = ""
data_b["MarkDown2"] = ""
data_b["MarkDown3"] = ""
data_b["MarkDown4"] = ""
data_b["MarkDown5"] = ""
data_b["CPI"] = ""
data_b["Unemployment"] = ""
# Columns data_c ("store_info.csv")
data_b["Type"] = ""
data_b["Size"] = ""
for index, row in data_b.iterrows():
store = row["Store"]
date = row["Date"]
try:
df_feature = data_a.loc[
(data_a["Store"] == store) & (data_a["Date"] == date)
].iloc[0]
data_b.at[index, "Temperature"] = df_feature["Temperature"]
data_b.at[index, "Fuel_Price"] = df_feature["Fuel_Price"]
data_b.at[index, "MarkDown1"] = df_feature["MarkDown1"]
data_b.at[index, "MarkDown2"] = df_feature["MarkDown2"]
data_b.at[index, "MarkDown3"] = df_feature["MarkDown3"]
data_b.at[index, "MarkDown4"] = df_feature["MarkDown4"]
data_b.at[index, "MarkDown5"] = df_feature["MarkDown5"]
data_b.at[index, "CPI"] = df_feature["CPI"]
data_b.at[index, "Unemployment"] = df_feature["Unemployment"]
except:
pass
try:
df_info = data_c.loc[data_c["Store"] == store].iloc[0]
data_b.at[index, "Type"] = df_info["Type"]
data_b.at[index, "Size"] = df_info["Size"]
except:
pass
data_b.to_csv('a_and_c_to_b.csv')