Create a loop that uses two data frames

Asked

Viewed 42 times

0

I need the average of each variable (x1, x2, x3, x4) in dados1 for the month it belongs to. Each average should be stored in the corresponding column (y1, y2, y3, y4) of dados2, in the row whose dados2$mes matches that month: the mean of x1 goes into y1, the mean of x2 into y2, and so on.

I would like a more viable alternative than the one I made below.

# Input data: 30 observations, each tagged with its month number in `mes`.
# x1..x4 are the multiples of 2, 3, 4 and 5 respectively.
dados1 <- data.frame(
  mes = c(5, 9, 7, 3, 8, 2, 4, 1, 6, 10, 2, 11, 12, 10, 12,
          2, 8, 9, 10, 7, 6, 4, 5, 7, 3, 1, 9, 5, 2, 6),
  x1 = seq_len(30) * 2,
  x2 = seq_len(30) * 3,
  x3 = seq_len(30) * 4,
  x4 = seq_len(30) * 5
)

# Output table: one row per month (1-12); y1..y4 start out as NA.
dados2 <- data.frame(
  mes = as.numeric(1:12),
  y1 = NA,
  y2 = NA,
  y3 = NA,
  y4 = NA
)

I created the counters for every month:

# One pair of accumulators per month, all starting at zero:
#   contador_dias_<month> is the row counter for that month,
#   somatorio_<month>     is the running sum for that month.
contador_dias_jan <- somatorio_jan <- 0  # January
contador_dias_fev <- somatorio_fev <- 0  # February
contador_dias_mar <- somatorio_mar <- 0  # March
contador_dias_abr <- somatorio_abr <- 0  # April
contador_dias_mai <- somatorio_mai <- 0  # May
contador_dias_jun <- somatorio_jun <- 0  # June
contador_dias_jul <- somatorio_jul <- 0  # July
contador_dias_ago <- somatorio_ago <- 0  # August
contador_dias_set <- somatorio_set <- 0  # September
contador_dias_out <- somatorio_out <- 0  # October
contador_dias_nov <- somatorio_nov <- 0  # November
contador_dias_dez <- somatorio_dez <- 0  # December

The loop below only handles January and February, but the "else if" chain would continue in the same way through December.

# Walk over every row of dados1 and, depending on the row's month, bump that
# month's row counter and add the row's x1 value to that month's running sum.
# Only January (1) and February (2) are handled here; rows from any other
# month are skipped.
for (L in seq_len(nrow(dados1))) {
  if (dados1$mes[L] == 1) {
    # January
    contador_dias_jan <- contador_dias_jan + 1
    somatorio_jan <- somatorio_jan + dados1$x1[L]
  } else if (dados1$mes[L] == 2) {
    # February
    contador_dias_fev <- contador_dias_fev + 1
    somatorio_fev <- somatorio_fev + dados1$x1[L]
  }
}

# Write each month's mean of x1 (sum divided by row count) into the matching
# row of dados2$y1. Only rows 1 (January) and 2 (February) are filled; the
# remaining rows are left untouched.
for (L in seq_len(12)) {
  if (L == 1) {
    # January
    dados2[L, "y1"] <- somatorio_jan / contador_dias_jan
  } else if (L == 2) {
    # February
    dados2[L, "y1"] <- somatorio_fev / contador_dias_fev
  }
}

As it stands, this would only fill y1; y2, y3 and y4 are still missing. Any suggestions?

1 answer

3


In general, explicit loops are neither necessary nor advisable in R; the apply family of functions is more efficient. For grouped operations, combine it with split:

# Split the rows of dados1 into a list with one data frame per month
dadosL <- split(dados1, f = dados1$mes)

# Compute the column means of every data frame in the list
dadosM <- lapply(dadosL, function(grupo) colMeans(grupo))

# Stack the per-month mean vectors into a single matrix, one row per month
dadosF <- do.call(rbind, dadosM)

head(dadosF, n = 4)
#>   mes x1   x2 x3   x4
#> 1   1 34 51.0 68 85.0
#> 2   2 31 46.5 62 77.5
#> 3   3 29 43.5 58 72.5
#> 4   4 29 43.5 58 72.5

The dplyr and data.table packages automate these split/apply operations:

dplyr

library(dplyr)

# Mean of every non-grouping column, one row per month.
# across() replaces the superseded scoped verb summarise_all().
dados1 %>%
  group_by(mes) %>%
  summarise(across(everything(), mean))

data.table

library(data.table)

# Convert dados1 to class data.table
setDT(dados1)

# Mean of every non-grouping column (.SD), computed within each month
dados1[, lapply(.SD, mean), by = mes]
  • Thank you for your guidance, @Carlos Eduardo Lagosta. You helped me a lot! I will read up more on the packages used in your reply.

Browse other questions tagged

You are not signed in. Login or sign up in order to post.