r Loop by script condition

Asked

Viewed 46 times

0

Is it possible to build a loop over the steps below, starting from this data?

library(dplyr)
# Month label for each of the 20 observations (the abbreviations look
# Portuguese, e.g. "fev", "maio" -- confirm with the data owner).
mes <- c("jan","fev","fev","mar","jan","nov","mar","mar","mar","jan","nov","maio","maio","maio","nov","nov","mar","jan","mar","jan")
# Nine numeric series, a to i, each holding one value per observation
# (20 values each, aligned position by position with `mes`).
# NOTE(review): the vector `c` shares its name with base::c(). Calls such as
# c(...) keep working because R ignores non-function objects when it looks up
# a name in call position, but the name is easy to misread.
a <- c(32.3,32.7,32.6,33.1,33.0,33.5,33.4,33.4,34.9,32.6,33.1,33.0,33.5,33.4,32.6,33.1,33.0,33.5,33.4,12)
b <- c(19.2,19.2,19.6,19.7,19.7,19.9,20.0,20.0,20.4,32.6,33.1,33.0,33.5,33.4,32.6,33.1,33.0,33.5,33.4,50)
c <- c(14.7,15.0,15.6,16.2,16.4,17.0,17.7,18.3,19.1,32.6,33.1,33.0,33.5,33.4,32.6,33.1,33.0,33.5,33.4,32)
d <- c(24.2,24.3,24.7,25.0,25.5,26.4,26.7,67.1,27.6,32.6,33.1,33.0,33.5,33.4,32.6,33.1,33.0,33.5,33.4,10)
e <- c(24.2,24.3,14.7,25.0,25.5,26.4,26.7,27.1,27.6,32.6,33.1,33.0,33.5,33.4,32.6,33.1,33.0,33.5,33.4,87)
f <- c(24.2,94.3,24.7,25.0,25.5,26.4,86.7,27.1,27.6,32.6,33.1,33.0,33.5,33.4,32.6,33.1,33.0,33.5,33.4,40)
g <- c(24.2,24.3,64.7,25.0,25.5,26.4,26.7,27.1,27.6,32.6,33.1,33.0,33.5,33.4,32.6,33.1,33.0,33.5,33.4,80)
h <- c(24.2,24.3,84.7,25.0,25.5,26.4,26.7,27.1,27.6,32.6,33.1,33.0,33.5,33.4,32.6,33.1,33.0,33.5,33.4,30)
i <- c(24.2,24.3,24.7,25.0,25.5,26.4,26.7,27.1,27.6,32.6,33.1,33.0,33.5,33.4,32.6,33.1,33.0,33.5,33.4,10)
# Combine the labels and the nine series into one data frame
# (20 rows; columns mes, a, b, c, d, e, f, g, h, i).
temp <- data.frame(mes,a,b,c,d,e,f,g,h,i)
# Regressor used further down in lm(x ~ y): nine values, one per series a..i.
# NOTE(review): what these nine numbers represent is not shown here -- confirm.
y <- c(1, 3, 5, 7,8,11,13,17,23)

Then I do the grouping to get the number of observations n and the averages of the whole sample:


# Number of observations per month: one row per distinct value of `mes`,
# with the count stored in column `n`. n() is dplyr's group-size helper and
# replaces the indirect length(mes).
temp.group <- temp %>%
  group_by(mes) %>%
  summarise(n = n())

# Mean of every series within each month. across(everything(), mean) covers
# all non-grouping columns and replaces the superseded summarise_all().
temp.g.mean <- temp %>%
  group_by(mes) %>%
  summarise(across(everything(), mean))

After that, I take the logarithm of the group means and fit a linear regression for each month:

# Natural log of every group mean. The first column of temp.g.mean (the group
# label) is dropped. log() is vectorised, so it is applied to the whole matrix
# at once; unlike apply(..., 2, log) this keeps a matrix even when there is
# only a single group (one row).
log_temp <- log(as.matrix(temp.g.mean[-1]))

# Each row of log_temp is regressed on y, so y needs exactly one value per
# column; fail early with a clear message instead of a cryptic lm() error.
stopifnot(length(y) == ncol(log_temp))

# One linear model per group (row): log(mean) ~ y.
model_list <- apply(log_temp, 1, function(x) lm(x ~ y))

# Intercept and slope of every model; one row per group, columns
# "(Intercept)" and "y". vapply() enforces two numeric values per model.
coef_list <- t(vapply(model_list, coef, numeric(2)))

model_smry <- lapply(model_list, summary)

# R squared of every model, as a plain numeric vector.
R2_list <- vapply(model_smry, function(LM) LM[["r.squared"]], numeric(1))
# p-values of intercept and slope; the column is selected by name rather than
# by position so the intent is explicit.
pval_list <- t(vapply(model_smry, function(LM) {
  LM[["coefficients"]][, "Pr(>|t|)"]
}, numeric(2)))

Then I calculate a rate of increase and build an equation:

# Turn the coefficient matrix into a data frame (data.frame() renames the
# "(Intercept)" column to a syntactic name) and append the rate of increase
# r = exp(slope) - 1, where the slope is the coefficient of y.
coef_list <- data.frame(coef_list)
coef_list[["r"]] <- exp(coef_list[["y"]]) - 1


# Fixed text fragments placed before and after the growth factor.
string1 <- "mesf = mesi *"
string3 <- "^t"

# One row per fitted model: the rate r, the growth factor r + 1, the two text
# fragments, the factor rounded to 5 digits, and the assembled equation label.
eq <- data.frame(coef_list$r)
eq[["r1"]] <- eq[[1]] + 1
eq[["s1"]] <- string1
eq[["s2"]] <- round(eq[["r1"]], digits = 5)
eq[["s3"]] <- string3
# paste() joins the three pieces with single spaces.
eq[["eq"]] <- paste(eq[["s1"]], eq[["s2"]], eq[["s3"]])

Finally, I put it all together in one data.frame

# Round the pieces for presentation. Note that these assignments overwrite the
# working objects: temp.g.mean loses its first (label) column here.
temp.g.mean <- round(temp.g.mean[-1], digits = 1)
R2_list <- data.frame(round(R2_list, digits = 3))
pval_list <- data.frame(pval_list)
coef_list <- round(coef_list, digits = 5)

# Assemble one row per group: label, rounded means, group size, equation
# label, R squared, slope p-value, intercept (a), slope (b) and rate (r).
result <- data.frame(
  temp.group[, 1],
  temp.g.mean,
  n = temp.group[["n"]],
  eq = eq[["eq"]],
  R2 = R2_list[[1]],
  pvalor = pval_list[["y"]],
  a = coef_list[[1]],
  b = coef_list[[2]],
  r = coef_list[[3]]
)

I would like to loop over these steps using the data.frame temp, varying only which columns are used, with the columns always analysed together in sets of at least 3 and at most the total number of existing columns.

The purpose of doing this is to save in the results table only the months (mes) that have a minimum of n = 4 observations.

Is there a way to run these loops and save the output of each analysis in the results one by one, provided that n is at least 4?

If you could point me in the right direction, I’d be extremely grateful.

  • I’m not sure if you want to run regressions with all segments of 3 or more consecutive columns or if you want to start with the first 3 columns and move on from there.

1 answer

1


This is far from being a complete answer, but if you want to keep only the months with 4 or more rows, you can start with this code.

library(tidyverse)

# Months with at least 4 observations, together with their counts.
temp.group <- temp %>%
  group_by(mes) %>%
  summarise(n = n()) %>%
  filter(n >= 4)

# Per-month means, restricted to the months kept above. semi_join() only
# filters the rows of temp; an inner_join() would also copy the count column n
# into the data, where it would be averaged like a measurement and end up as an
# extra column of temp.g.mean, breaking the later regression against the nine
# values of y.
temp.g.mean <- temp %>%
  semi_join(temp.group, by = "mes") %>%
  group_by(mes) %>%
  summarise(across(everything(), mean))

The final data.frame temp.g.mean contains only the months with 4 or more rows. This is done with the function filter. You can then process the data as in the question.

Browse other questions tagged

You are not signed in. Login or sign up in order to post.