Apply function, extract regression data

Asked

Viewed 40 times

2

I have the following sample:

# Sample data as a structure() literal (dput-style): a data.frame with 21 rows.
#   POP     - factor whose values run pop1, pop2, ..., pop21 in row order
#             (the levels themselves are stored in alphabetical order).
#   a1..a14 - 14 numeric columns, one value per row; many cells are 0.
x <- structure(list(POP = structure(c(1L, 12L, 15L, 16L, 17L, 18L, 
19L, 20L, 21L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 13L, 
14L), .Label = c("pop1", "pop10", "pop11", "pop12", "pop13", 
"pop14", "pop15", "pop16", "pop17", "pop18", "pop19", "pop2", 
"pop20", "pop21", "pop3", "pop4", "pop5", "pop6", "pop7", "pop8", 
"pop9"), class = "factor"), a1 = c(91, 26.7, 51.9, 14, 0, 15.3, 
34.4, 19.1, 10.2, 52.5, 43.6, 13.1, 47.1, 34.7, 0, 58.9, 66.8, 
0, 0, 0, 0), a2 = c(92.9, 27.7, 54.1, 14.3, 0, 16.2, 35, 19.1, 
11.1, 52.5, 44.6, 13.4, 48.7, 34.4, 0, 59.5, 72.3, 0, 0, 0, 0
), a3 = c(92.6, 27.4, 54.7, 13.7, 0, 16.2, 36, 0, 11.1, 53.2, 
45.2, 13.7, 49.3, 0, 0, 59.5, 74.5, 0, 0, 0, 0), a4 = c(95.5, 
28.3, 57.3, 14.6, 0, 16.9, 36.9, 0, 11.8, 56.3, 47.1, 14, 53.2, 
0, 0, 62.7, 84.4, 0, 0, 0, 0), a5 = c(97.4, 28.6, 61.4, 14.3, 
0, 17.5, 36.9, 0, 12.4, 55.7, 47.4, 14.6, 53.8, 0, 0, 62.4, 0, 
0, 0, 0, 0), a6 = c(97.7, 29.3, 63.3, 14.6, 0, 18.5, 38.8, 0, 
13.1, 57.3, 49, 15.3, 55.4, 0, 0, 62.7, 0, 0, 0, 0, 0), a7 = c(102.2, 
0, 68.1, 14.6, 11.1, 20.1, 43.3, 0, 14.6, 64.9, 53.2, 0, 60.5, 
0, 0, 62.7, 0, 0, 0, 0, 0), a8 = c(106.3, 0, 71.9, 14.3, 0, 19.7, 
45.8, 0, 15.9, 70.7, 57.3, 0, 67.8, 0, 10.5, 0, 0, 0, 10, 0, 
0), a9 = c(113.2, 0, 75.5, 15, 0, 21.7, 49, 0, 18.5, 73, 59.8, 
0, 0, 0, 14.7, 0, 0, 0, 10.4, 0, 0), a10 = c(114.9, 0, 75.2, 
15, 0, 22.6, 49.6, 0, 19.8, 73.8, 59.9, 0, 0, 0, 16.6, 0, 0, 
10.5, 10.5, 0, 0), a11 = c(114.9, 0, 75.5, 15.1, 0, 23.2, 50.6, 
0, 19.8, 74.6, 59.2, 0, 0, 0, 18.2, 0, 0, 10.5, 10.6, 0, 0), 
    a12 = c(115, 0, 76, 15.9, 0, 26.1, 0, 0, 22.7, 75.4, 60.8, 
    0, 0, 0, 21, 0, 0, 10.3, 11.1, 0, 0), a13 = c(115.2, 11.6, 
    76, 16, 0, 26.6, 0, 0, 23.3, 75.5, 61.3, 0, 0, 0, 22.6, 0, 
    0, 10.7, 11.1, 0, 0), a14 = c(0, 11.6, 77.6, 0, 0, 29.5, 
    0, 0, 25.3, 76.2, 64, 0, 0, 0, 25.5, 0, 0, 11.6, 11.8, 10.2, 
    11)), class = "data.frame", row.names = c(NA, -21L))

And the annual data:

# Annual data as a structure() literal (dput-style): a data.frame with 14 rows.
#   ano  - factor whose values run a1, a2, ..., a14 in row order
#          (the levels themselves are stored in alphabetical order).
#   temp - one integer value per row.
temp <- structure(list(ano = structure(c(1L, 7L, 8L, 9L, 10L, 11L, 12L, 
13L, 14L, 2L, 3L, 4L, 5L, 6L), .Label = c("a1", "a10", "a11", 
"a12", "a13", "a14", "a2", "a3", "a4", "a5", "a6", "a7", "a8", 
"a9"), class = "factor"), temp = c(0L, 2L, 2L, 6L, 2L, 3L, 13L, 
8L, 7L, 3L, 2L, 5L, 2L, 5L)), class = "data.frame", row.names = c(NA, 
-14L))

Here I create the lists that will hold the regression inputs and results; they are filled in by a for loop.

# Empty containers, filled row by row later on:
# fitted models, predictor values and response values.
model_list <- x_list <- y_list <- list()

# Response vector: the `temp` column of the annual data.
y <- temp[["temp"]]

# Element-wise natural log of every column of x except the first one,
# returned as a numeric matrix.
log_x <- log(as.matrix(x[-1]))

Next I exclude the rows that have no valid observations:

# rows in log_x with -Inf in all the gaps
linhas <- c(3, 6, 9, 10, 11)
# Rows With Some Value (LAV): every row index from 1 to 21 not listed above
todas_as_linhas <- seq_len(21L)
lav <- todas_as_linhas[!(todas_as_linhas %in% linhas)]
lav
# [1]  1  2  4  5  7  8 12 13 14 15 16 17 18 19 20 21

Now I create a loop that (i) removes the positions without observations, both from log_x and from y, and (ii) fits the regressions. I also use an if/else to store NA for the rows that have fewer than 3 valid observations.

# Fit one regression per row listed in `lav`.
# For row i, the columns whose log-value is not finite (log(0) is -Inf) are
# dropped from both the row of log_x and from y, and lm() is fitted on what
# is left. Rows with fewer than 3 remaining values get NA instead of a model.
# Positions of model_list that are not in `lav` are never assigned.
for (i in lav) {
  # Logical mask of usable columns. A mask is used instead of
  # `-which(...)` because negative indexing with an empty index vector
  # would select nothing when a row has no -Inf at all; is.finite() also
  # keeps finite negative logs, which a `< 0` test would discard.
  validos <- is.finite(log_x[i, ])
  # x values of this row without the -Inf entries
  x_list[[i]] <- log_x[i, validos]
  # matching y values, same columns removed
  y_list[[i]] <- y[validos]
  if (length(x_list[[i]]) >= 3) {
    # enough observations: run the regression
    model_list[[i]] <- lm(x_list[[i]] ~ y_list[[i]])
  } else {
    # too few observations: store NA as a placeholder
    model_list[[i]] <- NA
  }
}

After that I would like to extract the results with apply-family functions, as follows:

# Coefficients of every element of model_list, transposed so that each
# element becomes a row.
# NOTE(review): this section is the code the question reports as failing with
# "$ operator is invalid for atomic vectors"; presumably coef() on the NA
# placeholders stored in model_list triggers it - confirm.
coef_list <- t(sapply(model_list, coef))

# summary() applied to every element of model_list
model_smry <- lapply(model_list, summary)

# 'r.squared' component of each summary
R2_list <- sapply(model_smry, '[[', 'r.squared')
# Fourth column of each summary's coefficient table (the p-value column
# in summary.lm output), one row per model after transposing
pval_list <- t(sapply(model_smry, function(LM){
  LM[['coefficients']][, 4]
}))

# 'fstatistic' component of each summary, one row per model
f_list <- t(sapply(model_smry, '[[', 'fstatistic'))

But I get the error `Error: $ operator is invalid for atomic vectors`. I understand that it is caused by the NA values. How can I extract the data? I would like the NA values not to be removed, because I need them in order to attach the observation names in the original order and with the same length as the sample.

How can I do this? Ideally, the observations for which no regression was computed would receive a placeholder value (such as zero), so that I can go through the list row by row extracting the data.

1 answer

2


The problem is that your list model_list has three kinds of elements: elements of type list, which are the results of lm; NULL elements, at the positions the loop never assigned; and NA elements, where the code went through the else. One option would be to change how you create this list so that this does not happen. Alternatively, you can keep only the fitted models and use model_list_novo in the sapply calls:

# Keep only the elements of model_list that have more than one component,
# which discards the NULL and NA entries.
model_list_novo <- Filter(function(m) length(m) > 1, model_list)

But since you may find it important to know the positions at which the coefficients were not calculated, you can instead test inside each sapply call, so that only the elements that come from lm are operated on:

# Extract regression results while keeping one slot per element of
# model_list / model_smry. Each anonymous function only touches elements
# that come from lm(); for every other element the `if` has no else
# branch, so the function returns NULL and the position is preserved.

# Coefficients: a fitted lm object is a list, the NA / NULL entries are not.
coef_list <- t(sapply(model_list, function(M) {
  if (is.list(M)) coef(M)
}))

# R-squared. The guard is `length(M) > 3`, as in the two calls below: a
# summary of an lm fit has more than three components, while the
# summaries of the NA / NULL entries are expected to be shorter.
# (The previous guard `is.list(M) > 3` compared a logical with 3 and was
# therefore never TRUE, so no R-squared was ever extracted.)
R2_list <- sapply(model_smry, function(M) {
  if (length(M) > 3) M[['r.squared']]
})

# Fourth column of the coefficient table (the p-value column in
# summary.lm output).
pval_list <- t(sapply(model_smry, function(LM) {
  if (length(LM) > 3) LM[['coefficients']][, 4]
}))

# F statistic entry of each model summary.
f_list <- sapply(model_smry, function(M) {
  if (length(M) > 3) M[['fstatistic']]
})

Each sapply call needs an `if` like this.

Browse other questions tagged

You are not signed in. Login or sign up in order to post.