0
I have the following sample:
x <- structure(list(POP = structure(c(1L, 12L, 15L, 16L, 17L, 18L,
19L, 20L, 21L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 13L,
14L), .Label = c("pop1", "pop10", "pop11", "pop12", "pop13",
"pop14", "pop15", "pop16", "pop17", "pop18", "pop19", "pop2",
"pop20", "pop21", "pop3", "pop4", "pop5", "pop6", "pop7", "pop8",
"pop9"), class = "factor"), a1 = c(91, 26.7, 51.9, 14, 0, 15.3,
34.4, 19.1, 10.2, 52.5, 43.6, 13.1, 47.1, 34.7, 0, 58.9, 66.8,
0, 0, 0, 0), a2 = c(92.9, 27.7, 54.1, 14.3, 0, 16.2, 35, 19.1,
11.1, 52.5, 44.6, 13.4, 48.7, 34.4, 0, 59.5, 72.3, 0, 0, 0, 0
), a3 = c(92.6, 27.4, 54.7, 13.7, 0, 16.2, 36, 0, 11.1, 53.2,
45.2, 13.7, 49.3, 0, 0, 59.5, 74.5, 0, 0, 0, 0), a4 = c(95.5,
28.3, 57.3, 14.6, 0, 16.9, 36.9, 0, 11.8, 56.3, 47.1, 14, 53.2,
0, 0, 62.7, 84.4, 0, 0, 0, 0), a5 = c(97.4, 28.6, 61.4, 14.3,
0, 17.5, 36.9, 0, 12.4, 55.7, 47.4, 14.6, 53.8, 0, 0, 62.4, 0,
0, 0, 0, 0), a6 = c(97.7, 29.3, 63.3, 14.6, 0, 18.5, 38.8, 0,
13.1, 57.3, 49, 15.3, 55.4, 0, 0, 62.7, 0, 0, 0, 0, 0), a7 = c(102.2,
0, 68.1, 14.6, 11.1, 20.1, 43.3, 0, 14.6, 64.9, 53.2, 0, 60.5,
0, 0, 62.7, 0, 0, 0, 0, 0), a8 = c(106.3, 0, 71.9, 14.3, 0, 19.7,
45.8, 0, 15.9, 70.7, 57.3, 0, 67.8, 0, 10.5, 0, 0, 0, 10, 0,
0), a9 = c(113.2, 0, 75.5, 15, 0, 21.7, 49, 0, 18.5, 73, 59.8,
0, 0, 0, 14.7, 0, 0, 0, 10.4, 0, 0), a10 = c(114.9, 0, 75.2,
15, 0, 22.6, 49.6, 0, 19.8, 73.8, 59.9, 0, 0, 0, 16.6, 0, 0,
10.5, 10.5, 0, 0), a11 = c(114.9, 0, 75.5, 15.1, 0, 23.2, 50.6,
0, 19.8, 74.6, 59.2, 0, 0, 0, 18.2, 0, 0, 10.5, 10.6, 0, 0),
a12 = c(115, 0, 76, 15.9, 0, 26.1, 0, 0, 22.7, 75.4, 60.8,
0, 0, 0, 21, 0, 0, 10.3, 11.1, 0, 0), a13 = c(115.2, 11.6,
76, 16, 0, 26.6, 0, 0, 23.3, 75.5, 61.3, 0, 0, 0, 22.6, 0,
0, 10.7, 11.1, 0, 0), a14 = c(0, 11.6, 77.6, 0, 0, 29.5,
0, 0, 25.3, 76.2, 64, 0, 0, 0, 25.5, 0, 0, 11.6, 11.8, 10.2,
11)), class = "data.frame", row.names = c(NA, -21L))
And the annual data:
temp <- structure(list(ano = structure(c(1L, 7L, 8L, 9L, 10L, 11L, 12L,
13L, 14L, 2L, 3L, 4L, 5L, 6L), .Label = c("a1", "a10", "a11",
"a12", "a13", "a14", "a2", "a3", "a4", "a5", "a6", "a7", "a8",
"a9"), class = "factor"), temp = c(0L, 2L, 2L, 6L, 2L, 3L, 13L,
8L, 7L, 3L, 2L, 5L, 2L, 5L)), class = "data.frame", row.names = c(NA,
-14L))
I can extract the regressions with the functions apply
, when I have the complete data in the 14 years, doing a data cleaning and keeping only the pop
who has the full 14 year series.
However I wanted to run the regression in the data that are not complete, I use the following code:
y <- temp$temp
log_x <- apply(x[-1], 2, log)
model_list <- apply(log_x, 1, function(x) lm(x ~ y))
coef_list <- t(sapply(model_list, coef))
model_smry <- lapply(model_list, summary)
the function apply
for the log error, which results in -inf
since it has zero values at the base.
has to run the regression to that point before the error and calculate the regression with the y
corresponding to the year that stopped?
For example:
It’s 14 years or columns
the year 4 has -inf
I calculate regression only with years 1, 2 and 3 with the y
corresponding to those years (stopping the function apply
in the -inf
). Doing it for all lines.
maybe you can do it with a function for
but I don’t know how to proceed, if you can help start or indicate something to study, it will already be a great help.
I’m unable to reproduce your mistake. By chance the line
log_x <- apply(log_x[-1], 2, log)
should belog_x <- apply(base[-1], 2, log)
? Also, what is the data frame fortemp
? Please review these details and edit the question so that it is playable.– Marcus Nunes
Oops, sorry. I just set the example.
– André