-2
I have a data frame with the columns `USUARIO`, `MÊS`, `ANO` and `compras_mês`. I would like to create dummy variables, based on `compras_mês`, for each `USUARIO` and for every month of the year.
USUARIO MÊS ANO compras_mês
<int> <ord> <dbl> <int>
1 37 jan 2019 1
2 37 set 2018 1
3 37 out 2018 4
4 37 nov 2018 3
5 37 dez 2018 3
6 49 out 2018 1
7 49 nov 2018 1
8 49 dez 2018 1
9 107 jan 2019 2
10 107 set 2018 1
As an example, my goal is to end up with a data frame like this:
USUARIO set out nov dez
37 1 1 1 1
49 1 0 1 0
107 0 0 1 1
How can I do that? Would the `dummy` command be a solution?
Here is the `dput` output to help with the question:
> dput(head(teste, 50))
structure(list(USUARIO = c(37L, 37L, 37L, 37L, 37L, 49L, 49L,
49L, 107L, 107L, 107L, 107L, 107L, 934L, 934L, 934L, 934L, 934L,
1116L, 1116L, 1116L, 1116L, 1160L, 1160L, 1160L, 1160L, 1160L,
1160L, 1302L, 1302L, 1302L, 1302L, 1337L, 1337L, 1337L, 1384L,
1384L, 1384L, 1384L, 1384L, 1384L, 1532L, 1532L, 1532L, 1532L,
1532L, 1532L, 1551L, 1551L, 1551L), MÊS = structure(c(1L, 9L,
10L, 11L, 12L, 10L, 11L, 12L, 1L, 9L, 10L, 11L, 12L, 1L, 8L,
10L, 11L, 12L, 1L, 10L, 11L, 12L, 1L, 8L, 9L, 10L, 11L, 12L,
9L, 10L, 11L, 12L, 10L, 11L, 12L, 1L, 8L, 9L, 10L, 11L, 12L,
1L, 8L, 9L, 10L, 11L, 12L, 8L, 9L, 10L), .Label = c("jan", "fev",
"mar", "abr", "mai", "jun", "jul", "ago", "set", "out", "nov",
"dez"), class = c("ordered", "factor")), ANO = c(2019, 2018,
2018, 2018, 2018, 2018, 2018, 2018, 2019, 2018, 2018, 2018, 2018,
2019, 2018, 2018, 2018, 2018, 2019, 2018, 2018, 2018, 2019, 2018,
2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018,
2019, 2018, 2018, 2018, 2018, 2018, 2019, 2018, 2018, 2018, 2018,
2018, 2018, 2018, 2018), compras_mês = c(1L, 1L, 4L, 3L, 3L,
1L, 1L, 1L, 2L, 1L, 5L, 2L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 1L, 1L, 2L, 1L, 1L, 2L, 4L,
4L, 5L, 3L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 3L, 3L, 2L)), row.names = c(NA,
-50L), class = c("grouped_df", "tbl_df", "tbl", "data.frame"), vars = c("USUARIO",
"MÊS"), drop = TRUE, indices = list(0L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L,
19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L,
31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L,
43L, 44L, 45L, 46L, 47L, 48L, 49L), group_sizes = c(1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), biggest_group_size = 1L, labels = structure(list(USUARIO = c(37L,
37L, 37L, 37L, 37L, 49L, 49L, 49L, 107L, 107L, 107L, 107L, 107L,
934L, 934L, 934L, 934L, 934L, 1116L, 1116L, 1116L, 1116L, 1160L,
1160L, 1160L, 1160L, 1160L, 1160L, 1302L, 1302L, 1302L, 1302L,
1337L, 1337L, 1337L, 1384L, 1384L, 1384L, 1384L, 1384L, 1384L,
1532L, 1532L, 1532L, 1532L, 1532L, 1532L, 1551L, 1551L, 1551L
), MÊS = structure(c(1L, 9L, 10L, 11L, 12L, 10L, 11L, 12L, 1L,
9L, 10L, 11L, 12L, 1L, 8L, 10L, 11L, 12L, 1L, 10L, 11L, 12L,
1L, 8L, 9L, 10L, 11L, 12L, 9L, 10L, 11L, 12L, 10L, 11L, 12L,
1L, 8L, 9L, 10L, 11L, 12L, 1L, 8L, 9L, 10L, 11L, 12L, 8L, 9L,
10L), .Label = c("jan", "fev", "mar", "abr", "mai", "jun", "jul",
"ago", "set", "out", "nov", "dez"), class = c("ordered", "factor"
))), row.names = c(NA, -50L), class = "data.frame", vars = c("USUARIO",
"MÊS"), drop = TRUE))
@Jdmello, thanks for the suggestions. But could you use the dput data I posted? I managed to get the result, but the answer might be confusing for another user with the same question as mine.
– Izak Mandrak