A: How to count and sum the amount of a certain "factor" in the observations (lines) of a data.frame?

Question

A: How to count and sum the amount of a certain "factor" in the observations (lines) of a data.frame?

Asked 7 years, 4 months ago

Viewed 4,047 times

4

Hello, I would like to count the number of "Sim" ("yes") values (a factor level) in each row of a data.frame, as shown below. Does anyone know what arguments I would have to pass to "mutate" to do this? I tried several ways and could not get it to work. I tried with:

Base = Base %>% mutate(Total_yes = )

If anyone can help, I’d be grateful!

    > dput(Base)
structure(list(ID = structure(1:100, .Label = c("110001", "110002", 
"110003", "110004", "110005", "110006", "110007", "110008", "110009", 
"110010", "110011", "110012", "110013", "110014", "110015", "110018", 
"110020", "110025", "110026", "110028", "110029", "110030", "110032", 
"110033", "110034", "110037", "110040", "110045", "110050", "110060", 
"110070", "110080", "110090", "110092", "110094", "110100", "110110", 
"110120", "110130", "110140", "110143", "110145", "110146", "110147", 
"110148", "110149", "110150", "110155", "110160", "110170", "110175", 
"110180", "120001", "120005", "120010", "120013", "120017", "120020", 
"120025", "120030", "120032", "120033", "120034", "120035", "120038", 
"120039", "120040", "120042", "120043", "120045", "120050", "120060", 
"120070", "120080", "130002", "130006", "130008", "130010", "130014", 
"130020", "130030", "130040", "130050", "130060", "130063", "130068", 
"130070", "130080", "130083", "130090", "130100", "130110", "130115", 
"130120", "130130", "130140", "130150", "130160", "130165", "130170"
), class = "factor"), Col_1 = structure(c(1L, 4L, 4L, 3L, 2L, 
1L, 4L, 4L, 3L, 2L, 1L, 2L, 4L, 3L, 4L, 4L, 4L, 4L, 2L, 4L, 4L, 
4L, 4L, 4L, 3L, 3L, 4L, 4L, 4L, 4L, 3L, 4L, 4L, 3L, 4L, 4L, 1L, 
4L, 4L, 4L, 4L, 4L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 4L, 
4L, 4L, 4L, 4L, 4L, 1L, 4L, 4L, 2L, 4L, 4L, 2L, 4L, 3L, 4L, 4L, 
4L, 4L, 2L, 4L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 1L, 2L, 4L), .Label = c("NA", 
"Não", "Não disponível", "Sim"), class = "factor"), Col_2 = structure(c(4L, 
4L, 4L, 3L, 4L, 2L, 2L, 2L, 3L, 4L, 4L, 2L, 2L, 3L, 2L, 2L, 4L, 
4L, 2L, 4L, 2L, 4L, 2L, 4L, 3L, 3L, 2L, 4L, 4L, 4L, 3L, 4L, 4L, 
3L, 4L, 4L, 2L, 4L, 2L, 4L, 4L, 2L, 2L, 4L, 4L, 2L, 4L, 2L, 4L, 
4L, 2L, 2L, 4L, 2L, 4L, 4L, 4L, 4L, 2L, 4L, 4L, 2L, 4L, 4L, 4L, 
4L, 4L, 3L, 4L, 2L, 4L, 4L, 2L, 4L, 2L, 2L, 2L, 4L, 4L, 4L, 1L, 
4L, 3L, 2L, 4L, 1L, 4L, 2L, 2L, 1L, 4L, 4L, 2L, 1L, 2L, 2L, 1L, 
2L, 4L, 4L), .Label = c("NA", "Não", "Não disponível", "Sim"), class = "factor"), 
    Col_3 = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
    3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
    3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 
    3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
    3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 
    3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
    3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 3L
    ), .Label = c("NA", "Não", "Sim"), class = "factor"), Col_4 = structure(c(3L, 
    3L, 3L, 3L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 3L, 3L, 1L, 3L, 
    3L, 3L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
    3L, 3L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 3L, 1L, 3L, 3L, 3L, 
    3L, 3L, 3L, 3L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
    3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
    3L, 1L, 3L, 3L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 3L, 1L, 3L, 
    3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 3L), .Label = c("Não", "Não disponível", 
    "Sim"), class = "factor"), Col_5 = structure(c(4L, 4L, 4L, 
    2L, 4L, 2L, 2L, 4L, 2L, 4L, 1L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 
    2L, 4L, 2L, 2L, 2L, 4L, 2L, 4L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 
    3L, 4L, 4L, 2L, 4L, 4L, 4L, 4L, 4L, 2L, 4L, 4L, 2L, 4L, 2L, 
    4L, 4L, 2L, 4L, 4L, 2L, 4L, 2L, 4L, 4L, 2L, 4L, 2L, 2L, 4L, 
    4L, 4L, 4L, 4L, 2L, 3L, 2L, 4L, 4L, 4L, 4L, 2L, 4L, 2L, 2L, 
    2L, 4L, 4L, 4L, 2L, 4L, 4L, 2L, 4L, 2L, 4L, 2L, 4L, 4L, 2L, 
    4L, 2L, 4L, 4L, 2L, 4L, 4L), .Label = c("NA", "Não", "Não disponível", 
    "Sim"), class = "factor"), Col_6 = structure(c(2L, 2L, 2L, 
    2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 
    2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 
    1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Não", "Sim"), class = "factor"), 
    Col_7 = structure(c(4L, 4L, 2L, 4L, 2L, 4L, 3L, 4L, 4L, 4L, 
    4L, 2L, 4L, 3L, 2L, 4L, 4L, 4L, 2L, 4L, 4L, 4L, 4L, 1L, 1L, 
    1L, 4L, 2L, 4L, 4L, 4L, 4L, 4L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 
    3L, 4L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 4L, 3L, 4L, 4L, 
    4L, 4L, 4L, 3L, 4L, 3L, 4L, 4L, 4L, 4L, 3L, 4L, 4L, 4L, 4L, 
    4L, 4L, 4L, 3L, 4L, 4L, 4L, 3L, 3L, 3L, 2L, 2L, 4L, 4L, 4L, 
    4L, 4L, 2L, 4L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 4L
    ), .Label = c("NA", "Não", "Não disponível", "Sim"), class = "factor"), 
    Col_8 = structure(c(3L, 3L, 1L, 1L, 3L, 3L, 2L, 3L, 1L, 3L, 
    3L, 3L, 1L, 2L, 1L, 3L, 3L, 3L, 1L, 3L, 1L, 1L, 1L, 3L, 1L, 
    3L, 1L, 1L, 3L, 3L, 1L, 1L, 3L, 2L, 1L, 3L, 1L, 3L, 1L, 3L, 
    2L, 3L, 1L, 1L, 3L, 1L, 3L, 1L, 3L, 3L, 1L, 3L, 2L, 1L, 1L, 
    3L, 1L, 3L, 2L, 3L, 2L, 1L, 3L, 3L, 3L, 2L, 3L, 1L, 1L, 1L, 
    1L, 3L, 3L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 3L, 
    1L, 3L, 1L, 3L, 2L, 3L, 3L, 1L, 3L, 1L, 1L, 3L, 1L, 1L, 3L
    ), .Label = c("Não", "Não disponível", "Sim"), class = "factor"), 
    Col_9 = structure(c(2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 
    2L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 3L, 
    2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 
    3L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 
    3L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 3L, 2L, 3L, 2L, 2L, 2L, 
    2L, 2L, 3L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 
    2L, 2L, 1L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L
    ), .Label = c("Ignorado", "Não", "Sim"), class = "factor"), 
    Col_10 = structure(c(3L, 3L, 3L, 2L, 3L, 1L, 1L, 1L, 2L, 
    3L, 3L, 1L, 1L, 2L, 1L, 1L, 3L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 
    2L, 2L, 1L, 3L, 3L, 3L, 2L, 3L, 3L, 2L, 3L, 3L, 1L, 3L, 3L, 
    2L, 2L, 1L, 3L, 3L, 3L, 2L, 3L, 3L, 2L, 3L, 3L, 1L, 3L, 1L, 
    3L, 2L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 2L, 3L, 3L, 3L, 3L, 2L, 
    3L, 3L, 3L, 3L, 3L, 1L, 3L, 2L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 
    2L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 3L, 3L, 3L, 1L, 3L, 1L, 1L, 
    3L), .Label = c("Não", "Não disponível", "Sim"), class = "factor"), 
    Total_Sim = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L)), .Names = c("ID", 
"Col_1", "Col_2", "Col_3", "Col_4", "Col_5", "Col_6", "Col_7", 
"Col_8", "Col_9", "Col_10", "Total_Sim"), class = c("tbl_df", 
"tbl", "data.frame"), row.names = c(NA, -100L))

3 answers

8

Using the package dplyr:

library(dplyr)
# Add Total_Sim: for each row, the number of cells across all columns of Base
# that are equal to "Sim" (each comparison is TRUE/FALSE, summed as 1/0).
Base <- mutate(Base, Total_Sim = rowSums(Base == "Sim"))

It worked perfectly, Rafael. Thank you! Could you just clarify one thing for me: what does the dot (.) mean inside the rowSums call?

– r_rabbit

2018/03/23 at 12:44
3

The . is the pipe's placeholder for the object on its left-hand side, i.e. the columns to which the function rowSums will be applied. In this case, since no selection was made, it refers to all columns of Base.

– Rafael Cunha

2018/03/23 at 12:47

Browse other questions tagged r dplyr

You are not signed in. Login or sign up in order to post.

by Rui Barradas • **15,422** points · Answer 1 · 2018-03-23T11:39:00+00:00

I think the simplest way is with rowSums. Since comparisons with == produce logical values FALSE/TRUE, which R encodes as 0/1, you just add up the values in each row.

# Compare columns 2 to 11 of Base with "Sim" and add up the TRUE cells per row.
rowSums(Base[2:11] == "Sim")
 #[1]  8  9  7  4  6  6  3  7  4  8  7  4  6  2  4  7  9  9  2 10  5  7  5  8  4
 #[26]  5  5  7  9 10  5  8  9  0  9  9  4  9  7  8  5  7  3  8  9  6  9  6  8  9
 #[51]  4  7  6  5  8  8  8  9  4 10  5  5  9  8 10  6 10  5  4  6  8  9  9  6  4
 #[76]  7  6  3  5  6  5  7  4  7  8  6  9  3  9  1  9  9  6  8  6  6  8  4  3  9

In addition to its simplicity, it has the advantage of being very fast. The function rowSums is implemented in C and avoids R-level loops.

by Willian Vieira • **3,675** points · Answer 2 · 2018-03-23T00:45:08+00:00

Using base R only:

# Count how many of the answer columns (columns 2 to 11) hold "Sim" in row x.
#   x: a single row index into Base (the data frame from the question; R is
#      case-sensitive, so the name must be `Base`, not `base`).
# Returns an integer count. sum() over the logical comparison counts the TRUE
# cells; na.rm = TRUE skips missing cells, just as which() ignored them.
func <- function(x) sum(Base[x, 2:11] == "Sim", na.rm = TRUE)

# Apply to every row. seq_len() yields an empty sequence for a zero-row table
# (1:0 would iterate over 1 and 0), and vapply() guarantees an integer vector
# regardless of the input size.
Base$Total_Sim <- vapply(seq_len(nrow(Base)), func, integer(1))