Group data by a certain column in R

Asked

Viewed 840 times

1

The full data set has 1704 observations of 6 variables, so I took only the first 80 rows as a sample. The dput output of that sample is below:

structure(list(country = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L), .Label = c("Afghanistan", "Albania", "Algeria", "Angola", "Argentina", "Australia", "Austria", "Bahrain", "Bangladesh", "Belgium", "Benin", "Bolivia", "Bosnia and Herzegovina", "Botswana", "Brazil", "Bulgaria", "Burkina Faso", "Burundi", "Cambodia", "Cameroon", "Canada", "Central African Republic", "Chad", "Chile", "China", "Colombia", "Comoros", "Congo, Dem. Rep.", "Congo, Rep.", "Costa Rica", "Cote d'Ivoire", "Croatia", "Cuba", "Czech Republic", "Denmark", "Djibouti", "Dominican Republic", "Ecuador", "Egypt", "El Salvador", "Equatorial Guinea", "Eritrea", "Ethiopia", "Finland", "France", "Gabon", "Gambia", "Germany", "Ghana", "Greece", "Guatemala", "Guinea", "Guinea-Bissau", "Haiti", "Honduras", "Hong Kong, China", "Hungary", "Iceland", "India", "Indonesia", "Iran", "Iraq", "Ireland", "Israel", "Italy", "Jamaica", "Japan", "Jordan", "Kenya", "Korea, Dem. 
Rep.", "Korea, Rep.", "Kuwait", "Lebanon", "Lesotho", "Liberia", "Libya", "Madagascar", "Malawi", "Malaysia", "Mali", "Mauritania", "Mauritius", "Mexico", "Mongolia", "Montenegro", "Morocco", "Mozambique", "Myanmar", "Namibia", "Nepal", "Netherlands", "New Zealand", "Nicaragua", "Niger", "Nigeria", "Norway", "Oman", "Pakistan", "Panama", "Paraguay", "Peru", "Philippines", "Poland", "Portugal", "Puerto Rico", "Reunion", "Romania", "Rwanda", "Sao Tome and Principe", "Saudi Arabia", "Senegal", "Serbia", "Sierra Leone", "Singapore", "Slovak Republic", "Slovenia", "Somalia", "South Africa", "Spain", "Sri Lanka", "Sudan", "Swaziland", "Sweden", "Switzerland", "Syria", "Taiwan", "Tanzania", "Thailand", "Togo", "Trinidad and Tobago", "Tunisia", "Turkey", "Uganda", "United Kingdom", "United States", "Uruguay", "Venezuela", "Vietnam", "West Bank and Gaza", "Yemen, Rep.", "Zambia", "Zimbabwe"), class = "factor"), continent = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L), .Label = c("Africa", "Americas", "Asia", "Europe", "Oceania"), class = "factor"), year = c(1952L, 1957L, 1962L, 1967L, 1972L, 1977L, 1982L, 1987L, 1992L, 1997L, 2002L, 2007L, 1952L, 1957L, 1962L, 1967L, 1972L, 1977L, 1982L, 1987L, 1992L, 1997L, 2002L, 2007L, 1952L, 1957L, 1962L, 1967L, 1972L, 1977L, 1982L, 1987L, 1992L, 1997L, 2002L, 2007L, 1952L, 1957L, 1962L, 1967L, 1972L, 1977L, 1982L, 1987L, 1992L, 1997L, 2002L, 2007L, 1952L, 1957L, 1962L, 1967L, 1972L, 1977L, 1982L, 1987L, 1992L, 1997L, 2002L, 2007L, 1952L, 1957L, 1962L, 1967L, 1972L, 1977L, 1982L, 1987L, 1992L, 1997L, 2002L, 2007L, 1952L, 1957L, 1962L, 1967L, 1972L, 1977L, 1982L, 1987L), lifeExp = c(28.801, 30.332, 31.997, 34.02, 36.088, 38.438, 39.854, 40.822, 41.674, 
41.763, 42.129, 43.828, 55.23, 59.28, 64.82, 66.22, 67.69, 68.93, 70.42, 72, 71.581, 72.95, 75.651, 76.423, 43.077, 45.685, 48.303, 51.407, 54.518, 58.014, 61.368, 65.799, 67.744, 69.152, 70.994, 72.301, 30.015, 31.999, 34, 35.985, 37.928, 39.483, 39.942, 39.906, 40.647, 40.963, 41.003, 42.731, 62.485, 64.399, 65.142, 65.634, 67.065, 68.481, 69.942, 70.774, 71.868, 73.275, 74.34, 75.32, 69.12, 70.33, 70.93, 71.1, 71.93, 73.49, 74.74, 76.32, 77.56, 78.83, 80.37, 81.235, 66.8, 67.48, 69.54, 70.14, 70.63, 72.17, 73.18, 74.94 ), pop = c(8425333L, 9240934L, 10267083L, 11537966L, 13079460L, 14880372L, 12881816L, 13867957L, 16317921L, 22227415L, 25268405L, 31889923L, 1282697L, 1476505L, 1728137L, 1984060L, 2263554L, 2509048L, 2780097L, 3075321L, 3326498L, 3428038L, 3508512L, 3600523L, 9279525L, 10270856L, 11000948L, 12760499L, 14760787L, 17152804L, 20033753L, 23254956L, 26298373L, 29072015L, 31287142L, 33333216L, 4232095L, 4561361L, 4826015L, 5247469L, 5894858L, 6162675L, 7016384L, 7874230L, 8735988L, 9875024L, 10866106L, 12420476L, 17876956L, 19610538L, 21283783L, 22934225L, 24779799L, 26983828L, 29341374L, 31620918L, 33958947L, 36203463L, 38331121L, 40301927L, 8691212L, 9712569L, 10794968L, 11872264L, 13177000L, 14074100L, 15184200L, 16257249L, 17481977L, 18565243L, 19546792L, 20434176L, 6927772L, 6965860L, 7129864L, 7376998L, 7544201L, 7568430L, 7574613L, 7578903L), gdpPercap = c(779.4453145, 820.8530296, 853.10071, 836.1971382, 739.9811058, 786.11336, 978.0114388, 852.3959448, 649.3413952, 635.341351, 726.7340548, 974.5803384, 1601.056136, 1942.284244, 2312.888958, 2760.196931, 3313.422188, 3533.00391, 3630.880722, 3738.932735, 2497.437901, 3193.054604, 4604.211737, 5937.029526, 2449.008185, 3013.976023, 2550.81688, 3246.991771, 4182.663766, 4910.416756, 5745.160213, 5681.358539, 5023.216647, 4797.295051, 5288.040382, 6223.367465, 3520.610273, 3827.940465, 4269.276742, 5522.776375, 5473.288005, 3008.647355, 2756.953672, 2430.208311, 2627.845685, 2277.140884, 
2773.287312, 4797.231267, 5911.315053, 6856.856212, 7133.166023, 8052.953021, 9443.038526, 10079.02674, 8997.897412, 9139.671389, 9308.41871, 10967.28195, 8797.640716, 12779.37964, 10039.59564, 10949.64959, 12217.22686, 14526.12465, 16788.62948, 18334.19751, 19477.00928, 21888.88903, 23424.76683, 26997.93657, 30687.75473, 34435.36744, 6137.076492, 8842.59803, 10750.72111, 12834.6024, 16661.6256, 19749.4223, 21597.08362, 23687.82607)), row.names = c(NA, 80L), class = "data.frame")

I want the number of countries per continent in R. Thank you.

2 answers

2

Using dplyr:

# dplyr provides the pipe and the verbs used below.
library(dplyr)

# Keep one row per (continent, country) pair, then count the rows that
# remain inside each continent group.
data %>%
  distinct(continent, country) %>%
  group_by(continent) %>%
  count()

# A tibble: 5 x 2
# Groups:   continent [5]
continent     n
<fct>     <int>
1 Africa        2
2 Americas      1
3 Asia          1
4 Europe        2
5 Oceania       1

Edit (requested in comment field)

By default, a tibble prints only its first few rows to the Console. If you need to see all the observations, add as.data.frame() at the end of the pipeline, since a data.frame is printed without the tibble's row limit. It would look like this:

# Count the distinct countries in each continent, then convert the result
# to a plain data.frame so it is printed without the tibble row limit.
data %>%
  distinct(continent, country) %>%
  group_by(continent) %>%
  count() %>%
  as.data.frame()

However, this is not very practical, because it clutters the analysis. Suppose you have a data set with 10,000 rows: printing all of them to the Console is unwise, because the output will scroll away everything you did before.

If you are using RStudio, you can store the result in an object and use the View function to see all observations:

# Store the number of distinct countries per continent in an object so it
# can be inspected without printing it to the Console.
meu <- data %>%
  distinct(continent, country) %>%
  group_by(continent) %>%
  count()

# Open the stored result in the data viewer.
View(meu)
  • If the output ends with the message "… with 132 more rows", how can I list all the results? When a data set is too large, only a few rows are shown.

  • I edited the answer. See if it fits what you want.

  • Oh, I got it. I had forgotten about View. Much better now. Thank you.

2

In base R you can use aggregate.
Note that I named the data frame dados.

# For each continent, count how many different countries appear in dados.
aggregate(
  country ~ continent,
  data = dados,
  FUN = function(x) length(unique(x))
)
#  continent country
#1    Africa       2
#2  Americas       1
#3      Asia       1
#4    Europe       2
#5   Oceania       1

To sort this result in descending order of number of countries, you must store the result and then apply order.

# Number of different countries per continent; the count is stored in the
# column named country.
continentes <- aggregate(
  country ~ continent,
  data = dados,
  FUN = function(x) length(unique(x))
)
# Ordering on the negated counts puts the largest counts first while
# leaving tied continents in their original relative order.
continentes <- continentes[order(-continentes$country), ]

continentes
#  continent country
#1    Africa       2
#4    Europe       2
#2  Americas       1
#3      Asia       1
#5   Oceania       1
  • I need to sort from the largest to the smallest. In this example, Africa and Europe have 2 each, and I want them to appear first. How would that look?

  • @Fidelhenriquefernandes Done, see the edited answer.

Browse other questions tagged

You are not signed in. Log in or sign up in order to post.