Error with variable naming functions in a list

Asked

Viewed 155 times

6

I am trying to rename variables in the data frames stored in a list, but I only get error messages.

When I run the code on a single data frame, it works:

library(tidyverse)

tibble=structure(list(var1 = c(28.5627505742013, 22.8311421908438, 95.2216156944633, 
43.9405107684433, 97.11211245507, 48.4108281508088), var2 = c(32.9009465128183, 
54.1136392951012, 69.3181485682726, 70.2100433968008, 44.0986660309136, 
62.8759404085577), var3 = c(89.6971945464611, 67.174579706043, 
37.0924087055027, 87.7977314218879, 29.3221596442163, 37.5143952667713
), var4 = c(41.5336912125349, 98.2095112837851, 80.7970978319645, 
91.1278881691396, 66.4086666144431, 69.2618868127465), var5 = c(33.9312525652349, 
88.1815139763057, 98.4453701227903, 25.0217059068382, 41.1195872165263, 
37.0983888953924), var6 = c(39.813664201647, 80.6405956856906, 
30.0273275375366, 34.6203793399036, 96.5195455029607, 44.5830867439508
), kmeans = structure(c(2L, 1L, 3L, 1L, 3L, 1L), .Label = c("1", 
"2", "3"), class = "factor"), group = structure(c(1L, 1L, 1L, 
1L, 1L, 1L), .Label = c("a", "b", "c", "d"), class = "factor")), .Names = 
c("var1", 
"var2", "var3", "var4", "var5", "var6", "kmeans", "group"), row.names = c(NA, 
-6L), class = c("tbl_df", "tbl", "data.frame"))

Function:

# Rename the column whose name matches "kmeans" (ignoring case) to "variable".
names(tibble)[str_which(names(tibble), regex('KMEaNS', ignore_case = TRUE))] <-
  'variable'

In the case of a list, however, it does not work and no result is returned:

mylist=structure(list(dataset1 = structure(list(var1 = c(28.5627505742013, 
22.8311421908438, 95.2216156944633, 43.9405107684433, 97.11211245507, 
48.4108281508088), var2 = c(32.9009465128183, 54.1136392951012, 
69.3181485682726, 70.2100433968008, 44.0986660309136, 62.8759404085577
), var3 = c(89.6971945464611, 67.174579706043, 37.0924087055027, 
87.7977314218879, 29.3221596442163, 37.5143952667713), var4 = c(41.5336912125349, 
98.2095112837851, 80.7970978319645, 91.1278881691396, 66.4086666144431, 
69.2618868127465), var5 = c(33.9312525652349, 88.1815139763057, 
98.4453701227903, 25.0217059068382, 41.1195872165263, 37.0983888953924
), var6 = c(39.813664201647, 80.6405956856906, 30.0273275375366, 
34.6203793399036, 96.5195455029607, 44.5830867439508), kmeans = structure(c(2L, 
1L, 3L, 1L, 3L, 1L), .Label = c("1", "2", "3"), class = "factor"), 
group = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("a", 
"b", "c", "d"), class = "factor")), .Names = c("var1", "var2", 
"var3", "var4", "var5", "var6", "kmeans", "group"), row.names = c(NA, 
-6L), class = c("tbl_df", "tbl", "data.frame")), dataset2 = structure(list(
var1 = c(28.5627505742013, 22.8311421908438, 95.2216156944633, 
43.9405107684433, 97.11211245507, 48.4108281508088), var2 = c(32.9009465128183, 
54.1136392951012, 69.3181485682726, 70.2100433968008, 44.0986660309136, 
62.8759404085577), var3 = c(89.6971945464611, 67.174579706043, 
37.0924087055027, 87.7977314218879, 29.3221596442163, 37.5143952667713
), var4 = c(41.5336912125349, 98.2095112837851, 80.7970978319645, 
91.1278881691396, 66.4086666144431, 69.2618868127465), var5 = c(33.9312525652349, 
88.1815139763057, 98.4453701227903, 25.0217059068382, 41.1195872165263, 
37.0983888953924), var6 = c(39.813664201647, 80.6405956856906, 
30.0273275375366, 34.6203793399036, 96.5195455029607, 44.5830867439508
), kmeans = structure(c(2L, 1L, 3L, 1L, 3L, 1L), .Label = c("1", 
"2", "3"), class = "factor"), group = structure(c(1L, 1L, 
1L, 1L, 1L, 1L), .Label = c("a", "b", "c", "d"), class = "factor")), .Names = 
c("var1", 
"var2", "var3", "var4", "var5", "var6", "kmeans", "group"), row.names = c(NA, 
-6L), class = c("tbl_df", "tbl", "data.frame")), dataset3 = structure(list(
var1 = c(28.5627505742013, 22.8311421908438, 95.2216156944633, 
43.9405107684433, 97.11211245507, 48.4108281508088), var2 = c(32.9009465128183, 
54.1136392951012, 69.3181485682726, 70.2100433968008, 44.0986660309136, 
62.8759404085577), var3 = c(89.6971945464611, 67.174579706043, 
37.0924087055027, 87.7977314218879, 29.3221596442163, 37.5143952667713
), var4 = c(41.5336912125349, 98.2095112837851, 80.7970978319645, 
91.1278881691396, 66.4086666144431, 69.2618868127465), var5 = c(33.9312525652349, 
88.1815139763057, 98.4453701227903, 25.0217059068382, 41.1195872165263, 
37.0983888953924), var6 = c(39.813664201647, 80.6405956856906, 
30.0273275375366, 34.6203793399036, 96.5195455029607, 44.5830867439508
), kmeans = structure(c(2L, 1L, 3L, 1L, 3L, 1L), .Label = c("1", 
"2", "3"), class = "factor"), group = structure(c(1L, 1L, 
1L, 1L, 1L, 1L), .Label = c("a", "b", "c", "d"), class = "factor")), .Names = 
c("var1", 
"var2", "var3", "var4", "var5", "var6", "kmeans", "group"), row.names = c(NA, 
-6L), class = c("tbl_df", "tbl", "data.frame"))), .Names = c("dataset1", 
"dataset2", "dataset3"))

Function 1:

# Attempt 1 (does NOT work): tries to rename the matching column of every
# data frame in `mylist` from inside nested lapply() calls.
mylist%>%
  lapply(function(x){
    # NOTE: a call on the left-hand side of `<-` is treated by R as a call to a
    # replacement function, here `lapply<-`, which does not exist; this is the
    # source of the error message quoted below.
    lapply(x,names(x)[str_which(names(x),regex('KMEa',ignore_case=T))])<-'variable'
  })

Error in lapply(x, names(x)[str_which(names(x), regex("KMEa", ignore_case = T))]) <- "variable" : could not find function "lapply<-"

Function 2:

# Attempt 2 (does NOT work): the whole pipeline `mylist %>% map(...)` sits on
# the left-hand side of `<-`, so R looks for a replacement function `%>%<-`,
# which does not exist; this is the source of the error message quoted below.
mylist%>%
  map(.,~names(.)[str_which(names(.),regex('KM',ignore_case=T))])<-'variable'

Error in mylist %>% map(., ~names(.)[str_which(names(.), regex("KM", ignore_case = T))]) <- "variable" : could not find function "%>%<-"

Two questions:

  • What are the errors in the two pieces of code?

  • Is there a similar solution using dplyr::rename?

  • You can simplify the pattern argument of stringr::str_replace_all(...) so that stringr::regex() is not needed just for its ignore_case = T argument. Simply put the special sequence "(?i)" in the pattern, which makes the match ignore whether a character is uppercase or lowercase (see my answer).

3 answers

5


The problem is that neither lapply nor map allows in-place modification of objects.

When you put the assignment operator <- to the right of a lapply() call, R does not know what to do with it: it looks for a replacement function called `lapply<-`, which does not exist.

In your case I think I’d do something like this:

# For every data frame in `mylist`, build the new column names by replacing
# everything from "km" onwards (case-insensitive) with "variable", then
# return the data frame carrying those names.
mylist %>%
  map(function(df) {
    new_names <- str_replace_all(
      names(df),
      pattern = regex("KM.*", ignore_case = TRUE),
      replacement = "variable"
    )
    set_names(df, nm = new_names)
  })

In any case, when anonymous functions start to get more complicated, it is worth extracting them into named functions to make the code more readable. For example:

#' Rename the "kmeans" column of a data frame to "variable"
#'
#' @param df A data frame (or tibble).
#' @return `df` with every column whose name matches "kmeans" (ignoring case)
#'   renamed to "variable". The data frame itself is returned, not just the
#'   result of the names assignment.
renomear_df <- function(df) {
  col_names <- names(df)
  hits <- str_which(col_names, regex('KMEaNS', ignore_case = TRUE))
  col_names[hits] <- 'variable'
  names(df) <- col_names
  df
}

# Apply the renaming helper to every data frame in the list.
map(mylist, renomear_df)

That is exactly the idea behind map: you write a function that works on a single element and, with one command, apply it to a whole list of elements. When the function you use to transform each element is simple, you can use an anonymous function or purrr's formula shorthand; when it is more complex, the ideal is to move it into a helper function, which makes the code much more readable.

There is no elegant solution with rename, because it needs the exact name of the variable and does not let you select columns by regex.

  • A question: why put the function's argument (df, in this case) on the last line of some functions?

  • 1

    Because names(df) <- "something" does not return the data frame (it actually returns the assigned value, invisibly), and what you want the function to return is the data.frame with the modified names.

  • 1

    You can read more here: https://adv-r.hadley.nz/names-values.html#Modify-in-place or here: https://adv-r.hadley.nz/vectors-chap.html#Attributes

5

A solution with dplyr would be:

library(dplyr)

# For each data frame in the list, rename the column(s) whose name contains
# "EaN" to "variable".
map(mylist, function(df) {
  rename_at(df, vars(contains('EaN')), function(nm) 'variable')
})

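contains() ignores case by default (ignore.case = TRUE), so it behaves similarly to stringr::regex() with ignore_case = TRUE, although it matches a literal substring rather than a regular expression.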

Taken (and adapted) from here.

3

The answers given are excellent and follow the tidyverse style. Still, I will give other solutions using the excellent data.table package and base R.

Data.table solution

data.table has the function data.table::setnames(), which renames data frame variables without the need to reassign the object. The renaming is done by reference, so no additional copies are made when the function is called. Generally, to perform operations with data.table you need to convert the object to a data.table. However, for renaming variables, data.table::setnames() also works on a tibble, and the class is preserved. The solution is very simple and runs with lapply():

# For every data frame in `mylist`, rename the column(s) whose name contains
# "km" (case-insensitive, via the inline "(?i)" flag) to "variable".
# data.table::setnames() changes the names by reference.
lapply(mylist, function(x){
  ind <- grep(x = names(x), pattern = "(?i)km")
  # Only rename when something matched. The previous `names(x)[[ind]]` raised
  # a subscript error for data frames with zero (or several) matching columns;
  # `[` indexing copes with any number of matches.
  if (length(ind) > 0L) {
    data.table::setnames(x, names(x)[ind], rep("variable", length(ind)))
  }
  # Return the data frame so the result of lapply() is the list of renamed
  # data frames in every case.
  x
})

Result (partial):

> head(mylist[[1]])
# A tibble: 6 x 8
   var1  var2  var3  var4  var5  var6 variable group
  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <fct>    <fct>
1  28.6  32.9  89.7  41.5  33.9  39.8 2        a    
2  22.8  54.1  67.2  98.2  88.2  80.6 1        a    
3  95.2  69.3  37.1  80.8  98.4  30.0 3        a    
4  43.9  70.2  87.8  91.1  25.0  34.6 1        a    
5  97.1  44.1  29.3  66.4  41.1  96.5 3        a    
6  48.4  62.9  37.5  69.3  37.1  44.6 1        a 

Base R solution

# Re-run the original code first to restore 'mylist'.
# Then, for each data frame in the list, rename the column(s) whose name
# contains "km" (case-insensitive, via the inline "(?i)" flag) to "variable".
for (i in seq_along(mylist)) {
  ind <- grep(x = names(mylist[[i]]), pattern = "(?i)km")
  # Use `[` rather than `[[`: `[[<-` requires exactly one index and errors when
  # a data frame has no (or more than one) matching column, whereas `[<-` is a
  # no-op for zero matches and renames every match otherwise.
  names(mylist[[i]])[ind] <- "variable"
}
# Remove the loop index from the workspace.
rm(i)

Here we use a for() loop. Result:

> head(mylist[[1]])
# A tibble: 6 x 8
   var1  var2  var3  var4  var5  var6 variable group
  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <fct>    <fct>
1  28.6  32.9  89.7  41.5  33.9  39.8 2        a    
2  22.8  54.1  67.2  98.2  88.2  80.6 1        a    
3  95.2  69.3  37.1  80.8  98.4  30.0 3        a    
4  43.9  70.2  87.8  91.1  25.0  34.6 1        a    
5  97.1  44.1  29.3  66.4  41.1  96.5 3        a    
6  48.4  62.9  37.5  69.3  37.1  44.6 1        a 

Browse other questions tagged

You are not signed in. Login or sign up in order to post.