Separate subsets of a data set in R

Asked

Viewed 82 times

1

I’m having trouble separating out subsets of a particular data set. Certain rows contain an expression that indicates which step the data below them belong to. For example, the first row has the expression 1.2. DADOS DOS ÓRGÃOS/ENTIDADES (ETAPA 1), and I need to create a column called ETAPA in which every row from there down is labelled Etapa 1, up to the row 2. BASES E REMUNERAÇÕES (ETAPA 2), from which point the column should read Etapa 2. The data are shown below. The excess of NA values is because the original Excel sheet has several merged columns and each step has a different number of columns.

EDIT: The number of rows in each step is different, and the data below are only a sample of the original data set. Since the data analysis of each step will be done monthly, I would like to create a routine to separate the data of each step!

    structure(list(X__1 = c("1.2. DADOS DOS ÓRGÃOS/ENTIDADES (ETAPA 1)", 
"CNPJ", "03.066.219/0001-81", "03.066.219/0001-81", "04.809.688/0001-06", 
"2. BASES E REMUNERAÇÕES (ETAPA 2)", "Competência", "Março", 
"Março", "Março", "Março", "3. CONTRIBUIÇÕES, APORTES E OUTROS VALORES (ETAPA 3)", 
"Competência", "Março", "Março", "Março", "Março"), X__2 = c(NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
), X__3 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA), X__4 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA), X__5 = c(NA, NA, NA, NA, NA, 
NA, "Órgão/Entidade", "AGENERSA - Agência Reguladora de Energia e Saneamento Básico do Estado do Rio de Janeiro", 
"AGENERSA - Agência Reguladora de Energia e Saneamento Básico do Estado do Rio de Janeiro", 
"AGETRANSP - Agência Reguladora de Serv.Público de Transportes do Estado do Rio de Janeiro", 
"AGETRANSP - Agência Reguladora de Serv.Público de Transportes do Estado do Rio de Janeiro", 
NA, "Órgão/Entidade", "AGENERSA - Agência Reguladora de Energia e Saneamento Básico do Estado do Rio de Janeiro", 
"AGENERSA - Agência Reguladora de Energia e Saneamento Básico do Estado do Rio de Janeiro", 
"AGETRANSP - Agência Reguladora de Serv.Público de Transportes do Estado do Rio de Janeiro", 
"AGETRANSP - Agência Reguladora de Serv.Público de Transportes do Estado do Rio de Janeiro"
), X__6 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA), X__7 = c(NA_character_, NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_), 
    X__8 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA), X__9 = c(NA, "Nome", "Fundo Único de Previdência Social do Estado do Rio de Janeiro", 
    "Militares", "UENF - Universidade Estadual do Norte Fluminense Darcy Ribeiro", 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), X__10 = c(NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA), X__11 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA), X__12 = c(NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), X__13 = c(NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA), X__14 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA), X__15 = c(NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), X__16 = c(NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA), X__17 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA), X__18 = c(NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), X__19 = c(NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA), X__20 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA), X__21 = c(NA, NA, NA, NA, NA, NA, 
    "Plano", "Financeiro", "Financeiro", "Financeiro", "Financeiro", 
    NA, "Plano", "Financeiro", "Financeiro", "Financeiro", "Financeiro"
    ), X__22 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA), X__23 = c(NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), X__24 = c(NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA), X__25 = c(NA, NA, NA, NA, NA, NA, "Referência", "PAT-SEG", 
    "SEG", "PAT-SEG", "SEG", NA, "Referência", "PAT-SEG", "SEG", 
    "PAT-SEG", "SEG"), X__26 = c(NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), X__27 = c(NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
    ), X__28 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA), X__29 = c(NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), X__30 = c(NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA), X__31 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA), X__32 = c(NA, NA, NA, NA, NA, NA, 
    "Remun. Bruta", "68881.39", "68881.39", "37259.550000000003", 
    "37259.550000000003", NA, "Data", "42089", "42094", "42090", 
    "42095"), X__33 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA), X__34 = c(NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), X__35 = c(NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA), X__36 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, "Valor Original", "14727.4", "7363.7", "8197.9599999999991", 
    "4098.9799999999996"), X__37 = c(NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_), X__38 = c(NA, 
    NA, NA, NA, NA, NA, "Base de Cálculo", "61355.47", "61355.47", 
    "33024.04", "33024.04", NA, NA, NA, NA, NA, NA), X__39 = c(NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA), X__40 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA), X__41 = c(NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, "Dedução de Benefícios", NA, NA, 
    NA, NA), X__42 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA), X__43 = c(NA, NA, NA, NA, NA, 
    NA, "Servidores", NA, "7", NA, "5", NA, NA, NA, NA, NA, NA
    ), X__44 = c(NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_), X__45 = c(NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), X__46 = c(NA, 
    "Tipo", "Unidade Gestora", "Militares", "Administração Indireta (Autarquias e Fundações)", 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), X__47 = c(NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA), X__48 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, "Valores Compensados", NA, NA, NA, NA), X__49 = c(NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA), X__50 = c(NA, NA, NA, NA, NA, NA, "Aposentados", NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA), X__51 = c(NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), 
    X__52 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    "Acréscimos Legais", NA, NA, NA, NA), X__53 = c(NA, NA, NA, 
    NA, NA, NA, "Pensionistas", NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA), X__54 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA), X__55 = c(NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, "Total com Acréscimos", "14727.4", 
    "7363.7", "8197.9599999999991", "4098.9799999999996"), X__56 = c(NA, 
    NA, NA, NA, NA, NA, "Dependentes", NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA), X__57 = c(NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA), X__58 = c(NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), 
    X__59 = c(NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_)), .Names = c("X__1", "X__2", "X__3", "X__4", 
"X__5", "X__6", "X__7", "X__8", "X__9", "X__10", "X__11", "X__12", 
"X__13", "X__14", "X__15", "X__16", "X__17", "X__18", "X__19", 
"X__20", "X__21", "X__22", "X__23", "X__24", "X__25", "X__26", 
"X__27", "X__28", "X__29", "X__30", "X__31", "X__32", "X__33", 
"X__34", "X__35", "X__36", "X__37", "X__38", "X__39", "X__40", 
"X__41", "X__42", "X__43", "X__44", "X__45", "X__46", "X__47", 
"X__48", "X__49", "X__50", "X__51", "X__52", "X__53", "X__54", 
"X__55", "X__56", "X__57", "X__58", "X__59"), row.names = c(NA, 
-17L), class = c("tbl_df", "tbl", "data.frame"))
  • Is each X_n a column? And you want to create another column identifying which step each row belongs to, is that it?

  • Exactly, Fernandes: each X_n is a different column, and I want to create a new column indicating the step to which the data below each header belong, so that I can apply a subset and work with each data set separately

2 answers

4


#' Tag each row of a data frame with the stage (etapa) it belongs to.
#'
#' A stage starts at every row whose first column ends in "(ETAPA <digits>)"
#' and extends down to the row before the next such header (or to the last
#' row). Stages are numbered 1, 2, 3, ... in order of appearance; the digits
#' written in the header text are not parsed.
#'
#' @param x A data frame (or tibble) whose first column holds the stage
#'   header text.
#' @return `x` with an extra integer column `etapa`. Rows above the first
#'   header, and every row when no header is found, get `NA`.
fEtapas <- function(x) {
  # `[[` always yields the column vector; `x[, 1]` stays a one-column tibble
  # when `x` is a tibble, which the pattern match cannot search row by row.
  is_header <- grepl("\\(ETAPA [0-9]*\\)$", x[[1]])

  # The running count of headers seen so far is the sequential stage number.
  etapa <- cumsum(is_header)
  # A count of zero means no header has appeared yet: the row has no stage.
  etapa[etapa == 0L] <- NA

  x$etapa <- etapa
  x
}

# Run fEtapas on `planilha.original`; the returned data frame carries the
# added `etapa` column.
planilha <- fEtapas(planilha.original)

> planilha[c(1:2,59:60)]
                                                   X__1 X__2 X__59 etapa
1             1.2. DADOS DOS ÓRGÃOS/ENTIDADES (ETAPA 1)   NA  <NA>     1
2                                                  CNPJ   NA  <NA>     1
3                                    03.066.219/0001-81   NA  <NA>     1
4                                    03.066.219/0001-81   NA  <NA>     1
5                                    04.809.688/0001-06   NA  <NA>     1
6                     2. BASES E REMUNERAÇÕES (ETAPA 2)   NA  <NA>     2
7                                           Competência   NA  <NA>     2
8                                                 Março   NA  <NA>     2
9                                                 Março   NA  <NA>     2
10                                                Março   NA  <NA>     2
11                                                Março   NA  <NA>     2
12 3. CONTRIBUIÇÕES, APORTES E OUTROS VALORES (ETAPA 3)   NA  <NA>     3
13                                          Competência   NA  <NA>     3
14                                                Março   NA  <NA>     3
15                                                Março   NA  <NA>     3
16                                                Março   NA  <NA>     3
17                                                Março   NA  <NA>     3

It will work with any number of steps, assuming that the steps are in sequence and are always identified with the same pattern ("(ETAPA XX)" at the end of the line). Placing the code inside a function allows you to apply it to any data.frame without having to copy and paste.

1

The last column of the data frame holds the steps.

dados =  structure(list(X__1 = c("1.2. DADOS DOS ÓRGÃOS/ENTIDADES (ETAPA 1)", 
    "CNPJ", "03.066.219/0001-81", "03.066.219/0001-81", "04.809.688/0001-06", 
    "2. BASES E REMUNERAÇÕES (ETAPA 2)", "Competência", "Março", 
    "Março", "Março", "Março", "3. CONTRIBUIÇÕES, APORTES E OUTROS VALORES (ETAPA 3)", 
    "Competência", "Março", "Março", "Março", "Março"), X__2 = c(NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
    ), X__3 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA), X__4 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA), X__5 = c(NA, NA, NA, NA, NA, 
    NA, "Órgão/Entidade", "AGENERSA - Agência Reguladora de Energia e Saneamento Básico do Estado do Rio de Janeiro", 
    "AGENERSA - Agência Reguladora de Energia e Saneamento Básico do Estado do Rio de Janeiro", 
    "AGETRANSP - Agência Reguladora de Serv.Público de Transportes do Estado do Rio de Janeiro", 
    "AGETRANSP - Agência Reguladora de Serv.Público de Transportes do Estado do Rio de Janeiro", 
    NA, "Órgão/Entidade", "AGENERSA - Agência Reguladora de Energia e Saneamento Básico do Estado do Rio de Janeiro", 
    "AGENERSA - Agência Reguladora de Energia e Saneamento Básico do Estado do Rio de Janeiro", 
    "AGETRANSP - Agência Reguladora de Serv.Público de Transportes do Estado do Rio de Janeiro", 
    "AGETRANSP - Agência Reguladora de Serv.Público de Transportes do Estado do Rio de Janeiro"
    ), X__6 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA), X__7 = c(NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_), 
        X__8 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
        NA, NA, NA, NA, NA), X__9 = c(NA, "Nome", "Fundo Único de Previdência Social do Estado do Rio de Janeiro", 
        "Militares", "UENF - Universidade Estadual do Norte Fluminense Darcy Ribeiro", 
        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), X__10 = c(NA, 
        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
        NA), X__11 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
        NA, NA, NA, NA, NA, NA), X__12 = c(NA, NA, NA, NA, NA, NA, 
        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), X__13 = c(NA, 
        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
        NA), X__14 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
        NA, NA, NA, NA, NA, NA), X__15 = c(NA, NA, NA, NA, NA, NA, 
        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), X__16 = c(NA, 
        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
        NA), X__17 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
        NA, NA, NA, NA, NA, NA), X__18 = c(NA, NA, NA, NA, NA, NA, 
        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), X__19 = c(NA, 
        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
        NA), X__20 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
        NA, NA, NA, NA, NA, NA), X__21 = c(NA, NA, NA, NA, NA, NA, 
        "Plano", "Financeiro", "Financeiro", "Financeiro", "Financeiro", 
        NA, "Plano", "Financeiro", "Financeiro", "Financeiro", "Financeiro"
        ), X__22 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
        NA, NA, NA, NA, NA, NA), X__23 = c(NA, NA, NA, NA, NA, NA, 
        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), X__24 = c(NA, 
        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
        NA), X__25 = c(NA, NA, NA, NA, NA, NA, "Referência", "PAT-SEG", 
        "SEG", "PAT-SEG", "SEG", NA, "Referência", "PAT-SEG", "SEG", 
        "PAT-SEG", "SEG"), X__26 = c(NA, NA, NA, NA, NA, NA, NA, 
        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), X__27 = c(NA, NA, 
        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
        ), X__28 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
        NA, NA, NA, NA, NA, NA), X__29 = c(NA, NA, NA, NA, NA, NA, 
        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), X__30 = c(NA, 
        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
        NA), X__31 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
        NA, NA, NA, NA, NA, NA), X__32 = c(NA, NA, NA, NA, NA, NA, 
        "Remun. Bruta", "68881.39", "68881.39", "37259.550000000003", 
        "37259.550000000003", NA, "Data", "42089", "42094", "42090", 
        "42095"), X__33 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
        NA, NA, NA, NA, NA, NA, NA), X__34 = c(NA, NA, NA, NA, NA, 
        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), X__35 = c(NA, 
        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
        NA), X__36 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
        NA, "Valor Original", "14727.4", "7363.7", "8197.9599999999991", 
        "4098.9799999999996"), X__37 = c(NA_character_, NA_character_, 
        NA_character_, NA_character_, NA_character_, NA_character_, 
        NA_character_, NA_character_, NA_character_, NA_character_, 
        NA_character_, NA_character_, NA_character_, NA_character_, 
        NA_character_, NA_character_, NA_character_), X__38 = c(NA, 
        NA, NA, NA, NA, NA, "Base de Cálculo", "61355.47", "61355.47", 
        "33024.04", "33024.04", NA, NA, NA, NA, NA, NA), X__39 = c(NA, 
        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
        NA), X__40 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
        NA, NA, NA, NA, NA, NA), X__41 = c(NA, NA, NA, NA, NA, NA, 
        NA, NA, NA, NA, NA, NA, "Dedução de Benefícios", NA, NA, 
        NA, NA), X__42 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
        NA, NA, NA, NA, NA, NA, NA), X__43 = c(NA, NA, NA, NA, NA, 
        NA, "Servidores", NA, "7", NA, "5", NA, NA, NA, NA, NA, NA
        ), X__44 = c(NA_character_, NA_character_, NA_character_, 
        NA_character_, NA_character_, NA_character_, NA_character_, 
        NA_character_, NA_character_, NA_character_, NA_character_, 
        NA_character_, NA_character_, NA_character_, NA_character_, 
        NA_character_, NA_character_), X__45 = c(NA, NA, NA, NA, 
        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), X__46 = c(NA, 
        "Tipo", "Unidade Gestora", "Militares", "Administração Indireta (Autarquias e Fundações)", 
        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), X__47 = c(NA, 
        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
        NA), X__48 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
        NA, "Valores Compensados", NA, NA, NA, NA), X__49 = c(NA, 
        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
        NA), X__50 = c(NA, NA, NA, NA, NA, NA, "Aposentados", NA, 
        NA, NA, NA, NA, NA, NA, NA, NA, NA), X__51 = c(NA, NA, NA, 
        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), 
        X__52 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
        "Acréscimos Legais", NA, NA, NA, NA), X__53 = c(NA, NA, NA, 
        NA, NA, NA, "Pensionistas", NA, NA, NA, NA, NA, NA, NA, NA, 
        NA, NA), X__54 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
        NA, NA, NA, NA, NA, NA, NA), X__55 = c(NA, NA, NA, NA, NA, 
        NA, NA, NA, NA, NA, NA, NA, "Total com Acréscimos", "14727.4", 
        "7363.7", "8197.9599999999991", "4098.9799999999996"), X__56 = c(NA, 
        NA, NA, NA, NA, NA, "Dependentes", NA, NA, NA, NA, NA, NA, 
        NA, NA, NA, NA), X__57 = c(NA, NA, NA, NA, NA, NA, NA, NA, 
        NA, NA, NA, NA, NA, NA, NA, NA, NA), X__58 = c(NA, NA, NA, 
        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), 
        X__59 = c(NA_character_, NA_character_, NA_character_, NA_character_, 
        NA_character_, NA_character_, NA_character_, NA_character_, 
        NA_character_, NA_character_, NA_character_, NA_character_, 
        NA_character_, NA_character_, NA_character_, NA_character_, 
        NA_character_)), .Names = c("X__1", "X__2", "X__3", "X__4", 
    "X__5", "X__6", "X__7", "X__8", "X__9", "X__10", "X__11", "X__12", 
    "X__13", "X__14", "X__15", "X__16", "X__17", "X__18", "X__19", 
    "X__20", "X__21", "X__22", "X__23", "X__24", "X__25", "X__26", 
    "X__27", "X__28", "X__29", "X__30", "X__31", "X__32", "X__33", 
    "X__34", "X__35", "X__36", "X__37", "X__38", "X__39", "X__40", 
    "X__41", "X__42", "X__43", "X__44", "X__45", "X__46", "X__47", 
    "X__48", "X__49", "X__50", "X__51", "X__52", "X__53", "X__54", 
    "X__55", "X__56", "X__57", "X__58", "X__59"), row.names = c(NA, 
    -17L), class = c("tbl_df", "tbl", "data.frame"))

# Label every row of `dados` with the stage ("Etapa N") it belongs to.
# A stage starts at each row whose first column ends in "(ETAPA <number>)"
# and runs until the next such header, so any number of stages and any
# number of rows per stage are handled without editing the script.
inicio_etapa <- grepl("\\(ETAPA [0-9]+\\)$", dados$X__1)

# Running count of headers seen so far: stages are numbered in order of
# appearance (1, 2, 3, ...).
numero_etapa <- cumsum(inicio_etapa)

# Rows above the first header belong to no stage and are left as NA.
dados$etapa <- ifelse(
  numero_etapa > 0L,
  paste("Etapa", numero_etapa),
  NA_character_
)
  • I failed to mention this in the question, so I will add an edit. This data set is only a sample of the original. The steps do not necessarily have the same number of rows. And since I will run the same analysis monthly on different files, I would like to automate it

  • Will there be other stages, or only these three? Also, will the name of each stage stay the same, or will it change?

  • There are more stages (5 in total), and in principle the name of each stage stays the same!

  • I edited the script, just add the other two steps.

Browse other questions tagged

You are not signed in. Login or sign up in order to post.