1
I’m having trouble sorting out subsets of a particular database. On a given line he has the experience that indicates in which step that data belong, for example, the first line has the expression 1.2. DADOS DOS ÓRGÃOS/ENTIDADES (ETAPA 1)
and I need to create a column called ETAPA
that all the data below it is written Etapa 1
up the line 2. BASES E REMUNERAÇÕES (ETAPA 2)
which shall then be written Etapa 2
in column. The database is below. Excess of NA
is because the original excel database has several merged columns and each step has a number of different columns.
EDIT: The amount of rows of each Step is different, and below is only a sample of the original basis. As a Dads analysis of each step will be done monthly I would like to create a routine to separate the bases of Caa step!
structure(list(X__1 = c("1.2. DADOS DOS ÓRGÃOS/ENTIDADES (ETAPA 1)",
"CNPJ", "03.066.219/0001-81", "03.066.219/0001-81", "04.809.688/0001-06",
"2. BASES E REMUNERAÇÕES (ETAPA 2)", "Competência", "Março",
"Março", "Março", "Março", "3. CONTRIBUIÇÕES, APORTES E OUTROS VALORES (ETAPA 3)",
"Competência", "Março", "Março", "Março", "Março"), X__2 = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
), X__3 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA), X__4 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA), X__5 = c(NA, NA, NA, NA, NA,
NA, "Órgão/Entidade", "AGENERSA - Agência Reguladora de Energia e Saneamento Básico do Estado do Rio de Janeiro",
"AGENERSA - Agência Reguladora de Energia e Saneamento Básico do Estado do Rio de Janeiro",
"AGETRANSP - Agência Reguladora de Serv.Público de Transportes do Estado do Rio de Janeiro",
"AGETRANSP - Agência Reguladora de Serv.Público de Transportes do Estado do Rio de Janeiro",
NA, "Órgão/Entidade", "AGENERSA - Agência Reguladora de Energia e Saneamento Básico do Estado do Rio de Janeiro",
"AGENERSA - Agência Reguladora de Energia e Saneamento Básico do Estado do Rio de Janeiro",
"AGETRANSP - Agência Reguladora de Serv.Público de Transportes do Estado do Rio de Janeiro",
"AGETRANSP - Agência Reguladora de Serv.Público de Transportes do Estado do Rio de Janeiro"
), X__6 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA), X__7 = c(NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_),
X__8 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA), X__9 = c(NA, "Nome", "Fundo Único de Previdência Social do Estado do Rio de Janeiro",
"Militares", "UENF - Universidade Estadual do Norte Fluminense Darcy Ribeiro",
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), X__10 = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA), X__11 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA), X__12 = c(NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), X__13 = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA), X__14 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA), X__15 = c(NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), X__16 = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA), X__17 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA), X__18 = c(NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), X__19 = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA), X__20 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA), X__21 = c(NA, NA, NA, NA, NA, NA,
"Plano", "Financeiro", "Financeiro", "Financeiro", "Financeiro",
NA, "Plano", "Financeiro", "Financeiro", "Financeiro", "Financeiro"
), X__22 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA), X__23 = c(NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), X__24 = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA), X__25 = c(NA, NA, NA, NA, NA, NA, "Referência", "PAT-SEG",
"SEG", "PAT-SEG", "SEG", NA, "Referência", "PAT-SEG", "SEG",
"PAT-SEG", "SEG"), X__26 = c(NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), X__27 = c(NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
), X__28 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA), X__29 = c(NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), X__30 = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA), X__31 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA), X__32 = c(NA, NA, NA, NA, NA, NA,
"Remun. Bruta", "68881.39", "68881.39", "37259.550000000003",
"37259.550000000003", NA, "Data", "42089", "42094", "42090",
"42095"), X__33 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA), X__34 = c(NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), X__35 = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA), X__36 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, "Valor Original", "14727.4", "7363.7", "8197.9599999999991",
"4098.9799999999996"), X__37 = c(NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_), X__38 = c(NA,
NA, NA, NA, NA, NA, "Base de Cálculo", "61355.47", "61355.47",
"33024.04", "33024.04", NA, NA, NA, NA, NA, NA), X__39 = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA), X__40 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA), X__41 = c(NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, "Dedução de Benefícios", NA, NA,
NA, NA), X__42 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA), X__43 = c(NA, NA, NA, NA, NA,
NA, "Servidores", NA, "7", NA, "5", NA, NA, NA, NA, NA, NA
), X__44 = c(NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_), X__45 = c(NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), X__46 = c(NA,
"Tipo", "Unidade Gestora", "Militares", "Administração Indireta (Autarquias e Fundações)",
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), X__47 = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA), X__48 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, "Valores Compensados", NA, NA, NA, NA), X__49 = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA), X__50 = c(NA, NA, NA, NA, NA, NA, "Aposentados", NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA), X__51 = c(NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
X__52 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
"Acréscimos Legais", NA, NA, NA, NA), X__53 = c(NA, NA, NA,
NA, NA, NA, "Pensionistas", NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA), X__54 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA), X__55 = c(NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, "Total com Acréscimos", "14727.4",
"7363.7", "8197.9599999999991", "4098.9799999999996"), X__56 = c(NA,
NA, NA, NA, NA, NA, "Dependentes", NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA), X__57 = c(NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA), X__58 = c(NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
X__59 = c(NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_)), .Names = c("X__1", "X__2", "X__3", "X__4",
"X__5", "X__6", "X__7", "X__8", "X__9", "X__10", "X__11", "X__12",
"X__13", "X__14", "X__15", "X__16", "X__17", "X__18", "X__19",
"X__20", "X__21", "X__22", "X__23", "X__24", "X__25", "X__26",
"X__27", "X__28", "X__29", "X__30", "X__31", "X__32", "X__33",
"X__34", "X__35", "X__36", "X__37", "X__38", "X__39", "X__40",
"X__41", "X__42", "X__43", "X__44", "X__45", "X__46", "X__47",
"X__48", "X__49", "X__50", "X__51", "X__52", "X__53", "X__54",
"X__55", "X__56", "X__57", "X__58", "X__59"), row.names = c(NA,
-17L), class = c("tbl_df", "tbl", "data.frame"))
Each X_n would be a column? You want to create another column to identify in which step each row belongs to is this?
– Thiago Fernandes
Exact Fernandes, each X-n is a different column and I want to create a new indicated step to which data belongs below, so I can apply a subset and work with each separate database
– Flavio Silva