-1
import pandas as pd
data_r = open('rosalind_gc.txt', 'r')
data_r1 = data_r.readlines()
data_r2 = []
data_r3 = []
#tirar os \n do texto
for i in data_r1:
data_r2.append(i.rstrip())
data_index = []
#filtrar Rosalind em index --done
for i in data_r2:
if 'Rosalind' in i:
data_index.append(data_r2.index(i))
#criar dicts com rosalind
for linha in data_r2:
linha_index = data_r2.index(linha)
if linha_index in data_index: # só acontece nas linhas == Rosalind
out_index = linha_index + 1
data_r3.append({linha:''})
#sequenciar cadeia de dados
I am trying to sequence the data manually, to make a dictionary that returns the Rosalind species with their respective sequence, but always goes without value in the last key of the dictionary.
here is an example of abbreviated dataset: (as in txt file)
Rosalind_6404 CCTGCGGAAGATCGGCACTAGAATAGCCAGAACCGTTTCTCTGAGGCTTCCGGCCTTCCC TCCCACTAATAATTCTGAGG Rosalind_5959 CCATCGGTAGCGCATCCTTAGTCCAATTAAGTCCCTATCCAGGCGCTCCGCCGAAGGTCT ATATCCATTTGAGCAGACACGC Rosalind_0808 CCACCCTCGTGGTATGGCTAGGCATTCAGGAACCGGAGAACGCTTCAGACCAGCCCGGAC TGGGAACCTGCGGGCAGTAGGTGGAAT