(Python) I also have a text file with a few words

Asked

Viewed 29 times

-2

from collections import Counter


def carrega_palavras():
    """Read ``palavras.txt`` and return its lines as a list of strings.

    Each line has its leading/trailing whitespace (including the newline)
    removed. The file is expected in the current working directory and is
    decoded as UTF-8.
    """
    with open("palavras.txt", 'r', encoding='utf-8') as arquivo:
        return [linha.strip() for linha in arquivo]


def palavra_lower(palavras):
    """Return a new list with every word in ``palavras`` lower-cased."""
    # The list is returned, not the function object itself.
    return [palavra.lower() for palavra in palavras]


def analisa_letras(palavra_lower):
    """Print the 15 most frequent characters and their share of the total.

    ``palavra_lower`` is an iterable of (already lower-cased) words. Inside
    this function the parameter name shadows the module-level function of the
    same name, so it must be treated as data and never called.

    Each output line has the form ``<character> → <percentage>%`` with the
    percentage rounded to two decimals. Nothing is printed for empty input.
    """
    # Join the words so that single characters are counted, not whole words.
    aparicoes = Counter("".join(palavra_lower))
    total_de_caracteres = sum(aparicoes.values())

    # most_common() yields nothing for an empty counter, so the division
    # below can never see a zero total.
    for caractere, frequencia in aparicoes.most_common(15):
        proporcao = frequencia / total_de_caracteres
        print("{} → {:.2f}%".format(caractere, proporcao * 100))


if __name__ == "__main__":
    palavras = carrega_palavras()
    # Feed the lower-cased list (the function's return value) to the analysis.
    analisa_letras(palavra_lower(palavras))

1 answer

0

Everything indicates that you are trying to calculate the histogram of the characters of the words contained in the input file.

Assuming your input file is something like:

Lorem
Ipsum
Dolor
Sit
Amet
Consectetur
Adipiscing
Elit
Etiam
Aliquet
Urna
Vitae
Ipsum
Ullamcorper
Aliquam

Your code can be adjusted as follows:

from collections import Counter

# Read one word per line, dropping surrounding whitespace and normalising case.
with open('./palavras.txt', 'r', encoding='utf-8') as arq:
    palavras = [linha.strip().lower() for linha in arq]

# Count every character across all words.
aparicoes = Counter(''.join(palavras))

# The denominator is the total number of characters read. Using
# len(aparicoes) would divide by the number of *distinct* characters and
# produce percentages that add up to far more than 100%.
total_de_caracteres = sum(aparicoes.values())

# No division happens when the file is empty (the comprehension has no items).
proporcoes = {caractere: frequencia / total_de_caracteres * 100.0
              for caractere, frequencia in aparicoes.items()}

# Stable sort: characters with equal shares keep first-seen order.
mais_comuns = sorted(proporcoes.items(), key=lambda x: x[1], reverse=True)[0:15]

for caractere, proporcao in mais_comuns:
    print("{} → {:.2f}%".format(caractere, proporcao))

# Output (for the sample file above, 91 characters in total):
#
# i → 12.09%
# e → 9.89%
# a → 9.89%
# t → 8.79%
# l → 7.69%
# m → 7.69%
# u → 7.69%
# r → 6.59%
# o → 5.49%
# s → 5.49%
# p → 4.40%
# c → 4.40%
# n → 3.30%
# d → 2.20%
# q → 2.20%

See it working on Repl.it

Browse other questions tagged

You are not signed in. Login or sign up in order to post.