0
I’m doing tests with NLTK for chatbot, and I ended up having an error when I tried to tag a phrase using the Forest after mining data from the internet (this error is also when I tested without the miner) I’m using a macmini with the IDE Pycharm Python 3.7
code snippet:
# minerador de sinonimos
class SearchSynn(object):
"""docstring for SearchSynn"""
def __init__(self, wordpage):
super(SearchSynn, self).__init__()
self.word = wordpage.split(" ")
def synonyms(self, verbose=False):
param = "-".join(self.word)
param = normalize('NFKD', param).encode('ASCII', 'ignore').decode('ASCII')
if verbose == True:
try:
r = requests.get('https://www.sinonimos.com.br/{}/'.format(param))
print(r)
if r.status_code == 200:
conteudo_page = r.content.decode('iso8859-1')
wordpage = sc(text=conteudo_page).xpath('//h1[@class="h-palavra"]/text()').extract_first()[12:]
sinonimos = sc(text=conteudo_page).xpath('//a[@class="sinonimo"]/text()').extract()
tam_sinonimos = len(sinonimos)
# print("{} resultados encontrados:".format(tam_sinonimos))
arq = open('.historico', 'a')
arq.write("{} resultados encontrados para '{}':\n".format(tam_sinonimos, wordpage))
arq.write(str(sinonimos))
arq.write("\n\n")
arq.close()
return sinonimos
else:
return "Nada."
except Exception as e:
return "Erro de Conexão. :/. error type: {}".format(e)
else:
try:
r = requests.get('https://www.sinonimos.com.br/{}/'.format(param))
if r.status_code == 200:
conteudo = r.content.decode('iso8859-1')
wordpage = sc(text=conteudo).xpath('//h1[@class="h-palavra"]/text()').extract_first()[12:]
sinonimos = sc(text=conteudo).xpath('//a[@class="sinonimo"]/text()').extract()
tam_sinonimos = len(sinonimos)
arq = open('.historico', 'a')
arq.write("{} resultados encontrados para '{}':\n".format(tam_sinonimos, wordpage))
arq.write(str(sinonimos))
arq.write("\n\n")
arq.close()
return sinonimos
else:
return r.status_code
except Exception as e:
return e
# tratamento da frase
class Treatment(object):
def __init__(self, sentence):
super(Treatment, self).__init__()
self.wor = sentence.lower()
def Tokenize(self):
sent = sent_tokenize(self)
sentence = [word_tokenize(self) for keyword in sent]
tolkien = [floresta.tagged_words(self) for key in sentence]
return tolkien
def main():
testeb = input()
while True:
teste_tok = Treatment.Tokenize(testeb)
for key in teste_tok:
i = SearchSynn(key)
etiquetas = {key: i.synonyms()}
print(etiquetas)
testeb = input()
pass
if __name__ == "__main__":
main()
error: I sent a "test" on the console and returned
File "/Users/Neemo/Pycharmprojects/untield/venv/lib/python3.7/site-Packages/nltk/data.py", line 333, in __init__ raise Ioerror('No such file or directory: %r' %_path) Oserror: No such file or directory: '/Users/Neemo/nltk_data/corpora/forest/test'
It is very difficult to understand your problem with just this snippet of code. Can you make a replicable example? See instructions here: https://answall.com/help/minimal-reproducible-example
– Lucas
I sent it so I think I could replicate, but I don’t know if it’s good
– ViniciusY.
I think it works
– ViniciusY.
Where you defined
floresta
?– Lucas
from nltk.corpus import forest from nltk import word_tokenize,sent_tokenize from nltk import corpus import requests from scrapy.selector import Selector as sc from unicodedata import normalize
– ViniciusY.