0
I’m doing tests with NLTK for chatbot, and I ended up having an error when I tried to tag a phrase using the Forest after mining data from the internet (this error is also when I tested without the miner) I’m using a macmini with the IDE Pycharm Python 3.7
code snippet:
    # minerador de sinonimos
class SearchSynn(object):
    """docstring for SearchSynn"""
    def __init__(self, wordpage):
        super(SearchSynn, self).__init__()
        self.word = wordpage.split(" ")
    def synonyms(self, verbose=False):
        param = "-".join(self.word)
        param = normalize('NFKD', param).encode('ASCII', 'ignore').decode('ASCII')
        if verbose == True:
            try:
                r = requests.get('https://www.sinonimos.com.br/{}/'.format(param))
                print(r)
                if r.status_code == 200:
                    conteudo_page = r.content.decode('iso8859-1')
                    wordpage = sc(text=conteudo_page).xpath('//h1[@class="h-palavra"]/text()').extract_first()[12:]
                    sinonimos = sc(text=conteudo_page).xpath('//a[@class="sinonimo"]/text()').extract()
                    tam_sinonimos = len(sinonimos)
                    # print("{} resultados encontrados:".format(tam_sinonimos))
                    arq = open('.historico', 'a')
                    arq.write("{} resultados encontrados para '{}':\n".format(tam_sinonimos, wordpage))
                    arq.write(str(sinonimos))
                    arq.write("\n\n")
                    arq.close()
                    return sinonimos
                else:
                    return "Nada."
            except Exception as e:
                return "Erro de Conexão. :/. error type: {}".format(e)
        else:
            try:
                r = requests.get('https://www.sinonimos.com.br/{}/'.format(param))
                if r.status_code == 200:
                    conteudo = r.content.decode('iso8859-1')
                    wordpage = sc(text=conteudo).xpath('//h1[@class="h-palavra"]/text()').extract_first()[12:]
                    sinonimos = sc(text=conteudo).xpath('//a[@class="sinonimo"]/text()').extract()
                    tam_sinonimos = len(sinonimos)
                    arq = open('.historico', 'a')
                    arq.write("{} resultados encontrados para '{}':\n".format(tam_sinonimos, wordpage))
                    arq.write(str(sinonimos))
                    arq.write("\n\n")
                    arq.close()
                    return sinonimos
                else:
                    return r.status_code
            except Exception as e:
                return e
# tratamento da frase
class Treatment(object):
    def __init__(self, sentence):
        super(Treatment, self).__init__()
        self.wor = sentence.lower()
    def Tokenize(self):
        sent = sent_tokenize(self)
        sentence = [word_tokenize(self) for keyword in sent]
        tolkien = [floresta.tagged_words(self) for key in sentence]
        return tolkien
def main():
    testeb = input()
    while True:
        teste_tok = Treatment.Tokenize(testeb)
        for key in teste_tok:
            i = SearchSynn(key)
            etiquetas = {key: i.synonyms()}
            print(etiquetas)
        testeb = input()
    pass
if __name__ == "__main__":
    main()
error: I sent a "test" on the console and returned
File "/Users/Neemo/Pycharmprojects/untield/venv/lib/python3.7/site-Packages/nltk/data.py", line 333, in __init__ raise Ioerror('No such file or directory: %r' %_path) Oserror: No such file or directory: '/Users/Neemo/nltk_data/corpora/forest/test'
It is very difficult to understand your problem with just this snippet of code. Can you make a replicable example? See instructions here: https://answall.com/help/minimal-reproducible-example
– Lucas
I sent it so I think I could replicate, but I don’t know if it’s good
– ViniciusY.
I think it works
– ViniciusY.
Where you defined
floresta?– Lucas
from nltk.corpus import forest from nltk import word_tokenize,sent_tokenize from nltk import corpus import requests from scrapy.selector import Selector as sc from unicodedata import normalize
– ViniciusY.