Oserror NLTK Forest labeling

Asked

Viewed 108 times

0

I’m doing tests with NLTK for chatbot, and I ended up having an error when I tried to tag a phrase using the Forest after mining data from the internet (this error is also when I tested without the miner) I’m using a macmini with the IDE Pycharm Python 3.7

code snippet:

    # minerador de sinonimos
class SearchSynn(object):
    """docstring for SearchSynn"""

    def __init__(self, wordpage):
        super(SearchSynn, self).__init__()
        self.word = wordpage.split(" ")

    def synonyms(self, verbose=False):
        param = "-".join(self.word)
        param = normalize('NFKD', param).encode('ASCII', 'ignore').decode('ASCII')
        if verbose == True:
            try:
                r = requests.get('https://www.sinonimos.com.br/{}/'.format(param))
                print(r)
                if r.status_code == 200:
                    conteudo_page = r.content.decode('iso8859-1')
                    wordpage = sc(text=conteudo_page).xpath('//h1[@class="h-palavra"]/text()').extract_first()[12:]
                    sinonimos = sc(text=conteudo_page).xpath('//a[@class="sinonimo"]/text()').extract()
                    tam_sinonimos = len(sinonimos)

                    # print("{} resultados encontrados:".format(tam_sinonimos))

                    arq = open('.historico', 'a')
                    arq.write("{} resultados encontrados para '{}':\n".format(tam_sinonimos, wordpage))
                    arq.write(str(sinonimos))
                    arq.write("\n\n")
                    arq.close()
                    return sinonimos
                else:
                    return "Nada."
            except Exception as e:
                return "Erro de Conexão. :/. error type: {}".format(e)
        else:
            try:
                r = requests.get('https://www.sinonimos.com.br/{}/'.format(param))
                if r.status_code == 200:
                    conteudo = r.content.decode('iso8859-1')
                    wordpage = sc(text=conteudo).xpath('//h1[@class="h-palavra"]/text()').extract_first()[12:]
                    sinonimos = sc(text=conteudo).xpath('//a[@class="sinonimo"]/text()').extract()
                    tam_sinonimos = len(sinonimos)
                    arq = open('.historico', 'a')
                    arq.write("{} resultados encontrados para '{}':\n".format(tam_sinonimos, wordpage))
                    arq.write(str(sinonimos))
                    arq.write("\n\n")
                    arq.close()
                    return sinonimos
                else:
                    return r.status_code
            except Exception as e:
                return e


# tratamento da frase
class Treatment(object):
    def __init__(self, sentence):
        super(Treatment, self).__init__()
        self.wor = sentence.lower()

    def Tokenize(self):
        sent = sent_tokenize(self)
        sentence = [word_tokenize(self) for keyword in sent]
        tolkien = [floresta.tagged_words(self) for key in sentence]
        return tolkien




def main():
    testeb = input()
    while True:
        teste_tok = Treatment.Tokenize(testeb)
        for key in teste_tok:
            i = SearchSynn(key)
            etiquetas = {key: i.synonyms()}
            print(etiquetas)
        testeb = input()
    pass


if __name__ == "__main__":
    main()

error: I sent a "test" on the console and returned

File "/Users/Neemo/Pycharmprojects/untield/venv/lib/python3.7/site-Packages/nltk/data.py", line 333, in __init__ raise Ioerror('No such file or directory: %r' %_path) Oserror: No such file or directory: '/Users/Neemo/nltk_data/corpora/forest/test'

  • It is very difficult to understand your problem with just this snippet of code. Can you make a replicable example? See instructions here: https://answall.com/help/minimal-reproducible-example

  • I sent it so I think I could replicate, but I don’t know if it’s good

  • 1

    I think it works

  • Where you defined floresta?

  • from nltk.corpus import forest from nltk import word_tokenize,sent_tokenize from nltk import corpus import requests from scrapy.selector import Selector as sc from unicodedata import normalize

No answers

Browser other questions tagged

You are not signed in. Login or sign up in order to post.