Problem with special python characters

Asked

Viewed 527 times

3

I have a bot made in Python with the Selenium webdriver, which takes some data from Tesouro Direto and generates a json, however I am facing a problem, which occurs when a special character appears.

To remedy this problem I added the .encode('utf8') works perfectly, but this solution is only needed in the Linux, and when I use the script on a system Windows with this method it generates an error, but without this .encode('utf8') the script in Windows works.

Error in the Windows with the use of .encode('utf8'):

AttributeError: 'NoneType' object has no attribute 'encode'

Error in the Linux without the use of .encode('utf8'):

UnicodeEncodeError: 'ascii' codec can't encode characters in position 85-86: ordinal not in range(128)

Is there any way I can define that to work on both systems ?

tesouro_directo_protocol.py

# -*- coding: utf-8 -*-

# =========== IMPORTS =========== 
from datetime import datetime
import dateutil.relativedelta
from time import sleep
import sys
from selenium import webdriver
from selenium.webdriver import FirefoxOptions
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import NoSuchElementException
# ===============================


# ========== FUNCTIONS ==========
# Check if xpath existis
def is_element_present_xpath(xpath):
    try: firefox.find_element_by_xpath(xpath)
    except NoSuchElementException: return False
    return True
# ===============================

print('[ {"inicio": "%s"},' % str(datetime.now()))
# necessario para funcionar remotamente
opts = FirefoxOptions()
opts.add_argument("--headless")
firefox = webdriver.Firefox(firefox_options=opts)
# ============================================

# parametros
user_login = sys.argv[1]
user_pass = sys.argv[2]
try: 
    dir_file = sys.argv[3]
except:
    pass
default_file_name = 'erro.png'
wait_time = 10
# =====================================

# PAGINA DE LOGIN
firefox.get('https://tesourodireto.bmfbovespa.com.br/portalinvestidor/')
try:
    # preenchendo formulario de login
    login = WebDriverWait(firefox, wait_time).until(EC.presence_of_element_located((By.ID, 'BodyContent_txtLogin'))) 
    password = WebDriverWait(firefox, wait_time).until(EC.presence_of_element_located((By.ID, 'BodyContent_txtSenha'))) 
    login.send_keys("", user_login)
    password.send_keys("", user_pass)
    login_attempt = WebDriverWait(firefox, wait_time).until(EC.presence_of_element_located((By.ID, 'BodyContent_btnLogar'))) 
    login_attempt.click()
    # ====================================

    # pagina de protocolos
    firefox.get('https://tesourodireto.bmfbovespa.com.br/portalinvestidor/consulta-protocolo.aspx')


    # INVESTIMENTOS
    # selecionando a operação
    select_operacao = Select(firefox.find_element_by_id('BodyContent_ddlOperacao'))
    select_operacao.select_by_visible_text('Investimento')

    # selecionando a data
    key_data_inicial = datetime.now()
    key_data_final = key_data_inicial - dateutil.relativedelta.relativedelta(months=1)
    # data inicial
    data_inicial = firefox.find_element_by_id('BodyContent_dtRealizacaoInicial')
    data_inicial.send_keys("", key_data_final.strftime("%d%m%Y"))
    # data_inicial.send_keys("", key_data_final.strftime("17102017")) 
    # data final
    data_final = firefox.find_element_by_id('BodyContent_dtRealizacaoFinal')
    data_final.send_keys("", key_data_inicial.strftime("%d%m%Y"))
    # data_final.send_keys("", key_data_inicial.strftime("13062018"))  
    # clicando em consulta
    btn_consultar = firefox.find_element_by_id('BodyContent_btnConsultar')
    btn_consultar.click()

    # =====================================

    protocolos = firefox.find_elements_by_xpath("//table[contains(@class, 'responsive')]/tbody[2]/tr[contains(@class, 'nowrap')]")

    if is_element_present_xpath("//table[contains(@class, 'responsive')]/tbody[2]/tr[contains(@class, 'nowrap')][1]/td[2]"):    
        for protocolo in protocolos:
            numero_protocolo = protocolo.find_element_by_xpath('./td[1]').text
            operacao = protocolo.find_element_by_xpath('./td[2]').text
            situacao = protocolo.find_element_by_xpath('./td[3]').text
            realizacao = datetime.strptime(protocolo.find_element_by_xpath('./td[4]').text, '%d/%m/%Y')
            liquidacao = protocolo.find_element_by_xpath('./td[5]').text
            if liquidacao:        
                liquidacao = datetime.strptime(liquidacao, '%d/%m/%Y')

            detalhes = protocolo.find_element_by_xpath('./td[6]/img')
            detalhes.click()
            sleep(2)
            # modal detalhes 
            # modal_frame = firefox.switch_to.frame('modal')    
            detalhes_modal = firefox.find_element_by_xpath('//*[@id="modal"]')    
            # Dados Modal
            nome_representante = detalhes_modal.find_element_by_xpath('./div[1]/div[2]/div[4]/div[2]/label').text.split(' - ')
            nome_representante = nome_representante[0]
            titulo = detalhes_modal.find_element_by_xpath('./div[2]/div/div/table/tbody/tr/td[1]').text
            quantidade = (detalhes_modal.find_element_by_xpath('./div[2]/div/div/table/tbody/tr/td[2]').text).replace('.', '').replace(',','.')
            valor_unitario = (detalhes_modal.find_element_by_xpath('./div[2]/div/div/table/tbody/tr/td[3]').text).replace('.', '').replace(',','.')
            taxa_juros = (detalhes_modal.find_element_by_xpath('./div[2]/div/div/table/tbody/tr/td[4]').text).replace('.', '').replace(',','.')
            taxa_b3 = (detalhes_modal.find_element_by_xpath('./div[2]/div/div/table/tbody/tr/td[5]').text).replace('.', '').replace(',','.')
            taxa_custodia = (detalhes_modal.find_element_by_xpath('./div[2]/div/div/table/tbody/tr/td[6]').text).replace('.', '').replace(',','.')
            valor_total = (detalhes_modal.find_element_by_xpath('./div[2]/div/div/table/tbody/tr/td[7]').text).replace('.', '').replace(',','.')
            # print valores
            print('{ "numero_protocolo": "%s", "operacao": "%s", "situacao": "%s", "realizacao": "%s", "liquidacao": "%s", "representante": "%s", "titulo": "%s", "quantidade": "%s", "valor_unitario": "%s", "taxa_juros": "%s", "taxa_b3": "%s", "taxa_custodia": "%s", "valor_total": "%s" }, ' % (numero_protocolo, operacao, situacao, realizacao, liquidacao, nome_representante, titulo, quantidade, valor_unitario, taxa_juros, taxa_b3, taxa_custodia, valor_total)).encode('utf8')
            # fechando modal 
            sair_modal = detalhes_modal.find_element_by_class_name('close-reveal-modal')
            sair_modal.click()
    # INVESTIMENTOS -- FIM

    # RESGATE
    # Reiniciando a Consulta
    sleep(1)
    btn_consultar = firefox.find_element_by_id('BodyContent_btnConsultar')
    btn_consultar.click()
    # selecionando a operação
    select_operacao = Select(firefox.find_element_by_id('BodyContent_ddlOperacao'))
    select_operacao.select_by_visible_text('Resgate')

    # selecionando a data
    key_data_inicial = datetime.now()
    key_data_final = key_data_inicial - dateutil.relativedelta.relativedelta(months=1)
    # data inicial
    data_inicial = firefox.find_element_by_id('BodyContent_dtRealizacaoInicial')
    data_inicial.send_keys("", key_data_final.strftime("%d%m%Y"))
    # data_inicial.send_keys("", key_data_final.strftime("17102017")) 
    # data final
    data_final = firefox.find_element_by_id('BodyContent_dtRealizacaoFinal')
    data_final.send_keys("", key_data_inicial.strftime("%d%m%Y"))
    # data_final.send_keys("", key_data_inicial.strftime("13062018"))  

    # clicando em consulta
    btn_consultar = firefox.find_element_by_id('BodyContent_btnConsultar')
    btn_consultar.click()

    # =====================================

    protocolos = firefox.find_elements_by_xpath("//table[contains(@class, 'responsive')]/tbody[2]/tr[contains(@class, 'nowrap')]")
    if is_element_present_xpath("//table[contains(@class, 'responsive')]/tbody[2]/tr[contains(@class, 'nowrap')][1]/td[2]"):    
        for protocolo in protocolos:
            numero_protocolo = protocolo.find_element_by_xpath('./td[1]').text
            operacao = protocolo.find_element_by_xpath('./td[2]').text
            situacao = protocolo.find_element_by_xpath('./td[3]').text
            realizacao = datetime.strptime(protocolo.find_element_by_xpath('./td[4]').text, '%d/%m/%Y')
            liquidacao = protocolo.find_element_by_xpath('./td[5]').text
            if liquidacao:        
                liquidacao = datetime.strptime(liquidacao, '%d/%m/%Y')

            detalhes = protocolo.find_element_by_xpath('./td[6]/img')
            detalhes.click()
            sleep(2)
            # modal detalhes 
            # modal_frame = firefox.switch_to.frame('modal')    
            detalhes_modal = firefox.find_element_by_xpath('//*[@id="modal"]')    
            # Dados Modal
            nome_representante = detalhes_modal.find_element_by_xpath('./div[1]/div[2]/div[4]/div[2]/label').text.split(' - ')
            nome_representante = nome_representante[0]
            titulo = detalhes_modal.find_element_by_xpath('./div[2]/div/div/table/tbody/tr/td[1]').text
            quantidade = (detalhes_modal.find_element_by_xpath('./div[2]/div/div/table/tbody/tr/td[2]').text).replace('.', '').replace(',','.')
            valor_unitario = (detalhes_modal.find_element_by_xpath('./div[2]/div/div/table/tbody/tr/td[3]').text).replace('.', '').replace(',','.')
            taxa_juros = (detalhes_modal.find_element_by_xpath('./div[2]/div/div/table/tbody/tr/td[4]').text).replace('.', '').replace(',','.')
            taxa_b3 = (detalhes_modal.find_element_by_xpath('./div[2]/div/div/table/tbody/tr/td[5]').text).replace('.', '').replace(',','.')
            taxa_custodia = (detalhes_modal.find_element_by_xpath('./div[2]/div/div/table/tbody/tr/td[6]').text).replace('.', '').replace(',','.')
            valor_total = (detalhes_modal.find_element_by_xpath('./div[2]/div/div/table/tbody/tr/td[7]').text).replace('.', '').replace(',','.')
            # print valores
            print('{ "numero_protocolo": "%s", "operacao": "%s", "situacao": "%s", "realizacao": "%s", "liquidacao": "%s", "representante": "%s", "titulo": "%s", "quantidade": "%s", "valor_unitario": "%s", "taxa_juros": "%s", "taxa_b3": "%s", "taxa_custodia": "%s", "valor_total": "%s" }, ' % (numero_protocolo, operacao, situacao, realizacao, liquidacao, nome_representante, titulo, quantidade, valor_unitario, taxa_juros, taxa_b3, taxa_custodia, valor_total)).encode('utf8')
            # fechando modal 
            sair_modal = detalhes_modal.find_element_by_class_name('close-reveal-modal')
            sair_modal.click()
    # RESGATE -- FIM

    # Fechar navegador
    firefox.quit()
    print('{"fim": "%s"} ]' % str(datetime.now()))
except Exception:  
    if dir_file is not None:
        firefox.save_screenshot(dir_file + "\\" + default_file_name)
        pass
    firefox.quit()
    raise
  • What version of Python? Unless mistaken, the .encode('utf8') is only used in Python 2.

  • Actually the standard python of linux is 2.7

  • 3

    You must be using Python2 on Linux and Python 3 on windows. Check this.

  • Yes in linux the default is 2.7 but, this problem would occur in Windows 2.7 also ?

  • In the code you sent the Encode is being called at the return of the print. Example: print('text'). Encode('utf-8')

1 answer

1

Try this here

op = "numero_protocolo" + numero_protocolo + "operacao" +  operacao + "situacao" +  situacao + "realizacao" +  realizacao + "liquidacao" +  liquidacao + "nome_representante" +  nome_representante + "titulo" +  titulo + "quantidade" +  quantidade + "valor_unitario" +  valor_unitario + "taxa_juros" +  taxa_juros + "taxa_b3" +  taxa_b3 + "taxa_custodia" +  taxa_custodia + "valor_total" +  valor_total
if op is not None:
     print(op.enconde("utf-8"))

What happens here?

The value you tried to print is Nonetype (No value) and not a string, the error is because it does not exist None.encode("UTF-8").

How this is resolved?

Putting a if is not None. The code inside the if will only be executed if the value is not NoneType

Browser other questions tagged

You are not signed in. Login or sign up in order to post.