How to recognize images using python, cv2, Pillow and Tesseract?

Asked

Viewed 126 times

0

I’m trying to recognize the text in the images below, but I’m not able to extract the content correctly. A few examples:

captcha1 captcha2

Code

import numpy as np
   import cv2
   import mahotas
   import pytesseract as ocr
   from PIL import Image
   from string import ascii_letters, digits

    def QuebraCaptch(self):
        """Binarise ``Imagens/captchFull.png`` and print the text Tesseract reads.

        Steps:
          1. Load the captcha and convert it to grayscale.
          2. Apply a 3x3 Gaussian blur and a fixed binary threshold at 160.
          3. Invert the binary image and save it to ``Imagens/captcha.png``.
          4. Blur and threshold the inverted image once more.
          5. Run Tesseract on the result, restricted to ASCII letters and
             digits, and print the recognised string.

        The intermediate images stay available on the instance afterwards as
        ``self.image`` (original BGR image), ``self.binI`` (inverted binary),
        ``self.img`` (copy of the inverted binary), ``self.suave`` (its
        blurred version), ``self.bin`` and ``self.resultado`` (final binary
        image handed to the OCR engine). ``self.T`` holds the threshold value
        returned by the last ``cv2.threshold`` call.

        The Portuguese language data (``por``) must be installed for
        Tesseract, since it is now actually requested via ``lang``.

        Returns:
            None. The recognised text is printed to standard output.
        """
        limiar = 160  # fixed cut-off shared by every thresholding pass

        self.layout_de_analise = 9
        self.caracteres_permitidos = ascii_letters + digits

        # cv2.imread returns None instead of raising when the file cannot be
        # read; in that case the cvtColor call below is the one that fails.
        self.image = cv2.imread('Imagens/captchFull.png')
        cinza = cv2.cvtColor(self.image, cv2.COLOR_BGR2GRAY)

        # First pass: smooth, then binarise. An earlier Otsu-based pass was
        # dropped because every value it produced was overwritten right here.
        suavizada = cv2.GaussianBlur(cinza, (3, 3), 0)
        _, binaria = cv2.threshold(suavizada, limiar, 255, cv2.THRESH_BINARY)

        # Second pass: invert. The image is processed in memory instead of
        # being written to disk and read back, so a failed write can no
        # longer feed a stale or missing file into the next step.
        _, self.binI = cv2.threshold(
            binaria, limiar, 255, cv2.THRESH_BINARY_INV)
        cv2.imwrite("Imagens/captcha.png", self.binI)

        # Third pass: smooth and binarise the inverted image.
        self.img = self.binI.copy()
        self.suave = cv2.GaussianBlur(self.img, (3, 3), 0)
        (self.T, self.bin) = cv2.threshold(
            self.suave, limiar, 255, cv2.THRESH_BINARY)
        self.resultado = self.bin.copy()

        # The language must be passed through the `lang` keyword. Placed
        # inside `config` it is forwarded as an extra command-line argument
        # and does not select the recognition language.
        # NOTE(review): Tesseract documents --psm 9 as "single word in a
        # circle"; 7 (single text line) or 8 (single word) may fit a captcha
        # better - confirm before changing.
        configuracao = (
            f"--psm {self.layout_de_analise} "
            f"-c tessedit_char_whitelist={self.caracteres_permitidos}"
        )
        img_tratada = ocr.image_to_string(
            self.resultado, lang='por', config=configuracao)
        print(img_tratada)
No answers

Browse other questions tagged

You are not signed in. Login or sign up in order to post.