0
I’m having trouble turning a function into a python class:
from bs4 import BeautifulSoup
from selenium import webdriver
import html2text
# driver.page_source = driver.get())#
def getPEP(strg):
driver = webdriver.Firefox()
driver.get(strg)
html = driver.page_source
driver.close()
text=html2text.html2text(html)
return(text);
# txt=getPEP('http://www.mtsamples.com/site/pages/sample.asp?type=3-Allergy%20/%20Immunology&sample=386-Allergic%20Rhinitis, Allergic Rhinitis')
# print(txt)
peps = open('PEP.txt', 'r')
lines = tuple(peps)
print(lines)
peps.close()
# i=1798 #http://www.mtsamples.com/site/pages/sample.asp?type=98-General%20Medicine&sample=487-Request%20For%20Consultation, Request For Consultation
for i in range(len(lines)):
i=1798
strg=lines[i].replace('\n','')
text=getPEP(strg)# print(text)
start = '# '
end = ', \n\n[ ![Join us on'
cleaned=(text.split(start))[1].split(end)[0]
# print(cleaned)
file = open(str(i)+'.txt', 'w')
file.write(cleaned.replace(' ** ','').replace('**',''))
file.close()
# print('arquivo ' + str(i) + ' gravado com sucesso')
I created a python Class:
from bs4 import BeautifulSoup
from selenium import webdriver
import html2text
class ClassCrawler:
def __init__(self):
self.test = self.getPepFromInternt("http://www.mtsamples.com/site/pages/sample.asp?type=98-General%20Medicine&sample=487-Request%20For%20Consultation,")
self.getAllPep('/home/angelica/Documents/gitbucket/mscangelica/dataset/LinksTomtsamples.txt')
def getPepFromInternt(strg):
driver = webdriver.Firefox()
driver.get(strg)
html = driver.page_source
driver.close()
text=html2text.html2text(html)
return(text)
def getAllPep(linksList):
peps = open(linksList, 'r')
lines = tuple(peps)
print(lines)
peps.close()
# i=1798 #http://www.mtsamples.com/site/pages/sample.asp?type=98-General%20Medicine&sample=487-Request%20For%20Consultation, Request For Consultation
for i in range(len(lines)):
# i=1798 //when the connection is broken
strg=lines[i].replace('\n','')
text= self.getPepFromInternt(strg)# print(text)
start = '# '
end = ', \n\n[ ![Join us on'
cleaned=(text.split(start))[1].split(end)[0]
# print(cleaned)
file = open(str(i)+'.txt', 'w')
file.write(cleaned.replace(' ** ','').replace('**',''))
file.close()
But when I call the class
>>> from ClassCrawlerPEP import ClassCrawler
>>> c = ClassCrawler()
generates the following error:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "~/PEPS/ClassCrawlerPEP.py", line 8, in __init__
self.test = self.getPepFromInternt("http://www.mtsamples.com/site/pages/sample.asp?type=98-General%20Medicine&sample=487-Request%20For%20Consultation,")
TypeError: getPepFromInternt() takes 1 positional argument but 2 were given
I had already put the self had not worked out, now gave, this is some python bug? Thank you so much for the answer.
– user2535338
Was he accusing the same mistake as before? If so, were you declaring only the
self
parameter or thestrg
also?– Emoon