-1
I’m trying to make a Scrap on a college site, I usually do in Javascript with Casperjs and even then everything ok. But now we are trying to use Python for this and I’m having problems with this error:
/usr/lib/python3/dist-packages/apport/report.py:13: DeprecationWarning: the imp module is deprecated in favour of importlib; see the module's documentation for alternative uses
import fnmatch, glob, traceback, errno, sys, atexit, locale, imp
Traceback (most recent call last):
File "unicsul-ead.py", line 38, in <module>
polos=polos+r['https://www.cruzeirodosulvirtual.com.br/webservice/nossos-polos.php?polos='+str(p)]['polos']
TypeError: 'NoneType' object is not subscriptable
I’m using these Imports:
from me.robot import Execution
import multi_req
from bs4 import BeautifulSoup
from money_parser import price_str
import sys
import glob
# import pp
# import json
Since I’m pretty uneducated about it, I noticed he gives a TypeError
but even so returns something about some import
, but I don’t understand it well. Could anyone tell me about it? I’m researching and I don’t understand almost anything.
Well, this is the full code:
from me.robot import Execution
import multi_req
from bs4 import BeautifulSoup
from money_parser import price_str
import sys
import glob
# import pp
# import json
if len(sys.argv)<2:
raise Exception('Passar o nome do arquivo é necessário!')
if(len(sys.argv)==4):
exe=Execution(sys.argv[1], int(sys.argv[3]))
else:
exe=Execution(sys.argv[1])
exe.start()
keepgoing_local=True
keepgoing_course=True
if(len(sys.argv) > 2 and sys.argv[2]!='-'): #tem keepgoing
l=sys.argv[2].split(';')
keepgoing_course_name=l[3]
keepgoing_uf_name=l[7]
keepgoing_city_name=l[9]
keepgoing_local_name=l[11]
keepgoing_local=False
keepgoing_course=False
polos_urls,polos=[],[]
for p in range(1, 1000, 100):
r=multi_req.scrap(['https://www.cruzeirodosulvirtual.com.br/webservice/nossos-polos.php?polos='+str(p)], resp_type='json')
polos=polos+r['https://www.cruzeirodosulvirtual.com.br/webservice/nossos-polos.php?polos='+str(p)]['polos']
for p in polos:
# print(keepgoing_uf_name.encode('UTF-8', 'surrogateescape').strip().lower()+b' == '+p['uf'].encode('UTF-8', 'surrogateescape').strip().lower())
# print(keepgoing_city_name.encode('UTF-8', 'surrogateescape').strip().lower()+b' == '+p['cidade'].encode('UTF-8', 'surrogateescape').strip().lower())
# print(keepgoing_local_name.encode('UTF-8', 'surrogateescape').strip().lower()+b' == '+p['nome'].encode('UTF-8', 'surrogateescape').strip().lower())
if(not keepgoing_local and keepgoing_uf_name.encode('UTF-8', 'surrogateescape').strip().lower()==p['uf'].encode('UTF-8', 'surrogateescape').strip().lower() and keepgoing_city_name.encode('UTF-8', 'surrogateescape').strip().lower()==p['cidade'].encode('UTF-8', 'surrogateescape').strip().lower() and keepgoing_local_name.encode('UTF-8', 'surrogateescape').strip().lower()==p['nome'].encode('UTF-8', 'surrogateescape').strip().lower()):
keepgoing_local=True
exe.log('keepgoing polo: '+p['nome'].lower())
if(keepgoing_local):
if(p['classes'].find(' graduacao ')>-1):
rp=multi_req.scrap(['https://www.cruzeirodosulvirtual.com.br'+p['polo']])
psoup=BeautifulSoup(rp[next(iter(rp))], 'html.parser')
curls=[]
for c in psoup.select(".cursos .titulo-curso-sl"):
curls.append(c.parent.a['href'])
if(len(curls) % 10 == 0):
course_scrap=multi_req.scrap(curls)
for rc in course_scrap.keys():
csoup=BeautifulSoup(course_scrap[rc], 'html.parser')
if(not keepgoing_course and keepgoing_course_name.encode('UTF-8', 'surrogateescape').strip().lower()==csoup.h1.text.strip().encode('UTF-8', 'surrogateescape').strip().lower()):
keepgoing_course=True
exe.log('keepgoing course: '+keepgoing_course_name.lower())
continue
if(keepgoing_course):
if(len(csoup.find_all(id="investimento-hide"))):
exe.add_price(
ies_sigle='UNICSUL EAD',
course_name=csoup.h1.text.strip(),
price=price_str(csoup.find_all(id="investimento-hide")[0].h3.text),
state_name=p['uf'],
city_name=p['cidade'],
local_name=p['nome'],
modality='EAD'
)
curls=[]
exe.end()
The problem is not in import - it only gives a warning of "deprecated" - the error is that the variable "r" you try to use with a dictionary syntax contains
None
- but as you did not paste the whole code it is impossible to guide the correction.– jsbueno
I’ll edit and put the code, I’m running and I forgot. Just a minute!
– Juan Lencina
I recommend you use Scrapy: https://scrapy.org.
– Thiago Krempser