0
Hello, I’m having a problem generating a dictionary answer within another dictionary.
I have a home page that contains 8 bimonthly programs, in these picked up their respective links and properties. These links have a schedule per week, I need to access your links to get the details of each week’s schedule. I can access all the data. But currently it is repeating several times the header of the bimonthly program. What I’d like to do is bring a list of all the weekly schedules within the dictionary containing the headers.
Then in the final result it would contain 8 dictionaries with their respective n weekly schedules
Below is a sample of the code that parses these Urls
class ApostilaSpider(scrapy.Spider):
name = 'apostila'
start_urls = ['https://www.jw.org/pt/biblioteca/jw-apostila-do-mes/']
def parse(self, response):
apostilas = response.css('div.publicationDesc')[:8]
for apostila in apostilas:
loader = ItemLoader(item=ApostilaItems(), selector=apostila)
loader.add_css('href', 'a::attr(href)')
loader.add_css('descricao', 'a::text')
loader.add_css('mes_inicio', 'a')
loader.add_css('mes_fim', 'a')
loader.add_css('ano_inicio', 'a')
loader.add_css('ano_fim', 'a')
url = f"https://www.jw.org{apostila.css('a::attr(href)').get()}"
desig = scrapy.Request(url, self.parse_apostila_hrefs, priority=1000)
yield {
"apostila": loader.load_item(),
"designacoes" : desig
}
def parse_apostila_hrefs(self, response):
urls = response.css('div.syn-body.textOnly.accordionHandle > h2 > a::attr(href)').getall()
for url in urls:
yield response.follow(url, self.parse_designacoes_apostila)
def parse_designacoes_apostila(self, response):
loader = ItemLoader(item=ProgramaSemanaItems(), selector=response)
loader.add_css('semana_referencia' , '#p1')
loader.add_css('leitura_semana', '#p2')
loader.add_css('cantico_inicial', '#p3')
loader.add_css('tpd_titulo', '#section2 > div > ul > li:nth-child(1) > p')
loader.add_css('tpd_href', '#section2 > div > ul > li:nth-child(1) > p > a::attr(href)')
loader.add_css('tpd_duracao', '#section2 > div > ul > li:nth-child(1) > p')
loader.add_css('tpd_joias_titulo', '#section2 > div > ul > li:nth-child(2) > p')
loader.add_css('tpd_joias_duracao', '#section2 > div > ul > li:nth-child(2) > p')
loader.add_css('tpd_joias_descricao', '#section2 > div.pGroup > ul > li:nth-child(2) > ul')
loader.add_css('tpd_leitura_titulo', '#section2 > div.pGroup > ul > li:nth-child(3)')
loader.add_css('tpd_leitura_duracao', '#section2 > div.pGroup > ul > li:nth-child(3)')
loader.add_css('tpd_leitura_texto_base', '#section2 > div.pGroup > ul > li:nth-child(3) > p > a')
loader.add_css('tpd_leitura_href', '#section2 > div.pGroup > ul > li:nth-child(3) > p > a::attr(href)')
loader.add_css('tpd_leitura_licao_melhore_titulo', '#section2 > div.pGroup > ul > li:nth-child(3) > p > a.pub-th')
loader.add_css('tpd_leitura_licao_melhore_href', '#section2 > div.pGroup > ul > li:nth-child(3) > p > a.pub-th::attr(href)')
yield {
"designacoes": loader.load_item()
}