The image is inside a CDATA section, so DOMParser treats it as plain text rather than as HTML markup. That is why .getElementsByTagName('img')
finds nothing and returns an empty NodeList.
Another point: namespaced tags such as <content:encoded>
need a namespace-aware lookup. getElementsByTagName
is meant for ordinary elements; for elements that belong to a namespace you should use getElementsByTagNameNS.
Looking at your feed, the namespace bound to the content prefix
is http://purl.org/rss/1.0/modules/content/
, so an example that retrieves the content would be:
// Look up elements by namespace URI + local name ("encoded"), i.e. <content:encoded>.
// NOTE(review): `doc` is not defined in this snippet — presumably the parsed feed document.
var ces = doc.getElementsByTagNameNS('http://purl.org/rss/1.0/modules/content/', 'encoded');
// Print how many matching elements were found.
console.log(ces.length);
Here is a test; it logs the number of matching tags, which is 1 for this sample feed:
// Sample RSS 2.0 feed held as a template literal. It contains a single <item>
// whose <content:encoded> element wraps an HTML <img> inside a CDATA section.
var data = `<?xml version="1.0" encoding="UTF-8"?><rss version="2.0"
xmlns:content="http://purl.org/rss/1.0/modules/content/"
xmlns:wfw="http://wellformedweb.org/CommentAPI/"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:atom="http://www.w3.org/2005/Atom"
xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
xmlns:slash="http://purl.org/rss/1.0/modules/slash/"
>
<channel>
<title>Parceria Social de Empregos</title>
<atom:link href="http://parceriasocialdeempregos.com.br/feed/" rel="self" type="application/rss+xml" />
<link>http://parceriasocialdeempregos.com.br</link>
<description>Um blog com vagas de emprego</description>
<lastBuildDate>Sat, 06 Jul 2019 17:05:44 +0000</lastBuildDate>
<language>pt-BR</language>
<sy:updatePeriod>hourly</sy:updatePeriod>
<sy:updateFrequency>1</sy:updateFrequency>
<generator>https://wordpress.org/?v=4.9.10</generator>
<image>
<url>http://parceriasocialdeempregos.com.br/wp-content/uploads/2019/01/cropped-logo_01-1-32x32.png</url>
<title>Parceria Social de Empregos</title>
<link>http://parceriasocialdeempregos.com.br</link>
<width>32</width>
<height>32</height>
</image>
<item>
<title>Estágio loja de autopeças Av Ipanema</title>
<link>http://parceriasocialdeempregos.com.br/06/07/2019/estagio-loja-de-autopecas-av-ipanema-3/</link>
<comments>http://parceriasocialdeempregos.com.br/06/07/2019/estagio-loja-de-autopecas-av-ipanema-3/#respond</comments>
<pubDate>Sat, 06 Jul 2019 17:05:44 +0000</pubDate>
<dc:creator><![CDATA[Watanabe]]></dc:creator>
<category><![CDATA[Vaga de Emprego]]></category>
<guid isPermaLink="false">http://parceriasocialdeempregos.com.br/?p=25590</guid>
<description><![CDATA[Você gosta do mundo automotivo? Esta pode ser sua chance de conhecer mais sobre a área! Obrigatório: – Estar cursando o 2º ou 3º ano do Ensino Médio em 2019; – Residir próximo a Avenida Ipanema; – Gostar do mundo automotivo; – Aptidão em português e matemática; – Disponibilidade aos domingos. Enviar currículo para [email protected]]]></description>
<content:encoded><![CDATA[<p><img class="alignnone wp-image-1941" src="http://parceriasocialdeempregos.com.br/wp-content/uploads/2018/12/estagio-watanabe-300x75.jpg" alt="" width="420" height="105" srcset="http://parceriasocialdeempregos.com.br/wp-content/uploads/2018/12/estagio-watanabe-300x75.jpg 300w, http://parceriasocialdeempregos.com.br/wp-content/uploads/2018/12/estagio-watanabe.jpg 600w" sizes="(max-width: 420px) 85vw, 420px" /></p>]]></content:encoded>
<wfw:commentRss>http://parceriasocialdeempregos.com.br/06/07/2019/estagio-loja-de-autopecas-av-ipanema-3/feed/</wfw:commentRss>
<slash:comments>0</slash:comments>
</item>
</channel>
</rss>`;
// Parse the feed text in `data` as XML.
var parser = new DOMParser();
var doc = parser.parseFromString(data, 'application/xml');
// Namespace-aware lookup of <content:encoded>: namespace URI first, then the local name.
var ces = doc.getElementsByTagNameNS('http://purl.org/rss/1.0/modules/content/', 'encoded');
// Print how many <content:encoded> elements were found.
console.log(ces.length);
Of course, as I said at the beginning, the content inside CDATA is not parsed, because that is precisely the purpose of CDATA (see "What does <![CDATA[]]> mean in XML?"). So in this case you need a second parse: the first one handled the RSS, and the second one handles the HTML content embedded in that RSS. Example:
// Sample RSS 2.0 feed held as a template literal. It contains a single <item>
// whose <content:encoded> element wraps an HTML <img> inside a CDATA section.
var data = `<?xml version="1.0" encoding="UTF-8"?><rss version="2.0"
xmlns:content="http://purl.org/rss/1.0/modules/content/"
xmlns:wfw="http://wellformedweb.org/CommentAPI/"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:atom="http://www.w3.org/2005/Atom"
xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
xmlns:slash="http://purl.org/rss/1.0/modules/slash/"
>
<channel>
<title>Parceria Social de Empregos</title>
<atom:link href="http://parceriasocialdeempregos.com.br/feed/" rel="self" type="application/rss+xml" />
<link>http://parceriasocialdeempregos.com.br</link>
<description>Um blog com vagas de emprego</description>
<lastBuildDate>Sat, 06 Jul 2019 17:05:44 +0000</lastBuildDate>
<language>pt-BR</language>
<sy:updatePeriod>hourly</sy:updatePeriod>
<sy:updateFrequency>1</sy:updateFrequency>
<generator>https://wordpress.org/?v=4.9.10</generator>
<image>
<url>http://parceriasocialdeempregos.com.br/wp-content/uploads/2019/01/cropped-logo_01-1-32x32.png</url>
<title>Parceria Social de Empregos</title>
<link>http://parceriasocialdeempregos.com.br</link>
<width>32</width>
<height>32</height>
</image>
<item>
<title>Estágio loja de autopeças Av Ipanema</title>
<link>http://parceriasocialdeempregos.com.br/06/07/2019/estagio-loja-de-autopecas-av-ipanema-3/</link>
<comments>http://parceriasocialdeempregos.com.br/06/07/2019/estagio-loja-de-autopecas-av-ipanema-3/#respond</comments>
<pubDate>Sat, 06 Jul 2019 17:05:44 +0000</pubDate>
<dc:creator><![CDATA[Watanabe]]></dc:creator>
<category><![CDATA[Vaga de Emprego]]></category>
<guid isPermaLink="false">http://parceriasocialdeempregos.com.br/?p=25590</guid>
<description><![CDATA[Você gosta do mundo automotivo? Esta pode ser sua chance de conhecer mais sobre a área! Obrigatório: – Estar cursando o 2º ou 3º ano do Ensino Médio em 2019; – Residir próximo a Avenida Ipanema; – Gostar do mundo automotivo; – Aptidão em português e matemática; – Disponibilidade aos domingos. Enviar currículo para [email protected]]]></description>
<content:encoded><![CDATA[<p><img class="alignnone wp-image-1941" src="http://parceriasocialdeempregos.com.br/wp-content/uploads/2018/12/estagio-watanabe-300x75.jpg" alt="" width="420" height="105" srcset="http://parceriasocialdeempregos.com.br/wp-content/uploads/2018/12/estagio-watanabe-300x75.jpg 300w, http://parceriasocialdeempregos.com.br/wp-content/uploads/2018/12/estagio-watanabe.jpg 600w" sizes="(max-width: 420px) 85vw, 420px" /></p>]]></content:encoded>
<wfw:commentRss>http://parceriasocialdeempregos.com.br/06/07/2019/estagio-loja-de-autopecas-av-ipanema-3/feed/</wfw:commentRss>
<slash:comments>0</slash:comments>
</item>
</channel>
</rss>`;
// First pass: parse the feed text in `data` as XML.
var parser = new DOMParser();
var doc = parser.parseFromString(data, 'application/xml');
// Namespace-aware lookup of <content:encoded>: namespace URI first, then the local name.
var ces = doc.getElementsByTagNameNS('http://purl.org/rss/1.0/modules/content/', 'encoded');
// Run only if the tag exists.
if (ces.length) {
    // The CDATA payload is exposed as plain text, so it needs a second parse, this time as HTML.
    let ce = ces.item(0).textContent;
    let htmlParser = new DOMParser();
    // Use the dedicated HTML parser (it was previously created but never used).
    let htmlDoc = htmlParser.parseFromString(ce, 'text/html');
    let imgs = htmlDoc.getElementsByTagName('img');
    // Logs the first <img> element found in the embedded HTML.
    console.log(imgs.item(0));
}
Resolution
Everything above was an introduction explaining how things work, so that you can adapt it to other code in the future. For your specific code, you can do the following (read the comments):
// The `any` annotation can be applied directly to the variable.
let items: any = doc.querySelectorAll("channel > item");
for (let el of items) {
    // Text content of the first element under this <item> that matches the selector.
    const textOf = (selector: string) => el.querySelector(selector).textContent;

    // Build the record for this item and put it at the front of `objs`.
    objs.unshift({
        "title": textOf("title"),
        "link": textOf("link"),
        "author": textOf("creator"),
        "description": textOf("description"),
        "pubDate": textOf("pubDate"),
        "content": textOf("encoded"),
        "thumbnail": this.getImgFromEncodedTag(el)
    });
}
...
/**
 * Extracts the URL of the first image embedded in an element's <content:encoded> HTML.
 *
 * The lookup is scoped to `el`, so each feed item yields its own image rather than
 * the first one found anywhere in the document.
 *
 * @param el Element to search within (the caller passes one feed <item>).
 * @returns The `src` of the first <img> in the embedded HTML, or null when there is
 *          no <content:encoded> element or it contains no image.
 */
private getImgFromEncodedTag(el)
{
    // Namespace-aware lookup restricted to the descendants of `el`.
    let ces: any = el.getElementsByTagNameNS('http://purl.org/rss/1.0/modules/content/', 'encoded');

    // Run only if the tag exists.
    if (ces.length) {
        // The CDATA payload is exposed as plain text; parse it again, this time as HTML.
        let ce = ces.item(0).textContent;
        let htmlParser = new DOMParser();
        let htmlDoc = htmlParser.parseFromString(ce, 'text/html');
        let imgs = htmlDoc.getElementsByTagName('img');

        // Check whether any images exist.
        if (imgs.length) {
            return imgs[0].src;
        }
    }

    // No images found: return null.
    return null;
}