Only take the last links from a sitemap

Asked

Viewed 93 times

3

using HtmlAgilityPack;
using JoeBlogs;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Xml;
using System.Xml.Linq;
using System.Xml.XPath;

namespace ReaderXML
{
    /// <summary>
    /// Console tool that reads the last entries of a site's sitemap.xml and
    /// republishes each referenced page as a WordPress post through XML-RPC.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point: fetches the post URLs from the source sitemap and posts each one.
        /// </summary>
        static void Main(string[] args)
        {
            // Visit every URL returned by the reader. The previous index loop
            // started at 1 and therefore silently skipped the first entry.
            foreach (string postUrl in LeitorDeXML("http://SITE ORIGEM/"))
            {
                Postagem(postUrl);
            }
        }

        /// <summary>
        /// Downloads "{url}sitemap.xml" and returns the <c>loc</c> value of its
        /// last 10 <c>url</c> entries, in document order.
        /// </summary>
        /// <param name="url">Base address of the site, including the trailing slash.</param>
        /// <returns>Up to 10 page URLs; entries without a <c>loc</c> element are skipped.</returns>
        private static IEnumerable<string> LeitorDeXML(string url)
        {
            string sitemapUrl = string.Format("{0}sitemap.xml", url);
            Console.WriteLine("Carregando " + sitemapUrl);

            // XElement.Load fetches and parses the document by itself; the
            // XmlTextReader that used to be created here was never read or disposed.
            XElement element = XElement.Load(sitemapUrl);

            XName urlNodes = XName.Get("url", "http://www.sitemaps.org/schemas/sitemap/0.9");
            XName locNodes = XName.Get("loc", "http://www.sitemaps.org/schemas/sitemap/0.9");

            List<string> resultado = new List<string>();

            // Take the last 10 elements: reverse, take 10, reverse back to keep document order.
            foreach (XElement e in Enumerable.Reverse(element.Elements(urlNodes)).Take(10).Reverse())
            {
                XElement loc = e.Element(locNodes);

                // A malformed <url> entry without <loc> must not abort the whole run.
                if (loc != null && !string.IsNullOrWhiteSpace(loc.Value))
                {
                    resultado.Add(loc.Value);
                }
            }

            return resultado;
        }

        /// <summary>
        /// Loads the page at <paramref name="website"/>, extracts the elements whose class
        /// contains 'entry-title' and 'entry-content', and publishes them as a new WordPress post.
        /// Pages without both elements are reported and skipped. Any other failure is printed
        /// and the method waits for a key press before returning.
        /// </summary>
        /// <param name="website">Absolute URL of the page to copy.</param>
        private static void Postagem(string website)
        {
            try
            {
                // WordPress link, user and password (placeholders; keep real credentials out of source).
                string link = "http://SITE NOVO";
                string username = "user wp";
                string password = "senha wp";

                // Read the HTML.
                HtmlWeb web = new HtmlWeb();
                HtmlDocument resultat = web.Load(website);

                // Split TITLE and CONTENT. SelectSingleNode returns null when nothing matches,
                // whereas indexing the result of SelectNodes threw NullReferenceException on
                // pages (e.g. the home page or category listings) that are not posts.
                var tituloNode = resultat.DocumentNode.SelectSingleNode("//*[contains(@class,'entry-title')]");
                var conteudoNode = resultat.DocumentNode.SelectSingleNode("//*[contains(@class,'entry-content')]");

                if (tituloNode == null || conteudoNode == null)
                {
                    Console.WriteLine("Ignorando {0}: entry-title/entry-content ausente", website);
                    return;
                }

                string titulopost = tituloNode.InnerHtml;
                string conteudo = conteudoNode.InnerHtml;

                // Log in to WordPress.
                var wp = new WordPressWrapper(link + "/xmlrpc.php", username, password);
                var post = new Post();

                // TODO: category creation is not implemented yet. The earlier draft sliced the
                // URL at a fixed offset of 25 characters and threw on shorter URLs or URLs with
                // no further '/', which aborted the post even though the result was never used.

                // Date: midnight of the current day.
                post.DateCreated = DateTime.Today;

                // Publish.
                post.Title = titulopost;
                post.Body = conteudo;

                wp.NewPost(post, true);
            }
            catch (Exception e)
            {
                Console.WriteLine("Error: {0}", e);
                Console.ReadKey();
            }
        }
    }
}

Execution never gets past line 63 (the line marked `//ERRO AQUI` above); it throws:

Error: System.NullReferenceException: Object reference not set to an instance of an object
  • I have a for loop: `var website = LeitorDeXML("http://SITE LINK TO READ SITEMAP/").ToArray(); var total = website.Count(); for (int i = 1; i < total; i++) { Postagem(website[i]); }` The LeitorDeXML method is the one that reads the sitemap... I don't know whether the Postagem method is relevant... Would it work to modify the for loop I already have?

  • Iterate from back to front at this line: `foreach (XElement e in lista) {`

  • What would that for loop look like?

  • foreach (Xelement e in list) { for(int i = 0; i < list.Count; i-;) { var postUrl = e.Element(locNodes). Value; countPost++; urls.Add(postUrl); } } This? 'Cause if that’s what’s wrong...

  • Sorry, but how do you send a message through chat?

2 answers

3


One way to do this is to use Enumerable.Reverse to invert the sequence and Enumerable.Take to get the first n elements (which are the last n of the original), then reverse again to restore the original order:

// Reverse the <url> elements, keep the first 10 (the last 10 of the sitemap),
// then reverse again so they come out in their original document order.
List<XElement> lista = Enumerable.Reverse(element.Elements(urlNodes)).Take(10).Reverse().ToList();

Your method should end up like this:

/// <summary>
/// Downloads "{url}sitemap.xml" and returns the <c>loc</c> value of its
/// last 10 <c>url</c> entries, in document order.
/// </summary>
/// <param name="url">Base address of the site, including the trailing slash.</param>
/// <returns>Up to 10 page URLs; entries without a <c>loc</c> element are skipped.</returns>
private static IEnumerable<string> LeitorDeXML(string url)
{
    string sitemapUrl = string.Format("{0}sitemap.xml", url);
    Console.WriteLine("Carregando " + sitemapUrl);

    // XElement.Load fetches and parses the document by itself; the
    // XmlTextReader that used to be created here was never read or disposed.
    XElement element = XElement.Load(sitemapUrl);

    XName urlNodes = XName.Get("url", "http://www.sitemaps.org/schemas/sitemap/0.9");
    XName locNodes = XName.Get("loc", "http://www.sitemaps.org/schemas/sitemap/0.9");

    List<string> resultado = new List<string>();

    // Take the last 10 elements: reverse, take 10, reverse back to keep document order.
    foreach (XElement e in Enumerable.Reverse(element.Elements(urlNodes)).Take(10).Reverse())
    {
        XElement loc = e.Element(locNodes);

        // A malformed <url> entry without <loc> must not abort the whole run.
        if (loc != null && !string.IsNullOrWhiteSpace(loc.Value))
        {
            resultado.Add(loc.Value);
        }
    }

    return resultado;
}
  • Do I keep `var countPost = 1;`?

  • @Raulcorreia You don’t need it, because you only get the last 10 elements.

  • 1

    I was going to post a back to front I don’t see as being a good solution. + 1

  • Gave some errors ;x

  • Error 2: A local variable named 'url' cannot be declared in this scope because it would give a different meaning to 'url', which is already used in a 'parent or current' scope to denote something else

  • Error 3: Cannot convert type 'string' to 'System.Xml.Linq.XElement'

  • Error 1: Cannot implicitly convert type 'System.Collections.Generic.List<System.Xml.Linq.XElement>' to 'System.Collections.Generic.List<string>'

  • @Raulcorreia I fixed it, see if it works for you now.

  • I'm trying to transfer the posts from one site to another; both sites are mine. The easiest way I could find was through the sitemap, because I have no other access to the old site... This is my complete code: (I'm going to post it in the question). It fails on line 63 and never gets past it...

  • @Raulcorreia But the initial problem, getting the last 10 items, is OK? I tested it here and it worked.

  • It does get them... but it can't get through the other method, the one that separates CONTENT and TITLE and then posts...

  • @Raulcorreia Good; if possible, please mark the answer as accepted. =) This other problem is with XPath, which is a different subject; if possible, create a new question and I can try to help there. Thank you.

  • Okay, thank you all =)

Show 8 more comments

0

If you want to return the last 10, but not backwards, you can create an extension method:

namespace Extensoes
{
    /// <summary>
    /// Extension methods for <see cref="IEnumerable{T}"/>.
    /// </summary>
    public static class IEnumerableExtensions
    {
        /// <summary>
        /// Returns the last <paramref name="n"/> elements of <paramref name="source"/>,
        /// in their original order.
        /// </summary>
        /// <remarks>
        /// Newer .NET versions ship <c>System.Linq.Enumerable.TakeLast</c>; when both are
        /// in scope, calling this one as an extension method is ambiguous.
        /// </remarks>
        /// <typeparam name="T">Element type.</typeparam>
        /// <param name="source">Sequence to read from.</param>
        /// <param name="n">Maximum number of trailing elements to return; zero or negative yields an empty sequence.</param>
        /// <returns>At most <paramref name="n"/> elements from the end of <paramref name="source"/>.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
        public static IEnumerable<T> TakeLast<T>(this IEnumerable<T> source, int n)
        {
            if (source == null)
            {
                throw new ArgumentNullException("source");
            }

            // Guarding here also avoids the overflow of "Count() - n" for very
            // negative n, which previously made the method return every element.
            if (n <= 0)
            {
                return Enumerable.Empty<T>();
            }

            // Collections know their size, so skipping is cheap and needs no buffer.
            ICollection<T> collection = source as ICollection<T>;
            if (collection != null)
            {
                return source.Skip(Math.Max(0, collection.Count - n));
            }

            return TakeLastIterator(source, n);
        }

        // Single-pass fallback: keeps a sliding window of the last n items so the
        // source is enumerated once instead of once for Count() and again for Skip().
        private static IEnumerable<T> TakeLastIterator<T>(IEnumerable<T> source, int n)
        {
            Queue<T> janela = new Queue<T>();

            foreach (T item in source)
            {
                if (janela.Count == n)
                {
                    janela.Dequeue();
                }

                janela.Enqueue(item);
            }

            foreach (T item in janela)
            {
                yield return item;
            }
        }
    }
}

And then use it like this:

using Extensoes;

/// <summary>
/// Loads "{url}sitemap.xml" and lazily yields the <c>loc</c> text of its last 10
/// <c>url</c> entries, using the <c>TakeLast</c> extension from <c>Extensoes</c>.
/// </summary>
/// <param name="url">Base address of the site, including the trailing slash.</param>
/// <returns>The page URLs of the last 10 sitemap entries, in document order.</returns>
private static IEnumerable<string> LeitorDeXML(string url)
{
    string sitemapUrl = string.Format("{0}sitemap.xml", url);
    Console.WriteLine("Carregando " + sitemapUrl);

    // Both element names live in the sitemap protocol namespace.
    XNamespace ns = "http://www.sitemaps.org/schemas/sitemap/0.9";
    XElement raiz = XElement.Load(sitemapUrl);

    IEnumerable<XElement> ultimas = raiz.Elements(ns + "url").TakeLast(10);

    return from entrada in ultimas
           select entrada.Element(ns + "loc").Value;
}

Browse other questions tagged

You are not signed in. Login or sign up in order to post.