3
Hello, I was looking at the topic "Take a value within html" and understood that "parse_url($href, PHP_URL_QUERY)" returns the query string of a URL, which can then be stored in a variable.
My question is this: I want to go through a page (URL) and find all the links in it (the href values) that contain a certain chunk of the URL, for example "/folder/subfolder/" — that is, only the links that have that chunk in their URL.
I looked at some examples on the web, but my code is not working properly: it is printing all the URLs.
Here’s what I’m trying to figure out:
<?php
  // Downloads a page, collects every <a href> whose value contains a given
  // path fragment, prints those links, and reports how many links matched
  // and how many did not.
  $url = "http://www.minhaurl.com.br";
  $trecho = "/pasta/subpasta/"; // fragment a link must contain to be listed
  $timeout = 5;                 // connection timeout, in seconds

  $ch = curl_init();
  curl_setopt($ch, CURLOPT_URL, $url);
  curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
  curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
  $html = curl_exec($ch);
  $erro = curl_error($ch); // read before the handle is closed
  curl_close($ch);

  $slinks = 0;          // links that do NOT contain the fragment
  $meuslinks = array(); // rendered anchors for the links that do

  if ($html === false || $html === '') {
    // Nothing to parse: handing an empty document to loadHTML() is an error,
    // so report the transfer failure and fall through to the (zero) counts.
    echo "Falha ao carregar a página: ".htmlspecialchars($erro, ENT_QUOTES, 'UTF-8')."<br>";
  } else {
    // Real-world HTML is rarely valid; collect parser warnings internally
    // instead of silencing everything with @, then restore the old setting.
    $anterior = libxml_use_internal_errors(true);
    $dom = new DOMDocument();
    $dom->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($anterior);

    foreach ($dom->getElementsByTagName("a") as $link) {
      $href = $link->getAttribute("href");

      // strpos() returns 0 when the fragment is at the very start of the
      // string, so the result must be compared with === false; a loose
      // == false would count a leading "/pasta/subpasta/" as "not found".
      if (strpos($href, $trecho) === false) {
        $slinks++;
        continue;
      }

      // The href comes from a remote page, so escape it before echoing it
      // back into HTML (both as attribute value and as link text).
      $seguro = htmlspecialchars($href, ENT_QUOTES, 'UTF-8');
      $exibe = "<a href='".$seguro."'>".$seguro."</a>";
      echo $exibe."<br>";
      $meuslinks[] = $exibe;
    }
  }

  $nlinks = count($meuslinks);

  // NOTE(review): every matching link was already echoed inside the loop
  // above, so this prints the list a second time. Kept to preserve the
  // original output; confirm that the repetition is intended.
  foreach ($meuslinks as $exibe) {
    echo $exibe."<br>";
  }

  echo "<br> ".$nlinks." links foram encontrados!<br>";
  // NOTE(review): $slinks counts the links that did not match, although the
  // message text is identical to the one above; consider rewording it.
  echo "<br> ".$slinks." links foram encontrados!<br>";
  ?>