1
I’m using an example I found called AndroidJsoup to fetch the source HTML
of a certain page, but I can’t manage to extract just the snippet I want, which is inside a certain <script> tag.
In short, AndroidJsoup should run, fetch the HTML, apply a regex,
and return the match in the String resultado1.
Below is my source, together with the page reference and a sample of the HTML
to be captured, as well as the regex taken from my PHP script.
Android MainActivity.java
package com.survivingwithandroid.jsoup;
import android.os.AsyncTask;
import android.os.Bundle;
import android.support.v7.app.ActionBarActivity;
import android.util.Log;
import android.view.Menu;
import android.view.MenuItem;
import android.view.View;
import android.widget.Button;
import android.widget.EditText;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.Jsoup;
import org.jsoup.nodes.DataNode;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Single-screen activity that downloads the page at a user-supplied URL with jsoup and shows a
 * plain-text report in the response field: page title, meta tags, {@code h2.topic} headings, the
 * contents of every {@code <script>} element, and the URLs captured from those scripts as
 * {@code resultado1}.
 */
public class MainActivity extends ActionBarActivity {

    /** Tag used for every logcat message emitted by this activity. */
    private static final String LOG_TAG = "JSwa";

    /**
     * Java port of the PHP expression from the original script: an http/https URL that ends in
     * {@code .com/pessoas}. Compiled once because it is applied to every script node.
     */
    private static final Pattern RESULTADO1_PATTERN =
            Pattern.compile("https?://[^\\s\"'<>]*?\\.com/pessoas");

    private EditText respText;

    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_main);

        final EditText edtUrl = (EditText) findViewById(R.id.edtURL);
        Button btnGo = (Button) findViewById(R.id.btnGo);
        respText = (EditText) findViewById(R.id.edtResp);

        btnGo.setOnClickListener(new View.OnClickListener() {
            @Override
            public void onClick(View view) {
                // Network access must not run on the UI thread, so hand the URL to the task.
                String siteUrl = edtUrl.getText().toString();
                new ParseURL().execute(siteUrl);
            }
        });
    }

    @Override
    public boolean onCreateOptionsMenu(Menu menu) {
        // Inflate the menu; this adds items to the action bar if it is present.
        getMenuInflater().inflate(R.menu.main, menu);
        return true;
    }

    @Override
    public boolean onOptionsItemSelected(MenuItem item) {
        // Handle action bar item clicks here. The action bar will
        // automatically handle clicks on the Home/Up button, so long
        // as you specify a parent activity in AndroidManifest.xml.
        int id = item.getItemId();
        if (id == R.id.action_settings) {
            return true;
        }
        return super.onOptionsItemSelected(item);
    }

    /**
     * Appends one {@code label [match] } line to {@code out} for every occurrence of
     * {@code pattern} in {@code text}.
     *
     * <p>Reusable for further captures (for example a {@code resultado2}): declare another
     * {@link Pattern} constant and call this method again with a different label.
     *
     * @param out     report being built
     * @param label   name printed in front of each captured value
     * @param pattern expression to search for
     * @param text    text to search, here the raw contents of a script element
     */
    private static void appendMatches(StringBuilder out, String label, Pattern pattern, String text) {
        Matcher matcher = pattern.matcher(text);
        // A Matcher only exposes a result after find(); printing the Matcher itself yields
        // something like "java.util.regex.Matcher@53724000" rather than the matched text.
        while (matcher.find()) {
            out.append(label).append(" [").append(matcher.group()).append("] \r\n");
        }
    }

    /**
     * Background task that fetches the page at {@code strings[0]} and builds the text report.
     *
     * <p>NOTE(review): as a non-static inner class, a running task keeps a reference to the
     * enclosing activity; it relies on that reference to reach {@code respText}.
     */
    private class ParseURL extends AsyncTask<String, Void, String> {

        /**
         * Downloads and parses the page, returning whatever part of the report was built before
         * any failure occurred.
         *
         * @param strings the URL to fetch, in the first element
         * @return the report text; possibly partial or empty when fetching or parsing failed
         */
        @Override
        protected String doInBackground(String... strings) {
            // The buffer never leaves this thread, so the unsynchronized builder is sufficient.
            StringBuilder buffer = new StringBuilder();
            try {
                String url = strings[0];
                Log.d(LOG_TAG, "Connecting to [" + url + "]");
                Document doc = Jsoup.connect(url).get();
                Log.d(LOG_TAG, "Connected to [" + url + "]");

                // Get document (HTML page) title
                String title = doc.title();
                Log.d(LOG_TAG, "Title [" + title + "]");
                buffer.append("Title: " + title + "\r\n");

                // Get meta info
                Elements metaElems = doc.select("meta");
                buffer.append("META DATA\r\n");
                for (Element metaElem : metaElems) {
                    String name = metaElem.attr("name");
                    String content = metaElem.attr("content");
                    buffer.append("name [" + name + "] - content [" + content + "] \r\n");
                }

                Elements topicList = doc.select("h2.topic");
                buffer.append("Topic list\r\n");
                for (Element topic : topicList) {
                    buffer.append("Data [" + topic.text() + "] \r\n");
                }

                // Script bodies are data nodes, not text nodes, so read them via dataNodes().
                Elements scriptElements = doc.getElementsByTag("script");
                buffer.append("Variavel resultado1\r\n");
                for (Element script : scriptElements) {
                    for (DataNode node : script.dataNodes()) {
                        String scriptData = node.getWholeData();
                        Log.d(LOG_TAG, scriptData);
                        buffer.append("StriptData [" + scriptData + "] \r\n");
                        appendMatches(buffer, "resultado1", RESULTADO1_PATTERN, scriptData);
                    }
                    Log.d(LOG_TAG, "-------------------");
                }
            } catch (Exception e) {
                // Best effort: keep the partial report, but record the failure in logcat.
                // Errors (e.g. OutOfMemoryError) are deliberately no longer swallowed here.
                Log.e(LOG_TAG, "Failed to fetch or parse the page", e);
            }
            return buffer.toString();
        }

        /** Shows the finished report in the response field. */
        @Override
        protected void onPostExecute(String s) {
            super.onPostExecute(s);
            respText.setText(s);
        }
    }
}
Sample page HTML
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<script type="text/javascript">
function var1() {
...etc...
}
</script>
<title>Link das Pessoas</title>
</head>
<body>
<div>Conteudo</div>
<script>
function(...)
etc valorM = (valores de xyz);
etc valorE = (valores de xy);
pegavalor(function() {
...funcoes_diversars(Conteudo dinamico e estatico...http://arquivosdofulano.com/pessoas
...Conteudo dinamico e estatico)
})
</script>
<div>Conteudo #2</div>
<script type="text/javascript">
var google...
</script>
</html>
Regex to capture the value of resultado1:
/(([http]+[https]:\/\/)(.*?).(com\/pessoas))/
Note: I took this from PHP, and I don’t know whether anything in this regex needs to change for Java.
If possible, I would like code that lets me add other regexes
to capture other values into strings, e.g. resultado2
...
Source Androidjsoup
Source Source Code
No, it’s the URL
http://arquivosdofulano.com/pessoas
, as well as other values that I will extract as needed. Take a look at the regex. @re22 – Florida
So far I’ve been able to refine the search and only get the content of the script I want with
String procurarPor = "pegavalor(function()";
 if(scriptdata.toLowerCase().contains(procurarPor.toLowerCase())){
However, I cannot apply the regex at all; it always returns a java.util.regex.Matcher@53724000
. I’ll keep looking until dawn for a solution; if you find one first, please post it, though I don’t think that will happen. =( – Florida
The problem was solved, though not in the way I expected: I read the entire page, pass the content to
PHP,
apply the search operations there with the regex,
and return the value. I would have liked to answer my own question, but since this does not fit what I wanted — which may be a doubt others share — I leave the alternative here and keep the question open for a possible answer, whatever it may be; at least my workaround may be of use. – Florida