boomerang bochs cheatengine爬虫
这次我又写了一个爬虫。
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.net.URLConnection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
public class boomerang
{
public static String url="http://svn.code.sf.net/p/boomerang/code/";
public static String path="C:/boomerang/";
public static int filethreadnum=0;
public static boolean setinit=false;//是否强制初始化
public static String[] initstring={};//初始化目录位要开始更新的目录,按深度顺序
public static int curdepth=0;//当前初始化深度
public static class FileThread extends Thread
{
String curnode;
FileThread(String curnode)
{
this.curnode=curnode;
}
public void run()
{
try
{
while(filethreadnum>10)
{
sleep(1000);
}
filethreadnum++;
int byteread=0;
int bytesum=0;
URL weburl=new URL(url+curnode);
URLConnection con=weburl.openConnection();
InputStream instream=con.getInputStream();
FileOutputStream fs=new FileOutputStream((path+curnode).replace("%20"," "));
byte[] buffer=new byte;
while((byteread=instream.read(buffer)) != -1)
{
bytesum+=byteread;
fs.write(buffer,0,byteread);
System.out.println("\t\t当前下载文件:"+curnode+"\t当前大小:"+bytesum);
}
fs.close();
instream.close();
filethreadnum--;
}
catch(Exception e)
{
System.out.println("error"+e.getMessage());
// new File(path+filepath).deleteOnExit();;
filethreadnum--;
}
}
}
public static String escape(String src)
{
StringBuffer sbuf=new StringBuffer();
int len=src.length();
for(int i=0;i<len;i++)
{
int ch=src.charAt(i);
if(ch == '\\' || ch == '*' || ch == '?' || ch == '"' || ch == '<' || ch == '>' || ch == '|')
sbuf.append('x');//忽略不能做文件名的字符
else
sbuf.append(ch);
}
return sbuf.toString();
}
public static String createFolder(String folderPath)
{
String txt = folderPath;
txt.replace('\\','/');
if(txt.charAt(txt.length()-1) != '/')
txt+='/';
try
{
File myFilePath = new File(txt);
txt = folderPath;
if (!myFilePath.exists())
{
if(!myFilePath.mkdir())
{
String newpath=folderPath.substring(0,folderPath.length()-1);
newpath=newpath.substring(0,newpath.lastIndexOf('/'));
createFolder(newpath);
myFilePath.mkdir();
}
}
}
catch (Exception e)
{
System.out.println("错误!"+e.getMessage());
}
return txt;
}
public static void myresolve(Element e,String before) throws IOException
{
try
{
String curnode=e.attr("href");
System.out.println(before+curnode);
if(setinit)
{
if(!curnode.equals(initstring))
return;
else
curdepth++;
if(curdepth >= initstring.length)
setinit=false;
}
if(!curnode.contains(".."))
{//非父目录
if(curnode.charAt(curnode.length()-1) == '/')
{//目录
createFolder((path+before+curnode).replace("%20"," "));
Document doc=Jsoup.connect(url+before+curnode).timeout(0).get();
System.out.println("当前目录:"+url+curnode);
Elements items = doc.select("li a");
for(Element ele:items)
{
myresolve(ele,before+curnode);
}
}
else
{//文件
String filepath=e.text();
File curfile=new File((path+before+filepath).replace("%20"," "));
if(curfile.exists())
return;
while(filethreadnum>10)
{
Thread.sleep(1000);
}
(new FileThread(before+filepath)).start();
}
}
}
catch(Exception exc)
{
System.out.println("错误!"+exc.getMessage());
}
}
public static void main(String[] args) throws IOException
{
try
{
Document doc = Jsoup.connect(url+"/").timeout(0).get();
Elements items=doc.select("li a");
createFolder(path);
for(Element e:items)
{
myresolve(e,"");
}
}
catch(Exception exc)
{
System.out.println("错误!"+exc.getMessage());
}
}
}
稍作修改(把类中的 url 和 path 两个字段换成下面两行),bochs 的源码也可以下载了:
public static String url="http://svn.code.sf.net/p/bochs/code/";
public static String path="C:/bochs/";
再稍作修改(同样只替换 url 和 path 两个字段),cheatengine 的源码也可以下载了:
public static String url="http://cheat-engine.googlecode.com/svn/";
public static String path="C:/cheatengine/";
谢谢了。沙发 顶一个。。。。。。。。。。。。。
页:
[1]