JAVA HttpClient实现页面信息抓取(获取图片验证码并传入cookie实现信息获取) - Go语言中文社区

JAVA HttpClient实现页面信息抓取(获取图片验证码并传入cookie实现信息获取)


有时候我们的程序中需要调用第三方接口获取数据,比如在这里需要在我的程序里实现用户输入汽车号牌等信息就可以查到用户的违章信息,在没有其他方法的情况下我就得想办法在官网获取信息。上图是官网获取信息的网站页面。

传统的ajax请求不可能实现,光不能跨域这一点就实现不了。

使用java的post请求可以实现。不过这类网站的安全保护一般较高,不会让你随意访问:通常的做法是进入网站首页时在response中返回cookie,之后的每次操作后台都会比对你传入的cookie是否相同,相同则认为你是从浏览器首页点进来的,正常显示;请求信息的cookie不同或没有cookie,则认为是机器程序访问,禁止访问。

    要想获取首页的cookie,我们需要先用java请求首页,获取cookie,以后的每次请求都将此cookie set进去即可。

    另外,请求数据之前还有输入图片验证码这一步。网站传输的图片验证码一般都是以文件流的形式返回,再设置到img的src属性里面。所以我们需要先获取这个图片验证码并返回给前台,用户看到验证码后填写验证码和其他信息并提交。然后我再用java请求官网并将参数一并传入,当然不要忘了传cookie,这样就可以顺利获取官网返回的违章信息了。

具体代码如下:

引入httpclient相关的包

import net.sf.json.JSONArray;
import net.sf.json.JSONObject;

import org.apache.commons.collections.map.ListOrderedMap;
import org.apache.commons.httpclient.Cookie;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.cookie.CookiePolicy;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.methods.PostMethod;
import org.apache.commons.httpclient.params.HttpMethodParams;
import org.apache.commons.lang.StringUtils;
//违章查询官网页面抓取图片验证码(第一步)
	/**
	 * Step 1 of the violation query: fetches the captcha image from the
	 * traffic-police site and streams it back to the browser.
	 *
	 * <p>The site's inquiry page is requested first so that the HttpClient
	 * collects the cookies the site hands out. Those cookies are stored in the
	 * local HTTP session under the key {@code "cookie"} so that the later
	 * query submission can send them back.
	 *
	 * <p>If the captcha cannot be fetched, the response status is set to
	 * 502 (Bad Gateway) and no body is written.
	 */
	public void getImage(){
		HttpServletRequest request = ServletActionContext.getRequest();
		HttpServletResponse response = ServletActionContext.getResponse();

		HttpClient httpClient = new HttpClient();
		httpClient.getParams().setCookiePolicy(CookiePolicy.BROWSER_COMPATIBILITY);

		// Visit the inquiry page purely to collect cookies; the body is not needed.
		GetMethod homePage = new GetMethod("http://sx.122.gov.cn/views/inquiry.html?q=j");
		try {
			httpClient.executeMethod(homePage);
		} catch (IOException e) {
			// Best effort: the captcha request below is still attempted.
			e.printStackTrace();
		} finally {
			// Hand the connection back before the client is used again.
			homePage.releaseConnection();
		}

		// Flatten the collected cookies into one string and keep it in the
		// local session for the query submission.
		StringBuilder cookieHeader = new StringBuilder();
		for (Cookie c : httpClient.getState().getCookies()) {
			cookieHeader.append(c.toString()).append(";");
		}
		request.getSession().setAttribute("cookie", cookieHeader.toString());

		// The timestamp suffix makes every captcha URL unique so that a cached
		// copy is never returned.
		GetMethod captchaRequest = new GetMethod("http://sx.122.gov.cn/captcha?t=" + System.currentTimeMillis());
		byte[] captchaBytes = null;
		try {
			int status = httpClient.executeMethod(captchaRequest);
			// Only a 200 (OK) answer is treated as an image; anything else
			// would be an error page, not a captcha.
			if (status == 200) {
				captchaBytes = captchaRequest.getResponseBody();
			}
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			captchaRequest.releaseConnection();
		}

		if (captchaBytes == null) {
			// Nothing usable came back; report the upstream failure instead
			// of writing a null array to the output stream.
			response.setStatus(HttpServletResponse.SC_BAD_GATEWAY);
			return;
		}

		// Stream the captcha image to the browser.
		try {
			response.setContentType("image/jpeg");
			OutputStream out = response.getOutputStream();
			out.write(captchaBytes);
			out.flush();
			out.close();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

关于前台图片验证码的显示和刷新

<li class="li2" style="position:relative;">
					<span class="title1">验证码:</span>
					<input type="text" value="" placeholder="请输入验证码" class="yuan" name="captcha" style="width:100px" maxlength="4" required="required"/>
						<div class="yanzhengblock" style="width:130px;height:30px;border:1px solid #ddd;
							position:absolute;right:10px;top:8px;background:white;overflow:hidden;">
							<!-- Clicking the captcha image reloads it via changeimg(). -->
							<img class="yanzhengimg"  src="getImage.action" 
							style="width:130px;height:30px;margin-left:-2px;margin-top:-2px;" onclick="changeimg(this)">
						</div>
						
				</li>
				
				<script>
				function changeimg(a){
					// Append the current timestamp as the query string so each
					// click requests a new URL and the image is not served from cache.
					var refreshedSrc = "getImage.action?" + new Date().getTime();
					$(a).attr("src", refreshedSrc);
				}
				</script>

最后传入参数请求数据

//给山西交警网提交数据(页面抓取)违章查询
	/**
	 * Step 2 of the violation query: saves the submitted vehicle, posts the
	 * query form to the traffic-police site and returns the site's answer to
	 * the page as JSON.
	 *
	 * <p>The cookie string stored in the local session under {@code "cookie"}
	 * is sent along with the request. The site's JSON answer is wrapped as
	 * {@code {"data": ...}}, written to the response, and also stored in the
	 * request attribute {@code "weizhangresult"}.
	 *
	 * <p>Responds with 400 when no plate number was submitted and with 401
	 * when the session holds no {@code "MEMBER"}.
	 *
	 * @throws HttpException if the HTTP exchange with the site fails at protocol level
	 * @throws IOException if the site cannot be reached, returns no body, or
	 *         the response to the browser cannot be written
	 */
	public void trafficWeb() throws HttpException, IOException{
		HttpServletRequest request = ServletActionContext.getRequest();
		HttpServletResponse response = ServletActionContext.getResponse();

		// Form values filled in on the page.
		String hpzl=(String) request.getAttribute("hpzl");
		String hphm1b=(String) request.getAttribute("hphm1b");
		String hphm=(String) request.getAttribute("hphm");
		String fdjh=(String) request.getAttribute("fdjh");
		String qm=(String) request.getAttribute("qm");
		String captcha=(String) request.getAttribute("captcha");
		String page1=(String) request.getAttribute("page1");

		// Without a plate number there is nothing to look up or to submit.
		if (StringUtils.isBlank(hphm)) {
			response.sendError(HttpServletResponse.SC_BAD_REQUEST);
			return;
		}
		// Use the trimmed plate everywhere so the lookup key and the stored
		// value always agree.
		hphm = hphm.trim();

		member=(TMember) request.getSession().getAttribute("MEMBER");
		if (member == null) {
			// The vehicle is stored against the member, so one is required.
			response.sendError(HttpServletResponse.SC_UNAUTHORIZED);
			return;
		}

		// Save or update the vehicle record.
		// NOTE(review): the plate number is user input concatenated into an HQL
		// fragment. Quotes are doubled here as a stop-gap; a parameterized
		// query in memberService would be the proper fix.
		String hqlWhere = " and carNumber = '"+hphm.replace("'", "''")+"'";
		cars=memberService.findCars(hqlWhere,page);
		boolean alreadyStored = cars!=null&&cars.size()>0;
		car = alreadyStored ? cars.get(0) : new TCar();
		car.setCarNumber(hphm);
		car.setCarMember(member.getMemberCode());
		car.setCarEngine(fdjh);
		car.setCarPlateType(hpzl);
		if (alreadyStored) {
			memberService.updateCar(car);
		} else {
			memberService.saveCar(car);
		}

		// Build the POST request to the site.
		HttpClient httpClient = new HttpClient();
		PostMethod postMethod = new PostMethod("http://sx.122.gov.cn/m/publicquery/vio");

		// Cookie string captured while fetching the captcha; may be missing if
		// the captcha step did not run in this session.
		String cookie=(String) request.getSession().getAttribute("cookie");
		if (cookie == null) {
			cookie = "";
		}
		postMethod.setRequestHeader("Cookie", cookie+"userpub=1;");

		// Headers a browser would send with the form submission.
		// Accept-Encoding is deliberately not sent: the body is decoded directly
		// as UTF-8 below, so a compressed response would be unreadable.
		postMethod.setRequestHeader("Accept", "application/json, text/javascript, */*; q=0.01");
		postMethod.setRequestHeader("Accept-Language", "zh-CN,zh;q=0.8");
		postMethod.setRequestHeader("Connection", "keep-alive");
		postMethod.setRequestHeader("Content-Type", "application/x-www-form-urlencoded; charset=UTF-8");
		postMethod.setRequestHeader("Host", "sx.122.gov.cn");
		postMethod.setRequestHeader("Origin", "http://sx.122.gov.cn");
		postMethod.setRequestHeader("Referer", "http://sx.122.gov.cn/views/inquiry.html");
		postMethod.setRequestHeader("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36");
		postMethod.setRequestHeader("X-Requested-With", "XMLHttpRequest");

		// Form parameters expected by the site; absent values are sent as empty strings.
		postMethod.addParameter("hpzl", StringUtils.defaultString(hpzl));
		postMethod.addParameter("hphm1b", StringUtils.defaultString(hphm1b));
		postMethod.addParameter("hphm", hphm);
		postMethod.addParameter("fdjh", StringUtils.defaultString(fdjh));
		postMethod.addParameter("qm", StringUtils.defaultString(qm));
		postMethod.addParameter("captcha", StringUtils.defaultString(captcha));
		postMethod.addParameter("page", StringUtils.defaultString(page1));

		// Submit and read the answer, always releasing the connection.
		String result;
		try {
			httpClient.executeMethod(postMethod);
			byte[] responseBody = postMethod.getResponseBody();
			if (responseBody == null) {
				throw new IOException("Empty response from http://sx.122.gov.cn/m/publicquery/vio");
			}
			result = new String(responseBody, "UTF-8");
		} finally {
			postMethod.releaseConnection();
		}
		System.out.println(result);

		// Wrap the site's answer and expose it to the request and to the page.
		JSONObject jsonObj = new JSONObject();
		jsonObj.put("data", JSONObject.fromObject(result));
		request.setAttribute("weizhangresult", jsonObj);

		response.setCharacterEncoding("UTF-8");
		response.setContentType("application/json;charset=UTF-8");
		PrintWriter out = response.getWriter();
		try {
			out.print(jsonObj);
			out.flush();
		} finally {
			out.close();
		}
	}

 

 

 

 

 

版权声明:本文来源CSDN,感谢博主原创文章,遵循 CC 4.0 by-sa 版权协议,转载请附上原文出处链接和本声明。
原文链接:https://blog.csdn.net/lianzhang861/article/details/80366174
站方申明:本站部分内容来自社区用户分享,若涉及侵权,请联系站方删除。
  • 发表于 2020-04-18 19:43:25
  • 阅读 ( 1040 )
  • 分类:

0 条评论

请先 登录 后评论

官方社群

GO教程

猜你喜欢