爬取flash數據
阿新 • • 發佈:2019-03-30
serialize 查看 引入 repo list() eid lse blazeds 房產
關於html爬取數據的文章已經有很多了,我今天主要和大家交流的是如何爬取flash網頁的數據。這方面資料相對比較少,主要是html5興起後現在flash站很少了,不過用於技術研究還是可以嘗試一下,這篇文章就主要介紹我爬取數據的整個過程。
以房產透明網為例,該網站的一房一價數據就是通過flash顯示,接下來將一步步介紹如何獲取對應的數據。
特別聲明,本文章僅做相關技術學習交流,數據版權為成都透明網,個人或企業請勿用於商業或非法用途,如該文章有不妥之處請聯繫本人刪除。
我找了一個樓盤用瀏覽器自帶的工具查看,可以看到返回的數據是亂碼,如下圖。
這主要是因為返回的數據格式是application/x-amf,瀏覽器無法正常解析,接下來就需要用到抓包工具Charles了。這個工具未付費的話每30分鐘會自動關閉,我覺得30分鐘也夠用了,目前一直忍受著。
1.首先打開Charles
2.打開透明網一房一價頁面,點擊一個單元後就可以看到請求的數據了
這裏面比較重要的幾個部分我都截取了一下,最後的HOUSEITEMLIST就是我們需要處理的數據了。
3.通過抓包工具已經可以看到請求的數據了,接下來就需要用java模擬amf的請求。
<!-- https://mvnrepository.com/artifact/org.apache.flex.blazeds/flex-messaging-core --> <dependency> <groupId>org.apache.flex.blazeds</groupId> <artifactId>flex-messaging-core</artifactId> <version>4.7.2</version> </dependency> <!-- https://mvnrepository.com/artifact/org.apache.flex.blazeds/flex-messaging-common --> <dependency> <groupId>org.apache.flex.blazeds</groupId> <artifactId>flex-messaging-common</artifactId> <version>4.7.2</version> </dependency>
先要引入這兩個包,這個請求代碼如下,部分參數我設置為******,如果需要測試自行粘貼對應的參數。
public static void main(String[] args) { try { URL urlObject = new URL("http://cd.funi.com/messagebroker/amf"); HttpURLConnection urlConnection = (HttpURLConnection) urlObject.openConnection(); urlConnection.setDoOutput(true); urlConnection.setRequestProperty("Content-type", "application/x-amf;charset=gb2312"); urlConnection.setRequestProperty("Host", "cd.funi.com"); urlConnection.setRequestProperty("Origin", "http://user.funi.com"); urlConnection.setRequestProperty("Referer", "http://user.funi.com/resource/swf/house/FundateClient_www.swf?communityId=DAZXiSEGhWZLhWIrVooMiDNjk4UzP3et1CztbkK1SZrXmBDQfGR%2BAFaCxnPg5MFf&t=20181131/[[DYNAMIC]]/1"); urlConnection.setRequestProperty("Cookie", "pgv_pvi=9961606144; pgv_si=s9152640000; Hm_lvt_77be290eccb6ceb57b524a860b6faadc=1545658648,1545745229,1545917030,1546227366; Hm_lpvt_77be290eccb6ceb57b524a860b6faadc=1546227368"); OutputStream outputStream = urlConnection.getOutputStream(); SerializationContext serializationContext = new SerializationContext(); ActionContext actionContext = new ActionContext(); //構建請求信息(0-amf0 3-amf3) ActionMessage requestMessage = new ActionMessage(); AmfTrace amfTrace = new AmfTrace(); RemotingMessage remotingMessage = new RemotingMessage(); remotingMessage.setOperation("***********************************"); remotingMessage.setSource(null); remotingMessage.setClientId("FF66DFC9-B00D-2C39-E122-6B6752416543"); remotingMessage.setDestination("dEEDOCService"); remotingMessage.setMessageId("******************************"); remotingMessage.setHeader("DSEndpoint", "my-amf"); remotingMessage.setHeader("DSId", "*************************"); remotingMessage.setTimeToLive(0); remotingMessage.setTimestamp(0); remotingMessage.setBody(new 
Object[]{"kezlmwCvdjGPckPbY1SmeL3frogB2sfc7IgjBssaFJ2ihf5M93DgMgf5mIqLiWgMNvNwBsVQKuDfTympu4bAjLV9/3mGEHK+MfNqVZKTY0xC3uGOkDg+i2Pt9oTDxBm1xU5Cvmjmd/9mXzN/v3UOvSoqKlLNYy42g8uGAq+JFczhHpdRi7LBtP56E8OJaGq4VksJJnPhGLtMLt1T3wZZKzcV4MqJ2U7NTg7q5AmyCC89nvetx/5Gop8mUBe0tHQdSop8mhHerHn+n7y5O1BL3sRS8T3e1B9F2txtWzcNX0NBzDgAMpfa3AJAhaZ7yuhwd5VtLYD+KquXCUmxJAd/YSjjZGAYYomWjZqRMfO5x5cP/SH8AeI4BiKbTQ+2UygOvYCiTAzy+8GNG0oKpTDCnP2/j2CFhISaMutwAFTF7CZw6HCzJq+2iA8sVnNmCePQMieuZOyq7LG0PppzHRkQYGpUzGynN4FJ8Dz7TBXmuKu7bWJ7jlrYdHbsexEGhoI2fEh/hivzSuCaBfWojChwMQOrtiYKG/YYEgtxNmEUYVdDH5XUiFHVH0V3W+O16fluHZUoaJdvZ+Fbm9oJIB2cz1X9hQSOcs3Cc7i95hhJ0SdQGa1yMw7c2vJSWzbTKuc6rnFm8IDmR6qm6sEIUHRokN56IsDqS+ZHaXWNoOG4q0xR97tFCPlrURWxLcJX3tIJ4xl/imVVlifcAZX4/gXkykAGpM7tdGOy0J/hegAZqCY="}); MessageBody amfMessage = new MessageBody(null, "/3", new Object[]{remotingMessage}); requestMessage.addBody(amfMessage); // Setup for AMF message serializer actionContext.setRequestMessage(requestMessage); ByteArrayOutputStream outBuffer = new ByteArrayOutputStream(); AmfMessageSerializer amfMessageSerializer = new AmfMessageSerializer(); amfMessageSerializer.initialize(serializationContext, outBuffer, amfTrace); amfMessageSerializer.writeMessage(requestMessage); outBuffer.writeTo(outputStream); outBuffer.flush(); outBuffer.close(); InputStream inputStream = urlConnection.getInputStream(); BufferedInputStream urlConnectionInputStream = new BufferedInputStream(inputStream); serializationContext = new SerializationContext(); actionContext = new ActionContext(); ActionMessage message = new ActionMessage(); actionContext.setRequestMessage(message); ClassAliasRegistry.getRegistry().registerAlias("DSK", "com.funi.frontend.dto.HouseTable");//需在項目中設置對應的類包名需一致 MessageDeserializer deserializer = new AmfMessageDeserializer(); deserializer.initialize(serializationContext, urlConnectionInputStream, amfTrace); deserializer.readMessage(message, actionContext); Object result = null; for (MessageBody msg : (ArrayList<MessageBody>) 
message.getBodies()) { java.lang.String targetURI = msg.getTargetURI(); if (targetURI.endsWith(MessageIOConstants.RESULT_METHOD)) { result = msg.getData(); AcknowledgeMessage acknowledgeMessage=(AcknowledgeMessage)result; Object body = acknowledgeMessage.getBody(); ASObject asObject=(ASObject)body; ArrayCollection houseitemlist =(ArrayCollection) asObject.get("HOUSEITEMLIST"); for (Object o : houseitemlist) { HouseTable houseTable=(HouseTable)o; System.out.println(DecodeUtils.decode(houseTable.getUnitNo())); System.out.println(DecodeUtils.decode(houseTable.getUsage())); System.out.println(DecodeUtils.decode(houseTable.getTotalArea())); } } else if (targetURI.endsWith(MessageIOConstants.STATUS_METHOD)) { java.lang.String exMessage = "Server error"; result = exMessage; } } } catch (Exception e) { System.out.print("error"); } }
package com.funi.frontend.dto; public class HouseTable { private Boolean isMortgage; private String status; private String roomNo; private String listWaterPrice; private String typeHouse; private String huxId; private String buildingNo; private String fitmentPrice; private String floorNo; private String listPrice; private Boolean isSealUp; private String usage; private String totalArea; private Object houseTableList; private Object phase; private String unitNo; private String buildingId; private String communityId; public Boolean getMortgage() { return isMortgage; } public void setMortgage(Boolean mortgage) { isMortgage = mortgage; } public String getStatus() { return status; } public void setStatus(String status) { this.status = status; } public String getRoomNo() { return roomNo; } public void setRoomNo(String roomNo) { this.roomNo = roomNo; } public String getListWaterPrice() { return listWaterPrice; } public void setListWaterPrice(String listWaterPrice) { this.listWaterPrice = listWaterPrice; } public String getTypeHouse() { return typeHouse; } public void setTypeHouse(String typeHouse) { this.typeHouse = typeHouse; } public String getHuxId() { return huxId; } public void setHuxId(String huxId) { this.huxId = huxId; } public String getBuildingNo() { return buildingNo; } public void setBuildingNo(String buildingNo) { this.buildingNo = buildingNo; } public String getFitmentPrice() { return fitmentPrice; } public void setFitmentPrice(String fitmentPrice) { this.fitmentPrice = fitmentPrice; } public String getFloorNo() { return floorNo; } public void setFloorNo(String floorNo) { this.floorNo = floorNo; } public String getListPrice() { return listPrice; } public void setListPrice(String listPrice) { this.listPrice = listPrice; } public Boolean getSealUp() { return isSealUp; } public void setSealUp(Boolean sealUp) { isSealUp = sealUp; } public String getUsage() { return usage; } public void setUsage(String usage) { this.usage = usage; } public String getTotalArea() { 
return totalArea; } public void setTotalArea(String totalArea) { this.totalArea = totalArea; } public Object getHouseTableList() { return houseTableList; } public void setHouseTableList(Object houseTableList) { this.houseTableList = houseTableList; } public Object getPhase() { return phase; } public void setPhase(Object phase) { this.phase = phase; } public String getUnitNo() { return unitNo; } public void setUnitNo(String unitNo) { this.unitNo = unitNo; } public String getBuildingId() { return buildingId; } public void setBuildingId(String buildingId) { this.buildingId = buildingId; } public String getCommunityId() { return communityId; } public void setCommunityId(String communityId) { this.communityId = communityId; } }
最後獲取到對應數據後用base64解碼一下即可。
特別聲明,本文章僅做相關技術學習交流,數據版權為成都透明網,個人或企業請勿用於商業或非法用途,如該文章有不妥之處請聯繫本人刪除。
喜歡java開發的可以加我qq3369245209,後面會建立一個java開發高級群,下期將介紹如何爬取app數據。
爬取flash數據