censys 資料庫地理資訊自定義介面(python版)
阿新 • • 發佈:2019-01-25
公司內部的ip資訊庫覆蓋面不是很夠
導致日誌處理的時候ip經常查不到
有人推薦,censys比較權威,
但是沒有文件,而且介面不太好用,所以自己寫了一個查ip的介面
首先
到官網逛了逛,censys特殊之處在於註冊了才能用api
註冊以後有Secret,API_ID,在查詢時需要用到
百度了一下發現也沒什麼有用的教程,又看了看官方的介紹以及原始碼
得到了最初的版本
import censys
from censys import *
# Credentials issued by Censys after registering an account.
Secret = ""
API_ID = ""
# `ip` must already be bound to the IPv4 address to look up.
api = censys.ipv4.CensysIPv4(api_id=API_ID, api_secret=Secret)
res = api.view(ip)
# The geolocation part of the API response lives under the 'location' key.
geo = res['location']
後來發現,這個庫不是專業的地理資訊庫,這樣查詢很多ip的地址view不到。。
但是在網頁上面卻是可以顯示地理資訊的,想了想,準備直接用url發請求
import requests
# Fetch the Censys web page for `ip`, sending the API credentials as HTTP basic auth.
url="https://www.censys.io/ipv4/%s"%ip
res = requests.get(url, auth=(API_ID, Secret))
# Raw response body of the page.
s=res.content
也可以用urllib2
import urllib2
# Alternative fetch using urllib2; expects `url`, `API_ID`, `Secret` and the json module to be in scope.
values ={"user":API_ID,"passwd":Secret}
jdata = json.dumps(values)
# NOTE(review): passing a data argument makes urllib2 issue a POST with this JSON as the body,
# which is not the HTTP basic auth used by the requests variant - confirm the site accepts it.
req = urllib2.Request (url, jdata)
response = urllib2.urlopen(req)
s=response.read()
兩者差不太多吧,我用的是第一種
之後就是解析html了
上網找了找,發現神器bs4
搞了搞發現好方便,直接貼程式碼
from bs4 import BeautifulSoup
# Parse the fetched page body `s` using the html5lib parser backend.
soup = BeautifulSoup(s, "html5lib")
這樣html就被解析出來了,結合censys返回的html,可以解析出地理資訊
# Locate the <dl> element(s) whose class matches the string below.
b = soup.find_all("dl", "dl-horizontal dl-hostbox")
if not b:
    # No such element on the page: report it and leave `geo` empty instead of
    # falling through to b[0], which would raise IndexError.
    print("not found")
    geo = []
else:
    # The <dd> children of the first match hold the individual values.
    geo = b[0].find_all('dd')
接下來繼續解析出所需各項
# geo[3] holds the coordinates as "lat,long".
lat_long = geo[3].string.split(',')
# geo[2] holds the country as "Name (CC)".  Split on the LAST space only, so
# multi-word names such as "United States (US)" keep their full name and the
# parenthesised code is not mistaken for part of it.
country_name, _sep, country_code = geo[2].string.rpartition(' ')
if not _sep:
    # No space at all: treat the whole text as the name, with no code.
    country_name, country_code = country_code, ""
json_data = {
    "ip": ip,
    "latitude": float(lat_long[0]),
    "country": str(country_name),
    # Drop the surrounding parentheses from "(CC)".
    "country_code": str(country_code.strip('()')),
    "longitude": float(lat_long[1]),
    "province": str(geo[1].string),
    "city": str(geo[0].string),
}
這樣也就可以用了
但是。。
有的ip地理資訊竟然是殘缺的,所以根據實際情況修改了一下解析過程
思路很簡單,就是如果geo中五項都有,就直接過,缺項的時候,就看看有什麼,然後加什麼,沒有的用“unknow”代替
# Build the record while tolerating pages that omit some of the five fields.
# Values are paired with their <dt> labels, so a missing or reordered field
# cannot shift the remaining values into the wrong slot.
unknown = "unknow"  # placeholder for any field the page does not provide
expected = ["City", "Province", "Country", "Lat/Long", "Timezone"]
values = [dd.string for dd in geo]
if len(values) >= len(expected):
    # Complete record: values appear in the expected order.
    labels = expected
else:
    print("did not get enough info at ip%s" % ip)
    labels = [dt.string for dt in b[0].find_all('dt')]
# Entries without text are treated as missing; unexpected labels are ignored.
found = dict(
    (label, value) for label, value in zip(labels, values)
    if value is not None
)

city = str(found.get("City", unknown))
provice = str(found.get("Province", unknown))

# Country text looks like "Name (CC)"; split on the last space only so that
# multi-word names ("United States (US)") stay intact.
country_name, country_code = unknown, unknown
if "Country" in found:
    name, sep, code = found["Country"].strip().rpartition(' ')
    if sep:
        country_name, country_code = name, code.strip('()')
    else:
        country_name = code

# Coordinates look like "lat,long"; fall back to 0.0 when absent or malformed.
try:
    lat_text, long_text = found["Lat/Long"].split(',')[:2]
    latitude, longitude = float(lat_text), float(long_text)
except (KeyError, ValueError):
    latitude, longitude = 0.0, 0.0

json_data = {
    "ip": ip,
    "latitude": latitude,
    "country": str(country_name),
    "country_code": str(country_code),
    "longitude": longitude,
    "province": provice,
    "city": city,
}
這樣完整的地理資訊就解析出來了
這裡放一下完整的api
import json
import urllib2
import censys
from censys import *
import requests
from bs4 import BeautifulSoup
class censys_ip(object):
    """Resolve IPv4 addresses to geolocation records using Censys.

    The Censys API client is tried first; if that fails, the Censys web page
    for the address is fetched and its geolocation box is parsed instead.

    A record is a dict with the keys ``ip``, ``latitude``, ``longitude``,
    ``country``, ``country_code``, ``province`` and ``city``.
    """

    debug = False
    # Censys account credentials; they are read when an instance is created.
    Secret = ""
    API_ID = ""

    # Placeholder used for fields the web page does not provide.
    _UNKNOWN = "unknow"
    # Labels of the geolocation box, in the order a complete page lists them.
    _FIELDS = ("City", "Province", "Country", "Lat/Long", "Timezone")

    def __init__(self):
        """Create the Censys IPv4 API client from the class-level credentials."""
        self.api = censys.ipv4.CensysIPv4(api_id=self.API_ID, api_secret=self.Secret)

    @classmethod
    def _split_country(cls, text):
        """Split a country string such as "United States (US)" into (name, code).

        Only the last space separates name from code, so multi-word names stay
        intact.  Missing parts are replaced by the unknown placeholder.
        """
        if text is None:
            return cls._UNKNOWN, cls._UNKNOWN
        name, sep, code = text.strip().rpartition(' ')
        if not sep:
            # No space at all: the whole text is the name, there is no code.
            return code, cls._UNKNOWN
        return name, code.strip('()')

    @classmethod
    def _record_from_fields(cls, ip, labels, values):
        """Build a geolocation record from the page's label and value strings.

        ``labels`` and ``values`` are the texts of the <dt> and <dd> entries,
        in page order.  Values are matched to labels rather than to fixed
        positions, so missing, reordered or unexpected entries cannot shift
        data into the wrong field.
        """
        if len(values) >= len(cls._FIELDS):
            # Complete record: values appear in the expected order.
            labels = cls._FIELDS
        else:
            print("did not get enough info at ip%s" % ip)
        # Entries without text are treated as missing.
        found = dict(
            (label, value) for label, value in zip(labels, values)
            if value is not None
        )

        country, country_code = cls._split_country(found.get("Country"))

        # Coordinates look like "lat,long"; use 0.0 when absent or malformed.
        try:
            lat_text, long_text = found["Lat/Long"].split(',')[:2]
            latitude, longitude = float(lat_text), float(long_text)
        except (KeyError, ValueError):
            latitude, longitude = 0.0, 0.0

        return {
            "ip": ip,
            "latitude": latitude,
            "country": str(country),
            "country_code": str(country_code),
            "longitude": longitude,
            "province": str(found.get("Province", cls._UNKNOWN)),
            "city": str(found.get("City", cls._UNKNOWN)),
        }

    def censys_html_search(self, ip):
        """Fetch the Censys web page for ``ip`` and parse its geolocation box.

        Returns a record dict, or an empty dict when the page has no
        geolocation box.
        """
        url = "https://www.censys.io/ipv4/%s" % ip
        res = requests.get(url, auth=(self.API_ID, self.Secret))
        soup = BeautifulSoup(res.content, "html5lib")
        boxes = soup.find_all("dl", "dl-horizontal dl-hostbox")
        if not boxes:
            return {}
        labels = [dt.string for dt in boxes[0].find_all('dt')]
        values = [dd.string for dd in boxes[0].find_all('dd')]
        return self._record_from_fields(ip, labels, values)

    def search(self, ip):
        """Return the geolocation record for ``ip``.

        The API is queried first.  Any failure there (request error, missing
        'location' data, unparsable coordinates) falls back to parsing the
        web page; that fallback may return an empty dict.
        """
        try:
            geo = self.api.view(ip)['location']
            return {
                "ip": ip,
                "latitude": float(geo["latitude"]),
                "country": geo["country"],
                "country_code": geo["country_code"],
                "longitude": float(geo["longitude"]),
                "province": geo["province"],
                "city": geo["city"],
            }
        except Exception:
            # Deliberate best effort: whatever went wrong with the API, the
            # web page may still carry the information.
            return self.censys_html_search(ip)

    def get_geo(self, ip):
        """Look up ``ip`` and print the result.

        Returns 1 when a record was found and -1 when it was not.
        """
        json_data = self.search(ip)
        if not json_data:
            print("can not find ip: %s" % ip)
            return -1
        print("get geo of ip: %s" % ip)
        print(json_data)
        return 1

    def main(self, ip_lst):
        """Look up every address in ``ip_lst``.

        The addresses that could not be resolved are printed and returned.
        """
        failed = []
        for ip in ip_lst:
            print("========================")
            if self.get_geo(ip) == -1:
                failed.append(ip)
        print(failed)
        return failed
if __name__ == '__main__':
    # Demo run: look up a single well-known address and print the outcome
    # between a start and an end banner.
    targets = ["8.8.8.8"]
    print("=================================start==========================================")
    client = censys_ip()
    client.main(targets)
    print("=================================end==========================================")
反正還可以湊活著用吧
最後,他們告訴我有GeoIP…………………………………………………
無所謂啦,反正寫著玩的