【www.bbyears.com--python】
因本人的网站需要根据访问者的源IP查询其地理位置信息,所以用到了一些根据IP查询地理位置信息的接口,现在将这方面积累的一点经验分享出来。
首先要说的是,几年前刚学Python那会儿,已经写过类似的一篇博客(只是写的不够全面),见:Python获取本地的IP和所在地
根据IP查询所在地、运营商等信息的一些API如下(根据我有限的一点经验):
1. 淘宝的API(推荐):http://ip.taobao.com/service/getIpInfo.php?ip=110.84.0.129
2. 国外freegeoip.net(推荐):http://freegeoip.net/json/110.84.0.129 这个还提供了经纬度信息(但不一定准)
3. 新浪的API:http://int.dpool.sina.com.cn/iplookup/iplookup.php?format=json&ip=110.84.0.129
4. 腾讯的网页查询:http://ip.qq.com/cgi-bin/searchip?searchip1=110.84.0.129
5. ip.cn的网页:http://www.ip.cn/index.php?ip=110.84.0.129
6. ip-api.com: http://ip-api.com/json/110.84.0.129 (看起来挺不错的,貌似直接返回中文城市信息,文档在 ip-api.com/docs/api:json)
7. http://www.locatorhq.com/ip-to-location-api/documentation.php (这个要注册才能使用,还没用过呢)
(第2个freegeoip.net的网站和IP数据的生成,代码在:https://github.com/fiorix/freegeoip)
为什么其中第4、5两个只是网页查询,也被列出来了呢?有两方面原因:一是它们提供的信息比较准;二是借助页面信息自动抓取(可能会用到我曾经写过的PhantomJS),也很容易将其封装到程序中作为API使用。
根据IP查询地理位置信息,我将其写成了一个较为通用的Python库(提供了前面提到的1、2、4、5等4种查询方式的API),可以根据IP查询到地域信息和ISP信息,具体代码见:
https://github.com/smilejay/python/blob/master/py2013/iplocation.py
注意其中对ip.cn网页的解析用到了webdriver和PhantomJS.
# -*- coding: utf-8 -*-
"""
Created on Oct 20, 2013
@summary: geography info about an IP address
@author: Jay
"""
import json, urllib2
import re
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
class location_freegeoip():
    """Look up geographic information for an IP address via freegeoip.net.

    The lookup URL has the form ``http://freegeoip.net/json/<ip>``.
    Nothing is cached: every ``get_*`` method calls ``get_geoinfo()`` and
    therefore performs a new HTTP request.
    """

    def __init__(self, ip):
        """Store *ip* and build the lookup URL for it.

        ip -- the IP address to look up, as a string (e.g. "110.84.0.129").
        """
        self.ip = ip
        self.api_format = "json"
        self.api_url = "http://freegeoip.net/%s/%s" % (self.api_format, self.ip)

    def get_geoinfo(self):
        """Fetch the lookup URL and return the decoded JSON document.

        The getters below expect the result to be a dict containing the keys
        "country_name", "region_name" and "city".  Errors raised by
        ``urllib2.urlopen`` or ``json.loads`` propagate to the caller.
        """
        urlobj = urllib2.urlopen(self.api_url)
        try:
            data = urlobj.read()
        finally:
            # Release the connection even when read() fails.
            urlobj.close()
        # UTF-8 is already what json.loads assumes for byte strings, so no
        # explicit ``encoding`` keyword is passed (it was removed in
        # Python 3.9).
        return json.loads(data)

    def get_country(self):
        """Return the "country_name" field of a fresh lookup."""
        return self.get_geoinfo()["country_name"]

    def get_region(self):
        """Return the "region_name" field of a fresh lookup."""
        return self.get_geoinfo()["region_name"]

    def get_city(self):
        """Return the "city" field of a fresh lookup."""
        return self.get_geoinfo()["city"]
class location_taobao():
    """Look up geographic and ISP information for an IP address via Taobao.

    e.g. http://ip.taobao.com/service/getIpInfo.php?ip=112.111.184.63
    The getIpInfo API from Taobao returns a JSON object; the location fields
    are read from its "data" member.  Nothing is cached: every ``get_*``
    method calls ``get_geoinfo()`` and therefore performs a new HTTP request.
    """

    def __init__(self, ip):
        """Store *ip* and build the lookup URL for it.

        ip -- the IP address to look up, as a string.
        """
        self.ip = ip
        self.api_url = "http://ip.taobao.com/service/getIpInfo.php?ip=%s" % self.ip

    def get_geoinfo(self):
        """Fetch the lookup URL and return the "data" member of the JSON reply.

        The getters below expect that member to be a dict containing the keys
        "country", "region", "city" and "isp".  Errors raised by
        ``urllib2.urlopen`` or ``json.loads``, and a ``KeyError`` for a reply
        without "data", propagate to the caller.
        """
        urlobj = urllib2.urlopen(self.api_url)
        try:
            data = urlobj.read()
        finally:
            # Release the connection even when read() fails.
            urlobj.close()
        # UTF-8 is already what json.loads assumes for byte strings, so no
        # explicit ``encoding`` keyword is passed (it was removed in
        # Python 3.9).
        return json.loads(data)["data"]

    def get_country(self):
        """Return the "country" field of a fresh lookup."""
        return self.get_geoinfo()[u"country"]

    def get_region(self):
        """Return the "region" field of a fresh lookup."""
        return self.get_geoinfo()["region"]

    def get_city(self):
        """Return the "city" field of a fresh lookup."""
        return self.get_geoinfo()["city"]

    def get_isp(self):
        """Return the "isp" field of a fresh lookup."""
        return self.get_geoinfo()["isp"]
class location_qq():
    """Look up the region and ISP of an IP address from Tencent's web page.

    e.g. http://ip.qq.com/cgi-bin/searchip?searchip1=112.111.184.64
    Note: the page returned by Tencent is decoded as "gb2312"; it is
    re-encoded to UTF-8 before the location text is extracted.  Nothing is
    cached: ``get_region()`` and ``get_isp()`` each fetch the page again.
    """

    def __init__(self, ip):
        """Construct a location_qq lookup for *ip* (a string)."""
        self.ip = ip
        self.api_url = "http://ip.qq.com/cgi-bin/searchip?searchip1=%s" % ip

    def get_geoinfo(self):
        """Fetch the page and return the location text split on spaces.

        Returns a list of UTF-8 encoded strings (the text captured after the
        location label, split on single spaces), or None when the label is
        not found in the page.  Network and decoding errors propagate.
        """
        urlobj = urllib2.urlopen(self.api_url)
        try:
            raw = urlobj.read()
        finally:
            # Release the connection even when read() fails.
            urlobj.close()
        data = raw.decode("gb2312").encode("utf8")
        m = re.search(r"该IP所在地为:(.+)", data)
        if m is None:
            return None
        return m.group(1).split(" ")

    def _field(self, index):
        """Return item *index* of a fresh lookup, or None if it is absent."""
        info = self.get_geoinfo()
        # get_geoinfo() returns None when the page could not be parsed, and
        # the list may be shorter than expected; report both cases as None
        # instead of failing with TypeError / IndexError.
        if not info or len(info) <= index:
            return None
        return info[index]

    def get_region(self):
        """Return the first token of the location text, or None."""
        return self._field(0)

    def get_isp(self):
        """Return the second token of the location text, or None."""
        return self._field(1)
class location_ipdotcn():
    """Look up the region and ISP of an IP address by scraping www.ip.cn.

    The result page is opened through Selenium's PhantomJS driver so that
    the page's JavaScript is rendered before the text is read.  Nothing is
    cached: ``get_region()`` and ``get_isp()`` each start a new browser.
    """

    def __init__(self, ip, phantomjs_path="/usr/local/bin/phantomjs"):
        """Construct a location_ipdotcn lookup.

        ip             -- the IP address to look up, as a string.
        phantomjs_path -- path of the PhantomJS executable handed to
                          Selenium; defaults to the previously hard-coded
                          location.
        """
        self.ip = ip
        self.phantomjs_path = phantomjs_path
        self.api_url = "http://www.ip.cn/%s" % ip

    def get_geoinfo(self):
        """Render the result page and return the location text as a list.

        Returns the text following the marker in the result paragraph,
        split on single spaces, or None when the marker is not present.
        Errors raised by Selenium propagate to the caller.
        """
        dcap = dict(DesiredCapabilities.PHANTOMJS)
        dcap["phantomjs.page.settings.userAgent"] = (
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:25.0) Gecko/20100101 Firefox/29.0 ")
        driver = webdriver.PhantomJS(executable_path=self.phantomjs_path,
                                     desired_capabilities=dcap)
        try:
            driver.get(self.api_url)
            # Single-quoted literal: the XPath itself contains double quotes.
            text = driver.find_element_by_xpath('//div[@id="result"]/div/p').text
        finally:
            # Always shut the browser process down, even if the page load or
            # the element lookup raised.
            driver.quit()
        # The element text is a unicode string, so the marker must be one
        # too: under Python 2 a non-ASCII byte-string separator would be
        # implicitly decoded as ASCII and raise UnicodeDecodeError.
        parts = text.split(u"来自:")
        if len(parts) < 2:
            return None
        return parts[1].split(" ")

    def _field(self, index):
        """Return item *index* of a fresh lookup, or None if it is absent."""
        info = self.get_geoinfo()
        if not info or len(info) <= index:
            return None
        return info[index]

    def get_region(self):
        """Return the first token of the location text, or None."""
        return self._field(0)

    def get_isp(self):
        """Return the second token of the location text, or None."""
        return self._field(1)
if __name__ == "__main__":
    # Small manual demo: look one address up through www.ip.cn and print its
    # region followed by its ISP.  location_taobao(ip) or location_qq(ip) can
    # be substituted here to exercise the other backends (location_taobao
    # additionally offers get_geoinfo/get_country/get_city).
    ip = "110.84.0.129"
    iploc = location_ipdotcn(ip)
    # Each getter is called and printed in turn, so the region is printed
    # before the ISP lookup starts.
    for getter in (iploc.get_region, iploc.get_isp):
        print(getter())