python 爬蟲需要的庫
阿新 • • 發佈:2018-12-22
pip install builtwith
該模組將URL作為引數,下載該URL並對其進行分析,然後返回該網站使用的技術。下面是使用該模組的一個例子。
import builtwith
builtwith.parse('http://example.webscraping.com')
{'web-servers': ['Nginx'], 'web-frameworks': ['Web2py', 'Twitter Bootstrap'], 'programming-languages': ['Python'], 'javascript-frameworks': ['jQuery', 'Modernizr', 'jQuery UI']}
尋找網站所有者
pip install python-whois
import whois
print(whois.whois('http://example.webscraping.com/'))
{ "domain_name": "WEBSCRAPING.COM", "registrar": "GoDaddy.com, LLC", "whois_server": "whois.godaddy.com", "referral_url": null, "updated_date": [ "2013-08-20 08:08:30", "2013-08-20 08:08:29" ], "creation_date": "2004-06-26 18:01:19", "expiration_date": "2020-06-26 18:01:19", "name_servers": [ "NS1.WEBFACTION.COM", "NS2.WEBFACTION.COM", "NS3.WEBFACTION.COM", "NS4.WEBFACTION.COM" ], "status": [ "clientDeleteProhibited https://icann.org/epp#clientDeleteProhibited", "clientRenewProhibited https://icann.org/epp#clientRenewProhibited", "clientTransferProhibited https://icann.org/epp#clientTransferProhibited", "clientUpdateProhibited https://icann.org/epp#clientUpdateProhibited", "clientTransferProhibited http://www.icann.org/epp#clientTransferProhibited", "clientUpdateProhibited http://www.icann.org/epp#clientUpdateProhibited", "clientRenewProhibited http://www.icann.org/epp#clientRenewProhibited", "clientDeleteProhibited http://www.icann.org/epp#clientDeleteProhibited" ], "emails": "[email protected]", "dnssec": "unsigned", "name": null, "org": null, "address": null, "city": null, "state": "Victoria", "zipcode": null, "country": "AU" }