# Scrape Lianjia (鏈家) second-hand housing listings with BeautifulSoup4.
# Source: blog post by 阿新, published 2019-02-08.
from bs4 import BeautifulSoup
import pymysql
import requests
# Database persistence layer.
class Mysql_save(object):
    """Thin wrapper around a pymysql connection for executing write statements.

    NOTE(review): credentials are hard-coded; consider loading them from
    configuration or environment variables.
    """

    def __init__(self):
        self.db = pymysql.connect(host='127.0.0.1', user='root', password='123456',
                                  database='py10', port=3306, charset='utf8')
        self.cursor = self.db.cursor()

    # Name keeps the original typo ("exectute") for caller compatibility.
    def exectute_modify_sql(self, sql, data):
        """Execute a parameterized modifying statement and commit.

        Rolls back and re-raises on failure so a bad row does not leave
        the connection in a half-committed state.
        """
        try:
            self.cursor.execute(sql, data)
            self.db.commit()
        except Exception:
            self.db.rollback()
            raise

    def __del__(self):
        # Bug fix: the original called self.db.cursor(), which OPENS a new
        # cursor instead of closing the existing one. Close cursor then
        # connection; swallow errors because __del__ may run during
        # interpreter shutdown when the connection is already torn down.
        try:
            self.cursor.close()
            self.db.close()
        except Exception:
            pass
SQL_INSERT = 'insert into lianjia_info(title,address,flood,followinfo) values (%s,%s,%s,%s)'
URL = 'https://bj.lianjia.com/ershoufang'


def _first_text(tag, selector):
    """Return the text of the first element matching *selector*, or '' if absent."""
    found = tag.select(selector)
    return found[0].text if found else ''


def _parse_listings(html):
    """Parse a Lianjia listings page into (title, address, flood, followinfo) tuples.

    Listings missing a title are skipped instead of raising IndexError
    (the original crashed mid-run on any incomplete card).
    """
    soup = BeautifulSoup(html, 'lxml')
    rows = []
    # NOTE(review): class filter copied from the original; verify the site
    # still uses "clear LOGCLICKDATA" on listing <li> elements.
    for li_tag in soup.find_all('li', class_="clear LOGCLICKDATA"):
        title = _first_text(li_tag, 'div.title > a')
        if not title:
            continue  # not a real listing card
        address = _first_text(li_tag, 'div.address')
        flood = _first_text(li_tag, 'div.positionInfo')
        followinfo = _first_text(li_tag, 'div.followInfo')
        rows.append((title, address, flood, followinfo))
    return rows


def main():
    """Fetch the listings page and insert every parsed row into MySQL."""
    conn = Mysql_save()
    # timeout + raise_for_status: the original hung indefinitely on a stalled
    # connection and happily parsed error pages.
    response = requests.get(URL, timeout=10)
    response.raise_for_status()
    for data in _parse_listings(response.text):
        conn.exectute_modify_sql(SQL_INSERT, data)


# Preserve the original module-level side effect: the scrape runs on import/execution.
main()