How to scrape data into MySQL with Python 3
阿新 · Published: 2020-01-09
This article shares a working example of scraping data into MySQL with Python 3, for your reference. The details are as follows.

Straight to the code:
#!/usr/local/bin/python3.5
# -*- coding: UTF-8 -*-
from urllib.request import urlopen
from bs4 import BeautifulSoup
import re
import datetime
import random
import pymysql

# Connect to MySQL and select the target database.
connect = pymysql.connect(host='192.168.10.142', unix_socket='/tmp/mysql.sock',
                          user='root', passwd='1234', db='scraping',
                          charset='utf8')
cursor = connect.cursor()
cursor.execute('USE scraping')

random.seed(datetime.datetime.now())


def store(title, content):
    # cursor.execute() returns the number of matched rows, so zero
    # means this title has not been stored yet.
    matched = cursor.execute("SELECT * FROM pages WHERE `title` = %s", (title,))
    if matched <= 0:
        cursor.execute("INSERT INTO pages (`title`, `content`) VALUES (%s, %s)",
                       (title, content))
        connect.commit()
    else:
        print('This content already exists.')


def get_links(article_url):
    # Fetch the page, store its title and first paragraph,
    # then return every internal /wiki/ link on it.
    html = urlopen('http://en.wikipedia.org' + article_url)
    soup = BeautifulSoup(html, 'html.parser')
    title = soup.h1.get_text()
    content = soup.find('div', {'id': 'mw-content-text'}).find('p').get_text()
    store(title, content)
    return soup.find('div', {'id': 'bodyContent'}).findAll(
        'a', href=re.compile("^(/wiki/)(.)*$"))


links = get_links('')
try:
    while len(links) > 0:
        # Follow a random internal link and scrape that page as well.
        newArticle = links[random.randint(0, len(links) - 1)].attrs['href']
        links = get_links(newArticle)
        print(links)
finally:
    cursor.close()
    connect.close()
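The script assumes the scraping database and a pages table already exist, but the article never shows the schema. Below is a minimal setup sketch: the title and content columns are inferred from the INSERT statement above, while the id primary key and the created timestamp are my own assumptions, not part of the original code.

import pymysql

# Assumed schema -- inferred from the INSERT in the scraper, not shown
# in the original article. Adjust column sizes to your data.
connect = pymysql.connect(host='192.168.10.142', user='root',
                          passwd='1234', charset='utf8')
cursor = connect.cursor()
cursor.execute('CREATE DATABASE IF NOT EXISTS scraping '
               'DEFAULT CHARACTER SET utf8')
cursor.execute('USE scraping')
cursor.execute("""
    CREATE TABLE IF NOT EXISTS pages (
        id INT NOT NULL AUTO_INCREMENT,          -- assumed surrogate key
        title VARCHAR(200) NOT NULL,             -- inferred from the scraper
        content TEXT NOT NULL,                   -- inferred from the scraper
        created TIMESTAMP DEFAULT CURRENT_TIMESTAMP,  -- assumed audit column
        PRIMARY KEY (id)
    ) ENGINE=InnoDB
""")
connect.commit()
cursor.close()
connect.close()

Run this once before starting the scraper; after that, each crawled Wikipedia page lands in pages as one title/content row, and the duplicate check in store() keeps a page from being inserted twice.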
That's all for this article. I hope it helps with your studies, and I hope you will continue to support us.