python - 多線程/多進程
阿新 • • 發佈:2018-02-23
標籤:多線程、多進程、進程間通信、隊列、爬蟲
多線程:
# Threaded demo: worker threads pull scraped image links from a shared
# queue and print them, one item per thread.
#
# Fixes over the original:
#   * full-width quotes (‘...‘) replaced with ASCII quotes — the original
#     text did not even parse as Python;
#   * the regex is compiled once, outside the tag loop;
#   * empty findall() results are no longer pushed into the queue (a later
#     queue.get() would hand a worker an empty list);
#   * the busy-wait "while not workQueue.empty(): pass" is dropped —
#     join()ing every thread is the correct way to wait for completion;
#   * daemon is left False so join() actually guards thread shutdown
#     (daemon=True plus join() was contradictory).
import re
import threading
from multiprocessing import Queue
from time import sleep

from bs4 import BeautifulSoup
from requests import get


class MyThread(threading.Thread):
    """Worker thread: consume one item from *queue* under *qlock*."""

    def __init__(self, qlock, queue):
        threading.Thread.__init__(self)
        self.qlock = qlock
        self.queue = queue

    def run(self):
        process(self.qlock, self.queue)


def process(qlock, queue):
    """Pop one link list from the queue (mutex-protected) and print it."""
    qlock.acquire()  # mutex: one consumer touches the queue at a time
    try:
        data = queue.get()
        print(data)
    finally:
        qlock.release()  # always release, even if get()/print raises
    sleep(1)


if __name__ == '__main__':
    workQueue = Queue(50)
    qlock = threading.Lock()

    url = 'https://www.pixiv.net/ranking.php?mode=daily'
    html = get(url, timeout=1).text
    soup = BeautifulSoup(html, 'lxml')

    pattern = re.compile(r'data-src="(.+?)"')  # compile once, not per tag
    links = []
    for tag in soup.find_all('img'):
        link = pattern.findall(str(tag))
        if link:  # skip tags without a data-src attribute
            workQueue.put(link)
            links.append(link)

    threads = []
    for _ in links:  # one consumer thread per queued item
        thread = MyThread(qlock, workQueue)
        thread.start()
        threads.append(thread)

    for t in threads:
        t.join()
多進程:
1.使用Pool模塊創建進程池:
# Process-pool demo: fan scraped links out to a pool of worker processes.
#
# Fixes over the original:
#   * full-width quotes (‘...‘) replaced with ASCII quotes — the original
#     text did not even parse as Python;
#   * the dot before "jpg" is escaped: ".+?.jpg" let ANY character precede
#     "jpg", so e.g. "fooXjpg" matched too;
#   * the regex is compiled once, outside the tag loop;
#   * empty findall() results are filtered out.
import os
import re
from multiprocessing import Pool

from bs4 import BeautifulSoup
from requests import get


def run_process(url):
    """Pool worker: just echo the link list it was handed."""
    print(url)


if __name__ == '__main__':
    url = 'https://www.pixiv.net/ranking.php?mode=daily'
    html = get(url, timeout=1).text
    soup = BeautifulSoup(html, 'lxml')

    pattern = re.compile(r'data-src="(.+?\.jpg)"')  # compile once
    links = []
    for tag in soup.find_all('img'):
        link = pattern.findall(str(tag))
        if link:  # skip tags without a .jpg data-src
            links.append(link)

    pool = Pool(os.cpu_count())  # one worker per CPU core
    for link in links:
        pool.apply_async(run_process, args=(link,))
    pool.close()  # no more tasks will be submitted
    pool.join()   # wait for every worker to finish
2.Process模塊、Queue模塊進行進程間的通信(但我的寫入隊列沒有用多進程):
# Process + Queue demo: one process per queued item, each consuming and
# printing one link list (the queue is filled by the parent, not by
# worker processes — same as the original post notes).
#
# Fixes over the original:
#   * full-width quotes (‘...‘) replaced with ASCII quotes — the original
#     text did not even parse as Python;
#   * ALL processes are joined: the original reused one loop variable and
#     only joined the last process it started;
#   * the busy-wait "while not queue.empty(): pass" is dropped — joining
#     the processes already waits for the queue to drain;
#   * the dot before "jpg" is escaped and the regex is compiled once;
#   * empty findall() results are no longer queued (queue.get() handing a
#     consumer an empty list would be useless).
import re
from multiprocessing import Process, Queue

from bs4 import BeautifulSoup
from requests import get


class MyProcess(Process):
    """Worker process: consume one item from *queue* and print it."""

    def __init__(self, queue):
        Process.__init__(self)
        self.queue = queue

    def run(self):
        run_process(self.queue)


def run_process(queue):
    """Pop one link list from the queue and print it."""
    data = queue.get()
    print(data)


if __name__ == '__main__':
    url = 'https://www.pixiv.net/ranking.php?mode=daily'
    html = get(url, timeout=1).text
    soup = BeautifulSoup(html, 'lxml')

    queue = Queue(50)
    pattern = re.compile(r'data-src="(.+?\.jpg)"')  # compile once
    links = []
    for tag in soup.find_all('img'):
        link = pattern.findall(str(tag))
        if link:  # skip tags without a .jpg data-src
            queue.put(link)
            links.append(link)

    processes = []
    for _ in links:  # one consumer process per queued item
        p = MyProcess(queue)
        p.start()
        processes.append(p)

    for p in processes:  # join every process, not just the last one
        p.join()
第2個比第1個明顯慢了很多——原因應該是第2種寫法為每個任務都新建並銷毀一個進程,而進程的創建/銷毀開銷很大;進程池(Pool)則會復用固定數量的進程,省掉了這部分開銷。
但上面只是cpu密集型,測試一下用io密集型的小爬蟲來看看效果:
1.多線程:
# Threaded I/O-bound demo: worker threads each download one image from a
# shared queue of scraped links.
#
# Fixes over the original:
#   * full-width quotes (‘...‘) replaced with ASCII quotes — the original
#     text did not even parse as Python;
#   * the mutex is held only while taking from the queue; the original
#     held it across the whole network download, which serialized every
#     thread and defeated the point of using threads for I/O;
#   * the busy-wait "while not workQueue.empty(): pass" is dropped —
#     join()ing every thread is the correct way to wait for completion;
#   * the dot before "jpg" is escaped and the regex is compiled once;
#   * empty findall() results are no longer queued (the worker's
#     queue.get()[0] would raise IndexError on an empty list).
import re
import threading
from multiprocessing import Queue
from time import sleep

from bs4 import BeautifulSoup
from requests import get


class MyThread(threading.Thread):
    """Worker thread: download one queued image."""

    def __init__(self, qlock, queue):
        threading.Thread.__init__(self)
        self.qlock = qlock
        self.queue = queue

    def run(self):
        process(self.qlock, self.queue)


def process(qlock, queue):
    """Take one URL from the queue and save the image it points to."""
    # NOTE: multiprocessing.Queue is already thread-safe, so the lock is
    # kept only to preserve the demo's structure — and only around get(),
    # never around the slow network call.
    qlock.acquire()
    try:
        url = queue.get()[0]
    finally:
        qlock.release()
    img = get(url, timeout=1).content
    name = url.split('/')[-1]
    imgid = name[:8]  # first 8 chars of the filename serve as the image id
    with open('C:/Users/adimin/Desktop/video/{}.jpg'.format(imgid), 'wb') as fp:
        fp.write(img)
    print('download: ' + url)


if __name__ == '__main__':
    workQueue = Queue(50)
    qlock = threading.Lock()

    url = 'https://www.pixiv.net/ranking.php?mode=daily'
    html = get(url, timeout=1).text
    soup = BeautifulSoup(html, 'lxml')

    pattern = re.compile(r'data-src="(.+?\.jpg)"')  # compile once
    links = []
    for tag in soup.find_all('img'):
        link = pattern.findall(str(tag))
        if link:  # skip tags without a .jpg data-src
            workQueue.put(link)
            links.append(link)

    threads = []
    for _ in links:  # one downloader thread per queued item
        thread = MyThread(qlock, workQueue)
        thread.start()
        threads.append(thread)

    for t in threads:
        t.join()
2.多進程:
# Multiprocess I/O-bound demo: one worker process per queued link, each
# downloading and saving one image.
#
# Fixes over the original:
#   * full-width quotes (‘...‘) replaced with ASCII quotes — the original
#     text did not even parse as Python;
#   * ALL processes are joined: the original reused one loop variable and
#     only joined the last process it started;
#   * the busy-wait "while not queue.empty(): pass" is dropped — joining
#     the processes already waits for the queue to drain;
#   * the dot before "jpg" is escaped and the regex is compiled once;
#   * empty findall() results are no longer queued (the worker's
#     queue.get()[0] would raise IndexError on an empty list).
import re
from multiprocessing import Process, Queue

from bs4 import BeautifulSoup
from requests import get


class MyProcess(Process):
    """Worker process: download one queued image."""

    def __init__(self, queue):
        Process.__init__(self)
        self.queue = queue

    def run(self):
        run_process(self.queue)


def run_process(queue):
    """Take one URL from the queue and save the image it points to."""
    url = queue.get()[0]
    img = get(url, timeout=1).content
    name = url.split('/')[-1]
    imgid = name[:8]  # first 8 chars of the filename serve as the image id
    with open('C:/Users/adimin/Desktop/video/{}.jpg'.format(imgid), 'wb') as fp:
        fp.write(img)
    print('download: ' + url)


if __name__ == '__main__':
    url = 'https://www.pixiv.net/ranking.php?mode=daily'
    html = get(url, timeout=1).text
    soup = BeautifulSoup(html, 'lxml')

    queue = Queue(50)
    pattern = re.compile(r'data-src="(.+?\.jpg)"')  # compile once
    links = []
    for tag in soup.find_all('img'):
        link = pattern.findall(str(tag))
        if link:  # skip tags without a .jpg data-src
            queue.put(link)
            links.append(link)

    processes = []
    for _ in links:  # one downloader process per queued item
        p = MyProcess(queue)
        p.start()
        processes.append(p)

    for p in processes:  # join every process, not just the last one
        p.join()
最後,感覺運行時間都差不多...還是看不太出來差距。
python - 多線程/多進程