線程間通信共享變量和queue

本文轉載自查看原文 2018-07-29 14:06 1033

　　在多線程中，數據是共享的，如何在多個線程之間安全地通信，是首先要考慮的問題。

#線程間的通信

import time
import threading
from threading import RLock

# Shared list of article detail-page URLs: get_detail_url appends to it and
# get_detail_html pops from it.
detail_url_list = []

# Re-entrant lock that get_detail_html acquires around its use of the shared list.
lock = RLock()

def get_detail_html(url=None):
    """Fetch one article detail page, taking its URL from the shared list.

    Intended as a thread target: list pages are crawled faster than detail
    pages, so several of these workers are run in parallel.

    Args:
        url: Ignored; kept (and now optional) for backward compatibility.
            The URL to process is always popped from ``detail_url_list``.

    Returns:
        None. Returns immediately, without fetching anything, when the
        shared list is empty.
    """
    # Hold the lock only while touching the shared list.  The ``with`` block
    # guarantees the lock is released on every exit path, and keeping the
    # slow fetch outside of it lets the worker threads actually overlap.
    with lock:
        if not detail_url_list:
            return
        url = detail_url_list.pop()
    print('get detail html started')
    time.sleep(2)  # simulates downloading the page at ``url``
    print('get detail html end')


def get_detail_url(url):
    """Crawl the article list page and publish the detail-page URLs.

    Args:
        url: Address of the list page; this simulated crawl does not read it.

    Returns:
        None. Twenty generated URLs are appended to ``detail_url_list``.
    """
    print('get detail url started')
    time.sleep(4)  # simulates downloading the list page
    template = 'http://projectsedu.com/{id}'
    detail_url_list.extend(template.format(id=article_id) for article_id in range(20))
    print('get detail url end')

#需求就是把爬取文章列表頁得到的url，交給爬取文章詳情頁的線程去爬取：
#這個時候，就涉及到線程間的資源通信

#第一種方法就是  共享變量（共享變量其實就是全局變量，給各個函數調用）
#具體方法如下：


if __name__ == '__main__':
    # Crawl the list page first and wait for it, so the shared list is
    # populated before any detail worker tries to pop from it.
    thread2 = threading.Thread(target=get_detail_url,
                               args=('http://bolezaixian.com',))
    thread2.start()
    thread2.join()

    # get_detail_html takes a ``url`` argument; its value is not used, since
    # each worker pops its own URL from the shared list.
    workers = [threading.Thread(target=get_detail_html, args=('',))
               for _ in range(10)]
    for worker in workers:
        worker.start()
    # Wait for every worker so the script only finishes once all fetches end.
    for worker in workers:
        worker.join()
共享變量也是要加鎖的。

import threading
from threading import Lock
#把共享變量存在settings配置文件中
import settings
import time

# Lock intended for guarding access to the URL list shared between threads.
lock = Lock()


def get_detail_html():
    """Continuously fetch article detail pages from the shared URL list.

    Intended as a thread target.  URLs are taken from
    ``settings.detail_list_url``; list pages are crawled faster than detail
    pages, so several of these workers are run in parallel.

    The loop polls forever: when the list is empty the worker sleeps briefly
    and checks again.  It only stops if processing raises an exception, in
    which case the error is printed and the worker exits.

    Returns:
        None.
    """
    detail_url_list = settings.detail_list_url
    while True:
        try:
            # The emptiness check and the pop must happen atomically;
            # otherwise another worker can take the last URL in between and
            # this pop would fail with IndexError.
            with lock:
                url = detail_url_list.pop() if detail_url_list else None
            if url is None:
                # Nothing to do yet: back off instead of spinning at full CPU.
                time.sleep(0.1)
                continue
            print('get detail html started')
            time.sleep(2)  # simulates downloading the page at ``url``
            print('get detail html end')
        except Exception as e:
            # Worker boundary: report the unexpected failure and stop.
            print(e)
            print('線程已運行完了')
            break


def get_detail_url():
    """Crawl the article list page and publish the detail-page URLs.

    Returns:
        None. Twenty generated URLs are appended to the shared
        ``settings.detail_list_url`` list.
    """
    detail_url_list = settings.detail_list_url
    print('get detail url started')
    time.sleep(4)  # simulates downloading the list page
    for i in range(20):
        detail_url_list.append('http://projectsedu.com/{id}'.format(id=i))
    # Report completion once, after the loop, rather than once per URL.
    print('get detail url end')


if __name__ == '__main__':
    started_at = time.time()

    # Launch the ten detail-page workers; they are started but not joined.
    workers = [threading.Thread(target=get_detail_html) for _ in range(10)]
    for worker in workers:
        worker.start()

    # Run the list-page crawler and wait for it alone, so the reported time
    # covers the list-page crawl and not the workers.
    producer = threading.Thread(target=get_detail_url)
    producer.start()
    producer.join()

    elapsed = time.time() - started_at
    print('total_time:{}'.format(elapsed))

#通過queue的方式進行線程間同步通信

-----------------------------------------------------------------------------------------------------------------

from queue import Queue

import time
import threading


def get_detail_html(queue):
    """Continuously fetch article detail pages whose URLs arrive on a queue.

    Intended as a thread target; the loop never returns.

    Args:
        queue: A ``queue.Queue``-like object providing ``get()`` and
            ``task_done()``; each item is a detail-page URL.
    """
    while True:
        # get() blocks for as long as the queue is empty.
        url = queue.get()
        try:
            print('get detail html started')
            time.sleep(2)  # simulates downloading the page at ``url``
            print('get detail html end')
        finally:
            # Acknowledge the item even if processing failed; without this,
            # Queue.join() would wait forever on the unfinished task.
            queue.task_done()


def get_detail_url(queue):
    """Repeatedly crawl the article list page and enqueue detail-page URLs.

    Intended as a thread target; the loop never returns.  Each pass puts
    twenty generated URLs on the queue.

    Args:
        queue: Queue that receives the URLs through ``put()``.
    """
    template = "https://projectsedu.com/{id}"
    while True:
        print('get detail url started')
        time.sleep(2)  # simulates downloading the list page
        for article_id in range(20):
            page_url = template.format(id=article_id)
            queue.put(page_url)
        print('get detail url end')


if __name__ == "__main__":
    # Always bound the queue so that a fast producer cannot grow memory
    # without limit; put() blocks once maxsize items are waiting.
    detail_url_queue = Queue(maxsize=1000)

    # The producer has to be started as well as created, otherwise nothing
    # is ever put on the queue and the workers block in get() forever.
    thread_detail_url = threading.Thread(target=get_detail_url, args=(detail_url_queue,))
    thread_detail_url.start()

    for i in range(10):
        html_thread = threading.Thread(target=get_detail_html, args=(detail_url_queue,))
        html_thread.start()

    # join() blocks until every item put on the queue so far has been
    # acknowledged by a consumer calling task_done(); it returns at once if
    # nothing is outstanding.  task_done() must not be called from here:
    # calling it more times than items were put raises ValueError.
    detail_url_queue.join()

qsize()方法返回隊列的大小，empty（）方法判斷隊列是否為空，如果為空，get（）方法是會阻塞在那的；full（）方法判斷隊列是否已滿，如果已滿，put（）方法是會阻塞在那的。

免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯系本站郵箱yoyou2525@163.com刪除。

猜您在找 多線程進程間共享變量等 ThreadLocal線程范圍內的共享變量『Python』多線程共享變量的實現多線程共享變量和 AsyncLocal 多線程05-線程范圍內共享變量 Java多線程——線程范圍內共享變量和ThreadLocal Spark共享變量多線程的共享變量的內存不可見性 Java多線程共享變量控制並發編程-多線程共享變量不安全