1. 程式人生 > >python3爬蟲採坑記錄片之oss檔案和字串連線的學問

python3 爬蟲踩坑記錄之 OSS 檔案上傳和字串連線的學問

1:之前做過雲端專案,剛上手時看 OSS 的文件有點費力,因為有些參數阿里雲說明得並不詳細。今天剛好又用了一遍,就把程式碼整理出來。此處採用的是最簡單的做法:先下載到本地再上傳,並未做即時併發處理;如果資料量大,請另行參考阿里雲官方文件。

從資料庫取出介面下載

    #下載
    def agoda_img(self, id, pics):
        """Download a hotel's images, mirror them to Aliyun OSS and save the URLs.

        Every non-empty URL in ``pics`` is fetched and written to
        ``/root/wcspider/ctrip_up/agoda_image/<id>/<id>_<index>.jpg``, then
        uploaded to the bucket under ``foreign_hotel/<id>_<index>.jpg``, where
        ``index`` is the position of the URL inside ``pics``. A file that
        already exists locally is treated as already processed and skipped.
        If at least one upload succeeds, ``self.insert_sql`` is called with the
        cover URL and the comma-joined list of all uploaded URLs.

        Args:
            id: Hotel identifier used in directory, file and object names.
                (Shadows the builtin; the name is kept for caller compatibility.)
            pics: Comma-separated image URLs. ``None`` or an empty string is a
                no-op.

        Returns:
            None.

        Raises:
            requests.RequestException: If a download fails or times out.
            OSError: If the local file or directory cannot be written.
        """
        header = {
            "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36",
        }
        abspath = "/root/wcspider/ctrip_up/agoda_image"
        path = abspath + "/" + str(id)

        # OSS settings. The object key doubles as the target "directory" in
        # the bucket.
        # SECURITY NOTE(review): these credentials are hard-coded in source;
        # they should be rotated and loaded from configuration or environment.
        endpoint = 'http://oss-cn-hangzhou.aliyuncs.com'
        access_key_id = 'LTAIdDUS3bCbQILs'
        access_key_secret = '16CvB9kL5D7KMgxavAzMCd27D8Afn2'
        bucket_name = 'lohas-changzhou'
        bucket = None  # created lazily so a construction error is handled per image

        image_list = []
        # Enumerate the raw split so file indices stay tied to the position in
        # ``pics`` even when blank entries are skipped.
        for i, url in enumerate((pics or "").split(",")):
            url = url.strip()
            if not url:
                continue

            paths = str(id) + "_" + str(i) + ".jpg"
            fname = path + "/" + paths

            # An existing local file means this image was handled by an
            # earlier run; skip it before spending a request on it.
            if os.path.exists(fname):
                print("圖片已下載")
                continue

            # A timeout keeps one stalled server from blocking the whole crawl.
            ir = requests.get(url, headers=header, timeout=30)
            if ir.status_code != 200:
                continue

            # Write straight to the final location; the file is closed (and
            # therefore flushed) before the upload reads it.
            os.makedirs(path, exist_ok=True)
            with open(fname, 'wb') as f:
                f.write(ir.content)

            key = "foreign_hotel/" + paths
            try:
                if bucket is None:
                    bucket = oss2.Bucket(oss2.Auth(access_key_id, access_key_secret), endpoint, bucket_name)
                # Remove any stale object stored under the very same key
                # before uploading the fresh file.
                bucket.delete_object(key)
                res_back = bucket.put_object_from_file(key, filename=fname)
            except Exception as e:
                # Best effort: report the failure and carry on with the
                # remaining images.
                print('圖片失敗', fname, e)
                continue

            if res_back:
                image_list.append("https://lohas-changzhou.oss-cn-hangzhou.aliyuncs.com/foreign_hotel/" + paths)
                print('上傳成功:%s' % fname)

        # Nothing new was uploaded, so there is nothing to store.
        if not image_list:
            return

        # Prefer the image with index 0 as the cover; otherwise fall back to
        # the first image that was uploaded.
        cover_suffix = "/" + str(id) + "_0.jpg"
        pic = next((u for u in image_list if u.endswith(cover_suffix)), image_list[0])
        self.insert_sql(id, pic, ",".join(image_list))

字串拼接稍後再寫