python3爬蟲踩坑記錄片之oss檔案和字串拼接的學問
阿新 • • 發佈:2018-12-19
1:由於之前做過雲上的專案,剛入手看 OSS 的文件時有點費力,因為有些變數阿里雲說明得並不是很詳細。今天剛好又用了一遍,就把它整理出來。此處使用的是最簡單的做法:先下載到本地再上傳,並未做實時併發。如果量大,請另行參考阿里雲文件。
從資料庫取出介面下載
# Download hotel images, mirror them to Aliyun OSS and store the resulting URLs.
def agoda_img(self, id, pics):
    """Download every image in *pics*, upload it to OSS and record the URLs.

    Each image is saved locally as ``<id>_<index>.jpg`` (``index`` is the
    position of the URL inside *pics*) under a per-``id`` directory, then
    uploaded to the ``foreign_hotel/`` prefix of the bucket.  The public URLs
    of the successfully uploaded images are joined with commas and passed to
    ``self.insert_sql`` together with a cover image.

    Args:
        id: Record identifier; used for the local directory name, the file
            names and the ``insert_sql`` call.
        pics: Comma-separated image URLs.  ``None``, an empty string and
            empty entries are tolerated and skipped.

    Returns:
        None.  Nothing is written to the database when no image was uploaded.
    """
    # Keep the original position of every URL so file names stay stable even
    # when blank entries are dropped.
    jobs = [
        (index, url.strip())
        for index, url in enumerate((pics or "").split(","))
        if url.strip()
    ]
    if not jobs:
        return

    header = {
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36",
    }
    abspath = "/root/wcspider/ctrip_up/agoda_image"
    path = abspath + "/" + str(id)

    # OSS configuration.  Environment variables take precedence.
    # NOTE(review): the literal fallbacks are credentials committed to source;
    # rotate them and drop the fallbacks once the environment is provisioned.
    endpoint = 'http://oss-cn-hangzhou.aliyuncs.com'
    access_key_id = os.environ.get('OSS_ACCESS_KEY_ID', 'LTAIdDUS3bCbQILs')
    access_key_secret = os.environ.get(
        'OSS_ACCESS_KEY_SECRET', '16CvB9kL5D7KMgxavAzMCd27D8Afn2')
    bucket_name = 'lohas-changzhou'
    # The client does not depend on the image, so build it once.
    bucket = oss2.Bucket(
        oss2.Auth(access_key_id, access_key_secret), endpoint, bucket_name)

    image_list = []
    for i, url in jobs:
        paths = str(id) + "_" + str(i) + ".jpg"
        fname = path + "/" + paths

        # A file already in place means this image was handled by an earlier
        # run; skip it before spending a download on it.
        # NOTE(review): a file whose upload failed earlier is skipped as well
        # and is therefore never retried - confirm this is intended.
        if os.path.exists(fname):
            print("圖片已下載")
            continue

        try:
            ir = requests.get(url, headers=header, timeout=30)
        except requests.RequestException as e:
            # One bad URL must not abort the remaining downloads.
            print('圖片失敗', url, e)
            continue
        if ir.status_code != 200:
            continue

        os.makedirs(path, exist_ok=True)
        # Write to a temporary name and rename afterwards so that a file at
        # `fname` is always a complete download.
        tmp_name = fname + ".part"
        with open(tmp_name, 'wb') as f:
            f.write(ir.content)
        os.replace(tmp_name, fname)

        # `key` is the object name (target "directory" + file name) in OSS.
        # No delete is needed beforehand: PutObject overwrites an existing
        # object with the same key by default.
        key = "foreign_hotel/" + paths
        try:
            res_back = bucket.put_object_from_file(key, filename=fname)
        except Exception as e:
            print('圖片失敗', fname, e)
            continue
        if res_back:
            image_list.append(
                "https://lohas-changzhou.oss-cn-hangzhou.aliyuncs.com/foreign_hotel/" + paths)
            print('上傳成功:%s' % fname)

    # Nothing uploaded in this run: nothing to record.
    if not image_list:
        return

    # Prefer the image with index 0 as the cover, otherwise the first upload.
    pics = ",".join(image_list)
    cover_marker = str(id) + "_0"
    pic = next((m for m in image_list if cover_marker in m), image_list[0])
    self.insert_sql(id, pic, pics)
字串拼接稍後再寫。