1. 程式人生 > 其它 >利用Python下載目標網站圖片並利用PIL模組提取出圖片中的Exif資訊

利用Python下載目標網站圖片並利用PIL模組提取出圖片中的Exif資訊

  本程式碼主要由以下幾部分組成:

  1. 獲取目標網站頁面

  2. 利用xpath提取出圖片的連結

  3. 下載圖片到本地的 images 目錄,再利用PIL模組提取出Exif資訊

import optparse
import os
import sys
from email import header
from urllib.parse import urljoin, urlsplit

import requests
from lxml import etree
from PIL import Image
from PIL.ExifTags import TAGS


class ImagesExifExtractor:
    """Download every image referenced by a web page and print its Exif data.

    The target page URL is read from the ``-u/--url`` command-line option when
    the object is constructed; ``run()`` then performs the whole pipeline:
    fetch the page, collect the ``<img src>`` links, save each image under
    ``IMAGES_DIR`` and print the Exif tags Pillow can read from it.
    """

    # Directory (relative to the working directory) that images are saved to.
    IMAGES_DIR = 'images'
    # Seconds to wait for the server before giving up on a request.
    REQUEST_TIMEOUT = 10
    # Browser-like User-Agent sent with every HTTP request.
    HEADERS = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:99.0) Gecko/20100101 Firefox/99.0'
    }
    # File name used when a URL path has no usable last segment.
    FALLBACK_NAME = 'unnamed_image'

    def __init__(self) -> None:
        self.url = self.get_params()

    def get_params(self):
        """Parse the command line and return the value of ``-u/--url``.

        Prints the usage string and exits with status 0 when no URL is given.
        """
        parser = optparse.OptionParser("Usage: <Program> -u website url")
        parser.add_option('-u', '--url', dest='url', type='string',
                          help='Specify URL of target website')
        options, _args = parser.parse_args()
        if options.url is None:
            print(parser.usage)
            sys.exit(0)
        return options.url

    def get_web_page(self, url):
        """Fetch ``url`` and return the response body as text.

        Returns ``None`` when the server answers with a status other than 200.
        Prints the error and exits with status 1 when the request itself fails.
        """
        print("[-] Retrieve web page of target...\n")
        try:
            response = requests.get(url=url, headers=self.HEADERS,
                                    timeout=self.REQUEST_TIMEOUT)
        except requests.RequestException as e:
            print(e)
            sys.exit(1)

        # status_code is an int, so it must be compared with 200, not '200'.
        if response.status_code == 200:
            return response.text
        print("[!] Unexpected HTTP status %s for %s" % (response.status_code, url))
        return None

    @staticmethod
    def _resolve_src(base_url, src):
        """Return ``src`` as an absolute URL, or ``None`` if it is not fetchable.

        Blank values and inline ``data:`` URIs are rejected; relative and
        protocol-relative values are resolved against ``base_url``.
        """
        src = src.strip()
        if not src or src.lower().startswith('data:'):
            return None
        return urljoin(base_url, src)

    @classmethod
    def _local_path(cls, url):
        """Return the path under ``IMAGES_DIR`` where ``url`` is stored.

        Only the last segment of the URL path is used, so query strings and
        fragments never end up in the file name.
        """
        name = urlsplit(url).path.rsplit('/', 1)[-1]
        if name in ('', '.', '..'):
            name = cls.FALLBACK_NAME
        return cls.IMAGES_DIR + '/' + name

    def extract_images_links(self, response):
        """Return the absolute URLs of all ``<img src>`` values in the HTML.

        ``response`` is the page markup as a string. ``<img>`` elements that
        have no usable ``src`` are skipped. Prints the error and exits with
        status 1 when the markup cannot be parsed.
        """
        try:
            html = etree.HTML(response)
        except (etree.LxmlError, ValueError, TypeError) as e:
            print(e)
            sys.exit(1)
        if html is None:
            return []

        # self.url may be absent when the method is used on a bare instance;
        # urljoin with an empty base returns the link unchanged.
        base_url = getattr(self, 'url', None) or ''
        images_links = []
        for src in html.xpath('//img/@src'):
            link = self._resolve_src(base_url, src)
            if link is None:
                continue
            images_links.append(link)
            print(link)
        return images_links

    def extract_exif(self, img_filename):
        """Print and return the named Exif tags stored in ``img_filename``.

        Returns a dict mapping tag names to values, or ``None`` when the file
        cannot be read as an image. A failure is reported but does not stop
        the program, so the remaining images are still processed.
        """
        try:
            # The context manager closes the underlying file handle.
            with Image.open(img_filename) as image:
                img_exif = image.getexif()
                print("[-] Extract exif data from the image [%s]" % img_filename)
                exif_data = {
                    TAGS[k]: v
                    for k, v in img_exif.items()
                    if k in TAGS
                }
        except Exception as e:
            # Kept broad on purpose: this is a per-image best-effort step and
            # a malformed file must not abort the whole run.
            print(e)
            return None
        print(exif_data)
        return exif_data

    def download_image(self, url):
        """Download ``url`` into ``IMAGES_DIR`` and return the saved file path.

        Returns ``None`` (after printing the error) when the request fails,
        the server reports an HTTP error, or the file cannot be written.
        """
        print("[-] Downdload image from %s" % url)
        filename = self._local_path(url)
        try:
            # Fetch first so a failed request does not leave an empty file.
            response = requests.get(url=url, headers=self.HEADERS,
                                    timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()
            os.makedirs(self.IMAGES_DIR, exist_ok=True)
            with open(filename, 'wb') as f:
                f.write(response.content)
        except (requests.RequestException, OSError) as e:
            print(e)
            return None
        print("[-] Saved image successfully: %s" % filename.rsplit('/', 1)[-1])
        return filename

    def run(self):
        """Fetch the page, then download each image and print its Exif data."""
        response = self.get_web_page(self.url)
        if response is None:
            sys.exit(1)
        for img_link in self.extract_images_links(response):
            filename = self.download_image(img_link)
            if filename is None:
                # Nothing was saved for this link; move on to the next one.
                continue
            self.extract_exif(filename)


if __name__ == "__main__":
    image_exif = ImagesExifExtractor()
    image_exif.run()