爬蟲基礎二
阿新 • • 發佈:2018-07-08
瀏覽器 軌跡 slider 回來 gb2 all for start cti
1概念:
a層級
1 1
1 1 1
1 1
1 1
b深度和廣度層級
11111111111 先處理完第一層,然後再爬下一層,就是廣度優先;從第一排第一個一路往下走到 m,就是深度優先
11111111111
m111111111
實現的方法:
隊列(先進先出):依序入隊 [1,2,3,4,5,6],出隊時 1 會先出 → 廣度優先
棧(後進先出):依序入棧 1 到 10,出棧順序為 [10,9,8,7,6,5,4,3,2,1],10 會先出 → 深度優先
selenium模塊:
用程式控制瀏覽器、模擬點擊等操作的模塊
示例代碼:
import os
import random
import shutil
import time

from PIL import Image
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

# Ratio between screenshot pixels and the coordinates Selenium reports.
# NOTE(review): presumably 2 because the script was written on a HiDPI
# (Retina) display -- confirm and adjust for other screens.
SCREENSHOT_SCALE = 2


def get_snap(driver):
    """Save a screenshot to ``full_snap.png`` and return it as a PIL image."""
    driver.save_screenshot('full_snap.png')
    return Image.open('full_snap.png')


def get_image(driver):
    """Crop the captcha canvas out of a fresh screenshot.

    The cropped image is also written to ``code.png``.

    Returns:
        The cropped PIL image of the ``geetest_canvas_img`` element.
    """
    img = driver.find_element(By.CLASS_NAME, 'geetest_canvas_img')
    time.sleep(2)  # give the canvas time to finish rendering/animating

    location = img.location
    size = img.size
    left = location['x']
    top = location['y']
    right = left + size['width']
    bottom = top + size['height']

    page_snap_obj = get_snap(driver)
    # Element coordinates are scaled up to screenshot pixels before cropping.
    image_obj = page_snap_obj.crop((
        left * SCREENSHOT_SCALE,
        top * SCREENSHOT_SCALE,
        right * SCREENSHOT_SCALE,
        bottom * SCREENSHOT_SCALE,
    ))
    with open('code.png', 'wb') as f:
        image_obj.save(f, format='png')
    return image_obj


def get_distance(image1, image2, threshold=70):
    """Measure the horizontal offset between two groups of changed columns.

    A column counts as "changed" when at least one of its pixels differs
    between the two images by ``threshold`` or more on any of the first three
    channels.  The result is the distance from the first changed column to the
    first changed column that follows a break in the sequence of changed
    columns.

    Args:
        image1: First PIL image (pixels are assumed to be RGB/RGBA tuples).
        image2: Second PIL image, compared pixel by pixel with ``image1``.
        threshold: Minimum per-channel difference that marks a pixel as changed.

    Returns:
        The offset in image pixels, or 0 when the images do not differ or the
        changed columns form a single unbroken run.
    """
    # Only the area common to both images can be compared.
    width = min(image1.size[0], image2.size[0])
    height = min(image1.size[1], image2.size[1])
    # Obtain the pixel accessors once instead of once per pixel.
    pixels1 = image1.load()
    pixels2 = image2.load()

    changed_columns = []
    for x in range(width):
        for y in range(height):
            rgb1 = pixels1[x, y]
            rgb2 = pixels2[x, y]
            if any(abs(rgb1[c] - rgb2[c]) >= threshold for c in range(3)):
                changed_columns.append(x)
                break  # one differing pixel is enough to mark the column

    if not changed_columns:
        return 0

    first = changed_columns[0]
    stop = first  # yields a zero width when no break is found
    for offset, column in enumerate(changed_columns):
        if column != first + offset:
            stop = column
            break

    width = stop - first
    print(stop, first, width)
    return width


def _build_track(target, pick_acceleration, t=0.2):
    """Return integer offsets that sum to exactly ``target`` (an int >= 0).

    The motion is integrated in steps of ``t`` using the acceleration returned
    by ``pick_acceleration(travelled)``.  Each emitted offset is the change in
    the rounded position, so rounding errors do not accumulate.
    """
    steps = []
    velocity = 0.0
    travelled = 0.0
    emitted = 0
    while emitted < target:
        a = pick_acceleration(travelled)
        s = velocity * t + 0.5 * a * (t ** 2)
        velocity += a * t
        # Never travel past the target so the total is exact.
        travelled = min(travelled + s, target)
        step = round(travelled) - emitted
        steps.append(step)
        emitted += step
    return steps


def get_tracks(distance):
    """Build a drag trajectory covering ``distance`` pixels.

    The forward track overshoots the target by a random 0-5 pixels and the
    back track returns by the same amount, so the net movement equals
    ``distance``.

    Args:
        distance: Number of pixels to move; values below 0 are treated as 0.

    Returns:
        A dict with ``'forward_tracks'`` and ``'back_tracks'``, each a list of
        integer x offsets to apply in order.
    """
    distance = max(int(round(distance)), 0)
    exceed_distance = random.randint(0, 5)
    # Overshoot a little first, then slide back at the end.
    forward_target = distance + exceed_distance
    mid = forward_target * 3 / 5

    def forward_acceleration(travelled):
        # Accelerate over the first 3/5 of the way, then decelerate.
        magnitude = random.randint(1, 3)
        return magnitude if travelled < mid else -magnitude

    forward_tracks = _build_track(forward_target, forward_acceleration)

    # Slide back to the exact position.  The steps are generated accelerating
    # and then reversed, so the backward motion starts fast and slows down.
    back_steps = _build_track(
        exceed_distance, lambda _travelled: random.randint(1, 3)
    )
    back_tracks = [-step for step in reversed(back_steps)]

    return {'forward_tracks': forward_tracks, 'back_tracks': back_tracks}


def crack(driver):
    """Solve the slider captcha shown on the current page of ``driver``."""
    # 1. Click the button to get the picture without the gap.
    button = driver.find_element(
        By.XPATH, '//*[@id="embed-captcha"]/div/div[2]/div[1]/div[3]'
    )
    button.click()

    # 2. Capture the picture without the gap.
    image1 = get_image(driver)

    # 3. Click the slider button to get the picture with the gap.
    button = driver.find_element(By.CLASS_NAME, 'geetest_slider_button')
    button.click()

    # 4. Capture the picture with the gap.
    image2 = get_image(driver)

    # 5. Compare the pixels of both pictures to find the displacement.
    distance = get_distance(image1, image2)
    print(distance)

    # 6. Imitate human behaviour: derive a trajectory from the displacement.
    #    The distance is in screenshot pixels, so scale it back down.
    tracks = get_tracks(int(distance / SCREENSHOT_SCALE))

    # 7. Follow the trajectory: slide forward first, then backward.
    button = driver.find_element(By.CLASS_NAME, 'geetest_slider_button')
    ActionChains(driver).click_and_hold(button).perform()
    try:
        # A person starts the forward slide confidently, speeding up.
        for track in tracks['forward_tracks']:
            ActionChains(driver).move_by_offset(xoffset=track, yoffset=0).perform()

        # Pause as if noticing the overshoot, then slide back.
        time.sleep(0.5)
        for back_track in tracks['back_tracks']:
            ActionChains(driver).move_by_offset(xoffset=back_track, yoffset=0).perform()

        # Small jitter meant to further confuse the captcha backend; the
        # original author reports that it greatly improves the success rate.
        ActionChains(driver).move_by_offset(xoffset=3, yoffset=0).perform()
        ActionChains(driver).move_by_offset(xoffset=-3, yoffset=0).perform()

        # Hold for a moment before letting go, as a person would.
        time.sleep(0.5)
    finally:
        # Always release the mouse button, even if a move failed.
        ActionChains(driver).release().perform()


def login_luffy(username, password,
                driver_path='/Users/wupeiqi/drivers/chromedriver',
                keep_open=True):
    """Log in to luffycity.com, solving the slider captcha on the way.

    Args:
        username: Account name typed into the first login input.
        password: Password typed into the second login input.
        driver_path: Path to the chromedriver executable.
        keep_open: When True (the default) the browser is left open
            afterwards; pass False to quit the driver when done.
    """
    # NOTE(review): passing the driver path positionally is deprecated in
    # Selenium 4 -- confirm against the installed Selenium version.
    driver = webdriver.Chrome(driver_path)
    driver.set_window_size(960, 800)
    try:
        # 1. Enter the account name and password.
        driver.implicitly_wait(3)
        driver.get('https://www.luffycity.com/login')
        input_username = driver.find_element(
            By.XPATH, '//*[@id="router-view"]/div/div/div[2]/div[2]/input[1]'
        )
        input_pwd = driver.find_element(
            By.XPATH, '//*[@id="router-view"]/div/div/div[2]/div[2]/input[2]'
        )
        input_username.send_keys(username)
        input_pwd.send_keys(password)

        # 2. Solve the slider captcha.
        crack(driver)
        time.sleep(10)  # wait a while to make sure the login went through
    finally:
        if not keep_open:
            driver.quit()


if __name__ == '__main__':
    login_luffy(username='wupeiqi', password='123123123')
爬蟲基礎二