playwright实战篇(tx、ali225)-计算机考试-空麓网

人人都笑金角，人人都是金角

推荐文章：1、https://playwright.dev/python/docs/api/class-playwright  //官方文档2、https://cuiqingcai.com/36045.html  //崔庆才教程3、https://github.com/qqq732004709/  //实战参考4、https://www.cnblogs.com/carl-/p/15761861.html //实战参考5、https://www.cnblogs.com/james-wangx/p/16106304.html //实战参考

案例一：tx滑块（playwright）

目标网站：aHR0cHM6Ly93d3cudXJidGl4LmhrL2xvZ2lu

1、创建Chromium实例（如果不设置为 False，默认是无头模式启动浏览器）

async with async_playwright() as p:    browser = await p.chromium.launch(headless=False, args=['--start-maximized'])

2、最大化窗口

context = await browser.new_context(viewport={"width": 1920, "height": 1080}, no_viewport=True)

3、新建标签页

page = await context.new_page()

4、加载过检测js

await page.add_init_script(js) #stealth.min.js

5、监听response事件

async def on_response(response):    if '/cap_union_new_getcapbysig' in response.url and response.status == 200:        #对背景图以及滑块图进行拦截然后保存        if 'img_index=1' in response.url:            with open("bg_picture.jpg", "wb") as f:                f.write(requests.get(response.url).content)        elif 'img_index=0' in response.url:            with open("cut_picture.png", "wb") as f:                f.write(requests.get(response.url).content)        print("response.url:", response.url)    if 'cap_union_new_verify' in response.url and response.status == 200:        #滑块通过后获取参数        result = await response.text()        print("response.url:", response.url,result)        page.on('response',on_response)

6、打开网页、触发滑块

await page.goto('aHR0cHM6Ly93d3cudXJidGl4LmhrL2xvZ2lu')await page.wait_for_timeout(1500)await page.click('xpath=//*[@id="root"]/div/div[3]/div/div/div[5]/div/div') await page.wait_for_timeout(500)await page.click('xpath=//*[@id="root"]/div/div[3]/div/div/div[8]/div[2]/div')

7、识别坐标

def get_gap_offset():    """    识别坐标，滑块的图片需要切割    """    det = ddddocr.DdddOcr(det=False, ocr=False, show_ad=False)    img = Image.open('cut_picture.png')    region = img.crop((160, 508, 243, 595))  #    region.save(f'cut_picture.png')    with open('bg_picture.jpg', 'rb') as f:        target_bytes = f.read()    with open('cut_picture.png', 'rb') as f:        background_bytes = f.read()    res = det.slide_match(target_bytes, background_bytes, simple_target=True)    print("识别到的坐标位置：", res)    distance = int(res['target'][0])    return distance

8、找到滑动起始点，并滑动

async def move_down(page):    #定位iframe    new_frame = page.frame_locator('iframe[id="tcaptcha_iframe_dy"]')    #定位起始点    move_tag = new_frame.locator('xpath=//*[@id="tcOperation"]/div[6]')    #找到这个元素在当前页面的坐标    box = await move_tag.bounding_box()     print("目前点击的位置",box)    # 讲鼠标移动到到其实元素的中心    await page.mouse.move(box["x"] + box["width"] / 2, box["y"] + box["height"] / 2)      # 按下鼠标    await page.mouse.down()     #延时1.2s    await page.wait_for_timeout(1200)    # 这里获取到x坐标中心点位置    x = box["x"] + box["width"] / 2     #识别到坐标后与网页上的比例    distance = int(get_gap_offset()/1.97)-30    #轨迹    move_distance = get_track_list(distance)    print("最终坐标：",distance,"轨迹：",move_distance)    for i in move_distance:        x += i        await page.mouse.move(x, box["y"])    await page.mouse.up()

9、关闭窗口

await browser.close()

至此tx滑块的分析就结束了

然后我还写了一版selenium的，相比于playwright就会麻烦一些
案例一：tx滑块（selenium）

对于我们日常使用而言两者主要区别在于：1、selenium只支持同步，playwright可以支持异步的2、操作iframe,selenium来回切换iframe非常麻烦，而playwright只需要定位元素即可2、在监听请求这一点上，playwright的page.on非常好用，而selenium一般是借助browsermobproxy通过代理的方式进行拦截   使用方式：(1)https://github.com/lightbody/browsermob-proxy/releases，下载并解压            (2)安装证书，参考链接https://www.bilibili.com/read/cv21263644/            (3)调用方式		        server = Server('browsermob-proxy-2.1.4/bin/browsermob-proxy')		        server.start()		        proxy = server.create_proxy(params={'trustAllServers':'true'})		        option = ChromeOptions()		        option.add_argument('--proxy-server={0}'.format(self.proxy.proxy))		        driver = webdriver.Chrome(options=option)

这里就不细致讲解了，主要代码如下

class Tencent():    def __init__(self):        server = Server('browsermob-proxy-2.1.4/bin/browsermob-proxy')        server.start()        self.proxy = server.create_proxy(params={'trustAllServers':'true'})        self.url = 'aHR0cHM6Ly93d3cudXJidGl4LmhrL2xvZ2lu'        option = ChromeOptions()        option.add_experimental_option('excludeSwitches', ['enable-automation'])        option.add_experimental_option('useAutomationExtension', False)        option.add_argument('--proxy-server={0}'.format(self.proxy.proxy))        self.proxy.new_har(options={'captureContent': True,'captureHeaders': True})        self.driver = webdriver.Chrome(options=option)        self.driver.execute_cdp_cmd('Page.addScriptToEvaluateOnNewDocument', {            'source': 'Object.defineProperty(navigator,"webdriver",{get: () => undefined})'        })        with open('stealth.min.js') as f:            js = f.read()        self.driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {"source": js})        self.driver.maximize_window()        self.det = ddddocr.DdddOcr(det=False, ocr=False, show_ad=False)        self.headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36'}    def index(self):        """        主流程        """        self.driver.get(self.url)        time.sleep(5)        print("正在打开网页~~~")        self.driver.find_element(by=By.XPATH, value=f'//*[@id="root"]/div/div[3]/div/div/div[5]/div/div').click()        time.sleep(1)        self.driver.find_element(by=By.XPATH, value=f'//*[@id="root"]/div/div[3]/div/div/div[8]/div[2]/div').click()        time.sleep(5)        self.driver.switch_to.frame('tcaptcha_iframe_dy')        bg_style = self.driver.find_element('id','slideBg').get_attribute("style")        cut_style = self.driver.find_element(by=By.XPATH, value=f'//*[@id="tcOperation"]/div[8]').get_attribute("style")        bg_url = re.findall('url("(.*?)")',str(bg_style))[0]        cut_url = re.findall('url("(.*?)")', str(cut_style))[0]        print("获取到背景图片url:",bg_url)        print("获取到滑块图片url:",cut_url)        with open("bg_picture.jpg", "wb") as f:            f.write(requests.get(bg_url).content)        with open("cut_picture.png", "wb") as f:            f.write(requests.get(cut_url).content)    def get_gap_offset(self):        """        识别坐标        """        img = Image.open('cut_picture.png')        region = img.crop((160, 508, 243, 595))  #        region.save(f'cut_picture.png')        with open('bg_picture.jpg', 'rb') as f:            target_bytes = f.read()        with open('cut_picture.png', 'rb') as f:            background_bytes = f.read()        res = self.det.slide_match(target_bytes, background_bytes, simple_target=True)        print("识别到的坐标位置：",res)        distance = int(res['target'][0])        return distance    def get_track(self, offset):        '''        计算滑块的移动轨迹        '''        offset -= 30  # 滑块并不是从0开始移动，有一个初始值        a = offset / 4        track = [a, a, a, a]        return track    def shake_mouse(self):        """        模拟人手释放鼠标抖动        """        ActionChains(self.driver).move_by_offset(xoffset=-2, yoffset=0).perform()        ActionChains(self.driver).move_by_offset(xoffset=2, yoffset=0).perform()    def operate_slider(self, track):        """        拖动滑块        :param track: 运动轨迹        """        #  定位到拖动按钮        slider_bt = self.driver.find_element(by=By.XPATH,value ='//*[@id="tcOperation"]/div[6]')        # 点击拖动按钮不放        ActionChains(self.driver).click_and_hold(slider_bt).perform()        # 按正向轨迹移动        for i in track:            ActionChains(self.driver).move_by_offset(xoffset=i, yoffset=0).perform()            time.sleep(random.random() / 100)  # 每移动一次随机停顿0-1/100秒之间骗过了极验，通过率很高        time.sleep(random.random())        # 按逆向轨迹移动        back_tracks = [-1, -0.5, -1]        for i in back_tracks:            time.sleep(random.random() / 100)            ActionChains(self.driver).move_by_offset(xoffset=i, yoffset=0).perform()        # 模拟人手抖动        self.shake_mouse()        time.sleep(random.random())        # 松开滑块按钮        ActionChains(self.driver).release().perform()        time.sleep(2)    def login(self):        '''        实现主要的登陆逻辑        '''        self.index()        distance = self.get_gap_offset()        distance = int(distance/1.97)        track = self.get_track(distance)        self.operate_slider(track)        result = self.proxy.har        for entry in result['log']['entries']:            if entry['request']['url'] == 'https://t.captcha.qq.com/cap_union_new_verify':                print(entry['request']['url'],entry['response']['content'])                print(entry['response']['content']['text'])

案例二：阿里225(playwright)

目标网站：aHR0cHM6Ly9wYXNzcG9ydC5kYW1haS5jbi9sb2dpbg==

1、前面的初始化流程

async with async_playwright() as p:        browser = await p.chromium.launch(headless=False, args=['--start-maximized'])	    context = await browser.new_context(viewport={"width": 1920, "height": 1080}, no_viewport=True)	    context.set_default_timeout(8000)	    page = await context.new_page()	    await page.add_init_script(js)	    print("打开网页~~~")	    await page.goto('aHR0cHM6Ly9wYXNzcG9ydC5kYW1haS5jbi9sb2dpbg==')	    await page.wait_for_timeout(1000)	    page.on('response', on_response)

2、输入账号密码

 #这里需要注意这个iframe,前面的iframe和后面出滑块之后的iframe属于包含关系 new_frame = page.frame_locator('iframe[id="alibaba-login-box"]') await page.wait_for_timeout(1000) await new_frame.locator('#fm-login-id').fill("正确的手机号码") await page.wait_for_timeout(1000) await new_frame.locator('#fm-login-password').fill("错误的密码")  await page.wait_for_timeout(1000) await new_frame.get_by_role("button", name="登录").click() await page.wait_for_timeout(1000)

3、强制弹出滑块，并判断

 #这里为了让它出滑块要先输出错误的密码，然后一直click，直到出滑块为止while True:   try:       new_frame2 = new_frame.frame_locator('iframe[id="baxia-dialog-content"]')       move_tag = new_frame2.locator('xpath=//*[@id="nc_1_n1z"]')       number = await move_tag.count()       if number>=1:           box = await move_tag.bounding_box()           print("目前点击的位置", box)           break       else:           print(f"没出滑块，重新点击")           await page.wait_for_timeout(1000)           await new_frame.get_by_role("button", name="登录").click()   except:       await new_frame.get_by_role("button", name="登录").click()

4、定位以及滑动

async def move_down(page,box):    await page.mouse.move(box["x"] + box["width"] / 2, box["y"] + box["height"] / 2)    await page.mouse.down()  # 按下鼠标    await page.wait_for_timeout(1200)    x = box["x"] + box["width"] / 2  # 这里获取到x坐标中心点位置    move_distance = get_track_list(265)    print("轨迹：",move_distance)    for i in move_distance:        x += i        await page.mouse.move(x, box["y"])    await page.mouse.up()    await page.wait_for_timeout(500)

至此ali滑块的分析就结束了

当脚下的路走起来比以前轻松了，是不是该问自己是否在走下坡路了，我也不知道呢

playwright实战篇(tx、ali225)

Python相关栏目本月热门文章