selenium 使用代理的方法汇总

在docker中启动selenium grid,通过扩展使用隧道代理,比如阿布云、多贝云、蘑菇代理。(How to set a proxy with authentication in Selenium ChromeDriver with Python.)

需要账号密码认证的代理(proxy with authentication)在chrome headless模式下不受支持,但在docker selenium或者selenium grid集群中是支持的。
启动selenium docker

1
docker run -d -p 4444:4444 --shm-size=2g -m 800M --memory-swap=800M --name=chrome  --restart=always selenium/standalone-chrome

一、selenium使用隧道动态代理(会生成本地zip插件文件)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import json
import os
import time
import zipfile

from scrapy.selector import Selector
from selenium import webdriver

# Tunnel-proxy endpoint and credentials; they are baked into the generated
# Chrome extension's background script below.
PROXY_HOST = 'http-dyn.abuyun.com'  # rotating proxy or host
PROXY_PORT = 9020  # port
PROXY_USER = ''  # username
PROXY_PASS = ''  # password

REMOTE_SELENIUM = '111.22.111.11:4444'  # host:port of the remote docker selenium

# manifest.json of the generated extension: it requests the "proxy" and
# blocking "webRequest" permissions that background.js relies on.
manifest_json = """
{
"version": "1.0.0",
"manifest_version": 2,
"name": "Chrome Proxy",
"permissions": [
"proxy",
"tabs",
"unlimitedStorage",
"storage",
"<all_urls>",
"webRequest",
"webRequestBlocking"
],
"background": {
"scripts": ["background.js"]
},
"minimum_chrome_version":"22.0.0"
}
"""

# background.js of the generated extension: it sets a fixed HTTP proxy and
# answers proxy authentication challenges with the configured credentials.
# Host, username and password are emitted through json.dumps so that quotes
# or backslashes in them become properly escaped JavaScript string literals
# instead of breaking the script; int() rejects a non-numeric port early.
background_js = """
var config = {
mode: "fixed_servers",
rules: {
singleProxy: {
scheme: "http",
host: %s,
port: parseInt(%s)
},
bypassList: ["localhost"]
}
};

chrome.proxy.settings.set({value: config, scope: "regular"}, function() {});

function callbackFn(details) {
return {
authCredentials: {
username: %s,
password: %s
}
};
}

chrome.webRequest.onAuthRequired.addListener(
callbackFn,
{urls: ["<all_urls>"]},
['blocking']
);
""" % (
    json.dumps(PROXY_HOST),
    int(PROXY_PORT),
    json.dumps(PROXY_USER),
    json.dumps(PROXY_PASS),
)


def get_chromedriver(use_proxy=False, user_agent=None, use_docker=True):
    """Create a Chrome WebDriver, optionally using the authenticated proxy.

    Args:
        use_proxy: When true, write ``proxy_auth_plugin.zip`` (holding
            ``manifest_json`` and ``background_js``) into the current
            working directory and register it as a Chrome extension.
        user_agent: Optional value passed to Chrome's ``--user-agent``
            switch; skipped when falsy.
        use_docker: When true, connect to the remote Selenium server at
            ``REMOTE_SELENIUM``; otherwise start a local Chrome through
            ``/usr/local/bin/chromedriver``.

    Returns:
        The created WebDriver instance. The caller is responsible for
        calling ``quit()`` on it.
    """
    chrome_options = webdriver.ChromeOptions()

    if use_proxy:
        pluginfile = 'proxy_auth_plugin.zip'
        with zipfile.ZipFile(pluginfile, 'w') as zp:
            zp.writestr("manifest.json", manifest_json)
            zp.writestr("background.js", background_js)
        chrome_options.add_extension(pluginfile)

    if user_agent:
        chrome_options.add_argument('--user-agent=%s' % user_agent)

    if use_docker:
        return webdriver.Remote(
            command_executor="http://{}/wd/hub".format(REMOTE_SELENIUM),
            options=chrome_options,
        )

    # The original joined the script directory with this absolute path;
    # os.path.join drops everything before an absolute component, so the
    # effective driver path was always this literal.
    # `options=` replaces the deprecated `chrome_options=` keyword and matches
    # the Remote branch above.
    # NOTE(review): recent Selenium 4 releases replace `executable_path` with
    # a Service object -- confirm against the installed Selenium version.
    return webdriver.Chrome(
        executable_path='/usr/local/bin/chromedriver',
        options=chrome_options,
    )


def main():
    """Load https://www.cip.cc eleven times and print the reported IP text.

    Uses one proxied, docker-hosted Chrome session for all requests and
    always quits it at the end, even when a request fails.
    """
    # Use the proxy, through the docker selenium.
    driver = get_chromedriver(use_proxy=True, use_docker=True)
    print(driver)
    try:
        # Same iteration count as the original counter loop (n = 0..10).
        for _ in range(11):
            driver.get('https://www.cip.cc')
            # default='' avoids an AttributeError on .strip() when the page
            # contains no <pre> element.
            ip_text = Selector(text=driver.page_source).xpath(
                '//pre/text()').extract_first(default='').strip()
            print(ip_text)
            # The window is deliberately NOT closed here: closing the only
            # window would make the next driver.get() fail.
            time.sleep(3)
    finally:
        driver.quit()


if __name__ == '__main__':
main()

效果图

二、selenium 使用芝麻代理等常规HOST:PORT代理

1
2
3
4
5
6
7
8
9
# Minimal example: route Chrome through a plain HOST:PORT proxy that needs
# no authentication, then print the page that reports the visible IP.
from selenium import webdriver

PROXY = "88.157.149.250:8080"  # IP:PORT or HOST:PORT

chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--proxy-server=%s' % PROXY)

# `options=` replaces the deprecated `chrome_options=` keyword.
chrome = webdriver.Chrome(options=chrome_options)
try:
    chrome.get("http://www.cip.cc")
    print(chrome.page_source)
finally:
    # Shut the browser and driver process down instead of leaving them running.
    chrome.quit()

子航 Clark wechat
微信公众号"优雅的python",欢迎订阅!
坚持分享,您的支持将鼓励我继续创作!