Crawling with Selenium

Running the crawler with a visible browser window (headed mode)

from selenium import webdriver

from time import sleep

# Start Chrome through the local chromedriver binary.
bro = webdriver.Chrome(executable_path=r'D:\Crawler Storage\chromedriver.exe')

try:
    bro.get(url='https://www.baidu.com/')
    sleep(2)

    # Enter the query into the element with id "kw", then click the element
    # with id "su"; the pauses give the page time to react.
    bro.find_element_by_id('kw').send_keys('python')
    sleep(1)
    bro.find_element_by_id('su').click()
    # Only `sleep` is imported from `time`, so it must be called directly
    # (`time.sleep` would raise NameError).
    sleep(2)

    # Save the rendered page source to disk.
    with open('baidu.html', 'w', encoding='utf8') as f:
        f.write(bro.page_source)
finally:
    # Always close the browser, even if one of the steps above fails.
    bro.quit()

Running the crawler without opening a browser window (headless mode)

from selenium.webdriver.chrome.options import Options

# Configure Chrome to run without a visible window.
chrome_options = Options()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')

url = 'https://movie.douban.com/typerank?type_name=%E6%83%8A%E6%82%9A&type=19&interval_id=100:90&action='
bro = webdriver.Chrome(
    executable_path=r'D:\Crawler Storage\chromedriver.exe',
    chrome_options=chrome_options,
)

try:
    bro.get(url)

    # Scroll to the bottom of the page three times in total, pausing between
    # scrolls (presumably so the page can load more entries -- confirm).
    # NOTE(review): the original indentation was lost; the loop body is
    # assumed to be the 1 s pause plus the scroll, with the 5 s wait after it.
    bro.execute_script('window.scrollTo(0,document.body.scrollHeight)')
    for _ in range(2):
        sleep(1)
        bro.execute_script('window.scrollTo(0,document.body.scrollHeight)')
    sleep(5)

    # Save the rendered page source to disk.
    with open('douban.html', 'w', encoding='utf8') as f:
        f.write(bro.page_source)
finally:
    # Always close the browser, even if one of the steps above fails.
    bro.quit()

Taking a screenshot of the current page and setting the browser window size

from selenium.webdriver.chrome.options import Options


# Configure Chrome to run without a visible window.
chrome_options = Options()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')

# WebDriver navigation needs an absolute URL, so the scheme is included.
url = 'https://www.baidu.com'

bro = webdriver.Chrome(
    executable_path=r'D:\Crawler Storage\chromedriver.exe',
    chrome_options=chrome_options,
)

try:
    # Set the window size before loading the page; the screenshot is taken
    # from this window.
    bro.set_window_size(7680, 4320)
    bro.get(url)
    sleep(30)
    data = bro.get_screenshot_as_png()

    # The screenshot is PNG bytes, so the file is opened in binary mode.
    with open('1.png', 'wb') as f:
        f.write(data)
finally:
    # Always close the browser, even if one of the steps above fails.
    bro.quit()

When the target elements live inside an iframe, locating them by id (or any other selector) fails. The solution is to switch into the frame first:

# Fragment: expects `bro` to be an already-started webdriver instance.
# Switch into the frame named 'login_frame' before looking up the elements
# below by id.
bro.switch_to_frame('login_frame')

bro.find_element_by_id('switcher_plogin').click()
bro.find_element_by_id('u').send_keys('1132300949')
bro.find_element_by_id('login_button').click()

# Capture the page source after the clicks above.
page_text = bro.page_source

from selenium import webdriver

from time import sleep

# Start Chrome through the local chromedriver binary.
bro = webdriver.Chrome(executable_path=r'D:\Crawler Storage\chromedriver.exe')

try:
    bro.get(url='https://www.baidu.com/')
    sleep(2)

    # Enter the query into the element with id "kw", then click the element
    # with id "su"; the pauses give the page time to react.
    bro.find_element_by_id('kw').send_keys('python')
    sleep(1)
    bro.find_element_by_id('su').click()
    # Only `sleep` is imported from `time`, so it must be called directly
    # (`time.sleep` would raise NameError).
    sleep(2)

    # Save the rendered page source to disk.
    with open('baidu.html', 'w', encoding='utf8') as f:
        f.write(bro.page_source)
finally:
    # Always close the browser, even if one of the steps above fails.
    bro.quit()

from selenium.webdriver.chrome.options import Options

# Configure Chrome to run without a visible window.
chrome_options = Options()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')

url = 'https://movie.douban.com/typerank?type_name=%E6%83%8A%E6%82%9A&type=19&interval_id=100:90&action='
bro = webdriver.Chrome(
    executable_path=r'D:\Crawler Storage\chromedriver.exe',
    chrome_options=chrome_options,
)

try:
    bro.get(url)

    # Scroll to the bottom of the page three times in total, pausing between
    # scrolls (presumably so the page can load more entries -- confirm).
    # NOTE(review): the original indentation was lost; the loop body is
    # assumed to be the 1 s pause plus the scroll, with the 5 s wait after it.
    bro.execute_script('window.scrollTo(0,document.body.scrollHeight)')
    for _ in range(2):
        sleep(1)
        bro.execute_script('window.scrollTo(0,document.body.scrollHeight)')
    sleep(5)

    # Save the rendered page source to disk.
    with open('douban.html', 'w', encoding='utf8') as f:
        f.write(bro.page_source)
finally:
    # Always close the browser, even if one of the steps above fails.
    bro.quit()

from selenium.webdriver.chrome.options import Options


# Configure Chrome to run without a visible window.
chrome_options = Options()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')

# WebDriver navigation needs an absolute URL, so the scheme is included.
url = 'https://www.baidu.com'

bro = webdriver.Chrome(
    executable_path=r'D:\Crawler Storage\chromedriver.exe',
    chrome_options=chrome_options,
)

try:
    # Set the window size before loading the page; the screenshot is taken
    # from this window.
    bro.set_window_size(7680, 4320)
    bro.get(url)
    sleep(30)
    data = bro.get_screenshot_as_png()

    # The screenshot is PNG bytes, so the file is opened in binary mode.
    with open('1.png', 'wb') as f:
        f.write(data)
finally:
    # Always close the browser, even if one of the steps above fails.
    bro.quit()

# Fragment: expects `bro` to be an already-started webdriver instance.
# Switch into the frame named 'login_frame' before looking up the elements
# below by id.
bro.switch_to_frame('login_frame')

bro.find_element_by_id('switcher_plogin').click()
bro.find_element_by_id('u').send_keys('1132300949')
bro.find_element_by_id('login_button').click()

# Capture the page source after the clicks above.
page_text = bro.page_source

WordPress database error: [Table 'yf99682.wp_s6mz6tyggq_comments' doesn't exist]
SELECT SQL_CALC_FOUND_ROWS wp_s6mz6tyggq_comments.comment_ID FROM wp_s6mz6tyggq_comments WHERE ( comment_approved = '1' ) AND comment_post_ID = 2032 ORDER BY wp_s6mz6tyggq_comments.comment_date_gmt ASC, wp_s6mz6tyggq_comments.comment_ID ASC

Leave a Comment

Your email address will not be published.