Web crawler — using the requests library

Chinese document API: http://requests.kennethreitz.org/zh_CN/latest/

Installation

pip install requests 

Get webpage

# coding=utf-8
"""Fetch a web page with requests and read the response body two ways."""
import requests

# NOTE(review): original had a stray space inside the host ("www.baidu .com")
# and the assignment split across two lines — both fixed.
response = requests.get('http://www.baidu.com')

# First way: the .text property, decoded using response.encoding.
print(response.encoding)     # inspect the encoding requests detected
response.encoding = 'utf-8'  # override the encoding before reading .text
print(response.text)         # decoded response body

# Second way: the raw bytes, decoded manually.
print(response.content)           # raw binary response body
print(response.content.decode())  # decode() defaults to utf-8
"""Fetch a web page with requests and read the response body two ways."""
import requests

# NOTE(review): original had a stray space inside the host ("www.baidu .com")
# and the assignment split across two lines — both fixed.
response = requests.get('http://www.baidu.com')

# First way: the .text property, decoded using response.encoding.
print(response.encoding)     # inspect the encoding requests detected
response.encoding = 'utf-8'  # override the encoding before reading .text
print(response.text)         # decoded response body

# Second way: the raw bytes, decoded manually.
print(response.content)           # raw binary response body
print(response.content.decode())  # decode() defaults to utf-8

Save the picture

"""Download an image and save the raw bytes to disk."""
import requests

# NOTE(review): original URL contained a stray space ("kennethreitz .org")
# and the assignment was split across two lines — both fixed.
response = requests.get(
    'http://requests.kennethreitz.org/zh_CN/latest/_static/requests-sidebar.png')
# Binary content must be written in 'wb' mode, not text mode.
with open('a.png', 'wb') as f:
    f.write(response.content)

Get the status code and determine whether the request is successful

 import requests

r
= requests.get('http://www.baidu .com')
# Get status code
print(r.status_code) # The page number after the jump when 200 is not necessarily successful and may be obtained
#
Assert to determine whether the request is successful
assert r.status_code==200 # If it succeeds, there is no response. Failure will report an error

# Get response header
print(r.headers)
# Get request header
print(r.request.headers)

# Get request url
print(r.request.url)
# Get the response url
print(r.url)

Disguise the crawler as a browser with a User-Agent request header

"""Fetch a page while presenting a desktop-browser User-Agent."""
import requests

# Pretend to be desktop Chrome so the site serves its normal HTML.
headers = {
    'User-Agent': ('Mozilla/5.0 (Windows NT 10.0; WOW64) '
                   'AppleWebKit/537.36 (KHTML, like Gecko) '
                   'Chrome/70.0.3538.77 Safari/537.36'),
}
r = requests.get('http://www.baidu.com', headers=headers)
print(r.text)  # decoded response body

Crawl Baidu Tieba (forum) content

import requests


class WebSpider:
    """Crawl Baidu Tieba result pages for one forum and save each page as HTML.

    NOTE(review): the original source was corrupted by HTML-span residue
    inside the string literals and keywords; reconstructed here.
    """

    def __init__(self, name):
        """Store the forum name, a browser-like header, and the URL template."""
        # Pretend to be desktop Chrome so the site serves its normal HTML.
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) '
                          'AppleWebKit/537.36 (KHTML, like Gecko) '
                          'Chrome/70.0.3538.77 Safari/537.36'}
        # pn= is the result offset; each Tieba page holds 50 posts.
        self.url_temp = 'http://tieba.baidu.com/f?kw=' + name + '&ie=utf-8&pn={}'
        self.name = name

    def get_url_list(self):
        """Return the page URLs for offsets 0, 50, 100, ... (1000 pages)."""
        return [self.url_temp.format(i * 50) for i in range(1000)]

    def parse_url(self, url):
        """Fetch one page and return its decoded HTML (utf-8)."""
        print(url)
        r = requests.get(url, headers=self.headers)
        return r.content.decode()

    def run(self):
        """Crawl every page in the URL list and save each one to disk."""
        urls = self.get_url_list()
        # enumerate replaces the original O(n) urls.index(url) per iteration.
        for page_num, url in enumerate(urls, start=1):
            html_str = self.parse_url(url)
            file_name = self.name + 'page {}.html'.format(page_num)
            with open(file_name, 'w', encoding='utf-8') as f:
                f.write(html_str)


if __name__ == '__main__':
    spider = WebSpider(input('Please enter the name of the post: '))
    spider.run()

pip install requests

# coding=utf-8
"""Fetch a web page with requests and read the response body two ways."""
import requests

# NOTE(review): original had a stray space inside the host ("www.baidu .com")
# and the assignment split across two lines — both fixed.
response = requests.get('http://www.baidu.com')

# First way: the .text property, decoded using response.encoding.
print(response.encoding)     # inspect the encoding requests detected
response.encoding = 'utf-8'  # override the encoding before reading .text
print(response.text)         # decoded response body

# Second way: the raw bytes, decoded manually.
print(response.content)           # raw binary response body
print(response.content.decode())  # decode() defaults to utf-8
"""Fetch a web page with requests and read the response body two ways."""
import requests

# NOTE(review): original had a stray space inside the host ("www.baidu .com")
# and the assignment split across two lines — both fixed.
response = requests.get('http://www.baidu.com')

# First way: the .text property, decoded using response.encoding.
print(response.encoding)     # inspect the encoding requests detected
response.encoding = 'utf-8'  # override the encoding before reading .text
print(response.text)         # decoded response body

# Second way: the raw bytes, decoded manually.
print(response.content)           # raw binary response body
print(response.content.decode())  # decode() defaults to utf-8

"""Download an image and save the raw bytes to disk."""
import requests

# NOTE(review): original URL contained a stray space ("kennethreitz .org")
# and the assignment was split across two lines — both fixed.
response = requests.get(
    'http://requests.kennethreitz.org/zh_CN/latest/_static/requests-sidebar.png')
# Binary content must be written in 'wb' mode, not text mode.
with open('a.png', 'wb') as f:
    f.write(response.content)

"""Inspect the status code, headers, and URLs of a requests response."""
import requests

# NOTE(review): stray space in the host and a split assignment — fixed.
r = requests.get('http://www.baidu.com')

# 200 does not always mean success: you may have been redirected and
# received the page at the end of the redirect chain.
print(r.status_code)
# assert is silent on success and raises AssertionError on failure.
# (Tutorial idiom only — asserts are stripped under `python -O`.)
assert r.status_code == 200

print(r.headers)          # response headers
print(r.request.headers)  # request headers actually sent

print(r.request.url)      # URL that was requested
print(r.url)              # final response URL (after any redirects)

"""Fetch a page while presenting a desktop-browser User-Agent."""
import requests

# Pretend to be desktop Chrome so the site serves its normal HTML.
headers = {
    'User-Agent': ('Mozilla/5.0 (Windows NT 10.0; WOW64) '
                   'AppleWebKit/537.36 (KHTML, like Gecko) '
                   'Chrome/70.0.3538.77 Safari/537.36'),
}
r = requests.get('http://www.baidu.com', headers=headers)
print(r.text)  # decoded response body

import requests


class WebSpider:
    """Crawl Baidu Tieba result pages for one forum and save each page as HTML.

    NOTE(review): the original source was corrupted by HTML-span residue
    inside the string literals and keywords; reconstructed here.
    """

    def __init__(self, name):
        """Store the forum name, a browser-like header, and the URL template."""
        # Pretend to be desktop Chrome so the site serves its normal HTML.
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) '
                          'AppleWebKit/537.36 (KHTML, like Gecko) '
                          'Chrome/70.0.3538.77 Safari/537.36'}
        # pn= is the result offset; each Tieba page holds 50 posts.
        self.url_temp = 'http://tieba.baidu.com/f?kw=' + name + '&ie=utf-8&pn={}'
        self.name = name

    def get_url_list(self):
        """Return the page URLs for offsets 0, 50, 100, ... (1000 pages)."""
        return [self.url_temp.format(i * 50) for i in range(1000)]

    def parse_url(self, url):
        """Fetch one page and return its decoded HTML (utf-8)."""
        print(url)
        r = requests.get(url, headers=self.headers)
        return r.content.decode()

    def run(self):
        """Crawl every page in the URL list and save each one to disk."""
        urls = self.get_url_list()
        # enumerate replaces the original O(n) urls.index(url) per iteration.
        for page_num, url in enumerate(urls, start=1):
            html_str = self.parse_url(url)
            file_name = self.name + 'page {}.html'.format(page_num)
            with open(file_name, 'w', encoding='utf-8') as f:
                f.write(html_str)


if __name__ == '__main__':
    spider = WebSpider(input('Please enter the name of the post: '))
    spider.run()

Leave a Comment

Your email address will not be published.