Chinese-language documentation for the Requests library: http://requests.kennethreitz.org/zh_CN/latest/
Installation
pip install requests
Get webpage
# coding=utf-8
import requests

# Fetch the page (URL fixed: the pasted text had a stray space in the host name).
response = requests.get('http://www.baidu.com')

# First way to read the response body: as decoded text.
# Inspect the encoding that requests guessed from the response headers.
print(response.encoding)
# Override the encoding so that .text decodes correctly.
response.encoding = 'utf-8'
# Decoded response body.
print(response.text)

# Second way to read the response body: as raw bytes.
print(response.content)
# decode('codec') turns the bytes into a str; the codec defaults to utf-8.
print(response.content.decode())
# coding=utf-8
import requests

# Fetch the page (URL fixed: the pasted text had a stray space in the host name).
response = requests.get('http://www.baidu.com')

# First way to read the response body: as decoded text.
# Inspect the encoding that requests guessed from the response headers.
print(response.encoding)
# Override the encoding so that .text decodes correctly.
response.encoding = 'utf-8'
# Decoded response body.
print(response.text)

# Second way to read the response body: as raw bytes.
print(response.content)
# decode('codec') turns the bytes into a str; the codec defaults to utf-8.
print(response.content.decode())
Save the picture
import requests

# Download the image (URL fixed: the pasted text had a stray space in the host name).
response = requests.get('http://requests.kennethreitz.org/zh_CN/latest/_static/requests-sidebar.png')
# Save the image; open in 'wb' because the body is binary data.
# (Indentation of the write call fixed — it must be inside the `with` block.)
with open('a.png', 'wb') as f:
    f.write(response.content)
Get the status code and determine whether the request is successful
import requests

# Fetch the page (URL fixed: the pasted text had a stray space in the host name).
r = requests.get('http://www.baidu.com')
# Status code. Note: 200 does not always mean success — after a redirect you
# may have received a different page than the one you requested.
print(r.status_code)
# Assert-style success check: silent on success, AssertionError on failure.
# NOTE(review): asserts are stripped under `python -O`; prefer
# r.raise_for_status() in production code.
assert r.status_code == 200
# Response headers sent by the server.
print(r.headers)
# Request headers that were actually sent.
print(r.request.headers)
# URL that was requested.
print(r.request.url)
# Final URL of the response (after any redirects).
print(r.url)
Disguise the crawler as a browser by sending a User-Agent request header
import requests

# Simulated browser request headers so the server treats us like a real browser.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36'}
# Fetch the page (URL fixed: removed HTML extraction residue from the string literal).
r = requests.get('http://www.baidu.com', headers=headers)
# Decoded response body.
print(r.text)
Crawl the post bar content
import requests
class WebSpider():
    """Crawl Baidu Tieba ("post bar") result pages for a forum name and save each page as HTML.

    All string literals below had HTML extraction residue (`< span ...>` fragments)
    removed, and the tieba URL had a stray space in the host path deleted.
    """

    def __init__(self, name):
        # Browser-like User-Agent so the server does not reject the crawler.
        self.headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36'}
        # URL template; `pn` is the post offset (50 posts per result page).
        self.url_temp = "http://tieba.baidu.com/f?kw=" + name + "&ie=utf-8&pn={}"
        self.name = name

    # Build the list of page URLs (1000 pages, offset stepping by 50).
    def get_url_list(self):
        return [self.url_temp.format(i * 50) for i in range(1000)]

    # Fetch one page and return its HTML decoded as utf-8.
    def parse_url(self, url):
        print(url)
        r = requests.get(url, headers=self.headers)
        return r.content.decode()

    def run(self):
        # Build the address list.
        urls = self.get_url_list()
        # Crawl each page and save it to its own file.
        # (enumerate replaces the original `urls.index(url) + 1`, which was an
        # accidental O(n) lookup per iteration — O(n^2) overall.)
        for page_num, url in enumerate(urls, start=1):
            html_str = self.parse_url(url)
            file_name = self.name + "page {}.html".format(page_num)
            with open(file_name, "w", encoding="utf-8") as f:
                f.write(html_str)


if __name__ == '__main__':
    r = WebSpider(input("Please enter the name of the post: "))
    r.run()
pip install requests
# coding=utf-8
import requests

# Fetch the page (URL fixed: the pasted text had a stray space in the host name).
response = requests.get('http://www.baidu.com')

# First way to read the response body: as decoded text.
# Inspect the encoding that requests guessed from the response headers.
print(response.encoding)
# Override the encoding so that .text decodes correctly.
response.encoding = 'utf-8'
# Decoded response body.
print(response.text)

# Second way to read the response body: as raw bytes.
print(response.content)
# decode('codec') turns the bytes into a str; the codec defaults to utf-8.
print(response.content.decode())
# coding=utf-8
import requests

# Fetch the page (URL fixed: the pasted text had a stray space in the host name).
response = requests.get('http://www.baidu.com')

# First way to read the response body: as decoded text.
# Inspect the encoding that requests guessed from the response headers.
print(response.encoding)
# Override the encoding so that .text decodes correctly.
response.encoding = 'utf-8'
# Decoded response body.
print(response.text)

# Second way to read the response body: as raw bytes.
print(response.content)
# decode('codec') turns the bytes into a str; the codec defaults to utf-8.
print(response.content.decode())
import requests

# Download the image (URL fixed: the pasted text had a stray space in the host name).
response = requests.get('http://requests.kennethreitz.org/zh_CN/latest/_static/requests-sidebar.png')
# Save the image; open in 'wb' because the body is binary data.
with open('a.png', 'wb') as f:
    f.write(response.content)
import requests

# Fetch the page (URL fixed: the pasted text had a stray space in the host name).
r = requests.get('http://www.baidu.com')
# Status code. Note: 200 does not always mean success — after a redirect you
# may have received a different page than the one you requested.
print(r.status_code)
# Assert-style success check: silent on success, AssertionError on failure.
# NOTE(review): asserts are stripped under `python -O`; prefer
# r.raise_for_status() in production code.
assert r.status_code == 200
# Response headers sent by the server.
print(r.headers)
# Request headers that were actually sent.
print(r.request.headers)
# URL that was requested.
print(r.request.url)
# Final URL of the response (after any redirects).
print(r.url)
import requests

# Simulated browser request headers so the server treats us like a real browser.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36'}
# Fetch the page (URL fixed: removed HTML extraction residue from the string literal).
r = requests.get('http://www.baidu.com', headers=headers)
# Decoded response body.
print(r.text)
import requests
class WebSpider():
    """Crawl Baidu Tieba ("post bar") result pages for a forum name and save each page as HTML.

    All string literals below had HTML extraction residue (`< span ...>` fragments)
    removed, and the tieba URL had a stray space in the host path deleted.
    """

    def __init__(self, name):
        # Browser-like User-Agent so the server does not reject the crawler.
        self.headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36'}
        # URL template; `pn` is the post offset (50 posts per result page).
        self.url_temp = "http://tieba.baidu.com/f?kw=" + name + "&ie=utf-8&pn={}"
        self.name = name

    # Build the list of page URLs (1000 pages, offset stepping by 50).
    def get_url_list(self):
        return [self.url_temp.format(i * 50) for i in range(1000)]

    # Fetch one page and return its HTML decoded as utf-8.
    def parse_url(self, url):
        print(url)
        r = requests.get(url, headers=self.headers)
        return r.content.decode()

    def run(self):
        # Build the address list.
        urls = self.get_url_list()
        # Crawl each page and save it to its own file.
        # (enumerate replaces the original `urls.index(url) + 1`, which was an
        # accidental O(n) lookup per iteration — O(n^2) overall.)
        for page_num, url in enumerate(urls, start=1):
            html_str = self.parse_url(url)
            file_name = self.name + "page {}.html".format(page_num)
            with open(file_name, "w", encoding="utf-8") as f:
                f.write(html_str)


if __name__ == '__main__':
    r = WebSpider(input("Please enter the name of the post: "))
    r.run()