下面介绍 3 种使用 Python 获取 cookie 的方法。
(1)借助handler
这是网上介绍最多的一种方法,但用起来比较麻烦:需要先创建 CookieJar,再通过 HTTPCookieProcessor 构造 opener 来发起请求。
import base64
from http import cookiejar
from urllib import request

import requests
class Craw():
    """Collect a site's cookies with urllib's cookie handler for later requests.

    Attributes:
        url: Page requested by ``getCookies``. It is an empty placeholder
            here and must be set before ``getCookies`` is called.
        headers: Request headers shared by the follow-up requests;
            ``getCookies`` adds the ``Cookie`` entry to it.
    """

    def __init__(self):
        self.url = ''
        # The dict has to exist before individual header keys can be assigned.
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) '
                          'AppleWebKit/537.36 (KHTML, like Gecko) '
                          'Chrome/65.0.3325.162 Safari/537.36',
            'Content-Type': 'application/x-www-form-urlencoded',
        }

    def getCookies(self):
        """Request ``self.url`` and store the cookies it sets.

        The cookies are serialised as ``name=value;`` pairs and saved in
        ``self.headers['Cookie']``.

        Returns:
            The cookie header string (empty when no cookie was set).
        """
        jar = cookiejar.CookieJar()
        opener = request.build_opener(request.HTTPCookieProcessor(jar))
        # Only the side effect matters: opening the URL fills the jar.
        # The context manager closes the response afterwards.
        with opener.open(self.url):
            pass
        # Cookie.value may be None for a valueless cookie; emit it as empty.
        cookie_value = ''.join(
            item.name + '=' + (item.value or '') + ';' for item in jar
        )
        self.headers['Cookie'] = cookie_value
        return cookie_value

    def getVerificationCode(self):
        """Download the image at ``img_url`` and return it Base64-encoded.

        Judging by the method name this is presumably the verification-code
        (captcha) image; ``img_url`` is an empty placeholder to be filled in.

        Returns:
            The Base64-encoded response body as ``bytes``.
        """
        img_url = ''
        # The shared headers (including any stored Cookie) are passed as-is.
        img_response = requests.get(url=img_url, headers=self.headers)
        return base64.b64encode(img_response.content)
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
- 40
- 41
- 42
- 43
- 44
- 45
- 46
- 47
- 48
- 49
- 50
- 51
(2)使用响应头(response headers)中的 Set-Cookie 字段
import requests
import re
class Crawler():
    """Build a ``Cookie`` header from the raw ``Set-Cookie`` response header.

    Only the method is shown in this snippet; ``self.url`` and the
    ``self.headers`` dict are expected to be set up elsewhere (for example in
    an ``__init__`` that is not part of this snippet).
    """

    def getCookie(self):
        """POST to ``self.url`` and keep the wanted cookies from ``Set-Cookie``.

        The header is split on ``;`` and ``,`` and every fragment that
        mentions ``DZSW_SESSIONID`` or ``bl0gm1HBTB`` is kept. The result is
        stored in ``self.headers['Cookie']``.

        Returns:
            The cookie header string (empty when nothing matched or the
            response carried no ``Set-Cookie`` header).
        """
        response = requests.post(self.url)
        # A response without Set-Cookie yields an empty result, not KeyError.
        set_cookie = response.headers.get('Set-Cookie', '')
        wanted = ('DZSW_SESSIONID', 'bl0gm1HBTB')
        cookie_value = ''
        for fragment in re.split(r'[;,]', set_cookie):
            if any(name in fragment for name in wanted):
                # Fragments keep the blank that followed the separator.
                cookie_value += fragment.strip() + ';'
        self.headers['Cookie'] = cookie_value
        return cookie_value
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
(3)使用response的cookies属性获取
这里只写 getCookie 方法,代码如下:
import requests
class Crawler():
    """Build a ``Cookie`` header from the ``cookies`` attribute of a response.

    Only the method is shown in this snippet; it reads ``self.url`` and writes
    into ``self.headers``, neither of which is defined here.
    """

    def getCookie(self):
        """GET ``self.url`` and store its cookies in ``self.headers['Cookie']``.

        Each cookie is written as ``name=value;`` and the pairs are
        concatenated in the order ``response.cookies.items()`` yields them.
        """
        resp = requests.get(self.url)
        pairs = [name + '=' + val + ';' for name, val in resp.cookies.items()]
        self.headers['Cookie'] = ''.join(pairs)
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15