|
楼主 |
发表于 2018-6-6 07:34:00
|
显示全部楼层
eqblog 发表于 2018-6-6 07:34
headers={'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gec ...
大佬您来了

加了 headers 好像还是不行。图片的 URL 打开后似乎是一个网页,应该还得再解析一次 HTML。比如这张图片:http://www.siimg.com/i/?i=u/20180605/12045739.jpg
# Crawler snippet from the post: downloads forum index/detail pages and
# yields the <h3> elements found on an index page. The pasted script is cut
# off right after the start of parse_page_detail (see the note at the end).
import json
import os
import re
from hashlib import md5
from multiprocessing import Pool

import requests
from bs4 import BeautifulSoup
from requests.exceptions import RequestException

# Headers sent with every request.
# NOTE(review): the trailing "Name" in the User-Agent looks like a paste
# artifact -- confirm before changing it.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36Name',
    'Referer': 'https://t66y.com',
}

# Seconds to wait for the server; without a timeout requests.get() can block
# forever on a stalled connection.
REQUEST_TIMEOUT = 10


def _fetch_html(url, error_message, show_status=False):
    """Return the body of *url* decoded as GBK, or None on any failure.

    Args:
        url: Address to download.
        error_message: Text printed when the request raises.
        show_status: When true, print the HTTP status code of the response.

    Returns:
        The response text for a 200 response, otherwise None.
    """
    try:
        response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    except RequestException:
        # RequestException is the base class of every requests error
        # (connection failures, timeouts, invalid URLs, ...).
        print(error_message)
        return None
    response.encoding = 'gbk'
    if show_status:
        print(response.status_code)
    if response.status_code == 200:
        return response.text
    return None


def get_page_index(offset):
    """Download index page number *offset*; return its HTML or None."""
    url = 'http://t66y.com/thread0806.php?fid=16&search=&page=' + str(offset)
    return _fetch_html(url, '请求索引页出错')


def parse_index_page(html):
    """Yield every <h3> element found in *html*.

    Yields nothing when *html* is empty or None, which is what
    get_page_index returns after a failed download.
    """
    if not html:
        return
    soup = BeautifulSoup(html, 'html.parser')
    yield from soup.find_all('h3')


def get_page_detail(url):
    """Download the page at *url*; return its HTML or None.

    Prints the HTTP status code of the response. Request failures are
    caught via RequestException: the builtin ConnectionError used before is
    not a base class of the requests exceptions, so they went uncaught.
    """
    return _fetch_html(url, 'Error occurred', show_status=True)


# NOTE: the original post is truncated at this point. The remainder of the
# script begins with the fragment below and is not available:
#     def parse_page_detail(html2): pattern = re.compile('
|