|
|
刚哪位大佬要的,快来取,新手练手,大佬莫怪哈。
图片按文件夹分别下载到 img 目录下;info.json 是导出的 JSON 文件,包含标题和标签信息。哪位大佬有空的话,帮忙指点一下 JSON 怎么保存更好,现在保存的内容没法直接阅读。
[ol][*]# -*- coding: utf-8 -*-
[*]
[*]import requests
[*]import json
[*]import os
[*]from lxml import etree
[*]from multiprocessing import Process
[*]import threadpool
[*]
[*]
[*]class SMTMM():
[*] def __init__(self):  # Store the site URLs, start page number, output directory and request headers on the instance.
[*] self.base_url = "https://smtmm.win"  # root URL of the target site
[*] self.page_base_url = "https://smtmm.win/?page="  # listing-page URL prefix; get_article_list appends the page number to it
[*] self.start_page = 1  # NOTE(review): presumably the first listing page to crawl; its use is not visible in this excerpt
[*] self.dir_base = "img/"  # relative output directory; the accompanying post says downloads are saved under img/
[*] self.headers = {  # HTTP headers passed to requests.get in get_article_list
[*] 'User-Agent': 'Mozilla/5.0 (Linux; U; Android 8.1.0; zh-cn; BLA-AL00 Build/HUAWEIBLA-AL00) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/57.0.2987.132 MQQBrowser/8.9 Mobile Safari/537.36'}  # mobile (Android / MQQBrowser) User-Agent string
[*]
[*] def get_article_list(self, n):
[*] url = self.page_base_url + str(n)
[*] r = requests.get(url, headers=self.headers)
[*] tree = etree.HTML(r.text)
[*] article_list = tree.xpath("/html/body/section/div[1]/div/article/a/@href")
[*] if len(article_list) 复制代码
|
|