"""Download a single comic: scrape its detail page, then fetch every page image."""

import os
import threading

import requests
from faker import Faker
from pyquery import PyQuery as pq

faker = Faker(locale='zh_CN')

# Seconds to wait on any single HTTP request; without a timeout a stalled
# server would block a worker thread forever.
REQUEST_TIMEOUT = 30


def get_doc(url):
    """Fetch *url* with a random User-Agent and return it as a PyQuery document."""
    headers = {'User-Agent': faker.user_agent()}  # random UA on every request
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    return pq(response.text)


def make_file(file_path, file_name):
    """Ensure the directory ``file_path/file_name`` exists and return its path."""
    target = os.path.join(file_path, file_name)
    # exist_ok avoids the check-then-create race of the exists()/makedirs() pair.
    os.makedirs(target, exist_ok=True)
    return target


def save_img_noreferer(path, src, retries=3):
    """Download the image at *src* into directory *path* (best effort).

    The file is named after the last path segment of *src*. Failures are
    skipped silently so one bad image does not abort the whole download.

    Args:
        path: Existing directory to write the image into.
        src: Image URL.
        retries: How many more times to retry after an HTTP 522 response.
    """
    headers = {'User-Agent': faker.user_agent()}
    try:
        response = requests.get(src, headers=headers, timeout=REQUEST_TIMEOUT)
    except requests.RequestException:
        # Network error or anti-scraping rejection: skip this image.
        return

    status = response.status_code
    if status in (200, 206):
        name = src.split('/')[-1]  # keep the server-side file name
        with open(os.path.join(path, name), 'wb') as f:
            f.write(response.content)
    elif status == 522:
        # Origin timed out; retry, but bounded so a dead server cannot
        # recurse forever.
        if retries > 0:
            save_img_noreferer(path, src, retries - 1)
    elif status == 404:
        # The image may be stored as .png instead of the assumed .jpg.
        # Swap only the extension and only once: a URL that is already
        # .png (or has no .jpg suffix) is given up on instead of looping.
        if src.endswith('.jpg'):
            save_img_noreferer(path, src[:-len('jpg')] + 'png', retries)
    # Any other status code: skip the image.


glock = threading.Lock()


def save_imgs(srcs, path):
    """Worker loop: pop URLs from the shared list *srcs* and download each into *path*.

    Intended to run in several threads at once; ``glock`` guards the shared
    list. Returns once the list is empty.
    """
    while True:
        with glock:
            if not srcs:
                break
            src = srcs.pop()
        # Download outside the lock so workers overlap their network waits.
        save_img_noreferer(path, src)


def main():
    """Prompt for a comic URL, then download all of its pages with 64 threads."""
    url_main = input('请输入下载漫画的地址:')
    # Comic id: last URL segment, cut at the first 'o'.
    num = url_main.split('/')[-1].split('o')[0]
    doc = get_doc(url_main)
    # Page count: first space-separated token of the detail box text.
    page = doc('.ld_box > div:nth-child(2) > .ld_body').text().split(' ')[0]
    title = doc('#comicdetail > h1').text() + 'id=' + num + 'page=' + page
    # Image host: taken from the third attribute of the first matched <img>.
    # NOTE(review): this depends on the page's attribute order - confirm
    # against the live markup.
    src_format = doc('>img')[0].items()[2][1].split('/')[2]
    print('当前漫画服务器地址为:' + src_format + ',数字越大下载越慢,请耐心等待')
    pic_format = 'jpg'  # default image format

    # Strip the characters that are illegal in file names.
    title = title.translate(str.maketrans('', '', '\\/:*?"<>|'))
    print('开始下载:' + title)

    path = make_file('./本子', title)
    print(title + '用有' + page + '页')

    srcs = [
        'https://' + src_format + '/galleries/' + num + '/' + str(i) + '.' + pic_format
        for i in range(1, int(page) + 1)
    ]

    workers = []
    for _ in range(64):  # 64 worker threads
        consumer = threading.Thread(target=save_imgs, args=[srcs, path])
        consumer.start()
        workers.append(consumer)
    # Wait for every worker so main() returns only when the download is done.
    for consumer in workers:
        consumer.join()


if __name__ == '__main__':
    main()