# p站作者爬取 (Pixiv author artwork crawler)

import re
import requests
import json
import os
import requests
from concurrent.futures import ThreadPoolExecutor
from lxml import etree
from multiprocessing import Process, Queue
from urllib import parse
import re
from progress.bar import Bar
from tqdm import tqdm
from requests.exceptions import RequestException
from concurrent.futures import ThreadPoolExecutor

import base64
from threading import Thread
import queue
# HTTP headers passed to every request made by this script (empty in this file).
headers = {}

# Cookies passed to every request made by this script (empty in this file).
cookies = {}

# ID of the author whose profile page is crawled.
pid = 77423935

# Profile endpoint queried for the author selected by ``pid``.
url = f"https://www.pixiv.net/ajax/user/{pid}/profile/all"
def create_directory(path):
    """Create the directory ``path``, including any missing parent directories.

    Calling it for a directory that already exists is not an error.

    Args:
        path: Directory to create.
    """
    os.makedirs(path, exist_ok=True)
def geturl(q):
    """Queue the original-image URL of every page of every illustration.

    Requests the module-level profile ``url`` and takes the illustration IDs
    from ``body.illusts`` of the JSON reply.  For each ID the ``pages``
    endpoint is requested and every ``urls.original`` value is put on ``q``.
    The string ``'ok'`` is always put last as an end-of-stream marker, even
    when a request fails, so a consumer blocking on ``q.get()`` can stop.

    Args:
        q: Queue-like object whose ``put`` receives the image URLs followed
            by the ``'ok'`` marker.

    Raises:
        requests.exceptions.RequestException: If the profile request fails.
            A failure for a single illustration is printed and that
            illustration is skipped instead.
    """
    params = {
        "lang": "zh",
        "version": "ffd686701f8dee21374fbbf9873ec37bbef4be58",
    }
    try:
        profile = requests.get(
            url, headers=headers, cookies=cookies, params=params, timeout=30
        )
        profile.raise_for_status()
        # Iterating the ``illusts`` container yields the illustration IDs.
        illust_ids = list(profile.json()['body']['illusts'])

        for n, illust_id in enumerate(illust_ids, start=1):
            print(f'{n} {illust_id}')
            pages_url = f'https://www.pixiv.net/ajax/illust/{illust_id}/pages?lang=zh&version=6c33903e9ee09f649515b0326775bf9913d930a1'
            try:
                pages = requests.get(
                    pages_url, headers=headers, cookies=cookies, timeout=30
                )
                pages.raise_for_status()
                original_values = [
                    item['urls']['original'] for item in pages.json()['body']
                ]
            except (RequestException, ValueError, KeyError, TypeError) as error:
                # One unreadable illustration should not abort the whole crawl.
                print(f'获取作品{illust_id}失败，已跳过喵: {error!r}')
                continue

            for oriurl in original_values:
                print(f'正在把{oriurl}塞入队列喵')
                q.put(oriurl)
    finally:
        # Without the marker the consumer would wait on the queue forever.
        q.put('ok')
def downurl(url, save_dir=None):
    """Download one file into the image directory while showing a progress bar.

    Args:
        url: Address of the file to fetch; the text after its last ``/`` is
            used as the file name.
        save_dir: Directory to write the file into.  When omitted, the
            per-author directory built from the module-level ``pid`` is used.
            The directory is not created here.

    Raises:
        requests.exceptions.RequestException: If the request fails or the
            server answers with an error status.
        OSError: If the file cannot be written.
    """
    if save_dir is None:
        save_dir = f'E:\\p站图片\\p站{pid}'  # edit this to change the download location
    filename = url.split('/')[-1]
    save_file = os.path.join(save_dir, filename)

    # Using the response as a context manager releases the streamed
    # connection even when the transfer fails part-way.
    with requests.get(
        url, stream=True, headers=headers, cookies=cookies, timeout=30
    ) as response:
        response.raise_for_status()
        total_size = int(response.headers.get('content-length', 0))
        try:
            with open(save_file, 'wb') as file, tqdm(
                total=total_size, unit='B', unit_scale=True
            ) as progress_bar:
                for chunk in response.iter_content(chunk_size=1024):
                    if chunk:
                        file.write(chunk)
                        progress_bar.update(len(chunk))
        except (RequestException, OSError):
            # A truncated file would look like a finished download; drop it.
            if os.path.exists(save_file):
                os.remove(save_file)
            raise
    print(f'{filename}下载完成喵')



def load_url(q):
    """Download every URL received from ``q`` on a pool of worker threads.

    Items are taken from ``q`` until the string ``'ok'`` arrives; every other
    item is submitted to ``downurl`` on a pool of 40 threads.  The function
    returns once all submitted downloads have finished.  A download that
    raises is reported on standard output instead of being dropped silently.

    Args:
        q: Queue-like object whose ``get`` yields URLs followed by ``'ok'``.
    """
    def report_failure(future, link):
        # Exceptions raised inside a worker are only visible via the future.
        if future.cancelled():
            return
        error = future.exception()
        if error is not None:
            print(f'{link}下载失败了喵: {error!r}')

    with ThreadPoolExecutor(max_workers=40) as executor:
        # ``iter`` with a sentinel keeps calling ``q.get`` until 'ok' arrives.
        for link in iter(q.get, 'ok'):
            future = executor.submit(downurl, link)
            future.add_done_callback(
                lambda done, link=link: report_failure(done, link)
            )

if __name__ == '__main__':
    # The download directory must exist before any worker writes into it.
    create_directory(f'E:\\p站图片\\p站{pid}')  # edit this to change the download location

    # One process collects image URLs while a second one downloads them;
    # the queue carries the URLs between the two.
    q = Queue()
    producer = Process(target=geturl, args=(q,))
    consumer = Process(target=load_url, args=(q,))
    producer.start()
    consumer.start()

    # Wait for both processes so the script ends only when all work is done.
    producer.join()
    consumer.join()