# ==[ 4chan Image Scraper Tool ]==
# Tool used for mass downloading images from 4chan threads
# Author: Justus Fee
# Created: 3 November 2019
# Last edited: 20 April 2020
# Req. packages: beautifulsoup4, requests, lxml

import os
import time

import bs4
import requests

# Seconds to wait on a stalled connection before abandoning a request.
REQUEST_TIMEOUT = 30
# Number of bytes pulled from a response per write to disk.
CHUNK_SIZE = 100000


def _download(file_url, destination):
    """Stream the resource at *file_url* into the file *destination*.

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server answers with an HTTP error status. The status is checked
            before the destination file is opened, so an error page is never
            saved under an image name.
    """
    # stream=True keeps the body out of memory so iter_content really does
    # deliver it chunk by chunk; the context manager releases the connection.
    with requests.get(file_url, stream=True, timeout=REQUEST_TIMEOUT) as page:
        page.raise_for_status()
        with open(destination, "wb") as image:
            for chunk in page.iter_content(CHUNK_SIZE):
                image.write(chunk)


def main():
    """Prompt for a thread URL and folder name, then download every image.

    Files are saved as chan/<folder>/<n><extension>, where n is the 1-based
    position of the link among the "div.fileText a" matches in the thread
    page. A file that cannot be downloaded is reported at the end and does
    not stop the remaining downloads.

    Raises:
        requests.RequestException: if the thread page itself cannot be
            fetched.
    """
    url = input("What's the thread you need, chief?\n>")
    res = requests.get(url, timeout=REQUEST_TIMEOUT)
    res.raise_for_status()

    folder = input("What do you want the folder called, chief?\n>")
    target_dir = "chan/" + folder
    os.makedirs(target_dir, exist_ok=True)

    thread = bs4.BeautifulSoup(res.text, features="lxml")
    pics = thread.select("div.fileText a")
    pics_len = len(pics)
    print("\n{0} pictures found.\n".format(pics_len))

    failures = []
    start_time = time.time()
    for number, link in enumerate(pics, start=1):
        href = link["href"]
        # Only protocol-relative links ("//host/path") need a scheme added.
        page_url = "https:" + href if href.startswith("//") else href
        extension = os.path.splitext(os.path.basename(page_url))[1]
        destination = os.path.join(target_dir, str(number) + extension)
        try:
            _download(page_url, destination)
        except requests.RequestException as error:
            # Best effort: remember the failure and keep going.
            failures.append((page_url, error))
        print(
            "[{0:.2%}] Downloading picture {1} / {2}".format(
                number / pics_len, number, pics_len
            ),
            end="\r",
        )
    end_time = time.time()

    print("\nJob's done!")
    for page_url, error in failures:
        print("Could not download {0}: {1}".format(page_url, error))
    elapsed = round(end_time - start_time)
    # ".2f" gives fixed-point minutes; a bare ".2" means two significant
    # digits and falls into scientific notation from ten minutes upwards.
    print("Took {0:.2f} minutes ({1} seconds).".format(elapsed / 60, elapsed))


if __name__ == "__main__":
    main()