I finally got around to fixing the wallpaper script that was the subject of my last two posts: Bing wallpaper on a Mac and Bing wallpaper on a Mac 2.0.
The script was rewritten in Python and it is now scraping the image url directly from the Bing website instead of third party aggregators.
The code is fairly straightforward, I am leveraging BeautifulSoup4 to scrape bing.com and if I weren’t lazily catching all exceptions in a single except
block, the funcy module would retry on failed attempts to scrape the website or download the file. I’m leaving funcy in the script, but it likely won’t help if the requests
calls fail.
I continue to have the script scheduled to run every five minutes via cron, and macOS is configured to change the wallpaper every fifteen minutes.
import argparse
import datetime
import hashlib
import logging
import os
import re
import shutil

import bs4
import funcy
import requests

logging.basicConfig(level=logging.INFO,
                    format='[%(asctime)s] - %(levelname)s - %(message)s')
log = logging.getLogger()

# Per-request timeout (seconds) so a hung connection cannot stall the cron run.
REQUEST_TIMEOUT = 30


@funcy.retry(3, timeout=lambda a: 2 ** a)
def main(dest: str) -> None:
    """
    Download today's Bing image of the day if we don't already have it.

    @param: dest Destination for downloaded image

    Destination filename will be YYYY-mm-dd_{md5sum}.jpg.  Any pre-existing
    .jpg files in dest are moved to {dest}/Archive afterwards, if that
    directory exists.

    Network failures are logged and re-raised as requests.RequestException
    so the funcy.retry decorator can actually retry the run (a bare
    ``except:`` previously swallowed everything, making retry a no-op).
    """
    bing_url = 'https://bing.com'
    archive_dir = os.path.join(dest, 'Archive')

    try:
        log.info(f"Connecting to {bing_url}")
        r = requests.get(bing_url, timeout=REQUEST_TIMEOUT)
        # Raises requests.HTTPError (a RequestException) on 4xx/5xx.
        r.raise_for_status()
    except requests.RequestException:
        # Log and re-raise: funcy.retry only retries if the exception escapes.
        log.error(f"Could not get data from {bing_url}.")
        raise

    img_cont = bs4.BeautifulSoup(
        r.content, 'html.parser').find_all('div', class_='img_cont')
    if not img_cont:
        log.error(f"Could not parse html from {bing_url}. Exiting.")
        return

    # The image URL lives inside a CSS "background-image: url(...)" value;
    # guard against a missing match instead of crashing on .group(1).
    match = re.search(r'\((.+)\)', str(img_cont))
    if match is None:
        log.error(f"Could not parse html from {bing_url}. Exiting.")
        return
    url = bing_url + match.group(1)
    log.info(f"Found image url in html: {url}")

    # Hash the URL (not the image bytes) so we can dedupe before downloading.
    md5sum = hashlib.md5(url.encode('utf-8')).hexdigest()
    log.info(f"Hash of image url: {md5sum}")

    # Stop if we already have this checksum in dest.
    existing_files = os.listdir(dest)
    log.debug(f"Existing files in {dest} are {existing_files}")
    if any(md5sum in f for f in existing_files):
        log.info(f"Found {md5sum} hash in {dest}. Exiting.")
        return

    # Build the destination filename: YYYY-mm-dd_{md5sum}.jpg
    image_file = f"{datetime.date.today().isoformat()}_{md5sum}.jpg"
    image_fullname = os.path.join(dest, image_file)

    # Download the image; again, re-raise network errors for retry.
    try:
        log.info(f"Downloading {url} to {image_fullname}")
        r = requests.get(url, allow_redirects=True, timeout=REQUEST_TIMEOUT)
        r.raise_for_status()
    except requests.RequestException:
        log.error(f"Could not download {url} to {image_fullname}")
        raise

    with open(image_fullname, 'wb') as f:
        log.debug(f"Writing to disk as {image_fullname}")
        f.write(r.content)

    # Archive the previously existing jpg files if the archive directory exists.
    if os.path.isdir(archive_dir):
        for name in existing_files:
            if name.endswith('.jpg'):
                log.info(f"Archiving {name} to {archive_dir}")
                shutil.move(os.path.join(dest, name), archive_dir)

    log.info('Done')


if __name__ == '__main__':
    # Parse the (very basic) command line -- a required destination
    # directory -- then download the image, archiving any existing
    # .jpgs to {dest}/Archive.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--dest',
        help='destination directory',
        type=str,
        required=True
    )
    args = parser.parse_args()
    if not os.path.isdir(args.dest):
        log.error(f"{args.dest} is not a directory. Exiting.")
    else:
        try:
            main(args.dest)
        except requests.RequestException:
            # All retries exhausted; swallow here so the cron job
            # exits cleanly instead of mailing a traceback.
            log.error("All download attempts failed. Exiting.")
Available on GitHub here.
Post a Comment