#!/usr/bin/env python3
"""Download media files from file-description pages and print figure stanzas.

Usage: pass one or more page URLs as command-line arguments.

For every page URL the script:

1. fetches the page HTML,
2. scrapes the "Original file" link, the short license name and the author
   out of it (the ``licensetpl_short`` marker suggests Wikimedia Commons
   file pages -- TODO confirm),
3. saves the linked file under ``./static/assets/``, and
4. prints a ``[[mediums.works.figures]]`` stanza (``file`` and ``byline``
   keys) for it on stdout.

Problems with individual URLs are reported on stderr and the remaining URLs
are still processed; the exit status is 1 if any URL failed.
"""
import re
import sys
from pathlib import Path
from typing import NamedTuple, Optional, Sequence

# Directory the downloaded files are written to; it must already exist.
ASSET_DIR = "./static/assets/"

# Seconds to wait on each HTTP request before giving up.
REQUEST_TIMEOUT = 30

# yes, html is not a regular language
# The greedy leading ``.*`` makes this capture the LAST href on the line.
_HREF_RE = re.compile(r'.*href="(.*?)".*')

# Skips leading whitespace and any leading tags, then captures the text up to
# the next ``<``.  Every part is optional, so this pattern always matches.
# NOTE(review): the original author pattern was truncated in the source (only
# its tail ``)?(.*?)<.*`` survived); this is a reconstruction -- confirm
# against real pages.
_LEADING_TEXT_RE = re.compile(r'\s*(?:<[^>]*>)*([^<]*)')


class PageInfo(NamedTuple):
    """Values scraped from one page; a field is None when it was not found."""

    file_url: Optional[str]
    license_name: Optional[str]
    author: Optional[str]


def parse_file_page(html: str) -> PageInfo:
    """Scrape the file link, license and author from a page's HTML.

    The page is scanned line by line:

    * a line containing ``Original file`` supplies the file URL (the last
      ``href`` on that line); the last such line wins,
    * a line containing ``licensetpl_short`` supplies the license (the text
      after the last ``>`` on that line); the last such line wins,
    * the first line containing ``Author<`` supplies the author, taken from
      the line two below it; later author markers are ignored.

    Args:
        html: Full HTML text of the page.

    Returns:
        A ``PageInfo``; fields that could not be found are None.
    """
    file_url = None
    license_name = None
    author = None

    lines = html.split('\n')
    for idx, line in enumerate(lines):
        if 'Original file' in line:
            found = _HREF_RE.match(line)
            # A marker line without an href is ignored rather than crashing.
            if found:
                file_url = found.group(1)
        elif 'licensetpl_short' in line:
            license_name = line.rsplit('>')[-1]
        # NOTE(review): the original marker literal was truncated right after
        # ``Author`` where a ``<`` appears to have started a tag; ``Author<``
        # is the longest prefix that can be recovered -- confirm.
        elif author is None and 'Author<' in line and idx + 2 < len(lines):
            text = _LEADING_TEXT_RE.match(lines[idx + 2]).group(1).strip()
            if text:
                author = text

    return PageInfo(file_url, license_name, author)


def absolute_url(url: str) -> str:
    """Return *url* with ``https:`` prepended unless it starts with ``http``.

    This turns protocol-relative links (``//host/path``) into fetchable URLs.
    """
    if url.startswith('http'):
        return url
    return 'https:' + url


def format_figure(local_filename: str, license_name: Optional[str],
                  author: Optional[str]) -> str:
    """Build the three-line figure stanza printed for one downloaded file.

    Missing values are rendered as the text ``None`` so that they stand out
    in the output and can be filled in by hand.
    """
    return '\n'.join([
        '[[mediums.works.figures]]',
        'file = "' + local_filename + '"',
        f'byline = "{license_name}{author}"',
    ])


def main(argv: Sequence[str]) -> int:
    """Process every page URL in *argv*.

    Args:
        argv: Page URLs to process.

    Returns:
        0 when every URL was handled, 1 when at least one was skipped.
    """
    # Imported here so the parsing helpers above stay importable (and
    # testable) on machines without the third-party HTTP library.
    import requests

    status = 0
    for page_url in argv:
        try:
            page = requests.get(page_url, timeout=REQUEST_TIMEOUT)
            # Do not scrape an error page as if it were the real one.
            page.raise_for_status()
            info = parse_file_page(page.text)

            if info.file_url is None:
                # Without this guard the HTML page itself would be saved as
                # the "image".
                print(f'{page_url}: no "Original file" link found, skipping',
                      file=sys.stderr)
                status = 1
                continue

            file_url = absolute_url(info.file_url)
            local_filename = file_url.split('/')[-1]
            if not local_filename:
                print(f'{page_url}: cannot derive a file name from '
                      f'{file_url}, skipping', file=sys.stderr)
                status = 1
                continue

            download = requests.get(file_url, allow_redirects=True,
                                    timeout=REQUEST_TIMEOUT)
            download.raise_for_status()
        except requests.RequestException as err:
            print(f'{page_url}: request failed: {err}', file=sys.stderr)
            status = 1
            continue

        # write_bytes opens, writes and closes the file in one step.
        Path(ASSET_DIR, local_filename).write_bytes(download.content)

        if info.license_name is None or info.author is None:
            print(f'{page_url}: license or author not found, '
                  'byline is incomplete', file=sys.stderr)
        print(format_figure(local_filename, info.license_name, info.author))

    return status


if __name__ == '__main__':
    sys.exit(main(sys.argv[1:]))