blob: 7f6144bcd4490b58c40e3bc165d199e9cb598535 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
|
#!/usr/bin/env python3
# yes, html is not a regular language
import requests, sys
import re

uris = sys.argv[1:]

# The patterns are the ones this script has always used, compiled once here
# instead of on every matching line.
# The leading greedy `.*` means the *last* href="..." on the line is captured.
_HREF_RE = re.compile(r'.*href="(.*?)".*')
# Group 2 is the text between an optional leading `...>` and the next `<`.
_AUTHOR_RE = re.compile(r'(.*?>)?(.*?)<.*')


def parse_file_page(page_text, page_uri):
    """Extract the download URI, licence text and author from a file page.

    The page is scanned line by line for three textual markers:

    * a line containing ``Original file`` supplies the download URI (taken
      from an ``href="..."`` attribute on that line);
    * a line containing ``licensetpl_short`` supplies the licence (the text
      after the last ``>`` on that line);
    * the first line containing ``Author</td`` supplies the author, read
      from the line two below it.

    NOTE(review): these markers look like Wikimedia Commons file-page
    markup -- confirm against the pages this script is actually run on.

    Args:
        page_text: Full HTML of the page as a string.
        page_uri: URI the page was fetched from; it is returned as the
            download URI when no usable ``Original file`` link is found.

    Returns:
        A ``(file_uri, license_text, author)`` tuple. ``license_text`` and
        ``author`` are ``None`` when the corresponding marker is absent or
        its line cannot be parsed.
    """
    file_uri = page_uri
    license_text = None
    author = None

    lines = page_text.split('\n')
    for idx, line in enumerate(lines):
        if 'Original file' in line:
            href = _HREF_RE.match(line)
            # A marker line without an href keeps the previous URI instead
            # of crashing on `None.group`.
            if href is not None:
                file_uri = href.group(1)
        elif 'licensetpl_short' in line:
            license_text = line.rsplit('>')[-1]
        elif author is None and 'Author</td' in line and idx + 2 < len(lines):
            # The bounds check above covers a marker sitting in the last two
            # lines of the page, where there is no "two lines below".
            found = _AUTHOR_RE.match(lines[idx + 2])
            if found is not None:
                author = found.group(2)

    # Anything not already starting with "http" (e.g. a protocol-relative
    # "//host/path" link) gets an explicit https scheme.
    if not file_uri.startswith('http'):
        file_uri = 'https:' + file_uri
    return file_uri, license_text, author


def fetch_figure(page_uri):
    """Download the file referenced by one page and print its figure entry.

    Fetches ``page_uri``, resolves the download URI with
    :func:`parse_file_page`, saves the downloaded bytes under
    ``./static/assets/`` using the last path segment of the download URI as
    the file name, and prints a ``[[mediums.works.figures]]`` entry with
    ``file`` and ``byline`` keys to stdout.

    Args:
        page_uri: URI of the page to scrape.
    """
    page_text = requests.get(page_uri).text
    file_uri, license_text, author = parse_file_page(page_text, page_uri)

    response = requests.get(file_uri, allow_redirects=True)
    local_filename = file_uri.split('/')[-1]
    # The context manager closes the handle even if the write fails.
    with open("./static/assets/" + local_filename, 'wb') as out:
        out.write(response.content)

    print('[[mediums.works.figures]]')
    print('file = "' + local_filename + '"')
    # A missing licence or author is printed as the literal text "None".
    print(f'byline = "{license_text}{author}"')


if __name__ == "__main__":
    for uri in uris:
        fetch_figure(uri)
|