From bd77bc4ddff6e55746b00fe8070383678e9218a3 Mon Sep 17 00:00:00 2001 From: Aaron Gutierrez Date: Wed, 30 Jan 2019 21:33:38 -0800 Subject: [PATCH] style changes, better parsing --- proxy.cgi => cgi-bin/proxy.cgi | 22 ++++++++++++++++------ index.html | 3 +-- site.css | 29 +++++++++++++++++------------ template.html | 9 ++++----- 4 files changed, 38 insertions(+), 25 deletions(-) rename proxy.cgi => cgi-bin/proxy.cgi (58%) diff --git a/proxy.cgi b/cgi-bin/proxy.cgi similarity index 58% rename from proxy.cgi rename to cgi-bin/proxy.cgi index f0055b0..bc25ff0 100755 --- a/proxy.cgi +++ b/cgi-bin/proxy.cgi @@ -4,17 +4,27 @@ import cgi import cgitb import subprocess +from goose3 import Goose + cgitb.enable() -def fetch_site(url): - data = subprocess.check_output(['w3m', '-dump', url]) - return data.decode() +goose = Goose({'enable_image_fetching': True}) -def format_output(url, data): +def fetch_site(url): + return goose.extract(url=url) + +def format_output(article): with open('template.html', 'r') as f: template = f.read() - return template.format(url, data) + extra = "" + if article.top_image: + extra += """""".format(article.top_image.src) + + return template.format( + title=article.title, + body=article.cleaned_text, + extra=extra) def print_headers(): print('Content-Type: text/html; charset=utf8\r\n\r\n') @@ -31,7 +41,7 @@ def main(): data = fetch_site(url) print_headers() - print(format_output(url, data)) + print(format_output(data)) if __name__ == '__main__': main() diff --git a/index.html b/index.html index 83ef50b..6b5a320 100644 --- a/index.html +++ b/index.html @@ -3,13 +3,12 @@ Text Proxy -

Text Proxy

-
+ diff --git a/site.css b/site.css index 03945fb..0235f07 100644 --- a/site.css +++ b/site.css @@ -1,9 +1,3 @@ -html, -body, -pre { - font-family: 'Nanum Gothic Coding', monospace; -} - html, body { background-color: #2e3440; @@ -13,7 +7,6 @@ body { } h1 { - font-family: 'Nanum Gothic', sans-serif; font-size: 28px; font-weight: 800; text-align: center; @@ -26,7 +19,19 @@ h1 { padding: 32px; } -pre, +.text { + font-size: 16px; + line-height: 22px; + margin: 0; + max-width: 40em; + white-space: pre-line; +} + +img { + margin: 16px 0; + max-width: 100%; +} + label, input { font-size: 16px; @@ -34,10 +39,6 @@ input { margin: 0; } -pre { - max-width: 100%; -} - form { display: flex; flex: 1 1 auto; @@ -55,6 +56,10 @@ input { box-sizing: border-box; } +input[type=submit] { + cursor: pointer; +} + input[type=url] { padding: 4px 22px; margin: 22px 0; diff --git a/template.html b/template.html index 44ee7d3..8ef5770 100644 --- a/template.html +++ b/template.html @@ -2,14 +2,13 @@ - {0} - TextProxy - - + {title} - TextProxy + -

{0}

+

{title}

-
{1}
+
{extra}{body}