style changes, better parsing
This commit is contained in:
@@ -4,17 +4,27 @@ import cgi
|
||||
import cgitb
|
||||
import subprocess
|
||||
|
||||
from goose3 import Goose
|
||||
|
||||
# Install the cgitb handler so an uncaught exception is reported as a
# formatted traceback in the CGI response rather than as a bare server error.
# NOTE(review): the traceback is shown to whoever requested the page; confirm
# that is acceptable outside development.
cgitb.enable()
|
||||
|
||||
def fetch_site(url):
    """Return the text rendering of ``url`` produced by ``w3m -dump``.

    ``url`` comes from the request, so it is validated before it reaches the
    ``w3m`` command line: only absolute ``http://`` and ``https://`` URLs are
    accepted.  Without this check a ``file://`` URL would make w3m dump local
    files, and a value starting with ``-`` would be parsed as a w3m option.

    Args:
        url: Absolute http(s) URL of the page to fetch.

    Returns:
        The w3m output as ``str``.  Bytes that are not valid UTF-8 are
        replaced instead of raising ``UnicodeDecodeError``.

    Raises:
        ValueError: If ``url`` is not a string starting with ``http://`` or
            ``https://`` followed by at least one character.
        subprocess.CalledProcessError: If w3m exits with a non-zero status.
    """
    if not isinstance(url, str):
        raise ValueError('unsupported URL: {!r}'.format(url))

    # Requiring the literal scheme prefix also guarantees the argument cannot
    # begin with "-" or whitespace.
    scheme, separator, rest = url.partition('://')
    if scheme.lower() not in ('http', 'https') or not separator or not rest:
        raise ValueError('unsupported URL: {!r}'.format(url))

    data = subprocess.check_output(['w3m', '-dump', url])
    # w3m's output encoding is not controlled here, so decode leniently.
    return data.decode(errors='replace')
|
||||
# Module-level goose3 extractor, created once and reused by fetch_site().
# Image fetching is switched on; format_output() reads article.top_image,
# presumably populated because of this option (TODO confirm against goose3).
goose = Goose({'enable_image_fetching': True})
|
||||
|
||||
def format_output(url, data):
|
||||
def fetch_site(url):
    """Run the module-level ``goose`` extractor on ``url`` and return its result.

    The returned object is what ``format_output`` consumes (it reads
    ``title``, ``cleaned_text`` and ``top_image`` from it).
    """
    article = goose.extract(url=url)
    return article
|
||||
|
||||
def format_output(article):
    """Render an extracted article into the ``template.html`` page.

    The template is read from ``template.html`` in the current working
    directory and filled through ``str.format`` with the named fields
    ``title``, ``body`` and ``extra``.

    The article content originates from an arbitrary fetched page, so every
    value is HTML-escaped before interpolation; otherwise markup in the title,
    text or image URL would be injected into the generated page.

    Args:
        article: Object exposing ``title``, ``cleaned_text`` and ``top_image``
            (the latter falsy, or an object with a ``src`` attribute).

    Returns:
        The complete HTML document as ``str``.
    """
    from html import escape

    with open('template.html', 'r', encoding='utf-8') as f:
        template = f.read()

    extra = ""
    if article.top_image:
        # quote=True keeps a '"' in the URL from terminating the attribute.
        extra += """<img src="{}" />""".format(
            escape(article.top_image.src or '', quote=True))

    # A missing title/text is rendered as empty rather than the string "None".
    return template.format(
        title=escape(article.title or ''),
        body=escape(article.cleaned_text or ''),
        extra=extra)
|
||||
|
||||
def print_headers():
    """Write the CGI response header block to standard output.

    Emits a single ``Content-Type`` header followed by the blank line that
    separates headers from the body.  The terminator is passed as ``end`` so
    that ``print`` does not append its own newline after it; previously that
    extra newline became the first character of the response body.  The
    charset uses the registered name ``utf-8``.
    """
    print('Content-Type: text/html; charset=utf-8', end='\r\n\r\n')
|
||||
@@ -31,7 +41,7 @@ def main():
|
||||
data = fetch_site(url)
|
||||
|
||||
print_headers()
|
||||
print(format_output(url, data))
|
||||
print(format_output(data))
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
@@ -3,13 +3,12 @@
|
||||
<head>
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<title>Text Proxy</title>
|
||||
<link href="https://fonts.googleapis.com/css?family=Nanum+Gothic+Coding|Nanum+Gothic:800" rel="stylesheet">
|
||||
<link href="site.css" rel="stylesheet">
|
||||
</head>
|
||||
<body>
|
||||
<h1>Text Proxy</h1>
|
||||
<div class="main">
|
||||
<form action="/proxy.cgi" method="get">
|
||||
<form action="/cgi-bin/proxy.cgi" method="get">
|
||||
<label for="u">Site</label>
|
||||
<input name="u" id="u" type="url" autofocus required>
|
||||
<input type="submit" value="Go">
|
||||
|
||||
29
site.css
29
site.css
@@ -1,9 +1,3 @@
|
||||
html,
|
||||
body,
|
||||
pre {
|
||||
font-family: 'Nanum Gothic Coding', monospace;
|
||||
}
|
||||
|
||||
html,
|
||||
body {
|
||||
background-color: #2e3440;
|
||||
@@ -13,7 +7,6 @@ body {
|
||||
}
|
||||
|
||||
h1 {
|
||||
font-family: 'Nanum Gothic', sans-serif;
|
||||
font-size: 28px;
|
||||
font-weight: 800;
|
||||
text-align: center;
|
||||
@@ -26,7 +19,19 @@ h1 {
|
||||
padding: 32px;
|
||||
}
|
||||
|
||||
pre,
|
||||
.text {
|
||||
font-size: 16px;
|
||||
line-height: 22px;
|
||||
margin: 0;
|
||||
max-width: 40em;
|
||||
white-space: pre-line;
|
||||
}
|
||||
|
||||
img {
|
||||
margin: 16px 0;
|
||||
max-width: 100%;
|
||||
}
|
||||
|
||||
label,
|
||||
input {
|
||||
font-size: 16px;
|
||||
@@ -34,10 +39,6 @@ input {
|
||||
margin: 0;
|
||||
}
|
||||
|
||||
pre {
|
||||
max-width: 100%;
|
||||
}
|
||||
|
||||
form {
|
||||
display: flex;
|
||||
flex: 1 1 auto;
|
||||
@@ -55,6 +56,10 @@ input {
|
||||
box-sizing: border-box;
|
||||
}
|
||||
|
||||
input[type=submit] {
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
input[type=url] {
|
||||
padding: 4px 22px;
|
||||
margin: 22px 0;
|
||||
|
||||
@@ -2,14 +2,13 @@
|
||||
<html>
|
||||
<head>
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<title>{0} - TextProxy</title>
|
||||
<link href="https://fonts.googleapis.com/css?family=Nanum+Gothic+Coding|Nanum+Gothic:800" rel="stylesheet">
|
||||
<link href="site.css" rel="stylesheet">
|
||||
<title>{title} - TextProxy</title>
|
||||
<link href="/site.css" rel="stylesheet">
|
||||
</head>
|
||||
<body>
|
||||
<h1>{0}</h1>
|
||||
<h1>{title}</h1>
|
||||
<div class="main">
|
||||
<pre>{1}</pre>
|
||||
<div class="text">{extra}{body}</div>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
Reference in New Issue
Block a user