summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Félix Sipma <felix.sipma@no-log.org> 2016-12-02 10:40:35 +0100
committer Félix Sipma <felix.sipma@no-log.org> 2016-12-02 10:40:35 +0100
commit 92aef158f515dd05ef2b980af02427eb2fde7a78 (patch)
tree 64de16f90e2a85aefbfeedb5541928a755c2532c
parent c0f3d0f825d1455f8dfb540cbcc973753fb33312 (diff)
safer-rhone-alpes.py: report errors encountered (HEAD, master)
-rwxr-xr-x doc/safer-rhone-alpes.py 29
1 files changed, 23 insertions, 6 deletions
diff --git a/doc/safer-rhone-alpes.py b/doc/safer-rhone-alpes.py
index 41b7bf2..955819b 100755
--- a/doc/safer-rhone-alpes.py
+++ b/doc/safer-rhone-alpes.py
@@ -1,12 +1,15 @@
#!/usr/bin/env python3
#
# Example usage:
-# curl "http://www.safer-rhone-alpes.com/appels-candidature.html" | ./safer-rhone-alpes.py "--26.html"
+# curl "http://www.safer-rhone-alpes.com/appels-candidature.html" \
+# | ./safer-rhone-alpes.py "--26.html"
import sys
import lxml.html
from lxml import etree
import urllib.request
+import urllib.parse
+from urllib.error import URLError, HTTPError
import html2text
baseurl = "http://www.safer-rhone-alpes.com"
@@ -25,9 +28,23 @@ nodes = html.xpath("//div[@id='contenuPage']//table/tbody/tr/td/a[contains(@href
for node in nodes:
    title = "<h1>" + node.text + "</h1>"
-    url = baseurl + "/" + node.get("href")
-    with opener.open(url) as response:
-        html = etree.parse(response, parser)
-        content = html.xpath("//div[contains(@class,'appel')]")
-        print(html2text.html2text(title))
-        print(html2text.html2text(lxml.html.tostring(content[0]).decode()))
+    # Print the title, then the entry's text or a one-line error report.
+    print(html2text.html2text(title))
+    # href is a URL path: quote() keeps "/" and encodes spaces as %20.
+    url = baseurl + "/" + urllib.parse.quote(node.get("href"))
+    try:
+        with opener.open(url) as response:
+            html = etree.parse(response, parser)
+        base_content = html.xpath("//div[contains(@class,'appel')]")
+        content = html2text.html2text(
+            lxml.html.tostring(base_content[0]).decode()
+        )
+    except HTTPError as error:
+        # HTTPError subclasses URLError, so it has to be caught first.
+        content = 'HTTP code: %s - %s (%s)' % (error.code, error.msg, url)
+    except URLError as error:
+        content = 'URL Error: %s (%s)' % (error.reason, url)
+    except Exception as error:
+        # Anything else (parse failure, no matching div): report, move on.
+        content = 'Error: %s (%s)' % (error, url)
+    print(content)