about summary refs log tree commit diff
diff options
context:
space:
mode:
-rwxr-xr-x doc/filters/ddt-sdrea.py 37
1 files changed, 22 insertions, 15 deletions
diff --git a/doc/filters/ddt-sdrea.py b/doc/filters/ddt-sdrea.py
index 542ac3f..761d875 100755
--- a/doc/filters/ddt-sdrea.py
+++ b/doc/filters/ddt-sdrea.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# vim: ft=python et softtabstop=4 cinoptions=4 shiftwidth=4 ts=4 ai
"""
@@ -22,18 +22,18 @@ EXAMPLES
"http://www.loire.gouv.fr/publicite-des-demandes-d-autorisation-d-exploiter-a6497.html"
"""
-from __future__ import print_function, unicode_literals
-from bs4 import BeautifulSoup
-import PyRSS2Gen
import sys
import getopt
import logging
-from dateutil.parser import parse, parserinfo
import datetime
-import requests
import subprocess
+import shutil
+from bs4 import BeautifulSoup
+import PyRSS2Gen
+from dateutil.parser import parse, parserinfo
+import requests
-__version__ = "0.2"
+__version__ = "0.3"
# Defaults
feed_title = None
@@ -80,6 +80,11 @@ for opt in optlist:
print(__doc__)
sys.exit(0)
+if not shutil.which("pdftotext"):
+ logger.error("pdftotext was not found in your $PATH. (Try `apt-get \
+ install poppler-utils`?)\n")
+ sys.exit(2)
+
html = sys.stdin.read()
# Find entries
@@ -106,11 +111,13 @@ yesterday = today - one_day
class MyParserInfo(parserinfo):
MONTHS = [parserinfo.MONTHS[x] + months[x] for x in range(12)]
+
for entry in entries:
title = entry.find("td").text.strip()
if not title:
logger.error("Can't find title for entry.")
- date = parse(entry.find("th").text.strip(), dayfirst=True, fuzzy=True, parserinfo=MyParserInfo())
+ date = parse(entry.find("th").text.strip(), dayfirst=True, fuzzy=True,
+ parserinfo=MyParserInfo())
if not date:
logger.error("Can't find date for entry %s.", title)
link = entry.find("a").attrs.get("href")
@@ -118,15 +125,15 @@ for entry in entries:
logger.error("Can't find link for entry %s.", title)
pdf = requests.get(base_url + "/" + link)
p = subprocess.Popen(['pdftotext', '-q', '-layout', '-', '-'],
- stdin=subprocess.PIPE,
- stdout=subprocess.PIPE,
- stderr=subprocess.PIPE)
+ stdin=subprocess.PIPE,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
content = "<pre>" + p.communicate(input=pdf.content)[0].decode("utf-8") + "</pre>"
items.append(
PyRSS2Gen.RSSItem(
title=title,
- #link=link,
- #categories=[category],
+ # link=link,
+ # categories=[category],
description=content,
pubDate=date,
)
@@ -134,11 +141,11 @@ for entry in entries:
# Build rss feed
rss = PyRSS2Gen.RSS2(
- #title=feed_title or soup.body.find(class_="sTitrePage").text or "",
+ # title=feed_title or soup.body.find(class_="sTitrePage").text or "",
title=feed_title,
link=url,
description=feed_description or "",
- lastBuildDate = parse(feed_date),
+ lastBuildDate=parse(feed_date),
items=items)
# Return rss feed