aboutsummaryrefslogtreecommitdiff
path: root/src/RSS.hs
blob: a7f242ce9c80c5b318f960f98621f5f039189d4c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell   #-}
module RSS
    ( parseRSSFeed
    ) where

import           Protolude
import           Text.XML.Cursor
import           Text.XML                 ( Name (..) )
import qualified Data.Text.Lazy           as TL
import           Data.Maybe               ( catMaybes, listToMaybe, isNothing )
import           Data.Time.Clock          ( UTCTime (..) )
import           Data.Time.Format         ( ParseTime (..), defaultTimeLocale )
import           Control.Monad.Logger     ( MonadLogger, logWarn, logError )
import           Date
import           Types
import           Parse


localNameFilter :: Text -> Axis
localNameFilter ln = checkName (\name -> nameLocalName name == ln)

parseFieldContent :: Text -> Cursor -> [Text]
parseFieldContent field c = c $/ localNameFilter field &// content

parseId :: Cursor -> Text
parseId = mconcat . parseFieldContent "guid"

parseTitle :: Cursor -> Text
parseTitle c =
              mconcat (parseFieldContent "title" c)
    `mappend` mconcat (c $/ element "{http://purl.org/dc/elements/1.1/}subject" &// content)

parseUpdated :: MonadLogger m => [Entry] -> Cursor -> m (Maybe UTCTime)
parseUpdated e c = case mconcat $ parseFieldContent "lastBuildDate" c of
    ""   -> case e of
        [] -> return (buildTime defaultTimeLocale [])
        -- ^ Just 1970-01-01 00:00:00 UTC
        _  -> case map entryUpdated e of
            [] -> return Nothing
            us -> return (Just $ maximum us)
    date -> parseW3Rfc822 date

parseEntryUpdated :: MonadLogger m => Cursor -> m (Maybe UTCTime)
parseEntryUpdated c = parseW3Rfc822 $
              mconcat (parseFieldContent "pubDate" c)
    `mappend` mconcat (c $/ element "{http://purl.org/dc/elements/1.1/}date" &// content)

parseLink :: Cursor -> Link
parseLink c = do
    let href = case mconcat $ c $| attribute "href" of
            "" -> mconcat $ c $/ content
            h  -> h
        rel = listToMaybe $ c $| attribute "rel"
    Link (escapeHref href) rel

parseLinks :: Cursor -> [Link]
parseLinks c = c $/ localNameFilter "link" &| parseLink

parsePerson :: Cursor -> Person
parsePerson c = do
    let name = mconcat $ c $// content
    Person name Nothing Nothing

parseAuthors :: Cursor -> [Person]
parseAuthors c = c $/ localNameFilter "managingEditor" &| parsePerson

parseEntryAuthors :: Cursor -> [Person]
parseEntryAuthors c =
              (c $/ localNameFilter "author" &| parsePerson)
    `mappend` (c $/ element "{http://purl.org/dc/elements/1.1/}creator" &| parsePerson)

parseContent' :: Cursor -> Maybe Content
parseContent' c = Just . HtmlContent . TL.fromChunks $ c $// content

parseContent :: Cursor -> Maybe Content
parseContent c = msum $
              (c $/ element "{http://purl.org/rss/1.0/modules/content/}encoded" &| parseContent')
    `mappend` (c $/ localNameFilter "description" &| parseContent')

parseEntry :: MonadLogger m => Cursor -> m (Maybe Entry)
parseEntry c = do
    let id' = parseId c
        title = parseTitle c
        authors = parseEntryAuthors c
        content' = parseContent c
        links = parseLinks c
    when (isNothing content') ($logWarn $ formatContentEmptyError id' title)
    mupdated <- parseEntryUpdated c
    maybe (parseUpdatedFailed id' title) (\updated -> return $ Just $ Entry id' title updated authors content' links) mupdated
  where
    parseUpdatedFailed i t = do
        $logError $ formatParseEntryUpdatedError i t
        return Nothing

parseEntries :: MonadLogger m => Cursor -> m [Entry]
parseEntries c = catMaybes <$> sequence (c $// localNameFilter "item" &| parseEntry)

parseFeed' :: MonadLogger m => Url -> Cursor -> m (Maybe Feed)
parseFeed' u c = do
    let title = parseTitle c
        links = parseLinks c
        authors = parseAuthors c
    entries <- parseEntries c
    mupdated <- parseUpdated entries c
    maybe (parseUpdatedFailed u title) (\updated -> return $ Just $ Feed u title updated authors links entries) mupdated
  where
    parseUpdatedFailed i t = do
        $logError $ formatParseFeedUpdatedError i t
        return Nothing

parseRSSFeed :: MonadLogger m => Url -> Cursor -> m (Maybe Feed)
parseRSSFeed u c = do
    mfeeds <- sequence (c $| localNameFilter "rss" &/ localNameFilter "channel" &| parseFeed' u)
    return $ msum mfeeds