aboutsummaryrefslogtreecommitdiff
path: root/src/Atom.hs
blob: 3f2f652611bab920dcfde4d303b34e234408f057 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell   #-}
module Atom
    ( parseAtomFeed
    ) where

import           Protolude
import           Text.XML.Cursor
import           Text.XML                 ( Name (..) )
import qualified Data.Text.Lazy           as TL
import           Data.Time.Clock          ( UTCTime )
import           Control.Monad.Logger     ( MonadLogger, logWarn, logError )
import           Text.Blaze               ( toMarkup )
import           Text.Blaze.Renderer.Text ( renderMarkup )
import           Date
import           Types
import           Parse


localNameFilter :: Text -> Axis
localNameFilter ln = checkName (\name -> nameLocalName name == ln)

parseFieldContent :: Text -> Cursor -> [Text]
parseFieldContent field c = c $/ localNameFilter field &// content

-- | "id" is a required element
parseId :: Cursor -> Text
parseId = mconcat . parseFieldContent "id"

-- | "title" is a required element
parseTitle :: Cursor -> Text
parseTitle = mconcat . parseFieldContent "title"

-- | "updated" is a required element
parseUpdated :: MonadLogger m => Cursor -> m (Maybe UTCTime)
parseUpdated c = parseW3Rfc822 $ mconcat $ parseFieldContent "updated" c

parseEntryUpdated :: MonadLogger m => Cursor -> m (Maybe UTCTime)
parseEntryUpdated c = do
    mu <- parseEntrySourceUpdated c
    case mu of
        Nothing -> parseEntryUpdated' c
        Just u -> return $ Just u
-- parseEntryUpdated = parseEntryUpdated'

parseEntryUpdated' :: MonadLogger m => Cursor -> m (Maybe UTCTime)
parseEntryUpdated' c = case parseFieldContent "updated" c of
    [] -> parseW3Rfc822 $ mconcat $ parseFieldContent "published" c
    us -> parseW3Rfc822 $ mconcat us

-- | "source > updated" is useful for "planet" feeds, it is often more accurate
-- than "updated"
parseEntrySourceUpdated :: MonadLogger m => Cursor -> m (Maybe UTCTime)
parseEntrySourceUpdated c = case (c $/ localNameFilter "source" &/ localNameFilter "updated" &// content) of
    [] -> return Nothing
    us -> parseW3Rfc822 $ mconcat us

parseLink :: Cursor -> Link
parseLink c = do
    let href = case mconcat $ c $| attribute "href" of
            "" -> mconcat $ c $/ content
            h  -> h
        rel = listToMaybe $ c $| attribute "rel"
    Link (escapeHref href) rel

parseLinks :: Cursor -> [Link]
parseLinks c = c $/ localNameFilter "link" &| parseLink

parsePerson :: Cursor -> Person
parsePerson c = Person name uri email
  where
    name = mconcat $ parseFieldContent "name" c
    uri = Nothing  -- TODO
    email = Nothing  -- TODO

parseAuthors :: Cursor -> [Person]
parseAuthors c = c $/ localNameFilter "author" &| parsePerson

parseContent' :: Cursor -> Maybe Content
parseContent' c = parseType (mconcat $ c $| attribute "type")
  where
    parseType t
        | t == "text"       = Just $ TextContent $ TL.fromChunks $ c $// content
        |      t == "html"
            || t == "xhtml"
            || t == ""      = case c $/ xhtmlFilter of
                [] -> Just . HtmlContent . TL.fromChunks $ c $/ content
                xs  -> Just . HtmlContent $ foldr ((<>) . managechildren . child) ""  xs
        | otherwise         = Nothing
    managechildren = foldr ((<>) . renderMarkup . toMarkup . node) ""

xhtmlFilter :: Axis
xhtmlFilter = checkName (\name -> nameNamespace name == Just "http://www.w3.org/1999/xhtml")

parseContent :: Cursor -> Maybe Content
parseContent c = msum $
              (c $/ localNameFilter "content" &| parseContent')
    `mappend` (c $/ localNameFilter "summary" &| parseContent')

parseEntry :: MonadLogger m => Cursor -> m (Maybe Entry)
parseEntry c = do
    let id' = parseId c
        title = parseTitle c
        authors = parseAuthors c
        content' = parseContent c
        links = parseLinks c
    when (isNothing content') ($logWarn $ formatContentEmptyError id' title)
    mupdated <- parseEntryUpdated c
    maybe (parseUpdatedFailed id' title) (\updated -> return $ Just $ Entry id' title updated authors content' links) mupdated
  where
    parseUpdatedFailed i t = do
        $logError $ formatParseEntryUpdatedError i t
        return Nothing

parseEntries :: MonadLogger m => Cursor -> m [Entry]
parseEntries c = catMaybes <$> sequence (c $/ localNameFilter "entry" &| parseEntry)

parseFeed' :: MonadLogger m => Url -> Cursor -> m (Maybe Feed)
parseFeed' u c = do
    let title = parseTitle c
        links = parseLinks c
        authors = parseAuthors c
    entries <- parseEntries c
    mupdated <- parseUpdated c
    maybe (parseUpdatedFailed u title) (\updated -> return $ Just $ Feed u title updated authors links entries) mupdated
  where
    parseUpdatedFailed i t = do
        $logError $ formatParseFeedUpdatedError i t
        return Nothing

parseAtomFeed :: MonadLogger m => Url -> Cursor -> m (Maybe Feed)
parseAtomFeed u c = do
    mfeeds <- sequence $ c $| localNameFilter "feed" &| parseFeed' u
    return $ msum mfeeds