aboutsummaryrefslogtreecommitdiff
path: root/src/Atom.hs
blob: d5dcdad0eb3f245593d72fe8086b0c766c76b9cf (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell   #-}
module Atom
    ( parseAtomFeed
    ) where

import           Protolude
import           Text.XML.Cursor
import           Text.XML                 ( Name (..) )
import qualified Data.Text.Lazy           as TL
import           Data.Time.Clock          ( UTCTime )
import           Control.Monad.Logger     ( MonadLogger, logWarn, logError )
import           Text.Blaze               ( toMarkup )
import           Text.Blaze.Renderer.Text ( renderMarkup )
import           Date
import           Types
import           Parse


localNameFilter :: Text -> Axis
localNameFilter ln = checkName (\name -> nameLocalName name == ln)

parseFieldContent :: Text -> Cursor -> [Text]
parseFieldContent field c = c $/ localNameFilter field &// content

-- | "id" is a required element
parseId :: Cursor -> Text
parseId = mconcat . parseFieldContent "id"

-- | "title" is a required element
parseTitle :: Cursor -> Text
parseTitle = mconcat . parseFieldContent "title"

-- | "updated" is a required element
parseUpdated :: MonadLogger m => Cursor -> m (Maybe UTCTime)
parseUpdated c = parseW3Rfc822 $ mconcat $ parseFieldContent "updated" c

parseEntryUpdated :: MonadLogger m => Cursor -> m (Maybe UTCTime)
parseEntryUpdated c = case parseFieldContent "updated" c of
    [] -> parseW3Rfc822 $ mconcat $ parseFieldContent "published" c
    us -> parseW3Rfc822 $ mconcat us

parseLink :: Cursor -> Link
parseLink c = do
    let href = case mconcat $ c $| attribute "href" of
            "" -> mconcat $ c $/ content
            h  -> h
        rel = listToMaybe $ c $| attribute "rel"
    Link (escapeHref href) rel

parseLinks :: Cursor -> [Link]
parseLinks c = c $/ localNameFilter "link" &| parseLink

parsePerson :: Cursor -> Person
parsePerson c = Person name uri email
  where
    name = mconcat $ parseFieldContent "name" c
    uri = Nothing  -- TODO
    email = Nothing  -- TODO

parseAuthors :: Cursor -> [Person]
parseAuthors c = c $/ localNameFilter "author" &| parsePerson

parseContent' :: Cursor -> Maybe Content
parseContent' c = parseType (mconcat $ c $| attribute "type")
  where
    parseType t
        | t == "text"       = Just $ TextContent $ TL.fromChunks $ c $// content
        |      t == "html"
            || t == "xhtml"
            || t == ""      = case c $/ xhtmlFilter of
                [] -> Just . HtmlContent . TL.fromChunks $ c $/ content
                xs  -> Just . HtmlContent $ foldr ((<>) . managechildren . child) ""  xs
        | otherwise         = Nothing
    managechildren = foldr ((<>) . renderMarkup . toMarkup . node) ""

xhtmlFilter :: Axis
xhtmlFilter = checkName (\name -> nameNamespace name == Just "http://www.w3.org/1999/xhtml")

parseContent :: Cursor -> Maybe Content
parseContent c = msum $
              (c $/ localNameFilter "content" &| parseContent')
    `mappend` (c $/ localNameFilter "summary" &| parseContent')

parseEntry :: MonadLogger m => Cursor -> m (Maybe Entry)
parseEntry c = do
    let id' = parseId c
        title = parseTitle c
        authors = parseAuthors c
        content' = parseContent c
        links = parseLinks c
    when (isNothing content') ($logWarn $ formatContentEmptyError id' title)
    mupdated <- parseEntryUpdated c
    maybe (parseUpdatedFailed id' title) (\updated -> return $ Just $ Entry id' title updated authors content' links) mupdated
  where
    parseUpdatedFailed i t = do
        $logError $ formatParseEntryUpdatedError i t
        return Nothing

parseEntries :: MonadLogger m => Cursor -> m [Entry]
parseEntries c = catMaybes <$> sequence (c $/ localNameFilter "entry" &| parseEntry)

parseFeed' :: MonadLogger m => Url -> Cursor -> m (Maybe Feed)
parseFeed' u c = do
    let title = parseTitle c
        links = parseLinks c
        authors = parseAuthors c
    entries <- parseEntries c
    mupdated <- parseUpdated c
    maybe (parseUpdatedFailed u title) (\updated -> return $ Just $ Feed u title updated authors links entries) mupdated
  where
    parseUpdatedFailed i t = do
        $logError $ formatParseFeedUpdatedError i t
        return Nothing

parseAtomFeed :: MonadLogger m => Url -> Cursor -> m (Maybe Feed)
parseAtomFeed u c = do
    mfeeds <- sequence $ c $| localNameFilter "feed" &| parseFeed' u
    return $ msum mfeeds