{-# LANGUAGE TypeSynonymInstances, FlexibleInstances, PatternGuards #-}

{-|
    This module is for working with HTML/XML. It deals with both well-formed XML and
    malformed HTML from the web. It features:

    * A lazy parser, based on the HTML 5 specification - see 'parseTags'.

    * A renderer that can write out HTML/XML - see 'renderTags'.

    * Utilities for extracting information from a document - see '~==', 'sections' and 'partitions'.

    The standard practice is to parse a 'String' to @[@'Tag' 'String'@]@ using 'parseTags',
    then operate upon it to extract the necessary information.
-}

module Text.HTML.TagSoup(
    -- * Data structures and parsing
    Tag(..), Row, Column, Attribute,
    module Text.HTML.TagSoup.Parser,
    module Text.HTML.TagSoup.Render,
    canonicalizeTags,

    -- Note: the "#tag-identification#" creates an anchor that's linked to from Match.hs
    -- * #tag-identification# Tag identification
    isTagOpen, isTagClose, isTagText, isTagWarning, isTagPosition,
    isTagOpenName, isTagCloseName, isTagComment,

    -- * Extraction
    fromTagText, fromAttrib,
    maybeTagText, maybeTagWarning,
    innerText,

    -- * Utility
    sections, partitions,
    
    -- * Combinators
    TagRep(..), (~==),(~/=)
    ) where

import Text.HTML.TagSoup.Type
import Text.HTML.TagSoup.Parser
import Text.HTML.TagSoup.Render
import Data.Char
import Data.List (groupBy, tails)
import Text.StringLike


-- | Turns all tag names and attributes to lower case and
--   converts DOCTYPE to upper case.
canonicalizeTags :: StringLike str => [Tag str] -> [Tag str]
canonicalizeTags :: forall str. StringLike str => [Tag str] -> [Tag str]
canonicalizeTags = (Tag str -> Tag str) -> [Tag str] -> [Tag str]
forall a b. (a -> b) -> [a] -> [b]
map Tag str -> Tag str
f
    where
        f :: Tag str -> Tag str
f (TagOpen str
tag [Attribute str]
attrs) | Just (Char
'!',str
name) <- str -> Maybe (Char, str)
forall a. StringLike a => a -> Maybe (Char, a)
uncons str
tag = str -> [Attribute str] -> Tag str
forall str. str -> [Attribute str] -> Tag str
TagOpen (Char
'!' Char -> str -> str
forall a. StringLike a => Char -> a -> a
`cons` str -> str
ucase str
name) [Attribute str]
attrs
        f (TagOpen str
name [Attribute str]
attrs) = str -> [Attribute str] -> Tag str
forall str. str -> [Attribute str] -> Tag str
TagOpen (str -> str
lcase str
name) [(str -> str
lcase str
k, str
v) | (str
k,str
v) <- [Attribute str]
attrs]
        f (TagClose str
name) = str -> Tag str
forall str. str -> Tag str
TagClose (str -> str
lcase str
name)
        f Tag str
a = Tag str
a

        ucase :: str -> str
ucase = String -> str
forall a. IsString a => String -> a
fromString (String -> str) -> (str -> String) -> str -> str
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Char) -> String -> String
forall a b. (a -> b) -> [a] -> [b]
map Char -> Char
toUpper (String -> String) -> (str -> String) -> str -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. str -> String
forall a. StringLike a => a -> String
toString
        lcase :: str -> str
lcase = String -> str
forall a. IsString a => String -> a
fromString (String -> str) -> (str -> String) -> str -> str
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Char) -> String -> String
forall a b. (a -> b) -> [a] -> [b]
map Char -> Char
toLower (String -> String) -> (str -> String) -> str -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. str -> String
forall a. StringLike a => a -> String
toString


-- | Define a class to allow String's or Tag str's to be used as matches
class TagRep a where
    -- | Convert a value into a 'Tag'.
    toTagRep :: StringLike str => a -> Tag str

instance StringLike str => TagRep (Tag str) where toTagRep :: forall str. StringLike str => Tag str -> Tag str
toTagRep = (str -> str) -> Tag str -> Tag str
forall a b. (a -> b) -> Tag a -> Tag b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap str -> str
forall a b. (StringLike a, StringLike b) => a -> b
castString

instance TagRep String where
    toTagRep :: forall str. StringLike str => String -> Tag str
toTagRep String
x = case String -> [Tag String]
forall str. StringLike str => str -> [Tag str]
parseTags String
x of
                     [Tag String
a] -> Tag String -> Tag str
forall str. StringLike str => Tag String -> Tag str
forall a str. (TagRep a, StringLike str) => a -> Tag str
toTagRep Tag String
a
                     [Tag String]
_ -> String -> Tag str
forall a. HasCallStack => String -> a
error (String -> Tag str) -> String -> Tag str
forall a b. (a -> b) -> a -> b
$ String
"When using a TagRep it must be exactly one tag, you gave: " String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
x



-- | Performs an inexact match, the first item should be the thing to match.
-- If the second item is a blank string, that is considered to match anything.
-- For example:
--
-- > (TagText "test" ~== TagText ""    ) == True
-- > (TagText "test" ~== TagText "test") == True
-- > (TagText "test" ~== TagText "soup") == False
--
-- For 'TagOpen' missing attributes on the right are allowed.
(~==) :: (StringLike str, TagRep t) => Tag str -> t -> Bool
~== :: forall str t. (StringLike str, TagRep t) => Tag str -> t -> Bool
(~==) Tag str
a t
b = Tag str -> Tag str -> Bool
forall {a}. StringLike a => Tag a -> Tag a -> Bool
f Tag str
a (t -> Tag str
forall str. StringLike str => t -> Tag str
forall a str. (TagRep a, StringLike str) => a -> Tag str
toTagRep t
b)
    where
        f :: Tag a -> Tag a -> Bool
f (TagText a
y) (TagText a
x) = a -> Bool
forall a. StringLike a => a -> Bool
strNull a
x Bool -> Bool -> Bool
|| a
x a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== a
y
        f (TagClose a
y) (TagClose a
x) = a -> Bool
forall a. StringLike a => a -> Bool
strNull a
x Bool -> Bool -> Bool
|| a
x a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== a
y
        f (TagOpen a
y [Attribute a]
ys) (TagOpen a
x [Attribute a]
xs) = (a -> Bool
forall a. StringLike a => a -> Bool
strNull a
x Bool -> Bool -> Bool
|| a
x a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== a
y) Bool -> Bool -> Bool
&& (Attribute a -> Bool) -> [Attribute a] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all Attribute a -> Bool
g [Attribute a]
xs
            where
                g :: Attribute a -> Bool
g (a
name,a
val) | a -> Bool
forall a. StringLike a => a -> Bool
strNull a
name = a
val  a -> [a] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` (Attribute a -> a) -> [Attribute a] -> [a]
forall a b. (a -> b) -> [a] -> [b]
map Attribute a -> a
forall a b. (a, b) -> b
snd [Attribute a]
ys
                             | a -> Bool
forall a. StringLike a => a -> Bool
strNull a
val  = a
name a -> [a] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` (Attribute a -> a) -> [Attribute a] -> [a]
forall a b. (a -> b) -> [a] -> [b]
map Attribute a -> a
forall a b. (a, b) -> a
fst [Attribute a]
ys
                g Attribute a
nameval = Attribute a
nameval Attribute a -> [Attribute a] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Attribute a]
ys
        f (TagComment a
x) (TagComment a
y) = a -> Bool
forall a. StringLike a => a -> Bool
strNull a
x Bool -> Bool -> Bool
|| a
x a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== a
y
        f (TagWarning a
x) (TagWarning a
y) = a -> Bool
forall a. StringLike a => a -> Bool
strNull a
x Bool -> Bool -> Bool
|| a
x a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== a
y
        f (TagPosition Row
x1 Row
x2) (TagPosition Row
y1 Row
y2) = Row
x1 Row -> Row -> Bool
forall a. Eq a => a -> a -> Bool
== Row
y1 Bool -> Bool -> Bool
&& Row
x2 Row -> Row -> Bool
forall a. Eq a => a -> a -> Bool
== Row
y2
        f Tag a
_ Tag a
_ = Bool
False

-- | Negation of '~=='
(~/=) :: (StringLike str, TagRep t) => Tag str -> t -> Bool
~/= :: forall str t. (StringLike str, TagRep t) => Tag str -> t -> Bool
(~/=) Tag str
a t
b = Bool -> Bool
not (Tag str
a Tag str -> t -> Bool
forall str t. (StringLike str, TagRep t) => Tag str -> t -> Bool
~== t
b)



-- | This function takes a list, and returns all suffixes whose
--   first item matches the predicate.
sections :: (a -> Bool) -> [a] -> [[a]]
sections :: forall a. (a -> Bool) -> [a] -> [[a]]
sections a -> Bool
p = ([a] -> Bool) -> [[a]] -> [[a]]
forall a. (a -> Bool) -> [a] -> [a]
filter (a -> Bool
p (a -> Bool) -> ([a] -> a) -> [a] -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [a] -> a
forall a. HasCallStack => [a] -> a
head) ([[a]] -> [[a]]) -> ([a] -> [[a]]) -> [a] -> [[a]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [[a]] -> [[a]]
forall a. HasCallStack => [a] -> [a]
init ([[a]] -> [[a]]) -> ([a] -> [[a]]) -> [a] -> [[a]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [a] -> [[a]]
forall a. [a] -> [[a]]
tails

-- | This function is similar to 'sections', but splits the list
--   so no element appears in any two partitions.
partitions :: (a -> Bool) -> [a] -> [[a]]
partitions :: forall a. (a -> Bool) -> [a] -> [[a]]
partitions a -> Bool
p =
   let notp :: a -> Bool
notp = Bool -> Bool
not (Bool -> Bool) -> (a -> Bool) -> a -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. a -> Bool
p
   in  (a -> a -> Bool) -> [a] -> [[a]]
forall a. (a -> a -> Bool) -> [a] -> [[a]]
groupBy ((a -> Bool) -> a -> a -> Bool
forall a b. a -> b -> a
const a -> Bool
notp) ([a] -> [[a]]) -> ([a] -> [a]) -> [a] -> [[a]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (a -> Bool) -> [a] -> [a]
forall a. (a -> Bool) -> [a] -> [a]
dropWhile a -> Bool
notp