{-# LANGUAGE DeriveDataTypeable #-}

module Text.HTML.TagSoup.Options where

import Data.Typeable
import Text.HTML.TagSoup.Type
import Text.HTML.TagSoup.Entity
import Text.StringLike


-- | These options control how 'parseTags' works. The 'ParseOptions' type is usually generated by one of
--   'parseOptions', 'parseOptionsFast' or 'parseOptionsEntities', then selected fields may be overriden.
--
--   The options 'optTagPosition' and 'optTagWarning' specify whether to generate
--   'TagPosition' or 'TagWarning' elements respectively. Usually these options should be set to @False@
--   to simplify future stages, unless you rely on position information or want to give malformed HTML
--   messages to the end user.
--
--   The options 'optEntityData' and 'optEntityAttrib' control how entities, for example @ @ are handled.
--   Both take a string, and a boolean, where @True@ indicates that the entity ended with a semi-colon @;@.
--   Inside normal text 'optEntityData' will be called, and the results will be inserted in the tag stream.
--   Inside a tag attribute 'optEntityAttrib' will be called, and the first component of the result will be used
--   in the attribute, and the second component will be appended after the 'TagOpen' value (usually the second
--   component is @[]@). As an example, to not decode any entities, pass:
--
-- > parseOptions
-- >     {optEntityData=\(str,b) -> [TagText $ "&" ++ str ++ [';' | b]]
-- >     ,optEntityAttrib\(str,b) -> ("&" ++ str ++ [';' | b], [])

--   The 'optTagTextMerge' value specifies if you always want adjacent 'TagText' values to be merged.
--   Merging adjacent pieces of text has a small performance penalty, but will usually make subsequent analysis
--   simpler. Contiguous runs of characters without entities or tags will also be generated as single 'TagText'
--   values.
data ParseOptions str = ParseOptions
    {forall str. ParseOptions str -> Bool
optTagPosition :: Bool -- ^ Should 'TagPosition' values be given before some items (default=False,fast=False).
    ,forall str. ParseOptions str -> Bool
optTagWarning :: Bool  -- ^ Should 'TagWarning' values be given (default=False,fast=False)
    ,forall str. ParseOptions str -> (str, Bool) -> [Tag str]
optEntityData :: (str,Bool) -> [Tag str] -- ^ How to lookup an entity (Bool = has ending @';'@)
    ,forall str. ParseOptions str -> (str, Bool) -> (str, [Tag str])
optEntityAttrib :: (str,Bool) -> (str,[Tag str]) -- ^ How to lookup an entity in an attribute (Bool = has ending @';'@?)
    ,forall str. ParseOptions str -> Bool
optTagTextMerge :: Bool -- ^ Require no adjacent 'TagText' values (default=True,fast=False)
    }
    deriving Typeable


-- | A 'ParseOptions' structure using a custom function to lookup attributes. Any attribute
--   that is not found will be left intact, and a 'TagWarning' given (if 'optTagWarning' is set).
--
--   If you do not want to resolve any entities, simpliy pass @const Nothing@ for the lookup function.
parseOptionsEntities :: StringLike str => (str -> Maybe str) -> ParseOptions str
parseOptionsEntities :: forall str.
StringLike str =>
(str -> Maybe str) -> ParseOptions str
parseOptionsEntities str -> Maybe str
lookupEntity = Bool
-> Bool
-> ((str, Bool) -> [Tag str])
-> ((str, Bool) -> (str, [Tag str]))
-> Bool
-> ParseOptions str
forall str.
Bool
-> Bool
-> ((str, Bool) -> [Tag str])
-> ((str, Bool) -> (str, [Tag str]))
-> Bool
-> ParseOptions str
ParseOptions Bool
False Bool
False (str, Bool) -> [Tag str]
entityData (str, Bool) -> (str, [Tag str])
entityAttrib Bool
True
    where
        entityData :: (str, Bool) -> [Tag str]
entityData (str, Bool)
x = str -> Tag str
forall str. str -> Tag str
TagText str
a Tag str -> [Tag str] -> [Tag str]
forall a. a -> [a] -> [a]
: [Tag str]
b
            where (str
a,[Tag str]
b) = (str, Bool) -> (str, [Tag str])
entityAttrib (str, Bool)
x

        entityAttrib :: (str, Bool) -> (str, [Tag str])
entityAttrib ~(str
x,Bool
b) =
            let x' :: str
x' = str
x str -> str -> str
forall a. StringLike a => a -> a -> a
`append` String -> str
forall a. IsString a => String -> a
fromString [Char
';'|Bool
b]
            in case str -> Maybe str
lookupEntity str
x' of
                Just str
y -> (str
y, [])
                Maybe str
Nothing -> (Char -> str
forall a. StringLike a => Char -> a
fromChar Char
'&' str -> str -> str
forall a. StringLike a => a -> a -> a
`append` str
x'
                           ,[str -> Tag str
forall str. str -> Tag str
TagWarning (str -> Tag str) -> str -> Tag str
forall a b. (a -> b) -> a -> b
$ String -> str
forall a. IsString a => String -> a
fromString String
"Unknown entity: " str -> str -> str
forall a. StringLike a => a -> a -> a
`append` str
x])


-- | The default parse options value, described in 'ParseOptions'. Equivalent to
--   @'parseOptionsEntities' 'lookupEntity'@.
parseOptions :: StringLike str => ParseOptions str
parseOptions :: forall str. StringLike str => ParseOptions str
parseOptions = (str -> Maybe str) -> ParseOptions str
forall str.
StringLike str =>
(str -> Maybe str) -> ParseOptions str
parseOptionsEntities ((str -> Maybe str) -> ParseOptions str)
-> (str -> Maybe str) -> ParseOptions str
forall a b. (a -> b) -> a -> b
$ (String -> str) -> Maybe String -> Maybe str
forall a b. (a -> b) -> Maybe a -> Maybe b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap String -> str
forall a. IsString a => String -> a
fromString (Maybe String -> Maybe str)
-> (str -> Maybe String) -> str -> Maybe str
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Maybe String
lookupEntity (String -> Maybe String) -> (str -> String) -> str -> Maybe String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. str -> String
forall a. StringLike a => a -> String
toString


-- | A 'ParseOptions' structure optimised for speed, following the fast options.
parseOptionsFast :: StringLike str => ParseOptions str
parseOptionsFast :: forall str. StringLike str => ParseOptions str
parseOptionsFast = ParseOptions str
forall str. StringLike str => ParseOptions str
parseOptions{optTagTextMerge=False}


-- | Change the underlying string type of a 'ParseOptions' value.
fmapParseOptions :: (StringLike from, StringLike to) => ParseOptions from -> ParseOptions to
fmapParseOptions :: forall from to.
(StringLike from, StringLike to) =>
ParseOptions from -> ParseOptions to
fmapParseOptions (ParseOptions Bool
a Bool
b (from, Bool) -> [Tag from]
c (from, Bool) -> (from, [Tag from])
d Bool
e) = Bool
-> Bool
-> ((to, Bool) -> [Tag to])
-> ((to, Bool) -> (to, [Tag to]))
-> Bool
-> ParseOptions to
forall str.
Bool
-> Bool
-> ((str, Bool) -> [Tag str])
-> ((str, Bool) -> (str, [Tag str]))
-> Bool
-> ParseOptions str
ParseOptions Bool
a Bool
b (to, Bool) -> [Tag to]
forall {b} {a}.
(StringLike b, StringLike a) =>
(a, Bool) -> [Tag b]
c2 (to, Bool) -> (to, [Tag to])
forall {a} {b} {a}.
(StringLike a, StringLike b, StringLike a) =>
(a, Bool) -> (a, [Tag b])
d2 Bool
e
    where
        c2 :: (a, Bool) -> [Tag b]
c2 ~(a
x,Bool
y) = (Tag from -> Tag b) -> [Tag from] -> [Tag b]
forall a b. (a -> b) -> [a] -> [b]
map ((from -> b) -> Tag from -> Tag b
forall a b. (a -> b) -> Tag a -> Tag b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap from -> b
forall a b. (StringLike a, StringLike b) => a -> b
castString) ([Tag from] -> [Tag b]) -> [Tag from] -> [Tag b]
forall a b. (a -> b) -> a -> b
$ (from, Bool) -> [Tag from]
c (a -> from
forall a b. (StringLike a, StringLike b) => a -> b
castString a
x, Bool
y)
        d2 :: (a, Bool) -> (a, [Tag b])
d2 ~(a
x,Bool
y) = (from -> a
forall a b. (StringLike a, StringLike b) => a -> b
castString from
r, (Tag from -> Tag b) -> [Tag from] -> [Tag b]
forall a b. (a -> b) -> [a] -> [b]
map ((from -> b) -> Tag from -> Tag b
forall a b. (a -> b) -> Tag a -> Tag b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap from -> b
forall a b. (StringLike a, StringLike b) => a -> b
castString) [Tag from]
s)
            where (from
r,[Tag from]
s) = (from, Bool) -> (from, [Tag from])
d (a -> from
forall a b. (StringLike a, StringLike b) => a -> b
castString a
x, Bool
y)