-----------------------------------------------------------------------------
-- |
-- Module      :  Miso.Util.Lexer
-- Copyright   :  (C) 2016-2025 David M. Johnson (@dmjio)
-- License     :  BSD3-style (see the file LICENSE)
-- Maintainer  :  David M. Johnson <code@dmj.io>
-- Stability   :  experimental
-- Portability :  non-portable
----------------------------------------------------------------------------
module Miso.Util.Lexer
  ( -- ** Types
    Lexer (..)
  , Location (..)
  , Located (..)
  , LexerError (..)
  , Stream (..)
    -- ** Combinators
  , getStartColumn
  , zeroLocation
  , initialLocation
  , mkStream
  , oops
  , streamError
  , string
  , string'
  , char
  , satisfy
  , peek
  , getInput
  , putInput
  , getLocation
  , setLocation
  , modifyInput
  , withLocation
  ) where
----------------------------------------------------------------------------
import           Control.Applicative
----------------------------------------------------------------------------
import           Miso.String (MisoString, ToMisoString)
import qualified Miso.String as MS
----------------------------------------------------------------------------
data LexerError
  = LexerError MisoString Location
  | UnexpectedEOF Location
  deriving (LexerError -> LexerError -> Bool
(LexerError -> LexerError -> Bool)
-> (LexerError -> LexerError -> Bool) -> Eq LexerError
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: LexerError -> LexerError -> Bool
== :: LexerError -> LexerError -> Bool
$c/= :: LexerError -> LexerError -> Bool
/= :: LexerError -> LexerError -> Bool
Eq)
----------------------------------------------------------------------------
instance Show LexerError where
  show :: LexerError -> String
show (UnexpectedEOF Location
loc) =
    String
"Unexpected EOF at: " String -> ShowS
forall a. Semigroup a => a -> a -> a
<> Location -> String
forall a. Show a => a -> String
show Location
loc
  show (LexerError MisoString
xs Location
loc) =
    String
"Unexpected \"" String -> ShowS
forall a. Semigroup a => a -> a -> a
<> Int -> ShowS
forall a. Int -> [a] -> [a]
take Int
5 (MisoString -> String
MS.unpack MisoString
xs) String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
"\"... at " String -> ShowS
forall a. Semigroup a => a -> a -> a
<> Location -> String
forall a. Show a => a -> String
show Location
loc
----------------------------------------------------------------------------
data Location
  = Location
  { Location -> Int
line :: Int
  , Location -> (Int, Int)
column :: (Int,Int)
  } deriving Location -> Location -> Bool
(Location -> Location -> Bool)
-> (Location -> Location -> Bool) -> Eq Location
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: Location -> Location -> Bool
== :: Location -> Location -> Bool
$c/= :: Location -> Location -> Bool
/= :: Location -> Location -> Bool
Eq
----------------------------------------------------------------------------
instance Show Location where
  show :: Location -> String
show (Location Int
l (Int, Int)
col) = Int -> String
forall a. Show a => a -> String
show Int
l String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
" " String -> ShowS
forall a. Semigroup a => a -> a -> a
<> (Int, Int) -> String
forall a. Show a => a -> String
show (Int, Int)
col
----------------------------------------------------------------------------
getStartColumn :: Location -> Int
getStartColumn :: Location -> Int
getStartColumn = (Int, Int) -> Int
forall a b. (a, b) -> a
fst ((Int, Int) -> Int) -> (Location -> (Int, Int)) -> Location -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Location -> (Int, Int)
column
----------------------------------------------------------------------------
initialLocation :: Location
initialLocation :: Location
initialLocation = Int -> (Int, Int) -> Location
Location Int
1 (Int
1,Int
1)
----------------------------------------------------------------------------
zeroLocation :: Location
zeroLocation :: Location
zeroLocation = Int -> (Int, Int) -> Location
Location Int
0 (Int
0,Int
0)
----------------------------------------------------------------------------
newtype Lexer token
  = Lexer
  { forall token.
Lexer token -> Stream -> Either LexerError (token, Stream)
runLexer
      :: Stream
      -> Either LexerError (token, Stream)
  }
----------------------------------------------------------------------------
oops :: Lexer token
oops :: forall token. Lexer token
oops = (Stream -> Either LexerError (token, Stream)) -> Lexer token
forall token.
(Stream -> Either LexerError (token, Stream)) -> Lexer token
Lexer ((Stream -> Either LexerError (token, Stream)) -> Lexer token)
-> (Stream -> Either LexerError (token, Stream)) -> Lexer token
forall a b. (a -> b) -> a -> b
$ \Stream
s -> LexerError -> Either LexerError (token, Stream)
forall a b. a -> Either a b
Left (Stream -> LexerError
streamError Stream
s)
----------------------------------------------------------------------------
streamError :: Stream -> LexerError
streamError :: Stream -> LexerError
streamError (Stream MisoString
xs Location
l) = MisoString -> Location -> LexerError
unexpected MisoString
xs Location
l
----------------------------------------------------------------------------
mkStream :: MisoString -> Stream
mkStream :: MisoString -> Stream
mkStream MisoString
xs = MisoString -> Location -> Stream
Stream MisoString
xs Location
initialLocation
----------------------------------------------------------------------------
data Stream
  = Stream
  { Stream -> MisoString
stream :: MisoString
  , Stream -> Location
currentLocation :: Location
  } deriving Stream -> Stream -> Bool
(Stream -> Stream -> Bool)
-> (Stream -> Stream -> Bool) -> Eq Stream
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: Stream -> Stream -> Bool
== :: Stream -> Stream -> Bool
$c/= :: Stream -> Stream -> Bool
/= :: Stream -> Stream -> Bool
Eq
----------------------------------------------------------------------------
data Located token
  = Located
  { forall token. Located token -> token
token :: token
  , forall token. Located token -> Location
location :: Location
  } deriving Located token -> Located token -> Bool
(Located token -> Located token -> Bool)
-> (Located token -> Located token -> Bool) -> Eq (Located token)
forall token. Eq token => Located token -> Located token -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: forall token. Eq token => Located token -> Located token -> Bool
== :: Located token -> Located token -> Bool
$c/= :: forall token. Eq token => Located token -> Located token -> Bool
/= :: Located token -> Located token -> Bool
Eq
----------------------------------------------------------------------------
instance Show token => Show (Located token) where
  show :: Located token -> String
show (Located token
t Location
l) = Location -> String
forall a. Show a => a -> String
show Location
l String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
" " String -> ShowS
forall a. Semigroup a => a -> a -> a
<> token -> String
forall a. Show a => a -> String
show token
t
----------------------------------------------------------------------------
instance Functor Lexer where
  fmap :: forall a b. (a -> b) -> Lexer a -> Lexer b
fmap a -> b
f (Lexer Stream -> Either LexerError (a, Stream)
l) = (Stream -> Either LexerError (b, Stream)) -> Lexer b
forall token.
(Stream -> Either LexerError (token, Stream)) -> Lexer token
Lexer ((Stream -> Either LexerError (b, Stream)) -> Lexer b)
-> (Stream -> Either LexerError (b, Stream)) -> Lexer b
forall a b. (a -> b) -> a -> b
$ \Stream
input -> do
    (t, x) <- Stream -> Either LexerError (a, Stream)
l Stream
input
    pure (f t, x)
----------------------------------------------------------------------------
instance Applicative Lexer where
  pure :: forall a. a -> Lexer a
pure a
x = (Stream -> Either LexerError (a, Stream)) -> Lexer a
forall token.
(Stream -> Either LexerError (token, Stream)) -> Lexer token
Lexer ((Stream -> Either LexerError (a, Stream)) -> Lexer a)
-> (Stream -> Either LexerError (a, Stream)) -> Lexer a
forall a b. (a -> b) -> a -> b
$ \Stream
input -> (a, Stream) -> Either LexerError (a, Stream)
forall a. a -> Either LexerError a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (a
x, Stream
input)
  Lexer Stream -> Either LexerError (a -> b, Stream)
l1 <*> :: forall a b. Lexer (a -> b) -> Lexer a -> Lexer b
<*> Lexer Stream -> Either LexerError (a, Stream)
l2 = (Stream -> Either LexerError (b, Stream)) -> Lexer b
forall token.
(Stream -> Either LexerError (token, Stream)) -> Lexer token
Lexer ((Stream -> Either LexerError (b, Stream)) -> Lexer b)
-> (Stream -> Either LexerError (b, Stream)) -> Lexer b
forall a b. (a -> b) -> a -> b
$ \Stream
input -> do
    (f, x) <- Stream -> Either LexerError (a -> b, Stream)
l1 Stream
input
    (a, y) <- l2 x
    pure (f a, y)
----------------------------------------------------------------------------
instance Monad Lexer where
  Lexer a
m >>= :: forall a b. Lexer a -> (a -> Lexer b) -> Lexer b
>>= a -> Lexer b
f = (Stream -> Either LexerError (b, Stream)) -> Lexer b
forall token.
(Stream -> Either LexerError (token, Stream)) -> Lexer token
Lexer ((Stream -> Either LexerError (b, Stream)) -> Lexer b)
-> (Stream -> Either LexerError (b, Stream)) -> Lexer b
forall a b. (a -> b) -> a -> b
$ \Stream
input -> do
    (x, s) <- Lexer a -> Stream -> Either LexerError (a, Stream)
forall token.
Lexer token -> Stream -> Either LexerError (token, Stream)
runLexer Lexer a
m Stream
input
    runLexer (f x) s
----------------------------------------------------------------------------
instance Alternative Lexer where
  empty :: forall token. Lexer token
empty = (Stream -> Either LexerError (a, Stream)) -> Lexer a
forall token.
(Stream -> Either LexerError (token, Stream)) -> Lexer token
Lexer ((Stream -> Either LexerError (a, Stream)) -> Lexer a)
-> (Stream -> Either LexerError (a, Stream)) -> Lexer a
forall a b. (a -> b) -> a -> b
$ \(Stream MisoString
s Location
l)  -> LexerError -> Either LexerError (a, Stream)
forall a b. a -> Either a b
Left (MisoString -> Location -> LexerError
unexpected MisoString
s Location
l)
  Lexer Stream -> Either LexerError (a, Stream)
l1 <|> :: forall a. Lexer a -> Lexer a -> Lexer a
<|> Lexer Stream -> Either LexerError (a, Stream)
l2 = (Stream -> Either LexerError (a, Stream)) -> Lexer a
forall token.
(Stream -> Either LexerError (token, Stream)) -> Lexer token
Lexer ((Stream -> Either LexerError (a, Stream)) -> Lexer a)
-> (Stream -> Either LexerError (a, Stream)) -> Lexer a
forall a b. (a -> b) -> a -> b
$ \Stream
input ->
    case (Stream -> Either LexerError (a, Stream)
l1 Stream
input, Stream -> Either LexerError (a, Stream)
l2 Stream
input) of
      (Either LexerError (a, Stream)
res, Left LexerError
_) -> Either LexerError (a, Stream)
res
      (Left LexerError
_, Either LexerError (a, Stream)
res) -> Either LexerError (a, Stream)
res
      (Right (a
x, Stream MisoString
s Location
sl), Right (a
y,Stream MisoString
t Location
tl)) ->
        if MisoString -> Int
MS.length MisoString
s Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= MisoString -> Int
MS.length MisoString
t
        then (a, Stream) -> Either LexerError (a, Stream)
forall a b. b -> Either a b
Right (a
x, MisoString -> Location -> Stream
Stream MisoString
s Location
sl)
        else (a, Stream) -> Either LexerError (a, Stream)
forall a b. b -> Either a b
Right (a
y, MisoString -> Location -> Stream
Stream MisoString
t Location
tl)
----------------------------------------------------------------------------
peek :: Lexer (Maybe Char)
peek :: Lexer (Maybe Char)
peek = (Stream -> Either LexerError (Maybe Char, Stream))
-> Lexer (Maybe Char)
forall token.
(Stream -> Either LexerError (token, Stream)) -> Lexer token
Lexer ((Stream -> Either LexerError (Maybe Char, Stream))
 -> Lexer (Maybe Char))
-> (Stream -> Either LexerError (Maybe Char, Stream))
-> Lexer (Maybe Char)
forall a b. (a -> b) -> a -> b
$ \Stream
ys ->
  (Maybe Char, Stream) -> Either LexerError (Maybe Char, Stream)
forall a. a -> Either LexerError a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ((Maybe Char, Stream) -> Either LexerError (Maybe Char, Stream))
-> (Maybe Char, Stream) -> Either LexerError (Maybe Char, Stream)
forall a b. (a -> b) -> a -> b
$ case Stream
ys of
    Stream MisoString
xs Location
l ->
      case MisoString -> Maybe (Char, MisoString)
MS.uncons MisoString
xs of
        Maybe (Char, MisoString)
Nothing -> (Maybe Char
forall a. Maybe a
Nothing, MisoString -> Location -> Stream
Stream MisoString
forall a. Monoid a => a
mempty Location
l)
        Just (Char
z,MisoString
zs) -> (Char -> Maybe Char
forall a. a -> Maybe a
Just Char
z, MisoString -> Location -> Stream
Stream (Char -> MisoString
MS.singleton Char
z MisoString -> MisoString -> MisoString
forall a. Semigroup a => a -> a -> a
<> MisoString
zs) Location
l)
----------------------------------------------------------------------------
satisfy :: (Char -> Bool) -> Lexer Char
satisfy :: (Char -> Bool) -> Lexer Char
satisfy Char -> Bool
predicate = (Stream -> Either LexerError (Char, Stream)) -> Lexer Char
forall token.
(Stream -> Either LexerError (token, Stream)) -> Lexer token
Lexer ((Stream -> Either LexerError (Char, Stream)) -> Lexer Char)
-> (Stream -> Either LexerError (Char, Stream)) -> Lexer Char
forall a b. (a -> b) -> a -> b
$ \Stream
ys ->
  case Stream
ys of
    Stream MisoString
s Location
l ->
      case MisoString -> Maybe (Char, MisoString)
MS.uncons MisoString
s of
        Maybe (Char, MisoString)
Nothing -> LexerError -> Either LexerError (Char, Stream)
forall a b. a -> Either a b
Left (MisoString -> Location -> LexerError
unexpected MisoString
s Location
l)
        Just (Char
z,MisoString
zs)
          | Char -> Bool
predicate Char
z -> (Char, Stream) -> Either LexerError (Char, Stream)
forall a b. b -> Either a b
Right (Char
z, MisoString -> Location -> Stream
Stream MisoString
zs Location
l)
          | Bool
otherwise -> LexerError -> Either LexerError (Char, Stream)
forall a b. a -> Either a b
Left (MisoString -> Location -> LexerError
unexpected MisoString
zs Location
l)
----------------------------------------------------------------------------
unexpected :: MisoString -> Location -> LexerError
unexpected :: MisoString -> Location -> LexerError
unexpected MisoString
xs Location
loc | MisoString -> Bool
MS.null MisoString
xs = Location -> LexerError
UnexpectedEOF Location
loc
unexpected MisoString
cs Location
loc = MisoString -> Location -> LexerError
LexerError MisoString
cs Location
loc
----------------------------------------------------------------------------
getInput :: Lexer Stream
getInput :: Lexer Stream
getInput = (Stream -> Either LexerError (Stream, Stream)) -> Lexer Stream
forall token.
(Stream -> Either LexerError (token, Stream)) -> Lexer token
Lexer ((Stream -> Either LexerError (Stream, Stream)) -> Lexer Stream)
-> (Stream -> Either LexerError (Stream, Stream)) -> Lexer Stream
forall a b. (a -> b) -> a -> b
$ \Stream
s -> (Stream, Stream) -> Either LexerError (Stream, Stream)
forall a b. b -> Either a b
Right (Stream
s, Stream
s)
----------------------------------------------------------------------------
putInput :: Stream -> Lexer ()
putInput :: Stream -> Lexer ()
putInput Stream
s = (Stream -> Either LexerError ((), Stream)) -> Lexer ()
forall token.
(Stream -> Either LexerError (token, Stream)) -> Lexer token
Lexer ((Stream -> Either LexerError ((), Stream)) -> Lexer ())
-> (Stream -> Either LexerError ((), Stream)) -> Lexer ()
forall a b. (a -> b) -> a -> b
$ \Stream
_ -> ((), Stream) -> Either LexerError ((), Stream)
forall a b. b -> Either a b
Right ((), Stream
s)
----------------------------------------------------------------------------
getLocation :: Lexer Location
getLocation :: Lexer Location
getLocation = (Stream -> Either LexerError (Location, Stream)) -> Lexer Location
forall token.
(Stream -> Either LexerError (token, Stream)) -> Lexer token
Lexer ((Stream -> Either LexerError (Location, Stream))
 -> Lexer Location)
-> (Stream -> Either LexerError (Location, Stream))
-> Lexer Location
forall a b. (a -> b) -> a -> b
$ \(Stream MisoString
s Location
l) -> (Location, Stream) -> Either LexerError (Location, Stream)
forall a. a -> Either LexerError a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Location
l, MisoString -> Location -> Stream
Stream MisoString
s Location
l)
----------------------------------------------------------------------------
setLocation :: Location -> Lexer ()
setLocation :: Location -> Lexer ()
setLocation Location
l = (Stream -> Either LexerError ((), Stream)) -> Lexer ()
forall token.
(Stream -> Either LexerError (token, Stream)) -> Lexer token
Lexer ((Stream -> Either LexerError ((), Stream)) -> Lexer ())
-> (Stream -> Either LexerError ((), Stream)) -> Lexer ()
forall a b. (a -> b) -> a -> b
$ \(Stream MisoString
s Location
_) -> ((), Stream) -> Either LexerError ((), Stream)
forall a. a -> Either LexerError a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ((), MisoString -> Location -> Stream
Stream MisoString
s Location
l)
----------------------------------------------------------------------------
modifyInput :: (Stream -> Stream) -> Lexer ()
modifyInput :: (Stream -> Stream) -> Lexer ()
modifyInput Stream -> Stream
f = do
  s <- Lexer Stream
getInput
  putInput (f s)
----------------------------------------------------------------------------
char :: Char -> Lexer Char
char :: Char -> Lexer Char
char Char
c = (Char -> Bool) -> Lexer Char
satisfy (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
c)
----------------------------------------------------------------------------
string' :: String -> Lexer String
string' :: String -> Lexer String
string' = (Char -> Lexer Char) -> String -> Lexer String
forall (t :: * -> *) (f :: * -> *) a b.
(Traversable t, Applicative f) =>
(a -> f b) -> t a -> f (t b)
forall (f :: * -> *) a b.
Applicative f =>
(a -> f b) -> [a] -> f [b]
traverse Char -> Lexer Char
char
----------------------------------------------------------------------------
string :: MisoString -> Lexer MisoString
string :: MisoString -> Lexer MisoString
string MisoString
prefix = (Stream -> Either LexerError (MisoString, Stream))
-> Lexer MisoString
forall token.
(Stream -> Either LexerError (token, Stream)) -> Lexer token
Lexer ((Stream -> Either LexerError (MisoString, Stream))
 -> Lexer MisoString)
-> (Stream -> Either LexerError (MisoString, Stream))
-> Lexer MisoString
forall a b. (a -> b) -> a -> b
$ \Stream
s ->
  case Stream
s of
    Stream MisoString
ys Location
l
      | MisoString
prefix MisoString -> MisoString -> Bool
`MS.isPrefixOf` MisoString
ys ->
          (MisoString, Stream) -> Either LexerError (MisoString, Stream)
forall a b. b -> Either a b
Right (MisoString
prefix, MisoString -> Location -> Stream
Stream (Int -> MisoString -> MisoString
MS.drop (MisoString -> Int
MS.length MisoString
prefix) MisoString
ys) Location
l)
      | Bool
otherwise ->
          LexerError -> Either LexerError (MisoString, Stream)
forall a b. a -> Either a b
Left (MisoString -> Location -> LexerError
unexpected MisoString
ys Location
l)
----------------------------------------------------------------------------
withLocation :: ToMisoString token => Lexer token -> Lexer (Located token)
withLocation :: forall token.
ToMisoString token =>
Lexer token -> Lexer (Located token)
withLocation Lexer token
lexer = do
  result <- Lexer token
lexer
  let
    adjustLoc :: Location -> MisoString -> Location
    adjustLoc Location
l = (Location -> Char -> Location)
-> Location -> MisoString -> Location
forall a. (a -> Char -> a) -> a -> MisoString -> a
MS.foldl' Location -> Char -> Location
adjust (Location -> Location
next Location
l)

  setLocation =<< adjustLoc <$> getLocation <*> pure (MS.ms result)
  Located result <$> getLocation
    where
      next :: Location -> Location
      next :: Location -> Location
next (Location Int
l (Int
_, Int
end)) = Int -> (Int, Int) -> Location
Location Int
l (Int
end, Int
end)

      adjust :: Location -> Char -> Location
      adjust :: Location -> Char -> Location
adjust (Location Int
l (Int
_, Int
_)) Char
'\n'       = Int -> (Int, Int) -> Location
Location (Int
l Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) (Int
1,Int
1)
      adjust (Location Int
l (Int
start, Int
end)) Char
'\t' = Int -> (Int, Int) -> Location
Location Int
l (Int
start, Int
end Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
8)
      adjust (Location Int
l (Int
start, Int
end))   Char
_  = Int -> (Int, Int) -> Location
Location Int
l (Int
start, Int
end Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
----------------------------------------------------------------------------