-
Notifications
You must be signed in to change notification settings - Fork 144
Implemented TH splices for validated ByteString literals #712
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,6 +3,7 @@ | |
|
||
{-# OPTIONS_HADDOCK not-home #-} | ||
|
||
{-# LANGUAGE ConstraintKinds #-} | ||
{-# LANGUAGE TemplateHaskellQuotes #-} | ||
{-# LANGUAGE TypeFamilies #-} | ||
{-# LANGUAGE UnliftedFFITypes #-} | ||
|
@@ -42,6 +43,7 @@ module Data.ByteString.Internal.Type ( | |
unpackChars, unpackAppendCharsLazy, unpackAppendCharsStrict, | ||
unsafePackAddress, unsafePackLenAddress, | ||
unsafePackLiteral, unsafePackLenLiteral, | ||
literalFromOctetString, literalFromHex, | ||
|
||
-- * Low level imperative construction | ||
empty, | ||
|
@@ -152,8 +154,9 @@ import Data.String (IsString(..)) | |
|
||
import Control.Exception (assert, throw, Exception) | ||
|
||
import Data.Bits ((.&.)) | ||
import Data.Bits ((.|.), (.&.), complement, shiftL) | ||
import Data.Char (ord) | ||
import Data.Foldable (foldr') | ||
import Data.Word | ||
|
||
import Data.Data (Data(..), mkConstr, mkDataType, Constr, DataType, Fixity(Prefix), constrIndex) | ||
|
@@ -197,6 +200,14 @@ import GHC.ForeignPtr (unsafeWithForeignPtr) | |
|
||
import qualified Language.Haskell.TH.Lib as TH | ||
import qualified Language.Haskell.TH.Syntax as TH | ||
import Language.Haskell.TH.Syntax (Lift, TExp) | ||
#if __GLASGOW_HASKELL__ >= 900 | ||
import Language.Haskell.TH.Syntax (Code, Quote) | ||
#endif | ||
|
||
#if !MIN_VERSION_base(4,13,0) | ||
import Control.Monad.Fail (MonadFail) | ||
#endif | ||
|
||
#if !HS_unsafeWithForeignPtr_AVAILABLE | ||
unsafeWithForeignPtr :: ForeignPtr a -> (Ptr a -> IO b) -> IO b | ||
|
@@ -359,7 +370,7 @@ byteStringDataType :: DataType | |
byteStringDataType = mkDataType "Data.ByteString.ByteString" [packConstr] | ||
|
||
-- | @since 0.11.2.0 | ||
instance TH.Lift ByteString where | ||
instance Lift ByteString where | ||
#if MIN_VERSION_template_haskell(2,16,0) | ||
-- template-haskell-2.16 first ships with ghc-8.10 | ||
lift (BS ptr len) = [| unsafePackLenLiteral |] | ||
|
@@ -530,6 +541,104 @@ packUptoLenChars len cs0 = | |
go !p (c:cs) = pokeFp p (c2w c) >> go (p `plusForeignPtr` 1) cs | ||
in go p0 cs0 | ||
|
||
#if __GLASGOW_HASKELL__ < 900 | ||
type Quote m = (TH.Q ~ m) | ||
type Code m a = m (TExp a) | ||
#endif | ||
|
||
liftTyped :: forall a m. (MonadFail m, Quote m, Lift a) => a -> Code m a | ||
#if MIN_VERSION_template_haskell(2,17,0) | ||
liftTyped = TH.liftTyped | ||
|
||
liftCode :: forall a m. (MonadFail m, Quote m) => m (TExp a) -> Code m a | ||
liftCode = TH.liftCode | ||
#else | ||
liftTyped = TH.unsafeTExpCoerce . TH.lift | ||
|
||
liftCode :: forall a m. (MonadFail m, Quote m) => m TH.Exp -> Code m a | ||
liftCode = TH.unsafeTExpCoerce | ||
#endif | ||
|
||
data S2W = Octets {-# UNPACK #-} !Int [Word8] | ||
-- ^ Decoded some octets (<= 0xFF) | ||
| Hichar {-# UNPACK #-} !Int {-# UNPACK #-} !Word | ||
-- ^ Found a high char (> 0xFF) | ||
|
||
data H2W = Hex {-# UNPACK #-} !Int [Word8] | ||
-- ^ Decoded some full bytes (nibble pairs) | ||
| Odd {-# UNPACK #-} !Int {-# UNPACK #-} !Word [Word8] | ||
-- ^ Decoded a nibble plus some full bytes | ||
| Bad {-# UNPACK #-} !Int {-# UNPACK #-} !Word | ||
-- ^ Found a non hex-digit character | ||
|
||
-- | Template Haskell splice to convert string constants to compile-time | ||
vdukhovni marked this conversation as resolved.
Show resolved
Hide resolved
|
||
-- ByteString literals. Unlike the 'IsString' instance, the input string | ||
-- is validated to ensure that each character is a valid /octet/, i.e. is | ||
-- at most @0xFF@ (255). | ||
-- | ||
-- Example: | ||
-- | ||
-- > :set -XTemplateHaskell | ||
-- > ehloCmd :: ByteString | ||
-- > ehloCmd = $$(literalFromOctetString "EHLO") | ||
-- | ||
literalFromOctetString :: (MonadFail m, Quote m) => String -> Code m ByteString | ||
literalFromOctetString "" = [||empty||] | ||
literalFromOctetString s = case foldr' op (Octets 0 []) s of | ||
Octets n ws -> liftTyped (unsafePackLenBytes n ws) | ||
Hichar i w -> liftCode $ fail $ "non-octet character '\\" ++ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @TeofilC Would this There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks for the headsup. This should be fine. |
||
show w ++ "' at offset: " ++ show i | ||
where | ||
op :: Char -> S2W -> S2W | ||
op (fromIntegral . fromEnum -> !(w :: Word)) acc | ||
| w <= 0xff = case acc of | ||
Octets i ws -> Octets (i + 1) (fromIntegral w : ws) | ||
clyring marked this conversation as resolved.
Show resolved
Hide resolved
|
||
Hichar i w' -> Hichar (i + 1) w' | ||
| otherwise = Hichar 0 w | ||
|
||
-- | Template Haskell splice to convert hex-encoded string constants to compile-time | ||
-- ByteString literals. The input string is validated to ensure that it consists of | ||
-- an even number of valid hexadecimal digits (case insensitive). | ||
-- | ||
-- Example: | ||
-- | ||
-- > :set -XTemplateHaskell | ||
-- > ehloCmd :: ByteString | ||
-- > ehloCmd = $$(literalFromHex "45484c4F") | ||
-- | ||
literalFromHex :: (MonadFail m, Quote m) => String -> Code m ByteString | ||
literalFromHex "" = [||empty||] | ||
literalFromHex s = | ||
case foldr' op (Hex 0 []) s of | ||
Hex n ws -> liftTyped (unsafePackLenBytes n ws) | ||
Odd i _ _ -> liftCode $ fail $ "Odd input length: " ++ show (1 + 2 * i) | ||
Bad i w -> liftCode $ fail $ "Non-hexadecimal character '\\" ++ | ||
show w ++ "' at offset: " ++ show i | ||
where | ||
-- Convert char to decimal digit value if result in [0, 9]. | ||
-- Otherwise, for hex digits, it remains to: | ||
-- - fold upper and lower case by masking 0x20, | ||
-- - subtract another 0x11 (0x41 total), | ||
-- - check that result in [0,5] | ||
-- - add 0xa | ||
-- | ||
c2d :: Char -> Word | ||
c2d c = fromIntegral (fromEnum c) - 0x30 | ||
|
||
op :: Char -> H2W -> H2W | ||
op (c2d -> !(d :: Word)) acc | ||
| d <= 9 = case acc of | ||
Hex i ws -> Odd i d ws | ||
Odd i lo ws -> Hex (i+1) $ fromIntegral ((d `shiftL` 4 .|. lo)) : ws | ||
Bad i w -> Bad (i + 1) w | ||
| l <- (d .&. complement 0x20) - 0x11 | ||
, l <= 5 | ||
, x <- l + 0xa = case acc of | ||
Hex i ws -> Odd i (l + 0xa) ws | ||
Odd i lo ws -> Hex (i+ 1) $ fromIntegral (x `shiftL` 4 .|. lo) : ws | ||
Bad i w -> Bad (i + 1) w | ||
| otherwise = Bad 0 (d + 0x30) | ||
|
||
-- Unpacking bytestrings into lists efficiently is a tradeoff: on the one hand | ||
-- we would like to write a tight loop that just blasts the list into memory, on | ||
-- the other hand we want it to be unpacked lazily so we don't end up with a | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The unqualified
foldr'
briefly confused me. (Actually, why are these quote-generators defined inD.B.Internal.Type
instead of the exposedData.ByteString
?)