Skip to content

Commit 61ae59c

Browse files
authored
Build with MicroHs (#45)
* Mostly ifdefs and changes to imports * Use a list to store the Alt branches on non-GHC, where we use SmallArray on GHC * Implement a finishHex that does not depend on GHC's representation of Natural * Add MicroHs CI
1 parent e804c11 commit 61ae59c

File tree

8 files changed

+262
-38
lines changed

8 files changed

+262
-38
lines changed

.github/workflows/mhs-ci.yml

+43
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
name: MicroHs CI
2+
3+
# Adapted from haskell-ci.yml and
4+
# https://github.com/haskell/containers/blob/027ea3c6878e53efe70d3dd28fc9070dc0d989b7/.github/workflows/mhs-ci.yml
5+
6+
on:
7+
push:
8+
branches:
9+
- master
10+
pull_request:
11+
branches:
12+
- master
13+
14+
jobs:
15+
linux:
16+
name: MicroHs CI - Linux
17+
runs-on: ubuntu-24.04
18+
timeout-minutes:
19+
60
20+
steps:
21+
- name: checkout
22+
uses: actions/checkout@v4
23+
with:
24+
path: source
25+
- name: checkout mhs
26+
uses: actions/checkout@v4
27+
with:
28+
repository: augustss/MicroHs
29+
# Update ref to a proper version later
30+
ref: cb8bc5d609771014dcc32f3052a59066ee13e3ab
31+
path: mhs
32+
- name: make and install mhs
33+
run: |
34+
cd mhs
35+
make minstall
36+
- name: compile and install
37+
run: |
38+
PATH="$HOME/.mcabal/bin:$PATH"
39+
cd source
40+
mcabal -r install
41+
- name: cleanup
42+
run: |
43+
rm -rf "$HOME/.mcabal"

parser-regex.cabal

+17-2
Original file line numberDiff line numberDiff line change
@@ -65,14 +65,29 @@ library
6565
base >= 4.15 && < 5.0
6666
, containers >= 0.6.4 && < 0.9
6767
, deepseq >= 1.4.5 && < 1.6
68-
, ghc-bignum >= 1.1 && < 1.4
69-
, primitive >= 0.7.3 && < 0.10
7068
, text >= 2.0.1 && < 2.2
7169
, transformers >= 0.5.6 && < 0.7
7270

7371
hs-source-dirs: src
7472
default-language: Haskell2010
7573

74+
other-extensions:
75+
BangPatterns
76+
CPP
77+
GADTs
78+
RankNTypes
79+
ScopedTypeVariables
80+
81+
if impl(ghc)
82+
build-depends:
83+
ghc-bignum >= 1.1 && < 1.4
84+
, primitive >= 0.7.3 && < 0.10
85+
86+
if impl(mhs)
87+
build-depends:
88+
containers >= 0.8
89+
, transformers >= 0.6.1.2
90+
7691
test-suite test
7792
import: warnings
7893

src/Regex/Internal/CharSet.hs

+9
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
1+
{-# LANGUAGE CPP #-}
12
{-# LANGUAGE BangPatterns #-}
3+
#ifdef __GLASGOW_HASKELL__
24
{-# LANGUAGE MagicHash #-}
5+
#endif
36
{-# OPTIONS_HADDOCK not-home #-}
47

58
-- | This is an internal module. You probably don't need to import this. Import
@@ -40,7 +43,9 @@ import Data.String (IsString(..))
4043
import qualified Data.Foldable as F
4144
import qualified Data.IntMap.Strict as IM
4245
import Data.Semigroup (Semigroup(..), stimesIdempotentMonoid)
46+
#ifdef __GLASGOW_HASKELL__
4347
import GHC.Exts (Int(..), Char(..), chr#)
48+
#endif
4449

4550
-- TODO: Evaluate other set libraries.
4651
-- Possible candidates: charset, rangeset
@@ -222,7 +227,11 @@ complementRanges = go
222227
unsafePred c = unsafeChr (ord c - 1)
223228

224229
unsafeChr :: Int -> Char
230+
#ifdef __GLASGOW_HASKELL__
225231
unsafeChr (I# i#) = C# (chr# i#)
232+
#else
233+
unsafeChr = toEnum
234+
#endif
226235

227236
------------
228237
-- Testing

src/Regex/Internal/List.hs

+1-1
Original file line numberDiff line numberDiff line change
@@ -328,7 +328,7 @@ parseSure p = fromMaybe parseSureError . parse p
328328
{-# INLINE parseSure #-}
329329

330330
parseSureError :: a
331-
parseSureError = errorWithoutStackTrace
331+
parseSureError = error
332332
"Regex.List.parseSure: parse failed; if parsing can fail use 'parse' instead"
333333

334334
reParseSure :: RE c a -> [c] -> a

src/Regex/Internal/Num.hs

+50-10
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,13 @@ module Regex.Internal.Num
1717
import Control.Applicative ((<|>), empty)
1818
import qualified Control.Applicative as Ap
1919
import Control.Monad (replicateM_, void)
20-
import Data.Primitive.PrimArray
21-
(PrimArray(..), newPrimArray, runPrimArray, writePrimArray)
2220
import Data.Bits ((.&.), countLeadingZeros, unsafeShiftL, unsafeShiftR)
2321
import Numeric.Natural (Natural)
22+
#ifdef __GLASGOW_HASKELL__
23+
import Data.Primitive.PrimArray
24+
(PrimArray(..), newPrimArray, runPrimArray, writePrimArray)
2425
import qualified GHC.Num.Natural as Nat
26+
#endif
2527

2628
import Regex.Internal.Regex (RE)
2729
import qualified Regex.Internal.Regex as R
@@ -221,10 +223,14 @@ mkWordRangeBase base quotRemPowBase powBase baseLen d low high
221223
-- Parsing hexadecimal is simple, there is no base conversion involved.
222224
--
223225
-- Step 1: Accumulate the hex digits, packed into Words
224-
-- Step 2: Initialize a ByteArray and fill it with the Words
225-
--
226-
-- Because we create a Nat directly, this makes us depend on ghc-bignum and
227-
-- GHC>=9.0.
226+
-- Step 2:
227+
-- * GHC: Initialize a ByteArray and fill it with the Words. This takes
228+
-- O(n) time. Because we create a Nat directly, this makes us depend on
229+
-- ghc-bignum and GHC>=9.0.
230+
-- * Not GHC: Do it like we do for decimal, without being aware of the
231+
-- representation of Naturals, but replace the base multiplications with
232+
-- shifts. If it is a binary representation, this takes O(n log n) time
233+
-- instead of O(n^2).
228234

229235
stepHex :: NatParseState -> Word -> NatParseState
230236
stepHex (NatParseState acc len ns) d
@@ -235,6 +241,7 @@ finishHex
235241
:: Word -- ^ Leading digit
236242
-> NatParseState -- ^ Everything else
237243
-> Natural
244+
#ifdef __GLASGOW_HASKELL__
238245
finishHex !ld (NatParseState acc0 len0 ns0) = case ns0 of
239246
WNil -> Nat.naturalFromWord (ld `unsafeShiftL` (4*(len0-1)) + acc0)
240247
WCons n ns1 ->
@@ -268,6 +275,37 @@ finishHex !ld (NatParseState acc0 len0 ns0) = case ns0 of
268275
-- * Natural invariants:
269276
-- * If the value fits in a word, it must be NS (via naturalFromWord here).
270277
-- * Otherwise, use a ByteArray# with NB. The highest Word must not be 0.
278+
#else
279+
finishHex !ld (NatParseState acc0 len0 ns0) = combine acc0 len0 ns0
280+
where
281+
combine !acc !len ns = case ns of
282+
WNil -> mul16Pow (w2n ld) (len-1) + w2n acc
283+
WCons n ns1 ->
284+
mul16Pow (combine1 maxBoundWordHexLen (go n ns1)) len + w2n acc
285+
where
286+
go n WNil =
287+
let !n' = mul16Pow (w2n ld) (maxBoundWordHexLen - 1) + w2n n
288+
in [n']
289+
go n (WCons m WNil) =
290+
let !n' = mul16Pow (w2n ld) (2 * maxBoundWordHexLen - 1) +
291+
mul16Pow (w2n m) maxBoundWordHexLen +
292+
w2n n
293+
in [n']
294+
go n (WCons m (WCons n1 ns1)) =
295+
let !n' = mul16Pow (w2n m) maxBoundWordHexLen + w2n n
296+
in n' : go n1 ns1
297+
298+
combine1 :: Int -> [Natural] -> Natural
299+
combine1 !_ [n] = n
300+
combine1 !numDigs ns1 = combine1 numDigs1 (go ns1)
301+
where
302+
numDigs1 = 2 * numDigs
303+
go (n:m:ns) = let !n' = mul16Pow m numDigs1 + n in n' : go ns
304+
go ns = ns
305+
306+
mul16Pow :: Natural -> Int -> Natural
307+
mul16Pow x p = unsafeShiftL x (4 * p)
308+
#endif
271309

272310
-----------------------------
273311
-- Parsing decimal Naturals
@@ -282,9 +320,10 @@ finishHex !ld (NatParseState acc0 len0 ns0) = case ns0 of
282320
--
283321
-- The obvious foldl approach is O(n^2) for n digits. The combine approach
284322
-- performs O(n/2^i) multiplications of size O(2^i), for i in [0..log_2(n)].
285-
-- If multiplication is O(n^k), this is also O(n^k). We have k < 2,
286-
-- thanks to subquadratic multiplication of GMP-backed Naturals:
287-
-- https://gmplib.org/manual/Multiplication-Algorithms.
323+
-- If multiplication is O(n^k), this is also O(n^k).
324+
--
325+
-- On GHC, we have k < 2, thanks to subquadratic multiplication of GMP-backed
326+
-- Naturals: https://gmplib.org/manual/Multiplication-Algorithms.
288327
--
289328
-- For reference, here's how GMP converts any base (including 10) to a natural
290329
-- using broadly the same approach.
@@ -311,6 +350,7 @@ finishDec !ld (NatParseState acc0 len0 ns0) = combine acc0 len0 ns0
311350
go n (WCons m (WCons n1 ns1)) =
312351
let !n' = w2n m * safeBaseDec + w2n n in n' : go n1 ns1
313352

353+
combine1 :: Natural -> [Natural] -> Natural
314354
combine1 _ [n] = n
315355
combine1 base ns1 = combine1 base1 (go ns1)
316356
where
@@ -412,7 +452,7 @@ pow10 p = case p of
412452
18 -> 1000000000000000000
413453
19 -> 10000000000000000000
414454
#endif
415-
_ -> errorWithoutStackTrace "Regex.Internal.Int.pow10: p too large"
455+
_ -> error "Regex.Internal.Int.pow10: p too large"
416456
#else
417457
#error "unsupported word size"
418458
#endif

src/Regex/Internal/Parser.hs

+29-7
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
{-# LANGUAGE CPP #-}
12
{-# LANGUAGE BangPatterns #-}
23
{-# LANGUAGE GADTs #-}
34
{-# LANGUAGE RankNTypes #-}
@@ -32,10 +33,13 @@ import Control.Monad.Trans.State.Strict
3233
( State, StateT, evalState, evalStateT, execState, gets, modify', state)
3334
import Control.Monad.Fix (mfix)
3435
import Data.Maybe (isJust)
36+
import qualified Data.Foldable as F
37+
import qualified Data.Traversable as T
38+
#ifdef __GLASGOW_HASKELL__
3539
import Data.Primitive.SmallArray
3640
(SmallArray, emptySmallArray, smallArrayFromList)
37-
import qualified Data.Foldable as F
3841
import qualified GHC.Exts as X
42+
#endif
3943

4044
import Regex.Internal.Regex (RE(..), Strictness(..), Greediness(..))
4145
import Regex.Internal.Unique (Unique(..), UniqueSet)
@@ -98,7 +102,7 @@ compileToParser re = case re of
98102
let (re1,re2,res) = gatherAlts re01 re02
99103
p1 <- compileToParser re1
100104
p2 <- compileToParser re2
101-
ps <- traverse compileToParser res
105+
ps <- T.traverse compileToParser res
102106
pure $ PAlt u p1 p2 (smallArrayFromList ps)
103107
RFold st gr f z re1 -> do
104108
u <- nxtU
@@ -128,7 +132,7 @@ compileToNode a re0 = go re0 (NAccept a)
128132
(re1,re2,res) = gatherAlts re01 re02
129133
n1 <- go re1 nxt1
130134
n2 <- go re2 nxt1
131-
ns <- traverse (flip go nxt1) res
135+
ns <- T.traverse (flip go nxt1) res
132136
pure $ NAlt n1 n2 (smallArrayFromList ns)
133137
RFold _ gr _ _ re1 -> goMany gr re1 nxt
134138
RMany _ _ _ _ re1 -> goMany Greedy re1 nxt
@@ -145,10 +149,10 @@ compileToNode a re0 = go re0 (NAccept a)
145149
gatherAlts :: RE c a -> RE c a -> (RE c a, RE c a, [RE c a])
146150
gatherAlts re01 re02 = case go re01 (go re02 []) of
147151
re11:re12:res -> (re11, re12, res)
148-
_ -> errorWithoutStackTrace "Regex.Internal.Parser.gatherAlts: impossible"
152+
_ -> error "Regex.Internal.Parser.gatherAlts: impossible"
149153
where
150-
go (RAlt re1 re2) = go re1 . go re2
151-
go re = (re:)
154+
go (RAlt re1 re2) acc = go re1 (go re2 acc)
155+
go re acc = re:acc
152156

153157
--------------------
154158
-- Compile bounded
@@ -414,7 +418,11 @@ type Foldr f a = forall b. (a -> b -> b) -> b -> f -> b
414418
parseFoldr :: Foldr f c -> Parser c a -> f -> Maybe a
415419
parseFoldr fr = \p xs -> prepareParser p >>= fr f finishParser xs
416420
where
417-
f c k = X.oneShot (\ !ps -> stepParser ps c >>= k)
421+
f c k =
422+
#ifdef __GLASGOW_HASKELL__
423+
X.oneShot
424+
#endif
425+
(\ !ps -> stepParser ps c >>= k)
418426
{-# INLINE parseFoldr #-}
419427

420428
-- | \(O(mn \log m)\). Run a parser given a \"@next@\" action.
@@ -481,6 +489,20 @@ unlessM mb mx = do
481489
b <- mb
482490
if b then pure () else mx
483491

492+
-----------------
493+
-- Array compat
494+
-----------------
495+
496+
#ifndef __GLASGOW_HASKELL__
497+
type SmallArray = []
498+
499+
emptySmallArray :: SmallArray a
500+
emptySmallArray = []
501+
502+
smallArrayFromList :: [a] -> SmallArray a
503+
smallArrayFromList = id
504+
#endif
505+
484506
----------
485507
-- Notes
486508
----------

src/Regex/Internal/Regex.hs

+5-4
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ import Control.Monad (void)
5151
import Data.Functor.Classes (Eq1(..), Ord1(..), Show1(..), showsUnaryWith)
5252
import Data.Semigroup (Semigroup(..))
5353
import qualified Data.Foldable as F
54+
import qualified Data.Traversable as T
5455

5556
---------------------------------
5657
-- RE and constructor functions
@@ -96,7 +97,7 @@ data RE c a where
9697
RPure :: a -> RE c a
9798
RLiftA2 :: !Strictness -> !(a1 -> a2 -> a) -> !(RE c a1) -> !(RE c a2) -> RE c a
9899
REmpty :: RE c a
99-
RAlt :: !(RE c a) -> !(RE c a) -> (RE c a)
100+
RAlt :: !(RE c a) -> !(RE c a) -> RE c a
100101
RFold :: !Strictness -> !Greediness -> !(a -> a1 -> a) -> a -> !(RE c a1) -> RE c a
101102
RMany :: !(a1 -> a) -> !(a2 -> a) -> !(a2 -> a1 -> a2) -> !a2 -> !(RE c a1) -> RE c a -- Strict and greedy implicitly
102103

@@ -128,13 +129,13 @@ instance Alternative (RE c) where
128129
-- | @(<>)@ = @liftA2 (<>)@
129130
instance Semigroup a => Semigroup (RE c a) where
130131
(<>) = Ap.liftA2 (<>)
131-
sconcat = fmap sconcat . sequenceA
132+
sconcat = fmap sconcat . T.sequenceA
132133
{-# INLINE sconcat #-}
133134

134135
-- | @mempty@ = @pure mempty@
135136
instance Monoid a => Monoid (RE c a) where
136137
mempty = pure mempty
137-
mconcat = fmap mconcat . sequenceA
138+
mconcat = fmap mconcat . T.sequenceA
138139
{-# INLINE mconcat #-}
139140
-- Use the underlying type's sconcat/mconcat because it may be more efficient
140141
-- than the default right-associative definition.
@@ -217,7 +218,7 @@ instance Functor Many where
217218
Repeat x -> Repeat (f x)
218219
Finite xs -> Finite (map f xs)
219220

220-
instance Foldable Many where
221+
instance F.Foldable Many where
221222
foldr f z m = case m of
222223
Repeat x -> let r = f x r in r
223224
Finite xs -> foldr f z xs

0 commit comments

Comments
 (0)