Skip to content

Commit c668679

Browse files
mau5mattchoutri
andauthoredMar 11, 2025··
[FLORA-794] Replace the usage of datalog with Haskell for category normalisation (#822)
feat: replaced use of datalog with haskell for category normalisation --------- Co-authored-by: Théophile Choutri <theophile@choutri.eu>
1 parent 8965122 commit c668679

File tree

19 files changed

+262
-409
lines changed

19 files changed

+262
-409
lines changed
 

‎CONTRIBUTING.md

-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ The following Haskell command-line tools will have to be installed:
1818

1919
(Some of the above packages have incompatible dependencies, so don't try to install them all at once with `cabal install`)
2020

21-
* [Soufflé datalog engine v2.2](https://github.com/souffle-lang/souffle/releases/tag/2.2): The datalog engine for package classification
2221
* `libsodium-1.0.18`: The system library that powers most of the cryptography happening in flora
2322
* `yarn`: The tool that handles the JavaScript code bases
2423
* `esbuild`: The tool that handles asset bundling

‎Dockerfile

+20-16
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
# this is a pinned ubuntu:22.04 (newer versions have incomptible
2-
# library versions for souffle)
1+
# this is a pinned ubuntu:22.04
32
FROM ubuntu@sha256:67211c14fa74f070d27cc59d69a7fa9aeff8e28ea118ef3babc295a0428a6d21
43

54
ARG GID=1000
@@ -19,7 +18,7 @@ ARG POSTGRESQL_MIGRATION_VERSION=0.2.1.8
1918
USER "root"
2019
ARG USER="local"
2120
RUN groupadd -g "$GID" -o "$USER" \
22-
&& useradd -r -u "$UID" -g "$GID" -m -s /bin/zsh "$USER"
21+
&& useradd -r -u "$UID" -g "$GID" -m -s /bin/zsh "$USER"
2322

2423
# We create the folder explicitly so that we can give nonprivileged user the appropriate access
2524
RUN mkdir /flora-server
@@ -30,7 +29,20 @@ RUN chown -R $USER:$USER /home/$USER/.cabal
3029
WORKDIR /flora-server
3130

3231
RUN apt update && \
33-
apt install -y build-essential curl libffi-dev libffi8 libgmp-dev libgmp10 libncurses-dev libncurses5 libtinfo5 git libsodium-dev pkg-config
32+
apt install -y \
33+
build-essential \
34+
curl \
35+
libffi-dev \
36+
libffi8 \
37+
libgmp-dev \
38+
libgmp10 \
39+
libncurses-dev \
40+
libncurses5 \
41+
libtinfo5 \
42+
git \
43+
libsodium-dev \
44+
pkg-config \
45+
zlib1g-dev
3446

3547
# install dependencies (pg_config, postgresql-client, yarn)
3648
ENV BOOTSTRAP_HASKELL_NONINTERACTIVE="YES"
@@ -48,18 +60,12 @@ RUN git config --global --add safe.directory "*"
4860
RUN curl --proto '=https' --tlsv1.2 -sSf https://get-ghcup.haskell.org | sh
4961

5062
RUN ghcup install hls $HLS_VERSION \
51-
&& ghcup install ghc $GHC_VERSION \
52-
&& ghcup set ghc $GHC_VERSION \
53-
&& ghcup install cabal $CABAL_VERSION
63+
&& ghcup install ghc $GHC_VERSION \
64+
&& ghcup set ghc $GHC_VERSION \
65+
&& ghcup install cabal $CABAL_VERSION
5466

5567
USER ${USER}
5668

57-
# install soufflé
58-
USER "root"
59-
RUN wget --content-disposition https://github.com/souffle-lang/souffle/releases/download/2.2/x86_64-ubuntu-2004-souffle-2.2-Linux.deb
60-
RUN apt install -f -y ./x86_64-ubuntu-2004-souffle-2.2-Linux.deb
61-
USER ${USER}
62-
6369
RUN echo $PATH
6470

6571
# install Haskell tooling (note that for cabal, it's probably better
@@ -83,10 +89,8 @@ COPY --chown=${USER} scripts/.zshrc /home/$USER/.zshrc
8389
COPY --chown=${USER} cabal.project flora.cabal cabal.project.freeze ./
8490
RUN cabal build --only-dependencies -j
8591

86-
# compile Souffle source files
92+
# copy makefile
8793
COPY --chown=${USER} Makefile ./
88-
COPY --chown=${USER} cbits ./cbits
89-
RUN make souffle
9094

9195
# copy and build the assets
9296
COPY --chown=${USER} assets ./assets

‎Makefile

+7-10
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,10 @@ init: ## Set up git hooks properly - needs calling once when cloning the repo
44
start: ## Start flora-server
55
@cabal run exe:flora-server
66

7-
build: soufflé ## Build the server
7+
build: ## Build the server
88
@cabal build
99

10-
build-release: soufflé ## Build the server for production
10+
build-release: ## Build the server for production
1111
@cabal freeze --project-file cabal.project.release
1212
@cabal build --project-file cabal.project.release
1313

@@ -91,21 +91,21 @@ db-test-provision-packages: ## Load development data in the database
9191
import-from-hackage: ## Imports every cabal file from the ./index-01 directory
9292
@cabal run -- flora-cli import-packages ./01-index
9393

94-
repl: soufflé ## Start a cabal REPL
94+
repl: ## Start a cabal REPL
9595
@cabal repl lib:flora
9696

9797
ghci: repl ## Start a cabal REPL (alias for `make repl`)
9898

99-
watch: soufflé ## Load the main library and reload on file change
99+
watch: ## Load the main library and reload on file change
100100
@ghcid --target flora-server -l
101101

102-
test: build soufflé ## Run the test suite
102+
test: ## Run the test suite
103103
./scripts/run-tests.sh
104104

105-
watch-test: soufflé ## Load the tests in ghcid and reload them on file change
105+
watch-test: ## Load the tests in ghcid and reload them on file change
106106
./scripts/run-tests.sh --watch
107107

108-
watch-server: soufflé ## Start flora-server in ghcid
108+
watch-server: ## Start flora-server in ghcid
109109
@ghcid --target=flora-server --restart="src" --test 'FloraWeb.Server.runFlora'
110110

111111
lint-hs: ## Run the code linter (HLint)
@@ -145,9 +145,6 @@ docker-enter: ## Enter the docker environment
145145
start-tmux: ## Start a Tmux session with hot code reloading
146146
./scripts/start-tmux.sh
147147

148-
soufflé: ## Generate C++ files from the Soufflé Datalog definitions
149-
cd cbits ; souffle -g categorise.{cpp,dl}
150-
151148
tags: ## Generate ctags for the project with `ghc-tags`
152149
@ghc-tags -c
153150

‎cabal.project

-6
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@ allow-newer: hashable:filepath
1515
, qrcode-juicypixels:bytestring
1616
, qrcode-juicypixels:text
1717
, servant-lucid:text
18-
, souffle-haskell:text
1918
, tasty-test-reporter:ansi-terminal
2019
, tasty-test-reporter:tasty
2120
, type-errors-pretty:base
@@ -88,11 +87,6 @@ source-repository-package
8887
location: https://github.com/saurabhnanda/odd-jobs
8988
tag: a159d7a17913725a0cb2a2251fa0b812acd3d160
9089

91-
source-repository-package
92-
type: git
93-
location: https://github.com/luc-tielen/souffle-haskell
94-
tag: 268a11283ca9293b5eacabf7a0b79d9368232478
95-
9690
source-repository-package
9791
type: git
9892
location: https://github.com/goodlyrottenapple/tasty-test-reporter

‎cabal.project.freeze

-1
Original file line numberDiff line numberDiff line change
@@ -395,7 +395,6 @@ constraints: any.Cabal ==3.10.3.0,
395395
any.some ==1.0.6,
396396
some +newtype-unsafe,
397397
any.sop-core ==0.5.0.2,
398-
any.souffle-haskell ==4.0.0,
399398
any.split ==0.2.5,
400399
any.splitmix ==0.1.1,
401400
splitmix -optimised-mixer,

‎cbits/categorise.dl

-215
This file was deleted.

‎changelog.d/794

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
synopsis: Replace the usage of datalog with Haskell for category normalisation
2+
3+
prs: #822
4+
issues: #794
5+
6+
description: {
7+
Replaces the Soufflé Datalog engine with a plain Haskell implementation of category normalisation
8+
}

‎flora.cabal

+1-5
Original file line numberDiff line numberDiff line change
@@ -67,9 +67,6 @@ common common-rts-options
6767
library
6868
import: common-extensions
6969
import: common-ghc-options
70-
extra-libraries: stdc++
71-
cxx-options: -std=c++17 -Wall -D__EMBEDDED_SOUFFLE__
72-
cxx-sources: cbits/categorise.cpp
7370
hs-source-dirs: ./src/core ./src/datatypes
7471

7572
-- cabal-fmt: expand src/core
@@ -99,7 +96,6 @@ library
9996
Flora.Environment.Config
10097
Flora.Environment.Env
10198
Flora.Import.Categories
102-
Flora.Import.Categories.Tuning
10399
Flora.Import.Package
104100
Flora.Import.Package.Bulk
105101
Flora.Import.Package.Types
@@ -148,6 +144,7 @@ library
148144
Flora.Model.User.Query
149145
Flora.Model.User.Update
150146
Flora.Monitoring
147+
Flora.Normalise
151148
Flora.QRCode
152149
Flora.Tracing
153150
JSON
@@ -215,7 +212,6 @@ library
215212
, servant-prometheus
216213
, servant-server
217214
, slugify
218-
, souffle-haskell ==4.0.0
219215
, streamly
220216
, streamly-core
221217
, tar

‎src/core/Flora/Import/Categories.hs

+6-6
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,22 @@
11
module Flora.Import.Categories where
22

33
import Control.Monad.IO.Class
4+
import Data.Text (Text)
45
import Data.Text.IO qualified as T
56
import Effectful
67
import Effectful.PostgreSQL.Transact.Effect
78

8-
import Flora.Import.Categories.Tuning as Tuning
99
import Flora.Model.Category.Types (Category, mkCategory, mkCategoryId)
1010
import Flora.Model.Category.Update (insertCategory)
11+
import Flora.Normalise
1112

1213
importCategories :: (DB :> es, IOE :> es) => Eff es ()
1314
importCategories = do
14-
liftIO $ T.putStrLn "Sourcing categories from Datalog"
15-
canonicalCategories <- liftIO Tuning.sourceCategories
16-
categories <- mapM fromCanonical canonicalCategories
15+
liftIO $ T.putStrLn "Sourcing categories"
16+
categories <- mapM fromCanonical floraCategories
1717
mapM_ insertCategory categories
1818

19-
fromCanonical :: IOE :> es => CanonicalCategory -> Eff es Category
20-
fromCanonical (CanonicalCategory slug name synopsis) = do
19+
fromCanonical :: IOE :> es => (Text, Text, Text) -> Eff es Category
20+
fromCanonical (slug, name, synopsis) = do
2121
categoryId <- liftIO mkCategoryId
2222
pure $ mkCategory categoryId name (Just slug) synopsis

‎src/core/Flora/Import/Categories/Tuning.hs

-98
This file was deleted.

‎src/core/Flora/Import/Package.hs

+17-12
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ import Data.ByteString qualified as BS
3939
import Data.List.NonEmpty qualified as NE
4040
import Data.Map (Map)
4141
import Data.Map.Strict qualified as Map
42-
import Data.Maybe
42+
import Data.Maybe as Maybe
4343
import Data.Set (Set)
4444
import Data.Set qualified as Set
4545
import Data.Text (Text, pack)
@@ -77,7 +77,7 @@ import Distribution.Types.PackageDescription ()
7777
import Distribution.Types.TestSuite
7878
import Distribution.Types.Version (Version)
7979
import Distribution.Types.VersionRange (VersionRange, withinRange)
80-
import Distribution.Utils.ShortText qualified as Cabal
80+
import Distribution.Utils.ShortText (fromShortText)
8181
import Distribution.Version qualified as Version
8282
import Effectful
8383
import Effectful.Log (Log)
@@ -94,9 +94,10 @@ import System.FilePath qualified as FilePath
9494

9595
import Effectful.Poolboy (Poolboy)
9696
import Effectful.Poolboy qualified as Poolboy
97-
import Flora.Import.Categories.Tuning qualified as Tuning
9897
import Flora.Import.Package.Types
9998
import Flora.Import.Types
99+
import Flora.Model.Category.Query as Query
100+
import Flora.Model.Category.Types
100101
import Flora.Model.Category.Update qualified as Update
101102
import Flora.Model.Component.Types as Component
102103
import Flora.Model.Package.Orphans ()
@@ -112,6 +113,7 @@ import Flora.Model.Requirement
112113
, deterministicRequirementId
113114
)
114115
import Flora.Model.User
116+
import Flora.Normalise
115117

116118
coreLibraries :: Set PackageName
117119
coreLibraries =
@@ -300,7 +302,7 @@ persistImportOutput (ImportOutput package categories release components) = State
300302
[ "package_name" .= packageName
301303
, "version" .= display release.version
302304
]
303-
persistPackage
305+
persistPackage package.packageId
304306
if Set.member (package.namespace, package.name, release.version) packageCache
305307
then do
306308
Log.logInfo "Release already present" $
@@ -317,12 +319,16 @@ persistImportOutput (ImportOutput package categories release components) = State
317319
where
318320
parallelRun :: Foldable t => (a -> Eff es ()) -> t a -> Eff es ()
319321
parallelRun f xs = forM_ xs (Poolboy.enqueue . f)
322+
320323
packageName = display package.namespace <> "/" <> display package.name
321-
persistPackage = do
322-
let packageId = package.packageId
323-
Update.upsertPackage package
324-
forM_ categories (\case Tuning.NormalisedPackageCategory cat -> Update.addToCategoryByName packageId cat)
325324

325+
persistPackage :: PackageId -> Eff es ()
326+
persistPackage packageId = do
327+
Update.upsertPackage package
328+
categoriesByName <- catMaybes <$> traverse Query.getCategoryByName categories
329+
forM_
330+
categoriesByName
331+
(\c -> Update.addToCategoryByName packageId c.name)
326332
persistComponent :: (PackageComponent, List ImportDependency) -> Eff es ()
327333
persistComponent (packageComponent, deps) = do
328334
Log.logInfo
@@ -398,9 +404,9 @@ extractPackageDataFromCabal userId repository@(repositoryName, repositoryPackage
398404
let releaseId = deterministicReleaseId packageId packageVersion
399405
timestamp <- Time.currentTime
400406
let sourceRepos = getRepoURL packageName packageDesc.sourceRepos
401-
let rawCategoryField = packageDesc ^. #category % to Cabal.fromShortText % to Text.pack
402-
let categoryList = fmap (Tuning.UserPackageCategory . Text.stripStart . Text.stripEnd) (Text.splitOn "," rawCategoryField)
403-
categories <- liftIO $ Tuning.normalisedCategories <$> Tuning.normalise categoryList
407+
let rawCategoryField = packageDesc ^. #category % to fromShortText % to Text.pack
408+
let categoryList = fmap (Text.stripStart . Text.stripEnd) (Text.splitOn "," rawCategoryField)
409+
let categories = Maybe.mapMaybe normaliseCategory categoryList
404410
let package =
405411
Package
406412
{ packageId
@@ -670,7 +676,6 @@ getVersions supportedCompilers =
670676
foldMap
671677
(\version -> Vector.foldMap (checkVersion version) supportedCompilers)
672678
versionList
673-
674679
checkVersion :: Version -> VersionRange -> Vector Version
675680
checkVersion version versionRange =
676681
if version `withinRange` versionRange

‎src/core/Flora/Import/Package/Types.hs

+2-2
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,10 @@ module Flora.Import.Package.Types where
33
import Control.DeepSeq
44
import Data.Aeson
55
import Data.List.NonEmpty (NonEmpty)
6+
import Data.Text (Text)
67
import GHC.Generics
78
import GHC.List (List)
89

9-
import Flora.Import.Categories.Tuning qualified as Tuning
1010
import Flora.Model.Component.Types
1111
import Flora.Model.Package.Types
1212
import Flora.Model.Release.Types
@@ -28,7 +28,7 @@ type DependentName = (Namespace, PackageName)
2828

2929
data ImportOutput = ImportOutput
3030
{ package :: Package
31-
, categories :: [Tuning.NormalisedPackageCategory]
31+
, categories :: [Text]
3232
, release :: Release
3333
, components :: NonEmpty (PackageComponent, List ImportDependency)
3434
}

‎src/core/Flora/Model/Category/Types.hs

-2
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ import Database.PostgreSQL.Simple
1313
import Database.PostgreSQL.Simple.FromField (FromField)
1414
import Database.PostgreSQL.Simple.ToField (ToField)
1515
import GHC.Generics
16-
import Language.Souffle.Interpreted qualified as Souffle
1716
import Servant
1817
import Text.Slugify
1918

@@ -27,7 +26,6 @@ newtype CategoryId = CategoryId {getCategoryId :: UUID}
2726

2827
newtype CategoryName = CategoryName {getCategoryName :: Text}
2928
deriving stock (Generic, Show)
30-
deriving anyclass (Souffle.Marshal)
3129
deriving
3230
(Eq, NFData, Ord)
3331
via Text

‎src/core/Flora/Model/Package/Types.hs

-2
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,6 @@ import Deriving.Aeson
4040
import Distribution.Pretty (Pretty (..))
4141
import Distribution.SPDX.License qualified as SPDX
4242
import Distribution.Types.Version (Version)
43-
import Language.Souffle.Interpreted qualified as Souffle
4443
import Lucid
4544
import Optics.Core hiding (element)
4645
import Servant (FromHttpApiData (..))
@@ -71,7 +70,6 @@ deterministicPackageId (Namespace ns) (PackageName name) =
7170

7271
newtype PackageName = PackageName Text
7372
deriving stock (Generic, Show)
74-
deriving anyclass (Souffle.Marshal)
7573
deriving
7674
(Eq, FromField, FromJSON, NFData, Ord, ToField, ToHtml, ToHttpApiData, ToJSON)
7775
via Text

‎src/core/Flora/Normalise.hs

+174
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,174 @@
1+
module Flora.Normalise where
2+
3+
import Data.List qualified as List
4+
import Data.Maybe (isJust)
5+
import Data.Text (Text)
6+
7+
-- | Map a category name, as written in the @category@ field of a package
-- description, to the display name of one of the 'floraCategories'.
--
-- * A name that already is a canonical display name is returned unchanged.
-- * Otherwise the name is looked up in a fixed table of known aliases.
-- * 'Nothing' is returned for a name that is neither canonical nor an alias.
--
-- Matching is exact and case-sensitive, which is why spelling variants such
-- as @"system"@, @"System"@ and @"SYstem"@ are listed individually.
--
-- Invariant: the right-hand side of every alias must be the display name
-- (second tuple component) of an entry of 'floraCategories'; a target that
-- is not in that list cannot be resolved to an existing category by name.
normaliseCategory :: Text -> Maybe Text
normaliseCategory string
  | isCanonical = Just string
  | otherwise =
      case string of
        "Algorithm" -> Just "Algorithms"
        "Crypto" -> Just "Cryptography"
        -- The canonical display name is "CLI & TUI tooling".
        "CLI" -> Just "CLI & TUI tooling"
        "TUI" -> Just "CLI & TUI tooling"
        "Command Line" -> Just "CLI & TUI tooling"
        "CommandLine" -> Just "CLI & TUI tooling"
        "Numeric" -> Just "Mathematics"
        "Numerical" -> Just "Mathematics"
        "Numerics" -> Just "Mathematics"
        "Arithmetic" -> Just "Mathematics"
        "Number Theory" -> Just "Mathematics"
        "Math" -> Just "Mathematics"
        "mathematics" -> Just "Mathematics"
        "Maths" -> Just "Mathematics"
        "Algebra" -> Just "Mathematics"
        "Graph" -> Just "Mathematics"
        "Graphs" -> Just "Mathematics"
        "Geometry" -> Just "Mathematics"
        "Tropical Geometry" -> Just "Mathematics"
        -- "Parsing" and "Parsers" are canonical names themselves and are
        -- therefore already handled by the guard above.
        "Parser Builder" -> Just "Parsers"
        "Parser Combinators" -> Just "Parsers"
        "Parser" -> Just "Parsers"
        "ParserCombinators" -> Just "Parsers"
        "Parsing Text" -> Just "Parsers"
        "Network" -> Just "Network Development"
        "Data Network" -> Just "Network Development"
        "Network APIs" -> Just "Network Development"
        "Network Control" -> Just "Network Development"
        "NetworkAPI" -> Just "Network Development"
        "NetworkAPIs" -> Just "Network Development"
        "Networking" -> Just "Network Development"
        "Web" -> Just "Web Development"
        "Yesod" -> Just "Web Development"
        "Javascript" -> Just "Web Development"
        "OpenAPI" -> Just "Web Development"
        "Snap" -> Just "Web Development"
        "Servant" -> Just "Web Development"
        "Servant Web" -> Just "Web Development"
        "Web development" -> Just "Web Development"
        "Happstack" -> Just "Web Development"
        "Semantic Web" -> Just "Web Development"
        "Optics" -> Just "Lenses"
        "Lens" -> Just "Lenses"
        "Conduit" -> Just "Streaming"
        "Streamly" -> Just "Streaming"
        "Pipes" -> Just "Streaming"
        "Monad" -> Just "Monads"
        "MonadIO" -> Just "Monads"
        "Transformers" -> Just "Monads"
        "Monad Transformers" -> Just "Monads"
        "Mtl" -> Just "Monads"
        "User interfaces" -> Just "GUI"
        "User interface" -> Just "GUI"
        "UserInterface" -> Just "GUI"
        "UI" -> Just "GUI"
        "User Interfaces" -> Just "GUI"
        "graphics" -> Just "Graphics"
        "Code Generation" -> Just "FFI"
        "Foreign binding" -> Just "FFI"
        "Elm" -> Just "FFI"
        "TypeScript" -> Just "FFI"
        "Java" -> Just "FFI"
        "JVM" -> Just "FFI"
        "Jvm" -> Just "FFI"
        "Erlang" -> Just "FFI"
        "PHP" -> Just "FFI"
        "Foreign" -> Just "FFI"
        "Types" -> Just "Type System"
        "Validity" -> Just "Testing"
        "QuickCheck" -> Just "Testing"
        "Test" -> Just "Testing"
        "Sound" -> Just "Audio"
        "Algorithmic Music Composition" -> Just "Audio"
        "Automatic Music Generation" -> Just "Audio"
        "Music" -> Just "Audio"
        -- The canonical display name is "Data compression".
        "Compression" -> Just "Data compression"
        "Zip" -> Just "Data compression"
        "ZLib" -> Just "Data compression"
        "Tar" -> Just "Data compression"
        "Cloud" -> Just "Cloud Computing"
        "Google" -> Just "Cloud Computing"
        "AWS" -> Just "Cloud Computing"
        "Compilers/Interpreters" -> Just "Compilers and Interpreters"
        "Interpreters" -> Just "Compilers and Interpreters"
        "Compiler" -> Just "Compilers and Interpreters"
        "DSL" -> Just "Compilers and Interpreters"
        "Database" -> Just "Databases"
        "PostgreSQL" -> Just "Databases"
        "NLP" -> Just "Natural Language Processing"
        "Japanese Natural Language Processing" -> Just "Natural Language Processing"
        "Natural Language" -> Just "Natural Language Processing"
        "Stemming" -> Just "Natural Language Processing"
        "Natural-language-processing" -> Just "Natural Language Processing"
        "Containers" -> Just "Data Structures"
        "Game" -> Just "Game Development"
        "Game Engine" -> Just "Game Development"
        "Concurrent" -> Just "Concurrency"
        "Parallel" -> Just "Parallelism"
        "Distributed Computing" -> Just "Distributed Systems & Computation"
        "Filesystem" -> Just "Systems Programming"
        "system" -> Just "Systems Programming"
        "System" -> Just "Systems Programming"
        "SYstem" -> Just "Systems Programming"
        "Embedded" -> Just "Systems Programming"
        "Distribution" -> Just "Package Distribution"
        "Trace" -> Just "Profiling"
        "Debug" -> Just "Profiling"
        "Debugging" -> Just "Profiling"
        "OpenTelemetry" -> Just "Telemetry"
        "Metrics" -> Just "Telemetry"
        "Regex" -> Just "Text"
        _ -> Nothing
  where
    -- True when the input is exactly the display name of a known category.
    isCanonical :: Bool
    isCanonical =
      isJust $ List.find (\(_, name, _) -> name == string) floraCategories
128+
-- | The canonical list of categories known to Flora.
--
-- Each entry is a triple of:
--
-- 1. the slug,
-- 2. the display name (the value that 'normaliseCategory' produces and
--    compares against),
-- 3. a one-line synopsis.
floraCategories :: [(Text, Text, Text)]
floraCategories =
  [ ("algorithms", "Algorithms", "Algorithms implemented in Haskell, like sorting, searching")
  , ("audio", "Audio", "Process digital signal, make music")
  , ("bioinformatics", "Bioinformatics", "Methods and software for the analysis of biological data")
  , ("cloud", "Cloud Computing", "Bindings to Cloud Computing platforms")
  , ("command-line", "CLI & TUI tooling", "Libraries to develop command-line interfaces")
  , ("compilers-interpreters", "Compilers and Interpreters", "Tooling to create compilers and interpreters")
  , ("compression", "Data compression", "Reducing the size of things")
  , ("concurrency", "Concurrency", "Concurrent programming techniques and tools")
  , ("cryptography", "Cryptography", "Algorithms for encrypting and hashing data")
  , ("data-structures", "Data Structures", "Data structures, whether purely functional or mutable")
  , ("databases", "Databases", "Database drivers and interfaces")
  , ("development", "Development", "Development helpers, integration with other languages")
  , ("distributed", "Distributed Systems & Computation", "Tooling and techniques for writing distributed systems")
  , ("distribution", "Package Distribution", "Building, Packaging and Distributing software in Haskell")
  , ("ffi", "FFI", "Working with other languages and generating bindings")
  , ("frp", "FRP", "Functional Reactive Programming")
  , ("game-dev", "Game Development", "Libraries used for game development")
  , ("generics", "Generics", "Working with Haskell's Generics mechanism")
  , ("graphics", "Graphics", "Programming the system's rendering APIs")
  , ("gui", "GUI", "Creating graphical user interfaces")
  , ("hardware", "Hardware", "Digital circuit description and hardware interfacing")
  , ("json", "JSON", "Parsing, producing and manipulating JSON")
  , ("language", "Language", "Interfacing with other programming languages from Haskell")
  , ("lenses", "Lenses", "Functional references such as Lenses, Folds and Traversals")
  , ("maths", "Mathematics", "Numerical and Mathematical packages")
  , ("monads", "Monads", "Effectful sequential computations")
  , ("network", "Network Development", "Connection pools, DNS, HTTP, API clients and network protocols")
  , ("nlp", "Natural Language Processing", "Tooling to work with natural languages")
  , ("parallelism", "Parallelism", "Parallel programming")
  , ("parser-implementations", "Parser Implementations", "Parsing data formats")
  , ("parsers", "Parsers", "Libraries to ingest and parse data")
  , ("parsing", "Parsing", "Parser generators, combinators and tools to help with parsing")
  , ("physics", "Physics", "The study of matter, its constituents, motion, and behaviour")
  , ("prelude", "Prelude", "Libraries that provide default imports")
  , ("profiling", "Profiling", "Measure the behaviour of your programs")
  , ("streaming", "Streaming", "Data streaming for continuous processing")
  , ("system", "Systems Programming", "Programming and communicating with the Operating System")
  , ("telemetry", "Telemetry", "Systems Observability")
  , ("template-haskell", "Template Haskell", "Metaprogramming with Template Haskell")
  , ("testing", "Testing", "Test frameworks")
  , ("text", "Text", "Working with textual data and algorithms")
  , ("type-system", "Type System", "Enhancing the Haskell type system")
  , ("web", "Web Development", "Programming for the web")
  , ("xml", "XML", "Libraries to consume and produce XML documents")
  ]

‎test/Flora/CategorySpec.hs

+25-17
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,36 @@
11
module Flora.CategorySpec where
22

3-
import Control.Monad.IO.Class
3+
import Data.Set qualified as Set
44
import Test.Tasty
55

6-
import Flora.Import.Categories.Tuning as Tuning
6+
import Flora.Normalise
77
import Flora.TestUtils
88

99
spec :: TestEff TestTree
1010
spec =
1111
testThese
12-
"category tuning"
13-
[ testThis "Test that the category unification algorithm works" testUnificationAlgorithm
12+
"Category Normalisation"
13+
[ testThis "Normalisation of Mathematics categories" testNormalisationOfMathematicsCategories
1414
]
1515

16-
testUnificationAlgorithm :: TestEff ()
17-
testUnificationAlgorithm = do
18-
liftIO (Tuning.normalise [UserPackageCategory "Algorithm"])
19-
>>= assertEqual (Results [NormalisedPackageCategory "Algorithms"] [])
20-
21-
liftIO (Tuning.normalise [UserPackageCategory "Crypto"])
22-
>>= assertEqual (Results [NormalisedPackageCategory "Cryptography"] [])
23-
24-
liftIO (Tuning.normalise [UserPackageCategory "CLI", UserPackageCategory "TUI"])
25-
>>= assertEqual (Results [NormalisedPackageCategory "CLI & TUI Development"] [])
26-
27-
liftIO (Tuning.normalise [UserPackageCategory "Numeric", UserPackageCategory "Parser Builder"])
28-
>>= assertEqual (Results [NormalisedPackageCategory "Mathematics", NormalisedPackageCategory "Parsers"] [])
16+
-- | Every listed spelling variant of a mathematics-related category must
-- normalise to the single canonical name @Mathematics@: mapping
-- 'normaliseCategory' over the set of variants has to collapse it to a
-- one-element set.
testNormalisationOfMathematicsCategories :: TestEff ()
testNormalisationOfMathematicsCategories =
  assertEqual expected normalised
  where
    expected = Set.singleton (Just "Mathematics")
    normalised = Set.map normaliseCategory variants
    variants =
      Set.fromList
        [ "Numeric"
        , "Numerical"
        , "Numerics"
        , "Arithmetic"
        , "Number Theory"
        , "Math"
        , "Mathematics"
        , "mathematics"
        , "Maths"
        , "Algebra"
        , "Graph"
        , "Graphs"
        , "Geometry"
        ]

‎test/Flora/PackageSpec.hs

-9
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@ import Optics.Core
1717
import Test.Tasty
1818

1919
import Flora.Import.Package
20-
import Flora.Model.Category (Category (..))
2120
import Flora.Model.Category.Query qualified as Query
2221
import Flora.Model.Component.Types
2322
import Flora.Model.Package
@@ -35,8 +34,6 @@ spec =
3534
"package tests"
3635
[ testThis "Check Cabal dependencies" testCabalDeps
3736
, testThis "Insert containers and its dependencies" testInsertContainers
38-
, testThis "@haskell/base belongs to the \"Prelude\" category" testThatBaseisInPreludeCategory
39-
, testThis "@hackage/semigroups belongs to appropriate categories" testThatSemigroupsIsInMathematicsAndDataStructures
4037
, testThis "The \"haskell\" namespace has the correct number of packages" testCorrectNumberInHaskellNamespace
4138
, testThis "Packages are not shown as their own dependent" testNoSelfDependent
4239
, testThis "Searching for `text` returns expected results by namespace/package name" testSearchResultText
@@ -123,12 +120,6 @@ testThatBaseisInPreludeCategory = do
123120
result <- Query.getPackagesFromCategorySlug "prelude"
124121
assertBool $ Set.member (PackageName "base") (Set.fromList $ Vector.toList $ fmap (view #name) result)
125122

126-
testThatSemigroupsIsInMathematicsAndDataStructures :: TestEff ()
127-
testThatSemigroupsIsInMathematicsAndDataStructures = do
128-
semigroups <- fromJust <$> Query.getPackageByNamespaceAndName (Namespace "hackage") (PackageName "semigroups")
129-
result <- Query.getPackageCategories semigroups.packageId
130-
assertEqual (Set.fromList ["data-structures", "maths"]) (Set.fromList $ slug <$> Vector.toList result)
131-
132123
testCorrectNumberInHaskellNamespace :: TestEff ()
133124
testCorrectNumberInHaskellNamespace = do
134125
results <- Query.getPackagesByNamespace (Namespace "haskell")

‎test/Flora/TestUtils.hs

+2-2
Original file line numberDiff line numberDiff line change
@@ -249,10 +249,10 @@ assertBool boolean = liftIO $ Test.assertBool "" boolean
249249
-- Usage:
250250
--
251251
-- >>> assertEqual expected actual
252-
assertEqual :: (Eq a, Show a) => a -> a -> TestEff ()
252+
assertEqual :: (Eq a, HasCallStack, Show a) => a -> a -> TestEff ()
253253
assertEqual expected actual = liftIO $ Test.assertEqual "" expected actual
254254

255-
assertFailure :: MonadIO m => String -> m ()
255+
assertFailure :: (HasCallStack, MonadIO m) => String -> m ()
256256
assertFailure = liftIO . Test.assertFailure
257257

258258
assertJust :: HasCallStack => Maybe a -> TestEff a

‎test/fixtures/Cabal/hackage/flora.cabal

-5
Original file line numberDiff line numberDiff line change
@@ -68,9 +68,6 @@ common common-rts-options
6868
library
6969
import: common-extensions
7070
import: common-ghc-options
71-
extra-libraries: stdc++
72-
cxx-options: -std=c++17 -Wall -D__EMBEDDED_SOUFFLE__
73-
cxx-sources: cbits/categorise.cpp
7471
hs-source-dirs: ./src/core ./src/orphans
7572

7673
-- cabal-fmt: expand src/core
@@ -84,7 +81,6 @@ library
8481
Flora.Environment
8582
Flora.Environment.Config
8683
Flora.Import.Categories
87-
Flora.Import.Categories.Tuning
8884
Flora.Import.Package
8985
Flora.Import.Package.Bulk
9086
Flora.Import.Package.Types
@@ -160,7 +156,6 @@ library
160156
, servant-lucid
161157
, servant-server
162158
, slugify
163-
, souffle-haskell ==3.5.1
164159
, streamly
165160
, streamly-core
166161
, tar

0 commit comments

Comments
 (0)
Please sign in to comment.