From 26a70582a5ca7f9a2b2cea756c74fa5c4a896dd2 Mon Sep 17 00:00:00 2001 From: Sean D Gillespie Date: Sat, 23 Sep 2023 14:15:50 -0400 Subject: [PATCH] feature: Add a formatting module --- bench/Main.hs | 16 ++++- gibberish.cabal | 2 + src/Data/Gibberish/Format.hs | 46 ++++++++++++++ test/Data/Gibberish/FormatSpec.hs | 102 ++++++++++++++++++++++++++++++ 4 files changed, 163 insertions(+), 3 deletions(-) create mode 100644 src/Data/Gibberish/Format.hs create mode 100644 test/Data/Gibberish/FormatSpec.hs diff --git a/bench/Main.hs b/bench/Main.hs index 295ebc8..cf0ef25 100644 --- a/bench/Main.hs +++ b/bench/Main.hs @@ -1,5 +1,6 @@ module Main (main) where +import Data.Gibberish.Format import Data.Gibberish.GenTrigraph (genTrigraph) import Data.Gibberish.Types (Trigraph (..)) import Paths_gibberish (getDataDir) @@ -7,7 +8,6 @@ import Paths_gibberish (getDataDir) import Criterion.Main import Data.Aeson (encode) import Data.Char (isAlpha) -import Data.Map (Map ()) import Data.Text (Text ()) import Data.Text qualified as Text import Data.Text.IO qualified as Text @@ -25,7 +25,17 @@ trigraph = genTrigraph <$> dictionary main :: IO () main = defaultMain - [ bench "load dictionary file" $ nfIO dictionary, + [ -- Trigraph generation + bench "load dictionary file" $ nfIO dictionary, env dictionary $ bench "generate trigraph" . nf genTrigraph, - env trigraph $ bench "serialize to JSON" . nf encode + env trigraph $ bench "serialize to JSON" . nf encode, + -- Formatting + env dictionary $ bench "format words" . nf formatWords' ] + +formatWords' :: [Text] -> Text +formatWords' = formatWords maxLen maxHeight sep . map Word + where + maxLen = MaxLen 100 + maxHeight = MaxHeight 1000 + sep = Separator " " diff --git a/gibberish.cabal b/gibberish.cabal index 63c654b..174803f 100644 --- a/gibberish.cabal +++ b/gibberish.cabal @@ -96,6 +96,7 @@ library Data.Elocrypt, Data.Elocrypt.Trigraph, Data.Elocrypt.Utils, + Data.Gibberish.Format, Data.Gibberish.GenTrigraph, Data.Gibberish.Types build-depends: @@ -151,6 +152,7 @@ test-suite spec build-tool-depends: hspec-discover:hspec-discover other-modules: + Data.Gibberish.FormatSpec Data.Gibberish.GenTrigraphSpec Data.Gibberish.TypesSpec other-extensions: diff --git a/src/Data/Gibberish/Format.hs b/src/Data/Gibberish/Format.hs new file mode 100644 index 0000000..d95a4b2 --- /dev/null +++ b/src/Data/Gibberish/Format.hs @@ -0,0 +1,46 @@ +module Data.Gibberish.Format + ( MaxLen (..), + MaxHeight (..), + Separator (..), + Word (..), + formatWords, + formatLine, + ) where + +import Data.List (intersperse) +import Data.Text (Text ()) +import Data.Text qualified as Text +import Prelude hiding (Word ()) + +newtype MaxLen = MaxLen {unMaxLen :: Int} + deriving stock (Eq, Show) + deriving newtype (Enum, Integral, Num, Ord, Real) + +newtype MaxHeight = MaxHeight {unMaxHeight :: Int} + deriving stock (Eq, Show) + deriving newtype (Enum, Integral, Num, Ord, Real) + +newtype Separator = Separator {unSeparator :: Text} + deriving stock (Eq, Show) + +newtype Word = Word {unWord :: Text} + deriving stock (Eq, Show) + +-- | Format a list of words to a text blob +formatWords :: MaxLen -> MaxHeight -> Separator -> [Word] -> Text +formatWords maxLen height sep words' = + case height of + 1 -> line + _ -> line <> "\n" <> formatWords maxLen (height - 1) sep words' + where + line = formatLine maxLen sep words' + +formatLine :: MaxLen -> Separator -> [Word] -> Text +formatLine (MaxLen maxLen) (Separator sep) = + concatLine maxLen . intersperse sep . map unWord + where + concatLine :: Int -> [Text] -> Text + concatLine len (t : ts) + | len - Text.length t > 0 = t <> concatLine (len - Text.length t) ts + | otherwise = "" + concatLine _ [] = error $ "Ran out of words" diff --git a/test/Data/Gibberish/FormatSpec.hs b/test/Data/Gibberish/FormatSpec.hs new file mode 100644 index 0000000..1c31393 --- /dev/null +++ b/test/Data/Gibberish/FormatSpec.hs @@ -0,0 +1,102 @@ +module Data.Gibberish.FormatSpec (spec) where + +import Data.Gibberish.Format + +import Control.Monad (forM_) +import Data.Functor ((<&>)) +import Data.Text (Text ()) +import Data.Text qualified as Text +import Hedgehog +import Hedgehog.Gen qualified as Gen hiding (word) +import Hedgehog.Range qualified as Range +import Test.Hspec +import Test.Hspec.Hedgehog +import TextShow (TextShow (..)) +import Prelude hiding (Word) + +spec :: Spec +spec = do + describe "formatWords" $ do + describe "variable length" $ do + it "minLen <= length formatWords <= maxLen" $ hedgehog $ do + maxLen <- forAll genMaxLen + maxHeight <- forAll genMaxHeight + output <- + forAll $ + genFormattedLine (Range.linear 3 10) maxLen maxHeight + let lines' = Text.lines output + + annotateShow $ + lines' <&> \line -> + line <> "(length=" <> showt (Text.length line) <> ")" + + let minLen = maxLen - 10 + forM_ lines' $ \line -> + assert $ + Text.length line <= fromIntegral maxLen + && Text.length line >= fromIntegral minLen + + it "length (lines (formatWords)) == maxHeight" $ hedgehog $ do + maxLen <- forAll genMaxLen + maxHeight <- forAll genMaxHeight + output <- + forAll $ + genFormattedLine (Range.linear 3 10) maxLen maxHeight + let lines' = Text.lines output + + length lines' === fromIntegral maxHeight + + describe "constant length" $ do + it "minLen <= length formatWords <= maxLen" $ hedgehog $ do + maxLen <- forAll genMaxLen + maxHeight <- forAll genMaxHeight + wordLen <- forAll $ Gen.int (Range.linear 3 10) + output <- + forAll $ + genFormattedLine (Range.singleton wordLen) maxLen maxHeight + let lines' = Text.lines output + + annotateShow $ + lines' <&> \line -> + line <> "(length=" <> showt (Text.length line) <> ")" + + let minLen = maxLen - 10 + forM_ lines' $ \line -> + assert $ + Text.length line <= fromIntegral maxLen + && Text.length line >= fromIntegral minLen + + it "length (lines (formatWords)) == maxHeight" $ hedgehog $ do + maxLen <- forAll genMaxLen + maxHeight <- forAll genMaxHeight + wordLen <- forAll $ Gen.int (Range.linear 3 10) + output <- + forAll $ + genFormattedLine (Range.singleton wordLen) maxLen maxHeight + let lines' = Text.lines output + + length lines' === fromIntegral maxHeight + +genFormattedLine :: Range Int -> MaxLen -> MaxHeight -> Gen Text +genFormattedLine wordLen lineLen lineHeight = do + separator <- genSeparator + words' <- genWords wordLen (fromIntegral lineLen * fromIntegral lineHeight) + + pure $ formatWords lineLen lineHeight separator words' + +genWords :: Range Int -> Int -> Gen [Word] +genWords wordLen maxLen = Gen.list (Range.singleton minWords) (genWord wordLen) + where + minWords = maxLen `div` 2 + +genWord :: Range Int -> Gen Word +genWord len = Word <$> Gen.text len (Gen.enum 'a' 'e') + +genMaxLen :: Gen MaxLen +genMaxLen = MaxLen <$> Gen.integral (Range.linear 50 100) + +genMaxHeight :: Gen MaxHeight +genMaxHeight = MaxHeight <$> Gen.integral (Range.linear 3 50) + +genSeparator :: Gen Separator +genSeparator = Separator <$> Gen.text (Range.linear 1 3) (pure ' ')