Profiling of base64 encoding a 634 MB ISO file.

Code Snippet

Data/Base64.hs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
{-# LANGUAGE OverloadedStrings #-}

--------------------------------------------------------------------------------

module Data.Base64
  ( encode
  ) where

--------------------------------------------------------------------------------

import           Data.Bits
    ( shiftL
    , shiftR
    , (.|.)
    )
import qualified Data.ByteString      as BS
import qualified Data.ByteString.Lazy as LBS
import           Data.Word
    ( Word32
    , Word8
    )

--------------------------------------------------------------------------------

tbl :: BS.ByteString
tbl =
  "ABCDEFGHIJKLMNOPQRSTUVWXYZ\
  \abcdefghijklmnopqrstuvwxyz\
  \0123456789+/"

pad :: Word8
pad =
  61 -- '='

encode :: LBS.ByteString -> LBS.ByteString
encode =
  aux (Nothing, Nothing, Nothing)
  where
    aux buf bs
      | "" == bs =
        let
          (x,y,z) =
            case buf of
              (Just x', Just y', Nothing) -> (x', y', 0)
              (Just x', Nothing, Nothing) -> (x', 0 , 0)
              ___________________________ -> (0 , 0 , 0)
          (a,b,c,d) = b64 x y z
        in
          cons a $ cons b $ cons c $ cons d $ ""
      | otherwise =
        case buf' of
          (Just x, Just y, Just z) ->
            let
              (a,b,c,d) = b64 x y z
            in
              cons a $ cons b $ cons c $ cons d $ aux buf' $ tl bs
          ________________________ -> aux buf' $ tl bs
      where
        b64 a b c =
          let
            os =
              w32 a .<. 16 .|.
              w32 b .<. 08 .|.
              w32 c
          in
            (                         idx tbl $ int $ os        .>. 18
            ,                         idx tbl $ int $ os .<. 14 .>. 26
            , if b == 0 then pad else idx tbl $ int $ os .<. 20 .>. 26
            , if c == 0 then pad else idx tbl $ int $ os .<. 26 .>. 26
            )
        buf' =
          case buf of
            (Nothing, b, c) -> (Just $ hd bs, b           , c           )
            (a, Nothing, c) -> (a           , Just $ hd bs, c           )
            (a, b, Nothing) -> (a           , b           , Just $ hd bs)
            (_, _,       _) -> (Just $ hd bs, Nothing     , Nothing     )
        (.<.) x y = x `shiftL` y
        (.>.) x y = x `shiftR` y
        w32 :: Word8 -> Word32
        w32 = fromIntegral
        int :: Word32 -> Int
        int = fromIntegral
        -- O(1) ByteString index (subscript) operator, starting from 0.
        idx =  BS.index
        -- O(1) cons is analogous to '(:)' for lists.
        cons = LBS.cons
        -- O(1) Extract the first element of a ByteString.
        hd   = LBS.head
        -- O(1) Extract the elements after the head of a ByteString.
        tl   = LBS.tail

Main.hs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
#!/usr/bin/env stack
{- stack
   --resolver lts-12.0
   --install-ghc
   script
   --ghc-options -Werror
   --ghc-options -Wall
   --package bytestring
   --
-}

--------------------------------------------------------------------------------

{-# LANGUAGE OverloadedStrings #-}

--------------------------------------------------------------------------------

module Main (main) where

--------------------------------------------------------------------------------

import qualified Data.ByteString.Lazy as LBS

import qualified Data.Base64          as Base64

--------------------------------------------------------------------------------

main
  :: IO ()

--------------------------------------------------------------------------------

main =
  LBS.interact $ Base64.encode

build.bash

#!/bin/bash

clear

# base64
ghc -Wall -Werror -O2 --make Main.hs -o base64

# clean
find . -name '*.hi' -delete
find . -name '*.o'  -delete

run.bash

#!/bin/bash

cat ./misc/don_quijote.txt | \
    ./base64 > ./misc/don_quijote.b64

Code Output:

77u/VGhlIFByb2plY3QgR3V0ZW5iZXJnIEVCb29rIG9mIERvbiBRdWlqb3RlLCBieSBNaWd1ZWwq ...
... cmliZSB0byBvdXIgZW1haWwgbmV3c2xldHRlciB0byBoZWFyIGFib3V0IG5ldyBlQm9va3MuDQo=

Test Snippet

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#!/usr/bin/env stack
{- stack
   --resolver lts-12.0
   --install-ghc
   script
   --package bytestring
   --package hspec
   --ghc-options -threaded
   --ghc-options -Werror
   --ghc-options -Wall
   --
-}

--------------------------------------------------------------------------------

{-# LANGUAGE OverloadedStrings #-}

--------------------------------------------------------------------------------

module Main (main) where

--------------------------------------------------------------------------------

import qualified Data.ByteString.Lazy as LBS
import           Test.Hspec

import qualified Data.Base64          as Base64

--------------------------------------------------------------------------------

dec :: LBS.ByteString
enc :: LBS.ByteString

main
  :: IO ()

--------------------------------------------------------------------------------

dec =
  "Man is distinguished, not only by his reason, but by this singular \
  \passion from other animals, which is a lust of the mind, that by a \
  \perseverance of delight in the continued and indefatigable generation \
  \of knowledge, exceeds the short vehemence of any carnal pleasure."

enc =
  "TWFuIGlzIGRpc3Rpbmd1aXNoZWQsIG5vdCBvbmx5IGJ5IGhpcyByZWFzb24sIGJ1\
  \dCBieSB0aGlzIHNpbmd1bGFyIHBhc3Npb24gZnJvbSBvdGhlciBhbmltYWxzLCB3\
  \aGljaCBpcyBhIGx1c3Qgb2YgdGhlIG1pbmQsIHRoYXQgYnkgYSBwZXJzZXZlcmFu\
  \Y2Ugb2YgZGVsaWdodCBpbiB0aGUgY29udGludWVkIGFuZCBpbmRlZmF0aWdhYmxl\
  \IGdlbmVyYXRpb24gb2Yga25vd2xlZGdlLCBleGNlZWRzIHRoZSBzaG9ydCB2ZWhl\
  \bWVuY2Ugb2YgYW55IGNhcm5hbCBwbGVhc3VyZS4="

main = hspec $
  do
    describe ("Base64 encode") $
      do
        it ("https://en.wikipedia.org/wiki/B64enc#Examples") $
          (Base64.encode dec) `shouldBe` enc
user@personal:~/.../base64$ ./Test.hs 

Base64 encode
  https://en.wikipedia.org/wiki/B64enc#Examples

Finished in 0.0025 seconds
1 example, 0 failures

Profiling Snippet

profiling.bash

#!/bin/bash

# base: downloads.haskell.org/~ghc/latest/docs/html/users_guide/
# file: profiling.html#rts-flag--po%20%E2%9F%A8stem%E2%9F%A9

clear

# base64 (+ profiling)
ghc -prof -fprof-auto -rtsopts -Wall -Werror -O2 --make Main.hs -o base64

# clean
find . -name '*.hi' -delete
find . -name '*.o'  -delete

# run and generate process and memory profiles
cat ./misc/don_quijote.txt | \
    ./base64 +RTS -p -h > ./misc/don_quijote.b64

# create a graph of memory profile
hp2ps -c base64.hp

base64.aux

user@personal:~/.../base64$ cat base64.aux 
X_RANGE 1.40
Y_RANGE 89416.00
ORDER MAIN 1
ORDER (235)GHC.Conc.Signal.CAF 2
ORDER (219)GHC.IO.Encoding.CAF 3
ORDER (209)GHC.IO.Handle.FD.CAF 4
ORDER (132)PINNED 5
SHADE MAIN 0.00
SHADE (235)GHC.Conc.Signal.CAF 0.00
SHADE (219)GHC.IO.Encoding.CAF 0.00
SHADE (209)GHC.IO.Handle.FD.CAF 0.00
SHADE (132)PINNED 0.10

base64.prof

	Tue Jul 24 08:24 2018 Time and Allocation Profiling Report  (Final)

	   base64 +RTS -p -h -RTS

	total time  =        1.13 secs   (1134 ticks @ 1000 us, 1 processor)
	total alloc = 2,218,934,824 bytes  (excludes profiling overheads)

COST CENTRE       MODULE      SRC                             %time %alloc

main              Main        Main.hs:(33,1)-(34,30)           30.0   37.1
encode.aux        Data.Base64 Data/Base64.hs:(39,5)-(90,23)    29.0   16.1
encode.aux.b64    Data.Base64 Data/Base64.hs:(59,9)-(70,13)    13.3   14.3
encode.aux.cons   Data.Base64 Data/Base64.hs:86:9-23            7.3   18.0
encode.aux.idx    Data.Base64 Data/Base64.hs:84:9-23            4.1    2.1
encode.aux.tl     Data.Base64 Data/Base64.hs:90:9-23            4.0    4.8
encode.aux.b64.os Data.Base64 Data/Base64.hs:(61,13)-(64,19)    2.7    0.5
encode.aux.w32    Data.Base64 Data/Base64.hs:80:9-26            2.0    1.6
encode.aux.int    Data.Base64 Data/Base64.hs:82:9-26            2.0    2.1
encode.aux.hd     Data.Base64 Data/Base64.hs:88:9-23            1.9    1.6
encode.aux.buf'   Data.Base64 Data/Base64.hs:(71,9)-(76,73)     1.1    0.0
encode.aux.(...)  Data.Base64 Data/Base64.hs:54:15-35           0.8    1.3

...

base64.hp

user@personal:~/.../base64$ cat base64.hp 
JOB "base64 +RTS -p -h"
DATE "Tue Jul 24 08:24 2018"
SAMPLE_UNIT "seconds"
VALUE_UNIT "bytes"
BEGIN_SAMPLE 0.000000
END_SAMPLE 0.000000
BEGIN_SAMPLE 0.098550
(273)encode.aux.idx/encode...	24
(268)encode.aux.cons/encod...	24
(265)encode.aux/encode/Dat...	32
(282)encode.aux.hd/encode....	24
(207)GHC.IO.Handle.Text.CAF	24
(235)GHC.Conc.Signal.CAF	640
(285)tbl/Data.Base64.CAF	72
(209)GHC.IO.Handle.FD.CAF	1584
MAIN	160
(217)GHC.IO.Encoding.Iconv.CAF	120
(219)GHC.IO.Encoding.CAF	1096
(132)PINNED	85936
(272)encode.aux.b64/encode...	32
(274)encode.aux.idx/encode...	32
(271)encode.aux.(...)/enco...	40
(267)encode.aux.tl/encode....	48
(261)main	136
(264)encode.aux/encode/main	88
(278)encode.aux.b64.os/enc...	16
(283)encode.aux.hd/encode....	48
END_SAMPLE 0.098550
BEGIN_SAMPLE 0.196928
(268)encode.aux.cons/encod...	24
(282)encode.aux.hd/encode....	24
(265)encode.aux/encode/Dat...	32
(273)encode.aux.idx/encode...	24
(207)GHC.IO.Handle.Text.CAF	24
(235)GHC.Conc.Signal.CAF	640
(285)tbl/Data.Base64.CAF	72
(209)GHC.IO.Handle.FD.CAF	1584
MAIN	160
(217)GHC.IO.Encoding.Iconv.CAF	120
(219)GHC.IO.Encoding.CAF	1096
(132)PINNED	85936
(278)encode.aux.b64.os/enc...	16
(272)encode.aux.b64/encode...	64
(274)encode.aux.idx/encode...	32
(283)encode.aux.hd/encode....	48
(267)encode.aux.tl/encode....	48
(271)encode.aux.(...)/enco...	40
(261)main	136
(264)encode.aux/encode/main	96
END_SAMPLE 0.196928
BEGIN_SAMPLE 0.295255
(268)encode.aux.cons/encod...	24
(282)encode.aux.hd/encode....	24
(265)encode.aux/encode/Dat...	32
(273)encode.aux.idx/encode...	24
(207)GHC.IO.Handle.Text.CAF	24
(235)GHC.Conc.Signal.CAF	640
(285)tbl/Data.Base64.CAF	72
(209)GHC.IO.Handle.FD.CAF	1584
MAIN	160
(217)GHC.IO.Encoding.Iconv.CAF	120
(219)GHC.IO.Encoding.CAF	1096
(132)PINNED	85936
(278)encode.aux.b64.os/enc...	16
(272)encode.aux.b64/encode...	64
(274)encode.aux.idx/encode...	32
(283)encode.aux.hd/encode....	48
(267)encode.aux.tl/encode....	48
(271)encode.aux.(...)/enco...	40
(261)main	136
(264)encode.aux/encode/main	96
END_SAMPLE 0.295255
BEGIN_SAMPLE 0.389418
(268)encode.aux.cons/encod...	24
(265)encode.aux/encode/Dat...	32
(273)encode.aux.idx/encode...	24
(282)encode.aux.hd/encode....	24
(207)GHC.IO.Handle.Text.CAF	24
(235)GHC.Conc.Signal.CAF	640
(285)tbl/Data.Base64.CAF	72
(209)GHC.IO.Handle.FD.CAF	1584
MAIN	160
(217)GHC.IO.Encoding.Iconv.CAF	120
(219)GHC.IO.Encoding.CAF	1096
(132)PINNED	85936
(267)encode.aux.tl/encode....	144
(261)main	136
(264)encode.aux/encode/main	208
END_SAMPLE 0.389418
BEGIN_SAMPLE 0.487462
(268)encode.aux.cons/encod...	24
(282)encode.aux.hd/encode....	24
(265)encode.aux/encode/Dat...	32
(273)encode.aux.idx/encode...	24
(207)GHC.IO.Handle.Text.CAF	24
(235)GHC.Conc.Signal.CAF	640
(285)tbl/Data.Base64.CAF	72
(209)GHC.IO.Handle.FD.CAF	1584
MAIN	160
(217)GHC.IO.Encoding.Iconv.CAF	120
(219)GHC.IO.Encoding.CAF	1096
(132)PINNED	85936
(278)encode.aux.b64.os/enc...	16
(272)encode.aux.b64/encode...	64
(274)encode.aux.idx/encode...	32
(283)encode.aux.hd/encode....	48
(267)encode.aux.tl/encode....	48
(271)encode.aux.(...)/enco...	40
(261)main	136
(264)encode.aux/encode/main	96
END_SAMPLE 0.487462
BEGIN_SAMPLE 0.586448
(268)encode.aux.cons/encod...	24
(282)encode.aux.hd/encode....	24
(265)encode.aux/encode/Dat...	32
(273)encode.aux.idx/encode...	24
(207)GHC.IO.Handle.Text.CAF	24
(235)GHC.Conc.Signal.CAF	640
(285)tbl/Data.Base64.CAF	72
(209)GHC.IO.Handle.FD.CAF	1584
MAIN	160
(217)GHC.IO.Encoding.Iconv.CAF	120
(219)GHC.IO.Encoding.CAF	1096
(132)PINNED	85936
(278)encode.aux.b64.os/enc...	16
(272)encode.aux.b64/encode...	64
(274)encode.aux.idx/encode...	32
(283)encode.aux.hd/encode....	48
(267)encode.aux.tl/encode....	48
(271)encode.aux.(...)/enco...	40
(261)main	136
(264)encode.aux/encode/main	96
END_SAMPLE 0.586448
BEGIN_SAMPLE 0.686483
(268)encode.aux.cons/encod...	24
(282)encode.aux.hd/encode....	24
(265)encode.aux/encode/Dat...	32
(273)encode.aux.idx/encode...	24
(207)GHC.IO.Handle.Text.CAF	24
(235)GHC.Conc.Signal.CAF	640
(285)tbl/Data.Base64.CAF	72
(209)GHC.IO.Handle.FD.CAF	1584
MAIN	160
(217)GHC.IO.Encoding.Iconv.CAF	120
(219)GHC.IO.Encoding.CAF	1096
(132)PINNED	85936
(278)encode.aux.b64.os/enc...	16
(272)encode.aux.b64/encode...	88
(274)encode.aux.idx/encode...	16
(283)encode.aux.hd/encode....	48
(267)encode.aux.tl/encode....	48
(271)encode.aux.(...)/enco...	40
(261)main	136
(264)encode.aux/encode/main	96
END_SAMPLE 0.686483
BEGIN_SAMPLE 0.782415
(282)encode.aux.hd/encode....	24
(268)encode.aux.cons/encod...	24
(265)encode.aux/encode/Dat...	32
(273)encode.aux.idx/encode...	24
(207)GHC.IO.Handle.Text.CAF	24
(235)GHC.Conc.Signal.CAF	640
(285)tbl/Data.Base64.CAF	72
(209)GHC.IO.Handle.FD.CAF	1584
MAIN	160
(217)GHC.IO.Encoding.Iconv.CAF	120
(219)GHC.IO.Encoding.CAF	1096
(132)PINNED	85936
(278)encode.aux.b64.os/enc...	16
(272)encode.aux.b64/encode...	32
(274)encode.aux.idx/encode...	48
(283)encode.aux.hd/encode....	48
(267)encode.aux.tl/encode....	48
(271)encode.aux.(...)/enco...	40
(261)main	136
(264)encode.aux/encode/main	88
END_SAMPLE 0.782415
BEGIN_SAMPLE 0.880152
(268)encode.aux.cons/encod...	24
(282)encode.aux.hd/encode....	24
(265)encode.aux/encode/Dat...	32
(273)encode.aux.idx/encode...	24
(207)GHC.IO.Handle.Text.CAF	24
(235)GHC.Conc.Signal.CAF	640
(285)tbl/Data.Base64.CAF	72
(209)GHC.IO.Handle.FD.CAF	1584
MAIN	160
(217)GHC.IO.Encoding.Iconv.CAF	120
(219)GHC.IO.Encoding.CAF	1096
(132)PINNED	85936
(278)encode.aux.b64.os/enc...	16
(272)encode.aux.b64/encode...	64
(274)encode.aux.idx/encode...	32
(283)encode.aux.hd/encode....	48
(267)encode.aux.tl/encode....	48
(271)encode.aux.(...)/enco...	40
(261)main	136
(264)encode.aux/encode/main	96
END_SAMPLE 0.880152
BEGIN_SAMPLE 0.978724
(268)encode.aux.cons/encod...	24
(265)encode.aux/encode/Dat...	32
(273)encode.aux.idx/encode...	24
(282)encode.aux.hd/encode....	24
(207)GHC.IO.Handle.Text.CAF	24
(235)GHC.Conc.Signal.CAF	640
(285)tbl/Data.Base64.CAF	72
(209)GHC.IO.Handle.FD.CAF	1584
MAIN	160
(217)GHC.IO.Encoding.Iconv.CAF	120
(219)GHC.IO.Encoding.CAF	1096
(132)PINNED	85936
(267)encode.aux.tl/encode....	144
(261)main	136
(264)encode.aux/encode/main	208
END_SAMPLE 0.978724
BEGIN_SAMPLE 1.074090
(268)encode.aux.cons/encod...	24
(282)encode.aux.hd/encode....	24
(265)encode.aux/encode/Dat...	32
(273)encode.aux.idx/encode...	24
(207)GHC.IO.Handle.Text.CAF	24
(235)GHC.Conc.Signal.CAF	640
(285)tbl/Data.Base64.CAF	72
(209)GHC.IO.Handle.FD.CAF	1584
MAIN	160
(217)GHC.IO.Encoding.Iconv.CAF	120
(219)GHC.IO.Encoding.CAF	1096
(132)PINNED	85936
(278)encode.aux.b64.os/enc...	16
(272)encode.aux.b64/encode...	64
(274)encode.aux.idx/encode...	32
(283)encode.aux.hd/encode....	48
(267)encode.aux.tl/encode....	48
(271)encode.aux.(...)/enco...	40
(261)main	136
(264)encode.aux/encode/main	96
END_SAMPLE 1.074090
BEGIN_SAMPLE 1.171401
(268)encode.aux.cons/encod...	24
(282)encode.aux.hd/encode....	24
(265)encode.aux/encode/Dat...	32
(273)encode.aux.idx/encode...	24
(207)GHC.IO.Handle.Text.CAF	24
(235)GHC.Conc.Signal.CAF	640
(285)tbl/Data.Base64.CAF	72
(209)GHC.IO.Handle.FD.CAF	1584
MAIN	160
(217)GHC.IO.Encoding.Iconv.CAF	120
(219)GHC.IO.Encoding.CAF	1096
(132)PINNED	85936
(278)encode.aux.b64.os/enc...	16
(272)encode.aux.b64/encode...	64
(274)encode.aux.idx/encode...	32
(283)encode.aux.hd/encode....	48
(267)encode.aux.tl/encode....	48
(271)encode.aux.(...)/enco...	40
(261)main	136
(264)encode.aux/encode/main	96
END_SAMPLE 1.171401
BEGIN_SAMPLE 1.269989
(268)encode.aux.cons/encod...	24
(282)encode.aux.hd/encode....	24
(265)encode.aux/encode/Dat...	32
(273)encode.aux.idx/encode...	24
(207)GHC.IO.Handle.Text.CAF	24
(235)GHC.Conc.Signal.CAF	640
(285)tbl/Data.Base64.CAF	72
(209)GHC.IO.Handle.FD.CAF	1584
MAIN	160
(217)GHC.IO.Encoding.Iconv.CAF	120
(219)GHC.IO.Encoding.CAF	1096
(132)PINNED	85936
(278)encode.aux.b64.os/enc...	16
(272)encode.aux.b64/encode...	64
(274)encode.aux.idx/encode...	32
(283)encode.aux.hd/encode....	48
(267)encode.aux.tl/encode....	48
(271)encode.aux.(...)/enco...	40
(261)main	136
(264)encode.aux/encode/main	96
END_SAMPLE 1.269989
BEGIN_SAMPLE 1.368120
(268)encode.aux.cons/encod...	24
(265)encode.aux/encode/Dat...	32
(273)encode.aux.idx/encode...	24
(282)encode.aux.hd/encode....	24
(207)GHC.IO.Handle.Text.CAF	24
(235)GHC.Conc.Signal.CAF	640
(285)tbl/Data.Base64.CAF	72
(209)GHC.IO.Handle.FD.CAF	1584
MAIN	160
(217)GHC.IO.Encoding.Iconv.CAF	120
(219)GHC.IO.Encoding.CAF	1096
(132)PINNED	85936
(267)encode.aux.tl/encode....	144
(261)main	136
(264)encode.aux/encode/main	208
END_SAMPLE 1.368120
BEGIN_SAMPLE 1.402443
END_SAMPLE 1.402443

References: