A SAFE and idiomatic (no corner cutting or strange stuff under the hood)
implementation of an immutable array in Haskell aiming for an idiomatic low
memory footprint by storing chunks of 256 elements in a fixed vector
(data VF256 a = VF … , i00 :: a, … , iFF :: a }) by sharing a single
constructor. A side-effect of this approach, is that we achieve O(log₂₅₆ n)
asymptotic time complexity for most operations.
Futhermore, in order to avoid the usage of the following pattern data Present a
= Yes a | No for when an element is present as it will add a constructor
(overhead) for each element. This is achieved with the usage of ⊥ (bottom)
combined with Haskells lazy nature as well as a bitmap, where each of the
256 elements presence are stored as a single bit.
Remark: Please see the References below for Simon Marlows answer
at Stack Overflow with regard of memory footprint of Haskell data types.
Basics
Step-by-step insert example with an Array Log₈ with height equal to 0
Step-by-step insert example with an Array Log₄ with height greater than 0
------------------------------------------------------------------------------------ Data.Array.Log256, (c) 2020 SPISE MISU ApS----------------------------------------------------------------------------------{-# LANGUAGE Safe #-}--------------------------------------------------------------------------------moduleData.Array.Log256(AL256--,height,length--,create,update,exists,lookup,remove--,pprint,tuples,tolist--,amount,sparse,defrag,sliver,expand,reduce)where--------------------------------------------------------------------------------importPreludehiding(length,lookup)importData.Bits(Bits,shiftL,shiftR,(.&.))importData.List(dropWhile,foldl',takeWhile)importData.Ratio(Rational,(%))importData.Word(Word8)importData.Vector.Fixed256(VF256)importqualifiedData.Vector.Fixed256asVF--------------------------------------------------------------------------------dataAL256a=A!Integer!(Treea)dataTreea=L!(VF256a)|N!(VF256(Treea))instanceShowa=>Show(AL256a)whereshow(A_t)=showtinstanceShowa=>Show(Treea)whereshow=show.helper[]--------------------------------------------------------------------------------instanceFoldableTreewherefoldMapf(Lvfa)=foldMapfvfafoldMapf(Nvft)=foldMap(foldMapf)vftinstanceFoldableAL256wherefoldMapf(A_t)=foldMapftinstanceFunctorTreewherefmapf(Lvfa)=L$fmapfvfafmapf(Nvft)=N$fmap(fmapf)vftinstanceFunctorAL256wherefmapf(Alt)=Al$fmapft--------------------------------------------------------------------------------length::AL256a->Integerheight::AL256a->Integer{-# INLINE length #-}{-# INLINE height #-}length(Al_)=lheight(Al_)=log256$l-1--------------------------------------------------------------------------------create::Integer->AL256aupdate::Integer->a->AL256a->AL256aexists::Integer->AL256a->Boollookup::Integer->AL256a->aremove::Integer->AL256a->AL256a{-# INLINE create #-}{-# INLINE update #-}{-# INLINE exists #-}{-# INLINE lookup #-}{-# INLINE remove #-}{-| Length must be greater than 0. Devs responsability to call inside bounds
-}createl|l>aux=Al$NVF.create|otherwise=assert(l>0)msgAl$LVF.createwhereaux=logmod+1msg="The length of an array must be greater than 0"{-| Length can be read in O(1). Devs responsability to call inside bounds
-}updateia(Alt)=assert(i<l)msgAl$aux(loglvll)twhereaux_(Lfv)=L$k`seq`VF.updatekafvwherek=i2wrd8i.&.logmodauxj(Nfv)=N$v`seq`VF.updatekvfvwherek=i2wrd8$i.>.j.&.logmodn=j-logratc=ifVF.existskfvthenVF.lookupkfvelsebranchnv=auxncmsg="Index "++showi++" is out of bounds (length "++showl++")"{-| Length can be read in O(1). Devs responsability to call inside bounds
-}existsi(Alt)=assert(i<l)msgaux(loglvll)twhereaux_(Lfv)=VF.existskfvwherek=i2wrd8i.&.logmodauxj(Nfv)=e&&bwherek=i2wrd8$i.>.j.&.logmode=VF.existskfvn=j-logratb=auxn$VF.lookupkfvmsg="Index "++showi++" is out of bounds (length "++showl++")"{-| * Length can be read in O(1). Devs responsability to call inside bounds
* Existence can be checked in O(log₂₅₆ n). Devs responsability to check
-}lookupial@(Alt)=assert(i<l)msg0assert(existsial)msg1aux(loglvll)twhereaux_(Lfv)=VF.lookupkfvwherek=i2wrd8$i.&.logmodauxj(Nfv)=auxncwherek=i2wrd8$i.>.j.&.logmodn=j-logratc=VF.lookupkfvmsg0="Index "++showi++" is out of bounds (length "++showl++")"msg1="Index "++showi++" doesn't contain any element"{-| * Length can be read in O(1). Devs responsability to call inside bounds
* Existence can be checked in O(log₂₅₆ n). Devs responsability to check
-}removeial@(Alt)=assert(i<l)msg0assert(existsial)msg1Al$aux(loglvll)twhereaux_(Lfv)=Lrwherek=i2wrd8$i.&.logmodr=VF.removekfvauxj(Nfv)=Nawherek=i2wrd8$i.>.j.&.logmodn=j-logratc=VF.lookupkfvb=auxnca=ifvacantbthenVF.removekfvelseVF.updatekbfvmsg0="Index "++showi++" is out of bounds (length "++showl++")"msg1="Index "++showi++" doesn't contain any element"--------------------------------------------------------------------------------pprint::AL256a->Stringtuples::AL256a->[(Integer,a)]tolist::AL256a->[a]{-# INLINE pprint #-}{-# INLINE tuples #-}{-# INLINE tolist #-}pprint(Alt)="├ "++showl++aux0twhererepi=concat$replicatei"│ "bmpfv=mapf$VF.bitmapvauxi(Lfv)="\n"++repi++"├ "++bmp(\x->ifxthen'■'else'▭')fvauxi(Nfv)="\n"++repi++"├ "++bmp(\x->ifxthen'▣'else'□')fv++subwheresub=concat$map(aux(i+1).snd)$VF.tuplesfvtuples(A_t)=helper[]ttolist(A_t)=mapsnd$helper[]t--------------------------------------------------------------------------------amount::AL256a->Integersparse::AL256a->Rationaldefrag::AL256a->AL256asliver::Integer->Integer->AL256a->AL256aexpand::Integer->AL256a->AL256areduce::Integer->AL256a->AL256a{-# INLINE amount #-}{-# INLINE sparse #-}{-# INLINE defrag #-}{-# INLINE sliver #-}{-# INLINE expand #-}{-# INLINE reduce #-}amount(A_t)=auxtwhereaux(Lfv)=VF.amountfvaux(Nfv)=if0==VF.amountfvthen0elsefoldl'(+)0$map(aux.snd)$VF.tuplesfvsparse(A0_)=0sparseal@(Al_)=amountal%ldefragal=-- Reduces the degree of fragmentation. Check sparsity firstfoldl(\a(i,x)->updateixa)(createn)$zip[0..]$tolistalwheren=amountal{-| * Length can be read in O(1). Devs responsability to call inside bounds
* Index + offset must be: i+o<=l. Devs responsability to call inside bounds
-}sliverioal@(Al_)=assert(i+o<=l)msgfoldl(\a(j,x)->update(j-i)xa)(createo)$takeWhile((i+o>).fst)$dropWhile((i>).fst)$tuplesalwheremsg="Index i+o ("++show(i+o)++") must be less or equal to l ("++showl++")"{-| Length can be read in O(1). Devs responsability to call inside bounds
-}expandn(Alt)=assert(n>l)msgAn$aux$log256nwheremsg="New length "++shown++" must be greater than previous "++showlauxi|i>lvl=N$VF.update0(aux$i-1)VF.create|otherwise=tlvl=log256l{-| Length can be read in O(1). Devs responsability to call inside bounds
-}reducenal@(Al_)=assert(n<l)msgsliver0nalwheremsg="New length "++shown++" must be less than previous "++showl---------------------------------------------------------------------------------- HELPERShelper::[Integer]->Treea->[(Integer,a)]{-# INLINE helper #-}helperns(Lfv)=map(\(i,a)->(fromIntegrali+(auxlogratns),a))$VF.tuplesfvwhereaux_[ ]=0auxi(x:xs)=(1.<.i)*x+(aux(i+lograt)xs)helperns(Nfv)=concat$map(\(i,x)->helper(fromIntegrali:ns)x)$VF.tuplesfv--------------------------------------------------------------------------------assert::Bool->String->a->a{-# INLINE assert #-}assertTrue___a=aassertFalsemsg_=errormsg--------------------------------------------------------------------------------branch::Int->Treeavacant::Treea->Bool{-# INLINE branch #-}{-# INLINE vacant #-}branch0=LVF.createbranch_=NVF.createvacant(Lfv)=VF.amountfv==0vacant(Nfv)=VF.amountfv==0--------------------------------------------------------------------------------(.<.)::Bitsa=>a->Int->a(.>.)::Bitsa=>a->Int->a{-# INLINE (.<.) #-}{-# INLINE (.>.) #-}(.<.)=shiftL(.>.)=shiftR--------------------------------------------------------------------------------lograt::Intlogmod::Numa=>aloglvl::Integer->Inti2wrd8::Integrala=>a->Word8{-# INLINE lograt #-}{-# INLINE logmod #-}{-# INLINE loglvl #-}{-# INLINE i2wrd8 #-}lograt=0x08logmod=0xFFloglvl=((*)lograt).fromIntegral.log256.(flip(-)1)i2wrd8=fromIntegral---------------------------------------------------------------------------------- http://graphics.stanford.edu/~seander/bithacks.html#IntegerLogObvious---- log_{2^b} n ≈ m (floor)log02b::(Bitsa,Numa)=>Int->a->alog256::(Bitsa,Numa)=>a->a{-# INLINE log02b #-}{-# INLINE log256 #-}log02bbn=aux0(n.>.b)whereauxacc0=accauxaccx=aux(acc+1)(x.>.b)log256=log02blograt