Haskell: A quick tour

Martin Sulzmann

Haskell

Many cool ideas:

Why Haskell matters

Inspiration for many programming languages.

Haskell - Hello World Variants

main = putStrLn "Hello World"
main2 = do putStr "Hello "
           putStrLn "World"
main2' = do putStr "Hello "
          putStrLn "World" -- commands are not aligned!

yields

parse error on input `putStrLn'

Haskell - Hello World Variants (2)

main3 = do { putStr "Hello " ;
                 putStrLn "World" }
main4 = do putStr "Hello "
           if cond
            then putStrLn "World"
            else do putStr "wor"
                    putStrLn "ld"

Haskell Indentation Rules

IO in Haskell

Print output and read some input

main5 = do
    print "What is your name?"
    name <- getLine
    print ("Hello " ++ name ++ "!")

Compare this to C

#include <stdio.h>
int main (int argc, char **argv) {
    char name[71]; 
    printf("What is your name?\n"); 
    scanf("%s", name);
    printf("Hello %s!\n", name);
    return 0;
}

What if the name is more than 70 character long?

Further interesting points to note:

Imperative programming (side effects)

C is an imperative (impure) programming language (by default).

// C example
 int x = 1;
 x = x + 1;

Haskell is by default a pure language. By default you cannot overwrite the value assigned to a variable!

Imperative programming in Haskell is possible.

  x <- newIORef (1::Int)
  v <- readIORef x
  writeIORef x (v+1)

Feels like three address code/assembler. There are important advantages:

Complete Haskell example

import Data.IORef

-- Pure function
inc1 :: Int -> Int
inc1 x = x + 1

-- Impure function as signalled by the return type IO Int
inc2 :: Int -> IO Int
inc2 x = do
   print "Hello"
   return (x+1)

-- Yet again an impure function where the input is a reference
inc3 :: IORef Int -> IO Int
inc3 x = do
  v <- readIORef x
  let v2 = v + 1
  writeIORef x v2
  return v2

compose1 :: Int -> Int
compose1 = inc1 . inc1

compose2 :: Int -> IO Int
compose2 = inc2 . inc1

-- Will NOT type check.
-- Cannot mix Int with IO Int!
-- compose2b = inc1 . inc2

Sorting in Haskell

Quicksort in C


#include "stdio.h"

/*

Quicksort.

Idea: Split array (some totally ordered collection of values) into
       those that are lesser and greater than a certain element
       (commonly referred to as the pivot element).

      Quicksort the lesser and greater array.

      Concat results.


Example:

[3,2,5,1,4]


pick first element 3 as our pivat element.

lesser = [2,1]

greater = [5,4]


quicksort [2,1] => [1,2]

quicksort [5,4] => [4,5]


Then,

   quicksort [3,2,5,1,4] 

= (quicksort [2,1])  ++ [3] ++ (quicksort [5,4])

= [1,2] ++ [3] ++ [4,5]

= [1,2,3,4,5]

 */

void display(int x[], int low, int high) {

  printf("\n x[] low = %d high =%d \n", low, high);
  
  for(; low <= high; low++) {
    printf("%d ", x[low]);
  }
}


void swap(int* x, int* y) {
  int temp = *x;
  *x = *y;
  *y = temp;
}



void quicksort(int x[],int left, int right){
   int i, j, pivot, temp;

   if(left < right){
      pivot=left;
      i=left;
      j=right;

      /*
       1. Partition array 
         
           (a) 
           From left and right compare against pivot element.
           Adjust i (upper left frontier) and j (lower right frontier).
           Swap if frontiers haven't met yet.

           (b)
           Swap so that pivot element is placed in the middle.

       2. Recursively quicksort

           (a)
           All elements lesser (or equal) than pivot element.
           (b)
           All elements greater than pivot element.         

       */

      // (1a)
      while(i<j){
    while(x[i]<=x[pivot] && i<right) // (I1)
            i++;
    while(x[j]>x[pivot])            // (I2)
            j--;
         if(i<j){
       swap(&x[i], &x[j]);
         }
      }

      // (1b)
      swap(&x[pivot], &x[j]);

      // (2a)
      quicksort(x,left,j-1);
      // (2b)
      quicksort(x,j+1,right);

   }
}

/*

Further comments.

(I1) The side condition "i<right" ensures that we stay
     within valid array bounds.
     We might reach the case that i = right.
     Access to x[right] is valid, so we can safely evaluate
     x[i] <= x[pivot] and then abort the while loop because
     condition "i<right" no longer holds.

(I2) 

*/



int main() {

  int x[] = {3,4,1,2,5};
  int l, r, l2, r2;

  display(x, 0, 4);
  quicksort(x, 0, 4);
  display(x, 0, 4);  
  
  return 1;
}

Quicksort in Haskell

quicksort [] = []
quicksort (p:xs) = (quicksort lesser) ++ [p] ++ (quicksort greater)
    where
        lesser = filter (< p) xs
        greater = filter (>= p) xs

Write programs by defining function equations.

Evaluation via pattern matching (symbolic reasoning).

Correctness first.

Performance later (memory control, compiler optimizations, ...).

Mergesort in Haskell

mergesort []     = []
mergesort [x]    = [x]
mergesort xs = merge (mergesort left) (mergesort right)
   where
     (left,right) = split (length xs `div` 2) xs
       

merge [] ys = ys
merge xs [] = xs
merge (x:xs) (y:ys) = if x < y then x : merge xs (y:ys)
                      else y : merge (x:xs) ys

-- splitting a list a
-- uses "at"-pattern, guards and lets
split _ [] = ([],[])
split n (r@(x:xs))  
        |  n == 0   = ([],r) 
        |  n > 0    = let (x1,x2) = split (n-1) xs 
                      in (x:x1,x2)
        | otherwise = error "impossible"

Parallel sorting?

mergesortP []     = []
mergesortP [x]    = [x]
mergesortP xs = (forceList sleft) `par`
               (forceList sright) `par`
               merge sleft sright
   where
     (left,right) = split (length xs `div` 2) xs
     sleft = mergesortP left
     sright = mergesortP right


-- Haskell is lazy, the below forces evaluation of a list
forceList :: [a] -> ()
forceList [] = ()
forceList (x:xs) = x `seq` forceList xs

Via par we can carry out computations in parallel.

Of course, for 'small' lists we should resort to a sequantial sorting algorithm (due to overhead caused by thread management etc)

List processing - map

Salary increase for all employees

incSalary allEmployees = [ (employee, salary * 2) | 
                           (employee, salary) <- allEmployees ] 

List comprehensions are easy to understand (for non-programmers).

List comprehensions are syntactic sugar:

incSalary2 allEmployees = map (\(employee, salary) -> (employee, salary * 2)) 
                              allEmployees

List processing - filter

retainEven xs = [ x | x <- xs, x `mod` 2 == 0 ]

Syntactic sugar:

retainEven2 xs = filter (\x -> x `mod` 2 == 0) xs 

Recursive processing

Build the sum of a list

mySum [] = 0
mySum (x:xs) = x + mySum xs

Can equivalently expressed via

mySum2 xs = foldl (\r -> \x -> r + x) 0 xs

More on map, filter, fold later.

Data types and pattern matching

Interpreter for a fragment of IMP

data Exp = Val Int | Plus Exp Exp deriving Eq

eval (Val i) = i
eval (Plus e1 e2) = eval e1 + eval e2

Any differences to an OO solution?

simp (e@(Val i))      = e 
simp (Plus (Val 0) e) = simp e
simp (Plus e1 e2) = Plus (simp e1) (simp e2)

simpFix e = let e' = simp e
            in if e' == e
               then e
               else simpFix e'
data Exp = Val Int | Plus Exp Exp deriving Eq

We could of course provide our own definition

instance Eq Exp where
  (Val i1) == (Val i2)         = i1 == i2
  (Plus e1 e2) == (Plus e3 e4) = e1 == e3 && e2 == e4
  _ == _ = False

_ default pattern, matches anything.

Powerful type system

Example: Generalized algebraic data types

Implicit encoding of typing rules.

data ExpT a where
   ValT   :: Int -> ExpT Int
   TrueT  :: ExpT Bool
   FalseT :: ExpT Bool
   PlusT  :: ExpT Int -> ExpT Int -> ExpT Int
   AndT   :: ExpT Bool -> ExpT Bool -> ExpT Bool


e1 = AndT TrueT FalseT

e2 = AndT FalseT (ValT 1)  -- static type error

Strongly typed interpreter

evalT :: ExpT a -> a
evalT (ValT i)      = i
evalT (PlusT e1 e2) = evalT e1 + evalT e2
evalT TrueT         = True
evalT FalseT        = False
evalT (AndT e1 e2)  = evalT e1 && evalT e2

Summary


{-# LANGUAGE GADTs #-}



data Exp = Val Int | Plus Exp Exp
         | BTrue | BFalse | And Exp Exp


data Values = V Int | B Bool

-- Maybe predefined in Haskell, we simply build our own

data R a = Nil | Res a


-- Evaluator = Interpreter
eval :: Exp -> R Values
eval (Val i) = Res (V i)
eval (BTrue) = Res (B True)
eval (BFalse) = Res (B False)
eval (Plus e1 e2) = case (eval e1) of
                      Res (V i) -> case (eval e2) of
                                       Res (V j) -> Res (V (i + j))
                                       _ -> Nil
                      _ -> Nil
eval (And e1 e2) = case (eval e1) of
                      Res (B i) -> case (eval e2) of
                                       Res (B j) -> Res (B (i && j))
                                       _ -> Nil
                      _ -> Nil  


{-

Let's impose types.

e ::= 1 | true | false | e + e | e && e


well-typed expressions

Notation:  |- e : t

           expression e is of type t


t ::= int | bool

typing rules:

   |- 1 : Int   

   |- true : bool

   |- false : bool

   |- e1 : int    |- e2 : int
   ---------------------------
   |- e1 + e2 : int

   |- e1 : bool    |- e2 : bool
   ---------------------------
   |- e1 + e2 : bool

-}


data Types = TInt | TBool


-- Type checker
tc :: Exp -> R Types
tc (Val i) = Res TInt
tc (BTrue) = Res TBool
tc (BFalse) = Res TBool
tc (Plus e1 e2) = case (tc e1) of
                      Res TInt -> case (tc e2) of
                                       Res TInt -> Res TInt
                                       _ -> Nil
                      _ -> Nil
tc (And e1 e2) = case (tc e1) of
                      Res TBool -> case (tc e2) of
                                       Res TBool -> Res TBool
                                       _ -> Nil
                      _ -> Nil  


-- Specialized interpreters


evalI :: Exp -> Int
evalI (Val i) = i
evalI (Plus e1 e2) = evalI e1 + evalI e2

evalB :: Exp -> Bool
evalB (BTrue) = True
evalB (BFalse) = False
evalB (And e1 e2) = evalB e1 && evalB e2


evalE :: Exp -> R Values
evalE e = case (tc e) of
            Res TInt -> Res (V (evalI e))
            Res TBool -> Res (B (evalB e))
            _ -> Nil




-- Strongly typed interpreter thanks to
-- Generalized algebraic data types (GADTs)

data ExpT a where
   ValT   :: Int -> ExpT Int
   TrueT  :: ExpT Bool
   FalseT :: ExpT Bool
   PlusT  :: ExpT Int -> ExpT Int -> ExpT Int
   AndT   :: ExpT Bool -> ExpT Bool -> ExpT Bool

evalT :: ExpT a -> a
evalT (ValT i)      = i
evalT (PlusT e1 e2) = evalT e1 + evalT e2
evalT TrueT         = True
evalT FalseT        = False
evalT (AndT e1 e2)  = evalT e1 && evalT e2

Concurrency

Concurrent bank transfer with STM

transfer :: TVar Int -> TVar Int -> Int -> STM ()
transfer fromAcc toAcc amount = do
   f <- readTVar fromAcc
   if f <= amount
     then retry
     else do
       writeTVar fromAcc (f - amount)
       t <- readTVar toAcc
       writeTVar toAcc (t + amount)
atomicTransfer :: TVar Int -> TVar Int -> Int -> IO ()
atomicTransfer from to amount = atomically $ transfer from to amount

Atomicity and isolation guaranteed!

Notice the different types STM () and IO (). They represent different side effects/monads (more later).

Extension

transfer2 :: TVar Int -> TVar Int -> Int -> STM ()
transfer2 fromAcc toAcc amount = 
 (do f <- readTVar fromAcc
     if f <= amount
      then retry
      else do
       writeTVar fromAcc (f - amount)
       t <- readTVar toAcc
       writeTVar toAcc (t + amount) )
 `orElse`
    transfer2 fromAcc toAcc (amount-50) 
    -- try again with less money

STM is powerful - STM-based Semaphore

type Sem = TVar Int

newSem :: Int -> IO Sem
newSem n = newTVarIO n

p :: Sem -> STM ()
p sem = do n <- readTVar sem
           if n > 0
             then writeTVar sem (n-1)
             else retry

v :: Sem -> STM ()
v sem = do n <- readTVar sem
           writeTVar sem (n+1)

STM implementation

Summary

Appendix - QuickTour.hs


{-# LANGUAGE GADTs #-}

import Control.Concurrent.STM
import Control.Parallel
import Control.Monad

main = putStrLn "Hello World"


main2 = do putStr "Hello "
           putStrLn "World"


{-
main2' = do putStr "Hello "
           putStrLn "World"  -- commands are not aligned!
-}


main3 = do { putStr "Hello " ;
                 putStrLn "World" }

-- 
cond = False

main4 = do putStr "Hello "
           if cond
            then putStrLn "World"
            else do putStr "wor"
                    putStrLn "ld"


main5 = do
    print "What is your name?"
    name <- getLine
    print ("Hello " ++ name ++ "!")


quicksort [] = []
quicksort (p:xs) = (quicksort lesser) ++ [p] ++ (quicksort greater)
    where
        lesser = filter (< p) xs
        greater = filter (>= p) xs

mergesort []     = []
mergesort [x]    = [x]
mergesort xs = merge (mergesort left) (mergesort right)
   where
     (left,right) = split (length xs `div` 2) xs
       

merge [] ys = ys
merge xs [] = xs
merge (x:xs) (y:ys) = if x < y then x : merge xs (y:ys)
                      else y : merge (x:xs) ys

-- splitting a list a
-- uses "at"-pattern, guards and lets
split _ [] = ([],[])
split n (r@(x:xs))  
        |  n == 0   = ([],r) 
        |  n > 0    = let (x1,x2) = split (n-1) xs 
                      in (x:x1,x2)
        | otherwise = error "impossible"



mergesortP []     = []
mergesortP [x]    = [x]
mergesortP xs = (forceList sleft) `par`
               (forceList sright) `par`
               merge sleft sright
   where
     (left,right) = split (length xs `div` 2) xs
     sleft = mergesortP left
     sright = mergesortP right


-- Haskell is lazy, the below forces evaluation of a list
forceList :: [a] -> ()
forceList [] = ()
forceList (x:xs) = x `seq` forceList xs



incSalary allEmployees = [ (employee, salary * 2) | (employee, salary) <- allEmployees ] 

incSalary2 allEmployees = map (\(employee, salary) -> (employee, salary * 2)) allEmployees



retainEven xs = [ x | x <- xs, x `mod` 2 == 0 ]


retainEven2 xs = filter (\x -> x `mod` 2 == 0) xs 


mySum [] = 0
mySum (x:xs) = x + mySum xs

mySum2 xs = foldl (\r -> \x -> r + x) 0 xs


data Exp = Val Int | Plus Exp Exp deriving Eq

{- derived automatically
instance Eq Exp where
  (Val i1) == (Val i2)         = i1 == i2
  (Plus e1 e2) == (Plus e3 e4) = e1 == e3 && e2 == e4
-}

eval (Val i) = i
eval (Plus e1 e2) = eval e1 + eval e2


simp (e@(Val i))      = e 
simp (Plus (Val 0) e) = simp e
simp (Plus e1 e2) = Plus (simp e1) (simp e2)

simpFix e = let e' = simp e
            in if e' == e
               then e
               else simp e'


data ExpT a where
   ValT   :: Int -> ExpT Int
   TrueT  :: ExpT Bool
   FalseT :: ExpT Bool
   PlusT  :: ExpT Int -> ExpT Int -> ExpT Int
   AndT   :: ExpT Bool -> ExpT Bool -> ExpT Bool

evalT :: ExpT a -> a
evalT (ValT i)      = i
evalT (PlusT e1 e2) = evalT e1 + evalT e2
evalT TrueT         = True
evalT FalseT        = False
evalT (AndT e1 e2)  = evalT e1 && evalT e2

e1 = AndT TrueT FalseT

-- e2 = AndT FalseT (ValT 1)

transfer :: TVar Int -> TVar Int -> Int -> STM ()
transfer fromAcc toAcc amount = do
   f <- readTVar fromAcc
   if f <= amount
     then retry
     else do
       writeTVar fromAcc (f - amount)
       t <- readTVar toAcc
       writeTVar toAcc (t + amount)

atomicTransfer :: TVar Int -> TVar Int -> Int -> IO ()
atomicTransfer from to amount = atomically $ transfer from to amount


transfer2 :: TVar Int -> TVar Int -> Int -> STM ()
transfer2 fromAcc toAcc amount = 
 (do f <- readTVar fromAcc
     if f <= amount
      then retry
      else do
       writeTVar fromAcc (f - amount)
       t <- readTVar toAcc
       writeTVar toAcc (t + amount) )
 `orElse`
    transfer2 fromAcc toAcc (amount-50) 
    -- try again with less money


type Sem = TVar Int

newSem :: Int -> IO Sem
newSem n = newTVarIO n

p :: Sem -> STM ()
p sem = do n <- readTVar sem
           if n > 0
             then writeTVar sem (n-1)
             else retry

v :: Sem -> STM ()
v sem = do n <- readTVar sem
           writeTVar sem (n+1)