Martin Sulzmann
vars Variable names, start with lower-case letter
prog ::= block
block ::= "{" statement "}"
statement ::= statement ";" statement -- Command sequence
| vars ":=" exp -- Variable declaration
| vars "=" exp -- Variable assignment
| "while" exp block -- While
| "if" exp block "else" block -- If-then-else
| "print" exp -- Print
exp ::= 0 | 1 | -1 | ... -- Integers
| "true" | "false" -- Booleans
| exp "+" exp -- Addition
| exp "*" exp -- Multiplication
| exp "||" exp -- Disjunction
| exp "&&" exp -- Conjunction
| "!" exp -- Negation
| exp "==" exp -- Equality test
| exp "<" exp -- Lesser test
| "(" exp ")" -- Grouping of expressions
| vars -- Variables
The above describes the concrete syntax. In the abstract syntax, we
omit grouping of expressions and statement blocks. We will write
e
to refer to an expression and s
to refer to
a statement.
T ::= int | bool
We use lists to record the type assignment for variables. We will
write x
to refer to a variable.
G ::= [] | [x : T] | G ++ G
[]
refers to the empty environment.
We write [x1 : T1, ..., xn : Tn]
as a short-form for
[x1 : T1] ++ ... ++ [xn : Tn]
.
We assume that ++
overwrites any ‘earlier’ binding.
Here are a few examples.
[] ++ [x : int] = [x : int]
[x : int] ++ [x : bool] = [x : bool]
[x : int] ++ [y : bool] = [x : int, y : bool]
[x : int] ++ [y : bool] ++ [x : bool] = [y : bool, x : bool]
We assume a lookup
function to lookup the type of a
variable x
in some environment G
.
lookup(G,x) = T if G = [..., x : T, ...]
Some examples.
lookup([x : int, y : bool], y) = bool
lookup([x : int, y : bool], z) is undefined
G |- e : T
i some number
------------
G |- i : int
G |- true : bool
G |- false : bool
lookup(G,x) = T
------------------
G |- x : T
G |- e1 : int G |- e2 : int
----------------------------------------
G |- e1 + e2 : int
G |- e1 : int G |- e2 : int
----------------------------------------
G |- e1 * e2 : int
G |- e1 : bool G |- e2 : bool
----------------------------------------
G |- e1 || e2 : bool
G |- e1 : bool G |- e2 : bool
----------------------------------------
G |- e1 && e2 : bool
G |- e : bool
----------------------------------------
G |- ! e : bool
G |- e1 : T G |- e2 : T
----------------------------------------
G |- e1 == e2 : bool
G |- e1 : int G |- e2 : int
----------------------------------------
G |- e1 < e2 : bool
G |- (s,G2)
We can type check statement s
under environemnt
G
and possibly obtain a new environment G2
because we may introduce/overwrite variables via :=
.
The initial environment is always the empty environment
[]
.
G1 |- (s1, G2) G2 |- (s2,G3)
----------------------------------------
G1 |- (s1 ; s2, G3)
G |- e : T G2 = G ++ [x : T]
----------------------------------------
G |- (x := e, G2)
G |- x : T G |- e : T
----------------------------------------
G |- (x = e, G)
G |- e : bool G |- (s,_)
----------------------------------------
G |- (while e s, G)
G |- e : bool G |- (s1,_) G |- (s2,_)
----------------------------------------
G |- (if e s1 else s2, G)
G |- e : T
----------------------------------------
G |- (print e, G)
The following is valid.
x := 1; x := x < 2
Type checking x := 1
under the empty environment
[]
yields [x : int]
. Thus, we can type check
x := x < 2
.
The following is valid.
x := false;
while x {
x := 1
};
x := true
Within the while-body we “overwrite” variable x
. We
follow standard scoping rules where local variable bindings can’t leak
to the outer scope. Hence, in the premise of the typing rule for while
we find G |- (s,_)
where _
indicates that we
don’t care about any local variable binding that might have been
introduced in the while body.
Simplifying assumption. We assume that variable declarations
:=
always introduce distinct variables. Thus, we rule out
Example 1 and Example 2.
Thanks to this simplifying assumption, evaluation rules for while and if-then-else can be simplified. The general case is dealt with further below.
V ::= i | true | false
S ::= [] | [x : V] | S ++ S
State S
follows the same rules as environment
G
. Instead of type assignments we find value
assingments.
S |- e => V
S |- i => i
S |- true => true
S |- false => false
lookup(S,x) = V
------------------
S |- x => V
S |- e1 => i1 S |- e2 => i2
i = i1 + i2
----------------------------------------
S |- e2 + e2 => i
S |- e1 => i1 S |- e2 => i2
i = i1 * i2
----------------------------------------
S |- e2 * e2 => i
S |- e1 => true
----------------------------------------
S |- e2 || e2 => true
S |- e1 => false S |- e1 => V
where V is a Boolean value
----------------------------------------
S |- e2 || e2 => V
S |- e1 => false
----------------------------------------
S |- e2 && e2 => false
S |- e1 => true S |- e1 => V
where V is a Boolean value
----------------------------------------
S |- e2 && e2 => V
S |- e => true
----------------------------------------
S |- ! e => false
S |- e => false
----------------------------------------
S |- ! e => true
S |- e1 => V S |- e2 => V
----------------------------------------
S |- e1 == e2 => true
S |- e1 => V1 S |- e2 => V2
where V1 and V2 are different values
----------------------------------------
S |- e1 == e2 => false
S |- e1 => V1 S |- e2 => V2
where V1 and V2 are numbers and
V1 is smaller than V2
----------------------------------------
S |- e1 < e2 : true
S |- e1 => V1 S |- e2 => V2
where V1 and V2 are numbers and
V1 is not smaller than V2
----------------------------------------
S |- e1 < e2 : false
S | s => S2
S1 |- s1 => S2 S2 |- s2 => S3
----------------------------------------
S1 |- s1 ; s2 => S3
S |- e => V S2 = S ++ [x : V]
----------------------------------------
S |- x := e => S2
S |- e => V S2 = S ++ [x : V]
----------------------------------------
S |- x = e => S2
S |- e => false
----------------------------------------
S |- while e s => S
S |- e => true
S |- s => S2
S2 |- while e s => S3
----------------------------------------
S |- while e s => S3
S |- e => true S |- s1 => S2
----------------------------------------
S |- if e s1 else s2 => S2
S |- e => false S |- s2 => S2
----------------------------------------
S |- if e s1 else s2 => S2
S |- e => V
"print V on console"
----------------------------------------
S |- print e => S
We don’t necessarily assume that :=
always introduces
distinct variables. This complicates evaluation rules for while and
if-then-else.
Consider the following example.
x := false;
y := 1;
if x {
x = true
} else {
x := 1;
y = y + 1;
};
x = y < 3 // P
We evaluate the else branch where we introduce a new variable
declaration. This local variable declaration overwrites the global
declaration. In terms of value assignments, we enter the else branch
with [x : false, y : 1]
but exit the else branch with
[y : 2, x : 1]
.
Scoping rules demand that local variables are not allowed to leak
into the global scope. Hence, when evaluating the statement P we can’t
use [y : 2, x : 1]
but should use
[x : false, y : 2]
instead.
How to obtain the “correct” value assignment? We need to apply the following rules.
R1: Any changes that are made within the else branch but affect global variables must be recorded.
R2: Any changes that only affect local variables must be ignored.
We introduce a few more helper functions.
We compute the list of variables for some value assignment.
variables([x1 : V1, ..., x : Vn]) = [x1, ..., xn]
We use list comprehension notation of the following form.
S3 = [ x : lookup(S2,x) | x <- variables(S1), lookup(S2,x) and lookup(S1,x) have same type ]
++
[ x : lookup(S1,x) | x <- variables(S1), lookup(S2,x) and lookup(S1,x) have different type ]
We assume that S1
and S2
are two value
assignments.
S2
is possibly an extension of
S1
S2
possibly has updated some value assignments found
in S1
.
We build a new value assignment S3
where we only want to
consider variables that are mentioned by S1
but want to use
the values recorded in S2
. Because S2
may
overwrite an existing variable using a different type, we require the
two cases in the above list comprehension. The first case covers R1 and
the second case covers R2.
Given S1 and S2, we write update(S1,S2)
as a short-hand
for the above list comprehension.
Example. Consider the following simple case.
S1 = [x : 3, y : true]
S2 = [x : 4, y : true, z : false]
Then, we find
S3 = [x : 4, y : true]
Example. Consider the following complicated case.
S1 = [x : 3, y : true]
S2 = [y : false, x : false]
Then, we find
S3 = [x : 3, y : false]
Evaluations rules covering the general case are as follows.
S |- e => true
S |- s => S2
S3 = update(S,S2)
S3 |- while e s => S4
S5 = update(S,S4)
----------------------------------------
S |- while e s => S5
S |- e => true S |- s1 => S2
S3 = update(S,S2)
----------------------------------------
S |- if e s1 else s2 => S3
S |- e => false S |- s2 => S2
S3 = update(S,S2)
----------------------------------------
S |- if e s1 else s2 => S3
package main
import "fmt"
import "strconv"
// Simple imperative language
/*
vars Variable names, start with lower-case letter
prog ::= block
block ::= "{" statement "}"
statement ::= statement ";" statement -- Command sequence
| vars ":=" exp -- Variable declaration
| vars "=" exp -- Variable assignment
| "while" exp block -- While
| "if" exp block "else" block -- If-then-else
| "print" exp -- Print
exp ::= 0 | 1 | -1 | ... -- Integers
| "true" | "false" -- Booleans
| exp "+" exp -- Addition
| exp "*" exp -- Multiplication
| exp "||" exp -- Disjunction
| exp "&&" exp -- Conjunction
| "!" exp -- Negation
| exp "==" exp -- Equality test
| exp "<" exp -- Lesser test
| "(" exp ")" -- Grouping of expressions
| vars -- Variables
*/
// Values
type Kind int
const (
ValueInt Kind = 0
ValueBool Kind = 1
Undefined Kind = 2
)
type Val struct {
flag Kind
valI int
valB bool
}
func mkInt(x int) Val {
return Val{flag: ValueInt, valI: x}
}
func mkBool(x bool) Val {
return Val{flag: ValueBool, valB: x}
}
func mkUndefined() Val {
return Val{flag: Undefined}
}
func showVal(v Val) string {
var s string
switch {
case v.flag == ValueInt:
s = Num(v.valI).pretty()
case v.flag == ValueBool:
s = Bool(v.valB).pretty()
case v.flag == Undefined:
s = "Undefined"
}
return s
}
// Types
type Type int
const (
TyIllTyped Type = 0
TyInt Type = 1
TyBool Type = 2
)
func showType(t Type) string {
var s string
switch {
case t == TyInt:
s = "Int"
case t == TyBool:
s = "Bool"
case t == TyIllTyped:
s = "Illtyped"
}
return s
}
// Value State is a mapping from variable names to values
type ValState map[string]Val
// Value State is a mapping from variable names to types
type TyState map[string]Type
// Interface
type Exp interface {
pretty() string
eval(s ValState) Val
infer(t TyState) Type
}
type Stmt interface {
pretty() string
eval(s ValState)
check(t TyState) bool
}
// Statement cases (incomplete)
type Seq [2]Stmt
type Decl struct {
lhs string
rhs Exp
}
type IfThenElse struct {
cond Exp
thenStmt Stmt
elseStmt Stmt
}
type Assign struct {
lhs string
rhs Exp
}
// Expression cases (incomplete)
type Bool bool
type Num int
type Mult [2]Exp
type Plus [2]Exp
type And [2]Exp
type Or [2]Exp
type Var string
/////////////////////////
// Stmt instances
// pretty print
func (stmt Seq) pretty() string {
return stmt[0].pretty() + "; " + stmt[1].pretty()
}
func (decl Decl) pretty() string {
return decl.lhs + " := " + decl.rhs.pretty()
}
// eval
func (stmt Seq) eval(s ValState) {
stmt[0].eval(s)
stmt[1].eval(s)
}
func (ite IfThenElse) eval(s ValState) {
v := ite.cond.eval(s)
if v.flag == ValueBool {
switch {
case v.valB:
ite.thenStmt.eval(s)
case !v.valB:
ite.elseStmt.eval(s)
}
} else {
fmt.Printf("if-then-else eval fail")
}
}
// Maps are represented via points.
// Hence, maps are passed by "reference" and the update is visible for the caller as well.
func (decl Decl) eval(s ValState) {
v := decl.rhs.eval(s)
x := (string)(decl.lhs)
s[x] = v
}
// type check
func (stmt Seq) check(t TyState) bool {
if !stmt[0].check(t) {
return false
}
return stmt[1].check(t)
}
func (decl Decl) check(t TyState) bool {
ty := decl.rhs.infer(t)
if ty == TyIllTyped {
return false
}
x := (string)(decl.lhs)
t[x] = ty
return true
}
func (a Assign) check(t TyState) bool {
x := (string)(a.lhs)
return t[x] == a.rhs.infer(t)
}
/////////////////////////
// Exp instances
// pretty print
func (x Var) pretty() string {
return (string)(x)
}
func (x Bool) pretty() string {
if x {
return "true"
} else {
return "false"
}
}
func (x Num) pretty() string {
return strconv.Itoa(int(x))
}
func (e Mult) pretty() string {
var x string
x = "("
x += e[0].pretty()
x += "*"
x += e[1].pretty()
x += ")"
return x
}
func (e Plus) pretty() string {
var x string
x = "("
x += e[0].pretty()
x += "+"
x += e[1].pretty()
x += ")"
return x
}
func (e And) pretty() string {
var x string
x = "("
x += e[0].pretty()
x += "&&"
x += e[1].pretty()
x += ")"
return x
}
func (e Or) pretty() string {
var x string
x = "("
x += e[0].pretty()
x += "||"
x += e[1].pretty()
x += ")"
return x
}
// Evaluator
func (x Bool) eval(s ValState) Val {
return mkBool((bool)(x))
}
func (x Num) eval(s ValState) Val {
return mkInt((int)(x))
}
func (e Mult) eval(s ValState) Val {
n1 := e[0].eval(s)
n2 := e[1].eval(s)
if n1.flag == ValueInt && n2.flag == ValueInt {
return mkInt(n1.valI * n2.valI)
}
return mkUndefined()
}
func (e Plus) eval(s ValState) Val {
n1 := e[0].eval(s)
n2 := e[1].eval(s)
if n1.flag == ValueInt && n2.flag == ValueInt {
return mkInt(n1.valI + n2.valI)
}
return mkUndefined()
}
func (e And) eval(s ValState) Val {
b1 := e[0].eval(s)
b2 := e[1].eval(s)
switch {
case b1.flag == ValueBool && b1.valB == false:
return mkBool(false)
case b1.flag == ValueBool && b2.flag == ValueBool:
return mkBool(b1.valB && b2.valB)
}
return mkUndefined()
}
func (e Or) eval(s ValState) Val {
b1 := e[0].eval(s)
b2 := e[1].eval(s)
switch {
case b1.flag == ValueBool && b1.valB == true:
return mkBool(true)
case b1.flag == ValueBool && b2.flag == ValueBool:
return mkBool(b1.valB || b2.valB)
}
return mkUndefined()
}
// Type inferencer/checker
func (x Var) infer(t TyState) Type {
y := (string)(x)
ty, ok := t[y]
if ok {
return ty
} else {
return TyIllTyped // variable does not exist yields illtyped
}
}
func (x Bool) infer(t TyState) Type {
return TyBool
}
func (x Num) infer(t TyState) Type {
return TyInt
}
func (e Mult) infer(t TyState) Type {
t1 := e[0].infer(t)
t2 := e[1].infer(t)
if t1 == TyInt && t2 == TyInt {
return TyInt
}
return TyIllTyped
}
func (e Plus) infer(t TyState) Type {
t1 := e[0].infer(t)
t2 := e[1].infer(t)
if t1 == TyInt && t2 == TyInt {
return TyInt
}
return TyIllTyped
}
func (e And) infer(t TyState) Type {
t1 := e[0].infer(t)
t2 := e[1].infer(t)
if t1 == TyBool && t2 == TyBool {
return TyBool
}
return TyIllTyped
}
func (e Or) infer(t TyState) Type {
t1 := e[0].infer(t)
t2 := e[1].infer(t)
if t1 == TyBool && t2 == TyBool {
return TyBool
}
return TyIllTyped
}
// Helper functions to build ASTs by hand
func number(x int) Exp {
return Num(x)
}
func boolean(x bool) Exp {
return Bool(x)
}
func plus(x, y Exp) Exp {
return (Plus)([2]Exp{x, y})
// The type Plus is defined as the two element array consisting of Exp elements.
// Plus and [2]Exp are isomorphic but different types.
// We first build the AST value [2]Exp{x,y}.
// Then cast this value (of type [2]Exp) into a value of type Plus.
}
func mult(x, y Exp) Exp {
return (Mult)([2]Exp{x, y})
}
func and(x, y Exp) Exp {
return (And)([2]Exp{x, y})
}
func or(x, y Exp) Exp {
return (Or)([2]Exp{x, y})
}
// Examples
func run(e Exp) {
s := make(map[string]Val)
t := make(map[string]Type)
fmt.Printf("\n ******* ")
fmt.Printf("\n %s", e.pretty())
fmt.Printf("\n %s", showVal(e.eval(s)))
fmt.Printf("\n %s", showType(e.infer(t)))
}
func ex1() {
ast := plus(mult(number(1), number(2)), number(0))
run(ast)
}
func ex2() {
ast := and(boolean(false), number(0))
run(ast)
}
func ex3() {
ast := or(boolean(false), number(0))
run(ast)
}
func main() {
fmt.Printf("\n")
ex1()
ex2()
ex3()
}