Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
135 changes: 135 additions & 0 deletions lexer.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
package main

import "strings"

// Token types emitted by the lexer.
//
// Apart from EOF, the value of each type is the literal source
// character that produces it.
//
// TODO: Better names - are there standard values for these?
const (
	// EOF is reported once the whole input has been consumed.
	EOF = "EOF"

	// Tokens for which the lexer folds a run of adjacent
	// occurrences into a single Token.
	GREATER = ">"
	LESS    = "<"
	PLUS    = "+"
	MINUS   = "-"

	// Tokens which are always reported one at a time.
	INPUT      = ","
	OUTPUT     = "."
	LOOP_OPEN  = "["
	LOOP_CLOSE = "]"
)

// Token is a single lexed unit of the input program.
type Token struct {
	// Type is the token-type, one of the constants such as PLUS,
	// LOOP_OPEN or EOF.
	Type string

	// Repeat is how many times this token appeared back-to-back
	// in the input.
	Repeat int
}

// Lexer carries the state needed to walk through an input program
// one token at a time.
type Lexer struct {
	// input is the program text being lexed.
	input string

	// position is the byte offset in input of the next character
	// to examine.
	position int

	// known maps each recognised single-character token to its
	// token-type.
	known map[string]string
}

// NewLexer creates a new Lexer, which will parse the specified
// input program into a series of tokens.
//
// Whitespace (spaces, tabs, newlines and carriage returns) is removed
// from the input up front, so that a run of identical characters which
// is split across lines or indentation is still reported by Next as a
// single token with the full repeat count.
func NewLexer(input string) *Lexer {

	// Strip whitespace from our input in a single pass. Tabs are
	// included so that tab-indented programs do not have their runs
	// split; every other byte is passed through unchanged.
	stripped := strings.NewReplacer(
		"\n", "",
		"\r", "",
		"\t", "",
		" ", "",
	).Replace(input)

	return &Lexer{
		input: stripped,

		// The simple single-character token-types, for later
		// lookup in Next.
		known: map[string]string{
			"+": PLUS,
			"-": MINUS,
			">": GREATER,
			"<": LESS,
			",": INPUT,
			".": OUTPUT,
			"[": LOOP_OPEN,
			"]": LOOP_CLOSE,
		},
	}
}

// Next returns the next token from our input stream.
//
// Every token is a single character, so the lexer is deliberately
// simple: characters that are not in the known-map are skipped, and
// for the tokens that may repeat a whole run of adjacent identical
// characters is collapsed into one Token carrying the run length.
//
// Once the input is exhausted every call returns an EOF token.
func (l *Lexer) Next() *Token {

	// Walk forward; the post-statement steps over any character
	// we do not recognise.
	for ; l.position < len(l.input); l.position++ {

		sym := string(l.input[l.position])

		// Unknown characters are ignored.
		if _, recognised := l.known[sym]; !recognised {
			continue
		}

		// These tokens are never merged: consume exactly one
		// character and report it.
		switch sym {
		case INPUT, OUTPUT, LOOP_OPEN, LOOP_CLOSE:
			l.position++
			return &Token{Type: sym, Repeat: 1}
		}

		// Remember where the run began, then advance for as long
		// as the same character keeps appearing.
		start := l.position
		for l.position < len(l.input) && string(l.input[l.position]) == sym {
			l.position++
		}

		// The distance travelled is the number of repeats.
		return &Token{Type: sym, Repeat: l.position - start}
	}

	// Nothing left to read.
	return &Token{Type: EOF, Repeat: 1}
}
62 changes: 62 additions & 0 deletions lexer_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
package main

import (
"testing"
)

// TestLexer performs a trivial test of the lexer
func TestLexer(t *testing.T) {

tests := []struct {
expectedType string
expectedCount int
}{
{PLUS, 1},
{MINUS, 1},
{LESS, 5},
{GREATER, 5},
{LOOP_OPEN, 1},
{LOOP_CLOSE, 1},
{OUTPUT, 1},
{INPUT, 1},
{EOF, 1},
}

l := NewLexer("+-<<<<<\n>>>>>[].,")

for i, tt := range tests {
tok := l.Next()
if tok.Type != tt.expectedType {
t.Fatalf("tests[%d] - tokentype wrong, expected=%q, got=%q", i, tt.expectedType, tok.Type)
}
if tok.Repeat != tt.expectedCount {
t.Fatalf("tests[%d] - count wrong, expected=%d, got=%d", i, tt.expectedCount, tok.Repeat)
}
}
}

// TestAdjacent is designed to ensure we count adjacent runs of characters
// even when newlines are in the way.
func TestAdjacent(t *testing.T) {

tests := []struct {
expectedType string
expectedCount int
}{
{PLUS, 5},
{MINUS, 5},
{EOF, 1},
}

l := NewLexer("+\n+\n+\n+\n+- - - - -")

for i, tt := range tests {
tok := l.Next()
if tok.Type != tt.expectedType {
t.Fatalf("tests[%d] - tokentype wrong, expected=%q, got=%q", i, tt.expectedType, tok.Type)
}
if tok.Repeat != tt.expectedCount {
t.Fatalf("tests[%d] - count wrong, expected=%d, got=%d", i, tt.expectedCount, tok.Repeat)
}
}
}
97 changes: 59 additions & 38 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,55 +33,62 @@ _start:
//
opens := []int{}

//
// Create a lexer for the input program
//
l := NewLexer(source)

//
// Fetch the first token; the loop below fetches the rest
//
tok := l.Next()

//
// We keep track of the loop-labels here.
//
// Each time we see a new loop-open "[" we bump this
// by one.
//
i := 0
ln := len(source)

for i < ln {
//
// We'll process the complete program until
// we hit an end of file/input
//
for tok.Type != EOF {

switch source[i] {
case '>':
end := i
for source[end] == '>' {
end++
}
buff.WriteString(fmt.Sprintf(" add r8, %d\n", end-i))
i = end - 1
case '<':
end := i
for source[end] == '<' {
end++
}
buff.WriteString(fmt.Sprintf(" sub r8, %d\n", end-i))
i = end - 1
case '+':
end := i
for source[end] == '+' {
end++
}
buff.WriteString(fmt.Sprintf(" add byte [r8], %d\n", end-i))
i = end - 1
case '-':
end := i
for source[end] == '-' {
end++
}
buff.WriteString(fmt.Sprintf(" sub byte [r8], %d\n", end-i))
i = end - 1
case '.':
// output
//
// Output different things depending on the token-type
//
switch tok.Type {

case GREATER:
buff.WriteString(fmt.Sprintf(" add r8, %d\n", tok.Repeat))

case LESS:
buff.WriteString(fmt.Sprintf(" sub r8, %d\n", tok.Repeat))

case PLUS:
buff.WriteString(fmt.Sprintf(" add byte [r8], %d\n", tok.Repeat))

case MINUS:
buff.WriteString(fmt.Sprintf(" sub byte [r8], %d\n", tok.Repeat))

case OUTPUT:
buff.WriteString(" mov rax, 1\n") // SYS_WRITE
buff.WriteString(" mov rdi, 1\n") // STDOUT
buff.WriteString(" mov rsi, r8\n") // data-comes-here
buff.WriteString(" mov rdx, 1\n") // one byte
buff.WriteString(" syscall\n") // Syscall
case ',':
// input

case INPUT:
buff.WriteString(" mov rax, 0\n") // SYS_READ
buff.WriteString(" mov rdi, 0\n") // STDIN
buff.WriteString(" mov rsi, r8\n") // Dest
buff.WriteString(" mov rdx, 1\n") // one byte
buff.WriteString(" syscall\n") // syscall
case '[':

case LOOP_OPEN:

//
// Open of a block.
Expand All @@ -92,11 +99,14 @@ _start:
// NOTE: We repeat the test at the end of the
// loop so the label here is AFTER our condition
//
i++
buff.WriteString(" cmp byte [r8], 0\n")
buff.WriteString(fmt.Sprintf(" je close_loop_%d\n", i))
buff.WriteString(fmt.Sprintf("label_loop_%d:\n", i))
opens = append(opens, i)
case ']':

case LOOP_CLOSE:

// "]" can only follow an "[".
//
// Every time we see a "[" we save the ID onto a
Expand All @@ -105,6 +115,10 @@ _start:
//
// This will cope with nesting.
//
if len(opens) < 1 {
fmt.Printf("close before open. bug? bogus program?\n")
os.Exit(1)
}

//
// Get the last label-ID
Expand Down Expand Up @@ -141,9 +155,16 @@ _start:

buff.WriteString(fmt.Sprintf(" jne label_loop_%d\n", last))
buff.WriteString(fmt.Sprintf("close_loop_%d:\n", last))

default:
fmt.Printf("token not handled: %v\n", tok)
os.Exit(1)
}

i++
//
// Keep processing
//
tok = l.Next()
}

// terminate
Expand Down