Mumpster
http://mumpster.org/

Dijkstra's shunting-yard parsing algorithm in GT.M
http://mumpster.org/viewtopic.php?f=16&t=1647
Page 1 of 1

Author:  jollis [ Tue Sep 20, 2011 9:09 pm ]
Post subject:  Dijkstra's shunting-yard parsing algorithm in GT.M

I have made a simple parser which converts infix notation to reverse polish, using Dijkstra's shunting-yard algorithm, as well as a generic queue and stack to support it:

The Parser and Scanner Proper:

Code:
parser

initScanner
 set number="0123456789"
 set whitespace=" "
 set operator="+-*/^"
 set terminator=";"
 set precedence("+")=1,precedence("-")=1
 set precedence("*")=2,precedence("/")=2
 set precedence("^")=3
 set assoc("+")="LEFT",assoc("-")="LEFT"
 set assoc("*")="LEFT",assoc("/")="LEFT"
 set assoc("^")="RIGHT"
 quit

;
; this very primitive scanner takes expressions like "1 + 1 * 3 + 4;" and
; converts them into an array of tokens. The whitespace shown in the example
; above is a requirement, at this point. I know, it's dirty...
;
scan(input,tokens)
 do initScanner
 new words,wordCount,outStream,inputLen,i
 set outStream="",wordCount=0
 ;first, divide everything up into words
 set inputLen=$length(input)
 for i=1:1:inputLen do
 . set currentChar=$extract(input,i,i)               ; get the next character
 . if $$isCharWhitespace(currentChar)!$$isCharTerm(currentChar) do   ; found the end of a word
 .. set wordCount=wordCount+1               
 .. set words(wordCount)=outStream
 .. set outStream=""
 . else  do
 .. set outStream=outStream_currentChar
 ;now, figure out what each word is
 new currentWord
 for i=1:1:wordCount do
 . set currentWord=words(i)
 . if $$isWordNumber(currentWord)=1 set tokens(i,"TOKEN")="NUMBER"
 . if $$isCharOperator(currentWord) set tokens(i,"TOKEN")="OPERATOR"
 . if $get(tokens(i,"TOKEN"))=0 write "scanner error: unrecognized lexeme in input '",currentWord,"'",! halt
 . set tokens(i,"VALUE")=words(i)
 . if tokens(i,"TOKEN")="OPERATOR" do
 .. set tokens(i,"PRECEDENCE")=precedence(words(i))
 .. set tokens(i,"ASSOC")=assoc(words(i))
 quit wordCount

;
; parse implements Dijkstra's shunting-yard algorithm to convert
; the infix expression "input" into RPN form in "outputQueue".
;
; Note that we are not dealing with parens (yet).
;
parse(input,outputQueue,tokens)
 new tokenCount,i,j set tokenCount=$$scan(input,.tokens)
 do init^queue(.outputQueue)
 new opStack do init^stack(.opStack)
 new curTok,curVal,curPrec,curAssoc
 for i=1:1:tokenCount do
 . set curTok=tokens(i,"TOKEN"),curVal=tokens(i,"VALUE"),curAssoc=$get(tokens(i,"ASSOC"))
 . set curPrec=$get(tokens(i,"PRECEDENCE"))
 . if curTok="NUMBER" do enqueue^queue(.outputQueue,i)
 . if curTok="OPERATOR" do
 .. for j=1:0 do  quit:$$checkOp(.opStack,.tokens,i,.outputQueue)=0
 .. do push^stack(.opStack,i)
 ;no more tokens to read
 for j=1:0 do  quit:$$top^stack(.opStack)=""
 . do enqueue^queue(.outputQueue,$$pop^stack(.opStack))
 quit


test
 new outputQueue,tokens,i
 do parse("3 + 4 * 2;",.outputQueue,.tokens)
 for i=1:0 do  quit:$$front^queue(.outputQueue)=""
 . write tokens($$dequeue^queue(.outputQueue),"VALUE")," "
 quit

; o1 is current
; o2 is top of stack
;
; Return 1
; IF
;  top(opStack) is operator
; AND
;  curTok is left-associative AND with precedence '> top(opStack)
; OR
;  curTok is right-associative AND with precedence < top(opStack)
;
; The caller will ensure that currentToken is OPERATOR.
;
checkOp(opStack,tokens,curTok,outQueue)
 new curTokAssoc,curTokPrec,topStkAssoc,topStkPrec,topStk,retVal
 set retVal=0
 set curTokAssoc=tokens(curTok,"ASSOC")
 set curTokPrec=tokens(curTok,"PRECEDENCE")
 set topStk=$$top^stack(.opStack)
 if topStk="" quit 0      ;opStack is empty
 set topStkAssoc=$get(tokens(topStk,"ASSOC"))
 set topStkPrec=$get(tokens(topStk,"PRECEDENCE"))
 if topStkAssoc="" quit 0   ;no operator at top of opStack
 if ((curTokAssoc="LEFT")&(curTokPrec'>topStkPrec))!((curTokAssoc="RIGHT")&(curTokPrec<topStkPrec)) do
 . set retVal=1
 . do enqueue^queue(.outQueue,$$pop^stack(.opStack))
 . set topStk=$$top^stack(.opStack)
 . if topStk="" set retVal=0
 . if tokens(topStk,"TOKEN")'="OPERATOR" set retVal=0
 quit retVal

isWordNumber(word)
 new inputLen,currentChar,i,retval s retval=1
 set inputLen=$length(word)
 for i=1:1:inputLen do
 . set currentChar=$extract(word,i,i)
 . if '$$isCharNumber(currentChar) s retval=0
 quit retval

isCharWhitespace(character)
 quit whitespace[character

isCharNumber(character)
 quit number[character

isCharOperator(character)
 quit operator[character

isCharTerm(character)
 quit terminator[character


The Queue Implementation

Code:
queue
; implementation of a queue

init(var)
 set var("count")=0
 set var("front")=0
 quit

enqueue(var,value)
 set var("count")=var("count")+1
 set var(var("count"))=value
 quit

dequeue(var)
 new retval
 if var("front")=var("count") quit ""
 set var("front")=var("front")+1
 set retval=var(var("front"))
 quit retval

front(var)
 if var("front")=0 quit ""
 new retval
 set retval=var(var("front"))
 quit retval


The Stack Implementation

Code:
stack
 ; implementation of a stack

;
; init will initialize the stack structure
; for var, which must be passed by reference.
;
init(var)
 set var("count")=0
 quit

push(var,value)
 set var("count")=var("count")+1
 set var(var("count"))=value
 quit

pop(var)
 new retval
 if var("count")<1 quit ""
 set retval=var(var("count"))
 set var("count")=var("count")-1
 quit retval

top(var)
 new retval
 if var("count")<1 quit ""
 set retval=var(var("count"))
 quit retval

Page 1 of 1 All times are UTC - 8 hours [ DST ]
Powered by phpBB © 2000, 2002, 2005, 2007 phpBB Group
http://www.phpbb.com/