It is currently Sun May 31, 2020 7:18 am

All times are UTC - 8 hours [ DST ]

Post new topic Reply to topic  [ 1 post ] 
Author Message
 Post subject: Dijkstra's shunting-yard parsing algorithm in GT.M
PostPosted: Tue Sep 20, 2011 9:09 pm 
User avatar

Joined: Mon Nov 01, 2010 1:39 pm
Posts: 50
Real Name: John Willis
Began Programming in MUMPS: 01 Apr 2010
I have made a simple parser which converts infix notation to reverse polish, using Dijkstra's shunting-yard algorithm, as well as a generic queue and stack to support it:

The Parser and Scanner Proper:


 set number="0123456789"
 set whitespace=" "
 set operator="+-*/^"
 set terminator=";"
 set precedence("+")=1,precedence("-")=1
 set precedence("*")=2,precedence("/")=2
 set precedence("^")=3
 set assoc("+")="LEFT",assoc("-")="LEFT"
 set assoc("*")="LEFT",assoc("/")="LEFT"
 set assoc("^")="RIGHT"

; this very primitive scanner takes expressions like "1 + 1 * 3 + 4;" and
; converts them into an array of tokens. The whitespace shown in the example
; above is a requirement, at this point. I know, it's dirty...
 do initScanner
 new words,wordCount,outStream,inputLen,i
 set outStream="",wordCount=0
 ;first, divide everything up into words
 set inputLen=$length(input)
 for i=1:1:inputLen do
 . set currentChar=$extract(input,i,i)               ; get the next character
 . if $$isCharWhitespace(currentChar)!$$isCharTerm(currentChar) do   ; found the end of a word
 .. set wordCount=wordCount+1               
 .. set words(wordCount)=outStream
 .. set outStream=""
 . else  do
 .. set outStream=outStream_currentChar
 ;now, figure out what each word is
 new currentWord
 for i=1:1:wordCount do
 . set currentWord=words(i)
 . if $$isWordNumber(currentWord)=1 set tokens(i,"TOKEN")="NUMBER"
 . if $$isCharOperator(currentWord) set tokens(i,"TOKEN")="OPERATOR"
 . if $get(tokens(i,"TOKEN"))=0 write "scanner error: unrecognized lexeme in input '",currentWord,"'",! halt
 . set tokens(i,"VALUE")=words(i)
 . if tokens(i,"TOKEN")="OPERATOR" do
 .. set tokens(i,"PRECEDENCE")=precedence(words(i))
 .. set tokens(i,"ASSOC")=assoc(words(i))
 quit wordCount

; parse implements Dijkstra's shunting-yard algorithm to convert
; the infix expression "input" into RPN form in "outputQueue".
; Note that we are not dealing with parens (yet).
 new tokenCount,i,j set tokenCount=$$scan(input,.tokens)
 do init^queue(.outputQueue)
 new opStack do init^stack(.opStack)
 new curTok,curVal,curPrec,curAssoc
 for i=1:1:tokenCount do
 . set curTok=tokens(i,"TOKEN"),curVal=tokens(i,"VALUE"),curAssoc=$get(tokens(i,"ASSOC"))
 . set curPrec=$get(tokens(i,"PRECEDENCE"))
 . if curTok="NUMBER" do enqueue^queue(.outputQueue,i)
 . if curTok="OPERATOR" do
 .. for j=1:0 do  quit:$$checkOp(.opStack,.tokens,i,.outputQueue)=0
 .. do push^stack(.opStack,i)
 ;no more tokens to read
 for j=1:0 do  quit:$$top^stack(.opStack)=""
 . do enqueue^queue(.outputQueue,$$pop^stack(.opStack))

 new outputQueue,tokens,i
 do parse("3 + 4 * 2;",.outputQueue,.tokens)
 for i=1:0 do  quit:$$front^queue(.outputQueue)=""
 . write tokens($$dequeue^queue(.outputQueue),"VALUE")," "

; o1 is current
; o2 is top of stack
; Return 1
; IF
;  top(opStack) is operator
;  curTok is left-associative AND with precedence '> top(opStack)
; OR
;  curTok is right-associative AND with precedence < top(opStack)
; The caller will ensure that currentToken is OPERATOR.
 new curTokAssoc,curTokPrec,topStkAssoc,topStkPrec,topStk,retVal
 set retVal=0
 set curTokAssoc=tokens(curTok,"ASSOC")
 set curTokPrec=tokens(curTok,"PRECEDENCE")
 set topStk=$$top^stack(.opStack)
 if topStk="" quit 0      ;opStack is empty
 set topStkAssoc=$get(tokens(topStk,"ASSOC"))
 set topStkPrec=$get(tokens(topStk,"PRECEDENCE"))
 if topStkAssoc="" quit 0   ;no operator at top of opStack
 if ((curTokAssoc="LEFT")&(curTokPrec'>topStkPrec))!((curTokAssoc="RIGHT")&(curTokPrec<topStkPrec)) do
 . set retVal=1
 . do enqueue^queue(.outQueue,$$pop^stack(.opStack))
 . set topStk=$$top^stack(.opStack)
 . if topStk="" set retVal=0
 . if tokens(topStk,"TOKEN")'="OPERATOR" set retVal=0
 quit retVal

 new inputLen,currentChar,i,retval s retval=1
 set inputLen=$length(word)
 for i=1:1:inputLen do
 . set currentChar=$extract(word,i,i)
 . if '$$isCharNumber(currentChar) s retval=0
 quit retval

 quit whitespace[character

 quit number[character

 quit operator[character

 quit terminator[character

The Queue Implementation

; implementation of a queue

 set var("count")=0
 set var("front")=0

 set var("count")=var("count")+1
 set var(var("count"))=value

 new retval
 if var("front")=var("count") quit ""
 set var("front")=var("front")+1
 set retval=var(var("front"))
 quit retval

 if var("front")=0 quit ""
 new retval
 set retval=var(var("front"))
 quit retval

The Stack Implementation

 ; implementation of a stack

; init will initialize the stack structure
; for var, which must be passed by reference.
 set var("count")=0

 set var("count")=var("count")+1
 set var(var("count"))=value

 new retval
 if var("count")<1 quit ""
 set retval=var(var("count"))
 set var("count")=var("count")-1
 quit retval

 new retval
 if var("count")<1 quit ""
 set retval=var(var("count"))
 quit retval

John Willis, Founder
Coherent Logic Development

Offline Profile  
Display posts from previous:  Sort by  
Post new topic Reply to topic  [ 1 post ] 

All times are UTC - 8 hours [ DST ]

Who is online

Users browsing this forum: No registered users and 2 guests

You cannot post new topics in this forum
You cannot reply to topics in this forum
You cannot edit your posts in this forum
You cannot delete your posts in this forum
You cannot post attachments in this forum

Search for:
Jump to:  
Powered by phpBB © 2000, 2002, 2005, 2007 phpBB Group
Theme created