From 47db343eeac15ce0ca844050abe89a7bba89b636 Mon Sep 17 00:00:00 2001 From: luciodj Date: Sun, 2 Aug 2015 12:38:07 +0200 Subject: [PATCH] changed oss to return only a sym --- OSP.Mod | 2 +- osg.py | 48 ++++++++++++--------- oss.py | 111 +++++++++++++++++++++++++----------------------- test_oss.py | 120 ++++++++++++++++++++++++++-------------------------- 4 files changed, 149 insertions(+), 132 deletions(-) diff --git a/OSP.Mod b/OSP.Mod index 5694cab..6779ce7 100644 --- a/OSP.Mod +++ b/OSP.Mod @@ -210,7 +210,7 @@ MODULE OSP; (* NW 23.9.93 / 20.10.07 / 30.10.2013*) IF pno = 0 THEN OSG.ReadInt(x) ELSIF pno = 1 THEN OSG.WriteInt(x) ELSIF pno = 2 THEN OSG.WriteChar(x) - ELSE OSS.Mark("no lparen") + ELSE OSS.Mark("no lparen") END ; IF sym = OSS.rparen THEN OSS.Get(sym) ELSE OSS.Mark("no rparen") END ; ELSIF pno = 3 THEN OSG.WriteLn diff --git a/osg.py b/osg.py index 37e6f4b..e8dc80c 100644 --- a/osg.py +++ b/osg.py @@ -7,12 +7,19 @@ from collections import namedtuple MemSize = 8192 -# class_ / mode -Head = 0; Const = 1; Var = 2; Par = 3; Fld = 4; Typ = 5 -SProc = 6; SFunc = 7; Proc = 8; NoTyp = 9; Reg = 10; RegI = 11; Cond = 12 -SB = 13; SP = 14; LNK = 15; # reserved registers -# form -Boolean = 0; Integer = 1; Array = 2; Record = 3; +WordSize = 4 + +class eClass: + Const, Var, Par, Field, Typ, SProc, SFunc, Proc, NoTyp = range( 9) + +class eMode: + Reg, RegI, Cond = range( 3) + +# reserved registers +SB = 13; SP = 14; LNK = 15; + +class eForm: # forms enum + Boolean, Integer, Array, Record = range( 4) U = 0x2000 # frequently used opcodes @@ -29,32 +36,35 @@ Item = namedtuple( 'Item', [ 'mode', 'lev', # int - 'type', # Type + 'type', # Type Descriptor 'a', # int 'b', # int 'r' # int ]) +ObjScope = namedtuple( 'ObjScope', [ + 'name', + 'idents', # list of ObjDesc + ]) + ObjDesc = namedtuple( 'ObjDesc', [ 'class_', - 'lev', # INTEGER - 'next', - 'dsc', # Object - 'type', # Type + # 'lev', # INTEGER + 'idents', # list of Ident + 'type', # Type Descriptor 'name', # oss.Ident 'val', 'nofpar' # LONGINT ]) - + TypeDesc = namedtuple( 'TypeDesc', [ - 'form', # INTEGER - 'dsc', # Object - 'base', # Type - 'size', - 'len', + 'form', # enum eForm + 'base', # Type Descriptor + 'size', # of bytes 'nofpar' # LONGINT ]) + class Osg: curlev = 0 pc = 0 @@ -67,8 +77,8 @@ class Osg: Ior : "IOR", Xor : "XOR", Add : "ADD", Sub : "SUB", Mul : "MUL", Div : "/" } mnemo1 = { PL : 'PL', MI : 'MI', EQ : 'EQ', NE : 'NE', LT : 'LT', GE : 'GE', LE : 'LE', GT : 'GT', 15 : 'NO'} code = [ 0 for x in xrange( MemSize)] - boolType = TypeDesc( form = Boolean, size = 4, dsc=None, base=None, len=None, nofpar=None) - intType = TypeDesc( form = Integer, size = 4, dsc=None, base=None, len=None, nofpar=None) + boolType = TypeDesc( form = eForm.Boolean, size = 4, base=None, nofpar=None) + intType = TypeDesc( form = eForm.Integer, size = 4, base=None, nofpar=None) def Put0( self, op, a, b, c): ' emit format-0 instruction' diff --git a/oss.py b/oss.py index 06fa5ac..23f58f2 100644 --- a/oss.py +++ b/oss.py @@ -51,7 +51,7 @@ def __init__( self, reader): self.reader = reader self.ch = reader.next() # print self.ch, # dbg - self.pos = 0 + self.value = None self.errpos = 0 self.errcnt = 0 @@ -74,8 +74,8 @@ def getIdentifier( self): # returns sym = keyword or ident ids.append( self.ch) self.next() ids = ''.join( ids) - if ids in KeyTable : return( KeyTable[ ids], None) - else: return ( Lex.ident_, ids[: IDLEN]) + if ids in KeyTable : return KeyTable[ ids] + else: self.value = ids[: IDLEN]; return Lex.ident_ def getString( self): self.next() @@ -88,12 +88,12 @@ def getString( self): if len( s) > STRINGLEN : self.mark( 'string too long') self.next() - return Lex.string_, s[:STRINGLEN] + self.value = s[:STRINGLEN] + return Lex.string_ def getHexString( self): - self.sym = Lex.string_ - self.strval = '' self.next(); + self.value = '' while (self.ch != '') and (self.ch != '$') : while self.ch in ' \x09\x0D': self.next() # skip blanks s = self.ch; self.next() @@ -101,10 +101,11 @@ def getHexString( self): try: m = int( s, base = 16) except ValueError: self.mark( 'hex dig pair expected') - if len(self.strval) < STRINGLEN : - self.strval += chr( m); + if len( self.value) < STRINGLEN : + self.value += chr( m); else: self.mark( 'string too long') - self.next(); + self.next() + return Lex.string_ def Ten( self, e): # returns a REAL; x = 1.0 @@ -116,6 +117,7 @@ def Ten( self, e): # returns a REAL; return x def getNumber( self): # returns a tuple (char_/integer_/real_ , ival/rval) + self.value = 0 digits = [] while self.ch in string.hexdigits: digits.append( self.ch) @@ -126,23 +128,24 @@ def getNumber( self): # returns a tuple (char_/integer_/real_ , ival/rval) if (self.ch in 'XHR') : # hex (char, int or real) c = self.ch; self.next() - try: k = int( s, base=16) + try: self.value = int( s, base=16) except ValueError: self.mark( 'bad hex digits') if c == 'X': - if k >= 0x100 : k = 0; self.mark( 'bad char value') - return Lex.char_, k - elif c == 'R' : - return Lex.real_, 1.0 * k + if self.value >= 0x100 : self.mark( 'bad char value') + return Lex.char_ + elif c == 'R' : + self.value *= 1.0 + return Lex.real_ else: # 'H' - return Lex.int_, k + return Lex.int_ elif self.ch == "." : self.next(); if self.ch == "." : self.ch = chr(0x7f) # double dot (upto) -> decimal integer - try: k = int( s, base=10) + try: self.value = int( s, base=10) except ValueError: self.mark( 'bad integer') - return Lex.int_, k + return Lex.int_ else: # real numbers x = 0.0 @@ -176,12 +179,13 @@ def getNumber( self): # returns a tuple (char_/integer_/real_ , ival/rval) elif e > 0 : if e <= MAXEX : x = self.Ten(e) * x else: x = 0.0; self.mark( 'too large') - return Lex.real_, x + self.value = x + return Lex.real_ else: # decimal integer - try: k = int( ''.join( digits)) + try: self.value = int( ''.join( digits)) except ValueError : self.mark( 'bad integer') - return Lex.int_, k + return Lex.int_ def comment( self): self.next(); @@ -194,66 +198,67 @@ def comment( self): while self.ch == "*" : self.next() if self.ch == ')' or self.ch == '' : break if self.ch != '' : self.next() - else: self.mark( "unterminated comment") + else: self.mark( 'unterminated comment') def get( self): # returns last symbol detected + self.value = None while ( self.ch != '') and ( self.ch <= ' ') : self.next() - if self.ch == '': return( Lex.eof_, None) + if self.ch == '': return Lex.eof_ if self.ch < 'A' : if self.ch < '0' : if self.ch == '"' : return self.getString() - elif self.ch == "#" : self.next(); return( Lex.neq_, None) + elif self.ch == "#" : self.next(); return Lex.neq_ elif self.ch == "$" : return self.getHexString() - elif self.ch == "&" : self.next(); return( Lex.and_, None) + elif self.ch == "&" : self.next(); return Lex.and_ elif self.ch == "(" : self.next(); if self.ch == "*" : return self.comment() - else: return( Lex.lparen_, None) - elif self.ch == ")" : self.next(); return( Lex.rparen_, None) - elif self.ch == "*" : self.next(); return( Lex.times_, None) - elif self.ch == "+" : self.next(); return( Lex.plus_, None) - elif self.ch == "," : self.next(); return( Lex.comma_, None) - elif self.ch == "-" : self.next(); return( Lex.minus_, None) + else: return Lex.lparen_ + elif self.ch == ")" : self.next(); return Lex.rparen_ + elif self.ch == "*" : self.next(); return Lex.times_ + elif self.ch == "+" : self.next(); return Lex.plus_ + elif self.ch == "," : self.next(); return Lex.comma_ + elif self.ch == "-" : self.next(); return Lex.minus_ elif self.ch == "." : self.next(); - if self.ch == "." : self.next(); return( Lex.upto_, None) - else: return (Lex.period_, None) - elif self.ch == "/" : self.next(); return( Lex.rdiv_, None) - else: self.next(); return( Lex.null_, None) # ! % ' + if self.ch == "." : self.next(); return Lex.upto_ + else: return Lex.period_ + elif self.ch == "/" : self.next(); return Lex.rdiv_ + else: self.next(); return Lex.null_ # ! % ' elif self.ch < ":" : return self.getNumber() elif self.ch == ":" : self.next(); - if self.ch == "=" : self.next(); return( Lex.becomes_, None) - else: return( Lex.colon_, None) - elif self.ch == ";" : self.next(); return( Lex.semicolon_, None) + if self.ch == "=" : self.next(); return Lex.becomes_ + else: return Lex.colon_ + elif self.ch == ";" : self.next(); return Lex.semicolon_ elif self.ch == "<" : self.next(); - if self.ch == "=" : self.next(); return( Lex.leq_, None) - else: return( Lex.lss_, None) - elif self.ch == "=" : self.next(); return( Lex.eql_, None) + if self.ch == "=" : self.next(); return Lex.leq_ + else: return Lex.lss_ + elif self.ch == "=" : self.next(); return Lex.eql_ elif self.ch == ">" : self.next(); - if self.ch == "=" : self.next(); return( Lex.geq_, None) - else: return( Lex.gtr_, None) - else: self.next(); return( Lex.null_, None) + if self.ch == "=" : self.next(); return Lex.geq_ + else: return Lex.gtr_ + else: self.next(); return Lex.null_ elif self.ch < "[" : return self.getIdentifier() elif self.ch < "a" : c = self.ch; self.next() - if c == "[" : return( Lex.lbrak_, None) - elif c == "]" : return( Lex.rbrak_, None) - elif c == "^" : return( Lex.arrow_, None) - else: return( Lex.null_, None) # _ ` + if c == "[" : return Lex.lbrak_ + elif c == "]" : return Lex.rbrak_ + elif c == "^" : return Lex.arrow_ + else: return Lex.null_ # _ ` elif self.ch < "{" : return self.getIdentifier() else: c = self.ch; self.next() - if c == "{" : return( Lex.lbrace_, None) - elif c == "}" : return( Lex.rbrace_, None) - elif c == "|" : return( Lex.bar_, None) - elif c == "~" : return( Lex.not_, None) - elif c == 0x7f: return( Lex.upto_, None) - else: return( Lex.null_, None) + if c == "{" : return Lex.lbrace_ + elif c == "}" : return Lex.rbrace_ + elif c == "|" : return Lex.bar_ + elif c == "~" : return Lex.not_ + elif c == 0x7f: return Lex.upto_ + else: return Lex.null_ diff --git a/test_oss.py b/test_oss.py index d30cec8..d157040 100644 --- a/test_oss.py +++ b/test_oss.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -from ORS import Lex, Lexer +from oss import Lex, Lexer def printLex( symTuple): sym, arg = symTuple @@ -11,83 +11,85 @@ def printLex( symTuple): def test_string(): test = '@*2345' - r = Lexer( iter( '"'+test+'"')).get() - assert r == ( Lex.string_, test) + lex = Lexer( iter( '"'+test+'"' + ' $4041$')) + assert (lex.get(), lex.value) == ( Lex.string_, test) + assert (lex.get(), lex.value) == ( Lex.string_, '@A') def test_identifier(): test = 'alphabet' lex = Lexer( iter( test)) - assert lex.get() == ( Lex.ident_, test) + r = lex.get(), lex.value + assert r == ( Lex.ident_, test) def test_numbers(): test = '12345 12.345 12.3E-4 12X 0ABCDH' lex = Lexer( iter( test)) - assert lex.get() == ( Lex.int_, 12345) - assert lex.get() == ( Lex.real_, 12.345) - assert lex.get() == ( Lex.real_, 12.3E-4) - assert lex.get() == ( Lex.char_, 0x12) - assert lex.get() == ( Lex.int_, 0xABCD) - assert lex.get() == ( Lex.eof_, None) + assert (lex.get(), lex.value) == ( Lex.int_, 12345) + assert (lex.get(), lex.value) == ( Lex.real_, 12.345) + assert (lex.get(), lex.value) == ( Lex.real_, 12.3E-4) + assert (lex.get(), lex.value) == ( Lex.char_, 0x12) + assert (lex.get(), lex.value) == ( Lex.int_, 0xABCD) + assert (lex.get(), lex.value) == ( Lex.eof_, None) def test_symbols(): test = '+ - * / & | ~ ^ .. >= <= > < = # . , ; : := { } [ ]' lex = Lexer( iter( test)) - assert lex.get() == ( Lex.plus_, None) - assert lex.get() == ( Lex.minus_, None) - assert lex.get() == ( Lex.times_, None) - assert lex.get() == ( Lex.rdiv_, None) - assert lex.get() == ( Lex.and_, None) - assert lex.get() == ( Lex.bar_, None) - assert lex.get() == ( Lex.not_, None) - assert lex.get() == ( Lex.arrow_, None) - assert lex.get() == ( Lex.upto_, None) - assert lex.get() == ( Lex.geq_, None) - assert lex.get() == ( Lex.leq_, None) - assert lex.get() == ( Lex.gtr_, None) - assert lex.get() == ( Lex.lss_, None) - assert lex.get() == ( Lex.eql_, None) - assert lex.get() == ( Lex.neq_, None) - assert lex.get() == ( Lex.period_, None) - assert lex.get() == ( Lex.comma_, None) - assert lex.get() == ( Lex.semicolon_, None) - assert lex.get() == ( Lex.colon_, None) - assert lex.get() == ( Lex.becomes_, None) - assert lex.get() == ( Lex.lbrace_, None) - assert lex.get() == ( Lex.rbrace_, None) - assert lex.get() == ( Lex.lbrak_, None) - assert lex.get() == ( Lex.rbrak_, None) - assert lex.get() == ( Lex.eof_, None) + assert lex.get() == Lex.plus_ + assert lex.get() == Lex.minus_ + assert lex.get() == Lex.times_ + assert lex.get() == Lex.rdiv_ + assert lex.get() == Lex.and_ + assert lex.get() == Lex.bar_ + assert lex.get() == Lex.not_ + assert lex.get() == Lex.arrow_ + assert lex.get() == Lex.upto_ + assert lex.get() == Lex.geq_ + assert lex.get() == Lex.leq_ + assert lex.get() == Lex.gtr_ + assert lex.get() == Lex.lss_ + assert lex.get() == Lex.eql_ + assert lex.get() == Lex.neq_ + assert lex.get() == Lex.period_ + assert lex.get() == Lex.comma_ + assert lex.get() == Lex.semicolon_ + assert lex.get() == Lex.colon_ + assert lex.get() == Lex.becomes_ + assert lex.get() == Lex.lbrace_ + assert lex.get() == Lex.rbrace_ + assert lex.get() == Lex.lbrak_ + assert lex.get() == Lex.rbrak_ + assert lex.get() == Lex.eof_ def test_keywords(): test = '''IF DO OF OR TO IS BY IN END ELSE THEN ELSIF FALSE REPEAT RETURN PROCEDURE''' lex = Lexer( iter( test)) - assert lex.get() == ( Lex.if_, None) - assert lex.get() == ( Lex.do_, None) - assert lex.get() == ( Lex.of_, None) - assert lex.get() == ( Lex.or_, None) - assert lex.get() == ( Lex.to_, None) - assert lex.get() == ( Lex.is_, None) - assert lex.get() == ( Lex.by_, None) - assert lex.get() == ( Lex.in_, None) - assert lex.get() == ( Lex.end_, None) - assert lex.get() == ( Lex.else_, None) - assert lex.get() == ( Lex.then_, None) - assert lex.get() == ( Lex.elsif_, None) - assert lex.get() == ( Lex.false_, None) - assert lex.get() == ( Lex.repeat_, None) - assert lex.get() == ( Lex.return_, None) - assert lex.get() == ( Lex.procedure_, None) - assert lex.get() == ( Lex.eof_, None) + assert lex.get() == Lex.if_ + assert lex.get() == Lex.do_ + assert lex.get() == Lex.of_ + assert lex.get() == Lex.or_ + assert lex.get() == Lex.to_ + assert lex.get() == Lex.is_ + assert lex.get() == Lex.by_ + assert lex.get() == Lex.in_ + assert lex.get() == Lex.end_ + assert lex.get() == Lex.else_ + assert lex.get() == Lex.then_ + assert lex.get() == Lex.elsif_ + assert lex.get() == Lex.false_ + assert lex.get() == Lex.repeat_ + assert lex.get() == Lex.return_ + assert lex.get() == Lex.procedure_ + assert lex.get() == Lex.eof_ def test_negatives(): test = ' ` _ 0i name* ' lex = Lexer( iter( test)) - assert lex.get() == ( Lex.null_, None) - assert lex.get() == ( Lex.null_, None) - assert lex.get() == ( Lex.int_, 0) - assert lex.get() == ( Lex.ident_, 'i') - assert lex.get() == ( Lex.ident_, 'name') - assert lex.get() == ( Lex.times_, None) - assert lex.get() == ( Lex.eof_, None) + assert lex.get() == Lex.null_ + assert lex.get() == Lex.null_ + assert ( lex.get(), lex.value) == ( Lex.int_, 0) + assert ( lex.get(), lex.value) == ( Lex.ident_, 'i') + assert ( lex.get(), lex.value) == ( Lex.ident_, 'name') + assert lex.get() == Lex.times_ + assert lex.get() == Lex.eof_