using System;
using System.Collections.Generic;
using System.IO;
using System.Text;

namespace TLang
{
    /// <summary>
    /// Classification assigned to a <see cref="LexToken"/> from its text.
    /// </summary>
    public enum LexSymbol
    {
        Identifier = 0,
        String = 1,
        Char = 2,
        Integer = 3,
        Float = 4,
        Boolean = 5,
        Symbol = 6,
        Keyword = 7,
        Error = 8
    }

    /// <summary>
    /// A single token together with its position and its classification.
    /// The classification is derived from the token text when the token is constructed.
    /// </summary>
    public class LexToken
    {
        /// <summary>Characters that are classified as <see cref="LexSymbol.Symbol"/>.</summary>
        public static string Symbols = "{};(),[]=";

        /// <summary>Words that are classified as <see cref="LexSymbol.Keyword"/> (case-sensitive).</summary>
        public static string[] Keywords = new string[]
        {
            "func", "funcptr", "string", "int", "uint", "long", "ulong", "short",
            "ushort", "byte", "sbyte", "return", "if", "else", "while", "break"
        };

        /// <summary>
        /// Creates a token and classifies it.
        /// </summary>
        /// <param name="text">Token text. <c>null</c> is treated as empty and classified as <see cref="LexSymbol.Error"/>.</param>
        /// <param name="offsetInFile">Character offset stored in <see cref="OffsetInFile"/>.</param>
        /// <param name="lineNumber">Line number stored in <see cref="LineNumber"/>.</param>
        /// <param name="columnNumber">Column number stored in <see cref="ColumnNumber"/>.</param>
        public LexToken(string text, int offsetInFile, int lineNumber, int columnNumber)
        {
            TokenText = text ?? string.Empty;
            OffsetInFile = offsetInFile;
            LineNumber = lineNumber;
            ColumnNumber = columnNumber;
            Type = Classify(TokenText);
        }

        public string TokenText { get; set; }
        public int OffsetInFile { get; set; }
        public int LineNumber { get; set; }
        public int ColumnNumber { get; set; }
        public LexSymbol Type { get; set; }

        /// <summary>
        /// Maps token text to its classification. Checks run in this order:
        /// empty text, boolean literal, identifier/keyword, symbol, number.
        /// </summary>
        private static LexSymbol Classify(string text)
        {
            if (text.Length == 0)
            {
                return LexSymbol.Error;
            }

            // "true"/"false" are matched case-insensitively; an ordinal comparison
            // keeps the result independent of the current culture.
            if (string.Equals(text, "true", StringComparison.OrdinalIgnoreCase) ||
                string.Equals(text, "false", StringComparison.OrdinalIgnoreCase))
            {
                return LexSymbol.Boolean;
            }

            char first = text[0];
            if (first == '_' || IsAsciiLetter(first))
            {
                // Array.IndexOf avoids a dependency on System.Linq for the lookup.
                return Array.IndexOf(Keywords, text) >= 0 ? LexSymbol.Keyword : LexSymbol.Identifier;
            }

            // Only the first character decides whether the token is a symbol.
            if (Symbols.IndexOf(first) >= 0)
            {
                return LexSymbol.Symbol;
            }

            return ClassifyNumber(text);
        }

        /// <summary>
        /// Classifies text that is neither a word nor a symbol. A number consists of
        /// ASCII digits with at most one '.', and must contain at least one digit;
        /// anything else is an <see cref="LexSymbol.Error"/>.
        /// </summary>
        private static LexSymbol ClassifyNumber(string text)
        {
            bool sawDot = false;
            bool sawDigit = false;

            foreach (char c in text)
            {
                if (c == '.')
                {
                    if (sawDot)
                    {
                        return LexSymbol.Error; // a second '.' is never valid
                    }
                    sawDot = true;
                }
                else if (c >= '0' && c <= '9')
                {
                    sawDigit = true;
                }
                else
                {
                    return LexSymbol.Error; // e.g. "1a"
                }
            }

            if (!sawDigit)
            {
                return LexSymbol.Error; // a lone "." is not a number
            }

            return sawDot ? LexSymbol.Float : LexSymbol.Integer;
        }

        private static bool IsAsciiLetter(char c)
        {
            return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
        }
    }

    /// <summary>
    /// Splits character input into <see cref="LexToken"/> values.
    /// </summary>
    public class Lexer
    {
        // Notes carried over from the original source: a list of function signatures
        // (presumably the language's built-ins - nothing in this file references them).
        //   neq(T,T) lte(T,T) gte(T,T) gt(T,T) lt(T,T) eq(T,T)
        //   add(T,T) concat(T,T) sub(T,T) times(T,T) div(T,T) mod(T,T)
        //   tostr(T) toint(T) touint(T) tolong(T) toulong(T) toshort(T) toushort(T)
        //   tobool(T) tobyte(T) tosbyte(T) hasvalue(T)
        //   split(s,symbol,expected_num) split(s,symbol) trimend(s,sym) trimstart(s,sym)
        //   len(T[]) add(T[],T) remove(T[],T) removeat(T[],i) insert(T[],T,i)
        //   create_stream(read_ptr,write_ptr,seek_ptr,close_ptr)
        //   read_stream(stream,byte[],int) write_stream(stream byte[],int)

        /// <summary>Number of columns a tab character advances the column counter.</summary>
        private const int TabWidth = 4;

        /// <summary>
        /// Lazily reads <paramref name="reader"/> to its end and yields the tokens found.
        /// </summary>
        /// <remarks>
        /// A run of ASCII letters, digits, '_' and '.' forms one token. Whitespace
        /// (including line breaks) ends the current token and is discarded. Every other
        /// character ends the current token and is yielded as a one-character token, which
        /// <see cref="LexToken"/> classifies as a symbol or as an error.
        /// Offset, line and column are zero-based and refer to the token's first character.
        /// </remarks>
        /// <param name="reader">Source of characters; it is not disposed by this method.</param>
        /// <returns>The tokens in input order.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="reader"/> is <c>null</c>.</exception>
        public static IEnumerable<LexToken> GetTokens(TextReader reader)
        {
            if (reader == null)
            {
                throw new ArgumentNullException(nameof(reader));
            }

            // The iterator lives in a separate method so the null check above runs
            // at call time rather than on the first MoveNext().
            return Tokenize(reader);
        }

        private static IEnumerable<LexToken> Tokenize(TextReader reader)
        {
            StringBuilder pending = new StringBuilder();

            // Position of the character currently being examined.
            int offset = 0;
            int line = 0;
            int column = 0;

            // Position of the first character of the pending token.
            int startOffset = 0;
            int startLine = 0;
            int startColumn = 0;

            int read;
            while ((read = reader.Read()) != -1)
            {
                char c = (char)read;

                if (IsTokenChar(c))
                {
                    if (pending.Length == 0)
                    {
                        startOffset = offset;
                        startLine = line;
                        startColumn = column;
                    }
                    pending.Append(c);
                }
                else
                {
                    // Any delimiter terminates the token collected so far.
                    if (pending.Length > 0)
                    {
                        yield return new LexToken(pending.ToString(), startOffset, startLine, startColumn);
                        pending.Clear();
                    }

                    // Symbols and unrecognised characters become their own token
                    // instead of being silently dropped.
                    if (!char.IsWhiteSpace(c))
                    {
                        yield return new LexToken(c.ToString(), offset, line, column);
                    }
                }

                offset++;
                if (c == '\n')
                {
                    line++;
                    column = 0;
                }
                else if (c == '\t')
                {
                    column += TabWidth;
                }
                else
                {
                    column++;
                }
            }

            // Flush a token that runs up to the end of the input.
            if (pending.Length > 0)
            {
                yield return new LexToken(pending.ToString(), startOffset, startLine, startColumn);
            }
        }

        /// <summary>True for characters that accumulate into a word or number token.</summary>
        private static bool IsTokenChar(char c)
        {
            return c == '_'
                || c == '.'
                || (c >= '0' && c <= '9')
                || (c >= 'a' && c <= 'z')
                || (c >= 'A' && c <= 'Z');
        }
    }
}