// tlang-compiler/Lexer.cs (C#; originally listed as 201 lines, 5.5 KiB)

using System.IO;
using System.Collections.Generic;
using System.Text;
using System;
namespace TLang
{
/// <summary>
/// Categories that a <see cref="LexToken"/> can be classified as.
/// The numeric values are spelled out explicitly so they stay stable.
/// </summary>
public enum LexSymbol
{
    /// <summary>A name that is not a reserved word.</summary>
    Identifier = 0,

    /// <summary>A string value.</summary>
    String = 1,

    /// <summary>A character value.</summary>
    Char = 2,

    /// <summary>A whole number.</summary>
    Integer = 3,

    /// <summary>A number containing a decimal point.</summary>
    Float = 4,

    /// <summary>A boolean literal.</summary>
    Boolean = 5,

    /// <summary>A punctuation symbol.</summary>
    Symbol = 6,

    /// <summary>A reserved word.</summary>
    Keyword = 7,

    /// <summary>Text that could not be classified.</summary>
    Error = 8
}
/// <summary>
/// A single lexical token: its text, the position it was reported at, and the
/// <see cref="LexSymbol"/> category derived from the text when the token is constructed.
/// </summary>
public class LexToken
{
    /// <summary>
    /// Punctuation characters. A token whose first character is one of these is
    /// classified as <see cref="LexSymbol.Symbol"/>.
    /// </summary>
    public static string Symbols="{};(),[]=";

    /// <summary>
    /// Reserved words. The whole token text is compared against these case-sensitively.
    /// </summary>
    public static string[] Keywords=new string[]{"func","funcptr","string","int","uint","long","ulong","short","ushort","byte","sbyte","return","if","else","while","break"};

    /// <summary>
    /// Creates a token and classifies it from <paramref name="text"/>.
    /// </summary>
    /// <param name="text">The token text. An empty string is classified as <see cref="LexSymbol.Error"/>.</param>
    /// <param name="offsetInFile">Stored unchanged in <see cref="OffsetInFile"/>.</param>
    /// <param name="lineNumber">Stored unchanged in <see cref="LineNumber"/>.</param>
    /// <param name="columnNumber">Stored unchanged in <see cref="ColumnNumber"/>.</param>
    /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
    public LexToken(string text,int offsetInFile,int lineNumber,int columnNumber)
    {
        if (text == null)
        {
            throw new ArgumentNullException(nameof(text));
        }

        TokenText = text;
        OffsetInFile = offsetInFile;
        LineNumber = lineNumber;
        ColumnNumber = columnNumber;
        Type = Classify(text);
    }

    /// <summary>The raw text of the token.</summary>
    public string TokenText {get;set;}

    /// <summary>Character offset supplied by the creator of the token.</summary>
    public int OffsetInFile {get;set;}

    /// <summary>Line number supplied by the creator of the token.</summary>
    public int LineNumber {get;set;}

    /// <summary>Column number supplied by the creator of the token.</summary>
    public int ColumnNumber {get;set;}

    /// <summary>The category computed from <see cref="TokenText"/> at construction time.</summary>
    public LexSymbol Type {get;set;}

    /// <summary>
    /// Maps token text to its category. Checks run in this order: empty text,
    /// boolean literal, identifier/keyword, symbol, number; anything else is an error.
    /// </summary>
    private static LexSymbol Classify(string text)
    {
        if (text.Length == 0)
        {
            return LexSymbol.Error;
        }

        // Boolean literals are matched case-insensitively and must be tested before the
        // identifier rule, because they also start with a letter. An ordinal comparison
        // keeps the result independent of the current culture.
        if (string.Equals(text, "true", StringComparison.OrdinalIgnoreCase)
            || string.Equals(text, "false", StringComparison.OrdinalIgnoreCase))
        {
            return LexSymbol.Boolean;
        }

        char first = text[0];
        bool startsLikeIdentifier =
            first == '_' || (first >= 'a' && first <= 'z') || (first >= 'A' && first <= 'Z');
        if (startsLikeIdentifier)
        {
            // Array.IndexOf / string.IndexOf only need the System namespace, so this does
            // not depend on System.Linq being in scope.
            return Array.IndexOf(Keywords, text) >= 0 ? LexSymbol.Keyword : LexSymbol.Identifier;
        }

        if (Symbols.IndexOf(first) >= 0)
        {
            return LexSymbol.Symbol;
        }

        return ClassifyNumber(text);
    }

    /// <summary>
    /// Classifies text that is expected to be a numeric literal: digits with at most one
    /// '.'. Returns <see cref="LexSymbol.Float"/> when a '.' is present,
    /// <see cref="LexSymbol.Integer"/> otherwise, and <see cref="LexSymbol.Error"/> when the
    /// text holds any other character, a second '.', or no digit at all.
    /// </summary>
    private static LexSymbol ClassifyNumber(string text)
    {
        bool hasDot = false;
        bool hasDigit = false;
        foreach (char c in text)
        {
            if (c == '.')
            {
                if (hasDot)
                {
                    return LexSymbol.Error;
                }
                hasDot = true;
            }
            else if (c >= '0' && c <= '9')
            {
                hasDigit = true;
            }
            else
            {
                // Any character outside '0'..'9' and '.' makes the token invalid.
                return LexSymbol.Error;
            }
        }

        if (!hasDigit)
        {
            // A lone "." is not a number.
            return LexSymbol.Error;
        }

        return hasDot ? LexSymbol.Float : LexSymbol.Integer;
    }
}
/// <summary>
/// Splits character input into <see cref="LexToken"/> instances.
/// </summary>
public class Lexer
{
    // NOTE(review): the list below was carried over from a comment in the original
    // tokenizer loop. It looks like a list of TLang built-in function signatures and is
    // not used by the lexer itself - confirm where it belongs.
    //neq(T,T)
    //lte(T,T)
    //gte(T,T)
    //gt(T,T)
    //lt(T,T)
    //eq(T,T)
    //add(T,T)
    //concat(T,T)
    //tostr(T)
    //toint(T)
    //touint(T)
    //tolong(T)
    //toulong(T)
    //toshort(T)
    //toushort(T)
    //tobool(T)
    //tobyte(T)
    //tosbyte(T)
    //hasvalue(T)
    //sub(T,T)
    //times(T,T)
    //div(T,T)
    //mod(T,T)
    //split(s,symbol,expected_num)
    //split(s,symbol)
    //trimend(s,sym)
    //trimstart(s,sym)
    //len(T[])
    //add(T[],T)
    //remove(T[],T)
    //removeat(T[],i)
    //insert(T[],T,i)
    //create_stream(read_ptr,write_ptr,seek_ptr,close_ptr)
    //read_stream(stream,byte[],int)
    //write_stream(stream byte[],int)

    /// <summary>
    /// Lazily reads <paramref name="reader"/> to its end and yields the tokens found.
    /// </summary>
    /// <remarks>
    /// Runs of ASCII letters, digits, '_' and '.' form one token. Each character in
    /// <see cref="LexToken.Symbols"/> forms a one-character token. Every other character
    /// (spaces, tabs, line breaks, anything unrecognised) ends the pending token and is
    /// itself discarded. Each token carries the zero-based offset, line and column of its
    /// first character; a tab advances the column by 4 and '\n' starts a new line.
    /// </remarks>
    /// <param name="reader">The character source. It is not disposed by this method.</param>
    /// <returns>The tokens in input order.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
    public static IEnumerable<LexToken> GetTokens(TextReader reader)
    {
        // Validate here, outside the iterator, so the exception is thrown at call time
        // rather than on the first MoveNext().
        if (reader == null)
        {
            throw new ArgumentNullException(nameof(reader));
        }

        return Tokenize(reader);
    }

    /// <summary>Iterator that does the actual scanning for <see cref="GetTokens"/>.</summary>
    private static IEnumerable<LexToken> Tokenize(TextReader reader)
    {
        StringBuilder pending = new StringBuilder();

        // Position of the character currently being examined.
        int offset = 0;
        int line = 0;
        int column = 0;

        // Position of the first character of the token accumulated in 'pending'.
        int startOffset = 0;
        int startLine = 0;
        int startColumn = 0;

        int read;
        while ((read = reader.Read()) != -1)
        {
            char c = (char)read;

            if (IsTokenCharacter(c))
            {
                if (pending.Length == 0)
                {
                    startOffset = offset;
                    startLine = line;
                    startColumn = column;
                }
                pending.Append(c);
            }
            else
            {
                // Any other character is a token boundary, including line breaks and
                // characters the lexer does not recognise.
                if (pending.Length > 0)
                {
                    yield return new LexToken(pending.ToString(), startOffset, startLine, startColumn);
                    pending.Clear();
                }

                if (LexToken.Symbols.IndexOf(c) >= 0)
                {
                    yield return new LexToken(c.ToString(), offset, line, column);
                }
            }

            // Advance the position past the character just handled.
            offset++;
            if (c == '\n')
            {
                line++;
                column = 0;
            }
            else if (c == '\t')
            {
                column += 4;
            }
            else
            {
                column++;
            }
        }

        // Emit whatever was still being accumulated when the input ended.
        if (pending.Length > 0)
        {
            yield return new LexToken(pending.ToString(), startOffset, startLine, startColumn);
        }
    }

    /// <summary>
    /// True for characters that are accumulated into a multi-character token:
    /// ASCII letters, ASCII digits, '_' and '.'.
    /// </summary>
    private static bool IsTokenCharacter(char c)
    {
        return c == '_'
            || c == '.'
            || (c >= '0' && c <= '9')
            || (c >= 'a' && c <= 'z')
            || (c >= 'A' && c <= 'Z');
    }
}
}