tlang-interperter-cs/tlanglib/Lexer.cs

227 lines
7.5 KiB
C#

using System.Text;
namespace tlang
{
/// <summary>
/// Reads source text from a <see cref="TextReader"/> and splits it into a flat
/// list of <see cref="LexToken"/>s (string literals, char literals, operators,
/// punctuation and "general" tokens such as identifiers and numbers).
/// </summary>
/// <remarks>
/// Lexing happens lazily, the first time <see cref="Tokens"/> is read, and the
/// result is cached. Comments (<c># ...</c>, <c>// ...</c>, <c>/* ... */</c>) and
/// whitespace are discarded.
/// NOTE(review): '=' is always emitted as a single token, so "==" reaches the
/// consumer as two consecutive '=' tokens — presumably the parser recombines
/// them; confirm before changing.
/// </remarks>
public class Lexer
{
    readonly TextReader reader;

    // True only when this lexer opened the reader itself (file-name constructor);
    // a caller-supplied reader stays the caller's responsibility.
    readonly bool ownsReader = false;

    /// <summary>Creates a lexer over the contents of the file <paramref name="fName"/>.</summary>
    /// <param name="fName">Path of the source file to tokenize.</param>
    /// <remarks>The file is closed once <see cref="Tokens"/> has been computed.</remarks>
    public Lexer(string fName) : this(new StreamReader(fName))
    {
        ownsReader = true;
    }

    /// <summary>Creates a lexer over an existing reader. The reader is not disposed by the lexer.</summary>
    /// <param name="reader">Source of the characters to tokenize.</param>
    public Lexer(TextReader reader)
    {
        this.reader = reader;
    }

    List<LexToken> tokens = new List<LexToken>();

    /// <summary>
    /// The tokens of the input. The input is lexed on first access; later
    /// accesses return the same cached list.
    /// </summary>
    /// <exception cref="System.FormatException">
    /// A <c>\x</c> escape was not followed by two hexadecimal digits.
    /// </exception>
    public List<LexToken> Tokens
    {
        get
        {
            if (!set)
            {
                // Marked before lexing so a failed run is never retried on a
                // half-consumed reader.
                set = true;
                try
                {
                    tokens = _getTokens();
                }
                finally
                {
                    if (ownsReader)
                    {
                        reader.Dispose();
                    }
                }
            }
            return tokens;
        }
    }

    bool set = false;
    List<LexToken> _tokens = new List<LexToken>();

    // Accumulates the characters of the general token currently being read.
    StringBuilder b = new StringBuilder();

    /// <summary>Emits the pending general token, if any, and resets the builder.</summary>
    private void FlushBuilder()
    {
        if (b.Length > 0)
        {
            _tokens.Add(LexToken.FromGeneralToken(b.ToString()));
            b.Clear();
        }
    }

    // Set by getChar(): true when the character it just returned came from a
    // backslash escape (so an escaped quote does not terminate a literal).
    bool escaped = false;

    /// <summary>Returns the value (0-15) of a hexadecimal digit, or -1 if <paramref name="c"/> is not one.</summary>
    private static int HexValue(int c)
    {
        if (c >= '0' && c <= '9')
        {
            return c - '0';
        }
        if (c >= 'a' && c <= 'f')
        {
            return c - 'a' + 10;
        }
        if (c >= 'A' && c <= 'F')
        {
            return c - 'A' + 10;
        }
        return -1;
    }

    /// <summary>
    /// Reads one logical character of a string/char literal, decoding the
    /// escapes <c>\xHH</c>, <c>\n</c>, <c>\t</c> and <c>\r</c>; any other
    /// escaped character stands for itself.
    /// </summary>
    /// <returns>The character code, or -1 at end of input.</returns>
    /// <exception cref="System.FormatException">
    /// A <c>\x</c> escape was not followed by two hexadecimal digits.
    /// </exception>
    private int getChar()
    {
        escaped = false;
        int read = reader.Read();
        if (read != '\\')
        {
            // Plain character, or -1 at end of input. Returned as int so that
            // end of input is distinguishable from every real character.
            return read;
        }

        int code = reader.Read();
        if (code == -1)
        {
            // Dangling backslash at end of input.
            return -1;
        }

        escaped = true;
        switch (code)
        {
            case 'x':
            {
                int hi = HexValue(reader.Read());
                int lo = HexValue(reader.Read());
                if (hi < 0 || lo < 0)
                {
                    throw new System.FormatException("Invalid \\x escape: expected two hexadecimal digits.");
                }
                return (hi << 4) | lo;
            }
            case 'n':
                return '\n';
            case 't':
                return '\t';
            case 'r':
                return '\r';
            default:
                return code;
        }
    }

    /// <summary>Discards input up to and including the next line feed.</summary>
    /// <returns><c>false</c> if end of input was reached first.</returns>
    private bool SkipToEndOfLine()
    {
        int c;
        while ((c = reader.Read()) != -1)
        {
            if (c == '\n')
            {
                return true;
            }
        }
        return false;
    }

    /// <summary>Discards input up to and including the closing <c>*/</c> of a block comment.</summary>
    /// <returns><c>false</c> if end of input was reached first.</returns>
    private bool SkipBlockComment()
    {
        bool lastWasStar = false;
        int c;
        while ((c = reader.Read()) != -1)
        {
            if (c == '/' && lastWasStar)
            {
                return true;
            }
            lastWasStar = c == '*';
        }
        return false;
    }

    /// <summary>
    /// Emits an operator token starting with <paramref name="first"/>. The
    /// operator absorbs the following character when it is '=' (e.g. "+=") or,
    /// if <paramref name="canDouble"/> is set, a repeat of itself (e.g. "++").
    /// </summary>
    /// <param name="first">The operator character already consumed.</param>
    /// <param name="next">The peeked (not yet consumed) following character, or -1.</param>
    /// <param name="canDouble">Whether a doubled form of the operator exists.</param>
    private void AddOperator(int first, int next, bool canDouble)
    {
        FlushBuilder();
        if (next == '=' || (canDouble && next == first))
        {
            reader.Read(); // consume the second character of the operator
            _tokens.Add(LexToken.FromGeneralToken($"{(char)first}{(char)next}"));
        }
        else
        {
            _tokens.Add(LexToken.FromGeneralToken(first));
        }
    }

    /// <summary>Lexes the whole input into <c>_tokens</c> and returns that list.</summary>
    private List<LexToken> _getTokens()
    {
        int read;
        while ((read = reader.Read()) != -1)
        {
            int next = reader.Peek();
            switch (read)
            {
                case '\"':
                    FlushBuilder();
                    // "Some \"Some String\" Is OK"
                    while (true)
                    {
                        int r = getChar();
                        // Stop at the closing quote, or at end of input for an
                        // unterminated literal (whatever was collected is kept).
                        if (r == -1 || (!escaped && r == '\"'))
                        {
                            break;
                        }
                        b.Append((char)r);
                    }
                    _tokens.Add(LexToken.FromString(b.ToString()));
                    b.Clear();
                    break;
                case '\'':
                {
                    FlushBuilder();
                    int c = getChar();
                    if (c != -1)
                    {
                        _tokens.Add(LexToken.FromChar(((char)c).ToString()));
                        reader.Read(); // consume the closing quote
                    }
                    break;
                }
                case '#':
                    // A comment ends the token before it, exactly like whitespace.
                    FlushBuilder();
                    if (!SkipToEndOfLine())
                    {
                        return _tokens;
                    }
                    break;
                case '/':
                    FlushBuilder();
                    if (next == '/')
                    {
                        reader.Read();
                        if (!SkipToEndOfLine())
                        {
                            return _tokens;
                        }
                    }
                    else if (next == '*')
                    {
                        reader.Read();
                        if (!SkipBlockComment())
                        {
                            return _tokens;
                        }
                    }
                    else
                    {
                        // Plain '/' or "/=".
                        AddOperator(read, next, false);
                    }
                    break;
                case '^':
                case '*':
                case '%':
                    AddOperator(read, next, false);
                    break;
                case '|':
                case '&':
                case '<':
                case '>':
                case '+':
                case '-':
                case '!':
                    AddOperator(read, next, true);
                    break;
                case '{':
                case '}':
                case '[':
                case ']':
                case '(':
                case ')':
                case '.':
                case '?':
                case ':':
                case ',':
                case ';':
                case '=':
                    FlushBuilder();
                    _tokens.Add(LexToken.FromGeneralToken(read));
                    break;
                case ' ':
                case '\n':
                case '\t':
                case '\r':
                    FlushBuilder();
                    break;
                default:
                    b.Append((char)read);
                    break;
            }
        }
        FlushBuilder();
        return _tokens;
    }
}
}