237 lines
7.8 KiB
C#
237 lines
7.8 KiB
C#
|
using System.Text;
|
||
|
|
||
|
namespace tlang
|
||
|
{
|
||
|
/// <summary>
/// Splits the characters of a <see cref="TextReader"/> into a flat list of <see cref="LexToken"/>s.
/// Recognises double-quoted string literals, single-quoted character literals, <c>#</c> and
/// <c>//</c> line comments, <c>/* */</c> block comments, one- and two-character operators and
/// punctuation; every other run of non-whitespace characters becomes a general token.
/// </summary>
internal class Lexer
{
    // Character source. It is consumed the first time Tokens is read.
    TextReader reader;

    // True only when this instance opened the reader itself (file-name constructor),
    // in which case it is also responsible for closing it.
    readonly bool ownsReader;

    /// <summary>Creates a lexer that reads the file at <paramref name="fName"/>.</summary>
    /// <param name="fName">Path handed to <see cref="StreamReader"/>.</param>
    public Lexer(string fName) : this(new StreamReader(fName), true)
    {
    }

    /// <summary>Creates a lexer over a caller-supplied reader. The reader is not disposed by the lexer.</summary>
    /// <param name="reader">Source of characters to tokenize.</param>
    public Lexer(TextReader reader) : this(reader, false)
    {
    }

    private Lexer(TextReader reader, bool ownsReader)
    {
        this.reader = reader;
        this.ownsReader = ownsReader;
    }

    // Cached result of the first (and only) lexing pass.
    List<LexToken>? tokens = null;

    /// <summary>
    /// The tokens of the input. Lexing happens on first access and the result is cached.
    /// A reader opened by the file-name constructor is disposed once that pass ends,
    /// whether it succeeded or threw.
    /// </summary>
    public List<LexToken> Tokens
    {
        get
        {
            if (tokens == null)
            {
                try
                {
                    tokens = _getTokens();
                }
                finally
                {
                    if (ownsReader)
                    {
                        reader.Dispose();
                    }
                }
            }
            return tokens;
        }
    }

    // Tokens produced so far by the current pass.
    List<LexToken> _tokens = new List<LexToken>();

    // Accumulates the characters of the token currently being read.
    StringBuilder b = new StringBuilder();

    /// <summary>Emits the pending builder content (if any) as a general token and clears the builder.</summary>
    private void FlushBuilder()
    {
        if (b.Length > 0)
        {
            _tokens.Add(LexToken.FromGeneralToken(b.ToString()));
            b.Clear();
        }
    }

    // Set by getChar: true when the character it just returned came from a backslash escape.
    bool escaped = false;

    /// <summary>
    /// Reads one logical character, decoding the escapes <c>\n</c>, <c>\t</c>, <c>\r</c> and
    /// <c>\xHH</c>; any other escaped character is returned as itself. Updates <see cref="escaped"/>.
    /// </summary>
    /// <returns>The character, or -1 when the input ends (including in the middle of an escape).</returns>
    /// <exception cref="FormatException">The two characters after <c>\x</c> are not hexadecimal digits.</exception>
    private int getChar()
    {
        int read = reader.Read();
        if (read != '\\')
        {
            escaped = false;
            return read;
        }

        escaped = true;
        read = reader.Read();
        switch (read)
        {
            case 'x':
                int high = reader.Read();
                int low = reader.Read();
                if (high == -1 || low == -1)
                {
                    // Input ended inside the escape: report end of input instead of
                    // feeding placeholder characters to the hex parser.
                    return -1;
                }
                return Convert.FromHexString($"{(char)high}{(char)low}")[0];
            case 'n':
                return '\n';
            case 't':
                return '\t';
            case 'r':
                return '\r';
            default:
                // Covers \" \' \\ and also -1 when the input ends right after the backslash.
                return read;
        }
    }

    /// <summary>
    /// Reads the body of a string literal whose opening quote was already consumed and adds it
    /// as a string token. Example input: "Some \"Some String\" Is OK".
    /// An unterminated literal ends at end of input.
    /// </summary>
    private void ReadStringLiteral()
    {
        while (true)
        {
            int r = getChar();
            // Only an unescaped quote closes the literal; -1 stops the loop on unterminated input.
            if (r == -1 || (!escaped && r == '\"'))
            {
                break;
            }
            b.Append((char)r);
        }
        _tokens.Add(LexToken.FromString(b.ToString()));
        b.Clear();
    }

    /// <summary>
    /// Reads a character literal whose opening quote was already consumed and adds it as a
    /// character token. Nothing is added when the input ends before the character.
    /// </summary>
    private void ReadCharLiteral()
    {
        int c = getChar();
        if (c == -1)
        {
            return;
        }
        _tokens.Add(LexToken.FromChar(((char)c).ToString()));
        reader.Read(); // consume the closing quote (not validated)
    }

    /// <summary>Discards characters up to and including the next line feed, or to end of input.</summary>
    private void SkipLineComment()
    {
        int c;
        do
        {
            c = reader.Read();
        }
        while (c != -1 && c != '\n');
    }

    /// <summary>
    /// Discards characters up to and including the closing <c>*/</c>, or to end of input.
    /// The opening <c>/*</c> must already be consumed.
    /// </summary>
    private void SkipBlockComment()
    {
        bool lastIsAsterisk = false;
        int c;
        while ((c = reader.Read()) != -1)
        {
            if (c == '/' && lastIsAsterisk)
            {
                return;
            }
            lastIsAsterisk = c == '*';
        }
    }

    /// <summary>
    /// Adds an operator token: the two-character form when <paramref name="pairWithNext"/> is true
    /// (consuming the peeked character), otherwise the single character <paramref name="read"/>.
    /// </summary>
    private void AddOperator(int read, int next, bool pairWithNext)
    {
        if (pairWithNext)
        {
            reader.Read(); // 'next' was only peeked so far
            _tokens.Add(LexToken.FromGeneralToken($"{(char)read}{(char)next}"));
        }
        else
        {
            _tokens.Add(LexToken.FromGeneralToken(read));
        }
    }

    /// <summary>Runs the lexing pass over the whole reader and returns the accumulated tokens.</summary>
    private List<LexToken> _getTokens()
    {
        int read;
        while ((read = reader.Read()) != -1)
        {
            int next = reader.Peek();
            switch (read)
            {
                case '\"':
                    FlushBuilder();
                    ReadStringLiteral();
                    break;

                case '\'':
                    FlushBuilder();
                    ReadCharLiteral();
                    break;

                case '#':
                    // Flush first so text before the comment does not merge with text after it.
                    FlushBuilder();
                    SkipLineComment();
                    break;

                case '/':
                    FlushBuilder();
                    if (next == '/')
                    {
                        reader.Read();
                        SkipLineComment();
                    }
                    else if (next == '*')
                    {
                        reader.Read();
                        SkipBlockComment();
                    }
                    else
                    {
                        AddOperator(read, next, next == '=');
                    }
                    break;

                // Operators that only combine with '=' (e.g. "*=").
                case '^':
                case '*':
                case '%':
                    FlushBuilder();
                    AddOperator(read, next, next == '=');
                    break;

                // Operators that combine with '=' or with themselves (e.g. "<=", "&&", "++").
                case '|':
                case '&':
                case '<':
                case '>':
                case '+':
                case '-':
                case '!':
                    FlushBuilder();
                    AddOperator(read, next, next == read || next == '=');
                    break;

                // Punctuation that is always a single-character token.
                case '{':
                case '}':
                case '[':
                case ']':
                case '(':
                case ')':
                case '.':
                case '?':
                case ':':
                case ',':
                case ';':
                    FlushBuilder();
                    _tokens.Add(LexToken.FromGeneralToken(read));
                    break;

                // '=' combines into "==" or "=>".
                case '=':
                    FlushBuilder();
                    AddOperator(read, next, next == read || next == '>');
                    break;

                // Whitespace only terminates the pending token.
                case ' ':
                case '\n':
                case '\t':
                case '\r':
                    FlushBuilder();
                    break;

                default:
                    b.Append((char)read);
                    break;
            }
        }

        FlushBuilder();
        return _tokens;
    }
}
|
||
|
}
|