using System.Text;
using System.IO;
using System.Collections.Generic;

namespace tlang
{
    public class Lexer
    {
        TextReader reader;

        public Lexer(string fName) : this(new StreamReader(fName))
        {
        }

        public Lexer(TextReader reader)
        {
            this.reader = reader;
        }
        List<LexToken> tokens = new List<LexToken>();

        // Tokenizing is lazy: the input is read once, the first time Tokens is accessed.
        public List<LexToken> Tokens
        {
            get
            {
                if (!set)
                {
                    set = true;
                    tokens = _getTokens();
                }
                return tokens;
            }
        }

        bool set = false;

        List<LexToken> _tokens = new List<LexToken>();

        StringBuilder b = new StringBuilder();

        // Emits the characters accumulated so far as a general token, if any.
        private void FlushBuilder()
        {
            if (b.Length > 0)
            {
                _tokens.Add(LexToken.FromGeneralToken(b.ToString()));
                b.Clear();
            }
        }
        bool escaped = false;

        // Reads one character, translating backslash escape sequences (\xNN, \n, \t, \r).
        // `escaped` records whether the returned character came from an escape, so the
        // string scanner can tell an escaped quote from a closing quote.
        private char getChar()
        {
            int read = reader.Read();
            if (read == '\\')
            {
                escaped = true;
                read = reader.Read();
                if (read == 'x')
                {
                    // Two hex digits follow, e.g. \x41 -> 'A'. AllowHexSpecifier expects bare
                    // hex digits, so the "0x" prefix must not be included in the parsed string.
                    return (char)short.Parse($"{(char)reader.Read()}{(char)reader.Read()}", System.Globalization.NumberStyles.AllowHexSpecifier);
                }
                else if (read == 'n')
                {
                    return '\n';
                }
                else if (read == 't')
                {
                    return '\t';
                }
                else if (read == 'r')
                {
                    return '\r';
                }
                return (char)read;
            }
            escaped = false;
            return (char)read;
        }
        private List<LexToken> _getTokens()
        {
            int read;
            while ((read = reader.Read()) != -1)
            {
                int next = reader.Peek();
                switch (read)
                {
                    case '\"':
                        FlushBuilder();
                        // String literal, e.g. "Some \"Some String\" Is OK"
                        while (true)
                        {
                            // Stop at end of input (unterminated string) or at an unescaped closing quote.
                            if (reader.Peek() == -1)
                            {
                                break;
                            }
                            char r = getChar();
                            if (!escaped && r == '\"')
                            {
                                break;
                            }
                            b.Append(r);
                        }
                        _tokens.Add(LexToken.FromString(b.ToString()));
                        b.Clear();
                        break;
                    case '\'':
                        FlushBuilder();
                        _tokens.Add(LexToken.FromChar(getChar().ToString()));
                        // Consume the closing single quote.
                        reader.Read();
                        break;
                    case '#':
                        // '#' starts a line comment; skip to the end of the line.
                        while (true)
                        {
                            read = reader.Read();
                            if (read == -1) return _tokens;

                            if (read == '\n')
                            {
                                break;
                            }
                        }
                        break;
                    case '/':
                        FlushBuilder();
                        if (next == '=')
                        {
                            reader.Read();
                            _tokens.Add(LexToken.FromGeneralToken($"{(char)read}{(char)next}"));
                        }
                        else if (next == '/')
                        {
                            // "//" line comment; skip to the end of the line.
                            reader.Read();

                            while (true)
                            {
                                read = reader.Read();
                                if (read == -1) return _tokens;

                                if (read == '\n')
                                {
                                    break;
                                }
                            }
                        }
                        else if (next == '*')
                        {
                            // "/*" block comment; skip until the closing "*/".
                            reader.Read();
                            bool lastIsAsterisk = false;
                            while (true)
                            {
                                read = reader.Read();
                                if (read == -1) return _tokens;
                                if (read == '*')
                                {
                                    lastIsAsterisk = true;
                                    continue;
                                }
                                else if (read == '/')
                                {
                                    if (lastIsAsterisk)
                                    {
                                        break;
                                    }
                                }
                                lastIsAsterisk = false;
                            }
                        }
                        else
                        {
                            // Plain division operator.
                            _tokens.Add(LexToken.FromGeneralToken(((char)read).ToString()));
                        }

                        break;
                    case '^':
                    case '*':
                    case '%':
                        FlushBuilder();
                        if (next == '=')
                        {
                            reader.Read();
                            _tokens.Add(LexToken.FromGeneralToken($"{(char)read}{(char)next}"));
                        }
                        else
                        {
                            _tokens.Add(LexToken.FromGeneralToken(((char)read).ToString()));
                        }
                        break;
                    case '|':
                    case '&':
                    case '<':
                    case '>':
                    case '+':
                    case '-':
                    case '!':
                        FlushBuilder();
                        // Doubled operators (||, &&, <<, ++, ...) and "op=" forms are two-character tokens.
                        if (next == read || next == '=')
                        {
                            reader.Read();
                            _tokens.Add(LexToken.FromGeneralToken($"{(char)read}{(char)next}"));
                        }
                        else
                        {
                            _tokens.Add(LexToken.FromGeneralToken(((char)read).ToString()));
                        }
                        break;
                    case '{':
                    case '}':
                    case '[':
                    case ']':
                    case '(':
                    case ')':
                    case '.':
                    case '?':
                    case ':':
                    case ',':
                    case ';':
                    case '=':
                        FlushBuilder();
                        _tokens.Add(LexToken.FromGeneralToken(((char)read).ToString()));
                        break;
                    case ' ':
                    case '\n':
                    case '\t':
                    case '\r':
                        // Whitespace terminates the current token.
                        FlushBuilder();
                        break;
                    default:
                        b.Append((char)read);
                        break;
                }
            }
            FlushBuilder();
            return _tokens;
        }
    }
}
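
// Example usage (a minimal sketch; LexToken and its factory methods are assumed to be
// defined elsewhere in this project, and "program.t" is a hypothetical input file):
//
//     var lexer = new Lexer("program.t");
//     foreach (var token in lexer.Tokens)
//     {
//         Console.WriteLine(token);
//     }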