jtr/DevDisciples.Parsing/Lexer.cs
2024-09-15 17:23:27 +02:00

68 lines
1.9 KiB
C#

namespace DevDisciples.Parsing;
/// <summary>
/// Base class for rule-driven lexers that turn source text into a list of tokens
/// whose kinds are drawn from the <typeparamref name="TToken"/> enum.
/// </summary>
/// <typeparam name="TToken">Enum describing the token kinds this lexer produces.</typeparam>
public abstract partial class Lexer<TToken> where TToken : Enum
{
    /// <summary>
    /// Rules tried, in list order, on every pass of <see cref="Lex"/>.
    /// Has no usable default; it must be supplied through the initializer.
    /// </summary>
    protected List<Rule> Rules { get; init; } = default!;

    /// <summary>Token kind appended once the source has ended.</summary>
    protected abstract TToken EndOfSource { get; }

    /// <summary>
    /// Lexes <paramref name="source"/> by repeatedly applying <see cref="Rules"/>
    /// until the source reports that it has ended.
    /// </summary>
    /// <param name="file">File name handed to the lexing context and the source.</param>
    /// <param name="source">Text to lex.</param>
    /// <returns>The context's token list, ending with an <see cref="EndOfSource"/> token.</returns>
    public List<Token> Lex(string file, string source)
    {
        var input = new Source(file, source);
        var collected = new List<Token>();
        var ctx = new Context(file, input, collected);

        while (!ctx.Source.Ended())
        {
            // Rules are tried in order; the first one returning true ends this pass.
            if (Rules.Exists(rule => rule(ctx)))
            {
                continue;
            }

            // NOTE(review): presumably Report.Halt does not return; if it did, this
            // loop would retry without any rule having matched - confirm.
            Report.Halt(ctx.Source, $"Unexpected character '{ctx.Source.Current}'.");
        }

        ctx.AddToken(EndOfSource, "<EOF>", ctx.Source.Line, ctx.Source.Column);
        return ctx.Tokens;
    }

    /// <summary>
    /// Asks the source to match <paramref name="char"/> and, on success, adds a token
    /// of kind <paramref name="type"/> for the extracted lexeme.
    /// </summary>
    /// <param name="ctx">Lexing context providing the source and receiving the token.</param>
    /// <param name="type">Token kind to add when the character matches.</param>
    /// <param name="char">Character to match.</param>
    /// <returns><c>true</c> if the source matched and a token was added; otherwise <c>false</c>.</returns>
    protected static bool Match(Context ctx, TToken type, char @char)
    {
        if (ctx.Source.Match(@char))
        {
            // Read the position before extracting and before bumping the column,
            // so the token carries the values seen right after the match.
            var tokenLine = ctx.Source.Line;
            var tokenColumn = ctx.Source.Column;
            var text = ctx.Source.Extract();

            ctx.Source.Column += 1;
            ctx.AddToken(type, text, tokenLine, tokenColumn);
            return true;
        }

        return false;
    }

    /// <summary>
    /// Asks the source to match <paramref name="sequence"/> and, on success, appends a
    /// token of kind <paramref name="token"/> for the extracted lexeme.
    /// </summary>
    /// <remarks>
    /// Do not use this method for keywords!
    /// This will treat an identifier named 'ifelse' as separate 'if' and 'else' tokens.
    /// </remarks>
    /// <param name="ctx">Lexing context providing the source and the token list.</param>
    /// <param name="token">Token kind to add when the sequence matches.</param>
    /// <param name="sequence">Exact character sequence to match.</param>
    /// <returns><c>true</c> if the source matched and a token was added; otherwise <c>false</c>.</returns>
    protected static bool Match(Context ctx, TToken token, string sequence)
    {
        if (ctx.Source.Match(sequence))
        {
            // Read the position before extracting and before advancing the column.
            var tokenLine = ctx.Source.Line;
            var tokenColumn = ctx.Source.Column;
            var text = ctx.Source.Extract();

            // The column moves forward by the length of the matched sequence.
            ctx.Source.Column += sequence.Length;
            ctx.Tokens.Add(new Token(ctx.File, token, text, tokenLine, tokenColumn));
            return true;
        }

        return false;
    }
}