jtr/DevDisciples.Parsing/Lexer.Rule.cs
2024-09-15 17:23:27 +02:00

213 lines
5.9 KiB
C#

namespace DevDisciples.Parsing;
public abstract partial class Lexer<TToken> where TToken : Enum
{
    /// <summary>
    /// A single lexing rule. The rules in <see cref="DefaultRule"/> return <c>true</c> when they
    /// recognised and consumed input at the current source position, and <c>false</c> when they did not match.
    /// </summary>
    /// <param name="ctx">The lexing context the rule reads from and adds tokens to.</param>
    public delegate bool Rule(Context ctx);

    /// <summary>
    /// Ready-made rules for whitespace, line breaks, quoted strings, identifiers and numbers.
    /// </summary>
    protected static class DefaultRule
    {
        private const int DefaultSpaceSize = 1;
        private const int DefaultTabSize = 4;

        /// <summary>
        /// Skips one carriage return, space or tab.
        /// </summary>
        /// <param name="context">The lexing context.</param>
        /// <param name="space">Columns added for a space; non-positive values fall back to 1.</param>
        /// <param name="tab">Columns added for a tab; non-positive values fall back to 4.</param>
        /// <returns><c>true</c> when a whitespace character was skipped, otherwise <c>false</c>.</returns>
        public static bool IgnoreWhitespace(Context context, int space = DefaultSpaceSize, int tab = DefaultTabSize)
        {
            space = space <= 0 ? DefaultSpaceSize : space;
            tab = tab <= 0 ? DefaultTabSize : tab;

            var src = context.Source;

            // A carriage return is skipped without touching the column counter.
            if (src.Match('\r'))
            {
                src.Ignore();
                return true;
            }

            if (src.Match(' '))
            {
                src.Ignore();
                src.Column += space;
                return true;
            }

            if (src.Match('\t'))
            {
                src.Ignore();
                src.Column += tab;
                return true;
            }

            return false;
        }

        /// <summary>
        /// Skips a line feed, incrementing the source line and resetting the column to 1.
        /// </summary>
        /// <param name="context">The lexing context.</param>
        /// <returns><c>true</c> when a line feed was skipped, otherwise <c>false</c>.</returns>
        public static bool NewLine(Context context)
        {
            if (!context.Source.Match('\n'))
            {
                return false;
            }

            context.Source.Line++;
            context.Source.Column = 1;
            context.Source.Ignore();
            return true;
        }

        /// <summary>
        /// Creates a rule that lexes a string delimited by single quotes.
        /// </summary>
        /// <param name="token">The token type assigned to the string.</param>
        /// <returns>A rule that adds the string, without its quotes, as a token.</returns>
        /// <exception cref="ParsingException">Thrown by the returned rule when the closing quote is missing.</exception>
        public static Rule SingleQuoteString(TToken token) => QuotedString(token, '\'');

        /// <summary>
        /// Creates a rule that lexes a string delimited by double quotes.
        /// </summary>
        /// <param name="token">The token type assigned to the string.</param>
        /// <returns>A rule that adds the string, without its quotes, as a token.</returns>
        /// <exception cref="ParsingException">Thrown by the returned rule when the closing quote is missing.</exception>
        public static Rule DoubleQuoteString(TToken token) => QuotedString(token, '"');

        /// <summary>
        /// Shared implementation of the quoted-string rules, so that both quote styles track
        /// lines and columns in exactly the same way.
        /// </summary>
        /// <param name="token">The token type assigned to the string.</param>
        /// <param name="quote">The character that opens and closes the string.</param>
        private static Rule QuotedString(TToken token, char quote)
        {
            return context =>
            {
                var src = context.Source;

                if (!src.Match(quote))
                {
                    return false;
                }

                // Position recorded right after the opening quote was matched; used for the token.
                int line = src.Line, column = src.Column;

                // Test for end of input first so Peek() is never relied upon past the end.
                while (!src.Ended() && src.Peek() != quote)
                {
                    // Account for source lines and columns when dealing with a string spanning multiple lines.
                    if (src.Peek() == '\n')
                    {
                        src.Line++;
                        // Same line-start value NewLine uses. The double-quote rule used to reset to 0,
                        // so two otherwise identical rules reported different columns.
                        // NOTE(review): confirm against how Source.Advance()/Extract() update Column.
                        src.Column = 1;
                    }

                    src.Advance();
                }

                if (src.Ended())
                {
                    throw new ParsingException(
                        $"[line: {src.Line}, column: {src.Column}] Unterminated string near '{src.Last}'."
                    );
                }

                // Include the closing quotation mark.
                src.Advance();

                var lexeme = src.Extract();

                // The scan stops at the first quote, so the body cannot contain one and
                // Trim only strips the two delimiters.
                context.AddToken(token, lexeme.Trim(quote), line, column);
                return true;
            };
        }

        /// <summary>
        /// Creates a rule that lexes an identifier: an ASCII letter or underscore followed by
        /// any number of ASCII letters, digits or underscores.
        /// </summary>
        /// <param name="token">The token type assigned to the identifier.</param>
        /// <returns>A rule that adds the identifier as a token.</returns>
        public static Rule Identifier(TToken token)
        {
            return context =>
            {
                var src = context.Source;

                if (!IsAlpha(src.Peek()))
                {
                    return false;
                }

                int line = src.Line, column = src.Column;

                while (IsAlphaNumeric(src.Peek()))
                {
                    src.Advance();
                }

                var lexeme = src.Extract();
                context.AddToken(token, lexeme, line, column);
                return true;
            };
        }

        // NOTE(review): the rule below is disabled; either restore it or delete it.
        // public static bool DoubleSlashComment(Context context)
        // {
        //     var src = context.Source;
        //
        //     if (!src.Match("//")) return false;
        //
        //     while (!src.Ended() && src.Peek() != '\n') src.Advance();
        //
        //     src.Ignore();
        //
        //     return true;
        // }

        /// <summary>Returns <c>true</c> for an ASCII letter or an underscore.</summary>
        private static bool IsAlpha(char c) => c is >= 'a' and <= 'z' or >= 'A' and <= 'Z' or '_';

        /// <summary>Returns <c>true</c> for an ASCII letter, an underscore or an ASCII digit.</summary>
        private static bool IsAlphaNumeric(char c) => IsAlpha(c) || IsDigit(c);

        /// <summary>
        /// Creates a rule that lexes an unsigned number: one or more ASCII digits, optionally
        /// followed by a dot and further digits.
        /// </summary>
        /// <param name="token">The token type assigned to the number.</param>
        /// <returns>A rule that adds the number's text as a token.</returns>
        public static Rule Number(TToken token)
        {
            return context =>
            {
                var src = context.Source;

                if (!IsDigit(src.Peek()))
                {
                    return false;
                }

                int line = src.Line, column = src.Column;

                ProcessDigits(src);

                // Only treat the dot as part of the number when a digit follows it
                // (Peek(1) is presumably a one-character lookahead past the dot).
                if (src.Peek() == '.' && IsDigit(src.Peek(1)))
                {
                    // Consume the "."
                    src.Advance();
                    ProcessDigits(src);
                }

                var lexeme = src.Extract();
                context.AddToken(token, lexeme, line, column);
                return true;
            };
        }

        /// <summary>Returns <c>true</c> for an ASCII digit.</summary>
        private static bool IsDigit(char c) => c is >= '0' and <= '9';

        /// <summary>Advances the source for as long as the next character is an ASCII digit.</summary>
        private static void ProcessDigits(Source src)
        {
            while (IsDigit(src.Peek()))
            {
                src.Advance();
            }
        }
    }
}