Initial commit

2024-09-15 17:23:27 +02:00
commit 8d1069e477
69 changed files with 2296 additions and 0 deletions


@@ -0,0 +1,9 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net8.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
</Project>


@@ -0,0 +1,8 @@
namespace DevDisciples.Parsing;
public interface ISourceLocation
{
public string File { get; }
public int Line { get; }
public int Column { get; }
}


@@ -0,0 +1,3 @@
namespace DevDisciples.Parsing;
public interface ISyntaxNode { }


@@ -0,0 +1,29 @@
namespace DevDisciples.Parsing;
public abstract partial class Lexer<TToken> where TToken : Enum
{
public class Context
{
public string File { get; }
public Source Source { get; }
public List<Token> Tokens { get; }
public Context(string file, Source source, List<Token> tokens)
{
File = file;
Source = source;
Tokens = tokens;
}
public void AddToken(TToken type, string lexeme, int line = -1, int column = -1)
{
Tokens.Add(new Token(
File,
type,
lexeme,
line == -1 ? Source.Line : line,
column == -1 ? Source.Column : column
));
}
}
}


@@ -0,0 +1,213 @@
namespace DevDisciples.Parsing;
public abstract partial class Lexer<TToken> where TToken : Enum
{
public delegate bool Rule(Context ctx);
protected static class DefaultRule
{
private const int DefaultSpaceSize = 1;
private const int DefaultTabSize = 4;
public static bool IgnoreWhitespace(Context context, int space = DefaultSpaceSize, int tab = DefaultTabSize)
{
space = space <= 0 ? DefaultSpaceSize : space;
tab = tab <= 0 ? DefaultTabSize : tab;
var src = context.Source;
if (src.Match('\r'))
{
src.Ignore();
return true;
}
if (src.Match(' '))
{
src.Ignore();
src.Column += space;
return true;
}
if (src.Match('\t'))
{
src.Ignore();
src.Column += tab;
return true;
}
return false;
}
public static bool NewLine(Context context)
{
if (!context.Source.Match('\n')) return false;
context.Source.Line++;
context.Source.Column = 1;
context.Source.Ignore();
return true;
}
public static Rule SingleQuoteString(TToken token)
{
return context =>
{
if (!context.Source.Match('\'')) return false;
var src = context.Source;
int line = src.Line, column = src.Column;
while (src.Peek() != '\'' && !src.Ended())
{
// Account for source lines and columns when dealing with a string spanning multiple lines.
if (src.Peek() == '\n')
{
src.Line++;
src.Column = 1;
}
src.Advance();
}
if (src.Ended())
{
throw new ParsingException(
$"[line: {src.Line}, column: {src.Column}] Unterminated string near '{src.Last}'."
);
}
// Include the closing quotation mark.
src.Advance();
var lexeme = src.Extract();
context.AddToken(
token,
lexeme.Trim('\''),
line,
column
);
return true;
};
}
public static Rule DoubleQuoteString(TToken token)
{
return context =>
{
if (!context.Source.Match('"')) return false;
var src = context.Source;
int line = src.Line, column = src.Column;
while (src.Peek() != '"' && !src.Ended())
{
// Account for source lines and columns when dealing with a string spanning multiple lines.
if (src.Peek() == '\n')
{
src.Line++;
src.Column = 1;
}
src.Advance();
}
if (src.Ended())
{
throw new ParsingException(
$"[line: {src.Line}, column: {src.Column}] Unterminated string near '{src.Last}'."
);
}
// Include the closing quotation mark.
src.Advance();
var lexeme = src.Extract();
context.AddToken(
token,
lexeme.Trim('"'),
line,
column
);
return true;
};
}
public static Rule Identifier(TToken token)
{
return context =>
{
var src = context.Source;
if (!IsAlpha(src.Peek())) return false;
int line = src.Line, column = src.Column;
while (IsAlphaNumeric(src.Peek())) src.Advance();
var lexeme = src.Extract();
context.AddToken(token, lexeme, line, column);
return true;
};
}
// public static bool DoubleSlashComment(Context context)
// {
// var src = context.Source;
//
// if (!src.Match("//")) return false;
//
// while (!src.Ended() && src.Peek() != '\n') src.Advance();
//
// src.Ignore();
//
// return true;
// }
private static bool IsAlpha(char c) => c is >= 'a' and <= 'z' or >= 'A' and <= 'Z' or '_';
private static bool IsAlphaNumeric(char c) => IsAlpha(c) || IsDigit(c);
public static Rule Number(TToken token)
{
return context =>
{
var src = context.Source;
if (!IsDigit(src.Peek())) return false;
int line = src.Line, column = src.Column;
ProcessDigits(src);
if (src.Peek() == '.' && IsDigit(src.Peek(1)))
{
// Consume the "."
src.Advance();
ProcessDigits(src);
}
var lexeme = src.Extract();
context.AddToken(token, lexeme, line, column);
return true;
};
}
private static bool IsDigit(char c) => c is >= '0' and <= '9';
private static void ProcessDigits(Source src)
{
while (IsDigit(src.Peek()))
src.Advance();
}
}
}
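
The string-matching Match overload later in this commit warns against using it for keywords, since it would split an identifier such as 'ifelse'. Purely as an illustrative sketch (not part of the commit), a rule in the style of the defaults could lex a whole identifier and then promote it to a keyword token; it would need to sit inside DefaultRule to reach the private IsAlpha/IsAlphaNumeric helpers.

// Hypothetical addition to DefaultRule: lex a full identifier, then promote it to a
// keyword token when it matches a reserved word supplied by the caller.
public static Rule Keyword(IReadOnlyDictionary<string, TToken> keywords, TToken identifier)
{
    return context =>
    {
        var src = context.Source;
        if (!IsAlpha(src.Peek())) return false;
        int line = src.Line, column = src.Column;
        while (IsAlphaNumeric(src.Peek())) src.Advance();
        var lexeme = src.Extract();
        var type = keywords.TryGetValue(lexeme, out var keyword) ? keyword : identifier;
        context.AddToken(type, lexeme, line, column);
        return true;
    };
}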


@@ -0,0 +1,23 @@
namespace DevDisciples.Parsing;
public abstract partial class Lexer<TToken> where TToken : Enum
{
public struct Token : ISourceLocation
{
public string File { get; }
public TToken Type { get; }
public string Lexeme { get; }
public int Line { get; }
public int Column { get; }
public Token(string file, TToken type, string lexeme, int line, int column)
{
File = file;
Type = type;
Lexeme = lexeme;
Line = line;
Column = column;
}
}
}


@@ -0,0 +1,68 @@
namespace DevDisciples.Parsing;
public abstract partial class Lexer<TToken> where TToken : Enum
{
protected List<Rule> Rules { get; init; } = default!;
protected abstract TToken EndOfSource { get; }
public List<Token> Lex(string file, string source)
{
var ctx = new Context(file, new Source(file, source), new List<Token>());
while (!ctx.Source.Ended())
{
var matched = false;
for (var i = 0; i < Rules.Count; i++)
{
if (Rules[i](ctx))
{
matched = true;
break;
}
}
if (!matched)
{
Report.Halt(ctx.Source, $"Unexpected character '{ctx.Source.Current}'.");
}
}
ctx.AddToken(EndOfSource, "<EOF>", ctx.Source.Line, ctx.Source.Column);
return ctx.Tokens;
}
protected static bool Match(Context ctx, TToken type, char @char)
{
if (!ctx.Source.Match(@char)) return false;
var line = ctx.Source.Line;
var column = ctx.Source.Column;
var lexeme = ctx.Source.Extract();
ctx.Source.Column += 1;
ctx.AddToken(type, lexeme, line, column);
return true;
}
/*
 * Do not use this overload for keywords: it matches the raw character sequence
 * without checking what follows, so an identifier named 'ifelse' would be lexed
 * as separate 'if' and 'else' tokens. Handle keywords when lexing identifiers instead.
 */
protected static bool Match(Context ctx, TToken token, string sequence)
{
if (!ctx.Source.Match(sequence)) return false;
var line = ctx.Source.Line;
var column = ctx.Source.Column;
var lexeme = ctx.Source.Extract();
ctx.Source.Column += sequence.Length;
ctx.Tokens.Add(new Token(ctx.File, token, lexeme, line, column));
return true;
}
}
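
Neither DemoToken nor DemoLexer appears in this commit; the following is a minimal sketch of how a concrete lexer might plug rules into Lex, assuming it lives alongside these files. IgnoreWhitespace cannot be used as a method group for the Rule delegate because of its optional parameters, so it is wrapped in a lambda.

public enum DemoToken
{
    Identifier,
    Number,
    String,
    Plus,
    EndOfSource,
}

public class DemoLexer : Lexer<DemoToken>
{
    protected override DemoToken EndOfSource => DemoToken.EndOfSource;

    public DemoLexer()
    {
        Rules = new List<Rule>
        {
            // Wrapped in a lambda: the optional parameters block a direct method-group conversion.
            ctx => DefaultRule.IgnoreWhitespace(ctx),
            DefaultRule.NewLine,
            DefaultRule.DoubleQuoteString(DemoToken.String),
            DefaultRule.Number(DemoToken.Number),
            DefaultRule.Identifier(DemoToken.Identifier),
            ctx => Match(ctx, DemoToken.Plus, '+'),
        };
    }
}

Calling new DemoLexer().Lex("demo.txt", "total + 42") would then produce identifier, plus, and number tokens, followed by the <EOF> token that Lex appends.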


@@ -0,0 +1,32 @@
namespace DevDisciples.Parsing;
public abstract class ParsableStream<T>
{
private readonly ReadOnlyMemory<T> _tokens;
protected ReadOnlySpan<T> Tokens => _tokens.Span;
public int Position { get; set; }
public T Current => Position < Tokens.Length ? Tokens[Position] : default!;
public ParsableStream(ReadOnlyMemory<T> tokens)
{
_tokens = tokens;
}
public virtual T Advance()
{
return Position < Tokens.Length ? Tokens[Position++] : default!;
}
public virtual T Peek(int offset = 0)
{
return Position + offset < Tokens.Length ? Tokens[Position + offset] : default!;
}
public virtual bool Ended()
{
return Position >= Tokens.Length;
}
}


@@ -0,0 +1,106 @@
namespace DevDisciples.Parsing;
public class ParserContext<TToken> : ParsableStream<Lexer<TToken>.Token> where TToken : Enum
{
protected readonly TToken _endOfSource;
public ParserContext(Memory<Lexer<TToken>.Token> tokens, TToken endOfSource) : base(tokens)
{
_endOfSource = endOfSource;
}
public bool Check(TToken type, int offset = 0)
{
if (Ended()) return false;
if (Equals(Peek(offset).Type, _endOfSource)) return false;
return Equals(Peek(offset).Type, type);
}
/// <summary>
/// Checks whether the given token sequence matches at the current position without consuming any tokens.
/// </summary>
/// <param name="sequence">The token types expected, in order, starting at the current position.</param>
/// <returns><c>true</c> if every token type in the sequence matches; otherwise <c>false</c>.</returns>
public bool CheckSequence(params TToken[] sequence)
{
for (var i = 0; i < sequence.Length; i++)
{
if (!Check(sequence[i], i))
{
return false;
}
}
return true;
}
public override bool Ended()
{
return base.Ended() || Equals(Current.Type, _endOfSource);
}
public bool Match(TToken token)
{
var matched = Check(token);
if (matched) Advance();
return matched;
}
public bool MatchAny(params TToken[] types)
{
for (var i = 0; i < types.Length; i++)
{
if (Check(types[i]))
{
Advance();
return true;
}
}
return false;
}
public bool MatchSequence(params TToken[] sequence)
{
for (var i = 0; i < sequence.Length; i++)
{
if (!Check(sequence[i], i))
{
return false;
}
}
for (var i = 0; i < sequence.Length; i++)
{
Advance();
}
return true;
}
public Lexer<TToken>.Token Previous()
{
return Peek(-1);
}
public Lexer<TToken>.Token Consume(TToken type, string message)
{
if (Check(type)) return Advance();
throw Error(message);
}
public Exception Error(string message)
{
return new ParsingException(Report.FormatMessage(Current, message));
}
public Exception Error(Lexer<TToken>.Token token, string message)
{
return new ParsingException(Report.FormatMessage(token, message));
}
public void Halt(Lexer<TToken>.Token token, string message)
{
throw new ParsingException(Report.FormatMessage(token, message));
}
}
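
A hedged sketch of driving ParserContext<TToken> by hand, reusing the hypothetical DemoToken and DemoLexer from the lexer sketch above; a real parser would build syntax nodes instead of printing lexemes.

// Hypothetical usage, e.g. in a scratch Program.cs or a test.
var tokens = new DemoLexer().Lex("demo.txt", "total + 42").ToArray();
var ctx = new ParserContext<DemoToken>(tokens, DemoToken.EndOfSource);

var left = ctx.Consume(DemoToken.Identifier, "Expected an identifier.");
Console.WriteLine($"left operand: {left.Lexeme}");

while (ctx.Match(DemoToken.Plus))
{
    var right = ctx.MatchAny(DemoToken.Identifier, DemoToken.Number)
        ? ctx.Previous()
        : throw ctx.Error("Expected an operand after '+'.");
    Console.WriteLine($"+ {right.Lexeme}");
}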


@@ -0,0 +1,16 @@
namespace DevDisciples.Parsing;
public class ParsingException : Exception
{
public ParsingException()
{
}
public ParsingException(string? message) : base(message)
{
}
public ParsingException(string? message, Exception? innerException) : base(message, innerException)
{
}
}


@@ -0,0 +1,19 @@
namespace DevDisciples.Parsing;
public static class Report
{
public static Exception Error(ISourceLocation token, string message)
{
return new Exception(FormatMessage(token, message));
}
public static void Halt(ISourceLocation token, string message)
{
throw new Exception(FormatMessage(token, message));
}
public static string FormatMessage(ISourceLocation token, string msg)
{
return $"{token.File}\n\t[line: {token.Line}, column: {token.Column}] {msg}";
}
}


@@ -0,0 +1,64 @@
namespace DevDisciples.Parsing;
public class Source : ParsableStream<char>, ISourceLocation
{
private readonly string _source;
public string File { get; }
public int Start { get; set; }
public int Line { get; set; } = 1;
public int Column { get; set; } = 1;
public char Last => Tokens[^1];
public Source(string file, string source) : base(source.AsMemory())
{
File = file;
_source = source;
}
public override char Advance()
{
Column++;
return base.Advance();
}
public override bool Ended()
{
return Current == '\0' || base.Ended();
}
public string Extract()
{
var position = (Start, Length: Position - Start);
Start = Position;
return _source.Substring(position.Start, position.Length);
}
public void Ignore()
{
Start = Position;
}
public bool Match(char expected)
{
// Guard against indexing past the end of the source.
if (Ended() || Tokens[Position] != expected) return false;
Position += 1;
return true;
}
public bool Match(ReadOnlySpan<char> expected)
{
if (Position + expected.Length > Tokens.Length) return false;
for (var index = 0; index < expected.Length; index++)
{
if (Tokens[Position + index] != expected[index])
{
return false;
}
}
Position += expected.Length;
return true;
}
}
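
The Start/Position pair is what lets rules slice lexemes out of the raw text; the walkthrough below is purely illustrative (the file name and input are made up).

var src = new Source("demo.txt", "abc  42");

while (!src.Ended() && src.Peek() != ' ') src.Advance();
Console.WriteLine(src.Extract());    // "abc": Extract moves Start up to Position

while (src.Match(' ')) src.Ignore(); // skip the spaces and exclude them from the next lexeme

while (!src.Ended()) src.Advance();
Console.WriteLine(src.Extract());    // "42"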


@@ -0,0 +1,8 @@
namespace DevDisciples.Parsing;
public static class Visitor
{
public delegate void Visit(object visitee, params object[] args);
public delegate TOut Visit<TOut>(object visitee, params object[] args);
public delegate TOut Visit<TIn, TOut>(TIn visitee, params object[] args);
}


@@ -0,0 +1,42 @@
namespace DevDisciples.Parsing;
public class VisitorContainer
{
private Dictionary<Type, Visitor.Visit> Visitors { get; } = new();
private Visitor.Visit Default { get; set; } = default!;
public void Register<TVisitee>(Visitor.Visit visitor)
{
Visitors[typeof(TVisitee)] = visitor;
}
public Visitor.Visit this[Type type] => Visitors.GetValueOrDefault(type, Default);
}
public class VisitorContainer<T>
{
protected Dictionary<Type, Visitor.Visit<T>> Visitors { get; } = new();
public Visitor.Visit<T> Default { get; set; } = default!;
public VisitorContainer<T> Register<TVisitee>(Visitor.Visit<T> visitor)
{
Visitors[typeof(TVisitee)] = visitor;
return this;
}
public Visitor.Visit<T> this[Type type] => Visitors.GetValueOrDefault(type, Default);
}
public class VisitorContainer<TIn, TOut>
{
protected Dictionary<Type, Visitor.Visit<TIn, TOut>> Visitors { get; } = new();
public Visitor.Visit<TIn, TOut> Default { get; set; } = default!;
public void Register<TVisitee>(Visitor.Visit<TIn, TOut> visitor)
{
Visitors[typeof(TVisitee)] = visitor;
}
public Visitor.Visit<TIn, TOut> this[Type type] => Visitors.GetValueOrDefault(type, Default);
}
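
Finally, a hedged sketch of dispatching over syntax nodes with the typed container; NumberLiteral and AstPrinter are illustrative names, not part of the commit.

// Hypothetical node type and printer; only VisitorContainer<TIn, TOut> and
// ISyntaxNode come from the commit.
public record NumberLiteral(double Value) : ISyntaxNode;

public static class AstPrinter
{
    private static readonly VisitorContainer<ISyntaxNode, string> Printers = new()
    {
        Default = (node, _) => $"<no printer registered for {node.GetType().Name}>",
    };

    static AstPrinter()
    {
        Printers.Register<NumberLiteral>((node, _) => ((NumberLiteral)node).Value.ToString());
    }

    // AstPrinter.Print(new NumberLiteral(42)) would yield "42".
    public static string Print(ISyntaxNode node) => Printers[node.GetType()](node);
}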