jtr/Jtr.Parsing/Lexer.Rule.cs

namespace Jtr.Parsing;

public abstract partial class Lexer<TToken> where TToken : Enum
{
    /// <summary>
    /// A lexing rule: a callback that inspects the given <see cref="Context"/> and reports whether it applied.
    /// </summary>
    /// <param name="ctx">The lexer context the rule operates on.</param>
    /// <returns>
    /// The built-in rules in <c>DefaultRule</c> return <c>true</c> after consuming input and <c>false</c>
    /// when the upcoming input does not match; custom rules presumably follow the same contract.
    /// </returns>
    public delegate bool Rule(Context ctx);

    protected static class DefaultRule
    {
        // Columns credited for one space when IgnoreWhitespace receives no (or a non-positive) 'space' value.
        private const int DefaultSpaceSize = 1;
        // Columns credited for one tab when IgnoreWhitespace receives no (or a non-positive) 'tab' value.
        private const int DefaultTabSize = 4;

        /// <summary>
        /// Skips one whitespace character (carriage return, space or tab) at the current source position.
        /// </summary>
        /// <param name="context">Lexer context whose source is examined.</param>
        /// <param name="space">Columns added for a space; non-positive values fall back to the default.</param>
        /// <param name="tab">Columns added for a tab; non-positive values fall back to the default.</param>
        /// <returns><c>true</c> when a whitespace character was matched and ignored; otherwise <c>false</c>.</returns>
        public static bool IgnoreWhitespace(Context context, int space = DefaultSpaceSize, int tab = DefaultTabSize)
        {
            if (space <= 0) space = DefaultSpaceSize;
            if (tab <= 0) tab = DefaultTabSize;

            var source = context.Source;
            int columns;

            if (source.Match('\r'))
            {
                // A carriage return is dropped without moving the column.
                columns = 0;
            }
            else if (source.Match(' '))
            {
                columns = space;
            }
            else if (source.Match('\t'))
            {
                columns = tab;
            }
            else
            {
                return false;
            }

            source.Ignore();

            // Only touch the column when there is something to add (space and tab are always positive here).
            if (columns > 0) source.Column += columns;

            return true;
        }

        /// <summary>
        /// Consumes a line feed, moving the source to the first column of the next line.
        /// </summary>
        /// <param name="context">Lexer context whose source is examined.</param>
        /// <returns><c>true</c> when a <c>'\n'</c> was matched and ignored; otherwise <c>false</c>.</returns>
        public static bool NewLine(Context context)
        {
            var source = context.Source;

            if (source.Match('\n'))
            {
                source.Line++;
                source.Column = 1;
                source.Ignore();

                return true;
            }

            return false;
        }

        /// <summary>
        /// Creates a rule that lexes a string literal delimited by single quotes (<c>'</c>).
        /// </summary>
        /// <param name="token">Token type assigned to the produced string token.</param>
        /// <returns>
        /// A rule that returns <c>false</c> when the input does not start with a single quote, and otherwise
        /// adds a token holding the text between the quotes and returns <c>true</c>.
        /// </returns>
        /// <exception cref="SyntaxException">Thrown by the rule when the input ends before the closing quote.</exception>
        public static Rule SingleQuoteString(TToken token) => QuotedString(token, '\'');

        /// <summary>
        /// Creates a rule that lexes a string literal delimited by double quotes (<c>"</c>).
        /// </summary>
        /// <param name="token">Token type assigned to the produced string token.</param>
        /// <returns>
        /// A rule that returns <c>false</c> when the input does not start with a double quote, and otherwise
        /// adds a token holding the text between the quotes and returns <c>true</c>.
        /// </returns>
        /// <exception cref="SyntaxException">Thrown by the rule when the input ends before the closing quote.</exception>
        public static Rule DoubleQuoteString(TToken token) => QuotedString(token, '"');

        // Shared implementation of the quoted-string rules, so both quote styles track lines and
        // columns in exactly the same way.
        private static Rule QuotedString(TToken token, char quote)
        {
            return context =>
            {
                if (!context.Source.Match(quote)) return false;

                var src = context.Source;

                // Position recorded for the token, taken right after the opening quote was matched.
                int line = src.Line, column = src.Column;

                // Check for end of input before peeking so the loop never relies on what Peek() yields there.
                while (!src.Ended() && src.Peek() != quote)
                {
                    // Account for source lines and columns when dealing with a string spanning multiple lines.
                    if (src.Peek() == '\n')
                    {
                        src.Line++;
                        // Same start-of-line column that the NewLine rule uses.
                        src.Column = 1;
                    }

                    src.Advance();
                }

                if (src.Ended())
                {
                    throw new SyntaxException(
                        $"[line: {src.Line}, column: {src.Column}] Unterminated string near '{src.Last}'."
                    );
                }

                // Include the closing quotation mark.
                src.Advance();

                var lexeme = src.Extract();

                // The scan stops at the first closing quote, so the content holds no quote characters
                // and trimming removes only the delimiters.
                context.AddToken(
                    token,
                    lexeme.Trim(quote),
                    line,
                    column
                );

                return true;
            };
        }

        /// <summary>
        /// Creates a rule that lexes an identifier: a letter or underscore followed by letters, digits or underscores.
        /// </summary>
        /// <param name="token">Token type assigned to the produced identifier token.</param>
        /// <returns>
        /// A rule that returns <c>false</c> when the next character cannot start an identifier, and otherwise
        /// adds a token with the identifier text and returns <c>true</c>.
        /// </returns>
        public static Rule Identifier(TToken token)
        {
            bool LexIdentifier(Context context)
            {
                var source = context.Source;

                if (IsAlpha(source.Peek()))
                {
                    // Remember where the identifier starts before consuming it.
                    int startLine = source.Line;
                    int startColumn = source.Column;

                    while (IsAlphaNumeric(source.Peek()))
                    {
                        source.Advance();
                    }

                    var text = source.Extract();
                    context.AddToken(token, text, startLine, startColumn);

                    return true;
                }

                return false;
            }

            return LexIdentifier;
        }

        // public static bool DoubleSlashComment(Context context)
        // {
        //     var src = context.Source;
        //
        //     if (!src.Match("//")) return false;
        //
        //     while (!src.Ended() && src.Peek() != '\n') src.Advance();
        //
        //     src.Ignore();
        //
        //     return true;
        // }

        // True for ASCII letters and the underscore, the characters the Identifier rule accepts as a first character.
        private static bool IsAlpha(char c)
        {
            if (c == '_') return true;

            var isLower = c >= 'a' && c <= 'z';
            var isUpper = c >= 'A' && c <= 'Z';

            return isLower || isUpper;
        }

        // True for any character that IsAlpha or IsDigit accepts.
        private static bool IsAlphaNumeric(char c)
        {
            if (IsAlpha(c)) return true;

            return IsDigit(c);
        }

        /// <summary>
        /// Creates a rule that lexes a number: a run of digits, optionally followed by <c>'.'</c> and more digits.
        /// </summary>
        /// <param name="token">Token type assigned to the produced number token.</param>
        /// <returns>
        /// A rule that returns <c>false</c> when the next character is not a digit, and otherwise adds a token
        /// with the number text and returns <c>true</c>.
        /// </returns>
        public static Rule Number(TToken token)
        {
            return context =>
            {
                var source = context.Source;

                if (IsDigit(source.Peek()))
                {
                    // Remember where the number starts before consuming it.
                    int startLine = source.Line;
                    int startColumn = source.Column;

                    // Integer part.
                    ProcessDigits(source);

                    // Peek(1) presumably looks one character past the current one; the '.' is only
                    // matched (and consumed) when that look-ahead is a digit.
                    var digitFollows = IsDigit(source.Peek(1));
                    if (digitFollows && source.Match('.'))
                    {
                        // Fractional part.
                        ProcessDigits(source);
                    }

                    var text = source.Extract();
                    context.AddToken(token, text, startLine, startColumn);

                    return true;
                }

                return false;
            };
        }

        // True for the ASCII decimal digits '0' through '9'.
        private static bool IsDigit(char c)
        {
            return '0' <= c && c <= '9';
        }

        // Advances the source past a consecutive run of digits, stopping at the first non-digit.
        private static void ProcessDigits(Source src)
        {
            for (var next = src.Peek(); IsDigit(next); next = src.Peek())
            {
                src.Advance();
            }
        }
    }
}