Initial commit

This commit is contained in:
2024-04-20 16:47:13 +02:00
commit 0a4c5ebb8d
44 changed files with 4236 additions and 0 deletions

View File

@@ -0,0 +1,26 @@
<!-- MicroForge.Parsing: ANTLR4-based Python parser (net8.0, nullable enabled). -->
<Project Sdk="Microsoft.NET.Sdk">
  <PropertyGroup>
    <TargetFramework>net8.0</TargetFramework>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
  </PropertyGroup>
  <ItemGroup>
    <!-- Antlr4 + Antlr4.CodeGenerator generate the C# lexer/parser from the .g4 grammars
         at build time; Antlr4.Runtime is the only runtime dependency. -->
    <PackageReference Include="Antlr4" Version="4.6.6">
      <PrivateAssets>all</PrivateAssets>
      <IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
    </PackageReference>
    <PackageReference Include="Antlr4.CodeGenerator" Version="4.6.6">
      <PrivateAssets>all</PrivateAssets>
      <IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
    </PackageReference>
    <PackageReference Include="Antlr4.Runtime" Version="4.6.6" />
  </ItemGroup>
  <ItemGroup>
    <!-- NOTE(review): the grammars are registered as plain Content items here; if the
         Antlr4 build integration does not pick up *.g4 automatically, these may need to
         be <Antlr4 Include="..."/> items instead - confirm against a clean build. -->
    <Content Include="PythonLexer.g4" />
    <Content Include="PythonParser.g4" />
  </ItemGroup>
</Project>

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,493 @@
/*
The MIT License (MIT)
Copyright (c) 2021 Robert Einhorn
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/*
* Project : Python Indent/Dedent handler for ANTLR4 grammars
*
* Developed by : Robert Einhorn
*/
using Antlr4.Runtime;
using System.Text.RegularExpressions;
namespace MicroForge.Parsing;
/// <summary>
/// Lexer superclass for the ANTLR4 Python grammar. It post-processes the raw token
/// stream produced by the generated lexer: synthesizes INDENT/DEDENT tokens from
/// leading whitespace, hides NEWLINE tokens inside implicit line joins and blank
/// lines, splits doubled braces in f-string middles, and steers the f-string lexer
/// modes. Tokens are staged in a pending queue and handed out one at a time by
/// <see cref="NextToken"/>.
/// Based on Robert Einhorn's Python indent/dedent handler for ANTLR4 grammars.
/// </summary>
public abstract class PythonLexerBase : Lexer
{
    // A stack that keeps track of the indentation lengths
    private Stack<int> indentLengthStack;
    // A list where tokens are waiting to be loaded into the token stream
    private LinkedList<IToken> pendingTokens;

    // last pending token types
    private int previousPendingTokenType;
    private int lastPendingTokenTypeFromDefaultChannel;

    // The amount of opened parentheses, square brackets, or curly braces
    private int opened;
    // The amount of opened parentheses and square brackets in the current lexer mode
    // (one counter per f-string nesting level)
    private Stack<int> paren_or_bracket_openedStack;

    // Remember which kinds of indentation characters were seen, so a mix of
    // tabs and spaces can be reported once (see GetIndentationLength).
    private bool wasSpaceIndentation;
    private bool wasTabIndentation;
    private bool wasIndentationMixedWithSpacesAndTabs;
    private const int INVALID_LENGTH = -1;

    private CommonToken curToken; // current (under processing) token
    private IToken ffgToken;      // following (look ahead) token

    private const string ERR_TXT = " ERROR: ";

    protected PythonLexerBase(ICharStream input) : base(input)
    {
        this.Init();
    }

    // (Re)initializes all lexer state; also invoked from Reset() so a reused
    // lexer instance starts from a clean slate.
    private void Init()
    {
        this.indentLengthStack = new Stack<int>();
        this.pendingTokens = new LinkedList<IToken>();
        this.previousPendingTokenType = 0;
        this.lastPendingTokenTypeFromDefaultChannel = 0;
        this.opened = 0;
        this.paren_or_bracket_openedStack = new Stack<int>();
        this.wasSpaceIndentation = false;
        this.wasTabIndentation = false;
        this.wasIndentationMixedWithSpacesAndTabs = false;
        this.curToken = null!;
        this.ffgToken = null!;
    }

    public override IToken NextToken() // reading the input stream until a return EOF
    {
        this.CheckNextToken();
        IToken firstPendingToken = this.pendingTokens.First.Value;
        this.pendingTokens.RemoveFirst();
        return firstPendingToken; // add the queued token to the token stream
    }

    // Pulls the next raw token (unless EOF was already queued) and dispatches it
    // to the appropriate handler, which enqueues one or more pending tokens.
    private void CheckNextToken()
    {
        if (this.previousPendingTokenType != TokenConstants.Eof)
        {
            this.SetCurrentAndFollowingTokens();
            if (this.indentLengthStack.Count == 0) // We're at the first token
            {
                this.HandleStartOfInput();
            }
            switch (this.curToken.Type)
            {
                case PythonLexer.LPAR:
                case PythonLexer.LSQB:
                case PythonLexer.LBRACE:
                    this.opened++;
                    this.AddPendingToken(this.curToken);
                    break;
                case PythonLexer.RPAR:
                case PythonLexer.RSQB:
                case PythonLexer.RBRACE:
                    this.opened--;
                    this.AddPendingToken(this.curToken);
                    break;
                case PythonLexer.NEWLINE:
                    this.HandleNEWLINEtoken();
                    break;
                case PythonLexer.STRING:
                    this.HandleSTRINGtoken();
                    break;
                case PythonLexer.FSTRING_MIDDLE:
                    this.HandleFSTRING_MIDDLE_token();
                    break;
                case PythonLexer.ERROR_TOKEN:
                    this.ReportLexerError("token recognition error at: '" + this.curToken.Text + "'");
                    this.AddPendingToken(this.curToken);
                    break;
                case TokenConstants.Eof:
                    this.HandleEOFtoken();
                    break;
                default:
                    this.AddPendingToken(this.curToken);
                    break;
            }
            this.HandleFORMAT_SPECIFICATION_MODE();
        }
    }

    // Advances the one-token look-ahead window: curToken becomes the previously
    // fetched look-ahead (or a fresh token on the first call), ffgToken the next one.
    // EOF is never read past; ffgToken stays EOF once reached.
    private void SetCurrentAndFollowingTokens()
    {
        this.curToken = this.ffgToken == null ?
                        new CommonToken(base.NextToken()) :
                        new CommonToken(this.ffgToken);
        this.HandleFStringLexerModes();
        this.ffgToken = this.curToken.Type == TokenConstants.Eof ?
                        this.curToken :
                        base.NextToken();
    }

    // initialize the _indentLengths
    // hide the leading NEWLINE token(s)
    // if exists, find the first statement (not NEWLINE, not EOF token) that comes from the default channel
    // insert a leading INDENT token if necessary
    private void HandleStartOfInput()
    {
        // initialize the stack with a default 0 indentation length
        this.indentLengthStack.Push(0); // this will never be popped off
        while (this.curToken.Type != TokenConstants.Eof)
        {
            if (this.curToken.Channel == TokenConstants.DefaultChannel)
            {
                if (this.curToken.Type == PythonLexer.NEWLINE)
                {
                    // all the NEWLINE tokens must be ignored before the first statement
                    this.HideAndAddPendingToken(this.curToken);
                }
                else
                { // We're at the first statement
                    this.InsertLeadingIndentToken();
                    return; // continue the processing of the current token with CheckNextToken()
                }
            }
            else
            {
                this.AddPendingToken(this.curToken); // it can be WS, EXPLICIT_LINE_JOINING, or COMMENT token
            }
            this.SetCurrentAndFollowingTokens();
        } // continue the processing of the EOF token with CheckNextToken()
    }

    // If the very first statement is preceded by whitespace, emit an error INDENT
    // token so the parser raises an "unexpected indent" style error.
    private void InsertLeadingIndentToken()
    {
        if (this.previousPendingTokenType == PythonLexer.WS)
        {
            var prevToken = this.pendingTokens.Last.Value;
            if (this.GetIndentationLength(prevToken.Text) != 0) // there is an "indentation" before the first statement
            {
                const string errMsg = "first statement indented";
                this.ReportLexerError(errMsg);
                // insert an INDENT token before the first statement to raise an 'unexpected indent' error later by the parser
                this.CreateAndAddPendingToken(PythonLexer.INDENT, TokenConstants.DefaultChannel, PythonLexerBase.ERR_TXT + errMsg, this.curToken);
            }
        }
    }

    // Decides the fate of a NEWLINE token: hidden inside implicit line joining or
    // before blank/comment lines; otherwise kept, followed by INDENT/DEDENT
    // synthesis based on the whitespace of the next line.
    private void HandleNEWLINEtoken()
    {
        if (this.opened > 0)
        {
            // We're in an implicit line joining, ignore the current NEWLINE token
            this.HideAndAddPendingToken(this.curToken);
        }
        else
        {
            CommonToken nlToken = new CommonToken(this.curToken); // save the current NEWLINE token
            bool isLookingAhead = this.ffgToken.Type == PythonLexer.WS;
            if (isLookingAhead)
            {
                this.SetCurrentAndFollowingTokens(); // set the next two tokens
            }
            switch (this.ffgToken.Type)
            {
                case PythonLexer.NEWLINE:      // We're before a blank line
                case PythonLexer.COMMENT:      // We're before a comment
                case PythonLexer.TYPE_COMMENT: // We're before a type comment
                    this.HideAndAddPendingToken(nlToken);
                    if (isLookingAhead)
                    {
                        this.AddPendingToken(this.curToken); // WS token
                    }
                    break;
                default:
                    this.AddPendingToken(nlToken);
                    if (isLookingAhead)
                    { // We're on whitespace(s) followed by a statement
                        int indentationLength = this.ffgToken.Type == TokenConstants.Eof ?
                                                0 :
                                                this.GetIndentationLength(this.curToken.Text);
                        if (indentationLength != PythonLexerBase.INVALID_LENGTH)
                        {
                            this.AddPendingToken(this.curToken); // WS token
                            this.InsertIndentOrDedentToken(indentationLength); // may insert INDENT token or DEDENT token(s)
                        }
                        else
                        {
                            this.ReportError("inconsistent use of tabs and spaces in indentation");
                        }
                    }
                    else
                    {
                        // We're at a newline followed by a statement (there is no whitespace before the statement)
                        this.InsertIndentOrDedentToken(0); // may insert DEDENT token(s)
                    }
                    break;
            }
        }
    }

    // Compares the new indentation length with the top of the stack and emits
    // one INDENT or as many DEDENTs as needed to match.
    private void InsertIndentOrDedentToken(int indentLength)
    {
        //*** https://docs.python.org/3/reference/lexical_analysis.html#indentation
        int prevIndentLength = this.indentLengthStack.Peek();
        if (indentLength > prevIndentLength)
        {
            this.CreateAndAddPendingToken(PythonLexer.INDENT, TokenConstants.DefaultChannel, null, this.ffgToken);
            this.indentLengthStack.Push(indentLength);
        }
        else
        {
            while (indentLength < prevIndentLength)
            { // more than 1 DEDENT token may be inserted into the token stream
                this.indentLengthStack.Pop();
                prevIndentLength = this.indentLengthStack.Peek();
                if (indentLength <= prevIndentLength)
                {
                    this.CreateAndAddPendingToken(PythonLexer.DEDENT, TokenConstants.DefaultChannel, null, this.ffgToken);
                }
                else
                {
                    this.ReportError("inconsistent dedent");
                }
            }
        }
    }

    private void HandleSTRINGtoken()
    {
        // remove the \<newline> escape sequences from the string literal
        // https://docs.python.org/3.11/reference/lexical_analysis.html#string-and-bytes-literals
        string line_joinFreeStringLiteral = Regex.Replace(this.curToken.Text, @"\\\r?\n", "");
        if (this.curToken.Text.Length == line_joinFreeStringLiteral.Length)
        {
            this.AddPendingToken(this.curToken);
        }
        else
        {
            CommonToken originalSTRINGtoken = new CommonToken(this.curToken); // backup the original token
            this.curToken.Text = line_joinFreeStringLiteral;
            this.AddPendingToken(this.curToken); // add the modified token with inline string literal
            this.HideAndAddPendingToken(originalSTRINGtoken); // add the original token with a hidden channel
            // this inserted hidden token allows to restore the original string literal with the \<newline> escape sequences
        }
    }

    private void HandleFSTRING_MIDDLE_token() // replace the double braces '{{' or '}}' to single braces and hide the second braces
    {
        string fsMid = this.curToken.Text;
        fsMid = fsMid.Replace("{{", "{_").Replace("}}", "}_"); // replace: {{ --> {_ and }} --> }_
        Regex regex = new Regex(@"(?<=[{}])_");
        string[] arrOfStr = regex.Split(fsMid); // split by {_ or }_
        foreach (string s in arrOfStr)
        {
            if (!String.IsNullOrEmpty(s))
            {
                this.CreateAndAddPendingToken(PythonLexer.FSTRING_MIDDLE, TokenConstants.DefaultChannel, s, this.ffgToken);
                string lastCharacter = s.Substring(s.Length - 1);
                if ("{}".Contains(lastCharacter))
                {
                    this.CreateAndAddPendingToken(PythonLexer.FSTRING_MIDDLE, TokenConstants.HiddenChannel, lastCharacter, this.ffgToken);
                    // this inserted hidden token allows to restore the original f-string literal with the double braces
                }
            }
        }
    }

    // Tracks the lexer-mode stack while inside an f-string so that ':' starts a
    // format-specification mode and '}' leaves the replacement field.
    private void HandleFStringLexerModes() // https://peps.python.org/pep-0498/#specification
    {
        if (this._modeStack.Count > 0)
        {
            switch (this.curToken.Type)
            {
                case PythonLexer.LBRACE:
                    this.PushMode(PythonLexer.DefaultMode);
                    this.paren_or_bracket_openedStack.Push(0);
                    break;
                case PythonLexer.LPAR:
                case PythonLexer.LSQB:
                    // https://peps.python.org/pep-0498/#lambdas-inside-expressions
                    this.paren_or_bracket_openedStack.Push(this.paren_or_bracket_openedStack.Pop() + 1); // increment the last element
                    break;
                case PythonLexer.RPAR:
                case PythonLexer.RSQB:
                    this.paren_or_bracket_openedStack.Push(this.paren_or_bracket_openedStack.Pop() - 1); // decrement the last element
                    break;
                case PythonLexer.COLON: // colon can only come from DEFAULT_MODE
                    if (this.paren_or_bracket_openedStack.Peek() == 0)
                    {
                        switch (this._modeStack.First()) // check the previous lexer mode (the current is DEFAULT_MODE)
                        {
                            case PythonLexer.SINGLE_QUOTE_FSTRING_MODE:
                            case PythonLexer.LONG_SINGLE_QUOTE_FSTRING_MODE:
                            case PythonLexer.SINGLE_QUOTE_FORMAT_SPECIFICATION_MODE:
                                this.Mode(PythonLexer.SINGLE_QUOTE_FORMAT_SPECIFICATION_MODE); // continue in format spec. mode
                                break;
                            case PythonLexer.DOUBLE_QUOTE_FSTRING_MODE:
                            case PythonLexer.LONG_DOUBLE_QUOTE_FSTRING_MODE:
                            case PythonLexer.DOUBLE_QUOTE_FORMAT_SPECIFICATION_MODE:
                                this.Mode(PythonLexer.DOUBLE_QUOTE_FORMAT_SPECIFICATION_MODE); // continue in format spec. mode
                                break;
                        }
                    }
                    break;
                case PythonLexer.RBRACE:
                    switch (_mode)
                    {
                        case PythonLexer.DefaultMode:
                        case PythonLexer.SINGLE_QUOTE_FORMAT_SPECIFICATION_MODE:
                        case PythonLexer.DOUBLE_QUOTE_FORMAT_SPECIFICATION_MODE:
                            this.PopMode();
                            this.paren_or_bracket_openedStack.Pop();
                            break;
                        default:
                            this.ReportLexerError("f-string: single '}' is not allowed");
                            break;
                    }
                    break;
            }
        }
    }

    private void HandleFORMAT_SPECIFICATION_MODE()
    {
        if (this._modeStack.Count > 0 && this.ffgToken.Type == PythonLexer.RBRACE)
        {
            switch (this.curToken.Type)
            {
                case PythonLexer.COLON:
                case PythonLexer.RBRACE:
                    // insert an empty FSTRING_MIDDLE token instead of the missing format specification
                    this.CreateAndAddPendingToken(PythonLexer.FSTRING_MIDDLE, TokenConstants.DefaultChannel, "", this.ffgToken);
                    break;
            }
        }
    }

    // At EOF: make sure the last statement is terminated by a NEWLINE and all
    // open indentation levels are closed by DEDENT tokens.
    private void InsertTrailingTokens()
    {
        switch (this.lastPendingTokenTypeFromDefaultChannel)
        {
            case PythonLexer.NEWLINE:
            case PythonLexer.DEDENT:
                break; // no trailing NEWLINE token is needed
            default:
                // insert an extra trailing NEWLINE token that serves as the end of the last statement
                this.CreateAndAddPendingToken(PythonLexer.NEWLINE, TokenConstants.DefaultChannel, null, this.ffgToken); // ffgToken is EOF
                break;
        }
        this.InsertIndentOrDedentToken(0); // Now insert as many trailing DEDENT tokens as needed
    }

    private void HandleEOFtoken()
    {
        if (this.lastPendingTokenTypeFromDefaultChannel > 0)
        { // there was a statement in the input (leading NEWLINE tokens are hidden)
            this.InsertTrailingTokens();
        }
        this.AddPendingToken(this.curToken);
    }

    private void HideAndAddPendingToken(CommonToken cToken)
    {
        cToken.Channel = TokenConstants.HiddenChannel;
        this.AddPendingToken(cToken);
    }

    // Builds a synthetic zero-width token (StopIndex = StartIndex - 1) positioned
    // at baseToken and appends it to the pending queue.
    private void CreateAndAddPendingToken(int type, int channel, string text, IToken baseToken)
    {
        CommonToken cToken = new CommonToken(baseToken);
        cToken.Type = type;
        cToken.Channel = channel;
        cToken.StopIndex = baseToken.StartIndex - 1;
        // NOTE(review): the upstream handler substitutes "<SymbolicName>" for a null
        // text; here a null text becomes an empty string instead - confirm intended.
        cToken.Text = text ?? string.Empty;
        this.AddPendingToken(cToken);
    }

    private void AddPendingToken(IToken token)
    {
        // save the last pending token type because the pendingTokens linked list can be empty by the nextToken()
        this.previousPendingTokenType = token.Type;
        if (token.Channel == TokenConstants.DefaultChannel)
        {
            this.lastPendingTokenTypeFromDefaultChannel = this.previousPendingTokenType;
        }
        this.pendingTokens.AddLast(token);
    }

    // Computes the indentation width of a whitespace run; tabs advance to the
    // next multiple of TAB_LENGTH and a form feed resets the count. Returns
    // INVALID_LENGTH once, on the first inconsistent tab/space mix.
    private int GetIndentationLength(string textWS) // the textWS may contain spaces, tabs or form feeds
    {
        const int TAB_LENGTH = 8; // the standard number of spaces to replace a tab with spaces
        int length = 0;
        foreach (char ch in textWS)
        {
            switch (ch)
            {
                case ' ':
                    this.wasSpaceIndentation = true;
                    length += 1;
                    break;
                case '\t':
                    this.wasTabIndentation = true;
                    length += TAB_LENGTH - (length % TAB_LENGTH);
                    break;
                case '\f': // form feed
                    length = 0;
                    break;
            }
        }
        if (this.wasTabIndentation && this.wasSpaceIndentation)
        {
            if (!this.wasIndentationMixedWithSpacesAndTabs)
            {
                this.wasIndentationMixedWithSpacesAndTabs = true;
                return PythonLexerBase.INVALID_LENGTH; // only for the first inconsistent indent
            }
        }
        return length;
    }

    private void ReportLexerError(string errMsg)
    {
        // NOTE(review): this runtime's SyntaxError overload takes no TextWriter
        // (ErrorOutput) argument, unlike the upstream Java/C# handler.
        this.ErrorListenerDispatch.SyntaxError( this, this.curToken.Type, this.curToken.Line, this.curToken.Column, " LEXER" + PythonLexerBase.ERR_TXT + errMsg, null);
    }

    private void ReportError(string errMsg)
    {
        this.ReportLexerError(errMsg);
        // the ERROR_TOKEN will raise an error in the parser
        this.CreateAndAddPendingToken(PythonLexer.ERROR_TOKEN, TokenConstants.DefaultChannel, PythonLexerBase.ERR_TXT + errMsg, this.ffgToken);
    }

    public override void Reset()
    {
        this.Init();
        base.Reset();
    }
}

View File

@@ -0,0 +1,880 @@
/*
Python grammar
The MIT License (MIT)
Copyright (c) 2021 Robert Einhorn
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/*
* Project : an ANTLR4 parser grammar by the official PEG grammar
* https://github.com/RobEin/ANTLR4-parser-for-Python-3.12
* Developed by : Robert Einhorn
*
*/
// Parser grammar mirroring the official CPython 3.12 PEG grammar; rule names and
// ordering follow the upstream specification so the two can be diffed easily.
// Lexer-side details (INDENT/DEDENT, f-string modes) are handled by PythonLexerBase.
parser grammar PythonParser; // Python 3.12.1 https://docs.python.org/3.12/reference/grammar.html#full-grammar-specification
options {
    tokenVocab=PythonLexer;
    superClass=PythonParserBase;
}

// STARTING RULES
// ==============

file_input: statements? EOF;
interactive: statement_newline;
eval: expressions NEWLINE* EOF;
func_type: '(' type_expressions? ')' '->' expression NEWLINE* EOF;
fstring_input: star_expressions;

// GENERAL STATEMENTS
// ==================

statements: statement+;

statement: compound_stmt | simple_stmts;

statement_newline
    : compound_stmt NEWLINE
    | simple_stmts
    | NEWLINE
    | EOF;

simple_stmts
    : simple_stmt (';' simple_stmt)* ';'? NEWLINE
    ;

// NOTE: assignment MUST precede expression, else parsing a simple assignment
// will throw a SyntaxError.
simple_stmt
    : assignment
    | type_alias
    | star_expressions
    | return_stmt
    | import_stmt
    | raise_stmt
    | 'pass'
    | del_stmt
    | yield_stmt
    | assert_stmt
    | 'break'
    | 'continue'
    | global_stmt
    | nonlocal_stmt;

compound_stmt
    : function_def
    | if_stmt
    | class_def
    | with_stmt
    | for_stmt
    | try_stmt
    | while_stmt
    | match_stmt;

// SIMPLE STATEMENTS
// =================

// NOTE: annotated_rhs may start with 'yield'; yield_expr must start with 'yield'
assignment
    : NAME ':' expression ('=' annotated_rhs )?
    | ('(' single_target ')'
        | single_subscript_attribute_target) ':' expression ('=' annotated_rhs )?
    | (star_targets '=' )+ (yield_expr | star_expressions) TYPE_COMMENT?
    | single_target augassign (yield_expr | star_expressions);

annotated_rhs: yield_expr | star_expressions;

augassign
    : '+='
    | '-='
    | '*='
    | '@='
    | '/='
    | '%='
    | '&='
    | '|='
    | '^='
    | '<<='
    | '>>='
    | '**='
    | '//=';

return_stmt
    : 'return' star_expressions?;

raise_stmt
    : 'raise' (expression ('from' expression )?)?
    ;

global_stmt: 'global' NAME (',' NAME)*;

nonlocal_stmt: 'nonlocal' NAME (',' NAME)*;

del_stmt
    : 'del' del_targets;

yield_stmt: yield_expr;

assert_stmt: 'assert' expression (',' expression )?;

import_stmt
    : import_name
    | import_from;

// Import statements
// -----------------

import_name: 'import' dotted_as_names;

// note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS
import_from
    : 'from' ('.' | '...')* dotted_name 'import' import_from_targets
    | 'from' ('.' | '...')+ 'import' import_from_targets;
import_from_targets
    : '(' import_from_as_names ','? ')'
    | import_from_as_names
    | '*';
import_from_as_names
    : import_from_as_name (',' import_from_as_name)*;
import_from_as_name
    : NAME ('as' NAME )?;
dotted_as_names
    : dotted_as_name (',' dotted_as_name)*;
dotted_as_name
    : dotted_name ('as' NAME )?;
// left-recursive rule; ANTLR4 handles direct left recursion
dotted_name
    : dotted_name '.' NAME
    | NAME;

// COMPOUND STATEMENTS
// ===================

// Common elements
// ---------------

block
    : NEWLINE INDENT statements DEDENT
    | simple_stmts;

decorators: ('@' named_expression NEWLINE )+;

// Class definitions
// -----------------

class_def
    : decorators class_def_raw
    | class_def_raw;

class_def_raw
    : 'class' NAME type_params? ('(' arguments? ')' )? ':' block;

// Function definitions
// --------------------

function_def
    : decorators function_def_raw
    | function_def_raw;

function_def_raw
    : 'def' NAME type_params? '(' params? ')' ('->' expression )? ':' func_type_comment? block
    | ASYNC 'def' NAME type_params? '(' params? ')' ('->' expression )? ':' func_type_comment? block;

// Function parameters
// -------------------

params
    : parameters;

parameters
    : slash_no_default param_no_default* param_with_default* star_etc?
    | slash_with_default param_with_default* star_etc?
    | param_no_default+ param_with_default* star_etc?
    | param_with_default+ star_etc?
    | star_etc;

// Some duplication here because we can't write (',' | {isCurrentTokenType(RPAR)}?),
// which is because we don't support empty alternatives (yet).

slash_no_default
    : param_no_default+ '/' ','?
    ;
slash_with_default
    : param_no_default* param_with_default+ '/' ','?
    ;

star_etc
    : '*' param_no_default param_maybe_default* kwds?
    | '*' param_no_default_star_annotation param_maybe_default* kwds?
    | '*' ',' param_maybe_default+ kwds?
    | kwds;

kwds
    : '**' param_no_default;

// One parameter. This *includes* a following comma and type comment.
//
// There are three styles:
// - No default_assignment
// - With default_assignment
// - Maybe with default_assignment
//
// There are two alternative forms of each, to deal with type comments:
// - Ends in a comma followed by an optional type comment
// - No comma, optional type comment, must be followed by close paren
// The latter form is for a final parameter without trailing comma.
//

param_no_default
    : param ','? TYPE_COMMENT?
    ;
param_no_default_star_annotation
    : param_star_annotation ','? TYPE_COMMENT?
    ;
param_with_default
    : param default_assignment ','? TYPE_COMMENT?
    ;
param_maybe_default
    : param default_assignment? ','? TYPE_COMMENT?
    ;
param: NAME annotation?;
param_star_annotation: NAME star_annotation;
annotation: ':' expression;
star_annotation: ':' star_expression;
default_assignment: '=' expression;

// If statement
// ------------

if_stmt
    : 'if' named_expression ':' block (elif_stmt | else_block?)
    ;
elif_stmt
    : 'elif' named_expression ':' block (elif_stmt | else_block?)
    ;
else_block
    : 'else' ':' block;

// While statement
// ---------------

while_stmt
    : 'while' named_expression ':' block else_block?;

// For statement
// -------------

for_stmt
    : ASYNC? 'for' star_targets 'in' star_expressions ':' TYPE_COMMENT? block else_block?
    ;

// With statement
// --------------

with_stmt
    : ASYNC? 'with' ( '(' with_item (',' with_item)* ','? ')' ':'
                    | with_item (',' with_item)* ':' TYPE_COMMENT?
                    ) block
    ;

with_item
    : expression ('as' star_target)?
    ;

// Try statement
// -------------

try_stmt
    : 'try' ':' block finally_block
    | 'try' ':' block except_block+ else_block? finally_block?
    | 'try' ':' block except_star_block+ else_block? finally_block?;

// Except statement
// ----------------

except_block
    : 'except' (expression ('as' NAME )?)? ':' block
    ;
except_star_block
    : 'except' '*' expression ('as' NAME )? ':' block;
finally_block
    : 'finally' ':' block;

// Match statement
// ---------------

// 'match' and 'case' are soft keywords (see soft_kw_* rules at the bottom)
match_stmt
    : soft_kw_match subject_expr ':' NEWLINE INDENT case_block+ DEDENT;

subject_expr
    : star_named_expression ',' star_named_expressions?
    | named_expression;

case_block
    : soft_kw_case patterns guard? ':' block;

guard: 'if' named_expression;

patterns
    : open_sequence_pattern
    | pattern;

pattern
    : as_pattern
    | or_pattern;

as_pattern
    : or_pattern 'as' pattern_capture_target;

or_pattern
    : closed_pattern ('|' closed_pattern)*;

closed_pattern
    : literal_pattern
    | capture_pattern
    | wildcard_pattern
    | value_pattern
    | group_pattern
    | sequence_pattern
    | mapping_pattern
    | class_pattern;

// Literal patterns are used for equality and identity constraints
literal_pattern
    : signed_number
    | complex_number
    | strings
    | 'None'
    | 'True'
    | 'False';

// Literal expressions are used to restrict permitted mapping pattern keys
literal_expr
    : signed_number
    | complex_number
    | strings
    | 'None'
    | 'True'
    | 'False';

complex_number
    : signed_real_number ('+' | '-') imaginary_number
    ;

signed_number
    : '-'? NUMBER
    ;

signed_real_number
    : '-'? real_number
    ;

real_number
    : NUMBER;

imaginary_number
    : NUMBER;

capture_pattern
    : pattern_capture_target;

pattern_capture_target
    : soft_kw__not__wildcard;

wildcard_pattern
    : soft_kw_wildcard;

value_pattern
    : attr;

attr
    : NAME ('.' NAME)+
    ;
name_or_attr
    : NAME ('.' NAME)*
    ;

group_pattern
    : '(' pattern ')';

sequence_pattern
    : '[' maybe_sequence_pattern? ']'
    | '(' open_sequence_pattern? ')';

open_sequence_pattern
    : maybe_star_pattern ',' maybe_sequence_pattern?;

maybe_sequence_pattern
    : maybe_star_pattern (',' maybe_star_pattern)* ','?;

maybe_star_pattern
    : star_pattern
    | pattern;

star_pattern
    : '*' pattern_capture_target
    | '*' wildcard_pattern;

mapping_pattern
    : LBRACE RBRACE
    | LBRACE double_star_pattern ','? RBRACE
    | LBRACE items_pattern (',' double_star_pattern)? ','? RBRACE
    ;

items_pattern
    : key_value_pattern (',' key_value_pattern)*;

key_value_pattern
    : (literal_expr | attr) ':' pattern;

double_star_pattern
    : '**' pattern_capture_target;

class_pattern
    : name_or_attr '(' ((positional_patterns (',' keyword_patterns)? | keyword_patterns) ','?)? ')'
    ;

positional_patterns
    : pattern (',' pattern)*;

keyword_patterns
    : keyword_pattern (',' keyword_pattern)*;

keyword_pattern
    : NAME '=' pattern;

// Type statement
// ---------------

type_alias
    : soft_kw_type NAME type_params? '=' expression;

// Type parameter declaration
// --------------------------

type_params: '[' type_param_seq ']';

type_param_seq: type_param (',' type_param)* ','?;

type_param
    : NAME type_param_bound?
    | '*' NAME (':' expression)?
    | '**' NAME (':' expression)?
    ;

type_param_bound: ':' expression;

// EXPRESSIONS
// -----------

expressions
    : expression (',' expression )* ','?
    ;

expression
    : disjunction ('if' disjunction 'else' expression)?
    | lambdef
    ;

yield_expr
    : 'yield' ('from' expression | star_expressions?)
    ;

star_expressions
    : star_expression (',' star_expression )* ','?
    ;

star_expression
    : '*' bitwise_or
    | expression;

star_named_expressions: star_named_expression (',' star_named_expression)* ','?;

star_named_expression
    : '*' bitwise_or
    | named_expression;

assignment_expression
    : NAME ':=' expression;

named_expression
    : assignment_expression
    | expression;

disjunction
    : conjunction ('or' conjunction )*
    ;

conjunction
    : inversion ('and' inversion )*
    ;

inversion
    : 'not' inversion
    | comparison;

// Comparison operators
// --------------------

comparison
    : bitwise_or compare_op_bitwise_or_pair*
    ;

compare_op_bitwise_or_pair
    : eq_bitwise_or
    | noteq_bitwise_or
    | lte_bitwise_or
    | lt_bitwise_or
    | gte_bitwise_or
    | gt_bitwise_or
    | notin_bitwise_or
    | in_bitwise_or
    | isnot_bitwise_or
    | is_bitwise_or;

eq_bitwise_or: '==' bitwise_or;
noteq_bitwise_or
    : ('!=' ) bitwise_or;
lte_bitwise_or: '<=' bitwise_or;
lt_bitwise_or: '<' bitwise_or;
gte_bitwise_or: '>=' bitwise_or;
gt_bitwise_or: '>' bitwise_or;
notin_bitwise_or: 'not' 'in' bitwise_or;
in_bitwise_or: 'in' bitwise_or;
isnot_bitwise_or: 'is' 'not' bitwise_or;
is_bitwise_or: 'is' bitwise_or;

// Bitwise operators
// -----------------

// the bitwise/arithmetic rules below are left-recursive; ANTLR4 rewrites
// direct left recursion into iteration automatically
bitwise_or
    : bitwise_or '|' bitwise_xor
    | bitwise_xor;

bitwise_xor
    : bitwise_xor '^' bitwise_and
    | bitwise_and;

bitwise_and
    : bitwise_and '&' shift_expr
    | shift_expr;

shift_expr
    : shift_expr ('<<' | '>>') sum
    | sum
    ;

// Arithmetic operators
// --------------------

sum
    : sum ('+' | '-') term
    | term
    ;

term
    : term ('*' | '/' | '//' | '%' | '@') factor
    | factor
    ;

factor
    : '+' factor
    | '-' factor
    | '~' factor
    | power;

power
    : await_primary ('**' factor)?
    ;

// Primary elements
// ----------------

// Primary elements are things like "obj.something.something", "obj[something]", "obj(something)", "obj" ...

await_primary
    : AWAIT primary
    | primary;

primary
    : primary ('.' NAME | genexp | '(' arguments? ')' | '[' slices ']')
    | atom
    ;

slices
    : slice
    | (slice | starred_expression) (',' (slice | starred_expression))* ','?;

slice
    : expression? ':' expression? (':' expression? )?
    | named_expression;

atom
    : NAME
    | 'True'
    | 'False'
    | 'None'
    | strings
    | NUMBER
    | (tuple | group | genexp)
    | (list | listcomp)
    | (dict | set | dictcomp | setcomp)
    | '...';

group
    : '(' (yield_expr | named_expression) ')';

// Lambda functions
// ----------------

lambdef
    : 'lambda' lambda_params? ':' expression;

lambda_params
    : lambda_parameters;

// lambda_parameters etc. duplicates parameters but without annotations
// or type comments, and if there's no comma after a parameter, we expect
// a colon, not a close parenthesis. (For more, see parameters above.)
//
lambda_parameters
    : lambda_slash_no_default lambda_param_no_default* lambda_param_with_default* lambda_star_etc?
    | lambda_slash_with_default lambda_param_with_default* lambda_star_etc?
    | lambda_param_no_default+ lambda_param_with_default* lambda_star_etc?
    | lambda_param_with_default+ lambda_star_etc?
    | lambda_star_etc;

lambda_slash_no_default
    : lambda_param_no_default+ '/' ','?
    ;

lambda_slash_with_default
    : lambda_param_no_default* lambda_param_with_default+ '/' ','?
    ;

lambda_star_etc
    : '*' lambda_param_no_default lambda_param_maybe_default* lambda_kwds?
    | '*' ',' lambda_param_maybe_default+ lambda_kwds?
    | lambda_kwds;

lambda_kwds
    : '**' lambda_param_no_default;

lambda_param_no_default
    : lambda_param ','?
    ;
lambda_param_with_default
    : lambda_param default_assignment ','?
    ;
lambda_param_maybe_default
    : lambda_param default_assignment? ','?
    ;
lambda_param: NAME;

// LITERALS
// ========

fstring_middle
    : fstring_replacement_field
    | FSTRING_MIDDLE;
fstring_replacement_field
    : LBRACE (yield_expr | star_expressions) '='? fstring_conversion? fstring_full_format_spec? RBRACE;
fstring_conversion
    : '!' NAME;
fstring_full_format_spec
    : ':' fstring_format_spec*;
fstring_format_spec
    : FSTRING_MIDDLE
    | fstring_replacement_field;
fstring
    : FSTRING_START fstring_middle* FSTRING_END;

string: STRING;
strings: (fstring|string)+;

list
    : '[' star_named_expressions? ']';

tuple
    : '(' (star_named_expression ',' star_named_expressions? )? ')';

set: LBRACE star_named_expressions RBRACE;

// Dicts
// -----

dict
    : LBRACE double_starred_kvpairs? RBRACE;

double_starred_kvpairs: double_starred_kvpair (',' double_starred_kvpair)* ','?;

double_starred_kvpair
    : '**' bitwise_or
    | kvpair;

kvpair: expression ':' expression;

// Comprehensions & Generators
// ---------------------------

for_if_clauses
    : for_if_clause+;

for_if_clause
    : ASYNC? 'for' star_targets 'in' disjunction ('if' disjunction )*
    ;

listcomp
    : '[' named_expression for_if_clauses ']';

setcomp
    : LBRACE named_expression for_if_clauses RBRACE;

genexp
    : '(' ( assignment_expression | expression) for_if_clauses ')';

dictcomp
    : LBRACE kvpair for_if_clauses RBRACE;

// FUNCTION CALL ARGUMENTS
// =======================

arguments
    : args ','?;

args
    : (starred_expression | ( assignment_expression | expression)) (',' (starred_expression | ( assignment_expression | expression)))* (',' kwargs )?
    | kwargs;

kwargs
    : kwarg_or_starred (',' kwarg_or_starred)* (',' kwarg_or_double_starred (',' kwarg_or_double_starred)*)?
    | kwarg_or_double_starred (',' kwarg_or_double_starred)*
    ;

starred_expression
    : '*' expression;

kwarg_or_starred
    : NAME '=' expression
    | starred_expression;

kwarg_or_double_starred
    : NAME '=' expression
    | '**' expression;

// ASSIGNMENT TARGETS
// ==================

// Generic targets
// ---------------

// NOTE: star_targets may contain *bitwise_or, targets may not.
star_targets
    : star_target (',' star_target )* ','?
    ;

star_targets_list_seq: star_target (',' star_target)+ ','?;

star_targets_tuple_seq
    : star_target (',' | (',' star_target )+ ','?)
    ;

star_target
    : '*' (star_target)
    | target_with_star_atom;

target_with_star_atom
    : t_primary ('.' NAME | '[' slices ']')
    | star_atom
    ;

star_atom
    : NAME
    | '(' target_with_star_atom ')'
    | '(' star_targets_tuple_seq? ')'
    | '[' star_targets_list_seq? ']';

single_target
    : single_subscript_attribute_target
    | NAME
    | '(' single_target ')';

single_subscript_attribute_target
    : t_primary ('.' NAME | '[' slices ']')
    ;

t_primary
    : t_primary ('.' NAME | '[' slices ']' | genexp | '(' arguments? ')')
    | atom
    ;

// Targets for del statements
// --------------------------

del_targets: del_target (',' del_target)* ','?;

del_target
    : t_primary ('.' NAME | '[' slices ']')
    | del_t_atom
    ;

del_t_atom
    : NAME
    | '(' del_target ')'
    | '(' del_targets? ')'
    | '[' del_targets? ']';

// TYPING ELEMENTS
// ---------------

// type_expressions allow */** but ignore them
type_expressions
    : expression (',' expression)* (',' ('*' expression (',' '**' expression)? | '**' expression))?
    | '*' expression (',' '**' expression)?
    | '**' expression
    ;

func_type_comment
    : NEWLINE TYPE_COMMENT // Must be followed by indented block
    | TYPE_COMMENT;

// *** Soft Keywords: https://docs.python.org/3.12/reference/lexical_analysis.html#soft-keywords
// These predicates delegate to PythonParserBase to match a NAME token by its text.

soft_kw_type: {this.isEqualToCurrentTokenText("type")}? NAME;
soft_kw_match: {this.isEqualToCurrentTokenText("match")}? NAME;
soft_kw_case: {this.isEqualToCurrentTokenText("case")}? NAME;
soft_kw_wildcard: {this.isEqualToCurrentTokenText("_")}? NAME;
soft_kw__not__wildcard: {this.isnotEqualToCurrentTokenText("_")}? NAME;

// ========================= END OF THE GRAMMAR ===========================

View File

@@ -0,0 +1,21 @@
using Antlr4.Runtime;
namespace MicroForge.Parsing;
/// <summary>
/// Parser superclass for the generated PythonParser. Supplies the semantic
/// predicates the grammar uses to recognize soft keywords
/// (https://docs.python.org/3/reference/lexical_analysis.html#soft-keywords):
/// identifiers such as 'match', 'case', 'type' and '_' that are lexed as NAME
/// tokens and only act as keywords in specific positions.
/// </summary>
public abstract class PythonParserBase : Parser
{
    protected PythonParserBase(ITokenStream input) : base(input)
    {
    }

    // Lower-case names are kept deliberately: the grammar's predicate actions
    // reference these members verbatim.

    /// <summary>True when the parser's current token has exactly this text.</summary>
    public bool isEqualToCurrentTokenText(string tokenText) =>
        this.CurrentToken.Text == tokenText;

    /// <summary>Negation of <see cref="isEqualToCurrentTokenText"/>; mirrors Python's 'not' for grammar readability.</summary>
    public bool isnotEqualToCurrentTokenText(string tokenText) =>
        !this.isEqualToCurrentTokenText(tokenText);
}