Renamed MicroForge to MycroForge
MycroForge.Parsing/MycroForge.Parsing.csproj (new file, 26 lines)
@@ -0,0 +1,26 @@
<Project Sdk="Microsoft.NET.Sdk">

    <PropertyGroup>
        <TargetFramework>net8.0</TargetFramework>
        <ImplicitUsings>enable</ImplicitUsings>
        <Nullable>enable</Nullable>
    </PropertyGroup>

    <ItemGroup>
        <PackageReference Include="Antlr4" Version="4.6.6">
            <PrivateAssets>all</PrivateAssets>
            <IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
        </PackageReference>
        <PackageReference Include="Antlr4.CodeGenerator" Version="4.6.6">
            <PrivateAssets>all</PrivateAssets>
            <IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
        </PackageReference>
        <PackageReference Include="Antlr4.Runtime" Version="4.6.6" />
    </ItemGroup>

    <ItemGroup>
        <Content Include="PythonLexer.g4" />
        <Content Include="PythonParser.g4" />
    </ItemGroup>

</Project>
MycroForge.Parsing/PythonLexer.g4 (new file, 1369 lines)
File diff suppressed because it is too large
MycroForge.Parsing/PythonLexerBase.cs (new file, 493 lines)
@@ -0,0 +1,493 @@
/*
The MIT License (MIT)
Copyright (c) 2021 Robert Einhorn

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/

/*
 * Project      : Python Indent/Dedent handler for ANTLR4 grammars
 *
 * Developed by : Robert Einhorn
 */

using Antlr4.Runtime;
using System.Text.RegularExpressions;

namespace MycroForge.Parsing;

public abstract class PythonLexerBase : Lexer
{
    // A stack that keeps track of the indentation lengths
    private Stack<int> indentLengthStack;
    // A list where tokens are waiting to be loaded into the token stream
    private LinkedList<IToken> pendingTokens;
    // last pending token types
    private int previousPendingTokenType;
    private int lastPendingTokenTypeFromDefaultChannel;

    // The amount of opened parentheses, square brackets, or curly braces
    private int opened;
    // The amount of opened parentheses and square brackets in the current lexer mode
    private Stack<int> paren_or_bracket_openedStack;

    private bool wasSpaceIndentation;
    private bool wasTabIndentation;
    private bool wasIndentationMixedWithSpacesAndTabs;
    private const int INVALID_LENGTH = -1;

    private CommonToken curToken; // current (under processing) token
    private IToken ffgToken; // following (look ahead) token

    private const string ERR_TXT = " ERROR: ";

    protected PythonLexerBase(ICharStream input) : base(input)
    {
        this.Init();
    }

    private void Init()
    {
        this.indentLengthStack = new Stack<int>();
        this.pendingTokens = new LinkedList<IToken>();
        this.previousPendingTokenType = 0;
        this.lastPendingTokenTypeFromDefaultChannel = 0;
        this.opened = 0;
        this.paren_or_bracket_openedStack = new Stack<int>();
        this.wasSpaceIndentation = false;
        this.wasTabIndentation = false;
        this.wasIndentationMixedWithSpacesAndTabs = false;
        this.curToken = null!;
        this.ffgToken = null!;
    }

    public override IToken NextToken() // reading the input stream until a return EOF
    {
        this.CheckNextToken();
        IToken firstPendingToken = this.pendingTokens.First.Value;
        this.pendingTokens.RemoveFirst();
        return firstPendingToken; // add the queued token to the token stream
    }

    private void CheckNextToken()
    {
        if (this.previousPendingTokenType != TokenConstants.Eof)
        {
            this.SetCurrentAndFollowingTokens();
            if (this.indentLengthStack.Count == 0) // We're at the first token
            {
                this.HandleStartOfInput();
            }

            switch (this.curToken.Type)
            {
                case PythonLexer.LPAR:
                case PythonLexer.LSQB:
                case PythonLexer.LBRACE:
                    this.opened++;
                    this.AddPendingToken(this.curToken);
                    break;
                case PythonLexer.RPAR:
                case PythonLexer.RSQB:
                case PythonLexer.RBRACE:
                    this.opened--;
                    this.AddPendingToken(this.curToken);
                    break;
                case PythonLexer.NEWLINE:
                    this.HandleNEWLINEtoken();
                    break;
                case PythonLexer.STRING:
                    this.HandleSTRINGtoken();
                    break;
                case PythonLexer.FSTRING_MIDDLE:
                    this.HandleFSTRING_MIDDLE_token();
                    break;
                case PythonLexer.ERROR_TOKEN:
                    this.ReportLexerError("token recognition error at: '" + this.curToken.Text + "'");
                    this.AddPendingToken(this.curToken);
                    break;
                case TokenConstants.Eof:
                    this.HandleEOFtoken();
                    break;
                default:
                    this.AddPendingToken(this.curToken);
                    break;
            }
            this.HandleFORMAT_SPECIFICATION_MODE();
        }
    }

    private void SetCurrentAndFollowingTokens()
    {
        this.curToken = this.ffgToken == null ?
                        new CommonToken(base.NextToken()) :
                        new CommonToken(this.ffgToken);

        this.HandleFStringLexerModes();

        this.ffgToken = this.curToken.Type == TokenConstants.Eof ?
                        this.curToken :
                        base.NextToken();
    }

    // initialize the _indentLengths
    // hide the leading NEWLINE token(s)
    // if exists, find the first statement (not NEWLINE, not EOF token) that comes from the default channel
    // insert a leading INDENT token if necessary
    private void HandleStartOfInput()
    {
        // initialize the stack with a default 0 indentation length
        this.indentLengthStack.Push(0); // this will never be popped off
        while (this.curToken.Type != TokenConstants.Eof)
        {
            if (this.curToken.Channel == TokenConstants.DefaultChannel)
            {
                if (this.curToken.Type == PythonLexer.NEWLINE)
                {
                    // all the NEWLINE tokens must be ignored before the first statement
                    this.HideAndAddPendingToken(this.curToken);
                }
                else
                { // We're at the first statement
                    this.InsertLeadingIndentToken();
                    return; // continue the processing of the current token with CheckNextToken()
                }
            }
            else
            {
                this.AddPendingToken(this.curToken); // it can be WS, EXPLICIT_LINE_JOINING, or COMMENT token
            }
            this.SetCurrentAndFollowingTokens();
        } // continue the processing of the EOF token with CheckNextToken()
    }

    private void InsertLeadingIndentToken()
    {
        if (this.previousPendingTokenType == PythonLexer.WS)
        {
            var prevToken = this.pendingTokens.Last.Value;
            if (this.GetIndentationLength(prevToken.Text) != 0) // there is an "indentation" before the first statement
            {
                const string errMsg = "first statement indented";
                this.ReportLexerError(errMsg);
                // insert an INDENT token before the first statement to raise an 'unexpected indent' error later by the parser
                this.CreateAndAddPendingToken(PythonLexer.INDENT, TokenConstants.DefaultChannel, PythonLexerBase.ERR_TXT + errMsg, this.curToken);
            }
        }
    }

    private void HandleNEWLINEtoken()
    {
        if (this.opened > 0)
        {
            // We're in an implicit line joining, ignore the current NEWLINE token
            this.HideAndAddPendingToken(this.curToken);
        }
        else
        {
            CommonToken nlToken = new CommonToken(this.curToken); // save the current NEWLINE token
            bool isLookingAhead = this.ffgToken.Type == PythonLexer.WS;
            if (isLookingAhead)
            {
                this.SetCurrentAndFollowingTokens(); // set the next two tokens
            }

            switch (this.ffgToken.Type)
            {
                case PythonLexer.NEWLINE: // We're before a blank line
                case PythonLexer.COMMENT: // We're before a comment
                case PythonLexer.TYPE_COMMENT: // We're before a type comment
                    this.HideAndAddPendingToken(nlToken);
                    if (isLookingAhead)
                    {
                        this.AddPendingToken(this.curToken); // WS token
                    }
                    break;
                default:
                    this.AddPendingToken(nlToken);
                    if (isLookingAhead)
                    { // We're on whitespace(s) followed by a statement
                        int indentationLength = this.ffgToken.Type == TokenConstants.Eof ?
                                                0 :
                                                this.GetIndentationLength(this.curToken.Text);

                        if (indentationLength != PythonLexerBase.INVALID_LENGTH)
                        {
                            this.AddPendingToken(this.curToken); // WS token
                            this.InsertIndentOrDedentToken(indentationLength); // may insert INDENT token or DEDENT token(s)
                        }
                        else
                        {
                            this.ReportError("inconsistent use of tabs and spaces in indentation");
                        }
                    }
                    else
                    {
                        // We're at a newline followed by a statement (there is no whitespace before the statement)
                        this.InsertIndentOrDedentToken(0); // may insert DEDENT token(s)
                    }
                    break;
            }
        }
    }

    private void InsertIndentOrDedentToken(int indentLength)
    {
        //*** https://docs.python.org/3/reference/lexical_analysis.html#indentation
        int prevIndentLength = this.indentLengthStack.Peek();
        if (indentLength > prevIndentLength)
        {
            this.CreateAndAddPendingToken(PythonLexer.INDENT, TokenConstants.DefaultChannel, null, this.ffgToken);
            this.indentLengthStack.Push(indentLength);
        }
        else
        {
            while (indentLength < prevIndentLength)
            { // more than 1 DEDENT token may be inserted into the token stream
                this.indentLengthStack.Pop();
                prevIndentLength = this.indentLengthStack.Peek();
                if (indentLength <= prevIndentLength)
                {
                    this.CreateAndAddPendingToken(PythonLexer.DEDENT, TokenConstants.DefaultChannel, null, this.ffgToken);
                }
                else
                {
                    this.ReportError("inconsistent dedent");
                }
            }
        }
    }

    private void HandleSTRINGtoken()
    {
        // remove the \<newline> escape sequences from the string literal
        // https://docs.python.org/3.11/reference/lexical_analysis.html#string-and-bytes-literals
        string line_joinFreeStringLiteral = Regex.Replace(this.curToken.Text, @"\\\r?\n", "");
        if (this.curToken.Text.Length == line_joinFreeStringLiteral.Length)
        {
            this.AddPendingToken(this.curToken);
        }
        else
        {
            CommonToken originalSTRINGtoken = new CommonToken(this.curToken); // backup the original token
            this.curToken.Text = line_joinFreeStringLiteral;
            this.AddPendingToken(this.curToken); // add the modified token with inline string literal
            this.HideAndAddPendingToken(originalSTRINGtoken); // add the original token with a hidden channel
            // this inserted hidden token allows to restore the original string literal with the \<newline> escape sequences
        }
    }

    private void HandleFSTRING_MIDDLE_token() // replace the double braces '{{' or '}}' to single braces and hide the second braces
    {
        string fsMid = this.curToken.Text;
        fsMid = fsMid.Replace("{{", "{_").Replace("}}", "}_"); // replace: {{ --> {_ and }} --> }_
        Regex regex = new Regex(@"(?<=[{}])_");
        string[] arrOfStr = regex.Split(fsMid); // split by {_ or }_
        foreach (string s in arrOfStr)
        {
            if (!String.IsNullOrEmpty(s))
            {
                this.CreateAndAddPendingToken(PythonLexer.FSTRING_MIDDLE, TokenConstants.DefaultChannel, s, this.ffgToken);
                string lastCharacter = s.Substring(s.Length - 1);
                if ("{}".Contains(lastCharacter))
                {
                    this.CreateAndAddPendingToken(PythonLexer.FSTRING_MIDDLE, TokenConstants.HiddenChannel, lastCharacter, this.ffgToken);
                    // this inserted hidden token allows to restore the original f-string literal with the double braces
                }
            }
        }
    }

    private void HandleFStringLexerModes() // https://peps.python.org/pep-0498/#specification
    {
        if (this._modeStack.Count > 0)
        {
            switch (this.curToken.Type)
            {
                case PythonLexer.LBRACE:
                    this.PushMode(PythonLexer.DefaultMode);
                    this.paren_or_bracket_openedStack.Push(0);
                    break;
                case PythonLexer.LPAR:
                case PythonLexer.LSQB:
                    // https://peps.python.org/pep-0498/#lambdas-inside-expressions
                    this.paren_or_bracket_openedStack.Push(this.paren_or_bracket_openedStack.Pop() + 1); // increment the last element
                    break;
                case PythonLexer.RPAR:
                case PythonLexer.RSQB:
                    this.paren_or_bracket_openedStack.Push(this.paren_or_bracket_openedStack.Pop() - 1); // decrement the last element
                    break;
                case PythonLexer.COLON: // colon can only come from DEFAULT_MODE
                    if (this.paren_or_bracket_openedStack.Peek() == 0)
                    {
                        switch (this._modeStack.First()) // check the previous lexer mode (the current is DEFAULT_MODE)
                        {
                            case PythonLexer.SINGLE_QUOTE_FSTRING_MODE:
                            case PythonLexer.LONG_SINGLE_QUOTE_FSTRING_MODE:
                            case PythonLexer.SINGLE_QUOTE_FORMAT_SPECIFICATION_MODE:
                                this.Mode(PythonLexer.SINGLE_QUOTE_FORMAT_SPECIFICATION_MODE); // continue in format spec. mode
                                break;
                            case PythonLexer.DOUBLE_QUOTE_FSTRING_MODE:
                            case PythonLexer.LONG_DOUBLE_QUOTE_FSTRING_MODE:
                            case PythonLexer.DOUBLE_QUOTE_FORMAT_SPECIFICATION_MODE:
                                this.Mode(PythonLexer.DOUBLE_QUOTE_FORMAT_SPECIFICATION_MODE); // continue in format spec. mode
                                break;
                        }
                    }
                    break;
                case PythonLexer.RBRACE:
                    switch (_mode)
                    {
                        case PythonLexer.DefaultMode:
                        case PythonLexer.SINGLE_QUOTE_FORMAT_SPECIFICATION_MODE:
                        case PythonLexer.DOUBLE_QUOTE_FORMAT_SPECIFICATION_MODE:
                            this.PopMode();
                            this.paren_or_bracket_openedStack.Pop();
                            break;
                        default:
                            this.ReportLexerError("f-string: single '}' is not allowed");
                            break;
                    }
                    break;
            }
        }
    }

    private void HandleFORMAT_SPECIFICATION_MODE()
    {
        if (this._modeStack.Count > 0 && this.ffgToken.Type == PythonLexer.RBRACE)
        {
            switch (this.curToken.Type)
            {
                case PythonLexer.COLON:
                case PythonLexer.RBRACE:
                    // insert an empty FSTRING_MIDDLE token instead of the missing format specification
                    this.CreateAndAddPendingToken(PythonLexer.FSTRING_MIDDLE, TokenConstants.DefaultChannel, "", this.ffgToken);
                    break;
            }
        }
    }

    private void InsertTrailingTokens()
    {
        switch (this.lastPendingTokenTypeFromDefaultChannel)
        {
            case PythonLexer.NEWLINE:
            case PythonLexer.DEDENT:
                break; // no trailing NEWLINE token is needed
            default:
                // insert an extra trailing NEWLINE token that serves as the end of the last statement
                this.CreateAndAddPendingToken(PythonLexer.NEWLINE, TokenConstants.DefaultChannel, null, this.ffgToken); // ffgToken is EOF
                break;
        }
        this.InsertIndentOrDedentToken(0); // Now insert as many trailing DEDENT tokens as needed
    }

    private void HandleEOFtoken()
    {
        if (this.lastPendingTokenTypeFromDefaultChannel > 0)
        { // there was a statement in the input (leading NEWLINE tokens are hidden)
            this.InsertTrailingTokens();
        }
        this.AddPendingToken(this.curToken);
    }

    private void HideAndAddPendingToken(CommonToken cToken)
    {
        cToken.Channel = TokenConstants.HiddenChannel;
        this.AddPendingToken(cToken);
    }

    private void CreateAndAddPendingToken(int type, int channel, string text, IToken baseToken)
    {
        CommonToken cToken = new CommonToken(baseToken);
        cToken.Type = type;
        cToken.Channel = channel;
        cToken.StopIndex = baseToken.StartIndex - 1;

        // cToken.Text = text == null
        //     ? "<" + Vocabulary.GetSymbolicName(type) + ">"
        //     : text;
        cToken.Text = text ?? string.Empty;

        this.AddPendingToken(cToken);
    }

    private void AddPendingToken(IToken token)
    {
        // save the last pending token type because the pendingTokens linked list can be empty by the nextToken()
        this.previousPendingTokenType = token.Type;
        if (token.Channel == TokenConstants.DefaultChannel)
        {
            this.lastPendingTokenTypeFromDefaultChannel = this.previousPendingTokenType;
        }
        this.pendingTokens.AddLast(token);
    }

    private int GetIndentationLength(string textWS) // the textWS may contain spaces, tabs or form feeds
    {
        const int TAB_LENGTH = 8; // the standard number of spaces to replace a tab with spaces
        int length = 0;
        foreach (char ch in textWS)
        {
            switch (ch)
            {
                case ' ':
                    this.wasSpaceIndentation = true;
                    length += 1;
                    break;
                case '\t':
                    this.wasTabIndentation = true;
                    length += TAB_LENGTH - (length % TAB_LENGTH);
                    break;
                case '\f': // form feed
                    length = 0;
                    break;
            }
        }

        if (this.wasTabIndentation && this.wasSpaceIndentation)
        {
            if (!this.wasIndentationMixedWithSpacesAndTabs)
            {
                this.wasIndentationMixedWithSpacesAndTabs = true;
                return PythonLexerBase.INVALID_LENGTH; // only for the first inconsistent indent
            }
        }
        return length;
    }

    private void ReportLexerError(string errMsg)
    {
        // this.ErrorListenerDispatch.SyntaxError(this.ErrorOutput, this, this.curToken.Type, this.curToken.Line, this.curToken.Column, " LEXER" + PythonLexerBase.ERR_TXT + errMsg, null);
        this.ErrorListenerDispatch.SyntaxError(this, this.curToken.Type, this.curToken.Line, this.curToken.Column, " LEXER" + PythonLexerBase.ERR_TXT + errMsg, null);
    }

    private void ReportError(string errMsg)
    {
        this.ReportLexerError(errMsg);

        // the ERROR_TOKEN will raise an error in the parser
        this.CreateAndAddPendingToken(PythonLexer.ERROR_TOKEN, TokenConstants.DefaultChannel, PythonLexerBase.ERR_TXT + errMsg, this.ffgToken);
    }

    public override void Reset()
    {
        this.Init();
        base.Reset();
    }
}
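PythonLexerBase is the piece that turns Python's significant indentation into explicit tokens: NextToken() queues the underlying lexer output in pendingTokens and synthesizes INDENT, DEDENT, and a trailing NEWLINE as the indentation stack grows and shrinks. A minimal sketch of how that shows up in practice, assuming the Antlr4.CodeGenerator package has generated the PythonLexer class from PythonLexer.g4 (the helper class name below is hypothetical and not part of this commit):

using System;
using Antlr4.Runtime;
using MycroForge.Parsing;

internal static class LexerSmokeTest
{
    internal static void DumpTokens()
    {
        // One statement with a single indented block.
        var lexer = new PythonLexer(new AntlrInputStream("if x:\n    pass\n"));
        var tokens = new CommonTokenStream(lexer);
        tokens.Fill(); // pulls tokens via NextToken(), so the synthesized INDENT/DEDENT tokens are included

        foreach (var token in tokens.GetTokens())
        {
            Console.WriteLine($"{lexer.Vocabulary.GetSymbolicName(token.Type)} '{token.Text}'");
        }
        // On the default channel the stream should come out roughly as:
        // 'if' NAME ':' NEWLINE INDENT 'pass' NEWLINE DEDENT EOF
    }
}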
MycroForge.Parsing/PythonParser.g4 (new file, 880 lines)
@@ -0,0 +1,880 @@
/*
Python grammar
The MIT License (MIT)
Copyright (c) 2021 Robert Einhorn

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/

/*
 * Project      : an ANTLR4 parser grammar by the official PEG grammar
 *                https://github.com/RobEin/ANTLR4-parser-for-Python-3.12
 * Developed by : Robert Einhorn
 *
 */

parser grammar PythonParser; // Python 3.12.1 https://docs.python.org/3.12/reference/grammar.html#full-grammar-specification
options {
    tokenVocab=PythonLexer;
    superClass=PythonParserBase;
}

// STARTING RULES
// ==============

file_input: statements? EOF;
interactive: statement_newline;
eval: expressions NEWLINE* EOF;
func_type: '(' type_expressions? ')' '->' expression NEWLINE* EOF;
fstring_input: star_expressions;

// GENERAL STATEMENTS
// ==================

statements: statement+;

statement: compound_stmt | simple_stmts;

statement_newline
    : compound_stmt NEWLINE
    | simple_stmts
    | NEWLINE
    | EOF;

simple_stmts
    : simple_stmt (';' simple_stmt)* ';'? NEWLINE
    ;

// NOTE: assignment MUST precede expression, else parsing a simple assignment
// will throw a SyntaxError.
simple_stmt
    : assignment
    | type_alias
    | star_expressions
    | return_stmt
    | import_stmt
    | raise_stmt
    | 'pass'
    | del_stmt
    | yield_stmt
    | assert_stmt
    | 'break'
    | 'continue'
    | global_stmt
    | nonlocal_stmt;

compound_stmt
    : function_def
    | if_stmt
    | class_def
    | with_stmt
    | for_stmt
    | try_stmt
    | while_stmt
    | match_stmt;

// SIMPLE STATEMENTS
// =================

// NOTE: annotated_rhs may start with 'yield'; yield_expr must start with 'yield'
assignment
    : NAME ':' expression ('=' annotated_rhs )?
    | ('(' single_target ')'
         | single_subscript_attribute_target) ':' expression ('=' annotated_rhs )?
    | (star_targets '=' )+ (yield_expr | star_expressions) TYPE_COMMENT?
    | single_target augassign (yield_expr | star_expressions);

annotated_rhs: yield_expr | star_expressions;

augassign
    : '+='
    | '-='
    | '*='
    | '@='
    | '/='
    | '%='
    | '&='
    | '|='
    | '^='
    | '<<='
    | '>>='
    | '**='
    | '//=';

return_stmt
    : 'return' star_expressions?;

raise_stmt
    : 'raise' (expression ('from' expression )?)?
    ;

global_stmt: 'global' NAME (',' NAME)*;

nonlocal_stmt: 'nonlocal' NAME (',' NAME)*;

del_stmt
    : 'del' del_targets;

yield_stmt: yield_expr;

assert_stmt: 'assert' expression (',' expression )?;

import_stmt
    : import_name
    | import_from;

// Import statements
// -----------------

import_name: 'import' dotted_as_names;
// note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS
import_from
    : 'from' ('.' | '...')* dotted_name 'import' import_from_targets
    | 'from' ('.' | '...')+ 'import' import_from_targets;
import_from_targets
    : '(' import_from_as_names ','? ')'
    | import_from_as_names
    | '*';
import_from_as_names
    : import_from_as_name (',' import_from_as_name)*;
import_from_as_name
    : NAME ('as' NAME )?;
dotted_as_names
    : dotted_as_name (',' dotted_as_name)*;
dotted_as_name
    : dotted_name ('as' NAME )?;
dotted_name
    : dotted_name '.' NAME
    | NAME;

// COMPOUND STATEMENTS
// ===================

// Common elements
// ---------------

block
    : NEWLINE INDENT statements DEDENT
    | simple_stmts;

decorators: ('@' named_expression NEWLINE )+;

// Class definitions
// -----------------

class_def
    : decorators class_def_raw
    | class_def_raw;

class_def_raw
    : 'class' NAME type_params? ('(' arguments? ')' )? ':' block;

// Function definitions
// --------------------

function_def
    : decorators function_def_raw
    | function_def_raw;

function_def_raw
    : 'def' NAME type_params? '(' params? ')' ('->' expression )? ':' func_type_comment? block
    | ASYNC 'def' NAME type_params? '(' params? ')' ('->' expression )? ':' func_type_comment? block;

// Function parameters
// -------------------

params
    : parameters;

parameters
    : slash_no_default param_no_default* param_with_default* star_etc?
    | slash_with_default param_with_default* star_etc?
    | param_no_default+ param_with_default* star_etc?
    | param_with_default+ star_etc?
    | star_etc;

// Some duplication here because we can't write (',' | {isCurrentTokenType(RPAR)}?),
// which is because we don't support empty alternatives (yet).

slash_no_default
    : param_no_default+ '/' ','?
    ;
slash_with_default
    : param_no_default* param_with_default+ '/' ','?
    ;

star_etc
    : '*' param_no_default param_maybe_default* kwds?
    | '*' param_no_default_star_annotation param_maybe_default* kwds?
    | '*' ',' param_maybe_default+ kwds?
    | kwds;

kwds
    : '**' param_no_default;

// One parameter. This *includes* a following comma and type comment.
//
// There are three styles:
// - No default_assignment
// - With default_assignment
// - Maybe with default_assignment
//
// There are two alternative forms of each, to deal with type comments:
// - Ends in a comma followed by an optional type comment
// - No comma, optional type comment, must be followed by close paren
// The latter form is for a final parameter without trailing comma.
//

param_no_default
    : param ','? TYPE_COMMENT?
    ;
param_no_default_star_annotation
    : param_star_annotation ','? TYPE_COMMENT?
    ;
param_with_default
    : param default_assignment ','? TYPE_COMMENT?
    ;
param_maybe_default
    : param default_assignment? ','? TYPE_COMMENT?
    ;
param: NAME annotation?;
param_star_annotation: NAME star_annotation;
annotation: ':' expression;
star_annotation: ':' star_expression;
default_assignment: '=' expression;

// If statement
// ------------

if_stmt
    : 'if' named_expression ':' block (elif_stmt | else_block?)
    ;
elif_stmt
    : 'elif' named_expression ':' block (elif_stmt | else_block?)
    ;
else_block
    : 'else' ':' block;

// While statement
// ---------------

while_stmt
    : 'while' named_expression ':' block else_block?;

// For statement
// -------------

for_stmt
    : ASYNC? 'for' star_targets 'in' star_expressions ':' TYPE_COMMENT? block else_block?
    ;

// With statement
// --------------

with_stmt
    : ASYNC? 'with' ( '(' with_item (',' with_item)* ','? ')' ':'
                    | with_item (',' with_item)* ':' TYPE_COMMENT?
                    ) block
    ;

with_item
    : expression ('as' star_target)?
    ;

// Try statement
// -------------

try_stmt
    : 'try' ':' block finally_block
    | 'try' ':' block except_block+ else_block? finally_block?
    | 'try' ':' block except_star_block+ else_block? finally_block?;

// Except statement
// ----------------

except_block
    : 'except' (expression ('as' NAME )?)? ':' block
    ;
except_star_block
    : 'except' '*' expression ('as' NAME )? ':' block;
finally_block
    : 'finally' ':' block;

// Match statement
// ---------------

match_stmt
    : soft_kw_match subject_expr ':' NEWLINE INDENT case_block+ DEDENT;

subject_expr
    : star_named_expression ',' star_named_expressions?
    | named_expression;

case_block
    : soft_kw_case patterns guard? ':' block;

guard: 'if' named_expression;

patterns
    : open_sequence_pattern
    | pattern;

pattern
    : as_pattern
    | or_pattern;

as_pattern
    : or_pattern 'as' pattern_capture_target;

or_pattern
    : closed_pattern ('|' closed_pattern)*;

closed_pattern
    : literal_pattern
    | capture_pattern
    | wildcard_pattern
    | value_pattern
    | group_pattern
    | sequence_pattern
    | mapping_pattern
    | class_pattern;

// Literal patterns are used for equality and identity constraints
literal_pattern
    : signed_number
    | complex_number
    | strings
    | 'None'
    | 'True'
    | 'False';

// Literal expressions are used to restrict permitted mapping pattern keys
literal_expr
    : signed_number
    | complex_number
    | strings
    | 'None'
    | 'True'
    | 'False';

complex_number
    : signed_real_number ('+' | '-') imaginary_number
    ;

signed_number
    : '-'? NUMBER
    ;

signed_real_number
    : '-'? real_number
    ;

real_number
    : NUMBER;

imaginary_number
    : NUMBER;

capture_pattern
    : pattern_capture_target;

pattern_capture_target
    : soft_kw__not__wildcard;

wildcard_pattern
    : soft_kw_wildcard;

value_pattern
    : attr;

attr
    : NAME ('.' NAME)+
    ;
name_or_attr
    : NAME ('.' NAME)*
    ;

group_pattern
    : '(' pattern ')';

sequence_pattern
    : '[' maybe_sequence_pattern? ']'
    | '(' open_sequence_pattern? ')';

open_sequence_pattern
    : maybe_star_pattern ',' maybe_sequence_pattern?;

maybe_sequence_pattern
    : maybe_star_pattern (',' maybe_star_pattern)* ','?;

maybe_star_pattern
    : star_pattern
    | pattern;

star_pattern
    : '*' pattern_capture_target
    | '*' wildcard_pattern;

mapping_pattern
    : LBRACE RBRACE
    | LBRACE double_star_pattern ','? RBRACE
    | LBRACE items_pattern (',' double_star_pattern)? ','? RBRACE
    ;

items_pattern
    : key_value_pattern (',' key_value_pattern)*;

key_value_pattern
    : (literal_expr | attr) ':' pattern;

double_star_pattern
    : '**' pattern_capture_target;

class_pattern
    : name_or_attr '(' ((positional_patterns (',' keyword_patterns)? | keyword_patterns) ','?)? ')'
    ;

positional_patterns
    : pattern (',' pattern)*;

keyword_patterns
    : keyword_pattern (',' keyword_pattern)*;

keyword_pattern
    : NAME '=' pattern;

// Type statement
// ---------------

type_alias
    : soft_kw_type NAME type_params? '=' expression;

// Type parameter declaration
// --------------------------

type_params: '[' type_param_seq ']';

type_param_seq: type_param (',' type_param)* ','?;

type_param
    : NAME type_param_bound?
    | '*' NAME (':' expression)?
    | '**' NAME (':' expression)?
    ;

type_param_bound: ':' expression;

// EXPRESSIONS
// -----------

expressions
    : expression (',' expression )* ','?
    ;

expression
    : disjunction ('if' disjunction 'else' expression)?
    | lambdef
    ;

yield_expr
    : 'yield' ('from' expression | star_expressions?)
    ;

star_expressions
    : star_expression (',' star_expression )* ','?
    ;

star_expression
    : '*' bitwise_or
    | expression;

star_named_expressions: star_named_expression (',' star_named_expression)* ','?;

star_named_expression
    : '*' bitwise_or
    | named_expression;

assignment_expression
    : NAME ':=' expression;

named_expression
    : assignment_expression
    | expression;

disjunction
    : conjunction ('or' conjunction )*
    ;

conjunction
    : inversion ('and' inversion )*
    ;

inversion
    : 'not' inversion
    | comparison;

// Comparison operators
// --------------------

comparison
    : bitwise_or compare_op_bitwise_or_pair*
    ;

compare_op_bitwise_or_pair
    : eq_bitwise_or
    | noteq_bitwise_or
    | lte_bitwise_or
    | lt_bitwise_or
    | gte_bitwise_or
    | gt_bitwise_or
    | notin_bitwise_or
    | in_bitwise_or
    | isnot_bitwise_or
    | is_bitwise_or;

eq_bitwise_or: '==' bitwise_or;
noteq_bitwise_or
    : ('!=' ) bitwise_or;
lte_bitwise_or: '<=' bitwise_or;
lt_bitwise_or: '<' bitwise_or;
gte_bitwise_or: '>=' bitwise_or;
gt_bitwise_or: '>' bitwise_or;
notin_bitwise_or: 'not' 'in' bitwise_or;
in_bitwise_or: 'in' bitwise_or;
isnot_bitwise_or: 'is' 'not' bitwise_or;
is_bitwise_or: 'is' bitwise_or;

// Bitwise operators
// -----------------

bitwise_or
    : bitwise_or '|' bitwise_xor
    | bitwise_xor;

bitwise_xor
    : bitwise_xor '^' bitwise_and
    | bitwise_and;

bitwise_and
    : bitwise_and '&' shift_expr
    | shift_expr;

shift_expr
    : shift_expr ('<<' | '>>') sum
    | sum
    ;

// Arithmetic operators
// --------------------

sum
    : sum ('+' | '-') term
    | term
    ;

term
    : term ('*' | '/' | '//' | '%' | '@') factor
    | factor
    ;

factor
    : '+' factor
    | '-' factor
    | '~' factor
    | power;

power
    : await_primary ('**' factor)?
    ;

// Primary elements
// ----------------

// Primary elements are things like "obj.something.something", "obj[something]", "obj(something)", "obj" ...

await_primary
    : AWAIT primary
    | primary;

primary
    : primary ('.' NAME | genexp | '(' arguments? ')' | '[' slices ']')
    | atom
    ;

slices
    : slice
    | (slice | starred_expression) (',' (slice | starred_expression))* ','?;

slice
    : expression? ':' expression? (':' expression? )?
    | named_expression;

atom
    : NAME
    | 'True'
    | 'False'
    | 'None'
    | strings
    | NUMBER
    | (tuple | group | genexp)
    | (list | listcomp)
    | (dict | set | dictcomp | setcomp)
    | '...';

group
    : '(' (yield_expr | named_expression) ')';

// Lambda functions
// ----------------

lambdef
    : 'lambda' lambda_params? ':' expression;

lambda_params
    : lambda_parameters;

// lambda_parameters etc. duplicates parameters but without annotations
// or type comments, and if there's no comma after a parameter, we expect
// a colon, not a close parenthesis. (For more, see parameters above.)
//
lambda_parameters
    : lambda_slash_no_default lambda_param_no_default* lambda_param_with_default* lambda_star_etc?
    | lambda_slash_with_default lambda_param_with_default* lambda_star_etc?
    | lambda_param_no_default+ lambda_param_with_default* lambda_star_etc?
    | lambda_param_with_default+ lambda_star_etc?
    | lambda_star_etc;

lambda_slash_no_default
    : lambda_param_no_default+ '/' ','?
    ;

lambda_slash_with_default
    : lambda_param_no_default* lambda_param_with_default+ '/' ','?
    ;

lambda_star_etc
    : '*' lambda_param_no_default lambda_param_maybe_default* lambda_kwds?
    | '*' ',' lambda_param_maybe_default+ lambda_kwds?
    | lambda_kwds;

lambda_kwds
    : '**' lambda_param_no_default;

lambda_param_no_default
    : lambda_param ','?
    ;
lambda_param_with_default
    : lambda_param default_assignment ','?
    ;
lambda_param_maybe_default
    : lambda_param default_assignment? ','?
    ;
lambda_param: NAME;

// LITERALS
// ========

fstring_middle
    : fstring_replacement_field
    | FSTRING_MIDDLE;
fstring_replacement_field
    : LBRACE (yield_expr | star_expressions) '='? fstring_conversion? fstring_full_format_spec? RBRACE;
fstring_conversion
    : '!' NAME;
fstring_full_format_spec
    : ':' fstring_format_spec*;
fstring_format_spec
    : FSTRING_MIDDLE
    | fstring_replacement_field;
fstring
    : FSTRING_START fstring_middle* FSTRING_END;

string: STRING;
strings: (fstring|string)+;

list
    : '[' star_named_expressions? ']';

tuple
    : '(' (star_named_expression ',' star_named_expressions? )? ')';

set: LBRACE star_named_expressions RBRACE;

// Dicts
// -----

dict
    : LBRACE double_starred_kvpairs? RBRACE;

double_starred_kvpairs: double_starred_kvpair (',' double_starred_kvpair)* ','?;

double_starred_kvpair
    : '**' bitwise_or
    | kvpair;

kvpair: expression ':' expression;

// Comprehensions & Generators
// ---------------------------

for_if_clauses
    : for_if_clause+;

for_if_clause
    : ASYNC? 'for' star_targets 'in' disjunction ('if' disjunction )*
    ;

listcomp
    : '[' named_expression for_if_clauses ']';

setcomp
    : LBRACE named_expression for_if_clauses RBRACE;

genexp
    : '(' ( assignment_expression | expression) for_if_clauses ')';

dictcomp
    : LBRACE kvpair for_if_clauses RBRACE;

// FUNCTION CALL ARGUMENTS
// =======================

arguments
    : args ','?;

args
    : (starred_expression | ( assignment_expression | expression)) (',' (starred_expression | ( assignment_expression | expression)))* (',' kwargs )?
    | kwargs;

kwargs
    : kwarg_or_starred (',' kwarg_or_starred)* (',' kwarg_or_double_starred (',' kwarg_or_double_starred)*)?
    | kwarg_or_double_starred (',' kwarg_or_double_starred)*
    ;

starred_expression
    : '*' expression;

kwarg_or_starred
    : NAME '=' expression
    | starred_expression;

kwarg_or_double_starred
    : NAME '=' expression
    | '**' expression;

// ASSIGNMENT TARGETS
// ==================

// Generic targets
// ---------------

// NOTE: star_targets may contain *bitwise_or, targets may not.
star_targets
    : star_target (',' star_target )* ','?
    ;

star_targets_list_seq: star_target (',' star_target)+ ','?;

star_targets_tuple_seq
    : star_target (',' | (',' star_target )+ ','?)
    ;

star_target
    : '*' (star_target)
    | target_with_star_atom;

target_with_star_atom
    : t_primary ('.' NAME | '[' slices ']')
    | star_atom
    ;

star_atom
    : NAME
    | '(' target_with_star_atom ')'
    | '(' star_targets_tuple_seq? ')'
    | '[' star_targets_list_seq? ']';

single_target
    : single_subscript_attribute_target
    | NAME
    | '(' single_target ')';

single_subscript_attribute_target
    : t_primary ('.' NAME | '[' slices ']')
    ;

t_primary
    : t_primary ('.' NAME | '[' slices ']' | genexp | '(' arguments? ')')
    | atom
    ;

// Targets for del statements
// --------------------------

del_targets: del_target (',' del_target)* ','?;

del_target
    : t_primary ('.' NAME | '[' slices ']')
    | del_t_atom
    ;

del_t_atom
    : NAME
    | '(' del_target ')'
    | '(' del_targets? ')'
    | '[' del_targets? ']';

// TYPING ELEMENTS
// ---------------

// type_expressions allow */** but ignore them
type_expressions
    : expression (',' expression)* (',' ('*' expression (',' '**' expression)? | '**' expression))?
    | '*' expression (',' '**' expression)?
    | '**' expression
    ;

func_type_comment
    : NEWLINE TYPE_COMMENT // Must be followed by indented block
    | TYPE_COMMENT;

// *** Soft Keywords: https://docs.python.org/3.12/reference/lexical_analysis.html#soft-keywords
soft_kw_type: {this.isEqualToCurrentTokenText("type")}? NAME;
soft_kw_match: {this.isEqualToCurrentTokenText("match")}? NAME;
soft_kw_case: {this.isEqualToCurrentTokenText("case")}? NAME;
soft_kw_wildcard: {this.isEqualToCurrentTokenText("_")}? NAME;
soft_kw__not__wildcard: {this.isnotEqualToCurrentTokenText("_")}? NAME;

// ========================= END OF THE GRAMMAR ===========================
MycroForge.Parsing/PythonParserBase.cs (new file, 21 lines)
@@ -0,0 +1,21 @@
using Antlr4.Runtime;

namespace MycroForge.Parsing;

public abstract class PythonParserBase : Parser
{
    protected PythonParserBase(ITokenStream input) : base(input)
    {
    }

    // https://docs.python.org/3/reference/lexical_analysis.html#soft-keywords
    public bool isEqualToCurrentTokenText(string tokenText)
    {
        return this.CurrentToken.Text == tokenText;
    }

    public bool isnotEqualToCurrentTokenText(string tokenText)
    {
        return !this.isEqualToCurrentTokenText(tokenText); // for compatibility with the Python 'not' logical operator
    }
}
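Taken together, the files in this commit form the usual ANTLR4 pipeline: the generated PythonLexer (on top of PythonLexerBase) feeds a CommonTokenStream, and the generated PythonParser (on top of PythonParserBase, whose isEqualToCurrentTokenText predicate backs the soft_kw_* rules) consumes it, starting from the file_input rule. A minimal end-to-end sketch under those assumptions (the wrapper class name is hypothetical, not part of this commit):

using System;
using Antlr4.Runtime;
using MycroForge.Parsing;

internal static class ParserSmokeTest
{
    internal static void ParseSource()
    {
        var source = "def add(a, b):\n    return a + b\n";
        var lexer = new PythonLexer(new AntlrInputStream(source));
        var parser = new PythonParser(new CommonTokenStream(lexer));

        // file_input is the start rule declared in PythonParser.g4;
        // syntax errors are reported through the default console error listener.
        var tree = parser.file_input();
        Console.WriteLine(tree.ToStringTree(parser)); // LISP-style dump of the parse tree
    }
}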