commit 0a4c5ebb8d4bee4b2741e8456070e1d5fe9ac4c7 Author: mdnapo Date: Sat Apr 20 16:47:13 2024 +0200 Initial commit diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..caa6b3c --- /dev/null +++ b/.dockerignore @@ -0,0 +1,34 @@ +# Include any files or directories that you don't want to be copied to your +# container here (e.g., local build artifacts, temporary files, etc.). +# +# For more help, visit the .dockerignore file reference guide at +# https://docs.docker.com/go/build-context-dockerignore/ + +**/.DS_Store +**/.classpath +**/.dockerignore +**/.env +**/.git +**/.gitignore +**/.project +**/.settings +**/.toolstarget +**/.vs +**/.vscode +**/*.*proj.user +**/*.dbmdl +**/*.jfm +**/bin +**/charts +**/docker-compose* +**/compose* +**/Dockerfile* +**/node_modules +**/npm-debug.log +**/obj +**/secrets.dev.yaml +**/values.dev.yaml +LICENSE +README.md + +**/venv diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..052619c --- /dev/null +++ b/.gitignore @@ -0,0 +1,43 @@ +*.swp +*.*~ +project.lock.json +.DS_Store +*.pyc +nupkg/ + +# Visual Studio Code +.vscode/ + +# Rider +.idea/ + +# Visual Studio +.vs/ + +# Fleet +.fleet/ + +# Code Rush +.cr/ + +# User-specific files +*.suo +*.user +*.userosscache +*.sln.docstates + +# Build results +[Dd]ebug/ +[Dd]ebugPublic/ +[Rr]elease/ +[Rr]eleases/ +x64/ +x86/ +build/ +bld/ +[Bb]in/ +[Oo]bj/ +[Oo]ut/ +msbuild.log +msbuild.err +msbuild.wrn \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..9182945 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,32 @@ +FROM --platform=$BUILDPLATFORM mcr.microsoft.com/dotnet/sdk:8.0-alpine AS build +COPY . /source +WORKDIR /source/MicroForge.CLI +ARG TARGETARCH +# Leverage a cache mount to /root/.nuget/packages so that subsequent builds don't have to re-download packages. +# If TARGETARCH is "amd64", replace it with "x64" - "x64" is .NET's canonical name for this and "amd64" doesn't +# work in .NET 6.0. 
+RUN --mount=type=cache,id=nuget,target=/root/.nuget/packages \ + dotnet publish -a ${TARGETARCH/amd64/x64} --use-current-runtime --self-contained false -o /app + +FROM mcr.microsoft.com/dotnet/aspnet:8.0-bookworm-slim AS final +RUN apt update -y && \ + apt upgrade -y && \ + apt install -y git && \ + apt install -y bash && \ + apt install -y python3 && \ + apt install -y python3-pip && \ + apt install -y python3-venv + +# The Docker approach doesn't work for now, because the venv setup depends on absolute paths. +# This means that the would need to recreate the full path to the actual working directory in the Docker container, +# which should be pretty doable, but it's a concern for later. + +ENV PYTHONUNBUFFERED=1 +WORKDIR /app +COPY --from=build /app . + +WORKDIR /project +COPY MicroForge.CLI/scripts /scripts +USER root + +ENTRYPOINT ["dotnet", "/app/MicroForge.CLI.dll"] CMD ["-?"] diff --git a/MicroForge.CLI/ArgsContext.cs b/MicroForge.CLI/ArgsContext.cs new file mode 100644 index 0000000..8737110 --- /dev/null +++ b/MicroForge.CLI/ArgsContext.cs @@ -0,0 +1,6 @@ +namespace MicroForge.CLI; + +public class ArgsContext +{ + public string[] Args { get; init; } +} \ No newline at end of file diff --git a/MicroForge.CLI/Bash.cs b/MicroForge.CLI/Bash.cs new file mode 100644 index 0000000..b188f1b --- /dev/null +++ b/MicroForge.CLI/Bash.cs @@ -0,0 +1,46 @@ +using System.Diagnostics; +using System.Text; +using MicroForge.CLI.Exceptions; + +namespace MicroForge.CLI; + +public static class Bash +{ + public static async Task ExecuteAsync(params string[] script) + { + var info = new ProcessStartInfo + { + FileName = "bash", + UseShellExecute = false, + CreateNoWindow = true, + RedirectStandardInput = true, + RedirectStandardOutput = true, + RedirectStandardError = true, + }; + + using var process = Process.Start(info); + + if (process is null) + throw new NullReferenceException("Could not initialize bash process."); + + await using var input = process.StandardInput; + 
foreach (var line in script) + await input.WriteLineAsync(line); + + await input.FlushAsync(); + input.Close(); + + var sb = new StringBuilder(); + sb.Append(await process.StandardOutput.ReadToEndAsync()); + sb.Append(await process.StandardError.ReadToEndAsync()); + Console.WriteLine(sb.ToString()); + + await process.WaitForExitAsync(); + + // if (process.ExitCode != 0) + // throw new BashException($"Process exited with status code {process.ExitCode}."); + + if (process.ExitCode != 0) + Console.WriteLine($"Process exited with status code {process.ExitCode}."); + } +} \ No newline at end of file diff --git a/MicroForge.CLI/CodeGen/OrmEnvInitializer.cs b/MicroForge.CLI/CodeGen/OrmEnvInitializer.cs new file mode 100644 index 0000000..3ee9cd8 --- /dev/null +++ b/MicroForge.CLI/CodeGen/OrmEnvInitializer.cs @@ -0,0 +1,52 @@ +using MicroForge.Parsing; + +namespace MicroForge.CLI.CodeGen; + +public class OrmEnvInitializer : PythonSourceModifier +{ + public OrmEnvInitializer(string source) : base(source) + { + } + + public override object? VisitImport_from(PythonParser.Import_fromContext context) + { + var text = GetOriginalText(context); + + if (text != "from alembic import context") return null; + + Rewrite(context, + text, + "from orm.settings import OrmSettings", + "from orm.entities.entity_base import EntityBase" + ); + + return base.VisitImport_from(context); + } + + public override object? VisitAssignment(PythonParser.AssignmentContext context) + { + var text = GetOriginalText(context); + Console.WriteLine(text); + + if (text == "target_metadata = None") + { + Rewrite(context, "target_metadata = EntityBase.metadata"); + } + else if (text == "url = config.get_main_option(\"sqlalchemy.url\")") + { + Rewrite(context, "url = OrmSettings.get_connectionstring()"); + } + else if (text.StartsWith("connectable =")) + { + // Important note, the indent here is 4 spaces and not tab(s). 
+ const string indent = " "; + Rewrite(context, [ + "url = OrmSettings.get_connectionstring()", + $"{indent}context.config.set_main_option('sqlalchemy.url', url)", + $"{indent}{text}" + ]); + } + + return base.VisitAssignment(context); + } +} \ No newline at end of file diff --git a/MicroForge.CLI/CodeGen/OrmEnvUpdater.cs b/MicroForge.CLI/CodeGen/OrmEnvUpdater.cs new file mode 100644 index 0000000..b889819 --- /dev/null +++ b/MicroForge.CLI/CodeGen/OrmEnvUpdater.cs @@ -0,0 +1,29 @@ +using MicroForge.Parsing; + +namespace MicroForge.CLI.CodeGen; + +public class OrmEnvUpdater : PythonSourceModifier +{ + private readonly string _moduleName; + private readonly string _className; + + public OrmEnvUpdater(string source, string moduleName, string className) : base(source) + { + _moduleName = moduleName; + _className = className; + } + + public override object? VisitImport_from(PythonParser.Import_fromContext context) + { + var text = GetOriginalText(context); + + if (text != "from orm.entities.entity_base import EntityBase") return null; + + Rewrite(context, [ + text, + $"from orm.entities.{_moduleName} import {_className}" + ]); + + return base.VisitImport_from(context); + } +} \ No newline at end of file diff --git a/MicroForge.CLI/CodeGen/PythonSourceModifier.cs b/MicroForge.CLI/CodeGen/PythonSourceModifier.cs new file mode 100644 index 0000000..7368897 --- /dev/null +++ b/MicroForge.CLI/CodeGen/PythonSourceModifier.cs @@ -0,0 +1,39 @@ +using Antlr4.Runtime; +using MicroForge.Parsing; + +namespace MicroForge.CLI.CodeGen; + +public abstract class PythonSourceModifier : PythonParserBaseVisitor +{ + private CommonTokenStream Stream { get; } + private PythonParser Parser { get; } + private TokenStreamRewriter Rewriter { get; } + + protected PythonSourceModifier(string source) + { + var input = new AntlrInputStream(source); + var lexer = new PythonLexer(input); + Stream = new CommonTokenStream(lexer); + Parser = new PythonParser(Stream); + Rewriter = new 
TokenStreamRewriter(Stream); + } + + public string Rewrite() + { + var tree = Parser.file_input(); + Visit(tree); + return Rewriter.GetText(); + } + + protected string GetOriginalText(ParserRuleContext context) + { + // The parser does not necessarily return the original source, + // so we return the text from Rewriter.TokenStream, since this is unmodified. + return Rewriter.TokenStream.GetText(context); + } + + protected void Rewrite(ParserRuleContext context, params string[] text) + { + Rewriter.Replace(from: context.start, to: context.Stop, text: string.Join('\n', text)); + } +} \ No newline at end of file diff --git a/MicroForge.CLI/Commands/Interfaces/ISubCommandOf.cs b/MicroForge.CLI/Commands/Interfaces/ISubCommandOf.cs new file mode 100644 index 0000000..7107eae --- /dev/null +++ b/MicroForge.CLI/Commands/Interfaces/ISubCommandOf.cs @@ -0,0 +1,7 @@ +using System.CommandLine; + +namespace MicroForge.CLI.Commands.Interfaces; + +public interface ISubCommandOf where T : Command +{ +} \ No newline at end of file diff --git a/MicroForge.CLI/Commands/MicroForge.Add.Api.cs b/MicroForge.CLI/Commands/MicroForge.Add.Api.cs new file mode 100644 index 0000000..810c38c --- /dev/null +++ b/MicroForge.CLI/Commands/MicroForge.Add.Api.cs @@ -0,0 +1,21 @@ +using System.CommandLine; +using MicroForge.CLI.Commands.Interfaces; +using MicroForge.CLI.Features; + +namespace MicroForge.CLI.Commands; + +public partial class MicroForge +{ + public partial class Add + { + public class Api : Command, ISubCommandOf + { + public Api(ProjectContext context, IEnumerable features) : + base("api", "Add FastAPI to your project") + { + var feature = features.First(f => f.Name == Features.Api.FeatureName); + this.SetHandler(async () => await feature.ExecuteAsync(context)); + } + } + } +} \ No newline at end of file diff --git a/MicroForge.CLI/Commands/MicroForge.Add.Orm.cs b/MicroForge.CLI/Commands/MicroForge.Add.Orm.cs new file mode 100644 index 0000000..47731e6 --- /dev/null +++ 
b/MicroForge.CLI/Commands/MicroForge.Add.Orm.cs @@ -0,0 +1,26 @@ +using System.CommandLine; +using MicroForge.CLI.Commands.Interfaces; +using MicroForge.CLI.Features; + +namespace MicroForge.CLI.Commands; + +public partial class MicroForge +{ + public partial class Add + { + public class Orm : Command, ISubCommandOf + { + public Orm(ProjectContext context, IEnumerable features) : + base("orm", "Add SQLAlchemy to your project") + { + var feature = features.First(f => f.Name == Features.Orm.FeatureName); + this.SetHandler(async () => await feature.ExecuteAsync(context)); + } + + public class Generate + { + + } + } + } +} diff --git a/MicroForge.CLI/Commands/MicroForge.Add.cs b/MicroForge.CLI/Commands/MicroForge.Add.cs new file mode 100644 index 0000000..7403a68 --- /dev/null +++ b/MicroForge.CLI/Commands/MicroForge.Add.cs @@ -0,0 +1,33 @@ +using System.CommandLine; +using MicroForge.CLI.Commands.Interfaces; + +namespace MicroForge.CLI.Commands; + +public partial class MicroForge +{ + public new partial class Add : Command, ISubCommandOf + { + public Add(IEnumerable> subCommands) : + base("add", "Add a predefined feature to your project") + { + foreach (var subCommandOf in subCommands) + AddCommand((subCommandOf as Command)!); + } + } + + public class Run : Command, ISubCommandOf + { + public Run() : base("run", "Run your app") + { + this.SetHandler(ExecuteAsync); + } + + private async Task ExecuteAsync() + { + await Bash.ExecuteAsync([ + "source .venv/bin/activate", + "uvicorn main:app --reload" + ]); + } + } +} \ No newline at end of file diff --git a/MicroForge.CLI/Commands/MicroForge.Generate.Entity.cs b/MicroForge.CLI/Commands/MicroForge.Generate.Entity.cs new file mode 100644 index 0000000..6592749 --- /dev/null +++ b/MicroForge.CLI/Commands/MicroForge.Generate.Entity.cs @@ -0,0 +1,56 @@ +using System.CommandLine; +using Humanizer; +using MicroForge.CLI.CodeGen; +using MicroForge.CLI.Commands.Interfaces; + +namespace MicroForge.CLI.Commands; + +public partial 
class MicroForge +{ + public partial class Generate + { + public class Entity : Command, ISubCommandOf + { + private static readonly string[] Template = + [ + "from sqlalchemy import INTEGER, Column, String", + "from orm.entities.entity_base import EntityBase", + "", + "class %class_name%(EntityBase):", + "\t__tablename__ = \"%table_name%\"", + "\tid = Column(INTEGER, primary_key=True)", + "", + "\tdef __repr__(self) -> str:", + "\t\treturn f\"%class_name%(id={self.id!r})\"" + ]; + + private static readonly Argument NameArgument = + new(name: "name", description: "The name of the orm entity"); + + private readonly ProjectContext _context; + + public Entity(ProjectContext context) : base("entity", "Generate and orm entity") + { + _context = context; + AddAlias("e"); + AddArgument(NameArgument); + this.SetHandler(ExecuteAsync, NameArgument); + } + + private async Task ExecuteAsync(string name) + { + var className = name.Underscore().Pascalize(); + var moduleName = name.Underscore(); + var code = string.Join('\n', Template); + + code = code.Replace("%class_name%", className); + code = code.Replace("%table_name%", name.ToLower().Underscore()); + await _context.CreateFile($"orm/entities/{moduleName}.py", code); + + var env = await _context.ReadFile("orm/env.py"); + env = new OrmEnvUpdater(env, moduleName, className).Rewrite(); + await _context.WriteFile("orm/env.py", env); + } + } + } +} \ No newline at end of file diff --git a/MicroForge.CLI/Commands/MicroForge.Generate.Migration.cs b/MicroForge.CLI/Commands/MicroForge.Generate.Migration.cs new file mode 100644 index 0000000..da44591 --- /dev/null +++ b/MicroForge.CLI/Commands/MicroForge.Generate.Migration.cs @@ -0,0 +1,32 @@ +using System.CommandLine; +using MicroForge.CLI.Commands.Interfaces; + +namespace MicroForge.CLI.Commands; + +public partial class MicroForge +{ + public partial class Generate + { + public class Migration : Command, ISubCommandOf + { + private static readonly Argument NameArgument = + new(name: 
"name", description: "The name of the migration"); + + + public Migration() : base("migration", "Generate a migration") + { + AddAlias("m"); + AddArgument(NameArgument); + this.SetHandler(ExecuteAsync, NameArgument); + } + + private async Task ExecuteAsync(string name) + { + await Bash.ExecuteAsync( + "source .venv/bin/activate", + $"alembic revision --autogenerate -m \"{name}\" --rev-id $(date -u +\"%Y%m%d%H%M%S\")" + ); + } + } + } +} \ No newline at end of file diff --git a/MicroForge.CLI/Commands/MicroForge.Generate.Router.cs b/MicroForge.CLI/Commands/MicroForge.Generate.Router.cs new file mode 100644 index 0000000..389c4cb --- /dev/null +++ b/MicroForge.CLI/Commands/MicroForge.Generate.Router.cs @@ -0,0 +1,53 @@ +using System.CommandLine; +using Humanizer; +using MicroForge.CLI.Commands.Interfaces; + +namespace MicroForge.CLI.Commands; + +public partial class MicroForge +{ + public partial class Generate + { + public class Router : Command, ISubCommandOf + { + private static readonly string[] Template = + [ + "from fastapi import APIRouter", + "from fastapi.responses import JSONResponse", + "from fastapi.encoders import jsonable_encoder", + "", + "router = APIRouter()", + "", + "@router.get(\"/{name}\")", + "async def index(name: str):", + "\treturn JSONResponse(status_code=200, content=jsonable_encoder({'greeting': f\"Hello, {name}!\"}))" + ]; + + private static readonly Argument NameArgument = + new(name: "name", description: "The name of the api router"); + + private readonly ProjectContext _context; + + public Router(ProjectContext context) : base("router", "Generate an api router") + { + _context = context; + AddAlias("r"); + AddArgument(NameArgument); + this.SetHandler(ExecuteAsync, NameArgument); + } + + private async Task ExecuteAsync(string name) + { + var moduleName = name.Underscore(); + await _context.CreateFile($"api/routers/{moduleName}.py", Template); + + var main = await _context.ReadFile("main.py"); + main += string.Join('\n', + $"\nfrom 
api.routers import {moduleName}", + $"app.include_router(prefix=\"/{name.Kebaberize()}\", router={moduleName}.router)\n" + ); + await _context.WriteFile("main.py", main); + } + } + } +} \ No newline at end of file diff --git a/MicroForge.CLI/Commands/MicroForge.Generate.cs b/MicroForge.CLI/Commands/MicroForge.Generate.cs new file mode 100644 index 0000000..1235a15 --- /dev/null +++ b/MicroForge.CLI/Commands/MicroForge.Generate.cs @@ -0,0 +1,18 @@ +using System.CommandLine; +using MicroForge.CLI.Commands.Interfaces; + +namespace MicroForge.CLI.Commands; + +public partial class MicroForge +{ + public partial class Generate : Command, ISubCommandOf + { + public Generate(IEnumerable> subCommands) : + base("generate", "Generate a project item") + { + AddAlias("g"); + foreach (var subCommandOf in subCommands) + AddCommand((subCommandOf as Command)!); + } + } +} \ No newline at end of file diff --git a/MicroForge.CLI/Commands/MicroForge.Init.cs b/MicroForge.CLI/Commands/MicroForge.Init.cs new file mode 100644 index 0000000..cf3a291 --- /dev/null +++ b/MicroForge.CLI/Commands/MicroForge.Init.cs @@ -0,0 +1,159 @@ +using System.CommandLine; +using MicroForge.CLI.Commands.Interfaces; +using MicroForge.CLI.Features; +using Microsoft.Extensions.DependencyInjection; + +namespace MicroForge.CLI.Commands; + +public partial class MicroForge +{ + public class Init : Command, ISubCommandOf + { + #region GitIgnore + + private static readonly string[] GitIgnore = + [ + "# Byte-compiled / optimized / DLL files", "__pycache__/", "*.py[cod]", "*$py.class", "# C extensions", + "*.so", "# Distribution / packaging", ".Python", "build/", "develop-eggs/", "dist/", "downloads/", "eggs/", + ".eggs/", "lib/", "lib64/", "parts/", "sdist/", "var/", "wheels/", "share/python-wheels/", "*.egg-info/", + ".installed.cfg", "*.egg", "MANIFEST", "# PyInstaller", + "# Usually these files are written by a python script from a template", + "# before PyInstaller builds the exe, so as to inject date/other infos 
into it.", "*.manifest", "*.spec", + "# Installer logs", "pip-log.txt", "pip-delete-this-directory.txt", "# Unit test / coverage reports", + "htmlcov/", ".tox/", ".nox/", ".coverage", ".coverage.*", ".cache", "nosetests.xml", "coverage.xml", + "*.cover", "*.py,cover", ".hypothesis/", ".pytest_cache/", "cover/", "# Translations", "*.mo", "*.pot", + "# Django stuff:", "*.log", "local_settings.py", "db.sqlite3", "db.sqlite3-journal", "# Flask stuff:", + "instance/", ".webassets-cache", "# Scrapy stuff:", ".scrapy", "# Sphinx documentation", "docs/_build/", + "# PyBuilder", ".pybuilder/", "target/", "# Jupyter Notebook", ".ipynb_checkpoints", "# IPython", + "profile_default/", "ipython_config.py", "# pyenv", + "# For a library or package, you might want to ignore these files since the code is", + "# intended to run in multiple environments; otherwise, check them in:", "# .python-version", "# pipenv", + "# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.", + "# However, in case of collaboration, if having platform-specific dependencies or dependencies", + "# having no cross-platform support, pipenv may install dependencies that don't work, or not", + "# install all needed dependencies.", "#Pipfile.lock", "# poetry", + "# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.", + "# This is especially recommended for binary packages to ensure reproducibility, and is more", + "# commonly ignored for libraries.", + "# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control", + "#poetry.lock", "# pdm", + "# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.", + "#pdm.lock", + "# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it", + "# in version control.", "# https://pdm.fming.dev/#use-with-ide", ".pdm.toml", + "# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm", "__pypackages__/", + "# Celery stuff", "celerybeat-schedule", "celerybeat.pid", "# SageMath parsed files", "*.sage.py", + "# Environments", ".env", ".venv", "env/", "venv/", "ENV/", "env.bak/", "venv.bak/", + "# Spyder project settings", ".spyderproject", ".spyproject", "# Rope project settings", ".ropeproject", + "# mkdocs documentation", "/site", "# mypy", ".mypy_cache/", ".dmypy.json", "dmypy.json", + "# Pyre type checker", ".pyre/", "# pytype static type analyzer", ".pytype/", "# Cython debug symbols", + "cython_debug/", "# PyCharm", + "# JetBrains specific template is maintained in a separate JetBrains.gitignore that can", + "# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore", + "# and can be added to the global gitignore or merged into this file. For a more nuclear", + "# option (not recommended) you can uncomment the following to ignore the entire idea folder.", "#.idea/" + ]; + + #endregion + + private static readonly Argument NameArgument = + new(name: "name", description: "The name of your project"); + + private static readonly Option EntryPoint = + new(name: "--entrypoint", description: "The name of the entrypoint file"); + + private static readonly Option BranchOption = + new(name: "--branch", description: "The name of the initial git branch"); + + private static readonly Option> FeaturesOption = + new(name: "--features", description: "The features to include") + { + AllowMultipleArgumentsPerToken = true + }; + + private readonly IServiceProvider _services; + + private readonly List _features; + + public Init(IServiceProvider services) : base("init", "Initialize a new project") + { + AddArgument(NameArgument); + AddOption(EntryPoint); + AddOption(BranchOption); + AddOption(FeaturesOption); + this.SetHandler(ExecuteAsync, NameArgument, EntryPoint, BranchOption, FeaturesOption); + + _services = services; + _features = _services.GetServices().ToList(); + } 
+ + private async Task ExecuteAsync(string name, string entrypoint, string branch, IEnumerable features) + { + var featuresList = features.ToList(); + Validate(featuresList); + await Initialize(name, entrypoint, branch, featuresList); + } + + private void Validate(List features) + { + foreach (var feature in features) + if (_features.All(f => f.Name != feature)) + throw new Exception($"Feature {feature} was not found."); + } + + private async Task Initialize(string name, string entrypoint, string branch, List features) + { + // Create the project directory and change the directory for the ProjectContext + var projectRoot = await CreateDirectory(name); + var ctx = _services.GetRequiredService(); + ctx.ChangeDirectory(projectRoot); + + // Create the config file and initialize the config + await ctx.CreateFile("m4g.json", "{}"); + await ctx.LoadConfig(force: true); + + // Create the entrypoint file + entrypoint = string.IsNullOrEmpty(entrypoint) ? "main.py" : entrypoint; + await ctx.CreateFile(entrypoint, string.Empty); + ctx.Config.Entrypoint = entrypoint; + + // Create the default .gitignore + await ctx.CreateFile(".gitignore", GitIgnore); + + // Create the venv + await Bash.ExecuteAsync($"python3 -m venv {Path.Combine(projectRoot, ".venv")}"); + + // Initialize git + var _branch = string.IsNullOrEmpty(branch) ? 
"main" : branch; + await Bash.ExecuteAsync($"git -c init.defaultBranch={_branch} init {projectRoot}"); + + // Initialize features + if (features.Count > 0) + await InitializeFeatures(ctx, features); + + Console.WriteLine($"Directory {projectRoot} was successfully initialized"); + } + + private async Task CreateDirectory(string name) + { + var directory = Path.Combine(Directory.GetCurrentDirectory(), name); + + if (Directory.Exists(directory)) + throw new Exception($"Directory {directory} already exists."); + + Console.WriteLine($"Creating directory {directory}"); + + Directory.CreateDirectory(directory); + + await Bash.ExecuteAsync($"chmod -R 777 {directory}"); + + return directory; + } + + private async Task InitializeFeatures(ProjectContext projectCtx, List features) + { + foreach (var feature in features) + await _features.First(p => p.Name == feature).ExecuteAsync(projectCtx); + } + } +} \ No newline at end of file diff --git a/MicroForge.CLI/Commands/MicroForge.Install.cs b/MicroForge.CLI/Commands/MicroForge.Install.cs new file mode 100644 index 0000000..74f3de1 --- /dev/null +++ b/MicroForge.CLI/Commands/MicroForge.Install.cs @@ -0,0 +1,29 @@ +using System.CommandLine; +using MicroForge.CLI.Commands.Interfaces; + +namespace MicroForge.CLI.Commands; + +public partial class MicroForge +{ + public class Install : Command, ISubCommandOf + { + private static readonly Argument> PackagesArgument = + new(name: "packages", description: "The names of the packages to install"); + + public Install() : base("install", "Install packages and update the requirements.txt") + { + AddAlias("i"); + AddArgument(PackagesArgument); + this.SetHandler(ExecuteAsync, PackagesArgument); + } + + private async Task ExecuteAsync(IEnumerable packages) + { + await Bash.ExecuteAsync( + "source .venv/bin/activate", + $"pip install {string.Join(' ', packages)}", + "pip freeze > requirements.txt" + ); + } + } +} \ No newline at end of file diff --git 
a/MicroForge.CLI/Commands/MicroForge.Migrations.Apply.cs b/MicroForge.CLI/Commands/MicroForge.Migrations.Apply.cs new file mode 100644 index 0000000..7114fa5 --- /dev/null +++ b/MicroForge.CLI/Commands/MicroForge.Migrations.Apply.cs @@ -0,0 +1,27 @@ +using System.CommandLine; +using MicroForge.CLI.Commands.Interfaces; + +namespace MicroForge.CLI.Commands; + +public partial class MicroForge +{ + public partial class Migrations + { + public class Apply : Command, ISubCommandOf + { + public Apply() : base("apply", "Apply migrations to the database") + { + AddAlias("a"); + this.SetHandler(ExecuteAsync); + } + + private async Task ExecuteAsync() + { + await Bash.ExecuteAsync([ + "source .venv/bin/activate", + "alembic upgrade head" + ]); + } + } + } +} \ No newline at end of file diff --git a/MicroForge.CLI/Commands/MicroForge.Migrations.Rollback.cs b/MicroForge.CLI/Commands/MicroForge.Migrations.Rollback.cs new file mode 100644 index 0000000..a8b8267 --- /dev/null +++ b/MicroForge.CLI/Commands/MicroForge.Migrations.Rollback.cs @@ -0,0 +1,27 @@ +using System.CommandLine; +using MicroForge.CLI.Commands.Interfaces; + +namespace MicroForge.CLI.Commands; + +public partial class MicroForge +{ + public partial class Migrations + { + public class Rollback : Command, ISubCommandOf + { + public Rollback() : base("rollback", "Rollback the last migration") + { + AddAlias("r"); + this.SetHandler(ExecuteAsync); + } + + private async Task ExecuteAsync() + { + await Bash.ExecuteAsync([ + "source .venv/bin/activate", + "alembic downgrade -1" + ]); + } + } + } +} diff --git a/MicroForge.CLI/Commands/MicroForge.Migrations.cs b/MicroForge.CLI/Commands/MicroForge.Migrations.cs new file mode 100644 index 0000000..c2ff26a --- /dev/null +++ b/MicroForge.CLI/Commands/MicroForge.Migrations.cs @@ -0,0 +1,18 @@ +using System.CommandLine; +using MicroForge.CLI.Commands.Interfaces; + +namespace MicroForge.CLI.Commands; + +public partial class MicroForge +{ + public partial class Migrations : 
Command, ISubCommandOf + { + public Migrations(IEnumerable> subCommands) : + base("migrations", "Manage your migrations") + { + AddAlias("m"); + foreach (var subCommandOf in subCommands) + AddCommand((subCommandOf as Command)!); + } + } +} \ No newline at end of file diff --git a/MicroForge.CLI/Commands/MicroForge.Rewrite.cs b/MicroForge.CLI/Commands/MicroForge.Rewrite.cs new file mode 100644 index 0000000..96e9574 --- /dev/null +++ b/MicroForge.CLI/Commands/MicroForge.Rewrite.cs @@ -0,0 +1,24 @@ +using System.CommandLine; +using MicroForge.CLI.Commands.Interfaces; + +namespace MicroForge.CLI.Commands; + +public partial class MicroForge +{ + public class Rewrite : Command, ISubCommandOf + { + public Rewrite() : base("rewrite", "Test a python source rewriter.") + { + this.SetHandler(ExecuteAsync); + } + + private async Task ExecuteAsync() + { + // var path = Path.Combine(Directory.GetCurrentDirectory(), "main.py"); + // var source = await File.ReadAllTextAsync(path); + // var rewriter = new TestRewriter(source); + // var rewrite = rewriter.Rewrite(); + // await File.WriteAllTextAsync(path, rewrite); + } + } +} diff --git a/MicroForge.CLI/Commands/MicroForge.Uninstall.cs b/MicroForge.CLI/Commands/MicroForge.Uninstall.cs new file mode 100644 index 0000000..488a664 --- /dev/null +++ b/MicroForge.CLI/Commands/MicroForge.Uninstall.cs @@ -0,0 +1,29 @@ +using System.CommandLine; +using MicroForge.CLI.Commands.Interfaces; + +namespace MicroForge.CLI.Commands; + +public partial class MicroForge +{ + public class Uninstall : Command, ISubCommandOf + { + private static readonly Argument> PackagesArgument = + new(name: "packages", description: "The names of the packages to uninstall"); + + public Uninstall() : base("uninstall", "Uninstall packages and update the requirements.txt") + { + AddAlias("u"); + AddArgument(PackagesArgument); + this.SetHandler(ExecuteAsync, PackagesArgument); + } + + private async Task ExecuteAsync(IEnumerable packages) + { + await Bash.ExecuteAsync( + 
"source .venv/bin/activate", + $"pip uninstall {string.Join(' ', packages)}", + "pip freeze > requirements.txt" + ); + } + } +} \ No newline at end of file diff --git a/MicroForge.CLI/Commands/MicroForge.cs b/MicroForge.CLI/Commands/MicroForge.cs new file mode 100644 index 0000000..aec2623 --- /dev/null +++ b/MicroForge.CLI/Commands/MicroForge.cs @@ -0,0 +1,17 @@ +using System.CommandLine; +using MicroForge.CLI.Commands.Interfaces; + +namespace MicroForge.CLI.Commands; + +public partial class MicroForge : RootCommand +{ + public override string Name => "m4g"; + + public MicroForge(IEnumerable> commands) : base("The MicroForge CLI tool.") + { + commands + .Cast() + .ToList() + .ForEach(AddCommand); + } +} \ No newline at end of file diff --git a/MicroForge.CLI/Exceptions/BashException.cs b/MicroForge.CLI/Exceptions/BashException.cs new file mode 100644 index 0000000..0e3fb76 --- /dev/null +++ b/MicroForge.CLI/Exceptions/BashException.cs @@ -0,0 +1,13 @@ +namespace MicroForge.CLI.Exceptions; + +public class BashException : Exception +{ + private readonly string _message; + + public override string Message => _message; + + public BashException(string message) : base(message) + { + _message = message; + } +} \ No newline at end of file diff --git a/MicroForge.CLI/Extensions/ObjectStreamExtensions.cs b/MicroForge.CLI/Extensions/ObjectStreamExtensions.cs new file mode 100644 index 0000000..72b6467 --- /dev/null +++ b/MicroForge.CLI/Extensions/ObjectStreamExtensions.cs @@ -0,0 +1,20 @@ +using System.Text.Json; + +namespace MicroForge.CLI.Extensions; + +public static class ObjectStreamExtensions +{ + public static async Task SerializeAsync( + this object @object, + JsonSerializerOptions? jsonSerializerOptions = null + ) + { + using var stream = new MemoryStream(); + using var reader = new StreamReader(stream); + var options = jsonSerializerOptions ?? 
Shared.DefaultJsonSerializerOptions.Default; + + await JsonSerializer.SerializeAsync(stream, @object, options); + stream.Position = 0; + return await reader.ReadToEndAsync(); + } +} \ No newline at end of file diff --git a/MicroForge.CLI/Extensions/ServiceCollectionExtensions.cs b/MicroForge.CLI/Extensions/ServiceCollectionExtensions.cs new file mode 100644 index 0000000..d81ab04 --- /dev/null +++ b/MicroForge.CLI/Extensions/ServiceCollectionExtensions.cs @@ -0,0 +1,46 @@ +using MicroForge.CLI.Commands.Interfaces; +using MicroForge.CLI.Features; +using Microsoft.Extensions.DependencyInjection; + +namespace MicroForge.CLI.Extensions; + +public static class ServiceCollectionExtensions +{ + public static IServiceCollection AddServices(this IServiceCollection services, string[] args) + { + services.AddScoped(_ => new ArgsContext { Args = args }); + services.AddScoped(); + services.AddScoped(); + services.AddScoped(); + + return services; + } + + public static IServiceCollection AddCommands(this IServiceCollection services) + { + // Register "m4g" + services.AddScoped(); + services.AddScoped, Commands.MicroForge.Init>(); + services.AddScoped, Commands.MicroForge.Run>(); + services.AddScoped, Commands.MicroForge.Install>(); + services.AddScoped, Commands.MicroForge.Uninstall>(); + + // Register "m4g add" + services.AddScoped, Commands.MicroForge.Add>(); + services.AddScoped, Commands.MicroForge.Add.Api>(); + services.AddScoped, Commands.MicroForge.Add.Orm>(); + + // Register "m4g generate" + services.AddScoped, Commands.MicroForge.Generate>(); + services.AddScoped, Commands.MicroForge.Generate.Entity>(); + services.AddScoped, Commands.MicroForge.Generate.Router>(); + services.AddScoped, Commands.MicroForge.Generate.Migration>(); + + // Register "m4g migrations" + services.AddScoped, Commands.MicroForge.Migrations>(); + services.AddScoped, Commands.MicroForge.Migrations.Apply>(); + services.AddScoped, Commands.MicroForge.Migrations.Rollback>(); + + return services; + } +} 
\ No newline at end of file
diff --git a/MicroForge.CLI/Features/Api.cs b/MicroForge.CLI/Features/Api.cs
new file mode 100644
index 0000000..ddf009f
--- /dev/null
+++ b/MicroForge.CLI/Features/Api.cs
@@ -0,0 +1,80 @@
+namespace MicroForge.CLI.Features;
+
+/// <summary>
+/// Feature that adds a FastAPI based HTTP API to the project: installs the
+/// Python requirements, creates a sample router and wires it into <c>main.py</c>.
+/// </summary>
+public sealed class Api : IFeature
+{
+    #region Main
+
+    // Contents of the generated api/routers/hello.py sample router.
+    private static readonly string[] HelloRouter =
+    [
+        "from fastapi import APIRouter",
+        "from fastapi.responses import JSONResponse",
+        "from fastapi.encoders import jsonable_encoder",
+        "",
+        "router = APIRouter()",
+        "",
+        "@router.get(\"/{name}\")",
+        "async def greet(name: str):",
+        "\treturn JSONResponse(status_code=200, content=jsonable_encoder({'greeting': f\"Hello, {name}!\"}))"
+    ];
+
+    // Lines that are prepended to main.py to create the app and mount the sample router.
+    private static readonly string[] Main =
+    [
+        "from fastapi import FastAPI",
+        "app = FastAPI()",
+        "",
+        "from api.routers import hello",
+        "app.include_router(prefix=\"/hello\", router=hello.router)"
+    ];
+
+    #endregion
+
+    /// <summary>Identifier stored in the project config once the feature is added.</summary>
+    public const string FeatureName = "api";
+
+    /// <summary>Name of the feature; always <see cref="FeatureName"/>.</summary>
+    public string Name => FeatureName;
+
+    /// <summary>
+    /// Adds the feature to the project described by <paramref name="context"/>.
+    /// Does nothing except print a notice when the feature is already listed in the config.
+    /// </summary>
+    /// <param name="context">Project whose files and config are updated.</param>
+    public async Task ExecuteAsync(ProjectContext context)
+    {
+        if (context.Config.Features.Contains(FeatureName))
+        {
+            Console.WriteLine($"Feature {FeatureName} has already been initialized.");
+            return;
+        }
+
+        Console.WriteLine(string.Join("\n", [
+            $"Adding feature {FeatureName}",
+            "Requirements:",
+            " - fastapi",
+            " - uvicorn[standard]",
+        ]));
+
+        // The extra is quoted so that bash never treats "[standard]" as a glob pattern.
+        await Bash.ExecuteAsync(
+            "source .venv/bin/activate",
+            "python3 -m pip install fastapi 'uvicorn[standard]'",
+            "python3 -m pip freeze > requirements.txt"
+        );
+
+        await context.CreateFile("api/routers/hello.py", HelloRouter);
+
+        // Prepend the app bootstrap; the separating newline keeps the last injected
+        // statement from being glued onto the first line of the existing file.
+        var main = await context.ReadFile("main.py");
+        main = string.Join('\n', Main) + "\n" + main;
+        await context.WriteFile("main.py", main);
+
+        context.Config.Features.Add(FeatureName);
+    }
+}
\ No newline at end of file
diff --git a/MicroForge.CLI/Features/IFeature.cs b/MicroForge.CLI/Features/IFeature.cs
new file mode 100644
index 0000000..962d9af
--- /dev/null
+++
b/MicroForge.CLI/Features/IFeature.cs
@@ -0,0 +1,14 @@
+namespace MicroForge.CLI.Features;
+
+
+/// <summary>
+/// A unit of functionality that can be added to a MicroForge project.
+/// </summary>
+public interface IFeature
+{
+    /// <summary>Name that identifies the feature.</summary>
+    public string Name { get; }
+
+    /// <summary>Applies the feature to the project described by <paramref name="context"/>.</summary>
+    public Task ExecuteAsync(ProjectContext context);
+}
diff --git a/MicroForge.CLI/Features/Orm.cs b/MicroForge.CLI/Features/Orm.cs
new file mode 100644
index 0000000..8956465
--- /dev/null
+++ b/MicroForge.CLI/Features/Orm.cs
@@ -0,0 +1,114 @@
+using MicroForge.CLI.CodeGen;
+
+namespace MicroForge.CLI.Features;
+
+/// <summary>
+/// Feature that adds SQLAlchemy/Alembic based persistence to the project: installs the
+/// Python requirements, initializes Alembic in <c>orm/</c> and creates default ORM modules.
+/// </summary>
+public sealed class Orm : IFeature
+{
+    #region Defaults
+
+    // Contents of the generated orm/settings.py.
+    private static readonly string[] Settings =
+    [
+        "connectionstring = \"mysql+asyncmy://root:root@localhost:3306/example\"",
+        "",
+        "class OrmSettings:",
+        "\t@staticmethod",
+        "\tdef get_connectionstring() -> str:",
+        "\t\treturn connectionstring"
+    ];
+
+    // Contents of the generated orm/engine/async_session.py.
+    private static readonly string[] AsyncSession =
+    [
+        "from sqlalchemy.ext.asyncio import create_async_engine, AsyncEngine, AsyncSession",
+        "from orm.settings import OrmSettings",
+        "",
+        "async_engine: AsyncEngine = create_async_engine(OrmSettings.get_connectionstring())",
+        "",
+        "def async_session():",
+        "\treturn AsyncSession(async_engine, expire_on_commit=False)"
+    ];
+
+    // Contents of the generated orm/entities/entity_base.py.
+    private static readonly string[] EntityBase =
+    [
+        "from sqlalchemy.orm import DeclarativeBase",
+        "",
+        "class EntityBase(DeclarativeBase):",
+        "\tpass"
+    ];
+
+    // Contents of the generated orm/entities/user.py. __repr__ is indented one level
+    // so that it is emitted as a method of User rather than as a module-level function.
+    private static readonly string[] User =
+    [
+        "from sqlalchemy import INTEGER, Column, String",
+        "from orm.entities.entity_base import EntityBase",
+        "",
+        "class User(EntityBase):",
+        "\t__tablename__ = \"users\"",
+        "\tid = Column(INTEGER, primary_key=True)",
+        "\tfirstname = Column(String(255))",
+        "\tlastname = Column(String(255))",
+        "",
+        "\tdef __repr__(self) -> str:",
+        "\t\treturn f\"User(id={self.id!r}, firstname={self.firstname!r}, lastname={self.lastname!r})\""
+    ];
+
+    #endregion
+
+    /// <summary>Identifier stored in the project config once the feature is added.</summary>
+    public const string FeatureName = "orm";
+
+    /// <summary>Name of the feature; always <see cref="FeatureName"/>.</summary>
+    public string Name => FeatureName;
+
+    /// <summary>
+    /// Adds the feature to the project described by <paramref name="context"/>.
+    /// Does nothing except print a notice when the feature is already listed in the config.
+    /// </summary>
+    /// <param name="context">Project whose files and config are updated.</param>
+    public async Task ExecuteAsync(ProjectContext context)
+    {
+        if (context.Config.Features.Contains(FeatureName))
+        {
+            Console.WriteLine($"Feature {FeatureName} has already been initialized.");
+            return;
+        }
+
+        Console.WriteLine(string.Join("\n", [
+            $"Adding feature {FeatureName}",
+            "Requirements:",
+            " - asyncmy",
+            " - sqlalchemy",
+            " - alembic",
+        ]));
+
+        await Bash.ExecuteAsync(
+            "source .venv/bin/activate",
+            "python3 -m pip install asyncmy sqlalchemy alembic",
+            "python3 -m pip freeze > requirements.txt",
+            "alembic init -t async orm"
+        );
+
+        // Rewrite the orm/env.py that was just produced by "alembic init".
+        var env = await context.ReadFile("orm/env.py");
+        env = new OrmEnvInitializer(env).Rewrite();
+        env = new OrmEnvUpdater(env, "user", "User").Rewrite();
+        await context.WriteFile("orm/env.py", env);
+
+        await context.CreateFile("orm/settings.py", Settings);
+
+        await context.CreateFile("orm/engine/async_session.py", AsyncSession);
+
+        await context.CreateFile("orm/entities/entity_base.py", EntityBase);
+
+        await context.CreateFile("orm/entities/user.py", User);
+
+        context.Config.Features.Add(FeatureName);
+    }
+}
\ No newline at end of file
diff --git a/MicroForge.CLI/MicroForge.CLI.csproj b/MicroForge.CLI/MicroForge.CLI.csproj
new file mode 100644
index 0000000..fe2a5dd
--- /dev/null
+++ b/MicroForge.CLI/MicroForge.CLI.csproj
@@ -0,0 +1,38 @@
+ + + + Exe + net8.0 + enable + enable + true + m4g + ./nupkg + + + + + + + + + + + + + + + + + + PreserveNewest + + + + + + bin\Debug\net8.0\MicroForge.Parsing.dll + + + +
diff --git a/MicroForge.CLI/Program.cs b/MicroForge.CLI/Program.cs
new file mode 100644
index 0000000..98ea55f
--- /dev/null
+++ b/MicroForge.CLI/Program.cs
@@ -0,0 +1,33 @@
+using System.CommandLine;
+using MicroForge.CLI;
+using MicroForge.CLI.CodeGen;
+using MicroForge.CLI.Exceptions;
+using MicroForge.CLI.Extensions;
+using Microsoft.Extensions.DependencyInjection;
+using Microsoft.Extensions.Hosting;
+using RootCommand = MicroForge.CLI.Commands.MicroForge;
+
+using var host = Host
+    .CreateDefaultBuilder()
+    .ConfigureServices((_, services) =>
+    {
+        services
+
.AddServices(args)
+            .AddCommands();
+    })
+    .Build();
+
+try
+{
+    var ctx = host.Services.GetRequiredService<ProjectContext>();
+    await ctx.LoadConfig();
+    await host.Services.GetRequiredService<RootCommand>().InvokeAsync(args);
+    await ctx.SaveConfig();
+}
+catch (Exception e)
+{
+    // Report the failure (for example a missing m4g.json) instead of swallowing it,
+    // and signal it to the calling shell through a non-zero exit code.
+    Console.Error.WriteLine(e.Message);
+    Environment.ExitCode = 1;
+}
\ No newline at end of file
diff --git a/MicroForge.CLI/ProjectConfig.cs b/MicroForge.CLI/ProjectConfig.cs
new file mode 100644
index 0000000..b644916
--- /dev/null
+++ b/MicroForge.CLI/ProjectConfig.cs
@@ -0,0 +1,11 @@
+namespace MicroForge.CLI;
+
+/// <summary>Project settings that are persisted in the project's <c>m4g.json</c> file.</summary>
+public class ProjectConfig
+{
+    /// <summary>Entrypoint of the project; empty by default.</summary>
+    public string Entrypoint { get; set; } = string.Empty;
+
+    /// <summary>Names of the features that have been added to the project.</summary>
+    public List<string> Features { get; set; } = new();
+}
\ No newline at end of file
diff --git a/MicroForge.CLI/ProjectContext.cs b/MicroForge.CLI/ProjectContext.cs
new file mode 100644
index 0000000..89f4cf2
--- /dev/null
+++ b/MicroForge.CLI/ProjectContext.cs
@@ -0,0 +1,131 @@
+using System.Text.Json;
+using MicroForge.CLI.Extensions;
+
+namespace MicroForge.CLI;
+
+/// <summary>
+/// Gives commands and features access to the project on disk: its root directory,
+/// its <c>m4g.json</c> config and helpers for reading and writing project files.
+/// </summary>
+public class ProjectContext
+{
+    // rwxrwxrwx (octal 777), the mode that newly created project files receive.
+    private const UnixFileMode AllPermissions =
+        UnixFileMode.UserRead | UnixFileMode.UserWrite | UnixFileMode.UserExecute |
+        UnixFileMode.GroupRead | UnixFileMode.GroupWrite | UnixFileMode.GroupExecute |
+        UnixFileMode.OtherRead | UnixFileMode.OtherWrite | UnixFileMode.OtherExecute;
+
+    /// <summary>Directory that project file paths are resolved against.</summary>
+    public string RootDirectory { get; private set; } = Environment.CurrentDirectory;
+
+    /// <summary>Full path of the project's <c>m4g.json</c> config file.</summary>
+    public string ConfigPath => Path.Combine(RootDirectory, "m4g.json");
+
+    /// <summary>Loaded project config; stays unset until <see cref="LoadConfig"/> has read it.</summary>
+    public ProjectConfig Config { get; private set; } = default!;
+
+    private readonly ArgsContext _argsContext;
+
+    public ProjectContext(ArgsContext argsContext)
+    {
+        _argsContext = argsContext;
+    }
+
+    /// <summary>
+    /// Loads <see cref="Config"/> from <see cref="ConfigPath"/>. Loading is skipped for
+    /// invocations that do not need an existing project ("init", help and version),
+    /// unless <paramref name="force"/> is set.
+    /// </summary>
+    /// <param name="force">Load the config even for the invocations listed above.</param>
+    /// <exception cref="FileNotFoundException">The config file does not exist.</exception>
+    /// <exception cref="InvalidDataException">The config file contains no config object.</exception>
+    public async Task LoadConfig(bool force = false)
+    {
+        // "&&" is not part of the pattern, so "!force" applies to every alternative below.
+        if (_argsContext.Args
+                is ["init", ..]
+                or ["-?", ..]
+                or ["-h", ..]
+                or ["--help"]
+                or ["--version"] && !force)
+            return;
+
+        if (!File.Exists(ConfigPath))
+            throw new FileNotFoundException($"File {ConfigPath} does not exist.");
+
+        // Dispose the stream once parsing is done so the file handle is not kept open.
+        await using var stream = File.OpenRead(ConfigPath);
+
+        Config = await JsonSerializer.DeserializeAsync<ProjectConfig>(
+            stream,
+            Shared.DefaultJsonSerializerOptions.CamelCasePrettyPrint
+        ) ?? throw new InvalidDataException($"File {ConfigPath} does not contain a project config.");
+    }
+
+    /// <summary>Makes <paramref name="path"/> both the process working directory and the project root.</summary>
+    public void ChangeDirectory(string path)
+    {
+        Directory.SetCurrentDirectory(path);
+        RootDirectory = path;
+    }
+
+    /// <summary>
+    /// Creates a project file from <paramref name="content"/> (joined with line feeds) and makes it
+    /// readable, writable and executable for everyone. An existing file is left untouched.
+    /// </summary>
+    /// <param name="path">Path of the file, resolved against <see cref="RootDirectory"/>.</param>
+    /// <param name="content">Lines of the file.</param>
+    public async Task CreateFile(string path, params string[] content)
+    {
+        var fullPath = Path.Combine(RootDirectory, path);
+        var fileInfo = new FileInfo(fullPath);
+
+        if (fileInfo.Exists) return;
+
+        Directory.CreateDirectory(fileInfo.Directory!.FullName);
+        await File.WriteAllTextAsync(fullPath, string.Join("\n", content));
+
+        // Set the mode through the runtime instead of interpolating the path into a shell
+        // command, which broke on paths containing spaces or shell metacharacters.
+        if (!OperatingSystem.IsWindows())
+            File.SetUnixFileMode(fullPath, AllPermissions);
+    }
+
+    /// <summary>
+    /// Writes <paramref name="content"/> (joined with line feeds) to a project file,
+    /// creating missing directories and overwriting an existing file.
+    /// </summary>
+    /// <param name="path">Path of the file, resolved against <see cref="RootDirectory"/>.</param>
+    /// <param name="content">Lines of the file.</param>
+    public async Task WriteFile(string path, params string[] content)
+    {
+        var fullPath = Path.Combine(RootDirectory, path);
+        var fileInfo = new FileInfo(fullPath);
+        Directory.CreateDirectory(fileInfo.Directory!.FullName);
+        await File.WriteAllTextAsync(fullPath, string.Join("\n", content));
+    }
+
+    /// <summary>Reads a project file as text.</summary>
+    /// <param name="path">Path of the file, resolved against <see cref="RootDirectory"/>.</param>
+    /// <returns>The complete contents of the file.</returns>
+    /// <exception cref="FileNotFoundException">The file does not exist.</exception>
+    public async Task<string> ReadFile(string path)
+    {
+        var fullPath = Path.Combine(RootDirectory, path);
+
+        if (!File.Exists(fullPath))
+            throw new FileNotFoundException($"File {fullPath} does not exist.", fullPath);
+
+        return await File.ReadAllTextAsync(fullPath);
+    }
+
+    /// <summary>Writes <see cref="Config"/> back to <see cref="ConfigPath"/>; a no-op when no config was loaded.</summary>
+    public async Task SaveConfig()
+    {
+        if (Config is not null)
+        {
+            var json = await Config.SerializeAsync(Shared.DefaultJsonSerializerOptions.CamelCasePrettyPrint);
+            await File.WriteAllTextAsync(ConfigPath, json);
+        }
+    }
+}
\ No newline at end of file
diff --git a/MicroForge.CLI/Shared.cs b/MicroForge.CLI/Shared.cs
new file mode 100644
index 0000000..9faf6ee
--- /dev/null
+++ b/MicroForge.CLI/Shared.cs
@@ -0,0 +1,20 @@
+using System.Text.Json;
+
+namespace MicroForge.CLI;
+
+public static class Shared
+{
+    public static class DefaultJsonSerializerOptions
+    {
+        public static readonly
JsonSerializerOptions Default = new() + { + PropertyNamingPolicy = JsonNamingPolicy.CamelCase + }; + + public static readonly JsonSerializerOptions CamelCasePrettyPrint = new() + { + PropertyNamingPolicy = JsonNamingPolicy.CamelCase, + WriteIndented = true + }; + } +} \ No newline at end of file diff --git a/MicroForge.CLI/scripts/env.py b/MicroForge.CLI/scripts/env.py new file mode 100644 index 0000000..5acaf4f --- /dev/null +++ b/MicroForge.CLI/scripts/env.py @@ -0,0 +1,90 @@ +import asyncio +from logging.config import fileConfig + +from sqlalchemy import pool +from sqlalchemy.engine import Connection +from sqlalchemy.ext.asyncio import async_engine_from_config + +from alembic import context + + +# this is the Alembic Config object, which provides +# access to the values within the .ini file in use. +config = context.config + +# Interpret the config file for Python logging. +# This line sets up loggers basically. +if config.config_file_name is not None: + fileConfig(config.config_file_name) + +# add your model's MetaData object here +# for 'autogenerate' support +# from myapp import mymodel +# target_metadata = mymodel.Base.metadata +target_metadata = None + +# other values from the config, defined by the needs of env.py, +# can be acquired: +# my_important_option = config.get_main_option("my_important_option") +# ... etc. + + +def run_migrations_offline() -> None: + """Run migrations in 'offline' mode. + + This configures the context with just a URL + and not an Engine, though an Engine is acceptable + here as well. By skipping the Engine creation + we don't even need a DBAPI to be available. + + Calls to context.execute() here emit the given string to the + script output. 
+ + """ + url = config.get_main_option("sqlalchemy.url") + context.configure( + url=url, + target_metadata=target_metadata, + literal_binds=True, + dialect_opts={"paramstyle": "named"}, + ) + + with context.begin_transaction(): + context.run_migrations() + + +def do_run_migrations(connection: Connection) -> None: + context.configure(connection=connection, target_metadata=target_metadata) + + with context.begin_transaction(): + context.run_migrations() + + +async def run_async_migrations() -> None: + """In this scenario we need to create an Engine + and associate a connection with the context. + + """ + + connectable = async_engine_from_config( + config.get_section(config.config_ini_section, {}), + prefix="sqlalchemy.", + poolclass=pool.NullPool, + ) + + async with connectable.connect() as connection: + await connection.run_sync(do_run_migrations) + + await connectable.dispose() + + +def run_migrations_online() -> None: + """Run migrations in 'online' mode.""" + + asyncio.run(run_async_migrations()) + + +if context.is_offline_mode(): + run_migrations_offline() +else: + run_migrations_online() diff --git a/MicroForge.CLI/scripts/publish.sh b/MicroForge.CLI/scripts/publish.sh new file mode 100644 index 0000000..beafa66 --- /dev/null +++ b/MicroForge.CLI/scripts/publish.sh @@ -0,0 +1,5 @@ +#!/usr/bin/bash + +dotnet pack -v d + +dotnet tool update --global --add-source ./nupkg MicroForge.CLI -v d \ No newline at end of file diff --git a/MicroForge.Parsing/MicroForge.Parsing.csproj b/MicroForge.Parsing/MicroForge.Parsing.csproj new file mode 100644 index 0000000..11f6c00 --- /dev/null +++ b/MicroForge.Parsing/MicroForge.Parsing.csproj @@ -0,0 +1,26 @@ + + + + net8.0 + enable + enable + + + + + all + runtime; build; native; contentfiles; analyzers; buildtransitive + + + all + runtime; build; native; contentfiles; analyzers; buildtransitive + + + + + + + + + + diff --git a/MicroForge.Parsing/PythonLexer.g4 b/MicroForge.Parsing/PythonLexer.g4 new file mode 100644 index 
0000000..277f08c --- /dev/null +++ b/MicroForge.Parsing/PythonLexer.g4 @@ -0,0 +1,1369 @@ +/* +Python grammar +The MIT License (MIT) +Copyright (c) 2021 Robert Einhorn + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ */ + + /* + * Project : an ANTLR4 lexer grammar for Python 3 + * https://github.com/RobEin/ANTLR4-parser-for-Python-3.12 + * Developed by : Robert Einhorn, robert.einhorn.hu@gmail.com + */ + +lexer grammar PythonLexer; + +options { superClass=PythonLexerBase; } + +tokens { + INDENT, DEDENT // https://docs.python.org/3.12/reference/lexical_analysis.html#indentation + , FSTRING_START, FSTRING_MIDDLE, FSTRING_END // https://peps.python.org/pep-0701/#specification +} + + +// https://docs.python.org/3.12/reference/lexical_analysis.html + +/* + * default lexer mode + */ + +// https://docs.python.org/3.12/reference/lexical_analysis.html#keywords +FALSE : 'False'; +AWAIT : 'await'; +ELSE : 'else'; +IMPORT : 'import'; +PASS : 'pass'; +NONE : 'None'; +BREAK : 'break'; +EXCEPT : 'except'; +IN : 'in'; +RAISE : 'raise'; +TRUE : 'True'; +CLASS : 'class'; +FINALLY : 'finally'; +IS : 'is'; +RETURN : 'return'; +AND : 'and'; +CONTINUE : 'continue'; +FOR : 'for'; +LAMBDA : 'lambda'; +TRY : 'try'; +AS : 'as'; +DEF : 'def'; +FROM : 'from'; +NONLOCAL : 'nonlocal'; +WHILE : 'while'; +ASSERT : 'assert'; +DEL : 'del'; +GLOBAL : 'global'; +NOT : 'not'; +WITH : 'with'; +ASYNC : 'async'; +ELIF : 'elif'; +IF : 'if'; +OR : 'or'; +YIELD : 'yield'; + +// https://docs.python.org/3.12/library/token.html#module-token +LPAR : '('; // OPEN_PAREN +LSQB : '['; // OPEN_BRACK +LBRACE : '{'; // OPEN_BRACE +RPAR : ')'; // CLOSE_PAREN +RSQB : ']'; // CLOSE_BRACK +RBRACE : '}'; // CLOSE_BRACE +DOT : '.'; +COLON : ':'; +COMMA : ','; +SEMI : ';'; +PLUS : '+'; +MINUS : '-'; +STAR : '*'; +SLASH : '/'; +VBAR : '|'; +AMPER : '&'; +LESS : '<'; +GREATER : '>'; +EQUAL : '='; +PERCENT : '%'; +EQEQUAL : '=='; +NOTEQUAL : '!='; +LESSEQUAL : '<='; +GREATEREQUAL : '>='; +TILDE : '~'; +CIRCUMFLEX : '^'; +LEFTSHIFT : '<<'; +RIGHTSHIFT : '>>'; +DOUBLESTAR : '**'; +PLUSEQUAL : '+='; +MINEQUAL : '-='; +STAREQUAL : '*='; +SLASHEQUAL : '/='; +PERCENTEQUAL : '%='; +AMPEREQUAL : '&='; +VBAREQUAL : '|='; +CIRCUMFLEXEQUAL : '^='; 
+LEFTSHIFTEQUAL : '<<='; +RIGHTSHIFTEQUAL : '>>='; +DOUBLESTAREQUAL : '**='; +DOUBLESLASH : '//'; +DOUBLESLASHEQUAL : '//='; +AT : '@'; +ATEQUAL : '@='; +RARROW : '->'; +ELLIPSIS : '...'; +COLONEQUAL : ':='; +EXCLAMATION : '!'; + +// https://docs.python.org/3.12/reference/lexical_analysis.html#identifiers +NAME + : ID_START ID_CONTINUE* + ; + +// https://docs.python.org/3.12/reference/lexical_analysis.html#numeric-literals +NUMBER + : INTEGER + | FLOAT_NUMBER + | IMAG_NUMBER + ; + +// https://docs.python.org/3.12/reference/lexical_analysis.html#string-and-bytes-literals +STRING + : STRING_LITERAL + | BYTES_LITERAL + ; + +// https://peps.python.org/pep-0484/#type-comments +TYPE_COMMENT + : '#' WS? 'type:' ~[\r\n]* + ; + +// https://docs.python.org/3.12/reference/lexical_analysis.html#physical-lines +NEWLINE + : OS_INDEPENDENT_NL + ; + +// https://docs.python.org/3.12/reference/lexical_analysis.html#comments +COMMENT : '#' ~[\r\n]* -> channel(HIDDEN); + +// https://docs.python.org/3.12/reference/lexical_analysis.html#whitespace-between-tokens +WS : [ \t\f]+ -> channel(HIDDEN); + +// https://docs.python.org/3.12/reference/lexical_analysis.html#explicit-line-joining +EXPLICIT_LINE_JOINING : '\\' NEWLINE -> channel(HIDDEN); + +// https://docs.python.org/3.12/reference/lexical_analysis.html#formatted-string-literals +SINGLE_QUOTE_FSTRING_START : F_STRING_PREFIX ['] -> type(FSTRING_START), pushMode(SINGLE_QUOTE_FSTRING_MODE); +DOUBLE_QUOTE_FSTRING_START : F_STRING_PREFIX ["] -> type(FSTRING_START), pushMode(DOUBLE_QUOTE_FSTRING_MODE); +LONG_SINGLE_QUOTE_FSTRING_START : F_STRING_PREFIX ['][']['] -> type(FSTRING_START), pushMode(LONG_SINGLE_QUOTE_FSTRING_MODE); +LONG_DOUBLE_QUOTE_FSTRING_START : F_STRING_PREFIX ["]["]["] -> type(FSTRING_START), pushMode(LONG_DOUBLE_QUOTE_FSTRING_MODE); + +ERROR_TOKEN : . 
; // catch the unrecognized characters and redirect these errors to the parser + + +/* + * other lexer modes + */ + +mode SINGLE_QUOTE_FSTRING_MODE; + SINGLE_QUOTE_FSTRING_END : ['] -> type(FSTRING_END), popMode; + SINGLE_QUOTE_FSTRING_MIDDLE : SINGLE_QUOTE_FSTRING_LITERAL -> type(FSTRING_MIDDLE); + SINGLE_QUOTE_FSTRING_LBRACE : '{' -> type(LBRACE); // will be closed in DEFAULT_MODE or SINGLE_QUOTE_FORMAT_SPECIFICATION_RBRACE + +mode DOUBLE_QUOTE_FSTRING_MODE; + DOUBLE_QUOTE_FSTRING_END : ["] -> type(FSTRING_END), popMode; + DOUBLE_QUOTE_FSTRING_MIDDLE : DOUBLE_QUOTE_FSTRING_LITERAL -> type(FSTRING_MIDDLE); + DOUBLE_QUOTE_FSTRING_LBRACE : '{' -> type(LBRACE); // will be closed in DEFAULT_MODE or DOUBLE_QUOTE_FORMAT_SPECIFICATION_RBRACE + +mode LONG_SINGLE_QUOTE_FSTRING_MODE; + LONG_SINGLE_QUOTE_FSTRING_END : ['][']['] -> type(FSTRING_END), popMode; + LONG_SINGLE_QUOTE_FSTRING_MIDDLE : SINGLE_QUOTE_FSTRING_LITERAL -> type(FSTRING_MIDDLE); + LONG_SINGLE_QUOTE_FSTRING_LBRACE : '{' -> type(LBRACE); // will be closed in DEFAULT_MODE or SINGLE_QUOTE_FORMAT_SPECIFICATION_RBRACE + +mode LONG_DOUBLE_QUOTE_FSTRING_MODE; + LONG_DOUBLE_QUOTE_FSTRING_END : ["]["]["] -> type(FSTRING_END), popMode; + LONG_DOUBLE_QUOTE_FSTRING_MIDDLE : DOUBLE_QUOTE_FSTRING_LITERAL -> type(FSTRING_MIDDLE); + LONG_DOUBLE_QUOTE_FSTRING_LBRACE : '{' -> type(LBRACE); // will be closed in DEFAULT_MODE or DOUBLE_QUOTE_FORMAT_SPECIFICATION_RBRACE + +mode SINGLE_QUOTE_FORMAT_SPECIFICATION_MODE; // only used after a format specifier colon + SINGLE_QUOTE_FORMAT_SPECIFICATION_FSTRING_MIDDLE : FORMAT_SPEC_CHAR_NO_SINGLE_QUOTE+ -> type(FSTRING_MIDDLE); + SINGLE_QUOTE_FORMAT_SPECIFICATION_LBRACE : '{' -> type(LBRACE); // will be closed in DEFAULT_MODE by PythonLexerBase class + SINGLE_QUOTE_FORMAT_SPECIFICATION_RBRACE : '}' -> type(RBRACE); // popMode to ..._QUOTE_FSTRING_MODE by PythonLexerBase class + +mode DOUBLE_QUOTE_FORMAT_SPECIFICATION_MODE; // only used after a format specifier colon + 
DOUBLE_QUOTE_FORMAT_SPECIFICATION_FSTRING_MIDDLE : FORMAT_SPEC_CHAR_NO_DOUBLE_QUOTE+ -> type(FSTRING_MIDDLE); + DOUBLE_QUOTE_FORMAT_SPECIFICATION_LBRACE : '{' -> type(LBRACE); // will be closed in DEFAULT_MODE by PythonLexerBase class + DOUBLE_QUOTE_FORMAT_SPECIFICATION_RBRACE : '}' -> type(RBRACE); // popMode to ..._QUOTE_FSTRING_MODE by PythonLexerBase class + + +/* + * fragments + */ + +// https://docs.python.org/3.12/reference/lexical_analysis.html#literals + +// https://docs.python.org/3.12/reference/lexical_analysis.html#string-and-bytes-literals +fragment STRING_LITERAL : STRING_PREFIX? (SHORT_STRING | LONG_STRING); +fragment STRING_PREFIX : 'r' | 'u' | 'R' | 'U'; + +fragment SHORT_STRING + : '\'' SHORT_STRING_ITEM_FOR_SINGLE_QUOTE* '\'' + | '"' SHORT_STRING_ITEM_FOR_DOUBLE_QUOTE* '"' + ; + +fragment LONG_STRING + : '\'\'\'' LONG_STRING_ITEM*? '\'\'\'' + | '"""' LONG_STRING_ITEM*? '"""' + ; + +fragment SHORT_STRING_ITEM_FOR_SINGLE_QUOTE : SHORT_STRING_CHAR_NO_SINGLE_QUOTE | STRING_ESCAPE_SEQ; +fragment SHORT_STRING_ITEM_FOR_DOUBLE_QUOTE : SHORT_STRING_CHAR_NO_DOUBLE_QUOTE | STRING_ESCAPE_SEQ; + +fragment LONG_STRING_ITEM : LONG_STRING_CHAR | STRING_ESCAPE_SEQ; + +fragment SHORT_STRING_CHAR_NO_SINGLE_QUOTE : ~[\\\r\n']; // +fragment SHORT_STRING_CHAR_NO_DOUBLE_QUOTE : ~[\\\r\n"]; // + +fragment LONG_STRING_CHAR : ~'\\'; // + +fragment STRING_ESCAPE_SEQ + : '\\' OS_INDEPENDENT_NL // \ escape sequence + | '\\' . // "\" + ; // the \ (not \n) escape sequences will be removed from the string literals by the PythonLexerBase class + +fragment BYTES_LITERAL : BYTES_PREFIX (SHORT_BYTES | LONG_BYTES); +fragment BYTES_PREFIX : 'b' | 'B' | 'br' | 'Br' | 'bR' | 'BR' | 'rb' | 'rB' | 'Rb' | 'RB'; + +fragment SHORT_BYTES + : '\'' SHORT_BYTES_ITEM_FOR_SINGLE_QUOTE* '\'' + | '"' SHORT_BYTES_ITEM_FOR_DOUBLE_QUOTE* '"' + ; + +fragment LONG_BYTES + : '\'\'\'' LONG_BYTES_ITEM*? '\'\'\'' + | '"""' LONG_BYTES_ITEM*? 
'"""' + ; + +fragment SHORT_BYTES_ITEM_FOR_SINGLE_QUOTE : SHORT_BYTES_CHAR_NO_SINGLE_QUOTE | BYTES_ESCAPE_SEQ; +fragment SHORT_BYTES_ITEM_FOR_DOUBLE_QUOTE : SHORT_BYTES_CHAR_NO_DOUBLE_QUOTE | BYTES_ESCAPE_SEQ; + +fragment LONG_BYTES_ITEM : LONG_BYTES_CHAR | BYTES_ESCAPE_SEQ; + +fragment SHORT_BYTES_CHAR_NO_SINGLE_QUOTE // + : [\u0000-\u0009] + | [\u000B-\u000C] + | [\u000E-\u0026] + | [\u0028-\u005B] + | [\u005D-\u007F] + ; + +fragment SHORT_BYTES_CHAR_NO_DOUBLE_QUOTE // + : [\u0000-\u0009] + | [\u000B-\u000C] + | [\u000E-\u0021] + | [\u0023-\u005B] + | [\u005D-\u007F] + ; + +fragment LONG_BYTES_CHAR : [\u0000-\u005B] | [\u005D-\u007F]; // +fragment BYTES_ESCAPE_SEQ : '\\' [\u0000-\u007F]; // "\" + +// https://docs.python.org/3.12/library/string.html#format-specification-mini-language +fragment SINGLE_QUOTE_FSTRING_LITERAL : (FORMAT_SPEC_CHAR_NO_SINGLE_QUOTE | DOUBLE_BRACES)+; +fragment DOUBLE_QUOTE_FSTRING_LITERAL : (FORMAT_SPEC_CHAR_NO_DOUBLE_QUOTE | DOUBLE_BRACES)+; + +// https://docs.python.org/3.12/reference/lexical_analysis.html#formatted-string-literals +fragment F_STRING_PREFIX : 'f' | 'F' | 'fr' | 'Fr' | 'fR' | 'FR' | 'rf' | 'rF' | 'Rf' | 'RF'; +fragment FORMAT_SPEC_CHAR_NO_SINGLE_QUOTE : ~[{}']; +fragment FORMAT_SPEC_CHAR_NO_DOUBLE_QUOTE : ~[{}"]; +fragment DOUBLE_BRACES : '{' | '}'; + +// https://docs.python.org/3.12/reference/lexical_analysis.html#integer-literals +fragment INTEGER : DEC_INTEGER | BIN_INTEGER | OCT_INTEGER | HEX_INTEGER; +fragment DEC_INTEGER : NON_ZERO_DIGIT ('_'? DIGIT)* | '0'+ ('_'? '0')*; +fragment BIN_INTEGER : '0' ('b' | 'B') ('_'? BIN_DIGIT)+; +fragment OCT_INTEGER : '0' ('o' | 'O') ('_'? OCT_DIGIT)+; +fragment HEX_INTEGER : '0' ('x' | 'X') ('_'? 
HEX_DIGIT)+; +fragment NON_ZERO_DIGIT : [1-9]; +fragment DIGIT : [0-9]; +fragment BIN_DIGIT : '0' | '1'; +fragment OCT_DIGIT : [0-7]; +fragment HEX_DIGIT : DIGIT | [a-f] | [A-F]; + +// https://docs.python.org/3.12/reference/lexical_analysis.html#floating-point-literals +fragment FLOAT_NUMBER : POINT_FLOAT | EXPONENT_FLOAT; +fragment POINT_FLOAT : DIGIT_PART? FRACTION | DIGIT_PART '.'; +fragment EXPONENT_FLOAT : (DIGIT_PART | POINT_FLOAT) EXPONENT; +fragment DIGIT_PART : DIGIT ('_'? DIGIT)*; +fragment FRACTION : '.' DIGIT_PART; +fragment EXPONENT : ('e' | 'E') ('+' | '-')? DIGIT_PART; + +// https://docs.python.org/3.12/reference/lexical_analysis.html#imaginary-literals +fragment IMAG_NUMBER : (FLOAT_NUMBER | DIGIT_PART) ('j' | 'J'); + +// https://docs.python.org/3.12/reference/lexical_analysis.html#physical-lines +fragment OS_INDEPENDENT_NL : '\r'? '\n'; // Unix, Windows + +// https://github.com/RobEin/ANTLR4-parser-for-Python-3.12/tree/main/valid_chars_in_py_identifiers +fragment ID_CONTINUE: + ID_START + | [\u0030-\u0039] + | [\u00B7] + | [\u0300-\u036F] + | [\u0387] + | [\u0483-\u0487] + | [\u0591-\u05BD] + | [\u05BF] + | [\u05C1-\u05C2] + | [\u05C4-\u05C5] + | [\u05C7] + | [\u0610-\u061A] + | [\u064B-\u0669] + | [\u0670] + | [\u06D6-\u06DC] + | [\u06DF-\u06E4] + | [\u06E7-\u06E8] + | [\u06EA-\u06ED] + | [\u06F0-\u06F9] + | [\u0711] + | [\u0730-\u074A] + | [\u07A6-\u07B0] + | [\u07C0-\u07C9] + | [\u07EB-\u07F3] + | [\u07FD] + | [\u0816-\u0819] + | [\u081B-\u0823] + | [\u0825-\u0827] + | [\u0829-\u082D] + | [\u0859-\u085B] + | [\u0898-\u089F] + | [\u08CA-\u08E1] + | [\u08E3-\u0903] + | [\u093A-\u093C] + | [\u093E-\u094F] + | [\u0951-\u0957] + | [\u0962-\u0963] + | [\u0966-\u096F] + | [\u0981-\u0983] + | [\u09BC] + | [\u09BE-\u09C4] + | [\u09C7-\u09C8] + | [\u09CB-\u09CD] + | [\u09D7] + | [\u09E2-\u09E3] + | [\u09E6-\u09EF] + | [\u09FE] + | [\u0A01-\u0A03] + | [\u0A3C] + | [\u0A3E-\u0A42] + | [\u0A47-\u0A48] + | [\u0A4B-\u0A4D] + | [\u0A51] + | [\u0A66-\u0A71] + | 
[\u0A75] + | [\u0A81-\u0A83] + | [\u0ABC] + | [\u0ABE-\u0AC5] + | [\u0AC7-\u0AC9] + | [\u0ACB-\u0ACD] + | [\u0AE2-\u0AE3] + | [\u0AE6-\u0AEF] + | [\u0AFA-\u0AFF] + | [\u0B01-\u0B03] + | [\u0B3C] + | [\u0B3E-\u0B44] + | [\u0B47-\u0B48] + | [\u0B4B-\u0B4D] + | [\u0B55-\u0B57] + | [\u0B62-\u0B63] + | [\u0B66-\u0B6F] + | [\u0B82] + | [\u0BBE-\u0BC2] + | [\u0BC6-\u0BC8] + | [\u0BCA-\u0BCD] + | [\u0BD7] + | [\u0BE6-\u0BEF] + | [\u0C00-\u0C04] + | [\u0C3C] + | [\u0C3E-\u0C44] + | [\u0C46-\u0C48] + | [\u0C4A-\u0C4D] + | [\u0C55-\u0C56] + | [\u0C62-\u0C63] + | [\u0C66-\u0C6F] + | [\u0C81-\u0C83] + | [\u0CBC] + | [\u0CBE-\u0CC4] + | [\u0CC6-\u0CC8] + | [\u0CCA-\u0CCD] + | [\u0CD5-\u0CD6] + | [\u0CE2-\u0CE3] + | [\u0CE6-\u0CEF] + | [\u0CF3] + | [\u0D00-\u0D03] + | [\u0D3B-\u0D3C] + | [\u0D3E-\u0D44] + | [\u0D46-\u0D48] + | [\u0D4A-\u0D4D] + | [\u0D57] + | [\u0D62-\u0D63] + | [\u0D66-\u0D6F] + | [\u0D81-\u0D83] + | [\u0DCA] + | [\u0DCF-\u0DD4] + | [\u0DD6] + | [\u0DD8-\u0DDF] + | [\u0DE6-\u0DEF] + | [\u0DF2-\u0DF3] + | [\u0E31] + | [\u0E33-\u0E3A] + | [\u0E47-\u0E4E] + | [\u0E50-\u0E59] + | [\u0EB1] + | [\u0EB3-\u0EBC] + | [\u0EC8-\u0ECE] + | [\u0ED0-\u0ED9] + | [\u0F18-\u0F19] + | [\u0F20-\u0F29] + | [\u0F35] + | [\u0F37] + | [\u0F39] + | [\u0F3E-\u0F3F] + | [\u0F71-\u0F84] + | [\u0F86-\u0F87] + | [\u0F8D-\u0F97] + | [\u0F99-\u0FBC] + | [\u0FC6] + | [\u102B-\u103E] + | [\u1040-\u1049] + | [\u1056-\u1059] + | [\u105E-\u1060] + | [\u1062-\u1064] + | [\u1067-\u106D] + | [\u1071-\u1074] + | [\u1082-\u108D] + | [\u108F-\u109D] + | [\u135D-\u135F] + | [\u1369-\u1371] + | [\u1712-\u1715] + | [\u1732-\u1734] + | [\u1752-\u1753] + | [\u1772-\u1773] + | [\u17B4-\u17D3] + | [\u17DD] + | [\u17E0-\u17E9] + | [\u180B-\u180D] + | [\u180F-\u1819] + | [\u18A9] + | [\u1920-\u192B] + | [\u1930-\u193B] + | [\u1946-\u194F] + | [\u19D0-\u19DA] + | [\u1A17-\u1A1B] + | [\u1A55-\u1A5E] + | [\u1A60-\u1A7C] + | [\u1A7F-\u1A89] + | [\u1A90-\u1A99] + | [\u1AB0-\u1ABD] + | [\u1ABF-\u1ACE] + | 
[\u1B00-\u1B04] + | [\u1B34-\u1B44] + | [\u1B50-\u1B59] + | [\u1B6B-\u1B73] + | [\u1B80-\u1B82] + | [\u1BA1-\u1BAD] + | [\u1BB0-\u1BB9] + | [\u1BE6-\u1BF3] + | [\u1C24-\u1C37] + | [\u1C40-\u1C49] + | [\u1C50-\u1C59] + | [\u1CD0-\u1CD2] + | [\u1CD4-\u1CE8] + | [\u1CED] + | [\u1CF4] + | [\u1CF7-\u1CF9] + | [\u1DC0-\u1DFF] + | [\u203F-\u2040] + | [\u2054] + | [\u20D0-\u20DC] + | [\u20E1] + | [\u20E5-\u20F0] + | [\u2CEF-\u2CF1] + | [\u2D7F] + | [\u2DE0-\u2DFF] + | [\u302A-\u302F] + | [\u3099-\u309A] + | [\uA620-\uA629] + | [\uA66F] + | [\uA674-\uA67D] + | [\uA69E-\uA69F] + | [\uA6F0-\uA6F1] + | [\uA802] + | [\uA806] + | [\uA80B] + | [\uA823-\uA827] + | [\uA82C] + | [\uA880-\uA881] + | [\uA8B4-\uA8C5] + | [\uA8D0-\uA8D9] + | [\uA8E0-\uA8F1] + | [\uA8FF-\uA909] + | [\uA926-\uA92D] + | [\uA947-\uA953] + | [\uA980-\uA983] + | [\uA9B3-\uA9C0] + | [\uA9D0-\uA9D9] + | [\uA9E5] + | [\uA9F0-\uA9F9] + | [\uAA29-\uAA36] + | [\uAA43] + | [\uAA4C-\uAA4D] + | [\uAA50-\uAA59] + | [\uAA7B-\uAA7D] + | [\uAAB0] + | [\uAAB2-\uAAB4] + | [\uAAB7-\uAAB8] + | [\uAABE-\uAABF] + | [\uAAC1] + | [\uAAEB-\uAAEF] + | [\uAAF5-\uAAF6] + | [\uABE3-\uABEA] + | [\uABEC-\uABED] + | [\uABF0-\uABF9] + | [\uFB1E] + | [\uFE00-\uFE0F] + | [\uFE20-\uFE2F] + | [\uFE33-\uFE34] + | [\uFE4D-\uFE4F] + | [\uFF10-\uFF19] + | [\uFF3F] + | [\uFF9E-\uFF9F] + | [\u101FD] + | [\u102E0] + | [\u10376-\u1037A] + | [\u104A0-\u104A9] + | [\u10A01-\u10A03] + | [\u10A05-\u10A06] + | [\u10A0C-\u10A0F] + | [\u10A38-\u10A3A] + | [\u10A3F] + | [\u10AE5-\u10AE6] + | [\u10D24-\u10D27] + | [\u10D30-\u10D39] + | [\u10EAB-\u10EAC] + | [\u10EFD-\u10EFF] + | [\u10F46-\u10F50] + | [\u10F82-\u10F85] + | [\u11000-\u11002] + | [\u11038-\u11046] + | [\u11066-\u11070] + | [\u11073-\u11074] + | [\u1107F-\u11082] + | [\u110B0-\u110BA] + | [\u110C2] + | [\u110F0-\u110F9] + | [\u11100-\u11102] + | [\u11127-\u11134] + | [\u11136-\u1113F] + | [\u11145-\u11146] + | [\u11173] + | [\u11180-\u11182] + | [\u111B3-\u111C0] + | [\u111C9-\u111CC] + | 
[\u111CE-\u111D9] + | [\u1122C-\u11237] + | [\u1123E] + | [\u11241] + | [\u112DF-\u112EA] + | [\u112F0-\u112F9] + | [\u11300-\u11303] + | [\u1133B-\u1133C] + | [\u1133E-\u11344] + | [\u11347-\u11348] + | [\u1134B-\u1134D] + | [\u11357] + | [\u11362-\u11363] + | [\u11366-\u1136C] + | [\u11370-\u11374] + | [\u11435-\u11446] + | [\u11450-\u11459] + | [\u1145E] + | [\u114B0-\u114C3] + | [\u114D0-\u114D9] + | [\u115AF-\u115B5] + | [\u115B8-\u115C0] + | [\u115DC-\u115DD] + | [\u11630-\u11640] + | [\u11650-\u11659] + | [\u116AB-\u116B7] + | [\u116C0-\u116C9] + | [\u1171D-\u1172B] + | [\u11730-\u11739] + | [\u1182C-\u1183A] + | [\u118E0-\u118E9] + | [\u11930-\u11935] + | [\u11937-\u11938] + | [\u1193B-\u1193E] + | [\u11940] + | [\u11942-\u11943] + | [\u11950-\u11959] + | [\u119D1-\u119D7] + | [\u119DA-\u119E0] + | [\u119E4] + | [\u11A01-\u11A0A] + | [\u11A33-\u11A39] + | [\u11A3B-\u11A3E] + | [\u11A47] + | [\u11A51-\u11A5B] + | [\u11A8A-\u11A99] + | [\u11C2F-\u11C36] + | [\u11C38-\u11C3F] + | [\u11C50-\u11C59] + | [\u11C92-\u11CA7] + | [\u11CA9-\u11CB6] + | [\u11D31-\u11D36] + | [\u11D3A] + | [\u11D3C-\u11D3D] + | [\u11D3F-\u11D45] + | [\u11D47] + | [\u11D50-\u11D59] + | [\u11D8A-\u11D8E] + | [\u11D90-\u11D91] + | [\u11D93-\u11D97] + | [\u11DA0-\u11DA9] + | [\u11EF3-\u11EF6] + | [\u11F00-\u11F01] + | [\u11F03] + | [\u11F34-\u11F3A] + | [\u11F3E-\u11F42] + | [\u11F50-\u11F59] + | [\u13440] + | [\u13447-\u13455] + | [\u16A60-\u16A69] + | [\u16AC0-\u16AC9] + | [\u16AF0-\u16AF4] + | [\u16B30-\u16B36] + | [\u16B50-\u16B59] + | [\u16F4F] + | [\u16F51-\u16F87] + | [\u16F8F-\u16F92] + | [\u16FE4] + | [\u16FF0-\u16FF1] + | [\u1BC9D-\u1BC9E] + | [\u1CF00-\u1CF2D] + | [\u1CF30-\u1CF46] + | [\u1D165-\u1D169] + | [\u1D16D-\u1D172] + | [\u1D17B-\u1D182] + | [\u1D185-\u1D18B] + | [\u1D1AA-\u1D1AD] + | [\u1D242-\u1D244] + | [\u1D7CE-\u1D7FF] + | [\u1DA00-\u1DA36] + | [\u1DA3B-\u1DA6C] + | [\u1DA75] + | [\u1DA84] + | [\u1DA9B-\u1DA9F] + | [\u1DAA1-\u1DAAF] + | [\u1E000-\u1E006] + | 
[\u1E008-\u1E018] + | [\u1E01B-\u1E021] + | [\u1E023-\u1E024] + | [\u1E026-\u1E02A] + | [\u1E08F] + | [\u1E130-\u1E136] + | [\u1E140-\u1E149] + | [\u1E2AE] + | [\u1E2EC-\u1E2F9] + | [\u1E4EC-\u1E4F9] + | [\u1E8D0-\u1E8D6] + | [\u1E944-\u1E94A] + | [\u1E950-\u1E959] + | [\u1FBF0-\u1FBF9] + | [\uE0100-\uE01EF] +; + +// https://github.com/RobEin/ANTLR4-parser-for-Python-3.12/tree/main/valid_chars_in_py_identifiers +fragment ID_START: + [\u0041-\u005A] + | [\u005F] + | [\u0061-\u007A] + | [\u00AA] + | [\u00B5] + | [\u00BA] + | [\u00C0-\u00D6] + | [\u00D8-\u00F6] + | [\u00F8-\u02C1] + | [\u02C6-\u02D1] + | [\u02E0-\u02E4] + | [\u02EC] + | [\u02EE] + | [\u0370-\u0374] + | [\u0376-\u0377] + | [\u037B-\u037D] + | [\u037F] + | [\u0386] + | [\u0388-\u038A] + | [\u038C] + | [\u038E-\u03A1] + | [\u03A3-\u03F5] + | [\u03F7-\u0481] + | [\u048A-\u052F] + | [\u0531-\u0556] + | [\u0559] + | [\u0560-\u0588] + | [\u05D0-\u05EA] + | [\u05EF-\u05F2] + | [\u0620-\u064A] + | [\u066E-\u066F] + | [\u0671-\u06D3] + | [\u06D5] + | [\u06E5-\u06E6] + | [\u06EE-\u06EF] + | [\u06FA-\u06FC] + | [\u06FF] + | [\u0710] + | [\u0712-\u072F] + | [\u074D-\u07A5] + | [\u07B1] + | [\u07CA-\u07EA] + | [\u07F4-\u07F5] + | [\u07FA] + | [\u0800-\u0815] + | [\u081A] + | [\u0824] + | [\u0828] + | [\u0840-\u0858] + | [\u0860-\u086A] + | [\u0870-\u0887] + | [\u0889-\u088E] + | [\u08A0-\u08C9] + | [\u0904-\u0939] + | [\u093D] + | [\u0950] + | [\u0958-\u0961] + | [\u0971-\u0980] + | [\u0985-\u098C] + | [\u098F-\u0990] + | [\u0993-\u09A8] + | [\u09AA-\u09B0] + | [\u09B2] + | [\u09B6-\u09B9] + | [\u09BD] + | [\u09CE] + | [\u09DC-\u09DD] + | [\u09DF-\u09E1] + | [\u09F0-\u09F1] + | [\u09FC] + | [\u0A05-\u0A0A] + | [\u0A0F-\u0A10] + | [\u0A13-\u0A28] + | [\u0A2A-\u0A30] + | [\u0A32-\u0A33] + | [\u0A35-\u0A36] + | [\u0A38-\u0A39] + | [\u0A59-\u0A5C] + | [\u0A5E] + | [\u0A72-\u0A74] + | [\u0A85-\u0A8D] + | [\u0A8F-\u0A91] + | [\u0A93-\u0AA8] + | [\u0AAA-\u0AB0] + | [\u0AB2-\u0AB3] + | [\u0AB5-\u0AB9] + | [\u0ABD] + | 
[\u0AD0] + | [\u0AE0-\u0AE1] + | [\u0AF9] + | [\u0B05-\u0B0C] + | [\u0B0F-\u0B10] + | [\u0B13-\u0B28] + | [\u0B2A-\u0B30] + | [\u0B32-\u0B33] + | [\u0B35-\u0B39] + | [\u0B3D] + | [\u0B5C-\u0B5D] + | [\u0B5F-\u0B61] + | [\u0B71] + | [\u0B83] + | [\u0B85-\u0B8A] + | [\u0B8E-\u0B90] + | [\u0B92-\u0B95] + | [\u0B99-\u0B9A] + | [\u0B9C] + | [\u0B9E-\u0B9F] + | [\u0BA3-\u0BA4] + | [\u0BA8-\u0BAA] + | [\u0BAE-\u0BB9] + | [\u0BD0] + | [\u0C05-\u0C0C] + | [\u0C0E-\u0C10] + | [\u0C12-\u0C28] + | [\u0C2A-\u0C39] + | [\u0C3D] + | [\u0C58-\u0C5A] + | [\u0C5D] + | [\u0C60-\u0C61] + | [\u0C80] + | [\u0C85-\u0C8C] + | [\u0C8E-\u0C90] + | [\u0C92-\u0CA8] + | [\u0CAA-\u0CB3] + | [\u0CB5-\u0CB9] + | [\u0CBD] + | [\u0CDD-\u0CDE] + | [\u0CE0-\u0CE1] + | [\u0CF1-\u0CF2] + | [\u0D04-\u0D0C] + | [\u0D0E-\u0D10] + | [\u0D12-\u0D3A] + | [\u0D3D] + | [\u0D4E] + | [\u0D54-\u0D56] + | [\u0D5F-\u0D61] + | [\u0D7A-\u0D7F] + | [\u0D85-\u0D96] + | [\u0D9A-\u0DB1] + | [\u0DB3-\u0DBB] + | [\u0DBD] + | [\u0DC0-\u0DC6] + | [\u0E01-\u0E30] + | [\u0E32] + | [\u0E40-\u0E46] + | [\u0E81-\u0E82] + | [\u0E84] + | [\u0E86-\u0E8A] + | [\u0E8C-\u0EA3] + | [\u0EA5] + | [\u0EA7-\u0EB0] + | [\u0EB2] + | [\u0EBD] + | [\u0EC0-\u0EC4] + | [\u0EC6] + | [\u0EDC-\u0EDF] + | [\u0F00] + | [\u0F40-\u0F47] + | [\u0F49-\u0F6C] + | [\u0F88-\u0F8C] + | [\u1000-\u102A] + | [\u103F] + | [\u1050-\u1055] + | [\u105A-\u105D] + | [\u1061] + | [\u1065-\u1066] + | [\u106E-\u1070] + | [\u1075-\u1081] + | [\u108E] + | [\u10A0-\u10C5] + | [\u10C7] + | [\u10CD] + | [\u10D0-\u10FA] + | [\u10FC-\u1248] + | [\u124A-\u124D] + | [\u1250-\u1256] + | [\u1258] + | [\u125A-\u125D] + | [\u1260-\u1288] + | [\u128A-\u128D] + | [\u1290-\u12B0] + | [\u12B2-\u12B5] + | [\u12B8-\u12BE] + | [\u12C0] + | [\u12C2-\u12C5] + | [\u12C8-\u12D6] + | [\u12D8-\u1310] + | [\u1312-\u1315] + | [\u1318-\u135A] + | [\u1380-\u138F] + | [\u13A0-\u13F5] + | [\u13F8-\u13FD] + | [\u1401-\u166C] + | [\u166F-\u167F] + | [\u1681-\u169A] + | [\u16A0-\u16EA] + | [\u16EE-\u16F8] 
+ | [\u1700-\u1711] + | [\u171F-\u1731] + | [\u1740-\u1751] + | [\u1760-\u176C] + | [\u176E-\u1770] + | [\u1780-\u17B3] + | [\u17D7] + | [\u17DC] + | [\u1820-\u1878] + | [\u1880-\u18A8] + | [\u18AA] + | [\u18B0-\u18F5] + | [\u1900-\u191E] + | [\u1950-\u196D] + | [\u1970-\u1974] + | [\u1980-\u19AB] + | [\u19B0-\u19C9] + | [\u1A00-\u1A16] + | [\u1A20-\u1A54] + | [\u1AA7] + | [\u1B05-\u1B33] + | [\u1B45-\u1B4C] + | [\u1B83-\u1BA0] + | [\u1BAE-\u1BAF] + | [\u1BBA-\u1BE5] + | [\u1C00-\u1C23] + | [\u1C4D-\u1C4F] + | [\u1C5A-\u1C7D] + | [\u1C80-\u1C88] + | [\u1C90-\u1CBA] + | [\u1CBD-\u1CBF] + | [\u1CE9-\u1CEC] + | [\u1CEE-\u1CF3] + | [\u1CF5-\u1CF6] + | [\u1CFA] + | [\u1D00-\u1DBF] + | [\u1E00-\u1F15] + | [\u1F18-\u1F1D] + | [\u1F20-\u1F45] + | [\u1F48-\u1F4D] + | [\u1F50-\u1F57] + | [\u1F59] + | [\u1F5B] + | [\u1F5D] + | [\u1F5F-\u1F7D] + | [\u1F80-\u1FB4] + | [\u1FB6-\u1FBC] + | [\u1FBE] + | [\u1FC2-\u1FC4] + | [\u1FC6-\u1FCC] + | [\u1FD0-\u1FD3] + | [\u1FD6-\u1FDB] + | [\u1FE0-\u1FEC] + | [\u1FF2-\u1FF4] + | [\u1FF6-\u1FFC] + | [\u2071] + | [\u207F] + | [\u2090-\u209C] + | [\u2102] + | [\u2107] + | [\u210A-\u2113] + | [\u2115] + | [\u2118-\u211D] + | [\u2124] + | [\u2126] + | [\u2128] + | [\u212A-\u2139] + | [\u213C-\u213F] + | [\u2145-\u2149] + | [\u214E] + | [\u2160-\u2188] + | [\u2C00-\u2CE4] + | [\u2CEB-\u2CEE] + | [\u2CF2-\u2CF3] + | [\u2D00-\u2D25] + | [\u2D27] + | [\u2D2D] + | [\u2D30-\u2D67] + | [\u2D6F] + | [\u2D80-\u2D96] + | [\u2DA0-\u2DA6] + | [\u2DA8-\u2DAE] + | [\u2DB0-\u2DB6] + | [\u2DB8-\u2DBE] + | [\u2DC0-\u2DC6] + | [\u2DC8-\u2DCE] + | [\u2DD0-\u2DD6] + | [\u2DD8-\u2DDE] + | [\u3005-\u3007] + | [\u3021-\u3029] + | [\u3031-\u3035] + | [\u3038-\u303C] + | [\u3041-\u3096] + | [\u309D-\u309F] + | [\u30A1-\u30FA] + | [\u30FC-\u30FF] + | [\u3105-\u312F] + | [\u3131-\u318E] + | [\u31A0-\u31BF] + | [\u31F0-\u31FF] + | [\u3400-\u4DBF] + | [\u4E00-\uA48C] + | [\uA4D0-\uA4FD] + | [\uA500-\uA60C] + | [\uA610-\uA61F] + | [\uA62A-\uA62B] + | [\uA640-\uA66E] + | 
[\uA67F-\uA69D] + | [\uA6A0-\uA6EF] + | [\uA717-\uA71F] + | [\uA722-\uA788] + | [\uA78B-\uA7CA] + | [\uA7D0-\uA7D1] + | [\uA7D3] + | [\uA7D5-\uA7D9] + | [\uA7F2-\uA801] + | [\uA803-\uA805] + | [\uA807-\uA80A] + | [\uA80C-\uA822] + | [\uA840-\uA873] + | [\uA882-\uA8B3] + | [\uA8F2-\uA8F7] + | [\uA8FB] + | [\uA8FD-\uA8FE] + | [\uA90A-\uA925] + | [\uA930-\uA946] + | [\uA960-\uA97C] + | [\uA984-\uA9B2] + | [\uA9CF] + | [\uA9E0-\uA9E4] + | [\uA9E6-\uA9EF] + | [\uA9FA-\uA9FE] + | [\uAA00-\uAA28] + | [\uAA40-\uAA42] + | [\uAA44-\uAA4B] + | [\uAA60-\uAA76] + | [\uAA7A] + | [\uAA7E-\uAAAF] + | [\uAAB1] + | [\uAAB5-\uAAB6] + | [\uAAB9-\uAABD] + | [\uAAC0] + | [\uAAC2] + | [\uAADB-\uAADD] + | [\uAAE0-\uAAEA] + | [\uAAF2-\uAAF4] + | [\uAB01-\uAB06] + | [\uAB09-\uAB0E] + | [\uAB11-\uAB16] + | [\uAB20-\uAB26] + | [\uAB28-\uAB2E] + | [\uAB30-\uAB5A] + | [\uAB5C-\uAB69] + | [\uAB70-\uABE2] + | [\uAC00-\uD7A3] + | [\uD7B0-\uD7C6] + | [\uD7CB-\uD7FB] + | [\uF900-\uFA6D] + | [\uFA70-\uFAD9] + | [\uFB00-\uFB06] + | [\uFB13-\uFB17] + | [\uFB1D] + | [\uFB1F-\uFB28] + | [\uFB2A-\uFB36] + | [\uFB38-\uFB3C] + | [\uFB3E] + | [\uFB40-\uFB41] + | [\uFB43-\uFB44] + | [\uFB46-\uFBB1] + | [\uFBD3-\uFC5D] + | [\uFC64-\uFD3D] + | [\uFD50-\uFD8F] + | [\uFD92-\uFDC7] + | [\uFDF0-\uFDF9] + | [\uFE71] + | [\uFE73] + | [\uFE77] + | [\uFE79] + | [\uFE7B] + | [\uFE7D] + | [\uFE7F-\uFEFC] + | [\uFF21-\uFF3A] + | [\uFF41-\uFF5A] + | [\uFF66-\uFF9D] + | [\uFFA0-\uFFBE] + | [\uFFC2-\uFFC7] + | [\uFFCA-\uFFCF] + | [\uFFD2-\uFFD7] + | [\uFFDA-\uFFDC] + | [\u10000-\u1000B] + | [\u1000D-\u10026] + | [\u10028-\u1003A] + | [\u1003C-\u1003D] + | [\u1003F-\u1004D] + | [\u10050-\u1005D] + | [\u10080-\u100FA] + | [\u10140-\u10174] + | [\u10280-\u1029C] + | [\u102A0-\u102D0] + | [\u10300-\u1031F] + | [\u1032D-\u1034A] + | [\u10350-\u10375] + | [\u10380-\u1039D] + | [\u103A0-\u103C3] + | [\u103C8-\u103CF] + | [\u103D1-\u103D5] + | [\u10400-\u1049D] + | [\u104B0-\u104D3] + | [\u104D8-\u104FB] + | [\u10500-\u10527] + | 
[\u10530-\u10563] + | [\u10570-\u1057A] + | [\u1057C-\u1058A] + | [\u1058C-\u10592] + | [\u10594-\u10595] + | [\u10597-\u105A1] + | [\u105A3-\u105B1] + | [\u105B3-\u105B9] + | [\u105BB-\u105BC] + | [\u10600-\u10736] + | [\u10740-\u10755] + | [\u10760-\u10767] + | [\u10780-\u10785] + | [\u10787-\u107B0] + | [\u107B2-\u107BA] + | [\u10800-\u10805] + | [\u10808] + | [\u1080A-\u10835] + | [\u10837-\u10838] + | [\u1083C] + | [\u1083F-\u10855] + | [\u10860-\u10876] + | [\u10880-\u1089E] + | [\u108E0-\u108F2] + | [\u108F4-\u108F5] + | [\u10900-\u10915] + | [\u10920-\u10939] + | [\u10980-\u109B7] + | [\u109BE-\u109BF] + | [\u10A00] + | [\u10A10-\u10A13] + | [\u10A15-\u10A17] + | [\u10A19-\u10A35] + | [\u10A60-\u10A7C] + | [\u10A80-\u10A9C] + | [\u10AC0-\u10AC7] + | [\u10AC9-\u10AE4] + | [\u10B00-\u10B35] + | [\u10B40-\u10B55] + | [\u10B60-\u10B72] + | [\u10B80-\u10B91] + | [\u10C00-\u10C48] + | [\u10C80-\u10CB2] + | [\u10CC0-\u10CF2] + | [\u10D00-\u10D23] + | [\u10E80-\u10EA9] + | [\u10EB0-\u10EB1] + | [\u10F00-\u10F1C] + | [\u10F27] + | [\u10F30-\u10F45] + | [\u10F70-\u10F81] + | [\u10FB0-\u10FC4] + | [\u10FE0-\u10FF6] + | [\u11003-\u11037] + | [\u11071-\u11072] + | [\u11075] + | [\u11083-\u110AF] + | [\u110D0-\u110E8] + | [\u11103-\u11126] + | [\u11144] + | [\u11147] + | [\u11150-\u11172] + | [\u11176] + | [\u11183-\u111B2] + | [\u111C1-\u111C4] + | [\u111DA] + | [\u111DC] + | [\u11200-\u11211] + | [\u11213-\u1122B] + | [\u1123F-\u11240] + | [\u11280-\u11286] + | [\u11288] + | [\u1128A-\u1128D] + | [\u1128F-\u1129D] + | [\u1129F-\u112A8] + | [\u112B0-\u112DE] + | [\u11305-\u1130C] + | [\u1130F-\u11310] + | [\u11313-\u11328] + | [\u1132A-\u11330] + | [\u11332-\u11333] + | [\u11335-\u11339] + | [\u1133D] + | [\u11350] + | [\u1135D-\u11361] + | [\u11400-\u11434] + | [\u11447-\u1144A] + | [\u1145F-\u11461] + | [\u11480-\u114AF] + | [\u114C4-\u114C5] + | [\u114C7] + | [\u11580-\u115AE] + | [\u115D8-\u115DB] + | [\u11600-\u1162F] + | [\u11644] + | [\u11680-\u116AA] + | 
[\u116B8] + | [\u11700-\u1171A] + | [\u11740-\u11746] + | [\u11800-\u1182B] + | [\u118A0-\u118DF] + | [\u118FF-\u11906] + | [\u11909] + | [\u1190C-\u11913] + | [\u11915-\u11916] + | [\u11918-\u1192F] + | [\u1193F] + | [\u11941] + | [\u119A0-\u119A7] + | [\u119AA-\u119D0] + | [\u119E1] + | [\u119E3] + | [\u11A00] + | [\u11A0B-\u11A32] + | [\u11A3A] + | [\u11A50] + | [\u11A5C-\u11A89] + | [\u11A9D] + | [\u11AB0-\u11AF8] + | [\u11C00-\u11C08] + | [\u11C0A-\u11C2E] + | [\u11C40] + | [\u11C72-\u11C8F] + | [\u11D00-\u11D06] + | [\u11D08-\u11D09] + | [\u11D0B-\u11D30] + | [\u11D46] + | [\u11D60-\u11D65] + | [\u11D67-\u11D68] + | [\u11D6A-\u11D89] + | [\u11D98] + | [\u11EE0-\u11EF2] + | [\u11F02] + | [\u11F04-\u11F10] + | [\u11F12-\u11F33] + | [\u11FB0] + | [\u12000-\u12399] + | [\u12400-\u1246E] + | [\u12480-\u12543] + | [\u12F90-\u12FF0] + | [\u13000-\u1342F] + | [\u13441-\u13446] + | [\u14400-\u14646] + | [\u16800-\u16A38] + | [\u16A40-\u16A5E] + | [\u16A70-\u16ABE] + | [\u16AD0-\u16AED] + | [\u16B00-\u16B2F] + | [\u16B40-\u16B43] + | [\u16B63-\u16B77] + | [\u16B7D-\u16B8F] + | [\u16E40-\u16E7F] + | [\u16F00-\u16F4A] + | [\u16F50] + | [\u16F93-\u16F9F] + | [\u16FE0-\u16FE1] + | [\u16FE3] + | [\u17000-\u187F7] + | [\u18800-\u18CD5] + | [\u18D00-\u18D08] + | [\u1AFF0-\u1AFF3] + | [\u1AFF5-\u1AFFB] + | [\u1AFFD-\u1AFFE] + | [\u1B000-\u1B122] + | [\u1B132] + | [\u1B150-\u1B152] + | [\u1B155] + | [\u1B164-\u1B167] + | [\u1B170-\u1B2FB] + | [\u1BC00-\u1BC6A] + | [\u1BC70-\u1BC7C] + | [\u1BC80-\u1BC88] + | [\u1BC90-\u1BC99] + | [\u1D400-\u1D454] + | [\u1D456-\u1D49C] + | [\u1D49E-\u1D49F] + | [\u1D4A2] + | [\u1D4A5-\u1D4A6] + | [\u1D4A9-\u1D4AC] + | [\u1D4AE-\u1D4B9] + | [\u1D4BB] + | [\u1D4BD-\u1D4C3] + | [\u1D4C5-\u1D505] + | [\u1D507-\u1D50A] + | [\u1D50D-\u1D514] + | [\u1D516-\u1D51C] + | [\u1D51E-\u1D539] + | [\u1D53B-\u1D53E] + | [\u1D540-\u1D544] + | [\u1D546] + | [\u1D54A-\u1D550] + | [\u1D552-\u1D6A5] + | [\u1D6A8-\u1D6C0] + | [\u1D6C2-\u1D6DA] + | [\u1D6DC-\u1D6FA] + 
| [\u1D6FC-\u1D714] + | [\u1D716-\u1D734] + | [\u1D736-\u1D74E] + | [\u1D750-\u1D76E] + | [\u1D770-\u1D788] + | [\u1D78A-\u1D7A8] + | [\u1D7AA-\u1D7C2] + | [\u1D7C4-\u1D7CB] + | [\u1DF00-\u1DF1E] + | [\u1DF25-\u1DF2A] + | [\u1E030-\u1E06D] + | [\u1E100-\u1E12C] + | [\u1E137-\u1E13D] + | [\u1E14E] + | [\u1E290-\u1E2AD] + | [\u1E2C0-\u1E2EB] + | [\u1E4D0-\u1E4EB] + | [\u1E7E0-\u1E7E6] + | [\u1E7E8-\u1E7EB] + | [\u1E7ED-\u1E7EE] + | [\u1E7F0-\u1E7FE] + | [\u1E800-\u1E8C4] + | [\u1E900-\u1E943] + | [\u1E94B] + | [\u1EE00-\u1EE03] + | [\u1EE05-\u1EE1F] + | [\u1EE21-\u1EE22] + | [\u1EE24] + | [\u1EE27] + | [\u1EE29-\u1EE32] + | [\u1EE34-\u1EE37] + | [\u1EE39] + | [\u1EE3B] + | [\u1EE42] + | [\u1EE47] + | [\u1EE49] + | [\u1EE4B] + | [\u1EE4D-\u1EE4F] + | [\u1EE51-\u1EE52] + | [\u1EE54] + | [\u1EE57] + | [\u1EE59] + | [\u1EE5B] + | [\u1EE5D] + | [\u1EE5F] + | [\u1EE61-\u1EE62] + | [\u1EE64] + | [\u1EE67-\u1EE6A] + | [\u1EE6C-\u1EE72] + | [\u1EE74-\u1EE77] + | [\u1EE79-\u1EE7C] + | [\u1EE7E] + | [\u1EE80-\u1EE89] + | [\u1EE8B-\u1EE9B] + | [\u1EEA1-\u1EEA3] + | [\u1EEA5-\u1EEA9] + | [\u1EEAB-\u1EEBB] + | [\u20000-\u2A6DF] + | [\u2A700-\u2B739] + | [\u2B740-\u2B81D] + | [\u2B820-\u2CEA1] + | [\u2CEB0-\u2EBE0] + | [\u2F800-\u2FA1D] + | [\u30000-\u3134A] + | [\u31350-\u323AF] +; \ No newline at end of file diff --git a/MicroForge.Parsing/PythonLexerBase.cs b/MicroForge.Parsing/PythonLexerBase.cs new file mode 100644 index 0000000..98d5448 --- /dev/null +++ b/MicroForge.Parsing/PythonLexerBase.cs @@ -0,0 +1,493 @@ +/* +The MIT License (MIT) +Copyright (c) 2021 Robert Einhorn + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject 
to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
 */

/*
 * Project      : Python Indent/Dedent handler for ANTLR4 grammars
 *
 * Developed by : Robert Einhorn
 */

using Antlr4.Runtime;
using System.Text.RegularExpressions;

namespace MicroForge.Parsing;

// Base class of the generated PythonLexer.
//
// It sits between the raw ANTLR lexer (base.NextToken()) and the token stream and
// post-processes every raw token before handing it out through NextToken():
//   - inserts INDENT / DEDENT tokens derived from the leading whitespace of a line,
//   - moves NEWLINE tokens to the hidden channel while a bracket is open, before blank
//     lines / comments, and before the first statement,
//   - switches lexer modes while inside f-strings and splits FSTRING_MIDDLE tokens that
//     contain doubled braces,
//   - appends the trailing NEWLINE / DEDENT tokens before EOF.
// Processed tokens are queued in pendingTokens; NextToken() returns them one at a time.
public abstract class PythonLexerBase : Lexer
{
    // Matches a backslash immediately followed by a line break (explicit line joining
    // inside a string literal).
    private static readonly Regex LineJoinRegex = new Regex(@"\\\r?\n");

    // Matches the '_' marker that HandleFSTRING_MIDDLE_token() puts behind a brace.
    private static readonly Regex BraceMarkerRegex = new Regex(@"(?<=[{}])_");

    // A stack that keeps track of the indentation lengths
    private readonly Stack<int> indentLengthStack = new Stack<int>();
    // A list where tokens are waiting to be loaded into the token stream
    private readonly LinkedList<IToken> pendingTokens = new LinkedList<IToken>();
    // last pending token types
    private int previousPendingTokenType;
    private int lastPendingTokenTypeFromDefaultChannel;

    // The amount of opened parentheses, square brackets, or curly braces
    private int opened;
    // The amount of opened parentheses and square brackets in the current lexer mode
    private readonly Stack<int> paren_or_bracket_openedStack = new Stack<int>();

    // Indentation-style flags; they accumulate over the whole input (see GetIndentationLength)
    private bool wasSpaceIndentation;
    private bool wasTabIndentation;
    private bool wasIndentationMixedWithSpacesAndTabs;
    private const int INVALID_LENGTH = -1;

    private CommonToken curToken = null!; // current (under processing) token
    private IToken ffgToken = null!;      // following (look ahead) token

    private const string ERR_TXT = " ERROR: ";

    protected PythonLexerBase(ICharStream input) : base(input)
    {
        this.Init();
    }

    // Puts every piece of lexer state back to its start-of-input value.
    // Called from the constructor and from Reset().
    private void Init()
    {
        this.indentLengthStack.Clear();
        this.pendingTokens.Clear();
        this.previousPendingTokenType = 0;
        this.lastPendingTokenTypeFromDefaultChannel = 0;
        this.opened = 0;
        this.paren_or_bracket_openedStack.Clear();
        this.wasSpaceIndentation = false;
        this.wasTabIndentation = false;
        this.wasIndentationMixedWithSpacesAndTabs = false;
        this.curToken = null!;
        this.ffgToken = null!;
    }

    // Returns the next token for the token stream: processes one more raw token (unless
    // EOF was already queued) and dequeues the oldest pending token.
    public override IToken NextToken() // reading the input stream until a return EOF
    {
        this.CheckNextToken();

        var firstPendingNode = this.pendingTokens.First;
        if (firstPendingNode is null)
        {
            // Nothing is queued. CheckNextToken() does no work once EOF has been queued, so
            // this is the situation of a call made after EOF was already handed out; the
            // current token is that EOF token. Return it again instead of dereferencing
            // a null list node.
            return this.curToken;
        }

        this.pendingTokens.RemoveFirst();
        return firstPendingNode.Value; // add the queued token to the token stream
    }

    // Reads the next raw token and dispatches it to the handler for its type.
    // Does nothing once an EOF token has been queued.
    private void CheckNextToken()
    {
        if (this.previousPendingTokenType == TokenConstants.Eof)
        {
            return;
        }

        this.SetCurrentAndFollowingTokens();
        if (this.indentLengthStack.Count == 0) // We're at the first token
        {
            this.HandleStartOfInput();
        }

        switch (this.curToken.Type)
        {
            case PythonLexer.LPAR:
            case PythonLexer.LSQB:
            case PythonLexer.LBRACE:
                this.opened++;
                this.AddPendingToken(this.curToken);
                break;
            case PythonLexer.RPAR:
            case PythonLexer.RSQB:
            case PythonLexer.RBRACE:
                this.opened--;
                this.AddPendingToken(this.curToken);
                break;
            case PythonLexer.NEWLINE:
                this.HandleNEWLINEtoken();
                break;
            case PythonLexer.STRING:
                this.HandleSTRINGtoken();
                break;
            case PythonLexer.FSTRING_MIDDLE:
                this.HandleFSTRING_MIDDLE_token();
                break;
            case PythonLexer.ERROR_TOKEN:
                this.ReportLexerError("token recognition error at: '" + this.curToken.Text + "'");
                this.AddPendingToken(this.curToken);
                break;
            case TokenConstants.Eof:
                this.HandleEOFtoken();
                break;
            default:
                this.AddPendingToken(this.curToken);
                break;
        }
        this.HandleFORMAT_SPECIFICATION_MODE();
    }

    // Advances the one-token look-ahead window: the previous look-ahead token (or, on the
    // very first call, a freshly read raw token) becomes curToken, and a new raw token is
    // read into ffgToken. At EOF both refer to the EOF token.
    private void SetCurrentAndFollowingTokens()
    {
        this.curToken = this.ffgToken == null ?
                        new CommonToken(base.NextToken()) :
                        new CommonToken(this.ffgToken);

        // must run before the look-ahead token is read: it may change the lexer mode
        this.HandleFStringLexerModes();

        this.ffgToken = this.curToken.Type == TokenConstants.Eof ?
                        this.curToken :
                        base.NextToken();
    }

    // initialize the indentLengthStack
    // hide the leading NEWLINE token(s)
    // if exists, find the first statement (not NEWLINE, not EOF token) that comes from the default channel
    // insert a leading INDENT token if necessary
    private void HandleStartOfInput()
    {
        // initialize the stack with a default 0 indentation length
        this.indentLengthStack.Push(0); // this will never be popped off
        while (this.curToken.Type != TokenConstants.Eof)
        {
            if (this.curToken.Channel == TokenConstants.DefaultChannel)
            {
                if (this.curToken.Type == PythonLexer.NEWLINE)
                {
                    // all the NEWLINE tokens must be ignored before the first statement
                    this.HideAndAddPendingToken(this.curToken);
                }
                else
                { // We're at the first statement
                    this.InsertLeadingIndentToken();
                    return; // continue the processing of the current token with CheckNextToken()
                }
            }
            else
            {
                this.AddPendingToken(this.curToken); // it can be WS, EXPLICIT_LINE_JOINING, or COMMENT token
            }
            this.SetCurrentAndFollowingTokens();
        } // continue the processing of the EOF token with CheckNextToken()
    }

    // Reports an error and queues an INDENT token when the first statement is preceded by
    // whitespace with a non-zero indentation length.
    private void InsertLeadingIndentToken()
    {
        if (this.previousPendingTokenType != PythonLexer.WS)
        {
            return;
        }

        // the WS token was queued by HandleStartOfInput() in this same pass, so the list is not empty
        var prevToken = this.pendingTokens.Last!.Value;
        if (this.GetIndentationLength(prevToken.Text) != 0) // there is an "indentation" before the first statement
        {
            const string errMsg = "first statement indented";
            this.ReportLexerError(errMsg);
            // insert an INDENT token before the first statement to raise an 'unexpected indent' error later by the parser
            this.CreateAndAddPendingToken(PythonLexer.INDENT, TokenConstants.DefaultChannel, PythonLexerBase.ERR_TXT + errMsg, this.curToken);
        }
    }

    // Decides whether the current NEWLINE token is significant and, if it is, derives the
    // INDENT / DEDENT tokens from the whitespace that follows it.
    private void HandleNEWLINEtoken()
    {
        if (this.opened > 0)
        {
            // We're in an implicit line joining, ignore the current NEWLINE token
            this.HideAndAddPendingToken(this.curToken);
            return;
        }

        CommonToken nlToken = new CommonToken(this.curToken); // save the current NEWLINE token
        bool isLookingAhead = this.ffgToken.Type == PythonLexer.WS;
        if (isLookingAhead)
        {
            this.SetCurrentAndFollowingTokens(); // set the next two tokens (curToken is now the WS token)
        }

        switch (this.ffgToken.Type)
        {
            case PythonLexer.NEWLINE:      // We're before a blank line
            case PythonLexer.COMMENT:      // We're before a comment
            case PythonLexer.TYPE_COMMENT: // We're before a type comment
                this.HideAndAddPendingToken(nlToken);
                if (isLookingAhead)
                {
                    this.AddPendingToken(this.curToken); // WS token
                }
                break;
            default:
                this.AddPendingToken(nlToken);
                if (isLookingAhead)
                { // We're on whitespace(s) followed by a statement
                    // whitespace directly before EOF counts as indentation length 0
                    int indentationLength = this.ffgToken.Type == TokenConstants.Eof ?
                                            0 :
                                            this.GetIndentationLength(this.curToken.Text);

                    if (indentationLength != PythonLexerBase.INVALID_LENGTH)
                    {
                        this.AddPendingToken(this.curToken); // WS token
                        this.InsertIndentOrDedentToken(indentationLength); // may insert INDENT token or DEDENT token(s)
                    }
                    else
                    {
                        this.ReportError("inconsistent use of tabs and spaces in indentation");
                    }
                }
                else
                {
                    // We're at a newline followed by a statement (there is no whitespace before the statement)
                    this.InsertIndentOrDedentToken(0); // may insert DEDENT token(s)
                }
                break;
        }
    }

    // Compares indentLength with the top of the indentation stack and queues one INDENT
    // token (deeper) or one DEDENT token per popped level (shallower). A dedent that lands
    // between two recorded levels is reported as "inconsistent dedent".
    private void InsertIndentOrDedentToken(int indentLength)
    {
        //*** https://docs.python.org/3/reference/lexical_analysis.html#indentation
        int prevIndentLength = this.indentLengthStack.Peek();
        if (indentLength > prevIndentLength)
        {
            this.CreateAndAddPendingToken(PythonLexer.INDENT, TokenConstants.DefaultChannel, null, this.ffgToken);
            this.indentLengthStack.Push(indentLength);
            return;
        }

        while (indentLength < prevIndentLength)
        { // more than 1 DEDENT token may be inserted into the token stream
            this.indentLengthStack.Pop();
            prevIndentLength = this.indentLengthStack.Peek();
            if (indentLength <= prevIndentLength)
            {
                this.CreateAndAddPendingToken(PythonLexer.DEDENT, TokenConstants.DefaultChannel, null, this.ffgToken);
            }
            else
            {
                this.ReportError("inconsistent dedent");
            }
        }
    }

    // Queues the STRING token; when the literal contains backslash-newline sequences, the
    // queued token carries the text without them and the untouched original follows on the
    // hidden channel.
    private void HandleSTRINGtoken()
    {
        // remove the \<newline> escape sequences from the string literal
        // https://docs.python.org/3.11/reference/lexical_analysis.html#string-and-bytes-literals
        string line_joinFreeStringLiteral = LineJoinRegex.Replace(this.curToken.Text, "");
        if (this.curToken.Text.Length == line_joinFreeStringLiteral.Length)
        {
            this.AddPendingToken(this.curToken);
        }
        else
        {
            CommonToken originalSTRINGtoken = new CommonToken(this.curToken); // backup the original token
            this.curToken.Text = line_joinFreeStringLiteral;
            this.AddPendingToken(this.curToken);                  // add the modified token with inline string literal
            this.HideAndAddPendingToken(originalSTRINGtoken);     // add the original token with a hidden channel
            // this inserted hidden token allows to restore the original string literal with the \<newline> escape sequences
        }
    }

    // replace the double braces '{{' or '}}' to single braces and hide the second braces
    private void HandleFSTRING_MIDDLE_token()
    {
        string fsMid = this.curToken.Text;
        fsMid = fsMid.Replace("{{", "{_").Replace("}}", "}_"); // replace: {{ --> {_  and   }} --> }_
        string[] arrOfStr = BraceMarkerRegex.Split(fsMid);     // split by {_  or  }_
        foreach (string s in arrOfStr)
        {
            if (String.IsNullOrEmpty(s))
            {
                continue;
            }

            this.CreateAndAddPendingToken(PythonLexer.FSTRING_MIDDLE, TokenConstants.DefaultChannel, s, this.ffgToken);
            string lastCharacter = s.Substring(s.Length - 1);
            if ("{}".Contains(lastCharacter))
            {
                this.CreateAndAddPendingToken(PythonLexer.FSTRING_MIDDLE, TokenConstants.HiddenChannel, lastCharacter, this.ffgToken);
                // this inserted hidden token allows to restore the original f-string literal with the double braces
            }
        }
    }

    // Keeps the lexer mode stack and the per-replacement-field bracket counters in step
    // with the current token while the lexer is inside an f-string (mode stack not empty).
    private void HandleFStringLexerModes() // https://peps.python.org/pep-0498/#specification
    {
        if (this._modeStack.Count == 0)
        {
            return;
        }

        switch (this.curToken.Type)
        {
            case PythonLexer.LBRACE:
                this.PushMode(PythonLexer.DefaultMode);
                this.paren_or_bracket_openedStack.Push(0);
                break;
            case PythonLexer.LPAR:
            case PythonLexer.LSQB:
                // https://peps.python.org/pep-0498/#lambdas-inside-expressions
                this.paren_or_bracket_openedStack.Push(this.paren_or_bracket_openedStack.Pop() + 1); // increment the last element
                break;
            case PythonLexer.RPAR:
            case PythonLexer.RSQB:
                this.paren_or_bracket_openedStack.Push(this.paren_or_bracket_openedStack.Pop() - 1); // decrement the last element
                break;
            case PythonLexer.COLON: // colon can only come from DEFAULT_MODE
                // a colon outside of any parentheses / square brackets starts the format specification
                if (this.paren_or_bracket_openedStack.Peek() == 0)
                {
                    switch (this._modeStack.First()) // check the previous lexer mode (the current is DEFAULT_MODE)
                    {
                        case PythonLexer.SINGLE_QUOTE_FSTRING_MODE:
                        case PythonLexer.LONG_SINGLE_QUOTE_FSTRING_MODE:
                        case PythonLexer.SINGLE_QUOTE_FORMAT_SPECIFICATION_MODE:
                            this.Mode(PythonLexer.SINGLE_QUOTE_FORMAT_SPECIFICATION_MODE); // continue in format spec. mode
                            break;
                        case PythonLexer.DOUBLE_QUOTE_FSTRING_MODE:
                        case PythonLexer.LONG_DOUBLE_QUOTE_FSTRING_MODE:
                        case PythonLexer.DOUBLE_QUOTE_FORMAT_SPECIFICATION_MODE:
                            this.Mode(PythonLexer.DOUBLE_QUOTE_FORMAT_SPECIFICATION_MODE); // continue in format spec. mode
                            break;
                    }
                }
                break;
            case PythonLexer.RBRACE:
                switch (_mode)
                {
                    case PythonLexer.DefaultMode:
                    case PythonLexer.SINGLE_QUOTE_FORMAT_SPECIFICATION_MODE:
                    case PythonLexer.DOUBLE_QUOTE_FORMAT_SPECIFICATION_MODE:
                        this.PopMode();
                        this.paren_or_bracket_openedStack.Pop();
                        break;
                    default:
                        this.ReportLexerError("f-string: single '}' is not allowed");
                        break;
                }
                break;
        }
    }

    // Queues an empty FSTRING_MIDDLE token when, inside an f-string, a ':' or '}' is
    // directly followed by '}'.
    private void HandleFORMAT_SPECIFICATION_MODE()
    {
        if (this._modeStack.Count > 0 && this.ffgToken.Type == PythonLexer.RBRACE)
        {
            switch (this.curToken.Type)
            {
                case PythonLexer.COLON:
                case PythonLexer.RBRACE:
                    // insert an empty FSTRING_MIDDLE token instead of the missing format specification
                    this.CreateAndAddPendingToken(PythonLexer.FSTRING_MIDDLE, TokenConstants.DefaultChannel, "", this.ffgToken);
                    break;
            }
        }
    }

    // Queues the NEWLINE (if the input did not end with one) and the DEDENT tokens that
    // close the input before EOF.
    private void InsertTrailingTokens()
    {
        switch (this.lastPendingTokenTypeFromDefaultChannel)
        {
            case PythonLexer.NEWLINE:
            case PythonLexer.DEDENT:
                break; // no trailing NEWLINE token is needed
            default:
                // insert an extra trailing NEWLINE token that serves as the end of the last statement
                this.CreateAndAddPendingToken(PythonLexer.NEWLINE, TokenConstants.DefaultChannel, null, this.ffgToken); // ffgToken is EOF
                break;
        }
        this.InsertIndentOrDedentToken(0); // Now insert as many trailing DEDENT tokens as needed
    }

    // Queues the EOF token, preceded by the trailing tokens when any default-channel token
    // was queued before it.
    private void HandleEOFtoken()
    {
        if (this.lastPendingTokenTypeFromDefaultChannel > 0)
        { // there was a statement in the input (leading NEWLINE tokens are hidden)
            this.InsertTrailingTokens();
        }
        this.AddPendingToken(this.curToken);
    }

    // Moves the token to the hidden channel and queues it.
    private void HideAndAddPendingToken(CommonToken cToken)
    {
        cToken.Channel = TokenConstants.HiddenChannel;
        this.AddPendingToken(cToken);
    }

    // Queues a synthetic token of the given type and channel. The token is a copy of
    // baseToken whose stop index is set to one before baseToken's start index; a null
    // text becomes an empty text.
    private void CreateAndAddPendingToken(int type, int channel, string? text, IToken baseToken)
    {
        CommonToken cToken = new CommonToken(baseToken);
        cToken.Type = type;
        cToken.Channel = channel;
        cToken.StopIndex = baseToken.StartIndex - 1;

        // NOTE(review): a previously commented-out variant used
        // "<" + Vocabulary.GetSymbolicName(type) + ">" as the text of tokens created without
        // text; this class deliberately uses an empty text instead.
        cToken.Text = text ?? string.Empty;

        this.AddPendingToken(cToken);
    }

    // Appends the token to the pending queue and records its type.
    private void AddPendingToken(IToken token)
    {
        // save the last pending token type because the pendingTokens linked list can be empty by the nextToken()
        this.previousPendingTokenType = token.Type;
        if (token.Channel == TokenConstants.DefaultChannel)
        {
            this.lastPendingTokenTypeFromDefaultChannel = this.previousPendingTokenType;
        }
        this.pendingTokens.AddLast(token);
    }

    // Computes the indentation length of a whitespace run: a space counts 1, a tab advances
    // to the next multiple of 8, a form feed resets the length to 0.
    // Returns INVALID_LENGTH exactly once: the first time both a space and a tab have been
    // seen over all calls since the last Init().
    private int GetIndentationLength(string textWS) // the textWS may contain spaces, tabs or form feeds
    {
        const int TAB_LENGTH = 8; // the standard number of spaces to replace a tab with spaces
        int length = 0;
        foreach (char ch in textWS)
        {
            switch (ch)
            {
                case ' ':
                    this.wasSpaceIndentation = true;
                    length += 1;
                    break;
                case '\t':
                    this.wasTabIndentation = true;
                    length += TAB_LENGTH - (length % TAB_LENGTH);
                    break;
                case '\f': // form feed
                    length = 0;
                    break;
            }
        }

        if (this.wasTabIndentation && this.wasSpaceIndentation && !this.wasIndentationMixedWithSpacesAndTabs)
        {
            this.wasIndentationMixedWithSpacesAndTabs = true;
            return PythonLexerBase.INVALID_LENGTH; // only for the first inconsistent indent
        }
        return length;
    }

    // Sends a " LEXER ERROR: ..." message, positioned at the current token, to the
    // registered error listeners.
    private void ReportLexerError(string errMsg)
    {
        // NOTE(review): a previously commented-out variant of this call passed this.ErrorOutput
        // as an additional first argument; presumably it targets a different ANTLR runtime API.
        // Confirm before changing the runtime package.
        this.ErrorListenerDispatch.SyntaxError(this, this.curToken.Type, this.curToken.Line, this.curToken.Column, " LEXER" + PythonLexerBase.ERR_TXT + errMsg, null);
    }

    // Reports the error to the listeners and additionally queues an ERROR_TOKEN.
    private void ReportError(string errMsg)
    {
        this.ReportLexerError(errMsg);

        // the ERROR_TOKEN will raise an error in the parser
        this.CreateAndAddPendingToken(PythonLexer.ERROR_TOKEN, TokenConstants.DefaultChannel, PythonLexerBase.ERR_TXT + errMsg, this.ffgToken);
    }

    // Resets this class's own state first, then the underlying lexer.
    public override void Reset()
    {
        this.Init();
        base.Reset();
    }
}
\ No newline at end of file diff --git a/MicroForge.Parsing/PythonParser.g4 b/MicroForge.Parsing/PythonParser.g4 new file mode 100644 index 0000000..d2f6a07 --- /dev/null +++ b/MicroForge.Parsing/PythonParser.g4 @@ -0,0 +1,880 @@ +/* +Python grammar +The MIT License (MIT) +Copyright (c) 2021 Robert Einhorn + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + */ + + /* + * Project : an ANTLR4 parser grammar by the official PEG grammar + * https://github.com/RobEin/ANTLR4-parser-for-Python-3.12 + * Developed by : Robert Einhorn + * + */ + +parser grammar PythonParser; // Python 3.12.1 https://docs.python.org/3.12/reference/grammar.html#full-grammar-specification +options { + tokenVocab=PythonLexer; + superClass=PythonParserBase; +} + +// STARTING RULES +// ============== + +file_input: statements?
EOF; +interactive: statement_newline; +eval: expressions NEWLINE* EOF; +func_type: '(' type_expressions? ')' '->' expression NEWLINE* EOF; +fstring_input: star_expressions; + +// GENERAL STATEMENTS +// ================== + +statements: statement+; + +statement: compound_stmt | simple_stmts; + +statement_newline + : compound_stmt NEWLINE + | simple_stmts + | NEWLINE + | EOF; + +simple_stmts + : simple_stmt (';' simple_stmt)* ';'? NEWLINE + ; + +// NOTE: assignment MUST precede expression, else parsing a simple assignment +// will throw a SyntaxError. +simple_stmt + : assignment + | type_alias + | star_expressions + | return_stmt + | import_stmt + | raise_stmt + | 'pass' + | del_stmt + | yield_stmt + | assert_stmt + | 'break' + | 'continue' + | global_stmt + | nonlocal_stmt; + +compound_stmt + : function_def + | if_stmt + | class_def + | with_stmt + | for_stmt + | try_stmt + | while_stmt + | match_stmt; + +// SIMPLE STATEMENTS +// ================= + +// NOTE: annotated_rhs may start with 'yield'; yield_expr must start with 'yield' +assignment + : NAME ':' expression ('=' annotated_rhs )? + | ('(' single_target ')' + | single_subscript_attribute_target) ':' expression ('=' annotated_rhs )? + | (star_targets '=' )+ (yield_expr | star_expressions) TYPE_COMMENT? + | single_target augassign (yield_expr | star_expressions); + +annotated_rhs: yield_expr | star_expressions; + +augassign + : '+=' + | '-=' + | '*=' + | '@=' + | '/=' + | '%=' + | '&=' + | '|=' + | '^=' + | '<<=' + | '>>=' + | '**=' + | '//='; + +return_stmt + : 'return' star_expressions?; + +raise_stmt + : 'raise' (expression ('from' expression )?)? 
+ ; + +global_stmt: 'global' NAME (',' NAME)*; + +nonlocal_stmt: 'nonlocal' NAME (',' NAME)*; + +del_stmt + : 'del' del_targets; + +yield_stmt: yield_expr; + +assert_stmt: 'assert' expression (',' expression )?; + +import_stmt + : import_name + | import_from; + +// Import statements +// ----------------- + +import_name: 'import' dotted_as_names; +// note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS +import_from + : 'from' ('.' | '...')* dotted_name 'import' import_from_targets + | 'from' ('.' | '...')+ 'import' import_from_targets; +import_from_targets + : '(' import_from_as_names ','? ')' + | import_from_as_names + | '*'; +import_from_as_names + : import_from_as_name (',' import_from_as_name)*; +import_from_as_name + : NAME ('as' NAME )?; +dotted_as_names + : dotted_as_name (',' dotted_as_name)*; +dotted_as_name + : dotted_name ('as' NAME )?; +dotted_name + : dotted_name '.' NAME + | NAME; + +// COMPOUND STATEMENTS +// =================== + +// Common elements +// --------------- + +block + : NEWLINE INDENT statements DEDENT + | simple_stmts; + +decorators: ('@' named_expression NEWLINE )+; + +// Class definitions +// ----------------- + +class_def + : decorators class_def_raw + | class_def_raw; + +class_def_raw + : 'class' NAME type_params? ('(' arguments? ')' )? ':' block; + +// Function definitions +// -------------------- + +function_def + : decorators function_def_raw + | function_def_raw; + +function_def_raw + : 'def' NAME type_params? '(' params? ')' ('->' expression )? ':' func_type_comment? block + | ASYNC 'def' NAME type_params? '(' params? ')' ('->' expression )? ':' func_type_comment? block; + +// Function parameters +// ------------------- + +params + : parameters; + +parameters + : slash_no_default param_no_default* param_with_default* star_etc? + | slash_with_default param_with_default* star_etc? + | param_no_default+ param_with_default* star_etc? + | param_with_default+ star_etc? 
+ | star_etc; + +// Some duplication here because we can't write (',' | {isCurrentTokenType(RPAR)}?), +// which is because we don't support empty alternatives (yet). + +slash_no_default + : param_no_default+ '/' ','? + ; +slash_with_default + : param_no_default* param_with_default+ '/' ','? + ; + +star_etc + : '*' param_no_default param_maybe_default* kwds? + | '*' param_no_default_star_annotation param_maybe_default* kwds? + | '*' ',' param_maybe_default+ kwds? + | kwds; + +kwds + : '**' param_no_default; + +// One parameter. This *includes* a following comma and type comment. +// +// There are three styles: +// - No default_assignment +// - With default_assignment +// - Maybe with default_assignment +// +// There are two alternative forms of each, to deal with type comments: +// - Ends in a comma followed by an optional type comment +// - No comma, optional type comment, must be followed by close paren +// The latter form is for a final parameter without trailing comma. +// + +param_no_default + : param ','? TYPE_COMMENT? + ; +param_no_default_star_annotation + : param_star_annotation ','? TYPE_COMMENT? + ; +param_with_default + : param default_assignment ','? TYPE_COMMENT? + ; +param_maybe_default + : param default_assignment? ','? TYPE_COMMENT? + ; +param: NAME annotation?; +param_star_annotation: NAME star_annotation; +annotation: ':' expression; +star_annotation: ':' star_expression; +default_assignment: '=' expression; + +// If statement +// ------------ + +if_stmt + : 'if' named_expression ':' block (elif_stmt | else_block?) + ; +elif_stmt + : 'elif' named_expression ':' block (elif_stmt | else_block?) + ; +else_block + : 'else' ':' block; + +// While statement +// --------------- + +while_stmt + : 'while' named_expression ':' block else_block?; + +// For statement +// ------------- + +for_stmt + : ASYNC? 'for' star_targets 'in' star_expressions ':' TYPE_COMMENT? block else_block? + ; + +// With statement +// -------------- + +with_stmt + : ASYNC? 
'with' ( '(' with_item (',' with_item)* ','? ')' ':' + | with_item (',' with_item)* ':' TYPE_COMMENT? + ) block + ; + +with_item + : expression ('as' star_target)? + ; + +// Try statement +// ------------- + +try_stmt + : 'try' ':' block finally_block + | 'try' ':' block except_block+ else_block? finally_block? + | 'try' ':' block except_star_block+ else_block? finally_block?; + + +// Except statement +// ---------------- + +except_block + : 'except' (expression ('as' NAME )?)? ':' block + ; +except_star_block + : 'except' '*' expression ('as' NAME )? ':' block; +finally_block + : 'finally' ':' block; + +// Match statement +// --------------- + +match_stmt + : soft_kw_match subject_expr ':' NEWLINE INDENT case_block+ DEDENT; + +subject_expr + : star_named_expression ',' star_named_expressions? + | named_expression; + +case_block + : soft_kw_case patterns guard? ':' block; + +guard: 'if' named_expression; + +patterns + : open_sequence_pattern + | pattern; + +pattern + : as_pattern + | or_pattern; + +as_pattern + : or_pattern 'as' pattern_capture_target; + +or_pattern + : closed_pattern ('|' closed_pattern)*; + +closed_pattern + : literal_pattern + | capture_pattern + | wildcard_pattern + | value_pattern + | group_pattern + | sequence_pattern + | mapping_pattern + | class_pattern; + +// Literal patterns are used for equality and identity constraints +literal_pattern + : signed_number + | complex_number + | strings + | 'None' + | 'True' + | 'False'; + +// Literal expressions are used to restrict permitted mapping pattern keys +literal_expr + : signed_number + | complex_number + | strings + | 'None' + | 'True' + | 'False'; + +complex_number + : signed_real_number ('+' | '-') imaginary_number + ; + +signed_number + : '-'? NUMBER + ; + +signed_real_number + : '-'? 
real_number + ; + +real_number + : NUMBER; + +imaginary_number + : NUMBER; + +capture_pattern + : pattern_capture_target; + +pattern_capture_target + : soft_kw__not__wildcard; + +wildcard_pattern + : soft_kw_wildcard; + +value_pattern + : attr; + +attr + : NAME ('.' NAME)+ + ; +name_or_attr + : NAME ('.' NAME)* + ; + +group_pattern + : '(' pattern ')'; + +sequence_pattern + : '[' maybe_sequence_pattern? ']' + | '(' open_sequence_pattern? ')'; + +open_sequence_pattern + : maybe_star_pattern ',' maybe_sequence_pattern?; + +maybe_sequence_pattern + : maybe_star_pattern (',' maybe_star_pattern)* ','?; + +maybe_star_pattern + : star_pattern + | pattern; + +star_pattern + : '*' pattern_capture_target + | '*' wildcard_pattern; + +mapping_pattern + : LBRACE RBRACE + | LBRACE double_star_pattern ','? RBRACE + | LBRACE items_pattern (',' double_star_pattern)? ','? RBRACE + ; + +items_pattern + : key_value_pattern (',' key_value_pattern)*; + +key_value_pattern + : (literal_expr | attr) ':' pattern; + +double_star_pattern + : '**' pattern_capture_target; + +class_pattern + : name_or_attr '(' ((positional_patterns (',' keyword_patterns)? | keyword_patterns) ','?)? ')' + ; + + + +positional_patterns + : pattern (',' pattern)*; + +keyword_patterns + : keyword_pattern (',' keyword_pattern)*; + +keyword_pattern + : NAME '=' pattern; + +// Type statement +// --------------- + +type_alias + : soft_kw_type NAME type_params? '=' expression; + +// Type parameter declaration +// -------------------------- + +type_params: '[' type_param_seq ']'; + +type_param_seq: type_param (',' type_param)* ','?; + +type_param + : NAME type_param_bound? + | '*' NAME (':' expression)? + | '**' NAME (':' expression)? + ; + + +type_param_bound: ':' expression; + +// EXPRESSIONS +// ----------- + +expressions + : expression (',' expression )* ','? + ; + + +expression + : disjunction ('if' disjunction 'else' expression)? + | lambdef + ; + +yield_expr + : 'yield' ('from' expression | star_expressions?) 
+ ; + +star_expressions + : star_expression (',' star_expression )* ','? + ; + + +star_expression + : '*' bitwise_or + | expression; + +star_named_expressions: star_named_expression (',' star_named_expression)* ','?; + +star_named_expression + : '*' bitwise_or + | named_expression; + +assignment_expression + : NAME ':=' expression; + +named_expression + : assignment_expression + | expression; + +disjunction + : conjunction ('or' conjunction )* + ; + +conjunction + : inversion ('and' inversion )* + ; + +inversion + : 'not' inversion + | comparison; + +// Comparison operators +// -------------------- + +comparison + : bitwise_or compare_op_bitwise_or_pair* + ; + +compare_op_bitwise_or_pair + : eq_bitwise_or + | noteq_bitwise_or + | lte_bitwise_or + | lt_bitwise_or + | gte_bitwise_or + | gt_bitwise_or + | notin_bitwise_or + | in_bitwise_or + | isnot_bitwise_or + | is_bitwise_or; + +eq_bitwise_or: '==' bitwise_or; +noteq_bitwise_or + : ('!=' ) bitwise_or; +lte_bitwise_or: '<=' bitwise_or; +lt_bitwise_or: '<' bitwise_or; +gte_bitwise_or: '>=' bitwise_or; +gt_bitwise_or: '>' bitwise_or; +notin_bitwise_or: 'not' 'in' bitwise_or; +in_bitwise_or: 'in' bitwise_or; +isnot_bitwise_or: 'is' 'not' bitwise_or; +is_bitwise_or: 'is' bitwise_or; + +// Bitwise operators +// ----------------- + +bitwise_or + : bitwise_or '|' bitwise_xor + | bitwise_xor; + +bitwise_xor + : bitwise_xor '^' bitwise_and + | bitwise_and; + +bitwise_and + : bitwise_and '&' shift_expr + | shift_expr; + +shift_expr + : shift_expr ('<<' | '>>') sum + | sum + ; + +// Arithmetic operators +// -------------------- + +sum + : sum ('+' | '-') term + | term + ; + +term + : term ('*' | '/' | '//' | '%' | '@') factor + | factor + ; + + + + +factor + : '+' factor + | '-' factor + | '~' factor + | power; + +power + : await_primary ('**' factor)? + ; + +// Primary elements +// ---------------- + +// Primary elements are things like "obj.something.something", "obj[something]", "obj(something)", "obj" ... 
+ +await_primary + : AWAIT primary + | primary; + +primary + : primary ('.' NAME | genexp | '(' arguments? ')' | '[' slices ']') + | atom + ; + + + +slices + : slice + | (slice | starred_expression) (',' (slice | starred_expression))* ','?; + +slice + : expression? ':' expression? (':' expression? )? + | named_expression; + +atom + : NAME + | 'True' + | 'False' + | 'None' + | strings + | NUMBER + | (tuple | group | genexp) + | (list | listcomp) + | (dict | set | dictcomp | setcomp) + | '...'; + +group + : '(' (yield_expr | named_expression) ')'; + +// Lambda functions +// ---------------- + +lambdef + : 'lambda' lambda_params? ':' expression; + +lambda_params + : lambda_parameters; + +// lambda_parameters etc. duplicates parameters but without annotations +// or type comments, and if there's no comma after a parameter, we expect +// a colon, not a close parenthesis. (For more, see parameters above.) +// +lambda_parameters + : lambda_slash_no_default lambda_param_no_default* lambda_param_with_default* lambda_star_etc? + | lambda_slash_with_default lambda_param_with_default* lambda_star_etc? + | lambda_param_no_default+ lambda_param_with_default* lambda_star_etc? + | lambda_param_with_default+ lambda_star_etc? + | lambda_star_etc; + +lambda_slash_no_default + : lambda_param_no_default+ '/' ','? + ; + +lambda_slash_with_default + : lambda_param_no_default* lambda_param_with_default+ '/' ','? + ; + +lambda_star_etc + : '*' lambda_param_no_default lambda_param_maybe_default* lambda_kwds? + | '*' ',' lambda_param_maybe_default+ lambda_kwds? + | lambda_kwds; + +lambda_kwds + : '**' lambda_param_no_default; + +lambda_param_no_default + : lambda_param ','? + ; +lambda_param_with_default + : lambda_param default_assignment ','? + ; +lambda_param_maybe_default + : lambda_param default_assignment? ','? 
+ ; +lambda_param: NAME; + +// LITERALS +// ======== + +fstring_middle + : fstring_replacement_field + | FSTRING_MIDDLE; +fstring_replacement_field + : LBRACE (yield_expr | star_expressions) '='? fstring_conversion? fstring_full_format_spec? RBRACE; +fstring_conversion + : '!' NAME; +fstring_full_format_spec + : ':' fstring_format_spec*; +fstring_format_spec + : FSTRING_MIDDLE + | fstring_replacement_field; +fstring + : FSTRING_START fstring_middle* FSTRING_END; + +string: STRING; +strings: (fstring|string)+; + +list + : '[' star_named_expressions? ']'; + +tuple + : '(' (star_named_expression ',' star_named_expressions? )? ')'; + +set: LBRACE star_named_expressions RBRACE; + +// Dicts +// ----- + +dict + : LBRACE double_starred_kvpairs? RBRACE; + +double_starred_kvpairs: double_starred_kvpair (',' double_starred_kvpair)* ','?; + +double_starred_kvpair + : '**' bitwise_or + | kvpair; + +kvpair: expression ':' expression; + +// Comprehensions & Generators +// --------------------------- + +for_if_clauses + : for_if_clause+; + +for_if_clause + : ASYNC? 'for' star_targets 'in' disjunction ('if' disjunction )* + ; + +listcomp + : '[' named_expression for_if_clauses ']'; + +setcomp + : LBRACE named_expression for_if_clauses RBRACE; + +genexp + : '(' ( assignment_expression | expression) for_if_clauses ')'; + +dictcomp + : LBRACE kvpair for_if_clauses RBRACE; + +// FUNCTION CALL ARGUMENTS +// ======================= + +arguments + : args ','?; + +args + : (starred_expression | ( assignment_expression | expression)) (',' (starred_expression | ( assignment_expression | expression)))* (',' kwargs )? + | kwargs; + +kwargs + : kwarg_or_starred (',' kwarg_or_starred)* (',' kwarg_or_double_starred (',' kwarg_or_double_starred)*)? 
+ | kwarg_or_double_starred (',' kwarg_or_double_starred)* + ; + +starred_expression + : '*' expression; + +kwarg_or_starred + : NAME '=' expression + | starred_expression; + +kwarg_or_double_starred + : NAME '=' expression + | '**' expression; + +// ASSIGNMENT TARGETS +// ================== + +// Generic targets +// --------------- + +// NOTE: star_targets may contain *bitwise_or, targets may not. +star_targets + : star_target (',' star_target )* ','? + ; + +star_targets_list_seq: star_target (',' star_target)+ ','?; + +star_targets_tuple_seq + : star_target (',' | (',' star_target )+ ','?) + ; + +star_target + : '*' (star_target) + | target_with_star_atom; + +target_with_star_atom + : t_primary ('.' NAME | '[' slices ']') + | star_atom + ; + +star_atom + : NAME + | '(' target_with_star_atom ')' + | '(' star_targets_tuple_seq? ')' + | '[' star_targets_list_seq? ']'; + +single_target + : single_subscript_attribute_target + | NAME + | '(' single_target ')'; + +single_subscript_attribute_target + : t_primary ('.' NAME | '[' slices ']') + ; + +t_primary + : t_primary ('.' NAME | '[' slices ']' | genexp | '(' arguments? ')') + | atom + ; + + + + + +// Targets for del statements +// -------------------------- + +del_targets: del_target (',' del_target)* ','?; + +del_target + : t_primary ('.' NAME | '[' slices ']') + | del_t_atom + ; + +del_t_atom + : NAME + | '(' del_target ')' + | '(' del_targets? ')' + | '[' del_targets? ']'; + +// TYPING ELEMENTS +// --------------- + + +// type_expressions allow */** but ignore them +type_expressions + : expression (',' expression)* (',' ('*' expression (',' '**' expression)? | '**' expression))? + | '*' expression (',' '**' expression)? + | '**' expression + ; + + + +func_type_comment + : NEWLINE TYPE_COMMENT // Must be followed by indented block + | TYPE_COMMENT; + +// *** Soft Keywords: https://docs.python.org/3.12/reference/lexical_analysis.html#soft-keywords +soft_kw_type: {this.isEqualToCurrentTokenText("type")}? 
NAME; +soft_kw_match: {this.isEqualToCurrentTokenText("match")}? NAME; +soft_kw_case: {this.isEqualToCurrentTokenText("case")}? NAME; +soft_kw_wildcard: {this.isEqualToCurrentTokenText("_")}? NAME; +soft_kw__not__wildcard: {this.isnotEqualToCurrentTokenText("_")}? NAME; + +// ========================= END OF THE GRAMMAR =========================== \ No newline at end of file diff --git a/MicroForge.Parsing/PythonParserBase.cs b/MicroForge.Parsing/PythonParserBase.cs new file mode 100644 index 0000000..9211932 --- /dev/null +++ b/MicroForge.Parsing/PythonParserBase.cs @@ -0,0 +1,21 @@ +using Antlr4.Runtime; + +namespace MicroForge.Parsing; + +public abstract class PythonParserBase : Parser +{ + protected PythonParserBase(ITokenStream input) : base(input) + { + } + + // Soft-keyword predicate, invoked from the grammar's soft_kw_* rules: true when the current token's text equals tokenText. See https://docs.python.org/3/reference/lexical_analysis.html#soft-keywords + public bool isEqualToCurrentTokenText(string tokenText) + { + return this.CurrentToken.Text == tokenText; + } + + public bool isnotEqualToCurrentTokenText(string tokenText) + { + return !this.isEqualToCurrentTokenText(tokenText); // negation of isEqualToCurrentTokenText (used by the soft_kw__not__wildcard rule); exists for compatibility with the Python 'not' logical operator + } +} \ No newline at end of file diff --git a/MicroForge.sln b/MicroForge.sln new file mode 100644 index 0000000..e3be39a --- /dev/null +++ b/MicroForge.sln @@ -0,0 +1,22 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MicroForge.CLI", "MicroForge.CLI\MicroForge.CLI.csproj", "{27EFB015-AFC3-4046-8D9A-DD5C5D3B35E0}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MicroForge.Parsing", "MicroForge.Parsing\MicroForge.Parsing.csproj", "{D697CEFD-7CF7-4680-82FC-F84B08F81635}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Release|Any CPU = Release|Any CPU + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {27EFB015-AFC3-4046-8D9A-DD5C5D3B35E0}.Debug|Any
CPU.ActiveCfg = Debug|Any CPU + {27EFB015-AFC3-4046-8D9A-DD5C5D3B35E0}.Debug|Any CPU.Build.0 = Debug|Any CPU + {27EFB015-AFC3-4046-8D9A-DD5C5D3B35E0}.Release|Any CPU.ActiveCfg = Release|Any CPU + {27EFB015-AFC3-4046-8D9A-DD5C5D3B35E0}.Release|Any CPU.Build.0 = Release|Any CPU + {D697CEFD-7CF7-4680-82FC-F84B08F81635}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {D697CEFD-7CF7-4680-82FC-F84B08F81635}.Debug|Any CPU.Build.0 = Debug|Any CPU + {D697CEFD-7CF7-4680-82FC-F84B08F81635}.Release|Any CPU.ActiveCfg = Release|Any CPU + {D697CEFD-7CF7-4680-82FC-F84B08F81635}.Release|Any CPU.Build.0 = Release|Any CPU + EndGlobalSection +EndGlobal diff --git a/README.Docker.md b/README.Docker.md new file mode 100644 index 0000000..ec0ea1f --- /dev/null +++ b/README.Docker.md @@ -0,0 +1,46 @@ +### Building and running your application + +When you're ready, start your application by running: +`docker compose up --build`. + +Your application will be available at http://localhost:8080. + +### Deploying your application to the cloud + +First, build your image, e.g.: `docker build -t myapp .`. +If your cloud uses a different CPU architecture than your development +machine (e.g., you are on a Mac M1 and your cloud provider is amd64), +you'll want to build the image for that platform, e.g.: +`docker build --platform=linux/amd64 -t myapp .`. + +Then, push it to your registry, e.g. `docker push myregistry.com/myapp`. + +Consult Docker's [getting started](https://docs.docker.com/go/get-started-sharing/) +docs for more detail on building and pushing. + +### References +* [Docker's .NET guide](https://docs.docker.com/language/dotnet/) +* The [dotnet-docker](https://github.com/dotnet/dotnet-docker/tree/main/samples) + repository has many relevant samples and docs. 
+ +### Dependencies + +bash (/usr/bin/bash) + +Python 3.10.2 (/usr/bin/python3) + - python3-pip + - python3-venv + +pkg-config (sudo apt update && sudo apt install pkg-config) + +### Notes + +Try running a NuGet restore when ANTLR doesn't generate the Lexer or Parser. + +### TODO +- Should the entrypoint (i.e. main.py) be customizable or fixed? +- Figure out why BashException cannot be caught. Could it be due to differences in scoping, +because the `Bash` class is static and the services calling `Bash.ExecuteAsync` are in the container? +Maybe in combination with the async nature of the whole thing? +- Make it so that `Bash.ExecuteAsync` can also run "directly" in the calling terminal. +- This will improve the usability of the "m4g run" command. \ No newline at end of file