java AST 抽象语法树

Posted xinyuan_java

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了java AST 抽象语法树相关的知识,希望对你有一定的参考价值。

AST : Abstract Syntax Tree

https://www.geeksforgeeks.org/abstract-syntax-tree-ast-in-java/

抽象语法树是一种用编程语言编写的源代码的抽象语法结构的树表示。树的每个节点表示源代码中出现的一个构造。

AST 在编译器中的应用非常重要,因为抽象语法树是编译器中广泛用于表示程序代码结构的数据结构。AST 通常是编译器语法分析阶段的结果。它通常通过编译器所需的几个阶段作为程序的中间表示,并且对编译器的最终输出有很大的影响。

在进一步讨论实现部分之前,让我们先讨论一下 AST 的使用。 AST 主要用于编译器以检查代码的准确性。如果生成的树有错误,编译器会打印一条错误消息。使用抽象语法树 (AST) 是因为某些构造无法用上下文无关语法表示,例如隐式类型。它们高度特定于编程语言,但对通用语法树的研究正在进行中。

 java 代码

// Java Custom Source Code

/**
 * Minimal sample class whose abstract syntax tree is shown in the next
 * section of the article.
 */
class GFG {

    /**
     * Main driver method: prints a greeting to standard output.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {

        // Print statement
        System.out.println("Hello World!");
    }
}

java 对应AST

CLASS_DEF -> CLASS_DEF [1:0]
|--MODIFIERS -> MODIFIERS [1:0]
| `--LITERAL_PUBLIC -> public [1:0]
|--LITERAL_CLASS -> class [1:7]
|--IDENT -> GFG [1:13]
`--OBJBLOCK -> OBJBLOCK [1:17]
	|--LCURLY -> { [1:17]
	|--METHOD_DEF -> METHOD_DEF [2:4]
	| |--MODIFIERS -> MODIFIERS [2:4]
	| | |--LITERAL_PUBLIC -> public [2:4]
	| | `--LITERAL_STATIC -> static [2:11]
	| |--TYPE -> TYPE [2:18]
	| | `--LITERAL_VOID -> void [2:18]
	| |--IDENT -> main [2:23]
	| |--LPAREN -> ( [2:27]
	| |--PARAMETERS -> PARAMETERS [2:34]
	| | `--PARAMETER_DEF -> PARAMETER_DEF [2:34]
	| |	 |--MODIFIERS -> MODIFIERS [2:34]
	| |	 |--TYPE -> TYPE [2:34]
	| |	 | `--ARRAY_DECLARATOR -> [ [2:34]
	| |	 |	 |--IDENT -> String [2:28]
	| |	 |	 `--RBRACK -> ] [2:35]
	| |	 `--IDENT -> args [2:37]
	| |--RPAREN -> ) [2:41]
	| `--SLIST -> { [2:43]
	|	 |--EXPR -> EXPR [3:26]
	|	 | `--METHOD_CALL -> ( [3:26]
	|	 |	 |--DOT -> . [3:18]
	|	 |	 | |--DOT -> . [3:14]
	|	 |	 | | |--IDENT -> System [3:8]
	|	 |	 | | `--IDENT -> out [3:15]
	|	 |	 | `--IDENT -> println [3:19]
	|	 |	 |--ELIST -> ELIST [3:27]
	|	 |	 | `--EXPR -> EXPR [3:27]
	|	 |	 |	 `--STRING_LITERAL -> "Hello World!" [3:27]
	|	 |	 `--RPAREN -> ) [3:41]
	|	 |--SEMI -> ; [3:42]
	|	 `--RCURLY -> } [4:4]
	`--RCURLY -> } [5:0]

逻辑示例

表示 1 + 2 的操作在 AST 中的展示

+ BinaryExpression
- type: +
- left_value:
LiteralExpr:
value: 1
- right_value:
LiteralExpr:
value: 2

AST 使用

AST 出现在 .java 源码编译成 .class 字节码的中间阶段;Java 提供了接口(注解处理器)可以在编译期修改 AST,从而作用于最终生成的 class。

https://blog.mythsman.com/post/5d2c11c767f841464434a3bf/

https://cloud.tencent.com/developer/news/740798

https://xie.infoq.cn/article/6f8ff63d6b88480c05f805d96

手撸Getter

实验的目的是自定义一个针对类的Getter注解,它能够读取该类的成员变量并自动生成对应的getter方法。

项目依赖

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.mythsman.test</groupId>
    <artifactId>getter</artifactId>
    <version>1.0-SNAPSHOT</version>
    <packaging>jar</packaging>

    <name>test</name>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>

    <dependencies>

        <!-- javac internals (com.sun.tools.javac.*) live in the JDK's tools.jar.
             ${java.home} points at the JRE directory, so step up one level to
             reach the JDK's lib folder. The ${...} syntax is required for Maven
             to interpolate the property. -->
        <dependency>
            <groupId>com.sun</groupId>
            <artifactId>tools</artifactId>
            <version>1.8</version>
            <scope>system</scope>
            <systemPath>${java.home}/../lib/tools.jar</systemPath>
        </dependency>

    </dependencies>

    <build>
        <plugins>
            <!-- Compile with Java 8 source/target so lambdas etc. are available. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.1</version>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>

主要定义了下项目名,除了默认依赖的junit之外(其实并没有用),这里添加了tools.jar包。这个包是在jdk的lib下面,因此scope是system,由于${java.home}变量表示的是jre的位置,因此还要根据这个位置找到实际的tools.jar的路径并写在systemPath里。
为了能在写代码的时候用到java8的一些语法,这里配置了下编译插件使其支持java8。

创建Getter注解

package com.mythsman.test;

import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;

/**
 * Marker annotation for classes that should receive generated getter methods.
 *
 * <p>It is retained only in source ({@link RetentionPolicy#SOURCE}) because it
 * is consumed by an annotation processor at compile time and is not needed in
 * the class file or at runtime.
 */
@Target(ElementType.TYPE)
@Retention(RetentionPolicy.SOURCE)
public @interface Getter {
}

创建Getter注解的处理器

package com.mythsman.test;

import javax.annotation.processing.*;
import javax.lang.model.SourceVersion;
import javax.lang.model.element.TypeElement;
import java.util.Set;

/**
 * Skeleton annotation processor for {@code com.mythsman.test.Getter}.
 *
 * <p>This version only shows the two methods a processor normally overrides;
 * it performs no AST manipulation yet.
 */
@SupportedAnnotationTypes("com.mythsman.test.Getter")
@SupportedSourceVersion(SourceVersion.RELEASE_8)
public class GetterProcessor extends AbstractProcessor {

    /**
     * Receives the compile-time environment; delegates to the superclass.
     *
     * @param processingEnv environment supplied by the compiler
     */
    @Override
    public synchronized void init(ProcessingEnvironment processingEnv) {
        super.init(processingEnv);
    }

    /**
     * Entry point for each processing round.
     *
     * @param annotations annotation types requested to be processed
     * @param roundEnv    information about the current round
     * @return always {@code true}, claiming the supported annotations
     */
    @Override
    public boolean process(Set<? extends TypeElement> annotations, RoundEnvironment roundEnv) {
        return true;
    }
}

需要定义两个注解,一个表示该处理器需要处理的注解,另外一个表示该处理器支持的源码版本。然后需要着重实现两个方法,init跟process。init的主要用途是通过ProcessingEnvironment来获取编译阶段的一些环境信息;process主要是实现具体逻辑的地方,也就是对AST进行处理的地方。

package com.mythsman.test;

import java.util.Set;

import javax.annotation.processing.AbstractProcessor;
import javax.annotation.processing.Messager;
import javax.annotation.processing.ProcessingEnvironment;
import javax.annotation.processing.RoundEnvironment;
import javax.annotation.processing.SupportedAnnotationTypes;
import javax.annotation.processing.SupportedSourceVersion;
import javax.lang.model.SourceVersion;
import javax.lang.model.element.Element;
import javax.lang.model.element.TypeElement;
import javax.tools.Diagnostic;

import com.sun.source.tree.Tree;
import com.sun.tools.javac.api.JavacTrees;
import com.sun.tools.javac.code.Flags;
import com.sun.tools.javac.processing.JavacProcessingEnvironment;
import com.sun.tools.javac.tree.JCTree;
import com.sun.tools.javac.tree.TreeMaker;
import com.sun.tools.javac.tree.TreeTranslator;
import com.sun.tools.javac.util.Context;
import com.sun.tools.javac.util.List;
import com.sun.tools.javac.util.ListBuffer;
import com.sun.tools.javac.util.Name;
import com.sun.tools.javac.util.Names;


@SupportedAnnotationTypes("com.mythsman.test.Getter")
@SupportedSourceVersion(SourceVersion.RELEASE_8)
public class MyGetterProcessor extends AbstractProcessor 

	private Messager messager;
	private JavacTrees trees;
	private TreeMaker treeMaker;
	private Names names;

	@Override
	public synchronized void init(ProcessingEnvironment processingEnv) 
		super.init(processingEnv);
		this.messager = processingEnv.getMessager();
		this.trees = JavacTrees.instance(processingEnv);
		Context context = ((JavacProcessingEnvironment) processingEnv).getContext();
		this.treeMaker = TreeMaker.instance(context);
		this.names = Names.instance(context);
	

	@Override
	public boolean process(Set<? extends TypeElement> annotations, RoundEnvironment roundEnv) 
		messager.printMessage(Diagnostic.Kind.NOTE, " process called!!!");

		Set<? extends Element> set = roundEnv.getElementsAnnotatedWith(MyGetter.class);
        set.forEach(element -> 
            JCTree jcTree = trees.getTree(element);
            jcTree.accept(new TreeTranslator() 
                @Override
                public void visitClassDef(JCTree.JCClassDecl jcClassDecl) 
                    List<JCTree.JCVariableDecl> jcVariableDeclList = List.nil();

                    for (JCTree tree : jcClassDecl.defs) 
                        if (tree.getKind().equals(Tree.Kind.VARIABLE)) 
                            JCTree.JCVariableDecl jcVariableDecl = (JCTree.JCVariableDecl) tree;
                            jcVariableDeclList = jcVariableDeclList.append(jcVariableDecl);
                        
                    

                    jcVariableDeclList.forEach(jcVariableDecl -> 
                        messager.printMessage(Diagnostic.Kind.NOTE, jcVariableDecl.getName() + " has been processed");
                        jcClassDecl.defs = jcClassDecl.defs.prepend(makeGetterMethodDecl(jcVariableDecl));
                    );
                    super.visitClassDef(jcClassDecl);
                

            );
        );

        return true;
	
	
	 private JCTree.JCMethodDecl makeGetterMethodDecl(JCTree.JCVariableDecl jcVariableDecl) 

	        ListBuffer<JCTree.JCStatement> statements = new ListBuffer<>();
	        statements.append(treeMaker.Return(treeMaker.Select(treeMaker.Ident(names.fromString("this")), jcVariableDecl.getName())));
	        JCTree.JCBlock body = treeMaker.Block(0, statements.toList());
	        return treeMaker.MethodDef(treeMaker.Modifiers(Flags.PUBLIC), getNewMethodName(jcVariableDecl.getName()), jcVariableDecl.vartype, List.nil(), List.nil(), List.nil(), body, null);
	    

	    private Name getNewMethodName(Name name) 
	        String s = name.toString();
	        return names.fromString("get" + s.substring(0, 1).toUpperCase() + s.substring(1, name.length()));
	    


1. 首先我们要重写下init方法,从环境里提取一些关键的类:

private Messager messager;
    private JavacTrees trees;
    private TreeMaker treeMaker;
    private Names names;

    @Override
    public synchronized void init(ProcessingEnvironment processingEnv) 
        super.init(processingEnv);
        this.messager = processingEnv.getMessager();
        this.trees = JavacTrees.instance(processingEnv);
        Context context = ((JavacProcessingEnvironment) processingEnv).getContext();
        this.treeMaker = TreeMaker.instance(context);
        this.names = Names.instance(context);
    

我们提取了四个主要的类:

  • Messager主要是用来在编译期打log用的
  • JavacTrees提供了待处理的抽象语法树
  • TreeMaker封装了创建AST节点的一些方法
  • Names提供了创建标识符的方法

2. process方法

@Override
    public synchronized boolean process(Set<? extends TypeElement> annotations, RoundEnvironment roundEnv) 
        Set<? extends Element> set = roundEnv.getElementsAnnotatedWith(Getter.class);
        set.forEach(element -> 
            JCTree jcTree = trees.getTree(element);
            jcTree.accept(new TreeTranslator() 
                @Override
                public void visitClassDef(JCTree.JCClassDecl jcClassDecl) 
                    List<JCTree.JCVariableDecl> jcVariableDeclList = List.nil();

                    for (JCTree tree : jcClassDecl.defs) 
                        if (tree.getKind().equals(Tree.Kind.VARIABLE)) 
                            JCTree.JCVariableDecl jcVariableDecl = (JCTree.JCVariableDecl) tree;
                            jcVariableDeclList = jcVariableDeclList.append(jcVariableDecl);
                        
                    

                    jcVariableDeclList.forEach(jcVariableDecl -> 
                        messager.printMessage(Diagnostic.Kind.NOTE, jcVariableDecl.getName() + " has been processed");
                        jcClassDecl.defs = jcClassDecl.defs.prepend(makeGetterMethodDecl(jcVariableDecl));
                    );
                    super.visitClassDef(jcClassDecl);
                

            );
        );

        return true;
    

步骤大概是下面这样:

  1. 利用roundEnv的getElementsAnnotatedWith方法过滤出被Getter这个注解标记的类,并存入set
  2. 遍历这个set里的每一个元素,并生成jCTree这个语法树
  3. 创建一个TreeTranslator,并重写其中的visitClassDef方法,这个方法处理遍历语法树得到的类定义部分jcClassDecl
  4. 创建一个jcVariableDeclList保存类的成员变量
  5. 遍历jcTree的所有成员(包括成员变量和成员函数和构造函数),过滤出其中的成员变量,并添加进jcVariableDeclList
  6. 将jcVariableDeclList的所有变量转换成需要添加的getter方法,并添加进jcClassDecl的成员中
  7. 调用默认的遍历方法遍历处理后的jcClassDecl
  8. 利用上面的TreeTranslator去处理jcTree

接下来再实现makeGetterMethodDecl方法:

    private JCTree.JCMethodDecl makeGetterMethodDecl(JCTree.JCVariableDecl jcVariableDecl) 

        ListBuffer<JCTree.JCStatement> statements = new ListBuffer<>();
        statements.append(treeMaker.Return(treeMaker.Select(treeMaker.Ident(names.fromString("this")), jcVariableDecl.getName())));
        JCTree.JCBlock body = treeMaker.Block(0, statements.toList());
        return treeMaker.MethodDef(treeMaker.Modifiers(Flags.PUBLIC), getNewMethodName(jcVariableDecl.getName()), jcVariableDecl.vartype, List.nil(), List.nil(), List.nil(), body, null);
    

    private Name getNewMethodName(Name name) 
        String s = name.toString();
        return names.fromString("get" + s.substring(0, 1).toUpperCase() + s.substring(1, name.length()));
    

测试类

上面基本就是所有功能代码了,接下来我们要写一个类来测试一下(App.java):

package com.mythsman.test;

@Getter
public class App 
    private String value;

    private String value2;

    public App(String value) 
        this.value = value;
    

    public static void main(String[] args) 
        App app = new App("it works");
        System.out.println(app.getValue());
    

先不要急着构建,构建了肯定会失败,因为这原则上应该是两个项目。Getter.java是注解类没问题,但是GetterProcessor.java是处理器,App.java需要在编译期调用这个处理器,因此这两个东西是不能一起编译的

1. 先编译 Getter 和 GetterProcessor 得到 Processor 处理类
2. 使用 Processor 编译 App.java


# 1. Create the output directory for class files (no error if it already exists).
mkdir -p classes

# 2. Compile the annotation and its processor against tools.jar.
javac -cp "$JAVA_HOME/lib/tools.jar" com/mythsman/test/Getter* -d classes/

# 3. Compile App.java, running the processor during compilation.
javac -cp classes -d classes -processor com.mythsman.test.GetterProcessor com/mythsman/test/App.java

# 4. Show the members of the compiled class (optional, to inspect the result).
#    javap takes class names, so the output directory is passed via -cp.
javap -p -cp classes com.mythsman.test.App

# 5. Run the test class.
java -cp classes com.mythsman.test.App


1. 创建保存class文件的文件夹
2. 导入tools.jar,编译processor并输出
3. 编译App.java,并使用javac的-processor参数指定编译阶段的处理器GetterProcessor
4. 用javap显示编译后的App.class文件(非必须,方便看结果)
5. 执行测试类

进入项目的根目录,当前的目录结构应该是这样的:

.
├── pom.xml
├── src
│   ├── main
│   │   ├── java
│   │   │   ├── com
│   │   │   │   └── mythsman
│   │   │   │       └── test
│   │   │   │           ├── App.java
│   │   │   │           ├── Getter.java
│   │   │   │           └── GetterProcessor.java
│   │   │   └── compile.sh

调用compile.sh,输出如下:

Note: value has been processed
Note: value2 has been processed
Compiled from "App.java"
public class com.mythsman.test.App {
  private java.lang.String value;
  private java.lang.String value2;
  public java.lang.String getValue2();
  public java.lang.String getValue();
  public com.mythsman.test.App(java.lang.String);
  public static void main(java.lang.String[]);
}
it works

Note行就是在GetterProcessor类里通过messager打印的log,中间的是javap反编译的结果,最后一行表示测试调用成功。

Maven构建并打包

上面的测试部分其实是为了测试而测试,其实这应当是两个项目,一个是processor项目,这个项目应当被打成一个jar包,供调用者使用;另一个项目是app项目,这个项目是专门使用jar包的,他并不希望添加任何额外编译参数,就跟lombok的用法一样。
简单来说,就是我们希望把processor打成一个包,并且在使用时不需要添加额外参数。
那么如何在调用的时候不用加参数呢,其实我们知道java在编译的时候会去资源文件夹下读一个META-INF文件夹,这个文件夹下面除了MANIFEST.MF文件之外,还可以添加一个services文件夹,我们可以在这个文件夹下创建一个文件,文件名是javax.annotation.processing.Processor,文件内容是com.mythsman.test.GetterProcessor。
我们知道maven在编译前会先拷贝资源文件夹,然后当他在编译时候发现了资源文件夹下的META-INF/services文件夹时,他就会读取里面的文件,并将文件名所代表的接口用文件内容表示的类来实现。这就相当于做了-processor参数该做的事了。
当然这个文件我们并不希望调用者去写,而是希望在processor项目里集成,调用的时候能直接继承META-INF。

当前目录结构应该是这样的:

.
├── pom.xml
├── src
│   └── main
│       ├── java
│       │   └── com
│       │       └── mythsman
│       │           └── test
│       │               ├── Getter.java
│       │               └── GetterProcessor.java
│       └── resources
│           └── META-INF
│               └── services
│                   └── javax.annotation.processing.Processor

当然,我们还不能编译,因为processor项目并不需要把自己添加为processor(况且自己还没编译呢怎么调用自己)。。。完了,好像死循环了,自己在编译的时候不能添加services文件夹,但是又需要打的包里有services文件夹,这该怎么搞呢?
其实很简单,配置一下maven的插件就行,打开pom.xml,在project/build/标签里添加下面的配置:

    <build>
        <!-- Keep META-INF out of the normal resources step, so the processor
             is not registered as a service while it is itself being compiled. -->
        <resources>
            <resource>
                <directory>src/main/resources</directory>
                <excludes>
                    <exclude>META-INF/**/*</exclude>
                </excludes>
            </resource>
        </resources>
        <plugins>
            <!-- After compilation, just before packaging, copy all resources
                 (including META-INF/services) into target/classes so they end
                 up in the jar. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-resources-plugin</artifactId>
                <version>2.6</version>
                <executions>
                    <execution>
                        <id>process-META</id>
                        <phase>prepare-package</phase>
                        <goals>
                            <goal>copy-resources</goal>
                        </goals>
                        <configuration>
                            <outputDirectory>target/classes</outputDirectory>
                            <resources>
                                <resource>
                                    <directory>${basedir}/src/main/resources/</directory>
                                    <includes>
                                        <include>**/*</include>
                                    </includes>
                                </resource>
                            </resources>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
            <!-- ... other plugins (e.g. maven-compiler-plugin) unchanged ... -->
        </plugins>
    </build>

我们知道maven构建的第一步就是调用maven-resources-plugin插件的resources命令,将resources文件夹复制到target/classes中,那么我们配置一下resources标签,过滤掉META-INF文件夹,这样在编译的时候就不会找到services的配置了。然后我们在打包前(prepare-package生命周期)再利用maven-resources-plugin插件的copy-resources命令把services文件夹重新拷贝过来不就好了么。
这样配置好了,就可以直接执行mvn clean install打包并安装到本地仓库:

myths@pc:~/Desktop/test$ mvn clean install
[INFO] Scanning for projects...
[INFO] 
[INFO] ------------------------------------------------------------------------
[INFO] Building test 1.0-SNAPSHOT
[INFO] ------------------------------------------------------------------------
[INFO] 
[INFO] --- maven-clean-plugin:2.5:clean (default-clean) @ getter ---
[INFO] 
[INFO] --- maven-resources-plugin:2.6:resources (default-resources) @ getter ---
[INFO] Using 'UTF-8' encoding to copy filtered resources.
[INFO] Copying 0 resource
[INFO] 
[INFO] --- maven-compiler-plugin:3.1:compile (default-compile) @ getter ---
[INFO] Changes detected - recompiling the module!
[INFO] Compiling 2 source files to /home/myths/Desktop/test/target/classes
[INFO] 
[INFO] --- maven-resources-plugin:2.6:testResources (default-testResources) @ getter ---
[INFO] Using 'UTF-8' encoding to copy filtered resources.
[INFO] skip non existing resourceDirectory /home/myths/Desktop/test/src/test/resources
[INFO] 
[INFO] --- maven-compiler-plugin:3.1:testCompile (default-testCompile) @ getter ---
[INFO] No sources to compile
[INFO] 
[INFO] --- maven-surefire-plugin:2.12.4:test (default-test) @ getter ---
[INFO] No tests to run.
[INFO] 
[INFO] --- maven-resources-plugin:2.6:copy-resources (process-META) @ getter ---
[INFO] Using 'UTF-8' encoding to copy filtered resources.
[INFO] Copying 1 resource
[INFO] 
[INFO] --- maven-jar-plugin:2.4:jar (default-jar) @ getter ---
[INFO] Building jar: /home/myths/Desktop/test/target/getter-1.0-SNAPSHOT.jar
[INFO] 
[INFO] --- maven-install-plugin:2.4:install (default-install) @ getter ---
[INFO] Installing /home/myths/Desktop/test/target/getter-1.0-SNAPSHOT.jar to /home/myths/.m2/repository/com/mythsman/test/getter/1.0-SNAPSHOT/getter-1.0-SNAPSHOT.jar
[INFO] Installing /home/myths/Desktop/test/pom.xml to /home/myths/.m2/repository/com/mythsman/test/getter/1.0-SNAPSHOT/getter-1.0-SNAPSHOT.pom
[INFO] ------------------------------------------------------------------------
[INFO] BUILD SUCCESS
[INFO] ------------------------------------------------------------------------
[INFO] Total time: 3.017 s
[INFO] Finished at: 2017-12-19T19:57:04+08:00
[INFO] Final Memory: 16M/201M
[INFO] ------------------------------------------------------------------------

可以看到这里的process-META作用生效。

调用jar包测试

重新创建一个测试项目app:

.
├── pom.xml
└── src
    └── main
        └── java
            └── com
                └── mythsman
                    └── test
                        └── App.java

pom.xml:

<!-- POM of the consuming "app" project: it only depends on the getter jar and
     passes no extra compiler arguments. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.mythsman.test</groupId>
    <artifactId>app</artifactId>
    <version>1.0-SNAPSHOT</version>
    <packaging>jar</packaging>

    <name>main</name>
    <url>http://maven.apache.org</url>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>

    <dependencies>
        <!-- The processor jar built and installed by the getter project. -->
        <dependency>
            <groupId>com.mythsman.test</groupId>
            <artifactId>getter</artifactId>
            <version>1.0-SNAPSHOT</version>
        </dependency>
    </dependencies>
    <build>
        <plugins>
            <!-- Compile with Java 8 source/target. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.1</version>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>

App.java:

package com.mythsman.test;

@Getter
public class App 
    private String value;

    private String value2;

    public App(String value) 
        this.value = value;
    

    public static void main(String[] args) 
        App app = new App("it works");
        System.out.println(app.getValue());
    

编译并执行:

mvn clean compile && java -cp target/classes com.mythsman.test.App

最后就会在构建成功后打印"it works"。

以上是关于java AST 抽象语法树的主要内容,如果未能解决你的问题,请参考以下文章

java AST 抽象语法树

jsqlparser:基于抽象语法树(AST)遍历SQL语句的语法元素

AST抽象语法树

AST 抽象语法树

解析树和抽象语法树 (AST) 有啥区别?

Nebula Graph 源码解读系列 | Vol.02 详解 Validator