一个 DAG 工作流引擎的设计实现源代码实例

Posted 禅与计算机程序设计艺术

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍一个 DAG 工作流引擎的设计实现及其源代码实例,希望对你有一定的参考价值。

任务Task

package com.bytedance.ecop.kunlun.engine.scheduler;

import java.util.UUID;

import static java.lang.Thread.sleep;

/**
 * A single node of the workflow graph.
 *
 * <p>A task carries an id, a name, an integer state and a timeout value. The only state
 * value this class interprets is {@code 1}, which {@link #hasExecuted()} treats as
 * "already executed". The timeout is stored and exposed through accessors but is not
 * consulted by {@link #execute(TaskCallBack)}.
 *
 * <p>The class does not override {@code equals}/{@code hashCode}, so hash-based
 * collections distinguish tasks by object identity.
 */
public class Task implements Executor {
    private Long id;
    private String name;
    private int state;
    private long timeout;

    /** Creates a task with all fields left at their Java default values. */
    public Task() {
    }

    /**
     * Creates a fully initialised task.
     *
     * @param id      task identifier
     * @param name    task name, used in the log line printed by {@link #execute(TaskCallBack)}
     * @param state   initial state; {@code 1} means "already executed"
     * @param timeout timeout value (stored only; not enforced by this class)
     */
    public Task(Long id, String name, int state, long timeout) {
        this.id = id;
        this.name = name;
        this.state = state;
        this.timeout = timeout;
    }

    public Long getId() {
        return id;
    }

    public void setId(Long id) {
        this.id = id;
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public int getState() {
        return state;
    }

    public void setState(int state) {
        this.state = state;
    }

    public long getTimeout() {
        return timeout;
    }

    public void setTimeout(long timeout) {
        this.timeout = timeout;
    }

    /**
     * Runs the task: prints a log line, marks the task as executed, simulates work by
     * sleeping for three seconds, and then hands a freshly built
     * {@link TaskInstanceResult} to the callback.
     *
     * <p>If the sleeping thread is interrupted, the sleep ends early and the interrupt
     * status of the thread is restored so that callers further up the stack can still
     * observe the interruption. The callback is invoked in either case.
     *
     * @param callBack receiver of the task result; must not be {@code null}
     * @return always {@code true}
     */
    public boolean execute(TaskCallBack callBack) {
        System.out.println("Task id: [" + id + "], " + "task name: [" + name + "] is running");
        // Marked as executed before the simulated work starts.
        state = 1;
        try {
            sleep(3000L);
        } catch (InterruptedException e) {
            // Do not swallow the interruption: re-assert the flag for the caller.
            Thread.currentThread().interrupt();
        }

        // NOTE(review): this instance is populated but not passed anywhere afterwards.
        TaskInstance taskInstance = new TaskInstance();
        taskInstance.setTaskId(id);
        String taskInstanceId = UUID.randomUUID().toString();
        taskInstance.setTaskInstanceId(taskInstanceId);

        TaskInstanceResult taskInstanceResult = new TaskInstanceResult("Task[" + id + "], taskInstanceId=" + taskInstanceId + " TaskInstanceResult = " + UUID.randomUUID());
        callBack.invoke(taskInstanceResult);
        return true;
    }

    /**
     * Tells whether this task has already been executed.
     *
     * @return {@code true} if the state equals {@code 1}
     */
    public boolean hasExecuted() {
        return state == 1;
    }
}



package com.bytedance.ecop.kunlun.engine.scheduler;

/**
 * Contract for something that can be executed and that receives a {@link TaskCallBack}
 * when it is.
 */
public interface Executor {
/**
 * Executes this unit of work.
 *
 * @param callBack callback handed to the implementation
 * @return a boolean flag; presumably signals success, but the interface does not define
 *         its meaning - confirm against implementations
 */
boolean execute(TaskCallBack callBack);
}


package com.bytedance.ecop.kunlun.engine.scheduler;

/**
 * Callback that receives a {@link TaskInstanceResult}.
 */
public interface TaskCallBack {
/**
 * Receives a task result.
 *
 * @param result the result object passed to the callback
 * @return an arbitrary object; the interface does not define what callers do with it
 */
Object invoke(TaskInstanceResult result);
}


任务运行实例TaskInstance

package com.bytedance.ecop.kunlun.engine.scheduler;

/**
 * Mutable data holder describing one run of a task: the instance id, the id of the task
 * it belongs to, a name and an integer state. The class attaches no meaning to the
 * state value itself.
 */
public class TaskInstance {

    private String taskInstanceId;
    private Long taskId;
    private String name;
    private int state;

    /** Creates an instance with every field at its Java default value. */
    public TaskInstance() {
    }

    /**
     * Creates an instance with every field supplied.
     *
     * @param taskInstanceId identifier of this run
     * @param taskId         identifier of the owning task
     * @param name           name of the instance
     * @param state          state code
     */
    public TaskInstance(String taskInstanceId, Long taskId, String name, int state) {
        this.state = state;
        this.name = name;
        this.taskId = taskId;
        this.taskInstanceId = taskInstanceId;
    }

    // ---- read accessors ----

    /** @return identifier of this run */
    public String getTaskInstanceId() {
        return this.taskInstanceId;
    }

    /** @return identifier of the owning task */
    public Long getTaskId() {
        return this.taskId;
    }

    /** @return name of the instance */
    public String getName() {
        return this.name;
    }

    /** @return state code */
    public int getState() {
        return this.state;
    }

    // ---- write accessors ----

    /** @param taskInstanceId new identifier of this run */
    public void setTaskInstanceId(String taskInstanceId) {
        this.taskInstanceId = taskInstanceId;
    }

    /** @param taskId new identifier of the owning task */
    public void setTaskId(Long taskId) {
        this.taskId = taskId;
    }

    /** @param name new name of the instance */
    public void setName(String name) {
        this.name = name;
    }

    /** @param state new state code */
    public void setState(int state) {
        this.state = state;
    }
}


package com.bytedance.ecop.kunlun.engine.scheduler;

/**
 * Wrapper around the textual result produced by a task run.
 */
public class TaskInstanceResult {

    /** Result payload as text. */
    String resultJson;

    /**
     * Creates a result holding the given payload.
     *
     * @param resultJson result payload
     */
    public TaskInstanceResult(String resultJson) {
        this.resultJson = resultJson;
    }

    /** @return the result payload */
    public String getResultJson() {
        return this.resultJson;
    }

    /** @param resultJson new result payload */
    public void setResultJson(String resultJson) {
        this.resultJson = resultJson;
    }

    /**
     * Renders the result as {@code TaskInstanceResult{resultJson='<payload>'}}.
     *
     * @return textual form of this result
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("TaskInstanceResult{");
        text.append("resultJson='").append(resultJson).append('\'');
        text.append('}');
        return text.toString();
    }
}

DAG流程定义 Process

package com.bytedance.ecop.kunlun.engine.scheduler;

import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

/**
 * Definition of a DAG workflow: a set of tasks plus, for each task, the set of tasks
 * that must have executed before it (its predecessors).
 *
 * <p>Tasks are compared the way {@link HashSet}/{@link HashMap} compare them, i.e. via
 * {@code equals}/{@code hashCode}. Only direct self-dependencies are rejected; longer
 * cycles are not detected by this class.
 */
public class Process {
    private Long graphId;
    /** All tasks registered in this workflow. */
    private Set<Task> tasks;
    /** Maps a task to the predecessors it depends on. */
    private Map<Task, Set<Task>> map;

    /** Creates an empty workflow with no tasks and no edges. */
    public Process() {
        this.tasks = new HashSet<Task>();
        this.map = new HashMap<Task, Set<Task>>();
    }

    /**
     * Declares that {@code task} depends on {@code prev}.
     *
     * @param task the dependent task
     * @param prev the task that must have executed first
     * @throws IllegalArgumentException if either task has not been added to this
     *         workflow, if {@code task} and {@code prev} are the same task, or if the
     *         edge already exists
     */
    public void addEdge(Task task, Task prev) {
        if (!tasks.contains(task) || !tasks.contains(prev)) {
            throw new IllegalArgumentException("Both tasks must be added to the process before an edge is created");
        }
        // A task waiting on itself could never become runnable.
        if (task == null ? prev == null : task.equals(prev)) {
            throw new IllegalArgumentException("A task cannot depend on itself");
        }
        Set<Task> prevs = map.get(task);
        if (prevs == null) {
            prevs = new HashSet<Task>();
            map.put(task, prevs);
        }
        // Set.add reports whether the element was new, so no separate contains() is needed.
        if (!prevs.add(prev)) {
            throw new IllegalArgumentException("The edge already exists");
        }
    }

    /**
     * Registers a task in this workflow.
     *
     * @param task the task to add
     * @throws IllegalArgumentException if the task is already registered
     */
    public void addTask(Task task) {
        if (!tasks.add(task)) {
            throw new IllegalArgumentException("The task has already been added");
        }
    }

    /**
     * Removes a task together with every edge that starts or ends at it. Does nothing if
     * the task is not registered.
     *
     * @param task the task to remove
     */
    public void remove(Task task) {
        // Removing from the task set is what actually unregisters the task; without it
        // the task would still be scheduled and could not be added again.
        if (!tasks.remove(task)) {
            return;
        }
        map.remove(task);
        for (Set<Task> prevs : map.values()) {
            prevs.remove(task);
        }
    }

    /** @return the internal (live) set of registered tasks */
    public Set<Task> getTasks() {
        return tasks;
    }

    /** @param tasks replacement task set; stored as-is without copying */
    public void setTasks(Set<Task> tasks) {
        this.tasks = tasks;
    }

    /** @return the internal (live) map from each task to its predecessors */
    public Map<Task, Set<Task>> getMap() {
        return map;
    }

    /** @param map replacement dependency map; stored as-is without copying */
    public void setMap(Map<Task, Set<Task>> map) {
        this.map = map;
    }
}

DAG流程实例 ProcessInstance

package com.bytedance.ecop.kunlun.engine.scheduler;

import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

/**
 * Identifies one instance of a workflow: the id of the process it belongs to and the id
 * of the instance itself.
 */
public class ProcessInstance {

    /** Identifier of the process this instance belongs to. */
    Long processId;

    /** Identifier of this instance. */
    String processInstanceId;

    /**
     * Creates an instance record.
     *
     * @param processId         identifier of the owning process
     * @param processInstanceId identifier of this instance
     */
    public ProcessInstance(Long processId, String processInstanceId) {
        this.processInstanceId = processInstanceId;
        this.processId = processId;
    }

    /** @return identifier of the owning process */
    public Long getProcessId() {
        return this.processId;
    }

    /** @return identifier of this instance */
    public String getProcessInstanceId() {
        return this.processInstanceId;
    }

    /** @param processId new identifier of the owning process */
    public void setProcessId(Long processId) {
        this.processId = processId;
    }

    /** @param processInstanceId new identifier of this instance */
    public void setProcessInstanceId(String processInstanceId) {
        this.processInstanceId = processInstanceId;
    }
}

DAG工作流程调度器

package com.bytedance.ecop.kunlun.engine.scheduler;

import java.util.ArrayList;
import java.util.List;
import java.util.Set;

/**
 * Runs the tasks of a {@link Process} in dependency order on the calling thread.
 *
 * <p>The scheduler works in rounds: each round collects every task that has not executed
 * yet and whose predecessors have all executed, runs those tasks one after another, and
 * repeats. It stops as soon as a round finds nothing to run.
 */
public class Scheduler {

    /**
     * Executes the given workflow round by round until no runnable task is left.
     *
     * @param process the workflow to execute
     */
    public void schedule(Process process) {
        // 1. Build the list of runnable tasks for the first round.
        List<Task> runnable = collectRunnable(process);
        while (!runnable.isEmpty()) {
            // 2. Run everything that is runnable in this round.
            for (Task current : runnable) {
                current.execute(printingCallBack());
            }
            runnable = collectRunnable(process);
        }
    }

    /** Collects the not-yet-executed tasks whose predecessors have all executed. */
    private List<Task> collectRunnable(Process process) {
        List<Task> ready = new ArrayList<Task>();
        for (Task candidate : process.getTasks()) {
            if (candidate.hasExecuted()) {
                continue;
            }
            if (allExecuted(process.getMap().get(candidate))) {
                ready.add(candidate);
            }
        }
        return ready;
    }

    /** Returns true when the set is null, empty, or contains only executed tasks. */
    private boolean allExecuted(Set<Task> predecessors) {
        if (predecessors == null) {
            return true;
        }
        for (Task predecessor : predecessors) {
            if (!predecessor.hasExecuted()) {
                return false;
            }
        }
        return true;
    }

    /** Creates a callback that prints the result to standard output and returns null. */
    private TaskCallBack printingCallBack() {
        return new TaskCallBack() {
            @Override
            public Object invoke(TaskInstanceResult taskInstanceResult) {
                System.out.println(taskInstanceResult);
                return null;
            }
        };
    }

}

测试运行效果

package com.bytedance.ecop.kunlun.engine.scheduler;

/**
 * Demo entry point: builds a six-task workflow, wires up its dependencies and runs it
 * through the {@link Scheduler}.
 */
public class MainApplication {

    /**
     * Builds and schedules the sample workflow.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // Create the workflow.
        Process process = new Process();

        // Register tasks task1..task6 (ids 1..6, state 0, timeout -1).
        Task[] tasks = new Task[6];
        for (int index = 0; index < tasks.length; index++) {
            int number = index + 1;
            tasks[index] = new Task(Long.valueOf(number), "task" + number, 0, -1);
        }
        for (Task task : tasks) {
            process.addTask(task);
        }

        // Each pair is {dependent task number, predecessor task number}.
        int[][] edges = {
            {1, 2},
            {1, 5},
            {6, 2},
            {2, 3},
            {2, 4},
        };
        for (int[] edge : edges) {
            process.addEdge(tasks[edge[0] - 1], tasks[edge[1] - 1]);
        }

        // Create the scheduler and run the DAG.
        new Scheduler().schedule(process);
    }

}

Task id: [4], task name: [task4] is running
TaskInstanceResult{resultJson='Task[4], taskInstanceId=b9fc6d44-d564-4131-8995-debf9a90f954 TaskInstanceResult = 37f41979-65b8-4c2d-baf1-b13176c19d02'}
Task id: [5], task name: [task5] is running
TaskInstanceResult{resultJson='Task[5], taskInstanceId=3da4db7f-4a63-482d-900c-64db48030627 TaskInstanceResult = 4f71de6d-8655-4d0f-8a64-c3c8ce8bdd23'}
Task id: [3], task name: [task3] is running
TaskInstanceResult{resultJson='Task[3], taskInstanceId=145a130a-5e39-41ea-98f3-87f62a8dfff8 TaskInstanceResult = d8746617-1df2-46f8-a129-d75070478228'}
Task id: [2], task name: [task2] is running
TaskInstanceResult{resultJson='Task[2], taskInstanceId=f7640b21-58b1-45bd-a886-aac425644d8e TaskInstanceResult = 7182f6c6-6395-4046-b50c-0df19f6de4ae'}
Task id: [1], task name: [task1] is running
TaskInstanceResult{resultJson='Task[1], taskInstanceId=be9ac46f-df32-4da8-b54c-74698c39d309 TaskInstanceResult = ac78327d-8825-4651-b309-c1bb5da6416c'}
Task id: [6], task name: [task6] is running
TaskInstanceResult{resultJson='Task[6], taskInstanceId=cb138e08-6ebe-4b4b-87ab-a1b230a06740 TaskInstanceResult = c0be1628-3a0c-4bfd-b863-500ca512ce80'}

Process finished with exit code 0


以上是关于一个 DAG 工作流引擎的设计实现源代码实例的主要内容,如果未能解决你的问题,请参考以下文章:

火山引擎 DataLeap 数据调度实例的 DAG 优化方案:功能设计

一天一门编程语言:用 Go 语言实现一个 DAG 任务调度系统的 API 接口代码

EasyScheduler调度系统的架构原理及实现思路

代码片--实现一个简单的模版方法设计模式(获取一段程序运行的时间)

什么是工作流引擎

驰骋工作流引擎设计系列02