Paper Code Walkthrough: Hierarchical Reinforcement Learning for Scarce Medical Resource Allocation
Posted by UQI-LIUWJ
Companion post: Paper notes on Hierarchical Reinforcement Learning for Scarce Medical Resource Allocation (UQI-LIUWJ's blog, CSDN)
Code
KYHKL-Q/Hierarchical-RL (github.com)
1 RL_train.py
This file trains the reinforcement-learning module.
The paper covers two resources, masks and hospital beds, and the two models are alike in most respects, so for brevity I only walk through the mask part.
1.1 Imports
import numpy as np
import json
import torch
import random
import copy
import os
from torch import nn,optim
from simulator import simulator
from reward_fun import reward_fun
1.2 The RL_train class
1.2.1 __init__
def __init__(self,
             action_lr=0.001, #learning rate of the action net (the saliency-ranking DQN in the paper)
             Actor_lr=0.001, #learning rate of the Actor (which outputs the satisfy factor)
             Critic_lr=0.002, #learning rate of the Critic
             decay_rate=0.5, #discount factor for future rewards
             batch_size=8, #batch size
             pool_volume=128, #capacity of the experience-replay buffer
             action_explore_factor=0.5, #exploration rate of the action net (ε-greedy)
             Actor_explore_factor=0.05, #scale of the Actor's Gaussian exploration noise
             soft_replace_rate=0.1, #soft-update rate: how far each target net
                                    #moves toward its eval net per update (as in DDPG)
             train_steps=150, #total number of training episodes (an epoch count of sorts)
             interval=48, #number of 30-minute simulation ticks between two decisions
             mask_total=1000000, #total number of masks
             mask_quality=0.9, #fraction of virus a medical mask filters out
             mask_lasting=48, #mask lifetime (in ticks)
             city='city_sample' #name of the studied area
             ):
print('Initializing...')
self.action_lr=action_lr
self.Actor_lr=Actor_lr
self.Critic_lr=Critic_lr
self.decay_rate=decay_rate
self.batch_size=batch_size
self.pool_volume=pool_volume
self.action_explore_factor=action_explore_factor
self.Actor_explore_factor=Actor_explore_factor
self.soft_replace_rate=soft_replace_rate
self.train_steps=train_steps
self.interval=interval
self.mask_total=mask_total
self.mask_quality=mask_quality
self.mask_lasting=mask_lasting
self.city=city
#Initialize state and replay buffer
with open(os.path.join('../data',self.city,'start.json'),'r') as f:
    self.start = np.array(json.load(f))
#start.json is not shipped with the code; presumably it holds the initial
#population of each of the 8 compartments in each region, i.e. a region_num*8 array
from Net.Anet import Anet
#actor network ——> outputs the satisfy factor
from Net.Qnet import Qnet
#action network ——> chooses which saliency ranking to use
from Net.RNN import RNN
#estimates, from the three observable states
'''
It (infected and tested positive)
Ih (infected and hospitalized)
D  (dead)
'''
#the three states hidden from the environment:
'''
E  (exposed)
Iu (infected but not yet tested)
R  (recovered)
'''
from Net.Cnet import Cnet
#critic network
self.region_num = 161
#number of regions; supposed to be modified according to the studied city
self.pool_pointer=0
#index where the next record will be written in the replay buffer
self.pool_count=0
#number of records currently in the replay buffer
self.pool_state=np.zeros([self.pool_volume,self.region_num,8])
#states stored in the replay buffer: buffer_size * region_num * 8
self.pool_mask_action=np.zeros(self.pool_volume)
#which saliency ranking was chosen
self.pool_mask_perc=np.zeros(self.pool_volume)
#the mask satisfy factor
self.pool_reward=np.zeros(self.pool_volume)
#the single-step reward
self.pool_state1=np.zeros([self.pool_volume,self.region_num,8])
#the next state
self.current_state=self.start
#current state
self.next_state=self.current_state
#next state
#Initialize the networks
torch.manual_seed(1234)
torch.cuda.manual_seed(1234)
self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
self.Mask_action_eval=Qnet().to(self.device)
self.Mask_action_eval.train()
self.Mask_action_target=copy.deepcopy(self.Mask_action_eval)
#deep copy: the eval network and the target network do not share parameters
self.Mask_action_target.eval()
#the target net is not trained (same below)
self.Mask_action_optimizer=optim.Adam(self.Mask_action_eval.parameters(),lr=self.action_lr)
#"action" is the DQN in the paper's model diagram, which learns the saliency ranking
self.Mask_Actor_eval=Anet().to(self.device)
self.Mask_Actor_eval.train()
self.Mask_Actor_target=copy.deepcopy(self.Mask_Actor_eval)
self.Mask_Actor_target.eval()
self.Mask_Actor_optimizer=optim.Adam(self.Mask_Actor_eval.parameters(),lr=self.Actor_lr)
#the actor in the paper's model diagram
self.Mask_Critic_eval=Cnet(self.region_num).to(self.device)
self.Mask_Critic_eval.train()
self.Mask_Critic_target=copy.deepcopy(self.Mask_Critic_eval)
self.Mask_Critic_target.eval()
self.Mask_Critic_optimizer=optim.Adam(self.Mask_Critic_eval.parameters(),lr=self.Critic_lr)
#the critic in the paper's model diagram
#Initialize the simulator
#simulates the disease-state transitions of every region
self.simulator=simulator(city=self.city)
self.simulator.reset(self.start)
#copies the initial per-region populations from start into the simulator's
#nodes list; each element of nodes is a node object with an id and the
#8 compartments (S,E,Iu,It,Ia,Ih,R,D)
print('Initializing done!')
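The soft_replace_rate set here drives the DDPG-style soft target updates in train(): target ← (1−τ)·target + τ·eval. The repo performs them through eval() on string-built attribute paths; as a minimal sketch (my own, not the repo's code), an equivalent and more idiomatic update would be:

import torch

@torch.no_grad()
def soft_update(target_net, eval_net, tau):
    # target <- (1 - tau) * target + tau * eval, parameter by parameter
    for t_param, e_param in zip(target_net.parameters(), eval_net.parameters()):
        t_param.mul_(1.0 - tau).add_(tau * e_param)

# e.g. soft_update(self.Mask_action_target, self.Mask_action_eval, self.soft_replace_rate)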
1.2.2 train
def train(self):
    print('Start training...')
    loss_record = list()
    train_count = 0 #number of episodes trained so far
    while train_count < self.train_steps:
        self.current_state=self.start
        #reset to the initial per-region populations (see the note on start.json above)
        self.next_state=self.current_state
        #next state
        self.is_end=False
        #whether the episode is over
        end_count = 0
        #sampling
        step_count=0
        while ((not self.is_end) and
               step_count <= 60/(self.interval/48)):
            #cap each episode at roughly 60 simulated days
            mask_action_out = self.Mask_action_eval(
                torch.FloatTensor(self.current_state[:,[1,2,3,5,6,7]])
                .to(self.device).unsqueeze(0))
            '''
            Select the observable columns E,Iu,It,Ih,R,D
            and unsqueeze to [1,region_num,6].
            "action" is the DQN in the paper's model diagram, which learns the
            saliency ranking; "mask" marks the mask branch.
            mask_action_out -> [1,7]: a score for each candidate ranking principle.
            The paper uses 3 ranking criteria that may be combined freely,
            so there are 2^3-1=7 options (at least one must be chosen).
            '''
            #select an action through epsilon-greedy
            mask_action=torch.argmax(mask_action_out).cpu().item()
            #pick the highest-scoring ranking principle
            rand_temp=random.random()
            if(rand_temp>(1-self.action_explore_factor)):
                mask_action = int(7 * random.random())
            #explore with probability action_explore_factor, otherwise exploit
            print('mask_action:{}'.format(mask_action))
            mask_perc = self.Mask_Actor_eval(
                torch.FloatTensor(self.current_state[:,[1,2,3,5,6,7]])
                .to(self.device).unsqueeze(0))
            #select E,Iu,It,Ih,R,D and unsqueeze to [1,region_num,6]
            #mask_perc is a [1,1] value in (0,1): the satisfy factor f from the paper
            mask_perc = mask_perc.squeeze(0).cpu().detach().item()\
                        + self.Actor_explore_factor * np.random.randn()
            mask_perc_clip = np.clip(mask_perc, 0.1, 1)
            print('mask_perc:{}'.format(mask_perc_clip))
            #the actor's output plus Gaussian exploration noise gives the final
            #satisfy factor f, which is clipped into [0.1,1]
            #get the next state through simulation
            #(the bed-related arguments of the full code are dropped here,
            # since this walkthrough only keeps the mask branch)
            self.next_state, _ = self.simulator.simulate(
                sim_type='Policy_a',
                interval=self.interval,
                mask_on=True,
                mask_total=self.mask_total,
                mask_quality=self.mask_quality,
                mask_lasting=self.mask_lasting,
                mask_action=mask_action,
                mask_satisfy_perc=mask_perc_clip)
            #the new state of every region
            if(self.simulator.full and end_count==0):
                end_count=1
                #full indicates whether the hospital beds are saturated
            if((not self.simulator.full) and end_count==1):
                self.is_end=True
                #the episode ends once beds have been full and then free up again
            print('Is_end:{}'.format(self.is_end))
            #get single step reward
            reward=reward_fun(self.current_state,self.next_state)
            #single-step reward function
            print('Reward:{}'.format(reward))
            #put the sample into the replay buffer
            #(note: full is only driven by the bed branch of the full code;
            # with that branch stripped it stays False, so a mask-only run
            # would need this check relaxed)
            if(self.simulator.full):
                self.pool_state[self.pool_pointer]=self.current_state
                self.pool_mask_action[self.pool_pointer]=mask_action
                self.pool_mask_perc[self.pool_pointer]=mask_perc
                self.pool_reward[self.pool_pointer]=reward
                self.pool_state1[self.pool_pointer]=self.next_state
                self.pool_pointer=(self.pool_pointer+1)%self.pool_volume
                if (self.pool_count<self.pool_volume):
                    self.pool_count=self.pool_count+1
                print('Sampling: {} samples in pool now'.format(self.pool_count))
            #the current state, the chosen saliency ranking, the satisfy factor,
            #the single-step reward and the next state go into the replay buffer
            #mini batch sampling and updating the parameters
            if (self.pool_count>=self.batch_size):
                sample_index=random.sample(range(self.pool_count),self.batch_size)
                #draw batch_size indices from the replay buffer at random
                sample_state=torch.FloatTensor(self.pool_state[sample_index]).to(self.device)
                #current states of the sampled batch
                sample_mask_action=self.pool_mask_action[sample_index]
                #mask saliency rankings of the sampled batch
                sample_reward=self.pool_reward[sample_index]
                #single-step rewards of the sampled batch
                sample_mask_perc=torch.FloatTensor(self.pool_mask_perc[sample_index]).to(self.device).unsqueeze(1)
                #mask satisfy factors of the sampled batch
                sample_state1=torch.FloatTensor(self.pool_state1[sample_index]).to(self.device)
                #next states of the sampled batch
                mask_action_eval=self.Mask_action_eval(sample_state[:,:,[1,2,3,5,6,7]])
                #eval net of the saliency ranking (DQN)
                #sample_state[:,:,[1,2,3,5,6,7]] ————> [batch_size,region_num,6]
                #output: [batch_size,7]
                mask_action_target=self.Mask_action_target(sample_state1[:,:,[1,2,3,5,6,7]])
                #target net of the saliency ranking, fed the next states
                #output: [batch_size,7]
                mask_action_max = torch.argmax(mask_action_target, dim=-1).cpu()
                #per sample, the index of the highest-valued ranking in the next state
                mask_perc_eval=self.Mask_Actor_eval(sample_state[:,:,[1,2,3,5,6,7]])
                #eval net of the satisfy factor (actor)
                #output: [batch_size,1]
                mask_perc_target=self.Mask_Actor_target(sample_state1[:,:,[1,2,3,5,6,7]])
                #target net of the satisfy factor, fed the next states
                #output: [batch_size,1]
                mask_reward_eval = self.Mask_Critic_eval(sample_state[:,:,[1,2,3,5,6,7]], sample_mask_perc)
                #eval net of the critic
                #output: [batch_size,1]
                mask_reward_target=self.Mask_Critic_target(sample_state1[:,:,[1,2,3,5,6,7]],mask_perc_target)
                #target net of the critic
                #output: [batch_size,1]
                loss = 0
                for i in range(self.batch_size):
                    y = sample_reward[i]
                    y = y + \
                        self.decay_rate * (
                        mask_action_target[i][mask_action_max[i]] +
                        mask_reward_target[i])
                    #TD target: reward + discount * (best target-DQN value + target-critic value)
                    #(note: y is not detached here; standard DQN/DDPG treats the target as a constant)
                    loss = loss + \
                           (mask_reward_eval[i] +
                            mask_action_eval[i][int(sample_mask_action[i])]
                            - y)** 2
                    #squared error against the eval nets
                loss=loss/self.batch_size
                print('Loss:{}'.format(loss.cpu().item()))
                self.Mask_action_optimizer.zero_grad()
                self.Mask_Critic_optimizer.zero_grad()
                loss.backward()
                self.Mask_action_optimizer.step()
                self.Mask_Critic_optimizer.step()
                #train the critic and the action (DQN) nets
                mask_reward0 = self.Mask_Critic_eval(sample_state[:,:,[1,2,3,5,6,7]], mask_perc_eval)
                loss_pg = - torch.mean(mask_reward0)
                #policy gradient ascent, hence the minus sign
                print('Loss_pg:{}'.format(loss_pg.cpu().item()))
                self.Mask_Actor_optimizer.zero_grad()
                loss_pg.backward()
                self.Mask_Actor_optimizer.step()
                #train the actor
                loss_record.append([loss.cpu().item(),loss_pg.cpu().item()])
                #soft update of the target networks
                #action, actor and critic targets are all updated as
                #target = (1-soft_replace_rate)*target + soft_replace_rate*eval
                #(the eval()-string approach below is the repo's; see the
                # idiomatic sketch at the end of section 1.2.1)
                for x in self.Mask_action_target.state_dict().keys():
                    eval('self.Mask_action_target.'+x+'.data.mul_(1-self.soft_replace_rate)')
                    eval('self.Mask_action_target.'+x+'.data.add_(self.soft_replace_rate*self.Mask_action_eval.'+x+'.data)')
                for x in self.Mask_Actor_target.state_dict().keys():
                    eval('self.Mask_Actor_target.'+x+'.data.mul_((1-self.soft_replace_rate))')
                    eval('self.Mask_Actor_target.'+x+'.data.add_(self.soft_replace_rate*self.Mask_Actor_eval.'+x+'.data)')
                for x in self.Mask_Critic_target.state_dict().keys():
                    eval('self.Mask_Critic_target.'+x+'.data.mul_((1-self.soft_replace_rate))')
                    eval('self.Mask_Critic_target.'+x+'.data.add_(self.soft_replace_rate*self.Mask_Critic_eval.'+x+'.data)')
                train_count += 1
                print('Training: epoch {}/{}\n'.format(train_count, self.train_steps))
                if train_count == self.train_steps:
                    break
            #update the state
            self.current_state=self.next_state
            step_count+=1
#save the models
torch.save(self.Mask_action_eval.state_dict(), os.path.join('../model',self.city,'mask_action_model.pth'))
torch.save(self.Mask_Actor_eval.state_dict(), os.path.join('../model',self.city,'mask_Actor_model.pth'))
'''
with open(os.path.join('../model',self.city,'loss.json'),'w') as f:
json.dump(loss_record,f)
'''
print('Training complete!')
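In equations, my reading of the mini-batch update above: with discount γ (decay_rate), a sampled transition (s_i, a_i, f_i, r_i, s'_i) and actor μ, the loop builds one joint TD target for the DQN and the critic,

$$ y_i = r_i + \gamma\Big(\max_a Q_{\text{target}}(s'_i,a) + C_{\text{target}}\big(s'_i,\mu_{\text{target}}(s'_i)\big)\Big) $$

$$ \mathcal{L} = \frac{1}{B}\sum_{i=1}^{B}\Big(Q_{\text{eval}}(s_i,a_i) + C_{\text{eval}}(s_i,f_i) - y_i\Big)^2 $$

and the actor is then trained by gradient ascent on the critic's value:

$$ \mathcal{L}_{pg} = -\frac{1}{B}\sum_{i=1}^{B} C_{\text{eval}}\big(s_i,\mu_{\text{eval}}(s_i)\big) $$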
1.3 Main
if __name__ == "__main__":
os.chdir(os.path.split(os.path.realpath(__file__))[0])
train_platform = RL_train(mask_total=1000000)
#set the total number of masks
train_platform.train()
2 Net/Qnet.py
The action part: the DQN that scores the saliency rankings.
import torch
from torch import nn,optim
import torch.nn.functional as F
class Qnet(nn.Module):
def __init__(self):
super(Qnet,self).__init__()
self.cov1=nn.Conv1d(in_channels=6,
out_channels=16,
kernel_size=5,
stride=1,
padding=2,
bias=True)
#the input channels of the first conv layer are the 6 states the
#environment can observe:
'''
It (infected and tested positive)
Ih (infected and hospitalized)
D  (dead)
E  (exposed)
Iu (infected but not yet tested)
R  (recovered)
channel order: E,Iu,It,Ih,R,D
'''
self.cov2=nn.Conv1d(in_channels=16,
out_channels=32,
kernel_size=5,
stride=1,
padding=2,
bias=True)
self.cov3=nn.Conv1d(in_channels=32,
out_channels=16,
kernel_size=5,
stride=2,
padding=0,
bias=True)
self.cov4=nn.Conv1d(in_channels=16,
out_channels=4,
kernel_size=5,
stride=2,
padding=0,
bias=True)
#four conv layers: 6 -> 16 -> 32 -> 16 -> 4 channels
self.lin1=nn.Linear(in_features=664,
out_features=128,
bias=True)
self.lin2=nn.Linear(in_features=128,out_features=7,bias=True)
self.device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
self.mask=torch.tensor([[0.01,0,0,0,0,0],
[0,0.05,0,0,0,0],
[0,0,0.01,0,0,0],
[0,0,0,0.05,0,0],
[0,0,0,0,0.01,0],
[0,0,0,0,0,0.05]],dtype=torch.float).to(self.device)
def forward(self,x):
    #x: [1,region_num,6], channel order E,Iu,It,Ih,R,D
    x1=torch.matmul(x,self.mask).permute(0,2,1)
    #scale each state channel by a fixed weight, then permute to [1,6,region_num]
    c1=F.leaky_relu(self.cov1(x1),0.2)
    #[1,16,region_num]
    c2=F.leaky_relu(self.cov2(c1),0.2)
    #[1,32,region_num]
    c3=F.leaky_relu(self.cov3(c2),0.2)
    #[1,16,~region_num/2] (stride-2 conv halves the length)
    c4=F.leaky_relu(self.cov4(c3),0.2)
    #[1,4,~region_num/4]
    l1=F.leaky_relu(self.lin1(c4.view(x.shape[0],-1)),0.2)
    #flatten, then [1,664]->[1,128]; see the note on in_features=664 below
    l2 = self.lin2(l1)
    #->[1,7]: a score for each ranking principle; the 3 criteria combine
    #freely into 2^3-1=7 options (at least one must be chosen)
    return l2
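A caveat on lin1: its in_features=664 is coupled to the region count. With region_num=161 the two strided convs shrink the sequence to 38, so the flattened size would be 4×38=152; 664 equals 4×166, which presumably matches the region count of the city in the original study. A small sketch (my own) to compute the right value before editing the layer:

import torch

region_num = 161  # adjust to the studied city
with torch.no_grad():
    x = torch.zeros(1, 6, region_num)   # 6 observable state channels
    convs = torch.nn.Sequential(        # same shapes/strides as Qnet's conv stack
        torch.nn.Conv1d(6, 16, 5, stride=1, padding=2),
        torch.nn.Conv1d(16, 32, 5, stride=1, padding=2),
        torch.nn.Conv1d(32, 16, 5, stride=2, padding=0),
        torch.nn.Conv1d(16, 4, 5, stride=2, padding=0),
    )
    flat = convs(x).flatten(1).shape[1]
print(flat)  # 152 for region_num=161: use this as lin1's in_features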
3 Net/Anet.py
The actor part.
import torch
from torch import nn,optim
import torch.nn.functional as F
class Anet(nn.Module):
def __init__(self):
super(Anet,self).__init__()
self.cov1=nn.Conv1d(in_channels=6,
out_channels=16,
kernel_size=5,
stride=1,
padding=2,
bias=True)
#the input channels of the first conv layer are the 6 observable states:
'''
It (infected and tested positive)
Ih (infected and hospitalized)
D  (dead)
E  (exposed)
Iu (infected but not yet tested)
R  (recovered)
channel order: E,Iu,It,Ih,R,D
'''
self.cov2=nn.Conv1d(in_channels=16,
out_channels=32,
kernel_size=5,
stride=1,
padding=2,
bias=True)
self.cov3=nn.Conv1d(in_channels=32,
out_channels=16,
kernel_size=5,
stride=2,
padding=0,
bias=True)
self.cov4=nn.Conv1d(in_channels=16,
out_channels=4,
kernel_size=5,
stride=2,
padding=0,
bias=True)
#four conv layers: 6 -> 16 -> 32 -> 16 -> 4 channels
self.lin1=nn.Linear(in_features=664,out_features=128,bias=True)
self.lin2=nn.Linear(in_features=128,out_features=16,bias=True)
self.lin3=nn.Linear(in_features=16,out_features=1,bias=True)
self.device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
self.mask=torch.tensor([[0.01,0,0,0,0,0],
[0,0.05,0,0,0,0],
[0,0,0.01,0,0,0],
[0,0,0,0.05,0,0],
[0,0,0,0,0.01,0],
[0,0,0,0,0,0.05]],dtype=torch.float).to(self.device)
def forward(self,state):
    #state: [1,region_num,6], channel order E,Iu,It,Ih,R,D
    x1 = torch.matmul(state, self.mask).permute(0, 2, 1)
    #scale each state channel by a fixed weight, then permute to [1,6,region_num]
    c1 = F.leaky_relu(self.cov1(x1), 0.2)
    #[1,16,region_num]
    c2 = F.leaky_relu(self.cov2(c1), 0.2)
    #[1,32,region_num]
    c3 = F.leaky_relu(self.cov3(c2), 0.2)
    #[1,16,~region_num/2]
    c4 = F.leaky_relu(self.cov4(c3), 0.2)
    #[1,4,~region_num/4]
    l1 = F.leaky_relu(self.lin1(c4.view(state.shape[0], -1)), 0.2)
    #flatten, then [1,664]->[1,128] (same region-count caveat as in Qnet)
    l2 = F.leaky_relu(self.lin2(l1), 0.2)
    #->[1,16]
    l3 = torch.sigmoid(self.lin3(l2))
    #->[1,1]; after the sigmoid this is a value in (0,1)
    return l3
4 Net/Cnet.py
The critic network.
import torch
from torch import nn,optim
import torch.nn.functional as F
class Cnet(nn.Module):
def __init__(self,region_num):
super(Cnet,self).__init__()
self.lin_1 = nn.Linear(in_features=1,
out_features=128,
bias=True)
self.lin_2 = nn.Linear(in_features=128,
out_features=256,
bias=True)
self.lin_3 = nn.Linear(in_features=256,
out_features=region_num,
bias=True)
self.cov1=nn.Conv1d(in_channels=7,
out_channels=16,
kernel_size=5,
stride=1,
padding=2,
bias=True)
self.cov2=nn.Conv1d(in_channels=16,
out_channels=32,
kernel_size=5,
stride=1,
padding=2,
bias=True)
self.cov3=nn.Conv1d(in_channels=32,
out_channels=16,
kernel_size=5,
stride=2,
padding=0,
bias=True)
self.cov4=nn.Conv1d(in_channels=16,
out_channels=4,
kernel_size=5,
stride=2,
padding=0,
bias=True)
self.lin1=nn.Linear(in_features=664,
out_features=128,
bias=True)
self.lin2=nn.Linear(in_features=128,
out_features=16,
bias=True)
self.lin3=nn.Linear(in_features=16,
out_features=1,
bias=True)
self.device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
self.mask=torch.tensor([[0.01,0,0,0,0,0],
[0,0.05,0,0,0,0],
[0,0,0.01,0,0,0],
[0,0,0,0.05,0,0],
[0,0,0,0,0.01,0],
[0,0,0,0,0,0.05]],dtype=torch.float).to(self.device)
def forward(self,state,perc):
    #state: the current observable states, [batch_size,region_num,6]
    #perc: the satisfy factor, [batch_size,1]
    a1 = F.leaky_relu(self.lin_1(perc), 0.2)
    #[batch_size,1] -> [batch_size,128]
    a2 = F.leaky_relu(self.lin_2(a1), 0.2)
    #[batch_size,128] -> [batch_size,256]
    a3 = F.leaky_relu(self.lin_3(a2), 0.2)
    #[batch_size,256] -> [batch_size,region_num]:
    #the scalar satisfy factor is lifted to one value per region
    x1 = torch.cat((torch.matmul(state, self.mask), a3.unsqueeze(2)), dim=-1).permute(0, 2, 1)
    #before the permute: [batch_size,region_num,7] (6 state channels + the perc channel)
    #after: [batch_size,7,region_num]
    c1 = F.leaky_relu(self.cov1(x1), 0.2)
    #-> [batch_size,16,region_num]
    c2 = F.leaky_relu(self.cov2(c1), 0.2)
    #-> [batch_size,32,region_num]
    c3 = F.leaky_relu(self.cov3(c2), 0.2)
    #-> [batch_size,16,~region_num/2]
    c4 = F.leaky_relu(self.cov4(c3), 0.2)
    #-> [batch_size,4,~region_num/4]
    l1 = F.leaky_relu(self.lin1(c4.view(state.shape[0], -1)), 0.2)
    #flatten to [batch_size,664] -> [batch_size,128] (same region-count caveat as in Qnet)
    l2 = F.leaky_relu(self.lin2(l1), 0.2)
    #[batch_size,128] -> [batch_size,16]
    l3 = self.lin3(l2)
    #[batch_size,16] -> [batch_size,1]
    return l3
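The lin_1 to lin_3 branch is how the critic conditions on the action: the scalar satisfy factor is lifted to one value per region and rides along as a 7th channel next to the 6 state channels. A minimal shape check (my own sketch; it patches lin1 with the 152 computed in section 2, since in_features=664 assumes the original city's region count):

import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
region_num = 161
net = Cnet(region_num).to(device)                     # assumes Net/Cnet.py is importable
net.lin1 = torch.nn.Linear(152, 128).to(device)       # 4 * 38 = 152 for 161 regions
state = torch.rand(2, region_num, 6, device=device)   # a batch of 2 observable states
perc = torch.rand(2, 1, device=device)                 # a batch of satisfy factors
print(net(state, perc).shape)                          # -> torch.Size([2, 1])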
5 simulator.py
import numpy as np
import json
import random
import os
from node import node
5.1 __init__
class simulator(object):
def __init__(self,city='city_sample'):
super(simulator, self).__init__()
self.city = city
#name of the studied city
#load data
print('Loading dynamic pattern...')
with open(os.path.join('../data',self.city,'prob.json'), 'r') as f:
    self.prob = np.array(json.load(f))
#prob.json is not shipped with the code; presumably the time-dependent
#per-region travel-probability matrices
print('Done!')
print('Loading population data...')
with open(os.path.join('../data',self.city,'flow.json'), 'r') as f:
    self.flow = np.array(json.load(f))
#not shipped; presumably each region's population-flow intensity
with open(os.path.join('../data',self.city,'dense.json'), 'r') as f:
    self.dense = np.array(json.load(f))
#not shipped; presumably each region's population density
with open(os.path.join('../data',self.city,'pop_region.json'), 'r') as f:
    self.pop = np.array(json.load(f))
#not shipped; presumably each region's allocation cap
print('Done!')
self.START = 1
self.region_num = len(self.flow)
#number of regions
self.nodes = list()
self.full = False
self.parameters = [[1, 0, 0], [0, 1, 0], [0, 0, 1], [1, 1, 0], [1, 0, 1], [0, 1, 1], [1, 1, 1]]
#the 7 non-empty combinations of the three ranking criteria
#(1 = use the criterion, 0 = ignore it); see the example below
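For example, mask_action=3 picks self.parameters[3] = [1, 1, 0]: rank regions by cumulative infections plus flow intensity and ignore density. A toy sketch of the resulting ranking (made-up numbers, not the repo's data):

import numpy as np

w = [1, 1, 0]                        # parameters[3]: infections + flow, no density
infect = np.array([0.5, 2.0, 1.0])   # toy normalized cumulative infections
flow = np.array([1.2, 0.3, 0.9])     # toy flow intensities
dense = np.array([0.1, 0.8, 0.4])    # toy densities (ignored by this combo)
score = w[0]*infect + w[1]*flow + w[2]*dense
order = np.argsort(-score)           # regions in descending priority
print(order)                         # [1 2 0]: region 1 is served first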
5.2 reset
def reset(self, state):
    #state: a region_num*8 array with the initial population of each
    #compartment in each region
    print('\nResetting...')
    self.nodes = list()
    for i in range(self.region_num):
        self.nodes.append(node(i))
        #create node i; all 8 compartments start at 0
        self.nodes[i].set_susceptible(state[i][0])
        #susceptible (S)
        self.nodes[i].set_latent(state[i][1])
        #exposed / latent (E)
        self.nodes[i].set_infected_ut(state[i][2])
        #infected but not yet tested (Iu)
        self.nodes[i].set_infected_t(state[i][3])
        #infected and tested positive (It)
        self.nodes[i].set_infected_asymptomatic(state[i][4])
        #infected but testing negative, i.e. asymptomatic (Ia)
        self.nodes[i].set_in_hospital(state[i][5])
        #infected and hospitalized (Ih)
        self.nodes[i].set_recovered(state[i][6])
        #recovered (R)
        self.nodes[i].set_death(state[i][7])
        #dead (D)
    print('Done!')
5.3 simulate
Simulates the compartment transitions of every region.
def simulate(self,
             sim_type, #mask-allocation policy
             interval, #number of 30-minute ticks to simulate
             mask_on=False, #whether masks are distributed
             mask_total=0, #total number of masks
             mask_quality=0,
             mask_lasting=48, #mask lifetime (in ticks)
             mask_action=0,
             mask_distribute_perc=[0], #per-region mask satisfy factors (for 'Policy')
             bed_satisfy_perc=1,
             mask_satisfy_perc=1,
             it_move=False,
             is_save=False,
             save_time=0):
    print('Simulating...')
    #temp variables
    S_temp = np.zeros((self.region_num, self.region_num + 1))
    L_temp = np.zeros((self.region_num, self.region_num + 1))
    Iut_temp = np.zeros((self.region_num, self.region_num + 1))
    if it_move:
        It_temp = np.zeros((self.region_num, self.region_num + 1))
    Ia_temp = np.zeros((self.region_num, self.region_num + 1))
    R_temp = np.zeros((self.region_num, self.region_num + 1))
    #cross-region movement counts for each compartment
    Ih_new = 0
    for time in range(interval):
        #mask
        if (time % mask_lasting == 0):
            #once the masks expire, redistribute them
            mask_numbers = np.zeros(self.region_num)
            #masks allocated to each region
            if mask_on:
                mask_num = mask_total
                if sim_type == 'Mean':
                    mask_numbers = np.ones(self.region_num) * int(mask_num / self.region_num)
                    #'Mean': split the masks evenly across regions
                elif sim_type == 'Lottery':
                    order = np.array(range(self.region_num))
                    np.random.shuffle(order)
                    #'Lottery': shuffle the region order and fill the regions
                    #greedily in that order (earlier regions are filled to their cap)
                    for i in range(self.region_num):
                        if mask_num > 0:
                            mask_numbers[order[i]] = min(mask_num, self.pop[order[i]])
                            #self.pop[order[i]] — the region's allocation cap
                            mask_num = mask_num - mask_numbers[order[i]]
                        else:
                            break
                else:
                    patient_num = np.zeros(self.region_num)
                    patient_num_order = np.zeros(self.region_num)
                    for i in range(self.region_num):
                        patient_num[i] = self.nodes[i].infected_t
                        #infected and tested positive (It)
                        patient_num_order[i] = self.nodes[i].infected_t + self.nodes[i].in_hospital + self.nodes[i].recovered + self.nodes[i].death
                        #It + Ih + R + D: the cumulative number of confirmed infections
                    if sim_type == 'Max_first':
                        order = np.argsort(-patient_num_order)
                        #sort by cumulative infections, most-infected regions first
                        for i in range(self.region_num):
                            if mask_num > 0:
                                mask_numbers[order[i]] = min(mask_num, self.pop[order[i]])
                                #self.pop[order[i]] — the region's allocation cap
                                mask_num = mask_num - mask_numbers[order[i]]
                            else:
                                break
                        #earlier regions in the order are filled to their cap
                    elif sim_type == 'Min_first':
                        order = np.argsort(patient_num_order)
                        #sort by cumulative infections, least-infected regions first
                        for i in range(self.region_num):
                            if mask_num > 0:
                                mask_numbers[order[i]] = min(mask_num, self.pop[order[i]])
                                mask_num = mask_num - mask_numbers[order[i]]
                            else:
                                break
                        #earlier regions in the order are filled to their cap
                    elif sim_type == 'Policy':
                        mask_parameter = self.parameters[mask_action]
                        #mask_action (0-6) indexes self.parameters, yielding a 0/1
                        #triple that selects which ranking criteria to use
                        infect_num = patient_num_order/np.mean(patient_num_order)
                        order = np.argsort(-(mask_parameter[0]*infect_num
                                             + mask_parameter[1]*self.flow
                                             + mask_parameter[2]*self.dense))
                        #rank regions by the selected mix of cumulative infections,
                        #flow intensity and population density
                        alloc_mask = np.floor(mask_num*np.array(mask_distribute_perc))
                        #masks each ranked region may receive (mask_distribute_perc
                        #holds the per-region satisfy factors)
                        for i in range(len(mask_distribute_perc)):
                            mask_numbers[order[i]] = min(self.pop[order[i]], alloc_mask[i])
                            mask_num -= mask_numbers[order[i]]
                        #masks actually allocated to each region
                        if mask_num > 0:
                            #if masks remain, refill the regions in descending
                            #order of their satisfy factor
                            temp_order = np.argsort(-np.array(mask_distribute_perc))
                            for i in range(len(mask_distribute_perc)):
                                temp=min(mask_num,
                                         self.pop[order[temp_order[i]]]-mask_numbers[order[temp_order[i]]])
                                #top up the region to its cap
                                mask_num = mask_num - temp
                                mask_numbers[order[temp_order[i]]] += temp
                                if mask_num == 0:
                                    break
                            while mask_num > 0:
                                index = np.random.randint(0, self.region_num)
                                if mask_numbers[index] < self.pop[index]:
                                    mask_numbers[index] += 1
                                    mask_num -= 1
                                else:
                                    break
                            #any masks still left over are assigned at random
                    elif sim_type == 'Policy_a':
                        mask_parameter = self.parameters[mask_action]
                        #mask_action (0-6) indexes self.parameters, yielding a 0/1
                        #triple that selects which ranking criteria to use
                        infect_num = patient_num_order/np.mean(patient_num_order)
                        order = np.argsort(-(mask_parameter[0] * infect_num
                                             + mask_parameter[1] * self.flow
                                             + mask_parameter[2] * self.dense))
                        #rank regions by the selected mix of cumulative infections,
                        #flow intensity and population density
                        for i in range(self.region_num):
                            if mask_num > 0:
                                mask_numbers[order[i]] = min(mask_num, int(self.pop[order[i]]*mask_satisfy_perc))
                                mask_num = mask_num - mask_numbers[order[i]]
                            else:
                                break
                        #here every region shares the same satisfy factor, so masks
                        #are handed out in ranking order, each region capped at
                        #pop * mask_satisfy_perc
        #state transition
        for i in range(self.region_num):
            self.nodes[i].step(mask_numbers[i], mask_quality)
            #each node advances its compartments by one tick
        #cross region traveling
        for k in range(self.region_num):
            S_temp[k] = np.random.multinomial(
                self.nodes[k].susceptible, self.prob[((self.START-1)*48+time) % (7*48)][k])
            #how many S people of region k go to each other region
            L_temp[k] = np.random.multinomial(
                self.nodes[k].latent, self.prob[((self.START-1)*48+time) % (7*48)][k])
            #how many E people of region k go to each other region
            Iut_temp[k] = np.random.multinomial(
                self.nodes[k].infected_ut, self.prob[((self.START - 1)* 48 + time) % (7* 48)][k])
            #how many Iu people of region k go to each other region
            #(infected but untested: their movement is unrestricted)
            if it_move:
                It_temp[k] = np.random.multinomial(
                    self.nodes[k].infected_t, self.prob[((self.START-1)*48+time) % (7*48)][k])
                #how many It people of region k go to each other region
                #(infected and tested: normally confined, so only simulated if it_move)
            Ia_temp[k] = np.random.multinomial(
                self.nodes[k].infected_asymptomatic, self.prob[((self.START - 1)* 48 + time) % (7* 48)][k])
            #how many Ia people of region k go to each other region
            #(infected but testing negative)
            R_temp[k] = np.random.multinomial(
                self.nodes[k].recovered, self.prob[((self.START - 1)* 48 + time) % (7* 48)][k])
            #how many R people of region k go to each other region
        S_temp_sum0 = np.sum(S_temp, axis=0)
        L_temp_sum0 = np.sum(L_temp, axis=0)
        Iut_temp_sum0 = np.sum(Iut_temp, axis=0)
        if it_move:
            It_temp_sum0 = np.sum(It_temp, axis=0)
        Ia_temp_sum0 = np.sum(Ia_temp, axis=0)
        R_temp_sum0 = np.sum(R_temp, axis=0)
        #arrivals from other regions
        S_temp_sum1 = np.sum(S_temp, axis=1)
        L_temp_sum1 = np.sum(L_temp, axis=1)
        Iut_temp_sum1 = np.sum(Iut_temp, axis=1)
        if it_move:
            It_temp_sum1 = np.sum(It_temp, axis=1)
        Ia_temp_sum1 = np.sum(Ia_temp, axis=1)
        R_temp_sum1 = np.sum(R_temp, axis=1)
        #departures to other regions
        for k in range(self.region_num):
            self.nodes[k].set_susceptible(
                self.nodes[k].susceptible
                +S_temp_sum0[k]
                -S_temp_sum1[k]
                +S_temp[k][self.region_num])
            #new S of region k = old + arrivals - departures + the last column
            #(presumably those who stay in k): the 'stay' column is included in
            #the departure row-sum but never counted as an arrival, so it has
            #to be added back
            self.nodes[k].set_latent(
                self.nodes[k].latent
                +L_temp_sum0[k]
                -L_temp_sum1[k]
                +L_temp[k][self.region_num])
            #same bookkeeping as for S
            self.nodes[k].set_infected_ut(
                self.nodes[k].infected_ut
                + Iut_temp_sum0[k]
                - Iut_temp_sum1[k]
                + Iut_temp[k][self.region_num])
            #same bookkeeping as for S
            if it_move:
                self.nodes[k].set_infected_t(
                    self.nodes[k].infected_t
                    +It_temp_sum0[k]
                    -It_temp_sum1[k]
                    +It_temp[k][self.region_num])
                #same bookkeeping as for S
            self.nodes[k].set_infected_asymptomatic(
                self.nodes[k].infected_asymptomatic
                +Ia_temp_sum0[k]
                -Ia_temp_sum1[k]
                +Ia_temp[k][self.region_num])
            #same bookkeeping as for S
            self.nodes[k].set_recovered(
                self.nodes[k].recovered
                + R_temp_sum0[k]
                - R_temp_sum1[k]
                + R_temp[k][self.region_num])
            #same bookkeeping as for S
        if(is_save):
            save = list()
            for i in range(self.region_num):
                temp1 = [self.nodes[i].susceptible, self.nodes[i].latent, self.nodes[i].infected_ut, self.nodes[i].infected_t, self.nodes[i].infected_asymptomatic, self.nodes[i].in_hospital, self.nodes[i].recovered, self.nodes[i].death]
                save.append(temp1)
            #snapshot of every region's 8 compartments at this tick
            save = np.array(save)
            save = save.astype(float)
            with open(os.path.join('../result',self.city,'result_'+str(save_time*interval+time)+'.json'), 'w') as f:
                json.dump(save.tolist(), f)
    next_state = list()
    for i in range(self.region_num):
        next_state.append([self.nodes[i].susceptible,
                           self.nodes[i].latent,
                           self.nodes[i].infected_ut,
                           self.nodes[i].infected_t,
                           self.nodes[i].infected_asymptomatic,
                           self.nodes[i].in_hospital,
                           self.nodes[i].recovered,
                           self.nodes[i].death])
    #the final state of every region
    a = self.statistic()
    print('S:{},L:{},Iut:{},It:{},Ia:{},Ih:{},R:{},D:{}'.format(a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7]))
    print('Is Full:{}'.format(self.full))
    return np.array(next_state), Ih_new
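The travel step above hinges on np.random.multinomial: row k of the prob matrix for the current time slot gives region k's destination distribution, region_num entries plus (presumably) a trailing "stay" share. A toy 3-region sketch of how one compartment's population gets split:

import numpy as np

pop_k = 100                               # e.g. the S count of region k
row_k = np.array([0.1, 0.2, 0.1, 0.6])    # 3 destination shares + a trailing 'stay' share
moves = np.random.multinomial(pop_k, row_k)
print(moves, moves.sum())                 # 4 integer counts that always sum to 100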
6 node.py
import numpy as np
Pa = 0.018 # false negative rate
eps_1 = 250 # average length of incubation (30min)
eps_1_d = 5.2 # average length of incubation (day)
d = 0.15 # death rate
t = 672 # average recovery time (30min)
t_d = 14 # average recovery time (day)
d_hospital = 0.04 # death rate in hospital
t_hospital = 614 # average recovery time in hospital(30min)
t_hospital_d = 12.8 # average recovery time in hospital(day)
R_0 = 2.68 # basic reproduction number
r_a = 0.6 # relative infectiousness of asymptomatic cases (inferred from lambda_j below)
r_L = 1.0 # relative infectiousness of latent cases
eps = 1/eps_1
beta = R_0 / (r_L * eps_1_d + (Pa * r_a + (1 - Pa)) * t_d)
beta = np.power(1+beta, 1/48)-1
L_I = eps * (1 - Pa)
L_Ia = eps * Pa
I_D = d / t
I_R = ((1 - d)/(t_d - d))/48
theta = 48 # testing speed (30min)
I_h = 1/theta
Ia_R = 1 / t
Ih_D = d_hospital / t_hospital
Ih_R = ((1 - d_hospital) / (t_hospital_d - d_hospital))/48
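My reading of the beta computation above: the denominator is the expected number of "effective infectious days" per case (r_L-weighted days of latency, plus illness days weighted by r_a for the asymptomatic fraction Pa and by 1 otherwise), so dividing R_0 by it yields a daily infection rate, which is then compounded down to a per-30-minute rate:

$$ \beta_{\text{day}} = \frac{R_0}{r_L\,\varepsilon_{1,d} + \big(P_a r_a + (1-P_a)\big)\, t_d}, \qquad \beta_{30\min} = (1+\beta_{\text{day}})^{1/48} - 1 $$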
class node:
def __init__(self, id):
self.id = id
self.susceptible = 0
self.infected_ut = 0
self.infected_t = 0
self.death = 0
self.in_hospital = 0
self.infected_asymptomatic = 0
self.recovered = 0
self.latent = 0
def set_susceptible(self, susceptible):
self.susceptible = susceptible
def set_latent(self, latent):
self.latent = latent
def set_infected_ut(self, infected_ut):
self.infected_ut = infected_ut
def set_infected_t(self, infected_t):
self.infected_t = infected_t
def set_infected_asymptomatic(self, infected_asymptomatic):
self.infected_asymptomatic = infected_asymptomatic
def set_death(self, death):
self.death = death
def set_in_hospital(self, in_hospital):
self.in_hospital = in_hospital
def set_recovered(self, recovered):
self.recovered = recovered
def step(self,mask_number,mask_quality):
    if (self.susceptible + self.latent + self.infected_ut + self.infected_t + self.infected_asymptomatic + self.in_hospital + self.recovered > 0):
        #total population excluding deaths > 0, i.e. not everyone has died
        mask_factor = 1 - np.clip(mask_number / (self.susceptible +
                                                 self.latent +
                                                 self.infected_ut +
                                                 self.infected_t +
                                                 self.infected_asymptomatic +
                                                 self.in_hospital +
                                                 self.recovered),
                                  0, 1) * mask_quality
        #the ratio is the node's masks over its living population, i.e. the
        #mask coverage Π(t) from the paper, so mask_factor is (1 - Π(t)γ)
        #with γ = mask_quality
        lambda_j = ((self.infected_ut +
                     self.infected_t +
                     self.infected_asymptomatic * r_a +
                     self.latent * r_L) /
                    (self.susceptible +
                     self.latent +
                     self.infected_ut +
                     self.infected_t +
                     self.infected_asymptomatic +
                     self.in_hospital +
                     self.recovered)) * beta * mask_factor
        #force of infection: (weighted infectious count / living population) * β * (1 - Π(t)γ)
        susceptible_to_latent, __ = np.random.multinomial(self.susceptible, [lambda_j, 1])
        #number of people moving from S to E: each of the self.susceptible
        #individuals becomes exposed with probability lambda_j, drawn in one
        #multinomial (effectively binomial) sample; see the note below
        self.susceptible -= susceptible_to_latent
        self.latent += susceptible_to_latent
        #S decreases and E increases by susceptible_to_latent
        latent_to_infected, latent_to_Ia, __ = np.random.multinomial(self.latent, [L_I, L_Ia, 1])
        self.infected_ut += latent_to_infected
        self.infected_asymptomatic += latent_to_Ia
        self.latent -= (latent_to_Ia + latent_to_infected)
        #transitions from E to Iu and Ia
        prob = I_h
        infected_ut_to_t, __ = np.random.multinomial(self.infected_ut, [prob, 1])
        self.infected_ut -= infected_ut_to_t
        self.infected_t += infected_ut_to_t
        #transitions from Iu to It (testing)
        infected_to_death, infected_to_recovered, __ = np.random.multinomial(self.infected_ut, [I_D, I_R, 1])
        self.death += infected_to_death
        self.recovered += infected_to_recovered
        self.infected_ut -= (infected_to_death + infected_to_recovered)
        #Iu (infected, untested) to death / recovery
        infected_to_death, infected_to_recovered, __ = np.random.multinomial(self.infected_t, [I_D, I_R, 1])
        self.death += infected_to_death
        self.recovered += infected_to_recovered
        self.infected_t -= (infected_to_death + infected_to_recovered)
        #It (infected and tested) to death / recovery
        Ia_to_recovered, __ = np.random.multinomial(self.infected_asymptomatic, [Ia_R, 1])
        self.recovered += Ia_to_recovered
        self.infected_asymptomatic -= Ia_to_recovered
        #Ia (asymptomatic) to recovery
        in_hospital_to_death, in_hospital_to_recovered, __ = np.random.multinomial(self.in_hospital, [Ih_D, Ih_R, 1])
        self.death += in_hospital_to_death
        self.recovered += in_hospital_to_recovered
        self.in_hospital -= (in_hospital_to_death + in_hospital_to_recovered)
        #Ih (hospitalized) to death / recovery
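One idiom worth flagging (referenced in the S-to-E comment above): np.random.multinomial(n, [p, 1]) works because NumPy only requires sum(pvals[:-1]) <= 1 and treats the last entry as the remaining probability mass, so the first count is effectively a Binomial(n, p) draw:

import numpy as np

n, p = 1000, 0.03
k, rest = np.random.multinomial(n, [p, 1])  # the last entry absorbs the leftover mass
assert k + rest == n                        # k is distributed as Binomial(n, p)
print(k)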