# Python dependency parser
import collections
import tensorflow as tf
import numpy as np
import pickle
import math
from progressbar import ProgressBar
from DependencyTree import DependencyTree
from ParsingSystem import ParsingSystem
from Configuration import Configuration
import Config
import Util
"""
This script defines a transition-based dependency parser which makes
use of a classifier powered by a neural network. The neural network
accepts distributed representation inputs: dense, continuous
representations of words, their part-of-speech tags, and the labels
which connect words in a partial dependency parse.

This is an implementation of the method described in

Danqi Chen and Christopher Manning. A Fast and Accurate Dependency Parser
Using Neural Networks. In EMNLP 2014.

Author: Danqi Chen, Jon Gauthier
Modified by: Heeyoung Kwon (2017)
Modified by: Jun S. Kang (2018 Mar)
"""
class DependencyParserModel(object):
    """
    Feed-forward transition classifier for the dependency parser.

    The graph is built once in ``build_graph``; ``train`` and ``evaluate``
    then run it inside a caller-supplied session. The methods read the
    module-level names ``parsing_system``, ``trainFeats``, ``trainLabels``,
    ``devSents`` and ``devTrees`` that the script entry point defines.
    """

    def __init__(self, graph, embedding_array, Config):
        self.build_graph(graph, embedding_array, Config)

    def build_graph(self, graph, embedding_array, Config):
        """
        Define the computational graph with the necessary variables.

        :param graph: tf.Graph that receives all ops and variables
        :param embedding_array: initial values of the embedding matrix
        :param Config: object providing batch_size, n_Tokens, embedding_size,
                       hidden_size, lam and learning_rate
        """
        with graph.as_default():
            num_transitions = parsing_system.numTransitions()

            ## PART 1 : placeholders
            self.train_inputs = tf.placeholder(
                tf.int32, shape=[Config.batch_size, Config.n_Tokens])
            self.train_labels = tf.placeholder(
                tf.int32, shape=[Config.batch_size, num_transitions])
            self.test_inputs = tf.placeholder(tf.int32, shape=None)

            ## PART 2 : embeddings, input weights, biases, output weights
            # Pass trainable=False here to keep the embeddings fixed.
            self.embeddings = tf.Variable(embedding_array, dtype=tf.float32)

            embed = tf.nn.embedding_lookup(self.embeddings, self.train_inputs)
            # Concatenate the per-token embeddings of each example into one row.
            embed = tf.reshape(embed, [Config.batch_size, -1])

            weights_input = tf.Variable(tf.random_normal(
                [Config.embedding_size * Config.n_Tokens, Config.hidden_size],
                stddev=0.1))
            biases_input = tf.Variable(tf.zeros([Config.hidden_size]))
            weights_output = tf.Variable(tf.random_normal(
                [Config.hidden_size, num_transitions], stddev=0.1))

            # forward pass
            self.train_pred = self.forward_pass(
                embed, weights_input, biases_input, weights_output)

            ## PART 3 : loss
            # relu clamps negative label entries to 0 before the softmax
            # cross-entropy is computed.
            train_labels = tf.nn.relu(self.train_labels)
            ce_loss = tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=self.train_pred, labels=train_labels)

            # L2 regularisation over input weights, biases, the looked-up
            # embeddings and output weights, scaled by Config.lam.
            ip_reg = tf.nn.l2_loss(weights_input)
            bias_reg = tf.nn.l2_loss(biases_input)
            embed_reg = tf.nn.l2_loss(embed)
            op_weight_reg = tf.nn.l2_loss(weights_output)
            l2_loss = Config.lam * (ip_reg + bias_reg + embed_reg + op_weight_reg)

            self.loss = tf.reduce_mean(ce_loss + l2_loss)

            optimizer = tf.train.GradientDescentOptimizer(Config.learning_rate)
            grads = optimizer.compute_gradients(self.loss)
            clipped_grads = [(tf.clip_by_norm(grad, 5), var) for grad, var in grads]
            # NOTE(review): the assignment target was lost in the source; the
            # attribute name ``app`` is a reconstruction. It is only used by train().
            self.app = optimizer.apply_gradients(clipped_grads)

            # Test-time path: a single example reshaped to one row.
            test_embed = tf.nn.embedding_lookup(self.embeddings, self.test_inputs)
            test_embed = tf.reshape(test_embed, [1, -1])
            self.test_pred = self.forward_pass(
                test_embed, weights_input, biases_input, weights_output)

            # initializer
            self.init = tf.global_variables_initializer()

    @staticmethod
    def _batch_bounds(step, batch_size, num_examples):
        """Return the (start, end) slice bounds of the mini-batch for `step`."""
        start = (step * batch_size) % num_examples
        end = ((step + 1) * batch_size) % num_examples
        # A batch that would wrap past the end is shifted back so it ends at
        # the last example. `<=` (rather than `<`) also covers
        # batch_size == num_examples, which would otherwise give an empty slice.
        if end <= start:
            start -= end
            end = num_examples
        return start, end

    def _greedy_parse(self, sess, sent):
        """Parse `sent` by repeatedly applying the best-scoring applicable transition."""
        num_trans = parsing_system.numTransitions()
        c = parsing_system.initialConfiguration(sent)
        while not parsing_system.isTerminal(c):
            feat = getFeatures(c)
            pred = sess.run(self.test_pred, feed_dict={self.test_inputs: feat})

            opt_score = -float('inf')
            opt_trans = ""
            for j in range(num_trans):
                transition = parsing_system.transitions[j]
                if pred[0, j] > opt_score and parsing_system.canApply(c, transition):
                    opt_score = pred[0, j]
                    opt_trans = transition

            c = parsing_system.apply(c, opt_trans)
        # NOTE(review): the line that collected the parse was missing from the
        # source; ``c.tree`` is assumed to hold the predicted tree - confirm
        # against the Configuration class.
        return c.tree

    def train(self, sess, num_steps):
        """
        Run mini-batch training, periodically reporting loss and dev results.

        :param sess: tf.Session bound to the graph built by build_graph
        :param num_steps: number of training steps to run
        """
        # self.init is created in build_graph; variables must be initialised
        # before the first training step.
        sess.run(self.init)
        print("Initailized")

        average_loss = 0
        for step in range(num_steps):
            start, end = self._batch_bounds(step, Config.batch_size, len(trainFeats))
            batch_inputs, batch_labels = trainFeats[start:end], trainLabels[start:end]

            feed_dict = {self.train_inputs: batch_inputs,
                         self.train_labels: batch_labels}
            _, loss_val = sess.run([self.app, self.loss], feed_dict=feed_dict)
            average_loss += loss_val

            # Display average loss
            if step % Config.display_step == 0:
                if step > 0:
                    average_loss /= Config.display_step
                print("Average loss at step  %s :  %s" % (step, average_loss))
                average_loss = 0

            if step % Config.validation_step == 0 and step != 0:
                print("\nTesting on dev set at step  %s" % step)
                predTrees = [self._greedy_parse(sess, sent) for sent in devSents]
                result = parsing_system.evaluate(devSents, predTrees, devTrees)
                print(result)

        print("Train Finished.")

    def evaluate(self, sess, testSents):
        """
        Parse every test sentence and write the result to 'result_test.conll'.

        :param sess: tf.Session holding the trained variables
        :param testSents: sentences to parse
        """
        print("Starting to predict on test set")
        predTrees = [self._greedy_parse(sess, sent) for sent in testSents]

        print("Saved the test results.")
        Util.writeConll('result_test.conll', testSents, predTrees)

    def forward_pass(self, embed, weights_input, biases_input, weights_output):
        """
        Implement the forward pass described in
        "A Fast and Accurate Dependency Parser using Neural Networks" (2014).

        :param embed: input rows to multiply with weights_input
        :param weights_input: input-to-hidden weight matrix
        :param biases_input: hidden-layer bias
        :param weights_output: hidden-to-output weight matrix
        :return: tensor of unnormalised transition scores
        """
        hidden = tf.add(tf.matmul(embed, weights_input), biases_input)
        # Cubic activation. To experiment with sigmoid / tanh / relu, replace
        # this line with the corresponding tf.nn call on `hidden`.
        hidden = tf.pow(hidden, 3.0)
        return tf.matmul(hidden, weights_output)
def genDictionaries(sents, trees):
    """
    Fill the module-level wordDict, posDict and labelDict with integer IDs.

    IDs come from one running counter: words first (starting at 0), then
    POS tags, then dependency labels. Each vocabulary is seeded with the
    special symbols from Config before the observed items.

    :param sents: training sentences (iterables of tokens)
    :param trees: training trees exposing n, getHead(k) and getLabel(k)
    :return: (wordDict, posDict, labelDict)
    """
    words = []
    tags = []
    labels = []
    # NOTE(review): the collection lines were missing from the source; the
    # token keys 'word' and 'POS' are assumed - confirm against Util.loadConll.
    for s in sents:
        for token in s:
            words.append(token['word'])
            tags.append(token['POS'])

    rootLabel = None
    for tree in trees:
        for k in range(1, tree.n + 1):
            if tree.getHead(k) == 0:
                rootLabel = tree.getLabel(k)
            else:
                labels.append(tree.getLabel(k))

    # The root label is seeded explicitly below, so drop one occurrence from
    # the observed labels.
    if rootLabel in labels:
        labels.remove(rootLabel)

    index = 0
    wordCount = [Config.UNKNOWN, Config.NULL, Config.ROOT]
    wordCount.extend(collections.Counter(words))  # unique observed words
    for w in wordCount:
        wordDict[w] = index
        index += 1

    posCount = [Config.UNKNOWN, Config.NULL, Config.ROOT]
    posCount.extend(collections.Counter(tags))
    for p in posCount:
        posDict[p] = index
        index += 1

    labelCount = [Config.NULL, rootLabel]
    labelCount.extend(collections.Counter(labels))
    for lab in labelCount:
        labelDict[lab] = index
        index += 1

    return wordDict, posDict, labelDict
def getWordID(s):
    """Return the ID of word `s`, or the ID of Config.UNKNOWN if `s` is not in wordDict."""
    key = s if s in wordDict else Config.UNKNOWN
    return wordDict[key]
def getPosID(s):
    """Return the ID of POS tag `s`, or the ID of Config.UNKNOWN if `s` is not in posDict."""
    return posDict[s if s in posDict else Config.UNKNOWN]
def getLabelID(s):
    """
    Return the ID of dependency label `s`.

    genDictionaries seeds labelDict with Config.NULL and the root label only,
    so Config.UNKNOWN is normally absent. Unknown labels therefore use the
    Config.UNKNOWN entry when one exists and fall back to Config.NULL
    otherwise, instead of raising KeyError.
    """
    if s in labelDict:
        return labelDict[s]
    if Config.UNKNOWN in labelDict:
        return labelDict[Config.UNKNOWN]
    return labelDict[Config.NULL]
def getFeatures(c):
    """
    Implement feature extraction described in
    "A Fast and Accurate Dependency Parser using Neural Networks" (2014).

    Produces 48 IDs: 18 word IDs, then 18 POS IDs, then 12 label IDs.

    NOTE(review): the loop bodies were missing from the source and have been
    reconstructed from the paper's feature template. The Configuration
    accessors getStack, getBuffer, getPOS and getLabel, and the order of the
    child features, are assumptions - confirm against Configuration.py.

    :param c: parser configuration
    :return: list of 48 integer feature IDs
    """
    features = []

    # S_w: top three stack items, then the first three buffer items.
    for i in range(3):
        features.append(c.getStack(i))
    for i in range(3):
        features.append(c.getBuffer(i))

    # Children of the top two stack items (features[0] and features[1]).
    for i in range(2):
        lc1 = c.getLeftChild(features[i], 1)
        rc1 = c.getRightChild(features[i], 1)
        lc2 = c.getLeftChild(features[i], 2)
        rc2 = c.getRightChild(features[i], 2)
        lc_lc1 = c.getLeftChild(lc1, 1)
        rc_rc1 = c.getRightChild(rc1, 1)
        features.extend([lc1, rc1, lc2, rc2, lc_lc1, rc_rc1])

    # S_t: POS tag of each of the 18 token positions.
    for i in range(18):
        features.append(c.getPOS(features[i]))

    # S_l: arc label of each of the 12 child positions.
    for i in range(6, 18):
        features.append(c.getLabel(features[i]))

    # Replace positions / strings by their vocabulary IDs, in place.
    for i in range(48):
        if i < 18:
            features[i] = getWordID(c.getWord(features[i]))
        elif i < 36:
            features[i] = getPosID(features[i])
        else:
            features[i] = getLabelID(features[i])

    return features
def genTrainExamples(sents, trees):
    """
    Build training examples by following the oracle on every projective tree.

    Each visited configuration yields one feature vector and one label vector
    with an entry per transition.

    NOTE(review): the branch bodies were missing from the source. The values
    1.0 (oracle transition), 0.0 (applicable) and -1.0 (not applicable) are a
    reconstruction; it is consistent with the `1.0 not in label` check below
    and with the relu applied to the labels in the model.

    :param sents: training sentences
    :param trees: gold trees aligned with `sents`
    :return: (features, labels)
    """
    numTrans = parsing_system.numTransitions()

    features = []
    labels = []
    pbar = ProgressBar()
    for i in pbar(range(len(sents))):
        # Only projective trees are used.
        if not trees[i].isProjective():
            continue

        c = parsing_system.initialConfiguration(sents[i])
        while not parsing_system.isTerminal(c):
            oracle = parsing_system.getOracle(c, trees[i])
            feat = getFeatures(c)

            label = []
            for j in range(numTrans):
                t = parsing_system.transitions[j]
                if t == oracle:
                    label.append(1.)
                elif parsing_system.canApply(c, t):
                    label.append(0.)
                else:
                    label.append(-1.)

            # Diagnostic: the oracle transition was not among the known ones.
            if 1.0 not in label:
                print("%s %s" % (i, label))

            features.append(feat)
            labels.append(label)
            c = parsing_system.apply(c, oracle)

    return features, labels
def load_embeddings(filename, wordDict, posDict, labelDict):
    """
    Build the initial embedding matrix for words, POS tags and labels.

    Rows for words found in the pretrained file (exact match, then
    lower-cased) are copied from it; every other row is drawn uniformly from
    [-0.01, 0.01).

    :param filename: pickle file holding (dictionary, word_embeds)
    :param wordDict: word -> ID mapping
    :param posDict: POS tag -> ID mapping
    :param labelDict: label -> ID mapping
    :return: array of shape (len(wordDict) + len(posDict) + len(labelDict),
             Config.embedding_size)
    """
    # NOTE: pickle can execute arbitrary code while loading; only use
    # embedding files from a trusted source.
    with open(filename, 'rb') as embedding_file:
        dictionary, word_embeds = pickle.load(embedding_file)

    num_rows = len(wordDict) + len(posDict) + len(labelDict)
    embedding_array = np.zeros((num_rows, Config.embedding_size))

    # Order the words by ID so that row i receives the vector of the word
    # whose ID is i (dict key order is not guaranteed to follow the IDs).
    # Assumes word IDs are 0..len(wordDict)-1, as genDictionaries assigns them.
    knownWords = sorted(wordDict, key=wordDict.get)

    foundEmbed = 0
    for i in range(len(embedding_array)):
        index = -1
        if i < len(knownWords):
            w = knownWords[i]
            if w in dictionary:
                index = dictionary[w]
            elif w.lower() in dictionary:
                index = dictionary[w.lower()]

        if index >= 0:
            foundEmbed += 1
            embedding_array[i] = word_embeds[index]
        else:
            # No pretrained vector: small random initialisation.
            embedding_array[i] = np.random.rand(Config.embedding_size) * 0.02 - 0.01

    print("Found embeddings:  %s / %s" % (foundEmbed, len(knownWords)))
    return embedding_array
if __name__ == '__main__':
    # Module-level state read by the helper functions and the model.
    wordDict = {}
    posDict = {}
    labelDict = {}
    parsing_system = None

    trainSents, trainTrees = Util.loadConll('train.conll')
    devSents, devTrees = Util.loadConll('dev.conll')
    testSents, _ = Util.loadConll('test.conll')
    genDictionaries(trainSents, trainTrees)

    embedding_filename = 'word2vec.model'
    embedding_array = load_embeddings(embedding_filename, wordDict, posDict, labelDict)

    # Labels ordered by their ID. The first entry (lowest ID) is Config.NULL,
    # which genDictionaries seeds first, and is not passed to the parsing system.
    labelInfo = sorted(labelDict, key=labelDict.get)
    parsing_system = ParsingSystem(labelInfo[1:])
    print(parsing_system.rootLabel)

    print("Generating Traning Examples")
    trainFeats, trainLabels = genTrainExamples(trainSents, trainTrees)
    print("Done.")

    # Build the graph model
    graph = tf.Graph()
    model = DependencyParserModel(graph, embedding_array, Config)

    num_steps = Config.max_iter
    with tf.Session(graph=graph) as sess:
        model.train(sess, num_steps)
        model.evaluate(sess, testSents)
