Embedding Layer Implementation


OnDeviceEmbedding

import tensorflow as tf
from utils import tf_utils
from tensorflow.keras.layers import Layer
import unittest
import numpy as np
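
# Note: `tf_utils.get_shape_list` (imported from the local `utils` module above) is
# assumed here to behave like the TensorFlow Model Garden helper of the same name:
# it returns the dimensions of a tensor as a Python list, using static values where
# they are known and dynamic `tf.shape()` values for dimensions that are None.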

class OnDeviceEmbedding(Layer):
    """Performs an embedding lookup suitable for accelerator devices.
    This layer uses either tf.gather or tf.one_hot to translate integer indices to
    float embeddings.
    Arguments:
        vocab_size: Number of elements in the vocabulary.
        embedding_width: Output size of the embedding layer.
        initializer: The initializer to use for the embedding weights. Defaults to
             "glorot_uniform".
        use_one_hot: Whether to use tf.one_hot over tf.gather for the embedding
            lookup. Defaults to False (that is, using tf.gather). Setting this option
            to True may improve performance, especially on small vocabulary sizes,
            but will generally require more memory.
    """

    def __init__(self,
                 vocab_size,
                 embedding_width,
                 initializer = "glorot_uniform",
                 use_one_hot = False,
                 **kwargs):
        # We need to have a default dtype of float32, since the inputs (which Keras
        # usually uses to infer the dtype) will always be int32.
        if "dtype" not in kwargs:
            kwargs["dtype"] = "float32"
        super(OnDeviceEmbedding, self).__init__(**kwargs)
        self._vocab_size = vocab_size
        self._embedding_width = embedding_width
        self._initializer = initializer
        self._use_one_hot = use_one_hot

    def build(self, input_shape):
        self.embeddings = self.add_weight(
                "embeddings",
                shape = [self._vocab_size, self._embedding_width],
                initializer = self._initializer,
                dtype = tf.float32)

        super(OnDeviceEmbedding, self).build(input_shape)

    def call(self, inputs):
        input_shape = tf_utils.get_shape_list(inputs, expected_rank = 2)  # (batch_size, sequence_length)
        input_shape.append(self._embedding_width)
        flat_inputs = tf.reshape(inputs, [-1])
        if self._use_one_hot:
            one_hot_data = tf.one_hot(
                    flat_inputs, depth = self._vocab_size, dtype = self._dtype)
            embeddings = tf.matmul(one_hot_data, self.embeddings)
        else:
            embeddings = tf.gather(self.embeddings, flat_inputs)
        embeddings = tf.reshape(embeddings, input_shape)  # (batch_size, sequence_length, _embedding_width)

        return embeddings

    def get_config(self):
        config = {
                "vocab_size":      self._vocab_size,
                "embedding_width": self._embedding_width,
                "initializer":     self._initializer,
                "use_one_hot":     self._use_one_hot,
        }
        base_config = super(OnDeviceEmbedding, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
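
A quick usage sketch (the vocabulary size, embedding width, and input ids below are made-up values for illustration, and it assumes the `utils.tf_utils` helper is importable): both the `tf.gather` path and the `tf.one_hot` path should return a float tensor of shape (batch_size, sequence_length, embedding_width).

word_ids = tf.constant([[1, 4, 2], [3, 0, 5]], dtype = tf.int32)  # (batch_size=2, seq_length=3)

gather_layer = OnDeviceEmbedding(vocab_size = 100, embedding_width = 8)
one_hot_layer = OnDeviceEmbedding(vocab_size = 100, embedding_width = 8, use_one_hot = True)

print(gather_layer(word_ids).shape)   # (2, 3, 8)
print(one_hot_layer(word_ids).shape)  # (2, 3, 8)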

PositionEmbedding

class PositionEmbedding(Layer):
    """
    Creates a positional embedding.
    See "BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding".

    This layer can be set up to create either a statically shaped or a dynamically
    shaped slice of the position embedding table. If `use_dynamic_slicing` is True,
    the input tensor can have a dynamic sequence (1st) dimension, while if
    `use_dynamic_slicing` is False the input size must be fixed.
    """

    def __init__(self,
                 initializer = "glorot_uniform",
                 use_dynamic_slicing = False,
                 max_sequence_length = None,
                 **kwargs):
        """
        :param initializer: The initializer to use for the embedding weights. Defaults to "glorot_uniform".
        :param use_dynamic_slicing:  Whether to use the dynamic slicing path.
        :param max_sequence_length:  The maximum size of the dynamic sequence. Only applicable if `use_dynamic_slicing` is True.
        :param kwargs: "dtype"
        # We need to have a default dtype of float32, since the inputs (which Keras
        # usually uses to infer the dtype) will always be int32.
        """
        if "dtype" not in kwargs:
            kwargs["dtype"] = "float32"
        super(PositionEmbedding, self).__init__(**kwargs)
        if use_dynamic_slicing and max_sequence_length is None:
            raise ValueError(
                    "If `use_dynamic_slicing` is True, `max_sequence_length` must be set."
            )
        self._max_sequence_length = max_sequence_length
        self._initializer = tf.keras.initializers.get(initializer)
        self._use_dynamic_slicing = use_dynamic_slicing

    def build(self, input_shape):
        if not isinstance(input_shape, list):
            dimension_list = input_shape.as_list()
        else:
            dimension_list = input_shape
        if len(dimension_list) != 3:
            raise ValueError("PositionEmbedding expects a 3-dimensional input tensor "
                             "of shape [batch, sequence, width]")
        seq_length = dimension_list[1]
        width = dimension_list[2]
        # If we are not using dynamic slicing, we must assume that the sequence
        # length is fixed and max_sequence_length should not be specified.
        if not self._use_dynamic_slicing:
            if seq_length is None:
                raise ValueError(
                        "PositionEmbedding must have `use_dynamic_slicing` set "
                        "to True (and max_sequence_length set) when the "
                        "sequence (1st) dimension of the input is None.")
            if self._max_sequence_length is not None:
                raise ValueError(
                        "When `use_dynamic_slicing` is False, max_sequence_length should "
                        "not be specified and we ought to use seq_length to get the "
                        "variable shape.")

        if self._max_sequence_length is not None:
            weight_sequence_length = self._max_sequence_length
        else:
            weight_sequence_length = seq_length  # static (fixed) sequence length

        self._position_embeddings = self.add_weight(
                "embeddings",
                shape = [weight_sequence_length, width],
                initializer = self._initializer)  # initialize the position embedding table
        super(PositionEmbedding, self).build(input_shape)

    def call(self, inputs):
        """Implements call() for the layer."""
        if self._use_dynamic_slicing:
            input_shape = tf_utils.get_shape_list(inputs, expected_rank = 3)
            seq_length = input_shape[1]
            width = input_shape[2]

            # Slice the first `seq_length` rows out of the position embedding table.
            position_embeddings = tf.expand_dims(
                    tf.slice(self._position_embeddings, [0, 0], [seq_length, width]), axis = 0)

        else:
            # Static path: the table already matches the fixed sequence length.
            position_embeddings = tf.expand_dims(self._position_embeddings, axis = 0)
        return position_embeddings

    def get_config(self):
        config = {
                "max_sequence_length": self._max_sequence_length,
                "initializer":         keras.initializers.serialize(self._initializer),
                "use_dynamic_slicing": self._use_dynamic_slicing,
        }
        base_config = super(PositionEmbedding, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
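
To show how the two layers fit together, here is a rough sketch of the usual BERT-style combination (the sizes and the Keras functional wiring are illustrative assumptions, not part of the code above): the position embeddings come back with shape (1, seq_length, width) and are broadcast over the batch dimension when added to the word embeddings.

word_ids = tf.keras.Input(shape = (None,), dtype = tf.int32)  # dynamic sequence length
word_emb = OnDeviceEmbedding(vocab_size = 30522, embedding_width = 128)(word_ids)
pos_emb = PositionEmbedding(use_dynamic_slicing = True,
                            max_sequence_length = 512)(word_emb)  # (1, seq_length, 128)
embeddings = word_emb + pos_emb  # broadcasts over the batch dimension
model = tf.keras.Model(inputs = word_ids, outputs = embeddings)

out = model(tf.constant([[1, 4, 2, 9], [3, 0, 5, 7]], dtype = tf.int32))
print(out.shape)  # (2, 4, 128)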
