cs20_7-1-todo
1. Data input
1.1 TFRecord
TFRecord is TensorFlow's binary file format: samples are serialized into it, and TensorFlow can then read them back efficiently through its input pipelines.
CS20 gives a good example:
Pipeline for writing a TFRecord file:
```python
# Step 1: create a writer to write tfrecord to that file
writer = tf.python_io.TFRecordWriter(out_file)

# Step 2: get serialized shape and values of the image
shape, binary_image = get_image_binary(image_file)

# Step 3: create a tf.train.Features object
features = tf.train.Features(feature={'label': _int64_feature(label),
                                      'shape': _bytes_feature(shape),
                                      'image': _bytes_feature(binary_image)})

# Step 4: create a sample containing the features defined above
sample = tf.train.Example(features=features)

# Step 5: write the sample to the tfrecord file
writer.write(sample.SerializeToString())
writer.close()
```
Pipeline for reading a TFRecord file (with tf.data):
```python
# _parse_function parses one serialized record; define it before passing it to map()
def _parse_function(tfrecord_serialized):
    features = {'label': tf.FixedLenFeature([], tf.int64),
                'shape': tf.FixedLenFeature([], tf.string),
                'image': tf.FixedLenFeature([], tf.string)}
    parsed_features = tf.parse_single_example(tfrecord_serialized, features)
    return parsed_features['label'], parsed_features['shape'], parsed_features['image']

dataset = tf.data.TFRecordDataset(tfrecord_files)
dataset = dataset.map(_parse_function)
```
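The snippet above only builds the dataset; to pull records out you still need an iterator. A minimal sketch of that step, assuming the TF 1.x one-shot iterator API (printing only the label is an arbitrary choice):

```python
# Hypothetical continuation of the pipeline above (TF 1.x):
# iterate over the parsed records with a one-shot iterator.
iterator = dataset.make_one_shot_iterator()
label, shape, image = iterator.get_next()

with tf.Session() as sess:
    try:
        while True:
            print(sess.run(label))         # labels come back as int64 scalars
    except tf.errors.OutOfRangeError:      # raised once the dataset is exhausted
        pass
```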
A complete, runnable example:
```python
# Complete example using the (older) queue-based input pipeline;
# tf.data is the recommended replacement nowadays.
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from PIL import Image

IMAGE_PATH = 'data/'  # adjust to wherever the test image lives

def _int64_feature(value):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

def _bytes_feature(value):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

def get_image_binary(filename):
    """ You can read in the image using tensorflow too, but it's a drag
        since you have to create graphs. It's much easier using Pillow and NumPy
    """
    image = Image.open(filename)
    image = np.asarray(image, np.uint8)
    shape = np.array(image.shape, np.int32)
    return shape.tobytes(), image.tobytes()  # convert image to raw data bytes in the array

def write_to_tfrecord(label, shape, binary_image, tfrecord_file):
    """ This example is to write a sample to TFRecord file.
        If you want to write more samples, just use a loop.
    """
    writer = tf.python_io.TFRecordWriter(tfrecord_file)
    # write label, shape, and image content to the TFRecord file
    example = tf.train.Example(features=tf.train.Features(feature={
        'label': _int64_feature(label),
        'shape': _bytes_feature(shape),
        'image': _bytes_feature(binary_image)
    }))
    writer.write(example.SerializeToString())
    writer.close()

def write_tfrecord(label, image_file, tfrecord_file):
    shape, binary_image = get_image_binary(image_file)
    write_to_tfrecord(label, shape, binary_image, tfrecord_file)

def read_from_tfrecord(filenames):
    tfrecord_file_queue = tf.train.string_input_producer(filenames, name='queue')
    reader = tf.TFRecordReader()
    _, tfrecord_serialized = reader.read(tfrecord_file_queue)

    # label and image are stored as bytes but could be stored as
    # int64 or float64 values in a serialized tf.Example protobuf.
    tfrecord_features = tf.parse_single_example(tfrecord_serialized,
                        features={
                            'label': tf.FixedLenFeature([], tf.int64),
                            'shape': tf.FixedLenFeature([], tf.string),
                            'image': tf.FixedLenFeature([], tf.string),
                        }, name='features')
    # image was saved as uint8, so we have to decode as uint8.
    image = tf.decode_raw(tfrecord_features['image'], tf.uint8)
    shape = tf.decode_raw(tfrecord_features['shape'], tf.int32)
    # the image tensor is flattened out, so we have to reconstruct the shape
    image = tf.reshape(image, shape)
    label = tfrecord_features['label']
    return label, shape, image

def read_tfrecord(tfrecord_file):
    label, shape, image = read_from_tfrecord([tfrecord_file])

    with tf.Session() as sess:
        coord = tf.train.Coordinator()                 # coordinates the reader threads
        threads = tf.train.start_queue_runners(coord=coord)
        label, image, shape = sess.run([label, image, shape])  # run the ops to get actual values
        coord.request_stop()                           # ask the threads to stop
        coord.join(threads)                            # wait for them to finish
    print(label)
    print(shape)
    plt.imshow(image)
    plt.savefig("tfrecord.png")
    plt.show()

def main():
    # assume the image has the label Chihuahua, which corresponds to class number 1
    label = 1
    image_file = IMAGE_PATH + 'test.jpg'
    tfrecord_file = IMAGE_PATH + 'test.tfrecord'
    write_tfrecord(label, image_file, tfrecord_file)
    read_tfrecord(tfrecord_file)

if __name__ == '__main__':
    main()
```
Another example that uses the queue API directly:
```python
# Queues are shown for completeness; tf.data is the recommended way to feed data now.
import numpy as np
import tensorflow as tf

N_SAMPLES = 1000
NUM_THREADS = 4

# Generating some simple data
# create 1000 random samples, each is a 1D array from the normal distribution (10, 1)
data = 10 * np.random.randn(N_SAMPLES, 4) + 1
# create 1000 random labels of 0 and 1
target = np.random.randint(0, 2, size=N_SAMPLES)

# each X is a 1-D tensor of 4 values, each Y is a scalar (0-d)
queue = tf.FIFOQueue(capacity=50, dtypes=[tf.float32, tf.int32], shapes=[[4], []])

enqueue_op = queue.enqueue_many([data, target])
data_sample, label_sample = queue.dequeue()

# create ops that do something with data_sample and label_sample

# create NUM_THREADS to do enqueue
qr = tf.train.QueueRunner(queue, [enqueue_op] * NUM_THREADS)  # NUM_THREADS threads run the enqueue op
with tf.Session() as sess:
    # create a coordinator, launch the queue runner threads.
    coord = tf.train.Coordinator()
    enqueue_threads = qr.create_threads(sess, coord=coord, start=True)
    try:
        for step in range(100):  # do 100 iterations
            if coord.should_stop():
                break
            data_batch, label_batch = sess.run([data_sample, label_sample])
            print(data_batch)
            print(label_batch)
    except Exception as e:
        coord.request_stop(e)
    finally:
        coord.request_stop()
        coord.join(enqueue_threads)
```
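Since the comment above already points to tf.data as the preferred API, here is a rough tf.data version of the same toy example; it reuses the data and target arrays, and the rest of the names are my own:

```python
# tf.data equivalent of the queue example above (TF 1.x):
# no QueueRunner, Coordinator, or threads to manage.
dataset = tf.data.Dataset.from_tensor_slices(
    (data.astype(np.float32), target.astype(np.int32)))
dataset = dataset.shuffle(N_SAMPLES).repeat().batch(1)
iterator = dataset.make_one_shot_iterator()
data_sample, label_sample = iterator.get_next()

with tf.Session() as sess:
    for step in range(100):
        data_batch, label_batch = sess.run([data_sample, label_sample])
        print(data_batch)
        print(label_batch)
```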
References:
[1] https://docs.google.com/presentation/d/1ftgals7pXNOoNoWe0E9PO27miOpXbHrQIXyBm0YOiyc/edit#slide=id.g1c81018da0_0_126 (the TFRecord slides)
[2] https://github.com/chiphuyen/stanford-tensorflow-tutorials/blob/master/2017/examples/09_tfrecord_example.py (from the 2017 edition of CS20)
2. Style Transfer
2.1 Basic idea
Goal:
Find a new image:
- whose content is closest to the content image and
- whose style is closest to the style image
Two losses:
Content loss: Measure the content loss between the content of the generated image and the content of the content image
Style loss: Measure the style loss between the style of the generated image and the style of the style image
How do we measure content and style?
Through feature maps:
A convolutional network has many layers, each layer is a function that extracts certain features
lower layers extract features related to content, higher layers extract features related to style
The lower/higher layers used here are: lower: conv4_4; higher: ['conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv5_1'].
layer weights control how much each layer contributes: both losses (the content loss and the style loss) are computed per layer and then combined with these weights.
The paper: Gatys, Leon A., Alexander S. Ecker, and Matthias Bethge. "A neural algorithm of artistic style." arXiv preprint arXiv:1508.06576 (2015).
How do we get the feature maps?
We do not train a network ourselves: use pretrained weights (functions) such as VGG, AlexNet, GoogleNet.
Loss details
How exactly are the two losses computed?
content loss:
- (1) F and P are, respectively, the feature map of the generated image and the feature map of the content image.
- (2) L here denotes layer L, a layer of a pretrained network (e.g. VGG19) whose activations serve as the feature representation; this assignment uses VGG19's 'conv4_2'.
- (3) The loss is the element-wise (pixel-wise) squared difference between the two feature maps.
- (4) The paper uses a coefficient of 1/2, but here we use 1/(4 * s) instead, where s is the product of the dimensions of P; for example, if P has dimension [5, 3, 3] then s = 5 x 3 x 3 = 45. (A small code sketch follows this list.)
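A minimal sketch of this content loss in TF 1.x; the function name and the convention that P is a precomputed NumPy array while F is a tensor are my assumptions, not something fixed by the notes:

```python
def _content_loss(P, F):
    """Content loss between P (feature map of the content image, a NumPy array
    computed once) and F (feature map of the generated image, a tensor)."""
    s = P.size  # product of P's dimensions, e.g. shape [5, 3, 3] gives s = 45
    return tf.reduce_sum(tf.square(F - P)) / (4.0 * s)
```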
style loss:
(1) N is the third dimension of the feature map (for a W x H x C feature map, N is C), and M is the product of the first two dimensions (W x H). Remember, however, that in TensorFlow we add one extra dimension to make the tensor 4D (batch_size x W x H x C) so it works with tf.nn.conv2d, so the first dimension is actually the second, the second is the third, and so on.
(2) A is the Gram matrix from the style image and G is the Gram matrix of the image to be generated. To obtain the Gram matrix of, say, the style image, we first get its feature map at that layer, reshape it to a 2D tensor of dimension M x N, and multiply its transpose by itself, which gives an N x N matrix.
How the Gram matrix is computed is shown in the code sketch right after this list.
(3) The subscript l denotes the layer whose feature maps we want to incorporate into the generated image. The paper suggests using feature maps from 5 layers:
['conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv5_1']
The Gram matrix of the style image is computed at each of these layers.
(4) After you've calculated the tensors E_l, you calculate the style loss by summing them up with their corresponding weights w_l. You can tune the w_l, but I'd suggest giving more emphasis to the deeper layers; for example, w for 'conv1_1' can be 1, the weight for 'conv2_1' can be 2, and so on. (That is, deeper layers get more of the style weight.)
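A sketch of the Gram matrix and per-layer style loss described in (1)-(4); the helper names, and the 1/(4 N² M²) normalization taken from the Gatys paper, are assumptions on my part:

```python
def _gram_matrix(F, N, M):
    """Gram matrix of feature map F: reshape to M x N (M = W*H, N = channels),
    then multiply the transpose by itself, giving an N x N matrix."""
    F = tf.reshape(F, (M, N))
    return tf.matmul(tf.transpose(F), F)

def _single_style_loss(a, g, N, M):
    """Style loss at one layer: a is the style image's feature map (fixed),
    g is the generated image's feature map (a tensor)."""
    A = _gram_matrix(a, N, M)   # Gram matrix of the style image
    G = _gram_matrix(g, N, M)   # Gram matrix of the generated image
    return tf.reduce_sum(tf.square(G - A)) / (4.0 * (N ** 2) * (M ** 2))

# total style loss: weighted sum of the per-layer losses E_l, e.g.
# style_loss = sum(w[l] * _single_style_loss(a[l], g[l], N[l], M[l]) for l in range(5))
```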
total loss:
- The weights used to combine the two losses (how should they be chosen?) - TODO
- Suggested values:
- The paper suggests that we use alpha and beta such that alpha/beta = 0.001 or 0.0001, but I've found that the ratio alpha/beta = 1/20 or 1/50 works just fine.
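Spelled out, the total objective is just the weighted sum these bullets describe, with alpha and beta chosen according to the ratio above:

$$
\mathcal{L}_{total} = \alpha \, \mathcal{L}_{content} + \beta \, \mathcal{L}_{style}
$$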
Tricky implementation details
Train input instead of weights
Multiple tensors share the same variable to avoid assembling identical subgraphs
Use pre-trained weights (from VGG-19)
- Weights and biases already loaded for you
- They are numpy, so need to be converted to tensors
- Must not be trainable!!
2.2 Assignment notes
The model's three inputs:
For this model, you have two fixed inputs, the content image and the style image, but also a trainable input which will be trained to become the generated artwork (the network weights themselves stay fixed).
There is not a clear distinction between the two phases of a TensorFlow program, assembling the graph and executing it. All three inputs (content image, style image, and trainable input) have the same dimensions and feed into the same computation to extract the same sets of features. To save us from having to assemble the same subgraph multiple times, we will use one variable for all three of them. The variable is already defined for you in the model as:
```python
self.input_img = tf.get_variable('in_img',
                                 shape=([1, self.img_height, self.img_width, 3]),
                                 dtype=tf.float32,
                                 initializer=tf.zeros_initializer())
```
When we need to do some computation that takes the content image as input, we first assign the content image to that variable, and likewise for the style image and the trainable input: the three inputs take turns occupying the one shared variable.
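A minimal sketch of that assign-then-run pattern; apart from input_img, the variable and op names (content_img, content_feats, ...) are placeholders I made up for illustration:

```python
# content_img / style_img are NumPy arrays; content_feats / style_feats are the
# feature-extraction ops built on top of the shared input_img variable.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    sess.run(input_img.assign(content_img))   # point the shared variable at the content image
    p = sess.run(content_feats)               # -> features of the content image

    sess.run(input_img.assign(style_img))     # now point it at the style image
    a = sess.run(style_feats)                 # -> features of the style image
```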
Transfer learning:
We use the weights trained for another task for this task. We will use the weights and biases already trained for the object recognition task of the model VGG-19 (a convolutional network with 19 layers) to extract content and style layers for style transfer. We'll only use the weights of the convolution layers. The paper by Gatys et al. suggests that average pooling is better than max pooling, so we'll have to do the pooling ourselves.
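Since the pooling layers have to be rebuilt by hand anyway, swapping in average pooling is a small helper; a sketch (the function name is mine):

```python
def avgpool(prev_layer, name):
    """2x2 average pooling with stride 2, the same window configuration VGG uses,
    substituted for max pooling as Gatys et al. suggest."""
    return tf.nn.avg_pool(prev_layer,
                          ksize=[1, 2, 2, 1],
                          strides=[1, 2, 2, 1],
                          padding='SAME',
                          name=name)
```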
The familiar model pipeline still applies:
Step 1: Define inference
Step 2: Create loss functions
Step 3: Create optimizer
Step 4: Create summaries to monitor your training process
Step 5: Train your model
Some implementation hints:
Loss: implement the content and style losses as described above.
Optimizer choice:
I suggest AdamOptimizer but you can be creative with both optimizers and learning rate to see what you find. You can find this part in the optimize() method in style_transfer.py.
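One way to set this up so that only the input image is trained (var_list is the key detail; the names and the learning rate here are just a starting point, not the assignment's official values):

```python
# total_loss is alpha * content_loss + beta * style_loss built earlier;
# input_img is the only thing being optimized, the VGG weights stay frozen.
global_step = tf.Variable(0, trainable=False, name='global_step')
optimizer = tf.train.AdamOptimizer(learning_rate=2.0)
train_op = optimizer.minimize(total_loss,
                              global_step=global_step,
                              var_list=[input_img])
```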
Deliverables:
The training curves of the content loss, style loss, and total loss. Write a few sentences about what you see. (This requires writing summaries and viewing them in TensorBoard.)
The graph of your model. (by tensorboard)
Change at least two parameters, explain what you did and how that changed the results. (Any of the hyperparameters discussed above will do.)
3 artworks generated using at least 3 different styles. (That is, use at least 3 different style images.)
2.3 Implementation details
Loading the pretrained VGG19 parameters and building the model:
The pretrained parameters come as a MatConvNet .mat file.
Download: http://www.vlfeat.org/matconvnet/models/imagenet-vgg-verydeep-19.mat
There is also a diagram of the network, http://www.vlfeat.org/matconvnet/models/imagenet-vgg-verydeep-19.svg, which lays out the structure node by node and makes it much easier to understand how the layers of VGG19 are organized inside the .mat file (worth keeping vgg19.svg open while reading the code).
| layer_name | idx (layer index in the .mat file) | shape_w | shape_b |
|---|---|---|---|
| conv1_1 | 0 | 3x3x3x64 | 64x1 |
| relu1_1 | 1 | | |
| conv1_2 | 2 | 3x3x64x64 | 64x1 |
| relu1_2 | 3 | | |
| pool1 | 4 | | |
| conv2_1 | 5 | 3x3x64x128 | 128x1 |
| relu2_1 | 6 | | |
| conv2_2 | 7 | 3x3x128x128 | 128x1 |
| relu2_2 | 8 | | |
| pool2 | 9 | | |
| conv3_1 | 10 | 3x3x128x256 | 256x1 |
| relu3_1 | 11 | | |
| conv3_2 | 12 | 3x3x256x256 | 256x1 |
| relu3_2 | 13 | | |
| conv3_3 | 14 | 3x3x256x256 | 256x1 |
| relu3_3 | 15 | | |
| conv3_4 | 16 | 3x3x256x256 | 256x1 |
| relu3_4 | 17 | | |
| pool3 | 18 | | |
| conv4_1 | 19 | 3x3x256x512 | 512x1 |
| relu4_1 | 20 | | |
| conv4_2 | 21 | 3x3x512x512 | 512x1 |
| relu4_2 | 22 | | |
| conv4_3 | 23 | 3x3x512x512 | 512x1 |
| relu4_3 | 24 | | |
| conv4_4 | 25 | 3x3x512x512 | 512x1 |
| relu4_4 | 26 | | |
| pool4 | 27 | | |
| conv5_1 | 28 | 3x3x512x512 | 512x1 |
| relu5_1 | 29 | | |
| conv5_2 | 30 | 3x3x512x512 | 512x1 |
| relu5_2 | 31 | | |
| conv5_3 | 32 | 3x3x512x512 | 512x1 |
| relu5_3 | 33 | | |
| conv5_4 | 34 | 3x3x512x512 | 512x1 |
| relu5_4 | 35 | | |
| pool5 | 36 | | |
| fc6 | 37 | 7x7x512x4096 | 4096x1 |
| relu6 | 38 | | |
| fc7 | 39 | 1x1x4096x4096 | 4096x1 |
| relu7 | 40 | | |
| fc8 | 41 | 1x1x4096x1000 | 1000x1 |
| prob | 42 | | |

- There should be no layer with trainable=True here; we are not training the w and b.
- In the implementation, each conv and its relu are fused into a single helper conv_relu(...); this keeps the number of graph nodes down, and only the conv node's idx needs to be passed in, since relu has no weights (even though vgg19.svg shows relu as a separate node with its own idx). See the sketch after this list.
- Counting only the layers that carry weights (the conv and fc layers), there are 19 of them, hence the name VGG19.
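A hedged sketch of what conv_relu(...) can look like. The indexing into vgg_layers follows the record layout shown by test_1() below, i.e. (name, type, [W, b], ...); different versions of the .mat file may order these fields differently, so treat the indices as an assumption to verify:

```python
def _weights(vgg_layers, layer_idx, expected_layer_name):
    """Fetch the pretrained W and b stored at node layer_idx of the .mat file."""
    W = vgg_layers[0][layer_idx][0][0][2][0][0]
    b = vgg_layers[0][layer_idx][0][0][2][0][1]
    layer_name = vgg_layers[0][layer_idx][0][0][0][0]
    assert layer_name == expected_layer_name   # sanity check against the table above
    return W, b.reshape(b.size)

def conv2d_relu(vgg_layers, prev_layer, layer_idx, layer_name):
    """Conv + relu fused into one helper; weights are frozen as constants."""
    with tf.variable_scope(layer_name):
        W, b = _weights(vgg_layers, layer_idx, layer_name)
        W = tf.constant(W, name='weights')   # tf.constant, so it can never be trained
        b = tf.constant(b, name='bias')
        conv = tf.nn.conv2d(prev_layer, filter=W, strides=[1, 1, 1, 1], padding='SAME')
        return tf.nn.relu(conv + b)
```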
Reading .mat files in Python: https://blog.csdn.net/google19890102/article/details/45672305
```python
# Explore vgg19.mat to see how the data is organized; reading it together with
# vgg19.svg makes the VGG19 structure much easier to follow.
def test_1():
    # peek at the VGG19 parameter file
    import A2_utils
    import scipy.io

    # VGG-19 parameters file
    VGG_DOWNLOAD_LINK = 'http://www.vlfeat.org/matconvnet/models/imagenet-vgg-verydeep-19.mat'
    VGG_FILENAME = 'imagenet-vgg-verydeep-19.mat'
    EXPECTED_BYTES = 534904783
    A2_utils.download(VGG_DOWNLOAD_LINK, VGG_FILENAME, EXPECTED_BYTES)
    vgg19 = scipy.io.loadmat(VGG_FILENAME)

    # print("vgg19-keys: ", vgg19.keys())
    # dict_keys(['__header__', '__version__', '__globals__', 'layers', 'meta'])
    layers = vgg19['layers']
    # print("vgg19-layers: ", layers)
    # print("vgg19-layers-type: ", type(layers))  # <class 'numpy.ndarray'>
    # print("element dtype: ", layers.dtype)      # object
    print("number of elements: ", layers.size)    # 43, matching the 43 nodes (0-42) in vgg19.svg
    print("shape: ", layers.shape)                # (1, 43)
    print("ndim: ", layers.ndim)                  # 2
    # print("node 4, pool1: ", layers[0][4])
    # [[(array(['pool1'], dtype='<U5'), array(['pool'], dtype='<U4'), array()...]]
    print("node 5, conv2_1: ", layers[0][5])
    # [[ (array(['conv2_1'], dtype='<U7'),
    #     array(['conv'], dtype='<U4'),
    #     array([[array([[[[-2.40122317e-03, ...]])
    #     ....
    # ]]
    print("node 42, prob: ", layers[0][42])
    # the full output is very long; uncomment the lines below to dig further
    # print("vgg19-meta: ", vgg19['meta'])
    # print(vgg19)

test_1()
```
A few things the output shows:
vgg19-keys: dict_keys(['__header__', '__version__', '__globals__', 'layers', 'meta'])
- 'layers': 43 entries, one per node of the network; for the conv ops the entry also stores the w/b arrays, whose shapes are listed in the table above.
- 'meta': metadata about the network, roughly what TensorFlow keeps as the meta-graph in an xxx.meta file.
A few more notes:
- In this assignment VGG19 is used only as a feature extractor, so we do not need the full network; we only build the layers up to avgpool5.
- Because the parameters come from a .mat file rather than a TensorFlow checkpoint, we cannot use tf's saver() and restore() to recover VGG19, and there is no meta_graph to import.
The concrete loss-computation code and the remaining pieces of the implementation are still TODO; they are left for the next note.