USTC--多媒体内容分析与理解

Posted 2021-10-25 Magician0619

tags:

篇首语：本文由小常识网(cha138.com)小编为大家整理，主要介绍了USTC--多媒体内容分析与理解相关的知识，希望对你有一定的参考价值。

2021年中国科学技术大学秋季入学课程《多媒体内容分析与理解》

HomeWork1 BMP2YUV

读取文件夹中的BMP图像，将其从RGB颜色空间转换到YUV颜色空间并保存。不得调用现有的图像读取函数和颜色空间转换函数，相关代码须自行编写。

# -*- coding:utf-8 -*-
"""
# @Author: Magician USTC SA21006072
# @Date: 2021-10-06 17:01:41
# @Description: Decode 24-bit BMP images by hand and convert them to planar YUV444

# Copyright 2021 by Magician
"""
import struct
import numpy as np
from PIL import Image
import os

'''
本实验代码的主要功能：读取指定文件夹下的24位BMP文件，解码为RGB图像并保存为PNG，
再将RGB图像转换为YUV444（平面）格式进行存储
@date: 2021-9-30
'''


class BMPImage:
    """Hand-written decoder for uncompressed 24-bit BMP files.

    The header is parsed with ``struct`` and the pixel rows are decoded with
    NumPy; Pillow is only used to write the decoded picture out as a PNG.

    Attributes set by ``__init__``:
        filepath: path of the BMP file to decode.
        resultpath: name of the directory that receives the PNG.

    Attributes set by ``build``:
        bin_file_type, bin_file_size, bin_head_offset, head_length,
        bin_width, bin_height, bin_pixel_size: raw header fields (bytes).
        file_size, offset, width, height, pixel_size: decoded header fields.
            ``height`` is always stored as a positive row count.
        top_down: True when the file stores its rows top-to-bottom
            (negative height field), False for the usual bottom-up layout.
        pixels: ``(height, width, 3)`` uint8 array, RGB order, top row first.
    """

    def __init__(self, path, resultpath):
        self.filepath = path
        self.resultpath = resultpath

    def build(self):
        """Decode the BMP, save it as a PNG and return the PNG path.

        Returns:
            The path of the written PNG: the input path with its first
            component replaced by ``resultpath`` and its extension by
            ``.png``.

        Raises:
            SystemExit: the file is not a BMP or is not 24 bits per pixel
                (a message is printed first).
            struct.error: the header or the pixel data is truncated.
            ValueError: the header declares an empty or negative-width image.
        """
        # The context manager closes the file on every path, including the
        # SystemExit raised for unsupported files.
        with open(self.filepath, 'rb') as f:
            self._read_header(f)
            self.pixels = self._read_pixels(f)

        im = Image.fromarray(self.pixels)
        resultPath = self._result_path()
        im.save(resultPath)

        return resultPath

    def _read_header(self, f):
        """Parse the file header and the start of the info header."""
        header = f.read(30)
        self.bin_file_type = header[0:2]  # magic bytes, 'BM'
        self.bin_file_size = header[2:6]  # total file size
        # bytes 6-9 are reserved and skipped
        self.bin_head_offset = header[10:14]  # offset of the pixel data
        self.head_length = header[14:18]  # size of the info header
        self.bin_width = header[18:22]  # image width
        self.bin_height = header[22:26]  # image height (sign = row order)
        # bytes 26-27 hold the plane count and are skipped
        self.bin_pixel_size = header[28:30]  # bits per pixel

        if self.bin_file_type != b'BM':
            print('This is not a BMP file!')
            raise SystemExit

        # BMP fields are little-endian regardless of the host byte order.
        self.file_size = struct.unpack('<I', self.bin_file_size)[0]
        self.offset = struct.unpack('<I', self.bin_head_offset)[0]
        self.width = struct.unpack('<i', self.bin_width)[0]
        signed_height = struct.unpack('<i', self.bin_height)[0]
        self.pixel_size = struct.unpack('<H', self.bin_pixel_size)[0]

        # A negative height marks a top-down bitmap.
        self.top_down = signed_height < 0
        self.height = abs(signed_height)

        if self.pixel_size != 24:
            print('This is not a 24bit BMP picture！')
            # Same effect as the built-in exit(), without relying on `site`.
            raise SystemExit

        if self.width <= 0 or self.height == 0:
            raise ValueError('BMP header declares an empty image')

    def _read_pixels(self, f):
        """Read the pixel rows and return them as a top-down RGB array."""
        row_bytes = self.width * 3
        # Every stored row is padded to a multiple of four bytes.
        stride = (row_bytes + 3) // 4 * 4

        # Pixel data starts at the offset given in the file header, not at a
        # fixed position.
        f.seek(self.offset)
        raw = f.read(stride * self.height)

        # Tolerate a missing pad after the very last row, nothing more.
        if len(raw) < stride * (self.height - 1) + row_bytes:
            raise struct.error('BMP pixel data is truncated')
        raw = raw.ljust(stride * self.height, b'\x00')

        rows = np.frombuffer(raw, dtype=np.uint8).reshape(self.height, stride)
        bgr = rows[:, :row_bytes].reshape(self.height, self.width, 3)
        rgb = bgr[:, :, ::-1]  # stored channel order is B, G, R
        if not self.top_down:
            rgb = rgb[::-1]  # bottom-up file: first stored row is the last
        return np.ascontiguousarray(rgb)

    def _result_path(self):
        """Map the input path to the PNG path inside the result directory."""
        # Only the real extension is swapped, so a '.bmp' elsewhere in the
        # path is left alone and the output always ends in '.png'.
        filename = os.path.splitext(self.filepath)[0] + '.png'
        path_temp = filename.split('/')
        # The first path component (the input directory) is replaced by the
        # result directory name.
        path_temp[0] = self.resultpath.replace('/', '')
        return '/'.join(path_temp)


def _rgb_to_yuv444_planes(rgb):
    """Convert an (H, W, 3) RGB array to a (3, H, W) uint8 Y/U/V array."""
    r = rgb[..., 0]
    g = rgb[..., 1]
    b = rgb[..., 2]
    y = 0.299*r+0.587*g+0.114*b
    # The chroma weights sum to zero so that grey pixels map to exactly 128.
    u = -0.148*r-0.289*g+0.437*b+128
    v = 0.615*r-0.515*g-0.100*b+128
    planes = np.stack([y, u, v])
    # V can leave [0, 255] for saturated colours; clip before the cast so the
    # value saturates instead of wrapping around.
    return np.clip(planes, 0, 255).astype(np.uint8)


def rgb2yuv(pngpath):
    """Convert an image file to a planar YUV444 file next to it.

    The image is read, converted to 8-bit RGB if it is in another mode, and
    written as three consecutive planes (all Y, then all U, then all V), one
    byte per sample, rows top to bottom. Fractional values are truncated.

    Args:
        pngpath: path of the image to convert. The output path is the same
            path with its extension replaced by ``.yuv``.
    """
    with Image.open(pngpath) as im:
        rgb = np.asarray(im.convert('RGB'), dtype=np.float64)

    yuv444 = _rgb_to_yuv444_planes(rgb)

    # Swap only the real extension so the input can never be overwritten.
    yuv_path = os.path.splitext(pngpath)[0] + '.yuv'
    with open(yuv_path, 'wb') as fw:
        # C-order bytes of a (3, H, W) array are exactly Y, U, V planes.
        fw.write(yuv444.tobytes())


def main():
    """Convert every ``.bmp`` file under ``BMPImages/`` into ``Result/``.

    Each BMP is decoded to a PNG by ``BMPImage.build`` and the PNG is then
    converted to a ``.yuv`` file by ``rgb2yuv``. Subdirectories of the input
    tree are mirrored inside the result directory.
    """
    path = 'BMPImages/'
    result_dir = 'Result/'
    os.makedirs(result_dir, exist_ok=True)
    for (root, _, filename) in os.walk(path):
        # Only '.bmp' files are decoded: BMPImage.build terminates the whole
        # program on a non-BMP file, and its '.bmp' -> '.png' renaming is
        # case-sensitive.
        bmp_files = [name for name in filename if name.endswith('.bmp')]
        if not bmp_files:
            continue

        # BMPImage.build keeps the sub-path below the input directory, so the
        # matching directory has to exist under the result directory.
        sub_dir = os.path.relpath(root, path)
        os.makedirs(os.path.normpath(os.path.join(result_dir, sub_dir)),
                    exist_ok=True)

        for file in bmp_files:
            print('processing file: '+file)
            # os.path.join inserts the separator that root lacks for
            # subdirectories.
            img = BMPImage(os.path.join(root, file), result_dir)
            png_path = img.build()
            rgb2yuv(png_path)

# Run the batch conversion only when the file is executed as a script.
if __name__ == '__main__':
    main()

以上是关于USTC--多媒体内容分析与理解的主要内容，如果未能解决你的问题，请参考以下文章