格式工厂合并 mp4 和 srt，并利用 python 按照字幕剪辑视频，将其分割为若干小段

Posted 2022-04-02 可能自洽

tags:

篇首语：本文由小常识网(cha138.com)小编为大家整理，主要介绍了格式工厂合并 mp4 和 srt，并利用 python 按照字幕剪辑视频，将其分割为若干小段相关的知识，希望对你有一定的参考价值。

格式工厂合并 mp4 和 srt，并利用 python 按照字幕剪辑视频，将其分割为若干小段

一、视频合并

1.选择转换为mp4，将视频导入格式工厂

2.调整字幕样式

二、python

1.可能用到的命令：

pip install moviepy

2.main.py

import cut_srt
import cut_video

if __name__ == '__main__':
    my_video_path = r"D:\\Videos\\Star Wars\\Star Wars 9 The.Rise.of.Skywalker.2019.mp4"
    # 将目标文件夹里所有的srt文件都进行格式化txt操作
    my_srt_path = r"D:\\Documents"
    cut_srt.srt_to_format_txt(my_srt_path)
    cut_video.cut_video_by_srt(my_video_path, my_srt_path)

3.cut_srt.py

import re
import os


def check_contain_chinese(check_str):
    for ch in check_str:
        if u'\\u4e00' <= ch <= u'\\u9fff':
            return True
    return False


# 输入原标题，返回格式化后的文件名
def validate_title(str_):
    pattern = r"[\\/\\\\\\:\\*\\?\\"\\<\\>\\|\\.\\,\\!\\'\\-\\♪\\？\\！\\…\\“\\”\\，]"
    new_title = re.sub(pattern, "", str_)
    # strip() 方法用于移除字符串头尾指定的字符（默认为空格或换行符）或字符序列。注意：该方法只能删除开头或是结尾的字符，不能删除中间部分的字符
    # re.sub(' +', ' ', str) 将 str 中的多个空格转化为一个空格
    return re.sub(' +', ' ', new_title.lower().strip())


# 输入字幕文件，得到由字幕组成的二维列表
def get_format_sequences(srt_path_):
    with open(srt_path_, 'r', encoding='utf-8-sig') as f:
        content_ = f.read()
    sequences_ = content_.split('\\n\\n')
    sequences_ = [se.split('\\n') for se in sequences_]
    # 去除每一句空值
    sequences_ = [list(filter(None, sequence)) for sequence in sequences_]
    new_sequences = []
    for se in sequences_:
        if len(se) == 4:
            new_sequences.append(se)
    i = 0
    for se in new_sequences:
        # 序号，时间段，字幕1，字幕2
        new_sequences[i] = [se[0], se[1], se[2], se[3]]
        i += 1
    return new_sequences


# 输入时间 1:20:12, 输出对应时间的秒数（1*3600+20*60+12）
def str2sec(x):
    h, m, s = x.split(':')
    return int(h) * 3600 + int(m) * 60 + int(s)


# 输入 02:09:53,440 --> 02:09:55,740，返回一个起始时间对应的秒数和结束时间对应的秒数
def get_start_end_time(str_):
    start_time_, end_time_ = str_.strip().split("-->")
    start_time_ = start_time_.split(",")[0]
    end_time_ = end_time_.split(",")[0]
    return str2sec(start_time_), str2sec(end_time_)


def srt_to_format_txt(srt_path):
    for fileName in os.listdir(srt_path):
        if fileName.endswith(".srt"):
            print(fileName)
            file_path = srt_path + "\\\\" + fileName
            count = 1
            sequences = get_format_sequences(file_path)
            # 判断字幕第一行是否包含中文
            if check_contain_chinese(sequences[0][2]):
                en_position = 3
                ch_position = 2
            else:
                en_position = 2
                ch_position = 3
            # r: 以只读方式打开文件。文件的指针将会放在文件的开头
            # rb: 以二进制格式打开一个文件用于只读。文件指针将会放在文件的开头
            # w: 打开一个文件只用于写入。如果该文件已存在则将其覆盖。如果该文件不存在，创建新文件
            # wb: 以二进制格式打开一个文件只用于写入。如果该文件已存在则将其覆盖。如果该文件不存在，创建新文件
            # a: 打开一个文件用于追加。如果该文件已存在，文件指针将会放在文件的结尾。也就是说，新的内容将会被写入到已有内容之后。如果该文件不存在，创建新文件进行写入
            # ab: 以二进制格式打开一个文件用于追加。如果该文件已存在，文件指针将会放在文件的结尾。也就是说，新的内容将会被写入到已有内容之后。如果该文件不存在，创建新文件进行写入
            with open(file_path.replace(".srt", ".csv"), "w", encoding='utf-8-sig') as f:
                for i in sequences:
                    en = i[en_position]
                    ch = i[ch_position]
                    movie_name = file_path.split("\\\\")[-1].replace(".srt", "")
                    count_format = ":05d".format(count)
                    sentence_id = i[0]
                    en_format = re.sub(' +', ' ', en.replace("- ", " ").replace(",", " ").strip())
                    ch_format = validate_title(ch)
                    line = movie_name + "," + count_format + "," + sentence_id + "," + en_format + "," + ch_format
                    print(line)
                    f.write(line + "\\n")
                    count = count + 1

4.cut_video.py

import os
import time
from moviepy.video.io.VideoFileClip import VideoFileClip
from cut_srt import get_format_sequences, get_start_end_time


def cut_video_by_start_end(video_path_, save_file_path_, my_start, my_end, save_name):
    video = VideoFileClip(video_path_)
    video = video.subclip(my_start, my_end)
    video.write_videofile(save_file_path_ + save_name, fps=24, logger=None)
    video.close()


def cut_video_by_srt(video_path, srt_path):
    # 将裁剪后的视频片段存放在 save_file_path
    save_file_path = video_path.replace(".mp4", "\\\\")
    if not os.path.exists(save_file_path):
        os.makedirs(save_file_path)
    count = len(os.listdir(save_file_path))
    if count == 0:
        count = 1
        start_index = 0
    else:
        count = count
        start_index = count - 1
    sequences = get_format_sequences(srt_path)
    my_time = time.time()
    for i in sequences[start_index:]:
        file_name = ":05d".format(count) + ".mp4"
        print(srt_path.split("\\\\")[-1].replace(".srt", "") +
              "共" + str(len(sequences)) + "个，当前：" + file_name +
              ", 当前进度：" + str(":<.2f".format((count / len(sequences)) * 100)) + "%" + "," +
              " 耗时：" + str(":<.2f".format(time.time() - my_time)) + "s")
        start_time, end_time = get_start_end_time(i[1])
        cut_video_by_start_end(video_path, save_file_path, start_time - 2, end_time + 2, file_name)
        count = count + 1

以上是关于格式工厂合并 mp4 和 srt，并利用 python 按照字幕剪辑视频，将其分割为若干小段的主要内容，如果未能解决你的问题，请参考以下文章