使用FFmpeg进行视频抽取音频,之后进行语音识别转为文字

Posted liangblog

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了使用FFmpeg进行视频抽取音频,之后进行语音识别转为文字相关的知识,希望对你有一定的参考价值。

1、首先需要下载FFmpeg;

2、Gradle依赖

技术分享图片
/**
 * Pins a requested dependency to {@code version} when it belongs to {@code group}.
 * Dependencies from any other group are left untouched.
 */
void forceVersion(details, group, version) {
    if (details.requested.group != group) {
        return
    }
    details.useVersion(version)
}

/**
 * Pins a requested dependency to {@code version} when both its group and its
 * module name match. Any other dependency is left untouched.
 */
void forceVersion(details, group, name, version) {
    def requested = details.requested
    boolean matches = requested.group == group && requested.name == name
    if (matches) {
        details.useVersion(version)
    }
}

// Configuration shared by the root project and every subproject.
allprojects { p ->
    group = 'com.my.spider'
    version = '1.0.0'
    
    apply plugin: 'java'
    apply plugin: 'maven'
    apply plugin: 'maven-publish'
    
    [compileJava, compileTestJava]*.options*.encoding = 'UTF-8'

    // Populate the jar manifest right before the jar is assembled.
    jar.doFirst {
        manifest {
            // Merge a hand-written manifest when the project ships one.
            def manifestFile = "${projectDir}/META-INF/MANIFEST.MF"
            if (new File(manifestFile).exists())
                from (manifestFile)
            
            attributes 'Implementation-Title':p.name
            // Snapshot builds get the build timestamp appended to their version.
            if (p.version.endsWith('-SNAPSHOT')) {
                attributes 'Implementation-Version': p.version + '-' + p.ext.Timestamp
            } else {
                attributes 'Implementation-Version': p.version
            }
            attributes 'Implementation-BuildDateTime':new Date()
        }
    }
    
    javadoc {
        options {
            encoding 'UTF-8'
            charSet 'UTF-8'
            author false
            version true
            // javadoc's -link option takes the directory that holds the package list,
            // not the index.html page inside it.
            links 'https://docs.oracle.com/javase/8/docs/api/'
            memberLevel = org.gradle.external.javadoc.JavadocMemberLevel.PRIVATE
        }
    }

    // Publish as part of the build when the uploadArchives environment variable is set.
    if (System.env.uploadArchives) {
        build.dependsOn publish
    }

    buildscript {
        repositories {
            mavenCentral()
        }
        dependencies {classpath 'org.springframework.boot:spring-boot-gradle-plugin:1.5.14.RELEASE' }
    }
    
    afterEvaluate {Project  project -> 
        if (project.pluginManager.hasPlugin('java')) {
            configurations.all {
                // Align the versions of whole dependency groups.
                resolutionStrategy.eachDependency {DependencyResolveDetails details -> 
                    forceVersion details, 'org.springframework.boot', '1.4.1.RELEASE'
                    forceVersion details, 'org.slf4j', '1.7.21'
                    forceVersion details, 'org.springframework', '4.3.3.RELEASE'
                }

                exclude module:'slf4j-log4j12'
                exclude module:'log4j'
            }

            dependencies {testCompile 'junit:junit:4.12' }
        }
    }

    repositories {
        mavenCentral()
    }
    
    // Timestamp: year, month, day, hour, minute
    p.ext.Timestamp = new Date().format('yyyyMMddHHmm')
    // Build number taken from the BUILD_NUMBER environment variable; 'x' when it is not set
    p.ext.BuildNumber = System.env.BUILD_NUMBER
    if (p.ext.BuildNumber == null || "" == p.ext.BuildNumber) {
        p.ext.BuildNumber = 'x'
    }    
}

// Packs the project sources into <name>-<version>-<timestamp>.<build>-sources.zip,
// leaving out hidden files and per-project output/working directories.
task zipSources(type: Zip) {
    description '压缩源代码'
    // The file name is stored on the project so the md5 task can locate the archive.
    project.ext.zipSourcesFile = project.name + '-' + project.version + '-' +  project.ext.Timestamp + '.' + project.ext.BuildNumber + '-sources.zip' 
    archiveName = project.ext.zipSourcesFile
    includeEmptyDirs = false
    
    from project.projectDir
    
    exclude '**/.*'
    exclude 'build/*'
    allprojects.each { p ->
        exclude '**/' + p.name + '/bin/*'
        exclude '**/' + p.name + '/build/*'
        exclude '**/' + p.name + '/data/*'
        exclude '**/' + p.name + '/work/*'
        exclude '**/' + p.name + '/logs/*'    
    }
}

/**
 * Builds the copy spec that lays out one application inside the setup archive.
 *
 * @param prj     the project whose jar, example configs and start scripts are packaged
 * @param dstname target directory inside the archive; defaults to the project name
 * @return a copy spec containing the jar, the example configs, the .bat script and the .conf file
 */
def CopySpec appCopySpec(Project prj, dstname = null) {
    if (!dstname) { dstname = prj.name }
    return copySpec{
        // Fat jar. The jar is named after the packaged project's own version,
        // so use prj.version rather than the version of the calling project.
        from (prj.buildDir.toString() + '/libs/' + prj.name + '-' + prj.version + '.jar') {
            into dstname
        }        
    
        // Configs
        from (prj.projectDir.toString() + '/config/examples') {
            into dstname + '/config'
        }
    
        // Windows start script
        from (prj.projectDir.toString() + '/' + prj.name + '.bat') {
            into dstname
        }
        
        // Unix conf script, renamed so its base name matches the versioned jar
        from (prj.projectDir.toString() + '/' + prj.name + '.conf') {
            into dstname
            rename prj.name, prj.name + '-' + prj.version
        }
    }    
}


// Builds the installation package <name>-<version>-<timestamp>.<build>-setup.zip
// once every subproject has been built.
task zipSetup(type: Zip, dependsOn: subprojects.build) { 
    description '制作安装包' 
    // The file name is stored on the project so the md5 task can locate the archive.
    project.ext.zipSetupFile = project.name + '-' + project.version + '-' +  project.ext.Timestamp + '.' + project.ext.BuildNumber + '-setup.zip' 
    archiveName = project.ext.zipSetupFile
    
    with appCopySpec(project(':spider-demo'))
}

import java.security.MessageDigest

/**
 * Computes the MD5 checksum of a file.
 *
 * @param file the file to hash; it is read in 8 KiB chunks
 * @return the digest as a 32-character lowercase hexadecimal string
 */
def generateMD5(final file) {
    MessageDigest digest = MessageDigest.getInstance("MD5")
    file.withInputStream { stream ->
        byte[] buffer = new byte[8192]
        int read
        while ((read = stream.read(buffer)) != -1) {
            digest.update(buffer, 0, read)
        }
    }
    // BigInteger.toString(16) drops leading zeros, so pad back to the full 32 hex digits.
    return new BigInteger(1, digest.digest()).toString(16).padLeft(32, '0')
}

// Prints the MD5 checksums of the setup and sources archives and writes them to
// <name>-<version>-<timestamp>.<build>-md5.txt next to the archives.
task md5(dependsOn: [zipSetup, zipSources]) {
    // doLast replaces the '<<' shorthand, which newer Gradle versions no longer accept.
    doLast {
        def distDir = "${projectDir}/build/distributions/"
        String md5_setup = generateMD5(file(distDir + project.ext.zipSetupFile))
        String md5_sources = generateMD5(file(distDir + project.ext.zipSourcesFile))
        println(project.ext.zipSetupFile + '=' + md5_setup)
        println(project.ext.zipSourcesFile + '=' + md5_sources)

        def newFile = new File(distDir
                        + project.name + '-' + project.version + '-' +  project.ext.Timestamp + '.' + project.ext.BuildNumber + '-md5.txt')
        // withPrintWriter flushes and closes the writer even when a write fails.
        newFile.withPrintWriter { printWriter ->
            printWriter.println(project.ext.zipSetupFile + '=' + md5_setup)
            printWriter.println(project.ext.zipSourcesFile + '=' + md5_sources)
        }
    }
}

// Building the root project also builds every subproject and produces the
// setup archive, the sources archive and the checksum file.
build.dependsOn(subprojects.build, zipSetup, zipSources, md5)
build.gradle

工程组件的 Gradle 依赖:语音识别使用百度 API,需引入 compile 'com.baidu.aip:java-sdk:3.2.1'

// Build script of the application module: an executable Spring Boot jar plus a
// distribution that bundles the example configuration files.
apply plugin: 'spring-boot'
apply plugin: 'application'

distributions {
    main {
        contents {
            // Ship the example configs under config/ inside the distribution.
            from ("${projectDir}/config/examples") {
                into "config"
            }
        }
    }
}

distTar.enabled = false

springBoot {
    executable = true
    mainClass = 'com.my.ai.Application'
}

dependencies {
    compile 'org.springframework.boot:spring-boot-starter-web:1.4.0.RELEASE'
    compile 'dom4j:dom4j:1.6.1'
    compile 'commons-httpclient:commons-httpclient:3.1'
    compileOnly 'com.h2database:h2:1.4.191'
    compile 'javax.cache:cache-api:1.0.0'
    compile 'org.jboss.resteasy:resteasy-jaxrs:3.0.14.Final'
    compile 'org.jboss.resteasy:resteasy-client:3.0.14.Final'
    // Axis
    compile 'axis:axis:1.4'
    
    compile 'org.jsoup:jsoup:1.10.1'
    
    compile 'com.alibaba:fastjson:1.2.21'
    
    // Baidu AIP SDK, used for speech recognition
    compile 'com.baidu.aip:java-sdk:3.2.1'
    
}

3、视频抽取音频服务:

技术分享图片
package com.my.ai.service;

import java.io.BufferedReader;
import java.io.File;
import java.io.InputStreamReader;
import java.util.List;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service;

//视频抽取音频
@Service
public class ExtractAudioservice {

    public static Logger logger  = LoggerFactory.getLogger(ExtractAudioService.class);
    
    public static void main(String[] args) {
        new ExtractAudioService().getAudioFromVideo("E:\QLDownload\氧化还原反应中电子转移的方向和数目的表示方法\氧化还原反应中电子转移的方向和数目的表示方法.mp4",
                "D:\ffmpeg4.2\bin\ffmpeg.exe");
    }
    
    
    public String getAudioFromVideo(String videoPath,String ffmpegPath) {
        File video = new File(videoPath);
        if(video.exists() && video.isFile()){
            String format = "wav";
            String outPath = videoPath.substring(0,videoPath.lastIndexOf(".")) + ".wav";
            processCmd(videoPath, ffmpegPath, format, outPath);
            return outPath;
        }
        return null;
    }
    
    //D:ffmpeg4.2inffmpeg.exe -i 氧化还原反应中电子转移的方向和数目的表示方法.mp4 -f wav -vn -y 3.wav
    public String processCmd(String inputPath,String ffmpegPath,String format,String outPath) {
        List<String> commend = new java.util.ArrayList<String>();
        commend.add(ffmpegPath);
        commend.add("-i");
        commend.add(inputPath);
        commend.add("-y");
        commend.add("-vn");
        commend.add("-f");
        commend.add(format);
        commend.add(outPath);
        try {

            ProcessBuilder builder = new ProcessBuilder();
            builder.command(commend);
            builder.redirectErrorStream(true);
            Process p = builder.start();

            // 1. start
            BufferedReader buf = null; // 保存ffmpeg的输出结果流
            String line = null;
            // read the standard output

            buf = new BufferedReader(new InputStreamReader(p.getInputStream()));

            StringBuffer sb = new StringBuffer();
            while ((line = buf.readLine()) != null) {
                System.out.println(line);
                sb.append(line);
                continue;
            }
            p.waitFor();// 这里线程阻塞,将等待外部转换进程运行成功运行结束后,才往下执行
            // 1. end
            return sb.toString();
        } catch (Exception e) {
//                    System.out.println(e);    
            return null;
        }
    }

}
ExtractAudioService

4、音频切段:

package com.my.ai.service;

import java.io.BufferedReader;
import java.io.File;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service;

@Service
public class CutService {

	public static Logger logger = LoggerFactory.getLogger(CutService.class);

	public List<String> cutFile(String media_path, String ffmpeg_path) {

		List<String> audios = new ArrayList<>();
		int mediaTime = getMediaTime(media_path, ffmpeg_path); 
		int num = mediaTime / 59;
		int lastNum = mediaTime % 59;
		System.out.println(mediaTime +"|" + num + "|"+ lastNum);
		int length = 59;
		File file = new File(media_path);
		String filename = file.getName();
		for (int i = 0; i < num; i++) {
			String outputPath = file.getParent() + File.separator + i + "-"+filename;
			processCmd(media_path, ffmpeg_path, String.valueOf(length * i) , 
					String.valueOf(length), outputPath);
			audios.add(outputPath);
		}
		if(lastNum > 0) {
			String outputPath = file.getParent() + File.separator + num + "-"+filename;
			processCmd(media_path, ffmpeg_path, String.valueOf(length * num) , 
					String.valueOf(lastNum), outputPath);
			audios.add(outputPath);
		}
		return audios;
	}

	/**
	 * 获取视频总时间
	 * 
	 * @param viedo_path  视频路径
	 * @param ffmpeg_path ffmpeg路径
	 * @return
	 */
	public int getMediaTime(String video_path, String ffmpeg_path) {
		List<String> commands = new java.util.ArrayList<String>();
		commands.add(ffmpeg_path);
		commands.add("-i");
		commands.add(video_path);
		try {
			ProcessBuilder builder = new ProcessBuilder();
			builder.command(commands);
			final Process p = builder.start();

			// 从输入流中读取视频信息
			BufferedReader br = new BufferedReader(new InputStreamReader(p.getErrorStream()));
			StringBuffer sb = new StringBuffer();
			String line = "";
			while ((line = br.readLine()) != null) {
				sb.append(line);
			}
			System.out.println(sb.toString());
			br.close();

			// 从视频信息中解析时长
			String regexDuration = "Duration: (.*?), bitrate: (\d*) kb\/s";
			Pattern pattern = Pattern.compile(regexDuration);
			Matcher m = pattern.matcher(sb.toString());
			if (m.find()) {
				int time = getTimelen(m.group(1));
				System.out
						.println(video_path + ",视频时长:" + time + ",比特率:" + m.group(2) + "kb/s");
				return time;
			}
		} catch (Exception e) {
			e.printStackTrace();
		}
		return 0;
	}

	// 格式:"00:00:10.68"
	public int getTimelen(String timelen) {
		int min = 0;
		String strs[] = timelen.split(":");
		if (strs[0].compareTo("0") > 0) {
			min += Integer.valueOf(strs[0]) * 60 * 60;// 秒
		}
		if (strs[1].compareTo("0") > 0) {
			min += Integer.valueOf(strs[1]) * 60;
		}
		if (strs[2].compareTo("0") > 0) {
			min += Math.round(Float.valueOf(strs[2]));
		}  
		return min;
	}
   
	//D:ffmpeg4.2inffmpeg.exe -i 123.pcm -ss 0 -t 59 1-123.wav
	public String processCmd(String inputPath,String ffmpegPath,
			String startTime,String length,String outputPath) {
		List<String> commend = new java.util.ArrayList<String>();
		commend.add(ffmpegPath);
		commend.add("-i");
		commend.add(inputPath);
		commend.add("-ss");
		commend.add(startTime);
		commend.add("-t");
		commend.add(length);
		commend.add(outputPath);
		try {

			ProcessBuilder builder = new ProcessBuilder();
			builder.command(commend);
			builder.redirectErrorStream(true);
			Process p = builder.start();

			// 1. start
			BufferedReader buf = null; // 保存ffmpeg的输出结果流
			String line = null;
			// read the standard output

			buf = new BufferedReader(new InputStreamReader(p.getInputStream()));

			StringBuffer sb = new StringBuffer();
			while ((line = buf.readLine()) != null) {
				System.out.println(line);
				sb.append(line);
				continue;
			}
			p.waitFor();// 这里线程阻塞,将等待外部转换进程运行成功运行结束后,才往下执行
			// 1. end
			return sb.toString();
		} catch (Exception e) {
            System.out.println(e);    
			return null;
		}
	}

	//ffmpeg -y  -i 16k.wav  -acodec pcm_s16le -f s16le -ac 1 -ar 16000 16k.pcm 
	public static String processWavToPcm(String inputPath,String ffmpegPath,String outputPath) {
		List<String> commend = new java.util.ArrayList<String>();
		commend.add(ffmpegPath);
		commend.add("-i");
		commend.add(inputPath);
		commend.add("-acodec");
		commend.add("pcm_s16le");
		commend.add("-f");
		commend.add("s16le");
		commend.add("-ac");
		commend.add("1");
		commend.add("-ar");
		commend.add("16000");
		commend.add(outputPath);
		try {

			ProcessBuilder builder = new ProcessBuilder();
			builder.command(commend);
			builder.redirectErrorStream(true);
			Process p = builder.start();

			// 1. start
			BufferedReader buf = null; // 保存ffmpeg的输出结果流
			String line = null;
			// read the standard output

			buf = new BufferedReader(new InputStreamReader(p.getInputStream()));

			StringBuffer sb = new StringBuffer();
			while ((line = buf.readLine()) != null) {
				System.out.println(line);
				sb.append(line);
				continue;
			}
			p.waitFor();// 这里线程阻塞,将等待外部转换进程运行成功运行结束后,才往下执行
			// 1. end
			return outputPath;
					//sb.toString();
		} catch (Exception e) {
            System.out.println(e);    
			return null;
		}
	}

	
	
	
	public static void main(String[] args) {
		List<String> audios = new CutService().cutFile(
				"E:\QLDownload\氧化还原反应中电子转移的方向和数目的表示方法\氧化还原反应中电子转移的方向和数目的表示方法.wav",
				"D:\ffmpeg4.2\bin\ffmpeg.exe");
		System.out.println(audios.size());
		
		for (String wavPath : audios) {
			String out = wavPath.substring(0,wavPath.lastIndexOf(".")) + ".pcm";
			processWavToPcm(wavPath, "D:\ffmpeg4.2\bin\ffmpeg.exe", out);
		}
		
	}

}

 

5、音频格式转换(wav 转 pcm),便于进行语音识别,代码见上文 CutService 中的 processWavToPcm 方法:

6、调用sdk,获取识别结果:

package com.my.ai.service;

import org.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service;

import com.baidu.aip.speech.AipSpeech;

@Service
public class TokenService {

	public static Logger logger = LoggerFactory.getLogger(TokenService.class);
	
	 //设置APPID/AK/SK
    public static final String APP_ID = "***";
    public static final String API_KEY = "***";
    public static final String SECRET_KEY = "***";
    static AipSpeech client = null;
    static {
    	if(client == null) {
    		 client = new AipSpeech(APP_ID, API_KEY, SECRET_KEY);
    	}
    }
    
    public static void main(String[] args) {
        getResult("E:\QLDownload\氧化还原反应中电子转移的方向和数目的表示方法\0-氧化还原反应中电子转移的方向和数目的表示方法.pcm");
    }
    
    public static String getResult(String file) {
    	 
          // 可选:设置网络连接参数
          client.setConnectionTimeoutInMillis(2000);
          client.setSocketTimeoutInMillis(60000);
          // 可选:设置代理服务器地址, http和socket二选一,或者均不设置
          //client.setHttpProxy("proxy_host", proxy_port);  // 设置http代理
          //client.setSocketProxy("proxy_host", proxy_port);  // 设置socket代理
          JSONObject res = client.asr(file, "pcm", 16000, null);
          //System.out.println(res.toString(2));
          System.out.println(res.get("result").toString());
          return res.get("result").toString();
    }
	
	
	
}

  

7、结果写入文件:

package com.my.ai.service;

import java.io.BufferedOutputStream;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.RandomAccessFile;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service;

@Service
public class FileService {

	public static Logger logger = LoggerFactory.getLogger(FileService.class);
	
	
	//最慢
	public static void writeFile1(String file,String content) throws IOException {
		FileOutputStream out = null;
		out = new FileOutputStream(new File(file));
		long begin = System.currentTimeMillis();
		out.write(content.getBytes());
		out.close();
		long end = System.currentTimeMillis();
		System.out.println("FileOutputStream执行耗时:" + (end - begin) + " 毫秒");
	}
	//中
	public static void writeFile2(String file,String content) throws IOException{
		FileWriter fw = null;
		fw = new FileWriter(file);
		long begin3 = System.currentTimeMillis();
		fw.write(content);
		fw.close();
		long end3 = System.currentTimeMillis();
		System.out.println("FileWriter执行耗时:" + (end3 - begin3) + " 毫秒");
	}
	//最快
	public static void writeFile3(String file,String content) throws IOException{
		FileOutputStream outSTr = null;
		BufferedOutputStream buff = null;
		outSTr = new FileOutputStream(new File(file));
		buff = new BufferedOutputStream(outSTr);
		long begin0 = System.currentTimeMillis();
		buff.write(content.getBytes());
		buff.flush();
		buff.close();
		long end0 = System.currentTimeMillis();
		System.out.println("BufferedOutputStream执行耗时:" + (end0 - begin0) + " 毫秒");
	}

	public static void main(String[] args) {
		for (int i = 0; i < 7; i++) {
			String result = TokenService.getResult("E:\QLDownload\氧化还原反应中电子转移的方向和数目的表示方法\" + i +"-氧化还原反应中电子转移的方向和数目的表示方法.pcm");
			appendFile2("E:\QLDownload\氧化还原反应中电子转移的方向和数目的表示方法\氧化还原反应中电子转移的方向和数目的表示方法.txt", result+"
");
		}
	}

	public static void appendFile1(String file, String conent) {
		BufferedWriter out = null;
		try {
			out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file, true)));
			out.write(conent);
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			try {
				if (out != null) {
					out.close();
				}
			} catch (IOException e) {
				e.printStackTrace();
			}
		}
	}

	/**
	 * 追加文件:使用FileWriter
	 * 
	 * @param fileName
	 * @param content
	 */
	public static void appendFile2(String fileName, String content) {
		FileWriter writer = null;
		try {
			// 打开一个写文件器,构造函数中的第二个参数true表示以追加形式写文件
			writer = new FileWriter(fileName, true);
			writer.write(content);
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			try {
				if (writer != null) {
					writer.close();
				}
			} catch (IOException e) {
				e.printStackTrace();
			}
		}
	}

	/**
	 *  追加文件:使用RandomAccessFile
	 * 
	 * @param fileName 文件名
	 * @param content  追加的内容
	 */
	public static void appendFile3(String fileName, String content) {
		RandomAccessFile randomFile = null;
		try {
			// 打开一个随机访问文件流,按读写方式
			randomFile = new RandomAccessFile(fileName, "rw");
			// 文件长度,字节数
			long fileLength = randomFile.length();
			// 将写文件指针移到文件尾。
			randomFile.seek(fileLength);
			randomFile.writeBytes(content);
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			if (randomFile != null) {
				try {
					randomFile.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
	}

}

 

8、测试:

package com.my.ai.test;

import java.util.List;

import com.my.ai.service.CutService;
import com.my.ai.service.ExtractAudioService;
import com.my.ai.service.FileService;
import com.my.ai.service.TokenService;

public class TestService {

	
	public static void main(String[] args) {
		ExtractAudioService audioService = new ExtractAudioService();
		String outPath =  audioService.getAudioFromVideo("G:\Youku Files\transcode\化学高中必修1__第2章第3节·氧化还原反应_标清.mp4", "D:\ffmpeg4.2\bin\ffmpeg.exe");
		List<String> audios = new CutService().cutFile(outPath,"D:\ffmpeg4.2\bin\ffmpeg.exe");
		for (String wavPath : audios) {
			String out = wavPath.substring(0,wavPath.lastIndexOf(".")) + ".pcm";
			String outPcm = CutService.processWavToPcm(wavPath, "D:\ffmpeg4.2\bin\ffmpeg.exe", out);
			String result = TokenService.getResult(outPcm);
			FileService.appendFile2("G:\Youku Files\transcode\化学高中必修1__第2章第3节·氧化还原反应_标清.mp4-字幕.txt", result+"
");
		}
	}
	
}

  

 

以上是关于使用FFmpeg进行视频抽取音频,之后进行语音识别转为文字的主要内容,如果未能解决你的问题,请参考以下文章

python_视频中语音识别转出文本

一次用ffmpeg实现图片+音频合成视频的开发

[ffmpeg] 抽取音视频数据

视频话题识别与跟踪 - demo 问题总结1.1-视频处理

多媒体视频开发_(30)使用ffmpeg在视频中进行抽帧

FastASR+FFmpeg(音视频开发+语音识别)