使用FFmpeg进行视频抽取音频,之后进行语音识别转为文字
Posted liangblog
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了使用FFmpeg进行视频抽取音频,之后进行语音识别转为文字相关的知识,希望对你有一定的参考价值。
1、首先需要下载FFmpeg;
2、Gradle依赖
import java.security.MessageDigest

// Root build script: shared configuration for every project plus the packaging
// tasks (source zip, setup zip, MD5 checksum file).

/** Forces {@code version} for every requested dependency whose group equals {@code group}. */
def void forceVersion(details, group, version) {
    if (details.requested.group == group) {
        details.useVersion version
    }
}

/** Forces {@code version} for the single dependency identified by {@code group} and {@code name}. */
def void forceVersion(details, group, name, version) {
    if (details.requested.group == group && details.requested.name == name) {
        details.useVersion version
    }
}

allprojects { p ->
    group = 'com.my.spider'
    version = '1.0.0'

    apply plugin: 'java'
    apply plugin: 'maven'
    apply plugin: 'maven-publish'

    [compileJava, compileTestJava]*.options*.encoding = 'UTF-8'

    // Fill in the jar manifest right before the jar is built.
    jar.doFirst {
        manifest {
            def manifestFile = "${projectDir}/META-INF/MANIFEST.MF"
            if (new File(manifestFile).exists()) {
                from(manifestFile)
            }
            attributes 'Implementation-Title': p.name
            if (p.version.endsWith('-SNAPSHOT')) {
                // Snapshot builds get the build timestamp appended to the version.
                attributes 'Implementation-Version': p.version + '-' + p.ext.Timestamp
            } else {
                attributes 'Implementation-Version': p.version
            }
            attributes 'Implementation-BuildDateTime': new Date()
        }
    }

    javadoc {
        options {
            encoding 'UTF-8'
            charSet 'UTF-8'
            author false
            version true
            links 'http://docs.oracle.com/javase/8/docs/api/index.html'
            memberLevel = org.gradle.external.javadoc.JavadocMemberLevel.PRIVATE
        }
    }

    // Publish as part of the build only when the uploadArchives environment variable is set.
    if (System.env.uploadArchives) {
        build.dependsOn publish
    }

    buildscript {
        repositories {
            mavenCentral()
        }
        dependencies {
            classpath 'org.springframework.boot:spring-boot-gradle-plugin:1.5.14.RELEASE'
        }
    }

    afterEvaluate { Project project ->
        if (project.pluginManager.hasPlugin('java')) {
            configurations.all {
                // Pin the versions of these dependency groups in every configuration.
                resolutionStrategy.eachDependency { DependencyResolveDetails details ->
                    forceVersion details, 'org.springframework.boot', '1.4.1.RELEASE'
                    forceVersion details, 'org.slf4j', '1.7.21'
                    forceVersion details, 'org.springframework', '4.3.3.RELEASE'
                }
                exclude module: 'slf4j-log4j12'
                exclude module: 'log4j'
            }
            dependencies {
                testCompile 'junit:junit:4.12'
            }
        }
    }

    repositories {
        mavenCentral()
    }

    // Timestamp: year, month, day, hour, minute.
    p.ext.Timestamp = new Date().format('yyyyMMddHHmm')
    // Build number taken from the BUILD_NUMBER environment variable; 'x' when it is absent or empty.
    p.ext.BuildNumber = System.env.BUILD_NUMBER
    if (p.ext.BuildNumber == null || "" == p.ext.BuildNumber) {
        p.ext.BuildNumber = 'x'
    }
}

task zipSources(type: Zip) {
    description '压缩源代码'
    project.ext.zipSourcesFile = project.name + '-' + project.version + '-' + project.ext.Timestamp + '.' + project.ext.BuildNumber + '-sources.zip'
    archiveName = project.ext.zipSourcesFile
    includeEmptyDirs = false
    from project.projectDir
    exclude '**/.*'
    exclude 'build/*'
    // Leave out the generated / runtime directories of every project.
    allprojects.each { p ->
        exclude '**/' + p.name + '/bin/*'
        exclude '**/' + p.name + '/build/*'
        exclude '**/' + p.name + '/data/*'
        exclude '**/' + p.name + '/work/*'
        exclude '**/' + p.name + '/logs/*'
    }
}

/**
 * Builds the copy spec that lays out one application inside the setup zip.
 *
 * @param prj     project whose artifacts are packaged
 * @param dstname target directory inside the archive; defaults to the project name
 */
def CopySpec appCopySpec(Project prj, dstname = null) {
    if (!dstname) {
        dstname = prj.name
    }
    return copySpec {
        // Fat jar
        from(prj.buildDir.toString() + '/libs/' + prj.name + '-' + project.version + '.jar') {
            into dstname
        }
        // Configs
        from(prj.projectDir.toString() + '/config/examples') {
            into dstname + '/config'
        }
        // Windows start script
        from(prj.projectDir.toString() + '/' + prj.name + '.bat') {
            into dstname
        }
        // Unix conf script, renamed so that its name carries the version
        from(prj.projectDir.toString() + '/' + prj.name + '.conf') {
            into dstname
            rename prj.name, prj.name + '-' + project.version
        }
    }
}

task zipSetup(type: Zip, dependsOn: subprojects.build) {
    description '制作安装包'
    project.ext.zipSetupFile = project.name + '-' + project.version + '-' + project.ext.Timestamp + '.' + project.ext.BuildNumber + '-setup.zip'
    archiveName = project.ext.zipSetupFile
    with appCopySpec(project(':spider-demo'))
}

/**
 * Returns the MD5 digest of {@code file} as a 32-character lower-case hex string.
 */
def generateMD5(final file) {
    MessageDigest digest = MessageDigest.getInstance("MD5")
    file.withInputStream { is ->
        byte[] buffer = new byte[8192]
        int read
        while ((read = is.read(buffer)) != -1) {
            digest.update(buffer, 0, read)
        }
    }
    // BigInteger.toString(16) drops leading zeros, so pad back to the full 32 hex digits.
    return new BigInteger(1, digest.digest()).toString(16).padLeft(32, '0')
}

// Prints the MD5 sums of both archives and writes them to a "-md5.txt" file next to them.
// doLast replaces the "<<" shortcut, which newer Gradle versions no longer accept.
task md5(dependsOn: [zipSetup, zipSources]) {
    doLast {
        String distDir = "${projectDir}/build/distributions/"
        String md5_setup = generateMD5(file(distDir + project.ext.zipSetupFile))
        String md5_sources = generateMD5(file(distDir + project.ext.zipSourcesFile))
        println(project.ext.zipSetupFile + '=' + md5_setup)
        println(project.ext.zipSourcesFile + '=' + md5_sources)

        def newFile = new File(distDir + project.name + '-' + project.version + '-' + project.ext.Timestamp + '.' + project.ext.BuildNumber + '-md5.txt')
        // withPrintWriter flushes and closes the writer even when a write fails.
        newFile.withPrintWriter { printWriter ->
            printWriter.println(project.ext.zipSetupFile + '=' + md5_setup)
            printWriter.println(project.ext.zipSourcesFile + '=' + md5_sources)
        }
    }
}

build.dependsOn subprojects.build, zipSetup, zipSources, md5
工程组件的 Gradle 依赖:语音识别使用百度 API,需引入 compile 'com.baidu.aip:java-sdk:3.2.1'
// Build script of the application module: applies the Spring Boot and application
// plugins and declares the module's dependencies.
apply plugin: 'spring-boot'
apply plugin: 'application'

// Ship the example configuration files in the "config" directory of the distribution.
distributions {
    main {
        contents {
            from("${projectDir}/config/examples") {
                into "config"
            }
        }
    }
}

// Disable the tar distribution task.
distTar.enabled = false

springBoot {
    executable = true
    mainClass = 'com.my.ai.Application'
}

dependencies {
    compile 'org.springframework.boot:spring-boot-starter-web:1.4.0.RELEASE'
    compile 'dom4j:dom4j:1.6.1'
    compile 'commons-httpclient:commons-httpclient:3.1'
    compileOnly 'com.h2database:h2:1.4.191'
    compile 'javax.cache:cache-api:1.0.0'
    compile 'org.jboss.resteasy:resteasy-jaxrs:3.0.14.Final'
    compile 'org.jboss.resteasy:resteasy-client:3.0.14.Final'
    // Axis
    compile 'axis:axis:1.4'
    compile 'org.jsoup:jsoup:1.10.1'
    compile 'com.alibaba:fastjson:1.2.21'
    // Baidu AIP SDK, used for speech recognition
    compile 'com.baidu.aip:java-sdk:3.2.1'
}
3、视频抽取音频服务:
package com.my.ai.service;

import java.io.BufferedReader;
import java.io.File;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service;

/**
 * Extracts the audio track of a video file into a WAV file by running an external
 * ffmpeg executable.
 */
@Service
public class ExtractAudioService {

    public static Logger logger = LoggerFactory.getLogger(ExtractAudioService.class);

    /** Manual smoke test with hard-coded local paths. */
    public static void main(String[] args) {
        new ExtractAudioService().getAudioFromVideo(
                "E:\\QLDownload\\氧化还原反应中电子转移的方向和数目的表示方法\\氧化还原反应中电子转移的方向和数目的表示方法.mp4",
                "D:\\ffmpeg4.2\\bin\\ffmpeg.exe");
    }

    /**
     * Extracts the audio of {@code videoPath} into a WAV file placed next to the video.
     *
     * @param videoPath  path of the source video
     * @param ffmpegPath path of the ffmpeg executable
     * @return the path of the WAV file (the video path with its extension replaced by
     *         {@code .wav}), or {@code null} when {@code videoPath} is not an existing file.
     *         The path is returned whether or not ffmpeg succeeded.
     */
    public String getAudioFromVideo(String videoPath, String ffmpegPath) {
        File video = new File(videoPath);
        if (!video.exists() || !video.isFile()) {
            return null;
        }
        String format = "wav";
        String outPath = stripExtension(videoPath) + ".wav";
        processCmd(videoPath, ffmpegPath, format, outPath);
        return outPath;
    }

    /**
     * Runs ffmpeg to write the audio of {@code inputPath} to {@code outPath}.
     * Equivalent command line: {@code ffmpeg -i input.mp4 -y -vn -f wav output.wav}.
     *
     * @param inputPath  media file to read
     * @param ffmpegPath path of the ffmpeg executable
     * @param format     output container format passed to {@code -f}
     * @param outPath    file to write
     * @return everything ffmpeg printed (stdout and stderr merged, line breaks removed),
     *         or {@code null} when the process could not be run
     */
    public String processCmd(String inputPath, String ffmpegPath, String format, String outPath) {
        List<String> command = new ArrayList<String>();
        command.add(ffmpegPath);
        command.add("-i");
        command.add(inputPath);
        command.add("-y");
        command.add("-vn");
        command.add("-f");
        command.add(format);
        command.add(outPath);
        try {
            ProcessBuilder builder = new ProcessBuilder(command);
            // Merge stderr into stdout so a single reader drains all of ffmpeg's output.
            builder.redirectErrorStream(true);
            Process process = builder.start();
            StringBuilder output = new StringBuilder();
            try (BufferedReader reader =
                    new BufferedReader(new InputStreamReader(process.getInputStream()))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    System.out.println(line);
                    output.append(line);
                }
            }
            // Block until the external conversion process has finished.
            int exitCode = process.waitFor();
            if (exitCode != 0) {
                logger.warn("ffmpeg exited with code {} while converting {}", exitCode, inputPath);
            }
            return output.toString();
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            logger.error("Interrupted while waiting for ffmpeg to convert {}", inputPath, e);
            return null;
        } catch (Exception e) {
            // Any failure is reported as null, as before, but is now logged instead of dropped.
            logger.error("Failed to run ffmpeg on {}", inputPath, e);
            return null;
        }
    }

    /**
     * Removes the extension of the file name in {@code path}. A dot that belongs to a
     * directory name is ignored, so a file name without an extension is returned unchanged.
     */
    private static String stripExtension(String path) {
        int lastSeparator = Math.max(path.lastIndexOf('/'), path.lastIndexOf('\\'));
        int lastDot = path.lastIndexOf('.');
        return lastDot > lastSeparator ? path.substring(0, lastDot) : path;
    }
}
4、音频切段:
package com.my.ai.service;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service;

/**
 * Splits a media file into fixed-length segments and converts WAV segments to raw PCM,
 * in both cases by running an external ffmpeg executable.
 */
@Service
public class CutService {

    public static Logger logger = LoggerFactory.getLogger(CutService.class);

    /**
     * Length of one segment in seconds.
     * NOTE(review): presumably chosen to stay below a 60-second limit of the speech API - confirm.
     */
    private static final int SEGMENT_SECONDS = 59;

    /** Matches the "Duration: HH:MM:SS.ss" field that ffmpeg prints for an input file. */
    private static final Pattern DURATION_PATTERN =
            Pattern.compile("Duration: (\\d+:\\d+:\\d+(?:\\.\\d+)?)");

    /** Matches the "bitrate: N kb/s" field that follows the duration. */
    private static final Pattern BITRATE_PATTERN = Pattern.compile("bitrate: (\\d+) kb/s");

    /**
     * Cuts {@code media_path} into consecutive segments of 59 seconds (the last one may be
     * shorter). Segment {@code i} is written next to the source as {@code i-<file name>}.
     *
     * @param media_path  media file to split
     * @param ffmpeg_path path of the ffmpeg executable
     * @return the segment paths in playback order; empty when the duration could not be read
     */
    public List<String> cutFile(String media_path, String ffmpeg_path) {
        List<String> audios = new ArrayList<>();
        int mediaTime = getMediaTime(media_path, ffmpeg_path);
        int num = mediaTime / SEGMENT_SECONDS;
        int lastNum = mediaTime % SEGMENT_SECONDS;
        System.out.println(mediaTime + "|" + num + "|" + lastNum);
        File file = new File(media_path);
        for (int i = 0; i < num; i++) {
            String outputPath = segmentPath(file, i);
            processCmd(media_path, ffmpeg_path, String.valueOf(SEGMENT_SECONDS * i),
                    String.valueOf(SEGMENT_SECONDS), outputPath);
            audios.add(outputPath);
        }
        if (lastNum > 0) {
            String outputPath = segmentPath(file, num);
            processCmd(media_path, ffmpeg_path, String.valueOf(SEGMENT_SECONDS * num),
                    String.valueOf(lastNum), outputPath);
            audios.add(outputPath);
        }
        return audios;
    }

    /**
     * Reads the total duration of a media file from the information ffmpeg prints for it.
     *
     * @param video_path  path of the media file
     * @param ffmpeg_path path of the ffmpeg executable
     * @return the duration in whole seconds, or 0 when it could not be determined
     */
    public int getMediaTime(String video_path, String ffmpeg_path) {
        List<String> commands = new ArrayList<String>();
        commands.add(ffmpeg_path);
        commands.add("-i");
        commands.add(video_path);
        try {
            ProcessBuilder builder = new ProcessBuilder(commands);
            // The file information is printed on stderr; merge it so one reader sees everything.
            builder.redirectErrorStream(true);
            Process process = builder.start();
            String info = captureOutput(process, false);
            // No output file is given, so the exit code is not checked here.
            process.waitFor();
            System.out.println(info);

            Matcher duration = DURATION_PATTERN.matcher(info);
            if (duration.find()) {
                int time = getTimelen(duration.group(1));
                // The bitrate is only reported; a missing value must not discard the duration.
                Matcher bitrate = BITRATE_PATTERN.matcher(info);
                String kbps = bitrate.find(duration.end()) ? bitrate.group(1) : "N/A";
                System.out.println(video_path + ",视频时长:" + time + ",比特率:" + kbps + "kb/s");
                return time;
            }
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            logger.error("Interrupted while reading the duration of {}", video_path, e);
        } catch (Exception e) {
            logger.error("Failed to read the duration of {}", video_path, e);
        }
        return 0;
    }

    /**
     * Converts a duration such as {@code "00:00:10.68"} to seconds; the seconds field is
     * rounded to the nearest integer.
     *
     * @param timelen duration formatted as {@code HH:MM:SS[.fraction]}
     * @return the duration in whole seconds
     * @throws IllegalArgumentException when {@code timelen} has fewer than three fields
     */
    public int getTimelen(String timelen) {
        String[] parts = timelen.split(":");
        if (parts.length < 3) {
            throw new IllegalArgumentException("Expected HH:MM:SS but got: " + timelen);
        }
        int seconds = Integer.parseInt(parts[0].trim()) * 60 * 60;
        seconds += Integer.parseInt(parts[1].trim()) * 60;
        seconds += Math.round(Float.parseFloat(parts[2].trim()));
        return seconds;
    }

    /**
     * Cuts one segment out of {@code inputPath}.
     * Equivalent command line: {@code ffmpeg -y -i 123.pcm -ss 0 -t 59 1-123.wav}.
     *
     * @param inputPath  media file to read
     * @param ffmpegPath path of the ffmpeg executable
     * @param startTime  segment start in seconds, passed to {@code -ss}
     * @param length     segment length in seconds, passed to {@code -t}
     * @param outputPath file to write
     * @return everything ffmpeg printed (line breaks removed), or {@code null} when the
     *         process could not be run
     */
    public String processCmd(String inputPath, String ffmpegPath, String startTime, String length,
            String outputPath) {
        List<String> command = new ArrayList<String>();
        command.add(ffmpegPath);
        // -y: overwrite an existing output file instead of waiting for a confirmation
        // that can never arrive on the piped stdin.
        command.add("-y");
        command.add("-i");
        command.add(inputPath);
        command.add("-ss");
        command.add(startTime);
        command.add("-t");
        command.add(length);
        command.add(outputPath);
        try {
            return runFfmpeg(command);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            logger.error("Interrupted while cutting {}", inputPath, e);
            return null;
        } catch (Exception e) {
            logger.error("Failed to cut {}", inputPath, e);
            return null;
        }
    }

    /**
     * Converts a WAV file to raw 16-bit little-endian, mono, 16 kHz PCM.
     * Equivalent command line:
     * {@code ffmpeg -y -i 16k.wav -acodec pcm_s16le -f s16le -ac 1 -ar 16000 16k.pcm}.
     *
     * @param inputPath  WAV file to read
     * @param ffmpegPath path of the ffmpeg executable
     * @param outputPath PCM file to write
     * @return {@code outputPath}, or {@code null} when the process could not be run
     */
    public static String processWavToPcm(String inputPath, String ffmpegPath, String outputPath) {
        List<String> command = new ArrayList<String>();
        command.add(ffmpegPath);
        // -y: overwrite an existing output file instead of blocking on a confirmation prompt.
        command.add("-y");
        command.add("-i");
        command.add(inputPath);
        command.add("-acodec");
        command.add("pcm_s16le");
        command.add("-f");
        command.add("s16le");
        command.add("-ac");
        command.add("1");
        command.add("-ar");
        command.add("16000");
        command.add(outputPath);
        try {
            runFfmpeg(command);
            return outputPath;
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            logger.error("Interrupted while converting {} to PCM", inputPath, e);
            return null;
        } catch (Exception e) {
            logger.error("Failed to convert {} to PCM", inputPath, e);
            return null;
        }
    }

    /** Manual smoke test with hard-coded local paths: cut a WAV file, then convert each segment. */
    public static void main(String[] args) {
        List<String> audios = new CutService().cutFile(
                "E:\\QLDownload\\氧化还原反应中电子转移的方向和数目的表示方法\\氧化还原反应中电子转移的方向和数目的表示方法.wav",
                "D:\\ffmpeg4.2\\bin\\ffmpeg.exe");
        System.out.println(audios.size());
        for (String wavPath : audios) {
            String out = wavPath.substring(0, wavPath.lastIndexOf(".")) + ".pcm";
            processWavToPcm(wavPath, "D:\\ffmpeg4.2\\bin\\ffmpeg.exe", out);
        }
    }

    /** Builds the path of segment {@code index}: same directory as {@code source}, name prefixed with the index. */
    private static String segmentPath(File source, int index) {
        // File(parent, child) copes with a source that has no parent directory.
        return new File(source.getParentFile(), index + "-" + source.getName()).getPath();
    }

    /** Runs {@code command}, echoes its merged output line by line, waits for it and returns the output. */
    private static String runFfmpeg(List<String> command) throws IOException, InterruptedException {
        ProcessBuilder builder = new ProcessBuilder(command);
        builder.redirectErrorStream(true);
        Process process = builder.start();
        String output = captureOutput(process, true);
        int exitCode = process.waitFor();
        if (exitCode != 0) {
            logger.warn("ffmpeg exited with code {}: {}", exitCode, command);
        }
        return output;
    }

    /** Drains the output of {@code process} into one string (line breaks removed) and closes the stream. */
    private static String captureOutput(Process process, boolean echoLines) throws IOException {
        StringBuilder output = new StringBuilder();
        try (BufferedReader reader =
                new BufferedReader(new InputStreamReader(process.getInputStream()))) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (echoLines) {
                    System.out.println(line);
                }
                output.append(line);
            }
        }
        return output.toString();
    }
}
5、音频格式转换(WAV 转 PCM),便于进行语音识别,代码见上文 CutService 中的 processWavToPcm 方法:
6、调用sdk,获取识别结果:
package com.my.ai.service; import org.json.JSONObject; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.stereotype.Service; import com.baidu.aip.speech.AipSpeech; @Service public class TokenService { public static Logger logger = LoggerFactory.getLogger(TokenService.class); //设置APPID/AK/SK public static final String APP_ID = "***"; public static final String API_KEY = "***"; public static final String SECRET_KEY = "***"; static AipSpeech client = null; static { if(client == null) { client = new AipSpeech(APP_ID, API_KEY, SECRET_KEY); } } public static void main(String[] args) { getResult("E:\QLDownload\氧化还原反应中电子转移的方向和数目的表示方法\0-氧化还原反应中电子转移的方向和数目的表示方法.pcm"); } public static String getResult(String file) { // 可选:设置网络连接参数 client.setConnectionTimeoutInMillis(2000); client.setSocketTimeoutInMillis(60000); // 可选:设置代理服务器地址, http和socket二选一,或者均不设置 //client.setHttpProxy("proxy_host", proxy_port); // 设置http代理 //client.setSocketProxy("proxy_host", proxy_port); // 设置socket代理 JSONObject res = client.asr(file, "pcm", 16000, null); //System.out.println(res.toString(2)); System.out.println(res.get("result").toString()); return res.get("result").toString(); } }
7、结果写入文件:
package com.my.ai.service;

import java.io.BufferedOutputStream;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.RandomAccessFile;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service;

/**
 * Helpers for writing and appending text to files. The {@code writeFileN} variants
 * overwrite the file and print how long the write took; the {@code appendFileN} variants
 * append and log failures instead of throwing. All of them use the platform default charset.
 */
@Service
public class FileService {

    public static Logger logger = LoggerFactory.getLogger(FileService.class);

    /**
     * Overwrites {@code file} with {@code content} through a plain {@link FileOutputStream}
     * (labelled "slowest" by the original author).
     *
     * @throws IOException when the file cannot be opened or written
     */
    public static void writeFile1(String file, String content) throws IOException {
        long begin;
        try (FileOutputStream out = new FileOutputStream(new File(file))) {
            begin = System.currentTimeMillis();
            out.write(content.getBytes());
        }
        long end = System.currentTimeMillis();
        System.out.println("FileOutputStream执行耗时:" + (end - begin) + " 毫秒");
    }

    /**
     * Overwrites {@code file} with {@code content} through a {@link FileWriter}
     * (labelled "medium" by the original author).
     *
     * @throws IOException when the file cannot be opened or written
     */
    public static void writeFile2(String file, String content) throws IOException {
        long begin3;
        try (FileWriter fw = new FileWriter(file)) {
            begin3 = System.currentTimeMillis();
            fw.write(content);
        }
        long end3 = System.currentTimeMillis();
        System.out.println("FileWriter执行耗时:" + (end3 - begin3) + " 毫秒");
    }

    /**
     * Overwrites {@code file} with {@code content} through a {@link BufferedOutputStream}
     * (labelled "fastest" by the original author).
     *
     * @throws IOException when the file cannot be opened or written
     */
    public static void writeFile3(String file, String content) throws IOException {
        long begin0;
        try (BufferedOutputStream buff =
                new BufferedOutputStream(new FileOutputStream(new File(file)))) {
            begin0 = System.currentTimeMillis();
            buff.write(content.getBytes());
            buff.flush();
        }
        long end0 = System.currentTimeMillis();
        System.out.println("BufferedOutputStream执行耗时:" + (end0 - begin0) + " 毫秒");
    }

    /** Manual smoke test with hard-coded local paths: recognizes segments 0..6 and appends the text. */
    public static void main(String[] args) {
        for (int i = 0; i < 7; i++) {
            String result = TokenService.getResult(
                    "E:\\QLDownload\\氧化还原反应中电子转移的方向和数目的表示方法\\" + i
                            + "-氧化还原反应中电子转移的方向和数目的表示方法.pcm");
            appendFile2(
                    "E:\\QLDownload\\氧化还原反应中电子转移的方向和数目的表示方法\\氧化还原反应中电子转移的方向和数目的表示方法.txt",
                    result + " ");
        }
    }

    /**
     * Appends text to a file through a {@link BufferedWriter}. Failures are logged, not thrown.
     *
     * @param file   file to append to
     * @param conent text to append
     */
    public static void appendFile1(String file, String conent) {
        try (BufferedWriter out =
                new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file, true)))) {
            out.write(conent);
        } catch (Exception e) {
            logger.error("Failed to append to {}", file, e);
        }
    }

    /**
     * Appends text to a file through a {@link FileWriter}. Failures are logged, not thrown.
     *
     * @param fileName file to append to
     * @param content  text to append
     */
    public static void appendFile2(String fileName, String content) {
        // The second constructor argument (true) opens the file in append mode.
        try (FileWriter writer = new FileWriter(fileName, true)) {
            writer.write(content);
        } catch (IOException e) {
            logger.error("Failed to append to {}", fileName, e);
        }
    }

    /**
     * Appends text to a file through a {@link RandomAccessFile}. Failures are logged, not thrown.
     *
     * @param fileName file to append to
     * @param content  text to append
     */
    public static void appendFile3(String fileName, String content) {
        try (RandomAccessFile randomFile = new RandomAccessFile(fileName, "rw")) {
            // Move the write position to the end of the file.
            randomFile.seek(randomFile.length());
            // writeBytes(String) keeps only the low byte of each char and would corrupt
            // non-Latin text, so write the encoded bytes instead.
            randomFile.write(content.getBytes());
        } catch (IOException e) {
            logger.error("Failed to append to {}", fileName, e);
        }
    }
}
8、测试:
package com.my.ai.test;

import java.util.List;

import com.my.ai.service.CutService;
import com.my.ai.service.ExtractAudioService;
import com.my.ai.service.FileService;
import com.my.ai.service.TokenService;

/**
 * End-to-end manual test: extract the audio of a video, cut it into segments, convert
 * each segment to PCM, recognize it and append the text to a transcript file.
 */
public class TestService {

    private static final String FFMPEG_PATH = "D:\\ffmpeg4.2\\bin\\ffmpeg.exe";

    private static final String VIDEO_PATH =
            "G:\\Youku Files\\transcode\\化学高中必修1__第2章第3节·氧化还原反应_标清.mp4";

    /** Transcript file: the video path with "-字幕.txt" appended. */
    private static final String TRANSCRIPT_PATH = VIDEO_PATH + "-字幕.txt";

    public static void main(String[] args) {
        ExtractAudioService audioService = new ExtractAudioService();
        String outPath = audioService.getAudioFromVideo(VIDEO_PATH, FFMPEG_PATH);
        if (outPath == null) {
            // getAudioFromVideo returns null when the video file does not exist.
            System.err.println("Video not found: " + VIDEO_PATH);
            return;
        }

        List<String> audios = new CutService().cutFile(outPath, FFMPEG_PATH);
        for (String wavPath : audios) {
            String out = wavPath.substring(0, wavPath.lastIndexOf(".")) + ".pcm";
            String outPcm = CutService.processWavToPcm(wavPath, FFMPEG_PATH, out);
            String result = TokenService.getResult(outPcm);
            FileService.appendFile2(TRANSCRIPT_PATH, result + " ");
        }
    }
}
以上是关于使用FFmpeg进行视频抽取音频,之后进行语音识别转为文字的主要内容,如果未能解决你的问题,请参考以下文章