语音识别(支持pcm和m4a)
测试: http://web.chenb.top
Github项目: https://github.com/chenbin-353549444/node-ffmpeg
说明
使用百度的语音识别接口,用FFMPEG转音频文件
部署
修改server.js中的API Key和Secret Key
在安装好Docker的机器上运行sh run.sh,如果有Jenkins可以直接新建管道构建
完成后访问测试页面(http://127.0.0.1:80/index.htm)
核心代码
server.js
let express = require('express');
// Modules used by the server (express itself is required just above).
const ffmpeg = require('fluent-ffmpeg');
const fs = require('fs');
const AipSpeech = require("baidu-aip-sdk").speech;
const bodyParser = require('body-parser');
const multer = require('multer');

const app = express();

// `API Key` and `Secret Key` from the Baidu speech application console.
const client = new AipSpeech(0, 'API Key', 'Secret Key');

// Static assets from ./public are served under the /public URL prefix.
app.use('/public', express.static('public'));
app.use(bodyParser.urlencoded({ extended: false }));

// Multipart uploads sent in the `buf` field are written to /tmp/;
// the route handlers read them through req.files.
const upload = multer({ dest: '/tmp/' });
app.use(upload.array('buf'));
// GET / — send the test page (index.htm located next to this script).
app.get('/', (req, res) => {
  const pagePath = `${__dirname}/index.htm`;
  res.sendFile(pagePath);
});
/**
 * POST /pcm — run speech recognition on an uploaded PCM recording.
 *
 * Reads the first uploaded file (req.files[0]) from disk and passes its bytes
 * to client.recognize as 'pcm' audio at 16000 Hz. On success the recognition
 * result is returned as JSON with status 200. A missing upload yields 400 and
 * a read or recognition failure yields 500, both with a JSON body of the form
 * {"error": ...}.
 */
app.post('/pcm', (req, res) => {
  const headers = { 'Content-Type': 'text/html;charset=utf-8' };

  // Write the status line and a JSON body in one place so every exit path
  // produces a complete response.
  const reply = (status, payload) => {
    res.writeHead(status, headers);
    res.end(JSON.stringify(payload));
  };

  // Error instances serialize to "{}" with JSON.stringify, so expose the message.
  const describe = (err) => (err instanceof Error ? err.message : err);

  const upload = req.files && req.files[0];
  if (!upload) {
    reply(400, { error: 'No audio file was uploaded' });
    return;
  }
  console.log(upload); // uploaded file info

  fs.readFile(upload.path, (readErr, data) => {
    // The temporary upload is not needed after this read, whether it succeeded or not.
    fs.unlink(upload.path, (unlinkErr) => {
      if (unlinkErr) {
        console.log('Failed to remove ' + upload.path + ': ' + unlinkErr.message);
      }
    });

    if (readErr) {
      reply(500, { error: readErr.message });
      return;
    }

    // readFile already yields a Buffer, so it is passed on without copying.
    client.recognize(data, 'pcm', 16000).then(
      (result) => reply(200, result),
      (err) => reply(500, { error: describe(err) })
    );
  });
});
/**
 * POST /m4a — transcode an uploaded M4A recording to PCM and recognize it.
 *
 * The first uploaded file (req.files[0]) is run through ffmpeg to produce
 * mono, 16000 Hz, signed 16-bit little-endian PCM. The piped output is
 * collected in memory and passed to client.recognize. On success the
 * recognition result is returned as JSON with status 200. A missing upload
 * yields 400 and an ffmpeg or recognition failure yields 500, both with a
 * JSON body of the form {"error": ...}.
 */
app.post('/m4a', (req, res) => {
  const headers = { 'Content-Type': 'text/html;charset=utf-8' };

  // Both the ffmpeg 'error' event and the output stream's 'end' event can
  // fire for one request; the flag guarantees a single response.
  let replied = false;
  const reply = (status, payload) => {
    if (replied) {
      return;
    }
    replied = true;
    res.writeHead(status, headers);
    res.end(JSON.stringify(payload));
  };

  // Error instances serialize to "{}" with JSON.stringify, so expose the message.
  const describe = (err) => (err instanceof Error ? err.message : err);

  const upload = req.files && req.files[0];
  if (!upload) {
    reply(400, { error: 'No audio file was uploaded' });
    return;
  }
  console.log(upload); // uploaded file info

  // Delete the temporary upload once ffmpeg is done with it.
  const removeUpload = () => {
    fs.unlink(upload.path, (unlinkErr) => {
      if (unlinkErr) {
        console.log('Failed to remove ' + upload.path + ': ' + unlinkErr.message);
      }
    });
  };

  const command = ffmpeg(upload.path)
    .noVideo()
    .inputFormat('m4a')
    .audioCodec('pcm_s16le')
    .audioBitrate(16)
    .audioFrequency(16000)
    .audioChannels(1)
    .format('s16le')
    .on('error', (err) => {
      console.log('An error occurred: ' + err.message);
      removeUpload();
      reply(500, { error: err.message });
    })
    .on('end', () => {
      console.log('success');
      removeUpload();
    });

  // Collect the chunks and join them once at the end instead of
  // re-copying the whole buffer on every 'data' event.
  const chunks = [];
  let totalBytes = 0;

  const ffstream = command.pipe();
  ffstream.on('data', (chunk) => {
    chunks.push(chunk);
    totalBytes += chunk.length;
    console.log('ffmpeg just wrote ' + chunk.length + ' bytes');
  });
  ffstream.on('end', () => {
    if (replied) {
      // An ffmpeg failure was already reported; skip recognition.
      return;
    }
    const voiceBuffer = Buffer.concat(chunks, totalBytes);
    console.log('data ' + voiceBuffer.length + ' bytes');
    client.recognize(voiceBuffer, 'pcm', 16000).then(
      (result) => reply(200, result),
      (err) => reply(500, { error: describe(err) })
    );
  });
});
// Listen on port 80 and log the bound address once the server is up.
const server = app.listen(80, () => {
  const { address, port } = server.address();
  console.log("应用实例,访问地址为 http://%s:%s", address, port);
});
参考
百度接口文档:(http://ai.baidu.com/docs#/ASR-Online-Node-SDK/8b3186c3)
node-fluent-ffmpeg:(https://github.com/fluent-ffmpeg/node-fluent-ffmpeg#creating-an-ffmpeg-command)
FFMPEG:(http://www.ffmpeg.org)