Node语音识别

语音识别(支持pcm和m4a)

测试: http://web.chenb.top
Github项目: https://github.com/chenbin-353549444/node-ffmpeg

说明

使用百度的语音识别接口,用FFMPEG转音频文件

部署

修改server.js中的API Key和Secret Key
在安装好Docker的机器上运行sh run.sh,如果有Jenkins可以直接新建管道构建
完成后访问测试页面(http://127.0.0.1:80/)

核心代码

server.js

1.8
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
let express = require('express');
let ffmpeg = require('fluent-ffmpeg');
let fs = require('fs');
let app = express();
let AipSpeech = require("baidu-aip-sdk").speech;

// `API Key` and `Secret Key` come from the Baidu speech application console.
// Replace the two placeholder strings below before deploying.
// NOTE(review): the first argument (0) is presumably the Baidu App ID — confirm
// against the SDK documentation.
let client = new AipSpeech(0, 'API Key', 'Secret Key');

let bodyParser = require('body-parser');
let multer = require('multer');

// Serve files from the local `public` directory under the `/public` URL prefix.
app.use('/public', express.static('public'));
// Parse URL-encoded request bodies.
app.use(bodyParser.urlencoded({ extended: false }));
// Accept multipart uploads sent in the `buf` field; files are written under
// /tmp/ and the route handlers below read them through `req.files`.
app.use(multer({ dest: '/tmp/'}).array('buf'));

// GET / — respond with the test page (index.htm) located next to this script.
app.get('/', (req, res) => {
  const indexPage = __dirname + "/" + "index.htm";
  res.sendFile(indexPage);
});

/**
 * POST /pcm — recognize an uploaded raw PCM recording.
 *
 * Expects one multipart file in the `buf` field (stored on disk by multer).
 * The file content is passed to the Baidu client as 'pcm' at 16000 Hz and the
 * recognition result is returned as a JSON string.
 *
 * Responses (all `text/html;charset=utf-8`, JSON-encoded body):
 *   200 — the recognition result
 *   400 — no file was uploaded
 *   500 — the upload could not be read, or recognition failed
 */
app.post('/pcm', function (req, res) {
  let responded = false;

  // Send exactly one response, whatever order the callbacks fire in.
  function reply(status, body) {
    if (responded) {
      return;
    }
    responded = true;
    res.writeHead(status, {'Content-Type': 'text/html;charset=utf-8'});
    res.end(body);
  }

  // res.end() only accepts strings/Buffers, so errors are serialized first.
  function replyError(status, err) {
    const message = err && err.message ? err.message : String(err);
    reply(status, JSON.stringify({ error: message }));
  }

  const upload = req.files && req.files[0];
  if (!upload) {
    replyError(400, new Error('No audio file uploaded in field "buf"'));
    return;
  }
  console.log(upload); // metadata of the uploaded file

  fs.readFile(upload.path, function (readErr, data) {
    // The temp file is no longer needed once it has been read (or failed to).
    fs.unlink(upload.path, function (unlinkErr) {
      if (unlinkErr) {
        console.log('Could not remove temp file: ' + unlinkErr.message);
      }
    });

    if (readErr) {
      replyError(500, readErr);
      return;
    }

    // `data` is already a Buffer; no copy through the deprecated constructor.
    client.recognize(data, 'pcm', 16000).then(function (result) {
      reply(200, JSON.stringify(result));
    }, function (recognizeErr) {
      replyError(500, recognizeErr);
    });
  });
});

/**
 * POST /m4a — recognize an uploaded m4a recording.
 *
 * Expects one multipart file in the `buf` field (stored on disk by multer).
 * The file is transcoded by ffmpeg to mono 16 kHz signed 16-bit little-endian
 * PCM, collected in memory, and passed to the Baidu client as 'pcm' at
 * 16000 Hz. The recognition result is returned as a JSON string.
 *
 * Responses (all `text/html;charset=utf-8`, JSON-encoded body):
 *   200 — the recognition result
 *   400 — no file was uploaded
 *   500 — transcoding produced an error or no audio, or recognition failed
 */
app.post('/m4a', function (req, res) {
  let responded = false;
  let cleaned = false;

  // Send exactly one response: the ffmpeg 'error' event and the output
  // stream's 'end' event may both fire for the same request.
  function reply(status, body) {
    if (responded) {
      return;
    }
    responded = true;
    res.writeHead(status, {'Content-Type': 'text/html;charset=utf-8'});
    res.end(body);
  }

  // res.end() only accepts strings/Buffers, so errors are serialized first.
  function replyError(status, err) {
    const message = err && err.message ? err.message : String(err);
    reply(status, JSON.stringify({ error: message }));
  }

  const upload = req.files && req.files[0];
  if (!upload) {
    replyError(400, new Error('No audio file uploaded in field "buf"'));
    return;
  }
  console.log(upload); // metadata of the uploaded file (not the whole request)

  // Remove the uploaded temp file once ffmpeg is done with it.
  function cleanup() {
    if (cleaned) {
      return;
    }
    cleaned = true;
    fs.unlink(upload.path, function (unlinkErr) {
      if (unlinkErr) {
        console.log('Could not remove temp file: ' + unlinkErr.message);
      }
    });
  }

  // Chunks are gathered in an array and joined once at the end, instead of
  // re-copying the whole buffer on every 'data' event.
  const chunks = [];
  let total = 0;

  const command = ffmpeg(upload.path)
    .noVideo()
    .inputFormat('m4a')
    .audioCodec('pcm_s16le')
    .audioBitrate(16)
    .audioFrequency(16000)
    .audioChannels(1)
    .format('s16le')
    .on('error', function (err) {
      console.log('An error occurred: ' + err.message);
      cleanup();
      replyError(500, err);
    })
    .on('end', function () {
      console.log('success');
    });

  const ffstream = command.pipe();
  ffstream.on('data', function (chunk) {
    chunks.push(chunk);
    total += chunk.length;
    console.log('ffmpeg just wrote ' + chunk.length + ' bytes');
  });
  ffstream.on('end', function () {
    cleanup();
    console.log('data ' + total + ' bytes');
    if (responded) {
      return; // a transcoding error was already reported
    }
    if (total === 0) {
      replyError(500, new Error('ffmpeg produced no audio data'));
      return;
    }
    client.recognize(Buffer.concat(chunks, total), 'pcm', 16000).then(function (result) {
      reply(200, JSON.stringify(result));
    }, function (recognizeErr) {
      replyError(500, recognizeErr);
    });
  });
});

// Start the HTTP server on port 80 and log the bound address once listening.
let server = app.listen(80, () => {
  const { address: host, port } = server.address();
  console.log("应用实例,访问地址为 http://%s:%s", host, port)
});

参考

百度接口文档:(http://ai.baidu.com/docs#/ASR-Online-Node-SDK/8b3186c3)
node-fluent-ffmpeg:(https://github.com/fluent-ffmpeg/node-fluent-ffmpeg#creating-an-ffmpeg-command)
FFMPEG:(http://www.ffmpeg.org)

文章目录
  1. 1. 语音识别(支持pcm和m4a)
    1. 1.1. 说明
    2. 1.2. 部署
    3. 1.3. 核心代码
    4. 1.4. 参考