2019-06-26

Node语音识别

语音识别（支持pcm和m4a）

测试： http://web.chenb.top
Github项目： https://github.com/chenbin-353549444/node-ffmpeg

说明

使用百度的语音识别接口，用FFMPEG转音频文件

部署

修改server.js的API Key和Secret Key
在安装好Docker的机器上运行sh run.sh，如果有Jenkins可以直接新建管道构建
完成后访问测试页面(http://127.0.0.1:80/)

核心代码

server.js

1.8

// Modules used by the server.
const fs = require('fs');
const express = require('express');
const bodyParser = require('body-parser');
const multer = require('multer');
const ffmpeg = require('fluent-ffmpeg');
const AipSpeech = require("baidu-aip-sdk").speech;

const app = express();

// Baidu speech client: replace the placeholders with the `API Key` and
// `Secret Key` shown in the Baidu speech application console.
// NOTE(review): the leading 0 is presumably the App ID slot — confirm against the SDK docs.
const client = new AipSpeech(0, 'API Key', 'Secret Key');

// Files in ./public are exposed under the /public URL prefix.
const staticAssets = express.static('public');
app.use('/public', staticAssets);

// URL-encoded form bodies are parsed with `extended: false`.
const formParser = bodyParser.urlencoded({ extended: false });
app.use(formParser);

// Multipart uploads sent in the `buf` field are stored under /tmp/; the
// route handlers read them from req.files.
const bufUploads = multer({ dest: '/tmp/'}).array('buf');
app.use(bufUploads);

// Serve the test page (index.htm next to this script) for requests to the site root.
app.get('/', (req, res) => {
    const indexPage = `${__dirname}/index.htm`;
    res.sendFile(indexPage);
});

/**
 * POST /pcm — speech recognition for an uploaded raw PCM recording.
 *
 * Reads the first uploaded file (req.files[0]) from disk and passes its bytes
 * to client.recognize(..., 'pcm', 16000). The recognition result is sent back
 * as a JSON string; the Content-Type stays text/html;charset=utf-8 so existing
 * clients keep working.
 *
 * Responses:
 *   400 — no file was uploaded.
 *   500 — the uploaded file could not be read.
 *   200 — recognition finished; the body is the JSON result, or the error text
 *         when client.recognize rejects (status kept at 200 as before).
 */
app.post('/pcm', function (req, res) {
    const headers = {'Content-Type': 'text/html;charset=utf-8'};

    // res.end() only accepts strings/Buffers, so errors are rendered as text first.
    const describeError = function (err) {
        if (err instanceof Error) {
            return err.message;
        }
        return typeof err === 'string' ? err : JSON.stringify(err);
    };

    const upload = req.files && req.files[0];
    if (!upload) {
        res.writeHead(400, headers);
        res.end('No audio file uploaded (expected form field "buf")');
        return;
    }
    console.log(upload);  // uploaded file info

    fs.readFile(upload.path, function (readErr, data) {
        // The upload is only needed until it has been read; remove it so
        // temporary files do not pile up.
        fs.unlink(upload.path, function (unlinkErr) {
            if (unlinkErr) {
                console.log('Failed to remove upload: ' + unlinkErr.message);
            }
        });

        if (readErr) {
            res.writeHead(500, headers);
            res.end(describeError(readErr));
            return;
        }

        res.writeHead(200, headers);
        // `data` is already a Buffer (readFile was called without an encoding).
        client.recognize(data, 'pcm', 16000).then(function (result) {
            res.end(JSON.stringify(result));
        }, function (recognizeErr) {
            res.end(describeError(recognizeErr));
        });
    });
});

/**
 * POST /m4a — speech recognition for an uploaded M4A recording.
 *
 * Converts the first uploaded file (req.files[0]) with ffmpeg to mono,
 * 16000 Hz, signed 16-bit little-endian PCM, collects the converted bytes in
 * memory, and passes them to client.recognize(..., 'pcm', 16000). The
 * recognition result is sent back as a JSON string; the Content-Type stays
 * text/html;charset=utf-8 so existing clients keep working.
 *
 * Responses:
 *   400 — no file was uploaded.
 *   500 — ffmpeg reported a conversion error.
 *   200 — recognition finished; the body is the JSON result, or the error text
 *         when client.recognize rejects (status kept at 200 as before).
 */
app.post('/m4a', function (req, res) {
    const headers = {'Content-Type': 'text/html;charset=utf-8'};

    // res.end() only accepts strings/Buffers, so errors are rendered as text first.
    const describeError = function (err) {
        if (err instanceof Error) {
            return err.message;
        }
        return typeof err === 'string' ? err : JSON.stringify(err);
    };

    const upload = req.files && req.files[0];
    if (!upload) {
        res.writeHead(400, headers);
        res.end('No audio file uploaded (expected form field "buf")');
        return;
    }
    console.log(upload);  // uploaded file info

    const chunks = [];            // converted PCM pieces, joined once at the end
    let responded = false;        // guarantees the response is ended exactly once
    let conversionSucceeded = false;
    let streamEnded = false;
    let uploadRemoved = false;

    // The upload is only needed while ffmpeg reads it; remove it afterwards so
    // temporary files do not pile up.
    const removeUpload = function () {
        if (uploadRemoved) {
            return;
        }
        uploadRemoved = true;
        fs.unlink(upload.path, function (unlinkErr) {
            if (unlinkErr) {
                console.log('Failed to remove upload: ' + unlinkErr.message);
            }
        });
    };

    // Recognition starts only after ffmpeg reported success AND the output
    // stream delivered all data, whichever of the two events arrives last.
    const recognizeWhenReady = function () {
        if (responded || !conversionSucceeded || !streamEnded) {
            return;
        }
        responded = true;
        const pcm = Buffer.concat(chunks);
        console.log('data ' + pcm.length + ' bytes');
        res.writeHead(200, headers);
        client.recognize(pcm, 'pcm', 16000).then(function (result) {
            res.end(JSON.stringify(result));
        }, function (recognizeErr) {
            res.end(describeError(recognizeErr));
        });
    };

    const command = ffmpeg(upload.path)
        .noVideo()
        .inputFormat('m4a')
        .audioCodec('pcm_s16le')
        .audioBitrate(16)
        .audioFrequency(16000)
        .audioChannels(1)
        .format('s16le')
        .on('error', function (err) {
            console.log('An error occurred: ' + err.message);
            removeUpload();
            if (responded) {
                return;
            }
            // Report the failure instead of recognizing empty/partial audio.
            responded = true;
            res.writeHead(500, headers);
            res.end('An error occurred: ' + err.message);
        })
        .on('end', function () {
            console.log('success');
            removeUpload();
            conversionSucceeded = true;
            recognizeWhenReady();
        });

    const ffstream = command.pipe();
    ffstream.on('data', function (chunk) {
        chunks.push(chunk);
        console.log('ffmpeg just wrote ' + chunk.length + ' bytes');
    });
    ffstream.on('end', function () {
        streamEnded = true;
        recognizeWhenReady();
    });
});

// Start the HTTP server on port 80 and log the bound address once it is listening.
const server = app.listen(80, () => {
    const { address, port } = server.address();
    console.log("应用实例，访问地址为 http://%s:%s", address, port);
});

参考

百度接口文档:(http://ai.baidu.com/docs#/ASR-Online-Node-SDK/8b3186c3)
node-fluent-ffmpeg:(https://github.com/fluent-ffmpeg/node-fluent-ffmpeg#creating-an-ffmpeg-command)
FFMPEG:(http://www.ffmpeg.org)

本文标题:Node语音识别

文章作者:

发布时间:2019-06-26, 13:47:02

最后更新:2022-11-03, 16:56:40

更新历史: Blame, History

文本模式: .md Raw

原始链接:https://www.chenb.top/2019/06/26/node-ffmpeg/

许可协议: "署名-非商用-相同方式共享 4.0" 转载请保留原文链接及作者。