最近在为公司的外呼系统做“停关空”(停机、关机、空号)识别,这里放一个 ASR mod 模板。
可以使用 originate sofia/gateway/xxx/13800000000 start_hd:'10',wait_for_answer,echo inline
来唤起识别,和自己的样本库比较~~(当然识别算法这里没有)~~
另外注意,由于需要录制回铃音,所以ignore_early_media一定要是false,否则录制不到回铃音,说啥都没用了。
这里的识别算法是从下面这个 Java 实现改写过来的:
https://github.com/xdyuchen/AudioScore/blob/master/src/main/java/com/yc/audiodata/AudioDataOperate.java
但是这个版本的算法是不合格的,准确度低,耗时长,计算量大,只是我的一个试验品,所以放出来给大家参考下。
#include <switch.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
#include <sys/types.h>
#include <dirent.h>
#include <sys/stat.h>
#include <pthread.h>
/* Pi, evaluated through acos(-1) at every use; referenced by generateHammingWindows(). */
#define PI acos(-1)
/* Size of the fixed path / file-name buffers used in this file. */
#define MAX_PATH_LEN (256)
/* Threshold a sample must exceed to be reported by findDataStartIndex(). */
double DATA_START_VALUE = 0.125;
/* Threshold a sample must exceed to be reported by findDataEndIndex(). */
double DATA_END_VALUE = 0.125;
/* Forward declarations of the file-local helpers defined further down. */
static switch_status_t load_config(void);
/* Thread start routine: the parameter has to be void* so that this prototype
 * matches both the definition below and what pthread_create() expects.
 * (It used to be declared with char*, which did not match the definition.) */
static void* initStandardAudioByDir(void *data);
static bool initStandardAudio(char *filePath, char *fileName);
static void filterWave(double audioData[],long size, double b0, double b1);
static long getAudioData(char *filePath, double ** res);
static void normalize(double data[], long size);
static double findMax(double data[], long size);
static long findDataStartIndex(double audioData[], long size);
static long findDataEndIndex(double audioData[], long size);
static long getUsefulData(double audioData[], long size);
static void dealCompareData(double audioData[],long oldSize, long newSize);
static double* shortTimeEnergy(double audioData[], long size);
static double cosineDistance(double standard[], double compare[], long size);
static long calculateStandard(char *filePath, double ** res);
static long calculateCompare(char *filePath, double ** res, long newSize);
/* Declare the load and shutdown entry points and register them under the
 * module name mod_hangup_detect; the fourth macro argument is NULL. */
SWITCH_MODULE_LOAD_FUNCTION(mod_asr_load);
SWITCH_MODULE_SHUTDOWN_FUNCTION(mod_asr_shutdown);
SWITCH_MODULE_DEFINITION(mod_hangup_detect, mod_asr_load, mod_asr_shutdown, NULL);
/* Module-wide state: the memory pool received at load time plus the settings
 * that load_config() may override.
 * NOTE(review): default member initializers inside a struct are C++ syntax;
 * this file does not compile as plain C as written. */
static struct {
switch_memory_pool_t *pool;
/* Configuration values (the initializers are the built-in defaults). */
/* doCompare() removes the recording when the best score is >= this value. */
int deleteFileScore = 90;
/* Default capture length in seconds; copied into each session's private data. */
int maxSampleSec = 10;
/* Directory in which the per-call "<uuid>.pcm" recording is created. */
char* pcmDir = "/data/freeswitch";
/* Directory scanned at load time for reference sample files. */
char* sampleDir = "/data/freeswitch/sample";
} globals;
/* Per-call private data attached to the media bug. */
typedef struct {
/* Session the bug is attached to. */
switch_core_session_t *session;
/* Media bug handle filled in by switch_core_media_bug_add(). */
switch_media_bug_t *bug;
/* Recording file opened on bug INIT; NULL once closed. */
FILE *stream;
/* UUID of the session, used in log lines and in the recording file name. */
char *callUUID;
/* malloc'ed path of the recording file, built on bug INIT. */
char *filePath;
/* Set to 1 once the capture limit has been reached; the callback then returns SWITCH_FALSE. */
int stop;
/* Incremented once per READ_REPLACE callback (i.e. it counts frames). */
int totalSample;
/* 0 until the first frame has been seen, then set to 8000. */
int sampleRate;
/* Capture length in seconds for this call (app argument or the global default). */
int currentMaxSampleSec;
} switch_da_t;
/* One reference sample loaded from the sample directory. */
typedef struct {
/* File name of the sample (without the directory part). */
char audioName[MAX_PATH_LEN];
/* Number of elements of standardData that are used for comparison. */
long size;
/* Feature vector returned by calculateStandard(); released at module shutdown. */
double *standardData;
} standard_audio_t;
/* Global state: table of loaded reference samples (at most 10 slots) ... */
standard_audio_t *standardAudio[10];
/* ... and the number of slots currently filled. */
int standardAudioSize = 0;
void* initStandardAudioByDir(void *data) {
char* path = (char *) data;
DIR *d = NULL;
struct dirent *dp = NULL; /* readdir函数的返回值就存放在这个结构体中 */
struct stat st;
char p[MAX_PATH_LEN] = {0};
if(stat(path, &st) < 0 || !S_ISDIR(st.st_mode)) {
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "HD mod invalid path: %s\n", path);
return NULL;
}
if(!(d = opendir(path))) {
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "HD mod opendir[%s] error: %m\n", path);
return NULL;
}
while((dp = readdir(d)) != NULL) {
/* 把当前目录.,上一级目录..及隐藏文件都去掉,避免死循环遍历目录 */
if((!strncmp(dp->d_name, ".", 1)) || (!strncmp(dp->d_name, "..", 2)))
continue;
snprintf(p, sizeof(p) - 1, "%s/%s", path, dp->d_name);
stat(p, &st);
if(!S_ISDIR(st.st_mode)) {
initStandardAudio(p, dp->d_name);
}
}
closedir(d);
pthread_exit(NULL);
}
bool initStandardAudio(char *filePath, char *fileName) {
double *standardData;
long size = calculateStandard(filePath, &standardData);
standard_audio_t *sa1 = (standard_audio_t *)malloc(sizeof(standard_audio_t));
strcpy(sa1->audioName, fileName);
sa1->size = size;
sa1->standardData = standardData;
standardAudio[standardAudioSize] = sa1;
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_NOTICE, "initStandardAudio, name:%s, size:%d\n", standardAudio[standardAudioSize]->audioName, standardAudio[standardAudioSize]->size);
standardAudioSize++;
return true;
}
/**
 * Score a finished recording against every loaded reference sample, remove
 * the recording when the best score reaches globals.deleteFileScore, and fire
 * a CUSTOM "hangup_detect" event carrying the best match.
 *
 * The recording is reduced to one feature vector with calculateStandard() and
 * that vector is compared with each sample over the shorter of the two
 * lengths (cheaper than re-cutting the recording per sample, less accurate).
 *
 * Has the signature of a pthread start routine and finishes with a plain
 * return, so a direct call does not terminate the calling thread.
 *
 * NOTE(review): pvt appears to be allocated from the session; confirm the
 * session cannot be destroyed while this function is still running.
 *
 * @param arg  switch_da_t* describing the call.
 * @return always NULL.
 */
void* doCompare(void *arg) {
    switch_da_t *pvt = (switch_da_t *) arg;
    double maxScore = 0;
    int maxScoreIndex = 0;
    double *compareData = NULL;
    long size;
    switch_event_t *event = NULL;

    if (pvt == NULL || pvt->filePath == NULL) {
        return NULL;
    }

    size = calculateStandard(pvt->filePath, &compareData);
    if (compareData != NULL && size > 0) {
        for (int i = 0; i < standardAudioSize; i++) {
            long minSize = size < standardAudio[i]->size ? size : standardAudio[i]->size;
            double score = cosineDistance(standardAudio[i]->standardData, compareData, minSize);
            switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_NOTICE, "%s comparewith name:%s, score:%f\n", pvt->callUUID, standardAudio[i]->audioName, score);
            if (score > maxScore) {
                maxScore = score;
                maxScoreIndex = i;
            }
        }
    }
    /* The recording's features are no longer needed; the samples' are kept. */
    free(compareData);

    /* NOTE(review): the score computed above is a cosine value while the
     * default threshold is 90 - confirm the intended scale of deleteFileScore. */
    if (maxScore >= globals.deleteFileScore) {
        if (remove(pvt->filePath) == 0) {
            switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_NOTICE, "%s remove temp file:%s, score:%f\n", pvt->callUUID, pvt->filePath, maxScore);
        }
    }

    if (switch_event_create(&event, SWITCH_EVENT_CUSTOM) == SWITCH_STATUS_SUCCESS) {
        /* "0.1234" alone needs 7 bytes; leave generous room for any value. */
        char scoreStr[32];

        event->subclass_name = strdup("hangup_detect");
        switch_event_add_header_string(event, SWITCH_STACK_BOTTOM, "Event-Subclass", event->subclass_name);
        switch_event_add_header_string(event, SWITCH_STACK_BOTTOM, "Call-UUID", pvt->callUUID);
        /* With no samples loaded there is no entry to name. */
        switch_event_add_header_string(event, SWITCH_STACK_BOTTOM, "HD-Sample-File",
            standardAudioSize > 0 ? standardAudio[maxScoreIndex]->audioName : "");
        snprintf(scoreStr, sizeof(scoreStr), "%.4f", maxScore);
        switch_event_add_header_string(event, SWITCH_STACK_BOTTOM, "HD-Score", scoreStr);
        switch_event_fire(&event);
    }
    return NULL;
}
/**
 * Media-bug callback: records the call's read audio into "<pcmDir>/<uuid>.pcm"
 * and, once the configured number of seconds has been captured, hands the
 * recording to doCompare() on a detached worker thread.
 *
 * @param bug        the media bug this callback belongs to.
 * @param user_data  the call's switch_da_t.
 * @param type       which bug event is being delivered.
 * @return SWITCH_FALSE when there is no private data, capture has already
 *         stopped, or set-up / thread creation failed; SWITCH_TRUE otherwise.
 */
static switch_bool_t asr_callback(switch_media_bug_t *bug, void *user_data, switch_abc_type_t type)
{
    switch_da_t *pvt = (switch_da_t *)user_data;
    switch_channel_t *channel;

    if (pvt == NULL || pvt->stop == 1) {
        return SWITCH_FALSE;
    }
    channel = switch_core_session_get_channel(pvt->session);

    switch (type) {
    case SWITCH_ABC_TYPE_INIT:
        {
            /* '/' + ".pcm" = 5 characters, plus the terminating NUL. */
            size_t pathLen = strlen(globals.pcmDir) + strlen(pvt->callUUID) + 5 + 1;

            /* NOTE(review): this buffer is not freed anywhere in this file. */
            pvt->filePath = (char*)malloc(pathLen);
            if (pvt->filePath == NULL) {
                switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "%s HD Start failed: out of memory\n", pvt->callUUID);
                return SWITCH_FALSE;
            }
            snprintf(pvt->filePath, pathLen, "%s/%s.pcm", globals.pcmDir, pvt->callUUID);

            pvt->stream = fopen(pvt->filePath, "wb");
            if (pvt->stream == NULL) {
                /* Nothing could be recorded, so refuse to start instead of running blind. */
                switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "%s HD Start failed: cannot open file:%s\n", pvt->callUUID, pvt->filePath);
                free(pvt->filePath);
                pvt->filePath = NULL;
                return SWITCH_FALSE;
            }
            switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_NOTICE, "%s HD Start Succeed channel:%s, file:%s\n",pvt->callUUID, switch_channel_get_name(channel),pvt->filePath);
        }
        break;
    case SWITCH_ABC_TYPE_CLOSE:
        {
            if (pvt->stream != NULL) {
                fclose(pvt->stream);
                pvt->stream = NULL;
            }
            switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_NOTICE, "%s HD Stop Succeed channel:%s\n",pvt->callUUID, switch_channel_get_name(channel));
        }
        break;
    case SWITCH_ABC_TYPE_READ_REPLACE:
        {
            switch_frame_t *frame = switch_core_media_bug_get_read_replace_frame(bug);

            if (frame) {
                char *frame_data = (char*)frame->data;
                int frame_len = frame->datalen;
                /* Frames are assumed to arrive 50 times per second, so
                 * samples-per-frame * 50 is taken as the source sample rate. */
                long sampleRate = frame->samples * 50;

                /* The frame is handed back unmodified. */
                switch_core_media_bug_set_read_replace_frame(bug, frame);

                if (pvt->sampleRate == 0) {
                    /* Only serves to log the source rate once per call. */
                    pvt->sampleRate = 8000;
                    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "%s SWITCH_ABC_TYPE_READ_REPLACE sampleRate before Trans: %ld\n",pvt->callUUID, sampleRate);
                }

                if (pvt->stream != NULL) {
                    if (sampleRate > 8000) {
                        /* Reduce to 8 kHz by keeping every beilv-th 16-bit sample;
                         * stdio buffering makes the small writes cheap. */
                        int beilv = sampleRate / 8000;
                        int kept = frame_len / (beilv * 2);
                        for (int i = 0; i < kept; i++) {
                            fwrite(frame_data + (i * beilv * 2), sizeof(char), 2, pvt->stream);
                        }
                    } else {
                        fwrite(frame_data, sizeof(char), frame_len, pvt->stream);
                    }
                }

                pvt->totalSample++;
                if (pvt->totalSample > pvt->currentMaxSampleSec * 50) {
                    pthread_t tid;
                    int ret;

                    if (pvt->stream != NULL) {
                        fclose(pvt->stream);
                        pvt->stream = NULL;
                    }
                    pvt->stop = 1;
                    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_NOTICE, "%s HD doCompare after %d sec, filePath:%s\n", pvt->callUUID, pvt->currentMaxSampleSec, pvt->filePath);

                    /* The comparison is slow, so it runs exactly once, on its own
                     * thread, never inline on the media path. */
                    ret = pthread_create(&tid, NULL, doCompare, pvt);
                    if (ret != 0) {
                        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "%s HD doCompare thread create faild, ret: %d\n", pvt->callUUID, ret);
                        return SWITCH_FALSE;
                    }
                    /* Nobody joins the worker; detach it so its resources are reclaimed. */
                    pthread_detach(tid);
                }
            }
        }
        break;
    default:
        break;
    }
    return SWITCH_TRUE;
}
/**
 * Dialplan application "stop_hd": detaches the hangup-detect media bug from
 * the session, if one was stored under the channel private key "asr".
 */
SWITCH_STANDARD_APP(stop_asr_session_function)
{
    switch_channel_t *channel = switch_core_session_get_channel(session);
    switch_da_t *pvt = (switch_da_t*)switch_channel_get_private(channel, "asr");

    /* Nothing to do when detection was never started on this channel. */
    if (!pvt) {
        return;
    }

    switch_channel_set_private(channel, "asr", NULL);
    switch_core_media_bug_remove(session, &pvt->bug);
    switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "%s Stop HD\n", switch_channel_get_name(channel));
}
/**
 * Dialplan application "start_hd": attaches the recording media bug to the
 * session.
 *
 * The optional application argument is the capture length in seconds; when it
 * is missing or not a positive number, globals.maxSampleSec is used.
 */
SWITCH_STANDARD_APP(start_asr_session_function)
{
    switch_channel_t *channel = switch_core_session_get_channel(session);
    switch_da_t *pvt = (switch_da_t*)switch_core_session_alloc(session, sizeof(switch_da_t));

    if (!pvt) {
        return;
    }

    pvt->stop = 0;
    pvt->totalSample = 0;
    pvt->currentMaxSampleSec = globals.maxSampleSec;
    pvt->sampleRate = 0;
    pvt->stream = NULL;
    pvt->filePath = NULL;
    pvt->session = session;
    pvt->callUUID = switch_core_session_get_uuid(session);

    if (!zstr(data)) {
        int requested = atoi(data);
        if (requested > 0) {
            pvt->currentMaxSampleSec = requested;
        } else {
            /* A zero or negative limit would end the capture after the first frame. */
            switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_WARNING, "%s Start HD: invalid seconds '%s', using %d\n", switch_channel_get_name(channel), data, pvt->currentMaxSampleSec);
        }
    }

    if (switch_core_media_bug_add(session, "asr", NULL,
            asr_callback, pvt, 0, SMBF_READ_REPLACE | SMBF_NO_PAUSE | SMBF_ONE_ONLY, &(pvt->bug)) != SWITCH_STATUS_SUCCESS) {
        switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "%s Start HD failed: cannot add media bug\n", switch_channel_get_name(channel));
        return;
    }

    switch_channel_set_private(channel, "asr", pvt);
    switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "%s Start HD %s\n", switch_channel_get_name(channel), switch_core_session_get_uuid(session));
}
/**
 * Module load: registers the "start_hd" / "stop_hd" applications, reads the
 * configuration and starts a detached background thread that loads the
 * reference samples from globals.sampleDir.
 *
 * @return the load_config() status when it fails, SWITCH_STATUS_FALSE when
 *         the loader thread cannot be created, SWITCH_STATUS_SUCCESS otherwise.
 */
SWITCH_MODULE_LOAD_FUNCTION(mod_asr_load)
{
    switch_application_interface_t *app_interface;
    switch_status_t status;
    pthread_t tid;
    int ret;

    globals.pool = pool;
    *module_interface = switch_loadable_module_create_module_interface(globals.pool, modname);
    SWITCH_ADD_APP(app_interface, "start_hd", "start hd", "start hangup detect", start_asr_session_function, "", SAF_MEDIA_TAP);
    SWITCH_ADD_APP(app_interface, "stop_hd", "stop hd", "stop hangup detect", stop_asr_session_function, "", SAF_NONE);

    status = load_config();
    if (status != SWITCH_STATUS_SUCCESS) {
        return status;
    }

    /* Building the sample features is slow, so it happens off the load path. */
    ret = pthread_create(&tid, NULL, initStandardAudioByDir, globals.sampleDir);
    if (ret != 0) {
        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "HD load thread create faild, ret: %d\n", ret);
        return SWITCH_STATUS_FALSE;
    }
    /* The loader is never joined; detach it so its resources are reclaimed. */
    pthread_detach(tid);

    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, " hangup detect loaded ....\n");
    return SWITCH_STATUS_SUCCESS;
}
/**
 * Module shutdown: releases every loaded reference sample and empties the
 * sample table.
 *
 * @return SWITCH_STATUS_SUCCESS.
 */
SWITCH_MODULE_SHUTDOWN_FUNCTION(mod_asr_shutdown)
{
    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, " hangup detect shutdown\n");
    for (int i = 0; i < standardAudioSize; i++) {
        if (standardAudio[i] == NULL) {
            continue;
        }
        free(standardAudio[i]->standardData);
        /* audioName is an array inside the struct, so the struct itself is
         * what malloc() returned and what must be freed. */
        free(standardAudio[i]);
        standardAudio[i] = NULL;
    }
    standardAudioSize = 0;
    return SWITCH_STATUS_SUCCESS;
}
/**
 * Read hangup_detect.conf and override the defaults held in globals.
 *
 * Recognised <param> names (case-insensitive): pcmDir, sampleDir,
 * deleteFileScore, maxSampleSec. Empty directory values and a non-positive
 * maxSampleSec are ignored so the built-in defaults stay in effect.
 *
 * @return SWITCH_STATUS_FALSE when the configuration cannot be opened,
 *         SWITCH_STATUS_SUCCESS otherwise.
 */
static switch_status_t load_config(void)
{
    const char *cf = "hangup_detect.conf";
    switch_xml_t cfg, xml = NULL, param, settings;

    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "load config\n");

    if (!(xml = switch_xml_open_cfg(cf, &cfg, NULL))) {
        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Open of %s failed\n", cf);
        return SWITCH_STATUS_FALSE;
    }

    if ((settings = switch_xml_child(cfg, "settings"))) {
        for (param = switch_xml_child(settings, "param"); param; param = param->next) {
            const char *var = switch_xml_attr_soft(param, "name");
            const char *val = switch_xml_attr_soft(param, "value");

            if (strcasecmp(var, "pcmDir") == 0) {
                if (!zstr(val)) {
                    globals.pcmDir = switch_core_strdup(globals.pool, val);
                }
            } else if (strcasecmp(var, "sampleDir") == 0) {
                if (!zstr(val)) {
                    globals.sampleDir = switch_core_strdup(globals.pool, val);
                }
            } else if (strcasecmp(var, "deleteFileScore") == 0) {
                globals.deleteFileScore = atoi(val);
            } else if (strcasecmp(var, "maxSampleSec") == 0) {
                int seconds = atoi(val);
                if (seconds > 0) {
                    globals.maxSampleSec = seconds;
                } else {
                    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "Ignoring invalid maxSampleSec '%s'\n", val);
                }
            }
        }
    }

    switch_xml_free(xml);
    return SWITCH_STATUS_SUCCESS;
}
/////////////////////////////////////////////////////
/**
 * Turn an audio file into the feature vector used for comparison:
 * load -> normalise -> filter -> normalise -> trim to the above-threshold
 * range -> short-time energy -> normalise.
 *
 * @param filePath  file to read.
 * @param res       receives a malloc'ed vector the caller must free(), or
 *                  NULL when the file yields no usable data.
 * @return number of leading elements of *res that are meaningful (the buffer
 *         itself is longer, because the convolution output is longer than its
 *         input); 0 when nothing usable was found.
 */
static long calculateStandard(char *filePath, double ** res) {
    double *energy;
    long size;

    *res = NULL;
    size = getAudioData(filePath, res);
    if (*res == NULL || size <= 0) {
        free(*res);
        *res = NULL;
        return 0;
    }

    /* Normalise, apply the difference filter, normalise again. */
    normalize(*res, size);
    filterWave(*res, size, 1, -0.9375);
    normalize(*res, size);

    /* Keep only the useful range so long recordings do not cost more than needed. */
    size = getUsefulData(*res, size);
    if (size <= 0) {
        free(*res);
        *res = NULL;
        return 0;
    }

    /* shortTimeEnergy() releases its input and returns a new buffer. */
    energy = shortTimeEnergy(*res, size);
    *res = energy;
    if (energy == NULL) {
        return 0;
    }
    normalize(energy, size);
    return size;
}
/**
 * Variant of calculateStandard() that cuts the recording to a requested
 * length (starting at the first above-threshold sample) instead of trimming
 * both ends.
 *
 * @param filePath  file to read.
 * @param res       receives a malloc'ed vector the caller must free(), or
 *                  NULL when the file yields no usable data.
 * @param newSize   requested length; clamped to the number of samples read.
 * @return the length actually used, 0 when nothing usable was found.
 */
static long calculateCompare(char *filePath, double ** res, long newSize) {
    double *energy;
    long totalSize;
    long preview;

    *res = NULL;
    totalSize = getAudioData(filePath, res);
    if (*res == NULL || totalSize <= 0) {
        free(*res);
        *res = NULL;
        return 0;
    }
    newSize = totalSize < newSize ? totalSize : newSize;
    if (newSize <= 0) {
        free(*res);
        *res = NULL;
        return 0;
    }

    normalize(*res, totalSize);

    /* Debug preview of the first samples; never read past the data. */
    preview = totalSize < 10 ? totalSize : 10;
    for (long i = 0; i < preview; i++) {
        printf("%f ", (*res)[i]);
    }
    printf("\n");

    filterWave(*res, totalSize, 1, -0.9375);
    normalize(*res, totalSize);

    /* Shift the useful part to the front so the first newSize elements are the cut. */
    dealCompareData(*res, totalSize, newSize);

    /* shortTimeEnergy() releases its input and returns a new buffer. */
    energy = shortTimeEnergy(*res, newSize);
    *res = energy;
    if (energy == NULL) {
        return 0;
    }
    normalize(energy, newSize);
    return newSize;
}
/**
 * Read a raw 16-bit PCM file into an array of doubles.
 *
 * Each sample is assembled from two bytes, first byte as the high byte.
 * NOTE(review): that is big-endian order; confirm it matches how the files
 * fed to this function were written.
 *
 * @param filePath  path of the file to read.
 * @param res       receives a heap buffer the caller must free(). On any
 *                  failure (file missing, empty, unreadable) it still points
 *                  to a one-element zeroed buffer so callers that look at
 *                  element 0 stay safe; it is NULL only when memory
 *                  allocation itself fails.
 * @return number of samples actually read; 0 on failure.
 */
static long getAudioData(char *filePath, double ** res) {
    unsigned char buf[2] = {0};
    long size = 0;
    long count = 0;
    FILE *fp = fopen(filePath, "rb");

    if (fp != NULL) {
        if (fseek(fp, 0L, SEEK_END) == 0) {
            long bytes = ftell(fp);
            if (bytes > 0) {
                size = bytes / 2; /* two bytes per sample */
            }
        }
        if (fseek(fp, 0L, SEEK_SET) != 0) {
            size = 0;
        }
    }

    *res = (double *) calloc(size > 0 ? (size_t) size : 1, sizeof(double));
    if (*res == NULL || fp == NULL) {
        if (fp != NULL) {
            fclose(fp);
        }
        return 0;
    }

    /* fread() reports how many bytes it delivered; anything short of a full
     * sample ends the loop. */
    while (count < size && fread(buf, 1, 2, fp) == 2) {
        (*res)[count] = (double) ((short) ((buf[0] << 8) | buf[1]));
        count++;
    }

    fclose(fp);
    return count;
}
/**
* 归一化
*/
static void normalize(double data[], long size) {
double max = findMax(data, size);
//switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "max %f\n", max);
for (int i = 0; i < size; i++) {
data[i] = ((double) data[i] / max);
}
}
/**
 * Find the largest absolute value in an array.
 *
 * @param data  values to scan (not read when size <= 0).
 * @param size  number of values.
 * @return the maximum of |data[i]|, or 0 for an empty array.
 */
static double findMax(double data[], long size) {
    /* Starting from 0 (instead of data[0]) gives the same result for any
     * non-empty input and avoids touching the array when it is empty. */
    double max = 0.0;
    for (long i = 0; i < size; i++) {
        double magnitude = fabs(data[i]);
        if (max < magnitude) {
            max = magnitude;
        }
    }
    return max;
}
/**
 * In-place first-order difference filter: y[i] = b0 * x[i] + b1 * x[i - 1].
 *
 * The array is walked from the back so that x[i - 1] is still the unfiltered
 * value when y[i] is computed; element 0 is left as it is.
 */
static void filterWave(double audioData[], long size, double b0, double b1) {
    long idx = size - 1;
    while (idx > 0) {
        const double previous = audioData[idx - 1];
        audioData[idx] = b0 * audioData[idx] + b1 * previous;
        --idx;
    }
}
/**
 * Trim the data to its useful range: the samples from the start index up to
 * (but not including) the end index found by the threshold searches are
 * moved to the front of the array.
 *
 * @return end index minus start index.
 */
static long getUsefulData(double audioData[], long size) {
    const long first = findDataStartIndex(audioData, size);
    const long last = findDataEndIndex(audioData, size);

    printf("getUsefulData: oldLen:%ld, start:%ld, end:%ld \n", size, first, last);

    /* When the range already begins at index 0 nothing has to move. */
    if (first != 0) {
        long dst = 0;
        long src = first;
        while (src < last) {
            audioData[dst] = audioData[src];
            ++dst;
            ++src;
        }
    }
    return last - first;
}
/**
 * Prepare a recording for comparison with a reference of length newSize:
 * the samples beginning at the first above-threshold index are moved to the
 * front, and everything after the first newSize elements is zeroed.
 *
 * When no sample exceeds the threshold the data is treated as starting at
 * index 0 (the search result -1 must not be used as an offset).
 *
 * @param audioData  samples, modified in place.
 * @param oldSize    number of samples in audioData.
 * @param newSize    number of leading elements to keep.
 */
static void dealCompareData(double audioData[],long oldSize, long newSize) {
    long start = findDataStartIndex(audioData, oldSize);
    printf("dealCompareData: oldLen:%ld, start:%ld, newSize:%ld \n", oldSize, start, newSize);
    if (start < 0) {
        start = 0;
    }
    for (long i = 0; i < oldSize; i++) {
        long src = i + start;
        /* src >= i, so reading ahead while writing behind never clobbers
         * a value that is still needed. */
        if (i < newSize && src < oldSize) {
            audioData[i] = audioData[src];
        } else {
            audioData[i] = 0;
        }
    }
}
/**
 * Locate the first sample whose value is greater than DATA_START_VALUE.
 *
 * @return its index, or -1 when no sample exceeds the threshold.
 */
static long findDataStartIndex(double audioData[], long size) {
    long idx = 0;
    while (idx < size) {
        if (audioData[idx] > DATA_START_VALUE) {
            return idx;
        }
        ++idx;
    }
    return -1;
}
/**
 * Locate the last sample whose value is greater than DATA_END_VALUE.
 *
 * @return its index, or -1 when no sample exceeds the threshold.
 */
static long findDataEndIndex(double audioData[], long size) {
    long idx = size;
    while (idx > 0) {
        --idx;
        if (audioData[idx] > DATA_END_VALUE) {
            return idx;
        }
    }
    return -1;
}
/**
 * Square every element of the array in place (element-wise product of the
 * data with itself).
 */
static void dotProduct(double data[], long size) {
    long k = 0;
    while (k < size) {
        const double value = data[k];
        data[k] = value * value;
        ++k;
    }
}
/**
 * Build a Hamming window of N * i points:
 *     w[j] = 0.54 - 0.46 * cos(2 * pi * j / (N * i))
 *
 * @param N  first length factor.
 * @param i  second length factor.
 * @return malloc'ed array of N * i values the caller must free(), or NULL
 *         when N * i is not positive or memory cannot be allocated.
 */
static double* generateHammingWindows(int N, int i) {
    const int total = i * N;
    const double pi = acos(-1.0); /* computed once instead of on every iteration */
    double *wins;

    if (total <= 0) {
        return NULL;
    }
    wins = (double *) malloc((size_t) total * sizeof(double));
    if (wins == NULL) {
        return NULL;
    }
    for (int j = 0; j < total; j++) {
        wins[j] = 0.54 - 0.46 * (cos(2 * pi * j / total));
    }
    return wins;
}
/**
 * Full discrete convolution of two sequences.
 *
 * result[i] = sum over j of self[j] * other[i - j], for every j where both
 * indices are in range. Only those j are visited, so the cost is
 * proportional to sizeS * sizeO rather than to the square of the output
 * length; the terms are still added in ascending j order.
 *
 * @param self   data sequence.
 * @param sizeS  length of self.
 * @param other  window sequence (expected to be much shorter than the data).
 * @param sizeO  length of other.
 * @return malloc'ed array of sizeS + sizeO - 1 values the caller must free(),
 *         or NULL when that length is not positive or allocation fails.
 */
static double* conv(double self[], long sizeS, double other[],long sizeO) {
    const long outLen = sizeS + sizeO - 1;
    double *result;

    if (outLen <= 0) {
        return NULL;
    }
    result = (double *) malloc((size_t) outLen * sizeof(double));
    if (result == NULL) {
        return NULL;
    }

    for (long i = 0; i < outLen; i++) {
        /* j must satisfy 0 <= j < sizeS and 0 <= i - j < sizeO. */
        long lo = i - sizeO + 1;
        long hi = i;
        double current = 0;

        if (lo < 0) {
            lo = 0;
        }
        if (hi > sizeS - 1) {
            hi = sizeS - 1;
        }
        for (long j = lo; j <= hi; j++) {
            current += self[j] * other[i - j];
        }
        result[i] = current;
    }
    return result;
}
/**
 * Short-time energy: the squared samples convolved with a 32 * 16 point
 * window.
 *
 * Ownership: audioData is squared in place and then free()d here; the
 * returned buffer (produced by conv()) belongs to the caller.
 */
static double* shortTimeEnergy(double audioData[], long size) {
    enum { WINDOW_N = 32, WINDOW_FACTOR = 16 };
    double *energy;
    double *window;

    dotProduct(audioData, size);
    window = generateHammingWindows(WINDOW_N, WINDOW_FACTOR);
    energy = conv(audioData, size, window, WINDOW_N * WINDOW_FACTOR);

    free(window);
    free(audioData);
    return energy;
}
/**
 * Cosine of the angle between two vectors:
 *     dot(standard, compare) / (norm(standard) * norm(compare))
 *
 * Despite the name, a larger value means the vectors are more alike.
 *
 * @param standard  first vector.
 * @param compare   second vector.
 * @param size      number of elements to use from each vector.
 * @return the cosine value, or 0 when either vector has zero norm (including
 *         size <= 0), where the quotient would otherwise be 0/0.
 */
static double cosineDistance(double standard[], double compare[], long size) {
    double dot = 0;
    double normStandard = 0;
    double normCompare = 0;
    double denominator;

    for (long i = 0; i < size; i++) {
        dot += standard[i] * compare[i];
        normStandard += standard[i] * standard[i];
        normCompare += compare[i] * compare[i];
    }

    denominator = sqrt(normStandard) * sqrt(normCompare);
    if (denominator == 0.0) {
        return 0.0;
    }
    return dot / denominator;
}