WBlog

wangzhiwei blog

0%

tfjs-study5

使用预训练模型进行语音识别

模型接受声音信息,输出分类信息
声音在计算机里是声谱图,因此也可以使用卷积神经网络进行识别

script.js

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import * as speechCommands from '@tensorflow-models/speech-commands';

// Base URL of a local static server hosting the pre-trained
// speech-commands model files (model.json + metadata.json + weights).
const MODEL_PATH = 'http://127.0.0.1:8080/speech';

window.onload = async () => {
  // Create a recognizer backed by the browser's native FFT,
  // loading the model from the local server instead of the CDN.
  const recognizer = speechCommands.create(
    'BROWSER_FFT',
    null, // vocabulary: null selects the package default
    MODEL_PATH + '/model.json',
    MODEL_PATH + '/metadata.json'
  );

  await recognizer.ensureModelLoaded();

  // The first two labels are the reserved '_background_noise_' and
  // '_unknown_' classes — skip them in the UI.
  const labels = recognizer.wordLabels().slice(2);
  const resultEl = document.querySelector('#result');
  resultEl.innerHTML = labels.map(l => `
<div>${l}</div>
`).join('');

  // listen() requests microphone access and fires the callback each time a
  // word is recognized above probabilityThreshold.
  recognizer.listen(result => {
    const { scores } = result;
    const maxValue = Math.max(...scores);
    // scores align with the FULL label list, so shift by the two reserved
    // labels sliced off above.
    const index = scores.indexOf(maxValue) - 2;
    // Ternary (not `&&`) so non-matching items get an empty background
    // instead of the invalid CSS value "false".
    resultEl.innerHTML = labels.map((l, i) => `
<div style="background: ${i === index ? 'green' : ''}">${l}</div>
`).join('');
  }, {
    overlapFactor: 0.3,       // overlap between successive audio frames
    probabilityThreshold: 0.9 // minimum confidence before the callback fires
  });
};

index.html

1
2
3
4
5
6
7
8
9
10

<!-- Recognition demo page: script.js fills #result with one floated box per
     recognized word label and highlights the active one. -->
<script src="script.js"></script>
<style>
#result>div {
float: left;
padding: 20px;
}
</style>
<div id="result"></div>

基于迁移学习的语音识别器:声控轮播图

在浏览器中收集中文语音训练数据

使用 speech commands (英文语音识别) 包进行迁移学习并预测 中文语音

语音训练数据的保存和加载

操作步骤

使用 speech commands 包创建迁移学习器
编写前端界面准备收集语音数据
调用collectExample函数收集语音数据

index.html

1
2
3
4
5
6
7
8
9
10
<!-- Data-collection page: each labeled button records one audio example for
     its own label (button text), #count shows per-label sample counts, and
     the checkbox toggles live prediction after training. -->
<script src="script.js"></script>
<button onclick="collect(this)">上一张</button>
<button onclick="collect(this)">下一张</button>
<button onclick="collect(this)">背景噪音</button>
<button onclick="save()">保存</button>
<pre id="count"></pre>
<button onclick="train()">训练</button>
<br><br>
监听开关:<input type="checkbox" onchange="toggle(this.checked)">

script.js

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import * as speechCommands from '@tensorflow-models/speech-commands';
import * as tfvis from '@tensorflow/tfjs-vis';

const MODEL_PATH = 'http://127.0.0.1:8080'; // local static server hosting the model files
let transferRecognizer; // transfer-learning recognizer; initialized in window.onload

window.onload = async () => {
  // Load the pre-trained base recognizer from the local server, then derive
  // a transfer-learning recognizer ('轮播图') that we can retrain on custom
  // Chinese command words.
  const base = speechCommands.create(
    'BROWSER_FFT',
    null,
    `${MODEL_PATH}/speech/model.json`,
    `${MODEL_PATH}/speech/metadata.json`
  );
  await base.ensureModelLoaded();
  transferRecognizer = base.createTransfer('轮播图');
};

/**
 * Record one audio example labeled by the clicked button's text.
 * The special button text '背景噪音' maps to the package's reserved
 * '_background_noise_' label.
 * @param {HTMLButtonElement} btn - the button that triggered the capture
 */
window.collect = async (btn) => {
  // Disable while recording so the user cannot start overlapping captures.
  btn.disabled = true;
  const label = btn.innerText;
  try {
    await transferRecognizer.collectExample(
      label === '背景噪音' ? '_background_noise_' : label
    );
  } finally {
    // Re-enable even if recording fails (e.g. microphone permission denied);
    // the original code left the button permanently disabled on error.
    btn.disabled = false;
  }
  document.querySelector('#count').innerHTML = JSON.stringify(transferRecognizer.countExamples(), null, 2);
};

window.train = async () => {
  // Fine-tune the transfer model on the collected examples, streaming
  // loss/accuracy into a tfjs-vis panel at the end of every epoch.
  const callback = tfvis.show.fitCallbacks(
    { name: '训练效果' },
    ['loss', 'acc'],
    { callbacks: ['onEpochEnd'] }
  );
  await transferRecognizer.train({ epochs: 30, callback });
};

/**
 * Checkbox handler: start listening through the transfer model when checked,
 * stop when unchecked. While listening, logs the best-scoring label.
 * @param {boolean} checked - current checkbox state
 */
window.toggle = async (checked) => {
  if (!checked) {
    transferRecognizer.stopListening();
    return;
  }
  await transferRecognizer.listen(result => {
    const labels = transferRecognizer.wordLabels();
    const best = Math.max(...result.scores);
    console.log(labels[result.scores.indexOf(best)]);
  }, {
    overlapFactor: 0,
    probabilityThreshold: 0.75
  });
};

/**
 * Serialize all collected examples to a binary blob and trigger a
 * client-side download as 'data.bin'.
 */
window.save = () => {
  const arrayBuffer = transferRecognizer.serializeExamples();
  const blob = new Blob([arrayBuffer]);
  const url = window.URL.createObjectURL(blob);
  const link = document.createElement('a');
  link.href = url;
  link.download = 'data.bin';
  link.click();
  // Release the object URL so the blob can be garbage-collected; the
  // original never revoked it, leaking the blob for the page's lifetime.
  window.URL.revokeObjectURL(url);
};

轮播图

index.html

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
<!-- Voice-controlled carousel page: the inner div is a horizontal flex strip
     of slides; script.js translates it left/right on recognized commands. -->
<script src="script.js"></script>
监听开关:<input type="checkbox" onchange="toggle(this.checked)">

<style>
.slider {
width: 600px;
overflow: hidden;
margin: 10px auto;
}
.slider > div{
display: flex;
align-items: center;
}
</style>
<div class="slider">
<div>
<img src="https://cdn.pixabay.com/photo/2019/10/29/15/57/vancouver-4587302__480.jpg" alt="" width="600">
<img src="https://cdn.pixabay.com/photo/2019/10/31/07/14/coffee-4591159__480.jpg" alt="" width="600">
<img src="https://cdn.pixabay.com/photo/2019/11/01/11/08/landscape-4593909__480.jpg" alt="" width="600">
<img src="https://cdn.pixabay.com/photo/2019/11/02/21/45/maple-leaf-4597501__480.jpg" alt="" width="600">
<img src="https://cdn.pixabay.com/photo/2019/11/02/03/13/in-xinjiang-4595560__480.jpg" alt="" width="600">
<img src="https://cdn.pixabay.com/photo/2019/11/01/22/45/reschensee-4595385__480.jpg" alt="" width="600">
</div>
</div>

script.js

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import * as speechCommands from '@tensorflow-models/speech-commands';

const MODEL_PATH = 'http://127.0.0.1:8080'; // local static server hosting model + saved training data
let transferRecognizer; // transfer-learning recognizer; initialized in window.onload
let curIndex = 0; // index of the slide currently shown in the carousel

window.onload = async () => {
  // Load the pre-trained base model and derive the '轮播图' transfer recognizer.
  const recognizer = speechCommands.create(
    'BROWSER_FFT',
    null,
    MODEL_PATH + '/speech/model.json',
    MODEL_PATH + '/speech/metadata.json',
  );
  await recognizer.ensureModelLoaded();
  transferRecognizer = recognizer.createTransfer('轮播图');

  // Fetch the previously saved examples (see the data-collection page's
  // save()) and retrain the transfer model in the browser.
  const res = await fetch(MODEL_PATH + '/slider/data.bin');
  if (!res.ok) {
    // fetch() only rejects on network failure; surface HTTP errors (e.g. a
    // missing data.bin) explicitly instead of failing inside loadExamples.
    throw new Error(`Failed to load training data: HTTP ${res.status}`);
  }
  const arrayBuffer = await res.arrayBuffer();
  transferRecognizer.loadExamples(arrayBuffer);
  await transferRecognizer.train({ epochs: 30 });
  console.log('done');
};

/**
 * Checkbox handler: when checked, stream microphone audio through the
 * transfer model and hand the best-scoring label to window.play; when
 * unchecked, stop listening.
 * @param {boolean} checked - current checkbox state
 */
window.toggle = async (checked) => {
  if (!checked) {
    transferRecognizer.stopListening();
    return;
  }
  await transferRecognizer.listen(result => {
    const { scores } = result;
    const labels = transferRecognizer.wordLabels();
    const topIndex = scores.indexOf(Math.max(...scores));
    window.play(labels[topIndex]);
  }, {
    overlapFactor: 0,
    probabilityThreshold: 0.5
  });
};

/**
 * Slide the carousel one frame based on the recognized command.
 * '上一张' moves to the previous slide; any other label moves to the next.
 * @param {string} label - recognized word label
 */
window.play = (label) => {
  const div = document.querySelector('.slider>div');
  // Count only the slides inside the strip. The original queried
  // document.querySelectorAll('img'), which counts every image on the page
  // and breaks the upper bound if unrelated images exist.
  const total = div.children.length;
  if (label === '上一张') {
    if (curIndex === 0) { return; }
    curIndex -= 1;
  } else {
    if (curIndex === total - 1) { return; }
    curIndex += 1;
  }
  div.style.transition = "transform 1s";
  div.style.transform = `translateX(-${100 * curIndex}%)`;
};