录音并识别琴键
Imports NAudio.Wave
Imports MathNet.Numerics.IntegralTransforms
Imports System.Numerics
Imports TensorFlow
Imports System.IO

''' <summary>
''' Main form: records microphone audio, turns each 2048-sample window into a
''' 182-value feature vector (7 frames x 26 values) and asks the TensorFlow model
''' which piano key is sounding. Also contains helpers that export training data
''' and run the model / the voice-activity detector against WAV files on disk.
''' </summary>
Public Class Form1

    ' Samples per analysis window (128 ms at 16 kHz, 16-bit mono = 4096 bytes).
    Private Const WindowSamples As Integer = 2048

    ' Microphone capture device.
    Dim wav As New WaveInEvent
    ' True once recording has been started, so a second click is a no-op.
    Private recording As Boolean = False

    ' Reused sample buffer filled by the capture callback.
    Dim WavData16(WindowSamples - 1) As Int16
    Dim mfcc As New MFCC
    Dim piano As New TensorflowPiano

    ''' <summary>Configures the capture device and starts recording (only once).</summary>
    Private Sub Button1_Click(sender As Object, e As EventArgs) Handles Button1.Click
        ' Starting twice would register the handler again and make StartRecording throw.
        If recording Then Return

        ' Buffer size in bytes = sampleRate * milliseconds * bytesPerSample / 1000.
        wav.BufferMilliseconds = 128
        ' Originally 12; fewer buffers keep the recording from being interrupted.
        wav.NumberOfBuffers = 6
        wav.WaveFormat = New WaveFormat(16000, 16, 1)

        AddHandler wav.DataAvailable, AddressOf waveIn_DataAvailable
        Try
            wav.StartRecording()
            recording = True
        Catch
            ' Leave the form in a state where the user can retry.
            RemoveHandler wav.DataAvailable, AddressOf waveIn_DataAvailable
            Throw
        End Try
    End Sub

    ''' <summary>
    ''' Capture callback: classifies the received window and shows the detected
    ''' class index when the model's score exceeds 0.9.
    ''' </summary>
    Private Sub waveIn_DataAvailable(sender As Object, e As WaveInEventArgs)
        Dim capacityBytes As Integer = WavData16.Length * 2
        Dim byteCount As Integer = Math.Min(e.BytesRecorded, capacityBytes)
        ' A short buffer must not be analysed together with stale samples
        ' left over from the previous callback.
        If byteCount < capacityBytes Then Array.Clear(WavData16, 0, WavData16.Length)
        Buffer.BlockCopy(e.Buffer, 0, WavData16, 0, byteCount)

        Dim max As Single
        Dim num As Integer = DetectKey(WavData16, max)
        If max > 0.9 Then
            ' The callback is not raised on the UI thread, so marshal the update.
            Me.Invoke(New ShowText(AddressOf ShowTxt), num.ToString)
        End If
    End Sub

    ''' <summary>
    ''' Runs feature extraction and the model on one window and returns the index
    ''' of the highest-scoring class; <paramref name="score"/> receives that score.
    ''' </summary>
    Private Function DetectKey(window() As Int16, ByRef score As Single) As Integer
        Dim mfccs = WavTMfcc(window)
        ' The model takes a (1, featureCount) matrix.
        Dim features(0, mfccs.Length - 1) As Single
        For i = 0 To mfccs.Length - 1
            features(0, i) = mfccs(i)
        Next

        Dim out() As Single = piano.Detect(features)
        ' Scan every output: the model is trained with 8 classes, so a fixed
        ' upper bound of 7 would never report the last one.
        Dim best As Integer = 0
        For i = 1 To out.Length - 1
            If out(i) > out(best) Then best = i
        Next
        score = out(best)
        Return best
    End Function

    Public Delegate Sub ShowText(txt As String)

    ''' <summary>Displays <paramref name="txt"/> in Label1 (must run on the UI thread).</summary>
    Public Sub ShowTxt(txt As String)
        Label1.Text = txt
    End Sub

    ''' <summary>
    ''' Splits <paramref name="data"/> into frames of 512 samples with a hop of 256,
    ''' runs window -> FFT -> mfcc.MFCC on each frame and concatenates the 26 values
    ''' per frame. 2048 samples give 7 frames, i.e. 182 values.
    ''' </summary>
    ''' <param name="data">16-bit PCM samples.</param>
    ''' <returns>Concatenated per-frame features; empty when fewer than 512 samples are supplied.</returns>
    Public Function WavTMfcc(data() As Int16) As Single()
        Const FrmSize As Integer = 512
        Const FrmShift As Integer = 256
        Const ValuesPerFrame As Integer = 26

        ' Integer division: "/" would yield a fractional frame count for lengths
        ' that are not a multiple of the hop and oversize the result array.
        Dim FrmNum As Integer = data.Length \ FrmShift - 1
        If FrmNum <= 0 Then Return New Single() {}

        Dim mfccs(FrmNum * ValuesPerFrame - 1) As Single
        Dim Frame(FrmSize - 1) As Single
        For i As Integer = 0 To FrmNum - 1
            Array.Copy(data, i * FrmShift, Frame, 0, FrmSize)
            mfcc.Hamming_window(Frame)
            Dim fft As Complex() = mfcc.FFT(Frame)
            Dim rs As Single() = mfcc.MFCC(fft)
            Array.Copy(rs, 0, mfccs, i * ValuesPerFrame, ValuesPerFrame)
        Next
        Return mfccs
    End Function

    ''' <summary>
    ''' Reads 16-bit samples from a WAV file and always disposes the reader.
    ''' </summary>
    ''' <param name="fileName">Path of the WAV file.</param>
    ''' <param name="sampleCount">
    ''' Number of samples to return (zero-padded if the file is shorter);
    ''' a negative value means "the whole file".
    ''' </param>
    Private Shared Function ReadSamples(fileName As String, Optional sampleCount As Integer = -1) As Int16()
        Using reader As New WaveFileReader(fileName)
            Dim count As Integer = sampleCount
            If count < 0 Then count = CInt(Math.Min(reader.Length \ 2, CLng(Integer.MaxValue \ 2)))

            Dim bytes(count * 2 - 1) As Byte
            Dim total As Integer = 0
            ' Read may return fewer bytes than requested, so loop until full or EOF.
            While total < bytes.Length
                Dim n As Integer = reader.Read(bytes, total, bytes.Length - total)
                If n <= 0 Then Exit While
                total += n
            End While

            Dim samples(count - 1) As Int16
            Buffer.BlockCopy(bytes, 0, samples, 0, bytes.Length)
            Return samples
        End Using
    End Function

    ''' <summary>
    ''' Exports training data: for every *.wav under d:\piano writes one line with the
    ''' features of its first 2048 samples and one line with the one-hot label.
    ''' The label is the first character of the file name (0-7).
    ''' </summary>
    Private Sub Button7_Click(sender As Object, e As EventArgs) Handles Button7.Click
        Dim fs As New DirectoryInfo("d:\piano")
        Using txtwrite As New IO.StreamWriter("d:\mfcc.txt", False)
            For Each i As FileInfo In fs.GetFiles("*.wav", SearchOption.AllDirectories)
                Dim fn = i.Name

                ' Val yields 0 for a non-digit; 8 and 9 have no output class, skip them
                ' instead of failing with an index error half-way through the export.
                Dim label As Integer = CInt(Val(Mid(fn, 1, 1)))
                If label < 0 OrElse label > 7 Then Continue For

                Dim it16 = ReadSamples(i.FullName, WindowSamples)
                Dim mfccs = WavTMfcc(it16)

                ' Invariant culture keeps "." as decimal separator so the
                ' comma-separated file stays parseable on every locale.
                Dim StrMfcc = String.Join(",", Array.ConvertAll(mfccs,
                    Function(v) v.ToString(Globalization.CultureInfo.InvariantCulture)))

                Dim Ans = {0, 0, 0, 0, 0, 0, 0, 0}
                Ans(label) = 1
                Dim StrAns = String.Join(",", Ans)

                txtwrite.WriteLine(StrMfcc)
                txtwrite.WriteLine(StrAns)
            Next
        End Using
    End Sub

    ''' <summary>
    ''' Voice-activity detection with an exponential moving average of the signal
    ''' energy: v = 0.1 * x^2 + 0.9 * v. A region starts when v exceeds 1024^2 and
    ''' ends when v drops below 100^2.
    ''' </summary>
    ''' <param name="data">16-bit PCM samples.</param>
    ''' <returns>One Point per region, X = start index, Y = end index.</returns>
    Public Function VAD(data As Int16()) As List(Of Point)
        Dim Belta As Single = 0.1
        ' Double accumulator: an integer type would round the average on every step.
        Dim Sum As Double = 0
        Dim StartP As Integer
        Dim Status As Boolean = False
        Dim WaveArea As New List(Of Point)
        Dim startThreshold As Double = 1024.0 * 1024.0
        Dim endThreshold As Double = 100.0 * 100.0

        For i = 0 To data.Length - 1
            Dim sample As Double = data(i)
            Sum = Belta * sample * sample + (1 - Belta) * Sum

            ' Start of sound.
            If Not Status AndAlso Sum > startThreshold Then
                Status = True
                StartP = i
            End If

            ' End of sound.
            If Status AndAlso Sum < endThreshold Then
                WaveArea.Add(New Point(StartP, i))
                Status = False
            End If
        Next

        ' A region still open at the end of the data would otherwise be lost.
        If Status Then WaveArea.Add(New Point(StartP, data.Length - 1))
        Return WaveArea
    End Function

    ''' <summary>Runs the voice-activity detector on d:\d00.wav (result is discarded).</summary>
    Private Sub Button8_Click(sender As Object, e As EventArgs) Handles Button8.Click
        Dim it16 = ReadSamples("d:\d00.wav")
        VAD(it16)
    End Sub

    ''' <summary>
    ''' Classifies d:\testpiano.wav window by window and prints "class:score"
    ''' for each complete 2048-sample window.
    ''' </summary>
    Private Sub Button9_Click(sender As Object, e As EventArgs) Handles Button9.Click
        Dim it16 = ReadSamples("d:\testpiano.wav")
        Dim frame(WindowSamples - 1) As Int16
        For k As Integer = 0 To it16.Length \ WindowSamples - 1
            Array.Copy(it16, k * WindowSamples, frame, 0, WindowSamples)
            Dim max As Single
            Dim num As Integer = DetectKey(frame, max)
            Debug.Print(num & ":" & max)
        Next
    End Sub

    Private Sub Form1_Load(sender As Object, e As EventArgs) Handles MyBase.Load
    End Sub
End Class
在 VB 中提取训练数据的 MFCC 特征并保存为 TXT 文件;然后在 Python 脚本中读取该文件进行训练
import tensorflow as tf
import librosa
import numpy as np
from tensorflow.python.framework.graph_util import convert_variables_to_constants

# Training script (TensorFlow 1.x graph API): fits a single softmax layer
# (182 features -> 8 classes) on the feature/label pairs exported to d:/mfcc.txt
# and freezes the result to d:/piano.pb.


def _parse_row(line):
    """Parse one comma-separated line of numbers into a float32 vector.

    Plain float() parsing is used instead of eval(): the file is data, not
    code, and float() also accepts values such as "NaN" that eval() rejects.
    """
    return np.array([float(tok) for tok in line.strip().split(",")], dtype=np.float32)


# The file alternates: one line of features, one line with the one-hot label.
xdata = []
ydata = []
with open("d:/mfcc.txt", "r") as f:
    while True:
        v1 = f.readline()
        if v1.strip() == '':
            break
        v2 = f.readline()
        if v2.strip() == '':
            # Feature line without a label line: ignore the incomplete pair.
            break
        xdata.append(_parse_row(v1))
        ydata.append(_parse_row(v2))

x = tf.placeholder("float32", [None, 182], name='input')
w = tf.Variable(tf.truncated_normal([182, 8], stddev=0.1))
b = tf.Variable(tf.truncated_normal(shape=[8], stddev=0.1, dtype=tf.float32))
y = tf.nn.softmax(tf.matmul(x, w) + b, name='out')
y_ = tf.placeholder("float32", [None, 8])

# Cross-entropy; the clip keeps log() away from 0 so the loss cannot become NaN.
loss = -tf.reduce_sum(y_ * tf.log(tf.clip_by_value(y, 1e-10, 1.0)))
train = tf.train.GradientDescentOptimizer(1e-5).minimize(loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(20000):
        sess.run(train, feed_dict={x: xdata, y_: ydata})
        if i % 100 == 0:
            # Report the current loss (the step number alone says nothing about progress).
            print("step %d,%g" % (i, sess.run(loss, feed_dict={x: xdata, y_: ydata})))

    # Final labels, loss and predictions on the training set.
    print(sess.run(y_, feed_dict={y_: ydata}))
    print(sess.run(loss, feed_dict={x: xdata, y_: ydata}))
    print(sess.run(y, feed_dict={x: xdata}))

    # Freeze the variables into constants and save the graph in .pb format;
    # 'out' is the node the VB side fetches, 'input' the one it feeds.
    graph = convert_variables_to_constants(sess, sess.graph_def, ['out'])
    tf.train.write_graph(graph, 'd:/', 'piano.pb', as_text=False)
在VB中使用训练好的模型
Imports System.IO
Imports System.Numerics
Imports TensorFlow ' Install-Package TensorFlowSharp

''' <summary>
''' Wraps the frozen model d:\piano.pb: loads the graph once and evaluates the
''' "out" node for feature matrices fed into the "input" node.
''' Dispose the instance to release the native session and graph.
''' </summary>
Public Class TensorflowPiano
    Implements IDisposable

    Private ReadOnly graph As TFGraph
    Private ReadOnly session As TFSession
    Private disposed As Boolean = False

    ''' <summary>Loads the model file and creates a session for it.</summary>
    Public Sub New()
        Dim model As Byte() = File.ReadAllBytes("d:\piano.pb")
        graph = New TFGraph()
        graph.Import(model, "")
        session = New TFSession(graph)
    End Sub

    ''' <summary>
    ''' Runs the model on <paramref name="Data"/> and returns the first row of the
    ''' "out" tensor.
    ''' </summary>
    ''' <param name="Data">Feature matrix fed to the "input" node (callers pass 1 x 182).</param>
    ''' <returns>The model outputs for the first input row.</returns>
    Public Function Detect(Data(,) As Single) As Single()
        ' Everything is kept in locals: Detect is called from the audio callback
        ' as well as from UI handlers, and shared fields would let the calls
        ' overwrite each other's runner/result.
        Dim runner As TFSession.Runner = session.GetRunner()
        runner.AddInput(graph("input")(0), Data).Fetch(graph("out")(0))

        Dim output As TFTensor() = runner.Run()
        Try
            ' GetValue(True) returns the tensor as a jagged array; indexing it is
            ' late-bound exactly as before (requires Option Strict Off).
            Dim rows As Object = output(0).GetValue(True)
            Dim rt As Single() = rows(0)
            Return rt
        Finally
            ' The values were copied into managed arrays above, so the native
            ' tensors can be released right away instead of piling up per call.
            For Each t As TFTensor In output
                t.Dispose()
            Next
        End Try
    End Function

    ''' <summary>Releases the session and the graph. Safe to call more than once.</summary>
    Public Sub Dispose() Implements IDisposable.Dispose
        If disposed Then Return
        disposed = True
        session.Dispose()
        graph.Dispose()
    End Sub
End Class
声音特征提取:
Imports System.Numerics
Imports MathNet.Numerics.IntegralTransforms

''' <summary>
''' Audio feature extraction for 16 kHz signals: Hamming window, FFT and a bank
''' of triangular mel filters (40 filter positions, of which the first MFCCNum
''' are used).
''' </summary>
Public Class MFCC
    ''' <summary>Filter weights, indexed (filter, FFT bin).</summary>
    Public H As Double(,)
    Private MFCCNum As Integer
    Private FrameSize As Integer ' frame length, 512 by default

    ''' <summary>Builds the mel filter bank.</summary>
    ''' <param name="framesize">Samples per frame (length of the FFT input).</param>
    ''' <param name="MFCCNum">Number of values returned per frame; 1 to 39.</param>
    ''' <exception cref="ArgumentOutOfRangeException">A parameter is outside its valid range.</exception>
    Public Sub New(Optional framesize As Integer = 512, Optional MFCCNum As Integer = 26)
        Const sampleRate As Integer = 16000 ' sampling frequency
        Const filterNum As Integer = 40     ' number of mel filters

        If framesize < 2 Then
            Throw New ArgumentOutOfRangeException("framesize", "framesize must be at least 2.")
        End If
        ' Filter i uses edge points i, i+1 and i+2, and filters 0..MFCCNum are built,
        ' so MFCCNum + 2 must not exceed the last point index (filterNum + 1).
        If MFCCNum < 1 OrElse MFCCNum > filterNum - 1 Then
            Throw New ArgumentOutOfRangeException("MFCCNum", "MFCCNum must be between 1 and 39.")
        End If

        Me.MFCCNum = MFCCNum
        Me.FrameSize = framesize
        Dim half As Integer = framesize \ 2
        H = New Double(MFCCNum, half) {}

        ' Note the minimum frequency setting (0 here; 300 is a common alternative).
        Dim freMax As Double = sampleRate / 2
        Dim freMin As Double = 0
        ' Hz -> mel
        Dim melFremax As Double = 1125 * Math.Log(1 + freMax / 700)
        Dim melFremin As Double = 1125 * Math.Log(1 + freMin / 700)
        Dim melStep As Double = (melFremax - melFremin) / (filterNum + 1)

        ' filterNum filters need filterNum + 2 edge points, equally spaced in mel.
        Dim filter_points(filterNum + 1) As Integer
        For i As Integer = 0 To filterNum + 1
            Dim mel As Double = melFremin + melStep * i
            Dim hz As Double = 700 * (Math.Exp(mel / 1125) - 1) ' mel -> Hz
            filter_points(i) = CInt(Math.Floor((framesize + 1) * hz / sampleRate))
        Next

        ' Triangular filters: rise from left to center, fall from center to right.
        For i As Integer = 0 To MFCCNum
            Dim left As Integer = filter_points(i)
            Dim center As Integer = filter_points(i + 1)
            Dim right As Integer = filter_points(i + 2)
            For j As Integer = 0 To half - 1
                Dim weight As Double = 0
                If j >= left AndAlso j <= center AndAlso center > left Then
                    weight = CDbl(j - left) / (center - left)
                End If
                If j >= center AndAlso j <= right Then
                    If right > center Then
                        weight = CDbl(right - j) / (right - center)
                    Else
                        ' Zero-width falling edge: the peak bin gets full weight
                        ' instead of the NaN a 0/0 division would produce.
                        weight = 1
                    End If
                End If
                H(i, j) = weight
            Next
        Next
    End Sub

    ''' <summary>Applies a Hamming window to <paramref name="WaveData"/> in place.</summary>
    Public Sub Hamming_window(WaveData() As Single)
        Dim len As Integer = WaveData.Length
        ' Kept in single precision and with period len (not len - 1) so the
        ' features stay identical to the ones the model was trained on.
        Dim omega As Single = 2.0 * Math.PI / len
        For j As Integer = 0 To len - 1
            WaveData(j) = (0.54 - 0.46 * Math.Cos(omega * (j))) * WaveData(j)
        Next
    End Sub

    ''' <summary>Returns the forward FFT of <paramref name="WaveData"/> (MathNet, Matlab scaling option).</summary>
    Public Function FFT(WaveData() As Single) As Complex()
        Dim FFT_Complex(WaveData.Length - 1) As Complex
        For i = 0 To WaveData.Length - 1
            FFT_Complex(i) = New Complex(WaveData(i), 0)
        Next
        Fourier.Forward(FFT_Complex, FourierOptions.Matlab)
        Return FFT_Complex
    End Function

    ''' <summary>
    ''' Computes the per-frame features from a spectrum.
    ''' </summary>
    ''' <param name="fft">Spectrum of one frame; at least FrameSize \ 2 bins.</param>
    ''' <param name="applyDct">
    ''' False (default): returns the natural log of the mel filter-bank energies.
    ''' This is what the method has always returned and what the existing model was
    ''' trained on. True: additionally applies a DCT-II and returns the cepstral
    ''' coefficients; switching this on requires retraining the model.
    ''' </param>
    ''' <returns>MFCCNum values.</returns>
    ''' <exception cref="ArgumentException"><paramref name="fft"/> is too short.</exception>
    Public Function MFCC(fft() As Complex, Optional applyDct As Boolean = False) As Single()
        Dim half As Integer = Me.FrameSize \ 2
        If fft Is Nothing OrElse fft.Length < half Then
            Throw New ArgumentException("fft must contain at least FrameSize \ 2 bins.", "fft")
        End If

        ' Power spectrum, computed once instead of once per filter.
        Dim power(half - 1) As Double
        For j As Integer = 0 To half - 1
            power(j) = Math.Pow(fft(j).Magnitude, 2)
        Next

        ' Filter-bank energies, then log (zero energy is left at 0).
        Dim S As Single() = New Single(MFCCNum - 1) {}
        For i As Integer = 0 To MFCCNum - 1
            For j As Integer = 0 To half - 1
                ' Accumulated in single precision, as before, to keep results unchanged.
                S(i) = CSng(S(i) + power(j) * H(i, j))
            Next
            If S(i) <> 0 Then
                S(i) = CSng(Math.Log(S(i), Math.E))
            End If
        Next

        If Not applyDct Then Return S

        ' DCT-II: c(l) = sum_i S(i) * cos(pi * l * (i + 0.5) / N)
        Dim cepstrum(MFCCNum - 1) As Single
        For l As Integer = 0 To MFCCNum - 1
            Dim acc As Double = 0
            For i As Integer = 0 To MFCCNum - 1
                acc += S(i) * Math.Cos(Math.PI * l * (i + 0.5) / MFCCNum)
            Next
            cepstrum(l) = CSng(acc)
        Next
        Return cepstrum
    End Function
End Class
本站文章如无特殊说明,均为本站原创,如若转载,请注明出处:tensorflow 钢琴谱练习 - Python技术站