录音并识别琴键

Imports NAudio.Wave
Imports MathNet.Numerics.IntegralTransforms
Imports System.Numerics
Imports TensorFlow
Imports System.IO

Public Class Form1

    ' Capture format: 16 kHz, 16-bit, mono.
    Private Const CaptureSampleRate As Integer = 16000
    ' Samples per classified block (128 ms at 16 kHz).
    Private Const BlockSamples As Integer = 2048
    ' Framing used for feature extraction: 512-sample frames, 256-sample shift.
    Private Const AnalysisFrameSize As Integer = 512
    Private Const FrameShift As Integer = 256
    ' Coefficients produced per frame by the MFCC helper (its default MFCCNum).
    Private Const CoeffsPerFrame As Integer = 26
    ' Model input width: 7 frames * 26 coefficients.
    Private Const ModelInputSize As Integer = 182
    ' Number of classes in the one-hot training labels.
    Private Const ClassCount As Integer = 8

    ' Recorder
    Dim wav As New WaveInEvent
    ' True once recording has been started, so repeated clicks are ignored.
    Private recordingStarted As Boolean = False

    ' Buffers used by the capture callback
    Dim WavData16(BlockSamples - 1) As Int16
    Dim WavDataDb(BlockSamples - 1) As Single   ' currently unused
    Dim mfcc As New MFCC
    Dim piano As New TensorflowPiano

    ''' <summary>
    ''' Starts microphone capture. Subsequent clicks are ignored while recording
    ''' is active, so the callback is attached exactly once.
    ''' </summary>
    Private Sub Button1_Click(sender As Object, e As EventArgs) Handles Button1.Click
        If recordingStarted Then
            Return
        End If

        ' Buffer size in bytes = sampleRate * milliseconds * bytesPerSample / 1000
        wav.BufferMilliseconds = 128
        wav.NumberOfBuffers = 6   ' was 12; fewer buffers so recording is not interrupted
        wav.WaveFormat = New WaveFormat(CaptureSampleRate, 16, 1)

        AddHandler wav.DataAvailable, AddressOf waveIn_DataAvailable
        Try
            wav.StartRecording()
            recordingStarted = True
        Catch
            ' Do not leave a dangling subscription behind if the device could not be opened.
            RemoveHandler wav.DataAvailable, AddressOf waveIn_DataAvailable
            Throw
        End Try
    End Sub

    ''' <summary>
    ''' Capture callback: copies the recorded bytes into the sample buffer,
    ''' classifies the block and shows the class index when its score exceeds 0.9.
    ''' </summary>
    Private Sub waveIn_DataAvailable(sender As Object, e As WaveInEventArgs)
        Dim capacityBytes As Integer = WavData16.Length * 2
        Dim byteCount As Integer = Math.Min(e.BytesRecorded, capacityBytes)

        Buffer.BlockCopy(e.Buffer, 0, WavData16, 0, byteCount)
        If byteCount < capacityBytes Then
            ' Short buffer: zero the tail so samples from the previous block are not re-used.
            Dim firstStale As Integer = byteCount \ 2
            Array.Clear(WavData16, firstStale, WavData16.Length - firstStale)
        End If

        Dim confidence As Single
        Dim num As Integer = ClassifyBlock(WavData16, confidence)

        If confidence > 0.9 Then
            ' The callback is marshalled to the UI through the delegate; BeginInvoke
            ' avoids blocking the capture callback while the UI is busy.
            If IsHandleCreated AndAlso Not IsDisposed Then
                Me.BeginInvoke(New ShowText(AddressOf ShowTxt), num.ToString)
            End If
        End If
    End Sub

    Public Delegate Sub ShowText(txt As String)

    ''' <summary>Writes <paramref name="txt"/> to Label1.</summary>
    Public Sub ShowTxt(txt As String)
        Label1.Text = txt
    End Sub

    ''' <summary>
    ''' Extracts the feature vector for one block, runs the model and returns the
    ''' index of the highest score. The score itself is returned through
    ''' <paramref name="confidence"/>.
    ''' </summary>
    Private Function ClassifyBlock(samples() As Int16, ByRef confidence As Single) As Integer
        Dim mfccs = WavTMfcc(samples)
        If mfccs.Length < ModelInputSize Then
            Throw New ArgumentException("Block is too short to produce " & ModelInputSize & " features.", "samples")
        End If

        Dim features(0, ModelInputSize - 1) As Single
        For i As Integer = 0 To ModelInputSize - 1
            features(0, i) = mfccs(i)
        Next

        Dim scores() As Single = piano.Detect(features)

        ' Arg-max over every score the model returns (not a hard-coded count).
        Dim best As Integer = 0
        For i As Integer = 1 To scores.Length - 1
            If scores(i) > scores(best) Then
                best = i
            End If
        Next

        confidence = scores(best)
        Return best
    End Function

    ''' <summary>
    ''' Splits <paramref name="data"/> into 512-sample frames with a 256-sample shift
    ''' and concatenates the 26 coefficients returned by the MFCC helper for each frame.
    ''' </summary>
    ''' <param name="data">16-bit samples.</param>
    ''' <returns>
    ''' frameCount * 26 values (182 for a 2048-sample block); an empty array when
    ''' the input is shorter than one frame.
    ''' </returns>
    Public Function WavTMfcc(data() As Int16) As Single()
        If data Is Nothing Then
            Throw New ArgumentNullException("data")
        End If

        ' Integer arithmetic: only frames that fit completely are processed.
        Dim frameCount As Integer = data.Length \ FrameShift - 1
        If frameCount <= 0 Then
            Return New Single() {}
        End If

        Dim mfccs(frameCount * CoeffsPerFrame - 1) As Single
        Dim Frame(AnalysisFrameSize - 1) As Single
        For i As Integer = 0 To frameCount - 1
            ' Array.Copy widens Int16 to Single while copying.
            Array.Copy(data, i * FrameShift, Frame, 0, AnalysisFrameSize)
            mfcc.Hamming_window(Frame)
            Dim fft As Complex() = mfcc.FFT(Frame)
            Dim rs As Single() = mfcc.MFCC(fft)
            Array.Copy(rs, 0, mfccs, i * CoeffsPerFrame, CoeffsPerFrame)
        Next

        Return mfccs
    End Function

    ''' <summary>
    ''' Reads 16-bit samples from a wave file and always releases the reader.
    ''' </summary>
    ''' <param name="path">Wave file to open.</param>
    ''' <param name="sampleCount">
    ''' Number of samples to return; a negative value means "all samples in the file".
    ''' Samples that could not be read stay zero.
    ''' </param>
    Private Shared Function ReadSamples(path As String, Optional sampleCount As Integer = -1) As Int16()
        Using reader As New WaveFileReader(path)
            If sampleCount < 0 Then
                sampleCount = CInt(reader.Length \ 2)
            End If

            Dim bytes(sampleCount * 2 - 1) As Byte
            Dim samples(sampleCount - 1) As Int16
            Dim got As Integer = reader.Read(bytes, 0, bytes.Length)
            Buffer.BlockCopy(bytes, 0, samples, 0, got)
            Return samples
        End Using
    End Function

    ''' <summary>
    ''' Builds the training file d:\mfcc.txt: for every *.wav under d:\piano one line
    ''' with the features of its first 2048 samples followed by one line with the
    ''' one-hot label taken from the first character of the file name.
    ''' </summary>
    Private Sub Button7_Click(sender As Object, e As EventArgs) Handles Button7.Click
        Using txtwrite As New IO.StreamWriter("d:\mfcc.txt", False)
            Dim fs As New DirectoryInfo("d:\piano")
            For Each i As FileInfo In fs.GetFiles("*.wav", SearchOption.AllDirectories)
                Dim fn = i.Name

                ' Val() yields 0 for a non-digit first character, so such files are labelled 0.
                Dim label As Integer = CInt(Val(Mid(fn, 1, 1)))
                If label >= ClassCount Then
                    ' A leading 8 or 9 has no slot in the label vector.
                    Debug.Print("skipped (label out of range): " & i.FullName)
                    Continue For
                End If

                Dim it16 = ReadSamples(i.FullName, BlockSamples)

                'VAD(it16)

                ' Feature vector (182 values for a 2048-sample block)
                Dim mfccs = WavTMfcc(it16)

                ' Invariant culture keeps "." as decimal separator so the
                ' comma-separated file stays parseable on every locale.
                Dim StrMfcc = String.Join(",", Array.ConvertAll(Of Single, String)(mfccs,
                    Function(v) v.ToString(System.Globalization.CultureInfo.InvariantCulture)))

                Dim Ans(ClassCount - 1) As Integer
                Ans(label) = 1
                Dim StrAns = String.Join(",", Ans)

                txtwrite.WriteLine(StrMfcc)
                txtwrite.WriteLine(StrAns)
            Next
        End Using
    End Sub

    ''' <summary>
    ''' Detects active regions using an exponentially averaged signal power:
    ''' v = 0.1 * x^2 + 0.9 * v. A region starts when the average exceeds 1024^2
    ''' and ends when it falls below 100^2.
    ''' </summary>
    ''' <returns>
    ''' One Point per region with X = start index and Y = end index. A region that
    ''' is still active at the end of the data is closed at the last sample.
    ''' </returns>
    Public Function VAD(data As Int16()) As List(Of Point)
        If data Is Nothing Then
            Throw New ArgumentNullException("data")
        End If

        Dim Belta As Single = 0.1
        Dim Sum As UInt64

        Dim StartP As Integer
        Dim Status As Boolean = False

        Dim WaveArea As New List(Of Point)

        Dim startThreshold As Double = Math.Pow(1024, 2)
        Dim endThreshold As Double = Math.Pow(100, 2)

        For i As Integer = 0 To data.Length - 1
            Sum = Belta * Math.Pow(data(i), 2) + (1 - Belta) * Sum

            ' Start of sound
            If Sum > startThreshold AndAlso Not Status Then
                Status = True
                StartP = i
            End If

            ' End of sound
            If Status AndAlso Sum < endThreshold Then
                WaveArea.Add(New Point(StartP, i))
                Status = False
            End If
        Next

        ' Do not lose a region that runs up to the end of the buffer.
        If Status Then
            WaveArea.Add(New Point(StartP, data.Length - 1))
        End If

        Return WaveArea
    End Function

    ''' <summary>Runs VAD over every sample of d:\d00.wav.</summary>
    Private Sub Button8_Click(sender As Object, e As EventArgs) Handles Button8.Click
        Dim it16 = ReadSamples("d:\d00.wav")

        VAD(it16)
    End Sub

    ''' <summary>
    ''' Classifies d:\testpiano.wav block by block (2048 samples each) and prints
    ''' "index:score" for every block to the debug output.
    ''' </summary>
    Private Sub Button9_Click(sender As Object, e As EventArgs) Handles Button9.Click
        Dim it16 = ReadSamples("d:\testpiano.wav")

        Dim frame(BlockSamples - 1) As Int16

        For k As Integer = 0 To it16.Length \ BlockSamples - 1
            Array.Copy(it16, k * BlockSamples, frame, 0, BlockSamples)

            Dim max As Single
            Dim num As Integer = ClassifyBlock(frame, max)

            Debug.Print(num & ":" & max)
        Next
    End Sub

    Private Sub Form1_Load(sender As Object, e As EventArgs) Handles MyBase.Load

    End Sub
End Class

  

 

在 VB 中计算训练数据的 MFCC 特征并保存为 TXT 文件(见上面的 Button7_Click);然后在 Python 脚本中读取该文件进行训练:

import tensorflow as tf
import librosa
import numpy as np
from tensorflow.python.framework.graph_util import convert_variables_to_constants


def _parse_row(line):
    """Parse one comma-separated line of numbers into a 1-D float array.

    Uses float() instead of eval() so that the contents of the data file are
    never executed as Python code.
    """
    return np.array([float(tok) for tok in line.strip().split(",")])


xdata = []
ydata = []

# The file alternates between a feature line (182 values) and a label line
# (8-way one-hot). Blank lines are ignored.
with open("d:/mfcc.txt", "r") as f:
    rows = [line for line in f if line.strip()]

if len(rows) % 2 != 0:
    raise ValueError("d:/mfcc.txt must contain feature/label line pairs, got %d lines" % len(rows))
if not rows:
    raise ValueError("d:/mfcc.txt contains no training samples")

for k in range(0, len(rows), 2):
    xdata.append(_parse_row(rows[k]))
    ydata.append(_parse_row(rows[k + 1]))


# Single-layer softmax classifier: 182 inputs -> 8 classes.
x = tf.placeholder("float32", [None, 182], name='input')
w = tf.Variable(tf.truncated_normal([182, 8], stddev=0.1))
b = tf.Variable(tf.truncated_normal(shape=[8], stddev=0.1, dtype=tf.float32))

y = tf.nn.softmax(tf.matmul(x, w) + b, name='out')

y_ = tf.placeholder("float32", [None, 8])

# Cross entropy. The clip keeps log() finite when a softmax output reaches 0.
loss = -tf.reduce_sum(y_ * tf.log(tf.clip_by_value(y, 1e-10, 1.0)))

train = tf.train.GradientDescentOptimizer(1e-5).minimize(loss)

feed = {x: xdata, y_: ydata}

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(20000):
        sess.run(train, feed_dict=feed)
        if i % 100 == 0:
            # Report the current loss next to the step number.
            print("step %d,%g" % (i, sess.run(loss, feed_dict=feed)))
            print(sess.run(y_, feed_dict={y_: ydata}))
            print(sess.run(y, feed_dict={x: xdata}))

    # Freeze the variables into constants and save the graph in .pb format.
    graph = convert_variables_to_constants(sess, sess.graph_def, ['out'])
    tf.train.write_graph(graph, 'd:/', 'piano.pb', as_text=False)

  

 

在VB中使用训练好的模型

Imports System.IO
Imports System.Numerics
Imports TensorFlow

\'Install-Package TensorFlowSharp

''' <summary>
''' Loads the frozen graph d:\piano.pb and evaluates its "out" node for a given
''' "input" tensor.
''' </summary>
Public Class TensorflowPiano
    Implements System.IDisposable

    Private ReadOnly graph As TFGraph
    Private ReadOnly session As TFSession
    Private disposed As Boolean = False

    ''' <summary>Reads the model file and creates a session for it.</summary>
    Public Sub New()
        Dim model As Byte() = File.ReadAllBytes("d:\piano.pb")
        graph = New TFGraph()
        graph.Import(model, "")

        session = New TFSession(graph)
    End Sub

    ''' <summary>
    ''' Runs the model on <paramref name="Data"/> and returns the first row of the
    ''' "out" tensor.
    ''' </summary>
    ''' <param name="Data">Input rows fed to the "input" node.</param>
    ''' <remarks>
    ''' All per-call state is local, so concurrent calls do not overwrite each
    ''' other's runner or results.
    ''' </remarks>
    Public Function Detect(Data(,) As Single) As Single()
        If Data Is Nothing Then
            Throw New ArgumentNullException("Data")
        End If
        If disposed Then
            Throw New ObjectDisposedException("TensorflowPiano")
        End If

        Dim runner As TFSession.Runner = session.GetRunner()
        runner.AddInput(graph("input")(0), Data).Fetch(graph("out")(0))

        Dim output As TFTensor() = runner.Run()
        Try
            ' GetValue(True) returns the tensor as a jagged array; element 0 is the first row.
            Dim rows As Object = output(0).GetValue(True)
            Dim rt As Single() = CType(rows(0), Single())
            Return rt
        Finally
            ' Release the native memory held by the result tensors.
            For Each tensor As TFTensor In output
                tensor.Dispose()
            Next
        End Try
    End Function

    ''' <summary>Releases the session and the graph.</summary>
    Public Sub Dispose() Implements System.IDisposable.Dispose
        If disposed Then
            Return
        End If
        disposed = True

        session.Dispose()
        graph.Dispose()
    End Sub

End Class

  

 

声音特征提取:

Imports System.Numerics
Imports MathNet.Numerics.IntegralTransforms

''' <summary>
''' Feature extraction helpers: Hamming window, FFT and a triangular mel
''' filterbank (16 kHz sample rate, 40 filters spanning 0 Hz to 8 kHz, of which
''' the first MFCCNum are used).
''' </summary>
Public Class MFCC

    ' Filterbank weights: H(i, j) is the weight of FFT bin j in filter i.
    ' Dimensions are (MFCCNum + 1) x (FrameSize / 2 + 1).
    Public H As Double(,)

    Private MFCCNum As Integer
    Private FrameSize As Integer ' frame length, 512 by default


    ''' <summary>Builds the mel filterbank.</summary>
    ''' <param name="framesize">Frame length in samples (number of FFT points).</param>
    ''' <param name="MFCCNum">
    ''' Number of coefficients returned per frame; must be between 1 and 39
    ''' because filter i uses filter points i, i + 1 and i + 2 out of 42.
    ''' </param>
    Public Sub New(Optional framesize As Integer = 512, Optional MFCCNum As Integer = 26)
        Const sampleRate As Integer = 16000  ' sample rate
        Const filterNum As Integer = 40      ' number of filters

        If framesize < 2 Then
            Throw New ArgumentOutOfRangeException("framesize", "framesize must be at least 2.")
        End If
        If MFCCNum < 1 OrElse MFCCNum > filterNum - 1 Then
            Throw New ArgumentOutOfRangeException("MFCCNum", "MFCCNum must be between 1 and " & (filterNum - 1) & ".")
        End If

        ' Note: the minimum frequency freMin can be set to e.g. 0 or 300.
        Me.MFCCNum = MFCCNum
        Me.FrameSize = framesize

        Dim halfSize As Integer = Me.FrameSize \ 2
        H = New Double(MFCCNum, halfSize) {}

        ' filterNum filters need filterNum + 2 edge points.
        Dim filter_points(filterNum + 1) As Integer

        Dim freMax As Double = sampleRate / 2   ' highest frequency
        Dim freMin As Double = 0                ' lowest frequency
        ' Convert Hz to mel.
        Dim melFremax As Double = 1125 * Math.Log(1 + freMax / 700)
        Dim melFremin As Double = 1125 * Math.Log(1 + freMin / 700)

        Dim k As Double = (melFremax - melFremin) / (filterNum + 1)

        For i As Integer = 0 To filterNum + 1
            Dim mel As Double = melFremin + k * i
            ' Convert mel back to Hz, then to an FFT bin index.
            Dim hz As Double = 700 * (Math.Exp(mel / 1125) - 1)
            filter_points(i) = Math.Floor((Me.FrameSize + 1) * hz / sampleRate)
        Next

        ' Build the triangular filters. Entries outside a triangle keep their
        ' initial value 0. At the centre bin both branches apply and the falling
        ' edge (evaluated last) wins.
        For i As Integer = 0 To MFCCNum
            Dim left As Integer = filter_points(i)
            Dim centre As Integer = filter_points(i + 1)
            Dim right As Integer = filter_points(i + 2)

            For j As Integer = 0 To halfSize - 1
                ' Rising edge; skipped for a zero-width edge to avoid 0/0.
                If left <= j AndAlso j <= centre AndAlso centre > left Then
                    H(i, j) = (CDbl(j - left) / (centre - left))
                End If
                ' Falling edge; skipped for a zero-width edge to avoid 0/0.
                If centre <= j AndAlso j <= right AndAlso right > centre Then
                    H(i, j) = (CDbl(right - j) / (right - centre))
                End If
            Next
        Next
    End Sub

    ''' <summary>Multiplies <paramref name="WaveData"/> in place by a Hamming window.</summary>
    Public Sub Hamming_window(WaveData() As Single)
        If WaveData Is Nothing Then
            Throw New ArgumentNullException("WaveData")
        End If

        Dim len As Integer = WaveData.Length
        Dim omega As Single = 2.0 * Math.PI / len
        For j As Integer = 0 To len - 1
            WaveData(j) = (0.54 - 0.46 * Math.Cos(omega * (j))) * WaveData(j)
        Next
    End Sub

    ''' <summary>
    ''' Returns the forward Fourier transform of <paramref name="WaveData"/>
    ''' (Math.NET, FourierOptions.Matlab). The input array is not modified.
    ''' </summary>
    Public Function FFT(WaveData() As Single) As Complex()
        If WaveData Is Nothing Then
            Throw New ArgumentNullException("WaveData")
        End If

        Dim FFT_Complex(WaveData.Length - 1) As Complex
        For i As Integer = 0 To WaveData.Length - 1
            FFT_Complex(i) = WaveData(i)
        Next
        MathNet.Numerics.IntegralTransforms.Fourier.Forward(FFT_Complex, FourierOptions.Matlab)
        Return FFT_Complex
    End Function


    ''' <summary>
    ''' Returns, for each of the MFCCNum filters, the natural logarithm of the
    ''' filter-weighted power spectrum (a filter whose energy is exactly 0 yields 0).
    ''' </summary>
    ''' <param name="fft">Spectrum of one frame; the first FrameSize / 2 bins are used.</param>
    ''' <remarks>
    ''' No DCT is applied to the result. An earlier DCT pass computed values that
    ''' were never returned and has been removed; the returned values are unchanged.
    ''' </remarks>
    Public Function MFCC(fft() As Complex) As Single()
        If fft Is Nothing Then
            Throw New ArgumentNullException("fft")
        End If

        Dim halfSize As Integer = Me.FrameSize \ 2
        If fft.Length < halfSize Then
            Throw New ArgumentException("fft must contain at least " & halfSize & " bins.", "fft")
        End If

        Dim S As Single() = New Single(MFCCNum - 1) {}
        For i As Integer = 0 To MFCCNum - 1
            For j As Integer = 0 To halfSize - 1
                S(i) = S(i) + Math.Pow(fft(j).Magnitude, 2) * H(i, j)
            Next
            ' Take the log; zero energy is left at 0 instead of -Infinity.
            If S(i) <> 0 Then
                S(i) = Math.Log(S(i), Math.E)
            End If
        Next

        Return S
    End Function

End Class