Tech Off Post

Single Post Permalink

View Thread: Kinect Speech Help
  • Largo010

    Hi,
    I'm making an C# application that uses the Kinects Skeletal tracking and its voice recognition as an method of input. I've got to a point where the application uses skeletal data to position a cursor and I'm trying to integrate voice commands.
    I have run into a problem, where I have created and started the speech recogniser but I can't get the react to any audio inputs ( display whether the speech command was recognised).
    Any help will be greatly appreciated, thanks.

    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.IO;
    using System.Text;
    using System.Windows;
    using System.Windows.Controls;
    using System.Windows.Data;
    using System.Windows.Documents;
    using System.Windows.Input;
    using System.Windows.Media;
    using System.Windows.Media.Imaging;
    using System.Windows.Navigation;
    using System.Windows.Threading;
    using System.Windows.Shapes;
    using Microsoft.Kinect;
    using Coding4Fun.Kinect.Wpf;
    using Microsoft.Speech.AudioFormat;
    using Microsoft.Speech.Recognition;
    
    namespace SkeletalTracking
    {
        /// <summary>
        /// Interaction logic for MainWindow.xaml
        /// </summary>
        public partial class MainWindow : Window
        {
            // Variables
            bool closing = false;
            const int skeletonCount = 6;
            Skeleton[] allSkeletons = new Skeleton[skeletonCount];
            private SpeechRecognitionEngine speechRecognizer;
            private KinectAudioSource kinectAudioSource;
            private KinectSensor kinect;
    
            public MainWindow()
            {
                InitializeComponent();
                // Set up the Kinect
                kinectSensorChooser1.KinectSensorChanged += this.kinectSensorChooser1_KinectSensorChanged;
            }
    
            void kinectSensorChooser1_KinectSensorChanged(object sender, DependencyPropertyChangedEventArgs e)
            {
                KinectSensor old = (KinectSensor)e.OldValue as KinectSensor;
    
                StopKinect(old);
    
                KinectSensor sensor = (from sensorToCheck in KinectSensor.KinectSensors where sensorToCheck.Status == KinectStatus.Connected select sensorToCheck).FirstOrDefault();
                if (sensor == null)
                {
                    Console.WriteLine(
                            "No Kinect sensors are attached to this computer or none of the ones that are\n" +
                            "attached are \"Connected\".\n" +
                            "Attach the KinectSensor and restart this application.\n" +
                            "If that doesn't work run SkeletonViewer-WPF to better understand the Status of\n" +
                            "the Kinect sensors.\n\n" +
                            "Press any key to continue.\n");
    
                    // Give a chance for user to see console output before it is dismissed
                    Console.ReadKey(true);
                    return;
                }
    
                this.kinect = sensor;
                if (sensor != null)
                {
                    this.InitializeKinect();
                }
            }
    
            private void InitializeKinect()
            {
                // ===========KINECT SENSOR============================================================== //
                // Set up the sensor for the Kinect 
                var sensor = this.kinect;
    
                var parameters = new TransformSmoothParameters
                {
                    Smoothing = 0.3f,
                    Correction = 0.0f,
                    Prediction = 0.0f,
                    JitterRadius = 1.0f,
                    MaxDeviationRadius = 0.5f
                };
    
                try
                {
                    sensor.SkeletonStream.Enable(parameters);
                    // sensor.SkeletonStream.Enable();
    
                    sensor.AllFramesReady += new EventHandler<AllFramesReadyEventArgs>(sensor_AllFramesReady);
                    sensor.DepthStream.Enable(DepthImageFormat.Resolution640x480Fps30);
                    sensor.ColorStream.Enable(ColorImageFormat.RgbResolution640x480Fps30);
                    sensor.Start();
    
                }
                catch (System.IO.IOException)
                {
                    kinectSensorChooser1.AppConflictOccurred();
                }
    
                // ===========KINECT SPEECH RECOGNIZER=================================================== //
                // Setup the speech recognizer            
                this.speechRecognizer = CreateSpeechRecognizer();
    
                if (this.speechRecognizer != null && sensor != null)
                {
                    StartRecognizer(sensor.AudioSource);
                }
    
            }
    
            private SpeechRecognitionEngine CreateSpeechRecognizer()
            {
                SpeechRecognitionEngine sre;                    //Creates the speech recognition engine
                RecognizerInfo ri = GetKinectRecognizer();      //Creates the speech recognizer info
    
                if (ri == null)
                {
                    MessageBox.Show(@"There was a problem initializing Speech Recognition.Ensure you have the Microsoft Speech SDK installed.",
                        "Failed to load Speech SDK",
                        MessageBoxButton.OK,
                        MessageBoxImage.Error);
                    this.Close();
                    return null;
                }
    
                try
                {
                    sre = new SpeechRecognitionEngine(ri.Id);
                }
                catch
                {
                    MessageBox.Show(@"There was a problem initializing Speech Recognition.Ensure you have the Microsoft Speech SDK installed and configured.",
                            "Failed to load Speech SDK",
                            MessageBoxButton.OK,
                            MessageBoxImage.Error);
                    this.Close();
                    return null;
                }
    
                var grammar = new Choices();
                grammar.Add("red");
                grammar.Add("green");
                grammar.Add("blue");
    
                var gb = new GrammarBuilder { Culture = ri.Culture };
                gb.Append(grammar);
    
                // Create the actual Grammar instance, and then load it into the speech recognizer.
                var g = new Grammar(gb);
    
                sre.LoadGrammar(g);
                sre.SpeechRecognized += this.SreSpeechRecognized;
                sre.SpeechHypothesized += this.SreSpeechHypothesized;
                sre.SpeechRecognitionRejected += this.SreSpeechRecognitionRejected;
                return sre;
            }
    
            private void StartRecognizer(KinectAudioSource kinectSource)
            {
    
                // Obtain the KinectAudioSource to do audio capture
                this.kinectAudioSource = kinectSource;
                this.kinectAudioSource.AutomaticGainControlEnabled = false;
                this.kinectAudioSource.EchoCancellationMode = EchoCancellationMode.None;    // No AEC for this sample
                this.kinectAudioSource.AutomaticGainControlEnabled = false;                 // Important to turn this off for speech recognition
    
                using (Stream s = kinectAudioSource.Start())
                {
                    speechRecognizer.SetInputToAudioStream(s,
                                                           new SpeechAudioFormatInfo(
                                                               EncodingFormat.Pcm, 16000, 16, 1,
                                                               32000, 2, null));             //creates the audio stream for the use of a single Kinect
    
                    speechRecognizer.RecognizeAsync(RecognizeMode.Multiple);
                }
            }
    
    
    
            private RecognizerInfo GetKinectRecognizer()
            //Creates the Kinect Recogizer used for the voice commands 
            {
                Func<RecognizerInfo, bool> matchingFunc = r =>
                {
                    string value;
                    r.AdditionalInfo.TryGetValue("Kinect", out value);
                    return "True".Equals(value, StringComparison.InvariantCultureIgnoreCase) && "en-US".Equals(r.Culture.Name, StringComparison.InvariantCultureIgnoreCase);
                };
                return SpeechRecognitionEngine.InstalledRecognizers().Where(matchingFunc).FirstOrDefault();
            }
    
    
    
            private void SreSpeechRecognitionRejected(object sender, SpeechRecognitionRejectedEventArgs e)
            {
                Console.WriteLine("\nSpeech Rejected");
            }
    
            private void SreSpeechHypothesized(object sender, SpeechHypothesizedEventArgs e)
            {
                Console.Write("\rSpeech Hypothesized: \t{0}", e.Result.Text);
            }
    
            private void SreSpeechRecognized(object sender, SpeechRecognizedEventArgs e)
            {
                Console.WriteLine("\nSpeech Recognized as: \t{0}", e.Result.Text);
            }
    
            void sensor_AllFramesReady(object sender, AllFramesReadyEventArgs e)
            {
                if (closing)
                {
                    return;
                }
    
                //Get a skeleton
                Skeleton first = GetFirstSkeleton(e);
    
                if (first == null)
                {
                    return;
                }
    
                //set scaled position
                ScalePosition(rightEllipse, first.Joints[JointType.HandRight]);
            }
    
            void GetCameraPoint(Skeleton first, AllFramesReadyEventArgs e)
            {
    
                using (DepthImageFrame depth = e.OpenDepthImageFrame())
                {
                    if (depth == null ||
                        kinectSensorChooser1.Kinect == null)
                    {
                        return;
                    }
    
                    //Map a joint location to a point on the depth map
                    //right hand
                    DepthImagePoint rightDepthPoint =
                        depth.MapFromSkeletonPoint(first.Joints[JointType.HandRight].Position);
    
                    //Map a depth point to a point on the color image
                    //head
                    ColorImagePoint rightColorPoint =
                        depth.MapToColorImagePoint(rightDepthPoint.X, rightDepthPoint.Y,
                        ColorImageFormat.RgbResolution640x480Fps30);
    
    
                    //Set location
                    CameraPosition(rightEllipse, rightColorPoint);
                }
            }
    
    
            Skeleton GetFirstSkeleton(AllFramesReadyEventArgs e)
            {
                using (SkeletonFrame skeletonFrameData = e.OpenSkeletonFrame())
                {
                    if (skeletonFrameData == null)
                    {
                        return null;
                    }
    
                    skeletonFrameData.CopySkeletonDataTo(allSkeletons);
    
                    //get the first tracked skeleton
                    Skeleton first = (from s in allSkeletons
                                      where s.TrackingState == SkeletonTrackingState.Tracked
                                      select s).FirstOrDefault();
    
                    return first;
    
                }
            }
    
            private void StopKinect(KinectSensor sensor)
            {
                if (sensor != null)
                {
                    if (sensor.IsRunning)
                    {
                        //stop sensor 
                        sensor.Stop();
    
                        //stop audio if not null
                        if (sensor.AudioSource != null)
                        {
                            sensor.AudioSource.Stop();
                        }
                    }
                }
            }
    
            private void CameraPosition(FrameworkElement element, ColorImagePoint point)
            {
                //Divide by 2 for width and height so point is right in the middle 
                // instead of in top/left corner
                Canvas.SetLeft(element, point.X - element.Width / 2);
                Canvas.SetTop(element, point.Y - element.Height / 2);
            }
    
            private void ScalePosition(FrameworkElement element, Joint joint)
            {
                //convert & scale (.3 = means 1/3 of joint distance)
                Joint scaledJoint = joint.ScaleTo(1280, 1280, .2f, .2f);
    
                Canvas.SetLeft(element, scaledJoint.Position.X);
                Canvas.SetTop(element, scaledJoint.Position.Y);
            }
    
            private void Window_Loaded(object sender, RoutedEventArgs e)
            {
                closing = false;
            }
    
            private void Window_Closing(object sender, System.ComponentModel.CancelEventArgs e)
            {
                closing = true;
                StopKinect(kinectSensorChooser1.Kinect);
            }
        }
    }