Hi,
I'm making an C# application that uses the Kinects Skeletal tracking and its voice recognition as an method of input. I've got to a point where the application uses skeletal data to position a cursor and I'm trying to integrate voice commands.
I have run into a problem, where I have created and started the speech recogniser but I can't get the react to any audio inputs ( display whether the speech command was recognised).
Any help will be greatly appreciated, thanks.
using System;
using System.Collections.Generic;
using System.Linq;
using System.IO;
using System.Text;
using System.Windows;
using System.Windows.Controls;
using System.Windows.Data;
using System.Windows.Documents;
using System.Windows.Input;
using System.Windows.Media;
using System.Windows.Media.Imaging;
using System.Windows.Navigation;
using System.Windows.Threading;
using System.Windows.Shapes;
using Microsoft.Kinect;
using Coding4Fun.Kinect.Wpf;
using Microsoft.Speech.AudioFormat;
using Microsoft.Speech.Recognition;
namespace SkeletalTracking
{
/// <summary>
/// Interaction logic for MainWindow.xaml
/// </summary>
public partial class MainWindow : Window
{
// Variables
bool closing = false;
const int skeletonCount = 6;
Skeleton[] allSkeletons = new Skeleton[skeletonCount];
private SpeechRecognitionEngine speechRecognizer;
private KinectAudioSource kinectAudioSource;
private KinectSensor kinect;
public MainWindow()
{
InitializeComponent();
// Set up the Kinect
kinectSensorChooser1.KinectSensorChanged += this.kinectSensorChooser1_KinectSensorChanged;
}
void kinectSensorChooser1_KinectSensorChanged(object sender, DependencyPropertyChangedEventArgs e)
{
KinectSensor old = (KinectSensor)e.OldValue as KinectSensor;
StopKinect(old);
KinectSensor sensor = (from sensorToCheck in KinectSensor.KinectSensors where sensorToCheck.Status == KinectStatus.Connected select sensorToCheck).FirstOrDefault();
if (sensor == null)
{
Console.WriteLine(
"No Kinect sensors are attached to this computer or none of the ones that are\n" +
"attached are \"Connected\".\n" +
"Attach the KinectSensor and restart this application.\n" +
"If that doesn't work run SkeletonViewer-WPF to better understand the Status of\n" +
"the Kinect sensors.\n\n" +
"Press any key to continue.\n");
// Give a chance for user to see console output before it is dismissed
Console.ReadKey(true);
return;
}
this.kinect = sensor;
if (sensor != null)
{
this.InitializeKinect();
}
}
private void InitializeKinect()
{
// ===========KINECT SENSOR============================================================== //
// Set up the sensor for the Kinect
var sensor = this.kinect;
var parameters = new TransformSmoothParameters
{
Smoothing = 0.3f,
Correction = 0.0f,
Prediction = 0.0f,
JitterRadius = 1.0f,
MaxDeviationRadius = 0.5f
};
try
{
sensor.SkeletonStream.Enable(parameters);
// sensor.SkeletonStream.Enable();
sensor.AllFramesReady += new EventHandler<AllFramesReadyEventArgs>(sensor_AllFramesReady);
sensor.DepthStream.Enable(DepthImageFormat.Resolution640x480Fps30);
sensor.ColorStream.Enable(ColorImageFormat.RgbResolution640x480Fps30);
sensor.Start();
}
catch (System.IO.IOException)
{
kinectSensorChooser1.AppConflictOccurred();
}
// ===========KINECT SPEECH RECOGNIZER=================================================== //
// Setup the speech recognizer
this.speechRecognizer = CreateSpeechRecognizer();
if (this.speechRecognizer != null && sensor != null)
{
StartRecognizer(sensor.AudioSource);
}
}
private SpeechRecognitionEngine CreateSpeechRecognizer()
{
SpeechRecognitionEngine sre; //Creates the speech recognition engine
RecognizerInfo ri = GetKinectRecognizer(); //Creates the speech recognizer info
if (ri == null)
{
MessageBox.Show(@"There was a problem initializing Speech Recognition.Ensure you have the Microsoft Speech SDK installed.",
"Failed to load Speech SDK",
MessageBoxButton.OK,
MessageBoxImage.Error);
this.Close();
return null;
}
try
{
sre = new SpeechRecognitionEngine(ri.Id);
}
catch
{
MessageBox.Show(@"There was a problem initializing Speech Recognition.Ensure you have the Microsoft Speech SDK installed and configured.",
"Failed to load Speech SDK",
MessageBoxButton.OK,
MessageBoxImage.Error);
this.Close();
return null;
}
var grammar = new Choices();
grammar.Add("red");
grammar.Add("green");
grammar.Add("blue");
var gb = new GrammarBuilder { Culture = ri.Culture };
gb.Append(grammar);
// Create the actual Grammar instance, and then load it into the speech recognizer.
var g = new Grammar(gb);
sre.LoadGrammar(g);
sre.SpeechRecognized += this.SreSpeechRecognized;
sre.SpeechHypothesized += this.SreSpeechHypothesized;
sre.SpeechRecognitionRejected += this.SreSpeechRecognitionRejected;
return sre;
}
private void StartRecognizer(KinectAudioSource kinectSource)
{
// Obtain the KinectAudioSource to do audio capture
this.kinectAudioSource = kinectSource;
this.kinectAudioSource.AutomaticGainControlEnabled = false;
this.kinectAudioSource.EchoCancellationMode = EchoCancellationMode.None; // No AEC for this sample
this.kinectAudioSource.AutomaticGainControlEnabled = false; // Important to turn this off for speech recognition
using (Stream s = kinectAudioSource.Start())
{
speechRecognizer.SetInputToAudioStream(s,
new SpeechAudioFormatInfo(
EncodingFormat.Pcm, 16000, 16, 1,
32000, 2, null)); //creates the audio stream for the use of a single Kinect
speechRecognizer.RecognizeAsync(RecognizeMode.Multiple);
}
}
private RecognizerInfo GetKinectRecognizer()
//Creates the Kinect Recogizer used for the voice commands
{
Func<RecognizerInfo, bool> matchingFunc = r =>
{
string value;
r.AdditionalInfo.TryGetValue("Kinect", out value);
return "True".Equals(value, StringComparison.InvariantCultureIgnoreCase) && "en-US".Equals(r.Culture.Name, StringComparison.InvariantCultureIgnoreCase);
};
return SpeechRecognitionEngine.InstalledRecognizers().Where(matchingFunc).FirstOrDefault();
}
private void SreSpeechRecognitionRejected(object sender, SpeechRecognitionRejectedEventArgs e)
{
Console.WriteLine("\nSpeech Rejected");
}
private void SreSpeechHypothesized(object sender, SpeechHypothesizedEventArgs e)
{
Console.Write("\rSpeech Hypothesized: \t{0}", e.Result.Text);
}
private void SreSpeechRecognized(object sender, SpeechRecognizedEventArgs e)
{
Console.WriteLine("\nSpeech Recognized as: \t{0}", e.Result.Text);
}
void sensor_AllFramesReady(object sender, AllFramesReadyEventArgs e)
{
if (closing)
{
return;
}
//Get a skeleton
Skeleton first = GetFirstSkeleton(e);
if (first == null)
{
return;
}
//set scaled position
ScalePosition(rightEllipse, first.Joints[JointType.HandRight]);
}
void GetCameraPoint(Skeleton first, AllFramesReadyEventArgs e)
{
using (DepthImageFrame depth = e.OpenDepthImageFrame())
{
if (depth == null ||
kinectSensorChooser1.Kinect == null)
{
return;
}
//Map a joint location to a point on the depth map
//right hand
DepthImagePoint rightDepthPoint =
depth.MapFromSkeletonPoint(first.Joints[JointType.HandRight].Position);
//Map a depth point to a point on the color image
//head
ColorImagePoint rightColorPoint =
depth.MapToColorImagePoint(rightDepthPoint.X, rightDepthPoint.Y,
ColorImageFormat.RgbResolution640x480Fps30);
//Set location
CameraPosition(rightEllipse, rightColorPoint);
}
}
Skeleton GetFirstSkeleton(AllFramesReadyEventArgs e)
{
using (SkeletonFrame skeletonFrameData = e.OpenSkeletonFrame())
{
if (skeletonFrameData == null)
{
return null;
}
skeletonFrameData.CopySkeletonDataTo(allSkeletons);
//get the first tracked skeleton
Skeleton first = (from s in allSkeletons
where s.TrackingState == SkeletonTrackingState.Tracked
select s).FirstOrDefault();
return first;
}
}
private void StopKinect(KinectSensor sensor)
{
if (sensor != null)
{
if (sensor.IsRunning)
{
//stop sensor
sensor.Stop();
//stop audio if not null
if (sensor.AudioSource != null)
{
sensor.AudioSource.Stop();
}
}
}
}
private void CameraPosition(FrameworkElement element, ColorImagePoint point)
{
//Divide by 2 for width and height so point is right in the middle
// instead of in top/left corner
Canvas.SetLeft(element, point.X - element.Width / 2);
Canvas.SetTop(element, point.Y - element.Height / 2);
}
private void ScalePosition(FrameworkElement element, Joint joint)
{
//convert & scale (.3 = means 1/3 of joint distance)
Joint scaledJoint = joint.ScaleTo(1280, 1280, .2f, .2f);
Canvas.SetLeft(element, scaledJoint.Position.X);
Canvas.SetTop(element, scaledJoint.Position.Y);
}
private void Window_Loaded(object sender, RoutedEventArgs e)
{
closing = false;
}
private void Window_Closing(object sender, System.ComponentModel.CancelEventArgs e)
{
closing = true;
StopKinect(kinectSensorChooser1.Kinect);
}
}
}
Add your 2¢