Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
0f71f29
Fix inconsistent whitespace and indentation
petschekr May 30, 2017
b110382
Initial speech testing UI commit
petschekr May 30, 2017
616f5cb
Add testing .wav files
petschekr May 31, 2017
ee58f02
Update SpeechRecognizer to support reading audio from .wav files
petschekr May 31, 2017
d3434d0
Add method to stop speech recognition without stopping SpeechRecogniz…
petschekr May 31, 2017
d2460bd
Implement starting / stopping audio recording in the UI
petschekr May 31, 2017
79cbda4
Simplify file loading in SpeechRecognizer
petschekr May 31, 2017
d5542a5
Implement opening and recording audio for speech recognition
petschekr May 31, 2017
7b45475
Default open file dialog's directory to the directory of current file
petschekr May 31, 2017
c67632f
Improve recognition accuracy by applying threshold relatively
petschekr May 31, 2017
535779e
Normalize quote style
petschekr May 31, 2017
18b63cd
Subscribe to recognizer topic and list recognized commands in the GUI
petschekr May 31, 2017
f3b6265
Add support for opening a folder of .wav files for recognition
petschekr Jun 1, 2017
180a90e
Add UNKNOWN speech keyword for when the phrase can't be determined
petschekr Jun 1, 2017
e90aa83
Don't error for invalid file / directory name
petschekr Jun 1, 2017
2383685
Ensure that speech results are correctly matched to files when openin…
petschekr Jun 1, 2017
69860ee
Add clear and export output buttons
petschekr Jun 1, 2017
f610aac
Improve background noise handling with a minimum absolute threshold
petschekr Jun 2, 2017
d841ea6
Add weights that seem to improve recognition accuracy
petschekr Jun 2, 2017
3617503
Replace begin experiment keyphrase to improve recognition
petschekr Jun 2, 2017
21cc14a
Publish speech debug info if requested
petschekr Jun 12, 2017
12bdf29
Merge branch 'master' into speech-testing
petschekr Aug 6, 2017
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion hlpr_speech_recognition/data/kps.dic
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ CAN(2) K AH N
CLOSE K L OW S
CLOSE(2) K L OW Z
END EH N D
EXPERIMENT IH K S P EH R AH M AH N T
FINISH F IH N IH SH
GO G OW
HAND HH AE N D
Expand Down
2 changes: 1 addition & 1 deletion hlpr_speech_recognition/data/kps.map
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
GREETING:HELLO POLI!
HEAR_CHECK:CAN YOU HEAR ME?
SMALL_TALK:HOW ARE YOU TODAY?
START_EXP:LET'S BEGIN THE EXPERIMENT
START_EXP:LET'S START
OPEN_HAND:OPEN YOUR HAND
CLOSE_HAND:CLOSE YOUR HAND
START_GC:RELEASE YOUR ARM
Expand Down
26 changes: 13 additions & 13 deletions hlpr_speech_recognition/data/kps.txt
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
HELLO POLI!
CAN YOU HEAR ME?
HOW ARE YOU TODAY?
LET'S BEGIN THE EXPERIMENT
OPEN YOUR HAND
CLOSE YOUR HAND
RELEASE YOUR ARM
HOLD YOUR ARM
START HERE
BEGIN HERE
END HERE
FINISH HERE
GO HERE
HELLO POLI!/1e-35/
CAN YOU HEAR ME?/1e-40/
HOW ARE YOU TODAY?/1e-40/
LET'S START/1e-25/
OPEN YOUR HAND/1e-30/
CLOSE YOUR HAND/1e-30/
RELEASE YOUR ARM/1e-20/
HOLD YOUR ARM/1e-25/
START HERE/1e-20/
BEGIN HERE/1e-10/
END HERE/1e-20/
FINISH HERE/1e-20/
GO HERE/1e-20/
5 changes: 4 additions & 1 deletion hlpr_speech_recognition/data/kps.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ tag: "SMALL_TALK"
speech: ["HOW ARE YOU TODAY?"]
---
tag: "START_EXP"
speech: ["LET'S BEGIN THE EXPERIMENT"]
speech: ["LET'S START"]
---
tag: "OPEN_HAND"
speech: ["OPEN YOUR HAND"]
Expand All @@ -30,3 +30,6 @@ speech: ["END HERE","FINISH HERE"]
---
tag: "KEYFRAME"
speech: ["GO HERE"]
---
tag: "UNKNOWN"
speech: ["UNKNOWN"]
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ class SpeechGui(QtGui.QWidget):

def __init__(self):
QtGui.QWidget.__init__(self)

newFont = QtGui.QFont("Times", 24, QtGui.QFont.Bold)

# Add a main layout
Expand All @@ -66,8 +66,8 @@ def __init__(self):

# Initialize rosnode
rospy.init_node("speech_gui")
# Default values for speech listeners

# Default values for speech listeners
rospack = rospkg.RosPack()
default_pub_topic = 'hlpr_speech_commands'

Expand All @@ -85,9 +85,9 @@ def __init__(self):
self.keywords = rospy.get_param(SpeechListener.KEYWORDS_PARAM, dict()).values()
self.commands = [val for sublist in self.keywords for val in sublist]
self.commands.sort()

positions = [(i,j) for i in range(len(self.commands)) for j in range(3)]

for position, name in zip(positions, self.commands):
button = QtGui.QPushButton(name)
button.setObjectName('%s' % name)
Expand All @@ -98,8 +98,8 @@ def __init__(self):

mainLayout.addLayout(grid)
mainLayout.addStretch()
# Show the GUI

# Show the GUI
self.adjustSize()
self.setWindowTitle("Speech Commands Interface")
self.show()
Expand All @@ -109,7 +109,7 @@ def __init__(self):
self.pub = rospy.Publisher(self.recog_topic, StampedString, queue_size=1)

rospy.loginfo("Finished initializing speech GUI")

# Button handler called after a button is clicked
def handleButton(self):
clicked_button = self.sender()
Expand Down
219 changes: 109 additions & 110 deletions hlpr_speech_recognition/src/hlpr_speech_recognition/speech_listener.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,13 @@
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# A script to use pocketsphinx's "keyphrase spotting" feature with
# A script to use pocketsphinx's "keyphrase spotting" feature with
# python and ros. Note that it
#
# Authors: Baris Akgun, Priyanka Khante
# Edited: Vivian Chu, 8-29-16 - rosparam and multiple yaml files
#
# A convenience class to map speech recognition result to commands
# A convenience class to map speech recognition result to commands
# while keeping the time stamp.
#
# Note that currently the mapping is done by hand
Expand All @@ -53,115 +53,114 @@

class SpeechListener:
    """Map recognized speech keyphrases to commands and serve the latest one.

    The listener subscribes to the speech recognition topic, translates each
    recognized phrase into a command tag using the keyword lists loaded from
    the configured YAML files, and exposes the most recent command through a
    ROS service as well as through plain accessor methods.
    """

    # Names of the rosparam entries read (or written) by this class.
    COMMAND_TOPIC_PARAM = "/speech/publish_topic"
    SERVICE_TOPIC_PARAM = "/speech/service_topic"
    KEYWORDS_PARAM = "/speech/keywords"
    COMMAND_TYPE = "/speech/command_type"
    LEAVE_COMMAND = "/speech/leave_command"

    def __init__(self, commandBuffSize=10, init_node=True):
        """Read configuration, load the keyword map and start listening.

        Args:
            commandBuffSize: Size intended for a command queue. The value is
                stored, but the queue itself is currently disabled.
            init_node: When true, initialize the "speech_listener" ROS node.
                Pass False when the caller has already initialized a node.
        """
        if init_node:
            # initialize the ros node
            rospy.init_node("speech_listener")

        # Default values for speech listener
        rospack = rospkg.RosPack()
        default_pub_topic = 'hlpr_speech_commands'
        default_yaml_files = [rospack.get_path('hlpr_speech_recognition') + '/data/kps.yaml']
        default_service_topic = 'get_last_speech_cmd'

        # Pull values from rosparam
        self.recog_topic = rospy.get_param(SpeechListener.COMMAND_TOPIC_PARAM, default_pub_topic)
        self.yaml_files = rospy.get_param("~yaml_list", default_yaml_files)
        self.service_topic = rospy.get_param(SpeechListener.SERVICE_TOPIC_PARAM, default_service_topic)
        # Message class used for the subscription. StampedString messages
        # provide `keyphrase` and `stamp`; any other type is read via `.data`.
        # NOTE(security): eval() executes whatever string is stored under this
        # parameter. Only use it with a trusted parameter server; consider
        # replacing it with an explicit name -> class lookup.
        self.msg_type = eval(rospy.get_param(SpeechListener.COMMAND_TYPE, 'StampedString'))
        # When true, the last command is returned even if it was already read.
        self.leave_command_flag = rospy.get_param(SpeechListener.LEAVE_COMMAND, False)

        # Convert the yaml files into a {tag: [phrases]} dictionary.
        self.keywords_to_commands = {}
        for kps_path in self.yaml_files:
            # `with` closes the file even if parsing fails; safe_load_all
            # refuses to construct arbitrary Python objects from the file.
            with open(kps_path, 'r') as kps_file:
                for data in yaml.safe_load_all(kps_file):
                    if not data:
                        # Skip empty documents (e.g. a trailing '---').
                        continue
                    self.keywords_to_commands[str(data['tag'])] = data['speech']

        # Store this on the rosparam server now
        rospy.set_param(SpeechListener.KEYWORDS_PARAM, self.keywords_to_commands)

        # Kept for a future command queue; the queue is currently disabled.
        self._commandBuffSize = commandBuffSize

        # Flags for starting/stopping the node
        self.spinning = False
        self.last_command_fresh = False
        self.last_command = None
        self.last_ts = None
        self.last_string = None

        # Register the subscriber and the service only after every attribute
        # the callbacks read has been set, so a message that arrives right
        # away cannot observe a half-initialized object.
        rospy.Subscriber(self.recog_topic, self.msg_type, self.callback)
        self._service = rospy.Service(self.service_topic, SpeechService, self.get_last_command)
        rospy.loginfo("Speech listener initialized")

    def callback(self, msg):
        """Record the recognized phrase of `msg` and the command it maps to.

        Called for every message received on the recognition topic.
        """
        if self.msg_type == StampedString:
            self.last_string = msg.keyphrase
            self.last_ts = msg.stamp
        else:
            self.last_string = msg.data

        self.last_command = self._map_keyword_to_command(self.last_string)
        self.last_command_fresh = True
        if self.spinning:
            rospy.loginfo(rospy.get_caller_id() + ' I heard %s', str(self.last_command))

    def _map_keyword_to_command(self, data):
        """Return the command tag whose keyword occurs in `data`.

        Args:
            data: The recognized text, or None.

        Returns:
            The first command with a keyword contained in `data`, or None
            when `data` is None or no keyword matches.
        """
        if data is None:
            return None
        # .items() works on both Python 2 and Python 3.
        for command, keywords in self.keywords_to_commands.items():
            for word in keywords:
                if word in data:
                    return command
        return None

    def get_last_command(self, req=None):
        """Return the most recently heard command.

        Also used as the handler of the speech service.

        Args:
            req: The service request when called through the service; leave
                as None for a direct call.

        Returns:
            None when stale commands are not allowed and the last command was
            already handed out (for a service call this is reported as a
            service error). Otherwise a {'speech_cmd': command} dictionary
            when `req` is truthy, or the bare command for a direct call.
        """
        # Check if we care how "recent" the command was
        if not self.leave_command_flag and not self.last_command_fresh:
            return None

        # The command has now been asked for, so it is no longer fresh.
        self.last_command_fresh = False
        if req:
            return {'speech_cmd': self.last_command}
        return self.last_command

    def get_last_string(self):
        """Return the raw text of the last received message (None if none)."""
        return self.last_string

    def get_last_ts(self):
        """Return the stamp of the last StampedString message (None if none)."""
        return self.last_ts

    def cleanup(self):
        """Shutdown hook; nothing to clean up while the queue is disabled."""
        pass

    def spin(self):
        """Enable logging of heard commands and block until ROS shuts down."""
        self.spinning = True
        # if shutdown, need to clean up the commands queue
        rospy.on_shutdown(self.cleanup)
        rospy.spin()

def listener():
    """Create a SpeechListener and run it until ROS shuts down."""
    sl = SpeechListener()
    sl.spin()

# Start the listener only when this file is run as a script, not on import.
if __name__ == '__main__':
    listener()
Loading