diff --git a/.gitignore b/.gitignore
index c9b568f..86dcf69 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,4 @@
*.pyc
*.swp
+hlpr_dialogue_production/data/*
+hlpr_speech_synthesis/src/hlpr_speech_synthesis/data/*
diff --git a/.travis.yml b/.travis.yml
index 4f2b6e0..0328c29 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -121,6 +121,7 @@ install:
before_script:
# source dependencies: install using wstool.
- cd ~/catkin_ws/src
+ - git clone -b catkin git://github.com/interaction-lab/cordial-public.git
- wstool init
- if [[ -f $ROSINSTALL_FILE ]] ; then wstool merge $ROSINSTALL_FILE ; fi
- wstool up
diff --git a/hlpr_dialogue_production/CMakeLists.txt b/hlpr_dialogue_production/CMakeLists.txt
new file mode 100644
index 0000000..846b9c3
--- /dev/null
+++ b/hlpr_dialogue_production/CMakeLists.txt
@@ -0,0 +1,194 @@
+cmake_minimum_required(VERSION 2.8.3)
+project(hlpr_dialogue_production)
+
+## Find catkin macros and libraries
+## if COMPONENTS list like find_package(catkin REQUIRED COMPONENTS xyz)
+## is used, also find other catkin packages
+find_package(catkin REQUIRED COMPONENTS
+  rospy
+  smach
+  smach_ros
+  control_msgs
+  actionlib
+  actionlib_msgs
+  geometry_msgs
+  std_msgs
+  message_generation
+)
+
+## System dependencies are found with CMake's conventions
+# find_package(Boost REQUIRED COMPONENTS system)
+
+
+## Uncomment this if the package has a setup.py. This macro ensures
+## modules and global scripts declared therein get installed
+## See http://ros.org/doc/api/catkin/html/user_guide/setup_dot_py.html
+catkin_python_setup()
+
+################################################
+## Declare ROS messages, services and actions ##
+################################################
+
+## To declare and build messages, services or actions from within this
+## package, follow these steps:
+## * Let MSG_DEP_SET be the set of packages whose message types you use in
+## your messages/services/actions (e.g. std_msgs, actionlib_msgs, ...).
+## * In the file package.xml:
+## * add a build_depend tag for "message_generation"
+## * add a build_depend and a run_depend tag for each package in MSG_DEP_SET
+## * If MSG_DEP_SET isn't empty the following dependency has been pulled in
+## but can be declared for certainty nonetheless:
+## * add a run_depend tag for "message_runtime"
+## * In this file (CMakeLists.txt):
+## * add "message_generation" and every package in MSG_DEP_SET to
+## find_package(catkin REQUIRED COMPONENTS ...)
+## * add "message_runtime" and every package in MSG_DEP_SET to
+## catkin_package(CATKIN_DEPENDS ...)
+## * uncomment the add_*_files sections below as needed
+## and list every .msg/.srv/.action file to be processed
+## * uncomment the generate_messages entry below
+## * add every package in MSG_DEP_SET to generate_messages(DEPENDENCIES ...)
+
+## Generate messages in the 'msg' folder
+# add_message_files(
+# FILES
+# Message1.msg
+# Message2.msg
+# )
+
+# add_service_files(
+# FILES
+# LookAt.srv
+# LookAtT.srv
+# LookAtTS.srv
+# )
+
+## Generate actions in the 'action' folder
+ add_action_files(
+ FILES
+ DialogueAct.action
+ Gesture.action
+ Viseme.action
+ )
+
+# Generate added messages and services with any dependencies listed here
+ generate_messages(
+ DEPENDENCIES
+ geometry_msgs
+ actionlib_msgs
+ std_msgs
+ )
+
+################################################
+## Declare ROS dynamic reconfigure parameters ##
+################################################
+
+## To declare and build dynamic reconfigure parameters within this
+## package, follow these steps:
+## * In the file package.xml:
+## * add a build_depend and a run_depend tag for "dynamic_reconfigure"
+## * In this file (CMakeLists.txt):
+## * add "dynamic_reconfigure" to
+## find_package(catkin REQUIRED COMPONENTS ...)
+## * uncomment the "generate_dynamic_reconfigure_options" section below
+## and list every .cfg file to be processed
+
+## Generate dynamic reconfigure parameters in the 'cfg' folder
+# generate_dynamic_reconfigure_options(
+# cfg/DynReconf1.cfg
+# cfg/DynReconf2.cfg
+# )
+
+###################################
+## catkin specific configuration ##
+###################################
+## The catkin_package macro generates cmake config files for your package
+## Declare things to be passed to dependent projects
+## INCLUDE_DIRS: uncomment this if your package contains header files
+## LIBRARIES: libraries you create in this project that dependent projects also need
+## CATKIN_DEPENDS: catkin_packages dependent projects also need
+## DEPENDS: system dependencies of this project that dependent projects also need
+catkin_package(
+# INCLUDE_DIRS include
+# LIBRARIES hlpr_lookat
+# CATKIN_DEPENDS rospy
+# DEPENDS system_lib
+)
+
+###########
+## Build ##
+###########
+
+## Specify additional locations of header files
+## Your package locations should be listed before other locations
+# include_directories(include)
+include_directories(
+ ${catkin_INCLUDE_DIRS}
+)
+
+## Declare a C++ library
+# add_library(hlpr_lookat
+# src/${PROJECT_NAME}/hlpr_lookat.cpp
+# )
+
+## Add cmake target dependencies of the library
+## as an example, code may need to be generated before libraries
+## either from message generation or dynamic reconfigure
+# add_dependencies(hlpr_lookat ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS})
+
+## Declare a C++ executable
+# add_executable(hlpr_lookat_node src/hlpr_lookat_node.cpp)
+
+## Add cmake target dependencies of the executable
+## same as for the library above
+# add_dependencies(hlpr_lookat_node ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS})
+
+## Specify libraries to link a library or executable target against
+# target_link_libraries(hlpr_lookat_node
+# ${catkin_LIBRARIES}
+# )
+
+#############
+## Install ##
+#############
+
+# all install targets should use catkin DESTINATION variables
+# See http://ros.org/doc/api/catkin/html/adv_user_guide/variables.html
+
+## Mark executable scripts (Python etc.) for installation
+## in contrast to setup.py, you can choose the destination
+# install(PROGRAMS
+# scripts/my_python_script
+# DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
+# )
+
+## Mark executables and/or libraries for installation
+# install(TARGETS hlpr_lookat hlpr_lookat_node
+# ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}
+# LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}
+# RUNTIME DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
+# )
+
+## Mark cpp header files for installation
+# install(DIRECTORY include/${PROJECT_NAME}/
+# DESTINATION ${CATKIN_PACKAGE_INCLUDE_DESTINATION}
+# FILES_MATCHING PATTERN "*.h"
+# PATTERN ".svn" EXCLUDE
+# )
+
+## Mark other files for installation (e.g. launch and bag files, etc.)
+# install(FILES
+# # myfile1
+# # myfile2
+# DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}
+# )
+
+#############
+## Testing ##
+#############
+
+## Add gtest based cpp test target and link libraries
+# catkin_add_gtest(${PROJECT_NAME}-test test/test_hlpr_lookat.cpp)
+# if(TARGET ${PROJECT_NAME}-test)
+# target_link_libraries(${PROJECT_NAME}-test ${PROJECT_NAME})
+# endif()
+
+## Add folders to be run by python nosetests
+# catkin_add_nosetests(test)
diff --git a/hlpr_dialogue_production/LICENSE b/hlpr_dialogue_production/LICENSE
new file mode 100644
index 0000000..2511acd
--- /dev/null
+++ b/hlpr_dialogue_production/LICENSE
@@ -0,0 +1,27 @@
+Copyright (c) 2017, Elaine Short
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+* Neither the name of the copyright holder nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/hlpr_dialogue_production/README.md b/hlpr_dialogue_production/README.md
new file mode 100644
index 0000000..151b350
--- /dev/null
+++ b/hlpr_dialogue_production/README.md
@@ -0,0 +1,4 @@
+# HLPR Dialogue Production
+
+This package produces robot speech with synchronized behaviors (such as
+gestures, gaze, and visemes) for the HLP-R robot.
+
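+## Usage
+
+The dialogue act server accepts either a key from a pre-generated phrase
+file or text marked up with behavior tags (e.g. `<wave>`; see
+`controllers.py` for the behaviors each controller handles). A minimal
+client sketch, assuming the dialogue action server from this package is
+running on `/HLPR_Dialogue`:
+
+```python
+import rospy
+import actionlib
+import hlpr_dialogue_production.msg as dialogue_msgs
+
+rospy.init_node("dialogue_demo")
+client = actionlib.SimpleActionClient("/HLPR_Dialogue",
+                                      dialogue_msgs.DialogueActAction)
+client.wait_for_server()
+
+# "test1" is one of the example phrase keys; marked-up text is also accepted
+goal = dialogue_msgs.DialogueActGoal(text_or_key="test1")
+client.send_goal(goal)
+client.wait_for_result()
+print(client.get_result())
+```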
diff --git a/hlpr_dialogue_production/action/DialogueAct.action b/hlpr_dialogue_production/action/DialogueAct.action
new file mode 100644
index 0000000..e4dabd5
--- /dev/null
+++ b/hlpr_dialogue_production/action/DialogueAct.action
@@ -0,0 +1,7 @@
+string text_or_key
+string audio_file
+string behavior_yaml
+---
+string outcome
+---
+string[] active_states
\ No newline at end of file
diff --git a/hlpr_dialogue_production/action/Gesture.action b/hlpr_dialogue_production/action/Gesture.action
new file mode 100644
index 0000000..531da63
--- /dev/null
+++ b/hlpr_dialogue_production/action/Gesture.action
@@ -0,0 +1,5 @@
+string[] poses
+---
+bool success
+---
+string current_pose
\ No newline at end of file
diff --git a/hlpr_dialogue_production/action/Viseme.action b/hlpr_dialogue_production/action/Viseme.action
new file mode 100644
index 0000000..aaa72b6
--- /dev/null
+++ b/hlpr_dialogue_production/action/Viseme.action
@@ -0,0 +1,5 @@
+string viseme
+---
+bool success
+---
+string current_pose
\ No newline at end of file
diff --git a/hlpr_dialogue_production/docs/hlpr_dialogue_production.rst b/hlpr_dialogue_production/docs/hlpr_dialogue_production.rst
new file mode 100644
index 0000000..9f8d5f3
--- /dev/null
+++ b/hlpr_dialogue_production/docs/hlpr_dialogue_production.rst
@@ -0,0 +1,32 @@
+HLPR Dialogue Production
+=========================
+
+Controllers
+-----------
+.. automodule:: controllers
+ :members:
+
+Dialogue
+---------
+.. automodule:: dialogue
+
+.. autoclass:: Synchronizer
+ :members: __init__, start, reset
+
+.. autoclass:: ControllerState
+ :members: __init__, setup_sync
+
+.. autoclass:: TTSSpeechStart
+ :members: __init__
+
+.. autoclass:: TTSFallbackSpeechStart
+ :members: __init__
+
+.. autoclass:: FileSpeechStart
+ :members: __init__
+
+.. autoclass:: NoPrepSpeechStart
+ :members: __init__
+
+.. autoclass:: SpeechState
+ :members: __init__
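+
+Example
+-------
+
+A minimal sketch of wiring a controller state to a shared ``Synchronizer``
+by hand; it assumes a ROS master and the test beep action server from this
+package are running::
+
+    import rospy
+    from hlpr_dialogue_production import controllers
+    from hlpr_dialogue_production.dialogue import Synchronizer
+
+    rospy.init_node("controller_example")
+
+    sync = Synchronizer()
+    controller = controllers.get_test_controller()
+    controller.setup_sync(sync)  # all controllers share one Synchronizer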
diff --git a/hlpr_dialogue_production/launch/all_controllers.launch b/hlpr_dialogue_production/launch/all_controllers.launch
new file mode 100644
index 0000000..5f0c8d7
--- /dev/null
+++ b/hlpr_dialogue_production/launch/all_controllers.launch
@@ -0,0 +1,19 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/hlpr_dialogue_production/package.xml b/hlpr_dialogue_production/package.xml
new file mode 100644
index 0000000..00acf57
--- /dev/null
+++ b/hlpr_dialogue_production/package.xml
@@ -0,0 +1,62 @@
+
+
+ hlpr_dialogue_production
+ 0.0.0
+ Speech production with simultaneous behavior for HLP-R robot.
+
+
+
+
+ Elaine Short
+
+
+
+
+
+ BSD
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ catkin
+ cordial_tts
+ hlpr_manipulation_utils
+ message_generation
+ control_msgs
+ message_runtime
+ rospy
+ actionlib
+ actionlib_msgs
+ smach
+ smach_ros
+ hlpr_manipulation_utils
+ cordial_tts
+
+
+
+
+
+
+
+
diff --git a/hlpr_dialogue_production/setup.py b/hlpr_dialogue_production/setup.py
new file mode 100644
index 0000000..bac6e30
--- /dev/null
+++ b/hlpr_dialogue_production/setup.py
@@ -0,0 +1,12 @@
+## ! DO NOT MANUALLY INVOKE THIS setup.py, USE CATKIN INSTEAD
+
+from distutils.core import setup
+from catkin_pkg.python_setup import generate_distutils_setup
+
+# fetch values from package.xml
+setup_args = generate_distutils_setup(
+ packages=['hlpr_dialogue_production'],
+ package_dir={'': 'src'},
+)
+
+setup(**setup_args)
diff --git a/hlpr_dialogue_production/speech/gen_phrases.sh b/hlpr_dialogue_production/speech/gen_phrases.sh
new file mode 100755
index 0000000..49ba659
--- /dev/null
+++ b/hlpr_dialogue_production/speech/gen_phrases.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+
+mkdir audio_data
+rosrun cordial_tts gen_phrases.py -o phrases.yaml -d ./audio_data -v Kendra script.txt
+oggdec audio_data/*.ogg
diff --git a/hlpr_dialogue_production/speech/script.txt b/hlpr_dialogue_production/speech/script.txt
new file mode 100644
index 0000000..a0cef2c
--- /dev/null
+++ b/hlpr_dialogue_production/speech/script.txt
@@ -0,0 +1,2 @@
+[test1] Hello there! How are you?
+[test2] This phrase tests the timing of the actions.
diff --git a/hlpr_dialogue_production/src/hlpr_dialogue_production/AWS_text2speech.py b/hlpr_dialogue_production/src/hlpr_dialogue_production/AWS_text2speech.py
new file mode 100644
index 0000000..6048dc0
--- /dev/null
+++ b/hlpr_dialogue_production/src/hlpr_dialogue_production/AWS_text2speech.py
@@ -0,0 +1,248 @@
+#!/usr/bin/env python
+import boto3
+import pygame
+from pygame import mixer
+import sys
+import re
+import json
+import tempfile
+import os
+from contextlib import closing
+import io
+from botocore.exceptions import BotoCoreError, ClientError
+
+class TextToSpeech():
+ def __init__(self, voice='Kimberly'):
+ self.tag_follow_indices = {}
+ self.tags = []
+ self.client = boto3.client('polly')
+ self.voice = voice
+ self.actions = []
+ self.polly_tags = [
+ 'break', # adding a pause
+ 'emphasis', # emphasizing words
+ 'lang', # specify another language for specific words
+ 'mark', # placing a custom tag in your text
+ 'p', # adding a pause between paragraphs
+ 'phoneme', # using phonetic pronunciation
+ 'prosody', # controlling volume, speaking rate and pitch
+ 'prosody amazon:max-duration', # setting a maximum duration for synthesized speech
+ 's', # adding a pause between sentences
+ 'say-as', # controlling how special types of words are spoken
+ 'speak', # identifying SSML-Enhanced text
+ 'sub', # pronouncing acronyms and abbreviations
+ 'w', # improving pronunciation by specifying parts of speech
+ 'amazon:auto-breaths', # adding the sound of breathing
+ 'amazon:effect',
+ #'amazon:effect name=\"drc\"', #adding dynamic range compression
+ #'amazon:effect phonation=\"soft\"', # speaking softly
+ #'amazon:effect vocal-tract-length', # controlling timbre
+ #'amazon:effect name=\"whispered\"' # whispering
+ 'amazon:breath' # breathing
+ ]
+
+
+ def remove_tags(self, tagged_text):
+ untagged_text = ''
+ open_brackets_idx = [m.start() for m in re.finditer('<', tagged_text)]
+ close_brackets_idx = [m.start() for m in re.finditer('>', tagged_text)]
+ assert(len(open_brackets_idx)==len(close_brackets_idx))
+
+ open_brackets_idx.sort()
+ close_brackets_idx.sort()
+
+ pointer = 0
+ for i in range(len(open_brackets_idx)):
+ retain = ''
+ tag = tagged_text[open_brackets_idx[i]:close_brackets_idx[i]+1]
+
+ # keep the tags from amazon polly as part of untagged_text string
+            if any(pt for pt in self.polly_tags if ('<'+pt+' ' in tag) or ('<'+pt+'>' in tag) or ('</'+pt+'>' in tag)):
+ retain = tag
+ # print('retaining: ',retain)
+ else:
+ self.tags.append(tag)
+
+ if(open_brackets_idx[i]!=0):
+ untagged_text += tagged_text[pointer:open_brackets_idx[i]] + retain
+
+ else:
+ untagged_text += retain
+
+ pointer = close_brackets_idx[i]+1
+ if(pointer!=len(tagged_text)):
+ untagged_text = untagged_text + tagged_text[pointer:]
+
+ return "{}".format(untagged_text)
+
+ def extract_behaviors(self, tagged_text):
+ # Remove tags from the input string
+ untagged_text = self.remove_tags(tagged_text)
+
+ # Store which word the tags come after
+ for tag in list(set(self.tags)):
+ tag_idxs = [m.start() for m in re.finditer(tag, tagged_text)] # find all occurances
+ for tag_idx in tag_idxs:
+ if tag_idx!=0:
+ substring = tagged_text[:tag_idx]
+ if(substring[-1]==' '):
+ substring = substring[:-1]
+ untagged_substring = self.remove_tags(substring)
+                    prev_word_idx = untagged_substring.count(' ') + 1
+
+ if tag not in self.tag_follow_indices:
+ self.tag_follow_indices[tag] = [prev_word_idx]
+ else:
+ self.tag_follow_indices[tag].append(prev_word_idx)
+ else:
+ if tag not in self.tag_follow_indices:
+ self.tag_follow_indices[tag] = [0]
+ else:
+ self.tag_follow_indices[tag].append(0)
+
+ print(self.tag_follow_indices)
+ for t in self.tag_follow_indices:
+ for idx in self.tag_follow_indices[t]:
+ args = t.strip("<>").split()
+ act = args.pop(0)
+ self.actions.append([idx, act, args])
+
+
+ # Fetch meta info about speech from AWS using boto3
+ try:
+ response = self.client.synthesize_speech(
+ OutputFormat='json',
+ SpeechMarkTypes=['viseme','word'],
+ Text=untagged_text,
+ TextType="ssml",
+ VoiceId=self.voice)
+ except (BotoCoreError, ClientError) as error:
+ print(error)
+ response = ""
+
+ # Unpack meta info json to an unsorted list of dictionaries
+ output_data = []
+ if "AudioStream" in response:
+ with closing(response["AudioStream"]) as stream:
+ data = stream.read()
+
+ s = data.split('\n')
+ s = [json.loads(line) for line in s if line != '']
+ else:
+ print("Could not stream audio")
+ return untagged_text, []
+
+ word_times = filter(lambda l: l["type"]=="word", s) # Start edits
+ for a in self.actions:
+ if a[0] > len(word_times)-1:
+ a[0] = s[-1]["time"] / 1000. # convert ms to seconds
+ else:
+ a[0] = (word_times[a[0]]["time"]) / 1000. # convert ms to seconds
+
+ data=[]
+ for a in self.actions:
+ args = a[2]
+ data.append({"start":float(a[0]),
+ "type":a[1],
+ "args":args,
+ "id": a[1]}) # End edits
+
+
+ visemes = map(lambda l: [l["time"],l["value"]], filter(lambda l: l["type"]=="viseme",s))
+ for v in visemes:
+ data.append({"start":float(v[0]) / 1000., # convert ms to seconds
+ "type":"viseme",
+ "args":"",
+ "id": v[1]})
+
+ return untagged_text, data
+
+
+ def phrase_to_file(self, name, tagged_text, output_dir):
+
+ # Remove tags from the input string
+        untagged_text, s = self.extract_behaviors(tagged_text)
+
+ # Fetch audio for speech from AWS using boto3
+ spoken_text = self.client.synthesize_speech(
+            OutputFormat='ogg_vorbis',
+ Text=untagged_text,
+ TextType="ssml",
+ VoiceId=self.voice)
+
+ # get absolute dir path
+ output_dir_path = os.path.abspath(os.path.expanduser(output_dir))
+
+ b = {
+ 'text': '"'+untagged_text+'"',
+ 'file': output_dir_path + '/' + name + '.ogg',
+ 'behaviors': s
+ }
+
+ # Write audio to a file
+ with open(b['file'],'wb') as f:
+ f.write(spoken_text['AudioStream'].read())
+ f.close()
+
+ return b
+
+ def say(self, untagged_text, wait=False, interrupt=True):
+
+ try:
+ response = self.client.synthesize_speech(
+ OutputFormat='ogg_vorbis',
+ Text=untagged_text,
+ TextType="ssml",
+ VoiceId=self.voice)
+ except:
+ print "Error synthesizing speech"
+ return 0
+
+ with tempfile.TemporaryFile() as f:
+ if "AudioStream" in response:
+ with closing(response["AudioStream"]) as stream:
+ try:
+ f.write(stream.read())
+ except IOError as error:
+ print(error)
+
+
+ else:
+ print("Could not stream audio")
+ return
+
+ f.seek(0)
+
+ if not pygame.mixer.get_init():
+ pygame.mixer.init()
+ else:
+ if interrupt:
+ pygame.mixer.stop()
+ channel = pygame.mixer.Channel(5)
+ channel.set_volume(1.0)
+ sound = pygame.mixer.Sound(f)
+ channel.play(sound)
+
+ if wait:
+ while channel.get_busy():
+ pass
+ return -1
+
+
+ return sound.get_length()
+
+ def is_speaking(self):
+ # Returns whether or not audio is being played
+        return bool(pygame.mixer.get_init()) and pygame.mixer.Channel(5).get_busy()
+
+ def shutup(self):
+ if pygame.mixer.get_init():
+ pygame.mixer.stop()
+
+
+if __name__ == '__main__':
+ tagged_string = "Hello ! How are you? hello again! "
+ print(tagged_string)
+ obj = TextToSpeech(voice='Kimberly')
+ b = obj.phrase_to_file('out',tagged_string, '../../data/')
+ obj.say(b['text'], wait=True, interrupt=False)
diff --git a/hlpr_dialogue_production/src/hlpr_dialogue_production/__init__.py b/hlpr_dialogue_production/src/hlpr_dialogue_production/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/hlpr_dialogue_production/src/hlpr_dialogue_production/controllers.py b/hlpr_dialogue_production/src/hlpr_dialogue_production/controllers.py
new file mode 100644
index 0000000..2e1501b
--- /dev/null
+++ b/hlpr_dialogue_production/src/hlpr_dialogue_production/controllers.py
@@ -0,0 +1,306 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2017, Elaine Short, SIM Lab
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the SIM Lab nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+"""Defines functions to create behavior controllers for dialogue production.
+
+This module contains functions that create controllers for each of the
+behaviors that one might want to synchronize with robot speech. Currently,
+this includes looking at a point in space, executing a gesture with pre-
+defined waypoints, and a test controller that just plays a beeping sound (and
+can be used to check synchronization).
+"""
+
+
+import rospy
+import smach
+import smach_ros
+import actionlib
+import random
+import os
+
+from geometry_msgs.msg import TransformStamped, Transform, Vector3, Quaternion
+from std_msgs.msg import Header
+import hlpr_dialogue_production.msg as dialogue_msgs
+import hlpr_record_demonstration.msg as record_msgs
+import hlpr_lookat.msg as lookat_msgs
+import hlpr_lookat.msg
+
+
+from hlpr_dialogue_production.dialogue import ControllerState
+import actionlib_tutorials.msg
+
+
+def lookat_controller_cb(behavior_name, string_args):
+ """Callback to create a LookatAction goal from string arguments
+
+ Given a string of arguments (pulled from the behavior tags in a speech
+ string), determines whether the robot should look at the base of the
+ frame or a point relative to the frame. Prints a warning message to the
+ screen and returns None if the number of arguments is incorrect or they
+ are of the wrong type, but does not check that the first argument is a
+ valid frame (if it is not, the LookatWaypoints action server will return
+ the error instead).
+
+ Parameters
+ ----------
+ behavior_name : str
+ The name of the behavior being handled (right now, this will always
+ be "lookat")
+ string_args : list of str
+ The arguments that were parsed from the speech string, as strings
+
+ Returns
+ -------
+ LookatWaypointsGoal
+ Goal containing the one point to look at.
+
+ """
+ if len(string_args)==1: #have just a frame name
+ vec3=Vector3(0.0,0.0,0.0)
+ elif len(string_args)==4:
+ try:
+ point = map(lambda s: float(s), string_args[1:])
+ except ValueError:
+ rospy.logerr("Invalid arguments to lookat: {}".format(string_args))
+ return None
+ vec3=Vector3(point[0],point[1],point[2])
+ else:
+ rospy.logerr("Invalid arguments to lookat: {}".format(string_args))
+ return None
+
+ frame = string_args[0]
+
+ pos = TransformStamped()
+ pos.child_frame_id = frame
+ pos.transform = Transform()
+ pos.transform.translation = vec3
+ pos.header = Header()
+ waypoints = [pos]
+ waytimes = [rospy.Duration(0.0)]
+ return hlpr_lookat.msg.LookatWaypointsGoal(waypoints,waytimes)
+
+def get_lookat_controller():
+ """ Sets up the lookat controller state
+
+ Sets up the lookat controller state to connect to the lookat_waypoints
+ action server in hlpr_lookat. Does not adjust the timing of lookat
+ behaviors.
+
+ """
+ behaviors=["lookat"]
+ time_adj = None
+ return ControllerState("LOOKAT_CONTROLLER",behaviors,
+ "/lookat_waypoints_action_server",
+ hlpr_lookat.msg.LookatWaypointsAction,
+ lookat_controller_cb,time_adj)
+
+
+
+def keyframe_playback_controller_cb(behavior_name, string_args):
+ """Callback to create a PlaybackKeyframeDemoGoal from string arguments
+
+ Given the name of a trajectory, looks up the file associated with the
+ name and packs it into a goal for the hlpr_kinesthetic_teaching playback
+ server.
+
+ Parameters
+ ----------
+ behavior_name : str
+ The name of the behavior being handled
+ string_args : list of str
+ The arguments that were parsed from the speech string, as strings
+ (not used)
+
+ Returns
+ -------
+ PlaybackKeyframeDemoGoal
+ Goal containing the location of the pickle file to play back
+
+ """
+ rospy.loginfo("Got keyframe controller callback")
+
+ pickle_locations={"turn_cup":"/home/eshort/robot_movements/rot_90_deg_joint.pkl",
+ "wave":"/home/eshort/robot_movements/rot_90_deg_joint.pkl"}
+ filename = ""
+
+ home_path = os.path.expanduser("~") + "/robot_movements/" + string_args[0] + ".pkl"
+
+ if string_args[0] in pickle_locations:
+ filename = pickle_locations[string_args[0]]
+ elif os.path.exists(string_args[0]):
+ filename = string_args[0]
+ elif os.path.exists(home_path):
+ filename = home_path
+ else:
+ pass
+
+ if filename == "":
+ rospy.logerr("could not find file for the behavior specified as '" + string_args[0] + "'")
+ rospy.logerr("I looked in the following locations:")
+ looked_in = pickle_locations.values() + [string_args[0], home_path]
+ rospy.logerr(str(looked_in))
+ else:
+ rospy.loginfo("loading behavior from file " + filename)
+ return record_msgs.PlaybackKeyframeDemoGoal(bag_file_name=filename)
+
+def get_keyframe_playback_controller():
+ """ Sets up the keyframe playback controller state
+
+ Sets up the keyframe playback controller to connect to the
+ playback_demonstration_action_server action server in hlpr_record_demonstration.
+
+ """
+ behaviors=["keyframe"]
+ time_adj = None
+ return ControllerState("KEYFRAME_PLAYBACK_CONTROLLER",behaviors,
+ "/playback_keyframe_demo",
+ record_msgs.PlaybackKeyframeDemoAction,
+ keyframe_playback_controller_cb,time_adj)
+
+def gesture_controller_cb(behavior_name, string_args):
+ """Callback to create a GestureGoal from string arguments
+
+ Given a string of arguments (pulled from the behavior tags in a speech
+    string), selects the series of named keypoints that define the gesture.
+    These keypoints are currently defined in the Gesture Action Server (in
+    ``gesture_action_server.py``). To add more keypoints, see the documentation
+ for that file.
+
+ Parameters
+ ----------
+ behavior_name : str
+ The name of the behavior being handled
+ string_args : list of str
+ The arguments that were parsed from the speech string, as strings
+
+ Returns
+ -------
+ GestureGoal
+ Gesture goal containing the list of keypoints for the gesture
+
+ """
+
+ gesture_poses = {"wave":["wave1","wave2","wave1","wave2"],
+ "shrug":["right_home","shrug","right_home"],
+ "thinking":["right_home","hmm","right_home"]}
+ return dialogue_msgs.GestureGoal(poses=gesture_poses[behavior_name])
+
+def get_gesture_controller():
+ """ Sets up the gesture controller state
+
+ Sets up the gesture controller state to connect to the gesture action
+ server in this package. Adjusts the timing of the gesture behaviors
+ by 1s; you may want to change this for your application.
+
+ """
+ behaviors=["wave", "shrug", "thinking"]
+ time_adj = {"wave":1.0,
+ "shrug":1.0,
+ "thinking":1.0}
+
+ return ControllerState("GESTURE_CONTROLLER", behaviors, "/HLPR_Gesture",
+ dialogue_msgs.GestureAction,
+ gesture_controller_cb, time_adj)
+
+def viseme_controller_cb(behavior_name, string_args):
+ """Callback to create a VisemeGoal from string arguments
+
+ Given a string of arguments (pulled from the behavior tags in a speech
+ string), selects the series of named keypoints that define the geature.
+ These keypoints are currently defined in the Gesture Action Server (in
+ ``gesture_action_server.py``. To add more keypoints, see the documentation
+ for that file.
+
+ Parameters
+ ----------
+ behavior_name : str
+ The name of the behavior being handled
+ string_args : list of str
+ The arguments that were parsed from the speech string, as strings
+
+ Returns
+ -------
+ VisemeGoal
+ Viseme goal containing the name of the viseme
+
+ """
+ return dialogue_msgs.VisemeGoal(viseme=behavior_name)
+
+def get_viseme_controller():
+ """ Sets up the viseme controller state
+
+ Sets up the viseme controller state to connect to the viseme action
+ server in this package.
+
+ """
+
+
+ time_adj = {"viseme":0.0}
+
+ behaviors=time_adj.keys()
+
+ return ControllerState("VISEME_CONTROLLER", behaviors, "/HLPR_Visemes",
+ dialogue_msgs.VisemeAction,
+ viseme_controller_cb, time_adj)
+
+
+def test_controller_cb(behavior_name, string_args):
+ """Callback to create a FibonacciGoal from string arguments
+
+ The test action server just uses the Fibonacci goal from the
+ actionlib_tutorials package. The contents of the goal are not used.
+
+ Parameters
+ ----------
+ behavior_name : str
+ The name of the behavior being handled (right now, this will always
+ be "lookat")
+ string_args : list of str
+ The arguments that were parsed from the speech string, as strings
+
+ Returns
+ -------
+ FibonacciGoal
+ Fibonacci goal with order=0; contents not used.
+
+ """
+ return actionlib_tutorials.msg.FibonacciGoal(order=0)
+
+def get_test_controller():
+ """ Sets up the test controller state
+
+ Sets up the test controller state to connect to the test action
+ server in this package. Does not adjust the timing of behaviors.
+
+ """
+ behaviors=["test"]
+ time_adj = {"test":0.0}
+ return ControllerState("TEST_CONTROLLER",behaviors,"/test_beep_controller",
+ actionlib_tutorials.msg.FibonacciAction,
+ test_controller_cb, time_adj)
diff --git a/hlpr_dialogue_production/src/hlpr_dialogue_production/dialogue.py b/hlpr_dialogue_production/src/hlpr_dialogue_production/dialogue.py
new file mode 100644
index 0000000..cc2867e
--- /dev/null
+++ b/hlpr_dialogue_production/src/hlpr_dialogue_production/dialogue.py
@@ -0,0 +1,901 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2017, Elaine Short, SIM Lab
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the SIM Lab nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import rospy
+import smach
+import smach_ros
+import wave
+import yaml
+import threading
+import actionlib
+import contextlib
+from sound_play.libsoundplay import SoundClient
+from AWS_text2speech import TextToSpeech
+import hlpr_dialogue_production.msg as dialogue_msgs
+import sys
+
+class Synchronizer():
+ """ Object that can be passed to all controllers to synchronize their start
+
+ A single Synchronizer object is passed to all controllers so that the
+ speech state can signal when the audio has begun to play. This is needed
+ because online text-to-speech has to download the audio from the internet
+ before it can begin to play, leading to a variable start time for the
+ speech audio.
+ """
+
+ def __init__(self):
+ """ Creates a Synchronizer object"""
+ self.started = False
+ self.time = None
+
+ def start(self):
+ """ Sets the state of the Synchronizer to started and saves the time"""
+ self.started = True
+ self.time = rospy.Time.now()
+
+ def reset(self):
+ """ Resets the state of the Synchronizer to not started"""
+ self.started = False
+ self.time = None
+
+class ControllerState(smach.State):
+ """ Interface to action servers controlling robot behavior
+
+ A ControllerState is a smach.State that sends requests to a specific
+ action server at the appropriate times.
+
+ .. warning:: After creating the object, you *must* call ``setup_sync`` to provide the state with a Synchronizer. The Synchronizer is not included in the constructor so that the object can be created in ``controllers.py`` or elsewhere in code and passed to the SmachWrapper state, which will then set up the Synchronizers and assemble the dialogue act state machine.
+
+
+ **Input keys**
+ * ordered_behaviors : list of dict
+ An ordered list of all the behaviors the controller should play. A
+ behavior is a dict with keys "start", "id", and "args", indicating
+ the start time (after speech begins), name of the behavior, and any
+ arguments to the behavior, respectively.
+
+ **Outcomes**
+ * preempted
+ The behavior was preempted before playing all the behaviors in the list
+ to completion.
+ * done
+ The controller played all of the behaviors in the list.
+
+ """
+ def __init__(self, name, behaviors, topic, action_type, arg_list_to_goal_cb, behavior_time_adj=None):
+ """ Constructor
+
+ Creates the ControllerState with the given name, able to handle the
+ given behaviors by sending the arguments to the arg_list_to_goal_cb
+ with a time adjustment factor of behavior_time_adj (assigned on a per-
+ behavior basis). By convention, the name should be in all caps, and
+ must not conflict with any other names in the state machine containing
+ this state. If using the SmachWrapper, the names "SPEECH", "START",
+ "DIALOGUE" are taken.
+
+
+ Parameters
+ ----------
+ name : str
+ The name of this state in the state machine
+
+ behaviors : list of str
+ The behaviors (from the speech string tags) that this controller
+ can handle
+
+ topic : str
+ The topic for the action server that this controller calls
+
+ action_type : ROS Action Type
+ The type for the action server that this controller calls
+
+ arg_list_to_goal_cb : function
+ A function taking the name of the behavior and a list of arguments
+ and returning a goal for the action of type ``action_type``
+
+ behavior_time_adj : dict
+ A mapping from behavior names (from ``behaviors``) to times (in s).
+ The controller will send the action goal that much earlier.
+ Negative values will cause the controller to call the action
+ server later.
+
+ """
+
+ smach.State.__init__(self,outcomes=["preempted","done"],
+ input_keys=["ordered_behaviors"])
+ self.name = name
+ self._can_handle = behaviors
+ self._cb = arg_list_to_goal_cb
+ self._client = actionlib.SimpleActionClient(topic,action_type)
+ self._sync = None
+ if behavior_time_adj!=None:
+ self._time_adj = behavior_time_adj
+ else:
+ self._time_adj = {}
+
+ def setup_sync(self,synchronizer):
+ """ Provides the controller with a Synchronizer object
+
+ This provides the controller with information about when the speech
+ audio has begun. This will only work if the controller and speech
+ state share the same Synchronizer object.
+
+ Parameters
+ ----------
+ synchronizer : Synchronizer
+ A synchronizer object. All controllers, including the speech state,
+ should share the same synchronizer.
+
+ """
+ self._sync=synchronizer
+
+ def execute(self,userdata):
+ if userdata.ordered_behaviors==None:
+ rospy.logwarn("Controller {} got empty behavior list".format(self.name))
+ return "done"
+
+ ordered_behaviors = filter(lambda b: b["type"] in self._can_handle,
+ userdata.ordered_behaviors)
+
+ while not (self._sync!=None and self._sync.started) and not self.preempt_requested():
+ rospy.sleep(0.001)
+
+ if self.preempt_requested():
+ self._client.cancel_all_goals()
+ self.service_preempt()
+ return "preempted"
+
+ goal_sent=False
+ for b in ordered_behaviors:
+ if b["id"] in self._time_adj:
+ start = b["start"]-self._time_adj[b["id"]]
+ else:
+ start = b["start"]
+
+ goal = self._cb(b["id"],b["args"])
+ if goal==None:
+ continue
+ while not self.preempt_requested() and rospy.Time.now()-self._sync.time < rospy.Duration.from_sec(start):
+ rospy.sleep(0.001)
+
+ if self.preempt_requested():
+ self._client.cancel_all_goals()
+ self.service_preempt()
+ return "preempted"
+
+ self._client.send_goal(goal)
+ goal_sent = True
+ if goal_sent:
+ while not self._client.wait_for_result(rospy.Duration(0.05)):
+ if self.preempt_requested():
+ self._client.cancel_all_goals()
+ self.service_preempt()
+ return "preempted"
+ return "done"
+
+class TTSSpeechStart(smach.State):
+ """ Speech prep state when using TTS
+
+ State to process marked text to ordered behaviors, unmarked text, and an
+ empty wav_file, that can be passed on to the Speech state.
+
+
+ **Input keys**
+ * marked_text : str
+        Text marked up with behavior tags. Behaviors will be synced to
+        the word following the tag.
+
+ **Output keys**
+ * text : str
+        The speech text with the behavior tags removed, ready to be sent
+        to the TTS engine.
+ * ordered_behaviors : list of dict
+ A list of behavior dictionaries. A behavior has the keys "id",
+ "start", and "args". Ordered by start time.
+ * wav_file : str
+ Always None
+
+ **Outcomes**
+ * done
+ Finished fetching behaviors
+
+ """
+ def __init__(self,voice="Kimberly"):
+ """ Constructor
+
+ Initializes TTS for speech using Amazon Polly with the given voice
+
+ Parameters
+ -----------
+ voice : str, optional
+ Which Amazon Polly voice to use. Defaults to Kimberly
+ """
+
+ smach.State.__init__(self,outcomes=["done"],
+ input_keys=["marked_text"],
+ output_keys=["text","ordered_behaviors","wav_file"])
+
+ self._tts = TextToSpeech(voice)
+
+ def execute(self,userdata):
+ text, behaviors = self._tts.extract_behaviors(userdata.marked_text)
+ wav_file = None
+ userdata.ordered_behaviors = sorted(behaviors,key = lambda b: b["start"])
+ userdata.text = text
+ userdata.wav_file = wav_file
+ return "done"
+
+class TTSFallbackSpeechStart(smach.State):
+ """ Speech prep from file with online TTS fallback
+
+ State to read text, wave file, and ordered behaviors from a phrase file,
+ given a key into that file. If the key isn't found, uses online TTS using
+ Amazon Polly. This is useful for debugging or if there are a small number
+ of phrases that can't be known in advance.
+
+ **Input keys**
+ * key_or_marked_text : str
+ Either a key into the phrase file or text marked up with
+ . The state tries to read from the phrase file
+ with this string. If it's not found, assume it is marked up text
+ and generate the audio with TTS
+
+ **Output keys**
+ * text : str
+        The speech text, taken from the phrase file entry if one was
+        found, otherwise with the behavior tags removed.
+ * ordered_behaviors : list of dict
+ A list of behavior dictionaries. A behavior has the keys "id",
+ "start", and "args". Ordered by start time.
+ * wav_file : str
+ None if the audio needs to be fetched, otherwise the path to the
+ audio file.
+
+ **Outcomes**
+ * done
+ Finished fetching behaviors
+
+ """
+ def __init__(self, phrases, voice="Kimberly"):
+ """ Constructor
+
+ Initializes TTS for speech using Amazon Polly with the given voice, and
+ reads in the phrase file.
+
+ Parameters
+ -----------
+ voice : str, optional
+ Which Amazon Polly voice to use. Defaults to Kimberly
+
+ phrases : str
+ Path to the phrase file to use.
+ """
+ smach.State.__init__(self,outcomes=["done"],
+ input_keys=["key_or_marked_text"],
+ output_keys=["text","ordered_behaviors","wav_file"])
+
+ self._tts = TextToSpeech(voice)
+ self._phrases = phrases
+
+
+ def execute(self,userdata):
+ if userdata.key_or_marked_text in self._phrases:
+ phrase = self._phrases[userdata.key_or_marked_text]
+ if "text" in phrase:
+ text = phrase["text"]
+ else:
+ text = None
+ wav_file = phrase["file"]
+ behaviors = phrase["behaviors"]
+ else:
+ text, behaviors = self._tts.extract_behaviors(userdata.key_or_marked_text)
+ wav_file = None
+
+ userdata.ordered_behaviors = sorted(behaviors,key = lambda b: b["start"])
+ userdata.text = text
+ userdata.wav_file = wav_file
+ return "done"
+
+class DebugSpeechStart(smach.State):
+ """ Debugging speech state with no audio
+
+ Prints the values of the provided userdata, allowing for debugging. Doesn't
+ actually play any sound.
+
+ **Input keys**
+ * behaviors : list of dict
+ A list of behavior dictionaries. A behavior has the keys "id",
+ "start", and "args".
+ * wav_file_loc : str
+ The path to the audio file to play
+ * key_or_marked_text : str
+        Either a key into the phrase file or text marked up with
+        behavior tags.
+
+ **Output keys**
+ * text : str
+        The key or marked-up text, passed through unchanged.
+ * ordered_behaviors : list of dict
+ A list of behavior dictionaries. A behavior has the keys "id",
+ "start", and "args". Ordered by start time.
+ * wav_file : str
+ None if the audio needs to be fetched, otherwise the path to the
+ audio file.
+
+ """
+
+ def __init__(self):
+ smach.State.__init__(self,outcomes=["done"],
+ input_keys=["key_or_marked_text", "behaviors",
+ "wav_file_loc"],
+ output_keys=["text","ordered_behaviors","wav_file"])
+
+ def execute(self,userdata):
+ if userdata.behaviors != None:
+ userdata.ordered_behaviors = sorted(userdata.behaviors,key = lambda b: b["start"])
+ else:
+ userdata.ordered_behaviors = userdata.behaviors
+ userdata.text = userdata.key_or_marked_text
+ userdata.wav_file = userdata.wav_file_loc
+
+
+ rospy.logwarn("Speech debug info:")
+ rospy.logwarn("Ordered Behaviors: {}".format(userdata.behaviors))
+ rospy.logwarn("Text: {}".format(userdata.key_or_marked_text))
+ rospy.logwarn("Wave file: {}".format(userdata.wav_file_loc))
+
+ return "done"
+
+
+
+class FileSpeechStart(smach.State):
+ """ Speech prep from file
+
+ State to read text, wave file, and ordered behaviors from a phrase file,
+ given a key into that file.
+
+ **Input keys**
+ * key : str
+ A key into the phrase file
+
+ **Output keys**
+ * text : str
+ If the text is included in the phrase file, the text, otherwise
+ None
+ * ordered_behaviors : list of dict
+ A list of behavior dictionaries. A behavior has the keys "id",
+ "start", and "args". Ordered by start time.
+ * wav_file : str
+ The path to the audio file.
+
+ **Outcomes**
+ * done
+ Finished fetching behaviors
+ * not_found
+ Unable to find the key in the phrase file
+
+ """
+ def __init__(self, phrases):
+ """ Constructor
+
+ Initializes TTS for speech using a phrase file.
+
+ Parameters
+ -----------
+
+ phrases : str
+ Path to the phrase file to use.
+ """
+ smach.State.__init__(self,outcomes=["done","not_found"],
+ input_keys=["key"],
+ output_keys=["text","ordered_behaviors","wav_file"])
+ self._phrases = phrases
+
+ def execute(self,userdata):
+ if userdata.key not in self._phrases:
+ return "not_found"
+ phrase = self._phrases[userdata.key]
+ if "text" in phrase:
+ text = phrase["text"]
+ else:
+ text = None
+ wav_file = phrase["file"]
+ behaviors = phrase["behaviors"]
+
+ userdata.ordered_behaviors = sorted(behaviors,key = lambda b: b["start"])
+ userdata.text = text
+ userdata.wav_file = wav_file
+ return "done"
+
+class NoPrepSpeechStart(smach.State):
+ """ Pass through speech info without prep
+
+ State for the case where the dialogue state machine will be given text,
+ behaviors (not necessarily ordered), and a path to a wave file in
+ the userdata.
+
+ **Input keys**
+ * behaviors : list of dict
+ A list of behavior dictionaries. A behavior has the keys "id",
+ "start", and "args".
+ * wav_file_loc : str
+ The path to the audio file to play
+
+ **Output keys**
+ * text : str
+ Always None
+ * ordered_behaviors : list of dict
+ A list of behavior dictionaries. A behavior has the keys "id",
+ "start", and "args". Ordered by start time.
+ * wav_file : str
+ The path to the audio file.
+
+ **Outcomes**
+ * done
+ Finished fetching behaviors
+ * missing_info
+ Missing information in the input keys
+
+ """
+ def __init__(self):
+ """ Constructor
+
+ Initializes passthrough state for speech features.
+ """
+ smach.State.__init__(self,outcomes=["done","missing_info"],
+ input_keys=["behaviors","wav_file_loc"],
+ output_keys=["text","ordered_behaviors","wav_file"])
+
+ def execute(self,userdata):
+ if userdata.behaviors==None:
+ return "missing_info"
+        if userdata.wav_file_loc==None:
+ return "missing_info"
+ behaviors = userdata.behaviors
+ text = None
+        wav_file = userdata.wav_file_loc
+
+ userdata.ordered_behaviors = sorted(behaviors,key = lambda b: b["start"])
+ userdata.text = text
+ userdata.wav_file = wav_file
+
+ return "done"
+
+class SpeechDebugState(smach.State):
+ """ Speech debug state
+
+ Doesn't do anything, allowing for debugging of speech input without
+ requiring tts or audio files.
+
+ **Input keys**
+ * text : str
+ Can be None; the text version of the robot's speech. Used to print
+ to the screen.
+ * wav_file : str
+ Can be None if TTS is on. The path to the audio file.
+
+ **Outcomes**
+ * done
+ Finished fetching behaviors. Always returns this.
+ * no_audio
+ Couldn't find the wave file, or no wave file provided and TTS
+ turned off. Never returns this.
+ * preempted
+ State was preempted before audio finished playing. If preempted,
+ will try to stop the audio. Never returns this.
+
+
+ """
+ def __init__(self, synchronizer):
+
+ smach.State.__init__(self,outcomes=["preempted","no_audio","done"],
+ input_keys=["text","wav_file"])
+ self._sync = synchronizer
+
+ def execute(self,userdata):
+ return "done"
+
+class SpeechState(smach.State):
+ """ Speech player state
+
+ Takes in text and/or a wave file. Given a wave file, plays the file.
+ If no wave file is provided and text-to-speech is on, fetch the audio from
+ Amazon Polly and play the audio. If no wave file is provided and text-to-
+ speech is off, return "no_audio"
+
+ **Input keys**
+ * text : str
+ Can be None; the text version of the robot's speech. Used to print
+ to the screen.
+ * wav_file : str
+ Can be None if TTS is on. The path to the audio file.
+
+ **Outcomes**
+ * done
+ Finished fetching behaviors
+ * no_audio
+ Couldn't find the wave file, or no wave file provided and TTS
+ turned off
+ * preempted
+ State was preempted before audio finished playing. If preempted,
+ will try to stop the audio.
+
+ """
+ def __init__(self, use_tts, synchronizer, phrases=None, voice="Kimberly"):
+ """ Constructor
+
+ Initializes the speech state with the desired parameters; either with
+ or without online TTS, and with or without a pre-generated phrase
+ file.
+
+ Parameters
+ -----------
+ use_tts : bool
+ If true, allow online TTS
+ synchronizer : Synchronizer
+ Synchronizer object to allow sync of speech and behaviors. Should
+ be the same object as is passed to behavior ControllerState objects
+ with ``setup_sync``
+ voice : str, optional
+ Which Amazon Polly voice to use. Defaults to Kimberly
+
+ phrases : str, optional
+ Path to the phrase file to use. If None, require online TTS.
+ """
+ smach.State.__init__(self,outcomes=["preempted","no_audio","done"],
+ input_keys=["text","wav_file"])
+ self._tts = use_tts
+ if use_tts:
+
+ self._talker = TextToSpeech(voice)
+
+ self._sound_client = SoundClient()
+ self._sync = synchronizer
+
+ def execute(self,userdata):
+ rospy.loginfo("Saying: {}".format(userdata.text))
+ if userdata.wav_file==None:
+ if userdata.text!=None and len(userdata.text)!=0:
+ speech_duration = self._talker.say(userdata.text,wait=False)
+ rospy.sleep(0.5)
+ self._sync.start()
+ else:
+ return "no_audio"
+ else:
+ wav_file = userdata.wav_file
+ with contextlib.closing(wave.open(wav_file,'r')) as f:
+ frames=f.getnframes()
+ rate=f.getframerate()
+ speech_duration=frames/float(rate)
+ self._sound_client.playWave(wav_file)
+ self._sync.start()
+
+        while rospy.Time.now()-self._sync.time < rospy.Duration.from_sec(speech_duration):
+            if self.preempt_requested():
+                self._sound_client.stopAll()
+                self.service_preempt()
+                return "preempted"
+            rospy.sleep(0.001)
+        return "done"
+
+
+if __name__ == "__main__":
+ rospy.init_node("test_action_client", disable_signals=True)
+ client = actionlib.SimpleActionClient("/HLPR_Dialogue",dialogue_msgs.DialogueActAction)
+ rospy.loginfo("waiting for server")
+ client.wait_for_server()
+ rospy.loginfo("got server")
+ while not rospy.is_shutdown():
+ print("Say what?")
+ try:
+ s = raw_input()
+ except KeyboardInterrupt:
+ print("")
+ print("bye!")
+ rospy.signal_shutdown("requested by user")
+ sys.exit()
+ client.send_goal(dialogue_msgs.DialogueActGoal(text_or_key=s))
+ client.wait_for_result()
+ print(client.get_result())
diff --git a/hlpr_dialogue_production/src/hlpr_dialogue_production/test_action_server.py b/hlpr_dialogue_production/src/hlpr_dialogue_production/test_action_server.py
new file mode 100755
index 0000000..e5fe871
--- /dev/null
+++ b/hlpr_dialogue_production/src/hlpr_dialogue_production/test_action_server.py
@@ -0,0 +1,52 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2017, Elaine Short, SIM Lab
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the SIM Lab nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import rospy
+import smach
+import smach_ros
+import actionlib
+import actionlib_tutorials.msg
+import os
+
+class TestActionServer():
+ def __init__(self,name):
+ self._action_name = name
+ self._as = actionlib.SimpleActionServer(self._action_name, actionlib_tutorials.msg.FibonacciAction, execute_cb=self.execute_cb, auto_start = False)
+ self._as.start()
+
+ def execute_cb(self, goal):
+ print "Got goal: {}".format(goal)
+ os.system("aplay ~/code/sandbox/contingency/src/contingency/short_chime.wav")
+ self._as.set_succeeded(actionlib_tutorials.msg.FibonacciResult())
+
+if __name__=="__main__":
+ rospy.init_node("test_action_server")
+ server = TestActionServer(rospy.get_name())
+ rospy.spin()
diff --git a/hlpr_dialogue_production/src/hlpr_dialogue_production/viseme_action_server.py b/hlpr_dialogue_production/src/hlpr_dialogue_production/viseme_action_server.py
new file mode 100755
index 0000000..10c2c9e
--- /dev/null
+++ b/hlpr_dialogue_production/src/hlpr_dialogue_production/viseme_action_server.py
@@ -0,0 +1,97 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2017, Elaine Short, SIM Lab
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the SIM Lab nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import rospy
+import actionlib
+from poli_msgs.srv import LedEye, LedEyeRequest
+import hlpr_dialogue_production.msg as dialogue_msgs
+
+class LedVisemeActionServer:
+ def __init__(self):
+ self._as = actionlib.SimpleActionServer("HLPR_Visemes",dialogue_msgs.VisemeAction, execute_cb=self.execute_cb, auto_start=False)
+ rospy.wait_for_service("/led_eye")
+ self.change_eye = rospy.ServiceProxy("/led_eye",LedEye)
+ self.viseme_mapping = {"p": LedEyeRequest.FLAT,
+ "t": LedEyeRequest.FLAT,
+ "S": LedEyeRequest.FLAT,
+ "T": LedEyeRequest.FLAT,
+ "f": LedEyeRequest.FLAT,
+ "k": LedEyeRequest.BIGOPEN,
+                               "r": LedEyeRequest.FLAT,
+                               "s": LedEyeRequest.FLAT,
+ "@": LedEyeRequest.BIGOPEN,
+ "a": LedEyeRequest.BIGOPEN,
+ "e": LedEyeRequest.OPEN,
+ "E": LedEyeRequest.OPEN,
+ "i": LedEyeRequest.OPEN,
+ "o": LedEyeRequest.WHISTLE,
+ "O": LedEyeRequest.BIGOPEN,
+ "u": LedEyeRequest.WHISTLE,
+ "sil": LedEyeRequest.SMILE}
+ self.prev_viseme = "sil"
+ self._as.start()
+
+ rospy.loginfo("Viseme server ready to go")
+
+ def execute_cb(self,req):
+ rospy.loginfo("Got request: {}".format(req))
+ if req.viseme != self.prev_viseme:
+ self.change_eye(command=LedEyeRequest.UPDATE, which_part=LedEyeRequest.MOUTH, which_feature=LedEyeRequest.SHAPE, mouth_shape=self.viseme_mapping[req.viseme])
+ self.prev_viseme = req.viseme
+ self._as.set_succeeded(dialogue_msgs.VisemeResult(success=True))
+
+
+'''{"p": "M_B_P",
+ "t": "N_NG_D_Z",
+ "S": "CH_SH_ZH",
+ "T": "N_NG_D_Z",
+ "f": "M_B_P",
+ "k": "AA_AH",
+ "i": "EY",
+ "r": "R_ER",
+ "s": "N_NG_D_Z",
+ "u": "CH_SH_ZH",
+ "@": "AA_AH",
+ "a": "AA_AH",
+ "e": "EY",
+ "E": "EH_AE_AY",
+ "i": "EY",
+ "o": "AO_AW",
+ "O": "AA_AH",
+ "u": "AO_AW",
+ "sil": "IDLE"}'''
+
+
+if __name__=="__main__":
+ rospy.init_node("viseme_action_server")
+ l = LedVisemeActionServer()
+
diff --git a/hlpr_speech/cordial-public b/hlpr_speech/cordial-public
new file mode 160000
index 0000000..5a8e9f1
--- /dev/null
+++ b/hlpr_speech/cordial-public
@@ -0,0 +1 @@
+Subproject commit 5a8e9f16f97f28db1422319ccd0943224249beb8
diff --git a/hlpr_speech_recognition/scripts/demo.py b/hlpr_speech_recognition/scripts/demo.py
new file mode 100644
index 0000000..05d51f4
--- /dev/null
+++ b/hlpr_speech_recognition/scripts/demo.py
@@ -0,0 +1,34 @@
+
+from record import Recorder
+from speech2text import TranscribeClient
+import sounddevice as sd
+import argparse
+import os
+
+DEVICE = 6
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser('Speech2Text demo')
+ parser.add_argument('-l', '--list-devices', action='store_true', help='show list of audio devices and exit')
+    parser.add_argument('-d', '--device', type=int, default=DEVICE, help='numeric id of input device')
+
+ args = parser.parse_args()
+ list_devices = args.list_devices
+ device = args.device
+
+ if list_devices:
+ print(sd.query_devices())
+ else:
+ recorder = Recorder(device)
+        raw_input('Press Enter to start recording')
+ record_file = recorder.record()
+ print('Recording...')
+        raw_input('Press Enter to stop recording')
+ recorder.stop()
+ print('Recording finished.')
+
+ transcriber = TranscribeClient()
+ results = transcriber.transcribe(record_file)
+
+ print('Transcript: {}'.format(results[0]))
+ os.remove(record_file)
diff --git a/hlpr_speech_recognition/scripts/record.py b/hlpr_speech_recognition/scripts/record.py
new file mode 100644
index 0000000..da1ce63
--- /dev/null
+++ b/hlpr_speech_recognition/scripts/record.py
@@ -0,0 +1,64 @@
+from threading import Thread
+import sounddevice as sd
+import soundfile as sf
+import tempfile
+import argparse
+import Queue as queue
+import sys
+
+DEVICE = 6
+
+class Recorder(object):
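+    """Records mono audio from a sounddevice input device to a FLAC file on a
+    background thread until stop() is called."""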
+ def __init__(self, device, type='flac', save_dir=''):
+ self.device = device
+ self.device_info = sd.query_devices(self.device, 'input')
+ self.sample_rate = int(self.device_info['default_samplerate'])
+ self.type = type
+ self.save_dir = save_dir
+ self.recording = False
+
+ def _record(self, filename):
+ q = queue.Queue()
+ def callback(indata, frames, time, status):
+ #if status:
+ # print(status, file=sys.stderr)
+ q.put(indata.copy())
+
+ with sf.SoundFile(filename, mode='x', samplerate=self.sample_rate, channels=1) as f:
+ with sd.InputStream(samplerate=self.sample_rate, device=self.device,
+ channels=1, callback=callback):
+ while self.recording:
+ f.write(q.get())
+
+ def record(self, filename=None):
+ if filename is None:
+ filename = tempfile.mktemp(prefix='rec_', suffix='.'+self.type, dir=self.save_dir)
+
+ self.recording = True
+ record_thread = Thread(target=self._record, args=(filename,))
+ record_thread.start()
+
+        return filename
+
+ def stop(self):
+ self.recording = False
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser('Record sound from microphone')
+    parser.add_argument('-l', '--list-devices', action='store_true', help='show list of audio devices and exit')
+ parser.add_argument('-d', '--device', type=int, default=DEVICE, help='numeric id of input device')
+
+ args = parser.parse_args()
+ list_devices = args.list_devices
+ device = args.device
+
+ if list_devices:
+ print(sd.query_devices())
+ else:
+ recorder = Recorder(device)
+        raw_input('Press Enter to start recording')
+ record_file = recorder.record()
+ print('Recording...')
+        raw_input('Press Enter to stop recording')
+ recorder.stop()
+        print('Recording finished: {}'.format(record_file))
diff --git a/hlpr_speech_recognition/scripts/speech2text.py b/hlpr_speech_recognition/scripts/speech2text.py
new file mode 100644
index 0000000..dd5e90d
--- /dev/null
+++ b/hlpr_speech_recognition/scripts/speech2text.py
@@ -0,0 +1,36 @@
+from google.cloud import speech
+from google.cloud.speech import enums
+from google.cloud.speech import types
+import sys
+import io
+
+class TranscribeClient(object):
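+    """Thin wrapper around the Google Cloud Speech API that transcribes a
+    recorded FLAC file to text."""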
+ def __init__(self):
+ self.client = speech.SpeechClient()
+ self.SUPPORTED_FILE_TYPES = ['flac']
+
+ def transcribe(self, filename):
+ filetype = filename.split('.')[-1].lower()
+        if filetype not in self.SUPPORTED_FILE_TYPES:
+ raise NotImplementedError('filetype not supported.')
+
+ with io.open(filename, 'rb') as audio_file:
+ content = audio_file.read()
+ audio = types.RecognitionAudio(content=content)
+
+ config = types.RecognitionConfig(language_code='en-US')
+ response = self.client.recognize(config, audio)
+
+ results = []
+ for result in response.results:
+ results.append(result.alternatives[0].transcript)
+
+ return results
+
+if __name__ == '__main__':
+ filename = sys.argv[1]
+ transcriber = TranscribeClient()
+ results = transcriber.transcribe(filename)
+
+ for result in results:
+ print('Transcript: {}'.format(result))
diff --git a/hlpr_speech_synthesis/src/hlpr_speech_synthesis/AWS_speech_synthesis.py b/hlpr_speech_synthesis/src/hlpr_speech_synthesis/AWS_speech_synthesis.py
new file mode 100644
index 0000000..d895929
--- /dev/null
+++ b/hlpr_speech_synthesis/src/hlpr_speech_synthesis/AWS_speech_synthesis.py
@@ -0,0 +1,162 @@
+#!/usr/bin/env python
+import boto3
+import pygame
+from pygame import mixer
+import sys
+import re
+import json
+import tempfile
+import os
+from contextlib import closing
+import io
+
+class TextToSpeech():
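+    """Synthesizes speech with AWS Polly (via boto3) and plays it back locally
+    with pygame; also extracts word/viseme timing marks from tagged text."""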
+ def __init__(self, voice='Kimberly'):
+ self.tag_follow_indices = {}
+ self.tags = []
+ self.client = boto3.client('polly')
+ self.voice = voice
+
+ def collect_tags(self, tagged_text):
+ self.tags = re.findall(r'<.+?>',tagged_text)
+
+ def remove_tags(self, tagged_text):
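+        """Return tagged_text with each <...> tag (and the character
+        immediately before it, typically a space) removed."""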
+ untagged_text = ''
+ open_brackets_idx = [m.start() for m in re.finditer('<', tagged_text)]
+ close_brackets_idx = [m.start() for m in re.finditer('>', tagged_text)]
+ assert(len(open_brackets_idx)==len(close_brackets_idx))
+
+ open_brackets_idx.sort()
+ close_brackets_idx.sort()
+
+ pointer = 0
+ for i in range(len(open_brackets_idx)):
+ if(open_brackets_idx[i]!=0):
+ untagged_text = untagged_text + tagged_text[pointer:open_brackets_idx[i]-1]
+ pointer = close_brackets_idx[i]+1
+ if(pointer!=len(tagged_text)):
+ untagged_text = untagged_text + tagged_text[pointer:]
+
+ return untagged_text
+
+ def extract_behaviors(self, tagged_text):
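+        """Strip behavior tags from tagged_text, record which word each tag
+        follows, and fetch Polly speech marks (word and viseme timings) for
+        the untagged text. Returns (untagged_text, list of speech-mark dicts)."""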
+ # Remove tags from the input string
+ untagged_text = self.remove_tags(tagged_text)
+ untagged_word_list = untagged_text.split()
+ self.collect_tags(tagged_text)
+
+ # Store which word the tags come after
+ for tag in list(set(self.tags)):
+            tag_idxs = [m.start() for m in re.finditer(re.escape(tag), tagged_text)]  # find all occurrences
+ for tag_idx in tag_idxs:
+ if tag_idx!=0:
+ substring = tagged_text[:tag_idx]
+ if(substring[-1]==' '):
+ substring = substring[:-1]
+ untagged_substring = self.remove_tags(substring)
+ prev_word_idx = untagged_substring.count(' ') + 1
+
+ if tag not in self.tag_follow_indices:
+ self.tag_follow_indices[tag] = [prev_word_idx]
+ else:
+ self.tag_follow_indices[tag].append(prev_word_idx)
+ else:
+ if tag not in self.tag_follow_indices:
+ self.tag_follow_indices[tag] = [0]
+ else:
+ self.tag_follow_indices[tag].append(0)
+
+ # Fetch meta info about speech from AWS using boto3
+ response = self.client.synthesize_speech(
+ OutputFormat='json',
+ SpeechMarkTypes=['viseme','word'],
+ Text=untagged_text,
+ VoiceId=self.voice)
+
+ # Unpack meta info json to an unsorted list of dictionaries
+ s = []
+ if "AudioStream" in response:
+ with closing(response["AudioStream"]) as stream:
+ data = stream.read()
+
+ s = data.split('\n')
+ s = [json.loads(line) for line in s if line != '']
+ else:
+ print("Could not stream audio")
+
+ return untagged_text, s
+
+ def phrase_to_file(self, tagged_text, output_file_loc):
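+        """Synthesize tagged_text to an MP3 at output_file_loc and return a
+        dict with the untagged text, the file path, and the speech marks."""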
+
+ # Remove tags from the input string
+        untagged_text, s = self.extract_behaviors(tagged_text)
+
+ # Fetch audio for speech from AWS using boto3
+ spoken_text = self.client.synthesize_speech(
+ OutputFormat='mp3',
+ Text=untagged_text,
+ VoiceId=self.voice)
+
+ # Write audio to a file
+        with open(output_file_loc,'wb') as f:
+            f.write(spoken_text['AudioStream'].read())
+
+ b = {
+ 'text': untagged_text,
+            'file': os.path.abspath(output_file_loc),
+ 'behaviors': s
+ }
+
+ return b
+
+ def say(self, untagged_text, wait=False, interrupt=True):
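+        """Synthesize untagged_text with Polly and play it through pygame.
+        Returns the clip length in seconds, or -1 if wait=True (blocks until
+        playback finishes)."""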
+
+ response = self.client.synthesize_speech(
+ OutputFormat='ogg_vorbis',
+ Text=untagged_text,
+ VoiceId=self.voice)
+
+ with tempfile.TemporaryFile() as f:
+ if "AudioStream" in response:
+ with closing(response["AudioStream"]) as stream:
+ try:
+ f.write(stream.read())
+ except IOError as error:
+ print(error)
+ sys.exit(-1)
+
+ else:
+ print("Could not stream audio")
+ sys.exit(-1)
+
+ f.seek(0)
+
+ if not pygame.mixer.get_init():
+ pygame.mixer.init()
+ else:
+ if interrupt:
+ pygame.mixer.stop()
+ channel = pygame.mixer.Channel(5)
+ channel.set_volume(1.0)
+ sound = pygame.mixer.Sound(f)
+ channel.play(sound)
+
+ if wait:
+ while channel.get_busy():
+ pass
+ return -1
+
+
+ return sound.get_length()
+
+ def is_speaking(self):
+ # Returns whether or not audio is being played
+ return mixer.music.get_busy()
+
+if __name__ == '__main__':
+ tagged_string = "Hello ! How are you?"
+ print(tagged_string)
+ obj = TextToSpeech(voice='Kimberly')
+ b = obj.phrase_to_file(tagged_string, 'data/out.mp3')
+ obj.say(b['text'], wait=True, interrupt=False)
\ No newline at end of file