我刚开始学习编程和图像处理。最近我开发了一个系统,可以从视频源中检测人脸并识别人物。如果数据库中已有该人物的信息,系统会在画面中标记其姓名;如果是新人物,系统会要求输入姓名,并拍摄足够的照片存入数据库,以便下次识别。我使用的是 Fisherfaces(Fisher 脸)算法来完成这项任务。现在我的问题是,我想让系统能够说话。我希望它能说出它最近识别的人的名字。我可以使用
// Helper whose only job is to invoke talk() from its constructor, so that
// talk() runs once when the static object Once_ is constructed.
static struct Once
{
    Once() { talk(); }
} Once_;
来调用talk函数一次,但这并不自然,而且talk函数无法接受用户输入。
请问有人能建议我一个解决方案,或者告诉我从哪里开始解决这个问题吗?
talk函数(即下面代码中的 speech 函数)是这样的:
int speech(char* value){ISpVoice * pVoice = NULL;if (FAILED(::CoInitialize(NULL))) return FALSE;HRESULT hr = CoCreateInstance(CLSID_SpVoice, NULL, CLSCTX_ALL, IID_ISpVoice, (void **)&pVoice);if( SUCCEEDED( hr ) ){ hr = pVoice->Speak(L"userINPUT", SPF_IS_XML, NULL); pVoice->Release(); pVoice = NULL;}::CoUninitialize();return TRUE;}
回答:
所以,我的提议是:
// -- >8 ---------- speech.h --------------------------#ifndef __speech_onboard__#define __speech_onboard__struct ISpVoice; // fwd ref, since mixing opencv and windows headers is a receipt for desasternamespace Speech{ class Voice { ISpVoice * spVoice; public: Voice(); ~Voice(); int speak( const char * txt, int flags=0 ) const ; // Supported values range from -10 to 10 int setRate( int s ); // Supported values range from 0 to 100 int setVolume( int s ); };};#endif // __speech_onboard__// ---- >8 speech.cpp ------------------------------#include <windows.h>#include <sapi.h>#include "speech.h"#define COM_RELEASE(x) { if ((x)) (x)->Release(); (x) = NULL; }namespace Speech{ struct _ComUser { _ComUser() {CoInitialize(0);} ~_ComUser() {CoUninitialize();} } _we_need_a_singleton_per_module; inline int w2a( WCHAR *in, char *out ) { out[0]=0; return WideCharToMultiByte(CP_ACP, 0, in, -1, out, MAX_PATH, 0, 0); } inline int a2w( const char *in, WCHAR *out ) { out[0]=0; return MultiByteToWideChar(CP_ACP, 0, in, -1, out, MAX_PATH); } Voice::Voice() : spVoice(0) { HRESULT hr = CoCreateInstance( CLSID_SpVoice, NULL, CLSCTX_INPROC_SERVER, IID_ISpVoice, (LPVOID *)&(spVoice) ); } Voice::~Voice() { COM_RELEASE( spVoice ); } //SPF_ASYNC = ( 1L << 0 ) , //SPF_PURGEBEFORESPEAK = ( 1L << 1 ) , //SPF_IS_FILENAME = ( 1L << 2 ) , //SPF_IS_XML = ( 1L << 3 ) , //SPF_IS_NOT_XML = ( 1L << 4 ) , //SPF_PERSIST_XML = ( 1L << 5 ) , //SPF_NLP_SPEAK_PUNC = ( 1L << 6 ) , //SPF_PARSE_SAPI = ( 1L << 7 ) , //SPF_PARSE_SSML = ( 1L << 8 ) , //SPF_PARSE_AUTODETECT = 0, int Voice::speak( const char * txt, int flags ) const { if ( ! spVoice ) return 0; WCHAR wtxt[800]; a2w(txt,wtxt); ULONG pulstream = 0; HRESULT hr = spVoice->Speak( wtxt, flags, &pulstream ); return hr==S_OK; } // Supported values range from -10 to 10 int Voice::setRate( int s ) { if ( ! spVoice ) return 0; HRESULT hr = spVoice->SetRate( s ); return hr==S_OK; } // Supported values range from 0 to 100 int Voice::setVolume( int s ) { if ( ! 
spVoice ) return 0; HRESULT hr = spVoice->SetVolume ( s ); return hr==S_OK; }}// ----- >8 main.cpp --------------------------------------------#include "opencv2/core/core.hpp"#include "opencv2/highgui/highgui.hpp"using namespace cv;#include "speech.h"int main(int argc, char** argv){ Speech::Voice voice; voice.speak("hello , oh, hello!", 1); // async Mat img(300,300,CV_8UC3,Scalar(255,0,0)); namedWindow("Display window",0); putText(img,"lala la",Point(20,120),0,2.5,Scalar(0,200,0),5); imshow("Display window", img); waitKey(0); voice.speak("bye bye, see you later !"); // sync return 0;}