PocketSphinx 5prealpha
kws_search.c
1/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2/* ====================================================================
3 * Copyright (c) 2013 Carnegie Mellon University. All rights
4 * reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 *
18 *
19 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
20 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
23 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 *
31 * ====================================================================
32 *
33 */
34
35/*
36* kws_search.c -- Search object for key phrase spotting.
37*/
38
39#include <stdio.h>
40#include <string.h>
41#include <assert.h>
42
43#include <sphinxbase/err.h>
44#include <sphinxbase/ckd_alloc.h>
45#include <sphinxbase/strfuncs.h>
46#include <sphinxbase/pio.h>
47#include <sphinxbase/cmd_ln.h>
48
50#include "kws_search.h"
51
/* Access macros. */

/* True when the HMM's frame field is positive. */
#define hmm_is_active(h) (0 < (h)->frame)
/* Address of element idx in the keyphrase's hmms array. */
#define kws_nth_hmm(kp, idx) (&(kp)->hmms[idx])

/*
 * Experimentally chosen maximum difference between the triphone score and
 * the phone-loop score.  It is subtracted in the confidence computation so
 * that the resulting confidence stays below 1.  The right value may differ
 * from model to model; 1500 corresponds to a threshold of roughly 1e+50.
 */
#define KWS_MAX 1500
61
62static ps_lattice_t *
63kws_search_lattice(ps_search_t * search)
64{
65 return NULL;
66}
67
68static int
69kws_search_prob(ps_search_t * search)
70{
71 return 0;
72}
73
74static void
75kws_seg_free(ps_seg_t *seg)
76{
77 kws_seg_t *itor = (kws_seg_t *)seg;
78 ckd_free(itor);
79}
80
81static void
82kws_seg_fill(kws_seg_t *itor)
83{
84 kws_detection_t* detection = (kws_detection_t*)gnode_ptr(itor->detection);
85
86 itor->base.word = detection->keyphrase;
87 itor->base.sf = detection->sf;
88 itor->base.ef = detection->ef;
89 itor->base.prob = detection->prob;
90 itor->base.ascr = detection->ascr;
91 itor->base.lscr = 0;
92}
93
94static ps_seg_t *
95kws_seg_next(ps_seg_t *seg)
96{
97 kws_seg_t *itor = (kws_seg_t *)seg;
98
99 gnode_t *detect_head = gnode_next(itor->detection);
100 while (detect_head != NULL && ((kws_detection_t*)gnode_ptr(detect_head))->ef > itor->last_frame)
101 detect_head = gnode_next(detect_head);
102 itor->detection = detect_head;
103
104 if (!itor->detection) {
105 kws_seg_free(seg);
106 return NULL;
107 }
108
109 kws_seg_fill(itor);
110
111 return seg;
112}
113
114static ps_segfuncs_t kws_segfuncs = {
115 /* seg_next */ kws_seg_next,
116 /* seg_free */ kws_seg_free
117};
118
119static ps_seg_t *
120kws_search_seg_iter(ps_search_t * search)
121{
122 kws_search_t *kwss = (kws_search_t *)search;
123 kws_seg_t *itor;
124 gnode_t *detect_head = kwss->detections->detect_list;
125
126 while (detect_head != NULL && ((kws_detection_t*)gnode_ptr(detect_head))->ef > kwss->frame - kwss->delay)
127 detect_head = gnode_next(detect_head);
128
129 if (!detect_head)
130 return NULL;
131
132 itor = (kws_seg_t *)ckd_calloc(1, sizeof(*itor));
133 itor->base.vt = &kws_segfuncs;
134 itor->base.search = search;
135 itor->base.lwf = 1.0;
136 itor->detection = detect_head;
137 itor->last_frame = kwss->frame - kwss->delay;
138 kws_seg_fill(itor);
139 return (ps_seg_t *)itor;
140}
141
142static ps_searchfuncs_t kws_funcs = {
143 /* start: */ kws_search_start,
144 /* step: */ kws_search_step,
145 /* finish: */ kws_search_finish,
146 /* reinit: */ kws_search_reinit,
147 /* free: */ kws_search_free,
148 /* lattice: */ kws_search_lattice,
149 /* hyp: */ kws_search_hyp,
150 /* prob: */ kws_search_prob,
151 /* seg_iter: */ kws_search_seg_iter,
152};
153
154
155/* Activate senones for scoring */
156static void
157kws_search_sen_active(kws_search_t * kwss)
158{
159 int i;
160 gnode_t *gn;
161
162 acmod_clear_active(ps_search_acmod(kwss));
163
164 /* active phone loop hmms */
165 for (i = 0; i < kwss->n_pl; i++)
166 acmod_activate_hmm(ps_search_acmod(kwss), &kwss->pl_hmms[i]);
167
168 /* activate hmms in active nodes */
169 for (gn = kwss->keyphrases; gn; gn = gnode_next(gn)) {
170 kws_keyphrase_t *keyphrase = gnode_ptr(gn);
171 for (i = 0; i < keyphrase->n_hmms; i++) {
172 if (hmm_is_active(kws_nth_hmm(keyphrase, i)))
173 acmod_activate_hmm(ps_search_acmod(kwss), kws_nth_hmm(keyphrase, i));
174 }
175 }
176}
177
178/*
179* Evaluate all the active HMMs.
180* (Executed once per frame.)
181*/
182static void
183kws_search_hmm_eval(kws_search_t * kwss, int16 const *senscr)
184{
185 int32 i;
186 gnode_t *gn;
187 int32 bestscore = WORST_SCORE;
188
189 hmm_context_set_senscore(kwss->hmmctx, senscr);
190
191 /* evaluate hmms from phone loop */
192 for (i = 0; i < kwss->n_pl; ++i) {
193 hmm_t *hmm = &kwss->pl_hmms[i];
194 int32 score;
195
196 score = hmm_vit_eval(hmm);
197 if (score BETTER_THAN bestscore)
198 bestscore = score;
199 }
200 /* evaluate hmms for active nodes */
201 for (gn = kwss->keyphrases; gn; gn = gnode_next(gn)) {
202 kws_keyphrase_t *keyphrase = gnode_ptr(gn);
203 for (i = 0; i < keyphrase->n_hmms; i++) {
204 hmm_t *hmm = kws_nth_hmm(keyphrase, i);
205
206 if (hmm_is_active(hmm)) {
207 int32 score;
208 score = hmm_vit_eval(hmm);
209 if (score BETTER_THAN bestscore)
210 bestscore = score;
211 }
212 }
213 }
214
215 kwss->bestscore = bestscore;
216}
217
218/*
219* (Beam) prune the just evaluated HMMs, determine which ones remain
220* active. Executed once per frame.
221*/
222static void
223kws_search_hmm_prune(kws_search_t * kwss)
224{
225 int32 thresh, i;
226 gnode_t *gn;
227
228 thresh = kwss->bestscore + kwss->beam;
229
230 for (gn = kwss->keyphrases; gn; gn = gnode_next(gn)) {
231 kws_keyphrase_t *keyphrase = gnode_ptr(gn);
232 for (i = 0; i < keyphrase->n_hmms; i++) {
233 hmm_t *hmm = kws_nth_hmm(keyphrase, i);
234 if (hmm_is_active(hmm) && hmm_bestscore(hmm) < thresh)
235 hmm_clear(hmm);
236 }
237 }
238}
239
240
244static void
245kws_search_trans(kws_search_t * kwss)
246{
247 hmm_t *pl_best_hmm = NULL;
248 int32 best_out_score = WORST_SCORE;
249 int i;
250 gnode_t *gn;
251
252 /* select best hmm in phone-loop to be a predecessor */
253 for (i = 0; i < kwss->n_pl; i++)
254 if (hmm_out_score(&kwss->pl_hmms[i]) BETTER_THAN best_out_score) {
255 best_out_score = hmm_out_score(&kwss->pl_hmms[i]);
256 pl_best_hmm = &kwss->pl_hmms[i];
257 }
258
259 /* out probs are not ready yet */
260 if (!pl_best_hmm)
261 return;
262
263 /* Check whether keyphrase wasn't spotted yet */
264 for (gn = kwss->keyphrases; gn; gn = gnode_next(gn)) {
265 kws_keyphrase_t *keyphrase = gnode_ptr(gn);
266 hmm_t *last_hmm;
267
268 if (keyphrase->n_hmms < 1)
269 continue;
270
271 last_hmm = kws_nth_hmm(keyphrase, keyphrase->n_hmms - 1);
272
273 if (hmm_is_active(last_hmm)
274 && hmm_out_score(pl_best_hmm) BETTER_THAN WORST_SCORE) {
275
276 if (hmm_out_score(last_hmm) - hmm_out_score(pl_best_hmm)
277 >= keyphrase->threshold) {
278
279 int32 prob = hmm_out_score(last_hmm) - hmm_out_score(pl_best_hmm) - KWS_MAX;
280 kws_detections_add(kwss->detections, keyphrase->word,
281 hmm_out_history(last_hmm),
282 kwss->frame, prob,
283 hmm_out_score(last_hmm));
284 } /* keyphrase is spotted */
285 } /* last hmm of keyphrase is active */
286 } /* keyphrase loop */
287
288 /* Make transition for all phone loop hmms */
289 for (i = 0; i < kwss->n_pl; i++) {
290 if (hmm_out_score(pl_best_hmm) + kwss->plp BETTER_THAN
291 hmm_in_score(&kwss->pl_hmms[i])) {
292 hmm_enter(&kwss->pl_hmms[i],
293 hmm_out_score(pl_best_hmm) + kwss->plp,
294 hmm_out_history(pl_best_hmm), kwss->frame + 1);
295 }
296 }
297
298 /* Activate new keyphrase nodes, enter their hmms */
299 for (gn = kwss->keyphrases; gn; gn = gnode_next(gn)) {
300 kws_keyphrase_t *keyphrase = gnode_ptr(gn);
301
302 if (keyphrase->n_hmms < 1)
303 continue;
304
305 for (i = keyphrase->n_hmms - 1; i > 0; i--) {
306 hmm_t *pred_hmm = kws_nth_hmm(keyphrase, i - 1);
307 hmm_t *hmm = kws_nth_hmm(keyphrase, i);
308
309 if (hmm_is_active(pred_hmm)) {
310 if (!hmm_is_active(hmm)
311 || hmm_out_score(pred_hmm) BETTER_THAN
312 hmm_in_score(hmm))
313 hmm_enter(hmm, hmm_out_score(pred_hmm),
314 hmm_out_history(pred_hmm), kwss->frame + 1);
315 }
316 }
317
318 /* Enter keyphrase start node from phone loop */
319 if (hmm_out_score(pl_best_hmm) BETTER_THAN
320 hmm_in_score(kws_nth_hmm(keyphrase, 0)))
321 hmm_enter(kws_nth_hmm(keyphrase, 0), hmm_out_score(pl_best_hmm),
322 kwss->frame, kwss->frame + 1);
323 }
324}
325
326static int
327kws_search_read_list(kws_search_t *kwss, const char* keyfile)
328{
329 FILE *list_file;
330 lineiter_t *li;
331 char *line;
332
333 if ((list_file = fopen(keyfile, "r")) == NULL) {
334 E_ERROR_SYSTEM("Failed to open keyphrase file '%s'", keyfile);
335 return -1;
336 }
337
338 kwss->keyphrases = NULL;
339
340 /* read keyphrases */
341 for (li = lineiter_start_clean(list_file); li; li = lineiter_next(li)) {
342 size_t begin, end;
343 kws_keyphrase_t *keyphrase;
344
345 if (li->len == 0)
346 continue;
347
348 keyphrase = ckd_calloc(1, sizeof(kws_keyphrase_t));
349
350 line = li->buf;
351 end = strlen(line) - 1;
352 begin = end - 1;
353 if (line[end] == '/') {
354 while (line[begin] != '/' && begin > 0)
355 begin--;
356 line[end] = 0;
357 line[begin] = 0;
358 keyphrase->threshold = (int32) logmath_log(kwss->base.acmod->lmath, atof_c(line + begin + 1))
359 >> SENSCR_SHIFT;
360 } else {
361 keyphrase->threshold = kwss->def_threshold;
362 }
363
364 keyphrase->word = ckd_salloc(line);
365
366 kwss->keyphrases = glist_add_ptr(kwss->keyphrases, keyphrase);
367 }
368
369 fclose(list_file);
370 return 0;
371}
372
374kws_search_init(const char *name,
375 const char *keyphrase,
376 const char *keyfile,
377 cmd_ln_t * config,
378 acmod_t * acmod, dict_t * dict, dict2pid_t * d2p)
379{
380 kws_search_t *kwss = (kws_search_t *) ckd_calloc(1, sizeof(*kwss));
381 ps_search_init(ps_search_base(kwss), &kws_funcs, PS_SEARCH_TYPE_KWS, name, config, acmod, dict,
382 d2p);
383
384 kwss->detections = (kws_detections_t *)ckd_calloc(1, sizeof(*kwss->detections));
385
386 kwss->beam =
387 (int32) logmath_log(acmod->lmath,
388 cmd_ln_float64_r(config,
389 "-beam")) >> SENSCR_SHIFT;
390
391 kwss->plp =
392 (int32) logmath_log(acmod->lmath,
393 cmd_ln_float32_r(config,
394 "-kws_plp")) >> SENSCR_SHIFT;
395
396
397 kwss->def_threshold =
398 (int32) logmath_log(acmod->lmath,
399 cmd_ln_float64_r(config,
400 "-kws_threshold")) >>
402
403 kwss->delay = (int32) cmd_ln_int32_r(config, "-kws_delay");
404
405 E_INFO("KWS(beam: %d, plp: %d, default threshold %d, delay %d)\n",
406 kwss->beam, kwss->plp, kwss->def_threshold, kwss->delay);
407
408 if (keyfile) {
409 if (kws_search_read_list(kwss, keyfile) < 0) {
410 E_ERROR("Failed to create kws search\n");
411 kws_search_free(ps_search_base(kwss));
412 return NULL;
413 }
414 } else {
415 kws_keyphrase_t *k = ckd_calloc(1, sizeof(kws_keyphrase_t));
416 k->threshold = kwss->def_threshold;
417 k->word = ckd_salloc(keyphrase);
418 kwss->keyphrases = glist_add_ptr(NULL, k);
419 }
420
421 /* Reinit for provided keyphrase */
422 if (kws_search_reinit(ps_search_base(kwss),
423 ps_search_dict(kwss),
424 ps_search_dict2pid(kwss)) < 0) {
425 ps_search_free(ps_search_base(kwss));
426 return NULL;
427 }
428
429 ptmr_init(&kwss->perf);
430
431 return ps_search_base(kwss);
432}
433
434void
435kws_search_free(ps_search_t * search)
436{
437 kws_search_t *kwss;
438 double n_speech;
439 gnode_t *gn;
440
441 kwss = (kws_search_t *) search;
442
443 n_speech = (double)kwss->n_tot_frame
444 / cmd_ln_int32_r(ps_search_config(kwss), "-frate");
445
446 E_INFO("TOTAL kws %.2f CPU %.3f xRT\n",
447 kwss->perf.t_tot_cpu,
448 kwss->perf.t_tot_cpu / n_speech);
449 E_INFO("TOTAL kws %.2f wall %.3f xRT\n",
450 kwss->perf.t_tot_elapsed,
451 kwss->perf.t_tot_elapsed / n_speech);
452
453
454 ps_search_base_free(search);
455 hmm_context_free(kwss->hmmctx);
456 kws_detections_reset(kwss->detections);
457 ckd_free(kwss->detections);
458
459 ckd_free(kwss->pl_hmms);
460 for (gn = kwss->keyphrases; gn; gn = gnode_next(gn)) {
461 kws_keyphrase_t *keyphrase = gnode_ptr(gn);
462 ckd_free(keyphrase->hmms);
463 ckd_free(keyphrase->word);
464 ckd_free(keyphrase);
465 }
466 glist_free(kwss->keyphrases);
467 ckd_free(kwss);
468}
469
470int
471kws_search_reinit(ps_search_t * search, dict_t * dict, dict2pid_t * d2p)
472{
473 char **wrdptr;
474 char *tmp_keyphrase;
475 int32 wid, pronlen, in_dict;
476 int32 n_hmms, n_wrds;
477 int32 ssid, tmatid;
478 int i, j, p;
479 kws_search_t *kwss = (kws_search_t *) search;
480 bin_mdef_t *mdef = search->acmod->mdef;
481 int32 silcipid = bin_mdef_silphone(mdef);
482 gnode_t *gn;
483
484 /* Free old dict2pid, dict */
485 ps_search_base_reinit(search, dict, d2p);
486
487 /* Initialize HMM context. */
488 if (kwss->hmmctx)
489 hmm_context_free(kwss->hmmctx);
490 kwss->hmmctx =
491 hmm_context_init(bin_mdef_n_emit_state(search->acmod->mdef),
492 search->acmod->tmat->tp, NULL,
493 search->acmod->mdef->sseq);
494 if (kwss->hmmctx == NULL)
495 return -1;
496
497 /* Initialize phone loop HMMs. */
498 if (kwss->pl_hmms) {
499 for (i = 0; i < kwss->n_pl; ++i)
500 hmm_deinit((hmm_t *) & kwss->pl_hmms[i]);
501 ckd_free(kwss->pl_hmms);
502 }
503 kwss->n_pl = bin_mdef_n_ciphone(search->acmod->mdef);
504 kwss->pl_hmms =
505 (hmm_t *) ckd_calloc(kwss->n_pl, sizeof(*kwss->pl_hmms));
506 for (i = 0; i < kwss->n_pl; ++i) {
507 hmm_init(kwss->hmmctx, (hmm_t *) & kwss->pl_hmms[i],
508 FALSE,
509 bin_mdef_pid2ssid(search->acmod->mdef, i),
510 bin_mdef_pid2tmatid(search->acmod->mdef, i));
511 }
512
513 for (gn = kwss->keyphrases; gn; gn = gnode_next(gn)) {
514 kws_keyphrase_t *keyphrase = gnode_ptr(gn);
515
516 /* Initialize keyphrase HMMs */
517 tmp_keyphrase = (char *) ckd_salloc(keyphrase->word);
518 n_wrds = str2words(tmp_keyphrase, NULL, 0);
519 wrdptr = (char **) ckd_calloc(n_wrds, sizeof(*wrdptr));
520 str2words(tmp_keyphrase, wrdptr, n_wrds);
521
522 /* count amount of hmms */
523 n_hmms = 0;
524 in_dict = TRUE;
525 for (i = 0; i < n_wrds; i++) {
526 wid = dict_wordid(dict, wrdptr[i]);
527 if (wid == BAD_S3WID) {
528 E_ERROR("Word '%s' in phrase '%s' is missing in the dictionary\n", wrdptr[i], keyphrase->word);
529 in_dict = FALSE;
530 break;
531 }
532 pronlen = dict_pronlen(dict, wid);
533 n_hmms += pronlen;
534 }
535
536 if (!in_dict) {
537 ckd_free(wrdptr);
538 ckd_free(tmp_keyphrase);
539 continue;
540 }
541
542 /* allocate node array */
543 if (keyphrase->hmms)
544 ckd_free(keyphrase->hmms);
545 keyphrase->hmms = (hmm_t *) ckd_calloc(n_hmms, sizeof(hmm_t));
546 keyphrase->n_hmms = n_hmms;
547
548 /* fill node array */
549 j = 0;
550 for (i = 0; i < n_wrds; i++) {
551 wid = dict_wordid(dict, wrdptr[i]);
552 pronlen = dict_pronlen(dict, wid);
553 for (p = 0; p < pronlen; p++) {
554 int32 ci = dict_pron(dict, wid, p);
555 if (p == 0) {
556 /* first phone of word */
557 int32 rc =
558 pronlen > 1 ? dict_pron(dict, wid, 1) : silcipid;
559 ssid = dict2pid_ldiph_lc(d2p, ci, rc, silcipid);
560 }
561 else if (p == pronlen - 1) {
562 /* last phone of the word */
563 int32 lc = dict_pron(dict, wid, p - 1);
564 xwdssid_t *rssid = dict2pid_rssid(d2p, ci, lc);
565 int j = rssid->cimap[silcipid];
566 ssid = rssid->ssid[j];
567 }
568 else {
569 /* word internal phone */
570 ssid = dict2pid_internal(d2p, wid, p);
571 }
572 tmatid = bin_mdef_pid2tmatid(mdef, ci);
573 hmm_init(kwss->hmmctx, &keyphrase->hmms[j], FALSE, ssid,
574 tmatid);
575 j++;
576 }
577 }
578
579 ckd_free(wrdptr);
580 ckd_free(tmp_keyphrase);
581 }
582
583
584
585 return 0;
586}
587
588int
589kws_search_start(ps_search_t * search)
590{
591 int i;
592 kws_search_t *kwss = (kws_search_t *) search;
593
594 kwss->frame = 0;
595 kwss->bestscore = 0;
596 kws_detections_reset(kwss->detections);
597
598 /* Reset and enter all phone-loop HMMs. */
599 for (i = 0; i < kwss->n_pl; ++i) {
600 hmm_t *hmm = (hmm_t *) & kwss->pl_hmms[i];
601 hmm_clear(hmm);
602 hmm_enter(hmm, 0, -1, 0);
603 }
604
605 ptmr_reset(&kwss->perf);
606 ptmr_start(&kwss->perf);
607
608 return 0;
609}
610
611int
612kws_search_step(ps_search_t * search, int frame_idx)
613{
614 int16 const *senscr;
615 kws_search_t *kwss = (kws_search_t *) search;
616 acmod_t *acmod = search->acmod;
617
618 /* Activate senones */
619 if (!acmod->compallsen)
620 kws_search_sen_active(kwss);
621
622 /* Calculate senone scores for current frame. */
623 senscr = acmod_score(acmod, &frame_idx);
624
625 /* Evaluate hmms in phone loop and in active keyphrase nodes */
626 kws_search_hmm_eval(kwss, senscr);
627
628 /* Prune hmms with low prob */
629 kws_search_hmm_prune(kwss);
630
631 /* Do hmms transitions */
632 kws_search_trans(kwss);
633
634 ++kwss->frame;
635 return 0;
636}
637
638int
639kws_search_finish(ps_search_t * search)
640{
641 kws_search_t *kwss;
642 int32 cf;
643
644 kwss = (kws_search_t *) search;
645
646 kwss->n_tot_frame += kwss->frame;
647
648 /* Print out some statistics. */
649 ptmr_stop(&kwss->perf);
650 /* This is the number of frames processed. */
651 cf = ps_search_acmod(kwss)->output_frame;
652 if (cf > 0) {
653 double n_speech = (double) (cf + 1)
654 / cmd_ln_int32_r(ps_search_config(kwss), "-frate");
655 E_INFO("kws %.2f CPU %.3f xRT\n",
656 kwss->perf.t_cpu, kwss->perf.t_cpu / n_speech);
657 E_INFO("kws %.2f wall %.3f xRT\n",
658 kwss->perf.t_elapsed, kwss->perf.t_elapsed / n_speech);
659 }
660
661 return 0;
662}
663
664char const *
665kws_search_hyp(ps_search_t * search, int32 * out_score)
666{
667 kws_search_t *kwss = (kws_search_t *) search;
668 if (out_score)
669 *out_score = 0;
670
671 if (search->hyp_str)
672 ckd_free(search->hyp_str);
673 search->hyp_str = kws_detections_hyp_str(kwss->detections, kwss->frame, kwss->delay);
674
675 return search->hyp_str;
676}
677
678char *
679kws_search_get_keyphrases(ps_search_t * search)
680{
681 int c, len;
682 kws_search_t *kwss;
683 char* line;
684 gnode_t *gn;
685
686 kwss = (kws_search_t *) search;
687
688 len = 0;
689 for (gn = kwss->keyphrases; gn; gn = gnode_next(gn))
690 len += strlen(((kws_keyphrase_t *)gnode_ptr(gn))->word) + 1;
691
692 c = 0;
693 line = (char *)ckd_calloc(len, sizeof(*line));
694 for (gn = kwss->keyphrases; gn; gn = gnode_next(gn)) {
695 const char *str = ((kws_keyphrase_t *)gnode_ptr(gn))->word;
696 memcpy(&line[c], str, strlen(str));
697 c += strlen(str);
698 line[c++] = '\n';
699 }
700 line[--c] = '\0';
701
702 return line;
703}
void acmod_activate_hmm(acmod_t *acmod, hmm_t *hmm)
Activate senones associated with an HMM.
Definition acmod.c:1213
int16 const * acmod_score(acmod_t *acmod, int *inout_frame_idx)
Score one frame of data.
Definition acmod.c:1106
void acmod_clear_active(acmod_t *acmod)
Clear set of active senones.
Definition acmod.c:1197
s3ssid_t dict2pid_internal(dict2pid_t *d2p, int32 wid, int pos)
Return the senone sequence ID for the given word position.
Definition dict2pid.c:367
#define dict2pid_rssid(d, ci, lc)
Access macros; not designed for arbitrary use.
Definition dict2pid.h:115
#define dict_pron(d, w, p)
The CI phones of the word w at position p.
Definition dict.h:165
#define BETTER_THAN
Is one score better than another?
Definition hmm.h:95
#define hmm_context_set_senscore(ctx, senscr)
Change the senone score array for a context.
Definition hmm.h:227
#define WORST_SCORE
Large "bad" score.
Definition hmm.h:84
#define SENSCR_SHIFT
Shift count for senone scores.
Definition hmm.h:73
Internal implementation of PocketSphinx decoder.
#define BAD_S3WID
Dictionary word id.
Definition s3types.h:90
Acoustic model structure.
Definition acmod.h:148
bin_mdef_t * mdef
Model definition.
Definition acmod.h:159
logmath_t * lmath
Log-math computation.
Definition acmod.h:151
tmat_t * tmat
Transition matrices.
Definition acmod.h:160
uint8 compallsen
Compute all senones?
Definition acmod.h:188
uint16 ** sseq
Unique senone sequences (2D array built at load time)
Definition bin_mdef.h:134
Building composite triphone (as well as word internal triphones) with the dictionary.
Definition dict2pid.h:84
a structure for a dictionary.
Definition dict.h:76
An individual HMM among the HMM search space.
Implementation of KWS search structure.
Definition kws_search.h:70
hmm_context_t * hmmctx
HMM context.
Definition kws_search.h:73
int32 bestscore
For beam pruning.
Definition kws_search.h:83
ptmr_t perf
Performance counter.
Definition kws_search.h:90
int32 plp
Phone loop probability.
Definition kws_search.h:82
glist_t keyphrases
Keyphrases to spot.
Definition kws_search.h:75
frame_idx_t frame
Frame index.
Definition kws_search.h:78
hmm_t * pl_hmms
Phone loop hmms - hmms of CI phones.
Definition kws_search.h:88
int32 n_pl
Number of CI phones.
Definition kws_search.h:87
kws_detections_t * detections
Keyword spotting history.
Definition kws_search.h:77
int32 delay
Delay to wait for best detection score.
Definition kws_search.h:85
int32 def_threshold
default threshold for p(hyp)/p(altern) ratio
Definition kws_search.h:84
Segmentation "iterator" for KWS history.
Definition kws_search.h:54
gnode_t * detection
Keyphrase detection correspondent to segment.
Definition kws_search.h:56
frame_idx_t last_frame
Last frame to raise the detection.
Definition kws_search.h:57
ps_seg_t base
Base structure.
Definition kws_search.h:55
Word graph structure used in bestpath/nbest search.
Base structure for search module.
acmod_t * acmod
Acoustic model.
char * hyp_str
Current hypothesis string.
V-table for search algorithm.
Base structure for hypothesis segmentation iterator.
ps_search_t * search
Search object from whence this came.
float32 lwf
Language weight factor (for second-pass searches)
ps_segfuncs_t * vt
V-table of seg methods.
int32 lscr
Language model score.
int32 ascr
Acoustic score.
frame_idx_t sf
Start frame.
char const * word
Word string (pointer into dictionary hash)
frame_idx_t ef
End frame.
int32 prob
Log posterior probability.
uint8 *** tp
The transition matrices; kept in the same scale as acoustic scores; tp[tmatid][from-state][to-state].
Definition tmat.h:56
cross word triphone model structure
Definition dict2pid.h:73
s3cipid_t * cimap
Index into ssid[] above for each ci phone.
Definition dict2pid.h:75
s3ssid_t * ssid
Senone Sequence ID list for all context ciphones.
Definition dict2pid.h:74