%Aigaion2 BibTeX export from Idiap Publications
%Friday 18 April 2025 01:06:58 AM

% Chapter in an edited book. `month` uses the standard three-letter macro
% (unquoted) so that each bibliography style can render the month name itself.
% `projects` and `abstract` are non-standard fields ignored by standard styles.
@INCOLLECTION{Keshet_WILEY_2009,
  author    = {Keshet, Joseph and Shalev-Shwartz, Shai and Singer, Yoram and Chazan, Dan},
  title     = {A Large Margin Algorithm for Forced Alignment},
  editor    = {Keshet, Joseph and Bengio, Samy},
  booktitle = {Automatic Speech and Speaker Recognition: Large Margin and Kernel Methods},
  publisher = {John Wiley and Sons},
  year      = {2009},
  month     = mar,
  projects  = {Idiap},
  abstract  = {We describe and analyze a discriminative algorithm for learning
               to align a phoneme sequence of a speech utterance with its
               acoustical signal counterpart by predicting a timing sequence
               representing the phoneme start times. In contrast to common
               HMM-based approaches, our method employs a discriminative
               learning procedure in which the learning phase is tightly
               coupled with the forced alignment task. The alignment function
               we devise is based on mapping the input acoustic-symbolic
               representations of the speech utterance along with the target
               timing sequence into an abstract vector space. We suggest a
               specific mapping into the abstract vector-space which utilizes
               standard speech features (e.g. spectral distances) as well as
               confidence outputs of a frame-based phoneme classifier.
               Generalizing the notion of separation with a margin used in
               support vector machines (SVM) for binary classification, we
               cast the learning task as the problem of finding a vector in an
               abstract inner-product space. We set the prediction vector to
               be the solution of a minimization problem with a large set of
               constraints. Each constraint enforces a gap between the
               projection of the correct target timing sequence and the
               projection of an alternative, incorrect, timing sequence onto
               the vector. Though the number of constraints is very large, we
               describe a simple iterative algorithm for efficiently learning
               the vector and analyze the formal properties of the resulting
               learning algorithm. We report experimental results comparing
               the proposed algorithm to previous studies on forced alignment,
               which use hidden Markov models (HMM). The results obtained in
               our experiments using the discriminative alignment algorithm
               outperform the state-of-the-art systems on the TIMIT corpus.}
}