% Conference paper in the CPM 2007 proceedings (Springer LNCS).
% Acronyms are brace-protected so sentence-casing styles keep them upper-case.
% eventtitle/eventdate carry the conference metadata that was formerly free text in note;
% classic BibTeX styles ignore these fields, biblatex can print them.
% NOTE(review): volume and publisher city were filled in from the series record -- confirm.
@inproceedings{d8ba33120b6147d0b05e928ab4b24721,
  author     = {Mozes, Shay and Weimann, Oren and Ziv-Ukelson, Michal},
  title      = {Speeding up {HMM} decoding and training by exploiting sequence repetitions},
  booktitle  = {Combinatorial Pattern Matching -- 18th Annual Symposium, {CPM} 2007, Proceedings},
  eventtitle = {18th Annual Symposium on Combinatorial Pattern Matching, {CPM} 2007},
  eventdate  = {2007-07-09/2007-07-11},
  series     = {Lecture Notes in Computer Science},
  volume     = {4580},
  pages      = {4--15},
  publisher  = {Springer},
  address    = {Berlin, Heidelberg},
  year       = {2007},
  doi        = {10.1007/978-3-540-73437-6_4},
  isbn       = {978-3-540-73436-9},
  language   = {English},
  keywords   = {Compression, Dynamic programming, HMM, Viterbi},
  abstract   = {We present a method to speed up the dynamic program algorithms used for solving the HMM decoding and training problems for discrete time-independent HMMs. We discuss the application of our method to Viterbi's decoding and training algorithms [21], as well as to the forward-backward and Baum-Welch [4] algorithms. Our approach is based on identifying repeated substrings in the observed input sequence. We describe three algorithms based alternatively on byte pair encoding (BPE) [19], run length encoding (RLE) and Lempel-Ziv (LZ78) parsing [22]. Compared to Viterbi's algorithm, we achieve a speedup of $\Omega(r)$ using BPE, a speedup of $\Omega(r/\log r)$ using RLE, and a speedup of $\Omega(\log n/k)$ using LZ78, where $k$ is the number of hidden states, $n$ is the length of the observed sequence and $r$ is its compression ratio (under each compression scheme). Our experimental results demonstrate that our new algorithms are indeed faster in practice. Furthermore, unlike Viterbi's algorithm, our algorithms are highly parallelizable.},
}