1
package diffson.lcs
2

3
import cats.Eq
4
import cats.implicits._
5

6
import scala.annotation.tailrec
7

8
/** Implementation of the LCS using dynamic programming.
9
 *
10
 *  @author Lucas Satabin
11
 */
12
class DynamicProgLcs[T: Eq] extends Lcs[T] {
13

14
  def lcs(s1: List[T], s2: List[T], low1: Int, high1: Int, low2: Int, high2: Int): List[(Int, Int)] = {
15 1
    val seq1 = s1.slice(low1, high1)
16 1
    val seq2 = s2.slice(low2, high2)
17 1
    if (seq1.isEmpty || seq2.isEmpty) {
18
      // shortcut if at least on sequence is empty, the lcs, is empty as well
19 1
      Nil
20 1
    } else if (seq1 === seq2) {
21
      // both sequences are equal, the lcs is either of them
22 1
      seq1.indices.map(i => (i + low1, i + low2)).toList
23 1
    } else if (seq1.startsWith(seq2)) {
24
      // the second sequence is a prefix of the first one
25
      // the lcs is the second sequence
26 1
      seq2.indices.map(i => (i + low1, i + low2)).toList
27 1
    } else if (seq2.startsWith(seq1)) {
28
      // the first sequence is a prefix of the second one
29
      // the lcs is the first sequence
30 1
      seq1.indices.map(i => (i + low1, i + low2)).toList
31 1
    } else {
32
      // we try to reduce the problem by stripping common suffix and prefix
33 1
      val (prefix, middle1, middle2, suffix) = splitPrefixSuffix(seq1, seq2, low1, low2)
34 1
      val indexedMiddle1: Vector[T] = middle1.toVector
35 1
      val indexedMiddle2: Vector[T] = middle2.toVector
36 1
      val offset = prefix.size
37 1
      val lengths: Array[Array[Int]] = Array.ofDim[Int](middle1.size + 1, middle2.size + 1)
38

39
      // fill up the length matrix
40
      // impl chosen based on microbenchmarks
41 1
      val cols = indexedMiddle1.length
42 1
      val rows = indexedMiddle2.length
43

44
      @tailrec
45
      def fillJs(i: Int, j: Int): Unit = {
46 1
        if (j < rows) {
47 1
          if (indexedMiddle1(i) == indexedMiddle2(j))
48 1
            lengths(i + 1)(j + 1) = lengths(i)(j) + 1
49
          else
50 1
            lengths(i + 1)(j + 1) = math.max(lengths(i + 1)(j), lengths(i)(j + 1))
51 1
          fillJs(i, j + 1)
52
        }
53
      }
54

55
      @tailrec
56
      def fillIs(i: Int): Unit = {
57 1
        if (i < cols) {
58 1
          fillJs(i, 0)
59 1
          fillIs(i + 1)
60
        }
61
      }
62

63 1
      fillIs(0)
64

65
      // and compute the lcs out of the matrix
66
      @tailrec
67
      def loop(idx1: Int, idx2: Int, acc: List[(Int, Int)]): List[(Int, Int)] =
68 1
        if (idx1 == 0 || idx2 == 0) {
69 1
          acc
70 1
        } else if (lengths(idx1)(idx2) == lengths(idx1 - 1)(idx2)) {
71 1
          loop(idx1 - 1, idx2, acc)
72 1
        } else if (lengths(idx1)(idx2) == lengths(idx1)(idx2 - 1)) {
73 1
          loop(idx1, idx2 - 1, acc)
74 1
        } else {
75 1
          assert(seq1(offset + idx1 - 1) == seq2(offset + idx2 - 1))
76 1
          loop(idx1 - 1, idx2 - 1, (low1 + offset + idx1 - 1, low2 + offset + idx2 - 1) :: acc)
77
        }
78

79 1
      prefix ++ loop(indexedMiddle1.size, indexedMiddle2.size, Nil) ++ suffix
80
    }
81
  }
82

83
  /* Extract common prefix and suffix from both sequences */
84
  private def splitPrefixSuffix(seq1: List[T], seq2: List[T], low1: Int, low2: Int): (List[(Int, Int)], List[T], List[T], List[(Int, Int)]) = {
85 1
    val size1 = seq1.size
86 1
    val size2 = seq2.size
87
    val prefix =
88 1
      seq1.zip(seq2).takeWhile {
89 1
        case (e1, e2) => e1 == e2
90 1
      }.indices.map(i => (i + low1, i + low2)).toList
91
    val suffix =
92 1
      seq1.reverse.zip(seq2.reverse).takeWhile {
93 1
        case (e1, e2) => e1 == e2
94 1
      }.indices.map(i => (size1 - i - 1 + low1, size2 - i - 1 + low2)).toList.reverse
95 1
    (prefix, seq1.drop(prefix.size).dropRight(suffix.size), seq2.drop(prefix.size).dropRight(suffix.size), suffix)
96
  }
97

98
  def savedHashes: Lcs[T] =
99 0
    new HashedLcs(new DynamicProgLcs[Hashed[T]])
100
}

Read our documentation on viewing source code .

Loading