Source code for pytudes._2021.leetcode.hard._72__edit_distance

"""https://leetcode.com/problems/edit-distance/

Constraints:
    - 0 ≤ word1.length, word2.length ≤ 500
    - word1 and word2 consist of lowercase English letters.

Examples:
    >>> Solution().minDistance("fat", "cat")
    1

See Also:
    - https://trekhleb.dev/blog/2018/dynamic-programming-vs-divide-and-conquer/

"""
import functools


[docs] class Solution:
[docs] def minDistance(self, word1: str, word2: str) -> int: return compute_edit_distance(word1, word2)
[docs] def compute_edit_distance(word1: str, word2: str) -> int: """Returns the Levenshtein (edit) distance between two words The edit distance is the minimum number of operations required to convert word1 to word2. Based on the observation that word transformation can be achieved via three distinct character operations: 1. Insertion 2. Deletion 3. Substitution Args: word2: word1: Examples: >>> compute_edit_distance('','') 0 >>> compute_edit_distance("a",'') 1 >>> compute_edit_distance('',"a") 1 >>> compute_edit_distance("abc",'') 3 >>> compute_edit_distance('',"abc") 3 >>> compute_edit_distance("islander","slander") 1 >>> compute_edit_distance("mart","karma") 3 >>> compute_edit_distance("kitten","sitting") 3 >>> compute_edit_distance("ball","football") 4 >>> compute_edit_distance("football","foot") 4 >>> compute_edit_distance("horse","ros") 3 >>> # horse -> rorse (replace "h" with "r") >>> # rorse -> rose (remove "r") >>> # rose -> ros (remove "e") >>> compute_edit_distance("intention","execution") 5 >>> # intention -> inention (remove "t") >>> # inention -> enention (replace "i" with "e") >>> # enention -> exention (replace "n" with "x") >>> # exention -> exection (replace "n" with "c") >>> # exection -> execution (insert "u") """ ## EDGE CASES ## # if one or both strings are empty, # edit distance is the length of the longer string if min(len(word1), len(word2)) == 0: return max(len(word1), len(word2)) # Initialize word1 x word2 matrix where `edit_distance[i][j]` will contain # the number of operations necessary to transform `word1[:i]` to/from # `word2[:j]`. Matrix is 1-indexed with the empty string being the shared # root entry (i.e., `edit_distance[0][0] == 0`) num_rows, num_cols = len(word1) + 1, len(word2) + 1 edit_distance = [] for _ in range(num_rows): edit_distance.append([-1] * num_cols) # The first column corresponds to the edit distance between the empty string # and `word1` substrings (e.g., `edit_distance[len(word1)][0] == len(word1)`) for word1_char_num in range(num_rows): edit_distance[word1_char_num][0] = word1_char_num # The first row corresponds to the edit distance between the empty string # and `word2` substrings (e.g., `edit_distance[0][len(word2)] == len(word2)`) for word2_char_num in range(num_cols): edit_distance[0][word2_char_num] = word2_char_num # As the edit distance between two strings is is a function of the edit # distance between its substrings, we use tabulation to efficiently compute # edit distances for all substring combinations between `word1` & `word2` for word1_char_num in range(1, num_rows): for word2_char_num in range(1, num_cols): insertion_distance = edit_distance[word1_char_num - 1][word2_char_num] + 1 deletion_distance = edit_distance[word1_char_num][word2_char_num - 1] + 1 substitution_distance = edit_distance[word1_char_num - 1][ word2_char_num - 1 ] if word2[word2_char_num - 1] != word1[word1_char_num - 1]: substitution_distance += 1 edit_distance[word1_char_num][word2_char_num] = min( insertion_distance, deletion_distance, substitution_distance ) return edit_distance[len(word1)][len(word2)]
[docs] def compute_edit_distance_cache(word1: str, word2: str) -> int: """Returns the Levenshtein (edit) distance between two words The edit distance is the minimum number of operations required to convert word1 to word2. Based on the observation that word transformation can be achieved via three distinct character operations: 1. Insertion 2. Deletion 3. Substitution Args: word2: word1: Examples: >>> compute_edit_distance_cache('','') 0 >>> compute_edit_distance_cache("a",'') 1 >>> compute_edit_distance_cache('',"a") 1 >>> compute_edit_distance_cache("abc",'') 3 >>> compute_edit_distance_cache('',"abc") 3 >>> compute_edit_distance_cache("islander","slander") 1 >>> compute_edit_distance_cache("mart","karma") 3 >>> compute_edit_distance_cache("kitten","sitting") 3 >>> compute_edit_distance_cache("ball","football") 4 >>> compute_edit_distance_cache("football","foot") 4 >>> compute_edit_distance_cache("horse","ros") 3 >>> # horse -> rorse (replace "h" with "r") >>> # rorse -> rose (remove "r") >>> # rose -> ros (remove "e") >>> compute_edit_distance_cache("intention","execution") 5 >>> # intention -> inention (remove "t") >>> # inention -> enention (replace "i" with "e") >>> # enention -> exention (replace "n" with "x") >>> # exention -> exection (replace "n" with "c") >>> # exection -> execution (insert "u") """ ## EDGE CASES ## # if one or both strings are empty, # edit distance is the length of the longer string if min(len(word1), len(word2)) == 0: return max(len(word1), len(word2)) @functools.lru_cache(maxsize=None) def edit_distance(word1_char_num: int, word2_char_num: int) -> int: if word1_char_num == 0: return word2_char_num if word2_char_num == 0: return word1_char_num insertion_distance = edit_distance(word1_char_num - 1, word2_char_num) + 1 deletion_distance = edit_distance(word1_char_num, word2_char_num - 1) + 1 substitution_distance = edit_distance(word1_char_num - 1, word2_char_num - 1) if word1[word1_char_num - 1] != word2[word2_char_num - 1]: substitution_distance += 1 return min(insertion_distance, deletion_distance, substitution_distance) return edit_distance(len(word1), len(word2))