/**
 * Picks the entry of `targets` that scores highest against `mainString`
 * according to `stringMatchingAlgorithm`.
 *
 * @param mainString - The string every candidate is compared with.
 * @param targets - Candidate strings, examined in order.
 * @returns The highest-scoring candidate, or `null` when `targets` is empty.
 *   On equal scores the earliest candidate is kept, because a later one only
 *   replaces the leader when its score is strictly greater.
 */
function findBestMatch(mainString: string, targets: string[]): string | null {
  if (targets.length === 0) {
    return null;
  }

  // Seed the search with the first candidate, then challenge it with the rest.
  let leader = targets[0];
  let leaderScore = stringMatchingAlgorithm(mainString, leader);

  for (const candidate of targets.slice(1)) {
    const candidateScore = stringMatchingAlgorithm(mainString, candidate);
    if (candidateScore > leaderScore) {
      leader = candidate;
      leaderScore = candidateScore;
    }
  }

  return leader;
}
/**
 * Title variants of a single entry. These are the two fields that
 * `findBestMatchingTitle` reads when comparing titles.
 */
type Title = {
// Presumably the English-language title (inferred from the field name).
english: string;
// Presumably the title in the user's preferred language/format (inferred
// from the field name) — TODO confirm against the data source.
userPreferred: string;
};
/**
 * Finds the candidate closest to `query` and scores it.
 *
 * @param query - The string to match; a missing or empty query yields no match.
 * @param candidates - Possible matches; non-string entries are ignored.
 * @returns The closest candidate (or `null`) together with its similarity
 *   score, which is 0 when there is no match.
 */
function closestCandidate(
  query: string | undefined,
  candidates: readonly unknown[],
): { match: string | null; score: number } {
  if (!query) {
    return { match: null, score: 0 };
  }

  // Keep only real strings: this drops `undefined` (field absent) and also a
  // runtime `null`, which would otherwise crash the scorer when it reads
  // `.length`.
  const usable = candidates.filter(
    (candidate): candidate is string => typeof candidate === "string",
  );

  const match = findBestMatch(query, usable);
  // `findBestMatch` only returns the winning string, so score it once more.
  const score = match ? stringMatchingAlgorithm(query, match) : 0;
  return { match, score };
}

/**
 * Chooses, among `titles`, the title string that best matches `title`.
 *
 * The `userPreferred` and `english` fields are matched independently, each
 * against the same-named field of every candidate. The field whose best match
 * scores higher wins; on a tie the `userPreferred` match is returned.
 *
 * @param title - The title to look up; either field may be missing.
 * @param titles - Candidate titles; either field of each may be missing.
 * @returns The best-matching title string, or `null` when the winning side has
 *   no match (for example when both fields of `title` are missing or no
 *   candidate provides the corresponding field).
 */
export const findBestMatchingTitle = (
  title: Partial<Title>,
  titles: Partial<Title>[],
): string | null => {
  const { english, userPreferred } = title;

  const { match: userPreferredBestMatch, score: userPreferredScore } =
    closestCandidate(
      userPreferred,
      titles.map((candidate) => candidate.userPreferred),
    );
  const { match: englishBestMatch, score: englishScore } = closestCandidate(
    english,
    titles.map((candidate) => candidate.english),
  );

  // Diagnostic output: the scores for both fields, then the winning side.
  console.log(title.english, englishScore);
  console.log(title.userPreferred, userPreferredScore);

  // `>=` makes the user-preferred title win ties.
  if (userPreferredScore >= englishScore) {
    console.log("User preferred", userPreferredBestMatch);
    return userPreferredBestMatch;
  }

  console.log("English", englishBestMatch);
  return englishBestMatch;
};
/**
 * Scores the similarity of two strings with the Jaro-Winkler measure.
 *
 * Comparison is case-sensitive and works on UTF-16 code units.
 *
 * @param s1 - First string.
 * @param s2 - Second string.
 * @returns A value in [0, 1]: 1 when the strings are identical (two empty
 *   strings included), 0 when exactly one is empty or no characters match.
 */
function stringMatchingAlgorithm(s1: string, s2: string): number {
  // current implementation is the Jaro-Winkler algorithm
  // copied from https://discord.com/channels/987492554486452315/1273988465222090783/1273988465222090783
  const m = s1.length;
  const n = s2.length;
  if (m === 0 && n === 0) return 1.0;
  if (m === 0 || n === 0) return 0.0;

  // Characters count as matching only if they sit within this many positions
  // of each other. Clamp at 0: for one-character strings the raw formula
  // yields -1, which would make the search window empty and score "a" vs "a"
  // as 0.
  const matchDistance = Math.max(0, Math.floor(Math.max(m, n) / 2) - 1);

  const s1Matches = new Array<boolean>(m).fill(false);
  const s2Matches = new Array<boolean>(n).fill(false);
  let matches = 0;

  // Pass 1: pair each character of s1 with the first unused equal character
  // of s2 inside the window.
  for (let i = 0; i < m; i++) {
    const start = Math.max(0, i - matchDistance);
    const end = Math.min(n - 1, i + matchDistance);
    for (let j = start; j <= end; j++) {
      if (s2Matches[j]) continue;
      if (s1[i] !== s2[j]) continue;
      s1Matches[i] = true;
      s2Matches[j] = true;
      matches++;
      break;
    }
  }
  if (matches === 0) return 0.0;

  // Pass 2: walk the matched characters of both strings in order; every
  // position where they differ is half a transposition.
  let halfTranspositions = 0;
  let k = 0;
  for (let i = 0; i < m; i++) {
    if (!s1Matches[i]) continue;
    while (!s2Matches[k]) k++;
    if (s1[i] !== s2[k]) halfTranspositions++;
    k++;
  }
  const transpositions = halfTranspositions / 2;

  const jaro =
    (matches / m + matches / n + (matches - transpositions) / matches) / 3;

  // Winkler bonus: length of the common prefix, capped at 4. Counting with a
  // bounded loop (instead of findIndex) keeps the value non-negative when one
  // string is a complete prefix of the other.
  const maxPrefix = Math.min(4, m, n);
  let prefix = 0;
  while (prefix < maxPrefix && s1[prefix] === s2[prefix]) prefix++;

  const p = 0.1; // scaling factor applied per shared prefix character
  return jaro + prefix * p * (1 - jaro);
}