// Viewer header (paste residue): 114 lines, 2.9 KiB, TypeScript
/**
 * Returns the entry of `targets` that scores highest against `mainString`
 * according to `stringMatchingAlgorithm`.
 *
 * Ties are resolved in favour of the earliest entry, because a later candidate
 * only replaces the current best when its score is strictly greater.
 *
 * @param mainString - The string every candidate is compared against.
 * @param targets - Candidate strings; the array is not modified.
 * @returns The best-scoring candidate, or `null` when `targets` is empty.
 */
function findBestMatch(
  mainString: string,
  targets: readonly string[],
): string | null {
  let bestMatch: string | null = null;
  let highestScore = Number.NEGATIVE_INFINITY;

  for (const candidate of targets) {
    const currentScore = stringMatchingAlgorithm(mainString, candidate);
    // The first candidate is always accepted; afterwards only a strictly
    // better score displaces the current best.
    if (bestMatch === null || currentScore > highestScore) {
      highestScore = currentScore;
      bestMatch = candidate;
    }
  }

  return bestMatch;
}
|
|
|
|
/**
 * The title variants of a single entry that the matching helpers compare.
 *
 * NOTE(review): the field names suggest this mirrors a title object from an
 * upstream API — confirm against the caller.
 */
type Title = {
  /** Title text stored under the `english` key. */
  english: string;
  /** Title text stored under the `userPreferred` key. */
  userPreferred: string;
};
|
|
|
|
/**
 * Finds, among `titles`, the title string most similar to `title`.
 *
 * The `userPreferred` and `english` fields are matched independently: each
 * field of `title` is compared (via `findBestMatch`) only against the same
 * field of every candidate, skipping candidates where that field is undefined.
 * The field whose best candidate scores higher under `stringMatchingAlgorithm`
 * wins; on a tie the `userPreferred` candidate is returned.
 *
 * If only one of the two fields yields a candidate, that candidate is returned
 * regardless of its score.
 *
 * @param title - The title to look up; a missing or empty field is skipped.
 * @param titles - Candidate titles to search.
 * @returns The best-matching title string, or `null` when neither field
 *   produced a candidate.
 */
export const findBestMatchingTitle = (
  title: Partial<Title>,
  titles: Partial<Title>[],
): string | null => {
  const { english, userPreferred } = title;

  // Explicit predicate so the filtered array narrows to string[] on every
  // TypeScript version (inferred predicates only exist from TS 5.5).
  const isDefined = (value: string | undefined): value is string =>
    value !== undefined;

  // Best candidate for one field together with its score, or null when the
  // query is missing/empty or no candidate carries that field.
  const bestFor = (
    query: string | undefined,
    pick: (candidate: Partial<Title>) => string | undefined,
  ): { match: string; score: number } | null => {
    if (!query) return null;
    const match = findBestMatch(query, titles.map(pick).filter(isDefined));
    if (match === null) return null;
    return { match, score: stringMatchingAlgorithm(query, match) };
  };

  const preferredResult = bestFor(
    userPreferred,
    (candidate) => candidate.userPreferred,
  );
  const englishResult = bestFor(english, (candidate) => candidate.english);

  // When only one field produced a candidate, it wins outright instead of
  // losing a 0-vs-0 score tie to a non-existent match.
  if (preferredResult === null) {
    return englishResult === null ? null : englishResult.match;
  }
  if (englishResult === null) {
    return preferredResult.match;
  }

  return preferredResult.score >= englishResult.score
    ? preferredResult.match
    : englishResult.match;
};
|
|
|
|
/**
 * Scores the similarity of two strings with the Jaro-Winkler metric.
 *
 * Original implementation copied from
 * https://discord.com/channels/987492554486452315/1273988465222090783/1273988465222090783
 *
 * Comparison is done per UTF-16 code unit and is case-sensitive.
 *
 * @param s1 - First string.
 * @param s2 - Second string.
 * @returns A score in [0, 1]: 1 for identical strings (including two empty
 *   strings), 0 when exactly one string is empty or no characters match.
 */
function stringMatchingAlgorithm(s1: string, s2: string): number {
  const MAX_PREFIX = 4; // Winkler bonus considers at most this many leading chars
  const PREFIX_SCALE = 0.1; // weight of each shared leading character

  const m = s1.length;
  const n = s2.length;

  if (m === 0 && n === 0) return 1.0;
  if (m === 0 || n === 0) return 0.0;

  // Clamp at 0: when the longer string has length 1 the raw formula gives -1,
  // which would produce an empty search window and score "a" vs "a" as 0.
  const matchDistance = Math.max(0, Math.floor(Math.max(m, n) / 2) - 1);
  const s1Matches: boolean[] = new Array<boolean>(m).fill(false);
  const s2Matches: boolean[] = new Array<boolean>(n).fill(false);

  // Pass 1: pair each character of s1 with the first unused equal character
  // of s2 that lies within the match window.
  let matches = 0;
  for (let i = 0; i < m; i++) {
    const start = Math.max(0, i - matchDistance);
    const end = Math.min(n - 1, i + matchDistance);

    for (let j = start; j <= end; j++) {
      if (s2Matches[j]) continue;
      if (s1[i] !== s2[j]) continue;
      s1Matches[i] = true;
      s2Matches[j] = true;
      matches++;
      break;
    }
  }

  if (matches === 0) return 0.0;

  // Pass 2: walk the matched characters of both strings in order; every
  // position where they differ is half of a transposition.
  let transpositions = 0;
  let k = 0;
  for (let i = 0; i < m; i++) {
    if (!s1Matches[i]) continue;
    // Both strings hold the same number of matched flags, so k stays in range.
    while (!s2Matches[k]) k++;
    if (s1[i] !== s2[k]) transpositions++;
    k++;
  }
  transpositions /= 2;

  const jaro =
    (matches / m + matches / n + (matches - transpositions) / matches) / 3;

  // Length of the common prefix, capped at MAX_PREFIX and at both lengths.
  // Counting explicitly avoids a "not found" sentinel of -1 turning the bonus
  // into a penalty when one string is a full prefix of the other.
  const prefixLimit = Math.min(MAX_PREFIX, m, n);
  let prefix = 0;
  while (prefix < prefixLimit && s1[prefix] === s2[prefix]) prefix++;

  return jaro + prefix * PREFIX_SCALE * (1 - jaro);
}
|