From 342e2c2bd8c8d1c3de1006870f24f84d51bfb0ee Mon Sep 17 00:00:00 2001
From: Rushil Perera
Date: Mon, 26 Aug 2024 21:55:11 -0400
Subject: [PATCH] fix: use string matching to map to correct title in Aniwatch

---
Review notes (this section is ignored by `git am`):
- Reconstructed from a copy whose line breaks and indentation were lost;
  context/removed lines assume 2-space Prettier formatting and must be checked
  against the target file before applying.
- `Promise<string | undefined>` on getAniwatchId is an assumption: the type
  argument was missing from the copy this was rebuilt from.
- The added code differs from commit 342e2c2, so the post-image blob hashes in
  the `index` lines are stale.

 .../episodes/getByAniListId/aniwatch.ts       |  43 +++++-
 src/libs/findBestMatchingTitle.ts             | 137 ++++++++++++++++++
 2 files changed, 175 insertions(+), 5 deletions(-)
 create mode 100644 src/libs/findBestMatchingTitle.ts

diff --git a/src/controllers/episodes/getByAniListId/aniwatch.ts b/src/controllers/episodes/getByAniListId/aniwatch.ts
index 90cb6b9..7f57227 100644
--- a/src/controllers/episodes/getByAniListId/aniwatch.ts
+++ b/src/controllers/episodes/getByAniListId/aniwatch.ts
@@ -1,3 +1,5 @@
+import { findBestMatchingTitle } from "~/libs/findBestMatchingTitle";
+
 import { Episode, type EpisodesResponse } from "./episode";
 
 export async function getEpisodesFromAniwatch(
@@ -8,9 +10,12 @@ export async function getEpisodesFromAniwatch(
     .then(({ fetchTitleFromAnilist }) =>
       fetchTitleFromAnilist(aniListId, undefined),
     )
-    .then((title) => title?.title?.english ?? title?.title?.userPreferred);
+    .then((title) => ({
+      english: title?.title?.english,
+      userPreferred: title?.title?.userPreferred,
+    }));
 
-  if (!animeTitle) {
+  if (!animeTitle.english && !animeTitle.userPreferred) {
     return null;
   }
 
@@ -55,12 +60,40 @@ export async function getEpisodesFromAniwatch(
   return null;
 }
 
-function getAniwatchId(animeTitle: string): Promise<string | undefined> {
+/**
+ * Searches Aniwatch for the given title and resolves to the id of the search
+ * result whose name best matches it.
+ *
+ * @param animeTitle - Titles to search for; `english` is queried first, with
+ *   `userPreferred` as the fallback when `english` is missing or empty.
+ * @returns The id of the best-matching result, or `undefined` if none matched.
+ */
+function getAniwatchId(
+  animeTitle: Partial<{ english: string; userPreferred: string }>,
+): Promise<string | undefined> {
   return fetch(
-    `https://aniwatch.up.railway.app/anime/search?q=${encodeURIComponent(animeTitle)}`,
+    `https://aniwatch.up.railway.app/anime/search?q=${encodeURIComponent(animeTitle.english || animeTitle.userPreferred || "")}`,
   )
     .then((res) => res.json())
-    .then(({ animes }) => animes[0]?.id);
+    .then(({ animes = [] }) => {
+      const bestMatchingTitle = findBestMatchingTitle(
+        animeTitle,
+        animes.map((anime) => ({
+          english: anime.name,
+          userPreferred: anime.jname,
+        })),
+      );
+      if (bestMatchingTitle === null) {
+        return undefined;
+      }
+
+      // The winning title may be either `name` or `jname`, so check both.
+      return animes.find(
+        (anime) =>
+          anime.name === bestMatchingTitle ||
+          anime.jname === bestMatchingTitle,
+      )?.id;
+    });
 }
 
 export interface AniwatchEpisodesResponse {
diff --git a/src/libs/findBestMatchingTitle.ts b/src/libs/findBestMatchingTitle.ts
new file mode 100644
index 0000000..47f9bca
--- /dev/null
+++ b/src/libs/findBestMatchingTitle.ts
@@ -0,0 +1,137 @@
+/**
+ * Picks the entry of `targets` that is most similar to `mainString`.
+ *
+ * @param mainString - The string every target is compared against.
+ * @param targets - Candidate strings to choose from.
+ * @returns The highest-scoring target (the earliest one on ties), or `null`
+ *   when `targets` is empty.
+ */
+function findBestMatch(mainString: string, targets: string[]): string | null {
+  let bestMatch: string | null = null;
+  let highestScore = Number.NEGATIVE_INFINITY;
+
+  for (const target of targets) {
+    const currentScore = stringMatchingAlgorithm(mainString, target);
+    if (currentScore > highestScore) {
+      highestScore = currentScore;
+      bestMatch = target;
+    }
+  }
+
+  return bestMatch;
+}
+
+/** The title variants an anime can be listed under. */
+type Title = {
+  english: string;
+  userPreferred: string;
+};
+
+/**
+ * Finds the best candidate for a single title field.
+ *
+ * @returns The best-matching candidate and its similarity score; `match` is
+ *   `null` and `score` is 0 when `title` has no value for the field or no candidate does.
+ */
+function bestMatchFor(
+  title: Partial<Title>,
+  titles: Partial<Title>[],
+  field: keyof Title,
+): { match: string | null; score: number } {
+  const wanted = title[field];
+  if (!wanted) return { match: null, score: 0 };
+
+  const candidates = titles
+    .map((candidate) => candidate[field])
+    .filter((value): value is string => value != null);
+  const match = findBestMatch(wanted, candidates);
+
+  return {
+    match,
+    score: match === null ? 0 : stringMatchingAlgorithm(wanted, match),
+  };
+}
+
+/**
+ * Finds the candidate title that most closely matches `title`.
+ *
+ * The `english` and `userPreferred` variants are matched independently, each
+ * against the candidates' values for the same field, and the higher-scoring
+ * match wins. On a tie the `userPreferred` match is returned.
+ *
+ * @param title - The title to look for; missing or empty variants are skipped.
+ * @param titles - Candidate titles to search through.
+ * @returns The best-matching candidate string, or `null` when there was
+ *   nothing to compare.
+ */
+export const findBestMatchingTitle = (
+  title: Partial<Title>,
+  titles: Partial<Title>[],
+): string | null => {
+  const userPreferred = bestMatchFor(title, titles, "userPreferred");
+  const english = bestMatchFor(title, titles, "english");
+
+  return userPreferred.score >= english.score
+    ? userPreferred.match
+    : english.match;
+};
+
+/**
+ * Scores the similarity of two strings with the Jaro-Winkler algorithm.
+ *
+ * @returns A value from 0 (nothing in common) to 1 (identical strings).
+ */
+function stringMatchingAlgorithm(s1: string, s2: string): number {
+  // copied from https://discord.com/channels/987492554486452315/1273988465222090783/1273988465222090783
+  const m = s1.length;
+  const n = s2.length;
+
+  if (m === 0 && n === 0) return 1.0;
+  if (m === 0 || n === 0) return 0.0;
+
+  // Clamp at 0: for two one-character strings the formula gives -1, which
+  // empties the search window and scores identical strings as 0.
+  const matchDistance = Math.max(0, Math.floor(Math.max(m, n) / 2) - 1);
+  const s1Matches = new Array<boolean>(m).fill(false);
+  const s2Matches = new Array<boolean>(n).fill(false);
+
+  let matches = 0;
+  let transpositions = 0;
+  for (let i = 0; i < m; i++) {
+    const start = Math.max(0, i - matchDistance);
+    const end = Math.min(n - 1, i + matchDistance);
+
+    for (let j = start; j <= end; j++) {
+      if (s2Matches[j]) continue;
+      if (s1[i] !== s2[j]) continue;
+      s1Matches[i] = true;
+      s2Matches[j] = true;
+      matches++;
+      break;
+    }
+  }
+
+  if (matches === 0) return 0.0;
+
+  // Count matched characters that appear in a different order in each string.
+  let k = 0;
+  for (let i = 0; i < m; i++) {
+    if (!s1Matches[i]) continue;
+    while (!s2Matches[k]) k++;
+    if (s1[i] !== s2[k]) transpositions++;
+    k++;
+  }
+
+  transpositions /= 2;
+
+  const jaro =
+    (matches / m + matches / n + (matches - transpositions) / matches) / 3;
+
+  // Winkler bonus: length of the shared prefix, capped at 4 characters.
+  const maxPrefix = Math.min(4, m, n);
+  let prefix = 0;
+  while (prefix < maxPrefix && s1[prefix] === s2[prefix]) prefix++;
+  const p = 0.1;
+
+  return jaro + prefix * p * (1 - jaro);
+}