添加字幕搜索功能

This commit is contained in:
IndieKKY
2023-11-28 13:53:01 +08:00
parent 9f486b7269
commit 8d1bac4623
13 changed files with 403 additions and 29 deletions

125
src/util/pinyin_util.ts Normal file
View File

@@ -0,0 +1,125 @@
import pinyin from 'tiny-pinyin'
import {uniq} from 'lodash-es'
/**
* A single token as returned by `pinyin.parse`.
*/
interface Ret {
// Token category. Within this file only the value 2 is given meaning: it marks a token treated as pinyin.
// NOTE(review): the meanings of 1 and 3 are defined by tiny-pinyin — confirm against its documentation.
type: 1 | 2 | 3
// Text of the token as it appeared in the input (used directly as a search atom for pinyin tokens).
source: string
// Converted form of the token; for type-2 tokens its first character is used as the pinyin initial.
target: string
}
/**
* A run of consecutive tokens that share the same pinyin / non-pinyin classification.
*/
interface Phase {
// True when the tokens in `list` are pinyin tokens (type 2), false for every other token type.
pinyin: boolean
// The tokens that make up this run, in input order.
list: Ret[]
}
/**
 * Split a string into phases, separating Chinese from non-Chinese text.
 *
 * The string is tokenized with `pinyin.parse`; consecutive tokens with the same
 * classification (pinyin, i.e. token type 2, versus everything else) are
 * collected into one phase.
 *
 * @param str text to split
 * @returns the phases in input order; empty when parsing yields no tokens
 */
export const getPhases = (str: string) => {
  const phases: Phase[] = []
  for (const token of pinyin.parse(str)) {
    const isPinyin = token.type === 2
    const last = phases[phases.length - 1]
    if (last !== undefined && last.pinyin === isPinyin) {
      // Same classification as the run in progress: extend it.
      last.list.push(token)
    } else {
      // First token, or the classification flipped: open a new run.
      phases.push({pinyin: isPinyin, list: [token]})
    }
  }
  return phases
}
/**
 * Break a string into atomic search units, e.g. tool, tab, 汉, 字.
 *
 * - In a pinyin phase every token source becomes its own atom.
 * - In any other phase the token sources are glued back together and each
 *   `\w+` run becomes one atom; everything else (spaces, punctuation, ...) is dropped.
 *
 * NOTE(review): `\w` only matches ASCII letters, digits and `_`, so non-ASCII
 * text that is not classified as pinyin produces no atoms — confirm this is intended.
 *
 * @param str text to split
 * @returns atoms in input order, case preserved, duplicates kept
 */
export const getAtoms = (str: string) => {
  const atoms: string[] = []
  for (const {pinyin: isPinyin, list} of getPhases(str)) {
    const sources = list.map(token => token.source)
    if (isPinyin) {
      for (const source of sources) {
        if (source) {
          atoms.push(source)
        }
      }
      continue
    }
    // `match` with the g flag yields null when nothing matches; matches of \w+ are never empty.
    const words = sources.join('').match(/\w+/g)
    if (words !== null) {
      atoms.push(...words)
    }
  }
  return atoms
}
/**
 * Normalize a token list: lower-case every entry, then remove duplicates.
 *
 * @param atoms tokens to normalize (not modified)
 * @returns a new array of lower-cased, de-duplicated tokens
 */
const fixStrs = (atoms: string[]) => uniq(atoms.map(atom => atom.toLowerCase()))
/**
 * Produce the plain word tokens of a string: all of its atoms, lower-cased and de-duplicated.
 *
 * @param str text to tokenize
 * @returns normalized word tokens
 */
export const getWords = (str: string) => fixStrs(getAtoms(str))
/**
 * Build pinyin search tokens for every run of Chinese characters in `str`.
 *
 * Each suffix of a run contributes two tokens: its full pinyin spelling and
 * the string of its pinyin initials, e.g.
 * 我的世界Minecraft => ['wodeshijie', 'deshijie', 'shijie', 'jie'] + ['wdsj', 'dsj', 'sj', 'j']
 *
 * Only pinyin phases are handled; all other text (here "Minecraft") is ignored.
 *
 * The tokens delivered by `getPhases` already carry the converted text of each
 * character, so the suffix tokens are assembled from them directly instead of
 * re-building every suffix string and sending it through the converter again.
 *
 * @param str text to tokenize
 * @returns lower-cased, de-duplicated pinyin tokens
 */
export const getWordsPinyin = (str: string) => {
  const result: string[] = []
  for (const phase of getPhases(str)) {
    // only handle pinyin
    if (!phase.pinyin) {
      continue
    }
    // 我的世界 => one token per character; tokens without source text are skipped
    const tokens = phase.list.filter(token => token.source)
    // full spelling and initial of every character
    const syllables = tokens.map(token => token.target)
    const initials = tokens.map(token => token.target.charAt(0))
    // one entry per suffix: [我的世界, 的世界, 世界, 界]
    const fullList: string[] = []
    const initialList: string[] = []
    for (let i = 0; i < tokens.length; i++) {
      fullList.push(syllables.slice(i).join(''))
      initialList.push(initials.slice(i).join(''))
    }
    // full-spelling tokens first, then the initials-only tokens
    result.push(...fullList, ...initialList)
  }
  // lower-case and de-duplicate
  return fixStrs(result)
}

65
src/util/search.ts Normal file
View File

@@ -0,0 +1,65 @@
import * as JsSearch from 'js-search'
import {uniq} from 'lodash-es'
import {getWords, getWordsPinyin} from './pinyin_util'
/**
 * Turn a piece of text into de-duplicated search tokens.
 *
 * @param maxLength upper bound on how much of `content` is tokenized (in UTF-16 code units); the rest is cut off
 * @param content text to tokenize
 * @param options when `cnSearchEnabled` is truthy, pinyin tokens are added to the plain word tokens
 * @returns word tokens followed by the optional pinyin tokens, without duplicates
 */
const tokenize = (maxLength: number, content: string, options?: SearchOptions) => {
  // Enforce the length limit before doing any work.
  const clipped = content.length > maxLength ? content.substring(0, maxLength) : content
  const words = getWords(clipped)
  const tokens = options?.cnSearchEnabled ? words.concat(getWordsPinyin(clipped)) : words
  return uniq(tokens)
}
/**
* Optional switches that control how text is tokenized for searching.
*/
export interface SearchOptions {
// When truthy, `tokenize` adds the tokens from `getWordsPinyin` to the plain word tokens.
cnSearchEnabled?: boolean
}
/**
 * Create a small search helper backed by js-search.
 *
 * No searcher exists until `reset` has been called: before that, `add` does
 * nothing and `search` returns undefined.
 *
 * @param uidFieldName field name handed to the `JsSearch.Search` constructor
 * @param index field name registered through `addIndex`
 * @param maxLength length limit forwarded to `tokenize`
 * @param options tokenization options forwarded to `tokenize`
 * @returns the `reset`, `add` and `search` operations
 */
export const Search = (uidFieldName: string, index: string, maxLength: number, options?: SearchOptions) => {
  // The active searcher; undefined until reset() runs.
  let searchRef: JsSearch.Search | undefined

  /**
   * Rebuild the index: replace the searcher with a fresh one and optionally load documents into it.
   */
  function reset(documents?: Object[]) {
    const engine = new JsSearch.Search(uidFieldName)
    searchRef = engine
    engine.tokenizer = {
      tokenize: (str) => tokenize(maxLength, str, options),
    }
    engine.addIndex(index)
    // Seed the new index when documents were supplied.
    if (documents != null) {
      engine.addDocuments(documents)
    }
  }

  /**
   * Add one document to the current index (no-op when there is no searcher yet).
   */
  function add(document: Object) {
    if (searchRef != null) {
      searchRef.addDocument(document)
    }
  }

  /**
   * Search the index with the lower-cased query text.
   * @return matching documents, not de-duplicated; undefined when there is no searcher yet
   */
  function search(text: string) {
    if (searchRef == null) {
      return undefined
    }
    return searchRef.search(text.toLowerCase())
  }

  return {reset, add, search}
}