You've already forked bilibili-subtitle
优化
This commit is contained in:
125
src/utils/pinyinUtil.ts
Normal file
125
src/utils/pinyinUtil.ts
Normal file
@@ -0,0 +1,125 @@
|
||||
import pinyin from 'tiny-pinyin'
|
||||
import {uniq} from 'lodash-es'
|
||||
|
||||
/**
 * A single token as produced by `pinyin.parse`.
 */
interface Ret {
  // Token kind; this file only distinguishes 2 (the token was converted to pinyin) from the rest.
  // NOTE(review): 1 and 3 are presumably the library's "latin" and "unknown" kinds — confirm against tiny-pinyin.
  type: 1 | 2 | 3
  // The original text of the token
  source: string
  // The converted text of the token (its pinyin when `type` is 2)
  target: string
}
|
||||
|
||||
/**
 * A run of consecutive tokens that are either all pinyin or all non-pinyin.
 */
interface Phase {
  // true when the tokens in `list` are pinyin tokens (type 2), false otherwise
  pinyin: boolean
  // The tokens of this run, in input order
  list: Ret[]
}
|
||||
|
||||
/**
 * Get the list of phases of a string, i.e. the text separated into
 * Chinese (pinyin) and non-Chinese parts.
 *
 * The string is tokenized with `pinyin.parse`; consecutive tokens are grouped
 * into one phase for as long as they agree on being pinyin (type 2) or not.
 *
 * @param str the text to split
 * @returns the phases in input order; empty when `pinyin.parse` yields no tokens
 */
export const getPhases = (str: string): Phase[] => {
  const phases: Phase[] = []
  // The phase currently being filled; undefined until the first token is seen
  let current: Phase | undefined

  for (const ret of pinyin.parse(str)) {
    const isPinyin = ret.type === 2
    // Open a new phase on the first token and whenever the kind flips
    if (current === undefined || current.pinyin !== isPinyin) {
      current = {pinyin: isPinyin, list: []}
      phases.push(current)
    }
    current.list.push(ret)
  }

  return phases
}
|
||||
|
||||
/**
 * Get the list of atoms of a string, e.g. `tool`, `tab`, `汉`, `字`.
 *
 * - In a pinyin phase every token is an atom of its own.
 * - In a non-pinyin phase the token texts are joined and then split into
 *   runs of word characters (`\w+`); everything else is dropped.
 *
 * @param str the text to split
 * @returns the atoms in input order (not lower-cased, not de-duplicated)
 */
export const getAtoms = (str: string): string[] => {
  const atoms: string[] = []

  for (const phase of getPhases(str)) {
    if (phase.pinyin) {
      // all words: each non-empty token is one atom
      for (const {source} of phase.list) {
        if (source) {
          atoms.push(source)
        }
      }
    } else {
      // split: re-join the tokens, then cut the text into word runs
      const text = phase.list.map(({source}) => source).join('')
      // Pushed one by one: spreading a very large array into push() can exceed the engine's argument limit
      for (const word of text.match(/\w+/g) ?? []) {
        atoms.push(word)
      }
    }
  }

  return atoms
}
|
||||
|
||||
/**
 * Normalize a list of strings: lower-case every entry, then remove duplicates.
 *
 * @param atoms the strings to normalize; the array itself is not modified
 * @returns a new array of the lower-cased, de-duplicated strings
 */
const fixStrs = (atoms: string[]): string[] => {
  // Lower-case first so that entries differing only in case collapse into one
  return uniq(atoms.map(atom => atom.toLowerCase()))
}
|
||||
|
||||
/**
 * Get the words of a string: all of its atoms, lower-cased and de-duplicated.
 *
 * @param str the text to split
 * @returns the normalized atoms of `str`
 */
export const getWords = (str: string): string[] => {
  // Collect every atom, then normalize
  return fixStrs(getAtoms(str))
}
|
||||
|
||||
/**
 * Get the pinyin words of a string.
 *
 * 我的世界Minecraft => ['wodeshijie', 'deshijie', 'shijie', 'jie'] + ['wdsj', 'dsj', 'sj', 'j']
 *
 * For every pinyin phase, each suffix of the phase contributes two entries:
 * its full pinyin and the first letters of the pinyin of its characters.
 *
 * 1. only pinyin phases are handled, everything else is ignored
 *
 * @param str the text to convert
 * @returns the entries lower-cased and de-duplicated
 */
export const getWordsPinyin = (str: string): string[] => {
  const result: string[] = []

  for (const phase of getPhases(str)) {
    // only handle pinyin
    if (!phase.pinyin) {
      continue
    }

    // All atoms of the phase: 我的世界 => [我, 的, 世, 界]
    const atoms = phase.list.map(({source}) => source).filter(source => source)

    // All suffixes: [我, 的, 世, 界] => [我的世界, 的世界, 世界, 界]
    const suffixes = atoms.map((_, start) => atoms.slice(start).join(''))

    // Full pinyin version of every suffix
    for (const suffix of suffixes) {
      result.push(pinyin.convertToPinyin(suffix))
    }

    // First-letter version of every suffix: one letter per pinyin token
    for (const suffix of suffixes) {
      let initials = ''
      for (const token of pinyin.parse(suffix)) {
        if (token.type === 2 && token.target) {
          initials += token.target.charAt(0)
        }
      }
      result.push(initials)
    }
  }

  // Lower-case and de-duplicate
  return fixStrs(result)
}
|
Reference in New Issue
Block a user