initial commit
This commit is contained in:
7
.gitignore
vendored
Normal file
7
.gitignore
vendored
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
.DS_Store
|
||||||
|
*/DS_Store
|
||||||
|
zdic.prc
|
||||||
|
zdic.txt
|
||||||
|
zdic_json/*
|
||||||
|
tools/mobi/*
|
||||||
|
zdic-cli-*
|
||||||
88
README.md
Normal file
88
README.md
Normal file
@@ -0,0 +1,88 @@
|
|||||||
|
# zdic-cli
|
||||||
|
|
||||||
|
An alternative, offline, regex-supporting, command-line interface to [zdic (漢典)](https://zdic.net), featuring:
|
||||||
|
|
||||||
|
- No internet connection required
|
||||||
|
- Full text search with regex: a command for finding characters/phrases in body text of definitions.
|
||||||
|
- Colorful text for highlighting entries
|
||||||
|
|
||||||
|
|
||||||
|

|
||||||
|

|
||||||
|

|
||||||
|

|
||||||
|

|
||||||
|
|
||||||
|
|
||||||
|
## Downloads
|
||||||
|
|
||||||
|
Standalone version available for windows, mac and linux. Please refer to the Releases page.
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
zdic-cli runs as a REPL loop, and there are 5 types of commands you can enter:
|
||||||
|
|
||||||
|
```
|
||||||
|
def 甲 display entry for 甲
|
||||||
|
pre 甲 list words that starts with 甲
|
||||||
|
has 甲 乙 ... list words that contains 甲 and 乙 ...
|
||||||
|
txt 甲 list words whose full entry text contains 甲 (regex supported)
|
||||||
|
sel n display entry at previously returned list index n
|
||||||
|
```
|
||||||
|
|
||||||
|
`def` is probably the most common one for simply looking up characters/words. `pre` `has` `txt` will return an enumerated list listing all eligible entries, and `sel 0`/`sel 1`/`sel n` can be used to select from the list.
|
||||||
|
|
||||||
|
|
||||||
|
## Development setup
|
||||||
|
|
||||||
|
**This section is for compiling from source (and is somewhat complex), if you just would like to use the software, please check out the Releases page.**
|
||||||
|
|
||||||
|
### Dependencies
|
||||||
|
|
||||||
|
- node.js/npm
|
||||||
|
- python2. Tested on 2.7. It has to be python 2 instead of 3 because much of the data-processing work were done way back in a time when 2 was the norm. Sorry folks, but if you're a python3 purist feel free to send a PR!
|
||||||
|
- pkg (optional, to build standalone binaries for multiple platforms) `npm install -g pkg`
|
||||||
|
|
||||||
|
### Downloading and compiling the dictionary files
|
||||||
|
|
||||||
|
This repo does not contain the dictionary files themselves as they're too large. Instead, a shell script (`setup.sh`) is provided to automate the process:
|
||||||
|
|
||||||
|
- Automatically download the original Kindle dictionary format (.PRC) from the internet. The download link hardcoded in `setup.sh` might fail in the future, in which case a google search for `汉典.prc` should yield alternative resources.
|
||||||
|
- Automatically download a python library (kroo/mobi-python) for parsing mobi files. It is a rather old library with some oddities, so a find-and-replace script will be automatically run to patch some glitches in the source code :P
|
||||||
|
- `python/to_txt.py` is run to extract a raw `txt` file from the kindle format `prc`.
|
||||||
|
- `python/to_json.py` is run to generate a directory of `json` files from the `txt` to make lookup and formatting more efficient.
|
||||||
|
|
||||||
|
Run the shell script with:
|
||||||
|
|
||||||
|
```
|
||||||
|
sh setup.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
At this point you'll have `zdic.prc`, `zdic.txt` and directory `zdic_json/`. Only `zdic_json` is needed, so if everything went well with the script, you can freely delete the other two and gain some 700MB of free space :)
|
||||||
|
|
||||||
|
### Compiling the binary
|
||||||
|
|
||||||
|
At this point you can also run the software by simply doing:
|
||||||
|
|
||||||
|
```
|
||||||
|
node index.js
|
||||||
|
```
|
||||||
|
|
||||||
|
You can also package it into a binary using
|
||||||
|
|
||||||
|
```
|
||||||
|
pkg .
|
||||||
|
```
|
||||||
|
|
||||||
|
provided you have the node and pkg dependencies installed.
|
||||||
|
|
||||||
|
Tip: to run the software by typing `zdic` anywhere, you can symlink it to `/usr/bin`, e.g.
|
||||||
|
|
||||||
|
```
|
||||||
|
ln -s path/to/zdic-cli/zdic-cli /usr/bin/zdic
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
259
index.js
Normal file
259
index.js
Normal file
@@ -0,0 +1,259 @@
|
|||||||
|
const fs = require("fs")
|
||||||
|
const readline = require('readline');
|
||||||
|
|
||||||
|
const dict_path = __dirname+'/zdic_json'
|
||||||
|
|
||||||
|
// Find the dictionary shard whose filename covers the first character
// of `word`; fall back to the catch-all shard when none matches.
function locate_file(word){
  const head = word.slice(0, 1);
  const names = fs.readdirSync(dict_path);
  for (const name of names) {
    if (name.includes(head)) {
      return name;
    }
  }
  return "extended.json";
}
|
||||||
|
|
||||||
|
// Load and parse one shard file (`file_path` is relative to the
// dictionary folder).
function read_json(file_path){
  const raw = fs.readFileSync(dict_path + "/" + file_path);
  return JSON.parse(raw.toString());
}
|
||||||
|
|
||||||
|
// Look up `word` in its shard.
// Returns [word, entry] when found, or null when absent.
function define(word){
  // Renamed from `dict_path` — the original shadowed the module-level
  // `dict_path` constant with the shard *filename*, which was confusing.
  const shard = locate_file(word);
  const dict = read_json(shard);
  // (Removed a dead `var ret = []` that was never used.)
  if (word in dict){
    return [word, dict[word]];
  }
  return null;
}
|
||||||
|
|
||||||
|
// List every word in `word`'s shard that starts with `word`.
function starts_with(word){
  const ret = [];
  const dict = read_json(locate_file(word));
  // `const w` — the original `for (w in dict)` leaked `w` as an
  // implicit global (a ReferenceError under strict mode).
  for (const w in dict){
    if (w.startsWith(word)){
      ret.push(w);
    }
  }
  return ret;
}
|
||||||
|
|
||||||
|
// List every word (across all shards) that contains each of the query
// terms in `words` as a substring.
function contains(words){
  const matches = [];
  const files = fs.readdirSync(dict_path);
  for (const file of files){
    if (!file.endsWith(".json")){
      continue;
    }
    const dict = read_json(file);
    for (const w in dict){
      if (words.every((term) => w.includes(term))){
        matches.push(w);
      }
    }
  }
  return matches;
}
|
||||||
|
|
||||||
|
// Full-text regex search over the definition bodies of every shard.
// For each matching word, the first matching definition is reported to
// `callback` as [list index, word, context snippet of ~view_len chars];
// the list of matching words is returned (consumed later by `sel n`).
function full_text_search(word,view_len,callback){
  var ret = []
  // half-width of context around the match so the snippet fits view_len
  var pd = Math.floor((view_len-word.length)/2);
  var files = fs.readdirSync(dict_path)
  // NOTE(review): `word` is used verbatim as a regex pattern; an invalid
  // pattern throws here and is caught by the caller's try/catch.
  var re = new RegExp(word);
  for (var i = 0; i < files.length; i++){
    if (!files[i].endsWith(".json")){
      continue;
    }
    var dict = read_json(files[i]);
    for (var w in dict){
      for (var j = 0; j < dict[w]['DEF'].length; j++){
        var idx = dict[w]['DEF'][j].search(re)
        if (idx == -1){
          continue;
        }
        // [running list index, word, snippet clamped at the string start]
        var it = [ret.length,w,dict[w]['DEF'][j].slice(Math.max(idx-pd,0),idx+word.length+pd)];
        ret.push(w);
        callback(it);
        break; // only the first matching definition per word is reported
      }
    }
  }
  return ret;
}
|
||||||
|
|
||||||
|
// Render one dictionary entry as ANSI-colored terminal text:
// headword (green bold), traditional form, pronunciations (yellow),
// then each definition prefixed with 〇, dimmed after the first 。,
// with 《…》 book titles in red and ~ placeholders in yellow.
// Lines are manually wrapped to the terminal width assuming every
// character is 2 columns wide (CJK fullwidth).
function render_def(word,entry){
  var result = ""
  result += "\x1b[32m\033[1m"+word+"\x1b[0m"
  // TRD: traditional-character variant, shown in parentheses when present
  if (entry['TRD'] != ''){
    result += " \x1b[32m\033[1m("+entry['TRD']+")\x1b[0m "
  }
  // PRN: [pinyin, zhuyin]; the bracket pair only opens/closes when
  // pinyin is present
  if (entry['PRN'][0] != ''){
    result += " \x1b[33m[ "+entry['PRN'][0].trim()+""
  }
  if (entry['PRN'][1] != ''){
    result += " , "+entry['PRN'][1].trim()
  }
  if (entry['PRN'][0] != ''){
    result += " ]\x1b[0m"
  }
  result += "\n"
  // usable columns; NOTE(review): process.stdout.columns is undefined
  // when output is piped, which disables wrapping (j >= NaN is false)
  var n = process.stdout.columns-4;

  for (var i = 0; i < entry['DEF'].length; i++){
    result += "\x1b[2m〇\x1b[0m"
    var j = 1;              // current display column (2 per char)
    var t = entry['DEF'][i];
    var isf = true;         // still in the first sentence (undimmed)
    for (var c of t){
      if (c == "《"){
        result += "\x1b[31m" // open red for book titles
      }
      if (c == "~"){
        // ~ stands for the headword: highlight yellow, then restore dim
        result += "\x1b[33m"+c+"\x1b[0m";
        if (!isf){
          result += "\x1b[2m"
        }
      }else{
        result += c;
      }

      if (c == "》"){
        result += "\x1b[0m"  // close red; restore dim if past 1st sentence
        if (!isf){
          result += "\x1b[2m"
        }
      }
      if (c == "。" && isf){
        // dim everything after the first full stop
        result += "\x1b[2m"
        isf = false;
      }
      j+=2;
      if (j >= n){
        // wrap and indent continuation under the 〇 marker
        j = 2;
        result += "\n "

      }
    }
    result+="\x1b[0m\n"
  }
  result += ""
  return result
}
|
||||||
|
|
||||||
|
// Pad (with spaces) or truncate `x` to exactly `n` UTF-16 units.
function char_pad(x,n){
  return String(x).padEnd(n).slice(0, n);
}
|
||||||
|
// One list entry: yellow 4-wide right-aligned index, then the word
// padded/truncated to `n` characters.
function render_item(i,x,n){
  const idx = i.toString().padStart(4);
  return "\x1b[33m" + idx + "\x1b[0m " + char_pad(x, n);
}
|
||||||
|
|
||||||
|
// Lay the result list out as a grid sized to the terminal width.
function render_list(lst){
  const width = 6; // characters shown per word
  const perRow = Math.floor((process.stdout.columns - 2) / (width * 2 + 5));
  let out = "";
  for (let i = 0; i < lst.length; i++){
    // keep CJK characters only for display; fall back to the raw word
    // when stripping leaves nothing
    let shown = lst[i].replace(/[^一-鿿]/g,"");
    if (!shown.length){
      shown = lst[i];
    }
    out += render_item(i, shown, width);
    if (i % perRow == perRow - 1){
      out += "\n";
    }
  }
  return out;
}
|
||||||
|
|
||||||
|
|
||||||
|
// REPL command table: 3-letter command -> [usage string, description].
// The usage/description pair is printed as help when a command is
// entered without an argument.
var commands = {
  "def":["def x ","display entry for x"],
  "pre":["pre x ","list words that starts with x"],
  "has":["has x y ...","list words that contains x and y ..."],
  "txt":["txt x ","list words whose full entry text contains x (regex supported)"],
  "sel":["sel n ","display entry at previously returned list index n"],
}
|
||||||
|
|
||||||
|
// One iteration of the REPL: prompt, parse a command, dispatch, then
// recurse. `prev` is the last non-empty result list, so `sel n` can
// index into it across iterations.
function main(prev){
  var curr = null;   // result list produced by this iteration, if any
  var def = null;    // entry displayed by this iteration, if any
  var fail = false;  // true when the command word was unrecognized
  const rl = readline.createInterface({
    input: process.stdin,
    output: process.stdout
  });
  rl.question('> ', (answer) => {
    try{
      answer = answer.trim();
      // commands are exactly 3 letters; char 3 is the separating space
      var cmd = answer.slice(0,3);
      var arg = answer.slice(4);
      if (!arg.length){
        // bare command -> print its usage/help line
        // (unknown bare input throws here and falls into the catch below)
        console.log(`\x1b[36m${commands[cmd][0]} \x1b[0m${commands[cmd][1]}`);
      }else{
        if (cmd == "def"){
          // `var def` re-declares the hoisted outer `def` on purpose:
          // the zero-result check below reads it
          var def = define(arg);
          if (def){
            console.log(render_def(...def));
          }
        }else if (cmd == "pre"){
          curr = starts_with(arg);
          console.log(render_list(curr));
        }else if (cmd == "has"){
          curr = contains(arg.split(" "));
          console.log(render_list(curr));
        }else if (cmd == "txt"){
          // stream matches via callback; snippet sized to terminal width
          curr = full_text_search(arg,Math.floor((process.stdout.columns-20)/2),function(x){
            console.log(`${render_item(x[0],x[1],6)}\x1b[2m${x[2]}\x1b[0m`);
          });
        }else if (cmd == "sel"){
          // index into the previous result list
          var def = define(prev[parseInt(arg)]);
          if (def){
            console.log(render_def(...def))
          }
        }else{
          console.log("\x1b[31munsupported command.\x1b[0m")
          fail = true
        }
        if (!fail && (def == null && (curr == null || curr.length == 0))){
          console.log("\x1b[2m(0 result returned)\x1b[0m")
        }
      }
    }catch(e){
      // anything unparseable: fall back to treating the whole input as a
      // direct `def` lookup
      console.log("\x1b[31mcommand parse failed. trying as direct query...\x1b[0m")
      // console.log(e)
      try{
        var def = define(answer);
        if (def){
          console.log(render_def(...def));
        }else{
          throw new Error();
        }
      }catch(ee){
        console.log("\x1b[31mcommand parse totally failed.\x1b[0m")
      }
    }
    rl.close()
    // loop, carrying forward the freshest result list for `sel`
    main(curr||prev);
  });

}
|
||||||
|
|
||||||
|
// Startup banner and command summary, then enter the REPL loop.
console.log("╔═════════════════════════════════════════════╗")
console.log("║\x1b[31m 漢 典 CLI \x1b[0m║")
console.log("║Unoffical offline 漢典 (zdic.net) commandline║")
console.log("║\x1b[2m w/ data derived from `汉典.prc` (for Kindle)\x1b[0m║")
console.log("║\x1b[2m Lingdong Huang 2020 \x1b[0m║");
console.log("╚═════════════════════════════════════════════╝")
console.log(`commands: ${Object.keys(commands).map(x=>( "\x1b[36m"+x+"\x1b[0m" )).join(",") }, run without arguments to see help`);
// no `prev` on the first iteration; `sel` only works after a list command
main();
|
||||||
7
package.json
Normal file
7
package.json
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"name":"zdic-cli",
|
||||||
|
"pkg": {
|
||||||
|
"assets": "zdic_json/*"
|
||||||
|
},
|
||||||
|
"bin":"index.js"
|
||||||
|
}
|
||||||
BIN
screenshots/screen000.png
Normal file
BIN
screenshots/screen000.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 226 KiB |
BIN
screenshots/screen001.png
Normal file
BIN
screenshots/screen001.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 370 KiB |
BIN
screenshots/screen002.png
Normal file
BIN
screenshots/screen002.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 413 KiB |
BIN
screenshots/screen003.png
Normal file
BIN
screenshots/screen003.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 407 KiB |
BIN
screenshots/screen004.png
Normal file
BIN
screenshots/screen004.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 454 KiB |
25
setup.sh
Normal file
25
setup.sh
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
# setup.sh — one-shot setup: download the Kindle dictionary, fetch and
# patch the mobi parser, then convert .prc -> .txt -> per-shard .json.
# Run from the repo root: sh setup.sh
echo "downloading the dictionary..."
# NOTE(review): hardcoded mirror; if it goes stale, search for `汉典.prc`
# for an alternative source (see README).
curl https://blog.xjpvictor.info/wp-content/uploads/汉典.prc > zdic.prc

# the remaining tools all live in tools/
cd tools

echo "downloading dependencies..."
# vendor only the `mobi` package out of kroo/mobi-python
git clone https://github.com/kroo/mobi-python
cp -r mobi-python/mobi ./mobi
rm -rf mobi-python

echo "patching bugs in dependencies..."
# find-and-replace patch of the vendored mobi sources (see patch.py)
chmod +x patch.py
./patch.py

echo "converting dictionary to txt... (this might take a while ~10 mins)"
chmod +x to_txt.py
./to_txt.py > ../zdic.txt

echo "converting txt to json..."
mkdir ../zdic_json
chmod +x to_json.py
./to_json.py

echo "done setting up!"
echo "to use the app, either run 'pkg .' to package into a binary, or run 'node index.js' directly."
|
||||||
16
tools/patch.py
Executable file
16
tools/patch.py
Executable file
@@ -0,0 +1,16 @@
|
|||||||
|
#!/usr/bin/env python2.7
# Patch the vendored kroo/mobi-python in place (literal find-and-replace
# on its source files) so it can parse the zdic .prc dictionary.
#
# NOTE(review): each .replace targets an exact upstream source line; if
# upstream ever changes, the replace becomes a silent no-op.

# mobi/__init__.py:
#  - widen the decompressed record slice (recordnum+1 -> recordnum+2)
#    and capture it into `result`
#  - iterate records over a large fixed range instead of trusting the
#    header's 'First Non-book index' field
t = open("mobi/__init__.py",'r').read().replace(
"uncompress_lz77(self.contents[self.records[recordnum]['record Data Offset']:self.records[recordnum+1]['record Data Offset']-self.config['mobi']['extra bytes']])",
"result = uncompress_lz77(self.contents[self.records[recordnum]['record Data Offset']:self.records[recordnum+2]['record Data Offset']-self.config['mobi']['extra bytes']])"
).replace(
"for record in range(1, self.config['mobi']['First Non-book index'] - 1):",
"for record in range(0, 10000000,1):"
)
open("mobi/__init__.py",'w').write(t)

# mobi/lz77.py: comment out the noisy WARNING print statements
t = open("mobi/lz77.py",'r').read().replace(
"print(\"WARNING:","#print(\"WARNING:"
).replace(
"\" beginning of text!",
"#"
)
open("mobi/lz77.py",'w').write(t)
|
||||||
114
tools/to_json.py
Executable file
114
tools/to_json.py
Executable file
@@ -0,0 +1,114 @@
|
|||||||
|
#!/usr/bin/env python2.7
# -*- coding: utf-8 -*-
# Convert ../zdic.txt (raw HTML-ish dump of the Kindle dictionary) into
# per-shard JSON files under ../zdic_json/.
import os
import re
import io
import sys
import json

# Python-2-only hack: make implicit str<->unicode conversions use UTF-8.
reload(sys)
sys.setdefaultencoding('utf8')

txt = open("../zdic.txt",'r').read()
# One entry per "<h2>headword</h2> ... <mbp:pagebreak/>" span.
ent = re.findall(r'<h2>([^<>]*?)</h2>(.*?)<mbp:pagebreak/>', txt)
ent = [(x[0].strip(),x[1]) for x in ent]
# drop entries with an empty headword
ent = [x for x in ent if x[0] != ""]
|
||||||
|
|
||||||
|
def first(l):
    """Return the first element of ``l``, or ``""`` when ``l`` is empty."""
    if len(l) == 0:
        return ""
    return l[0]
||||||
|
|
||||||
|
def rem_bad_char(t):
    # Drop bytes that are not valid UTF-8 (Python 2: str -> unicode).
    return t.decode('utf-8','ignore')
||||||
|
|
||||||
|
def heads_to_name(t):
    # Build a shard filename from the collected head characters, keeping
    # only characters whose codepoint fits in 4 hex digits (<= 0xFFFF)
    # so the resulting filename stays sane.
    u = [x for x in t if len(hex(ord(x)))-2 <= 4]
    u = ("".join(u)).decode("utf-8","ignore")
    #print u
    return u
||||||
|
|
||||||
|
def append_if_ok(heads,word):
    # Probe whether the would-be shard filename is actually creatable on
    # this filesystem; only then record word's first char as a new head.
    # Mutates `heads` on success. Returns True/False.
    try:
        fn = "../zdic_json/"+heads_to_name(heads+[unicode(word)[0].lower()])+".json"
        # create-and-delete round trip as a filename validity check
        open(fn,'wb').write("test.")
        os.remove(fn)
        heads.append(unicode(word)[0].lower())
        return True
    except:
        # bad/unwritable name -> this entry goes to extended.json instead
        print("BAD WORD NAME:",word)
        return False
||||||
|
|
||||||
|
# Main conversion loop: walk entries in dictionary order, grouping them
# into shards keyed by their head characters. A shard is flushed to
# ../zdic_json/<heads>.json once it exceeds 1000 entries or 128 heads.
# Entries whose headword cannot form a valid filename go to extended.json.
heads = []    # first characters of the words collected in the current shard
result = {}   # current shard: word -> entry dict
weirdo = {}   # entries with unusable headwords -> extended.json
for e in ent:

    word = e[0]
    content = e[1]
    print word,

    is_ok = True

    if len(heads) == 0:
        # first entry of a fresh shard
        heads = []
        is_ok = append_if_ok(heads,word)

    else:
        try:
            # does this word start a new head group?
            b = not (word.lower()).startswith(heads[-1].lower())
        except:
            continue
        if b:
            if len(result) > 1000 or len(heads) >= 128:
                # shard is full: write it out and start a new one
                fn = heads_to_name(heads)
                open("../zdic_json/"+fn+".json",'wb').write(json.dumps(result))

                result = {}
                heads = []
                is_ok = append_if_ok(heads,word)
            else:
                is_ok = append_if_ok(heads,word)


    # scrape the fields out of the entry's HTML-ish body
    traditional = first(re.findall(r"#444\">\((.*?)\)",content)) + first(re.findall(r"繁体字:(.*?)</li>",content))
    pinyin = first(re.findall(r"拼音.*?:(.*?)[<\t ]",content))
    zhuyin = first(re.findall(r"注音.*?:(.*?)<",content))
    definition = re.findall(r"<li.*?>(.*?)</li>",content)
    if len(definition) == 0:
        definition = [content]

    # strip tags, leading list numbers, and section markers
    definition = [re.sub(r"<.*?>","",d).strip() for d in definition]
    definition = [re.sub(r"^.{0,1}\d.*?[\.\)]","",d).strip() for d in definition]
    definition = [re.sub(r"===汉英互译===","",d).strip() for d in definition]
    #definition = [re.sub(r"【解释】:","",d).strip() for d in definition]
    traditional = re.sub(r"<.*?>","",traditional).replace(" ","")

    # keep only real definition lines: drop metadata lines (pronunciation
    # echoes, encoding tables, dialect readings, etc.)
    definition = [rem_bad_char(d) for d in definition
        if (len(pinyin) == 0 or (pinyin not in d and pinyin.replace(" ","") not in d)) \
        and (len(zhuyin) == 0 or zhuyin not in d ) \
        and "繁体" not in d
        and "简体" not in d
        and "郑码" not in d
        and "拼音" not in d
        and "粤语:" not in d
        and "潮州话:" not in d
        and "UniCode" not in d
        and "◎" not in d
        and len(d.replace(word,"")) > 0
    ]

    if len(definition) == 0:
        # everything got filtered: fall back to the tag-stripped raw body
        definition = [rem_bad_char(content)]
        definition = [re.sub(r"<.*?>","",d).strip() for d in definition]


    # entry schema consumed by index.js: TRD / PRN=[pinyin,zhuyin] / DEF
    thing = {"TRD":traditional.decode('utf-8',"ignore"),
        "PRN":[pinyin.decode('utf-8',"ignore"), zhuyin.decode('utf-8',"ignore")],
        "DEF":definition,
    }
    if is_ok:
        result[word]=thing
    else:
        weirdo[word]=thing

# NOTE(review): the last in-progress shard (`result`) does not appear to
# be flushed here — only `weirdo` is written; confirm whether trailing
# entries are intentionally dropped.
open("../zdic_json/extended.json",'wb').write(json.dumps(weirdo))
|
||||||
|
|
||||||
8
tools/to_txt.py
Executable file
8
tools/to_txt.py
Executable file
@@ -0,0 +1,8 @@
|
|||||||
|
#!/usr/bin/env python2.7
# Dump every record of ../zdic.prc to stdout as text; the caller
# redirects stdout to ../zdic.txt (see setup.sh).
from mobi import Mobi

book = Mobi("../zdic.prc");
book.parse();

# trailing comma: Python 2 prints records space-separated, no newlines
for record in book:
    print record,
|
||||||
Reference in New Issue
Block a user