-
Notifications
You must be signed in to change notification settings - Fork 29
/
Copy pathawesome-japanese-nlp-resources.json
7949 lines (7949 loc) · 519 KB
/
awesome-japanese-nlp-resources.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
{
"version": "2.1.0",
"contents": {
"Python library": {
"https://github.com/WorksApplications/sudachi.rs": {
"sub_category": "Morphology analysis",
"repository_name": "sudachi.rs",
"user_name": "WorksApplications",
"description": "SudachiPy 0.6* and above are developed as Sudachi.rs.",
"multilingual_descriptions": {
"en": "SudachiPy 0.6* and above are developed as Sudachi.rs.",
"ja": "SudachiPy 0.6以上はSudachi.rsとして開発されています。",
"zh-hans": "SudachiPy 0.6及以上版本已经开发为Sudachi.rs。",
"zh-hant": "SudachiPy 0.6*及以上版本已開發為Sudachi.rs。"
}
},
"https://github.com/mocobeta/janome": {
"sub_category": "Morphology analysis",
"repository_name": "janome",
"user_name": "mocobeta",
"description": "Japanese morphological analysis engine written in pure Python",
"multilingual_descriptions": {
"en": "Japanese morphological analysis engine written in pure Python",
"ja": "純粋なPythonで書かれた日本語形態素解析エンジン",
"zh-hans": "用纯Python编写的日语形态分析引擎",
"zh-hant": "純Python編寫的日語形態分析引擎"
}
},
"https://github.com/SamuraiT/mecab-python3": {
"sub_category": "Morphology analysis",
"repository_name": "mecab-python3",
"user_name": "SamuraiT",
"description": "mecab-python. mecab-python. you can find original version here:http://taku910.github.io/mecab/",
"multilingual_descriptions": {
"en": "mecab-python. mecab-python. you can find original version here:http://taku910.github.io/mecab/",
"ja": "mecab-pythonです。mecab-pythonです。オリジナルバージョンはこちらから見つけることができます:http://taku910.github.io/mecab/。",
"zh-hans": "mecab-python。mecab-python。您可以在此处找到原始版本:http://taku910.github.io/mecab/。",
"zh-hant": "mecab-python。mecab-python。您可以在此處找到原始版本:http://taku910.github.io/mecab/。"
}
},
"https://github.com/ikegami-yukino/mecab": {
"sub_category": "Morphology analysis",
"repository_name": "mecab",
"user_name": "ikegami-yukino",
"description": "This repository is for building Windows 64-bit MeCab binary and improving MeCab Python binding.",
"multilingual_descriptions": {
"en": "This repository is for building Windows 64-bit MeCab binary and improving MeCab Python binding.",
"ja": "このリポジトリは、Windows 64ビット用のMeCabバイナリのビルドと、MeCab Pythonバインディングの改善に使用されます。",
"zh-hans": "这个仓库用于构建Windows 64位MeCab二进制文件并改进MeCab Python绑定。",
"zh-hant": "這個存儲庫是用於構建Windows 64位MeCab二進制文件和改進MeCab Python綁定的。"
}
},
"https://github.com/polm/fugashi": {
"sub_category": "Morphology analysis",
"repository_name": "fugashi",
"user_name": "polm",
"description": "A Cython MeCab wrapper for fast, pythonic Japanese tokenization and morphological analysis.",
"multilingual_descriptions": {
"en": "A Cython MeCab wrapper for fast, pythonic Japanese tokenization and morphological analysis.",
"ja": "高速でPythonicな日本語トークナイズと形態素解析のためのCython MeCabラッパー。",
"zh-hans": "一个Cython MeCab包装器,用于快速、Pythonic的日语分词和形态分析。",
"zh-hant": "一個Cython MeCab包裝器,用於快速、Python式的日語分詞和形態分析。"
}
},
"https://github.com/taishi-i/nagisa": {
"sub_category": "Morphology analysis",
"repository_name": "nagisa",
"user_name": "taishi-i",
"description": "A Japanese tokenizer based on recurrent neural networks",
"multilingual_descriptions": {
"en": "A Japanese tokenizer based on recurrent neural networks",
"ja": "再帰型ニューラルネットワークに基づく日本語トークナイザー",
"zh-hans": "基于循环神经网络的日语分词器",
"zh-hant": "基於循環神經網絡的日語分詞器"
}
},
"https://github.com/ku-nlp/pyknp": {
"sub_category": "Morphology analysis",
"repository_name": "pyknp",
"user_name": "ku-nlp",
"description": "A Python Module for JUMAN++/KNP",
"multilingual_descriptions": {
"en": "A Python Module for JUMAN++/KNP",
"ja": "JUMAN++/KNP用のPythonモジュール",
"zh-hans": "一个用于JUMAN++/KNP的Python模块",
"zh-hant": "一個用於 JUMAN++/KNP 的 Python 模組"
}
},
"https://github.com/chezou/Mykytea-python": {
"sub_category": "Morphology analysis",
"repository_name": "Mykytea-python",
"user_name": "chezou",
"description": "Python wrapper for KyTea",
"multilingual_descriptions": {
"en": "Python wrapper for KyTea",
"ja": "KyTeaのPythonラッパー",
"zh-hans": "KyTea的Python封装程序",
"zh-hant": "KyTea 的 Python 封裝程式"
}
},
"https://github.com/himkt/konoha": {
"sub_category": "Morphology analysis",
"repository_name": "konoha",
"user_name": "himkt",
"description": "Konoha: Simple wrapper of Japanese Tokenizers",
"multilingual_descriptions": {
"en": "Konoha: Simple wrapper of Japanese Tokenizers",
"ja": "Konoha:日本語トークナイザーのシンプルなラッパー",
"zh-hans": "Konoha:日语分词器的简单封装",
"zh-hant": "木葉:日本分詞器的簡單封裝"
}
},
"https://github.com/buruzaemon/natto-py": {
"sub_category": "Morphology analysis",
"repository_name": "natto-py",
"user_name": "buruzaemon",
"description": "natto-py combines the Python programming language with MeCab, the part-of-speech and morphological analyzer for the Japanese language.",
"multilingual_descriptions": {
"en": "natto-py combines the Python programming language with MeCab, the part-of-speech and morphological analyzer for the Japanese language.",
"ja": "natto-pyは、Pythonプログラミング言語と日本語の品詞や形態素解析器であるMeCabを組み合わせたものです。",
"zh-hans": "natto-py将Python编程语言与MeCab(日语的词性和形态分析器)结合起来。",
"zh-hant": "natto-py 將 Python 程式語言與 MeCab 結合,後者是日語詞性和形態分析器。"
}
},
"https://github.com/ikegami-yukino/rakutenma-python": {
"sub_category": "Morphology analysis",
"repository_name": "rakutenma-python",
"user_name": "ikegami-yukino",
"description": "Rakuten MA (Python version)",
"multilingual_descriptions": {
"en": "Rakuten MA (Python version)",
"ja": "楽天MA(Python版)",
"zh-hans": "乐天 MA(Python 版本)",
"zh-hant": "樂天 MA(Python 版本)"
}
},
"https://github.com/daac-tools/python-vaporetto": {
"sub_category": "Morphology analysis",
"repository_name": "python-vaporetto",
"user_name": "daac-tools",
"description": "Vaporetto is a fast and lightweight pointwise prediction based tokenizer. This is a Python wrapper for Vaporetto.",
"multilingual_descriptions": {
"en": " Vaporetto is a fast and lightweight pointwise prediction based tokenizer. This is a Python wrapper for Vaporetto.",
"ja": "Vaporettoは、高速で軽量なポイントワイズ予測ベースのトークナイザーです。これはVaporettoのPythonラッパーです。",
"zh-hans": "Vaporetto是一种快速且轻量级的基于点预测的分词器。这是Vaporetto的Python封装。",
"zh-hant": "Vaporetto 是一個快速且輕量級的基於點預測的分詞器。這是 Vaporetto 的 Python 封裝。"
}
},
"https://github.com/mkartawijaya/dango": {
"sub_category": "Morphology analysis",
"repository_name": "dango",
"user_name": "mkartawijaya",
"description": "An easy to use tokenizer for Japanese text, aimed at language learners and non-linguists",
"multilingual_descriptions": {
"en": "An easy to use tokenizer for Japanese text, aimed at language learners and non-linguists",
"ja": "日本語テキスト用の使いやすいトークナイザー。言語学習者や非言語学者を対象としています。",
"zh-hans": "一个易于使用的日语文本分词器,旨在为语言学习者和非语言学家提供帮助。",
"zh-hant": "一個易於使用的日文分詞器,針對語言學習者和非語言學家。"
}
},
"https://github.com/ku-nlp/rhoknp": {
"sub_category": "Morphology analysis",
"repository_name": "rhoknp",
"user_name": "ku-nlp",
"description": "Yet another Python binding for Juman++/KNP",
"multilingual_descriptions": {
"en": "Yet another Python binding for Juman++/KNP",
"ja": "Juman++/KNPのための別のPythonバインディング",
"zh-hans": "又一个Python绑定Juman++/KNP",
"zh-hant": "又一個 Juman++/KNP 的 Python 綁定程式"
}
},
"https://github.com/daac-tools/python-vibrato": {
"sub_category": "Morphology analysis",
"repository_name": "python-vibrato",
"user_name": "daac-tools",
"description": "Viterbi-based accelerated tokenizer (Python wrapper)",
"multilingual_descriptions": {
"en": " Viterbi-based accelerated tokenizer (Python wrapper)",
"ja": "Viterbiベースの高速トークナイザー(Pythonラッパー)",
"zh-hans": "基于维特比算法的加速分词器(Python封装)",
"zh-hant": "基於維特比算法的加速分詞器(Python封裝)"
}
},
"https://github.com/lighttransport/jagger-python": {
"sub_category": "Morphology analysis",
"repository_name": "jagger-python",
"user_name": "lighttransport",
"description": "Python binding for Jagger(C++ implementation of Pattern-based Japanese Morphological Analyzer)",
"multilingual_descriptions": {
"en": "Python binding for Jagger(C++ implementation of Pattern-based Japanese Morphological Analyzer)",
"ja": "JaggerのPythonバインディング(パターンベースの日本語形態素解析器のC++実装)",
"zh-hans": "Python绑定Jagger(基于模式的日语形态分析器的C++实现)",
"zh-hant": "Jagger的Python綁定(基於模式的日語形態分析器的C++實現)"
}
},
"https://github.com/megagonlabs/ginza": {
"sub_category": "Parsing",
"repository_name": "ginza",
"user_name": "megagonlabs",
"description": "A Japanese NLP Library using spaCy as framework based on Universal Dependencies",
"multilingual_descriptions": {
"en": "A Japanese NLP Library using spaCy as framework based on Universal Dependencies",
"ja": "ユニバーサル依存関係に基づくspaCyフレームワークを使用した日本語NLPライブラリ",
"zh-hans": "一个基于通用依存关系的spaCy框架的日语NLP库。",
"zh-hant": "一個基於Universal Dependencies的spaCy框架的日本NLP庫"
}
},
"https://github.com/ikegami-yukino/cabocha": {
"sub_category": "Parsing",
"repository_name": "cabocha",
"user_name": "ikegami-yukino",
"description": "Yet Another Japanese Dependency Structure Analyzer",
"multilingual_descriptions": {
"en": "Yet Another Japanese Dependency Structure Analyzer",
"ja": "もう一つの日本語依存構造解析ツール",
"zh-hans": "另一个日语依存结构分析器",
"zh-hant": "另一個日本依存結構分析器"
}
},
"https://github.com/KoichiYasuoka/UniDic2UD": {
"sub_category": "Parsing",
"repository_name": "UniDic2UD",
"user_name": "KoichiYasuoka",
"description": "Tokenizer POS-tagger Lemmatizer and Dependency-parser for modern and contemporary Japanese",
"multilingual_descriptions": {
"en": "Tokenizer POS-tagger Lemmatizer and Dependency-parser for modern and contemporary Japanese",
"ja": "現代日本語のためのトークナイザー、POSタガー、レンマ化器、依存構造解析器",
"zh-hans": "现代和当代日语的分词器、词性标注器、词形还原器和依存句法分析器",
"zh-hant": "現代和當代日語的分詞器、詞性標記器、詞形還原器和依存句法分析器"
}
},
"https://github.com/PKSHATechnology-Research/camphr": {
"sub_category": "Parsing",
"repository_name": "camphr",
"user_name": "PKSHATechnology-Research",
"description": "NLP libary for creating pipeline components",
"multilingual_descriptions": {
"en": "Camphr - NLP libary for creating pipeline components",
"ja": "Camphr - パイプラインコンポーネントを作成するためのNLPライブラリ",
"zh-hans": "Camphr - 用于创建管道组件的NLP库",
"zh-hant": "Camphr - 用於創建管道組件的 NLP 庫"
}
},
"https://github.com/KoichiYasuoka/SuPar-UniDic": {
"sub_category": "Parsing",
"repository_name": "SuPar-UniDic",
"user_name": "KoichiYasuoka",
"description": "Tokenizer POS-tagger Lemmatizer and Dependency-parser for modern and contemporary Japanese with BERT models",
"multilingual_descriptions": {
"en": "Tokenizer POS-tagger Lemmatizer and Dependency-parser for modern and contemporary Japanese with BERT models",
"ja": "モダン・コンテンポラリー日本語用のTokenizer、POS-tagger、Lemmatizer、およびDependency-parserには、BERTモデルが使用されます。",
"zh-hans": "使用BERT模型的现代和当代日语分词器、词性标注器、词形还原器和依存句法分析器。",
"zh-hant": "使用BERT模型的現代和當代日語分詞器、詞性標記器、詞形還原器和依存句法分析器"
}
},
"https://github.com/masashi-y/depccg": {
"sub_category": "Parsing",
"repository_name": "depccg",
"user_name": "masashi-y",
"description": "A* CCG Parser with a Supertag and Dependency Factored Model",
"multilingual_descriptions": {
"en": "A* CCG Parser with a Supertag and Dependency Factored Model",
"ja": "スーパータグと依存関係ファクタリングモデルを備えたA* CCGパーサー",
"zh-hans": "带有超级标签和依存因素模型的A* CCG解析器",
"zh-hant": "具有超標記和依存因素模型的A* CCG解析器"
}
},
"https://github.com/ku-nlp/bertknp": {
"sub_category": "Parsing",
"repository_name": "bertknp",
"user_name": "ku-nlp",
"description": "A Japanese dependency parser based on BERT",
"multilingual_descriptions": {
"en": "A Japanese dependency parser based on BERT",
"ja": "BERTに基づく日本語依存構造解析器",
"zh-hans": "基于BERT的日语依存句法分析器",
"zh-hant": "基於BERT的日語依存句法分析器"
}
},
"https://github.com/KoichiYasuoka/esupar": {
"sub_category": "Parsing",
"repository_name": "esupar",
"user_name": "KoichiYasuoka",
"description": "Tokenizer POS-Tagger and Dependency-parser with BERT/RoBERTa/DeBERTa models for Japanese and other languages",
"multilingual_descriptions": {
"en": "Tokenizer POS-Tagger and Dependency-parser with BERT/RoBERTa/DeBERTa models for Japanese and other languages",
"ja": "日本語や他の言語に対応したBERT/RoBERTa/DeBERTaモデルを使用したトークナイザー、POSタガー、依存構造解析器。",
"zh-hans": "使用BERT/RoBERTa/DeBERTa模型的分词器POS-标注器和依存句法分析器,适用于日语和其他语言。",
"zh-hant": "使用BERT/RoBERTa/DeBERTa模型的分詞器POS-標記器和依存句法分析器,適用於日語和其他語言。"
}
},
"https://github.com/passaglia/yomikata": {
"sub_category": "Parsing",
"repository_name": "yomikata",
"user_name": "passaglia",
"description": "Heteronym disambiguation library using a fine-tuned BERT model.",
"multilingual_descriptions": {
"en": "Heteronym disambiguation library using a fine-tuned BERT model.",
"ja": "微調整されたBERTモデルを使用した異音異義語の曖昧性解消ライブラリ。",
"zh-hans": "使用经过微调的BERT模型的异音词消歧库。",
"zh-hant": "使用微調的BERT模型進行異音詞消歧的程式庫。"
}
},
"https://github.com/lighttransport/jdepp-python": {
"sub_category": "Parsing",
"repository_name": "jdepp-python",
"user_name": "lighttransport",
"description": "Python binding for J.DepP(C++ implementation of Japanese Dependency Parsers)",
"multilingual_descriptions": {
"en": "Python binding for J.DepP(C++ implementation of Japanese Dependency Parsers)",
"ja": "J.DepPのPythonバインディング(日本語依存構造解析器のC++実装)",
"zh-hans": "Python绑定J.DepP(日语依存解析器的C++实现)",
"zh-hant": "J.DepP的Python綁定(日本依存句法分析器的C++實現)"
}
},
"https://github.com/daisukebekki/lightblue": {
"sub_category": "Parsing",
"repository_name": "lightblue",
"user_name": "daisukebekki",
"description": "A CCG parser for Japanese with DTS-representations",
"multilingual_descriptions": {
"en": "A CCG parser for Japanese with DTS-representations",
"ja": "DTS表現を使用した日本語のCCGパーサー",
"zh-hans": "一个用于日语的带有DTS表示的CCG解析器",
"zh-hant": "一個具有DTS表示的日語CCG解析器"
}
},
"https://github.com/borh-lab/natsume-simple": {
"sub_category": "Parsing",
"repository_name": "natsume-simple",
"user_name": "borh-lab",
"description": "natsume-simpleは日本語の係り受け関係検索システム",
"multilingual_descriptions": {
"en": "natsume-simple is a Japanese dependency relation search system.",
"ja": "natsume-simpleは日本語の係り受け関係検索システム",
"zh-hans": "natsume-simple是一个日语依存关系搜索系统",
"zh-hant": "natsume-simple是一個日語的依存關係搜索系統。"
}
},
"https://github.com/miurahr/pykakasi": {
"sub_category": "Converter",
"repository_name": "pykakasi",
"user_name": "miurahr",
"description": "Lightweight converter from Japanese Kana-kanji sentences into Kana-Roman.",
"multilingual_descriptions": {
"en": "Lightweight converter from Japanese Kana-kanji sentences into Kana-Roman.",
"ja": "日本語の仮名漢字文から仮名ローマ字文に変換する軽量コンバーター。",
"zh-hans": "将日语假名汉字句子转换为假名罗马字的轻量级转换器。",
"zh-hant": "輕量級的轉換器,可將日文假名漢字句子轉換為假名羅馬字。"
}
},
"https://github.com/polm/cutlet": {
"sub_category": "Converter",
"repository_name": "cutlet",
"user_name": "polm",
"description": "Japanese to romaji converter in Python",
"multilingual_descriptions": {
"en": "Japanese to romaji converter in Python",
"ja": "Pythonでの日本語からローマ字への変換ツール",
"zh-hans": "Python中的日语转罗马字转换器",
"zh-hant": "Python中的日文轉羅馬字轉換器"
}
},
"https://github.com/shihono/alphabet2kana": {
"sub_category": "Converter",
"repository_name": "alphabet2kana",
"user_name": "shihono",
"description": "Convert English alphabet to Katakana",
"multilingual_descriptions": {
"en": "Convert English alphabet to Katakana",
"ja": "英語アルファベットをカタカナに変換してください。",
"zh-hans": "将英文字母转换为片假名",
"zh-hant": "將英文字母轉換為片假名"
}
},
"https://github.com/Greatdane/Convert-Numbers-to-Japanese": {
"sub_category": "Converter",
"repository_name": "Convert-Numbers-to-Japanese",
"user_name": "Greatdane",
"description": "Converts Arabic numerals, or 'western' style numbers, to a Japanese context.",
"multilingual_descriptions": {
"en": "Converts Arabic numerals, or 'western' style numbers, to a Japanese context.",
"ja": "アラビア数字、または「西洋式」の数字を日本の文脈に変換します。",
"zh-hans": "将阿拉伯数字或“西方”风格的数字转换为日本语境。",
"zh-hant": "將阿拉伯數字或「西方」風格的數字轉換為日本上下文。"
}
},
"https://github.com/ikegami-yukino/mozcpy": {
"sub_category": "Converter",
"repository_name": "mozcpy",
"user_name": "ikegami-yukino",
"description": "Mozc for Python: Kana-Kanji converter",
"multilingual_descriptions": {
"en": "Mozc for Python: Kana-Kanji converter",
"ja": "Python用Mozc:かな漢字変換器",
"zh-hans": "Python的Mozc:假名汉字转换器",
"zh-hant": "Python的Mozc:假名漢字轉換器"
}
},
"https://github.com/tachi-hi/jamorasep": {
"sub_category": "Converter",
"repository_name": "jamorasep",
"user_name": "tachi-hi",
"description": "Japanese text parser to separate Hiragana/Katakana string into morae (syllables).",
"multilingual_descriptions": {
"en": "Japanese text parser that separates Hiragana/Katakana strings into morae (syllables).",
"ja": "ひらがな/カタカナの文字列をモーラ(音節)に分割する日本語テキストパーサー。",
"zh-hans": "日语文本解析器,将平假名/片假名字符串分离成音节(拼音)。",
"zh-hant": "日文文本解析器,將平假名/片假名字符串分離成音節(拼音)。"
}
},
"https://github.com/korguchi/text2phoneme": {
"sub_category": "Converter",
"repository_name": "text2phoneme",
"user_name": "korguchi",
"description": "日本語文を音素列へ変換するスクリプト",
"multilingual_descriptions": {
"en": "Script to convert Japanese text into phoneme sequence.",
"ja": "日本語文を音素列へ変換するスクリプト",
"zh-hans": "将日语文本转换为音素序列的脚本",
"zh-hant": "將日文轉換為音素序列的腳本"
}
},
"https://github.com/opencollector/jntajis-python": {
"sub_category": "Converter",
"repository_name": "jntajis-python",
"user_name": "opencollector",
"description": "A fast character conversion and transliteration library based on the scheme defined for Japan National Tax Agency (国税庁) 's",
"multilingual_descriptions": {
"en": "A fast character conversion and transliteration library based on the scheme defined for Japan National Tax Agency (国税庁) 's corporate number (法人番号) system.",
"ja": "日本国税庁の法人番号システムで定義されたスキームに基づく、高速な文字変換および転写ライブラリ。",
"zh-hans": "一个快速的字符转换和音译库,基于日本国税局的法人番号系统定义的方案。",
"zh-hant": "一個快速的字符轉換和音譯庫,基於日本國稅廳的法人番號系統定義的方案。"
}
},
"https://github.com/eggplants/wiredify": {
"sub_category": "Converter",
"repository_name": "wiredify",
"user_name": "eggplants",
"description": "Convert japanese kana from ba-bi-bu-be-bo into va-vi-vu-ve-vo",
"multilingual_descriptions": {
"en": "Convert japanese kana from ba-bi-bu-be-bo into va-vi-vu-ve-vo",
"ja": "「ばびぶべぼ」から「ヴァヴィヴヴェヴォ」に変換してください。",
"zh-hans": "将日语假名从ba-bi-bu-be-bo转换为va-vi-vu-ve-vo",
"zh-hant": "將日文假名從ba-bi-bu-be-bo轉換為va-vi-vu-ve-vo\n將日文假名從ba-bi-bu-be-bo轉換為va-vi-vu-ve-vo"
}
},
"https://github.com/34j/mecab-text-cleaner": {
"sub_category": "Converter",
"repository_name": "mecab-text-cleaner",
"user_name": "34j",
"description": "Simple Python package (CLI/Python API) for getting japanese readings (yomigana) and accents using MeCab.",
"multilingual_descriptions": {
"en": "Simple Python package (CLI/Python API) for getting japanese readings (yomigana) and accents using MeCab.",
"ja": "MeCabを使用して、日本語の読み仮名とアクセントを取得するためのシンプルなPythonパッケージ(CLI/Python API)。",
"zh-hans": "使用MeCab获取日语读音(yomigana)和重音的简单Python包(CLI/Python API)。",
"zh-hant": "使用MeCab獲取日文讀音(yomigana)和重音的簡單Python套件(CLI/Python API)。"
}
},
"https://github.com/tkscode/pynormalizenumexp": {
"sub_category": "Converter",
"repository_name": "pynormalizenumexp",
"user_name": "tkscode",
"description": "数量表現や時間表現の抽出・正規化を行うNormalizeNumexpのPython実装",
"multilingual_descriptions": {
"en": "Python implementation of NormalizeNumexp for extracting and normalizing quantity expressions and time expressions.",
"ja": "数量表現や時間表現の抽出・正規化を行うNormalizeNumexpのPython実装\n\n数量表現や時間表現の抽出・正規化を行うNormalizeNumexpのPython実装",
"zh-hans": "使用Python实现NormalizeNumexp,用于提取和规范化数量和时间表达式。",
"zh-hant": "執行NormalizeNumexp的Python實作,進行數量表達和時間表達的提取和規範化。"
}
},
"https://github.com/nagataaaas/Jusho": {
"sub_category": "Converter",
"repository_name": "Jusho",
"user_name": "nagataaaas",
"description": "Easy wrapper for the postal code data of Japan",
"multilingual_descriptions": {
"en": "Easy wrapper for the postal code data of Japan",
"ja": "日本の郵便番号データの簡単なラッパー",
"zh-hans": "日本邮政编码数据的简易封装器",
"zh-hant": "日本郵政編碼數據的簡單封裝"
}
},
"https://github.com/sea-turt1e/yurenizer": {
"sub_category": "Converter",
"repository_name": "yurenizer",
"user_name": "sea-turt1e",
"description": "Japanese text normalizer that resolves spelling inconsistencies. (日本語表記揺れ解消ツール)",
"multilingual_descriptions": {
"en": "Japanese text normalizer that resolves spelling inconsistencies. (日本語表記揺れ解消ツール",
"ja": "日本語テキストの表記の一貫性を解消する日本語テキスト正規化ツール",
"zh-hans": "日语文本规范化工具,解决拼写不一致问题。",
"zh-hant": "日文文本正規化工具,解決拼寫不一致。"
}
},
"https://github.com/ikegami-yukino/neologdn": {
"sub_category": "Preprocessor",
"repository_name": "neologdn",
"user_name": "ikegami-yukino",
"description": "Japanese text normalizer for mecab-neologd",
"multilingual_descriptions": {
"en": "Japanese text normalizer for mecab-neologd",
"ja": "mecab-neologd用の日本語テキスト正規化ツール",
"zh-hans": "针对mecab-neologd的日语文本规范化工具",
"zh-hant": "mecab-neologd 的日文文本正規化工具"
}
},
"https://github.com/ikegami-yukino/jaconv": {
"sub_category": "Preprocessor",
"repository_name": "jaconv",
"user_name": "ikegami-yukino",
"description": "Pure-Python Japanese character interconverter for Hiragana, Katakana, Hankaku, and Zenkaku",
"multilingual_descriptions": {
"en": "A Python-based tool for converting Japanese characters between Hiragana, Katakana, Hankaku, and Zenkaku.",
"ja": "ひらがな、カタカナ、半角、全角のための純粋なPython日本語文字相互変換器",
"zh-hans": "纯Python日语字符互转器,支持平假名、片假名、半角和全角。",
"zh-hant": "純Python日文字符互轉器,支援平假名、片假名、半角和全角。"
}
},
"https://github.com/studio-ousia/mojimoji": {
"sub_category": "Preprocessor",
"repository_name": "mojimoji",
"user_name": "studio-ousia",
"description": "A fast converter between Japanese hankaku and zenkaku characters",
"multilingual_descriptions": {
"en": "A quick converter for Japanese half-width and full-width characters.",
"ja": "日本語半角と全角の素早い変換ツール",
"zh-hans": "一个快速转换日语半角和全角字符的转换器",
"zh-hant": "一個快速轉換日文半角和全角字符的轉換器"
}
},
"https://github.com/ku-nlp/text-cleaning": {
"sub_category": "Preprocessor",
"repository_name": "text-cleaning",
"user_name": "ku-nlp",
"description": "A powerful text cleaner for Japanese web texts",
"multilingual_descriptions": {
"en": "A powerful text cleaner for Japanese web texts",
"ja": "日本語のウェブテキスト用の強力なテキストクリーナー",
"zh-hans": "一款强大的日语网页文本清理工具",
"zh-hant": "一款強大的日文網頁文本清理工具"
}
},
"https://github.com/HojiChar/HojiChar": {
"sub_category": "Preprocessor",
"repository_name": "HojiChar",
"user_name": "HojiChar",
"description": "複数の前処理を構成して管理するテキスト前処理ツール",
"multilingual_descriptions": {
"en": "A text preprocessing tool that configures and manages multiple preprocessing steps.",
"ja": "複数の前処理を構成して管理するテキスト前処理ツール",
"zh-hans": "管理多个前处理的文本前处理工具",
"zh-hant": "構成並管理多個前處理的文字前處理工具"
}
},
"https://github.com/juno-rmks/utsuho": {
"sub_category": "Preprocessor",
"repository_name": "utsuho",
"user_name": "juno-rmks",
"description": "Utsuho is a Python module that facilitates bidirectional conversion between half-width katakana and full-width katakana in Japanese.",
"multilingual_descriptions": {
"en": "Utsuho is a Python module that facilitates bidirectional conversion between half-width katakana and full-width katakana in Japanese.",
"ja": "Utsuhoは、日本語の半角カタカナと全角カタカナの間で双方向変換を容易にするPythonモジュールです。",
"zh-hans": "Utsuho是一个Python模块,用于在日语中半角片假名和全角片假名之间进行双向转换的工具。",
"zh-hant": "Utsuho是一個Python模組,用於在日語中半角片假名和全角片假名之間進行雙向轉換。"
}
},
"https://github.com/Hizuru3/python-habachen": {
"sub_category": "Preprocessor",
"repository_name": "python-habachen",
"user_name": "Hizuru3",
"description": "Yet Another Fast Japanese String Converter",
"multilingual_descriptions": {
"en": "Yet Another Fast Japanese String Converter",
"ja": "もう一つの高速な日本語文字列変換ツール",
"zh-hans": "另一个快速的日语字符串转换器",
"zh-hant": "另一個快速的日本字符串轉換器"
}
},
"https://github.com/bikatr7/kairyou": {
"sub_category": "Preprocessor",
"repository_name": "kairyou",
"user_name": "bikatr7",
"description": "Quickly preprocesses Japanese text using NLP/NER from SpaCy for Japanese translation or other NLP tasks.",
"multilingual_descriptions": {
"en": "Quickly preprocesses Japanese text using NLP/NER from SpaCy for Japanese translation or other NLP tasks.",
"ja": "SpaCyを使用して、日本語テキストをNLP/NERで迅速に前処理し、日本語翻訳やその他のNLPタスクに使用します。",
"zh-hans": "使用SpaCy快速预处理日文文本,以便进行日文翻译或其他自然语言处理任务。",
"zh-hant": "使用SpaCy快速預處理日文文本,以進行日文翻譯或其他NLP任務。"
}
},
"https://github.com/megagonlabs/bunkai": {
"sub_category": "Sentence spliter",
"repository_name": "bunkai",
"user_name": "megagonlabs",
"description": "Sentence boundary disambiguation tool for Japanese texts (日本語文境界判定器)",
"multilingual_descriptions": {
"en": "Sentence boundary disambiguation tool for Japanese texts (日本語文境界判定器)",
"ja": "日本語テキストの文境界曖昧性解消ツール (にほんごぶんきょうかいはんていき)",
"zh-hans": "日语文本句子边界消歧工具",
"zh-hant": "日本語文境界判定工具"
}
},
"https://github.com/hppRC/japanese-sentence-breaker": {
"sub_category": "Sentence spliter",
"repository_name": "japanese-sentence-breaker",
"user_name": "hppRC",
"description": "Japanese Sentence Breaker",
"multilingual_descriptions": {
"en": "Japanese Sentence Breaker",
"ja": "日本語の文分割器",
"zh-hans": "日语句子分割器",
"zh-hant": "日本語句子分解器"
}
},
"https://github.com/ikegami-yukino/sengiri": {
"sub_category": "Sentence spliter",
"repository_name": "sengiri",
"user_name": "ikegami-yukino",
"description": "Yet another sentence-level tokenizer for the Japanese text",
"multilingual_descriptions": {
"en": "Yet another sentence-level tokenizer for the Japanese text",
"ja": "日本語テキストのための別の文レベルのトークナイザー",
"zh-hans": "又一个用于日语文本的句子级分词器",
"zh-hant": "另一個針對日文文本的句子級分詞器"
}
},
"https://github.com/google/budoux": {
"sub_category": "Sentence spliter",
"repository_name": "budoux",
"user_name": "google",
"description": "Standalone. Small. Language-neutral. BudouX is the successor to Budou, the machine learning powered line break organizer tool.",
"multilingual_descriptions": {
"en": "Standalone. Small. Language-neutral. BudouX is the successor to Budou, the machine learning powered line break organizer tool.",
"ja": "スタンドアロン。小さい。言語に依存しない。BudouXは、機械学習による行の整理ツールであるBudouの後継者です。",
"zh-hans": "独立的。小巧的。语言中立的。BudouX是机器学习驱动的断行组织工具Budou的继承者。",
"zh-hant": "獨立的。小巧的。語言中立的。BudouX 是機器學習驅動的斷行整理工具 Budou 的後繼者。"
}
},
"https://github.com/wwwcojp/ja_sentence_segmenter": {
"sub_category": "Sentence spliter",
"repository_name": "ja_sentence_segmenter",
"user_name": "wwwcojp",
"description": "japanese sentence segmentation library for python",
"multilingual_descriptions": {
"en": "japanese sentence segmentation library for python",
"ja": "Python用の日本語文分割ライブラリ",
"zh-hans": "Python的日语句子分割库",
"zh-hant": "Python 的日文句子分割程式庫"
}
},
"https://github.com/mkartawijaya/hasami": {
"sub_category": "Sentence spliter",
"repository_name": "hasami",
"user_name": "mkartawijaya",
"description": "A tool to perform sentence segmentation on Japanese text",
"multilingual_descriptions": {
"en": "A tool to perform sentence segmentation on Japanese text",
"ja": "日本語テキストの文分割を実行するツール",
"zh-hans": "一个用于对日语文本进行句子分割的工具",
"zh-hant": "一個用於對日文文本進行句子分割的工具"
}
},
"https://github.com/alinear-corp/kuzukiri": {
"sub_category": "Sentence spliter",
"repository_name": "kuzukiri",
"user_name": "alinear-corp",
"description": "Japanese Text Segmenter for Python written in Rust",
"multilingual_descriptions": {
"en": "Japanese Text Segmenter for Python written in Rust",
"ja": "Rustで書かれたPython用の日本語テキストセグメンター",
"zh-hans": "用Rust编写的Python日语文本分段器",
"zh-hant": "用 Rust 編寫的 Python 日文文本分割器"
}
},
"https://github.com/hkiyomaru/ja-senter-benchmark": {
"sub_category": "Sentence spliter",
"repository_name": "ja-senter-benchmark",
"user_name": "hkiyomaru",
"description": "Comparison of Japanese Sentence Segmentation Tools",
"multilingual_descriptions": {
"en": "Comparison of Japanese Sentence Segmentation Tools",
"ja": "日本語文分割ツールの比較",
"zh-hans": "日语句子分割工具比较",
"zh-hant": "日本語句子分割工具比較"
}
},
"https://github.com/ikegami-yukino/oseti": {
"sub_category": "Sentiment analysis",
"repository_name": "oseti",
"user_name": "ikegami-yukino",
"description": "Dictionary based Sentiment Analysis for Japanese",
"multilingual_descriptions": {
"en": "Dictionary based Sentiment Analysis for Japanese",
"ja": "日本語の辞書ベースの感情分析",
"zh-hans": "基于词典的日语情感分析",
"zh-hant": "基於詞典的日語情感分析"
}
},
"https://github.com/liaoziyang/negapoji": {
"sub_category": "Sentiment analysis",
"repository_name": "negapoji",
"user_name": "liaoziyang",
"description": "Japanese negative positive classification.日本語文書のネガポジを判定。",
"multilingual_descriptions": {
"en": "Japanese document sentiment analysis to determine negative or positive.",
"ja": "日本語のネガティブ・ポジティブの分類。日本語の文章のネガティブ・ポジティブを判定します。",
"zh-hans": "日语文档的负面和正面分类判断。",
"zh-hant": "日本語文書的正負面分類。"
}
},
"https://github.com/ikegami-yukino/pymlask": {
"sub_category": "Sentiment analysis",
"repository_name": "pymlask",
"user_name": "ikegami-yukino",
"description": "Emotion analyzer for Japanese text",
"multilingual_descriptions": {
"en": "Emotion analyzer for Japanese text",
"ja": "日本語テキストの感情分析ツール",
"zh-hans": "日语文本情感分析器",
"zh-hant": "日文文本情感分析器"
}
},
"https://github.com/Hironsan/asari": {
"sub_category": "Sentiment analysis",
"repository_name": "asari",
"user_name": "Hironsan",
"description": "Japanese sentiment analyzer implemented in Python.",
"multilingual_descriptions": {
"en": "Japanese sentiment analyzer implemented in Python.",
"ja": "Pythonで実装された日本語感情分析器。",
"zh-hans": "Python实现的日语情感分析器。",
"zh-hant": "使用Python實現的日語情感分析器。"
}
},
"https://github.com/MorinoseiMorizo/jparacrawl-finetune": {
"sub_category": "Machine translation",
"repository_name": "jparacrawl-finetune",
"user_name": "MorinoseiMorizo",
"description": "An example usage of JParaCrawl pre-trained Neural Machine Translation (NMT) models.",
"multilingual_descriptions": {
"en": "An example usage of JParaCrawl pre-trained Neural Machine Translation (NMT) models.",
"ja": "JParaCrawlの事前学習済みニューラル機械翻訳(NMT)モデルの使用例。",
"zh-hans": "JParaCrawl预训练神经机器翻译(NMT)模型的示例用法。",
"zh-hant": "JParaCrawl 預訓練神經機器翻譯 (NMT) 模型的使用示例。"
}
},
"https://github.com/Mao-KU/JASS": {
"sub_category": "Machine translation",
"repository_name": "JASS",
"user_name": "Mao-KU",
"description": "JASS: Japanese-specific Sequence to Sequence Pre-training for Neural Machine Translation (LREC2020) & Linguistically Driven Multi-Task Pre-Training for Low-Resource Neural Machine Translation (ACM TALLIP)",
"multilingual_descriptions": {
"en": "JASS: Japanese-specific Sequence to Sequence Pre-training for Neural Machine Translation (LREC2020) & Linguistically Driven Multi-Task Pre-Training for Low-Resource Neural Machine Translation (ACM TALLIP)",
"ja": "JASS:ニューラル機械翻訳のための日本語固有のシーケンス・トゥ・シーケンス事前学習(LREC2020)&言語学的に駆動された低リソースニューラル機械翻訳のためのマルチタスク事前学習(ACM TALLIP)",
"zh-hans": "JASS:面向日本特定序列到序列预训练的神经机器翻译(LREC2020)和基于语言驱动的多任务预训练的低资源神经机器翻译(ACM TALLIP)。",
"zh-hant": "JASS:針對日本特定序列的序列到序列預訓練,用於神經機器翻譯(LREC2020)和基於語言學的多任務預訓練,用於低資源神經機器翻譯(ACM TALLIP)。"
}
},
"https://github.com/cl-tohoku/PheMT": {
"sub_category": "Machine translation",
"repository_name": "PheMT",
"user_name": "cl-tohoku",
"description": "A phenomenon-wise evaluation dataset for Japanese-English machine translation robustness. The dataset is based on the MTNT dataset, with additional annotations of four linguistic phenomena; Proper Noun, Abbreviated Noun, Colloquial Expression, and Variant. COLING 2020.",
"multilingual_descriptions": {
"en": "A phenomenon-wise evaluation dataset for Japanese-English machine translation robustness. The dataset is based on the MTNT dataset, with additional annotations of four linguistic phenomena; Proper Noun, Abbreviated Noun, Colloquial Expression, and Variant. COLING 2020.",
"ja": "日英機械翻訳の堅牢性に関する現象別評価データセット。このデータセットは、MTNTデータセットをベースに、固有名詞、略語、口語表現、および変異形の4つの言語現象の追加注釈を含んでいます。COLING 2020。",
"zh-hans": "一份针对日英机器翻译鲁棒性的现象级评估数据集。该数据集基于MTNT数据集,额外注释了四种语言现象:专有名词、缩写名词、口语表达和变体。COLING 2020。",
"zh-hant": "一個針對日英機器翻譯韌性的現象級評估數據集。該數據集基於MTNT數據集,並附加了四種語言現象的註釋;專有名詞、縮寫名詞、口語表達和變體。COLING 2020。"
}
},
"https://github.com/ku-nlp/VISA": {
"sub_category": "Machine translation",
"repository_name": "VISA",
"user_name": "ku-nlp",
"description": "An ambiguous subtitles dataset for visual scene-aware machine translation",
"multilingual_descriptions": {
"en": "An ambiguous subtitles dataset for visual scene-aware machine translation",
"ja": "視覚シーンに関する機械翻訳のための曖昧な字幕データセット",
"zh-hans": "一份用于视觉场景感知机器翻译的模糊字幕数据集",
"zh-hant": "一個用於視覺場景感知機器翻譯的模糊字幕數據集"
}
},
"https://github.com/chakki-works/namaco": {
"sub_category": "Named entity recognition",
"repository_name": "namaco",
"user_name": "chakki-works",
"description": "Character Based Named Entity Recognition.",
"multilingual_descriptions": {
"en": "Character Based Named Entity Recognition.",
"ja": "文字ベースの固有表現認識。",
"zh-hans": "基于字符的命名实体识别。",
"zh-hant": "基於字元的命名實體識別。"
}
},
"https://github.com/chakki-works/entitypedia": {
"sub_category": "Named entity recognition",
"repository_name": "entitypedia",
"user_name": "chakki-works",
"description": "Entitypedia is an Extended Named Entity Dictionary from Wikipedia.",
"multilingual_descriptions": {
"en": "Entitypedia is an Extended Named Entity Dictionary from Wikipedia.",
"ja": "Entitypediaは、Wikipediaからの拡張された固有名詞辞書です。",
"zh-hans": "Entitypedia是来自维基百科的扩展命名实体词典。",
"zh-hant": "Entitypedia是一個從維基百科擴展出來的命名實體詞典。"
}
},
"https://github.com/ken11/noyaki": {
"sub_category": "Named entity recognition",
"repository_name": "noyaki",
"user_name": "ken11",
"description": "Converts character span label information to tokenized text-based label information.",
"multilingual_descriptions": {
"en": "Converts character span label information to tokenized text-based label information.",
"ja": "文字の範囲ラベル情報をトークン化されたテキストベースのラベル情報に変換します。",
"zh-hans": "将字符跨度标签信息转换为基于分词文本的标签信息。",
"zh-hant": "將字符跨度標籤信息轉換為基於分詞文本的標籤信息。"
}
},
"https://github.com/ken11/bert-japanese-ner-finetuning": {
"sub_category": "Named entity recognition",
"repository_name": "bert-japanese-ner-finetuning",
"user_name": "ken11",
"description": "Code to perform finetuning of the BERT model. BERTモデルのファインチューニングで固有表現抽出用タスクのモデルを作成・使用するサンプルです",
"multilingual_descriptions": {
"en": "This is a sample code for creating and using a model for named entity recognition task through finetuning of the BERT model.",
"ja": "Code to perform finetuning of the BERT model. BERTモデルのファインチューニングで固有表現抽出用タスクのモデルを作成・使用するサンプルです",
"zh-hans": "用于BERT模型微调的代码。这是一个用于创建和使用用于命名实体识别任务的模型的示例。",
"zh-hant": "用於BERT模型微調的代碼。這是用於創建和使用用於實體識別任務的模型的BERT模型微調示例。"
}
},
"https://github.com/aih-uth/joint-information-extraction-hs": {
"sub_category": "Named entity recognition",
"repository_name": "joint-information-extraction-hs",
"user_name": "aih-uth",
"description": "詳細なアノテーション基準に基づく症例報告コーパスからの固有表現及び関係の抽出精度の推論を行うコード",
"multilingual_descriptions": {
"en": "Code for inferring the accuracy of named entity and relation extraction from a case report corpus based on detailed annotation criteria.",
"ja": "詳細なアノテーション基準に基づく症例報告コーパスからの固有表現及び関係の抽出精度の推論を行うコード",
"zh-hans": "基于详细的注释标准的病例报告语料库,进行实体和关系抽取精度推理的代码。",
"zh-hant": "從基於詳細註釋標準的病例報告語料庫中進行固有表達和關係抽取精度推論的代碼。"
}
},
"https://github.com/geonlp-platform/pygeonlp": {
"sub_category": null,
"repository_name": "pygeonlp",
"user_name": "geonlp-platform",
"description": "pygeonlp, A python module for geotagging Japanese texts.",
"multilingual_descriptions": {
"en": "pygeonlp, A python module for geotagging Japanese texts.",
"ja": "pygeonlpは、日本語テキストのジオタギングに使用するPythonモジュールです。",
"zh-hans": "pygeonlp,一个用于对日语文本进行地理标记的Python模块。",
"zh-hant": "pygeonlp,一個用於對日文文本進行地理標記的Python模塊。"
}
},
"https://github.com/jurabiinc/bert-ner-japanese": {
"sub_category": "Named entity recognition",
"repository_name": "bert-ner-japanese",
"user_name": "jurabiinc",
"description": "BERTによる日本語固有表現抽出のファインチューニング用プログラム",
"multilingual_descriptions": {
"en": "Program for fine-tuning Japanese named entity recognition using BERT",
"ja": "BERTによる日本語固有表現抽出のファインチューニング用プログラム",
"zh-hans": "使用BERT进行日语命名实体抽取的微调程序",
"zh-hant": "BERT進行日語固有表現抽取的微調程式"
}
},
"https://github.com/tsmatz/huggingface-finetune-japanese": {
"sub_category": "Named entity recognition",
"repository_name": "huggingface-finetune-japanese",
"user_name": "tsmatz",
"description": "Examples to finetune encoder-only and encoder-decoder transformers for Japanese language (Hugging Face) Resources",
"multilingual_descriptions": {
"en": "Examples to finetune encoder-only and encoder-decoder transformers for Japanese language (Hugging Face) Resources",
"ja": "日本語の言語(Hugging Face)リソースのためにエンコーダーのみとエンコーダーデコーダーのトランスフォーマーを微調整するための例",
"zh-hans": "用于调整仅编码器和编码器-解码器变压器以适应日语的示例(Hugging Face)资源",
"zh-hant": "用於日語語言(Hugging Face)資源的調整編碼器和編碼器-解碼器變壓器的示例\n用於日語語言(Hugging Face)資源的調整編碼器和編碼器-解碼器變壓器的示例"
}
},
"https://github.com/kha-white/manga-ocr": {
"sub_category": "OCR",
"repository_name": "manga-ocr",
"user_name": "kha-white",
"description": "About Optical character recognition for Japanese text, with the main focus being Japanese manga",
"multilingual_descriptions": {
"en": "About Optical character recognition for Japanese text, with the main focus being Japanese manga",
"ja": "日本語のマンガを中心に、光学文字認識についての説明。",
"zh-hans": "关于日文文本的光学字符识别,主要关注于日本漫画。",
"zh-hant": "關於日文文字的光學字符識別,主要聚焦於日本漫畫。"
}
},
"https://github.com/kha-white/mokuro": {
"sub_category": "OCR",
"repository_name": "mokuro",
"user_name": "kha-white",
"description": "Read Japanese manga inside browser with selectable text.",
"multilingual_descriptions": {
"en": "Read Japanese manga inside browser with selectable text.",
"ja": "ブラウザ内で選択可能なテキストで日本のマンガを読む。",
"zh-hans": "在浏览器中阅读日本漫画,可选择文本。",
"zh-hant": "在瀏覽器中閱讀日本漫畫,並可選擇文字。"
}
},
"https://github.com/yas-sim/handwritten-japanese-ocr": {
"sub_category": "OCR",
"repository_name": "handwritten-japanese-ocr",
"user_name": "yas-sim",
"description": "Handwritten Japanese OCR demo using touch panel to draw the input text using Intel OpenVINO toolkit",
"multilingual_descriptions": {
"en": "Handwritten Japanese OCR demo using touch panel to draw the input text using Intel OpenVINO toolkit",
"ja": "インテルのOpenVINOツールキットを使用して、タッチパネルを使って入力テキストを描画する手書き日本語OCRデモ",
"zh-hans": "使用Intel OpenVINO工具包,通过触摸面板绘制输入文本的手写日语OCR演示。",
"zh-hant": "手寫日文OCR演示,使用觸控面板繪製輸入文本,使用Intel OpenVINO工具包。"
}
},
"https://github.com/tanreinama/OCR_Japanease": {
"sub_category": "OCR",
"repository_name": "OCR_Japanease",
"user_name": "tanreinama",
"description": "日本語OCR",
"multilingual_descriptions": {
"en": "Japanese OCR",
"ja": "Japanese OCR",
"zh-hans": "日语OCR",
"zh-hant": "日本語OCR"
}
},
"https://github.com/ndl-lab/ndlocr_cli": {
"sub_category": "OCR",
"repository_name": "ndlocr_cli",
"user_name": "ndl-lab",
"description": "NDLOCRのアプリケーション",
"multilingual_descriptions": {
"en": "NDLOCR application",
"ja": "NDLOCRのアプリケーション",
"zh-hans": "NDLOCR应用程序",
"zh-hant": "NDLOCR的應用程式"
}
},
"https://github.com/clovaai/donut": {
"sub_category": "OCR",
"repository_name": "donut",
"user_name": "clovaai",
"description": "Official Implementation of OCR-free Document Understanding Transformer (Donut) and Synthetic Document Generator (SynthDoG), ECCV 2022",
"multilingual_descriptions": {
"en": "Official Implementation of OCR-free Document Understanding Transformer (Donut) and Synthetic Document Generator (SynthDoG), ECCV 2022",
"ja": "OCRフリー文書理解トランスフォーマー(Donut)および合成文書ジェネレーター(SynthDoG)の公式実装、ECCV 2022",
"zh-hans": "OCR-free文档理解变压器(Donut)和合成文档生成器(SynthDoG)的官方实现,ECCV 2022",
"zh-hant": "OCR-free文件理解轉換器(Donut)和合成文件生成器(SynthDoG)的官方實施,ECCV 2022"
}
},
"https://github.com/ttop32/JMTrans": {
"sub_category": "OCR",
"repository_name": "JMTrans",
"user_name": "ttop32",
"description": "get japanese manga from url to translate manga image",
"multilingual_descriptions": {
"en": "Manga translator - retrieve Japanese manga from URL to translate manga images.",
"ja": "マンガ翻訳者 - URLから日本のマンガを取得してマンガ画像を翻訳する",
"zh-hans": "漫画翻译器 - 从网址获取日本漫画以翻译漫画图像",
"zh-hant": "漫畫翻譯器 - 從網址獲取日本漫畫以翻譯漫畫圖像"
}
},
"https://github.com/ducanh841988/Kindai-OCR": {
"sub_category": "OCR",
"repository_name": "Kindai-OCR",
"user_name": "ducanh841988",
"description": "OCR system for recognizing modern Japanese magazines",
"multilingual_descriptions": {
"en": "OCR system for recognizing modern Japanese magazines",
"ja": "現代日本の雑誌を認識するOCRシステム",
"zh-hans": "用于识别现代日本杂志的OCR系统",
"zh-hant": "識別現代日本雜誌的OCR系統"
}
},
"https://github.com/ndl-lab/text_recognition": {
"sub_category": "OCR",
"repository_name": "text_recognition",
"user_name": "ndl-lab",
"description": "NDLOCR用テキスト認識モジュール",
"multilingual_descriptions": {
"en": "Text recognition module for NDLOCR.",
"ja": "NDLOCR用テキスト認識モジュール",
"zh-hans": "NDLOCR文本识别模块",
"zh-hant": "NDLOCR使用文字識別模組"
}
},
"https://github.com/blueaxis/Poricom": {
"sub_category": "OCR",
"repository_name": "Poricom",
"user_name": "blueaxis",
"description": "Optical character recognition in manga images. Manga OCR desktop application",
"multilingual_descriptions": {
"en": "Optical character recognition in manga images. Manga OCR desktop application",
"ja": "漫画画像の光学文字認識。漫画OCRデスクトップアプリケーション。",
"zh-hans": "漫画图像的光学字符识别。漫画OCR桌面应用程序。",
"zh-hant": "漫畫圖像的光學字符識別。漫畫OCR桌面應用程式。"
}
},