Open Chinese Convert
1.0.3
A project for conversion between Traditional and Simplified Chinese
|
00001 /* 00002 * Open Chinese Convert 00003 * 00004 * Copyright 2015 BYVoid <byvoid@byvoid.com> 00005 * 00006 * Licensed under the Apache License, Version 2.0 (the "License"); 00007 * you may not use this file except in compliance with the License. 00008 * You may obtain a copy of the License at 00009 * 00010 * http://www.apache.org/licenses/LICENSE-2.0 00011 * 00012 * Unless required by applicable law or agreed to in writing, software 00013 * distributed under the License is distributed on an "AS IS" BASIS, 00014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 * See the License for the specific language governing permissions and 00016 * limitations under the License. 00017 */ 00018 00019 #pragma once 00020 00021 #include "Lexicon.hpp" 00022 #include "TextDict.hpp" 00023 #include "TestUtils.hpp" 00024 00025 namespace opencc { 00026 00027 class TextDictTestBase : public ::testing::Test { 00028 protected: 00029 TextDictTestBase() : textDict(CreateTextDictForText()){}; 00030 00031 TextDictPtr CreateTextDictForText() const { 00032 LexiconPtr lexicon(new Lexicon); 00033 lexicon->Add(DictEntryFactory::New("BYVoid", "byv")); 00034 lexicon->Add(DictEntryFactory::New("zigzagzig", "zag")); 00035 lexicon->Add(DictEntryFactory::New(utf8("積羽沉舟"), utf8("羣輕折軸"))); 00036 lexicon->Add(DictEntryFactory::New(utf8("清"), "Tsing")); 00037 lexicon->Add(DictEntryFactory::New(utf8("清華"), "Tsinghua")); 00038 lexicon->Add(DictEntryFactory::New(utf8("清華大學"), "TsinghuaUniversity")); 00039 lexicon->Sort(); 00040 return TextDictPtr(new TextDict(lexicon)); 00041 } 00042 00043 DictPtr CreateDictForCharacters() const { 00044 LexiconPtr lexicon(new Lexicon); 00045 lexicon->Add(DictEntryFactory::New(utf8("后"), 00046 vector<string>{utf8("后"), utf8("後")})); 00047 lexicon->Add(DictEntryFactory::New(utf8("发"), 00048 vector<string>{utf8("發"), utf8("髮")})); 00049 lexicon->Add(DictEntryFactory::New( 00050 utf8("干"), vector<string>{utf8("幹"), utf8("乾"), utf8("干")})); 00051 lexicon->Add(DictEntryFactory::New(utf8("里"), 00052 vector<string>{utf8("裏"), utf8("里")})); 00053 lexicon->Sort(); 00054 return TextDictPtr(new TextDict(lexicon)); 00055 } 00056 00057 DictPtr CreateDictForPhrases() const { 00058 LexiconPtr lexicon(new Lexicon); 00059 lexicon->Add(DictEntryFactory::New(utf8("太后"), utf8("太后"))); 00060 lexicon->Add(DictEntryFactory::New(utf8("头发"), utf8("頭髮"))); 00061 lexicon->Add(DictEntryFactory::New(utf8("干燥"), utf8("乾燥"))); 00062 lexicon->Add(DictEntryFactory::New(utf8("鼠标"), utf8("鼠標"))); 00063 lexicon->Sort(); 00064 return TextDictPtr(new TextDict(lexicon)); 00065 } 00066 00067 DictPtr CreateDictForTaiwanVariants() const { 00068 LexiconPtr lexicon(new Lexicon); 00069 lexicon->Add(DictEntryFactory::New(utf8("裏"), utf8("裡"))); 00070 TextDictPtr textDict(new TextDict(lexicon)); 00071 return textDict; 00072 } 00073 00074 DictPtr CreateTaiwanPhraseDict() const { 00075 LexiconPtr lexicon(new Lexicon); 00076 lexicon->Add(DictEntryFactory::New(utf8("鼠标"), utf8("滑鼠"))); 00077 lexicon->Add(DictEntryFactory::New(utf8("服务器"), utf8("伺服器"))); 00078 lexicon->Add(DictEntryFactory::New(utf8("克罗地亚"), utf8("克羅埃西亞"))); 00079 lexicon->Sort(); 00080 return TextDictPtr(new TextDict(lexicon)); 00081 } 00082 00083 void TestDict(const DictPtr dict) const { 00084 Optional<const DictEntry*> entry = dict->MatchPrefix("BYVoid"); 00085 EXPECT_TRUE(!entry.IsNull()); 00086 EXPECT_EQ(utf8("BYVoid"), entry.Get()->Key()); 00087 EXPECT_EQ(utf8("byv"), entry.Get()->GetDefault()); 00088 00089 entry = dict->MatchPrefix("BYVoid123"); 00090 EXPECT_TRUE(!entry.IsNull()); 00091 EXPECT_EQ(utf8("BYVoid"), entry.Get()->Key()); 00092 EXPECT_EQ(utf8("byv"), entry.Get()->GetDefault()); 00093 00094 entry = dict->MatchPrefix(utf8("積羽沉舟")); 00095 EXPECT_TRUE(!entry.IsNull()); 00096 EXPECT_EQ(utf8("積羽沉舟"), entry.Get()->Key()); 00097 EXPECT_EQ(utf8("羣輕折軸"), entry.Get()->GetDefault()); 00098 00099 entry = dict->MatchPrefix("Unknown"); 00100 EXPECT_TRUE(entry.IsNull()); 00101 00102 const vector<const DictEntry*> matches = 00103 dict->MatchAllPrefixes(utf8("清華大學計算機系")); 00104 EXPECT_EQ(3, matches.size()); 00105 EXPECT_EQ(utf8("清華大學"), matches.at(0)->Key()); 00106 EXPECT_EQ(utf8("TsinghuaUniversity"), matches.at(0)->GetDefault()); 00107 EXPECT_EQ(utf8("清華"), matches.at(1)->Key()); 00108 EXPECT_EQ(utf8("Tsinghua"), matches.at(1)->GetDefault()); 00109 EXPECT_EQ(utf8("清"), matches.at(2)->Key()); 00110 EXPECT_EQ(utf8("Tsing"), matches.at(2)->GetDefault()); 00111 } 00112 00113 const TextDictPtr textDict; 00114 }; 00115 00116 } // namespace opencc