/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.inputmethod.latin.makedict;

import android.test.AndroidTestCase;
import android.test.suitebuilder.annotation.LargeTest;
import android.util.Log;
import android.util.Pair;
import android.util.SparseArray;

import com.android.inputmethod.latin.BinaryDictionary;
import com.android.inputmethod.latin.common.CodePointUtils;
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import com.android.inputmethod.latin.utils.BinaryDictionaryUtils;
import com.android.inputmethod.latin.utils.ByteArrayDictBuffer;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map.Entry;
import java.util.Random;
import java.util.Set;
import java.util.TreeMap;

/**
 * Unit tests for BinaryDictDecoderUtils and BinaryDictEncoderUtils.
 *
 * The word lists and bigram maps used as fixtures are static and are regenerated from a
 * seeded {@link Random} every time an instance of this class is constructed.
 */
@LargeTest
public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
    private static final String TAG = BinaryDictDecoderEncoderTests.class.getSimpleName();
    private static final int DEFAULT_MAX_UNIGRAMS = 300;
    private static final int DEFAULT_CODE_POINT_SET_SIZE = 50;
    private static final int LARGE_CODE_POINT_SET_SIZE = 300;
    private static final int UNIGRAM_FREQ = 10;
    private static final int BIGRAM_FREQ = 50;
    private static final int TOLERANCE_OF_BIGRAM_FREQ = 5;

    // Generated word lists. Bigram maps below are keyed by an index into the word list and
    // hold the indices of the words that follow it.
    private static final ArrayList<String> sWords = new ArrayList<>();
    private static final ArrayList<String> sWordsWithVariousCodePoints = new ArrayList<>();
    // Never populated: used for the "no bigrams" cases.
    private static final SparseArray<List<Integer>> sEmptyBigrams = new SparseArray<>();
    // Word 0 -> words 1..maxBigrams-1.
    private static final SparseArray<List<Integer>> sStarBigrams = new SparseArray<>();
    // Word i-1 -> word i.
    private static final SparseArray<List<Integer>> sChainBigrams = new SparseArray<>();

    final Random mRandom;

    /**
     * Creates the test case with a time-based seed and {@link #DEFAULT_MAX_UNIGRAMS} words.
     */
    public BinaryDictDecoderEncoderTests() {
        this(System.currentTimeMillis(), DEFAULT_MAX_UNIGRAMS);
    }

    /**
     * Creates the test case and regenerates the static fixtures.
     *
     * @param seed the seed for the random generator; it is logged so a run can be reproduced.
     * @param maxUnigrams the number of words to generate for each word list.
     */
    public BinaryDictDecoderEncoderTests(final long seed, final int maxUnigrams) {
        super();
        BinaryDictionaryUtils.setCurrentTimeForTest(0);
        Log.e(TAG, "Testing dictionary: seed is " + seed);
        mRandom = new Random(seed);
        sWords.clear();
        sWordsWithVariousCodePoints.clear();
        // The bigram maps are static as well. Reset them so that no index left over from a
        // previous construction with a larger word list can point past the end of sWords.
        sChainBigrams.clear();
        sStarBigrams.clear();
        generateWords(maxUnigrams, mRandom);

        for (int i = 0; i < sWords.size(); ++i) {
            sChainBigrams.put(i, new ArrayList<Integer>());
            if (i > 0) {
                sChainBigrams.get(i - 1).add(i);
            }
        }

        sStarBigrams.put(0, new ArrayList<Integer>());
        // MAX - 1 because we added one above already
        final int maxBigrams = Math.min(sWords.size(), FormatSpec.MAX_BIGRAMS_IN_A_PTNODE - 1);
        for (int i = 1; i < maxBigrams; ++i) {
            sStarBigrams.get(0).add(i);
        }
    }

    @Override
    protected void setUp() throws Exception {
        super.setUp();
        // Enter test mode with the current time set to 0; tearDown() quits it again.
        BinaryDictionaryUtils.setCurrentTimeForTest(0);
    }

    @Override
    protected void tearDown() throws Exception {
        // Quit test mode.
        BinaryDictionaryUtils.setCurrentTimeForTest(-1);
        super.tearDown();
    }

    /**
     * Fills {@code sWords} and {@code sWordsWithVariousCodePoints} with {@code number} distinct
     * random words each, the former drawn from a code point set of
     * {@link #DEFAULT_CODE_POINT_SET_SIZE} and the latter from one of
     * {@link #LARGE_CODE_POINT_SET_SIZE}.
     *
     * @param number the number of distinct words to generate for each list.
     * @param random the random generator to draw from.
     */
    private static void generateWords(final int number, final Random random) {
        final int[] codePointSet = CodePointUtils.generateCodePointSet(DEFAULT_CODE_POINT_SET_SIZE,
                random);
        // A set is used so that duplicates are regenerated until `number` distinct words exist.
        final Set<String> wordSet = new HashSet<>();
        while (wordSet.size() < number) {
            wordSet.add(CodePointUtils.generateWord(random, codePointSet));
        }
        sWords.addAll(wordSet);

        final int[] largeCodePointSet = CodePointUtils.generateCodePointSet(
                LARGE_CODE_POINT_SET_SIZE, random);
        wordSet.clear();
        while (wordSet.size() < number) {
            wordSet.add(CodePointUtils.generateWord(random, largeCodePointSet));
        }
        sWordsWithVariousCodePoints.addAll(wordSet);
    }

    /**
     * Adds unigrams to the dictionary.
     *
     * @param number the number of words to add, taken from the start of {@code words}.
     * @param dict the dictionary to add the words to.
     * @param words the source word list; must hold at least {@code number} entries.
     */
    private static void addUnigrams(final int number, final FusionDictionary dict,
            final List<String> words) {
        for (int i = 0; i < number; ++i) {
            dict.add(words.get(i), new ProbabilityInfo(UNIGRAM_FREQ), false /* isNotAWord */,
                    false /* isPossiblyOffensive */);
        }
    }

    /**
     * Adds bigrams to the dictionary, each with frequency {@link #BIGRAM_FREQ}.
     *
     * @param dict the dictionary to add the bigrams to.
     * @param words the word list the indices in {@code bigrams} refer to.
     * @param bigrams a map from the index of the first word to the indices of the second words.
     */
    private static void addBigrams(final FusionDictionary dict,
            final List<String> words,
            final SparseArray<List<Integer>> bigrams) {
        for (int i = 0; i < bigrams.size(); ++i) {
            final String word1 = words.get(bigrams.keyAt(i));
            for (final int w2 : bigrams.valueAt(i)) {
                dict.setBigram(word1, words.get(w2), new ProbabilityInfo(BIGRAM_FREQ));
            }
        }
    }

//    The following is useful to dump the dictionary into a textual file, but it can't compile
//    on-device, so it's commented out.
//    private void dumpToCombinedFileForDebug(final FusionDictionary dict, final String filename)
//            throws IOException {
//        com.android.inputmethod.latin.dicttool.CombinedInputOutput.writeDictionaryCombined(
//                new java.io.FileWriter(new File(filename)), dict);
//    }

    /**
     * Writes the dictionary to the file and measures how long the write takes.
     *
     * @param file the destination file.
     * @param dict the dictionary to write.
     * @param formatOptions the format to write the dictionary in.
     * @return the elapsed time in milliseconds, or -1 if writing threw an exception (the
     *         exception is logged, not rethrown).
     */
    private static long timeWritingDictToFile(final File file, final FusionDictionary dict,
            final FormatSpec.FormatOptions formatOptions) {
        long diff = -1;

        try {
            final DictEncoder dictEncoder = BinaryDictUtils.getDictEncoder(file, formatOptions);

            final long now = System.currentTimeMillis();
            // If you need to dump the dict to a textual file, uncomment the line below and the
            // function above
            // dumpToCombinedFileForDebug(file, "/tmp/foo");
            dictEncoder.writeDictionary(dict, formatOptions);
            diff = System.currentTimeMillis() - now;
        } catch (IOException e) {
            Log.e(TAG, "IO exception while writing file", e);
        } catch (UnsupportedFormatException e) {
            Log.e(TAG, "UnsupportedFormatException", e);
        }

        return diff;
    }

    /**
     * Asserts that the dictionary is non-null and contains every expected word and bigram.
     *
     * @param dict the dictionary to check.
     * @param words the words that must be found in the dictionary.
     * @param bigrams the bigrams that must be found, as indices into {@code words}.
     */
    private static void checkDictionary(final FusionDictionary dict, final List<String> words,
            final SparseArray<List<Integer>> bigrams) {
        assertNotNull(dict);

        // check unigram
        for (final String word : words) {
            final PtNode ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray, word);
            assertNotNull(ptNode);
        }

        // check bigram
        for (int i = 0; i < bigrams.size(); ++i) {
            final String word1 = words.get(bigrams.keyAt(i));
            // The node only depends on the first word, so look it up once per key rather than
            // once per bigram target, and fail with an assertion rather than an NPE if missing.
            final PtNode ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray, word1);
            assertNotNull(word1, ptNode);
            for (final int w2 : bigrams.valueAt(i)) {
                final String word2 = words.get(w2);
                assertNotNull(word1 + "," + word2, ptNode.getBigram(word2));
            }
        }
    }

    /**
     * Builds the suffix describing the buffer type and format version for profiling logs.
     *
     * @param bufferType the buffer type; anything other than
     *        {@code BinaryDictUtils.USE_BYTE_BUFFER} is reported as "byte array".
     * @param formatOptions the format options whose version is reported.
     * @return the description string.
     */
    private static String outputOptions(final int bufferType,
            final FormatSpec.FormatOptions formatOptions) {
        final String bufferName =
                (bufferType == BinaryDictUtils.USE_BYTE_BUFFER) ? "byte buffer" : "byte array";
        return " : buffer type = " + bufferName + " : version = " + formatOptions.mVersion;
    }

    // Tests for readDictionaryBinary and writeDictionaryBinary

    /**
     * Reads the whole dictionary from the file, measures how long it takes and checks the
     * contents against the expected words and bigrams.
     *
     * @param file the dictionary file to read.
     * @param words the expected words.
     * @param bigrams the expected bigrams, as indices into {@code words}.
     * @param bufferType the buffer type passed to the decoder factory.
     * @return the elapsed read time in milliseconds, or -1 if reading threw an exception. In
     *         that case the dictionary stays null and the content check fails.
     */
    private static long timeReadingAndCheckDict(final File file, final List<String> words,
            final SparseArray<List<Integer>> bigrams, final int bufferType) {
        long diff = -1;

        FusionDictionary dict = null;
        try {
            final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder(file, 0, file.length(),
                    bufferType);
            final long now = System.currentTimeMillis();
            dict = dictDecoder.readDictionaryBinary(false /* deleteDictIfBroken */);
            diff = System.currentTimeMillis() - now;
        } catch (IOException e) {
            Log.e(TAG, "IOException while reading dictionary", e);
        } catch (UnsupportedFormatException e) {
            Log.e(TAG, "Unsupported format", e);
        }

        checkDictionary(dict, words, bigrams);
        return diff;
    }

    // Tests for readDictionaryBinary and writeDictionaryBinary
    /**
     * Builds a dictionary from the words and bigrams, writes it to a file in the cache
     * directory, reads it back and checks it both before writing and after reading.
     *
     * @param words the words to put in the dictionary.
     * @param bigrams the bigrams to put in the dictionary, as indices into {@code words}.
     * @param bufferType the buffer type used for reading.
     * @param formatOptions the format to write the dictionary in.
     * @param message a label for this run, included in the returned string.
     * @return a profiling line with the read and write times.
     */
    private String runReadAndWrite(final List<String> words,
            final SparseArray<List<Integer>> bigrams,
            final int bufferType, final FormatSpec.FormatOptions formatOptions,
            final String message) {

        final String dictName = "runReadAndWrite";
        final String dictVersion = Long.toString(System.currentTimeMillis());
        final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions,
                getContext().getCacheDir());

        final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
                BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions));
        addUnigrams(words.size(), dict, words);
        addBigrams(dict, words, bigrams);
        checkDictionary(dict, words, bigrams);

        final long write = timeWritingDictToFile(file, dict, formatOptions);
        final long read = timeReadingAndCheckDict(file, words, bigrams, bufferType);

        return "PROF: read=" + read + "ms, write=" + write + "ms :" + message
                + " : " + outputOptions(bufferType, formatOptions);
    }

    /**
     * Runs {@link #runReadAndWrite} over the fixture combinations and collects the profiling
     * lines.
     *
     * @param results the list the profiling lines are appended to.
     * @param bufferType the buffer type used for reading.
     * @param formatOptions the format to write the dictionaries in.
     */
    private void runReadAndWriteTests(final List<String> results, final int bufferType,
            final FormatSpec.FormatOptions formatOptions) {
        results.add(runReadAndWrite(sWords, sEmptyBigrams, bufferType,
                formatOptions, "unigram"));
        results.add(runReadAndWrite(sWords, sChainBigrams, bufferType,
                formatOptions, "chain"));
        results.add(runReadAndWrite(sWords, sStarBigrams, bufferType,
                formatOptions, "star"));
        // The "... with shortcuts" runs pass the same word list and bigram map as the three
        // runs above; only the label differs.
        results.add(runReadAndWrite(sWords, sEmptyBigrams, bufferType, formatOptions,
                "unigram with shortcuts"));
        results.add(runReadAndWrite(sWords, sChainBigrams, bufferType, formatOptions,
                "chain with shortcuts"));
        results.add(runReadAndWrite(sWords, sStarBigrams, bufferType, formatOptions,
                "star with shortcuts"));
        results.add(runReadAndWrite(sWordsWithVariousCodePoints, sEmptyBigrams,
                bufferType, formatOptions,
                "unigram with various code points"));
    }

    /**
     * Writes a small dictionary with the code point table enabled and checks that the header
     * read back contains the expected table.
     */
    public void testCharacterTableIsPresent() throws IOException, UnsupportedFormatException {
        final String[] wordSource = {"words", "used", "for", "testing", "a", "code point", "table"};
        final List<String> words = Arrays.asList(wordSource);
        // Note the trailing space: "code point" contributes a space character.
        final String correctCodePointTable = "toesdrniawuplgfcb ";
        final String dictName = "codePointTableTest";
        final String dictVersion = Long.toString(System.currentTimeMillis());
        final String codePointTableAttribute = DictionaryHeader.CODE_POINT_TABLE_KEY;
        final File file = BinaryDictUtils.getDictFile(dictName, dictVersion,
                BinaryDictUtils.STATIC_OPTIONS, getContext().getCacheDir());

        // Write a test dictionary
        final DictEncoder dictEncoder = new Ver2DictEncoder(file,
                Ver2DictEncoder.CODE_POINT_TABLE_ON);
        final FormatSpec.FormatOptions formatOptions =
                new FormatSpec.FormatOptions(
                        FormatSpec.MINIMUM_SUPPORTED_STATIC_VERSION);
        final FusionDictionary sourcedict = new FusionDictionary(new PtNodeArray(),
                BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions));
        addUnigrams(words.size(), sourcedict, words);
        dictEncoder.writeDictionary(sourcedict, formatOptions);

        // Read the dictionary
        final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder(file, 0, file.length(),
                DictDecoder.USE_BYTEARRAY);
        final DictionaryHeader fileHeader = dictDecoder.readHeader();
        // Check if codePointTable is present
        assertTrue("codePointTable is not present",
                fileHeader.mDictionaryOptions.mAttributes.containsKey(codePointTableAttribute));
        final String codePointTable =
                fileHeader.mDictionaryOptions.mAttributes.get(codePointTableAttribute);
        // Check if codePointTable is correct (expected value first, then the actual one)
        assertEquals("codePointTable is incorrect", correctCodePointTable, codePointTable);
    }

    // Unit test for CharEncoding.readString and CharEncoding.writeString.
    public void testCharEncoding() {
        // the max length of a word in sWords is less than 50.
        // See generateWords.
        final byte[] buffer = new byte[50 * 3];
        final DictBuffer dictBuffer = new ByteArrayDictBuffer(buffer);
        for (final String word : sWords) {
            // Start from a zeroed buffer so nothing from the previous word is left behind.
            Arrays.fill(buffer, (byte) 0);
            CharEncoding.writeString(buffer, 0, word, null);
            dictBuffer.position(0);
            final String str = CharEncoding.readString(dictBuffer);
            assertEquals(word, str);
        }
    }

    /**
     * Runs the read/write round trip with the byte buffer for every format option set.
     */
    public void testReadAndWriteWithByteBuffer() {
        final List<String> results = new ArrayList<>();

        runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_BUFFER,
                BinaryDictUtils.STATIC_OPTIONS);
        runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_BUFFER,
                BinaryDictUtils.DYNAMIC_OPTIONS_WITHOUT_TIMESTAMP);
        runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_BUFFER,
                BinaryDictUtils.DYNAMIC_OPTIONS_WITH_TIMESTAMP);
        for (final String result : results) {
            Log.d(TAG, result);
        }
    }

    /**
     * Runs the read/write round trip with the byte array for every format option set.
     */
    public void testReadAndWriteWithByteArray() {
        final List<String> results = new ArrayList<>();

        runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_ARRAY,
                BinaryDictUtils.STATIC_OPTIONS);
        runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_ARRAY,
                BinaryDictUtils.DYNAMIC_OPTIONS_WITHOUT_TIMESTAMP);
        runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_ARRAY,
                BinaryDictUtils.DYNAMIC_OPTIONS_WITH_TIMESTAMP);

        for (final String result : results) {
            Log.d(TAG, result);
        }
    }

    // Tests for readUnigramsAndBigramsBinary

    /**
     * Asserts that the maps read from a dictionary match the expected words and bigrams.
     *
     * @param expectedWords the expected words.
     * @param expectedBigrams the expected bigrams, as indices into {@code expectedWords}.
     * @param resultWords the words that were read, keyed by an integer that
     *        {@code resultFrequencies}, {@code resultBigrams} and the bigram target addresses
     *        also use.
     * @param resultFrequencies the unigram frequencies that were read.
     * @param resultBigrams the bigrams that were read.
     * @param checkProbability whether unigram and bigram frequencies are checked as well.
     */
    private static void checkWordMap(final List<String> expectedWords,
            final SparseArray<List<Integer>> expectedBigrams,
            final TreeMap<Integer, String> resultWords,
            final TreeMap<Integer, Integer> resultFrequencies,
            final TreeMap<Integer, ArrayList<PendingAttribute>> resultBigrams,
            final boolean checkProbability) {
        // check unigrams
        final Set<String> actualWordsSet = new HashSet<>(resultWords.values());
        final Set<String> expectedWordsSet = new HashSet<>(expectedWords);
        assertEquals(expectedWordsSet, actualWordsSet);
        if (checkProbability) {
            for (final int freq : resultFrequencies.values()) {
                assertEquals(UNIGRAM_FREQ, freq);
            }
        }

        // check bigrams
        final HashMap<String, Set<String>> expBigrams = new HashMap<>();
        for (int i = 0; i < expectedBigrams.size(); ++i) {
            final String word1 = expectedWords.get(expectedBigrams.keyAt(i));
            for (final int w2 : expectedBigrams.valueAt(i)) {
                // The entry is created lazily, so a key with an empty target list adds nothing.
                Set<String> targets = expBigrams.get(word1);
                if (targets == null) {
                    targets = new HashSet<>();
                    expBigrams.put(word1, targets);
                }
                targets.add(expectedWords.get(w2));
            }
        }

        final HashMap<String, Set<String>> actBigrams = new HashMap<>();
        for (final Entry<Integer, ArrayList<PendingAttribute>> entry : resultBigrams.entrySet()) {
            final String word1 = resultWords.get(entry.getKey());
            final int unigramFreq = resultFrequencies.get(entry.getKey());
            for (final PendingAttribute attr : entry.getValue()) {
                final String word2 = resultWords.get(attr.mAddress);
                Set<String> targets = actBigrams.get(word1);
                if (targets == null) {
                    targets = new HashSet<>();
                    actBigrams.put(word1, targets);
                }
                targets.add(word2);

                if (checkProbability) {
                    final int bigramFreq = BinaryDictIOUtils.reconstructBigramFrequency(
                            unigramFreq, attr.mFrequency);
                    // The reconstructed value only has to be within the tolerance.
                    assertTrue("bigram frequency " + bigramFreq + " is too far from "
                            + BIGRAM_FREQ,
                            Math.abs(bigramFreq - BIGRAM_FREQ) < TOLERANCE_OF_BIGRAM_FREQ);
                }
            }
        }
        assertEquals(expBigrams, actBigrams);
    }

    /**
     * Reads the unigram and bigram maps from the file, measures how long it takes and checks
     * the result.
     *
     * @param file the dictionary file to read.
     * @param words the expected words.
     * @param bigrams the expected bigrams, as indices into {@code words}.
     * @param bufferType the buffer type passed to the decoder factory.
     * @param checkProbability whether frequencies are checked as well.
     * @return the elapsed read time in milliseconds, or -1 if reading threw an exception.
     */
    private static long timeAndCheckReadUnigramsAndBigramsBinary(final File file,
            final List<String> words, final SparseArray<List<Integer>> bigrams,
            final int bufferType, final boolean checkProbability) {
        final TreeMap<Integer, String> resultWords = new TreeMap<>();
        final TreeMap<Integer, ArrayList<PendingAttribute>> resultBigrams = new TreeMap<>();
        final TreeMap<Integer, Integer> resultFreqs = new TreeMap<>();

        long diff = -1;
        try {
            final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder(file, 0, file.length(),
                    bufferType);
            final long now = System.currentTimeMillis();
            dictDecoder.readUnigramsAndBigramsBinary(resultWords, resultFreqs, resultBigrams);
            diff = System.currentTimeMillis() - now;
        } catch (IOException e) {
            Log.e(TAG, "IOException", e);
        } catch (UnsupportedFormatException e) {
            Log.e(TAG, "UnsupportedFormatException", e);
        }

        checkWordMap(words, bigrams, resultWords, resultFreqs, resultBigrams, checkProbability);
        return diff;
    }

    /**
     * Builds and writes a dictionary, then reads it back both as word maps and as a full
     * dictionary, checking each result.
     *
     * @param words the words to put in the dictionary.
     * @param bigrams the bigrams to put in the dictionary, as indices into {@code words}.
     * @param bufferType the buffer type used for reading.
     * @param formatOptions the format to write the dictionary in.
     * @param message a label for this run, included in the returned string.
     * @return a profiling line with both read times.
     */
    private String runReadUnigramsAndBigramsBinary(final ArrayList<String> words,
            final SparseArray<List<Integer>> bigrams, final int bufferType,
            final FormatSpec.FormatOptions formatOptions, final String message) {
        final String dictName = "runReadUnigrams";
        final String dictVersion = Long.toString(System.currentTimeMillis());
        final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions,
                getContext().getCacheDir());

        // making the dictionary from lists of words.
        final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
                BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions));
        addUnigrams(words.size(), dict, words);
        addBigrams(dict, words, bigrams);

        timeWritingDictToFile(file, dict, formatOptions);

        // Caveat: Currently, the Java code to read a v4 dictionary doesn't calculate the
        // probability when there's a timestamp for the entry.
        // TODO: Abandon the Java code, and implement the v4 dictionary reading code in native.
        final long wordMap = timeAndCheckReadUnigramsAndBigramsBinary(file, words, bigrams,
                bufferType, !formatOptions.mHasTimestamp /* checkProbability */);
        final long fullReading = timeReadingAndCheckDict(file, words, bigrams,
                bufferType);

        return "readDictionaryBinary=" + fullReading + ", readUnigramsAndBigramsBinary=" + wordMap
                + " : " + message + " : " + outputOptions(bufferType, formatOptions);
    }

    /**
     * Runs {@link #runReadUnigramsAndBigramsBinary} with no bigrams, chain bigrams and star
     * bigrams, and collects the profiling lines.
     *
     * @param results the list the profiling lines are appended to.
     * @param bufferType the buffer type used for reading.
     * @param formatOptions the format to write the dictionaries in.
     */
    private void runReadUnigramsAndBigramsTests(final ArrayList<String> results,
            final int bufferType, final FormatSpec.FormatOptions formatOptions) {
        results.add(runReadUnigramsAndBigramsBinary(sWords, sEmptyBigrams, bufferType,
                formatOptions, "unigram"));
        results.add(runReadUnigramsAndBigramsBinary(sWords, sChainBigrams, bufferType,
                formatOptions, "chain"));
        results.add(runReadUnigramsAndBigramsBinary(sWords, sStarBigrams, bufferType,
                formatOptions, "star"));
    }

    public void testReadUnigramsAndBigramsBinaryWithByteBuffer() {
        final ArrayList<String> results = new ArrayList<>();

        runReadUnigramsAndBigramsTests(results, BinaryDictUtils.USE_BYTE_BUFFER,
                BinaryDictUtils.STATIC_OPTIONS);

        for (final String result : results) {
            Log.d(TAG, result);
        }
    }

    public void testReadUnigramsAndBigramsBinaryWithByteArray() {
        final ArrayList<String> results = new ArrayList<>();

        runReadUnigramsAndBigramsTests(results, BinaryDictUtils.USE_BYTE_ARRAY,
                BinaryDictUtils.STATIC_OPTIONS);

        for (final String result : results) {
            Log.d(TAG, result);
        }
    }

    // Tests for getTerminalPosition

    /**
     * Reads the word stored at the given address of the dictionary behind the decoder.
     *
     * @param dictDecoder the decoder to read from; it is rewound to position 0 first.
     * @param address the address of the word, as returned by getTerminalPosition.
     * @return the word, or null if the header could not be read.
     */
    private static String getWordFromBinary(final DictDecoder dictDecoder, final int address) {
        if (dictDecoder.getPosition() != 0) dictDecoder.setPosition(0);

        final DictionaryHeader fileHeader;
        try {
            fileHeader = dictDecoder.readHeader();
        } catch (IOException | UnsupportedFormatException e) {
            return null;
        }
        if (fileHeader == null) return null;
        return BinaryDictDecoderUtils.getWordAtPosition(dictDecoder, fileHeader.mBodyOffset,
                address).mWord;
    }

    /**
     * Looks the word up with getTerminalPosition and asserts the outcome.
     *
     * @param dictDecoder the decoder to search.
     * @param word the word to look up.
     * @param contained whether the word is expected to be in the dictionary.
     * @return the lookup time in nanoseconds, or -1 if the lookup threw an exception.
     */
    private static long checkGetTerminalPosition(final DictDecoder dictDecoder, final String word,
            final boolean contained) {
        long diff = -1;
        int position = -1;
        try {
            final long now = System.nanoTime();
            position = dictDecoder.getTerminalPosition(word);
            diff = System.nanoTime() - now;
        } catch (IOException e) {
            Log.e(TAG, "IOException while getTerminalPosition", e);
        } catch (UnsupportedFormatException e) {
            Log.e(TAG, "UnsupportedFormatException while getTerminalPosition", e);
        }

        assertEquals(contained, FormatSpec.NOT_VALID_WORD != position);
        if (contained) assertEquals(word, getWordFromBinary(dictDecoder, position));
        return diff;
    }

    /**
     * Builds and writes a dictionary, then exercises getTerminalPosition with invalid input,
     * with every contained word, and with random words that are not contained.
     *
     * @param words the words to put in the dictionary and to look up.
     * @param bigrams the bigrams to put in the dictionary, as indices into {@code words}.
     * @param bufferType the buffer type reported in the log line.
     * @param formatOptions the format to write the dictionary in.
     * @param message a label for this run, included in the log line.
     */
    private void runGetTerminalPosition(final ArrayList<String> words,
            final SparseArray<List<Integer>> bigrams, final int bufferType,
            final FormatOptions formatOptions, final String message) {
        final String dictName = "testGetTerminalPosition";
        final String dictVersion = Long.toString(System.currentTimeMillis());
        final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions,
                getContext().getCacheDir());

        final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
                BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions));
        addUnigrams(words.size(), dict, words);
        addBigrams(dict, words, bigrams);
        timeWritingDictToFile(file, dict, formatOptions);

        // NOTE(review): the decoder is always created with DictDecoder.USE_BYTEARRAY, so
        // bufferType only affects the log label below - confirm whether that is intended.
        final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder(file, 0, file.length(),
                DictDecoder.USE_BYTEARRAY);
        try {
            dictDecoder.openDictBuffer();
        } catch (IOException e) {
            Log.e(TAG, "IOException while opening the buffer", e);
        } catch (UnsupportedFormatException e) {
            Log.e(TAG, "UnsupportedFormatException while opening the buffer", e);
        }
        assertTrue("Can't get the buffer", dictDecoder.isDictBufferOpen());

        try {
            // too long word
            final String longWord = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz";
            assertEquals(FormatSpec.NOT_VALID_WORD, dictDecoder.getTerminalPosition(longWord));

            // null
            assertEquals(FormatSpec.NOT_VALID_WORD, dictDecoder.getTerminalPosition(null));

            // empty string
            assertEquals(FormatSpec.NOT_VALID_WORD, dictDecoder.getTerminalPosition(""));
        } catch (IOException e) {
            // Logged rather than silently dropped: an exception here means that some of the
            // assertions above did not run.
            Log.e(TAG, "IOException while getTerminalPosition", e);
        } catch (UnsupportedFormatException e) {
            Log.e(TAG, "UnsupportedFormatException while getTerminalPosition", e);
        }

        // Test a word that is contained within the dictionary.
        long sum = 0;
        for (final String word : words) {
            final long time = checkGetTerminalPosition(dictDecoder, word, true);
            // -1 means the lookup threw; leave it out of the average.
            sum += time == -1 ? 0 : time;
        }
        Log.d(TAG, "per search : " + (((double)sum) / words.size() / 1000000) + " : " + message
                + " : " + outputOptions(bufferType, formatOptions));

        // Test a word that isn't contained within the dictionary.
        final int[] codePointSet = CodePointUtils.generateCodePointSet(DEFAULT_CODE_POINT_SET_SIZE,
                mRandom);
        for (int i = 0; i < 1000; ++i) {
            final String word = CodePointUtils.generateWord(mRandom, codePointSet);
            // Skip a random word that happens to be in the dictionary.
            if (words.contains(word)) continue;
            checkGetTerminalPosition(dictDecoder, word, false);
        }
    }

    private void runGetTerminalPositionTests(final int bufferType,
            final FormatOptions formatOptions) {
        runGetTerminalPosition(sWords, sEmptyBigrams, bufferType, formatOptions, "unigram");
    }

    /**
     * Runs the getTerminalPosition checks for both buffer types with the static format.
     */
    public void testGetTerminalPosition() {
        runGetTerminalPositionTests(BinaryDictUtils.USE_BYTE_ARRAY,
                BinaryDictUtils.STATIC_OPTIONS);
        runGetTerminalPositionTests(BinaryDictUtils.USE_BYTE_BUFFER,
                BinaryDictUtils.STATIC_OPTIONS);
    }

    /**
     * Writes a static-format dictionary, opens it as a {@link BinaryDictionary} and checks the
     * word property of every word.
     */
    public void testVer2DictGetWordProperty() {
        final FormatOptions formatOptions = BinaryDictUtils.STATIC_OPTIONS;
        final ArrayList<String> words = sWords;
        final String dictName = "testGetWordProperty";
        final String dictVersion = Long.toString(System.currentTimeMillis());
        final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
                BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions));
        addUnigrams(words.size(), dict, words);
        addBigrams(dict, words, sEmptyBigrams);
        final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions,
                getContext().getCacheDir());
        // Remove any file already present at this path before writing.
        file.delete();
        timeWritingDictToFile(file, dict, formatOptions);
        final BinaryDictionary binaryDictionary = new BinaryDictionary(file.getAbsolutePath(),
                0 /* offset */, file.length(), true /* useFullEditDistance */,
                Locale.ENGLISH, dictName, false /* isUpdatable */);
        for (final String word : words) {
            final WordProperty wordProperty = binaryDictionary.getWordProperty(word,
                    false /* isBeginningOfSentence */);
            assertEquals(word, wordProperty.mWord);
            assertEquals(UNIGRAM_FREQ, wordProperty.getProbability());
        }
    }

    /**
     * Writes a static-format dictionary, iterates over it with getNextWordProperty and checks
     * that exactly the expected words and bigrams are visited.
     */
    public void testVer2DictIteration() {
        final FormatOptions formatOptions = BinaryDictUtils.STATIC_OPTIONS;
        final ArrayList<String> words = sWords;
        final SparseArray<List<Integer>> bigrams = sEmptyBigrams;
        final String dictName = "testGetWordProperty";
        final String dictVersion = Long.toString(System.currentTimeMillis());
        final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
                BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions));
        addUnigrams(words.size(), dict, words);
        addBigrams(dict, words, bigrams);
        final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions,
                getContext().getCacheDir());
        timeWritingDictToFile(file, dict, formatOptions);
        Log.d(TAG, file.getAbsolutePath());
        final BinaryDictionary binaryDictionary = new BinaryDictionary(file.getAbsolutePath(),
                0 /* offset */, file.length(), true /* useFullEditDistance */,
                Locale.ENGLISH, dictName, false /* isUpdatable */);

        // Everything that is expected; entries are removed as they are visited, so both sets
        // must be empty at the end.
        final HashSet<String> wordSet = new HashSet<>(words);
        final HashSet<Pair<String, String>> bigramSet = new HashSet<>();

        for (int i = 0; i < words.size(); i++) {
            final List<Integer> bigramList = bigrams.get(i);
            if (bigramList != null) {
                for (final Integer word1Index : bigramList) {
                    final String word1 = words.get(word1Index);
                    bigramSet.add(new Pair<>(words.get(i), word1));
                }
            }
        }
        // The iteration starts at token 0 and is over when the next token is 0 again.
        int token = 0;
        do {
            final BinaryDictionary.GetNextWordPropertyResult result =
                    binaryDictionary.getNextWordProperty(token);
            final WordProperty wordProperty = result.mWordProperty;
            final String word0 = wordProperty.mWord;
            assertEquals(UNIGRAM_FREQ, wordProperty.mProbabilityInfo.mProbability);
            wordSet.remove(word0);
            if (wordProperty.mHasNgrams) {
                for (final WeightedString bigramTarget : wordProperty.getBigrams()) {
                    final String word1 = bigramTarget.mWord;
                    final Pair<String, String> bigram = new Pair<>(word0, word1);
                    // remove() returns false for an unexpected or already visited bigram.
                    assertTrue(bigramSet.remove(bigram));
                }
            }
            token = result.mNextToken;
        } while (token != 0);
        assertTrue(wordSet.isEmpty());
        assertTrue(bigramSet.isEmpty());
    }
}