1 /*
2  * Copyright (C) 2012 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.android.inputmethod.latin.makedict;
18 
19 import android.test.AndroidTestCase;
20 import android.test.suitebuilder.annotation.LargeTest;
21 import android.util.Log;
22 import android.util.Pair;
23 import android.util.SparseArray;
24 
25 import com.android.inputmethod.latin.BinaryDictionary;
26 import com.android.inputmethod.latin.common.CodePointUtils;
27 import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
28 import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
29 import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
30 import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
31 import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
32 import com.android.inputmethod.latin.utils.BinaryDictionaryUtils;
33 import com.android.inputmethod.latin.utils.ByteArrayDictBuffer;
34 
35 import java.io.File;
36 import java.io.IOException;
37 import java.util.ArrayList;
38 import java.util.Arrays;
39 import java.util.HashMap;
40 import java.util.HashSet;
41 import java.util.List;
42 import java.util.Locale;
43 import java.util.Map.Entry;
44 import java.util.Random;
45 import java.util.Set;
46 import java.util.TreeMap;
47 
48 /**
49  * Unit tests for BinaryDictDecoderUtils and BinaryDictEncoderUtils.
50  */
51 @LargeTest
52 public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
53     private static final String TAG = BinaryDictDecoderEncoderTests.class.getSimpleName();
54     private static final int DEFAULT_MAX_UNIGRAMS = 300;
55     private static final int DEFAULT_CODE_POINT_SET_SIZE = 50;
56     private static final int LARGE_CODE_POINT_SET_SIZE = 300;
57     private static final int UNIGRAM_FREQ = 10;
58     private static final int BIGRAM_FREQ = 50;
59     private static final int TOLERANCE_OF_BIGRAM_FREQ = 5;
60 
61     private static final ArrayList<String> sWords = new ArrayList<>();
62     private static final ArrayList<String> sWordsWithVariousCodePoints = new ArrayList<>();
63     private static final SparseArray<List<Integer>> sEmptyBigrams = new SparseArray<>();
64     private static final SparseArray<List<Integer>> sStarBigrams = new SparseArray<>();
65     private static final SparseArray<List<Integer>> sChainBigrams = new SparseArray<>();
66 
67     final Random mRandom;
68 
BinaryDictDecoderEncoderTests()69     public BinaryDictDecoderEncoderTests() {
70         this(System.currentTimeMillis(), DEFAULT_MAX_UNIGRAMS);
71     }
72 
BinaryDictDecoderEncoderTests(final long seed, final int maxUnigrams)73     public BinaryDictDecoderEncoderTests(final long seed, final int maxUnigrams) {
74         super();
75         BinaryDictionaryUtils.setCurrentTimeForTest(0);
76         Log.e(TAG, "Testing dictionary: seed is " + seed);
77         mRandom = new Random(seed);
78         sWords.clear();
79         sWordsWithVariousCodePoints.clear();
80         generateWords(maxUnigrams, mRandom);
81 
82         for (int i = 0; i < sWords.size(); ++i) {
83             sChainBigrams.put(i, new ArrayList<Integer>());
84             if (i > 0) {
85                 sChainBigrams.get(i - 1).add(i);
86             }
87         }
88 
89         sStarBigrams.put(0, new ArrayList<Integer>());
90         // MAX - 1 because we added one above already
91         final int maxBigrams = Math.min(sWords.size(), FormatSpec.MAX_BIGRAMS_IN_A_PTNODE - 1);
92         for (int i = 1; i < maxBigrams; ++i) {
93             sStarBigrams.get(0).add(i);
94         }
95     }
96 
97     @Override
setUp()98     protected void setUp() throws Exception {
99         super.setUp();
100         BinaryDictionaryUtils.setCurrentTimeForTest(0);
101     }
102 
103     @Override
tearDown()104     protected void tearDown() throws Exception {
105         // Quit test mode.
106         BinaryDictionaryUtils.setCurrentTimeForTest(-1);
107         super.tearDown();
108     }
109 
generateWords(final int number, final Random random)110     private static void generateWords(final int number, final Random random) {
111         final int[] codePointSet = CodePointUtils.generateCodePointSet(DEFAULT_CODE_POINT_SET_SIZE,
112                 random);
113         final Set<String> wordSet = new HashSet<>();
114         while (wordSet.size() < number) {
115             wordSet.add(CodePointUtils.generateWord(random, codePointSet));
116         }
117         sWords.addAll(wordSet);
118 
119         final int[] largeCodePointSet = CodePointUtils.generateCodePointSet(
120                 LARGE_CODE_POINT_SET_SIZE, random);
121         wordSet.clear();
122         while (wordSet.size() < number) {
123             wordSet.add(CodePointUtils.generateWord(random, largeCodePointSet));
124         }
125         sWordsWithVariousCodePoints.addAll(wordSet);
126     }
127 
128     /**
129      * Adds unigrams to the dictionary.
130      */
addUnigrams(final int number, final FusionDictionary dict, final List<String> words)131     private static void addUnigrams(final int number, final FusionDictionary dict,
132             final List<String> words) {
133         for (int i = 0; i < number; ++i) {
134             final String word = words.get(i);
135             final ArrayList<WeightedString> shortcuts = new ArrayList<>();
136             dict.add(word, new ProbabilityInfo(UNIGRAM_FREQ), false /* isNotAWord */,
137                     false /* isPossiblyOffensive */);
138         }
139     }
140 
addBigrams(final FusionDictionary dict, final List<String> words, final SparseArray<List<Integer>> bigrams)141     private static void addBigrams(final FusionDictionary dict,
142             final List<String> words,
143             final SparseArray<List<Integer>> bigrams) {
144         for (int i = 0; i < bigrams.size(); ++i) {
145             final int w1 = bigrams.keyAt(i);
146             for (int w2 : bigrams.valueAt(i)) {
147                 dict.setBigram(words.get(w1), words.get(w2), new ProbabilityInfo(BIGRAM_FREQ));
148             }
149         }
150     }
151 
152 //    The following is useful to dump the dictionary into a textual file, but it can't compile
153 //    on-device, so it's commented out.
154 //    private void dumpToCombinedFileForDebug(final FusionDictionary dict, final String filename)
155 //            throws IOException {
156 //        com.android.inputmethod.latin.dicttool.CombinedInputOutput.writeDictionaryCombined(
157 //                new java.io.FileWriter(new File(filename)), dict);
158 //    }
159 
timeWritingDictToFile(final File file, final FusionDictionary dict, final FormatSpec.FormatOptions formatOptions)160     private static long timeWritingDictToFile(final File file, final FusionDictionary dict,
161             final FormatSpec.FormatOptions formatOptions) {
162 
163         long now = -1, diff = -1;
164 
165         try {
166             final DictEncoder dictEncoder = BinaryDictUtils.getDictEncoder(file, formatOptions);
167 
168             now = System.currentTimeMillis();
169             // If you need to dump the dict to a textual file, uncomment the line below and the
170             // function above
171             // dumpToCombinedFileForDebug(file, "/tmp/foo");
172             dictEncoder.writeDictionary(dict, formatOptions);
173             diff = System.currentTimeMillis() - now;
174         } catch (IOException e) {
175             Log.e(TAG, "IO exception while writing file", e);
176         } catch (UnsupportedFormatException e) {
177             Log.e(TAG, "UnsupportedFormatException", e);
178         }
179 
180         return diff;
181     }
182 
checkDictionary(final FusionDictionary dict, final List<String> words, final SparseArray<List<Integer>> bigrams)183     private static void checkDictionary(final FusionDictionary dict, final List<String> words,
184             final SparseArray<List<Integer>> bigrams) {
185         assertNotNull(dict);
186 
187         // check unigram
188         for (final String word : words) {
189             final PtNode ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray, word);
190             assertNotNull(ptNode);
191         }
192 
193         // check bigram
194         for (int i = 0; i < bigrams.size(); ++i) {
195             final int w1 = bigrams.keyAt(i);
196             for (final int w2 : bigrams.valueAt(i)) {
197                 final PtNode ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray,
198                         words.get(w1));
199                 assertNotNull(words.get(w1) + "," + words.get(w2), ptNode.getBigram(words.get(w2)));
200             }
201         }
202     }
203 
outputOptions(final int bufferType, final FormatSpec.FormatOptions formatOptions)204     private static String outputOptions(final int bufferType,
205             final FormatSpec.FormatOptions formatOptions) {
206         final String result = " : buffer type = "
207                 + ((bufferType == BinaryDictUtils.USE_BYTE_BUFFER) ? "byte buffer" : "byte array");
208         return result + " : version = " + formatOptions.mVersion;
209     }
210 
211     // Tests for readDictionaryBinary and writeDictionaryBinary
212 
timeReadingAndCheckDict(final File file, final List<String> words, final SparseArray<List<Integer>> bigrams, final int bufferType)213     private static long timeReadingAndCheckDict(final File file, final List<String> words,
214             final SparseArray<List<Integer>> bigrams, final int bufferType) {
215         long now, diff = -1;
216 
217         FusionDictionary dict = null;
218         try {
219             final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder(file, 0, file.length(),
220                     bufferType);
221             now = System.currentTimeMillis();
222             dict = dictDecoder.readDictionaryBinary(false /* deleteDictIfBroken */);
223             diff  = System.currentTimeMillis() - now;
224         } catch (IOException e) {
225             Log.e(TAG, "IOException while reading dictionary", e);
226         } catch (UnsupportedFormatException e) {
227             Log.e(TAG, "Unsupported format", e);
228         }
229 
230         checkDictionary(dict, words, bigrams);
231         return diff;
232     }
233 
234     // Tests for readDictionaryBinary and writeDictionaryBinary
runReadAndWrite(final List<String> words, final SparseArray<List<Integer>> bigrams, final int bufferType, final FormatSpec.FormatOptions formatOptions, final String message)235     private String runReadAndWrite(final List<String> words,
236             final SparseArray<List<Integer>> bigrams,
237             final int bufferType, final FormatSpec.FormatOptions formatOptions,
238             final String message) {
239 
240         final String dictName = "runReadAndWrite";
241         final String dictVersion = Long.toString(System.currentTimeMillis());
242         final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions,
243                 getContext().getCacheDir());
244 
245         final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
246                 BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions));
247         addUnigrams(words.size(), dict, words);
248         addBigrams(dict, words, bigrams);
249         checkDictionary(dict, words, bigrams);
250 
251         final long write = timeWritingDictToFile(file, dict, formatOptions);
252         final long read = timeReadingAndCheckDict(file, words, bigrams, bufferType);
253 
254         return "PROF: read=" + read + "ms, write=" + write + "ms :" + message
255                 + " : " + outputOptions(bufferType, formatOptions);
256     }
257 
runReadAndWriteTests(final List<String> results, final int bufferType, final FormatSpec.FormatOptions formatOptions)258     private void runReadAndWriteTests(final List<String> results, final int bufferType,
259             final FormatSpec.FormatOptions formatOptions) {
260         results.add(runReadAndWrite(sWords, sEmptyBigrams, bufferType,
261                 formatOptions, "unigram"));
262         results.add(runReadAndWrite(sWords, sChainBigrams, bufferType,
263                 formatOptions, "chain"));
264         results.add(runReadAndWrite(sWords, sStarBigrams, bufferType,
265                 formatOptions, "star"));
266         results.add(runReadAndWrite(sWords, sEmptyBigrams, bufferType, formatOptions,
267                 "unigram with shortcuts"));
268         results.add(runReadAndWrite(sWords, sChainBigrams, bufferType, formatOptions,
269                 "chain with shortcuts"));
270         results.add(runReadAndWrite(sWords, sStarBigrams, bufferType, formatOptions,
271                 "star with shortcuts"));
272         results.add(runReadAndWrite(sWordsWithVariousCodePoints, sEmptyBigrams,
273                 bufferType, formatOptions,
274                 "unigram with various code points"));
275     }
276 
testCharacterTableIsPresent()277     public void testCharacterTableIsPresent() throws IOException, UnsupportedFormatException {
278         final String[] wordSource = {"words", "used", "for", "testing", "a", "code point", "table"};
279         final List<String> words = Arrays.asList(wordSource);
280         final String correctCodePointTable = "toesdrniawuplgfcb ";
281         final String dictName = "codePointTableTest";
282         final String dictVersion = Long.toString(System.currentTimeMillis());
283         final String codePointTableAttribute = DictionaryHeader.CODE_POINT_TABLE_KEY;
284         final File file = BinaryDictUtils.getDictFile(dictName, dictVersion,
285                 BinaryDictUtils.STATIC_OPTIONS, getContext().getCacheDir());
286 
287         // Write a test dictionary
288         final DictEncoder dictEncoder = new Ver2DictEncoder(file,
289                 Ver2DictEncoder.CODE_POINT_TABLE_ON);
290         final FormatSpec.FormatOptions formatOptions =
291                 new FormatSpec.FormatOptions(
292                         FormatSpec.MINIMUM_SUPPORTED_STATIC_VERSION);
293         final FusionDictionary sourcedict = new FusionDictionary(new PtNodeArray(),
294                 BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions));
295         addUnigrams(words.size(), sourcedict, words);
296         dictEncoder.writeDictionary(sourcedict, formatOptions);
297 
298         // Read the dictionary
299         final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder(file, 0, file.length(),
300                 DictDecoder.USE_BYTEARRAY);
301         final DictionaryHeader fileHeader = dictDecoder.readHeader();
302         // Check if codePointTable is present
303         assertTrue("codePointTable is not present",
304                 fileHeader.mDictionaryOptions.mAttributes.containsKey(codePointTableAttribute));
305         final String codePointTable =
306                 fileHeader.mDictionaryOptions.mAttributes.get(codePointTableAttribute);
307         // Check if codePointTable is correct
308         assertEquals("codePointTable is incorrect", codePointTable, correctCodePointTable);
309     }
310 
311     // Unit test for CharEncoding.readString and CharEncoding.writeString.
testCharEncoding()312     public void testCharEncoding() {
313         // the max length of a word in sWords is less than 50.
314         // See generateWords.
315         final byte[] buffer = new byte[50 * 3];
316         final DictBuffer dictBuffer = new ByteArrayDictBuffer(buffer);
317         for (final String word : sWords) {
318             Arrays.fill(buffer, (byte) 0);
319             CharEncoding.writeString(buffer, 0, word, null);
320             dictBuffer.position(0);
321             final String str = CharEncoding.readString(dictBuffer);
322             assertEquals(word, str);
323         }
324     }
325 
testReadAndWriteWithByteBuffer()326     public void testReadAndWriteWithByteBuffer() {
327         final List<String> results = new ArrayList<>();
328 
329         runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_BUFFER,
330                 BinaryDictUtils.STATIC_OPTIONS);
331         runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_BUFFER,
332                 BinaryDictUtils.DYNAMIC_OPTIONS_WITHOUT_TIMESTAMP);
333         runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_BUFFER,
334                 BinaryDictUtils.DYNAMIC_OPTIONS_WITH_TIMESTAMP);
335         for (final String result : results) {
336             Log.d(TAG, result);
337         }
338     }
339 
testReadAndWriteWithByteArray()340     public void testReadAndWriteWithByteArray() {
341         final List<String> results = new ArrayList<>();
342 
343         runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_ARRAY,
344                 BinaryDictUtils.STATIC_OPTIONS);
345         runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_ARRAY,
346                 BinaryDictUtils.DYNAMIC_OPTIONS_WITHOUT_TIMESTAMP);
347         runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_ARRAY,
348                 BinaryDictUtils.DYNAMIC_OPTIONS_WITH_TIMESTAMP);
349 
350         for (final String result : results) {
351             Log.d(TAG, result);
352         }
353     }
354 
355     // Tests for readUnigramsAndBigramsBinary
356 
checkWordMap(final List<String> expectedWords, final SparseArray<List<Integer>> expectedBigrams, final TreeMap<Integer, String> resultWords, final TreeMap<Integer, Integer> resultFrequencies, final TreeMap<Integer, ArrayList<PendingAttribute>> resultBigrams, final boolean checkProbability)357     private static void checkWordMap(final List<String> expectedWords,
358             final SparseArray<List<Integer>> expectedBigrams,
359             final TreeMap<Integer, String> resultWords,
360             final TreeMap<Integer, Integer> resultFrequencies,
361             final TreeMap<Integer, ArrayList<PendingAttribute>> resultBigrams,
362             final boolean checkProbability) {
363         // check unigrams
364         final Set<String> actualWordsSet = new HashSet<>(resultWords.values());
365         final Set<String> expectedWordsSet = new HashSet<>(expectedWords);
366         assertEquals(actualWordsSet, expectedWordsSet);
367         if (checkProbability) {
368             for (int freq : resultFrequencies.values()) {
369                 assertEquals(freq, UNIGRAM_FREQ);
370             }
371         }
372 
373         // check bigrams
374         final HashMap<String, Set<String>> expBigrams = new HashMap<>();
375         for (int i = 0; i < expectedBigrams.size(); ++i) {
376             final String word1 = expectedWords.get(expectedBigrams.keyAt(i));
377             for (int w2 : expectedBigrams.valueAt(i)) {
378                 if (expBigrams.get(word1) == null) {
379                     expBigrams.put(word1, new HashSet<String>());
380                 }
381                 expBigrams.get(word1).add(expectedWords.get(w2));
382             }
383         }
384 
385         final HashMap<String, Set<String>> actBigrams = new HashMap<>();
386         for (Entry<Integer, ArrayList<PendingAttribute>> entry : resultBigrams.entrySet()) {
387             final String word1 = resultWords.get(entry.getKey());
388             final int unigramFreq = resultFrequencies.get(entry.getKey());
389             for (PendingAttribute attr : entry.getValue()) {
390                 final String word2 = resultWords.get(attr.mAddress);
391                 if (actBigrams.get(word1) == null) {
392                     actBigrams.put(word1, new HashSet<String>());
393                 }
394                 actBigrams.get(word1).add(word2);
395 
396                 if (checkProbability) {
397                     final int bigramFreq = BinaryDictIOUtils.reconstructBigramFrequency(
398                             unigramFreq, attr.mFrequency);
399                     assertTrue(Math.abs(bigramFreq - BIGRAM_FREQ) < TOLERANCE_OF_BIGRAM_FREQ);
400                 }
401             }
402         }
403         assertEquals(actBigrams, expBigrams);
404     }
405 
406     private static long timeAndCheckReadUnigramsAndBigramsBinary(final File file,
407             final List<String> words, final SparseArray<List<Integer>> bigrams,
408             final int bufferType, final boolean checkProbability) {
409         final TreeMap<Integer, String> resultWords = new TreeMap<>();
410         final TreeMap<Integer, ArrayList<PendingAttribute>> resultBigrams = new TreeMap<>();
411         final TreeMap<Integer, Integer> resultFreqs = new TreeMap<>();
412 
413         long now = -1, diff = -1;
414         try {
415             final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder(file, 0, file.length(),
416                     bufferType);
417             now = System.currentTimeMillis();
418             dictDecoder.readUnigramsAndBigramsBinary(resultWords, resultFreqs, resultBigrams);
419             diff = System.currentTimeMillis() - now;
420         } catch (IOException e) {
421             Log.e(TAG, "IOException", e);
422         } catch (UnsupportedFormatException e) {
423             Log.e(TAG, "UnsupportedFormatException", e);
424         }
425 
426         checkWordMap(words, bigrams, resultWords, resultFreqs, resultBigrams, checkProbability);
427         return diff;
428     }
429 
430     private String runReadUnigramsAndBigramsBinary(final ArrayList<String> words,
431             final SparseArray<List<Integer>> bigrams, final int bufferType,
432             final FormatSpec.FormatOptions formatOptions, final String message) {
433         final String dictName = "runReadUnigrams";
434         final String dictVersion = Long.toString(System.currentTimeMillis());
435         final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions,
436                 getContext().getCacheDir());
437 
438         // making the dictionary from lists of words.
439         final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
440                 BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions));
441         addUnigrams(words.size(), dict, words);
442         addBigrams(dict, words, bigrams);
443 
444         timeWritingDictToFile(file, dict, formatOptions);
445 
446         // Caveat: Currently, the Java code to read a v4 dictionary doesn't calculate the
447         // probability when there's a timestamp for the entry.
448         // TODO: Abandon the Java code, and implement the v4 dictionary reading code in native.
449         long wordMap = timeAndCheckReadUnigramsAndBigramsBinary(file, words, bigrams, bufferType,
450                 !formatOptions.mHasTimestamp /* checkProbability */);
451         long fullReading = timeReadingAndCheckDict(file, words, bigrams,
452                 bufferType);
453 
454         return "readDictionaryBinary=" + fullReading + ", readUnigramsAndBigramsBinary=" + wordMap
455                 + " : " + message + " : " + outputOptions(bufferType, formatOptions);
456     }
457 
458     private void runReadUnigramsAndBigramsTests(final ArrayList<String> results,
459             final int bufferType, final FormatSpec.FormatOptions formatOptions) {
460         results.add(runReadUnigramsAndBigramsBinary(sWords, sEmptyBigrams, bufferType,
461                 formatOptions, "unigram"));
462         results.add(runReadUnigramsAndBigramsBinary(sWords, sChainBigrams, bufferType,
463                 formatOptions, "chain"));
464         results.add(runReadUnigramsAndBigramsBinary(sWords, sStarBigrams, bufferType,
465                 formatOptions, "star"));
466     }
467 
468     public void testReadUnigramsAndBigramsBinaryWithByteBuffer() {
469         final ArrayList<String> results = new ArrayList<>();
470 
471         runReadUnigramsAndBigramsTests(results, BinaryDictUtils.USE_BYTE_BUFFER,
472                 BinaryDictUtils.STATIC_OPTIONS);
473 
474         for (final String result : results) {
475             Log.d(TAG, result);
476         }
477     }
478 
479     public void testReadUnigramsAndBigramsBinaryWithByteArray() {
480         final ArrayList<String> results = new ArrayList<>();
481 
482         runReadUnigramsAndBigramsTests(results, BinaryDictUtils.USE_BYTE_ARRAY,
483                 BinaryDictUtils.STATIC_OPTIONS);
484 
485         for (final String result : results) {
486             Log.d(TAG, result);
487         }
488     }
489 
490     // Tests for getTerminalPosition
491     private static String getWordFromBinary(final DictDecoder dictDecoder, final int address) {
492         if (dictDecoder.getPosition() != 0) dictDecoder.setPosition(0);
493 
494         DictionaryHeader fileHeader = null;
495         try {
496             fileHeader = dictDecoder.readHeader();
497         } catch (IOException e) {
498             return null;
499         } catch (UnsupportedFormatException e) {
500             return null;
501         }
502         if (fileHeader == null) return null;
503         return BinaryDictDecoderUtils.getWordAtPosition(dictDecoder, fileHeader.mBodyOffset,
504                 address).mWord;
505     }
506 
507     private static long checkGetTerminalPosition(final DictDecoder dictDecoder, final String word,
508             final boolean contained) {
509         long diff = -1;
510         int position = -1;
511         try {
512             final long now = System.nanoTime();
513             position = dictDecoder.getTerminalPosition(word);
514             diff = System.nanoTime() - now;
515         } catch (IOException e) {
516             Log.e(TAG, "IOException while getTerminalPosition", e);
517         } catch (UnsupportedFormatException e) {
518             Log.e(TAG, "UnsupportedFormatException while getTerminalPosition", e);
519         }
520 
521         assertEquals(FormatSpec.NOT_VALID_WORD != position, contained);
522         if (contained) assertEquals(getWordFromBinary(dictDecoder, position), word);
523         return diff;
524     }
525 
526     private void runGetTerminalPosition(final ArrayList<String> words,
527             final SparseArray<List<Integer>> bigrams, final int bufferType,
528             final FormatOptions formatOptions, final String message) {
529         final String dictName = "testGetTerminalPosition";
530         final String dictVersion = Long.toString(System.currentTimeMillis());
531         final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions,
532                 getContext().getCacheDir());
533 
534         final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
535                 BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions));
536         addUnigrams(sWords.size(), dict, sWords);
537         addBigrams(dict, words, bigrams);
538         timeWritingDictToFile(file, dict, formatOptions);
539 
540         final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder(file, 0, file.length(),
541                 DictDecoder.USE_BYTEARRAY);
542         try {
543             dictDecoder.openDictBuffer();
544         } catch (IOException e) {
545             Log.e(TAG, "IOException while opening the buffer", e);
546         } catch (UnsupportedFormatException e) {
547             Log.e(TAG, "IOException while opening the buffer", e);
548         }
549         assertTrue("Can't get the buffer", dictDecoder.isDictBufferOpen());
550 
551         try {
552             // too long word
553             final String longWord = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz";
554             assertEquals(FormatSpec.NOT_VALID_WORD, dictDecoder.getTerminalPosition(longWord));
555 
556             // null
557             assertEquals(FormatSpec.NOT_VALID_WORD, dictDecoder.getTerminalPosition(null));
558 
559             // empty string
560             assertEquals(FormatSpec.NOT_VALID_WORD, dictDecoder.getTerminalPosition(""));
561         } catch (IOException e) {
562         } catch (UnsupportedFormatException e) {
563         }
564 
565         // Test a word that is contained within the dictionary.
566         long sum = 0;
567         for (int i = 0; i < sWords.size(); ++i) {
568             final long time = checkGetTerminalPosition(dictDecoder, sWords.get(i), true);
569             sum += time == -1 ? 0 : time;
570         }
571         Log.d(TAG, "per search : " + (((double)sum) / sWords.size() / 1000000) + " : " + message
572                 + " : " + outputOptions(bufferType, formatOptions));
573 
574         // Test a word that isn't contained within the dictionary.
575         final int[] codePointSet = CodePointUtils.generateCodePointSet(DEFAULT_CODE_POINT_SET_SIZE,
576                 mRandom);
577         for (int i = 0; i < 1000; ++i) {
578             final String word = CodePointUtils.generateWord(mRandom, codePointSet);
579             if (sWords.indexOf(word) != -1) continue;
580             checkGetTerminalPosition(dictDecoder, word, false);
581         }
582     }
583 
584     private void runGetTerminalPositionTests(final int bufferType,
585             final FormatOptions formatOptions) {
586         runGetTerminalPosition(sWords, sEmptyBigrams, bufferType, formatOptions, "unigram");
587     }
588 
589     public void testGetTerminalPosition() {
590         final ArrayList<String> results = new ArrayList<>();
591 
592         runGetTerminalPositionTests(BinaryDictUtils.USE_BYTE_ARRAY,
593                 BinaryDictUtils.STATIC_OPTIONS);
594         runGetTerminalPositionTests(BinaryDictUtils.USE_BYTE_BUFFER,
595                 BinaryDictUtils.STATIC_OPTIONS);
596 
597         for (final String result : results) {
598             Log.d(TAG, result);
599         }
600     }
601 
602     public void testVer2DictGetWordProperty() {
603         final FormatOptions formatOptions = BinaryDictUtils.STATIC_OPTIONS;
604         final ArrayList<String> words = sWords;
605         final String dictName = "testGetWordProperty";
606         final String dictVersion = Long.toString(System.currentTimeMillis());
607         final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
608                 BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions));
609         addUnigrams(words.size(), dict, words);
610         addBigrams(dict, words, sEmptyBigrams);
611         final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions,
612                 getContext().getCacheDir());
613         file.delete();
614         timeWritingDictToFile(file, dict, formatOptions);
615         final BinaryDictionary binaryDictionary = new BinaryDictionary(file.getAbsolutePath(),
616                 0 /* offset */, file.length(), true /* useFullEditDistance */,
617                 Locale.ENGLISH, dictName, false /* isUpdatable */);
618         for (final String word : words) {
619             final WordProperty wordProperty = binaryDictionary.getWordProperty(word,
620                     false /* isBeginningOfSentence */);
621             assertEquals(word, wordProperty.mWord);
622             assertEquals(UNIGRAM_FREQ, wordProperty.getProbability());
623         }
624     }
625 
626     public void testVer2DictIteration() {
627         final FormatOptions formatOptions = BinaryDictUtils.STATIC_OPTIONS;
628         final ArrayList<String> words = sWords;
629         final SparseArray<List<Integer>> bigrams = sEmptyBigrams;
630         final String dictName = "testGetWordProperty";
631         final String dictVersion = Long.toString(System.currentTimeMillis());
632         final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
633                 BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions));
634         addUnigrams(words.size(), dict, words);
635         addBigrams(dict, words, bigrams);
636         final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions,
637                 getContext().getCacheDir());
638         timeWritingDictToFile(file, dict, formatOptions);
639         Log.d(TAG, file.getAbsolutePath());
640         final BinaryDictionary binaryDictionary = new BinaryDictionary(file.getAbsolutePath(),
641                 0 /* offset */, file.length(), true /* useFullEditDistance */,
642                 Locale.ENGLISH, dictName, false /* isUpdatable */);
643 
644         final HashSet<String> wordSet = new HashSet<>(words);
645         final HashSet<Pair<String, String>> bigramSet = new HashSet<>();
646 
647         for (int i = 0; i < words.size(); i++) {
648             final List<Integer> bigramList = bigrams.get(i);
649             if (bigramList != null) {
650                 for (final Integer word1Index : bigramList) {
651                     final String word1 = words.get(word1Index);
652                     bigramSet.add(new Pair<>(words.get(i), word1));
653                 }
654             }
655         }
656         int token = 0;
657         do {
658             final BinaryDictionary.GetNextWordPropertyResult result =
659                     binaryDictionary.getNextWordProperty(token);
660             final WordProperty wordProperty = result.mWordProperty;
661             final String word0 = wordProperty.mWord;
662             assertEquals(UNIGRAM_FREQ, wordProperty.mProbabilityInfo.mProbability);
663             wordSet.remove(word0);
664             if (wordProperty.mHasNgrams) {
665                 for (final WeightedString bigramTarget : wordProperty.getBigrams()) {
666                     final String word1 = bigramTarget.mWord;
667                     final Pair<String, String> bigram = new Pair<>(word0, word1);
668                     assertTrue(bigramSet.contains(bigram));
669                     bigramSet.remove(bigram);
670                 }
671             }
672             token = result.mNextToken;
673         } while (token != 0);
674         assertTrue(wordSet.isEmpty());
675         assertTrue(bigramSet.isEmpty());
676     }
677 }
678