1 /*
2  * Copyright (C) 2012 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.android.inputmethod.latin.spellcheck;
18 
import android.content.ContentResolver;
import android.database.ContentObserver;
import android.os.Binder;
import android.provider.UserDictionary.Words;
import android.service.textservice.SpellCheckerService.Session;
import android.text.TextUtils;
import android.util.Log;
import android.util.LruCache;
import android.view.textservice.SuggestionsInfo;
import android.view.textservice.TextInfo;

import com.android.inputmethod.compat.SuggestionsInfoCompatUtils;
import com.android.inputmethod.keyboard.Keyboard;
import com.android.inputmethod.latin.NgramContext;
import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
import com.android.inputmethod.latin.WordComposer;
import com.android.inputmethod.latin.common.Constants;
import com.android.inputmethod.latin.common.LocaleUtils;
import com.android.inputmethod.latin.common.StringUtils;
import com.android.inputmethod.latin.define.DebugFlags;
import com.android.inputmethod.latin.utils.BinaryDictionaryUtils;
import com.android.inputmethod.latin.utils.ScriptUtils;
import com.android.inputmethod.latin.utils.StatsUtils;
import com.android.inputmethod.latin.utils.SuggestionResults;

import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.regex.Pattern;
47 
48 public abstract class AndroidWordLevelSpellCheckerSession extends Session {
49     private static final String TAG = AndroidWordLevelSpellCheckerSession.class.getSimpleName();
50 
51     public final static String[] EMPTY_STRING_ARRAY = new String[0];
52 
53     // Immutable, but not available in the constructor.
54     private Locale mLocale;
55     // Cache this for performance
56     private int mScript; // One of SCRIPT_LATIN or SCRIPT_CYRILLIC for now.
57     private final AndroidSpellCheckerService mService;
58     protected final SuggestionsCache mSuggestionsCache = new SuggestionsCache();
59     private final ContentObserver mObserver;
60 
61     private static final String quotesRegexp =
62             "(\\u0022|\\u0027|\\u0060|\\u00B4|\\u2018|\\u2018|\\u201C|\\u201D)";
63 
64     private static final class SuggestionsParams {
65         public final String[] mSuggestions;
66         public final int mFlags;
SuggestionsParams(String[] suggestions, int flags)67         public SuggestionsParams(String[] suggestions, int flags) {
68             mSuggestions = suggestions;
69             mFlags = flags;
70         }
71     }
72 
73     protected static final class SuggestionsCache {
74         private static final int MAX_CACHE_SIZE = 50;
75         private final LruCache<String, SuggestionsParams> mUnigramSuggestionsInfoCache =
76                 new LruCache<>(MAX_CACHE_SIZE);
77 
generateKey(final String query)78         private static String generateKey(final String query) {
79             return query + "";
80         }
81 
getSuggestionsFromCache(final String query)82         public SuggestionsParams getSuggestionsFromCache(final String query) {
83             return mUnigramSuggestionsInfoCache.get(query);
84         }
85 
putSuggestionsToCache( final String query, final String[] suggestions, final int flags)86         public void putSuggestionsToCache(
87                 final String query, final String[] suggestions, final int flags) {
88             if (suggestions == null || TextUtils.isEmpty(query)) {
89                 return;
90             }
91             mUnigramSuggestionsInfoCache.put(
92                     generateKey(query),
93                     new SuggestionsParams(suggestions, flags));
94         }
95 
clearCache()96         public void clearCache() {
97             mUnigramSuggestionsInfoCache.evictAll();
98         }
99     }
100 
AndroidWordLevelSpellCheckerSession(final AndroidSpellCheckerService service)101     AndroidWordLevelSpellCheckerSession(final AndroidSpellCheckerService service) {
102         mService = service;
103         final ContentResolver cres = service.getContentResolver();
104 
105         mObserver = new ContentObserver(null) {
106             @Override
107             public void onChange(boolean self) {
108                 mSuggestionsCache.clearCache();
109             }
110         };
111         cres.registerContentObserver(Words.CONTENT_URI, true, mObserver);
112     }
113 
114     @Override
onCreate()115     public void onCreate() {
116         final String localeString = getLocale();
117         mLocale = (null == localeString) ? null
118                 : LocaleUtils.constructLocaleFromString(localeString);
119         mScript = ScriptUtils.getScriptFromSpellCheckerLocale(mLocale);
120     }
121 
122     @Override
onClose()123     public void onClose() {
124         final ContentResolver cres = mService.getContentResolver();
125         cres.unregisterContentObserver(mObserver);
126     }
127 
128     private static final int CHECKABILITY_CHECKABLE = 0;
129     private static final int CHECKABILITY_TOO_MANY_NON_LETTERS = 1;
130     private static final int CHECKABILITY_CONTAINS_PERIOD = 2;
131     private static final int CHECKABILITY_EMAIL_OR_URL = 3;
132     private static final int CHECKABILITY_FIRST_LETTER_UNCHECKABLE = 4;
133     private static final int CHECKABILITY_TOO_SHORT = 5;
134     /**
135      * Finds out whether a particular string should be filtered out of spell checking.
136      *
137      * This will loosely match URLs, numbers, symbols. To avoid always underlining words that
138      * we know we will never recognize, this accepts a script identifier that should be one
139      * of the SCRIPT_* constants defined above, to rule out quickly characters from very
140      * different languages.
141      *
142      * @param text the string to evaluate.
143      * @param script the identifier for the script this spell checker recognizes
144      * @return one of the FILTER_OUT_* constants above.
145      */
getCheckabilityInScript(final String text, final int script)146     private static int getCheckabilityInScript(final String text, final int script) {
147         if (TextUtils.isEmpty(text) || text.length() <= 1) return CHECKABILITY_TOO_SHORT;
148 
149         // TODO: check if an equivalent processing can't be done more quickly with a
150         // compiled regexp.
151         // Filter by first letter
152         final int firstCodePoint = text.codePointAt(0);
153         // Filter out words that don't start with a letter or an apostrophe
154         if (!ScriptUtils.isLetterPartOfScript(firstCodePoint, script)
155                 && '\'' != firstCodePoint) return CHECKABILITY_FIRST_LETTER_UNCHECKABLE;
156 
157         // Filter contents
158         final int length = text.length();
159         int letterCount = 0;
160         for (int i = 0; i < length; i = text.offsetByCodePoints(i, 1)) {
161             final int codePoint = text.codePointAt(i);
162             // Any word containing a COMMERCIAL_AT is probably an e-mail address
163             // Any word containing a SLASH is probably either an ad-hoc combination of two
164             // words or a URI - in either case we don't want to spell check that
165             if (Constants.CODE_COMMERCIAL_AT == codePoint || Constants.CODE_SLASH == codePoint) {
166                 return CHECKABILITY_EMAIL_OR_URL;
167             }
168             // If the string contains a period, native returns strange suggestions (it seems
169             // to return suggestions for everything up to the period only and to ignore the
170             // rest), so we suppress lookup if there is a period.
171             // TODO: investigate why native returns these suggestions and remove this code.
172             if (Constants.CODE_PERIOD == codePoint) {
173                 return CHECKABILITY_CONTAINS_PERIOD;
174             }
175             if (ScriptUtils.isLetterPartOfScript(codePoint, script)) ++letterCount;
176         }
177         // Guestimate heuristic: perform spell checking if at least 3/4 of the characters
178         // in this word are letters
179         return (letterCount * 4 < length * 3)
180                 ? CHECKABILITY_TOO_MANY_NON_LETTERS : CHECKABILITY_CHECKABLE;
181     }
182 
183     /**
184      * Helper method to test valid capitalizations of a word.
185      *
186      * If the "text" is lower-case, we test only the exact string.
187      * If the "Text" is capitalized, we test the exact string "Text" and the lower-cased
188      *  version of it "text".
189      * If the "TEXT" is fully upper case, we test the exact string "TEXT", the lower-cased
190      *  version of it "text" and the capitalized version of it "Text".
191      */
isInDictForAnyCapitalization(final String text, final int capitalizeType)192     private boolean isInDictForAnyCapitalization(final String text, final int capitalizeType) {
193         // If the word is in there as is, then it's in the dictionary. If not, we'll test lower
194         // case versions, but only if the word is not already all-lower case or mixed case.
195         if (mService.isValidWord(mLocale, text)) return true;
196         if (StringUtils.CAPITALIZE_NONE == capitalizeType) return false;
197 
198         // If we come here, we have a capitalized word (either First- or All-).
199         // Downcase the word and look it up again. If the word is only capitalized, we
200         // tested all possibilities, so if it's still negative we can return false.
201         final String lowerCaseText = text.toLowerCase(mLocale);
202         if (mService.isValidWord(mLocale, lowerCaseText)) return true;
203         if (StringUtils.CAPITALIZE_FIRST == capitalizeType) return false;
204 
205         // If the lower case version is not in the dictionary, it's still possible
206         // that we have an all-caps version of a word that needs to be capitalized
207         // according to the dictionary. E.g. "GERMANS" only exists in the dictionary as "Germans".
208         return mService.isValidWord(mLocale,
209                 StringUtils.capitalizeFirstAndDowncaseRest(lowerCaseText, mLocale));
210     }
211 
212     // Note : this must be reentrant
213     /**
214      * Gets a list of suggestions for a specific string. This returns a list of possible
215      * corrections for the text passed as an argument. It may split or group words, and
216      * even perform grammatical analysis.
217      */
onGetSuggestionsInternal(final TextInfo textInfo, final int suggestionsLimit)218     private SuggestionsInfo onGetSuggestionsInternal(final TextInfo textInfo,
219             final int suggestionsLimit) {
220         return onGetSuggestionsInternal(textInfo, null, suggestionsLimit);
221     }
222 
onGetSuggestionsInternal( final TextInfo textInfo, final NgramContext ngramContext, final int suggestionsLimit)223     protected SuggestionsInfo onGetSuggestionsInternal(
224             final TextInfo textInfo, final NgramContext ngramContext, final int suggestionsLimit) {
225         try {
226             final String text = textInfo.getText().
227                     replaceAll(AndroidSpellCheckerService.APOSTROPHE,
228                             AndroidSpellCheckerService.SINGLE_QUOTE).
229                     replaceAll("^" + quotesRegexp, "").
230                     replaceAll(quotesRegexp + "$", "");
231 
232             if (!mService.hasMainDictionaryForLocale(mLocale)) {
233                 return AndroidSpellCheckerService.getNotInDictEmptySuggestions(
234                         false /* reportAsTypo */);
235             }
236 
237             // Handle special patterns like email, URI, telephone number.
238             final int checkability = getCheckabilityInScript(text, mScript);
239             if (CHECKABILITY_CHECKABLE != checkability) {
240                 if (CHECKABILITY_CONTAINS_PERIOD == checkability) {
241                     final String[] splitText = text.split(Constants.REGEXP_PERIOD);
242                     boolean allWordsAreValid = true;
243                     for (final String word : splitText) {
244                         if (!mService.isValidWord(mLocale, word)) {
245                             allWordsAreValid = false;
246                             break;
247                         }
248                     }
249                     if (allWordsAreValid) {
250                         return new SuggestionsInfo(SuggestionsInfo.RESULT_ATTR_LOOKS_LIKE_TYPO
251                                 | SuggestionsInfo.RESULT_ATTR_HAS_RECOMMENDED_SUGGESTIONS,
252                                 new String[] {
253                                         TextUtils.join(Constants.STRING_SPACE, splitText) });
254                     }
255                 }
256                 return mService.isValidWord(mLocale, text) ?
257                         AndroidSpellCheckerService.getInDictEmptySuggestions() :
258                         AndroidSpellCheckerService.getNotInDictEmptySuggestions(
259                                 CHECKABILITY_CONTAINS_PERIOD == checkability /* reportAsTypo */);
260             }
261 
262             // Handle normal words.
263             final int capitalizeType = StringUtils.getCapitalizationType(text);
264 
265             if (isInDictForAnyCapitalization(text, capitalizeType)) {
266                 if (DebugFlags.DEBUG_ENABLED) {
267                     Log.i(TAG, "onGetSuggestionsInternal() : [" + text + "] is a valid word");
268                 }
269                 return AndroidSpellCheckerService.getInDictEmptySuggestions();
270             }
271             if (DebugFlags.DEBUG_ENABLED) {
272                 Log.i(TAG, "onGetSuggestionsInternal() : [" + text + "] is NOT a valid word");
273             }
274 
275             final Keyboard keyboard = mService.getKeyboardForLocale(mLocale);
276             if (null == keyboard) {
277                 Log.w(TAG, "onGetSuggestionsInternal() : No keyboard for locale: " + mLocale);
278                 // If there is no keyboard for this locale, don't do any spell-checking.
279                 return AndroidSpellCheckerService.getNotInDictEmptySuggestions(
280                         false /* reportAsTypo */);
281             }
282 
283             final WordComposer composer = new WordComposer();
284             final int[] codePoints = StringUtils.toCodePointArray(text);
285             final int[] coordinates;
286             coordinates = keyboard.getCoordinates(codePoints);
287             composer.setComposingWord(codePoints, coordinates);
288             // TODO: Don't gather suggestions if the limit is <= 0 unless necessary
289             final SuggestionResults suggestionResults = mService.getSuggestionResults(
290                     mLocale, composer.getComposedDataSnapshot(), ngramContext, keyboard);
291             final Result result = getResult(capitalizeType, mLocale, suggestionsLimit,
292                     mService.getRecommendedThreshold(), text, suggestionResults);
293             if (DebugFlags.DEBUG_ENABLED) {
294                 if (result.mSuggestions != null && result.mSuggestions.length > 0) {
295                     final StringBuilder builder = new StringBuilder();
296                     for (String suggestion : result.mSuggestions) {
297                         builder.append(" [");
298                         builder.append(suggestion);
299                         builder.append("]");
300                     }
301                     Log.i(TAG, "onGetSuggestionsInternal() : Suggestions =" + builder);
302                 }
303             }
304             // Handle word not in dictionary.
305             // This is called only once per unique word, so entering multiple
306             // instances of the same word does not result in more than one call
307             // to this method.
308             // Also, upon changing the orientation of the device, this is called
309             // again for every unique invalid word in the text box.
310             StatsUtils.onInvalidWordIdentification(text);
311 
312             final int flags =
313                     SuggestionsInfo.RESULT_ATTR_LOOKS_LIKE_TYPO
314                     | (result.mHasRecommendedSuggestions
315                             ? SuggestionsInfoCompatUtils
316                                     .getValueOf_RESULT_ATTR_HAS_RECOMMENDED_SUGGESTIONS()
317                             : 0);
318             final SuggestionsInfo retval = new SuggestionsInfo(flags, result.mSuggestions);
319             mSuggestionsCache.putSuggestionsToCache(text, result.mSuggestions, flags);
320             return retval;
321         } catch (RuntimeException e) {
322             // Don't kill the keyboard if there is a bug in the spell checker
323             Log.e(TAG, "Exception while spellchecking", e);
324             return AndroidSpellCheckerService.getNotInDictEmptySuggestions(
325                     false /* reportAsTypo */);
326         }
327     }
328 
329     private static final class Result {
330         public final String[] mSuggestions;
331         public final boolean mHasRecommendedSuggestions;
Result(final String[] gatheredSuggestions, final boolean hasRecommendedSuggestions)332         public Result(final String[] gatheredSuggestions, final boolean hasRecommendedSuggestions) {
333             mSuggestions = gatheredSuggestions;
334             mHasRecommendedSuggestions = hasRecommendedSuggestions;
335         }
336     }
337 
getResult(final int capitalizeType, final Locale locale, final int suggestionsLimit, final float recommendedThreshold, final String originalText, final SuggestionResults suggestionResults)338     private static Result getResult(final int capitalizeType, final Locale locale,
339             final int suggestionsLimit, final float recommendedThreshold, final String originalText,
340             final SuggestionResults suggestionResults) {
341         if (suggestionResults.isEmpty() || suggestionsLimit <= 0) {
342             return new Result(null /* gatheredSuggestions */,
343                     false /* hasRecommendedSuggestions */);
344         }
345         final ArrayList<String> suggestions = new ArrayList<>();
346         for (final SuggestedWordInfo suggestedWordInfo : suggestionResults) {
347             final String suggestion;
348             if (StringUtils.CAPITALIZE_ALL == capitalizeType) {
349                 suggestion = suggestedWordInfo.mWord.toUpperCase(locale);
350             } else if (StringUtils.CAPITALIZE_FIRST == capitalizeType) {
351                 suggestion = StringUtils.capitalizeFirstCodePoint(
352                         suggestedWordInfo.mWord, locale);
353             } else {
354                 suggestion = suggestedWordInfo.mWord;
355             }
356             suggestions.add(suggestion);
357         }
358         StringUtils.removeDupes(suggestions);
359         // This returns a String[], while toArray() returns an Object[] which cannot be cast
360         // into a String[].
361         final List<String> gatheredSuggestionsList =
362                 suggestions.subList(0, Math.min(suggestions.size(), suggestionsLimit));
363         final String[] gatheredSuggestions =
364                 gatheredSuggestionsList.toArray(new String[gatheredSuggestionsList.size()]);
365 
366         final int bestScore = suggestionResults.first().mScore;
367         final String bestSuggestion = suggestions.get(0);
368         final float normalizedScore = BinaryDictionaryUtils.calcNormalizedScore(
369                 originalText, bestSuggestion, bestScore);
370         final boolean hasRecommendedSuggestions = (normalizedScore > recommendedThreshold);
371         return new Result(gatheredSuggestions, hasRecommendedSuggestions);
372     }
373 
374     /*
375      * The spell checker acts on its own behalf. That is needed, in particular, to be able to
376      * access the dictionary files, which the provider restricts to the identity of Latin IME.
377      * Since it's called externally by the application, the spell checker is using the identity
378      * of the application by default unless we clearCallingIdentity.
379      * That's what the following method does.
380      */
381     @Override
onGetSuggestions(final TextInfo textInfo, final int suggestionsLimit)382     public SuggestionsInfo onGetSuggestions(final TextInfo textInfo, final int suggestionsLimit) {
383         long ident = Binder.clearCallingIdentity();
384         try {
385             return onGetSuggestionsInternal(textInfo, suggestionsLimit);
386         } finally {
387             Binder.restoreCallingIdentity(ident);
388         }
389     }
390 }
391