/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.inputmethod.latin.spellcheck;

import android.content.ContentResolver;
import android.database.ContentObserver;
import android.os.Binder;
import android.provider.UserDictionary.Words;
import android.service.textservice.SpellCheckerService.Session;
import android.text.TextUtils;
import android.util.Log;
import android.util.LruCache;
import android.view.textservice.SuggestionsInfo;
import android.view.textservice.TextInfo;

import com.android.inputmethod.compat.SuggestionsInfoCompatUtils;
import com.android.inputmethod.keyboard.Keyboard;
import com.android.inputmethod.latin.NgramContext;
import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
import com.android.inputmethod.latin.WordComposer;
import com.android.inputmethod.latin.common.Constants;
import com.android.inputmethod.latin.common.LocaleUtils;
import com.android.inputmethod.latin.common.StringUtils;
import com.android.inputmethod.latin.define.DebugFlags;
import com.android.inputmethod.latin.utils.BinaryDictionaryUtils;
import com.android.inputmethod.latin.utils.ScriptUtils;
import com.android.inputmethod.latin.utils.StatsUtils;
import com.android.inputmethod.latin.utils.SuggestionResults;

import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.regex.Pattern;

/**
 * Spell checker session that looks up one piece of text at a time against the dictionaries
 * exposed by {@link AndroidSpellCheckerService}.
 *
 * The session keeps a small cache of the suggestions it computed; that cache is emptied whenever
 * the user dictionary ({@link Words#CONTENT_URI}) reports a change.
 */
public abstract class AndroidWordLevelSpellCheckerSession extends Session {
    private static final String TAG = AndroidWordLevelSpellCheckerSession.class.getSimpleName();

    public static final String[] EMPTY_STRING_ARRAY = new String[0];

    // Immutable, but not available in the constructor.
    private Locale mLocale;
    // Cache this for performance
    private int mScript; // One of SCRIPT_LATIN or SCRIPT_CYRILLIC for now.
    private final AndroidSpellCheckerService mService;
    protected final SuggestionsCache mSuggestionsCache = new SuggestionsCache();
    private final ContentObserver mObserver;

    // Quote characters that are stripped from the start and from the end of the checked text:
    // U+0022, U+0027, U+0060, U+00B4, U+2018, U+2019, U+201C and U+201D.
    // The original list contained U+2018 twice and lacked its closing partner U+2019.
    private static final String quotesRegexp =
            "(\\u0022|\\u0027|\\u0060|\\u00B4|\\u2018|\\u2019|\\u201C|\\u201D)";
    // Compiled once instead of on every lookup.
    private static final Pattern LEADING_QUOTE_PATTERN = Pattern.compile("^" + quotesRegexp);
    private static final Pattern TRAILING_QUOTE_PATTERN = Pattern.compile(quotesRegexp + "$");

    /** Immutable pair of a suggestion list and the result flags that go with it. */
    private static final class SuggestionsParams {
        public final String[] mSuggestions;
        public final int mFlags;

        public SuggestionsParams(String[] suggestions, int flags) {
            mSuggestions = suggestions;
            mFlags = flags;
        }
    }

    /** LRU cache of computed suggestions, keyed by the checked text. */
    protected static final class SuggestionsCache {
        private static final int MAX_CACHE_SIZE = 50;
        private final LruCache<String, SuggestionsParams> mUnigramSuggestionsInfoCache =
                new LruCache<>(MAX_CACHE_SIZE);

        /** Derives the cache key for a query. Used by both the getter and the setter. */
        private static String generateKey(final String query) {
            return query + "";
        }

        /**
         * Looks up previously stored suggestions.
         *
         * @param query the text whose suggestions are wanted.
         * @return the cached entry, or null when nothing is cached for this query or when the
         * query is null or empty (such queries are never stored).
         */
        public SuggestionsParams getSuggestionsFromCache(final String query) {
            // Mirror the guard of putSuggestionsToCache(): an empty query can not have an entry,
            // and LruCache rejects null keys with an exception.
            if (TextUtils.isEmpty(query)) {
                return null;
            }
            return mUnigramSuggestionsInfoCache.get(generateKey(query));
        }

        /**
         * Stores suggestions for a query. Does nothing when the suggestions are null or when
         * the query is null or empty.
         *
         * @param query the text the suggestions were computed for.
         * @param suggestions the suggestions to remember.
         * @param flags the result flags to remember along with the suggestions.
         */
        public void putSuggestionsToCache(
                final String query, final String[] suggestions, final int flags) {
            if (suggestions == null || TextUtils.isEmpty(query)) {
                return;
            }
            mUnigramSuggestionsInfoCache.put(
                    generateKey(query),
                    new SuggestionsParams(suggestions, flags));
        }

        /** Drops every cached entry. */
        public void clearCache() {
            mUnigramSuggestionsInfoCache.evictAll();
        }
    }

    /**
     * Creates the session and starts observing the user dictionary so that cached suggestions
     * are discarded when it changes. The observer is unregistered in {@link #onClose()}.
     *
     * @param service the service that owns the dictionaries used by this session.
     */
    AndroidWordLevelSpellCheckerSession(final AndroidSpellCheckerService service) {
        mService = service;
        final ContentResolver cres = service.getContentResolver();

        mObserver = new ContentObserver(null) {
            @Override
            public void onChange(boolean self) {
                mSuggestionsCache.clearCache();
            }
        };
        cres.registerContentObserver(Words.CONTENT_URI, true, mObserver);
    }

    /** Resolves the session locale and the script derived from it. */
    @Override
    public void onCreate() {
        final String localeString = getLocale();
        mLocale = (null == localeString) ? null
                : LocaleUtils.constructLocaleFromString(localeString);
        mScript = ScriptUtils.getScriptFromSpellCheckerLocale(mLocale);
    }

    /** Stops observing the user dictionary. */
    @Override
    public void onClose() {
        final ContentResolver cres = mService.getContentResolver();
        cres.unregisterContentObserver(mObserver);
    }

    private static final int CHECKABILITY_CHECKABLE = 0;
    private static final int CHECKABILITY_TOO_MANY_NON_LETTERS = 1;
    private static final int CHECKABILITY_CONTAINS_PERIOD = 2;
    private static final int CHECKABILITY_EMAIL_OR_URL = 3;
    private static final int CHECKABILITY_FIRST_LETTER_UNCHECKABLE = 4;
    private static final int CHECKABILITY_TOO_SHORT = 5;

    /**
     * Finds out whether a particular string should be filtered out of spell checking.
     *
     * This will loosely match URLs, numbers, symbols. To avoid always underlining words that
     * we know we will never recognize, this accepts a script identifier, as returned by
     * {@link ScriptUtils#getScriptFromSpellCheckerLocale}, to rule out quickly characters from
     * very different languages.
     *
     * @param text the string to evaluate.
     * @param script the identifier for the script this spell checker recognizes
     * @return one of the CHECKABILITY_* constants above.
     */
    private static int getCheckabilityInScript(final String text, final int script) {
        if (TextUtils.isEmpty(text) || text.length() <= 1) return CHECKABILITY_TOO_SHORT;

        // TODO: check if an equivalent processing can't be done more quickly with a
        // compiled regexp.
        // Filter by first letter
        final int firstCodePoint = text.codePointAt(0);
        // Filter out words that don't start with a letter or an apostrophe
        if (!ScriptUtils.isLetterPartOfScript(firstCodePoint, script)
                && '\'' != firstCodePoint) return CHECKABILITY_FIRST_LETTER_UNCHECKABLE;

        // Filter contents
        final int length = text.length();
        int letterCount = 0;
        for (int i = 0; i < length; i = text.offsetByCodePoints(i, 1)) {
            final int codePoint = text.codePointAt(i);
            // Any word containing a COMMERCIAL_AT is probably an e-mail address
            // Any word containing a SLASH is probably either an ad-hoc combination of two
            // words or a URI - in either case we don't want to spell check that
            if (Constants.CODE_COMMERCIAL_AT == codePoint || Constants.CODE_SLASH == codePoint) {
                return CHECKABILITY_EMAIL_OR_URL;
            }
            // If the string contains a period, native returns strange suggestions (it seems
            // to return suggestions for everything up to the period only and to ignore the
            // rest), so we suppress lookup if there is a period.
            // TODO: investigate why native returns these suggestions and remove this code.
            if (Constants.CODE_PERIOD == codePoint) {
                return CHECKABILITY_CONTAINS_PERIOD;
            }
            if (ScriptUtils.isLetterPartOfScript(codePoint, script)) ++letterCount;
        }
        // Guestimate heuristic: perform spell checking if at least 3/4 of the characters
        // in this word are letters
        return (letterCount * 4 < length * 3)
                ? CHECKABILITY_TOO_MANY_NON_LETTERS : CHECKABILITY_CHECKABLE;
    }

    /**
     * Helper method to test valid capitalizations of a word.
     *
     * If the "text" is lower-case, we test only the exact string.
     * If the "Text" is capitalized, we test the exact string "Text" and the lower-cased
     *  version of it "text".
     * If the "TEXT" is fully upper case, we test the exact string "TEXT", the lower-cased
     *  version of it "text" and the capitalized version of it "Text".
     *
     * @param text the word to look up.
     * @param capitalizeType the capitalization of the word, as computed by
     * {@link StringUtils#getCapitalizationType}.
     * @return true if any of the tested forms is a valid word for the session locale.
     */
    private boolean isInDictForAnyCapitalization(final String text, final int capitalizeType) {
        // If the word is in there as is, then it's in the dictionary. If not, we'll test lower
        // case versions, but only if the word is not already all-lower case or mixed case.
        if (mService.isValidWord(mLocale, text)) return true;
        if (StringUtils.CAPITALIZE_NONE == capitalizeType) return false;

        // If we come here, we have a capitalized word (either First- or All-).
        // Downcase the word and look it up again. If the word is only capitalized, we
        // tested all possibilities, so if it's still negative we can return false.
        final String lowerCaseText = text.toLowerCase(mLocale);
        if (mService.isValidWord(mLocale, lowerCaseText)) return true;
        if (StringUtils.CAPITALIZE_FIRST == capitalizeType) return false;

        // If the lower case version is not in the dictionary, it's still possible
        // that we have an all-caps version of a word that needs to be capitalized
        // according to the dictionary. E.g. "GERMANS" only exists in the dictionary as "Germans".
        return mService.isValidWord(mLocale,
                StringUtils.capitalizeFirstAndDowncaseRest(lowerCaseText, mLocale));
    }

    // Note : this must be reentrant
    /**
     * Gets a list of suggestions for a specific string. This returns a list of possible
     * corrections for the text passed as an argument. It may split or group words, and
     * even perform grammatical analysis.
     *
     * This variant performs the lookup without any n-gram context.
     */
    private SuggestionsInfo onGetSuggestionsInternal(final TextInfo textInfo,
            final int suggestionsLimit) {
        return onGetSuggestionsInternal(textInfo, null, suggestionsLimit);
    }

    /**
     * Gets a list of suggestions for a specific string, optionally using the preceding words.
     *
     * Any RuntimeException raised during the lookup is logged and turned into an empty
     * "not in dictionary, not a typo" result, so a bug here does not take the caller down.
     *
     * @param textInfo the text to check.
     * @param ngramContext the context passed on to the suggestion lookup; may be null.
     * @param suggestionsLimit the maximum number of suggestions to return.
     * @return the verdict for the text, with suggestions when it looks like a typo.
     */
    protected SuggestionsInfo onGetSuggestionsInternal(
            final TextInfo textInfo, final NgramContext ngramContext, final int suggestionsLimit) {
        try {
            // Replace every APOSTROPHE with SINGLE_QUOTE (both are defined by the service),
            // then strip one quote character from each end of the text.
            final String apostropheNormalized = textInfo.getText().
                    replaceAll(AndroidSpellCheckerService.APOSTROPHE,
                            AndroidSpellCheckerService.SINGLE_QUOTE);
            final String leadingQuoteStripped =
                    LEADING_QUOTE_PATTERN.matcher(apostropheNormalized).replaceAll("");
            final String text =
                    TRAILING_QUOTE_PATTERN.matcher(leadingQuoteStripped).replaceAll("");

            if (!mService.hasMainDictionaryForLocale(mLocale)) {
                return AndroidSpellCheckerService.getNotInDictEmptySuggestions(
                        false /* reportAsTypo */);
            }

            // Handle special patterns like email, URI, telephone number.
            final int checkability = getCheckabilityInScript(text, mScript);
            if (CHECKABILITY_CHECKABLE != checkability) {
                if (CHECKABILITY_CONTAINS_PERIOD == checkability) {
                    // When every period-separated part is a valid word, suggest the same
                    // parts joined with spaces instead.
                    final String[] splitText = text.split(Constants.REGEXP_PERIOD);
                    boolean allWordsAreValid = true;
                    for (final String word : splitText) {
                        if (!mService.isValidWord(mLocale, word)) {
                            allWordsAreValid = false;
                            break;
                        }
                    }
                    if (allWordsAreValid) {
                        return new SuggestionsInfo(SuggestionsInfo.RESULT_ATTR_LOOKS_LIKE_TYPO
                                | SuggestionsInfo.RESULT_ATTR_HAS_RECOMMENDED_SUGGESTIONS,
                                new String[] {
                                        TextUtils.join(Constants.STRING_SPACE, splitText) });
                    }
                }
                return mService.isValidWord(mLocale, text) ?
                        AndroidSpellCheckerService.getInDictEmptySuggestions() :
                        AndroidSpellCheckerService.getNotInDictEmptySuggestions(
                                CHECKABILITY_CONTAINS_PERIOD == checkability /* reportAsTypo */);
            }

            // Handle normal words.
            final int capitalizeType = StringUtils.getCapitalizationType(text);

            if (isInDictForAnyCapitalization(text, capitalizeType)) {
                if (DebugFlags.DEBUG_ENABLED) {
                    Log.i(TAG, "onGetSuggestionsInternal() : [" + text + "] is a valid word");
                }
                return AndroidSpellCheckerService.getInDictEmptySuggestions();
            }
            if (DebugFlags.DEBUG_ENABLED) {
                Log.i(TAG, "onGetSuggestionsInternal() : [" + text + "] is NOT a valid word");
            }

            final Keyboard keyboard = mService.getKeyboardForLocale(mLocale);
            if (null == keyboard) {
                Log.w(TAG, "onGetSuggestionsInternal() : No keyboard for locale: " + mLocale);
                // If there is no keyboard for this locale, don't do any spell-checking.
                return AndroidSpellCheckerService.getNotInDictEmptySuggestions(
                        false /* reportAsTypo */);
            }

            final WordComposer composer = new WordComposer();
            final int[] codePoints = StringUtils.toCodePointArray(text);
            final int[] coordinates;
            coordinates = keyboard.getCoordinates(codePoints);
            composer.setComposingWord(codePoints, coordinates);
            // TODO: Don't gather suggestions if the limit is <= 0 unless necessary
            final SuggestionResults suggestionResults = mService.getSuggestionResults(
                    mLocale, composer.getComposedDataSnapshot(), ngramContext, keyboard);
            final Result result = getResult(capitalizeType, mLocale, suggestionsLimit,
                    mService.getRecommendedThreshold(), text, suggestionResults);
            if (DebugFlags.DEBUG_ENABLED) {
                if (result.mSuggestions != null && result.mSuggestions.length > 0) {
                    final StringBuilder builder = new StringBuilder();
                    for (String suggestion : result.mSuggestions) {
                        builder.append(" [");
                        builder.append(suggestion);
                        builder.append("]");
                    }
                    Log.i(TAG, "onGetSuggestionsInternal() : Suggestions =" + builder);
                }
            }
            // Handle word not in dictionary.
            // This is called only once per unique word, so entering multiple
            // instances of the same word does not result in more than one call
            // to this method.
            // Also, upon changing the orientation of the device, this is called
            // again for every unique invalid word in the text box.
            StatsUtils.onInvalidWordIdentification(text);

            final int flags =
                    SuggestionsInfo.RESULT_ATTR_LOOKS_LIKE_TYPO
                    | (result.mHasRecommendedSuggestions
                            ? SuggestionsInfoCompatUtils
                                    .getValueOf_RESULT_ATTR_HAS_RECOMMENDED_SUGGESTIONS()
                            : 0);
            final SuggestionsInfo retval = new SuggestionsInfo(flags, result.mSuggestions);
            mSuggestionsCache.putSuggestionsToCache(text, result.mSuggestions, flags);
            return retval;
        } catch (RuntimeException e) {
            // Don't kill the keyboard if there is a bug in the spell checker
            Log.e(TAG, "Exception while spellchecking", e);
            return AndroidSpellCheckerService.getNotInDictEmptySuggestions(
                    false /* reportAsTypo */);
        }
    }

    /** Immutable outcome of {@link #getResult}: the suggestions and whether to recommend them. */
    private static final class Result {
        public final String[] mSuggestions;
        public final boolean mHasRecommendedSuggestions;

        public Result(final String[] gatheredSuggestions, final boolean hasRecommendedSuggestions) {
            mSuggestions = gatheredSuggestions;
            mHasRecommendedSuggestions = hasRecommendedSuggestions;
        }
    }

    /**
     * Turns raw suggestion results into the list handed back to the caller.
     *
     * Each suggestion is re-capitalized to match the original text, duplicates are removed and
     * the list is cut down to the limit. The suggestions are flagged as recommended when the
     * normalized score of the best one is above the threshold.
     *
     * @param capitalizeType the capitalization of the original text, one of the
     * StringUtils.CAPITALIZE_* values.
     * @param locale the locale used when changing the case of suggestions.
     * @param suggestionsLimit the maximum number of suggestions to keep.
     * @param recommendedThreshold the normalized score the best suggestion must exceed.
     * @param originalText the text that was checked.
     * @param suggestionResults the suggestions to process.
     * @return the processed suggestions; the suggestion array is null when there are no results
     * or when the limit is zero or negative.
     */
    private static Result getResult(final int capitalizeType, final Locale locale,
            final int suggestionsLimit, final float recommendedThreshold, final String originalText,
            final SuggestionResults suggestionResults) {
        if (suggestionResults.isEmpty() || suggestionsLimit <= 0) {
            return new Result(null /* gatheredSuggestions */,
                    false /* hasRecommendedSuggestions */);
        }
        final ArrayList<String> suggestions = new ArrayList<>();
        for (final SuggestedWordInfo suggestedWordInfo : suggestionResults) {
            final String suggestion;
            if (StringUtils.CAPITALIZE_ALL == capitalizeType) {
                suggestion = suggestedWordInfo.mWord.toUpperCase(locale);
            } else if (StringUtils.CAPITALIZE_FIRST == capitalizeType) {
                suggestion = StringUtils.capitalizeFirstCodePoint(
                        suggestedWordInfo.mWord, locale);
            } else {
                suggestion = suggestedWordInfo.mWord;
            }
            suggestions.add(suggestion);
        }
        StringUtils.removeDupes(suggestions);
        // This returns a String[], while toArray() returns an Object[] which cannot be cast
        // into a String[].
        final List<String> gatheredSuggestionsList =
                suggestions.subList(0, Math.min(suggestions.size(), suggestionsLimit));
        final String[] gatheredSuggestions =
                gatheredSuggestionsList.toArray(new String[gatheredSuggestionsList.size()]);

        final int bestScore = suggestionResults.first().mScore;
        final String bestSuggestion = suggestions.get(0);
        final float normalizedScore = BinaryDictionaryUtils.calcNormalizedScore(
                originalText, bestSuggestion, bestScore);
        final boolean hasRecommendedSuggestions = (normalizedScore > recommendedThreshold);
        return new Result(gatheredSuggestions, hasRecommendedSuggestions);
    }

    /*
     * The spell checker acts on its own behalf. That is needed, in particular, to be able to
     * access the dictionary files, which the provider restricts to the identity of Latin IME.
     * Since it's called externally by the application, the spell checker is using the identity
     * of the application by default unless we clearCallingIdentity.
     * That's what the following method does.
     */
    @Override
    public SuggestionsInfo onGetSuggestions(final TextInfo textInfo, final int suggestionsLimit) {
        long ident = Binder.clearCallingIdentity();
        try {
            return onGetSuggestionsInternal(textInfo, suggestionsLimit);
        } finally {
            Binder.restoreCallingIdentity(ident);
        }
    }
}