1 /*
2  * Copyright (C) 2010 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 package com.android.vcard;
17 
18 import android.text.TextUtils;
19 import android.util.Base64;
20 import android.util.Log;
21 
22 import com.android.vcard.exception.VCardAgentNotSupportedException;
23 import com.android.vcard.exception.VCardException;
24 import com.android.vcard.exception.VCardInvalidCommentLineException;
25 import com.android.vcard.exception.VCardInvalidLineException;
26 import com.android.vcard.exception.VCardVersionException;
27 
28 import java.io.BufferedReader;
29 import java.io.IOException;
30 import java.io.InputStream;
31 import java.io.InputStreamReader;
32 import java.io.Reader;
33 import java.util.ArrayList;
34 import java.util.Collection;
35 import java.util.HashSet;
36 import java.util.List;
37 import java.util.Set;
38 
39 /**
40  * <p>
41  * Basic implementation achieving vCard parsing. Based on vCard 2.1.
42  * </p>
43  * @hide
44  */
45 /* package */ class VCardParserImpl_V21 {
46     private static final String LOG_TAG = VCardConstants.LOG_TAG;
47 
48     protected static final class CustomBufferedReader extends BufferedReader {
49         private long mTime;
50 
51         /**
52          * Needed since "next line" may be null due to end of line.
53          */
54         private boolean mNextLineIsValid;
55         private String mNextLine;
56 
CustomBufferedReader(Reader in)57         public CustomBufferedReader(Reader in) {
58             super(in);
59         }
60 
61         @Override
readLine()62         public String readLine() throws IOException {
63             if (mNextLineIsValid) {
64                 final String ret = mNextLine;
65                 mNextLine = null;
66                 mNextLineIsValid = false;
67                 return ret;
68             }
69 
70             final long start = System.currentTimeMillis();
71             final String line = super.readLine();
72             final long end = System.currentTimeMillis();
73             mTime += end - start;
74             return line;
75         }
76 
77         /**
78          * Read one line, but make this object store it in its queue.
79          */
peekLine()80         public String peekLine() throws IOException {
81             if (!mNextLineIsValid) {
82                 final long start = System.currentTimeMillis();
83                 final String line = super.readLine();
84                 final long end = System.currentTimeMillis();
85                 mTime += end - start;
86 
87                 mNextLine = line;
88                 mNextLineIsValid = true;
89             }
90 
91             return mNextLine;
92         }
93 
getTotalmillisecond()94         public long getTotalmillisecond() {
95             return mTime;
96         }
97     }
98 
    // Value mCurrentEncoding is reset to at the start of each vCard and each item.
    private static final String DEFAULT_ENCODING = "8BIT";
    // Value mCurrentCharset is reset to at the start of each vCard.
    private static final String DEFAULT_CHARSET = "UTF-8";

    // Assigned once by the constructor from VCardConfig.DEFAULT_INTERMEDIATE_CHARSET.
    protected final String mIntermediateCharset;

    // Interpreters that receive the entry/property callbacks emitted while parsing.
    private final List<VCardInterpreter> mInterpreterList = new ArrayList<VCardInterpreter>();
    private boolean mCanceled;

    /**
     * <p>
     * The encoding type for decoding byte streams. This member variable is
     * reset to a default encoding every time a new item comes.
     * </p>
     * <p>
     * "Encoding" in vCard is different from "Charset". It is mainly used for
     * addresses, notes, images. "7BIT", "8BIT", "BASE64", and
     * "QUOTED-PRINTABLE" are known examples.
     * </p>
     */
    protected String mCurrentEncoding;

    /**
     * The charset given by the most recent CHARSET parameter. It is reset to a default
     * charset at the start of each vCard.
     */
    protected String mCurrentCharset;

    /**
     * <p>
     * The reader object to be used internally.
     * </p>
     * <p>
     * Developers should not directly read a line from this object. Use
     * getLine() unless there is some reason not to.
     * </p>
     */
    protected CustomBufferedReader mReader;

    /**
     * <p>
     * Set for storing unknown TYPE attributes, which are not acceptable in the vCard
     * specification, but happen to be seen in real world vCard. Unsupported property
     * names are remembered in this set as well.
     * </p>
     * <p>
     * We just accept those invalid types after emitting a warning for each of them.
     * </p>
     */
    protected final Set<String> mUnknownTypeSet = new HashSet<String>();

    /**
     * <p>
     * Set for storing unknown VALUE attributes, which are not acceptable in the
     * vCard specification, but happen to be seen in real world vCard.
     * </p>
     * <p>
     * We just accept those invalid values after emitting a warning for each of them.
     * </p>
     */
    protected final Set<String> mUnknownValueSet = new HashSet<String>();
154 
155 
    /**
     * Creates a parser using {@link VCardConfig#VCARD_TYPE_DEFAULT} as the vCard type.
     */
    public VCardParserImpl_V21() {
        this(VCardConfig.VCARD_TYPE_DEFAULT);
    }

    /**
     * Creates a parser whose intermediate charset is
     * {@link VCardConfig#DEFAULT_INTERMEDIATE_CHARSET}.
     *
     * @param vcardType vCard type; this constructor does not read it.
     */
    public VCardParserImpl_V21(int vcardType) {
        mIntermediateCharset =  VCardConfig.DEFAULT_INTERMEDIATE_CHARSET;
    }
163 
164     /**
165      * @return true when a given property name is a valid property name.
166      */
isValidPropertyName(final String propertyName)167     protected boolean isValidPropertyName(final String propertyName) {
168         if (!(getKnownPropertyNameSet().contains(propertyName.toUpperCase()) ||
169                 propertyName.startsWith("X-"))
170                 && !mUnknownTypeSet.contains(propertyName)) {
171             mUnknownTypeSet.add(propertyName);
172             Log.w(LOG_TAG, "Property name unsupported by vCard 2.1: " + propertyName);
173         }
174         return true;
175     }
176 
    /**
     * Reads the next line through {@link #mReader}, consuming a previously peeked line
     * first if there is one.
     *
     * @return String. It may be null, or its length may be 0
     * @throws IOException
     */
    protected String getLine() throws IOException {
        return mReader.readLine();
    }
184 
    /**
     * Returns the next line from {@link #mReader} without consuming it; the following
     * {@link #getLine()} call returns the same line.
     *
     * @return the next line, possibly null or empty.
     * @throws IOException
     */
    protected String peekLine() throws IOException {
        return mReader.peekLine();
    }
188 
189     /**
190      * @return String with it's length > 0
191      * @throws IOException
192      * @throws VCardException when the stream reached end of line
193      */
getNonEmptyLine()194     protected String getNonEmptyLine() throws IOException, VCardException {
195         String line;
196         while (true) {
197             line = getLine();
198             if (line == null) {
199                 throw new VCardException("Reached end of buffer.");
200             } else if (line.trim().length() > 0) {
201                 return line;
202             }
203         }
204     }
205 
206     /**
207      * <code>
208      * vcard = "BEGIN" [ws] ":" [ws] "VCARD" [ws] 1*CRLF
209      *         items *CRLF
210      *         "END" [ws] ":" [ws] "VCARD"
211      * </code>
212      * @return False when reaching end of file.
213      */
parseOneVCard()214     private boolean parseOneVCard() throws IOException, VCardException {
215         // reset for this entire vCard.
216         mCurrentEncoding = DEFAULT_ENCODING;
217         mCurrentCharset = DEFAULT_CHARSET;
218 
219         // allow parsing of vcards that have mime data leading up to BEGIN:VCARD
220         boolean allowGarbage = true;
221         if (!readBeginVCard(allowGarbage)) {
222             return false;
223         }
224         for (VCardInterpreter interpreter : mInterpreterList) {
225             interpreter.onEntryStarted();
226         }
227         parseItems();
228         for (VCardInterpreter interpreter : mInterpreterList) {
229             interpreter.onEntryEnded();
230         }
231         return true;
232     }
233 
234     /**
235      * @return True when successful. False when reaching the end of line
236      * @throws IOException
237      * @throws VCardException
238      */
readBeginVCard(boolean allowGarbage)239     protected boolean readBeginVCard(boolean allowGarbage) throws IOException, VCardException {
240         // TODO: use consructPropertyLine().
241         String line;
242         do {
243             while (true) {
244                 line = getLine();
245                 if (line == null) {
246                     return false;
247                 } else if (line.trim().length() > 0) {
248                     break;
249                 }
250             }
251             final String[] strArray = line.split(":", 2);
252             final int length = strArray.length;
253 
254             // Although vCard 2.1/3.0 specification does not allow lower cases,
255             // we found vCard file emitted by some external vCard expoter have such
256             // invalid Strings.
257             // e.g. BEGIN:vCard
258             if (length == 2 && strArray[0].trim().equalsIgnoreCase("BEGIN")
259                     && strArray[1].trim().equalsIgnoreCase("VCARD")) {
260                 return true;
261             } else if (!allowGarbage) {
262                 throw new VCardException("Expected String \"BEGIN:VCARD\" did not come "
263                         + "(Instead, \"" + line + "\" came)");
264             }
265         } while (allowGarbage);
266 
267         throw new VCardException("Reached where must not be reached.");
268     }
269 
270     /**
271      * Parses lines other than the first "BEGIN:VCARD". Takes care of "END:VCARD"n and
272      * "BEGIN:VCARD" in nested vCard.
273      */
274     /*
275      * items = *CRLF item / item
276      *
277      * Note: BEGIN/END aren't include in the original spec while this method handles them.
278      */
parseItems()279     protected void parseItems() throws IOException, VCardException {
280         boolean ended = false;
281 
282         try {
283             ended = parseItem();
284         } catch (VCardInvalidCommentLineException e) {
285             Log.e(LOG_TAG, "Invalid line which looks like some comment was found. Ignored.");
286         }
287 
288         while (!ended) {
289             try {
290                 ended = parseItem();
291             } catch (VCardInvalidCommentLineException e) {
292                 Log.e(LOG_TAG, "Invalid line which looks like some comment was found. Ignored.");
293             }
294         }
295     }
296 
297     /*
298      * item = [groups "."] name [params] ":" value CRLF / [groups "."] "ADR"
299      * [params] ":" addressparts CRLF / [groups "."] "ORG" [params] ":" orgparts
300      * CRLF / [groups "."] "N" [params] ":" nameparts CRLF / [groups "."]
301      * "AGENT" [params] ":" vcard CRLF
302      */
parseItem()303     protected boolean parseItem() throws IOException, VCardException {
304         // Reset for an item.
305         mCurrentEncoding = DEFAULT_ENCODING;
306 
307         final String line = getNonEmptyLine();
308         final VCardProperty propertyData = constructPropertyData(line);
309 
310         final String propertyNameUpper = propertyData.getName().toUpperCase();
311         final String propertyRawValue = propertyData.getRawValue();
312 
313         if (propertyNameUpper.equals(VCardConstants.PROPERTY_BEGIN)) {
314             if (propertyRawValue.equalsIgnoreCase("VCARD")) {
315                 handleNest();
316             } else {
317                 throw new VCardException("Unknown BEGIN type: " + propertyRawValue);
318             }
319         } else if (propertyNameUpper.equals(VCardConstants.PROPERTY_END)) {
320             if (propertyRawValue.equalsIgnoreCase("VCARD")) {
321                 return true;  // Ended.
322             } else {
323                 throw new VCardException("Unknown END type: " + propertyRawValue);
324             }
325         } else {
326             parseItemInter(propertyData, propertyNameUpper);
327         }
328         return false;
329     }
330 
parseItemInter(VCardProperty property, String propertyNameUpper)331     private void parseItemInter(VCardProperty property, String propertyNameUpper)
332             throws IOException, VCardException {
333         String propertyRawValue = property.getRawValue();
334         if (propertyNameUpper.equals(VCardConstants.PROPERTY_AGENT)) {
335             handleAgent(property);
336         } else if (isValidPropertyName(propertyNameUpper)) {
337             if (propertyNameUpper.equals(VCardConstants.PROPERTY_VERSION) &&
338                     !propertyRawValue.equals(getVersionString())) {
339                 throw new VCardVersionException(
340                         "Incompatible version: " + propertyRawValue + " != " + getVersionString());
341             }
342             handlePropertyValue(property, propertyNameUpper);
343         } else {
344             throw new VCardException("Unknown property name: \"" + propertyNameUpper + "\"");
345         }
346     }
347 
handleNest()348     private void handleNest() throws IOException, VCardException {
349         for (VCardInterpreter interpreter : mInterpreterList) {
350             interpreter.onEntryStarted();
351         }
352         parseItems();
353         for (VCardInterpreter interpreter : mInterpreterList) {
354             interpreter.onEntryEnded();
355         }
356     }
357 
358     // For performance reason, the states for group and property name are merged into one.
359     static private final int STATE_GROUP_OR_PROPERTY_NAME = 0;
360     static private final int STATE_PARAMS = 1;
361     // vCard 3.0 specification allows double-quoted parameters, while vCard 2.1 does not.
362     static private final int STATE_PARAMS_IN_DQUOTE = 2;
363 
    /**
     * Scans one line and splits it into groups, property name, parameters and raw value.
     *
     * <p>The line is scanned up to the first ':' that is not inside a double-quoted
     * parameter. Everything after that ':' becomes the raw value. Each parameter found
     * on the way is passed to {@link #handleParams}.</p>
     *
     * @param line a single line of a vCard.
     * @return a property holding the name, groups, parameters and raw value of the line.
     * @throws VCardInvalidCommentLineException when the line starts with '#'.
     * @throws VCardInvalidLineException when the line ends before a terminating ':'.
     * @throws VCardException when a parameter is rejected by {@link #handleParams}.
     */
    protected VCardProperty constructPropertyData(String line) throws VCardException {
        final VCardProperty propertyData = new VCardProperty();

        final int length = line.length();
        if (length > 0 && line.charAt(0) == '#') {
            throw new VCardInvalidCommentLineException();
        }

        int state = STATE_GROUP_OR_PROPERTY_NAME;
        // Start index of the token (group, name or parameter) currently being scanned.
        int nameIndex = 0;

        // This loop is written so that it does not become a bottleneck.
        // Refactor carefully when you need to do so.
        for (int i = 0; i < length; i++) {
            final char ch = line.charAt(i);
            switch (state) {
                case STATE_GROUP_OR_PROPERTY_NAME: {
                    if (ch == ':') {  // End of a property name.
                        final String propertyName = line.substring(nameIndex, i);
                        propertyData.setName(propertyName);
                        propertyData.setRawValue( i < length - 1 ? line.substring(i + 1) : "");
                        return propertyData;
                    } else if (ch == '.') {  // Each group is followed by the dot.
                        final String groupName = line.substring(nameIndex, i);
                        if (groupName.length() == 0) {
                            Log.w(LOG_TAG, "Empty group found. Ignoring.");
                        } else {
                            propertyData.addGroup(groupName);
                        }
                        nameIndex = i + 1;  // Next should be another group or a property name.
                    } else if (ch == ';') {  // End of property name and beginning of parameters.
                        final String propertyName = line.substring(nameIndex, i);
                        propertyData.setName(propertyName);
                        nameIndex = i + 1;
                        state = STATE_PARAMS;  // Start parameter parsing.
                    }
                    // TODO: comma support (in vCard 3.0 and 4.0).
                    break;
                }
                case STATE_PARAMS: {
                    if (ch == '"') {
                        if (VCardConstants.VERSION_V21.equalsIgnoreCase(getVersionString())) {
                            Log.w(LOG_TAG, "Double-quoted params found in vCard 2.1. " +
                                    "Silently allow it");
                        }
                        state = STATE_PARAMS_IN_DQUOTE;
                    } else if (ch == ';') {  // Starts another param.
                        handleParams(propertyData, line.substring(nameIndex, i));
                        nameIndex = i + 1;
                    } else if (ch == ':') {  // End of param and beginning of values.
                        handleParams(propertyData, line.substring(nameIndex, i));
                        propertyData.setRawValue(i < length - 1 ? line.substring(i + 1) : "");
                        return propertyData;
                    }
                    break;
                }
                case STATE_PARAMS_IN_DQUOTE: {
                    // Inside double quotes ';' and ':' are not treated as delimiters;
                    // only the closing quote changes the state.
                    if (ch == '"') {
                        if (VCardConstants.VERSION_V21.equalsIgnoreCase(getVersionString())) {
                            Log.w(LOG_TAG, "Double-quoted params found in vCard 2.1. " +
                                    "Silently allow it");
                        }
                        state = STATE_PARAMS;
                    }
                    break;
                }
            }
        }

        // The whole line was scanned without finding the ':' that ends name/params.
        throw new VCardInvalidLineException("Invalid line: \"" + line + "\"");
    }
435 
436     /*
437      * params = ";" [ws] paramlist paramlist = paramlist [ws] ";" [ws] param /
438      * param param = "TYPE" [ws] "=" [ws] ptypeval / "VALUE" [ws] "=" [ws]
439      * pvalueval / "ENCODING" [ws] "=" [ws] pencodingval / "CHARSET" [ws] "="
440      * [ws] charsetval / "LANGUAGE" [ws] "=" [ws] langval / "X-" word [ws] "="
441      * [ws] word / knowntype
442      */
443     protected void handleParams(VCardProperty propertyData, String params)
444             throws VCardException {
445         final String[] strArray = params.split("=", 2);
446         if (strArray.length == 2) {
447             final String paramName = strArray[0].trim().toUpperCase();
448             String paramValue = strArray[1].trim();
449             if (paramName.equals("TYPE")) {
450                 handleType(propertyData, paramValue);
451             } else if (paramName.equals("VALUE")) {
452                 handleValue(propertyData, paramValue);
453             } else if (paramName.equals("ENCODING")) {
454                 handleEncoding(propertyData, paramValue.toUpperCase());
455             } else if (paramName.equals("CHARSET")) {
456                 handleCharset(propertyData, paramValue);
457             } else if (paramName.equals("LANGUAGE")) {
458                 handleLanguage(propertyData, paramValue);
459             } else if (paramName.startsWith("X-")) {
460                 handleAnyParam(propertyData, paramName, paramValue);
461             } else {
462                 throw new VCardException("Unknown type \"" + paramName + "\"");
463             }
464         } else {
465             handleParamWithoutName(propertyData, strArray[0]);
466         }
467     }
468 
    /**
     * Handles a parameter that has no "NAME=" part by treating its value as a TYPE
     * value.
     *
     * vCard 3.0 parser implementation may throw VCardException.
     *
     * @param propertyData the property the parameter belongs to.
     * @param paramValue the bare parameter text.
     */
    protected void handleParamWithoutName(VCardProperty propertyData, final String paramValue) {
        handleType(propertyData, paramValue);
    }
475 
476     /*
477      * ptypeval = knowntype / "X-" word
478      */
479     protected void handleType(VCardProperty propertyData, final String ptypeval) {
480         if (!(getKnownTypeSet().contains(ptypeval.toUpperCase())
481                 || ptypeval.startsWith("X-"))
482                 && !mUnknownTypeSet.contains(ptypeval)) {
483             mUnknownTypeSet.add(ptypeval);
484             Log.w(LOG_TAG, String.format("TYPE unsupported by %s: ", getVersion(), ptypeval));
485         }
486         propertyData.addParameter(VCardConstants.PARAM_TYPE, ptypeval);
487     }
488 
489     /*
490      * pvalueval = "INLINE" / "URL" / "CONTENT-ID" / "CID" / "X-" word
491      */
492     protected void handleValue(VCardProperty propertyData, final String pvalueval) {
493         if (!(getKnownValueSet().contains(pvalueval.toUpperCase())
494                 || pvalueval.startsWith("X-")
495                 || mUnknownValueSet.contains(pvalueval))) {
496             mUnknownValueSet.add(pvalueval);
497             Log.w(LOG_TAG, String.format(
498                     "The value unsupported by TYPE of %s: ", getVersion(), pvalueval));
499         }
500         propertyData.addParameter(VCardConstants.PARAM_VALUE, pvalueval);
501     }
502 
503     /*
504      * pencodingval = "7BIT" / "8BIT" / "QUOTED-PRINTABLE" / "BASE64" / "X-" word
505      */
506     protected void handleEncoding(VCardProperty propertyData, String pencodingval)
507             throws VCardException {
508         if (getAvailableEncodingSet().contains(pencodingval) ||
509                 pencodingval.startsWith("X-")) {
510             propertyData.addParameter(VCardConstants.PARAM_ENCODING, pencodingval);
511             // Update encoding right away, as this is needed to understanding other params.
512             mCurrentEncoding = pencodingval.toUpperCase();
513         } else {
514             throw new VCardException("Unknown encoding \"" + pencodingval + "\"");
515         }
516     }
517 
    /**
     * <p>
     * Records the given charset as the current one and adds it to the property as a
     * CHARSET parameter.
     * </p>
     * <p>
     * vCard 2.1 specification only allows us-ascii and iso-8859-xxx (See RFC 1521),
     * but recent vCard files often contain other charset like UTF-8, SHIFT_JIS, etc.
     * We allow any charset.
     * </p>
     *
     * @param propertyData the property receiving the CHARSET parameter.
     * @param charsetval the charset name as written in the vCard; it is not validated.
     */
    protected void handleCharset(VCardProperty propertyData, String charsetval) {
        mCurrentCharset = charsetval;
        propertyData.addParameter(VCardConstants.PARAM_CHARSET, charsetval);
    }
529 
530     /**
531      * See also Section 7.1 of RFC 1521
532      */
533     protected void handleLanguage(VCardProperty propertyData, String langval)
534             throws VCardException {
535         String[] strArray = langval.split("-");
536         if (strArray.length != 2) {
537             throw new VCardException("Invalid Language: \"" + langval + "\"");
538         }
539         String tmp = strArray[0];
540         int length = tmp.length();
541         for (int i = 0; i < length; i++) {
542             if (!isAsciiLetter(tmp.charAt(i))) {
543                 throw new VCardException("Invalid Language: \"" + langval + "\"");
544             }
545         }
546         tmp = strArray[1];
547         length = tmp.length();
548         for (int i = 0; i < length; i++) {
549             if (!isAsciiLetter(tmp.charAt(i))) {
550                 throw new VCardException("Invalid Language: \"" + langval + "\"");
551             }
552         }
553         propertyData.addParameter(VCardConstants.PARAM_LANGUAGE, langval);
554     }
555 
556     private boolean isAsciiLetter(char ch) {
557         if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
558             return true;
559         }
560         return false;
561     }
562 
    /**
     * Mainly for "X-" type. This accepts any kind of type without check and adds the
     * parameter to the property as given.
     *
     * @param propertyData the property receiving the parameter.
     * @param paramName the parameter name.
     * @param paramValue the parameter value.
     */
    protected void handleAnyParam(
            VCardProperty propertyData, String paramName, String paramValue) {
        propertyData.addParameter(paramName, paramValue);
    }
570 
571     protected void handlePropertyValue(VCardProperty property, String propertyName)
572             throws IOException, VCardException {
573         final String propertyNameUpper = property.getName().toUpperCase();
574         String propertyRawValue = property.getRawValue();
575         final String sourceCharset = VCardConfig.DEFAULT_INTERMEDIATE_CHARSET;
576         final Collection<String> charsetCollection =
577                 property.getParameters(VCardConstants.PARAM_CHARSET);
578         String targetCharset =
579                 ((charsetCollection != null) ? charsetCollection.iterator().next() : null);
580         if (TextUtils.isEmpty(targetCharset)) {
581             targetCharset = VCardConfig.DEFAULT_IMPORT_CHARSET;
582         }
583 
584         // TODO: have "separableProperty" which reflects vCard spec..
585         if (propertyNameUpper.equals(VCardConstants.PROPERTY_ADR)
586                 || propertyNameUpper.equals(VCardConstants.PROPERTY_ORG)
587                 || propertyNameUpper.equals(VCardConstants.PROPERTY_N)) {
588             handleAdrOrgN(property, propertyRawValue, sourceCharset, targetCharset);
589             return;
590         }
591 
592         if (mCurrentEncoding.equals(VCardConstants.PARAM_ENCODING_QP) ||
593                 // If encoding attribute is missing, then attempt to detect QP encoding.
594                 // This is to handle a bug where the android exporter was creating FN properties
595                 // with missing encoding.  b/7292017
596                 (propertyNameUpper.equals(VCardConstants.PROPERTY_FN) &&
597                         property.getParameters(VCardConstants.PARAM_ENCODING) == null &&
598                         VCardUtils.appearsLikeAndroidVCardQuotedPrintable(propertyRawValue))
599                 ) {
600             final String quotedPrintablePart = getQuotedPrintablePart(propertyRawValue);
601             final String propertyEncodedValue =
602                     VCardUtils.parseQuotedPrintable(quotedPrintablePart,
603                             false, sourceCharset, targetCharset);
604             property.setRawValue(quotedPrintablePart);
605             property.setValues(propertyEncodedValue);
606             for (VCardInterpreter interpreter : mInterpreterList) {
607                 interpreter.onPropertyCreated(property);
608             }
609         } else if (mCurrentEncoding.equals(VCardConstants.PARAM_ENCODING_BASE64)
610                 || mCurrentEncoding.equals(VCardConstants.PARAM_ENCODING_B)) {
611             // It is very rare, but some BASE64 data may be so big that
612             // OutOfMemoryError occurs. To ignore such cases, use try-catch.
613             try {
614                 final String base64Property = getBase64(propertyRawValue);
615                 try {
616                     property.setByteValue(Base64.decode(base64Property, Base64.DEFAULT));
617                 } catch (IllegalArgumentException e) {
618                     throw new VCardException("Decode error on base64 photo: " + propertyRawValue);
619                 }
620                 for (VCardInterpreter interpreter : mInterpreterList) {
621                     interpreter.onPropertyCreated(property);
622                 }
623             } catch (OutOfMemoryError error) {
624                 Log.e(LOG_TAG, "OutOfMemoryError happened during parsing BASE64 data!");
625                 for (VCardInterpreter interpreter : mInterpreterList) {
626                     interpreter.onPropertyCreated(property);
627                 }
628             }
629         } else {
630             if (!(mCurrentEncoding.equals("7BIT") || mCurrentEncoding.equals("8BIT") ||
631                     mCurrentEncoding.startsWith("X-"))) {
632                 Log.w(LOG_TAG,
633                         String.format("The encoding \"%s\" is unsupported by vCard %s",
634                                 mCurrentEncoding, getVersionString()));
635             }
636 
637             // Some device uses line folding defined in RFC 2425, which is not allowed
638             // in vCard 2.1 (while needed in vCard 3.0).
639             //
640             // e.g.
641             // BEGIN:VCARD
642             // VERSION:2.1
643             // N:;Omega;;;
644             // EMAIL;INTERNET:"Omega"
645             //   <omega@example.com>
646             // FN:Omega
647             // END:VCARD
648             //
649             // The vCard above assumes that email address should become:
650             // "Omega" <omega@example.com>
651             //
652             // But vCard 2.1 requires Quote-Printable when a line contains line break(s).
653             //
654             // For more information about line folding,
655             // see "5.8.1. Line delimiting and folding" in RFC 2425.
656             //
657             // We take care of this case more formally in vCard 3.0, so we only need to
658             // do this in vCard 2.1.
659             if (getVersion() == VCardConfig.VERSION_21) {
660                 StringBuilder builder = null;
661                 while (true) {
662                     final String nextLine = peekLine();
663                     // We don't need to care too much about this exceptional case,
664                     // but we should not wrongly eat up "END:VCARD", since it critically
665                     // breaks this parser's state machine.
666                     // Thus we roughly look over the next line and confirm it is at least not
667                     // "END:VCARD". This extra fee is worth paying. This is exceptional
668                     // anyway.
669                     if (!TextUtils.isEmpty(nextLine) &&
670                             nextLine.charAt(0) == ' ' &&
671                             !"END:VCARD".contains(nextLine.toUpperCase())) {
672                         getLine();  // Drop the next line.
673 
674                         if (builder == null) {
675                             builder = new StringBuilder();
676                             builder.append(propertyRawValue);
677                         }
678                         builder.append(nextLine.substring(1));
679                     } else {
680                         break;
681                     }
682                 }
683                 if (builder != null) {
684                     propertyRawValue = builder.toString();
685                 }
686             }
687 
688             ArrayList<String> propertyValueList = new ArrayList<String>();
689             String value = maybeUnescapeText(VCardUtils.convertStringCharset(
690                     propertyRawValue, sourceCharset, targetCharset));
691             propertyValueList.add(value);
692             property.setValues(propertyValueList);
693             for (VCardInterpreter interpreter : mInterpreterList) {
694                 interpreter.onPropertyCreated(property);
695             }
696         }
697     }
698 
699     private void handleAdrOrgN(VCardProperty property, String propertyRawValue,
700             String sourceCharset, String targetCharset) throws VCardException, IOException {
701         List<String> encodedValueList = new ArrayList<String>();
702 
703         // vCard 2.1 does not allow QUOTED-PRINTABLE here, but some softwares/devices emit
704         // such data.
705         if (mCurrentEncoding.equals(VCardConstants.PARAM_ENCODING_QP)) {
706             // First we retrieve Quoted-Printable String from vCard entry, which may include
707             // multiple lines.
708             final String quotedPrintablePart = getQuotedPrintablePart(propertyRawValue);
709 
710             // "Raw value" from the view of users should contain all part of QP string.
711             // TODO: add test for this handling
712             property.setRawValue(quotedPrintablePart);
713 
714             // We split Quoted-Printable String using semi-colon before decoding it, as
715             // the Quoted-Printable may have semi-colon, which confuses splitter.
716             final List<String> quotedPrintableValueList =
717                     VCardUtils.constructListFromValue(quotedPrintablePart, getVersion());
718             for (String quotedPrintableValue : quotedPrintableValueList) {
719                 String encoded = VCardUtils.parseQuotedPrintable(quotedPrintableValue,
720                         false, sourceCharset, targetCharset);
721                 encodedValueList.add(encoded);
722             }
723         } else {
724             final String propertyValue = VCardUtils.convertStringCharset(
725                     getPotentialMultiline(propertyRawValue), sourceCharset, targetCharset);
726             final List<String> valueList =
727                     VCardUtils.constructListFromValue(propertyValue, getVersion());
728             for (String value : valueList) {
729                 encodedValueList.add(value);
730             }
731         }
732 
733         property.setValues(encodedValueList);
734         for (VCardInterpreter interpreter : mInterpreterList) {
735             interpreter.onPropertyCreated(property);
736         }
737     }
738 
739     /**
740      * <p>
741      * Parses and returns Quoted-Printable.
742      * </p>
743      *
744      * @param firstString The string following a parameter name and attributes.
745      *            Example: "string" in
746      *            "ADR:ENCODING=QUOTED-PRINTABLE:string\n\r".
747      * @return whole Quoted-Printable string, including a given argument and
748      *         following lines. Excludes the last empty line following to Quoted
749      *         Printable lines.
750      * @throws IOException
751      * @throws VCardException
752      */
753     private String getQuotedPrintablePart(String firstString)
754             throws IOException, VCardException {
755         // Specifically, there may be some padding between = and CRLF.
756         // See the following:
757         //
758         // qp-line := *(qp-segment transport-padding CRLF)
759         // qp-part transport-padding
760         // qp-segment := qp-section *(SPACE / TAB) "="
761         // ; Maximum length of 76 characters
762         //
763         // e.g. (from RFC 2045)
764         // Now's the time =
765         // for all folk to come=
766         // to the aid of their country.
767         if (firstString.trim().endsWith("=")) {
768             // remove "transport-padding"
769             int pos = firstString.length() - 1;
770             while (firstString.charAt(pos) != '=') {
771             }
772             StringBuilder builder = new StringBuilder();
773             builder.append(firstString.substring(0, pos + 1));
774             builder.append("\r\n");
775             String line;
776             while (true) {
777                 line = getLine();
778                 if (line == null) {
779                     throw new VCardException("File ended during parsing a Quoted-Printable String");
780                 }
781                 if (line.trim().endsWith("=")) {
782                     // remove "transport-padding"
783                     pos = line.length() - 1;
784                     while (line.charAt(pos) != '=') {
785                     }
786                     builder.append(line.substring(0, pos + 1));
787                     builder.append("\r\n");
788                 } else {
789                     builder.append(line);
790                     break;
791                 }
792             }
793             return builder.toString();
794         } else {
795             return firstString;
796         }
797     }
798 
799     /**
800      * Given the first line of a property, checks consecutive lines after it and builds a new
801      * multi-line value if it exists.
802      *
803      * @param firstString The first line of the property.
804      * @return A new property, potentially built from multiple lines.
805      * @throws IOException
806      */
807     private String getPotentialMultiline(String firstString) throws IOException {
808         final StringBuilder builder = new StringBuilder();
809         builder.append(firstString);
810 
811         while (true) {
812             final String line = peekLine();
813             if (line == null || line.length() == 0) {
814                 break;
815             }
816 
817             final String propertyName = getPropertyNameUpperCase(line);
818             if (propertyName != null) {
819                 break;
820             }
821 
822             // vCard 2.1 does not allow multi-line of adr but microsoft vcards may have it.
823             // We will consider the next line to be a part of a multi-line value if it does not
824             // contain a property name (i.e. a colon or semi-colon).
825             // Consume the line.
826             getLine();
827             builder.append(" ").append(line);
828         }
829 
830         return builder.toString();
831     }
832 
833     protected String getBase64(String firstString) throws IOException, VCardException {
834         final StringBuilder builder = new StringBuilder();
835         builder.append(firstString);
836 
837         while (true) {
838             final String line = peekLine();
839             if (line == null) {
840                 throw new VCardException("File ended during parsing BASE64 binary");
841             }
842 
843             // vCard 2.1 requires two spaces at the end of BASE64 strings, but some vCard doesn't
844             // have them. We try to detect those cases using colon and semi-colon, given BASE64
845             // does not contain it.
846             // E.g.
847             //      TEL;TYPE=WORK:+5555555
848             // or
849             //      END:VCARD
850             String propertyName = getPropertyNameUpperCase(line);
851             if (getKnownPropertyNameSet().contains(propertyName) ||
852                     VCardConstants.PROPERTY_X_ANDROID_CUSTOM.equals(propertyName)) {
853                 Log.w(LOG_TAG, "Found a next property during parsing a BASE64 string, " +
854                         "which must not contain semi-colon or colon. Treat the line as next "
855                         + "property.");
856                 Log.w(LOG_TAG, "Problematic line: " + line.trim());
857                 break;
858             }
859 
860             // Consume the line.
861             getLine();
862 
863             if (line.length() == 0) {
864                 break;
865             }
866             // Trim off any extraneous whitespace to handle 2.1 implementations
867             // that use 3.0 style line continuations. This is safe because space
868             // isn't a Base64 encoding value.
869             builder.append(line.trim());
870         }
871 
872         return builder.toString();
873     }
874 
875     /**
876      * Extracts the property name portion of a given vCard line.
877      * <p>
878      * Properties must contain a colon.
879      * <p>
880      * E.g.
881      *      TEL;TYPE=WORK:+5555555  // returns "TEL"
882      *      END:VCARD // returns "END"
883      *      TEL; // returns null
884      *
885      * @param line The vCard line.
886      * @return The property name portion. {@literal null} if no property name found.
887      */
888     private String getPropertyNameUpperCase(String line) {
889         final int colonIndex = line.indexOf(":");
890         if (colonIndex > -1) {
891             final int semiColonIndex = line.indexOf(";");
892 
893             // Find the minimum index that is greater than -1.
894             final int minIndex;
895             if (colonIndex == -1) {
896                 minIndex = semiColonIndex;
897             } else if (semiColonIndex == -1) {
898                 minIndex = colonIndex;
899             } else {
900                 minIndex = Math.min(colonIndex, semiColonIndex);
901             }
902             return line.substring(0, minIndex).toUpperCase();
903         }
904         return null;
905     }
906 
907     /*
908      * vCard 2.1 specifies AGENT allows one vcard entry. Currently we emit an
909      * error toward the AGENT property.
910      * // TODO: Support AGENT property.
911      * item =
912      * ... / [groups "."] "AGENT" [params] ":" vcard CRLF vcard = "BEGIN" [ws]
913      * ":" [ws] "VCARD" [ws] 1*CRLF items *CRLF "END" [ws] ":" [ws] "VCARD"
914      */
915     protected void handleAgent(final VCardProperty property) throws VCardException {
916         if (!property.getRawValue().toUpperCase().contains("BEGIN:VCARD")) {
917             // Apparently invalid line seen in Windows Mobile 6.5. Ignore them.
918             for (VCardInterpreter interpreter : mInterpreterList) {
919                 interpreter.onPropertyCreated(property);
920             }
921             return;
922         } else {
923             throw new VCardAgentNotSupportedException("AGENT Property is not supported now.");
924         }
925     }
926 
927     /**
928      * For vCard 3.0.
929      */
930     protected String maybeUnescapeText(final String text) {
931         return text;
932     }
933 
934     /**
935      * Returns unescaped String if the character should be unescaped. Return
936      * null otherwise. e.g. In vCard 2.1, "\;" should be unescaped into ";"
937      * while "\x" should not be.
938      */
939     protected String maybeUnescapeCharacter(final char ch) {
940         return unescapeCharacter(ch);
941     }
942 
943     /* package */ static String unescapeCharacter(final char ch) {
944         // Original vCard 2.1 specification does not allow transformation
945         // "\:" -> ":", "\," -> ",", and "\\" -> "\", but previous
946         // implementation of
947         // this class allowed them, so keep it as is.
948         if (ch == '\\' || ch == ';' || ch == ':' || ch == ',') {
949             return String.valueOf(ch);
950         } else {
951             return null;
952         }
953     }
954 
955     /**
956      * @return {@link VCardConfig#VERSION_21}
957      */
958     protected int getVersion() {
959         return VCardConfig.VERSION_21;
960     }
961 
962     /**
963      * @return {@link VCardConfig#VERSION_30}
964      */
965     protected String getVersionString() {
966         return VCardConstants.VERSION_V21;
967     }
968 
    /**
     * Returns the property names this parser treats as known. {@code getBase64} uses the set
     * to recognize the start of the next property.
     *
     * @return {@code VCardParser_V21.sKnownPropertyNameSet}
     */
    protected Set<String> getKnownPropertyNameSet() {
        return VCardParser_V21.sKnownPropertyNameSet;
    }
972 
    /**
     * Returns the set of type names this parser treats as known.
     *
     * @return {@code VCardParser_V21.sKnownTypeSet}
     */
    protected Set<String> getKnownTypeSet() {
        return VCardParser_V21.sKnownTypeSet;
    }
976 
    /**
     * Returns the set of value names this parser treats as known.
     *
     * @return {@code VCardParser_V21.sKnownValueSet}
     */
    protected Set<String> getKnownValueSet() {
        return VCardParser_V21.sKnownValueSet;
    }
980 
    /**
     * Returns the set of encodings available to this parser.
     *
     * @return {@code VCardParser_V21.sAvailableEncoding}
     */
    protected Set<String> getAvailableEncodingSet() {
        return VCardParser_V21.sAvailableEncoding;
    }
984 
    /**
     * Returns the default encoding of this parser.
     *
     * @return the {@code DEFAULT_ENCODING} constant
     */
    protected String getDefaultEncoding() {
        return DEFAULT_ENCODING;
    }
988 
    /**
     * Returns the default charset of this parser.
     *
     * @return the {@code DEFAULT_CHARSET} constant
     */
    protected String getDefaultCharset() {
        return DEFAULT_CHARSET;
    }
992 
    /**
     * Returns the charset currently stored in {@code mCurrentCharset}.
     *
     * @return the current value of {@code mCurrentCharset}
     */
    protected String getCurrentCharset() {
        return mCurrentCharset;
    }
996 
    /**
     * Registers an interpreter by appending it to {@code mInterpreterList}. Interpreters in
     * that list receive the parser callbacks ({@code onVCardStarted()},
     * {@code onPropertyCreated(...)}, {@code onVCardEnded()}) in list order.
     *
     * @param interpreter the interpreter to register
     */
    public void addInterpreter(VCardInterpreter interpreter) {
        mInterpreterList.add(interpreter);
    }
1000 
1001     public void parse(InputStream is) throws IOException, VCardException {
1002         if (is == null) {
1003             throw new NullPointerException("InputStream must not be null.");
1004         }
1005 
1006         final InputStreamReader tmpReader = new InputStreamReader(is, mIntermediateCharset);
1007         mReader = new CustomBufferedReader(tmpReader);
1008 
1009         final long start = System.currentTimeMillis();
1010         for (VCardInterpreter interpreter : mInterpreterList) {
1011             interpreter.onVCardStarted();
1012         }
1013 
1014         // vcard_file = [wsls] vcard [wsls]
1015         while (true) {
1016             synchronized (this) {
1017                 if (mCanceled) {
1018                     Log.i(LOG_TAG, "Cancel request has come. exitting parse operation.");
1019                     break;
1020                 }
1021             }
1022             if (!parseOneVCard()) {
1023                 break;
1024             }
1025         }
1026 
1027         for (VCardInterpreter interpreter : mInterpreterList) {
1028             interpreter.onVCardEnded();
1029         }
1030     }
1031 
1032     public void parseOne(InputStream is) throws IOException, VCardException {
1033         if (is == null) {
1034             throw new NullPointerException("InputStream must not be null.");
1035         }
1036 
1037         final InputStreamReader tmpReader = new InputStreamReader(is, mIntermediateCharset);
1038         mReader = new CustomBufferedReader(tmpReader);
1039 
1040         final long start = System.currentTimeMillis();
1041         for (VCardInterpreter interpreter : mInterpreterList) {
1042             interpreter.onVCardStarted();
1043         }
1044         parseOneVCard();
1045         for (VCardInterpreter interpreter : mInterpreterList) {
1046             interpreter.onVCardEnded();
1047         }
1048     }
1049 
    /**
     * Requests that an ongoing {@link #parse(InputStream)} stop.
     * <p>
     * Logs the request and sets {@code mCanceled} while holding this object's monitor.
     * {@code parse} reads the flag under the same monitor before each vCard entry and leaves
     * its loop once the flag is set, so an entry already being parsed is not interrupted.
     */
    public final synchronized void cancel() {
        Log.i(LOG_TAG, "ParserImpl received cancel operation.");
        mCanceled = true;
    }
1054 }
1055