/*
 * print_hex:
 * @data: bytes to dump
 * @len: number of bytes in @data
 *
 * Debugging aid: writes @data to stdout as rows of up to eight
 * "0xNN" values, each row prefixed with the decimal offset of its
 * first byte (zero-padded to four digits).  Nothing is printed when
 * @len is 0 or @data is NULL.
 */
static void
print_hex (const unsigned char *data, size_t len)
{
	static const char hex_digits[] = "0123456789abcdef";
	size_t i;

	if (data == NULL)
		return;

	for (i = 0; i < len; i++) {
		/* start of a row: print the offset; size_t needs %zu, not %u */
		if (i % 8 == 0)
			printf ("%04zu ", i);

		printf ("0x%c%c ", hex_digits[data[i] >> 4], hex_digits[data[i] & 0x0f]);

		/* close the row after eight bytes or after the final byte, so
		 * no offset is ever printed for a row that has no data */
		if (i % 8 == 7 || i + 1 == len)
			printf ("\n");
	}
}
|---|
| | 850 | |
|---|
/*
 * conv_to_utf8:
 * @encname: name of the source charset; it is passed through
 *           e_iconv_charset_name() before the converter is opened
 * @in: bytes to convert
 * @inlen: number of bytes in @in
 * @out: destination buffer
 * @outlen: size of @out in bytes
 *
 * Converts @in from @encname to UTF-8 into @out and NUL-terminates the
 * result.  One byte of @out is held back for the terminator, so at most
 * @outlen - 1 bytes of converted text are produced and the terminator
 * never lands outside the buffer.
 *
 * On a conversion error a diagnostic (including a hex dump of the input
 * and whatever was converted so far) is printed to stdout.
 *
 * Returns: the number of bytes written to @out, not counting the
 * terminator, or (size_t) -1 if the converter could not be opened, the
 * conversion failed, or @out has no room for even the terminator.
 */
static size_t
conv_to_utf8 (const char *encname, char *in, size_t inlen, char *out, size_t outlen)
{
	const char *charset, *inbuf;
	char *outbuf;
	iconv_t ic;
	size_t inbuf_len, outbuf_len, ret;

	/* need at least one byte for the terminating NUL */
	if (out == NULL || outlen == 0)
		return (size_t) -1;

	charset = e_iconv_charset_name (encname);

	ic = e_iconv_open ("UTF-8", charset);
	if (ic == (iconv_t) -1) {
		printf ("e_iconv_open() error\n");
		return (size_t) -1;
	}

	inbuf = in;
	inbuf_len = inlen;

	outbuf = out;
	outbuf_len = outlen - 1;	/* reserve room for the terminator */

	ret = e_iconv (ic, &inbuf, &inbuf_len, &outbuf, &outbuf_len);
	if (ret == (size_t) -1) {
		printf ("e_iconv() error! source charset is %s, target charset is %s\n", charset, "UTF-8");
		printf ("converted %zu bytes, but last %zu bytes can't convert!!\n", inlen - inbuf_len, inbuf_len);
		printf ("source data:\n");
		print_hex ((unsigned char *) in, inlen);

		/* terminate the partial output so it can be printed as a string */
		*outbuf = '\0';
		printf ("target string is \"%s\"\n", out);

		/* release the converter on the failure path too */
		e_iconv_close (ic);

		return (size_t) -1;
	}

	e_iconv_close (ic);

	ret = (size_t) (outbuf - out);
	out[ret] = '\0';

	return ret;
}
|---|
| | 892 | |
|---|
| 858 | | tmplen = inend-inptr-2; |
|---|
| 859 | | decword = g_alloca (tmplen); /* this will always be more-than-enough room */ |
|---|
| 860 | | switch(toupper(inptr[0])) { |
|---|
| | 934 | |
|---|
| | 935 | /* charset */ |
|---|
| | 936 | start = inptr; |
|---|
| | 937 | inptr = memchr (inptr, '?', inend-inptr); |
|---|
| | 938 | if (!inptr) { |
|---|
| | 939 | return NULL; |
|---|
| | 940 | } |
|---|
| | 941 | strncpy (curr_charset, start, inptr-start); /* maybe overflow */ |
|---|
| | 942 | curr_charset[inptr-start] = '\0'; |
|---|
| | 943 | if (prev_charset[0] == '\0') { /* first charset in multi encode words */ |
|---|
| | 944 | strcpy (prev_charset, curr_charset); |
|---|
| | 945 | } |
|---|
| | 946 | d(printf ("curr_charset = %s\n", curr_charset)); |
|---|
| | 947 | |
|---|
| | 948 | /* if (charset.perv != charset.curr) iconv perv to utf8 */ |
|---|
| | 949 | if (prev_charset[0] != '\0' && strcmp(prev_charset, curr_charset)) { |
|---|
| | 950 | inlen = decword_ptr - decword; |
|---|
| | 951 | ret = conv_to_utf8 (prev_charset, decword, inlen, utf8_decword_ptr, outlen); |
|---|
| | 952 | if (ret == (size_t)-1) { |
|---|
| | 953 | printf ("conv_to_utf8() error!\n"); |
|---|
| | 954 | return NULL; |
|---|
| | 955 | } |
|---|
| | 956 | |
|---|
| | 957 | utf8_decword_ptr += ret; |
|---|
| | 958 | outlen = outlen - ret; |
|---|
| | 959 | |
|---|
| | 960 | decword_ptr = decword; /* reset decword_ptr */ |
|---|
| | 961 | strcpy (prev_charset, curr_charset); |
|---|
| | 962 | } |
|---|
| | 963 | |
|---|
| | 964 | /* encode */ |
|---|
| | 965 | inptr++; |
|---|
| | 966 | encode = *inptr; |
|---|
| | 967 | inptr++; |
|---|
| | 968 | if (*inptr != '?') { |
|---|
| | 969 | return NULL; |
|---|
| | 970 | } |
|---|
| | 971 | |
|---|
| | 972 | /* text */ |
|---|
| | 973 | inptr++; |
|---|
| | 974 | start = inptr; |
|---|
| | 975 | inptr = memchr (inptr, '?', inend-inptr); |
|---|
| | 976 | if (!inptr || *(inptr+1) != '=') { |
|---|
| | 977 | return NULL; |
|---|
| | 978 | } |
|---|
| | 979 | |
|---|
| | 980 | /* decode */ |
|---|
| | 981 | switch(encode) { |
|---|
| | 982 | |
|---|
| 878 | | /* yuck, all this snot is to setup iconv! */ |
|---|
| 879 | | tmplen = inptr - in - 3; |
|---|
| 880 | | encname = g_alloca (tmplen + 1); |
|---|
| 881 | | memcpy (encname, in + 2, tmplen); |
|---|
| 882 | | encname[tmplen] = '\0'; |
|---|
| 883 | | |
|---|
| 884 | | /* rfc2231 updates rfc2047 encoded words... |
|---|
| 885 | | * The ABNF given in RFC 2047 for encoded-words is: |
|---|
| 886 | | * encoded-word := "=?" charset "?" encoding "?" encoded-text "?=" |
|---|
| 887 | | * This specification changes this ABNF to: |
|---|
| 888 | | * encoded-word := "=?" charset ["*" language] "?" encoding "?" encoded-text "?=" |
|---|
| 889 | | */ |
|---|
| 890 | | |
|---|
| 891 | | /* trim off the 'language' part if it's there... */ |
|---|
| 892 | | p = strchr (encname, '*'); |
|---|
| 893 | | if (p) |
|---|
| 894 | | *p = '\0'; |
|---|
| 895 | | |
|---|
| 896 | | charset = e_iconv_charset_name (encname); |
|---|
| 897 | | |
|---|
| 898 | | inbuf = decword; |
|---|
| 899 | | |
|---|
| 900 | | outlen = inlen * 6 + 16; |
|---|
| 901 | | outbase = g_alloca (outlen); |
|---|
| 902 | | outbuf = outbase; |
|---|
| 903 | | |
|---|
| 904 | | retry: |
|---|
| 905 | | ic = e_iconv_open ("UTF-8", charset); |
|---|
| 906 | | if (ic != (iconv_t) -1) { |
|---|
| 907 | | ret = e_iconv (ic, &inbuf, &inlen, &outbuf, &outlen); |
|---|
| 908 | | if (ret != (size_t) -1) { |
|---|
| 909 | | e_iconv (ic, NULL, 0, &outbuf, &outlen); |
|---|
| 910 | | *outbuf = 0; |
|---|
| 911 | | decoded = g_strdup (outbase); |
|---|
| 912 | | } else { |
|---|
| 913 | | perror ("iconv"); |
|---|
| 914 | | e_iconv (ic, NULL, 0, &outbuf, &outlen); |
|---|
| 915 | | *outbuf = 0; |
|---|
| 916 | | decoded = g_strdup (outbase); |
|---|
| 917 | | /* decoded = g_strdup (inbuf); */ |
|---|
| 918 | | } |
|---|
| 919 | | |
|---|
| 920 | | e_iconv_close (ic); |
|---|
| 921 | | } else { |
|---|
| 922 | | w(g_warning ("Cannot decode charset, header display may be corrupt: %s: %s", |
|---|
| 923 | | charset, strerror (errno))); |
|---|
| 924 | | |
|---|
| 925 | | if (!retried) { |
|---|
| 926 | | charset = e_iconv_locale_charset (); |
|---|
| 927 | | if (!charset) |
|---|
| 928 | | charset = "iso-8859-1"; |
|---|
| 929 | | |
|---|
| 930 | | retried = TRUE; |
|---|
| 931 | | goto retry; |
|---|
| 932 | | } |
|---|
| 933 | | |
|---|
| 934 | | /* we return the encoded word here because we've got to return valid utf8 */ |
|---|
| 935 | | decoded = g_strndup (in, inlen); |
|---|
| 936 | | } |
|---|
| 937 | | } |
|---|
| 938 | | } |
|---|
| 939 | | |
|---|
| 940 | | d(printf("decoded '%s'\n", decoded)); |
|---|
| 941 | | |
|---|
| 942 | | return decoded; |
|---|
| | 1003 | decword_ptr += inlen; |
|---|
| | 1004 | } else { |
|---|
| | 1005 | return NULL; |
|---|
| | 1006 | } |
|---|
| | 1007 | |
|---|
| | 1008 | inptr += 2; /* skip '?=' */ |
|---|
| | 1009 | } /* end of "while (inptr < inend)" */ |
|---|
| | 1010 | |
|---|
| | 1011 | /* at last, iconv to utf8 */ |
|---|
| | 1012 | inlen = decword_ptr - decword; |
|---|
| | 1013 | ret = conv_to_utf8 (curr_charset, decword, inlen, utf8_decword_ptr, outlen); |
|---|
| | 1014 | if (ret == (size_t)-1) { |
|---|
| | 1015 | printf ("conv_to_utf8() error!\n"); |
|---|
| | 1016 | return NULL; |
|---|
| | 1017 | } |
|---|
| | 1018 | |
|---|
| | 1019 | utf8_decword_ptr += ret; |
|---|
| | 1020 | *utf8_decword_ptr = '\0'; |
|---|
| | 1021 | |
|---|
| | 1022 | d(printf("decoded '%s'\n", utf8_decword)); |
|---|
| | 1023 | |
|---|
| | 1024 | return strdup (utf8_decword); |
|---|
| 1050 | | } else if (dword == NULL) { |
|---|
| 1051 | | append (out, start, inptr - start); |
|---|
| 1052 | | } else { |
|---|
| 1053 | | chunk = start; |
|---|
| 1054 | | } |
|---|
| 1055 | | |
|---|
| 1056 | | start = inptr; |
|---|
| 1057 | | while (inptr < inend && !camel_mime_is_type (*inptr, mask)) |
|---|
| 1058 | | inptr++; |
|---|
| 1059 | | |
|---|
| 1060 | | dword = rfc2047_decode_word(start, inptr-start); |
|---|
| 1061 | | if (dword) { |
|---|
| 1062 | | g_string_append(out, dword); |
|---|
| 1063 | | g_free(dword); |
|---|
| 1064 | | } else { |
|---|
| 1065 | | if (!chunk) |
|---|
| 1066 | | chunk = start; |
|---|
| 1067 | | |
|---|
| 1068 | | if ((default_charset == NULL || !append_8bit (out, chunk, inptr-chunk, default_charset)) |
|---|
| 1069 | | && (locale_charset == NULL || !append_8bit(out, chunk, inptr-chunk, locale_charset))) { |
|---|
| 1070 | | |
|---|
| 1071 | | |
|---|
| 1072 | | append_latin1(out, chunk, inptr-chunk); |
|---|
| 1073 | | } |
|---|
| 1074 | | } |
|---|
| 1075 | | |
|---|
| 1076 | | chunk = NULL; |
|---|
| | 1145 | |
|---|
| | 1146 | case BEGIN_SPACE: |
|---|
| | 1147 | if (isspace(*inptr)) { |
|---|
| | 1148 | /* do nothing */ |
|---|
| | 1149 | } else if (*inptr == '=' && *(inptr+1) == '?') { |
|---|
| | 1150 | stats = ENCODED_WORD_CHARSET; |
|---|
| | 1151 | start = inptr; |
|---|
| | 1152 | inptr++; |
|---|
| | 1153 | } else if (*inptr == '\0') { |
|---|
| | 1154 | stats = END; |
|---|
| | 1155 | } else { //if (isgraph(*inptr)) { // we accept multi-byte encode |
|---|
| | 1156 | stats = NOENCODED_WORD; |
|---|
| | 1157 | start = inptr; |
|---|
| | 1158 | } |
|---|
| | 1159 | break; |
|---|
| | 1160 | |
|---|
| | 1161 | case NOENCODED_WORD: |
|---|
| | 1162 | if (isspace(*inptr)) { |
|---|
| | 1163 | /* do nothing */ |
|---|
| | 1164 | } else if (*inptr == '=' && *(inptr+1) == '?') { |
|---|
| | 1165 | if ((default_charset == NULL || !append_8bit (out, start, inptr - start, default_charset)) |
|---|
| | 1166 | && (locale_charset == NULL || !append_8bit (out, start, inptr - start, locale_charset))) |
|---|
| | 1167 | append_latin1 (out, start, inptr - start); |
|---|
| | 1168 | |
|---|
| | 1169 | stats = ENCODED_WORD_CHARSET; |
|---|
| | 1170 | start = inptr; |
|---|
| | 1171 | inptr++; |
|---|
| | 1172 | } else if (*inptr == '\0') { |
|---|
| | 1173 | inptr--; |
|---|
| | 1174 | while (isspace(*inptr)) { |
|---|
| | 1175 | inptr--; |
|---|
| | 1176 | } |
|---|
| | 1177 | if ((default_charset == NULL || !append_8bit (out, start, inptr + 1 - start, default_charset)) |
|---|
| | 1178 | && (locale_charset == NULL || !append_8bit (out, start, inptr + 1 - start, locale_charset))) |
|---|
| | 1179 | append_latin1 (out, start, inptr - start); |
|---|
| | 1180 | |
|---|
| | 1181 | stats = END; |
|---|
| | 1182 | } else { //if (isgraph(*inptr)) { // we accept multi-byte encode |
|---|
| | 1183 | /* do nothing */ |
|---|
| | 1184 | } |
|---|
| | 1185 | break; |
|---|
| | 1186 | |
|---|
| | 1187 | case ENCODED_WORD_CHARSET: |
|---|
| | 1188 | if (isspace (*inptr)) { |
|---|
| | 1189 | stats = NOENCODED_WORD; |
|---|
| | 1190 | } else if (*inptr == '?') { |
|---|
| | 1191 | inptr++; |
|---|
| | 1192 | if ((*inptr == 'Q' || *inptr == 'q' |
|---|
| | 1193 | || *inptr == 'B' || *inptr == 'b') |
|---|
| | 1194 | && *(inptr+1) == '?') { |
|---|
| | 1195 | inptr++; |
|---|
| | 1196 | stats = ENCODED_WORD_ENCODED_TEXT; |
|---|
| | 1197 | } else { |
|---|
| | 1198 | stats = NOENCODED_WORD; |
|---|
| | 1199 | } |
|---|
| | 1200 | } else if (*inptr == '\0') { |
|---|
| | 1201 | if ((default_charset == NULL || !append_8bit (out, start, inptr + 1 - start, default_charset)) |
|---|
| | 1202 | && (locale_charset == NULL || !append_8bit (out, start, inptr + 1 - start, locale_charset))) |
|---|
| | 1203 | append_latin1 (out, start, inptr - start); |
|---|
| | 1204 | |
|---|
| | 1205 | stats = END; |
|---|
| | 1206 | } else if (isgraph(*inptr)) { |
|---|
| | 1207 | /* do nothing */ |
|---|
| | 1208 | } else { |
|---|
| | 1209 | /* impossible */ |
|---|
| | 1210 | } |
|---|
| | 1211 | break; |
|---|
| | 1212 | |
|---|
| | 1213 | case ENCODED_WORD_ENCODED_TEXT: |
|---|
| | 1214 | if (isspace (*inptr)) { |
|---|
| | 1215 | stats = NOENCODED_WORD; /* maybe do nothing */ |
|---|
| | 1216 | } else if (*inptr == '?' && *(inptr+1) == '=') { |
|---|
| | 1217 | /* we will decode it in stats ENCODED_WORD_END */ |
|---|
| | 1218 | stats = ENCODED_WORD_END; |
|---|
| | 1219 | inptr++; |
|---|
| | 1220 | } else if (*inptr == '\0') { |
|---|
| | 1221 | if ((default_charset == NULL || !append_8bit (out, start, inptr + 1 - start, default_charset)) |
|---|
| | 1222 | && (locale_charset == NULL || !append_8bit (out, start, inptr + 1 - start, locale_charset))) |
|---|
| | 1223 | append_latin1 (out, start, inptr - start); |
|---|
| | 1224 | |
|---|
| | 1225 | stats = END; |
|---|
| | 1226 | } else if (isgraph(*inptr)) { |
|---|
| | 1227 | /* do nothing */ |
|---|
| | 1228 | } else { |
|---|
| | 1229 | /* impossible */ |
|---|
| | 1230 | } |
|---|
| | 1231 | break; |
|---|
| | 1232 | |
|---|
| | 1233 | case ENCODED_WORD_END: |
|---|
| | 1234 | if (isspace(*inptr)) { |
|---|
| | 1235 | /* fix some buggy mail clients */ |
|---|
| | 1236 | stats = ENCODED_WORD_END_SPACE; |
|---|
| | 1237 | } else if (*inptr == '=' && *(inptr+1) == '?') { |
|---|
| | 1238 | stats = ENCODED_WORD_CHARSET; |
|---|
| | 1239 | inptr++; |
|---|
| | 1240 | } else { |
|---|
| | 1241 | dword = rfc2047_decode_word (start, inptr - start); |
|---|
| | 1242 | if (dword) { |
|---|
| | 1243 | g_string_append (out, dword); |
|---|
| | 1244 | g_free (dword); |
|---|
| | 1245 | } else { |
|---|
| | 1246 | if ((default_charset == NULL || !append_8bit (out, start, inptr + 1 - start, default_charset)) |
|---|
| | 1247 | && (locale_charset == NULL || !append_8bit (out, start, inptr + 1 - start, locale_charset))) |
|---|
| | 1248 | append_latin1 (out, start, inptr - start); |
|---|
| | 1249 | } |
|---|
| | 1250 | |
|---|
| | 1251 | if (*inptr == '\0') { |
|---|
| | 1252 | stats = END; |
|---|
| | 1253 | } else { //if (isgraph(*inptr)) { // we accept multi-byte encode |
|---|
| | 1254 | start = inptr; |
|---|
| | 1255 | stats = NOENCODED_WORD; |
|---|
| | 1256 | } |
|---|
| | 1257 | } |
|---|
| | 1258 | break; |
|---|
| | 1259 | |
|---|
| | 1260 | case ENCODED_WORD_END_SPACE: |
|---|
| | 1261 | if (isspace(*inptr)) { |
|---|
| | 1262 | /* do nothing */ |
|---|
| | 1263 | } else if (*inptr == '=' && *(inptr+1) == '?') { |
|---|
| | 1264 | /* yes, combine two encoded words */ |
|---|
| | 1265 | stats = ENCODED_WORD_CHARSET; |
|---|
| | 1266 | inptr++; |
|---|
| | 1267 | } else { |
|---|
| | 1268 | if (*inptr == '\0') { |
|---|
| | 1269 | stats = END; |
|---|
| | 1270 | } else { //if (isgraph(*inptr)) { // we accept multi-byte encode |
|---|
| | 1271 | stats = NOENCODED_WORD; |
|---|
| | 1272 | } |
|---|
| | 1273 | |
|---|
| | 1274 | inptr--; |
|---|
| | 1275 | while (isspace(*inptr)) { |
|---|
| | 1276 | inptr--; |
|---|
| | 1277 | } |
|---|
| | 1278 | inptr++; |
|---|
| | 1279 | |
|---|
| | 1280 | dword = rfc2047_decode_word (start, inptr - start); |
|---|
| | 1281 | if (dword) { |
|---|
| | 1282 | g_string_append (out, dword); |
|---|
| | 1283 | g_free (dword); |
|---|
| | 1284 | } else { |
|---|
| | 1285 | if ((default_charset == NULL || !append_8bit (out, start, inptr + 1 - start, default_charset)) |
|---|
| | 1286 | && (locale_charset == NULL || !append_8bit (out, start, inptr + 1 - start, locale_charset))) |
|---|
| | 1287 | append_latin1 (out, start, inptr - start); |
|---|
| | 1288 | } |
|---|
| | 1289 | |
|---|
| | 1290 | if (stats == NOENCODED_WORD) { |
|---|
| | 1291 | start = inptr; |
|---|
| | 1292 | } |
|---|
| | 1293 | } |
|---|
| | 1294 | break; |
|---|
| | 1295 | |
|---|
| | 1296 | default: |
|---|
| | 1297 | /* impossible */ |
|---|
| | 1298 | break; |
|---|
| | 1299 | } |
|---|
| | 1300 | |
|---|
| | 1301 | inptr++; |
|---|