/*
 * print_hex:
 * @data: bytes to dump
 * @len: number of bytes in @data
 *
 * Debugging aid: writes a hex dump of @data to stdout, eight bytes per
 * row.  Every row starts with the 4-digit, zero-padded decimal offset of
 * its first byte, and each byte is rendered as "0xhh " in lowercase hex.
 * The dump always ends with a newline; when @len is a multiple of eight
 * (zero included) the final row consists of the offset only.
 */
static void
print_hex (unsigned char *data, size_t len)
{
	size_t i;

	/* offsets are size_t, so they must be printed with %zu: handing a
	 * size_t to %u is a format/argument mismatch */
	printf ("%04zu ", (size_t) 0);

	for (i = 0; i < len; i++) {
		printf ("0x%02x ", (unsigned int) data[i]);

		/* after every eighth byte start a new row labelled with the
		 * offset of the byte that follows */
		if (i % 8 == 7)
			printf ("\n%04zu ", i + 1);
	}

	printf ("\n");
}
|---|
| 850 | | |
|---|
/*
 * conv_to_utf8:
 * @encname: name of the source charset; it is passed through
 *           e_iconv_charset_name() before the converter is opened
 * @in: bytes to convert
 * @inlen: number of bytes in @in
 * @out: destination buffer
 * @outlen: size of @out in bytes
 *
 * Converts @in from @encname to UTF-8 with e_iconv() and NUL-terminates
 * whatever was written to @out.  One byte of @out is reserved for that
 * terminator, so at most @outlen - 1 bytes of converted text are stored
 * and the terminator never lands outside the buffer.
 *
 * When the conversion fails, a diagnostic (charsets involved, byte counts,
 * a hex dump of the input and the partial output) is printed to stdout.
 * The converter is closed on every path that opened it.
 *
 * Returns: the number of UTF-8 bytes stored in @out, not counting the
 * terminating NUL, or (size_t) -1 if @out has no room at all, the
 * converter cannot be opened, or the conversion fails.
 */
static size_t
conv_to_utf8 (const char *encname, char *in, size_t inlen, char *out, size_t outlen)
{
	const char *charset, *inbuf;
	char *outbuf;
	iconv_t ic;
	size_t inbuf_len, outbuf_len, ret;

	/* not even the terminating NUL fits: nothing can be stored safely */
	if (outlen == 0)
		return (size_t) -1;

	charset = (const char *) e_iconv_charset_name (encname);

	ic = e_iconv_open ("UTF-8", charset);
	if (ic == (iconv_t) -1) {
		printf ("e_iconv_open() error\n");
		return (size_t) -1;
	}

	inbuf = in;
	inbuf_len = inlen;

	outbuf = out;
	outbuf_len = outlen - 1;	/* keep one byte back for the NUL */

	ret = e_iconv (ic, &inbuf, &inbuf_len, &outbuf, &outbuf_len);

	/* outbuf points just past the converted bytes; thanks to the
	 * reserved byte this store is always inside @out */
	*outbuf = '\0';

	if (ret == (size_t) -1) {
		printf ("e_iconv() error! source charset is %s, target charset is %s\n", charset, "UTF-8");
		printf ("converted %zu bytes, but last %zu bytes can't convert!!\n", inlen - inbuf_len, inbuf_len);
		printf ("source data:\n");
		print_hex ((unsigned char *) in, inlen);
		printf ("target string is \"%s\"\n", out);
	} else {
		ret = (size_t) (outbuf - out);
	}

	/* single exit: the descriptor is released on failure as well */
	e_iconv_close (ic);

	return ret;
}
|---|
| 892 | | |
|---|
| 934 | | |
|---|
| 935 | | /* charset */ |
|---|
| 936 | | start = inptr; |
|---|
| 937 | | inptr = memchr (inptr, '?', inend-inptr); |
|---|
| 938 | | if (!inptr) { |
|---|
| 939 | | return NULL; |
|---|
| 940 | | } |
|---|
| 941 | | strncpy (curr_charset, start, inptr-start); /* maybe overflow */ |
|---|
| 942 | | curr_charset[inptr-start] = '\0'; |
|---|
| 943 | | if (prev_charset[0] == '\0') { /* first charset in multi encode words */ |
|---|
| 944 | | strcpy (prev_charset, curr_charset); |
|---|
| 945 | | } |
|---|
| 946 | | d(printf ("curr_charset = %s\n", curr_charset)); |
|---|
| 947 | | |
|---|
| 948 | | /* if (charset.perv != charset.curr) iconv perv to utf8 */ |
|---|
| 949 | | if (prev_charset[0] != '\0' && strcmp(prev_charset, curr_charset)) { |
|---|
| 950 | | inlen = decword_ptr - decword; |
|---|
| 951 | | ret = conv_to_utf8 (prev_charset, decword, inlen, utf8_decword_ptr, outlen); |
|---|
| 952 | | if (ret == (size_t)-1) { |
|---|
| 953 | | printf ("conv_to_utf8() error!\n"); |
|---|
| 954 | | return NULL; |
|---|
| 955 | | } |
|---|
| 956 | | |
|---|
| 957 | | utf8_decword_ptr += ret; |
|---|
| 958 | | outlen = outlen - ret; |
|---|
| 959 | | |
|---|
| 960 | | decword_ptr = decword; /* reset decword_ptr */ |
|---|
| 961 | | strcpy (prev_charset, curr_charset); |
|---|
| 962 | | } |
|---|
| 963 | | |
|---|
| 964 | | /* encode */ |
|---|
| 965 | | inptr++; |
|---|
| 966 | | encode = *inptr; |
|---|
| 967 | | inptr++; |
|---|
| 968 | | if (*inptr != '?') { |
|---|
| 969 | | return NULL; |
|---|
| 970 | | } |
|---|
| 971 | | |
|---|
| 972 | | /* text */ |
|---|
| 973 | | inptr++; |
|---|
| 974 | | start = inptr; |
|---|
| 975 | | inptr = memchr (inptr, '?', inend-inptr); |
|---|
| 976 | | if (!inptr || *(inptr+1) != '=') { |
|---|
| 977 | | return NULL; |
|---|
| 978 | | } |
|---|
| 979 | | |
|---|
| 980 | | /* decode */ |
|---|
| 981 | | switch(encode) { |
|---|
| 982 | | |
|---|
| | 858 | tmplen = inend-inptr-2; |
|---|
| | 859 | decword = g_alloca (tmplen); /* this will always be more-than-enough room */ |
|---|
| | 860 | switch(toupper(inptr[0])) { |
|---|
| 1003 | | decword_ptr += inlen; |
|---|
| 1004 | | } else { |
|---|
| 1005 | | return NULL; |
|---|
| 1006 | | } |
|---|
| 1007 | | |
|---|
| 1008 | | inptr += 2; /* skip '?=' */ |
|---|
| 1009 | | } /* end of "while (inptr < inend)" */ |
|---|
| 1010 | | |
|---|
| 1011 | | /* at last, iconv to utf8 */ |
|---|
| 1012 | | inlen = decword_ptr - decword; |
|---|
| 1013 | | ret = conv_to_utf8 (curr_charset, decword, inlen, utf8_decword_ptr, outlen); |
|---|
| 1014 | | if (ret == (size_t)-1) { |
|---|
| 1015 | | printf ("conv_to_utf8() error!\n"); |
|---|
| 1016 | | return NULL; |
|---|
| 1017 | | } |
|---|
| 1018 | | |
|---|
| 1019 | | utf8_decword_ptr += ret; |
|---|
| 1020 | | *utf8_decword_ptr = '\0'; |
|---|
| 1021 | | |
|---|
| 1022 | | d(printf("decoded '%s'\n", utf8_decword)); |
|---|
| 1023 | | |
|---|
| 1024 | | return strdup (utf8_decword); |
|---|
| | 878 | /* yuck, all this snot is to setup iconv! */ |
|---|
| | 879 | tmplen = inptr - in - 3; |
|---|
| | 880 | encname = g_alloca (tmplen + 1); |
|---|
| | 881 | memcpy (encname, in + 2, tmplen); |
|---|
| | 882 | encname[tmplen] = '\0'; |
|---|
| | 883 | |
|---|
| | 884 | /* rfc2231 updates rfc2047 encoded words... |
|---|
| | 885 | * The ABNF given in RFC 2047 for encoded-words is: |
|---|
| | 886 | * encoded-word := "=?" charset "?" encoding "?" encoded-text "?=" |
|---|
| | 887 | * This specification changes this ABNF to: |
|---|
| | 888 | * encoded-word := "=?" charset ["*" language] "?" encoding "?" encoded-text "?=" |
|---|
| | 889 | */ |
|---|
| | 890 | |
|---|
| | 891 | /* trim off the 'language' part if it's there... */ |
|---|
| | 892 | p = strchr (encname, '*'); |
|---|
| | 893 | if (p) |
|---|
| | 894 | *p = '\0'; |
|---|
| | 895 | |
|---|
| | 896 | charset = e_iconv_charset_name (encname); |
|---|
| | 897 | |
|---|
| | 898 | inbuf = decword; |
|---|
| | 899 | |
|---|
| | 900 | outlen = inlen * 6 + 16; |
|---|
| | 901 | outbase = g_alloca (outlen); |
|---|
| | 902 | outbuf = outbase; |
|---|
| | 903 | |
|---|
| | 904 | retry: |
|---|
| | 905 | ic = e_iconv_open ("UTF-8", charset); |
|---|
| | 906 | if (ic != (iconv_t) -1) { |
|---|
| | 907 | ret = e_iconv (ic, &inbuf, &inlen, &outbuf, &outlen); |
|---|
| | 908 | if (ret != (size_t) -1) { |
|---|
| | 909 | e_iconv (ic, NULL, 0, &outbuf, &outlen); |
|---|
| | 910 | *outbuf = 0; |
|---|
| | 911 | decoded = g_strdup (outbase); |
|---|
| | 912 | } else { |
|---|
| | 913 | perror ("iconv"); |
|---|
| | 914 | e_iconv (ic, NULL, 0, &outbuf, &outlen); |
|---|
| | 915 | *outbuf = 0; |
|---|
| | 916 | decoded = g_strdup (outbase); |
|---|
| | 917 | /* decoded = g_strdup (inbuf); */ |
|---|
| | 918 | } |
|---|
| | 919 | |
|---|
| | 920 | e_iconv_close (ic); |
|---|
| | 921 | } else { |
|---|
| | 922 | w(g_warning ("Cannot decode charset, header display may be corrupt: %s: %s", |
|---|
| | 923 | charset, strerror (errno))); |
|---|
| | 924 | |
|---|
| | 925 | if (!retried) { |
|---|
| | 926 | charset = e_iconv_locale_charset (); |
|---|
| | 927 | if (!charset) |
|---|
| | 928 | charset = "iso-8859-1"; |
|---|
| | 929 | |
|---|
| | 930 | retried = TRUE; |
|---|
| | 931 | goto retry; |
|---|
| | 932 | } |
|---|
| | 933 | |
|---|
| | 934 | /* we return the encoded word here because we've got to return valid utf8 */ |
|---|
| | 935 | decoded = g_strndup (in, inlen); |
|---|
| | 936 | } |
|---|
| | 937 | } |
|---|
| | 938 | } |
|---|
| | 939 | |
|---|
| | 940 | d(printf("decoded '%s'\n", decoded)); |
|---|
| | 941 | |
|---|
| | 942 | return decoded; |
|---|
| 1145 | | |
|---|
| 1146 | | case BEGIN_SPACE: |
|---|
| 1147 | | if (isspace(*inptr)) { |
|---|
| 1148 | | /* do nothing */ |
|---|
| 1149 | | } else if (*inptr == '=' && *(inptr+1) == '?') { |
|---|
| 1150 | | stats = ENCODED_WORD_CHARSET; |
|---|
| 1151 | | start = inptr; |
|---|
| 1152 | | inptr++; |
|---|
| 1153 | | } else if (*inptr == '\0') { |
|---|
| 1154 | | stats = END; |
|---|
| 1155 | | } else { //if (isgraph(*inptr)) { // we accept multi-byte encode |
|---|
| 1156 | | stats = NOENCODED_WORD; |
|---|
| 1157 | | start = inptr; |
|---|
| 1158 | | } |
|---|
| 1159 | | break; |
|---|
| 1160 | | |
|---|
| 1161 | | case NOENCODED_WORD: |
|---|
| 1162 | | if (isspace(*inptr)) { |
|---|
| 1163 | | /* do nothing */ |
|---|
| 1164 | | } else if (*inptr == '=' && *(inptr+1) == '?') { |
|---|
| 1165 | | if ((default_charset == NULL || !append_8bit (out, start, inptr - start, default_charset)) |
|---|
| 1166 | | && (locale_charset == NULL || !append_8bit (out, start, inptr - start, locale_charset))) |
|---|
| 1167 | | append_latin1 (out, start, inptr - start); |
|---|
| 1168 | | |
|---|
| 1169 | | stats = ENCODED_WORD_CHARSET; |
|---|
| 1170 | | start = inptr; |
|---|
| 1171 | | inptr++; |
|---|
| 1172 | | } else if (*inptr == '\0') { |
|---|
| 1173 | | inptr--; |
|---|
| 1174 | | while (isspace(*inptr)) { |
|---|
| 1175 | | inptr--; |
|---|
| 1176 | | } |
|---|
| 1177 | | if ((default_charset == NULL || !append_8bit (out, start, inptr + 1 - start, default_charset)) |
|---|
| 1178 | | && (locale_charset == NULL || !append_8bit (out, start, inptr + 1 - start, locale_charset))) |
|---|
| 1179 | | append_latin1 (out, start, inptr - start); |
|---|
| 1180 | | |
|---|
| 1181 | | stats = END; |
|---|
| 1182 | | } else { //if (isgraph(*inptr)) { // we accept multi-byte encode |
|---|
| 1183 | | /* do nothing */ |
|---|
| 1184 | | } |
|---|
| 1185 | | break; |
|---|
| 1186 | | |
|---|
| 1187 | | case ENCODED_WORD_CHARSET: |
|---|
| 1188 | | if (isspace (*inptr)) { |
|---|
| 1189 | | stats = NOENCODED_WORD; |
|---|
| 1190 | | } else if (*inptr == '?') { |
|---|
| 1191 | | inptr++; |
|---|
| 1192 | | if ((*inptr == 'Q' || *inptr == 'q' |
|---|
| 1193 | | || *inptr == 'B' || *inptr == 'b') |
|---|
| 1194 | | && *(inptr+1) == '?') { |
|---|
| 1195 | | inptr++; |
|---|
| 1196 | | stats = ENCODED_WORD_ENCODED_TEXT; |
|---|
| 1197 | | } else { |
|---|
| 1198 | | stats = NOENCODED_WORD; |
|---|
| 1199 | | } |
|---|
| 1200 | | } else if (*inptr == '\0') { |
|---|
| 1201 | | if ((default_charset == NULL || !append_8bit (out, start, inptr + 1 - start, default_charset)) |
|---|
| 1202 | | && (locale_charset == NULL || !append_8bit (out, start, inptr + 1 - start, locale_charset))) |
|---|
| 1203 | | append_latin1 (out, start, inptr - start); |
|---|
| 1204 | | |
|---|
| 1205 | | stats = END; |
|---|
| 1206 | | } else if (isgraph(*inptr)) { |
|---|
| 1207 | | /* do nothing */ |
|---|
| 1208 | | } else { |
|---|
| 1209 | | /* impossible */ |
|---|
| 1210 | | } |
|---|
| 1211 | | break; |
|---|
| 1212 | | |
|---|
| 1213 | | case ENCODED_WORD_ENCODED_TEXT: |
|---|
| 1214 | | if (isspace (*inptr)) { |
|---|
| 1215 | | stats = NOENCODED_WORD; /* maybe do nothing */ |
|---|
| 1216 | | } else if (*inptr == '?' && *(inptr+1) == '=') { |
|---|
| 1217 | | /* we will decode it in stats ENCODED_WORD_END */ |
|---|
| 1218 | | stats = ENCODED_WORD_END; |
|---|
| 1219 | | inptr++; |
|---|
| 1220 | | } else if (*inptr == '\0') { |
|---|
| 1221 | | if ((default_charset == NULL || !append_8bit (out, start, inptr + 1 - start, default_charset)) |
|---|
| 1222 | | && (locale_charset == NULL || !append_8bit (out, start, inptr + 1 - start, locale_charset))) |
|---|
| 1223 | | append_latin1 (out, start, inptr - start); |
|---|
| 1224 | | |
|---|
| 1225 | | stats = END; |
|---|
| 1226 | | } else if (isgraph(*inptr)) { |
|---|
| 1227 | | /* do nothing */ |
|---|
| 1228 | | } else { |
|---|
| 1229 | | /* impossible */ |
|---|
| 1230 | | } |
|---|
| 1231 | | break; |
|---|
| 1232 | | |
|---|
| 1233 | | case ENCODED_WORD_END: |
|---|
| 1234 | | if (isspace(*inptr)) { |
|---|
| 1235 | | /* fix some buggy mail clients */ |
|---|
| 1236 | | stats = ENCODED_WORD_END_SPACE; |
|---|
| 1237 | | } else if (*inptr == '=' && *(inptr+1) == '?') { |
|---|
| 1238 | | stats = ENCODED_WORD_CHARSET; |
|---|
| 1239 | | inptr++; |
|---|
| 1240 | | } else { |
|---|
| 1241 | | dword = rfc2047_decode_word (start, inptr - start); |
|---|
| 1242 | | if (dword) { |
|---|
| 1243 | | g_string_append (out, dword); |
|---|
| 1244 | | g_free (dword); |
|---|
| 1245 | | } else { |
|---|
| 1246 | | if ((default_charset == NULL || !append_8bit (out, start, inptr + 1 - start, default_charset)) |
|---|
| 1247 | | && (locale_charset == NULL || !append_8bit (out, start, inptr + 1 - start, locale_charset))) |
|---|
| 1248 | | append_latin1 (out, start, inptr - start); |
|---|
| 1249 | | } |
|---|
| 1250 | | |
|---|
| 1251 | | if (*inptr == '\0') { |
|---|
| 1252 | | stats = END; |
|---|
| 1253 | | } else { //if (isgraph(*inptr)) { // we accept multi-byte encode |
|---|
| 1254 | | start = inptr; |
|---|
| 1255 | | stats = NOENCODED_WORD; |
|---|
| 1256 | | } |
|---|
| 1257 | | } |
|---|
| 1258 | | break; |
|---|
| 1259 | | |
|---|
| 1260 | | case ENCODED_WORD_END_SPACE: |
|---|
| 1261 | | if (isspace(*inptr)) { |
|---|
| 1262 | | /* do nothing */ |
|---|
| 1263 | | } else if (*inptr == '=' && *(inptr+1) == '?') { |
|---|
| 1264 | | /* yes, combine two encoded words */ |
|---|
| 1265 | | stats = ENCODED_WORD_CHARSET; |
|---|
| 1266 | | inptr++; |
|---|
| 1267 | | } else { |
|---|
| 1268 | | if (*inptr == '\0') { |
|---|
| 1269 | | stats = END; |
|---|
| 1270 | | } else { //if (isgraph(*inptr)) { // we accept multi-byte encode |
|---|
| 1271 | | stats = NOENCODED_WORD; |
|---|
| 1272 | | } |
|---|
| 1273 | | |
|---|
| 1274 | | inptr--; |
|---|
| 1275 | | while (isspace(*inptr)) { |
|---|
| 1276 | | inptr--; |
|---|
| 1277 | | } |
|---|
| 1278 | | inptr++; |
|---|
| 1279 | | |
|---|
| 1280 | | dword = rfc2047_decode_word (start, inptr - start); |
|---|
| 1281 | | if (dword) { |
|---|
| 1282 | | g_string_append (out, dword); |
|---|
| 1283 | | g_free (dword); |
|---|
| 1284 | | } else { |
|---|
| 1285 | | if ((default_charset == NULL || !append_8bit (out, start, inptr + 1 - start, default_charset)) |
|---|
| 1286 | | && (locale_charset == NULL || !append_8bit (out, start, inptr + 1 - start, locale_charset))) |
|---|
| 1287 | | append_latin1 (out, start, inptr - start); |
|---|
| 1288 | | } |
|---|
| 1289 | | |
|---|
| 1290 | | if (stats == NOENCODED_WORD) { |
|---|
| 1291 | | start = inptr; |
|---|
| 1292 | | } |
|---|
| 1293 | | } |
|---|
| 1294 | | break; |
|---|
| 1295 | | |
|---|
| 1296 | | default: |
|---|
| 1297 | | /* impossible */ |
|---|
| 1298 | | break; |
|---|
| 1299 | | } |
|---|
| 1300 | | |
|---|
| 1301 | | inptr++; |
|---|
| | 1050 | } else if (dword == NULL) { |
|---|
| | 1051 | append (out, start, inptr - start); |
|---|
| | 1052 | } else { |
|---|
| | 1053 | chunk = start; |
|---|
| | 1054 | } |
|---|
| | 1055 | |
|---|
| | 1056 | start = inptr; |
|---|
| | 1057 | while (inptr < inend && !camel_mime_is_type (*inptr, mask)) |
|---|
| | 1058 | inptr++; |
|---|
| | 1059 | |
|---|
| | 1060 | dword = rfc2047_decode_word(start, inptr-start); |
|---|
| | 1061 | if (dword) { |
|---|
| | 1062 | g_string_append(out, dword); |
|---|
| | 1063 | g_free(dword); |
|---|
| | 1064 | } else { |
|---|
| | 1065 | if (!chunk) |
|---|
| | 1066 | chunk = start; |
|---|
| | 1067 | |
|---|
| | 1068 | if ((default_charset == NULL || !append_8bit (out, chunk, inptr-chunk, default_charset)) |
|---|
| | 1069 | && (locale_charset == NULL || !append_8bit(out, chunk, inptr-chunk, locale_charset))) { |
|---|
| | 1070 | |
|---|
| | 1071 | |
|---|
| | 1072 | append_latin1(out, chunk, inptr-chunk); |
|---|
| | 1073 | } |
|---|
| | 1074 | } |
|---|
| | 1075 | |
|---|
| | 1076 | chunk = NULL; |
|---|