Klaus Demo nginx / 96eaa05
fix utf-8 names in autoindex Igor Sysoev 13 years ago
3 changed file(s) with 44 addition(s) and 39 deletion(s). Raw diff Collapse all Expand all
954954 * ngx_utf_decode() decodes two and more bytes UTF sequences only
955955 * the return values:
956956 * 0x80 - 0x10ffff valid character
957 * 0x10ffff - 0xfffffffd invalid sequence
957 * 0x110000 - 0xfffffffd invalid sequence
958958 * 0xfffffffe incomplete sequence
959959 * 0xffffffff error
960960 */
10191019 size_t
10201020 ngx_utf_length(u_char *p, size_t n)
10211021 {
1022 u_char c;
1023 size_t len;
1024 ngx_uint_t i;
1025
1026 for (len = 0, i = 0; i < n; len++, i++) {
1027
1028 c = p[i];
1022 u_char c, *last;
1023 size_t len;
1024
1025 last = p + n;
1026
1027 for (len = 0; p < last; len++) {
1028
1029 c = *p;
10291030
10301031 if (c < 0x80) {
1032 p++;
10311033 continue;
10321034 }
10331035
1034 if (c >= 0xc0) {
1035 for (c <<= 1; c & 0x80; c <<= 1) {
1036 i++;
1037 }
1038
1039 continue;
1040 }
1041
1042 /* invalid utf */
1043
1044 return n;
1036 if (ngx_utf_decode(&p, n) > 0x10ffff) {
1037 /* invalid utf */
1038 return n;
1039 }
10451040 }
10461041
10471042 return len;
10491044
10501045
10511046 u_char *
1052 ngx_utf_cpystrn(u_char *dst, u_char *src, size_t n)
1053 {
1054 u_char c;
1047 ngx_utf_cpystrn(u_char *dst, u_char *src, size_t n, size_t len)
1048 {
1049 u_char c, *next;
10551050
10561051 if (n == 0) {
10571052 return dst;
10581053 }
10591054
1060 for ( /* void */ ; --n; dst++, src++) {
1055 while (--n) {
10611056
10621057 c = *src;
10631058 *dst = c;
10641059
10651060 if (c < 0x80) {
1066 if (*dst != '\0') {
1061
1062 if (c != '\0') {
1063 dst++;
1064 src++;
1065 len--;
1066
10671067 continue;
10681068 }
10691069
10701070 return dst;
10711071 }
10721072
1073 if (c >= 0xc0) {
1074 for (c <<= 1; c & 0x80; c <<= 1) {
1075 *++dst = *++src;
1076 }
1077
1078 continue;
1079 }
1080
1081 /* invalid utf */
1073 next = src;
1074
1075 if (ngx_utf_decode(&next, len) > 0x10ffff) {
1076 /* invalid utf */
1077 break;
1078 }
1079
1080 len--;
1081
1082 while (src < next) {
1083 *++dst = *++src;
1084 len--;
1085 }
10821086 }
10831087
10841088 *dst = '\0';
152152
153153 uint32_t ngx_utf_decode(u_char **p, size_t n);
154154 size_t ngx_utf_length(u_char *p, size_t n);
155 u_char *ngx_utf_cpystrn(u_char *dst, u_char *src, size_t n);
155 u_char *ngx_utf_cpystrn(u_char *dst, u_char *src, size_t n, size_t len);
156156
157157
158158 #define NGX_ESCAPE_URI 0
134134 {
135135 u_char *last, *filename, scale;
136136 off_t length;
137 size_t len, copy, allocated, root;
137 size_t len, utf_len, allocated, root;
138138 ngx_tm_t tm;
139139 ngx_err_t err;
140140 ngx_buf_t *b;
411411
412412 len = entry[i].utf_len;
413413
414 if (entry[i].name.len - len) {
414 if (entry[i].name.len != len) {
415415 if (len > NGX_HTTP_AUTOINDEX_NAME_LEN) {
416 copy = NGX_HTTP_AUTOINDEX_NAME_LEN - 3 + 1;
416 utf_len = NGX_HTTP_AUTOINDEX_NAME_LEN - 3 + 1;
417417
418418 } else {
419 copy = NGX_HTTP_AUTOINDEX_NAME_LEN + 1;
419 utf_len = NGX_HTTP_AUTOINDEX_NAME_LEN + 1;
420420 }
421421
422 b->last = ngx_utf_cpystrn(b->last, entry[i].name.data, copy);
422 b->last = ngx_utf_cpystrn(b->last, entry[i].name.data,
423 utf_len, entry[i].name.len + 1);
423424 last = b->last;
424425
425426 } else {