fix utf-8 names in autoindex
Igor Sysoev
13 years ago
954 | 954 |
* ngx_utf_decode() decodes two and more bytes UTF sequences only
|
955 | 955 |
* the return values:
|
956 | 956 |
* 0x80 - 0x10ffff valid character
|
957 | |
* 0x10ffff - 0xfffffffd invalid sequence
|
|
957 |
* 0x110000 - 0xfffffffd invalid sequence
|
958 | 958 |
* 0xfffffffe incomplete sequence
|
959 | 959 |
* 0xffffffff error
|
960 | 960 |
*/
|
|
1019 | 1019 |
size_t
|
1020 | 1020 |
ngx_utf_length(u_char *p, size_t n)
|
1021 | 1021 |
{
|
1022 | |
u_char c;
|
1023 | |
size_t len;
|
1024 | |
ngx_uint_t i;
|
1025 | |
|
1026 | |
for (len = 0, i = 0; i < n; len++, i++) {
|
1027 | |
|
1028 | |
c = p[i];
|
|
1022 |
u_char c, *last;
|
|
1023 |
size_t len;
|
|
1024 |
|
|
1025 |
last = p + n;
|
|
1026 |
|
|
1027 |
for (len = 0; p < last; len++) {
|
|
1028 |
|
|
1029 |
c = *p;
|
1029 | 1030 |
|
1030 | 1031 |
if (c < 0x80) {
|
|
1032 |
p++;
|
1031 | 1033 |
continue;
|
1032 | 1034 |
}
|
1033 | 1035 |
|
1034 | |
if (c >= 0xc0) {
|
1035 | |
for (c <<= 1; c & 0x80; c <<= 1) {
|
1036 | |
i++;
|
1037 | |
}
|
1038 | |
|
1039 | |
continue;
|
1040 | |
}
|
1041 | |
|
1042 | |
/* invalid utf */
|
1043 | |
|
1044 | |
return n;
|
|
1036 |
if (ngx_utf_decode(&p, n) > 0x10ffff) {
|
|
1037 |
/* invalid utf */
|
|
1038 |
return n;
|
|
1039 |
}
|
1045 | 1040 |
}
|
1046 | 1041 |
|
1047 | 1042 |
return len;
|
|
1049 | 1044 |
|
1050 | 1045 |
|
1051 | 1046 |
u_char *
|
1052 | |
ngx_utf_cpystrn(u_char *dst, u_char *src, size_t n)
|
1053 | |
{
|
1054 | |
u_char c;
|
|
1047 |
ngx_utf_cpystrn(u_char *dst, u_char *src, size_t n, size_t len)
|
|
1048 |
{
|
|
1049 |
u_char c, *next;
|
1055 | 1050 |
|
1056 | 1051 |
if (n == 0) {
|
1057 | 1052 |
return dst;
|
1058 | 1053 |
}
|
1059 | 1054 |
|
1060 | |
for ( /* void */ ; --n; dst++, src++) {
|
|
1055 |
while (--n) {
|
1061 | 1056 |
|
1062 | 1057 |
c = *src;
|
1063 | 1058 |
*dst = c;
|
1064 | 1059 |
|
1065 | 1060 |
if (c < 0x80) {
|
1066 | |
if (*dst != '\0') {
|
|
1061 |
|
|
1062 |
if (c != '\0') {
|
|
1063 |
dst++;
|
|
1064 |
src++;
|
|
1065 |
len--;
|
|
1066 |
|
1067 | 1067 |
continue;
|
1068 | 1068 |
}
|
1069 | 1069 |
|
1070 | 1070 |
return dst;
|
1071 | 1071 |
}
|
1072 | 1072 |
|
1073 | |
if (c >= 0xc0) {
|
1074 | |
for (c <<= 1; c & 0x80; c <<= 1) {
|
1075 | |
*++dst = *++src;
|
1076 | |
}
|
1077 | |
|
1078 | |
continue;
|
1079 | |
}
|
1080 | |
|
1081 | |
/* invalid utf */
|
|
1073 |
next = src;
|
|
1074 |
|
|
1075 |
if (ngx_utf_decode(&next, len) > 0x10ffff) {
|
|
1076 |
/* invalid utf */
|
|
1077 |
break;
|
|
1078 |
}
|
|
1079 |
|
|
1080 |
len--;
|
|
1081 |
|
|
1082 |
while (src < next) {
|
|
1083 |
*++dst = *++src;
|
|
1084 |
len--;
|
|
1085 |
}
|
1082 | 1086 |
}
|
1083 | 1087 |
|
1084 | 1088 |
*dst = '\0';
|
152 | 152 |
|
153 | 153 |
uint32_t ngx_utf_decode(u_char **p, size_t n);
|
154 | 154 |
size_t ngx_utf_length(u_char *p, size_t n);
|
155 | |
u_char *ngx_utf_cpystrn(u_char *dst, u_char *src, size_t n);
|
|
155 |
u_char *ngx_utf_cpystrn(u_char *dst, u_char *src, size_t n, size_t len);
|
156 | 156 |
|
157 | 157 |
|
158 | 158 |
#define NGX_ESCAPE_URI 0
|
134 | 134 |
{
|
135 | 135 |
u_char *last, *filename, scale;
|
136 | 136 |
off_t length;
|
137 | |
size_t len, copy, allocated, root;
|
|
137 |
size_t len, utf_len, allocated, root;
|
138 | 138 |
ngx_tm_t tm;
|
139 | 139 |
ngx_err_t err;
|
140 | 140 |
ngx_buf_t *b;
|
|
411 | 411 |
|
412 | 412 |
len = entry[i].utf_len;
|
413 | 413 |
|
414 | |
if (entry[i].name.len - len) {
|
|
414 |
if (entry[i].name.len != len) {
|
415 | 415 |
if (len > NGX_HTTP_AUTOINDEX_NAME_LEN) {
|
416 | |
copy = NGX_HTTP_AUTOINDEX_NAME_LEN - 3 + 1;
|
|
416 |
utf_len = NGX_HTTP_AUTOINDEX_NAME_LEN - 3 + 1;
|
417 | 417 |
|
418 | 418 |
} else {
|
419 | |
copy = NGX_HTTP_AUTOINDEX_NAME_LEN + 1;
|
|
419 |
utf_len = NGX_HTTP_AUTOINDEX_NAME_LEN + 1;
|
420 | 420 |
}
|
421 | 421 |
|
422 | |
b->last = ngx_utf_cpystrn(b->last, entry[i].name.data, copy);
|
|
422 |
b->last = ngx_utf_cpystrn(b->last, entry[i].name.data,
|
|
423 |
utf_len, entry[i].name.len + 1);
|
423 | 424 |
last = b->last;
|
424 | 425 |
|
425 | 426 |
} else {
|