Line data Source code
1 : /*
2 : Unix SMB/CIFS implementation.
3 : Samba utility functions
4 : Copyright (C) Andrew Tridgell 1992-2001
5 : Copyright (C) Simo Sorce 2001
6 :
7 : This program is free software; you can redistribute it and/or modify
8 : it under the terms of the GNU General Public License as published by
9 : the Free Software Foundation; either version 3 of the License, or
10 : (at your option) any later version.
11 :
12 : This program is distributed in the hope that it will be useful,
13 : but WITHOUT ANY WARRANTY; without even the implied warranty of
14 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 : GNU General Public License for more details.
16 :
17 : You should have received a copy of the GNU General Public License
18 : along with this program. If not, see <http://www.gnu.org/licenses/>.
19 : */
20 :
21 : #include "replace.h"
22 : #include "system/locale.h"
23 : #include "charset.h"
24 : #include "lib/util/byteorder.h"
25 : #include "lib/util/fault.h"
26 :
27 : /**
28 : String replace.
29 : NOTE: oldc and newc must be 7 bit characters
30 : **/
31 5 : _PUBLIC_ void string_replace_m(char *s, char oldc, char newc)
32 : {
33 5 : struct smb_iconv_handle *ic = get_iconv_handle();
34 19 : while (s && *s) {
35 14 : size_t size;
36 14 : codepoint_t c = next_codepoint_handle(ic, s, &size);
37 14 : if (c == oldc) {
38 5 : *s = newc;
39 : }
40 14 : s += size;
41 : }
42 5 : }
43 :
44 : /**
45 : Convert a string to lower case, allocated with talloc
46 : **/
47 5714054 : _PUBLIC_ char *strlower_talloc_handle(struct smb_iconv_handle *iconv_handle,
48 : TALLOC_CTX *ctx, const char *src)
49 : {
50 5714054 : size_t size=0;
51 18013 : char *dest;
52 :
53 5714054 : if(src == NULL) {
54 0 : return NULL;
55 : }
56 :
57 : /* this takes advantage of the fact that upper/lower can't
58 : change the length of a character by more than 1 byte */
59 5714054 : dest = talloc_array(ctx, char, 2*(strlen(src))+1);
60 5714054 : if (dest == NULL) {
61 0 : return NULL;
62 : }
63 :
64 127244994 : while (*src) {
65 414356 : size_t c_size;
66 121530940 : codepoint_t c = next_codepoint_handle(iconv_handle, src, &c_size);
67 121530940 : src += c_size;
68 :
69 121530940 : c = tolower_m(c);
70 :
71 121530940 : c_size = push_codepoint_handle(iconv_handle, dest+size, c);
72 121530940 : if (c_size == -1) {
73 0 : talloc_free(dest);
74 0 : return NULL;
75 : }
76 121530940 : size += c_size;
77 : }
78 :
79 5714054 : dest[size] = 0;
80 :
81 : /* trim it so talloc_append_string() works */
82 5714054 : dest = talloc_realloc(ctx, dest, char, size+1);
83 :
84 5714054 : talloc_set_name_const(dest, dest);
85 :
86 5714054 : return dest;
87 : }
88 :
89 5714048 : _PUBLIC_ char *strlower_talloc(TALLOC_CTX *ctx, const char *src)
90 : {
91 5714048 : struct smb_iconv_handle *iconv_handle = get_iconv_handle();
92 5714048 : return strlower_talloc_handle(iconv_handle, ctx, src);
93 : }
94 :
95 : /**
96 : Convert a string to UPPER case, allocated with talloc
97 : source length limited to n bytes, iconv handle supplied
98 : **/
99 698098934 : _PUBLIC_ char *strupper_talloc_n_handle(struct smb_iconv_handle *iconv_handle,
100 : TALLOC_CTX *ctx, const char *src, size_t n)
101 : {
102 698098934 : size_t size=0;
103 12943128 : char *dest;
104 :
105 698098934 : if (!src) {
106 190047 : return NULL;
107 : }
108 :
109 : /* this takes advantage of the fact that upper/lower can't
110 : change the length of a character by more than 1 byte */
111 697871722 : dest = talloc_array(ctx, char, 2*(n+1));
112 697871722 : if (dest == NULL) {
113 0 : return NULL;
114 : }
115 :
116 10682710975 : while (n && *src) {
117 118686660 : size_t c_size;
118 9984839255 : codepoint_t c = next_codepoint_handle_ext(iconv_handle, src, n,
119 : CH_UNIX, &c_size);
120 9984839255 : src += c_size;
121 9984839255 : n -= c_size;
122 :
123 9984839255 : c = toupper_m(c);
124 :
125 9984839255 : c_size = push_codepoint_handle(iconv_handle, dest+size, c);
126 9984839255 : if (c_size == -1) {
127 2 : talloc_free(dest);
128 2 : return NULL;
129 : }
130 9984839253 : size += c_size;
131 : }
132 :
133 697871720 : dest[size] = 0;
134 :
135 : /* trim it so talloc_append_string() works */
136 697871720 : dest = talloc_realloc(ctx, dest, char, size+1);
137 :
138 697871720 : talloc_set_name_const(dest, dest);
139 :
140 697871720 : return dest;
141 : }
142 :
143 : /**
144 : Convert a string to UPPER case, allocated with talloc
145 : source length limited to n bytes
146 : **/
147 698098928 : _PUBLIC_ char *strupper_talloc_n(TALLOC_CTX *ctx, const char *src, size_t n)
148 : {
149 698098928 : struct smb_iconv_handle *iconv_handle = get_iconv_handle();
150 698098928 : return strupper_talloc_n_handle(iconv_handle, ctx, src, n);
151 : }
152 : /**
153 : Convert a string to UPPER case, allocated with talloc
154 : **/
155 5932220 : _PUBLIC_ char *strupper_talloc(TALLOC_CTX *ctx, const char *src)
156 : {
157 5932220 : return strupper_talloc_n(ctx, src, src?strlen(src):0);
158 : }
159 :
160 : /**
161 : talloc_strdup() a unix string to upper case.
162 : **/
163 3049297 : _PUBLIC_ char *talloc_strdup_upper(TALLOC_CTX *ctx, const char *src)
164 : {
165 3049297 : return strupper_talloc(ctx, src);
166 : }
167 :
168 : /**
169 : Find the number of 'c' chars in a string
170 : **/
171 4 : _PUBLIC_ size_t count_chars_m(const char *s, char c)
172 : {
173 4 : struct smb_iconv_handle *ic = get_iconv_handle();
174 4 : size_t count = 0;
175 :
176 13 : while (*s) {
177 9 : size_t size;
178 9 : codepoint_t c2 = next_codepoint_handle(ic, s, &size);
179 9 : if (c2 == c) count++;
180 9 : s += size;
181 : }
182 :
183 4 : return count;
184 : }
185 :
186 3019523 : size_t ucs2_align(const void *base_ptr, const void *p, int flags)
187 : {
188 3019523 : if (flags & (STR_NOALIGN|STR_ASCII)) {
189 128545 : return 0;
190 : }
191 2890884 : return PTR_DIFF(p, base_ptr) & 1;
192 : }
193 :
/**
return the number of bytes occupied by a buffer in CH_UTF16 format
the result includes the null termination

The buffer is scanned two bytes at a time with SVAL() until a zero
value is read; there is no upper bound on the scan.
**/
size_t utf16_len(const void *buf)
{
	size_t offset = 0;

	while (SVAL(buf, offset) != 0) {
		offset += 2;
	}

	/* account for the terminating 16-bit zero */
	return offset + 2;
}
206 :
/**
return the number of bytes occupied by a buffer in CH_UTF16 format
the result includes the null termination
limited by 'n' bytes

At most n bytes are counted. The last two bytes that fit within n are
added to the result without being examined, so the returned region is
not guaranteed to end in a terminator.
**/
size_t utf16_len_n(const void *src, size_t n)
{
	size_t used = 0;

	while ((used + 2 < n) && (SVAL(src, used) != 0)) {
		used += 2;
	}

	if (used + 2 <= n) {
		used += 2;
	}

	return used;
}
224 :
225 :
/**
 * Determine the length and validity of a utf-8 string.
 *
 * Scanning stops at maxlen bytes, at a '\0' byte, or at the first
 * invalid sequence (bad lead byte, missing continuation byte, sequence
 * running past maxlen, overlong encoding, encoded surrogate, or a
 * codepoint above U+10FFFF), whichever comes first.
 *
 * Continuation bytes are examined one at a time, so a sequence cut
 * short by a '\0' is rejected without reading beyond that '\0'.
 *
 * @param input the string pointer
 * @param maxlen maximum size of the string
 * @param byte_len receives the length in bytes of the valid section
 * @param char_len receives the number of unicode characters in the valid section
 * @param utf16_len receives the number of 16-bit UTF-16 code units (not
 *                  bytes) the valid section would need: one per
 *                  character, plus one more for each character that
 *                  needs a surrogate pair.
 *
 * @return true if the input is valid up to maxlen, or a '\0' byte, otherwise false.
 */
bool utf8_check(const char *input, size_t maxlen,
		size_t *byte_len,
		size_t *char_len,
		size_t *utf16_len)
{
	const uint8_t *s = (const uint8_t *)input;
	size_t i = 0;
	size_t chars = 0;
	size_t long_chars = 0;
	bool ok = true;

	while (i < maxlen && s[i] != 0) {
		const uint8_t lead = s[i];
		const size_t avail = maxlen - i;
		uint32_t codepoint;
		size_t seq_len;

		if (lead < 0x80) {
			/* 0xxxxxxx */
			seq_len = 1;
		} else if ((lead & 0xe0) == 0xc0) {
			/* 110xxxxx 10xxxxxx */
			if (avail < 2 || (s[i + 1] & 0xc0) != 0x80) {
				ok = false;
				break;
			}
			codepoint = (uint32_t)(lead & 0x1f) << 6 |
				    (uint32_t)(s[i + 1] & 0x3f);
			if (codepoint < 0x80) {
				/* overlong encoding */
				ok = false;
				break;
			}
			seq_len = 2;
		} else if ((lead & 0xf0) == 0xe0) {
			/* 1110xxxx 10xxxxxx 10xxxxxx */
			if (avail < 3 ||
			    (s[i + 1] & 0xc0) != 0x80 ||
			    (s[i + 2] & 0xc0) != 0x80) {
				ok = false;
				break;
			}
			codepoint = (uint32_t)(lead & 0x0f) << 12 |
				    (uint32_t)(s[i + 1] & 0x3f) << 6 |
				    (uint32_t)(s[i + 2] & 0x3f);
			if (codepoint < 0x800) {
				/* overlong encoding */
				ok = false;
				break;
			}
			if (codepoint >= 0xd800 && codepoint <= 0xdfff) {
				/*
				 * This is an invalid codepoint, per
				 * RFC3629, as it encodes part of a
				 * UTF-16 surrogate pair for a
				 * character over U+10000, which ought
				 * to have been encoded as a four byte
				 * utf-8 sequence.
				 */
				ok = false;
				break;
			}
			seq_len = 3;
		} else if ((lead & 0xf8) == 0xf0) {
			/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
			if (avail < 4 ||
			    (s[i + 1] & 0xc0) != 0x80 ||
			    (s[i + 2] & 0xc0) != 0x80 ||
			    (s[i + 3] & 0xc0) != 0x80) {
				ok = false;
				break;
			}
			codepoint = (uint32_t)(lead & 0x07) << 18 |
				    (uint32_t)(s[i + 1] & 0x3f) << 12 |
				    (uint32_t)(s[i + 2] & 0x3f) << 6 |
				    (uint32_t)(s[i + 3] & 0x3f);
			if (codepoint < 0x10000 || codepoint > 0x10ffff) {
				ok = false;
				break;
			}
			/* this one will need two UTF16 characters */
			long_chars++;
			seq_len = 4;
		} else {
			/*
			 * If it wasn't handled yet, it's wrong.
			 */
			ok = false;
			break;
		}

		i += seq_len;
		chars++;
	}

	/* on failure these describe the valid prefix before the bad byte */
	*byte_len = i;
	*char_len = chars;
	*utf16_len = chars + long_chars;
	return ok;
}
344 :
345 :
346 : /**
347 : * Copy a string from a char* unix src to a dos codepage string destination.
348 : *
349 : * @converted_size the number of bytes occupied by the string in the destination.
350 : * @return bool true if success.
351 : *
352 : * @param flags can include
353 : * <dl>
354 : * <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
355 : * <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
356 : * </dl>
357 : *
358 : * @param dest_len the maximum length in bytes allowed in the
359 : * destination. If @p dest_len is -1 then no maximum is used.
360 : **/
361 4123 : static bool push_ascii_string(void *dest, const char *src, size_t dest_len, int flags, size_t *converted_size)
362 : {
363 144 : size_t src_len;
364 144 : bool ret;
365 :
366 4123 : if (flags & STR_UPPER) {
367 4 : char *tmpbuf = strupper_talloc(NULL, src);
368 4 : if (tmpbuf == NULL) {
369 0 : return false;
370 : }
371 4 : ret = push_ascii_string(dest, tmpbuf, dest_len, flags & ~STR_UPPER, converted_size);
372 4 : talloc_free(tmpbuf);
373 4 : return ret;
374 : }
375 :
376 4119 : src_len = strlen(src);
377 :
378 4119 : if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
379 4075 : src_len++;
380 :
381 4119 : return convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, converted_size);
382 : }
383 :
384 : /**
385 : * Copy a string from a dos codepage source to a unix char* destination.
386 : *
387 : * The resulting string in "dest" is always null terminated.
388 : *
389 : * @param flags can have:
390 : * <dl>
391 : * <dt>STR_TERMINATE</dt>
392 : * <dd>STR_TERMINATE means the string in @p src
393 : * is null terminated, and src_len is ignored.</dd>
394 : * </dl>
395 : *
396 : * @param src_len is the length of the source area in bytes.
397 : * @returns the number of bytes occupied by the string in @p src.
398 : **/
399 172 : static ssize_t pull_ascii_string(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
400 : {
401 172 : size_t size = 0;
402 :
403 172 : if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII)) {
404 0 : if (src_len == (size_t)-1) {
405 0 : src_len = strlen((const char *)src) + 1;
406 : } else {
407 0 : size_t len = strnlen((const char *)src, src_len);
408 0 : if (len < src_len)
409 0 : len++;
410 0 : src_len = len;
411 : }
412 : }
413 :
414 : /* We're ignoring the return here.. */
415 172 : (void)convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, &size);
416 :
417 172 : if (dest_len)
418 172 : dest[MIN(size, dest_len-1)] = 0;
419 :
420 172 : return src_len;
421 : }
422 :
423 : /**
424 : * Copy a string from a char* src to a unicode destination.
425 : *
426 : * @returns the number of bytes occupied by the string in the destination.
427 : *
428 : * @param flags can have:
429 : *
430 : * <dl>
431 : * <dt>STR_TERMINATE <dd>means include the null termination.
432 : * <dt>STR_UPPER <dd>means uppercase in the destination.
433 : * <dt>STR_NOALIGN <dd>means don't do alignment.
434 : * </dl>
435 : *
436 : * @param dest_len is the maximum length allowed in the
437 : * destination. If dest_len is -1 then no maximum is used.
438 : **/
439 457417 : static ssize_t push_ucs2(void *dest, const char *src, size_t dest_len, int flags)
440 : {
441 457417 : size_t len=0;
442 457417 : size_t src_len = strlen(src);
443 457417 : size_t size = 0;
444 11140 : bool ret;
445 :
446 457417 : if (flags & STR_UPPER) {
447 2952 : char *tmpbuf = strupper_talloc(NULL, src);
448 144 : ssize_t retval;
449 2952 : if (tmpbuf == NULL) {
450 0 : return -1;
451 : }
452 2952 : retval = push_ucs2(dest, tmpbuf, dest_len, flags & ~STR_UPPER);
453 2952 : talloc_free(tmpbuf);
454 2952 : return retval;
455 : }
456 :
457 454465 : if (flags & STR_TERMINATE)
458 314903 : src_len++;
459 :
460 454465 : if (ucs2_align(NULL, dest, flags)) {
461 148821 : *(char *)dest = 0;
462 148821 : dest = (void *)((char *)dest + 1);
463 148821 : if (dest_len) dest_len--;
464 143751 : len++;
465 : }
466 :
467 : /* ucs2 is always a multiple of 2 bytes */
468 454465 : dest_len &= ~1;
469 :
470 454465 : ret = convert_string(CH_UNIX, CH_UTF16, src, src_len, dest, dest_len, &size);
471 454465 : if (ret == false) {
472 0 : return 0;
473 : }
474 :
475 454465 : len += size;
476 :
477 454465 : return (ssize_t)len;
478 : }
479 :
480 :
481 : /**
482 : Copy a string from a ucs2 source to a unix char* destination.
483 : Flags can have:
484 : STR_TERMINATE means the string in src is null terminated.
485 : STR_NOALIGN means don't try to align.
486 : if STR_TERMINATE is set then src_len is ignored if it is -1.
487 : src_len is the length of the source area in bytes
488 : Return the number of bytes occupied by the string in src.
489 : The resulting string in "dest" is always null terminated.
490 : **/
491 :
492 0 : static size_t pull_ucs2(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
493 : {
494 0 : size_t size = 0;
495 :
496 0 : if (ucs2_align(NULL, src, flags)) {
497 0 : src = (const void *)((const char *)src + 1);
498 0 : if (src_len > 0)
499 0 : src_len--;
500 : }
501 :
502 0 : if (flags & STR_TERMINATE) {
503 0 : if (src_len == (size_t)-1) {
504 0 : src_len = utf16_len(src);
505 : } else {
506 0 : src_len = utf16_len_n(src, src_len);
507 : }
508 : }
509 :
510 : /* ucs2 is always a multiple of 2 bytes */
511 0 : if (src_len != (size_t)-1)
512 0 : src_len &= ~1;
513 :
514 : /* We're ignoring the return here.. */
515 0 : (void)convert_string(CH_UTF16, CH_UNIX, src, src_len, dest, dest_len, &size);
516 0 : if (dest_len)
517 0 : dest[MIN(size, dest_len-1)] = 0;
518 :
519 0 : return src_len;
520 : }
521 :
522 : /**
523 : Copy a string from a char* src to a unicode or ascii
524 : dos codepage destination choosing unicode or ascii based on the
525 : flags in the SMB buffer starting at base_ptr.
526 : Return the number of bytes occupied by the string in the destination.
527 : flags can have:
528 : STR_TERMINATE means include the null termination.
529 : STR_UPPER means uppercase in the destination.
530 : STR_ASCII use ascii even with unicode packet.
531 : STR_NOALIGN means don't do alignment.
532 : dest_len is the maximum length allowed in the destination. If dest_len
533 : is -1 then no maximum is used.
534 : **/
535 :
536 458584 : _PUBLIC_ ssize_t push_string(void *dest, const char *src, size_t dest_len, int flags)
537 : {
538 458584 : if (flags & STR_ASCII) {
539 4119 : size_t size = 0;
540 4119 : if (push_ascii_string(dest, src, dest_len, flags, &size)) {
541 4119 : return (ssize_t)size;
542 : } else {
543 0 : return (ssize_t)-1;
544 : }
545 454465 : } else if (flags & STR_UNICODE) {
546 454465 : return push_ucs2(dest, src, dest_len, flags);
547 : } else {
548 0 : smb_panic("push_string requires either STR_ASCII or STR_UNICODE flag to be set");
549 : return -1;
550 : }
551 : }
552 :
553 :
554 : /**
555 : Copy a string from a unicode or ascii source (depending on
556 : the packet flags) to a char* destination.
557 : Flags can have:
558 : STR_TERMINATE means the string in src is null terminated.
559 : STR_UNICODE means to force as unicode.
560 : STR_ASCII use ascii even with unicode packet.
561 : STR_NOALIGN means don't do alignment.
562 : if STR_TERMINATE is set then src_len is ignored is it is -1
563 : src_len is the length of the source area in bytes.
564 : Return the number of bytes occupied by the string in src.
565 : The resulting string in "dest" is always null terminated.
566 : **/
567 :
568 172 : _PUBLIC_ ssize_t pull_string(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
569 : {
570 172 : if (flags & STR_ASCII) {
571 172 : return pull_ascii_string(dest, src, dest_len, src_len, flags);
572 0 : } else if (flags & STR_UNICODE) {
573 0 : return pull_ucs2(dest, src, dest_len, src_len, flags);
574 : } else {
575 0 : smb_panic("pull_string requires either STR_ASCII or STR_UNICODE flag to be set");
576 : return -1;
577 : }
578 : }
|