LCOV - coverage report for smb2.twrp.listdir_fix f886ca1c

LCOV - code coverage report

Current view:	top level - lib/util/charset - util_unistr.c (source / functions)		Hit	Total	Coverage
Test:	coverage report for smb2.twrp.listdir_fix f886ca1c	Lines:	132	237	55.7 %
Date:	2023-11-07 19:11:32	Functions:	15	18	83.3 %

          Line data    Source code

       1             : /* 
       2             :    Unix SMB/CIFS implementation.
       3             :    Samba utility functions
       4             :    Copyright (C) Andrew Tridgell 1992-2001
       5             :    Copyright (C) Simo Sorce 2001
       6             :    
       7             :    This program is free software; you can redistribute it and/or modify
       8             :    it under the terms of the GNU General Public License as published by
       9             :    the Free Software Foundation; either version 3 of the License, or
      10             :    (at your option) any later version.
      11             :    
      12             :    This program is distributed in the hope that it will be useful,
      13             :    but WITHOUT ANY WARRANTY; without even the implied warranty of
      14             :    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      15             :    GNU General Public License for more details.
      16             :    
      17             :    You should have received a copy of the GNU General Public License
      18             :    along with this program.  If not, see <http://www.gnu.org/licenses/>.
      19             : */
      20             : 
      21             : #include "replace.h"
      22             : #include "system/locale.h"
      23             : #include "charset.h"
      24             : #include "lib/util/byteorder.h"
      25             : #include "lib/util/fault.h"
      26             : 
      27             : /**
      28             :  String replace.
      29             :  NOTE: oldc and newc must be 7 bit characters
      30             : **/
      31           5 : _PUBLIC_ void string_replace_m(char *s, char oldc, char newc)
      32             : {
      33           5 :         struct smb_iconv_handle *ic = get_iconv_handle();
      34          19 :         while (s && *s) {
      35          14 :                 size_t size;
      36          14 :                 codepoint_t c = next_codepoint_handle(ic, s, &size);
      37          14 :                 if (c == oldc) {
      38           5 :                         *s = newc;
      39             :                 }
      40          14 :                 s += size;
      41             :         }
      42           5 : }
      43             : 
      44             : /**
      45             :  Convert a string to lower case, allocated with talloc
      46             : **/
      47     5714054 : _PUBLIC_ char *strlower_talloc_handle(struct smb_iconv_handle *iconv_handle,
      48             :                                       TALLOC_CTX *ctx, const char *src)
      49             : {
      50     5714054 :         size_t size=0;
      51       18013 :         char *dest;
      52             : 
      53     5714054 :         if(src == NULL) {
      54           0 :                 return NULL;
      55             :         }
      56             : 
      57             :         /* this takes advantage of the fact that upper/lower can't
      58             :            change the length of a character by more than 1 byte */
      59     5714054 :         dest = talloc_array(ctx, char, 2*(strlen(src))+1);
      60     5714054 :         if (dest == NULL) {
      61           0 :                 return NULL;
      62             :         }
      63             : 
      64   127244994 :         while (*src) {
      65      414356 :                 size_t c_size;
      66   121530940 :                 codepoint_t c = next_codepoint_handle(iconv_handle, src, &c_size);
      67   121530940 :                 src += c_size;
      68             : 
      69   121530940 :                 c = tolower_m(c);
      70             : 
      71   121530940 :                 c_size = push_codepoint_handle(iconv_handle, dest+size, c);
      72   121530940 :                 if (c_size == -1) {
      73           0 :                         talloc_free(dest);
      74           0 :                         return NULL;
      75             :                 }
      76   121530940 :                 size += c_size;
      77             :         }
      78             : 
      79     5714054 :         dest[size] = 0;
      80             : 
      81             :         /* trim it so talloc_append_string() works */
      82     5714054 :         dest = talloc_realloc(ctx, dest, char, size+1);
      83             : 
      84     5714054 :         talloc_set_name_const(dest, dest);
      85             : 
      86     5714054 :         return dest;
      87             : }
      88             : 
      89     5714048 : _PUBLIC_ char *strlower_talloc(TALLOC_CTX *ctx, const char *src)
      90             : {
      91     5714048 :         struct smb_iconv_handle *iconv_handle = get_iconv_handle();
      92     5714048 :         return strlower_talloc_handle(iconv_handle, ctx, src);
      93             : }
      94             : 
      95             : /**
      96             :  Convert a string to UPPER case, allocated with talloc
      97             :  source length limited to n bytes, iconv handle supplied
      98             : **/
      99   698098934 : _PUBLIC_ char *strupper_talloc_n_handle(struct smb_iconv_handle *iconv_handle,
     100             :                                         TALLOC_CTX *ctx, const char *src, size_t n)
     101             : {
     102   698098934 :         size_t size=0;
     103    12943128 :         char *dest;
     104             : 
     105   698098934 :         if (!src) {
     106      190047 :                 return NULL;
     107             :         }
     108             : 
     109             :         /* this takes advantage of the fact that upper/lower can't
     110             :            change the length of a character by more than 1 byte */
     111   697871722 :         dest = talloc_array(ctx, char, 2*(n+1));
     112   697871722 :         if (dest == NULL) {
     113           0 :                 return NULL;
     114             :         }
     115             : 
     116 10682710975 :         while (n && *src) {
     117   118686660 :                 size_t c_size;
     118  9984839255 :                 codepoint_t c = next_codepoint_handle_ext(iconv_handle, src, n,
     119             :                                                           CH_UNIX, &c_size);
     120  9984839255 :                 src += c_size;
     121  9984839255 :                 n -= c_size;
     122             : 
     123  9984839255 :                 c = toupper_m(c);
     124             : 
     125  9984839255 :                 c_size = push_codepoint_handle(iconv_handle, dest+size, c);
     126  9984839255 :                 if (c_size == -1) {
     127           2 :                         talloc_free(dest);
     128           2 :                         return NULL;
     129             :                 }
     130  9984839253 :                 size += c_size;
     131             :         }
     132             : 
     133   697871720 :         dest[size] = 0;
     134             : 
     135             :         /* trim it so talloc_append_string() works */
     136   697871720 :         dest = talloc_realloc(ctx, dest, char, size+1);
     137             : 
     138   697871720 :         talloc_set_name_const(dest, dest);
     139             : 
     140   697871720 :         return dest;
     141             : }
     142             : 
     143             : /**
     144             :  Convert a string to UPPER case, allocated with talloc
     145             :  source length limited to n bytes
     146             : **/
     147   698098928 : _PUBLIC_ char *strupper_talloc_n(TALLOC_CTX *ctx, const char *src, size_t n)
     148             : {
     149   698098928 :         struct smb_iconv_handle *iconv_handle = get_iconv_handle();
     150   698098928 :         return strupper_talloc_n_handle(iconv_handle, ctx, src, n);
     151             : }
     152             : /**
     153             :  Convert a string to UPPER case, allocated with talloc
     154             : **/
     155     5932220 : _PUBLIC_ char *strupper_talloc(TALLOC_CTX *ctx, const char *src)
     156             : {
     157     5932220 :         return strupper_talloc_n(ctx, src, src?strlen(src):0);
     158             : }
     159             : 
     160             : /**
     161             :  talloc_strdup() a unix string to upper case.
     162             : **/
     163     3049297 : _PUBLIC_ char *talloc_strdup_upper(TALLOC_CTX *ctx, const char *src)
     164             : {
     165     3049297 :         return strupper_talloc(ctx, src);
     166             : }
     167             : 
     168             : /**
     169             :  Find the number of 'c' chars in a string
     170             : **/
     171           4 : _PUBLIC_ size_t count_chars_m(const char *s, char c)
     172             : {
     173           4 :         struct smb_iconv_handle *ic = get_iconv_handle();
     174           4 :         size_t count = 0;
     175             : 
     176          13 :         while (*s) {
     177           9 :                 size_t size;
     178           9 :                 codepoint_t c2 = next_codepoint_handle(ic, s, &size);
     179           9 :                 if (c2 == c) count++;
     180           9 :                 s += size;
     181             :         }
     182             : 
     183           4 :         return count;
     184             : }
     185             : 
     186     3019523 : size_t ucs2_align(const void *base_ptr, const void *p, int flags)
     187             : {
     188     3019523 :         if (flags & (STR_NOALIGN|STR_ASCII)) {
     189      128545 :                 return 0;
     190             :         }
     191     2890884 :         return PTR_DIFF(p, base_ptr) & 1;
     192             : }
     193             : 
     194             : /**
     195             : return the number of bytes occupied by a buffer in CH_UTF16 format
     196             : the result includes the null termination
     197             : **/
     198           0 : size_t utf16_len(const void *buf)
     199             : {
     200           0 :         size_t len;
     201             : 
     202           0 :         for (len = 0; SVAL(buf,len); len += 2) ;
     203             : 
     204           0 :         return len + 2;
     205             : }
     206             : 
     207             : /**
     208             : return the number of bytes occupied by a buffer in CH_UTF16 format
     209             : the result includes the null termination
     210             : limited by 'n' bytes
     211             : **/
     212     1383795 : size_t utf16_len_n(const void *src, size_t n)
     213             : {
     214        1447 :         size_t len;
     215             : 
     216    16645848 :         for (len = 0; (len+2 < n) && SVAL(src, len); len += 2) ;
     217             : 
     218     1383795 :         if (len+2 <= n) {
     219     1383654 :                 len += 2;
     220             :         }
     221             : 
     222     1383795 :         return len;
     223             : }
     224             : 
     225             : 
     226             : /**
     227             :  * Determine the length and validity of a utf-8 string.
     228             :  *
     229             :  * @param input the string pointer
     230             :  * @param maxlen maximum size of the string
     231             :  * @param byte_len receives the length of the valid section
     232             :  * @param char_len receives the number of unicode characters in the valid section
     233             :  * @param utf16_len receives the number of 16-bit code units the string would need in UTF-16 encoding.
     234             :  *
     235             :  * @return true if the input is valid up to maxlen, or a '\0' byte, otherwise false.
     236             :  */
     237           0 : bool utf8_check(const char *input, size_t maxlen,
     238             :                 size_t *byte_len,
     239             :                 size_t *char_len,
     240             :                 size_t *utf16_len)
     241             : {
     242           0 :         const uint8_t *s = (const uint8_t *)input;
     243           0 :         size_t i;
     244           0 :         size_t chars = 0;
     245           0 :         size_t long_chars = 0;
     246           0 :         uint32_t codepoint;
     247           0 :         uint8_t a, b, c, d;
     248           0 :         for (i = 0; i < maxlen; i++, chars++) {
     249           0 :                 if (s[i] == 0) {
     250           0 :                         break;
     251             :                 }
     252           0 :                 if (s[i] < 0x80) {
     253           0 :                         continue;
     254             :                 }
     255           0 :                 if ((s[i] & 0xe0) == 0xc0) {
     256             :                         /* 110xxxxx 10xxxxxx */
     257           0 :                         a = s[i];
     258           0 :                         if (maxlen - i < 2) {
     259           0 :                                 goto error;
     260             :                         }
     261           0 :                         b = s[i + 1];
     262           0 :                         if ((b & 0xc0) != 0x80) {
     263           0 :                                 goto error;
     264             :                         }
     265           0 :                         codepoint = (a & 31) << 6 | (b & 63);
     266           0 :                         if (codepoint < 0x80) {
     267           0 :                                 goto error;
     268             :                         }
     269           0 :                         i++;
     270           0 :                         continue;
     271             :                 }
     272           0 :                 if ((s[i] & 0xf0) == 0xe0) {
     273             :                         /* 1110xxxx 10xxxxxx 10xxxxxx */
     274           0 :                         if (maxlen - i < 3) {
     275           0 :                                 goto error;
     276             :                         }
     277           0 :                         a = s[i];
     278           0 :                         b = s[i + 1];
     279           0 :                         c = s[i + 2];
     280           0 :                         if ((b & 0xc0) != 0x80 || (c & 0xc0) != 0x80) {
     281           0 :                                 goto error;
     282             :                         }
     283           0 :                         codepoint = (c & 63) | (b & 63) << 6 | (a & 15) << 12;
     284             : 
     285           0 :                         if (codepoint < 0x800) {
     286           0 :                                 goto error;
     287             :                         }
     288           0 :                         if (codepoint >= 0xd800 && codepoint <= 0xdfff) {
     289             :                                 /*
     290             :                                  * This is an invalid codepoint, per
     291             :                                  * RFC3629, as it encodes part of a
     292             :                                  * UTF-16 surrogate pair for a
     293             :                                  * character over U+10000, which ought
     294             :                                  * to have been encoded as a four byte
     295             :                                  * utf-8 sequence.
     296             :                                  */
     297           0 :                                 goto error;
     298             :                         }
     299           0 :                         i += 2;
     300           0 :                         continue;
     301             :                 }
     302             : 
     303           0 :                 if ((s[i] & 0xf8) == 0xf0) {
     304             :                         /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
     305           0 :                         if (maxlen - i < 4) {
     306           0 :                                 goto error;
     307             :                         }
     308           0 :                         a = s[i];
     309           0 :                         b = s[i + 1];
     310           0 :                         c = s[i + 2];
     311           0 :                         d = s[i + 3];
     312             : 
     313           0 :                         if ((b & 0xc0) != 0x80 ||
     314           0 :                             (c & 0xc0) != 0x80 ||
     315           0 :                             (d & 0xc0) != 0x80) {
     316           0 :                                 goto error;
     317             :                         }
     318           0 :                         codepoint = (d & 63) | (c & 63) << 6 | (b & 63) << 12 | (a & 7) << 18;
     319             : 
     320           0 :                         if (codepoint < 0x10000 || codepoint > 0x10ffff) {
     321           0 :                                 goto error;
     322             :                         }
     323             :                         /* this one will need two UTF16 characters */
     324           0 :                         long_chars++;
     325           0 :                         i += 3;
     326           0 :                         continue;
     327             :                 }
     328             :                 /*
     329             :                  * If it wasn't handled yet, it's wrong.
     330             :                  */
     331           0 :                 goto error;
     332             :         }
     333           0 :         *byte_len = i;
     334           0 :         *char_len = chars;
     335           0 :         *utf16_len = chars + long_chars;
     336           0 :         return true;
     337             : 
     338           0 : error:
     339           0 :         *byte_len = i;
     340           0 :         *char_len = chars;
     341           0 :         *utf16_len = chars + long_chars;
     342           0 :         return false;
     343             : }
     344             : 
     345             : 
     346             : /**
     347             :  * Copy a string from a char* unix src to a dos codepage string destination.
     348             :  *
     349             :  * @param converted_size the number of bytes occupied by the string in the destination.
     350             :  * @return bool true if success.
     351             :  *
     352             :  * @param flags can include
     353             :  * <dl>
     354             :  * <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
     355             :  * <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
     356             :  * </dl>
     357             :  *
     358             :  * @param dest_len the maximum length in bytes allowed in the
     359             :  * destination.  If @p dest_len is -1 then no maximum is used.
     360             :  **/
     361        4123 : static bool push_ascii_string(void *dest, const char *src, size_t dest_len, int flags, size_t *converted_size)
     362             : {
     363         144 :         size_t src_len;
     364         144 :         bool ret;
     365             : 
     366        4123 :         if (flags & STR_UPPER) {
     367           4 :                 char *tmpbuf = strupper_talloc(NULL, src);
     368           4 :                 if (tmpbuf == NULL) {
     369           0 :                         return false;
     370             :                 }
     371           4 :                 ret = push_ascii_string(dest, tmpbuf, dest_len, flags & ~STR_UPPER, converted_size);
     372           4 :                 talloc_free(tmpbuf);
     373           4 :                 return ret;
     374             :         }
     375             : 
     376        4119 :         src_len = strlen(src);
     377             : 
     378        4119 :         if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
     379        4075 :                 src_len++;
     380             : 
     381        4119 :         return convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, converted_size);
     382             : }
     383             : 
     384             : /**
     385             :  * Copy a string from a dos codepage source to a unix char* destination.
     386             :  *
     387             :  * The resulting string in "dest" is always null terminated.
     388             :  *
     389             :  * @param flags can have:
     390             :  * <dl>
     391             :  * <dt>STR_TERMINATE</dt>
     392             :  * <dd>STR_TERMINATE means the string in @p src
     393             :  * is null terminated; src_len is ignored only if it is -1, otherwise it bounds the search for the terminator.</dd>
     394             :  * </dl>
     395             :  *
     396             :  * @param src_len is the length of the source area in bytes.
     397             :  * @returns the number of bytes occupied by the string in @p src.
     398             :  **/
     399         172 : static ssize_t pull_ascii_string(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
     400             : {
     401         172 :         size_t size = 0;
     402             : 
     403         172 :         if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII)) {
     404           0 :                 if (src_len == (size_t)-1) {
     405           0 :                         src_len = strlen((const char *)src) + 1;
     406             :                 } else {
     407           0 :                         size_t len = strnlen((const char *)src, src_len);
     408           0 :                         if (len < src_len)
     409           0 :                                 len++;
     410           0 :                         src_len = len;
     411             :                 }
     412             :         }
     413             : 
     414             :         /* We're ignoring the return here.. */
     415         172 :         (void)convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, &size);
     416             : 
     417         172 :         if (dest_len)
     418         172 :                 dest[MIN(size, dest_len-1)] = 0;
     419             : 
     420         172 :         return src_len;
     421             : }
     422             : 
     423             : /**
     424             :  * Copy a string from a char* src to a unicode destination.
     425             :  *
     426             :  * @returns the number of bytes occupied by the string in the destination.
     427             :  *
     428             :  * @param flags can have:
     429             :  *
     430             :  * <dl>
     431             :  * <dt>STR_TERMINATE <dd>means include the null termination.
     432             :  * <dt>STR_UPPER     <dd>means uppercase in the destination.
     433             :  * <dt>STR_NOALIGN   <dd>means don't do alignment.
     434             :  * </dl>
     435             :  *
     436             :  * @param dest_len is the maximum length allowed in the
     437             :  * destination. If dest_len is -1 then no maximum is used.
     438             :  **/
     439      457417 : static ssize_t push_ucs2(void *dest, const char *src, size_t dest_len, int flags)
     440             : {
     441      457417 :         size_t len=0;
     442      457417 :         size_t src_len = strlen(src);
     443      457417 :         size_t size = 0;
     444       11140 :         bool ret;
     445             : 
     446      457417 :         if (flags & STR_UPPER) {
     447        2952 :                 char *tmpbuf = strupper_talloc(NULL, src);
     448         144 :                 ssize_t retval;
     449        2952 :                 if (tmpbuf == NULL) {
     450           0 :                         return -1;
     451             :                 }
     452        2952 :                 retval = push_ucs2(dest, tmpbuf, dest_len, flags & ~STR_UPPER);
     453        2952 :                 talloc_free(tmpbuf);
     454        2952 :                 return retval;
     455             :         }
     456             : 
     457      454465 :         if (flags & STR_TERMINATE)
     458      314903 :                 src_len++;
     459             : 
     460      454465 :         if (ucs2_align(NULL, dest, flags)) {
     461      148821 :                 *(char *)dest = 0;
     462      148821 :                 dest = (void *)((char *)dest + 1);
     463      148821 :                 if (dest_len) dest_len--;
     464      143751 :                 len++;
     465             :         }
     466             : 
     467             :         /* ucs2 is always a multiple of 2 bytes */
     468      454465 :         dest_len &= ~1;
     469             : 
     470      454465 :         ret = convert_string(CH_UNIX, CH_UTF16, src, src_len, dest, dest_len, &size);
     471      454465 :         if (ret == false) {
     472           0 :                 return 0;
     473             :         }
     474             : 
     475      454465 :         len += size;
     476             : 
     477      454465 :         return (ssize_t)len;
     478             : }
     479             : 
     480             : 
     481             : /**
     482             :  Copy a string from a ucs2 source to a unix char* destination.
     483             :  Flags can have:
     484             :   STR_TERMINATE means the string in src is null terminated.
     485             :   STR_NOALIGN   means don't try to align.
     486             :  if STR_TERMINATE is set then src_len is ignored if it is -1.
     487             :  src_len is the length of the source area in bytes
     488             :  Return the number of bytes occupied by the string in src.
     489             :  The resulting string in "dest" is always null terminated.
     490             : **/
     491             : 
     492           0 : static size_t pull_ucs2(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
     493             : {
     494           0 :         size_t size = 0;
     495             : 
     496           0 :         if (ucs2_align(NULL, src, flags)) {
     497           0 :                 src = (const void *)((const char *)src + 1);
     498           0 :                 if (src_len > 0)
     499           0 :                         src_len--;
     500             :         }
     501             : 
     502           0 :         if (flags & STR_TERMINATE) {
     503           0 :                 if (src_len == (size_t)-1) {
     504           0 :                         src_len = utf16_len(src);
     505             :                 } else {
     506           0 :                         src_len = utf16_len_n(src, src_len);
     507             :                 }
     508             :         }
     509             : 
     510             :         /* ucs2 is always a multiple of 2 bytes */
     511           0 :         if (src_len != (size_t)-1)
     512           0 :                 src_len &= ~1;
     513             : 
     514             :         /* We're ignoring the return here.. */
     515           0 :         (void)convert_string(CH_UTF16, CH_UNIX, src, src_len, dest, dest_len, &size);
     516           0 :         if (dest_len)
     517           0 :                 dest[MIN(size, dest_len-1)] = 0;
     518             : 
     519           0 :         return src_len;
     520             : }
     521             : 
     522             : /**
     523             :  Copy a string from a char* src to a unicode or ascii
     524             :  dos codepage destination choosing unicode or ascii based on the 
     525             :  flags in the SMB buffer starting at base_ptr.
     526             :  Return the number of bytes occupied by the string in the destination.
     527             :  flags can have:
     528             :   STR_TERMINATE means include the null termination.
     529             :   STR_UPPER     means uppercase in the destination.
     530             :   STR_ASCII     use ascii even with unicode packet.
     531             :   STR_NOALIGN   means don't do alignment.
     532             :  dest_len is the maximum length allowed in the destination. If dest_len
     533             :  is -1 then no maximum is used.
     534             : **/
     535             : 
     536      458584 : _PUBLIC_ ssize_t push_string(void *dest, const char *src, size_t dest_len, int flags)
     537             : {
     538      458584 :         if (flags & STR_ASCII) {
     539        4119 :                 size_t size = 0;
     540        4119 :                 if (push_ascii_string(dest, src, dest_len, flags, &size)) {
     541        4119 :                         return (ssize_t)size;
     542             :                 } else {
     543           0 :                         return (ssize_t)-1;
     544             :                 }
     545      454465 :         } else if (flags & STR_UNICODE) {
     546      454465 :                 return push_ucs2(dest, src, dest_len, flags);
     547             :         } else {
     548           0 :                 smb_panic("push_string requires either STR_ASCII or STR_UNICODE flag to be set");
     549             :                 return -1;
     550             :         }
     551             : }
     552             : 
     553             : 
     554             : /**
     555             :  Copy a string from a unicode or ascii source (depending on
     556             :  the packet flags) to a char* destination.
     557             :  Flags can have:
     558             :   STR_TERMINATE means the string in src is null terminated.
     559             :   STR_UNICODE   means to force as unicode.
     560             :   STR_ASCII     use ascii even with unicode packet.
     561             :   STR_NOALIGN   means don't do alignment.
     562             :  if STR_TERMINATE is set then src_len is ignored if it is -1
     563             :  src_len is the length of the source area in bytes.
     564             :  Return the number of bytes occupied by the string in src.
     565             :  The resulting string in "dest" is always null terminated.
     566             : **/
     567             : 
     568         172 : _PUBLIC_ ssize_t pull_string(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
     569             : {
     570         172 :         if (flags & STR_ASCII) {
     571         172 :                 return pull_ascii_string(dest, src, dest_len, src_len, flags);
     572           0 :         } else if (flags & STR_UNICODE) {
     573           0 :                 return pull_ucs2(dest, src, dest_len, src_len, flags);
     574             :         } else {
     575           0 :                 smb_panic("pull_string requires either STR_ASCII or STR_UNICODE flag to be set");
     576             :                 return -1;
     577             :         }
     578             : }

Generated by: LCOV version 1.14