|           Line data    Source code 
       1             : /*
       2             :  * Functions for RFC 3986 percent-encoding.
       3             :  *
       4             :  * NOTE:
       5             :  *
       6             :  * This file was originally imported from the Squid project but has been
       7             :  * significantly altered. The licence below is reproduced intact, but refers
       8             :  * to files in Squid's repository, not in Samba. See COPYING for the GPLv3
       9             :  * notice (being the later version mentioned below).
      10             :  */
      11             : 
      12             : /*
      13             :  * $Id$
      14             :  *
      15             :  * DEBUG:
      16             :  * AUTHOR: Harvest Derived
      17             :  *
      18             :  * SQUID Web Proxy Cache          http://www.squid-cache.org/
      19             :  * ----------------------------------------------------------
      20             :  *
      21             :  *  Squid is the result of efforts by numerous individuals from
      22             :  *  the Internet community; see the CONTRIBUTORS file for full
      23             :  *  details.   Many organizations have provided support for Squid's
      24             :  *  development; see the SPONSORS file for full details.  Squid is
      25             :  *  Copyrighted (C) 2001 by the Regents of the University of
      26             :  *  California; see the COPYRIGHT file for full details.  Squid
      27             :  *  incorporates software developed and/or copyrighted by other
      28             :  *  sources; see the CREDITS file for full details.
      29             :  *
      30             :  *  This program is free software; you can redistribute it and/or modify
      31             :  *  it under the terms of the GNU General Public License as published by
      32             :  *  the Free Software Foundation; either version 2 of the License, or
      33             :  *  (at your option) any later version.
      34             :  *
      35             :  *  This program is distributed in the hope that it will be useful,
      36             :  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
      37             :  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      38             :  *  GNU General Public License for more details.
      39             :  *
      40             :  *  You should have received a copy of the GNU General Public License
      41             :  *  along with this program; if not, write to the Free Software
      42             :  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
      43             :  *
      44             :  */
      45             : 
      46             : #include "replace.h"
      47             : #include <talloc.h>
      48             : #include "lib/util/samba_util.h"
      49             : 
      50             : #define RFC1738_ENCODE 1
      51             : #define RFC1738_RESERVED 2
      52             : 
      53             : /*
      54             :  * According to RFC 1738, "$-_.+!*'()," are not reserved or unsafe, but as
      55             :  * that has been obsolete since 2004, we sm instead for RFC 3986, where:
      56             :  *
      57             :  *  reserved =    : / ? # [ ] @ ! $ & ' ( ) * + , ; =
      58             :  *  unreserved = ALPHA DIGIT - . _ ~
      59             :  *
      60             :  * and whatever is not in either of those are what RFC 1738 called "unsafe",
      61             :  * meaning that they should are canonically but not mandatorily escaped.
      62             :  *
      63             :  * Characters below 0x20 or above 0x7E are always encoded.
      64             :  */
      65             : 
      66             : static const unsigned char escapees[127] = {
      67             :         [' '] = RFC1738_ENCODE,
      68             :         ['"'] = RFC1738_ENCODE,
      69             :         ['%'] = RFC1738_ENCODE,
      70             :         ['<'] = RFC1738_ENCODE,
      71             :         ['>'] = RFC1738_ENCODE,
      72             :         ['\\'] = RFC1738_ENCODE,
      73             :         ['^'] = RFC1738_ENCODE,
      74             :         ['`'] = RFC1738_ENCODE,
      75             :         ['{'] = RFC1738_ENCODE,
      76             :         ['|'] = RFC1738_ENCODE,
      77             :         ['}'] = RFC1738_ENCODE,
      78             :         /* reserved : / ? # [ ] @ ! $ & ' ( ) * + , ; = */
      79             :         [':'] = RFC1738_RESERVED,
      80             :         ['/'] = RFC1738_RESERVED,
      81             :         ['?'] = RFC1738_RESERVED,
      82             :         ['#'] = RFC1738_RESERVED,
      83             :         ['['] = RFC1738_RESERVED,
      84             :         [']'] = RFC1738_RESERVED,
      85             :         ['@'] = RFC1738_RESERVED,
      86             :         ['!'] = RFC1738_RESERVED,
      87             :         ['$'] = RFC1738_RESERVED,
      88             :         ['&'] = RFC1738_RESERVED,
      89             :         ['\''] = RFC1738_RESERVED,
      90             :         ['('] = RFC1738_RESERVED,
      91             :         [')'] = RFC1738_RESERVED,
      92             :         ['*'] = RFC1738_RESERVED,
      93             :         ['+'] = RFC1738_RESERVED,
      94             :         [','] = RFC1738_RESERVED,
      95             :         [';'] = RFC1738_RESERVED,
      96             :         ['='] = RFC1738_RESERVED,
      97             : };
      98             : 
      99             : /*
     100             :  *  rfc1738_do_escape - fills a preallocated buffer with an escaped version of
     101             :  *  the given string.
     102             :  *
     103             :  *  For canonical escaping, mask should be RFC1738_ENCODE | RFC1738_RESERVED.
     104             :  *  For mandatory escaping, mask should be RFC1738_RESERVED.
     105             :  */
     106             : static char *
     107          52 : rfc1738_do_escape(char *buf, size_t bufsize,
     108             :                   const char *url, size_t len, unsigned char mask)
     109             : {
     110          15 :         size_t i;
     111          52 :         size_t j = 0;
     112        2254 :         for (i = 0; i < len; i++) {
     113        2202 :                 unsigned int c = (unsigned char) url[i];
     114        2202 :                 if (c > 126 || c < 32 || (escapees[c] & mask)) {
     115         475 :                         if (j + 3 >= bufsize) {
     116           0 :                                 return NULL;
     117             :                         }
     118         475 :                         (void) snprintf(&buf[j], 4, "%%%02X", c);
     119         475 :                         j += 3;
     120             :                 } else {
     121        1727 :                         if (j + 1 >= bufsize) {
     122           0 :                                 return NULL;
     123             :                         }
     124        1727 :                         buf[j] = c;
     125        1727 :                         j++;
     126             :                 }
     127             :         }
     128          52 :         buf[j] = '\0';
     129          52 :         return buf;
     130             : }
     131             : 
     132             : /*
     133             :  * rfc1738_escape_part - Returns a talloced buffer that contains the RFC 3986
     134             :  * compliant, escaped version of the given url segment.
     135             :  */
     136             : char *
     137          52 : rfc1738_escape_part(TALLOC_CTX *mem_ctx, const char *url)
     138             : {
     139          52 :         size_t bufsize = 0;
     140          52 :         char *buf = NULL;
     141             : 
     142          52 :         size_t len = strlen(url);
     143          52 :         if (len >= SIZE_MAX / 3) {
     144           0 :                 return NULL;
     145             :         }
     146             : 
     147          52 :         bufsize = len * 3 + 1;
     148          52 :         buf = talloc_array(mem_ctx, char, bufsize);
     149          52 :         if (buf == NULL) {
     150           0 :                 return NULL;
     151             :         }
     152             : 
     153          52 :         talloc_set_name_const(buf, buf);
     154             : 
     155          52 :         return rfc1738_do_escape(buf, bufsize, url, len,
     156             :                                  RFC1738_ENCODE | RFC1738_RESERVED);
     157             : }
     158             : 
     159             : /*
     160             :  * rfc1738_unescape() - Converts url-escaped characters in the string.
     161             :  *
     162             :  * The two characters following a '%' in a string should be hex digits that
     163             :  * describe an encoded byte. For example, "%25" is hex 0x25 or '%' in ASCII;
     164             :  * this is the only way to include a % in the unescaped string. Any character
     165             :  * can be escaped, including plain letters (e.g. "%61" for "a"). Anything
     166             :  * other than 2 hex characters following the % is an error.
     167             :  *
     168             :  * The conversion is done in-place, which is always safe as unescapes can only
     169             :  * shorten the string.
     170             :  *
     171             :  * Returns a pointer to the end of the string (that is, the '\0' byte), or
     172             :  * NULL on error, at which point s is in an undefined state.
     173             :  *
     174             :  * Note that after `char *e = rfc_unescape(s)`, `strlen(s)` will not equal
     175             :  * `e - s` if s originally contained "%00". You might want to check for this.
     176             :  */
     177             : 
     178         235 : _PUBLIC_ char *rfc1738_unescape(char *s)
     179             : {
     180          23 :         size_t i, j;        /* i is write, j is read */
     181       10572 :         for (i = 0, j = 0; s[j] != '\0'; i++, j++) {
     182       10346 :                 if (s[j] == '%') {
     183         271 :                         uint8_t v;
     184         271 :                         bool ok;
     185             : 
     186         271 :                         ok = hex_byte(&s[j+1], &v);
     187         271 :                         if (!ok) {
     188           9 :                                 return NULL;
     189             :                         }
     190         262 :                         j += 2; /* OK; hex_byte() has checked ahead */
     191         262 :                         s[i] = (unsigned char)v;
     192             :                 } else {
     193       10075 :                         s[i] = s[j];
     194             :                 }
     195             :         }
     196         226 :         s[i] = '\0';
     197         226 :         return s + i;
     198             : }
 |