677 lines
30 KiB
C++
677 lines
30 KiB
C++
// Copyright 2006 Google Inc. All Rights Reserved.
|
|
// Authors: Numerous. Principal maintainers are csilvers and zunger.
|
|
//
|
|
// This is a grab-bag file for string utilities involved in escaping and
|
|
// unescaping strings in various ways. Who knew there were so many?
|
|
//
|
|
// NOTE: Although the functions declared here have been imported into
|
|
// the global namespace, the using statements are slated for removal.
|
|
// Do not refer to these symbols without properly namespace-qualifying
|
|
// them with "strings::". Of course you may also use "using" statements
|
|
// within a .cc file.
|
|
//
|
|
// There are more escaping functions in:
|
|
// webutil/html/tagutils.h (Escaping strings for HTML, PRE, JavaScript, etc.)
|
|
// webutil/url/url.h (Escaping for URL's, both RFC-2396 and other methods)
|
|
// template/template_modifiers.h (All sorts of stuff)
|
|
// util/regex/re2/re2.h (Escaping for literals within regular expressions
|
|
// - see RE2::QuoteMeta).
|
|
// And probably many more places, as well.
|
|
|
|
#ifndef STRINGS_ESCAPING_H_
|
|
#define STRINGS_ESCAPING_H_
|
|
|
|
#include <stddef.h>
|
|
#include <string>
|
|
using std::string;
|
|
#include <vector>
|
|
using std::vector;
|
|
|
|
#include <common/logging.h>
|
|
|
|
#include "gutil/strings/ascii_ctype.h"
|
|
#include "gutil/strings/charset.h"
|
|
#include "gutil/strings/stringpiece.h"
|
|
|
|
namespace strings {
|
|
|
|
// ----------------------------------------------------------------------
|
|
// EscapeStrForCSV()
|
|
// Escapes the quotes in 'src' by doubling them. This is necessary
|
|
// for generating CSV files (see SplitCSVLine).
|
|
// Returns the number of characters written into dest (not counting
|
|
// the \0) or -1 if there was insufficient space.
|
|
//
|
|
// Example: [some "string" to test] --> [some ""string"" to test]
|
|
// ----------------------------------------------------------------------
|
|
int EscapeStrForCSV(const char* src, char* dest, int dest_len);
|
|
|
|
// ----------------------------------------------------------------------
|
|
// UnescapeCEscapeSequences()
|
|
// Copies "source" to "dest", rewriting C-style escape sequences
|
|
// -- '\n', '\r', '\\', '\ooo', etc -- to their ASCII
|
|
// equivalents. "dest" must be sufficiently large to hold all
|
|
// the characters in the rewritten string (i.e. at least as large
|
|
// as strlen(source) + 1 should be safe, since the replacements
|
|
// are always shorter than the original escaped sequences). It's
|
|
// safe for source and dest to be the same. RETURNS the length
|
|
// of dest.
|
|
//
|
|
// It allows hex sequences \xhh, or generally \xhhhhh with an
|
|
// arbitrary number of hex digits, but all of them together must
|
|
// specify a value of a single byte (e.g. \x0045 is equivalent
|
|
// to \x45, and \x1234 is erroneous). If the value is too large,
|
|
// it is truncated to 8 bits and an error is set. This is also
|
|
// true of octal values that exceed 0xff.
|
|
//
|
|
// It also allows escape sequences of the form \uhhhh (exactly four
|
|
// hex digits, upper or lower case) or \Uhhhhhhhh (exactly eight
|
|
// hex digits, upper or lower case) to specify a Unicode code
|
|
// point. The dest array will contain the UTF8-encoded version of
|
|
// that code-point (e.g., if source contains \u2019, then dest will
|
|
// contain the three bytes 0xE2, 0x80, and 0x99). For the inverse
|
|
// transformation, use UniLib::UTF8EscapeString
|
|
// (util/utf8/public/unilib.h), not CEscapeString.
|
|
//
|
|
// Errors: In the first form of the call, errors are reported with
|
|
// LOG(ERROR). The same is true for the second form of the call if
|
|
// the pointer to the string vector is NULL; otherwise, error
|
|
// messages are stored in the vector. In either case, the effect on
|
|
// the dest array is not defined, but rest of the source will be
|
|
// processed.
|
|
//
|
|
// *** DEPRECATED: Use CUnescape() in new code ***
|
|
// ----------------------------------------------------------------------
|
|
int UnescapeCEscapeSequences(const char* source, char* dest);
|
|
int UnescapeCEscapeSequences(const char* source, char* dest,
|
|
vector<string>* errors);
|
|
|
|
// ----------------------------------------------------------------------
|
|
// UnescapeCEscapeString()
|
|
// This does the same thing as UnescapeCEscapeSequences, but creates
|
|
// a new string. The caller does not need to worry about allocating
|
|
// a dest buffer. This should be used for non performance critical
|
|
// tasks such as printing debug messages. It is safe for src and dest
|
|
// to be the same.
|
|
//
|
|
// The second call stores its errors in a supplied string vector.
|
|
// If the string vector pointer is NULL, it reports the errors with LOG().
|
|
//
|
|
// In the first and second calls, the length of dest is returned. In the
|
|
// the third call, the new string is returned.
|
|
//
|
|
// *** DEPRECATED: Use CUnescape() in new code ***
|
|
// ----------------------------------------------------------------------
|
|
int UnescapeCEscapeString(const string& src, string* dest);
|
|
int UnescapeCEscapeString(const string& src, string* dest,
|
|
vector<string>* errors);
|
|
string UnescapeCEscapeString(const string& src);
|
|
|
|
// ----------------------------------------------------------------------
|
|
// CUnescape()
|
|
// Copies "source" to "dest", rewriting C-style escape sequences
|
|
// -- '\n', '\r', '\\', '\ooo', etc -- to their ASCII
|
|
// equivalents. "dest" must be sufficiently large to hold all
|
|
// the characters in the rewritten string (i.e. at least as large
|
|
// as source.size() should be safe, since the replacements
|
|
// are never longer than the original escaped sequences). It's
|
|
// safe for source and dest to be the same. RETURNS true if
|
|
// conversion was successful, false otherwise. Stores the size of
|
|
// the result in 'dest_len'.
|
|
//
|
|
// It allows hex sequences \xhh, or generally \xhhhhh with an
|
|
// arbitrary number of hex digits, but all of them together must
|
|
// specify a value of a single byte (e.g. \x0045 is equivalent
|
|
// to \x45, and \x1234 is erroneous). If the value is too large,
|
|
// an error is set. This is also true of octal values that exceed 0xff.
|
|
//
|
|
// It also allows escape sequences of the form \uhhhh (exactly four
|
|
// hex digits, upper or lower case) or \Uhhhhhhhh (exactly eight
|
|
// hex digits, upper or lower case) to specify a Unicode code
|
|
// point. The dest array will contain the UTF8-encoded version of
|
|
// that code-point (e.g., if source contains \u2019, then dest will
|
|
// contain the three bytes 0xE2, 0x80, and 0x99). For the inverse
|
|
// transformation, use UniLib::UTF8EscapeString
|
|
// (util/utf8/public/unilib.h), not CEscapeString.
|
|
//
|
|
// Errors: Sets the description of the first encountered error in
|
|
// 'error'. To disable error reporting, set 'error' to NULL.
|
|
// ----------------------------------------------------------------------
|
|
bool CUnescape(const StringPiece& source, char* dest, int* dest_len,
|
|
string* error);
|
|
|
|
bool CUnescape(const StringPiece& source, string* dest, string* error);
|
|
|
|
// A version with no error reporting.
|
|
inline bool CUnescape(const StringPiece& source, string* dest) {
|
|
return CUnescape(source, dest, NULL);
|
|
}
|
|
|
|
// ----------------------------------------------------------------------
|
|
// CUnescapeForNullTerminatedString()
|
|
//
|
|
// This has the same behavior as CUnescape, except that each octal, hex,
|
|
// or Unicode escape sequence that resolves to a null character ('\0')
|
|
// is left in its original escaped form. The result is a
|
|
// display-formatted string that can be interpreted as a null-terminated
|
|
// const char* and will not be cut short if it contains embedded null
|
|
// characters.
|
|
//
|
|
// ----------------------------------------------------------------------
|
|
|
|
bool CUnescapeForNullTerminatedString(const StringPiece& source,
|
|
char* dest,
|
|
int* dest_len,
|
|
string* error);
|
|
|
|
bool CUnescapeForNullTerminatedString(const StringPiece& source,
|
|
string* dest,
|
|
string* error);
|
|
|
|
// A version with no error reporting.
|
|
inline bool CUnescapeForNullTerminatedString(const StringPiece& source,
|
|
string* dest) {
|
|
return CUnescapeForNullTerminatedString(source, dest, NULL);
|
|
}
|
|
|
|
// ----------------------------------------------------------------------
|
|
// CEscapeString()
|
|
// CHexEscapeString()
|
|
// Utf8SafeCEscapeString()
|
|
// Utf8SafeCHexEscapeString()
|
|
// Copies 'src' to 'dest', escaping dangerous characters using
|
|
// C-style escape sequences. This is very useful for preparing query
|
|
// flags. 'src' and 'dest' should not overlap. The 'Hex' version uses
|
|
// hexadecimal rather than octal sequences. The 'Utf8Safe' version
|
|
// doesn't touch UTF-8 bytes.
|
|
// Returns the number of bytes written to 'dest' (not including the \0)
|
|
// or -1 if there was insufficient space.
|
|
//
|
|
// Currently only \n, \r, \t, ", ', \ and !ascii_isprint() chars are escaped.
|
|
// ----------------------------------------------------------------------
|
|
int CEscapeString(const char* src, int src_len, char* dest, int dest_len);
|
|
int CHexEscapeString(const char* src, int src_len, char* dest, int dest_len);
|
|
int Utf8SafeCEscapeString(const char* src, int src_len, char* dest,
|
|
int dest_len);
|
|
int Utf8SafeCHexEscapeString(const char* src, int src_len, char* dest,
|
|
int dest_len);
|
|
|
|
// ----------------------------------------------------------------------
|
|
// CEscape()
|
|
// CHexEscape()
|
|
// Utf8SafeCEscape()
|
|
// Utf8SafeCHexEscape()
|
|
// More convenient form of CEscapeString: returns result as a "string".
|
|
// This version is slower than CEscapeString() because it does more
|
|
// allocation. However, it is much more convenient to use in
|
|
// non-speed-critical code like logging messages etc.
|
|
// ----------------------------------------------------------------------
|
|
string CEscape(const StringPiece& src);
|
|
string CHexEscape(const StringPiece& src);
|
|
string Utf8SafeCEscape(const StringPiece& src);
|
|
string Utf8SafeCHexEscape(const StringPiece& src);
|
|
|
|
// ----------------------------------------------------------------------
|
|
// BackslashEscape()
|
|
// Given a string and a list of characters to escape, replace any
|
|
// instance of one of those characters with \ + that character. For
|
|
// example, when exporting maps to /varz, label values need to have
|
|
// all dots escaped. Appends the result to dest.
|
|
// BackslashUnescape()
|
|
// Replace \ + any of the indicated "unescape me" characters with just
|
|
// that character. Appends the result to dest.
|
|
//
|
|
// IMPORTANT:
|
|
// This function does not escape \ by default, so if you do not include
|
|
// it in the chars to escape you will most certainly get an undesirable
|
|
// result. That is, it won't be a reversible operation:
|
|
// string src = "foo\\:bar";
|
|
// BackslashUnescape(BackslashEscape(src, ":"), ":") == "foo\\\\:bar"
|
|
// On the other hand, for all strings "src", the following is true:
|
|
// BackslashUnescape(BackslashEscape(src, ":\\"), ":\\") == src
|
|
// ----------------------------------------------------------------------
|
|
void BackslashEscape(const StringPiece& src,
|
|
const strings::CharSet& to_escape,
|
|
string* dest);
|
|
void BackslashUnescape(const StringPiece& src,
|
|
const strings::CharSet& to_unescape,
|
|
string* dest);
|
|
|
|
inline string BackslashEscape(const StringPiece& src,
|
|
const strings::CharSet& to_escape) {
|
|
string s;
|
|
BackslashEscape(src, to_escape, &s);
|
|
return s;
|
|
}
|
|
|
|
inline string BackslashUnescape(const StringPiece& src,
|
|
const strings::CharSet& to_unescape) {
|
|
string s;
|
|
BackslashUnescape(src, to_unescape, &s);
|
|
return s;
|
|
}
|
|
|
|
// ----------------------------------------------------------------------
|
|
// QuotedPrintableUnescape()
|
|
// Check out http://www.cis.ohio-state.edu/htbin/rfc/rfc2045.html for
|
|
// more details, only briefly implemented. But from the web...
|
|
// Quoted-printable is an encoding method defined in the MIME
|
|
// standard. It is used primarily to encode 8-bit text (such as text
|
|
// that includes foreign characters) into 7-bit US ASCII, creating a
|
|
// document that is mostly readable by humans, even in its encoded
|
|
// form. All MIME compliant applications can decode quoted-printable
|
|
// text, though they may not necessarily be able to properly display the
|
|
// document as it was originally intended. As quoted-printable encoding
|
|
// is implemented most commonly, printable ASCII characters (values 33
|
|
// through 126, excluding 61), tabs and spaces that do not appear at the
|
|
// end of lines, and end-of-line characters are not encoded. Other
|
|
// characters are represented by an equal sign (=) immediately followed
|
|
// by that character's hexadecimal value. Lines that are longer than 76
|
|
// characters are shortened by line breaks, with the equal sign marking
|
|
// where the breaks occurred.
|
|
//
|
|
// Note that QuotedPrintableUnescape is different from 'Q'-encoding as
|
|
// defined in rfc2047. In particular, This does not treat '_'s as spaces.
|
|
//
|
|
// See QEncodingUnescape().
|
|
//
|
|
// Copies "src" to "dest", rewriting quoted printable escape sequences
|
|
// =XX to their ASCII equivalents. src is not null terminated, instead
|
|
// specify len. I recommend that slen<szdest, but we honor szdest
|
|
// anyway.
|
|
// RETURNS the length of dest.
|
|
// ----------------------------------------------------------------------
|
|
int QuotedPrintableUnescape(const char* src, int slen, char* dest, int szdest);
|
|
|
|
// ----------------------------------------------------------------------
|
|
// QEncodingUnescape()
|
|
// This is very similar to QuotedPrintableUnescape except that we convert
|
|
// '_'s into spaces. (See RFC 2047)
|
|
// http://www.faqs.org/rfcs/rfc2047.html.
|
|
//
|
|
// Copies "src" to "dest", rewriting q-encoding escape sequences
|
|
// =XX to their ASCII equivalents. src is not null terminated, instead
|
|
// specify len. I recommend that slen<szdest, but we honour szdest
|
|
// anyway.
|
|
// RETURNS the length of dest.
|
|
// ----------------------------------------------------------------------
|
|
int QEncodingUnescape(const char* src, int slen, char* dest, int szdest);
|
|
|
|
// ----------------------------------------------------------------------
|
|
// Base64Unescape()
|
|
// WebSafeBase64Unescape()
|
|
// Copies "src" to "dest", where src is in base64 and is written to its
|
|
// ASCII equivalents. src is not null terminated, instead specify len.
|
|
// I recommend that slen<szdest, but we honor szdest anyway.
|
|
// RETURNS the length of dest, or -1 if src contains invalid chars.
|
|
// The WebSafe variation use '-' instead of '+' and '_' instead of '/'.
|
|
// The variations that store into a string clear the string first, and
|
|
// return false (with dest empty) if src contains invalid chars; for
|
|
// these versions src and dest must be different strings.
|
|
// ----------------------------------------------------------------------
|
|
int Base64Unescape(const char* src, int slen, char* dest, int szdest);
|
|
bool Base64Unescape(const char* src, int slen, string* dest);
|
|
inline bool Base64Unescape(const string& src, string* dest) {
|
|
return Base64Unescape(src.data(), src.size(), dest);
|
|
}
|
|
|
|
int WebSafeBase64Unescape(const char* src, int slen, char* dest, int szdest);
|
|
bool WebSafeBase64Unescape(const char* src, int slen, string* dest);
|
|
inline bool WebSafeBase64Unescape(const string& src, string* dest) {
|
|
return WebSafeBase64Unescape(src.data(), src.size(), dest);
|
|
}
|
|
|
|
// Return the length to use for the output buffer given to the base64 escape
|
|
// routines. Make sure to use the same value for do_padding in both.
|
|
// This function may return incorrect results if given input_len values that
|
|
// are extremely high, which should happen rarely.
|
|
int CalculateBase64EscapedLen(int input_len, bool do_padding);
|
|
// Use this version when calling Base64Escape without a do_padding arg.
|
|
int CalculateBase64EscapedLen(int input_len);
|
|
|
|
// ----------------------------------------------------------------------
|
|
// Base64Escape()
|
|
// WebSafeBase64Escape()
|
|
// Encode "src" to "dest" using base64 encoding.
|
|
// src is not null terminated, instead specify len.
|
|
// 'dest' should have at least CalculateBase64EscapedLen() length.
|
|
// RETURNS the length of dest.
|
|
// The WebSafe variation use '-' instead of '+' and '_' instead of '/'
|
|
// so that we can place the out in the URL or cookies without having
|
|
// to escape them. It also has an extra parameter "do_padding",
|
|
// which when set to false will prevent padding with "=".
|
|
// ----------------------------------------------------------------------
|
|
int Base64Escape(const unsigned char* src, int slen, char* dest, int szdest);
|
|
int WebSafeBase64Escape(const unsigned char* src, int slen, char* dest,
|
|
int szdest, bool do_padding);
|
|
// Encode src into dest with padding.
|
|
void Base64Escape(const string& src, string* dest);
|
|
// Encode src into dest web-safely without padding.
|
|
void WebSafeBase64Escape(const string& src, string* dest);
|
|
// Encode src into dest web-safely with padding.
|
|
void WebSafeBase64EscapeWithPadding(const string& src, string* dest);
|
|
|
|
void Base64Escape(const unsigned char* src, int szsrc,
|
|
string* dest, bool do_padding);
|
|
void WebSafeBase64Escape(const unsigned char* src, int szsrc,
|
|
string* dest, bool do_padding);
|
|
|
|
// ----------------------------------------------------------------------
|
|
// Base32Unescape()
|
|
// Copies "src" to "dest", where src is in base32 and is written to its
|
|
// ASCII equivalents. src is not null terminated, instead specify len.
|
|
// RETURNS the length of dest, or -1 if src contains invalid chars.
|
|
// ----------------------------------------------------------------------
|
|
int Base32Unescape(const char* src, int slen, char* dest, int szdest);
|
|
bool Base32Unescape(const char* src, int slen, string* dest);
|
|
inline bool Base32Unescape(const string& src, string* dest) {
|
|
return Base32Unescape(src.data(), src.size(), dest);
|
|
}
|
|
|
|
// ----------------------------------------------------------------------
|
|
// Base32Escape()
|
|
// Encode "src" to "dest" using base32 encoding.
|
|
// src is not null terminated, instead specify len.
|
|
// 'dest' should have at least CalculateBase32EscapedLen() length.
|
|
// RETURNS the length of dest. RETURNS 0 if szsrc is zero, or szdest is
|
|
// too small to fit the fully encoded result. 'dest' is padded with '='.
|
|
//
|
|
// Note that this is "Base 32 Encoding" from RFC 4648 section 6.
|
|
// ----------------------------------------------------------------------
|
|
int Base32Escape(const unsigned char* src, size_t szsrc,
|
|
char* dest, size_t szdest);
|
|
bool Base32Escape(const string& src, string* dest);
|
|
|
|
// ----------------------------------------------------------------------
|
|
// Base32HexEscape()
|
|
// Encode "src" to "dest" using base32hex encoding.
|
|
// src is not null terminated, instead specify len.
|
|
// 'dest' should have at least CalculateBase32EscapedLen() length.
|
|
// RETURNS the length of dest. RETURNS 0 if szsrc is zero, or szdest is
|
|
// too small to fit the fully encoded result. 'dest' is padded with '='.
|
|
//
|
|
// Note that this is "Base 32 Encoding with Extended Hex Alphabet"
|
|
// from RFC 4648 section 7.
|
|
// ----------------------------------------------------------------------
|
|
int Base32HexEscape(const unsigned char* src, size_t szsrc,
|
|
char* dest, size_t szdest);
|
|
bool Base32HexEscape(const string& src, string* dest);
|
|
|
|
// Return the length to use for the output buffer given to the base32 escape
|
|
// routines. This function may return incorrect results if given input_len
|
|
// values that are extremely high, which should happen rarely.
|
|
int CalculateBase32EscapedLen(size_t input_len);
|
|
|
|
// ----------------------------------------------------------------------
|
|
// EightBase32DigitsToTenHexDigits()
|
|
// TenHexDigitsToEightBase32Digits()
|
|
// Convert base32 to and from hex.
|
|
//
|
|
// for EightBase32DigitsToTenHexDigits():
|
|
// *in must point to 8 base32 digits.
|
|
// *out must point to 10 bytes.
|
|
//
|
|
// for TenHexDigitsToEightBase32Digits():
|
|
// *in must point to 10 hex digits.
|
|
// *out must point to 8 bytes.
|
|
//
|
|
// Note that the Base64 functions above are different. They convert base64
|
|
// to and from binary data. We convert to and from string representations
|
|
// of hex. They deal with arbitrary lengths and we deal with single,
|
|
// whole base32 quanta.
|
|
//
|
|
// See RFC3548 at http://www.ietf.org/rfc/rfc3548.txt
|
|
// for details on base32.
|
|
// ----------------------------------------------------------------------
|
|
void EightBase32DigitsToTenHexDigits(const char* in, char* out);
|
|
void TenHexDigitsToEightBase32Digits(const char* in, char* out);
|
|
|
|
// ----------------------------------------------------------------------
|
|
// EightBase32DigitsToFiveBytes()
|
|
// FiveBytesToEightBase32Digits()
|
|
// Convert base32 to and from binary
|
|
//
|
|
// for EightBase32DigitsToTenHexDigits():
|
|
// *in must point to 8 base32 digits.
|
|
// *out must point to 5 bytes.
|
|
//
|
|
// for TenHexDigitsToEightBase32Digits():
|
|
// *in must point to 5 bytes.
|
|
// *out must point to 8 bytes.
|
|
//
|
|
// Note that the Base64 functions above are different. They deal with
|
|
// arbitrary lengths and we deal with single, whole base32 quanta.
|
|
// ----------------------------------------------------------------------
|
|
void EightBase32DigitsToFiveBytes(const char* in, unsigned char* bytes_out);
|
|
void FiveBytesToEightBase32Digits(const unsigned char* in_bytes, char* out);
|
|
|
|
// ----------------------------------------------------------------------
|
|
// EscapeFileName()
|
|
// UnescapeFileName()
|
|
// Utility functions to (un)escape strings to make them suitable for use in
|
|
// filenames. Characters not in [a-zA-Z0-9-_.] will be escaped into %XX.
|
|
// E.g: "Hello, world!" will be escaped as "Hello%2c%20world%21"
|
|
//
|
|
// NB that this function escapes slashes, so the output will be a flat
|
|
// filename and will not keep the directory structure. Slashes are replaced
|
|
// with '~', instead of a %XX sequence to make it easier for people to
|
|
// understand the escaped form when the original string is a file path.
|
|
//
|
|
// WARNING: filenames produced by these functions may not be compatible with
|
|
// Colossus FS. In particular, the '%' character has a special meaning in
|
|
// CFS.
|
|
//
|
|
// The versions that receive a string for the output will append to it.
|
|
// ----------------------------------------------------------------------
|
|
void EscapeFileName(const StringPiece& src, string* dst);
|
|
void UnescapeFileName(const StringPiece& src, string* dst);
|
|
inline string EscapeFileName(const StringPiece& src) {
|
|
string r;
|
|
EscapeFileName(src, &r);
|
|
return r;
|
|
}
|
|
inline string UnescapeFileName(const StringPiece& src) {
|
|
string r;
|
|
UnescapeFileName(src, &r);
|
|
return r;
|
|
}
|
|
|
|
// ----------------------------------------------------------------------
|
|
// Here are a couple utility methods to change ints to hex chars & back
|
|
// ----------------------------------------------------------------------
|
|
|
|
inline int int_to_hex_digit(int i) {
|
|
DCHECK((i >= 0) && (i <= 15));
|
|
return ((i < 10) ? (i + '0') : ((i - 10) + 'A'));
|
|
}
|
|
|
|
inline int int_to_lower_hex_digit(int i) {
|
|
DCHECK((i >= 0) && (i <= 15));
|
|
return (i < 10) ? (i + '0') : ((i - 10) + 'a');
|
|
}
|
|
|
|
inline int hex_digit_to_int(char c) {
|
|
/* Assume ASCII. */
|
|
DCHECK('0' == 0x30 && 'A' == 0x41 && 'a' == 0x61);
|
|
DCHECK(ascii_isxdigit(c));
|
|
int x = static_cast<unsigned char>(c);
|
|
if (x > '9') {
|
|
x += 9;
|
|
}
|
|
return x & 0xf;
|
|
}
|
|
|
|
// ----------------------------------------------------------------------
|
|
// a2b_hex()
|
|
// Description: Ascii-to-Binary hex conversion. This converts
|
|
// 2*'num' hexadecimal characters to 'num' binary data.
|
|
// Return value: 'num' bytes of binary data (via the 'to' argument)
|
|
// ----------------------------------------------------------------------
|
|
void a2b_hex(const char* from, unsigned char* to, int num);
|
|
void a2b_hex(const char* from, char* to, int num);
|
|
void a2b_hex(const char* from, string* to, int num);
|
|
string a2b_hex(const string& a);
|
|
|
|
// ----------------------------------------------------------------------
|
|
// a2b_bin()
|
|
// Description: Ascii-to-Binary binary conversion. This converts
|
|
// a.size() binary characters (ascii '0' or '1') to
|
|
// ceil(a.size()/8) bytes of binary data. The first character is
|
|
// considered the most significant if byte_order_msb is set. a is
|
|
// considered to be padded with trailing 0s if its size is not a
|
|
// multiple of 8.
|
|
// Return value: ceil(a.size()/8) bytes of binary data
|
|
// ----------------------------------------------------------------------
|
|
string a2b_bin(const string& a, bool byte_order_msb);
|
|
|
|
// ----------------------------------------------------------------------
|
|
// b2a_hex()
|
|
// Description: Binary-to-Ascii hex conversion. This converts
|
|
// 'num' bytes of binary to a 2*'num'-character hexadecimal representation
|
|
// Return value: 2*'num' characters of ascii text (via the 'to' argument)
|
|
// ----------------------------------------------------------------------
|
|
void b2a_hex(const unsigned char* from, char* to, int num);
|
|
void b2a_hex(const unsigned char* from, string* to, int num);
|
|
|
|
// ----------------------------------------------------------------------
|
|
// b2a_hex()
|
|
// Description: Binary-to-Ascii hex conversion. This converts
|
|
// 'num' bytes of binary to a 2*'num'-character hexadecimal representation
|
|
// Return value: 2*'num' characters of ascii string
|
|
// ----------------------------------------------------------------------
|
|
string b2a_hex(const char* from, int num);
|
|
string b2a_hex(const StringPiece& b);
|
|
|
|
// ----------------------------------------------------------------------
|
|
// b2a_bin()
|
|
// Description: Binary-to-Ascii binary conversion. This converts
|
|
// b.size() bytes of binary to a 8*b.size() character representation
|
|
// (ascii '0' or '1'). The highest order bit in each byte is returned
|
|
// first in the string if byte_order_msb is set.
|
|
// Return value: 8*b.size() characters of ascii text
|
|
// ----------------------------------------------------------------------
|
|
string b2a_bin(const string& b, bool byte_order_msb);
|
|
|
|
// ----------------------------------------------------------------------
|
|
// ShellEscape
|
|
// Make a shell command argument from a string.
|
|
// Returns a Bourne shell string literal such that, once the shell finishes
|
|
// expanding the argument, the argument passed on to the program being
|
|
// run will be the same as whatever you passed in.
|
|
// NOTE: This is "ported" from python2.2's commands.mkarg(); it should be
|
|
// safe for Bourne shell syntax (i.e. sh, bash), but mileage may vary
|
|
// with other shells.
|
|
// ----------------------------------------------------------------------
|
|
string ShellEscape(StringPiece src);
|
|
|
|
// Runs ShellEscape() on the arguments, concatenates them with a space, and
|
|
// returns the resulting string.
|
|
template <class InputIterator>
|
|
string ShellEscapeCommandLine(InputIterator begin, const InputIterator& end) {
|
|
string result;
|
|
for (; begin != end; ++begin) {
|
|
if (!result.empty()) result.append(" ");
|
|
result.append(ShellEscape(*begin));
|
|
}
|
|
return result;
|
|
}
|
|
|
|
// Reads at most bytes_to_read from binary_string and writes it to
|
|
// ascii_string in lower case hex.
|
|
void ByteStringToAscii(const string& binary_string, int bytes_to_read,
|
|
string* ascii_string);
|
|
|
|
inline string ByteStringToAscii(const string& binary_string,
|
|
int bytes_to_read) {
|
|
string result;
|
|
ByteStringToAscii(binary_string, bytes_to_read, &result);
|
|
return result;
|
|
}
|
|
|
|
// Converts the hex from ascii_string into binary data and
|
|
// writes the binary data into binary_string.
|
|
// Empty input successfully converts to empty output.
|
|
// Returns false and may modify output if it is
|
|
// unable to parse the hex string.
|
|
bool ByteStringFromAscii(const string& ascii_string, string* binary_string);
|
|
|
|
// Clean up a multi-line string to conform to Unix line endings.
|
|
// Reads from src and appends to dst, so usually dst should be empty.
|
|
// If there is no line ending at the end of a non-empty string, it can
|
|
// be added automatically.
|
|
//
|
|
// Four different types of input are correctly handled:
|
|
//
|
|
// - Unix/Linux files: line ending is LF, pass through unchanged
|
|
//
|
|
// - DOS/Windows files: line ending is CRLF: convert to LF
|
|
//
|
|
// - Legacy Mac files: line ending is CR: convert to LF
|
|
//
|
|
// - Garbled files: random line endings, covert gracefully
|
|
// lonely CR, lonely LF, CRLF: convert to LF
|
|
//
|
|
// @param src The multi-line string to convert
|
|
// @param dst The converted string is appended to this string
|
|
// @param auto_end_last_line Automatically terminate the last line
|
|
//
|
|
// Limitations:
|
|
//
|
|
// This does not do the right thing for CRCRLF files created by
|
|
// broken programs that do another Unix->DOS conversion on files
|
|
// that are already in CRLF format.
|
|
void CleanStringLineEndings(const string& src, string* dst,
|
|
bool auto_end_last_line);
|
|
|
|
// Same as above, but transforms the argument in place.
|
|
void CleanStringLineEndings(string* str, bool auto_end_last_line);
|
|
|
|
} // namespace strings
|
|
|
|
// The following functions used to be defined in strutil.h in the top-level
|
|
// namespace, so we alias them here. Do not add new functions here.
|
|
//
|
|
// Talk to him if you want to help.
|
|
//
|
|
// DEPRECATED(mec): Using these names in the global namespace is deprecated.
|
|
// Use the strings:: names.
|
|
|
|
using strings::EscapeStrForCSV;
|
|
using strings::UnescapeCEscapeSequences;
|
|
using strings::UnescapeCEscapeString;
|
|
using strings::CEscapeString;
|
|
using strings::CHexEscapeString;
|
|
using strings::CEscape;
|
|
using strings::CHexEscape;
|
|
using strings::BackslashEscape;
|
|
using strings::BackslashUnescape;
|
|
using strings::QuotedPrintableUnescape;
|
|
using strings::QEncodingUnescape;
|
|
using strings::Base64Unescape;
|
|
using strings::WebSafeBase64Unescape;
|
|
using strings::CalculateBase64EscapedLen;
|
|
using strings::Base64Escape;
|
|
using strings::WebSafeBase64Escape;
|
|
using strings::WebSafeBase64EscapeWithPadding;
|
|
using strings::Base32Escape;
|
|
using strings::Base32HexEscape;
|
|
using strings::CalculateBase32EscapedLen;
|
|
using strings::EightBase32DigitsToTenHexDigits;
|
|
using strings::TenHexDigitsToEightBase32Digits;
|
|
using strings::EightBase32DigitsToFiveBytes;
|
|
using strings::FiveBytesToEightBase32Digits;
|
|
using strings::int_to_hex_digit;
|
|
using strings::int_to_lower_hex_digit;
|
|
using strings::hex_digit_to_int;
|
|
using strings::a2b_hex;
|
|
using strings::a2b_bin;
|
|
using strings::b2a_hex;
|
|
using strings::b2a_bin;
|
|
using strings::ShellEscape;
|
|
using strings::ShellEscapeCommandLine;
|
|
using strings::ByteStringFromAscii;
|
|
using strings::ByteStringToAscii;
|
|
using strings::CleanStringLineEndings;
|
|
|
|
#endif // STRINGS_ESCAPING_H_
|