// $Id: strtox.c,v 1.2 2014/02/03 01:30:25 daviddu Exp $
// $Source: /afs/awd/projects/eclipz/KnowledgeBase/.cvsroot/eclipz/chips/p8/working/procedures/lib/strtox.c,v $
//-----------------------------------------------------------------------------
// *! (C) Copyright International Business Machines Corp. 2013
// *! All Rights Reserved -- Property of IBM
// *! *** IBM Confidential ***
//-----------------------------------------------------------------------------

/// \file strtox.c
/// \brief Implementation of strtol(), strtoul(), strtoll() and strtoull()
///
/// Standard String Conversion Routines
///
/// This file contains implementations of strtol(), strtoul(), strtoll() and
/// strtoull(). These APIs are all called as
///
/// \code
/// strtoX(const char* str, char** endptr, int base)
/// \endcode
///
/// where X is
///
/// - l : Convert to a long integer
/// - ul : Convert to an unsigned long integer
/// - ll : Convert to a long long integer
/// - ull : Convert to an unsigned long long integer
///
/// \param str The string to convert
///
/// \param endptr If non-null, will be set to a pointer to the portion of the
/// string following the convertible portion. If no conversion is performed
/// then the original \a str is returned here.
///
/// \param base Either 0 to indicate that the base should be derived from
/// radix markers in the string, or a number in the range 2 to 36 inclusive.
///
/// The APIs convert the initial portion of the string pointed to by \a str to
/// an integer, which is either a long integer (strtol), an unsigned long
/// (strtoul()), a long long (strtoll), or an unsigned long long
/// (strtoull).
/// First, the APIs decompose the input string into three parts:
///
/// - An initial, possibly empty, sequence of white-space characters (as
/// specified by isspace())
///
/// - A subject sequence interpreted as an integer represented in some radix
/// determined by the value of \a base
///
/// - A final string of one or more unrecognized characters, including the
/// terminating null byte of the input string.
///
/// The APIs then attempt to convert the subject sequence to an integer of the
/// required type and return the result.
///
/// If the value of \a base is 0, the expected form of the subject sequence is
/// that of a decimal constant, octal constant, or hexadecimal constant, any
/// of which may be preceded by a '+' or '-' sign. A decimal constant begins
/// with a non-zero digit, and consists of a sequence of decimal digits. An
/// octal constant consists of the prefix '0' optionally followed by a
/// sequence of the digits '0' to '7' only. A hexadecimal constant consists of
/// the prefix 0x or 0X followed by a sequence of the decimal digits and
/// letters 'a' (or 'A' ) to 'f' (or 'F' ) with values 10 to 15 respectively.
///
/// If the value of \a base is between 2 and 36, the expected form of the
/// subject sequence is a sequence of letters and digits representing an
/// integer with the radix specified by base, optionally preceded by a '+' or
/// '-' sign. The letters from 'a' (or 'A' ) to 'z' (or 'Z' ) inclusive are
/// ascribed the values 10 to 35; only letters whose ascribed values are less
/// than that of base are permitted. If the value of base is 16, the
/// characters 0x or 0X may optionally precede the sequence of letters and
/// digits, following the sign if present.
///
/// The subject sequence is defined as the longest initial subsequence of the
/// input string, starting with the first non-white-space character that is of
/// the expected form.
/// The subject sequence contains no characters if the
/// input string is empty or consists entirely of white-space characters, or if
/// the first non-white-space character is other than a sign or a permissible
/// letter or digit.
///
/// If the subject sequence has the expected form and the value of base is 0,
/// the sequence of characters starting with the first digit will be
/// interpreted as an integer constant. If the subject sequence has the
/// expected form and the value of base is between 2 and 36, it will be used
/// as the base for conversion, ascribing to each letter its value as given
/// above. If the subject sequence begins with a minus sign, the value
/// resulting from the conversion will be negated. A pointer to the final
/// string will be stored in the object pointed to by \a endptr, provided that
/// \a endptr is not a null pointer.
///
/// If the subject sequence is empty or does not have the expected form, no
/// conversion is performed; the value of \a str is stored in the object
/// pointed to by \a endptr, provided that \a endptr is not a null pointer.
///
/// Note that the unsigned APIs silently convert signed representations into
/// the equivalent unsigned number.
///
/// Since 0, (L)LONG_MIN and (U)(L)LONG_MAX are returned on error and are
/// also valid returns on success, there is no way for an SSX application to
/// determine whether the conversion succeeded or failed (since SSX does not
/// support \a errno). For this reason it is recommended that SSX-only
/// applications use the underlying APIs _strtol(), _strtoul(), _strtoll() and
/// _strtoull(), or even better the extended APIs strtoi32(), strtou32(),
/// strtoi64() or strtou64() discussed further below.
///
/// Upon successful completion, strtoX() returns the converted
/// value, if any. If no conversion could be performed or there was an error
/// in the base specification, 0 is returned.
///
/// If the correct value is outside the range of representable values,
/// (L)LONG_MIN or (U)(L)LONG_MAX will be returned (according to the sign
/// and type of the value).
///
/// Note: This specification is adapted from IEEE Std. 1003.1, 2003 Edition
///
///
/// Underlying APIs
///
/// The APIs underlying the standard APIs are all called as
///
/// \code
/// int _strtoX(const char* str, char** endptr, int base, TYPE* value)
/// \endcode
///
/// where TYPE is the integer type selected by X, and X is
///
/// - l : Convert to a long integer
/// - ul : Convert to an unsigned long integer
/// - ll : Convert to a long long integer
/// - ull : Convert to an unsigned long long integer
///
/// \param str The string to convert
///
/// \param endptr If non-null, will be set to a pointer to the portion of the
/// string following the convertible portion. If no conversion is performed
/// then the original \a str is returned here.
///
/// \param base Either 0 to indicate that the base should be derived from
/// radix markers in the string, or a number in the range 2 to 36 inclusive.
///
/// \param value The converted value, returned as the return value of the
/// standard API.
///
/// The return value of the underlying APIs is one of the following
///
/// \retval 0 Success
///
/// \retval -STRTOX_NO_CONVERSION_EMPTY No conversion was performed because the
/// string was effectively empty.
///
/// \retval -STRTOX_NO_CONVERSION_PARSE No conversion was performed because the
/// string did not parse as an integer.
///
/// \retval -STRTOX_INVALID_ARGUMENT No conversion was performed because the
/// \a base specification was not valid.
///
/// \retval -STRTOX_UNDERFLOW_STRTOL1 Conversion resulted in underflow
///
/// \retval -STRTOX_UNDERFLOW_STRTOL2 Conversion resulted in underflow
///
/// \retval -STRTOX_UNDERFLOW_STRTOLL1 Conversion resulted in underflow
///
/// \retval -STRTOX_UNDERFLOW_STRTOLL2 Conversion resulted in underflow
///
/// \retval -STRTOX_OVERFLOW_STRTOL1 Conversion resulted in overflow
///
/// \retval -STRTOX_OVERFLOW_STRTOL2 Conversion resulted in overflow
///
/// \retval -STRTOX_OVERFLOW_STRTOLL1 Conversion resulted in overflow
///
/// \retval -STRTOX_OVERFLOW_STRTOLL2 Conversion resulted in overflow
///
/// \retval -STRTOX_OVERFLOW_STRTOUL Conversion resulted in overflow
/// (_strtoul())
///
/// \retval -STRTOX_OVERFLOW_STRTOULL Conversion resulted in overflow
/// (_strtoull())
///
/// \note (review) The implementations in this file return these STRTOX_*
/// codes as written, without negating them; confirm the intended sign
/// convention against strtox.h.
///
///
/// Extended APIs
///
/// The extended APIs are the preferred way to do portable integer
/// conversion. These APIs are all called as
///
/// \code
/// int strtoX(const char* str, char** endptr, int base, TYPE* value)
/// \endcode
///
/// where TYPE is the integer type selected by X, and X is
///
/// - i32 : Convert to an int32_t
/// - u32 : Convert to a uint32_t
/// - i64 : Convert to an int64_t
/// - u64 : Convert to a uint64_t
///
/// \param str The string to convert
///
/// \param endptr If non-null, will be set to a pointer to the portion of the
/// string following the convertible portion. If no conversion is performed
/// then the original \a str is returned here.
///
/// \param base Either 0 to indicate that the base should be derived from
/// radix markers in the string, or a number in the range 2 to 36 inclusive.
///
/// \param value The converted value
///
/// The return value of the extended APIs is one of the following
///
/// \retval 0 Success
///
/// \retval -STRTOX_NO_CONVERSION_EMPTY No conversion was performed because the
/// string was effectively empty.
///
/// \retval -STRTOX_NO_CONVERSION_PARSE No conversion was performed because the
/// string did not parse as an integer.
///
/// \retval -STRTOX_INVALID_ARGUMENT No conversion was performed because the
/// \a base specification was not valid.
/// /// \retval -STRTOX_UNDERFLOW_STRTOL1 Conversion resulted in underflow /// /// \retval -STRTOX_UNDERFLOW_STRTOL2 Conversion resulted in underflow /// /// \retval -STRTOX_UNDERFLOW_STRTOLL1 Conversion resulted in underflow /// /// \retval -STRTOX_UNDERFLOW_STRTOLL2 Conversion resulted in underflow /// /// \retval -STRTOX_OVERFLOW_STRTOL1 Conversion resulted in overflow /// /// \retval -STRTOX_OVERFLOW_STRTOL2 Conversion resulted in overflow /// /// \retval -STRTOX_OVERFLOW_STRTOLL1 Conversion resulted in overflow /// /// \retval -STRTOX_OVERFLOW_STRTOLL2 Conversion resulted in overflow /// #include "ssx.h" #include "ctype.h" #include "libssx.h" #include "strtox.h" // Skip whitespace static const char * skip_whitespace(const char *s) { while (isspace(*s)) { s++; } return s; } // Pick up a +/- sign. This is a predicate returning 1 if the value is // negated. static int sign(const char** s) { if (**s == '+') { (*s)++; return 0; } else if (**s == '-') { (*s)++; return 1; } else { return 0; } } // Look for a radix mark (0, 0[xX]). The string pointer is advanced if it is a // hex mark (0[xX]), but not for a simple '0' which could be either the start // of an octal constant or simply the number 0. The return value is either 8, // 10 or 16. static int radix_mark(const char** s) { const char* p = *s; if (p[0] == '0') { if ((p[1] == 'x') || (p[1] == 'X')) { *s += 2; return 16; } else { return 8; } } else { return 10; } } // Parse a character as a radix-base digit. Return the value of the digit or // -1 if it is not a legal digit for the radix. 
static int parse_digit(char c, int radix) { if (isdigit(c)) { if ((c - '0') < radix) { return c - '0'; } else { return -1; } } else if (radix <= 10) { return -1; } else { if (islower(c)) { if ((c - 'a') < (radix - 10)) { return c - 'a' + 10; } else { return -1; } } else if (isupper(c)) { if ((c - 'A') < (radix - 10)) { return c - 'A' + 10; } else { return -1; } } else { return -1; } } } // The most basic API is strtox(), which converts a string to an unsigned long // long. All of the base APIs are written in terms of this. This is legal due // to the fact that conversion is defined to continue even in the event of // overflow. This API may return the codes STRTOX_NO_CONVERSION_EMPTY, // STRTOX_NO_CONVERSION_PARSE or STRTOX_INVALID_ARGUMENT, // which the standard APIs always convert to a 0 // return value. Otherwise the flags 'overflow' and 'negative' are used by // the base APIs to determine how to handle special cases. static int strtox(const char *str, char **endptr, int base, unsigned long long* value, int* negative, int* overflow) { const char* s; unsigned long long new; int rc, radix, digit; do { s = str; *value = 0; *negative = 0; *overflow = 0; // Initial error checks if ((base != 0) && ((base < 2) || (base > 36))) { rc = STRTOX_INVALID_ARGUMENT; break; } // Skip whitespace s = skip_whitespace(s); if (*s == '\0') { rc = STRTOX_NO_CONVERSION_EMPTY; break; } // Process a +/- sign. Only one is allowed. *negative = sign(&s); // Look for a radix mark. Note that if base == 16 this will cause the // skip of a leading 0 in the string not followed by [xX], but that's // OK because it doesn't change the result of the conversion. if (base == 0) { radix = radix_mark(&s); } else { radix = base; if (radix == 16) { radix_mark(&s); } } // Parse. Note that once overflow is detected we continue to parse // (but ignore the data). 
rc = STRTOX_NO_CONVERSION_PARSE; while ((digit = parse_digit(*s, radix)) >= 0) { s++; if (!*overflow) { rc = 0; new = (*value * radix) + digit; if (new < *value) { *overflow = 1; } else { *value = new; } } } } while(0); if (endptr) { if (rc == 0) { *endptr = (char*)s; } else { *endptr = (char*)str; } } return rc; } /// See documentation for the file strtox.c int _strtol(const char* str, char** endptr, int base, long* value) { int rc, negative, overflow; unsigned long long value_ull; rc = strtox(str, endptr, base, &value_ull, &negative, &overflow); if (rc) { *value = 0; } else { if (overflow || (value_ull != (unsigned long)value_ull)) { if (negative) { rc = STRTOX_UNDERFLOW_STRTOL1; *value = LONG_MIN; } else { rc = STRTOX_OVERFLOW_STRTOL1; *value = LONG_MAX; } } else if (negative) { if (value_ull > ((unsigned long long)LONG_MAX + 1ull)) { rc = STRTOX_UNDERFLOW_STRTOL2; *value = LONG_MIN; } else { *value = ~value_ull + 1; } } else if (value_ull > (unsigned long long)LONG_MAX) { rc = STRTOX_OVERFLOW_STRTOL2; *value = LONG_MAX; } else { *value = value_ull; } } return rc; } /// See documentation for the file strtox.c int _strtoll(const char* str, char** endptr, int base, long long* value) { int rc, negative, overflow; unsigned long long value_ull; rc = strtox(str, endptr, base, &value_ull, &negative, &overflow); if (rc) { *value = 0; } else { if (overflow) { if (negative) { rc = STRTOX_UNDERFLOW_STRTOLL1; *value = LLONG_MIN; } else { rc = STRTOX_OVERFLOW_STRTOLL1; *value = LLONG_MAX; } } else if (negative) { if (value_ull > ((unsigned long long)LLONG_MAX + 1ull)) { rc = STRTOX_UNDERFLOW_STRTOLL2; *value = LLONG_MIN; } else { *value = ~value_ull + 1; } } else if (value_ull > (unsigned long long)LLONG_MAX) { rc = STRTOX_OVERFLOW_STRTOLL2; *value = LLONG_MAX; } else { *value = value_ull; } } return rc; } /// See documentation for the file strtox.c int _strtoul(const char* str, char** endptr, int base, unsigned long* value) { int rc, negative, overflow; unsigned long long 
value_ull; rc = strtox(str, endptr, base, &value_ull, &negative, &overflow); if (rc) { *value = 0; } else { if (overflow || (value_ull != (unsigned long)value_ull)) { rc = STRTOX_OVERFLOW_STRTOUL; *value = ULONG_MAX; } else { *value = value_ull; if (negative) { *value = ~*value + 1; } } } return rc; } /// See documentation for the file strtox.c int _strtoull(const char* str, char** endptr, int base, unsigned long long* value) { int rc, negative, overflow; rc = strtox(str, endptr, base, value, &negative, &overflow); if (rc) { *value = 0; } else { if (overflow) { rc = STRTOX_OVERFLOW_STRTOULL; *value = ULLONG_MAX; } else { if (negative) { *value = ~*value + 1; } } } return rc; } /// See documentation for the file strtox.c long int strtol(const char* str, char** endptr, int base) { long int value; _strtol(str, endptr, base, &value); return value; } /// See documentation for the file strtox.c long long int strtoll(const char* str, char** endptr, int base) { long long int value; _strtoll(str, endptr, base, &value); return value; } /// See documentation for the file strtox.c unsigned long int strtoul(const char* str, char** endptr, int base) { unsigned long int value; _strtoul(str, endptr, base, &value); return value; } /// See documentation for the file strtox.c unsigned long long int strtoull(const char* str, char** endptr, int base) { unsigned long long int value; _strtoull(str, endptr, base, &value); return value; } #if (__GNUC__ < 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ <= 1)) /// Internal version of strtol() /// /// ppcnf-mcp5 (GCC 4.1) requires that the entry point __strtol_internal() be /// present at certain optimization levels. This is equivalent to strtol() /// except that it takes an extra argument that must be == 0. The \a group /// parameter is supposed to control locale-specific thousands grouping. 
long int __strtol_internal(const char* str, char** endptr, int base, int group) { if (group != 0) { SSX_PANIC(STRTOX_INVALID_ARGUMENT_STRTOL); } return strtol(str, endptr, base); } #endif