// Copyright 2012 Google Inc. All Rights Reserved. // // This file declares INTERNAL parts of the Split API that are inline/templated // or otherwise need to be available at compile time. The main two abstractions // defined in here are // // - SplitIterator<> // - Splitter<> // // Everything else is plumbing for those two. // // DO NOT INCLUDE THIS FILE DIRECTLY. Use this file by including // strings/split.h. // // IWYU pragma: private, include "strings/split.h" #pragma once #include using std::back_insert_iterator; using std::iterator_traits; #include using std::map; using std::multimap; #include using std::vector; #include "gutil/port.h" // for LANG_CXX11 #include "gutil/strings/stringpiece.h" #ifdef LANG_CXX11 // This must be included after "base/port.h", which defines LANG_CXX11. #include #endif // LANG_CXX11 namespace strings { namespace internal { // The default Predicate object, which doesn't filter out anything. struct NoFilter { bool operator()(StringPiece /* ignored */) { return true; } }; // This class splits a string using the given delimiter, returning the split // substrings via an iterator interface. An optional Predicate functor may be // supplied, which will be used to filter the split strings: strings for which // the predicate returns false will be skipped. A Predicate object is any // functor that takes a StringPiece and returns bool. By default, the NoFilter // Predicate is used, which does not filter out anything. // // This class is NOT part of the public splitting API. // // Usage: // // using strings::delimiter::Literal; // Literal d(","); // for (SplitIterator it("a,b,c", d), end(d); it != end; ++it) { // StringPiece substring = *it; // DoWork(substring); // } // // The explicit single-argument constructor is used to create an "end" iterator. // The two-argument constructor is used to split the given text using the given // delimiter. template class SplitIterator { public: using iterator_category = std::input_iterator_tag; using value_type = StringPiece; using difference_type = ptrdiff_t; using pointer = StringPiece*; using reference = StringPiece&; // Two constructors for "end" iterators. explicit SplitIterator(Delimiter d) : delimiter_(std::move(d)), predicate_(), is_end_(true) {} SplitIterator(Delimiter d, Predicate p) : delimiter_(std::move(d)), predicate_(std::move(p)), is_end_(true) {} // Two constructors taking the text to iterator. SplitIterator(StringPiece text, Delimiter d) : text_(std::move(text)), delimiter_(std::move(d)), predicate_(), is_end_(false) { ++(*this); } SplitIterator(StringPiece text, Delimiter d, Predicate p) : text_(std::move(text)), delimiter_(std::move(d)), predicate_(std::move(p)), is_end_(false) { ++(*this); } StringPiece operator*() { return curr_piece_; } StringPiece* operator->() { return &curr_piece_; } SplitIterator& operator++() { do { if (text_.end() == curr_piece_.end()) { // Already consumed all of text_, so we're done. is_end_ = true; return *this; } StringPiece found_delimiter = delimiter_.Find(text_); assert(found_delimiter.data() != NULL); assert(text_.begin() <= found_delimiter.begin()); assert(found_delimiter.end() <= text_.end()); // found_delimiter is allowed to be empty. // Sets curr_piece_ to all text up to but excluding the delimiter itself. // Sets text_ to remaining data after the delimiter. curr_piece_.set(text_.begin(), found_delimiter.begin() - text_.begin()); text_.remove_prefix(found_delimiter.end() - text_.begin()); } while (!predicate_(curr_piece_)); return *this; } SplitIterator operator++(int /* postincrement */) { SplitIterator old(*this); ++(*this); return old; } bool operator==(const SplitIterator& other) const { // Two "end" iterators are always equal. If the two iterators being compared // aren't both end iterators, then we fallback to comparing their fields. // Importantly, the text being split must be equal and the current piece // within the text being split must also be equal. The delimiter_ and // predicate_ fields need not be checked here because they're template // parameters that are already part of the SplitIterator's type. return (is_end_ && other.is_end_) || (is_end_ == other.is_end_ && text_ == other.text_ && text_.data() == other.text_.data() && curr_piece_ == other.curr_piece_ && curr_piece_.data() == other.curr_piece_.data()); } bool operator!=(const SplitIterator& other) const { return !(*this == other); } private: // The text being split. Modified as delimited pieces are consumed. StringPiece text_; Delimiter delimiter_; Predicate predicate_; bool is_end_; // Holds the currently split piece of text. Will always refer to string data // within text_. This value is returned when the iterator is dereferenced. StringPiece curr_piece_; }; // Declares a functor that can convert a StringPiece to another type. This works // for any type that has a constructor (explicit or not) taking a single // StringPiece argument. A specialization exists for converting to string // because the underlying data needs to be copied. In theory, these // specializations could be extended to work with other types (e.g., int32), but // then a solution for error reporting would need to be devised. template struct StringPieceTo { To operator()(StringPiece from) const { return To(from); } }; // Specialization for converting to string. template <> struct StringPieceTo { string operator()(StringPiece from) const { return from.ToString(); } }; // Specialization for converting to *const* string. template <> struct StringPieceTo { string operator()(StringPiece from) const { return from.ToString(); } }; #ifdef LANG_CXX11 // IsNotInitializerList::type exists iff T is not an initializer_list. More // details below in Splitter<> where this is used. template struct IsNotInitializerList { typedef void type; }; template struct IsNotInitializerList> {}; #endif // LANG_CXX11 namespace base { // Types small_ and big_ are guaranteed such that sizeof(small_) < // sizeof(big_) using small_ = char; struct big_ { char dummy[2]; }; } // This class implements the behavior of the split API by giving callers access // to the underlying split substrings in various convenient ways, such as // through iterators or implicit conversion functions. Do not construct this // class directly, rather use the Split() function instead. // // Output containers can be collections of either StringPiece or string objects. // StringPiece is more efficient because the underlying data will not need to be // copied; the returned StringPieces will all refer to the data within the // original input string. If a collection of string objects is used, then each // substring will be copied. // // An optional Predicate functor may be supplied. This predicate will be used to // filter the split strings: only strings for which the predicate returns true // will be kept. A Predicate object is any unary functor that takes a // StringPiece and returns bool. By default, the NoFilter predicate is used, // which does not filter out anything. template class Splitter { public: typedef internal::SplitIterator Iterator; Splitter(StringPiece text, Delimiter d) : begin_(text, d), end_(d) {} Splitter(StringPiece text, Delimiter d, Predicate p) : begin_(text, d, p), end_(d, p) {} // Range functions that iterate the split substrings as StringPiece objects. // These methods enable a Splitter to be used in a range-based for loop in // C++11, for example: // // for (StringPiece sp : my_splitter) { // DoWork(sp); // } const Iterator& begin() const { return begin_; } const Iterator& end() const { return end_; } #ifdef LANG_CXX11 // Support for default template arguments for function templates was added in // C++11, but it is not allowed if compiled in C++98 compatibility mode. Since // this code is under a LANG_CXX11 guard, we can safely ignore the // -Wc++98-compat flag and use default template arguments on the implicit // conversion operator below. // // This use of default template arguments on a function template was approved // by tgs and sanjay on behalf of the c-style-arbiters in email thread // // All compiler flags are first saved with a diagnostic push and restored with a // diagnostic pop below. #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wpragmas" #pragma GCC diagnostic ignored "-Wc++98-compat" // Uses SFINAE to restrict conversion to container-like types (by testing for // the presence of a const_iterator member type) and also to disable // conversion to an initializer_list (which also has a const_iterator). // Otherwise, code compiled in C++11 will get an error due to ambiguous // conversion paths (in C++11 vector::operator= is overloaded to take // either a vector or an initializer_list). // // This trick was taken from util/gtl/container_literal.h template ::type, typename ContainerChecker = typename Container::const_iterator> operator Container() { return SelectContainer::value>()(this); } // Restores diagnostic settings, i.e., removes the "ignore" on -Wpragmas and // -Wc++98-compat. #pragma GCC diagnostic pop #else // Not under LANG_CXX11 template operator Container() { return SelectContainer::value>()(this); } #endif // LANG_CXX11 template operator std::pair() { return ToPair(); } private: // is_map::value is true iff there exists a type T::mapped_type. This is // used to dispatch to one of the SelectContainer<> functors (below) from the // implicit conversion operator (above). template struct is_map { template static base::big_ test(typename U::mapped_type*); template static base::small_ test(...); static const bool value = (sizeof(test(0)) == sizeof(base::big_)); }; // Base template handles splitting to non-map containers template struct SelectContainer { Container operator()(Splitter* splitter) const { return splitter->template ToContainer(); } }; // Partial template specialization for splitting to map-like containers. template struct SelectContainer { Container operator()(Splitter* splitter) const { return splitter->template ToMap(); } }; // Inserts split results into the container. To do this the results are first // stored in a vector. This is where the input text is actually // "parsed". This vector is then used to possibly reserve space in the output // container, and the StringPieces in "v" are converted as necessary to the // output container's value type. // // The reason to use an intermediate vector of StringPiece is so we can learn // the needed capacity of the output container. This is needed when the output // container is a vector in which case resizes can be expensive due to // copying of the ::string objects. // // At some point in the future we might add a C++11 move constructor to // ::string, in which case the vector resizes are much less expensive and the // use of this intermediate vector "v" can be removed. template Container ToContainer() { vector v; for (Iterator it = begin(); it != end_; ++it) { v.push_back(*it); } typedef typename Container::value_type ToType; internal::StringPieceTo converter; Container c; ReserveCapacity(&c, v.size()); std::insert_iterator inserter(c, c.begin()); for (const auto& sp : v) { *inserter++ = converter(sp); } return c; } // The algorithm is to insert a new pair into the map for each even-numbered // item, with the even-numbered item as the key with a default-constructed // value. Each odd-numbered item will then be assigned to the last pair's // value. template Map ToMap() { typedef typename Map::key_type Key; typedef typename Map::mapped_type Data; Map m; StringPieceTo key_converter; StringPieceTo val_converter; typename Map::iterator curr_pair; bool is_even = true; for (Iterator it = begin(); it != end_; ++it) { if (is_even) { curr_pair = InsertInMap(std::make_pair(key_converter(*it), Data()), &m); } else { curr_pair->second = val_converter(*it); } is_even = !is_even; } return m; } // Returns a pair with its .first and .second members set to the first two // strings returned by the begin() iterator. Either/both of .first and .second // will be empty strings if the iterator doesn't have a corresponding value. template std::pair ToPair() { StringPieceTo first_converter; StringPieceTo second_converter; StringPiece first, second; Iterator it = begin(); if (it != end()) { first = *it; if (++it != end()) { second = *it; } } return std::make_pair(first_converter(first), second_converter(second)); } // Overloaded InsertInMap() function. The first overload is the commonly used // one for most map-like objects. The second overload is a special case for // multimap, because multimap's insert() member function directly returns an // iterator, rather than a pair like map's. template typename Map::iterator InsertInMap(const typename Map::value_type& value, Map* map) { return map->insert(value).first; } // InsertInMap overload for multimap. template typename std::multimap::iterator InsertInMap( const typename std::multimap::value_type& value, typename std::multimap* map) { return map->insert(value); } // Reserves the given amount of capacity in a vector template void ReserveCapacity(vector* v, size_t size) { v->reserve(size); } void ReserveCapacity(...) {} const Iterator begin_; const Iterator end_; }; } // namespace internal } // namespace strings