// ---------------------------------------------------------------------- // File: StringSplit.hh // Author: Abhishek Lekshmanan - CERN // ---------------------------------------------------------------------- /************************************************************************ * EOS - the CERN Disk Storage System * * Copyright (C) 2021 CERN/Switzerland * * * * This program is free software: you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation, either version 3 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program. If not, see .* ************************************************************************/ #pragma once #include #include #include #include namespace eos::common { namespace detail { // A simple type checker to decide between types having a (const) iterator template struct has_const_iter : std::false_type {}; template struct has_const_iter().cbegin(), std::declval().cend())>> : std::true_type {}; template bool constexpr has_const_iter_v = has_const_iter::value; // A helper function to find the next position of a delimiter in a sequence // given a starting position // While tested for strings, this should work for any // iterator sequence which requires a subset match of a delimiter pattern. // Technically these can take univ. references if you decay the template // arguments and do a enable_if_t on the delim_t, given we don't have a lot of // variants the less generic version is a bit more readable template auto get_delim_p(const str_t& str, const delim_t& delim, typename str_t::size_type start_pos) -> typename str_t::size_type { static_assert(has_const_iter_v, "delimiter must implement a const iterator!"); auto p = std::find_first_of(str.cbegin() + start_pos, str.cend(), delim.cbegin(), delim.cend()); return std::distance(str.cbegin(), p); } template auto get_delim_p(const str_t& str, char delim, typename str_t::size_type start_pos) -> typename str_t::size_type { return str.find(delim, start_pos); } // simple overloads for distinguishing vector/deque types vs set types - technically // is_assoc_container_v should already get the job done, but a future map vs set // distinction would require has_emplace_back type functionality anyway as is_assoc // would be true for a map type template struct has_emplace_back : std::false_type {}; template struct has_emplace_back().emplace_back())>> : std::true_type {}; template bool constexpr has_emplace_back_v = has_emplace_back::value; } // detail // A non owning iterator for splitting a string with delimiters As far as the // pointed string is valid, this is a really fast way to iterate over split parts // // for (std::string_view part : StringSplit(input,"/")) { // // ...} // // The current implementation is similar to boost::split and the like in the sense of // Given a string of delimiters, presence of any of them will trigger a match. For eg. // // StringSplit("ab,cd\nde,gh", ",\n") -> ["ab","cd","de", "gh"] // // Though it is easy enough to modify this to use find instead of find_first_of // for a pattern by using a different detail::get_delim_p function (This can be // an additional template param/tag which is currently not implemented though // fairly easy to do) // // For copying onto a container the same code above can be used for eg. // std::vector v // for (std::string_view part : StringSplit(input,delim)) { // v.emplace_back(part) } // // Though if it is sure that the parent string is in scope, using either the iterator // directly or using a vector would yield the most performant results // A helper is provided which will move these to a desired container Use the // StringSplit or CharSplit aliases for most cases, we do need an explicit // std::string_view template specification in other cases as by default the // compiler will try to default to const char* which is not desirable template class LazySplit { public: LazySplit(str_type s, delim_type d) : str(s), delim(d) {} class iterator { public: // A base declaration of the underlying string type so that we don't have to // decay every time, this is to ensure that we correctly have a reference type // when we hold a const std::string&, using base_string_type = typename std::decay::type; // Basic iterator definition member types using iterator_category = std::forward_iterator_tag; using value_type = str_type; using difference_type = std::string_view::difference_type; // basically std::ptrdiff_t using pointer = std::add_pointer_t; using const_pointer = std::add_const_t; using reference = std::add_lvalue_reference_t; using const_reference = std::add_const_t; using size_type = std::string_view::size_type; iterator(str_type s, delim_type d): str(s), delim(d), segment(next(0)) {} iterator(size_type sz) : pos(sz) {} iterator& operator++() { segment = next(pos); return *this; } iterator operator++(int) { iterator curr = *this; segment = next(pos); return curr; } reference operator*() { return segment; } pointer operator->() { return &segment; } friend bool operator==(const iterator& a, const iterator& b) { return a.segment == b.segment; } friend bool operator!=(const iterator& a, const iterator& b) { return !(a == b); } private: // we need to collapse the reference here, hence we have to return by value We // have a special variant accepting char delimiters, this is useful for // functions which split on nullbyte etc. For allmost everything else the // other string_view splitter is more preferred as it allows for // multicharacter splits while still maintaining speed. The member function* // find_first_of is slightly slower than the std::find_first_of with iterators base_string_type next(size_type start_pos) { // this loop is needed to advance past empty delims while (start_pos < str.size()) { pos = detail::get_delim_p(str, delim, start_pos); // check if we are at the end or at a delim if (pos != start_pos) { return str.substr(start_pos, pos - start_pos); } start_pos = pos + 1; } return {}; } size_type pos {0}; str_type str; delim_type delim; str_type segment; }; using const_iterator = iterator; iterator begin() const { return {str, delim}; } const_iterator cbegin() const { return {str, delim}; } iterator end() const { return { std::string::npos }; } const_iterator cend() const { return { std::string::npos }; } private: str_type str; delim_type delim; }; template bool operator==(const LazySplit& split, const C& cont) { return std::equal(split.begin(), split.end(), cont.begin(), cont.end()); } template bool operator==(const C& cont, const LazySplit& split) { return std::equal(split.begin(), split.end(), cont.begin(), cont.end()); } using StringSplitIt = LazySplit; using CharSplitIt = LazySplit; template > auto StringSplit(std::string_view input, std::string_view delim) -> std::enable_if_t, C> { C c; auto split_iter = StringSplitIt(input, delim); for (std::string_view part : split_iter) { c.emplace_back(part); } return c; } // Not defaulting this type as it will mean an additional include of an // unordered/ordered type which is unnecessary template auto StringSplit(std::string_view input, std::string_view delim) -> std::enable_if_t, C> { C c; auto split_iter = StringSplitIt(input, delim); for (std::string_view part : split_iter) { c.emplace(part); } return c; } template > C SplitPath(std::string_view input) { return StringSplit(input, "/"); } inline std::string GetRootPath(std::string_view path) { using namespace std::string_view_literals; auto it = StringSplitIt(path, "/"sv).begin(); return std::string(*it); } } // namespace eos::common