WvStreams
wvstrutils.h
Go to the documentation of this file.
00001 /* -*- Mode: C++ -*-
00002  * Worldvisions Weaver Software:
00003  *   Copyright (C) 1997-2002 Net Integration Technologies, Inc.
00004  *
00005  * Various little string functions...
00006  * 
00007  * FIXME: and some other assorted crap that belongs anywhere but here.
00008  */
00009 #ifndef __WVSTRUTILS_H
00010 #define __WVSTRUTILS_H
00011 
00012 #include <sys/types.h> // for off_t
00013 #include <sys/stat.h>
00014 #include <unistd.h>
00015 #include <time.h>
00016 #include <ctype.h>
00017 #include "wvstring.h"
00018 #include "wvstringlist.h"
00019 #include "wvhex.h"
00020 #ifndef _WIN32
00021 #include "wvregex.h"
00022 #endif
00023 
00036 char *terminate_string(char *string, char c);
00037 
00046 char *trim_string(char *string);
00047 
00052 char *trim_string(char *string, char c);
00053 
00067 WvString spacecat(WvStringParm a, WvStringParm b, char sep = ' ',
00068                   bool onesep = false);
00069 
00070     
00075 char *non_breaking(const char *string);
00076 
00081 void replace_char(void *string, char c1, char c2, int length);
00082 
00086 char *snip_string(char *haystack, char *needle);
00087 
00088 #ifndef _WIN32
00089 
00093 char *strlwr(char *string);
00094 
00099 char *strupr(char *string);
00100 
00101 #endif
00102 
00104 bool is_word(const char *string);
00105 
00114 WvString hexdump_buffer(const void *buf, size_t len, bool charRep = true);
00115 
00120 bool isnewline(char c);
00121 
00129 WvString url_decode(WvStringParm str, bool no_space = false);
00130 
00131 
00140 WvString url_encode(WvStringParm str, WvStringParm unsafe = "");
00141  
00142 
00146 WvString  diff_dates(time_t t1, time_t t2);
00147 
00148 
00153 WvString rfc822_date(time_t _when = -1);
00154 
00156 WvString rfc1123_date(time_t _when);
00157 
00159 WvString local_date(time_t _when = -1);
00160 
00162 WvString intl_time(time_t _when = -1);
00163 
00165 WvString intl_date(time_t _when = -1);
00166 
00168 WvString intl_datetime(time_t _when = -1);
00169 
00170 time_t intl_gmtoff(time_t t);
00171 
00172 #ifndef _WIN32
00173 
00178 WvString passwd_crypt(const char *str);
00179 
00180 #endif
00181 
00186 WvString passwd_md5(const char *str);
00187 
00192 WvString backslash_escape(WvStringParm s1);
00193 
00195 int strcount(WvStringParm s, const char c);
00196 
00201 WvString encode_hostname_as_DN(WvStringParm hostname);
00202 
00209 WvString nice_hostname(WvStringParm name);
00210 
00216 WvString getfilename(WvStringParm fullname);
00217 WvString getdirname(WvStringParm fullname);
00218 
/*
 * Rounding modes accepted through the rounding_method argument of the
 * size-formatting helpers declared below (sizetoa(), sizektoa(),
 * sizeitoa(), sizekitoa()); those helpers default to
 * ROUND_UP_AT_POINT_FIVE.
 *
 * The numeric values are spelled out only to make the ordering explicit;
 * they are the same values the compiler would assign implicitly.
 *
 * NOTE(review): the exact tie-breaking of the *_AT_POINT_FIVE modes is
 * defined by the implementations, which are not visible in this header.
 */
enum RoundingMethod
{
    ROUND_DOWN               = 0,
    ROUND_DOWN_AT_POINT_FIVE = 1,
    ROUND_UP_AT_POINT_FIVE   = 2,
    ROUND_UP                 = 3
};
00229 
00235 WvString sizetoa(unsigned long long blocks, unsigned long blocksize = 1,
00236                  RoundingMethod rounding_method = ROUND_UP_AT_POINT_FIVE);
00237 
00242 WvString sizektoa(unsigned long long kbytes,
00243                   RoundingMethod rounding_method = ROUND_UP_AT_POINT_FIVE);
00244 
00250 WvString sizeitoa(unsigned long long blocks, unsigned long blocksize = 1,
00251                   RoundingMethod rounding_method = ROUND_UP_AT_POINT_FIVE);
00252 
00257 WvString sizekitoa(unsigned long long kbytes,
00258                    RoundingMethod rounding_method = ROUND_UP_AT_POINT_FIVE);
00259 
00263 WvString secondstoa(unsigned int total_seconds);
00264 
00269 int lookup(const char *str, const char * const *table,
00270     bool case_sensitive = false);
00271 
00279 template<class StringCollection>
00280 void strcoll_split(StringCollection &coll, WvStringParm _s,
00281     const char *splitchars = " \t", int limit = 0)
00282 {
00283     WvString s(_s);
00284     char *sptr = s.edit(), *eptr, oldc;
00285     
00286     // Simple if statement to catch (and add) empty (but not NULL) strings.
00287     if (sptr && !*sptr )
00288     {   
00289         WvString *emptyString = new WvString("");
00290         coll.add(emptyString, true);
00291     }
00292     
00293     // Needed to catch delimeters at the beginning of the string.
00294     bool firstrun = true;
00295 
00296     while (sptr && *sptr)
00297     {
00298         --limit;
00299 
00300         if (firstrun)
00301         {   
00302             firstrun = false;
00303         }
00304         else
00305         {
00306             sptr += strspn(sptr, splitchars);
00307         }
00308 
00309         if (limit)
00310         {
00311             eptr = sptr + strcspn(sptr, splitchars);
00312         }
00313         else
00314         {
00315             eptr = sptr + strlen(sptr);
00316         }
00317         
00318         oldc = *eptr;
00319         *eptr = 0;
00320         
00321         WvString *newstr = new WvString(sptr);
00322         coll.add(newstr, true);
00323         
00324         *eptr = oldc;
00325         sptr = eptr;
00326     }
00327 }
00328 
00329 
00343 template<class StringCollection>
00344 void strcoll_splitstrict(StringCollection &coll, WvStringParm _s,
00345     const char *splitchars = " \t", int limit = 0)
00346 {
00347     WvString s(_s);
00348     char *cur = s.edit();
00349 
00350     if (!cur) return;
00351 
00352     for (;;)
00353     {
00354         --limit;
00355         if (!limit)
00356         {
00357             coll.add(new WvString(cur), true);
00358             break;
00359         }
00360 
00361         int len = strcspn(cur, splitchars);
00362 
00363         char tmp = cur[len];
00364         cur[len] = 0;
00365         coll.add(new WvString(cur), true);
00366         cur[len] = tmp;
00367 
00368         if (!cur[len]) break;
00369         cur += len + 1;
00370     }
00371 }
00372 
00373 
00374 #ifndef _WIN32 // don't have regex on win32
00375 
00382 template<class StringCollection>
00383 void strcoll_split(StringCollection &coll, WvStringParm s,
00384     const WvRegex &regex, int limit = 0)
00385 {
00386     int start = 0;
00387     int match_start, match_end;
00388     int count = 0;
00389     
00390     while ((limit == 0 || count < limit)
00391             && regex.continuable_match(&s[start], match_start, match_end)
00392             && match_end > 0)
00393     {
00394         WvString *substr = new WvString;
00395         int len = match_start;
00396         substr->setsize(len+1);
00397         memcpy(substr->edit(), &s[start], len);
00398         substr->edit()[len] = '\0';
00399         coll.add(substr, true);
00400         start += match_end;
00401         ++count;
00402     }
00403     
00404     if (limit == 0 || count < limit)
00405     {
00406         WvString *last = new WvString(&s[start]);
00407         last->unique();
00408         coll.add(last, true);
00409     }
00410 }
00411 #endif
00412 
00413 
00419 template<class StringCollection>
00420 WvString strcoll_join(const StringCollection &coll,
00421     const char *joinchars = " \t")
00422 {
00423     size_t joinlen = strlen(joinchars);
00424     size_t totlen = 1;
00425     typename StringCollection::Iter s(
00426         const_cast<StringCollection&>(coll));
00427     for (s.rewind(); s.next(); )
00428     {
00429         if (s->cstr())
00430             totlen += strlen(s->cstr());
00431         totlen += joinlen;
00432     }
00433     totlen -= joinlen; // no join chars at tail
00434     
00435     WvString total;
00436     total.setsize(totlen);
00437 
00438     char *te = total.edit();
00439     te[0] = 0;
00440     bool first = true;
00441     for (s.rewind(); s.next(); )
00442     {
00443         if (first)
00444             first = false;
00445         else
00446             strcat(te, joinchars);
00447         if (s->cstr()) 
00448             strcat(te, s->cstr());
00449     }
00450     return total;
00451 }
00452 
00457 WvString strreplace(WvStringParm s, WvStringParm a, WvStringParm b);
00458 
00460 WvString undupe(WvStringParm s, char c);
00461 
00463 WvString hostname();
00464 
00466 WvString fqdomainname();
00467 
00469 WvString wvgetcwd();
00470 
00475 WvString metriculate(const off_t i);
00476 
00481 WvString afterstr(WvStringParm line, WvStringParm a);
00482 
00487 WvString beforestr(WvStringParm line, WvStringParm a);
00488 
00495 WvString substr(WvString line, unsigned int pos, unsigned int len);
00496 
00501 WvString depunctuate(WvStringParm line);
00502 
00503 // Converts a string in decimal to an arbitrary numeric type
00504 template<class T>
00505 bool wvstring_to_num(WvStringParm str, T &n)
00506 {
00507     bool neg = false;
00508     n = 0;
00509 
00510     for (const char *p = str; *p; ++p)
00511     {
00512         if (isdigit(*p))
00513         {
00514             n = n * T(10) + T(*p - '0');
00515         }
00516         else if ((const char *)str == p
00517                 && *p == '-')
00518         {
00519             neg = true;
00520         }
00521         else return false;
00522     }
00523 
00524     if (neg)
00525         n = -n;
00526 
00527     return true;
00528 }
00529 
00530 /*
00531  * Before using the C-style string escaping functions below, please consider
00532  * using the functions in wvtclstring.h instead; they usually lead to much more
00533  * human readable and manageable results, and allow representation of
00534  * lists of strings.
00535  */
00536 
00537 struct CStrExtraEscape  // one extra escape rule for the extra_escapes tables of cstr_escape()/cstr_unescape()
00538 {
00539     char ch;  // presumably the raw character this rule applies to -- TODO confirm against the implementation
00540     const char *esc;  // presumably the text used to represent ch in escaped form -- TODO confirm
00541 };
00542 extern const CStrExtraEscape CSTR_TCLSTR_ESCAPES[];  // predefined rule table, defined elsewhere; NOTE(review): how the table's end is marked is not visible in this header
00543 
00545 //
00546 // If data is NULL, returns WvString::null; otherwise, returns an allocated
00547 // WvString containing the C-style string constant that represents the data.
00548 //
00549 // All printable characters including space except " and \ are represented
00550 // without escaping.
00551 //
00552 // The usual C escapes are performed, such as \n, \r, \", \\ and \0.
00553 //
00554 // All other characters are escaped in uppercase hex form, eg. \x9E
00555 //
00556 // The extra_escapes parameter allows for additional characters beyond
00557 // the usual ones escaped in C; setting it to CSTR_TCLSTR_ESCAPES will
00558 // escape { and } as < and >, which allows the resulting strings to be
00559 // TCL-string coded without ridiculous double-escaping.
00560 //
00561 WvString cstr_escape(const void *data, size_t size,
00562         const CStrExtraEscape extra_escapes[] = NULL);
00563 
00565 // 
00566 // This function does *not* include the trailing null that a C compiler would --
00567 //   if you want this null, put \0 at the end of the C-style string
00568 // 
00569 // If cstr is correctly formatted and max_size is large enough for the
00570 // resulting data, returns true and size will equal the size of the
00571 // resulting data.  If data is not NULL it will contain this data.
00572 //
00573 // If cstr is correctly formatted but max_size is too small for the resulting
00574 // data, returns false and size will equal the minimum value of max_size
00575 // for this function to have returned true.  If data is non-NULL it will
00576 // contain the first max_size bytes of resulting data.
00577 // 
00578 // If cstr is incorrectly formatted, returns false and size will equal 0.
00579 //
00580 // This function works just as well on multiple, whitespace-separated
00581 // C-style strings.  This allows you to concatenate strings produced
00582 // by cstr_escape, and the result of cstr_unescape will be the data blocks
00583 // concatenated together.  This implies that the empty string corresponds
00584 // to a valid data block of length zero; however, a null string still returns
00585 // an error.
00586 //
00587 // The extra_escapes parameter must match that used in the call to 
00588 // cstr_escape used to produce the escaped strings.
00589 //
00590 bool cstr_unescape(WvStringParm cstr, void *data, size_t max_size, size_t &size,
00591         const CStrExtraEscape extra_escapes[] = NULL);
00592 
/**
 * Tell whether str is a plain decimal integer: an optional leading '-'
 * followed by one or more decimal digits and nothing else.
 *
 * @param str  NUL-terminated string to check; may be NULL
 * @return true for strings such as "0", "42" or "-17"; false for NULL,
 *         "", "-", and anything containing a non-digit (including '+',
 *         whitespace, or bytes outside the ASCII range)
 *
 * Only the syntax is checked; the value is not range-checked.
 */
static inline bool is_int(const char *str)
{
    if (!str)
        return false;

    if (*str == '-')
        ++str;

    // A sign on its own, or an empty string, is not a number.
    if (!*str)
        return false;

    for (; *str; ++str)
    {
        // <ctype.h> functions require an unsigned char value; passing a
        // negative plain char would be undefined behaviour.
        if (!isdigit((unsigned char)*str))
            return false;
    }

    return true;
}
00610 
00613 WvString ptr2str(void* ptr);
00614 
00615 #endif // __WVSTRUTILS_H