#ifndef _STREAMS_H #define _STREAMS_H #include #include #include "extensions.h" using namespace std; //CONVERSIONS //used eveywhere in the system and separated also for other projects //UTF-8, ISO8859, ascii conversions //URLEncoding, HTML entity encoding / decoding //STRING ANALYSIS //character, word and sentence //all encoding is done by: //scanning the string for any charcters that need changing and recording their position //IF ANY CHARACTERS FOUND: malloc using the number of characters that need changing to esimate the new size //convert and return //one character only (static in-line for speed and simplicity) //often inherited but also used statically class Character { public: static bool is( const unsigned char c, const char *chars) {if (!chars) return false; else return strchr(chars, c) != 0;} static bool isWhiteSpace( const unsigned char c) {return (c <= ' ');} static bool isUpper( const unsigned char c) {return (c >= 'A' && c <= 'Z');} static bool isLower( const unsigned char c) {return (c >= 'a' && c <= 'z');} static bool isNumeric( const unsigned char c) {return (c >= '0' && c <= '9');} static bool isAlpha( const unsigned char c) {return (isUpper(c) || isLower(c));} static bool isAlphaNumeric( const unsigned char c) {return (isAlpha(c) || isNumeric(c));} static bool isUTF8AlphaNumeric(const unsigned char c) {return (isAlpha(c) || isNumeric(c) || !isASCII(c));} static bool isASCII( const unsigned char c) {return (c & 128) == 0;} static bool isGrammar( const unsigned char c) {return !isWhiteSpace(c) && !isUTF8AlphaNumeric(c);} static bool isSentenceEnder( const unsigned char c) {return is(c, ".!?:>");} }; //more than one character for short combinations (static in-line for speed and simplicity) //these functions only look forward if the byte is non-zero to avoid page faults class Phrase: public Character { public: static bool isSentenceEnd(const char current, const char next ) {return (isSentenceEnder(current) && !isUTF8AlphaNumeric(next));} static bool isWordStart( const char current, const char previous) {return (!isUTF8AlphaNumeric(previous) && isUTF8AlphaNumeric(current));} static bool isWordEnd( const char current, const char previous) {return ( isUTF8AlphaNumeric(previous) && !isUTF8AlphaNumeric(current));} }; //entire stream with conversions (all static) //can be inherited class Stream: public Phrase { public: static bool isURLEncoded(const char *string); //these 2 go hand-in-hand: calculate the length, malloc and then call the 2nd to write into the space created static size_t URLEncodedLength(const char *string); static size_t URLEncode(const char *in, char *out); //alternative way: mallocs (where necessary) the space and writes the string static const char *URLEncode(char *string, const bool reuseSpaces = false, const bool checkNotEncodedAlready = true); //saving HTML to and from database (raw UTF-8, not encoded) static const char *HTMLEntityDecodeToUTF8(char *string); static const char *UTF8ToHTMLEntityEncode(char *string); }; #endif