diff --git a/src/envelope.cpp b/src/envelope.cpp
index c3b77c5..bb85fac 100644
--- a/src/envelope.cpp
+++ b/src/envelope.cpp
@@ -18,6 +18,34 @@
 
 #include "envelope.h"
 
+/*
+ * helpers
+ */
+
+/*
+ * Identifier characters allowed in typeident/termident: ASCII letters,
+ * digits, '_' and '.'. Notably no dash and no whitespace, so an identifier
+ * can never run into the "------" that closes a mark.
+ */
+inline static bool acceptable_char (char c)
+{
+	return
+	    (c >= 'a' && c <= 'z') ||
+	    (c >= 'A' && c <= 'Z') ||
+	    (c >= '0' && c <= '9') ||
+	    c == '_' || c == '.';
+}
+
+/*
+ * True if every character of `a' is acceptable. The empty string passes.
+ */
+static bool acceptable_id (const std::string&a)
+{
+	for (size_t i = 0; i < a.length(); ++i)
+		if (!acceptable_char (a[i]) ) return false;
+	return true;
+}
+
 /*
  * how do the ascii envelopes look like?
  *
@@ -34,7 +62,7 @@
  * To distinguish ourselves from PGP, we use six dashes and prefixed CCR name.
  * No version information is supplied - versioning should be contained
  * preferably in typeident, e.g. like "message" "better_message" and
- * "bettermessage-version3".
+ * "bettermessage_version3.5".
  *
  * Cleartext two-part messages and similar evil sorceries are generalized to
  * multipart messages using the "part cut".
@@ -42,33 +70,169 @@
  * Also, to prevent cleartext embedding conflicts, we add termident, which is
  * basically a random string of letters and numbers that serves as a mark that
  * must be the same on the begin and end.
+ *
+ * Also also, there's always newline before cut/end, even if there already is a
+ * newline from previous cut. Therefore, size 0 section looks like it has an
+ * empty line inside.
  */
 
-size_t envelope_get (const std::string&data, size_t offset,
-                     std::string&out_type,
-                     std::vector<std::string>&out_parts)
+/*
+ * envelope_read looks for the first complete envelope that begins at or after
+ * `offset' in `data'. On success it stores the typeident in out_type and the
+ * sections in out_parts, and returns the offset right behind the end mark,
+ * which is where the search for a following envelope can continue. If there
+ * is no complete envelope, it returns 0 and leaves both outputs untouched.
+ *
+ * Begin marks that are not on the beginning of a line, that carry malformed
+ * identifiers, or that have no matching end mark are skipped.
+ */
+size_t envelope_read (const std::string&data, size_t offset,
+                      std::string&out_type,
+                      std::vector<std::string>&out_parts)
+{
+	const std::string
+	begin_prefix = "------ccr begin ",
+	begin_suffix = "------\n";
+
+	for (;;) {
+		//try to find begin mark.
+		size_t begin = data.find (begin_prefix, offset);
+
+		//nothing possible found, die.
+		if (begin == data.npos) return 0;
+
+		//whatever happens to this candidate, continue behind its prefix
+		//(it is the mark position that has to move, not the old offset)
+		offset = begin + begin_prefix.length();
+
+		//verify it's on the beginning of the line
+		if (begin > 0 && data[begin - 1] != '\n') continue;
+
+		//find and verify possible positions of type and term strings
+		size_t eoterm = data.find (begin_suffix, offset),
+		       eotype = data.find (' ', offset);
+		if (eoterm == data.npos ||
+		    eotype == data.npos ||
+		    eotype >= eoterm)
+			continue;
+
+		//note that substr() takes a length, not an end position
+		std::string
+		type = data.substr (offset, eotype - offset),
+		term = data.substr (eotype + 1, eoterm - eotype - 1);
+
+		//verify that type&term are only of acceptable characters
+		if (!acceptable_id (type) || !acceptable_id (term) )
+			continue;
+
+		//read all sections
+		std::string
+		cut_sep = "\n------ccr cut " + type + " " + term + "------\n",
+		end_sep = "\n------ccr end " + type + " " + term + "------\n";
+
+		//collect into locals, so the outputs stay untouched on failure
+		std::vector<std::string> parts;
+		size_t pos = eoterm + begin_suffix.length();
+		bool finished = false;
+
+		while (!finished) {
+			//find closest cut or end
+			size_t cut_pos = data.find (cut_sep, pos),
+			       end_pos = data.find (end_sep, pos);
+
+			//can't even find end, don't care about cut_pos
+			if (end_pos == data.npos) break;
+
+			if (cut_pos != data.npos && cut_pos < end_pos) {
+				//there is cut, next section follows it
+				parts.push_back (data.substr (pos, cut_pos - pos) );
+				pos = cut_pos + cut_sep.length();
+			} else {
+				//no cut before the end, this is the last section
+				parts.push_back (data.substr (pos, end_pos - pos) );
+				pos = end_pos + end_sep.length();
+				finished = true;
+			}
+		}
+
+		//unterminated envelope, retry behind this begin mark
+		if (!finished) continue;
+
+		out_type = type;
+		out_parts.swap (parts);
+
+		//return the modified offset
+		return pos;
+	}
+}
+
+/*
+ * The Much Simpler Envelope Formatter!
+ */
+
+/*
+ * Fills `out' with `length' characters picked from [a-z0-9] using rng.
+ */
+static void gen_random_term (std::string&out, prng&rng, size_t length)
+{
+	//this could be longer, but don't generate absolute mess.
+	static const char letters[] = "abcdefghijklmnopqrstuvwxyz0123456789";
+
+	out.resize (length);
+	for (size_t i = 0; i < length; ++i) {
+		//36 letters above. NOTE(review): presumably rng.random(n)
+		//returns a number below n - confirm against prng.h
+		out[i] = letters[rng.random (36)];
+	}
+}
+
+/*
+ * envelope_format wraps `parts' into an envelope of given type, separating
+ * them with cut marks. The termident is random; it is regenerated until no
+ * part contains a mark made of it, so the content can't close the envelope.
+ *
+ * The type is used as is; envelope_read will only recognize the result if the
+ * type consists of acceptable characters (see acceptable_char above).
+ */
+std::string envelope_format (const std::string&type,
+                             const std::vector<std::string>& parts,
+                             prng&rng)
 {
-	size_t begin;
-
-restart:
-	//try to find begin mark.
-	begin = data.find ("------ccr begin ", offset);
-
-	//nothing possible found, die.
-	if (begin == data.npos) return 0;
-
-	//try to parse the begin mark
-	std::string type, mark;
-
-	//TODO parse it lol
-	//TODO move offset
-
-	//read all sections
 	for (;;) {
+		std::string term;
+		gen_random_term (term, rng, 16);
+
+		//The marks are checked without their newlines, because the newline
+		//that completes a mark may also come from a neighboring mark.
+		std::string
+		cut_mark = "------ccr cut " + type + " " + term + "------",
+		end_mark = "------ccr end " + type + " " + term + "------";
+
+		//check whether there's no collision with boundary
+		bool good = true;
+		std::vector<std::string>::const_iterator i, e;
+		for (i = parts.begin(), e = parts.end(); i != e; ++i) {
+			if (i->find (cut_mark) != i->npos ||
+			    i->find (end_mark) != i->npos) {
+				good = false;
+				break;
+			}
+		}
+		if (!good) continue; //retry generating the termident mark
+
+		//now construct the result
+		std::string
+		res = "------ccr begin " + type + " " + term + "------\n";
+
+		for (i = parts.begin(); i != e; ++i) {
+			if (i != parts.begin() ) res += "\n" + cut_mark + "\n";
+			res += *i;
+		}
+		res += "\n" + end_mark + "\n";
+
+		//done, this is the only way out of the loop
+		return res;
 	}
-
-	//return the modified offset
-	return offset;
 }
 
 
diff --git a/src/envelope.h b/src/envelope.h
index 5882632..5d677e8 100644
--- a/src/envelope.h
+++ b/src/envelope.h
@@ -22,8 +22,10 @@
 #include <string>
 #include <vector>
 
+#include "prng.h"
+
 /*
- * Tools for finding envelopes in ascii/utf-8 text.
+ * Tool for finding envelopes in ascii/utf-8 text.
  *
  * We simply don't care about wide chars in text, UTF-16+, nor conflicting
  * encodings, nor any similar abominations.
@@ -31,11 +33,19 @@
- * envelope_get tries to find an envelope in text data, starting from offset,
+ * envelope_read tries to find an envelope in text data, starting from offset,
  * returning the offset of first possible following envelope or 0 if nothing
- * usuable was found.
+ * usable was found.
+ *
+ * Finally, no one wants to see CRLF line endings here. Ever. ffs.
  */
 
-size_t envelope_get (const std::string& data, size_t offset,
-                     std::string&out_type,
-                     std::vector<std::string>&out_parts);
+size_t envelope_read (const std::string& data, size_t offset,
+                      std::string&out_type,
+                      std::vector<std::string>&out_parts);
+
+/*
+ * envelope_format wraps the parts into an envelope readable by envelope_read
+ */
+std::string envelope_format (const std::string&type,
+                             const std::vector<std::string>& parts,
+                             prng&rng);
 
 #endif
-