envelope: impl

2013-04-17 17:14:08 +02:00 · 2013-04-17 17:14:08 +02:00 · 74e7e6226f
parent 4b844ffd20
commit 74e7e6226f
2 changed files with 176 additions and 28 deletions
--- a/src/envelope.cpp
+++ b/src/envelope.cpp
@ -18,6 +18,26 @@
 #include "envelope.h"
 /*
 * helpers
 */
 //tells whether the character may appear in a typeident or termident
 inline static bool acceptable_char (char c)
 {
 	//lowercase and uppercase ascii letters
 	if ('a' <= c && c <= 'z') return true;
 	if ('A' <= c && c <= 'Z') return true;
 	//decimal digits
 	if ('0' <= c && c <= '9') return true;
 	//the only punctuation we tolerate
 	switch (c) {
 	case '_':
 	case '.':
 		return true;
 	default:
 		return false;
 	}
 }
 //an identifier is acceptable if it has no unacceptable character in it
 //(which also makes the empty string acceptable)
 static bool acceptable_id (const std::string&a)
 {
 	std::string::const_iterator it = a.begin(), end = a.end();
 	while (it != end) {
 		if (!acceptable_char (*it) ) return false;
 		++it;
 	}
 	return true;
 }
 /*
 * What do the ASCII envelopes look like?
 *
@ -34,7 +54,7 @@
 * To distinguish ourselves from PGP, we use six dashes and prefixed CCR name.
 * No version information is supplied - versioning should be contained
 * preferably in typeident, e.g. like "message" "better_message" and
- * "bettermessage-version3".
+ * "bettermessage_version3.5".
 *
 * Cleartext two-part messages and similar evil sorceries are generalized to
 * multipart messages using the "part cut".
@ -42,33 +62,151 @@
 * Also, to prevent cleartext embedding conflicts, we add termident, which is
 * basically a random string of letters and numbers that serves as a mark that
 * must be the same on the begin and end.
 *
 * Also, there is always a newline before cut/end, even if there already is a
 * newline from the previous cut. Therefore, a section of size 0 looks like it
 * has an empty line inside.
 */
-size_t envelope_get (const std::string&data, size_t offset,
+size_t envelope_read (const std::string&data, size_t offset,
-                     std::string&out_type,
+                      std::string&out_type,
-                     std::vector<std::string>&out_parts)
+                      std::vector<std::string>&out_parts)
 {
 	for (;;) {
 		//try to find begin mark.
 		std::string
 		begin_prefix = "------ccr begin ",
 		begin_suffix = "------\n";
 		size_t begin = data.find (begin_prefix, offset);
 		//nothing possible found, die.
 		if (begin == data.npos) return 0;
 		//verify it's on the beginning of the line
 		if (begin > 0) if (data[begin - 1] != '\n') {
 				offset += begin_prefix.length();
 				continue;
 			}
 		//try to parse the typeident and termident
 		std::string type, term;
 		offset += begin_prefix.length();
 		//find and verify possible positions of type and term strings
 		size_t eoterm = data.find (begin_suffix, offset),
 		       eotype = data.find (' ', offset);
 		if (eoterm == data.npos ||
 		    eotype == data.npos ||
 		    eotype > (eoterm - 1) )
 			continue;
 		type = data.substr (offset, eotype);
 		term = data.substr (eotype + 1, eoterm);
 		//verify that type&term are only of acceptable characters
 		if (!acceptable_id (type) || !acceptable_id (term) )
 			continue;
 		offset = eoterm + begin_suffix.length();
 		//read all sections
 		std::string
 		cut_sep = "\n------ccr cut " + type + " " + term + "------\n",
 		end_sep = "\n------ccr end " + type + " " + term + "------\n";
 		out_parts.clear();
 		bool retry = false;
 		for (;;) {
 			//find closest cut or sep
 			size_t cut_pos = data.find (cut_sep, offset),
 			       end_pos = data.find (end_sep, offset);
 			if (end_pos == data.npos) {
 				//can't even find end, don't care about cut_pos
 				retry = true;
 				break;
 			}
 			if ( cut_pos != data.npos && cut_pos < end_pos) {
 				//there is cut
 				out_parts.push_back (data.substr (offset,
 				                                  cut_pos) );
 			} else {
 				//no cut, it's till the end
 				out_parts.push_back (data.substr (offset,
 				                                  end_pos) );
 			}
 			if (cut_pos == data.npos) {
 				//it was end_pos, finished!
 				offset = end_pos + end_sep.length();
 				break;
 			} else {
 				//move offset for next search
 				offset = cut_pos + cut_sep.length();
 			}
 		}
 		if (retry) continue;
 		//return the modified offset
 		return offset;
 	}
 }
 /*
 * The Much Simpler Envelope Formatter!
 */
 //fills out with length random characters picked using rng
 static void gen_random_term (std::string&out, prng&rng, size_t length)
 {
 	//a bigger alphabet would work too, but the marks should stay
 	//readable. There are 26 letters + 10 digits == 36 choices.
 	static const char alphabet[] = "abcdefghijklmnopqrstuvwxyz0123456789";
 	out.clear();
 	out.reserve (length);
 	while (out.length() < length)
 		out.push_back (alphabet[rng.random (36)]);
 }
 /*
 * Formats the parts as a single envelope of given type.
 *
 * type  - typeident written into the begin/cut/end marks. envelope_read only
 *         recognizes typeidents that pass acceptable_id.
 * parts - sections of the envelope; they get separated by cut marks. With no
 *         parts, the result still contains one empty section.
 * rng   - source of randomness for the termident
 *
 * Returns the text of the envelope, from the begin mark to the end mark.
 */
 std::string envelope_format (const std::string&type,
                              const std::vector<std::string>& parts,
                              prng&rng)
 {
 	for (;;) {
 		//new random termident for each attempt
 		std::string term;
 		gen_random_term (term, rng, 16);
 		const std::string
 		cut_sep = "\n------ccr cut " + type + " " + term + "------\n",
 		end_sep = "\n------ccr end " + type + " " + term + "------\n";
 		//check whether there's no collision with boundary
 		bool good = true;
 		std::vector<std::string>::const_iterator i, e;
 		for (i = parts.begin(), e = parts.end(); i != e; ++i) {
 			//Each part is followed by the newline that starts the
 			//next mark, so a part ending with a mark that lacks
 			//only the final newline would collide as well.
 			const std::string written = *i + "\n";
 			if (written.find (cut_sep) != written.npos ||
 			    written.find (end_sep) != written.npos) {
 				good = false;
 				break;
 			}
 		}
 		if (!good) continue; //retry generating the termident mark
 		//now construct the result
 		std::string
 		res = "------ccr begin " + type + " " + term + "------\n";
 		if (!parts.empty() ) {
 			i = parts.begin();
 			res += *i;
 			for (++i; i != e; ++i) {
 				res += cut_sep;
 				res += *i;
 			}
 		}
 		res += end_sep;
 		return res;
 	}
 }
--- a/src/envelope.h
+++ b/src/envelope.h
@ -22,8 +22,10 @@
 #include <string>
 #include <vector>
 #include "prng.h"
 /*
- * Tools for finding envelopes in ascii/utf-8 text.
+ * Tool for finding envelopes in ascii/utf-8 text.
 *
 * We simply don't care about wide chars in text, UTF-16+, nor conflicting
 * encodings, nor any similar abominations.
@ -31,11 +33,19 @@
 * envelope_read tries to find an envelope in text data, starting from offset,
 * returning the offset of the first possible following envelope, or 0 if
 * nothing usable was found.
 *
 * Finally, no one wants to see CRLF line endings here. Ever. ffs.
 */
-size_t envelope_get (const std::string& data, size_t offset,
+size_t envelope_read (const std::string& data, size_t offset,
-                     std::string&out_type,
+                      std::string&out_type,
-                     std::vector<std::string>&out_parts);
+                      std::vector<std::string>&out_parts);
 /*
 * envelope_format simply formats a compatible envelope of the given type from
 * the parts; rng is used for generating the random termident.
 */
 std::string envelope_format (const std::string&type,
                             const std::vector<std::string>& parts,
                             prng&rng);
 #endif