envelope: impl
This commit is contained in:
parent
4b844ffd20
commit
74e7e6226f
184
src/envelope.cpp
184
src/envelope.cpp
|
@ -18,6 +18,26 @@
|
||||||
|
|
||||||
#include "envelope.h"
|
#include "envelope.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* helpers
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
 * Tells whether a single character is allowed inside an identifier:
 * ASCII digits, ASCII letters (either case), underscore and dot pass,
 * everything else is refused. The ranges are spelled out by hand, so the
 * result does not depend on the current locale.
 */
inline static bool acceptable_char (char c)
{
	if (c >= '0' && c <= '9') return true;
	if (c >= 'A' && c <= 'Z') return true;
	if (c >= 'a' && c <= 'z') return true;

	//the only two punctuation characters we tolerate
	return c == '.' || c == '_';
}
|
||||||
|
|
||||||
|
static bool acceptable_id (const std::string&a)
|
||||||
|
{
|
||||||
|
for (size_t i = 0; i < a.length(); ++i)
|
||||||
|
if (!acceptable_char (a[i]) ) return false;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* how do the ascii envelopes look like?
|
* what do the ascii envelopes look like?
|
||||||
*
|
*
|
||||||
|
@ -34,7 +54,7 @@
|
||||||
* To distinguish ourselves from PGP, we use six dashes and prefixed CCR name.
|
* To distinguish ourselves from PGP, we use six dashes and prefixed CCR name.
|
||||||
* No version information is supplied - versioning should be contained
|
* No version information is supplied - versioning should be contained
|
||||||
* preferably in typeident, e.g. like "message" "better_message" and
|
* preferably in typeident, e.g. like "message" "better_message" and
|
||||||
* "bettermessage-version3".
|
* "bettermessage_version3.5".
|
||||||
*
|
*
|
||||||
* Cleartext two-part messages and similar evil sorceries are generalized to
|
* Cleartext two-part messages and similar evil sorceries are generalized to
|
||||||
* multipart messages using the "part cut".
|
* multipart messages using the "part cut".
|
||||||
|
@ -42,33 +62,151 @@
|
||||||
* Also, to prevent cleartext embedding conflicts, we add termident, which is
|
* Also, to prevent cleartext embedding conflicts, we add termident, which is
|
||||||
* basically a random string of letters and numbers that serves as a mark that
|
* basically a random string of letters and numbers that serves as a mark that
|
||||||
* must be the same on the begin and end.
|
* must be the same on the begin and end.
|
||||||
|
*
|
||||||
|
* Also also, there's always newline before cut/end, even if there already is a
|
||||||
|
* newline from previous cut. Therefore, size 0 section looks like it has an
|
||||||
|
* empty line inside.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
size_t envelope_get (const std::string&data, size_t offset,
|
size_t envelope_read (const std::string&data, size_t offset,
|
||||||
std::string&out_type,
|
std::string&out_type,
|
||||||
std::vector<std::string>&out_parts)
|
std::vector<std::string>&out_parts)
|
||||||
|
{
|
||||||
|
for (;;) {
|
||||||
|
//try to find begin mark.
|
||||||
|
std::string
|
||||||
|
begin_prefix = "------ccr begin ",
|
||||||
|
begin_suffix = "------\n";
|
||||||
|
size_t begin = data.find (begin_prefix, offset);
|
||||||
|
|
||||||
|
//nothing possible found, die.
|
||||||
|
if (begin == data.npos) return 0;
|
||||||
|
|
||||||
|
//verify it's on the beginning of the line
|
||||||
|
if (begin > 0) if (data[begin - 1] != '\n') {
|
||||||
|
offset += begin_prefix.length();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
//try to parse the typeident and termident
|
||||||
|
std::string type, term;
|
||||||
|
offset += begin_prefix.length();
|
||||||
|
|
||||||
|
//find and verify possible positions of type and term strings
|
||||||
|
size_t eoterm = data.find (begin_suffix, offset),
|
||||||
|
eotype = data.find (' ', offset);
|
||||||
|
if (eoterm == data.npos ||
|
||||||
|
eotype == data.npos ||
|
||||||
|
eotype > (eoterm - 1) )
|
||||||
|
continue;
|
||||||
|
|
||||||
|
type = data.substr (offset, eotype);
|
||||||
|
term = data.substr (eotype + 1, eoterm);
|
||||||
|
|
||||||
|
//verify that type&term are only of acceptable characters
|
||||||
|
if (!acceptable_id (type) || !acceptable_id (term) )
|
||||||
|
continue;
|
||||||
|
|
||||||
|
offset = eoterm + begin_suffix.length();
|
||||||
|
|
||||||
|
//read all sections
|
||||||
|
std::string
|
||||||
|
cut_sep = "\n------ccr cut " + type + " " + term + "------\n",
|
||||||
|
end_sep = "\n------ccr end " + type + " " + term + "------\n";
|
||||||
|
|
||||||
|
out_parts.clear();
|
||||||
|
|
||||||
|
bool retry = false;
|
||||||
|
for (;;) {
|
||||||
|
//find closest cut or sep
|
||||||
|
size_t cut_pos = data.find (cut_sep, offset),
|
||||||
|
end_pos = data.find (end_sep, offset);
|
||||||
|
|
||||||
|
if (end_pos == data.npos) {
|
||||||
|
//can't even find end, don't care about cut_pos
|
||||||
|
retry = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( cut_pos != data.npos && cut_pos < end_pos) {
|
||||||
|
//there is cut
|
||||||
|
out_parts.push_back (data.substr (offset,
|
||||||
|
cut_pos) );
|
||||||
|
} else {
|
||||||
|
//no cut, it's till the end
|
||||||
|
out_parts.push_back (data.substr (offset,
|
||||||
|
end_pos) );
|
||||||
|
}
|
||||||
|
|
||||||
|
if (cut_pos == data.npos) {
|
||||||
|
//it was end_pos, finished!
|
||||||
|
offset = end_pos + end_sep.length();
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
//move offset for next search
|
||||||
|
offset = cut_pos + cut_sep.length();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (retry) continue;
|
||||||
|
|
||||||
|
//return the modified offset
|
||||||
|
return offset;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The Much Simpler Envelope Formatter!
|
||||||
|
*/
|
||||||
|
|
||||||
|
static void gen_random_term (std::string&out, prng&rng, size_t length)
|
||||||
|
{
|
||||||
|
//this could be longer, but don't generate absolute mess.
|
||||||
|
static const char letters[] = "abcdefghijklmnopqrstuvwxyz0123456789";
|
||||||
|
|
||||||
|
out.resize (length);
|
||||||
|
for (size_t i = 0; i < length; ++i) {
|
||||||
|
out[i] = letters[rng.random (36)];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string envelope_format (const std::string&type,
|
||||||
|
const std::vector<std::string>& parts,
|
||||||
|
prng&rng)
|
||||||
{
|
{
|
||||||
|
|
||||||
size_t begin;
|
|
||||||
|
|
||||||
restart:
|
|
||||||
//try to find begin mark.
|
|
||||||
begin = data.find ("------ccr begin ", offset);
|
|
||||||
|
|
||||||
//nothing possible found, die.
|
|
||||||
if (begin == data.npos) return 0;
|
|
||||||
|
|
||||||
//try to parse the begin mark
|
|
||||||
std::string type, mark;
|
|
||||||
|
|
||||||
//TODO parse it lol
|
|
||||||
//TODO move offset
|
|
||||||
|
|
||||||
//read all sections
|
|
||||||
for (;;) {
|
for (;;) {
|
||||||
|
std::string term;
|
||||||
|
gen_random_term (term, rng, 16);
|
||||||
|
|
||||||
|
std::string
|
||||||
|
cut_sep = "\n------ccr cut " + type + " " + term + "------\n",
|
||||||
|
end_sep = "\n------ccr end " + type + " " + term + "------\n";
|
||||||
|
|
||||||
|
//check whether there's no collision with boundary
|
||||||
|
bool good = true;
|
||||||
|
std::vector<std::string>::const_iterator i, e;
|
||||||
|
for (i = parts.begin(), e = parts.end(); i != e; ++i) {
|
||||||
|
if ( i->find (cut_sep) != i->npos ||
|
||||||
|
i->find (end_sep) != i->npos) {
|
||||||
|
good = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!good) continue; //retry generating the termident mark
|
||||||
|
|
||||||
|
//now construct the result
|
||||||
|
std::string
|
||||||
|
res = "------ccr begin " + type + " " + term + "------\n";
|
||||||
|
|
||||||
|
if (parts.size() > 0) {
|
||||||
|
res += parts[0];
|
||||||
|
for (i = parts.begin() + 1, e = parts.end();
|
||||||
|
i != e; ++i) {
|
||||||
|
res += cut_sep;
|
||||||
|
res += *i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
res += end_sep;
|
||||||
}
|
}
|
||||||
|
|
||||||
//return the modified offset
|
|
||||||
return offset;
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -22,8 +22,10 @@
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
#include "prng.h"
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Tools for finding envelopes in ascii/utf-8 text.
|
* Tool for finding envelopes in ascii/utf-8 text.
|
||||||
*
|
*
|
||||||
* We simply don't care about wide chars in text, UTF-16+, nor conflicting
|
* We simply don't care about wide chars in text, UTF-16+, nor conflicting
|
||||||
* encodings, nor any similar abominations.
|
* encodings, nor any similar abominations.
|
||||||
|
@ -31,11 +33,19 @@
|
||||||
* envelope_get tries to find an envelope in text data, starting from offset,
|
* envelope_read tries to find an envelope in text data, starting from offset,
|
||||||
* returning the offset of first possible following envelope or 0 if nothing
|
* returning the offset of first possible following envelope or 0 if nothing
|
||||||
* usuable was found.
|
* usable was found.
|
||||||
|
*
|
||||||
|
* Finally, no one wants to see CRLF line endings here. Ever. ffs.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
size_t envelope_get (const std::string& data, size_t offset,
|
size_t envelope_read (const std::string& data, size_t offset,
|
||||||
std::string&out_type,
|
std::string&out_type,
|
||||||
std::vector<std::string>&out_parts);
|
std::vector<std::string>&out_parts);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* this simply formats a compatible envelope
|
||||||
|
*/
|
||||||
|
std::string envelope_format (const std::string&type,
|
||||||
|
const std::vector<std::string>& parts,
|
||||||
|
prng&rng);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue