diff --git a/autogen.sh b/autogen.sh
index a16e002..c11562f 100755
--- a/autogen.sh
+++ b/autogen.sh
@@ -5,7 +5,7 @@ NAME="ccr"
 
 COMMON_CPPFLAGS="-I/usr/local/include"
 COMMON_CFLAGS="-Wall"
-COMMON_CXXFLAGS="${COMMON_CFLAGS}"
+COMMON_CXXFLAGS="${COMMON_CFLAGS} -std=c++11"
 COMMON_LDFLAGS="-L/usr/local/lib"
 COMMON_LDADD=""
 
@@ -28,7 +28,7 @@ echo "${NAME}_CPPFLAGS = -I\$(srcdir)/$i/ ${COMMON_CPPFLAGS}" >>$OUT
 echo "${NAME}_CFLAGS = ${COMMON_CFLAGS}" >>$OUT
 echo "${NAME}_CXXFLAGS = ${COMMON_CXXFLAGS}" >>$OUT
 echo "${NAME}_LDFLAGS = ${COMMON_LDFLAGS}" >>$OUT
-echo "${NAME}_LDADD = -lgmp @CRYPTOPP_LIBS@ ${COMMON_LDADD} " >>$OUT
+echo "${NAME}_LDADD = -lgmp -lfftw3 -lm @CRYPTOPP_LIBS@ ${COMMON_LDADD} " >>$OUT
 
 libtoolize --force && aclocal && autoconf && automake --add-missing

diff --git a/configure.ac b/configure.ac
index 3d88782..8e30313 100644
--- a/configure.ac
+++ b/configure.ac
@@ -18,6 +18,10 @@ AC_PROG_INSTALL
 AC_CHECK_HEADERS([gmp.h], , AC_MSG_ERROR([Codecrypt requires gmp.h]))
 AC_CHECK_LIB(gmp, __gmpz_init, , AC_MSG_ERROR([Codecrypt requires libgmp]))
 
+#check for FFTW library presence
+AC_CHECK_HEADERS([fftw3.h], , AC_MSG_ERROR([Codecrypt requires fftw3.h]))
+AC_CHECK_LIB(fftw3, fftw_plan_dft_1d, , AC_MSG_ERROR([Codecrypt requires libfftw3]))
+
 #check whether to build with crypto++
 AC_ARG_WITH([cryptopp],
 AC_HELP_STRING([--with-cryptopp],[Build algorithms that need Crypto++ support]),
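What the new configure checks amount to: fftw3.h must be includable and libfftw3 must export fftw_plan_dft_1d. A minimal standalone probe (hypothetical file name, not part of the patch) that exercises exactly these, built roughly as g++ -std=c++11 fftw_probe.cpp -lfftw3 -lm, matching the flags autogen.sh now emits:

// fftw_probe.cpp -- if this compiles, links and prints four ones,
// the checks added to configure.ac above would pass as well
#include <fftw3.h>
#include <complex>
#include <cstdio>

int main ()
{
	std::complex<double> in[4] = {1, 0, 0, 0}, out[4];

	//fftw_plan_dft_1d is the exact symbol AC_CHECK_LIB probes for
	fftw_plan p = fftw_plan_dft_1d (4,
	                                reinterpret_cast<fftw_complex*> (in),
	                                reinterpret_cast<fftw_complex*> (out),
	                                FFTW_FORWARD, FFTW_ESTIMATE);
	fftw_execute (p);
	fftw_destroy_plan (p);

	//the DFT of a unit impulse is all-ones
	for (int i = 0; i < 4; ++i)
		printf ("%g%+gi\n", out[i].real(), out[i].imag());
	return 0;
}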
diff --git a/src/actions.cpp b/src/actions.cpp
index 1f0a0e4..2c12b90 100644
--- a/src/actions.cpp
+++ b/src/actions.cpp
@@ -108,16 +108,15 @@ algspectable_t& algspectable()
 	static bool init = false;
 
 	if (!init) {
-		table["enc"] = "MCEQD128FO-CUBE256-CHACHA20";
-		table["enc-strong"] = "MCEQD192FO-CUBE384-CHACHA20";
-		table["enc-strongest"] = "MCEQD256FO-CUBE512-CHACHA20";
+		table["enc"] = "MCEQCMDPC128FO-CUBE256-CHACHA20";
+		table["enc-256"] = "MCEQCMDPC256FO-CUBE512-CHACHA20";
 		table["sig"] = "FMTSEQ128C-CUBE256-CUBE128";
-		table["sig-strong"] = "FMTSEQ192C-CUBE384-CUBE192";
-		table["sig-strongest"] = "FMTSEQ256C-CUBE512-CUBE256";
+		table["sig-192"] = "FMTSEQ192C-CUBE384-CUBE192";
+		table["sig-256"] = "FMTSEQ256C-CUBE512-CUBE256";
 		table["sym"] = "chacha20,sha256";
-		table["sym-strong"] = "chacha20,xsynd,arcfour,cube512,sha512";
+		table["sym-combined"] = "chacha20,xsynd,arcfour,cube512,sha512";
 
 		init = true;
 	}

diff --git a/src/algos_enc.cpp b/src/algos_enc.cpp
index 8463ad7..20d8fd6 100644
--- a/src/algos_enc.cpp
+++ b/src/algos_enc.cpp
@@ -419,21 +419,21 @@ int algo_mceqcmdpc##name::create_keypair (sencode**pub, sencode**priv, prng&rng)
 
 #if HAVE_CRYPTOPP==1
 
-mceqcmdpc_create_keypair_func (128, 9857, 2, 71, 134, 60, 7)
-mceqcmdpc_create_keypair_func (256, 32771, 2, 137, 264, 60, 7)
-mceqcmdpc_create_keypair_func (128cha, 9857, 2, 71, 134, 60, 7)
-mceqcmdpc_create_keypair_func (256cha, 32771, 2, 137, 264, 60, 7)
-mceqcmdpc_create_keypair_func (128xs, 9857, 2, 71, 134, 60, 7)
-mceqcmdpc_create_keypair_func (256xs, 32771, 2, 137, 264, 60, 7)
+mceqcmdpc_create_keypair_func (128, 9857, 2, 71, 134, 60, 5)
+mceqcmdpc_create_keypair_func (256, 32771, 2, 137, 264, 60, 8)
+mceqcmdpc_create_keypair_func (128cha, 9857, 2, 71, 134, 60, 5)
+mceqcmdpc_create_keypair_func (256cha, 32771, 2, 137, 264, 60, 8)
+mceqcmdpc_create_keypair_func (128xs, 9857, 2, 71, 134, 60, 5)
+mceqcmdpc_create_keypair_func (256xs, 32771, 2, 137, 264, 60, 8)
 
 #endif //HAVE_CRYPTOPP==1
 
-mceqcmdpc_create_keypair_func (128cube, 9857, 2, 71, 134, 60, 7)
-mceqcmdpc_create_keypair_func (256cube, 32771, 2, 137, 264, 60, 7)
-mceqcmdpc_create_keypair_func (128cubecha, 9857, 2, 71, 134, 60, 7)
-mceqcmdpc_create_keypair_func (256cubecha, 32771, 2, 137, 264, 60, 7)
-mceqcmdpc_create_keypair_func (128cubexs, 9857, 2, 71, 134, 60, 7)
-mceqcmdpc_create_keypair_func (256cubexs, 32771, 2, 137, 264, 60, 7)
+mceqcmdpc_create_keypair_func (128cube, 9857, 2, 71, 134, 60, 5)
+mceqcmdpc_create_keypair_func (256cube, 32771, 2, 137, 264, 60, 8)
+mceqcmdpc_create_keypair_func (128cubecha, 9857, 2, 71, 134, 60, 5)
+mceqcmdpc_create_keypair_func (256cubecha, 32771, 2, 137, 264, 60, 8)
+mceqcmdpc_create_keypair_func (128cubexs, 9857, 2, 71, 134, 60, 5)
+mceqcmdpc_create_keypair_func (256cubexs, 32771, 2, 137, 264, 60, 8)
 
 #define mceqcmdpc_create_encdec_func(name,bs,bc,errcount,hash_type,pad_hash_type,scipher,ranksize) \
 int algo_mceqcmdpc##name::encrypt (const bvector&plain, bvector&cipher, \

diff --git a/src/bvector.cpp b/src/bvector.cpp
index a2a55a0..127569e 100644
--- a/src/bvector.cpp
+++ b/src/bvector.cpp
@@ -214,6 +214,91 @@ bool bvector::zero() const
 	return true;
 }
 
+bool bvector::one() const
+{
+	//zero padding again
+	for (size_t i = 0; i < _data.size(); ++i) if (i == 0) {
+			if (_data[i] != 1) return false;
+		} else if (_data[i] != 0) return false;
+	return true;
+}
+
+int bvector::degree()
+{
+	//find the position of the last non-zero item
+	int r;
+	for (r = _data.size() - 1; r >= 0; --r) if (_data[r]) break;
+	if (r < 0) return -1; //only zeroes.
+	uint64_t tmp = _data[r];
+	int res = 64 * r;
+	while (tmp > 1) {
+		++res;
+		tmp >>= 1;
+	}
+	return res;
+}
+
+void bvector::poly_strip()
+{
+	resize (degree() + 1);
+}
+
+bvector bvector::ext_gcd (const bvector&b, bvector&s0, bvector&t0)
+{
+	//result gcd(this,b) = s*this + t*b
+	bvector s1, t1;
+	s0.clear();
+	s1.clear();
+	t0.clear();
+	t1.clear();
+	s0.resize (1, 1);
+	t1.resize (1, 1);
+	bvector r1 = b;
+	bvector r0 = *this;
+
+	for (;;) {
+		int d0 = r0.degree();
+		int d1 = r1.degree();
+		//out ("r0" << r0 << "r1" << r1 << "s0" << s0 << "s1" << s1 << "t0" << t0 << "t1" << t1 << "d0=" << d0 << " d1=" << d1);
+		if (d0 < 0) {
+			s0.swap (s1);
+			t0.swap (t1);
+			return r1;
+		}
+		if (d1 < 0) {
+			//this would result in reorganization and failure in
+			//next step, return it the other way
+			return r0;
+		}
+		if (d0 > d1) {
+			//quotient is zero, reverse the thing manually
+			s0.swap (s1);
+			t0.swap (t1);
+			r0.swap (r1);
+			continue;
+		}
+
+		//we only consider quotient in form q=x^(log q)
+		//("only subtraction, not divmod, still slow")
+		int logq = d1 - d0;
+
+		//r(i+1)=r(i-1)-q*r(i)
+		//s(i+1)=s(i-1)-q*s(i)
+		//t(i+1)=t(i-1)-q*t(i)
+		r1.add_offset (r0, logq);
+		s1.add_offset (s0, logq);
+		t1.add_offset (t0, logq);
+		r1.poly_strip();
+		s1.poly_strip();
+		t1.poly_strip();
+
+		//"rotate" the thing to new positions
+		r1.swap (r0);
+		s1.swap (s0);
+		t1.swap (t0);
+	}
+}
+
 void bvector::from_poly_cotrace (const polynomial&r, gf2m&fld)
 {
 	clear();
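The point of ext_gcd above: in GF(2)[x] the extended Euclid identity gcd(h, x^n-1) = s*h + t*(x^n-1) means that whenever the gcd is 1, s is the inverse of h modulo x^n-1, and inverting the first row of a circulant block as a polynomial is the same thing as inverting the block as a matrix. A minimal sketch of the intended call pattern (the block size and bit positions are made-up illustration values; mce_qcmdpc.cpp below does the same with the real parameters):

#include "bvector.h"

//sketch: invert the sparse polynomial h = 1 + x^3 + x^7 modulo x^11-1
bool invert_circulant_row (bvector&h_inv)
{
	const size_t n = 11; //made-up block size

	bvector h;
	h.resize (n, 0);
	h[0] = 1;
	h[3] = 1;
	h[7] = 1;

	bvector modulus; //x^n-1, which over GF(2) is x^n+1
	modulus.resize (n + 1, 0);
	modulus[0] = 1;
	modulus[n] = 1;

	//rem = gcd(h, x^n-1) = h_inv*h + t*(x^n-1)
	bvector t;
	bvector rem = h.ext_gcd (modulus, h_inv, t);
	if (!rem.one()) return false; //not coprime, block not invertible

	h_inv.resize (n, 0); //pad the polynomial back to block size
	return true; //now (h * h_inv) mod (x^n-1) == 1
}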
diff --git a/src/bvector.h b/src/bvector.h
index 1e57a8d..4c50e05 100644
--- a/src/bvector.h
+++ b/src/bvector.h
@@ -100,11 +100,11 @@ private:
 		return s >> 6;
 	}
 
-	void fix_padding();
 protected:
 	_ccr_declare_vector_item
 
 public:
+	void fix_padding();
 	bvector() {
 		_size = 0;
 	}
@@ -176,13 +176,15 @@ public:
 	uint hamming_weight();
 	void append (const bvector&);
 	void add (const bvector&);
-	void add_offset (const bvector&, size_t offset_from, size_t offset_to, size_t cnt = 0);
+	void add_offset (const bvector&,
+	                 size_t offset_from, size_t offset_to,
+	                 size_t cnt = 0);
 	void add_offset (const bvector&, size_t offset_to);
 	void add_range (const bvector&, size_t, size_t);
 	void rot_add (const bvector&, size_t);
 	void set_block (const bvector&, size_t);
-	void get_block (size_t, size_t, bvector&) const;
+	void get_block (size_t start, size_t cnt, bvector&) const;
 	uint and_hamming_weight (const bvector&) const;
 
 	inline bool operator* (const bvector&a) const {
@@ -191,6 +193,11 @@ public:
 	}
 
 	bool zero() const;
+	bool one() const;
+
+	int degree();
+	void poly_strip();
+	bvector ext_gcd (const bvector&b, bvector&s, bvector&t);
 
 	void from_poly_cotrace (const polynomial&, gf2m&);

diff --git a/src/fft.cpp b/src/fft.cpp
new file mode 100644
index 0000000..70ce632
--- /dev/null
+++ b/src/fft.cpp
@@ -0,0 +1,71 @@
+
+/*
+ * This file is part of Codecrypt.
+ *
+ * Codecrypt is free software: you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or (at
+ * your option) any later version.
+ *
+ * Codecrypt is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ * License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Codecrypt. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "fft.h"
+
+#include <fftw3.h>
+#include <math.h>
+#include <vector>
+
+/*
+ * FFTW wraparound for performing fast multiplication of cyclic matrices.
+ *
+ * It would probably be cool to save wisdom manually or generate better plans,
+ * but since we're usually doing less than 10 FFTs for each run of codecrypt,
+ * the thing doesn't pay off. Feel free to implement it.
+ */
+
+#include "iohelpers.h"
+
+void fft (bool forward, std::vector<dcx>&in, std::vector<dcx>&out)
+{
+	fftw_plan p;
+	out.resize (in.size(), dcx (0, 0));
+
+	p = fftw_plan_dft_1d (in.size(),
+	                      //Cin, Cout,
+	                      reinterpret_cast<fftw_complex*> (in.data()),
+	                      reinterpret_cast<fftw_complex*> (out.data()),
+	                      forward ? FFTW_FORWARD : FFTW_BACKWARD,
+	                      FFTW_ESTIMATE);
+
+	fftw_execute (p);
+	fftw_destroy_plan (p);
+
+	//FFTW transforms are unnormalized; scale the backward one by 1/n
+	if (!forward)
+		for (size_t i = 0; i < out.size(); ++i)
+			out[i] /= (double) out.size();
+}
+
+void fft (bvector&inb, std::vector<dcx>&out)
+{
+	std::vector<dcx> in;
+	in.resize (inb.size(), dcx (0, 0));
+	for (size_t i = 0; i < inb.size(); ++i) if (inb[i]) in[i] = dcx (1, 0);
+	fft (true, in, out);
+}
+
+void fft (std::vector<dcx>&in, bvector&outb)
+{
+	std::vector<dcx> out;
+	fft (false, in, out);
+	outb.resize (out.size());
+	outb.fill_zeros();
+	for (size_t i = 0; i < out.size(); ++i)
+		if (1 & (int) round (out[i].real())) outb[i] = 1;
+}
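Usage sketch for the wrapper: multiplying two polynomials modulo x^n-1 (equivalently, multiplying by a cyclic matrix) becomes a pointwise product in the Fourier domain, and the parity rounding in the backward conversion maps the integer convolution back to GF(2). A minimal illustration, assuming both inputs already have the same length n (this is the exact pattern keygen below uses):

#include "fft.h"

//sketch: c = a*b mod (x^n-1) over GF(2), in O(n log n); the arguments
//are non-const only because the fft wrapper takes non-const references
void cyclic_multiply_gf2 (bvector&a, bvector&b, bvector&c)
{
	std::vector<dcx> A, B;
	fft (a, A); //lift the GF(2) coefficients to C and transform
	fft (b, B);

	//convolution theorem: cyclic convolution = pointwise spectral product
	for (size_t i = 0; i < A.size(); ++i)
		A[i] *= B[i];

	//backward transform; the real parts are near-integer convolution
	//coefficients whose parity is the GF(2) result
	fft (A, c);
}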
diff --git a/src/fft.h b/src/fft.h
new file mode 100644
index 0000000..b4acfaa
--- /dev/null
+++ b/src/fft.h
@@ -0,0 +1,32 @@
+
+/*
+ * This file is part of Codecrypt.
+ *
+ * Codecrypt is free software: you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or (at
+ * your option) any later version.
+ *
+ * Codecrypt is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ * License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Codecrypt. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _ccr_fft_h_
+#define _ccr_fft_h_
+
+#include "bvector.h"
+#include <complex>
+
+typedef std::complex<double> dcx;
+
+void fft (bool forward, std::vector<dcx>&in, std::vector<dcx>&out);
+
+//direct conversion from/to GF(2)
+void fft (bvector&in, std::vector<dcx>&out);
+void fft (std::vector<dcx>&in, bvector&out);
+
+#endif
diff --git a/src/mce_qcmdpc.cpp b/src/mce_qcmdpc.cpp
index 6307704..17be9d0 100644
--- a/src/mce_qcmdpc.cpp
+++ b/src/mce_qcmdpc.cpp
@@ -18,78 +18,99 @@
 
 #include "mce_qcmdpc.h"
 
-#include "gf2m.h"
-#include "polynomial.h"
+#include "fft.h"
+#include <vector>
 
 using namespace mce_qcmdpc;
+using namespace std;
+
+#include "iohelpers.h"
+#include "ios.h"
 
 int mce_qcmdpc::generate (pubkey&pub, privkey&priv, prng&rng,
                           uint block_size, uint block_count,
                           uint wi, uint t, uint rounds, uint delta)
 {
 	uint i, j;
 
-	priv.H.resize (block_count);
-
 	if (wi > block_size / 2) return 1; //safety
 
+	priv.H.resize (block_count);
+	pub.G.resize (block_count - 1);
+
 	/*
-	 * Trick. Cyclomatic matrix of size n is invertible if a
-	 * polynomial that's made up from its first row is coprime to
-	 * (x^n-1), the polynomial inversion and matrix inversion are
-	 * then isomorphic.
+	 * Cyclic matrices are diagonalizable by FFT so this stuff gets pretty
+	 * fast. Otherwise they behave like simple polynomials over GF(2) mod
+	 * (1+x^n).
 	 */
 
-	gf2m gf;
-	gf.create (1); //binary
-	polynomial xmm1; //x^m-1
-	xmm1.resize (block_size + 1, 0);
-	xmm1[0] = 1;
-	xmm1[block_size] = 1;
-	polynomial last_inv_H;
+	vector<dcx> H_last_inv;
 
 	for (;;) {
 		//retry generating the rightmost block until it is invertible
-		polynomial g;
-		g.resize (block_size, 0);
+		bvector Hb;
+		Hb.resize (block_size, 0);
 		for (i = 0; i < wi; ++i)
 			for (uint pos = rng.random (block_size);
-			     g[pos] ? 1 : (g[pos] = 1, 0);
+			     Hb[pos] ? 1 : (Hb[pos] = 1, 0);
 			     pos = rng.random (block_size));
 
-		//try if it is coprime to (x^n-1)
-		polynomial gcd = g.gcd (xmm1, gf);
-		if (!gcd.one()) continue; //it isn't.
+		bvector xnm1, Hb_inv, tmp;
+		xnm1.resize (block_size + 1, 0);
+		xnm1[0] = 1;
+		xnm1[block_size] = 1; //poly (x^n-1) in gf(2)
 
-		//if it is, save it to matrix (in "reverse" order for columns)
-		priv.H[block_count - 1].resize (block_size, 0);
-		for (i = 0; i < block_size && i < g.size(); ++i)
-			priv.H[block_count - 1][i] = g[ (-i) % block_size];
+		/*
+		 * TODO This is quadratic, speed it up.
+		 *
+		 * No one actually cares about keygen speed yet, but this can
+		 * be done in O(n*log(n)) using Schönhage-Strassen algorithm.
+		 * If speed is required (e.g. for SPF in some ssl replacement,
+		 * *wink* *wink*), use libNTL's GF2X.
+		 *
+		 * NTL one uses simpler Karatsuba with ~O(n^1.58) which should
+		 * (according to wikipedia) be faster for sizes under 32k bits
+		 * because of constant factors involved.
+		 */
+		bvector rem = Hb.ext_gcd (xnm1, Hb_inv, tmp);
+		if (!rem.one()) continue; //not invertible, retry
+		if (Hb_inv.size() > block_size) continue; //totally weird.
+		Hb_inv.resize (block_size, 0); //pad polynomial with zeros
 
-		//invert it, save for later and succeed.
-		g.inv (xmm1, gf);
-		last_inv_H = g;
-		break;
+		//if it is, save it to matrix
+		priv.H[block_count - 1] = Hb;
+
+		//precompute the fft of the inverted last block
+		fft (Hb_inv, H_last_inv);
+
+		break; //success
 	}
 
 	//generate the rests of matrix blocks, fill the G right away.
-	pub.G.resize (block_count - 1);
 	for (i = 0; i < block_count - 1; ++i) {
-		polynomial hi;
-		hi.resize (block_size, 0);
+		bvector Hb;
+		Hb.resize (block_size, 0);
+
 		//generate the polynomial corresponding to the first row
 		for (j = 0; j < wi; ++j)
 			for (uint pos = rng.random (block_size);
-			     hi[pos] ? 1 : (hi[pos] = 1, 0);
+			     Hb[pos] ? 1 : (Hb[pos] = 1, 0);
 			     pos = rng.random (block_size));
+
 		//save it to H
-		priv.H[i].resize (block_size);
-		for (j = 0; j < block_size; ++j) priv.H[i][j] = hi[ (-j) % block_size];
+		priv.H[i] = Hb;
 
 		//compute inv(H[last])*H[i]
-		hi.mult (last_inv_H, gf);
-		hi.mod (xmm1, gf);
+		vector<dcx> H;
+		fft (Hb, H);
+		for (j = 0; j < block_size; ++j)
+			H[j] *= H_last_inv[j];
+		fft (H, Hb);
+
 		//save it to G
-		pub.G[i].resize (block_size);
-		for (j = 0; j < block_size; ++j) pub.G[i][j] = hi[j % block_size];
+		pub.G[i] = Hb;
+		pub.G[i].resize (block_size, 0);
+		//for (j = 0; j < block_size; ++j) pub.G[i][j] = Hb[j];
 	}
 
 	//save the target params
@@ -128,18 +149,37 @@ int pubkey::encrypt (const bvector&in, bvector&out, const bvector&errors)
 	uint ps = plain_size();
 	if (in.size() != ps) return 1;
 	uint bs = G[0].size();
-	for (uint i = 1; i < G.size(); ++i) if (G[i].size() != bs) return 1; //prevent mangled keys
+	uint blocks = G.size();
+	for (uint i = 1; i < blocks; ++i)
+		if (G[i].size() != bs) return 1; //prevent mangled keys
 
 	//first, the checksum part
-	bvector bcheck;
+	vector<dcx> bcheck, Pd, Gd;
+	bcheck.resize (bs, dcx (0, 0)); //initially zero
+	bvector block;
 
-	//G stores first row(s) of the circulant matrix blocks, proceed row-by-row and construct the checkum
-	for (uint i = 0; i < ps; ++i)
-		if (in[i]) bcheck.rot_add (G[ (i % ps) / bs], i % bs);
+	/*
+	 * G stores first row(s) of the circulant matrix blocks. Proceed block
+	 * by block and construct the checksum.
+	 *
+	 * On a side note, it would be cool to store the G already pre-FFT'd,
+	 * but the performance gain wouldn't be interesting enough to
+	 * compensate for 128 times larger public key (each bit would get
+	 * expanded to two doubles). Do it if you want to encrypt bulk data.
+	 */
+
+	for (size_t i = 0; i < blocks; ++i) {
+		in.get_block (i * bs, bs, block);
+		fft (block, Pd);
+		fft (G[i], Gd);
+		for (size_t j = 0; j < bs; ++j)
+			bcheck[j] += Pd[j] * Gd[j];
+	}
 
 	//compute the ciphertext
 	out = in;
-	out.append (bcheck);
+	fft (bcheck, block); //get the checksum part
+	out.append (block);
 	out.add (errors);
 
 	return 0;
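Why the decoder below can work on the syndrome alone: keygen set G[i] = inv(H[last])*H[i], so for a noiseless codeword (m|m*G) the syndrome is sum(H[i]*m[i]) + H[last]*inv(H[last])*sum(H[i]*m[i]) = 0 over GF(2), and the syndrome of a ciphertext therefore depends only on the added error vector. A hedged consistency-check sketch of that property (assumes a keypair from generate() above and the usual plain_size()/cipher_size() accessors on pubkey):

#include "mce_qcmdpc.h"

using namespace mce_qcmdpc;

//sketch: a noiseless codeword has zero syndrome, so nothing gets
//flipped and decryption must return the message unchanged
bool zero_syndrome_roundtrip (pubkey&pub, privkey&priv)
{
	bvector plain, cipher, no_errors, recovered;
	plain.resize (pub.plain_size(), 0);
	plain[0] = 1; //any test message works

	no_errors.resize (pub.cipher_size(), 0);

	//cipher = (plain | plain*G) + 0
	if (pub.encrypt (plain, cipher, no_errors)) return false;
	if (priv.decrypt (cipher, recovered)) return false;

	for (size_t i = 0; i < plain.size(); ++i)
		if (plain[i] != recovered[i]) return false;
	return true;
}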
@@ -155,53 +195,73 @@ int privkey::decrypt (const bvector & in, bvector & out)
 
 int privkey::decrypt (const bvector & in_orig, bvector & out, bvector & errors)
 {
-	uint i;
+	uint i, j;
 	uint cs = cipher_size();
 
 	if (in_orig.size() != cs) return 1;
-	uint bs;
-	bs = H[0].size();
+	uint bs = H[0].size();
+	uint blocks = H.size();
+	for (i = 1; i < blocks; ++i) if (H[i].size() != bs) return 2;
+
+	bvector in = in_orig; //we will modify this.
 
 	/*
	 * probabilistic decoding!
 	 */
 
-	//compute the syndrome first
-	bvector syndrome;
-	syndrome.resize (bs, 0);
-	bvector in = in_orig; //we will modify it
+	vector<dcx> synd_diag, tmp, Htmp;
+	synd_diag.resize (bs, dcx (0, 0));
 
-	for (i = 0; i < cs; ++i) if (in[i])
-			syndrome.rot_add (H[i / bs], (cs - i) % bs);
+	//precompute the syndrome
+	for (i = 0; i < blocks; ++i) {
+		bvector b;
+		b.resize (bs, 0);
+		b.add_offset (in, bs * i, 0, bs);
+		fft (b, tmp);
+		fft (H[i], Htmp);
+		for (j = 0; j < bs; ++j) synd_diag[j] += Htmp[j] * tmp[j];
+	}
 
-	//minimize counts of unsatisfied equations by flipping
-	std::vector<uint> unsatisfied;
-	unsatisfied.resize (cs, 0);
+	bvector syndrome;
+	fft (synd_diag, syndrome);
+
+	vector<uint> unsat;
+	unsat.resize (cs, 0);
 
 	for (i = 0; i < rounds; ++i) {
-		uint bit, max_unsat;
-		bvector tmp;
-		max_unsat = 0;
-		for (bit = 0; bit < cs; ++bit) {
-			tmp.fill_zeros();
-			tmp.rot_add (H[bit / bs], (cs - bit) % bs);
-			unsatisfied[bit] = tmp.and_hamming_weight (syndrome);
-			if (unsatisfied[bit] > max_unsat) max_unsat = unsatisfied[bit];
-		}
-		//TODO what about timing attacks?
+		/*
+		 * count the correlations, abuse the sparsity of matrices.
+		 *
+		 * TODO this is the slowest part of the whole thing. It's all
+		 * probabilistic, maybe there could be some potential to speed
+		 * it up by discarding some (already missing) precision.
+		 */
+
+		for (j = 0; j < cs; ++j) unsat[j] = 0;
+		for (uint Hi = 0; Hi < cs; ++Hi)
+			if (H[Hi / bs][Hi % bs]) {
+				uint blk = Hi / bs;
+				for (j = 0; j < bs; ++j)
+					if (syndrome[j])
+						++unsat[blk * bs
+						        + (j + cs - Hi) % bs];
+			}
+
+		uint max_unsat = 0;
+		for (j = 0; j < cs; ++j)
+			if (unsat[j] > max_unsat) max_unsat = unsat[j];
 		if (!max_unsat) break;
+		//TODO what about timing attacks? :]
 
 		uint threshold = 0;
 		if (max_unsat > delta) threshold = max_unsat - delta;
 
 		//TODO also timing (but it gets pretty statistically hard here I guess)
-		uint flipped = 0;
-		for (bit = 0; bit < cs; ++bit)
-			if (unsatisfied[bit] > threshold) {
+		for (uint bit = 0; bit < cs; ++bit)
+			if (unsat[bit] > threshold) {
 				in[bit] = !in[bit];
-				syndrome.rot_add (H[bit / bs], (cs - bit) % bs);
-				++flipped;
+				syndrome.rot_add (H[bit / bs], bit % bs);
 			}
 	}
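The correlation loop above computes, for every ciphertext bit, how many currently-unsatisfied parity checks contain that bit, walking only the nonzero entries of H. The same flipping rule in a dense, unoptimized form may be easier to follow than the index arithmetic; a sketch (hypothetical helper, not in the patch; H[b] holds the first row of circulant block b, as in privkey):

#include "bvector.h"
#include <vector>

//sketch: one bit-flipping round, O(cs*bs) dense version of the sparse
//correlation loop in privkey::decrypt
void flip_round (std::vector<bvector>&H, bvector&in, bvector&syndrome,
                 uint bs, uint cs, uint delta)
{
	std::vector<uint> unsat (cs, 0);
	bvector col;
	col.resize (bs, 0);

	for (uint bit = 0; bit < cs; ++bit) {
		//the column of H owned by this bit is its block's first
		//row rotated by the in-block position
		col.fill_zeros();
		col.rot_add (H[bit / bs], bit % bs);
		//unsatisfied checks = ones shared with the syndrome
		unsat[bit] = col.and_hamming_weight (syndrome);
	}

	uint max_unsat = 0;
	for (uint bit = 0; bit < cs; ++bit)
		if (unsat[bit] > max_unsat) max_unsat = unsat[bit];
	if (!max_unsat) return; //syndrome already clean

	uint threshold = max_unsat > delta ? max_unsat - delta : 0;

	for (uint bit = 0; bit < cs; ++bit)
		if (unsat[bit] > threshold) {
			in[bit] = !in[bit]; //flip the most suspicious bits
			syndrome.rot_add (H[bit / bs], bit % bs); //fix syndrome
		}
}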