From 105a7731d3502f8a5b66abc773fe17249848a9e2 Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Sat, 18 May 2013 09:08:24 +0200 Subject: [PATCH] bvector: massive ranking/unranking speedup --- src/bvector.cpp | 153 ++++++++++++++++++++++++++++++++---------------- src/bvector.h | 2 +- 2 files changed, 103 insertions(+), 52 deletions(-) diff --git a/src/bvector.cpp b/src/bvector.cpp index f7e03c3..6735587 100644 --- a/src/bvector.cpp +++ b/src/bvector.cpp @@ -139,12 +139,42 @@ void bvector::from_string (const std::string&in) * utility colex (un)ranking for niederreiter and workalikes. * see Ruskey's Combinatorial Generation, algorithm 4.10 * - * TODO use (external) cache for combination numbers to speed this up. + * Colex ranking here uses "walking" through combination number space instead + * of caching or actual combination number generation. For a nice image, see + * the book. + * + * Suppose that you have a = (n choose k) + * + * then: + * (n+1 choose k) = a * (n+1) / (n-k+1) + * (n-1 choose k) = a * (n-k) / n + * (n choose k+1) = a * (n-k) / (k+1) + * (n choose k-1) = a * k / (n-k+1) + * + * Because colex ranking forms a "path" through tne combination number space + * (from (1 choose 1) to (length choose count)), worst operations actually use + * at most (length+count) multiplications and divisions, for worst McEliece + * parameters that is around 16k operations total (plus starting combination + * number computation un case of unranking, which is around 500 long ops.) + * + * To compare naive approach: + * - ranking needs computation of the same count of combination numbers as + * count parameter. Those are (n choose k) with average n=(length/2) and + * k=(count/2), every number needs 2k long operations, total operations is + * count*count/2 = around 32k for mceliece approach. + * - unranking needs terrible number of "attempts" to determine each p (around + * 13 in binary search), this basically means it uses around 0.4M long + * operations. Wrong way. + * + * For extremely sparse vectors (where 2*(length+count)>count*count/2, roughly + * where count>2*sqrt(length)) it can be benefical to compute stuff using the + * naive approach. This doesn't count for our McEliece parameters, so naive + * approach is not implemented at all. */ #include -static void combination_number (uint n, uint k, mpz_t& r) +static void combination_number (mpz_t& r, uint n, uint k) { mpz_t t; if (k > n) { @@ -190,83 +220,104 @@ static void mpz_to_bvector (mpz_t&x, bvector&r) void bvector::colex_rank (bvector&r) const { - mpz_t res, t, t2; + mpz_t res, comb, t; mpz_init_set_ui (res, 0); + mpz_init_set_ui (comb, 1); mpz_init (t); - mpz_init (t2); - uint i, j; - j = 1; - for (i = 0; i < size(); ++i) - if (item (i) ) { - combination_number (i, j, t); - mpz_swap (t2, res); - mpz_add (res, t, t2); - ++j; + uint n = 0, k = 1; + + while (item (n) ) ++n, ++k; //skip the "zeroes" on the beginning + + ++n; //now n=k=1, comb=1 + + //non-zero positions + for (; n < size(); ++n) { + + if (item (n) ) { + //add combination number to result + mpz_swap (t, res); + mpz_add (res, t, comb); } + //increase n in comb + mpz_swap (t, comb); + mpz_mul_ui (comb, t, n + 1); + mpz_swap (t, comb); + mpz_fdiv_q_ui (comb, t, n - k + 1); + + if (item (n) ) { + //increase k in comb + mpz_swap (t, comb); + mpz_mul_ui (comb, t, n + 1 - k); //n has changed! + mpz_swap (t, comb); + mpz_fdiv_q_ui (comb, t, k + 1); + ++k; + } + } + mpz_to_bvector (res, r); + mpz_clear (comb); mpz_clear (t); - mpz_clear (t2); mpz_clear (res); } -void bvector::colex_unrank (bvector&res, uint n, uint k) const +bool bvector::colex_unrank (bvector&res, uint n, uint k) const { - mpz_t r, t, t2; + mpz_t r, comb, t, t2; mpz_init (r); + mpz_init (comb); mpz_init (t); - mpz_init (t2); bvector_to_mpz (*this, r); + combination_number (comb, n, k); //initialize to the end of path res.clear(); + + //check if incoming r is not too big. + if (mpz_cmp (r, comb) >= 0) { + mpz_clear (r); + mpz_clear (comb); + mpz_clear (t); + return false; + } + res.resize (n, 0); - for (uint i = k; i > 0; --i) { - /* Original code: + for (; k > 0; --k) { + if (mpz_sgn (r) == 0) //zero r needs n switch to simple mode + break; - uint p = i; - for (;;) { - combination_number (p, i, t); - - if (mpz_cmp (t, r) > 0) break; - ++p; + while (n > k && mpz_cmp (comb, r) > 0) { + //decrease n until something <=r is found + mpz_swap (t, comb); + mpz_mul_ui (comb, t, n - k); + mpz_swap (t, comb); + mpz_fdiv_q_ui (comb, t, n); + --n; } - * ...that kindof lacks speed. We're actually trying to find - * the smallest value of p for which comb(p,i)>r. - * - * Computing all combination numbers is KIND OF slow and cache - * (as suggested by Barreto and others) doesn't really make big - * difference here (we're usually doing only one or two runs of - * this stuff in one program run). Storing about 50megs of - * precalculated combination numbers is weird as well. - * - * Therefore, with the knowledge that i <= p <= n, we're - * halving the search interval as usual. - */ + res[n] = 1; - uint p, a = i, b = n + 1; - while (a < b) { - p = (a + b) / 2; + //r -= comb + mpz_swap (t, r); + mpz_sub (r, t, comb); - combination_number (p, i, t); - if (mpz_cmp (t, r) > 0) b = p; - else a = p + 1; - } + //decrease k + mpz_swap (t, comb); + mpz_mul_ui (comb, t, k); + mpz_swap (t, comb); + mpz_fdiv_q_ui (comb, t, n - k + 1); + } - combination_number (b - 1, i, t); - mpz_swap (t2, r); - mpz_sub (r, t2, t); - - //overflow protection (result's wrong anyway now) - if (b > n) continue; - res[b - 1] = 1; + //do the "zeroes" rest + for (; k > 0; --k) { + res[k - 1] = 1; } mpz_clear (r); + mpz_clear (comb); mpz_clear (t); - mpz_clear (t2); + return true; } diff --git a/src/bvector.h b/src/bvector.h index 1857d24..2536ed2 100644 --- a/src/bvector.h +++ b/src/bvector.h @@ -55,7 +55,7 @@ public: void from_poly_cotrace (const polynomial&, gf2m&); void colex_rank (bvector&) const; - void colex_unrank (bvector&, uint n, uint k) const; + bool colex_unrank (bvector&, uint n, uint k) const; bool to_string (std::string&) const; void from_string (const std::string&);