...
 
......@@ -5,23 +5,34 @@ My own take on
[Diceware](http://world.std.com/~reinhold/diceware.html) dictionary
generation and formatting for printing. Released under the WTFPLv2.
Unlike the original Diceware, the dictionary size is always a power of
two, giving a nice round number of bits of entropy per word.
This package provides three scripts:
To choose words at random, it is best to have 8-sided, 4-sided and
2-sided dice (or a coin), but it's also possible to use any dice or
coin using the tables below (or your common sense).
* `gen-dict` will generate a word list based on input text, optionally
rejecting words that are too long or that occur too infrequently.
* `format-dict` will format a generated dictionary for printing, and
index each word using the optimum number of dice rolls, based on
which dice you have available.
* `gen-passphrase` can generate random passphrases without printing or
rolling dice, instead using the computer's CSPRNG. Use at your own
risk.
Read also the excellent [Diceware
FAQ](http://world.std.com/%7Ereinhold/dicewarefaq.html).
**See a demonstration of a dictionary generated from Steven Erikson's
Malazan Book of the Fallen series: [6-letter words (13 bits of
entropy)](./demo/mbotf-u6-13.pdf), [7-letter words (14 bits of
entropy)](./demo/mbotf-u7-14.pdf).**
Example dictionaries
====================
Dictionaries were generated from Steven Erikson's __Malazan Book of
the Fallen__ series. Only words that appeared twice or more were kept.
* [6 letters and under, requires 2d30 and 1d10, 13.1 bits per word](./demo/mbotf-62-303010-1314.pdf)
* [7 letters and under, requires 4d6 and 1d10, 13.7 bits per word](./demo/mbotf-72-666610-1366.pdf)
* [8 letters and under, requires 2d8 and 4d4, 14.0 bits per word](./demo/mbotf-82-884444-1400.pdf)
Entropy tables
==============
Password strength & entropy
===========================
> “To provide adequate protection against the most serious
threats... keys used to protect data today should be at least 75
......@@ -64,287 +75,5 @@ chosen word.
~~~
See the entropy tables above to figure out how much entropy adding a
symbol adds to your passphrase. A pessimistic estimate is at least 10
bits per added symbol.
Dice and coin tables
====================
In the tables below, `R` means reroll.
~~~
coin > d2
-----+---
H | 1
T | 2
~~~
~~~
2 coins > d4
--------+---
HH | 1
HT | 2
TH | 3
TT | 4
~~~
~~~
3 coins > d8
--------+---
HHH | 1
HHT | 2
HTH | 3
HTT | 4
THH | 5
THT | 6
TTH | 7
TTT | 8
~~~
~~~
2d4 > d8d2
----|-----
11 | 11
12 | 12
13 | 21
14 | 22
21 | 31
22 | 32
23 | 41
24 | 42
31 | 51
32 | 52
33 | 61
34 | 62
41 | 71
42 | 72
43 | 81
44 | 82
~~~
~~~
d6 > d2 | d4
---+----+---
1 | 1 | 1
2 | 1 | 2
3 | 1 | 3
4 | 2 | 4
5 | 2 | R
6 | 2 | R
~~~
~~~
2d6 > d8d4
----+-----
11 | 11
12 | 12
13 | 13
14 | 14
15 | 21
16 | 22
21 | 23
22 | 24
23 | 31
24 | 32
25 | 33
26 | 34
31 | 41
32 | 42
33 | 43
34 | 44
35 | 51
36 | 52
41 | 53
42 | 54
43 | 61
44 | 62
45 | 63
46 | 64
51 | 71
52 | 72
53 | 73
54 | 74
55 | 81
56 | 82
61 | 83
62 | 84
63 | R
64 | R
65 | R
66 | R
~~~
~~~
d8 > d4d2
---+-----
1 | 11
2 | 12
3 | 21
4 | 22
5 | 31
6 | 32
7 | 41
8 | 42
~~~
~~~
d10 > d2 | d4 | d8
----+----+----+---
1 | 1 | 1 | 1
2 | 1 | 1 | 2
3 | 1 | 2 | 3
4 | 1 | 2 | 4
5 | 1 | 3 | 5
6 | 2 | 3 | 6
7 | 2 | 4 | 7
8 | 2 | 4 | 8
9 | 2 | R | R
10 | 2 | R | R
~~~
~~~
d12 > d2 | d4 | d8
----+----+----+---
1 | 1 | 1 | 1
2 | 1 | 1 | 2
3 | 1 | 2 | 3
4 | 1 | 2 | 4
5 | 1 | 3 | 5
6 | 1 | 3 | 6
7 | 2 | 4 | 7
8 | 2 | 4 | 8
9 | 2 | R | R
10 | 2 | R | R
11 | 2 | R | R
12 | 2 | R | R
~~~
~~~
d20 > d2 | d4 | d8d2
----+----+----+-----
1 | 1 | 1 | 11
2 | 1 | 1 | 12
3 | 1 | 1 | 21
4 | 1 | 1 | 22
5 | 1 | 2 | 31
6 | 1 | 2 | 32
7 | 1 | 2 | 41
8 | 1 | 2 | 42
9 | 1 | 3 | 51
10 | 1 | 3 | 52
11 | 2 | 3 | 61
12 | 2 | 3 | 62
13 | 2 | 4 | 71
14 | 2 | 4 | 72
15 | 2 | 4 | 81
16 | 2 | 4 | 82
17 | 2 | R | R
18 | 2 | R | R
19 | 2 | R | R
20 | 2 | R | R
~~~
~~~
d100 > d8d8
-----|-----
1 | 11
2 | 12
3 | 13
4 | 14
5 | 15
6 | 16
7 | 17
8 | 18
9 | 21
10 | 22
11 | 23
12 | 24
13 | 25
14 | 26
15 | 27
16 | 28
17 | 31
18 | 32
19 | 33
20 | 34
21 | 35
22 | 36
23 | 37
24 | 38
25 | 41
26 | 42
27 | 43
28 | 44
29 | 45
30 | 46
31 | 47
32 | 48
33 | 51
34 | 52
35 | 53
36 | 54
37 | 55
38 | 56
39 | 57
40 | 58
41 | 61
42 | 62
43 | 63
44 | 64
45 | 65
46 | 66
47 | 67
48 | 68
49 | 71
50 | 72
51 | 73
52 | 74
53 | 75
54 | 76
55 | 77
56 | 78
57 | 81
58 | 82
59 | 83
60 | 84
61 | 85
62 | 86
63 | 87
64 | 88
65 | R
66 | R
67 | R
68 | R
69 | R
70 | R
71 | R
72 | R
73 | R
74 | R
75 | R
76 | R
77 | R
78 | R
79 | R
80 | R
81 | R
82 | R
83 | R
84 | R
85 | R
86 | R
87 | R
88 | R
89 | R
90 | R
91 | R
92 | R
93 | R
94 | R
95 | R
96 | R
97 | R
98 | R
99 | R
100 | R
~~~
\ No newline at end of file
symbol adds to your passphrase. A reasonable estimate is about 10 bits
of extra entropy per added symbol.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -9,7 +9,7 @@
* http://sam.zoy.org/wtfpl/COPYING for more details. */
if($argc !== 6) {
fprintf(STDERR, "Usage: %s <col-bits> <font-size-pt> <paper-width-mm> <paper-height-mm> <paper-margin-mm> < dict.txt\n", $argv[0]);
fprintf(STDERR, "Usage: %s <num-cols> <font-size-pt> <paper-width-mm> <paper-height-mm> <paper-margin-mm> < dict.txt\n", $argv[0]);
die(1);
}
......@@ -17,49 +17,68 @@ register_shutdown_function(function() {
passthru('find . -maxdepth 1 -name ".'.getmypid().'-page??.*" -delete');
});
$colbits = (int)$argv[1];
$numcols = (int)$argv[1];
$fs = floatval($argv[2]) / 72.0 * 25.4;
$pw = floatval($argv[3]);
$ph = floatval($argv[4]);
$pm = floatval($argv[5]);
$numrows = floor(($ph - 2.0 * $pm) / $fs) - 2; /* minus 2 for the headers */
$dict = [];
$lw = 0;
while(($l = fgets(STDIN)) !== false) {
list($n, $w) = explode(' ', substr($l, 0, -1), 2);
$dict[substr($n, 0, -$colbits)][substr($n, -$colbits)] = $w;
$w = substr($l, 0, -1);
$dict[] = $w;
$l = mb_strlen($w);
if($l > $lw) $lw = $l;
}
$fs = floatval($argv[2]) / 72.0 * 25.4;
$pw = floatval($argv[3]);
$ph = floatval($argv[4]);
$pm = floatval($argv[5]);
$dice = [ 60, 30, 24, 20, 12, 10, 8, 6, 4 ];
$coldice = find_rolls_l($numcols, $dice, []);
$coldicelen = strlen(implode('', $coldice));
if(array_product($coldice) !== $numcols) {
fwrite(STDERR, "Could not make that number of columns from available dice, use a different number of columns.\n");
die(2);
}
$dc = count($dict);
$maindice = array_slice(find_rolls_l($dc, $dice, $coldice), count($coldice));
$maindicelen = strlen(implode('', $maindice));
$c = array_product($coldice) * array_product($maindice);
fprintf(STDERR, "Row rolls: %s ; column rolls: %s ; %d/%d words, %.2f%%, %.2f bits\n", json_encode($maindice), json_encode($coldice), $c, $dc, 100.0 * $c / $dc, log($c) / log(2.0));
trim_dict($dict, $c);
assert(count($dict) === $c);
$endrow = end($dict);
$rolls = format_binary(key($dict));
foreach(array_chunk($dict, floor(($ph - 2.0 * $pm) / $fs) - 2, true) as $i => $page) {
$header = '<tspan x="0" dy="1em">';
$header .= format_rolls($maindice, array_product($maindice) - 1);
for($j = 0; $j < $numcols; ++$j) {
$colrollstr = format_rolls($coldice, $j);
$header .= ' | '.$colrollstr.str_repeat(' ', $lw - $coldicelen);
}
$header .= '</tspan><tspan x="0" dy="1em">';
$header .= str_repeat('-', $maindicelen);
for($j = 0; $j < $numcols; ++$j) {
$header .= '-+-'.str_repeat('-', $lw);
}
$header .= '</tspan>';
foreach(array_chunk($dict, $numrows * $numcols, true) as $i => $page) {
ob_start();
echo '<?xml version="1.0" encoding="utf-8" standalone="no" ?>';
printf('<svg width="%fmm" height="%fmm" viewBox="0 0 %f %f" xmlns="http://www.w3.org/2000/svg">', $pw, $ph, $pw, $ph);
printf('<g transform="translate(%f %f)"><text font-family="monospace" font-size="%f" style="white-space: pre;">', $pm, $pm, $fs);
echo '<tspan x="0" dy="1em">';
echo $rolls;
foreach($endrow as $suffix => $w) {
$suffix = format_binary($suffix);
echo ' | ', $suffix, str_repeat(' ', $lw - mb_strlen($suffix));
}
echo '</tspan>';
echo '<tspan x="0" dy="1em">';
echo str_repeat('-', mb_strlen($rolls));
foreach($endrow as $suffix => $w) {
echo '-+-', str_repeat('-', $lw);
}
echo '</tspan>';
echo $header;
foreach($page as $prefix => $row) {
foreach(array_chunk($page, $numcols, true) as $row) {
echo '<tspan x="0" dy="1em">';
echo format_binary($prefix);
foreach($row as $suffix => $w) {
echo format_rolls($maindice, key($row) / $numcols);
foreach($row as $w) {
echo ' | ', $w, str_repeat(' ', $lw - mb_strlen($w));
}
echo '</tspan>';
......@@ -71,24 +90,51 @@ foreach(array_chunk($dict, floor(($ph - 2.0 * $pm) / $fs) - 2, true) as $i => $p
passthru('parallel -N 1 inkscape -A {.}.pdf {} ::: .'.getmypid().'-page??.svg');
passthru('pdfunite .'.getmypid().'-page??.pdf out-dict-'.getmypid().'.pdf');
die(0);
/* Turn a string of binary digits into a string of 1-based roll digits.
 *
 * While more than 4 bits remain, 3 bits are consumed per roll; the
 * remainder is consumed 2 bits per roll, then a final single bit if
 * one is left. Each group is emitted as its value plus one. */
function format_binary(string $bin): string {
	$ret = "";

	/* Prefer 2d4 instead of d8d2 for 4 bits */
	while(strlen($bin) > 4) {
		$ret .= (string)(bindec(substr($bin, 0, 3)) + 1);
		$bin = substr($bin, 3);
	}

	while(strlen($bin) >= 2) {
		$ret .= (string)(bindec(substr($bin, 0, 2)) + 1);
		$bin = substr($bin, 2);
	}

	if(strlen($bin) >= 1) {
		$ret .= (string)(bindec(substr($bin, 0, 1)) + 1);
		$bin = substr($bin, 1);
	}

	assert($bin === "");
	return $ret;
}

/* Find a sequence of dice whose product of sides is as large as
 * possible without exceeding $upperlimit.
 *
 * $dice is the list of available side counts (each may be used any
 * number of times), $have is the sequence already chosen; the result
 * always starts with $have. Every extension is explored recursively
 * and the first sequence reaching the largest product is kept (a later
 * one only replaces it if its product is strictly larger). */
function find_rolls_l(int $upperlimit, array $dice, array $have = []): array {
	$c = array_product($have);
	assert($c <= $upperlimit);

	$bestc = $c;
	$best = $have;

	foreach($dice as $s) {
		/* Nothing can be strictly better than hitting the limit
		 * exactly, so the remaining dice cannot change the result. */
		if($bestc >= $upperlimit) {
			break;
		}

		if($s * $c > $upperlimit) {
			continue;
		}

		$have[] = $s;
		$ch = find_rolls_l($upperlimit, $dice, $have);
		$cc = array_product($ch);
		array_pop($have);

		if($cc > $bestc) {
			$bestc = $cc;
			$best = $ch;
		}
	}

	return $best;
}

/* Format index $i as a sequence of dice rolls.
 *
 * $i is decomposed in the mixed radix given by $dice, the last die
 * being the least significant. Each roll is 1-based and left-padded
 * with zeroes to the number of digits of its die's side count. Rolls
 * are alternately wrapped in a bold <tspan>; $bold tells whether the
 * last die's roll is the bold one. Returns '' when $dice is empty. */
function format_rolls(array $dice, int $i, bool $bold = true): string {
	if($dice === []) {
		assert($i === 0);
		return '';
	}

	$s = array_pop($dice);
	$k = str_pad((string)(($i % $s) + 1), strlen((string)$s), '0', STR_PAD_LEFT);
	if($bold) $k = sprintf('<tspan style="font-weight: bold;">%s</tspan>', $k);

	/* The remaining, more significant dice go in front, with the
	 * bold flag flipped so consecutive rolls alternate. */
	return format_rolls($dice, intdiv($i, $s), !$bold).$k;
}
/* Cut $dict down, in place, to at most $targetsize words, keeping the
 * shortest ones. The list is shuffled before being ordered by length,
 * the intent being that the longest words dropped are picked at
 * random. The surviving words are put back in sort() order. */
function trim_dict(array &$dict, int $targetsize) {
	$bylength = function(string $a, string $b) {
		return mb_strlen($a) <=> mb_strlen($b);
	};

	shuffle($dict);
	usort($dict, $bylength);

	$dict = array_slice($dict, 0, $targetsize);
	sort($dict);
}
......@@ -8,45 +8,36 @@
* License, Version 2, as published by Sam Hocevar. See
* http://sam.zoy.org/wtfpl/COPYING for more details. */
if($argc !== 2 && $argc !== 3) {
fprintf(STDERR, "Usage: %s <max-word-length> [entropy-per-word] < text...\n", $argv[0]);
if($argc > 3) {
fprintf(STDERR, "Usage: %s [max-word-length] [min-occurences] < text...\n", $argv[0]);
die(1);
}
$maxwl = (int)$argv[1];
$bits = isset($argv[2]) ? (int)$argv[2] : null;
$maxwl = isset($argv[1]) ? (int)$argv[1] : PHP_INT_MAX;
$minoc = isset($argv[2]) ? (int)$argv[2] : 0;
$dict = [];
while(($l = fgets(STDIN)) !== false) {
preg_match_all('%([a-z]|’)+%i', $l, $matches);
foreach($matches[0] as $w) {
preg_match('%^(y?’)?(?<core>([a-z]|’)+?)(’(s|d|t|re|ve|ll)?)?$%i', $w, $m);
$w = $m['core'];
$wl = mb_strlen($w);
if($wl < 2 || $wl > $maxwl) continue;
$w = strtolower($w);
$w = strtolower($m['core']);
if(!isset($dict[$w])) $dict[$w] = 1;
else ++$dict[$w];
}
}
if($bits === null) {
$bits = (int)(log(count($dict)) / log(2.0));
}
$n = pow(2, $bits);
if(count($dict) < $n) {
fprintf(STDERR, "%s: asked for %d bits of entropy per word, only %d words available\n", $argv[0], $bits, count($dict));
die(2);
foreach($dict as $w => $oc) {
if($oc < $minoc || mb_strlen($w) > $maxwl) {
unset($dict[$w]);
}
}
arsort($dict);
$dict = array_slice($dict, 0, $n, true);
fprintf(STDERR, "%s: OK, dictionary size %s, %d bits of entropy per word\n", $argv[0], count($dict), $bits);
fprintf(STDERR, "%s: %d words in dictionary, %.2f bits of entropy per word\n", $argv[0], count($dict), log(count($dict))/log(2.0));
$dict = array_keys($dict);
sort($dict);
foreach($dict as $i => $w) {
printf("%0".$bits."s %s\n", decbin($i), $w);
foreach($dict as $w) {
echo $w, PHP_EOL;
}
#!/usr/bin/env php
<?php
/* Author: Romain “Artefact2” Dal Maso <artefact2@gmail.com>
*
* This program is free software. It comes without any warranty, to the
* extent permitted by applicable law. You can redistribute it and/or
* modify it under the terms of the Do What The Fuck You Want To Public
* License, Version 2, as published by Sam Hocevar. See
* http://sam.zoy.org/wtfpl/COPYING for more details. */
/* Generate a random passphrase from a word list.
 *
 * Arguments: <dict.txt> (one word per line), [num-words] (default 6),
 * [num-symbols] (default 1). The passphrase is written to stdout; the
 * warning and the strength estimate are written to stderr.
 * Exit status: 1 on bad usage, 2 when the dictionary is unusable. */

if($argc < 2 || $argc > 4) {
	fprintf(STDERR, "Usage: %s <dict.txt> [num-words] [num-symbols]\n", $argv[0]);
	die(1);
}

$dict = $argv[1];
$nw = isset($argv[2]) ? (int)$argv[2] : 6;
$ns = isset($argv[3]) ? (int)$argv[3] : 1;

if(!file_exists($dict) || !is_readable($dict) || ($dict = file_get_contents($dict)) === false) {
	fprintf(STDERR, "%s: could not read dictionary\n", $argv[0]);
	die(2);
}

fwrite(STDERR, "WARNING! This tool is only as secure as your machine and your random number generator.\nRead https://php.net/random_bytes for more information.\nUsing real paper and real dice is strongly recommended.\n\n");

/* One word per line. Blank lines and a trailing "\r" are dropped so
 * that a missing final newline or CRLF line endings do not corrupt
 * words or add an empty "word" to the dictionary. */
$words = [];
foreach(explode("\n", $dict) as $line) {
	$line = rtrim($line, "\r");
	if($line !== '') $words[] = $line;
}
$dict = $words;

$c = count($dict);
if($c === 0) {
	fprintf(STDERR, "%s: dictionary is empty\n", $argv[0]);
	die(2);
}

/* Each word is drawn uniformly, so it contributes log2($c) bits. */
$bits = log($c) / log(2.0);

$pp = [];
$entropy = 0.0;

for($i = 0; $i < $nw; ++$i) {
	$pp[] = $dict[random_int(0, $c - 1)];
	$entropy += $bits;
}

$pp = implode(' ', $pp);

/* NOTE(review): inside single quotes, \$ is a literal backslash
 * followed by a dollar sign, so both characters are part of the
 * symbol set - confirm this is intended. */
const SYMBOLS = '0123456789+-*/%=<>[]{}()~\$|&_@#?!.:;,\'"';

for($i = 0; $i < $ns; ++$i) {
	/* Positions are counted in characters, not bytes: words may
	 * contain multi-byte characters, and a byte offset could split
	 * one of them. */
	$len = mb_strlen($pp);

	/* A symbol has ($len + 1) possible positions and strlen(SYMBOLS)
	 * possible values. */
	$entropy += log(($len + 1) * strlen(SYMBOLS)) / log(2.0);

	$idx = random_int(0, $len);
	$sym = SYMBOLS[random_int(0, strlen(SYMBOLS) - 1)];
	$pp = mb_substr($pp, 0, $idx).$sym.mb_substr($pp, $idx);
}

fprintf(STDERR, "Passphrase strength: %.1f bits\n\n", $entropy);
echo $pp, PHP_EOL;