From 74feec5dd83d879368c1081aec5b6a1cb6dd7ce2 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 6 Jul 2012 14:03:18 -0400 Subject: random: fix up sparse warnings Add extern and static declarations to suppress sparse warnings Signed-off-by: "Theodore Ts'o" --- drivers/char/random.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/char') diff --git a/drivers/char/random.c b/drivers/char/random.c index 4ec04a754733..cb541b9a5231 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1279,6 +1279,7 @@ static int proc_do_uuid(ctl_table *table, int write, } static int sysctl_poolsize = INPUT_POOL_WORDS * 32; +extern ctl_table random_table[]; ctl_table random_table[] = { { .procname = "poolsize", @@ -1344,7 +1345,7 @@ late_initcall(random_int_secret_init); * value is not cryptographically secure but for several uses the cost of * depleting entropy is too high */ -DEFINE_PER_CPU(__u32 [MD5_DIGEST_WORDS], get_random_int_hash); +static DEFINE_PER_CPU(__u32 [MD5_DIGEST_WORDS], get_random_int_hash); unsigned int get_random_int(void) { __u32 *hash; -- cgit v1.2.1 From 775f4b297b780601e61787b766f306ed3e1d23eb Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 2 Jul 2012 07:52:16 -0400 Subject: random: make 'add_interrupt_randomness()' do something sane We've been moving away from add_interrupt_randomness() for various reasons: it's too expensive to do on every interrupt, and flooding the CPU with interrupts could theoretically cause bogus floods of entropy from a somewhat externally controllable source. This solves both problems by limiting the actual randomness addition to just once a second or after 64 interrupts, whicever comes first. During that time, the interrupt cycle data is buffered up in a per-cpu pool. Also, we make sure the the nonblocking pool used by urandom is initialized before we start feeding the normal input pool. This assures that /dev/urandom is returning unpredictable data as soon as possible. (Based on an original patch by Linus, but significantly modified by tytso.) Tested-by: Eric Wustrow Reported-by: Eric Wustrow Reported-by: Nadia Heninger Reported-by: Zakir Durumeric Reported-by: J. Alex Halderman . Signed-off-by: Linus Torvalds Signed-off-by: "Theodore Ts'o" Cc: stable@vger.kernel.org --- drivers/char/random.c | 103 +++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 86 insertions(+), 17 deletions(-) (limited to 'drivers/char') diff --git a/drivers/char/random.c b/drivers/char/random.c index cb541b9a5231..9fcceace239c 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -127,19 +127,15 @@ * * void add_input_randomness(unsigned int type, unsigned int code, * unsigned int value); - * void add_interrupt_randomness(int irq); + * void add_interrupt_randomness(int irq, int irq_flags); * void add_disk_randomness(struct gendisk *disk); * * add_input_randomness() uses the input layer interrupt timing, as well as * the event type information from the hardware. * - * add_interrupt_randomness() uses the inter-interrupt timing as random - * inputs to the entropy pool. Note that not all interrupts are good - * sources of randomness! For example, the timer interrupts is not a - * good choice, because the periodicity of the interrupts is too - * regular, and hence predictable to an attacker. Network Interface - * Controller interrupts are a better measure, since the timing of the - * NIC interrupts are more unpredictable. + * add_interrupt_randomness() uses the interrupt timing as random + * inputs to the entropy pool. Using the cycle counters and the irq source + * as inputs, it feeds the randomness roughly once a second. * * add_disk_randomness() uses what amounts to the seek time of block * layer request events, on a per-disk_devt basis, as input to the @@ -248,6 +244,7 @@ #include #include #include +#include #ifdef CONFIG_GENERIC_HARDIRQS # include @@ -256,6 +253,7 @@ #include #include #include +#include #include /* @@ -421,7 +419,9 @@ struct entropy_store { spinlock_t lock; unsigned add_ptr; int entropy_count; + int entropy_total; int input_rotate; + unsigned int initialized:1; __u8 last_data[EXTRACT_SIZE]; }; @@ -454,6 +454,10 @@ static struct entropy_store nonblocking_pool = { .pool = nonblocking_pool_data }; +static __u32 const twist_table[8] = { + 0x00000000, 0x3b6e20c8, 0x76dc4190, 0x4db26158, + 0xedb88320, 0xd6d6a3e8, 0x9b64c2b0, 0xa00ae278 }; + /* * This function adds bytes into the entropy "pool". It does not * update the entropy estimate. The caller should call @@ -467,9 +471,6 @@ static struct entropy_store nonblocking_pool = { static void mix_pool_bytes_extract(struct entropy_store *r, const void *in, int nbytes, __u8 out[64]) { - static __u32 const twist_table[8] = { - 0x00000000, 0x3b6e20c8, 0x76dc4190, 0x4db26158, - 0xedb88320, 0xd6d6a3e8, 0x9b64c2b0, 0xa00ae278 }; unsigned long i, j, tap1, tap2, tap3, tap4, tap5; int input_rotate; int wordmask = r->poolinfo->poolwords - 1; @@ -528,6 +529,36 @@ static void mix_pool_bytes(struct entropy_store *r, const void *in, int bytes) mix_pool_bytes_extract(r, in, bytes, NULL); } +struct fast_pool { + __u32 pool[4]; + unsigned long last; + unsigned short count; + unsigned char rotate; + unsigned char last_timer_intr; +}; + +/* + * This is a fast mixing routine used by the interrupt randomness + * collector. It's hardcoded for an 128 bit pool and assumes that any + * locks that might be needed are taken by the caller. + */ +static void fast_mix(struct fast_pool *f, const void *in, int nbytes) +{ + const char *bytes = in; + __u32 w; + unsigned i = f->count; + unsigned input_rotate = f->rotate; + + while (nbytes--) { + w = rol32(*bytes++, input_rotate & 31) ^ f->pool[i & 3] ^ + f->pool[(i + 1) & 3]; + f->pool[i & 3] = (w >> 3) ^ twist_table[w & 7]; + input_rotate += (i++ & 3) ? 7 : 14; + } + f->count = i; + f->rotate = input_rotate; +} + /* * Credit (or debit) the entropy store with n bits of entropy */ @@ -551,6 +582,12 @@ static void credit_entropy_bits(struct entropy_store *r, int nbits) entropy_count = r->poolinfo->POOLBITS; r->entropy_count = entropy_count; + if (!r->initialized && nbits > 0) { + r->entropy_total += nbits; + if (r->entropy_total > 128) + r->initialized = 1; + } + /* should we wake readers? */ if (r == &input_pool && entropy_count >= random_read_wakeup_thresh) { wake_up_interruptible(&random_read_wait); @@ -700,17 +737,48 @@ void add_input_randomness(unsigned int type, unsigned int code, } EXPORT_SYMBOL_GPL(add_input_randomness); -void add_interrupt_randomness(int irq) +static DEFINE_PER_CPU(struct fast_pool, irq_randomness); + +void add_interrupt_randomness(int irq, int irq_flags) { - struct timer_rand_state *state; + struct entropy_store *r; + struct fast_pool *fast_pool = &__get_cpu_var(irq_randomness); + struct pt_regs *regs = get_irq_regs(); + unsigned long now = jiffies; + __u32 input[4], cycles = get_cycles(); + + input[0] = cycles ^ jiffies; + input[1] = irq; + if (regs) { + __u64 ip = instruction_pointer(regs); + input[2] = ip; + input[3] = ip >> 32; + } - state = get_timer_rand_state(irq); + fast_mix(fast_pool, input, sizeof(input)); - if (state == NULL) + if ((fast_pool->count & 1023) && + !time_after(now, fast_pool->last + HZ)) return; - DEBUG_ENT("irq event %d\n", irq); - add_timer_randomness(state, 0x100 + irq); + fast_pool->last = now; + + r = nonblocking_pool.initialized ? &input_pool : &nonblocking_pool; + mix_pool_bytes(r, &fast_pool->pool, sizeof(fast_pool->pool)); + /* + * If we don't have a valid cycle counter, and we see + * back-to-back timer interrupts, then skip giving credit for + * any entropy. + */ + if (cycles == 0) { + if (irq_flags & __IRQF_TIMER) { + if (fast_pool->last_timer_intr) + return; + fast_pool->last_timer_intr = 1; + } else + fast_pool->last_timer_intr = 0; + } + credit_entropy_bits(r, 1); } #ifdef CONFIG_BLOCK @@ -971,6 +1039,7 @@ static void init_std_data(struct entropy_store *r) spin_lock_irqsave(&r->lock, flags); r->entropy_count = 0; + r->entropy_total = 0; spin_unlock_irqrestore(&r->lock, flags); now = ktime_get_real(); -- cgit v1.2.1 From 902c098a3663de3fa18639efbb71b6080f0bcd3c Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 4 Jul 2012 10:38:30 -0400 Subject: random: use lockless techniques in the interrupt path The real-time Linux folks don't like add_interrupt_randomness() taking a spinlock since it is called in the low-level interrupt routine. This also allows us to reduce the overhead in the fast path, for the random driver, which is the interrupt collection path. Signed-off-by: "Theodore Ts'o" Cc: stable@vger.kernel.org --- drivers/char/random.c | 78 +++++++++++++++++++++++++-------------------------- 1 file changed, 39 insertions(+), 39 deletions(-) (limited to 'drivers/char') diff --git a/drivers/char/random.c b/drivers/char/random.c index 9fcceace239c..315feb1f59f3 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -418,9 +418,9 @@ struct entropy_store { /* read-write data: */ spinlock_t lock; unsigned add_ptr; + unsigned input_rotate; int entropy_count; int entropy_total; - int input_rotate; unsigned int initialized:1; __u8 last_data[EXTRACT_SIZE]; }; @@ -468,26 +468,24 @@ static __u32 const twist_table[8] = { * it's cheap to do so and helps slightly in the expected case where * the entropy is concentrated in the low-order bits. */ -static void mix_pool_bytes_extract(struct entropy_store *r, const void *in, - int nbytes, __u8 out[64]) +static void __mix_pool_bytes(struct entropy_store *r, const void *in, + int nbytes, __u8 out[64]) { unsigned long i, j, tap1, tap2, tap3, tap4, tap5; int input_rotate; int wordmask = r->poolinfo->poolwords - 1; const char *bytes = in; __u32 w; - unsigned long flags; - /* Taps are constant, so we can load them without holding r->lock. */ tap1 = r->poolinfo->tap1; tap2 = r->poolinfo->tap2; tap3 = r->poolinfo->tap3; tap4 = r->poolinfo->tap4; tap5 = r->poolinfo->tap5; - spin_lock_irqsave(&r->lock, flags); - input_rotate = r->input_rotate; - i = r->add_ptr; + smp_rmb(); + input_rotate = ACCESS_ONCE(r->input_rotate); + i = ACCESS_ONCE(r->add_ptr); /* mix one byte at a time to simplify size handling and churn faster */ while (nbytes--) { @@ -514,19 +512,23 @@ static void mix_pool_bytes_extract(struct entropy_store *r, const void *in, input_rotate += i ? 7 : 14; } - r->input_rotate = input_rotate; - r->add_ptr = i; + ACCESS_ONCE(r->input_rotate) = input_rotate; + ACCESS_ONCE(r->add_ptr) = i; + smp_wmb(); if (out) for (j = 0; j < 16; j++) ((__u32 *)out)[j] = r->pool[(i - j) & wordmask]; - - spin_unlock_irqrestore(&r->lock, flags); } -static void mix_pool_bytes(struct entropy_store *r, const void *in, int bytes) +static void mix_pool_bytes(struct entropy_store *r, const void *in, + int nbytes, __u8 out[64]) { - mix_pool_bytes_extract(r, in, bytes, NULL); + unsigned long flags; + + spin_lock_irqsave(&r->lock, flags); + __mix_pool_bytes(r, in, nbytes, out); + spin_unlock_irqrestore(&r->lock, flags); } struct fast_pool { @@ -564,23 +566,22 @@ static void fast_mix(struct fast_pool *f, const void *in, int nbytes) */ static void credit_entropy_bits(struct entropy_store *r, int nbits) { - unsigned long flags; - int entropy_count; + int entropy_count, orig; if (!nbits) return; - spin_lock_irqsave(&r->lock, flags); - DEBUG_ENT("added %d entropy credits to %s\n", nbits, r->name); - entropy_count = r->entropy_count; +retry: + entropy_count = orig = ACCESS_ONCE(r->entropy_count); entropy_count += nbits; if (entropy_count < 0) { DEBUG_ENT("negative entropy/overflow\n"); entropy_count = 0; } else if (entropy_count > r->poolinfo->POOLBITS) entropy_count = r->poolinfo->POOLBITS; - r->entropy_count = entropy_count; + if (cmpxchg(&r->entropy_count, orig, entropy_count) != orig) + goto retry; if (!r->initialized && nbits > 0) { r->entropy_total += nbits; @@ -593,7 +594,6 @@ static void credit_entropy_bits(struct entropy_store *r, int nbits) wake_up_interruptible(&random_read_wait); kill_fasync(&fasync, SIGIO, POLL_IN); } - spin_unlock_irqrestore(&r->lock, flags); } /********************************************************************* @@ -680,7 +680,7 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned num) sample.cycles = get_cycles(); sample.num = num; - mix_pool_bytes(&input_pool, &sample, sizeof(sample)); + mix_pool_bytes(&input_pool, &sample, sizeof(sample), NULL); /* * Calculate number of bits of randomness we probably added. @@ -764,7 +764,7 @@ void add_interrupt_randomness(int irq, int irq_flags) fast_pool->last = now; r = nonblocking_pool.initialized ? &input_pool : &nonblocking_pool; - mix_pool_bytes(r, &fast_pool->pool, sizeof(fast_pool->pool)); + __mix_pool_bytes(r, &fast_pool->pool, sizeof(fast_pool->pool), NULL); /* * If we don't have a valid cycle counter, and we see * back-to-back timer interrupts, then skip giving credit for @@ -829,7 +829,7 @@ static void xfer_secondary_pool(struct entropy_store *r, size_t nbytes) bytes = extract_entropy(r->pull, tmp, bytes, random_read_wakeup_thresh / 8, rsvd); - mix_pool_bytes(r, tmp, bytes); + mix_pool_bytes(r, tmp, bytes, NULL); credit_entropy_bits(r, bytes*8); } } @@ -890,9 +890,11 @@ static void extract_buf(struct entropy_store *r, __u8 *out) int i; __u32 hash[5], workspace[SHA_WORKSPACE_WORDS]; __u8 extract[64]; + unsigned long flags; /* Generate a hash across the pool, 16 words (512 bits) at a time */ sha_init(hash); + spin_lock_irqsave(&r->lock, flags); for (i = 0; i < r->poolinfo->poolwords; i += 16) sha_transform(hash, (__u8 *)(r->pool + i), workspace); @@ -905,7 +907,8 @@ static void extract_buf(struct entropy_store *r, __u8 *out) * brute-forcing the feedback as hard as brute-forcing the * hash. */ - mix_pool_bytes_extract(r, hash, sizeof(hash), extract); + __mix_pool_bytes(r, hash, sizeof(hash), extract); + spin_unlock_irqrestore(&r->lock, flags); /* * To avoid duplicates, we atomically extract a portion of the @@ -928,11 +931,10 @@ static void extract_buf(struct entropy_store *r, __u8 *out) } static ssize_t extract_entropy(struct entropy_store *r, void *buf, - size_t nbytes, int min, int reserved) + size_t nbytes, int min, int reserved) { ssize_t ret = 0, i; __u8 tmp[EXTRACT_SIZE]; - unsigned long flags; xfer_secondary_pool(r, nbytes); nbytes = account(r, nbytes, min, reserved); @@ -941,6 +943,8 @@ static ssize_t extract_entropy(struct entropy_store *r, void *buf, extract_buf(r, tmp); if (fips_enabled) { + unsigned long flags; + spin_lock_irqsave(&r->lock, flags); if (!memcmp(tmp, r->last_data, EXTRACT_SIZE)) panic("Hardware RNG duplicated output!\n"); @@ -1034,22 +1038,18 @@ EXPORT_SYMBOL(get_random_bytes); static void init_std_data(struct entropy_store *r) { int i; - ktime_t now; - unsigned long flags; + ktime_t now = ktime_get_real(); + unsigned long rv; - spin_lock_irqsave(&r->lock, flags); r->entropy_count = 0; r->entropy_total = 0; - spin_unlock_irqrestore(&r->lock, flags); - - now = ktime_get_real(); - mix_pool_bytes(r, &now, sizeof(now)); - for (i = r->poolinfo->POOLBYTES; i > 0; i -= sizeof flags) { - if (!arch_get_random_long(&flags)) + mix_pool_bytes(r, &now, sizeof(now), NULL); + for (i = r->poolinfo->POOLBYTES; i > 0; i -= sizeof(rv)) { + if (!arch_get_random_long(&rv)) break; - mix_pool_bytes(r, &flags, sizeof(flags)); + mix_pool_bytes(r, &rv, sizeof(rv), NULL); } - mix_pool_bytes(r, utsname(), sizeof(*(utsname()))); + mix_pool_bytes(r, utsname(), sizeof(*(utsname())), NULL); } static int rand_initialize(void) @@ -1186,7 +1186,7 @@ write_pool(struct entropy_store *r, const char __user *buffer, size_t count) count -= bytes; p += bytes; - mix_pool_bytes(r, buf, bytes); + mix_pool_bytes(r, buf, bytes, NULL); cond_resched(); } -- cgit v1.2.1 From a2080a67abe9e314f9e9c2cc3a4a176e8a8f8793 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 4 Jul 2012 11:16:01 -0400 Subject: random: create add_device_randomness() interface Add a new interface, add_device_randomness() for adding data to the random pool that is likely to differ between two devices (or possibly even per boot). This would be things like MAC addresses or serial numbers, or the read-out of the RTC. This does *not* add any actual entropy to the pool, but it initializes the pool to different values for devices that might otherwise be identical and have very little entropy available to them (particularly common in the embedded world). [ Modified by tytso to mix in a timestamp, since there may be some variability caused by the time needed to detect/configure the hardware in question. ] Signed-off-by: Linus Torvalds Signed-off-by: "Theodore Ts'o" Cc: stable@vger.kernel.org --- drivers/char/random.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'drivers/char') diff --git a/drivers/char/random.c b/drivers/char/random.c index 315feb1f59f3..df3358ab5b99 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -125,11 +125,20 @@ * The current exported interfaces for gathering environmental noise * from the devices are: * + * void add_device_randomness(const void *buf, unsigned int size); * void add_input_randomness(unsigned int type, unsigned int code, * unsigned int value); * void add_interrupt_randomness(int irq, int irq_flags); * void add_disk_randomness(struct gendisk *disk); * + * add_device_randomness() is for adding data to the random pool that + * is likely to differ between two devices (or possibly even per boot). + * This would be things like MAC addresses or serial numbers, or the + * read-out of the RTC. This does *not* add any actual entropy to the + * pool, but it initializes the pool to different values for devices + * that might otherwise be identical and have very little entropy + * available to them (particularly common in the embedded world). + * * add_input_randomness() uses the input layer interrupt timing, as well as * the event type information from the hardware. * @@ -646,6 +655,25 @@ static void set_timer_rand_state(unsigned int irq, } #endif +/* + * Add device- or boot-specific data to the input and nonblocking + * pools to help initialize them to unique values. + * + * None of this adds any entropy, it is meant to avoid the + * problem of the nonblocking pool having similar initial state + * across largely identical devices. + */ +void add_device_randomness(const void *buf, unsigned int size) +{ + unsigned long time = get_cycles() ^ jiffies; + + mix_pool_bytes(&input_pool, buf, size, NULL); + mix_pool_bytes(&input_pool, &time, sizeof(time), NULL); + mix_pool_bytes(&nonblocking_pool, buf, size, NULL); + mix_pool_bytes(&nonblocking_pool, &time, sizeof(time), NULL); +} +EXPORT_SYMBOL(add_device_randomness); + static struct timer_rand_state input_timer_state; /* -- cgit v1.2.1 From e6d4947b12e8ad947add1032dd754803c6004824 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 5 Jul 2012 10:21:01 -0400 Subject: random: use the arch-specific rng in xfer_secondary_pool If the CPU supports a hardware random number generator, use it in xfer_secondary_pool(), where it will significantly improve things and where we can afford it. Also, remove the use of the arch-specific rng in add_timer_randomness(), since the call is significantly slower than get_cycles(), and we're much better off using it in xfer_secondary_pool() anyway. Signed-off-by: "Theodore Ts'o" Cc: stable@vger.kernel.org --- drivers/char/random.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) (limited to 'drivers/char') diff --git a/drivers/char/random.c b/drivers/char/random.c index df3358ab5b99..f67ae3e473ba 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -254,6 +254,7 @@ #include #include #include +#include #ifdef CONFIG_GENERIC_HARDIRQS # include @@ -702,11 +703,7 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned num) goto out; sample.jiffies = jiffies; - - /* Use arch random value, fall back to cycles */ - if (!arch_get_random_int(&sample.cycles)) - sample.cycles = get_cycles(); - + sample.cycles = get_cycles(); sample.num = num; mix_pool_bytes(&input_pool, &sample, sizeof(sample), NULL); @@ -838,7 +835,11 @@ static ssize_t extract_entropy(struct entropy_store *r, void *buf, */ static void xfer_secondary_pool(struct entropy_store *r, size_t nbytes) { - __u32 tmp[OUTPUT_POOL_WORDS]; + union { + __u32 tmp[OUTPUT_POOL_WORDS]; + long hwrand[4]; + } u; + int i; if (r->pull && r->entropy_count < nbytes * 8 && r->entropy_count < r->poolinfo->POOLBITS) { @@ -849,17 +850,23 @@ static void xfer_secondary_pool(struct entropy_store *r, size_t nbytes) /* pull at least as many as BYTES as wakeup BITS */ bytes = max_t(int, bytes, random_read_wakeup_thresh / 8); /* but never more than the buffer size */ - bytes = min_t(int, bytes, sizeof(tmp)); + bytes = min_t(int, bytes, sizeof(u.tmp)); DEBUG_ENT("going to reseed %s with %d bits " "(%d of %d requested)\n", r->name, bytes * 8, nbytes * 8, r->entropy_count); - bytes = extract_entropy(r->pull, tmp, bytes, + bytes = extract_entropy(r->pull, u.tmp, bytes, random_read_wakeup_thresh / 8, rsvd); - mix_pool_bytes(r, tmp, bytes, NULL); + mix_pool_bytes(r, u.tmp, bytes, NULL); credit_entropy_bits(r, bytes*8); } + kmemcheck_mark_initialized(&u.hwrand, sizeof(u.hwrand)); + for (i = 0; i < 4; i++) + if (arch_get_random_long(&u.hwrand[i])) + break; + if (i) + mix_pool_bytes(r, &u.hwrand, sizeof(u.hwrand), 0); } /* -- cgit v1.2.1 From c2557a303ab6712bb6e09447df828c557c710ac9 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 5 Jul 2012 10:35:23 -0400 Subject: random: add new get_random_bytes_arch() function Create a new function, get_random_bytes_arch() which will use the architecture-specific hardware random number generator if it is present. Change get_random_bytes() to not use the HW RNG, even if it is avaiable. The reason for this is that the hw random number generator is fast (if it is present), but it requires that we trust the hardware manufacturer to have not put in a back door. (For example, an increasing counter encrypted by an AES key known to the NSA.) It's unlikely that Intel (for example) was paid off by the US Government to do this, but it's impossible for them to prove otherwise --- especially since Bull Mountain is documented to use AES as a whitener. Hence, the output of an evil, trojan-horse version of RDRAND is statistically indistinguishable from an RDRAND implemented to the specifications claimed by Intel. Short of using a tunnelling electronic microscope to reverse engineer an Ivy Bridge chip and disassembling and analyzing the CPU microcode, there's no way for us to tell for sure. Since users of get_random_bytes() in the Linux kernel need to be able to support hardware systems where the HW RNG is not present, most time-sensitive users of this interface have already created their own cryptographic RNG interface which uses get_random_bytes() as a seed. So it's much better to use the HW RNG to improve the existing random number generator, by mixing in any entropy returned by the HW RNG into /dev/random's entropy pool, but to always _use_ /dev/random's entropy pool. This way we get almost of the benefits of the HW RNG without any potential liabilities. The only benefits we forgo is the speed/performance enhancements --- and generic kernel code can't depend on depend on get_random_bytes() having the speed of a HW RNG anyway. For those places that really want access to the arch-specific HW RNG, if it is available, we provide get_random_bytes_arch(). Signed-off-by: "Theodore Ts'o" Cc: stable@vger.kernel.org --- drivers/char/random.c | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) (limited to 'drivers/char') diff --git a/drivers/char/random.c b/drivers/char/random.c index f67ae3e473ba..eacd61479112 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1038,17 +1038,34 @@ static ssize_t extract_entropy_user(struct entropy_store *r, void __user *buf, /* * This function is the exported kernel interface. It returns some - * number of good random numbers, suitable for seeding TCP sequence - * numbers, etc. + * number of good random numbers, suitable for key generation, seeding + * TCP sequence numbers, etc. It does not use the hw random number + * generator, if available; use get_random_bytes_arch() for that. */ void get_random_bytes(void *buf, int nbytes) +{ + extract_entropy(&nonblocking_pool, buf, nbytes, 0, 0); +} +EXPORT_SYMBOL(get_random_bytes); + +/* + * This function will use the architecture-specific hardware random + * number generator if it is available. The arch-specific hw RNG will + * almost certainly be faster than what we can do in software, but it + * is impossible to verify that it is implemented securely (as + * opposed, to, say, the AES encryption of a sequence number using a + * key known by the NSA). So it's useful if we need the speed, but + * only if we're willing to trust the hardware manufacturer not to + * have put in a back door. + */ +void get_random_bytes_arch(void *buf, int nbytes) { char *p = buf; while (nbytes) { unsigned long v; int chunk = min(nbytes, (int)sizeof(unsigned long)); - + if (!arch_get_random_long(&v)) break; @@ -1057,9 +1074,11 @@ void get_random_bytes(void *buf, int nbytes) nbytes -= chunk; } - extract_entropy(&nonblocking_pool, p, nbytes, 0, 0); + if (nbytes) + extract_entropy(&nonblocking_pool, p, nbytes, 0, 0); } -EXPORT_SYMBOL(get_random_bytes); +EXPORT_SYMBOL(get_random_bytes_arch); + /* * init_std_data - initialize pool with system data -- cgit v1.2.1 From 00ce1db1a634746040ace24c09a4e3a7949a3145 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 4 Jul 2012 16:19:30 -0400 Subject: random: add tracepoints for easier debugging and verification Signed-off-by: "Theodore Ts'o" --- drivers/char/random.c | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) (limited to 'drivers/char') diff --git a/drivers/char/random.c b/drivers/char/random.c index eacd61479112..e3180852ec85 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -266,6 +266,9 @@ #include #include +#define CREATE_TRACE_POINTS +#include + /* * Configuration information */ @@ -478,8 +481,8 @@ static __u32 const twist_table[8] = { * it's cheap to do so and helps slightly in the expected case where * the entropy is concentrated in the low-order bits. */ -static void __mix_pool_bytes(struct entropy_store *r, const void *in, - int nbytes, __u8 out[64]) +static void _mix_pool_bytes(struct entropy_store *r, const void *in, + int nbytes, __u8 out[64]) { unsigned long i, j, tap1, tap2, tap3, tap4, tap5; int input_rotate; @@ -531,13 +534,21 @@ static void __mix_pool_bytes(struct entropy_store *r, const void *in, ((__u32 *)out)[j] = r->pool[(i - j) & wordmask]; } -static void mix_pool_bytes(struct entropy_store *r, const void *in, +static void __mix_pool_bytes(struct entropy_store *r, const void *in, int nbytes, __u8 out[64]) +{ + trace_mix_pool_bytes_nolock(r->name, nbytes, _RET_IP_); + _mix_pool_bytes(r, in, nbytes, out); +} + +static void mix_pool_bytes(struct entropy_store *r, const void *in, + int nbytes, __u8 out[64]) { unsigned long flags; + trace_mix_pool_bytes(r->name, nbytes, _RET_IP_); spin_lock_irqsave(&r->lock, flags); - __mix_pool_bytes(r, in, nbytes, out); + _mix_pool_bytes(r, in, nbytes, out); spin_unlock_irqrestore(&r->lock, flags); } @@ -585,6 +596,7 @@ static void credit_entropy_bits(struct entropy_store *r, int nbits) retry: entropy_count = orig = ACCESS_ONCE(r->entropy_count); entropy_count += nbits; + if (entropy_count < 0) { DEBUG_ENT("negative entropy/overflow\n"); entropy_count = 0; @@ -599,6 +611,9 @@ retry: r->initialized = 1; } + trace_credit_entropy_bits(r->name, nbits, entropy_count, + r->entropy_total, _RET_IP_); + /* should we wake readers? */ if (r == &input_pool && entropy_count >= random_read_wakeup_thresh) { wake_up_interruptible(&random_read_wait); @@ -971,6 +986,7 @@ static ssize_t extract_entropy(struct entropy_store *r, void *buf, ssize_t ret = 0, i; __u8 tmp[EXTRACT_SIZE]; + trace_extract_entropy(r->name, nbytes, r->entropy_count, _RET_IP_); xfer_secondary_pool(r, nbytes); nbytes = account(r, nbytes, min, reserved); @@ -1005,6 +1021,7 @@ static ssize_t extract_entropy_user(struct entropy_store *r, void __user *buf, ssize_t ret = 0, i; __u8 tmp[EXTRACT_SIZE]; + trace_extract_entropy_user(r->name, nbytes, r->entropy_count, _RET_IP_); xfer_secondary_pool(r, nbytes); nbytes = account(r, nbytes, 0, 0); @@ -1062,6 +1079,7 @@ void get_random_bytes_arch(void *buf, int nbytes) { char *p = buf; + trace_get_random_bytes(nbytes, _RET_IP_); while (nbytes) { unsigned long v; int chunk = min(nbytes, (int)sizeof(unsigned long)); -- cgit v1.2.1 From c5857ccf293968348e5eb4ebedc68074de3dcda6 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 14 Jul 2012 20:27:52 -0400 Subject: random: remove rand_initialize_irq() With the new interrupt sampling system, we are no longer using the timer_rand_state structure in the irq descriptor, so we can stop initializing it now. [ Merged in fixes from Sedat to find some last missing references to rand_initialize_irq() ] Signed-off-by: "Theodore Ts'o" Signed-off-by: Sedat Dilek --- drivers/char/random.c | 55 --------------------------------------------------- 1 file changed, 55 deletions(-) (limited to 'drivers/char') diff --git a/drivers/char/random.c b/drivers/char/random.c index e3180852ec85..9793b40f5754 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -634,43 +634,6 @@ struct timer_rand_state { unsigned dont_count_entropy:1; }; -#ifndef CONFIG_GENERIC_HARDIRQS - -static struct timer_rand_state *irq_timer_state[NR_IRQS]; - -static struct timer_rand_state *get_timer_rand_state(unsigned int irq) -{ - return irq_timer_state[irq]; -} - -static void set_timer_rand_state(unsigned int irq, - struct timer_rand_state *state) -{ - irq_timer_state[irq] = state; -} - -#else - -static struct timer_rand_state *get_timer_rand_state(unsigned int irq) -{ - struct irq_desc *desc; - - desc = irq_to_desc(irq); - - return desc->timer_rand_state; -} - -static void set_timer_rand_state(unsigned int irq, - struct timer_rand_state *state) -{ - struct irq_desc *desc; - - desc = irq_to_desc(irq); - - desc->timer_rand_state = state; -} -#endif - /* * Add device- or boot-specific data to the input and nonblocking * pools to help initialize them to unique values. @@ -1133,24 +1096,6 @@ static int rand_initialize(void) } module_init(rand_initialize); -void rand_initialize_irq(int irq) -{ - struct timer_rand_state *state; - - state = get_timer_rand_state(irq); - - if (state) - return; - - /* - * If kzalloc returns null, we just won't use that entropy - * source. - */ - state = kzalloc(sizeof(struct timer_rand_state), GFP_KERNEL); - if (state) - set_timer_rand_state(irq, state); -} - #ifdef CONFIG_BLOCK void rand_initialize_disk(struct gendisk *disk) { -- cgit v1.2.1 From cbc96b7594b5691d61eba2db8b2ea723645be9ca Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Mon, 23 Jul 2012 09:47:57 -0700 Subject: random: Add comment to random_initialize() Many platforms have per-machine instance data (serial numbers, asset tags, etc.) squirreled away in areas that are accessed during early system bringup. Mixing this data into the random pools has a very high value in providing better random data, so we should allow (and even encourage) architecture code to call add_device_randomness() from the setup_arch() paths. However, this limits our options for internal structure of the random driver since random_initialize() is not called until long after setup_arch(). Add a big fat comment to rand_initialize() spelling out this requirement. Suggested-by: Theodore Ts'o Signed-off-by: Tony Luck Signed-off-by: Theodore Ts'o --- drivers/char/random.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'drivers/char') diff --git a/drivers/char/random.c b/drivers/char/random.c index 9793b40f5754..1a2dfa816041 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1087,6 +1087,16 @@ static void init_std_data(struct entropy_store *r) mix_pool_bytes(r, utsname(), sizeof(*(utsname())), NULL); } +/* + * Note that setup_arch() may call add_device_randomness() + * long before we get here. This allows seeding of the pools + * with some platform dependent data very early in the boot + * process. But it limits our options here. We must use + * statically allocated structures that already have all + * initializations complete at compile time. We should also + * take care not to overwrite the precious per platform data + * we were given. + */ static int rand_initialize(void) { init_std_data(&input_pool); -- cgit v1.2.1 From d2e7c96af1e54b507ae2a6a7dd2baf588417a7e5 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 27 Jul 2012 22:26:08 -0400 Subject: random: mix in architectural randomness in extract_buf() Mix in any architectural randomness in extract_buf() instead of xfer_secondary_buf(). This allows us to mix in more architectural randomness, and it also makes xfer_secondary_buf() faster, moving a tiny bit of additional CPU overhead to process which is extracting the randomness. [ Commit description modified by tytso to remove an extended advertisement for the RDRAND instruction. ] Signed-off-by: H. Peter Anvin Acked-by: Ingo Molnar Cc: DJ Johnston Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- drivers/char/random.c | 56 +++++++++++++++++++++++++++++---------------------- 1 file changed, 32 insertions(+), 24 deletions(-) (limited to 'drivers/char') diff --git a/drivers/char/random.c b/drivers/char/random.c index 1a2dfa816041..b86eae9b77df 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -277,6 +277,8 @@ #define SEC_XFER_SIZE 512 #define EXTRACT_SIZE 10 +#define LONGS(x) (((x) + sizeof(unsigned long) - 1)/sizeof(unsigned long)) + /* * The minimum number of bits of entropy before we wake up a read on * /dev/random. Should be enough to do a significant reseed. @@ -813,11 +815,7 @@ static ssize_t extract_entropy(struct entropy_store *r, void *buf, */ static void xfer_secondary_pool(struct entropy_store *r, size_t nbytes) { - union { - __u32 tmp[OUTPUT_POOL_WORDS]; - long hwrand[4]; - } u; - int i; + __u32 tmp[OUTPUT_POOL_WORDS]; if (r->pull && r->entropy_count < nbytes * 8 && r->entropy_count < r->poolinfo->POOLBITS) { @@ -828,23 +826,17 @@ static void xfer_secondary_pool(struct entropy_store *r, size_t nbytes) /* pull at least as many as BYTES as wakeup BITS */ bytes = max_t(int, bytes, random_read_wakeup_thresh / 8); /* but never more than the buffer size */ - bytes = min_t(int, bytes, sizeof(u.tmp)); + bytes = min_t(int, bytes, sizeof(tmp)); DEBUG_ENT("going to reseed %s with %d bits " "(%d of %d requested)\n", r->name, bytes * 8, nbytes * 8, r->entropy_count); - bytes = extract_entropy(r->pull, u.tmp, bytes, + bytes = extract_entropy(r->pull, tmp, bytes, random_read_wakeup_thresh / 8, rsvd); - mix_pool_bytes(r, u.tmp, bytes, NULL); + mix_pool_bytes(r, tmp, bytes, NULL); credit_entropy_bits(r, bytes*8); } - kmemcheck_mark_initialized(&u.hwrand, sizeof(u.hwrand)); - for (i = 0; i < 4; i++) - if (arch_get_random_long(&u.hwrand[i])) - break; - if (i) - mix_pool_bytes(r, &u.hwrand, sizeof(u.hwrand), 0); } /* @@ -901,15 +893,19 @@ static size_t account(struct entropy_store *r, size_t nbytes, int min, static void extract_buf(struct entropy_store *r, __u8 *out) { int i; - __u32 hash[5], workspace[SHA_WORKSPACE_WORDS]; + union { + __u32 w[5]; + unsigned long l[LONGS(EXTRACT_SIZE)]; + } hash; + __u32 workspace[SHA_WORKSPACE_WORDS]; __u8 extract[64]; unsigned long flags; /* Generate a hash across the pool, 16 words (512 bits) at a time */ - sha_init(hash); + sha_init(hash.w); spin_lock_irqsave(&r->lock, flags); for (i = 0; i < r->poolinfo->poolwords; i += 16) - sha_transform(hash, (__u8 *)(r->pool + i), workspace); + sha_transform(hash.w, (__u8 *)(r->pool + i), workspace); /* * We mix the hash back into the pool to prevent backtracking @@ -920,14 +916,14 @@ static void extract_buf(struct entropy_store *r, __u8 *out) * brute-forcing the feedback as hard as brute-forcing the * hash. */ - __mix_pool_bytes(r, hash, sizeof(hash), extract); + __mix_pool_bytes(r, hash.w, sizeof(hash.w), extract); spin_unlock_irqrestore(&r->lock, flags); /* * To avoid duplicates, we atomically extract a portion of the * pool while mixing, and hash one final time. */ - sha_transform(hash, extract, workspace); + sha_transform(hash.w, extract, workspace); memset(extract, 0, sizeof(extract)); memset(workspace, 0, sizeof(workspace)); @@ -936,11 +932,23 @@ static void extract_buf(struct entropy_store *r, __u8 *out) * pattern, we fold it in half. Thus, we always feed back * twice as much data as we output. */ - hash[0] ^= hash[3]; - hash[1] ^= hash[4]; - hash[2] ^= rol32(hash[2], 16); - memcpy(out, hash, EXTRACT_SIZE); - memset(hash, 0, sizeof(hash)); + hash.w[0] ^= hash.w[3]; + hash.w[1] ^= hash.w[4]; + hash.w[2] ^= rol32(hash.w[2], 16); + + /* + * If we have a architectural hardware random number + * generator, mix that in, too. + */ + for (i = 0; i < LONGS(EXTRACT_SIZE); i++) { + unsigned long v; + if (!arch_get_random_long(&v)) + break; + hash.l[i] ^= v; + } + + memcpy(out, &hash, EXTRACT_SIZE); + memset(&hash, 0, sizeof(hash)); } static ssize_t extract_entropy(struct entropy_store *r, void *buf, -- cgit v1.2.1