diff options
Diffstat (limited to 'include/linux/dst.h')
-rw-r--r-- | include/linux/dst.h | 587 |
1 files changed, 587 insertions, 0 deletions
diff --git a/include/linux/dst.h b/include/linux/dst.h new file mode 100644 index 000000000000..e26fed84b1aa --- /dev/null +++ b/include/linux/dst.h @@ -0,0 +1,587 @@ +/* + * 2007+ Copyright (c) Evgeniy Polyakov <johnpol@2ka.mipt.ru> + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef __DST_H +#define __DST_H + +#include <linux/types.h> +#include <linux/connector.h> + +#define DST_NAMELEN 32 +#define DST_NAME "dst" + +enum { + /* Remove node with given id from storage */ + DST_DEL_NODE = 0, + /* Add remote node with given id to the storage */ + DST_ADD_REMOTE, + /* Add local node with given id to the storage to be exported and used by remote peers */ + DST_ADD_EXPORT, + /* Crypto initialization command (hash/cipher used to protect the connection) */ + DST_CRYPTO, + /* Security attributes for given connection (permissions for example) */ + DST_SECURITY, + /* Register given node in the block layer subsystem */ + DST_START, + DST_CMD_MAX +}; + +struct dst_ctl +{ + /* Storage name */ + char name[DST_NAMELEN]; + /* Command flags */ + __u32 flags; + /* Command itself (see above) */ + __u32 cmd; + /* Maximum number of pages per single request in this device */ + __u32 max_pages; + /* Stale/error transaction scanning timeout in milliseconds */ + __u32 trans_scan_timeout; + /* Maximum number of retry sends before completing transaction as broken */ + __u32 trans_max_retries; + /* Storage size */ + __u64 size; +}; + +/* Reply command carries completion status */ +struct dst_ctl_ack +{ + struct cn_msg msg; + int error; + int unused[3]; +}; + +/* + * Unfortunaltely socket address structure is not exported to userspace + * and is redefined there. + */ +#define SADDR_MAX_DATA 128 + +struct saddr { + /* address family, AF_xxx */ + unsigned short sa_family; + /* 14 bytes of protocol address */ + char sa_data[SADDR_MAX_DATA]; + /* Number of bytes used in sa_data */ + unsigned short sa_data_len; +}; + +/* Address structure */ +struct dst_network_ctl +{ + /* Socket type: datagram, stream...*/ + unsigned int type; + /* Let me guess, is it a Jupiter diameter? */ + unsigned int proto; + /* Peer's address */ + struct saddr addr; +}; + +struct dst_crypto_ctl +{ + /* Cipher and hash names */ + char cipher_algo[DST_NAMELEN]; + char hash_algo[DST_NAMELEN]; + + /* Key sizes. Can be zero for digest for example */ + unsigned int cipher_keysize, hash_keysize; + /* Alignment. Calculated by the DST itself. */ + unsigned int crypto_attached_size; + /* Number of threads to perform crypto operations */ + int thread_num; +}; + +/* Export security attributes have this bits checked in when client connects */ +#define DST_PERM_READ (1<<0) +#define DST_PERM_WRITE (1<<1) + +/* + * Right now it is simple model, where each remote address + * is assigned to set of permissions it is allowed to perform. + * In real world block device does not know anything but + * reading and writing, so it should be more than enough. + */ +struct dst_secure_user +{ + unsigned int permissions; + struct saddr addr; +}; + +/* + * Export control command: device to export and network address to accept + * clients to work with given device + */ +struct dst_export_ctl +{ + char device[DST_NAMELEN]; + struct dst_network_ctl ctl; +}; + +enum { + DST_CFG = 1, /* Request remote configuration */ + DST_IO, /* IO command */ + DST_IO_RESPONSE, /* IO response */ + DST_PING, /* Keepalive message */ + DST_NCMD_MAX, +}; + +struct dst_cmd +{ + /* Network command itself, see above */ + __u32 cmd; + /* + * Size of the attached data + * (in most cases, for READ command it means how many bytes were requested) + */ + __u32 size; + /* Crypto size: number of attached bytes with digest/hmac */ + __u32 csize; + /* Here we can carry secret data */ + __u32 reserved; + /* Read/write bits, see how they are encoded in bio structure */ + __u64 rw; + /* BIO flags */ + __u64 flags; + /* Unique command id (like transaction ID) */ + __u64 id; + /* Sector to start IO from */ + __u64 sector; + /* Hash data is placed after this header */ + __u8 hash[0]; +}; + +/* + * Convert command to/from network byte order. + * We do not use hton*() functions, since there is + * no 64-bit implementation. + */ +static inline void dst_convert_cmd(struct dst_cmd *c) +{ + c->cmd = __cpu_to_be32(c->cmd); + c->csize = __cpu_to_be32(c->csize); + c->size = __cpu_to_be32(c->size); + c->sector = __cpu_to_be64(c->sector); + c->id = __cpu_to_be64(c->id); + c->flags = __cpu_to_be64(c->flags); + c->rw = __cpu_to_be64(c->rw); +} + +/* Transaction id */ +typedef __u64 dst_gen_t; + +#ifdef __KERNEL__ + +#include <linux/blkdev.h> +#include <linux/bio.h> +#include <linux/device.h> +#include <linux/mempool.h> +#include <linux/net.h> +#include <linux/poll.h> +#include <linux/rbtree.h> + +#ifdef CONFIG_DST_DEBUG +#define dprintk(f, a...) printk(KERN_NOTICE f, ##a) +#else +static inline void __attribute__ ((format (printf, 1, 2))) + dprintk(const char *fmt, ...) {} +#endif + +struct dst_node; + +struct dst_trans +{ + /* DST node we are working with */ + struct dst_node *n; + + /* Entry inside transaction tree */ + struct rb_node trans_entry; + + /* Merlin kills this transaction when this memory cell equals zero */ + atomic_t refcnt; + + /* How this transaction should be processed by crypto engine */ + short enc; + /* How many times this transaction was resent */ + short retries; + /* Completion status */ + int error; + + /* When did we send it to the remote peer */ + long send_time; + + /* My name is... + * Well, computers does not speak, they have unique id instead */ + dst_gen_t gen; + + /* Block IO we are working with */ + struct bio *bio; + + /* Network command for above block IO request */ + struct dst_cmd cmd; +}; + +struct dst_crypto_engine +{ + /* What should we do with all block requests */ + struct crypto_hash *hash; + struct crypto_ablkcipher *cipher; + + /* Pool of pages used to encrypt data into before sending */ + int page_num; + struct page **pages; + + /* What to do with current request */ + int enc; + /* Who we are and where do we go */ + struct scatterlist *src, *dst; + + /* Maximum timeout waiting for encryption to be completed */ + long timeout; + /* IV is a 64-bit sequential counter */ + u64 iv; + + /* Secret data */ + void *private; + + /* Cached temporary data lives here */ + int size; + void *data; +}; + +struct dst_state +{ + /* The main state protection */ + struct mutex state_lock; + + /* Polling machinery for sockets */ + wait_queue_t wait; + wait_queue_head_t *whead; + /* Most of events are being waited here */ + wait_queue_head_t thread_wait; + + /* Who owns this? */ + struct dst_node *node; + + /* Network address for this state */ + struct dst_network_ctl ctl; + + /* Permissions to work with: read-only or rw connection */ + u32 permissions; + + /* Called when we need to clean private data */ + void (* cleanup)(struct dst_state *st); + + /* Used by the server: BIO completion queues BIOs here */ + struct list_head request_list; + spinlock_t request_lock; + + /* Guess what? No, it is not number of planets */ + atomic_t refcnt; + + /* This flags is set when connection should be dropped */ + int need_exit; + + /* + * Socket to work with. Second pointer is used for + * lockless check if socket was changed before performing + * next action (like working with cached polling result) + */ + struct socket *socket, *read_socket; + + /* Cached preallocated data */ + void *data; + unsigned int size; + + /* Currently processed command */ + struct dst_cmd cmd; +}; + +struct dst_info +{ + /* Device size */ + u64 size; + + /* Local device name for export devices */ + char local[DST_NAMELEN]; + + /* Network setup */ + struct dst_network_ctl net; + + /* Sysfs bits use this */ + struct device device; +}; + +struct dst_node +{ + struct list_head node_entry; + + /* Hi, my name is stored here */ + char name[DST_NAMELEN]; + /* My cache name is stored here */ + char cache_name[DST_NAMELEN]; + + /* Block device attached to given node. + * Only valid for exporting nodes */ + struct block_device *bdev; + /* Network state machine for given peer */ + struct dst_state *state; + + /* Block IO machinery */ + struct request_queue *queue; + struct gendisk *disk; + + /* Number of threads in processing pool */ + int thread_num; + /* Maximum number of pages in single IO */ + int max_pages; + + /* I'm that big in bytes */ + loff_t size; + + /* Exported to userspace node information */ + struct dst_info *info; + + /* + * Security attribute list. + * Used only by exporting node currently. + */ + struct list_head security_list; + struct mutex security_lock; + + /* + * When this unerflows below zero, university collapses. + * But this will not happen, since node will be freed, + * when reference counter reaches zero. + */ + atomic_t refcnt; + + /* How precisely should I be started? */ + int (*start)(struct dst_node *); + + /* Crypto capabilities */ + struct dst_crypto_ctl crypto; + u8 *hash_key; + u8 *cipher_key; + + /* Pool of processing thread */ + struct thread_pool *pool; + + /* Transaction IDs live here */ + atomic_long_t gen; + + /* + * How frequently and how many times transaction + * tree should be scanned to drop stale objects. + */ + long trans_scan_timeout; + int trans_max_retries; + + /* Small gnomes live here */ + struct rb_root trans_root; + struct mutex trans_lock; + + /* + * Transaction cache/memory pool. + * It is big enough to contain not only transaction + * itself, but additional crypto data (digest/hmac). + */ + struct kmem_cache *trans_cache; + mempool_t *trans_pool; + + /* This entity scans transaction tree */ + struct delayed_work trans_work; + + wait_queue_head_t wait; +}; + +/* Kernel representation of the security attribute */ +struct dst_secure +{ + struct list_head sec_entry; + struct dst_secure_user sec; +}; + +int dst_process_bio(struct dst_node *n, struct bio *bio); + +int dst_node_init_connected(struct dst_node *n, struct dst_network_ctl *r); +int dst_node_init_listened(struct dst_node *n, struct dst_export_ctl *le); + +static inline struct dst_state *dst_state_get(struct dst_state *st) +{ + BUG_ON(atomic_read(&st->refcnt) == 0); + atomic_inc(&st->refcnt); + return st; +} + +void dst_state_put(struct dst_state *st); + +struct dst_state *dst_state_alloc(struct dst_node *n); +int dst_state_socket_create(struct dst_state *st); +void dst_state_socket_release(struct dst_state *st); + +void dst_state_exit_connected(struct dst_state *st); + +int dst_state_schedule_receiver(struct dst_state *st); + +void dst_dump_addr(struct socket *sk, struct sockaddr *sa, char *str); + +static inline void dst_state_lock(struct dst_state *st) +{ + mutex_lock(&st->state_lock); +} + +static inline void dst_state_unlock(struct dst_state *st) +{ + mutex_unlock(&st->state_lock); +} + +void dst_poll_exit(struct dst_state *st); +int dst_poll_init(struct dst_state *st); + +static inline unsigned int dst_state_poll(struct dst_state *st) +{ + unsigned int revents = POLLHUP | POLLERR; + + dst_state_lock(st); + if (st->socket) + revents = st->socket->ops->poll(NULL, st->socket, NULL); + dst_state_unlock(st); + + return revents; +} + +static inline int dst_thread_setup(void *private, void *data) +{ + return 0; +} + +void dst_node_put(struct dst_node *n); + +static inline struct dst_node *dst_node_get(struct dst_node *n) +{ + atomic_inc(&n->refcnt); + return n; +} + +int dst_data_recv(struct dst_state *st, void *data, unsigned int size); +int dst_recv_cdata(struct dst_state *st, void *cdata); +int dst_data_send_header(struct socket *sock, + void *data, unsigned int size, int more); + +int dst_send_bio(struct dst_state *st, struct dst_cmd *cmd, struct bio *bio); + +int dst_process_io(struct dst_state *st); +int dst_export_crypto(struct dst_node *n, struct bio *bio); +int dst_export_send_bio(struct bio *bio); +int dst_start_export(struct dst_node *n); + +int __init dst_export_init(void); +void dst_export_exit(void); + +/* Private structure for export block IO requests */ +struct dst_export_priv +{ + struct list_head request_entry; + struct dst_state *state; + struct bio *bio; + struct dst_cmd cmd; +}; + +static inline void dst_trans_get(struct dst_trans *t) +{ + atomic_inc(&t->refcnt); +} + +struct dst_trans *dst_trans_search(struct dst_node *node, dst_gen_t gen); +int dst_trans_remove(struct dst_trans *t); +int dst_trans_remove_nolock(struct dst_trans *t); +void dst_trans_put(struct dst_trans *t); + +/* + * Convert bio into network command. + */ +static inline void dst_bio_to_cmd(struct bio *bio, struct dst_cmd *cmd, + u32 command, u64 id) +{ + cmd->cmd = command; + cmd->flags = (bio->bi_flags << BIO_POOL_BITS) >> BIO_POOL_BITS; + cmd->rw = bio->bi_rw; + cmd->size = bio->bi_size; + cmd->csize = 0; + cmd->id = id; + cmd->sector = bio->bi_sector; +}; + +int dst_trans_send(struct dst_trans *t); +int dst_trans_crypto(struct dst_trans *t); + +int dst_node_crypto_init(struct dst_node *n, struct dst_crypto_ctl *ctl); +void dst_node_crypto_exit(struct dst_node *n); + +static inline int dst_need_crypto(struct dst_node *n) +{ + struct dst_crypto_ctl *c = &n->crypto; + /* + * Logical OR is appropriate here, but boolean one produces + * more optimal code, so it is used instead. + */ + return (c->hash_algo[0] | c->cipher_algo[0]); +} + +int dst_node_trans_init(struct dst_node *n, unsigned int size); +void dst_node_trans_exit(struct dst_node *n); + +/* + * Pool of threads. + * Ready list contains threads currently free to be used, + * active one contains threads with some work scheduled for them. + * Caller can wait in given queue when thread is ready. + */ +struct thread_pool +{ + int thread_num; + struct mutex thread_lock; + struct list_head ready_list, active_list; + + wait_queue_head_t wait; +}; + +void thread_pool_del_worker(struct thread_pool *p); +void thread_pool_del_worker_id(struct thread_pool *p, unsigned int id); +int thread_pool_add_worker(struct thread_pool *p, + char *name, + unsigned int id, + void *(* init)(void *data), + void (* cleanup)(void *data), + void *data); + +void thread_pool_destroy(struct thread_pool *p); +struct thread_pool *thread_pool_create(int num, char *name, + void *(* init)(void *data), + void (* cleanup)(void *data), + void *data); + +int thread_pool_schedule(struct thread_pool *p, + int (* setup)(void *stored_private, void *setup_data), + int (* action)(void *stored_private, void *setup_data), + void *setup_data, long timeout); +int thread_pool_schedule_private(struct thread_pool *p, + int (* setup)(void *private, void *data), + int (* action)(void *private, void *data), + void *data, long timeout, void *id); + +#endif /* __KERNEL__ */ +#endif /* __DST_H */ |