/**
 * Possible states of a connection (stored in the `state` member of
 * struct conn). Values are assigned sequentially starting at zero.
 */
enum conn_states {
    conn_listening = 0, /**< listening socket that accepts new connections */
    conn_new_cmd,       /**< get the connection ready for its next command */
    conn_waiting,       /**< waiting until the socket becomes readable */
    conn_read,          /**< reading a command line */
    conn_parse_cmd,     /**< trying to parse a command out of the input buffer */
    conn_write,         /**< writing a simple response */
    conn_nread,         /**< reading a fixed number of bytes */
    conn_swallow,       /**< discarding unneeded bytes without storing them */
    conn_closing,       /**< the connection is being closed */
    conn_mwrite,        /**< writing many items out one after another */
    conn_closed,        /**< the connection has been closed */
    conn_watch,         /**< held by the logger thread as a watcher */
    conn_max_state      /**< maximum state value (used for assertion) */
};
main baseでのイベントループの処理では、リクエストを受け付ける際に最初にdrive_machine関数が実行されます。このときコネクションの状態(conn_states)はconn_listeningになっており、ここでコネクションの受付と、worker threadの1つが持つコネクションキューへのタスクの追加を行います。
No reply/QuietオプションはASCIIコマンドで利用すると、リクエストに対するエラーが適切に表示されないため使うべきではないとされています。
mutativeなコマンドでパケットが返ってくるのを待つのを避ける必要がある場面で利用することはできます。
バイナリモードでは適切に実装されています。
STAT pid 24875
STAT uptime 513
STAT time 1557149463
STAT version 1.5.14
STAT libevent 2.0.21-stable
STAT pointer_size 64
STAT rusage_user 26.598229
STAT rusage_system 86.178184
STAT max_connections 1024
STAT curr_connections 2
STAT total_connections 66
STAT rejected_connections 0
STAT connection_structures 34
STAT reserved_fds 20
STAT cmd_get 0
STAT cmd_set 600000
STAT cmd_flush 3
STAT cmd_touch 0
STAT get_hits 0
STAT get_misses 0
STAT get_expired 0
STAT get_flushed 0
STAT delete_misses 0
STAT delete_hits 0
STAT incr_misses 0
STAT incr_hits 0
STAT decr_misses 0
STAT decr_hits 0
STAT cas_misses 0
STAT cas_hits 0
STAT cas_badval 0
STAT touch_hits 0
STAT touch_misses 0
STAT auth_cmds 0
STAT auth_errors 0
STAT bytes_read 309601911
STAT bytes_written 4802522
STAT limit_maxbytes 67108864
STAT accepting_conns 1
STAT listen_disabled_num 0
STAT time_in_listen_disabled_us 0
STAT threads 4
STAT conn_yields 0
STAT hash_power_level 17
STAT hash_bytes 1048576
STAT hash_is_expanding 0
STAT slab_reassign_rescues 0
STAT slab_reassign_chunk_rescues 0
STAT slab_reassign_evictions_nomem 0
STAT slab_reassign_inline_reclaim 0
STAT slab_reassign_busy_items 144
STAT slab_reassign_busy_deletes 0
STAT slab_reassign_running 0
STAT slabs_moved 62
STAT lru_crawler_running 0
STAT lru_crawler_starts 1020
STAT lru_maintainer_juggles 178227
STAT malloc_fails 0
STAT log_worker_dropped 0
STAT log_worker_written 0
STAT log_watcher_skipped 0
STAT log_watcher_sent 0
STAT bytes 62500672
STAT curr_items 111808
STAT total_items 600000
STAT slab_global_page_pool 0
STAT expired_unfetched 111664
STAT evicted_unfetched 376384
STAT evicted_active 0
STAT evictions 376384
STAT reclaimed 111664
STAT crawler_reclaimed 0
STAT crawler_items_checked 224505
STAT lrutail_reflocked 1
STAT moves_to_cold 600000
STAT moves_to_warm 0
STAT moves_within_lru 0
STAT direct_reclaims 376384
STAT lru_bumps_dropped 0
stats items
STAT items:9:number 111808
STAT items:9:number_hot 0
STAT items:9:number_warm 0
STAT items:9:number_cold 111808
STAT items:9:age_hot 0
STAT items:9:age_warm 0
STAT items:9:age 142
STAT items:9:evicted 376384
STAT items:9:evicted_nonzero 0
STAT items:9:evicted_time 39
STAT items:9:outofmemory 0
STAT items:9:tailrepairs 0
STAT items:9:reclaimed 111664
STAT items:9:expired_unfetched 111664
STAT items:9:evicted_unfetched 376384
STAT items:9:evicted_active 0
STAT items:9:crawler_reclaimed 0
STAT items:9:crawler_items_checked 224505
STAT items:9:lrutail_reflocked 1
STAT items:9:moves_to_cold 600000
STAT items:9:moves_to_warm 0
STAT items:9:moves_within_lru 0
STAT items:9:direct_reclaims 376384
STAT items:9:hits_to_hot 0
STAT items:9:hits_to_warm 0
STAT items:9:hits_to_cold 0
STAT items:9:hits_to_temp 0
STAT 9:chunk_size 600
STAT 9:chunks_per_page 1747
STAT 9:total_pages 64
STAT 9:total_chunks 111808
STAT 9:used_chunks 111808
STAT 9:free_chunks 0
STAT 9:free_chunks_end 0
STAT 9:mem_requested 62500672
STAT 9:get_hits 0
STAT 9:cmd_set 600000
STAT 9:delete_hits 0
STAT 9:incr_hits 0
STAT 9:decr_hits 0
STAT 9:cas_hits 0
STAT 9:cas_badval 0
STAT 9:touch_hits 0
STAT active_slabs 1
STAT total_malloced 67108864
stats conns
STAT 26:addr tcp:0.0.0.0:11211
STAT 26:state conn_listening
STAT 26:secs_since_last_cmd 584
STAT 27:addr tcp6:[::]:11211
STAT 27:state conn_listening
STAT 27:secs_since_last_cmd 584
STAT 28:addr tcp:127.0.0.1:56626
STAT 28:state conn_parse_cmd
STAT 28:secs_since_last_cmd 0
STAT maxbytes 67108864
STAT maxconns 1024
STAT tcpport 11211
STAT udpport 0
STAT inter NULL
STAT verbosity 3
STAT oldest 406
STAT evictions on
STAT domain_socket NULL
STAT umask 700
STAT growth_factor 1.25
STAT chunk_size 48
STAT num_threads 4
STAT num_threads_per_udp 4
STAT stat_key_prefix :
STAT detail_enabled no
STAT reqs_per_event 20
STAT cas_enabled yes
STAT tcp_backlog 1024
STAT binding_protocol auto-negotiate
STAT auth_enabled_sasl no
STAT item_size_max 1048576
STAT maxconns_fast yes
STAT hashpower_init 0
STAT slab_reassign yes
STAT slab_automove 1
STAT slab_automove_ratio 0.80
STAT slab_automove_window 30
STAT slab_chunk_max 524288
STAT lru_crawler yes
STAT lru_crawler_sleep 100
STAT lru_crawler_tocrawl 0
STAT tail_repair_time 0
STAT flush_enabled yes
STAT dump_enabled yes
STAT hash_algorithm murmur3
STAT lru_maintainer_thread yes
STAT lru_segmented yes
STAT hot_lru_pct 20
STAT warm_lru_pct 40
STAT hot_max_factor 0.20
STAT warm_max_factor 2.00
STAT temp_lru no
STAT temporary_ttl 61
STAT idle_timeout 0
STAT watcher_logbuf_size 262144
STAT worker_logbuf_size 65536
STAT track_sizes yes
STAT inline_ascii_response no
stats cachedump
stats cachedump 12 100 コマンドのようにSlab Class ID(stats itemsなどで確認可能)と取得数を指定します。
1..52
ok 1 - cache_create
ok 2 - cache_constructor
ok 3 - cache_constructor_fail
ok 4 - cache_destructor
ok 5 - cache_reuse
ok 6 - cache_redzone
ok 7 - issue_161
ok 8 - strtol
ok 9 - strtoll
ok 10 - strtoul
ok 11 - strtoull
ok 12 - issue_44
ok 13 - vperror
ok 14 - issue_101
Signal handled: Terminated.
ok 15 - start_server
ok 16 - issue_92
ok 17 - issue_102
ok 18 - binary_noop
ok 19 - binary_quit
ok 20 - binary_quitq
ok 21 - binary_set
ok 22 - binary_setq
ok 23 - binary_add
ok 24 - binary_addq
ok 25 - binary_replace
ok 26 - binary_replaceq
ok 27 - binary_delete
ok 28 - binary_deleteq
ok 29 - binary_get
ok 30 - binary_getq
ok 31 - binary_getk
ok 32 - binary_getkq
ok 33 - binary_gat
ok 34 - binary_gatq
ok 35 - binary_gatk
ok 36 - binary_gatkq
ok 37 - binary_incr
ok 38 - binary_incrq
ok 39 - binary_decr
ok 40 - binary_decrq
ok 41 - binary_version
ok 42 - binary_flush
ok 43 - binary_flushq
ok 44 - binary_append
ok 45 - binary_appendq
ok 46 - binary_prepend
ok 47 - binary_prependq
ok 48 - binary_stat
ok 49 - binary_illegal
ok 50 - binary_pipeline_hickup
Signal handled: Interrupt.
ok 51 - shutdown
ok 52 - stop_server
ASCIIモードでは、キー長の制限が250バイトでしたが、バイナリモードでは、2^16(=65,536)バイトまで扱えるようになっています。
また、コマンドの種類増加など拡張性も上がっています。
リクエストとレスポンスのヘッダの違いは、リクエストでは vbucket id(以前は予約済み領域)が入る位置に、レスポンスでは Status が入る点です。
各項目
Magic : マジックナンバー。リクエストパケットは 0x80、レスポンスパケットは0x81
Opcode : コマンドを表すコード
Key length : key長
Status : レスポンスのステータスコード
Extras length : Extras長
Data type : 予約済み領域
vbucket id : コマンドの仮想バケット
Total body length : extra + key + value のバイト数
Opaque : リクエストで送った値と同じ値をレスポンスで返すことで、リクエストとレスポンスの対応付けに利用
Will be copied back to you in the response
CAS : CAS機能で使用するデータのバージョン
詳細
Opcodeはコマンドのコードに相当し下記コマンドがあります。
0x00 Get
0x01 Set
0x02 Add
0x03 Replace
0x04 Delete
0x05 Increment
0x06 Decrement
0x07 Quit
0x08 Flush
0x09 GetQ
0x0a No-op
0x0b Version
0x0c GetK
0x0d GetKQ
0x0e Append
0x0f Prepend
0x10 Stat
0x11 SetQ
0x12 AddQ
0x13 ReplaceQ
0x14 DeleteQ
0x15 IncrementQ
0x16 DecrementQ
0x17 QuitQ
0x18 FlushQ
0x19 AppendQ
0x1a PrependQ
0x1b Verbosity *
0x1c Touch *
0x1d GAT *
0x1e GATQ *
0x20 SASL list mechs
0x21 SASL Auth
0x22 SASL Step
0x30 RGet
0x31 RSet
0x32 RSetQ
0x33 RAppend
0x34 RAppendQ
0x35 RPrepend
0x36 RPrependQ
0x37 RDelete
0x38 RDeleteQ
0x39 RIncr
0x3a RIncrQ
0x3b RDecr
0x3c RDecrQ
0x3d Set VBucket *
0x3e Get VBucket *
0x3f Del VBucket *
0x40 TAP Connect *
0x41 TAP Mutation *
0x42 TAP Delete *
0x43 TAP Flush *
0x44 TAP Opaque *
0x45 TAP VBucket Set *
0x46 TAP Checkpoint Start *
0x47 TAP Checkpoint End *
レスポンスのStatusは以下の種類のものがあります。
0x0000 No error
0x0001 Key not found
0x0002 Key exists
0x0003 Value too large
0x0004 Invalid arguments
0x0005 Item not stored
0x0006 Incr/Decr on non-numeric value.
0x0007 The vbucket belongs to another server
0x0008 Authentication error
0x0009 Authentication continue
0x0081 Unknown command
0x0082 Out of memory
0x0083 Not supported
0x0084 Internal error
0x0085 Busy
0x0086 Temporary failure
/**
 * Header of a stored item. The variable-length payload follows the fixed
 * members, starting at data[]:
 *   - 8 bytes of CAS, present only when (it_flags & ITEM_CAS)
 *   - the null-terminated key
 *   - the suffix " flags length\r\n" (not null-terminated)
 *   - the value, terminated by \r\n (not null-terminated; it is binary)
 */
typedef struct _stritem {
    /* The two list links below are protected by the LRU locks. */
    struct _stritem *next;
    struct _stritem *prev;

    /* Everything from here on is protected by an item lock. */
    struct _stritem *h_next;    /* next entry in the hash chain */
    rel_time_t      time;       /* least recent access */
    rel_time_t      exptime;    /* expiry time */
    int             nbytes;     /* size of the data */
    unsigned short  refcount;
    uint8_t         nsuffix;    /* length of the flags-and-length string */
    uint8_t         it_flags;   /* ITEM_* flag bits */
    uint8_t         slabs_clsid;/* slab class this item belongs to */
    uint8_t         nkey;       /* key length, w/terminating null and padding */

    /* A union rather than a plain array: this odd type avoids type-punning
     * issues in the little shuffle that saves space when CAS is not used. */
    union {
        uint64_t cas;
        char end;
    } data[];
} item;
/**
 * Bookkeeping for a single slab class.
 */
typedef struct {
    unsigned int size;      /* item size for this class */
    unsigned int perslab;   /* number of items that fit in one slab */

    void *slots;            /* list of item pointers */
    unsigned int sl_curr;   /* total number of free items in that list */

    unsigned int slabs;     /* number of slabs allocated for this class */

    void **slab_list;       /* array of slab pointers */
    unsigned int list_size; /* size of the preceding array (slab_list) */

    size_t requested;       /* number of bytes requested */
} slabclass_t;
/**
 * A single connection: its current state, read/write buffers, the data
 * needed by the individual states (nread, swallow, mwrite), UDP request
 * information, the stats buffer and the binary-protocol header.
 */
struct conn {
    int sfd;
#ifdef TLS
    SSL *ssl;
    char *ssl_wbuf;
    bool ssl_enabled;
#endif
    sasl_conn_t *sasl_conn;
    bool sasl_started;
    bool authenticated;
    enum conn_states state;
    enum bin_substates substate;
    rel_time_t last_cmd_time;
    struct event event;
    short ev_flags;
    short which;            /** the events that have just been triggered */

    /* Read buffer. */
    char *rbuf;             /** commands are read into this buffer */
    char *rcurr;            /** where parsing stopped, if part of rbuf was already parsed */
    int rsize;              /** total allocated size of rbuf */
    int rbytes;             /** amount of unparsed data available, starting at rcurr */

    /* Write buffer. */
    char *wbuf;
    char *wcurr;
    int wsize;
    int wbytes;

    /** state to switch to once the current write has finished */
    enum conn_states write_and_go;
    void *write_and_free;   /** memory to free once writing has finished */

    char *ritem;            /** destination for an item's value while it is read in */
    int rlbytes;

    /* data for the nread state */

    /**
     * Holds the item structure that is created after the command line of a
     * set/add/replace command has been read, but before all of the actual
     * data has arrived. The data is read straight into ITEM_data(item) so
     * that no extra copy is needed.
     */
    void *item;             /* used by the set/add/replace commands */

    /* data for the swallow state */
    int sbytes;             /* number of bytes to swallow */

    /* data for the mwrite state */
    struct iovec *iov;
    int iovsize;            /* number of elements allocated in iov[] */
    int iovused;            /* number of elements used in iov[] */

    struct msghdr *msglist;
    int msgsize;            /* number of elements allocated in msglist[] */
    int msgused;            /* number of elements used in msglist[] */
    int msgcurr;            /* element of msglist[] currently being transmitted */
    int msgbytes;           /* number of bytes in the current msg */

    item **ilist;           /* list of items to write out */
    int isize;
    item **icurr;
    int ileft;

    char **suffixlist;
    int suffixsize;
    char **suffixcurr;
    int suffixleft;
#ifdef EXTSTORE
    int io_wrapleft;
    unsigned int recache_counter;
    io_wrap *io_wraplist;   /* linked list of io_wraps */
    bool io_queued;         /* FIXME: debugging flag */
#endif
    enum protocol protocol;             /* protocol spoken on this connection */
    enum network_transport transport;   /* transport used by this connection */

    /* data for UDP clients */
    int request_id;                     /* incoming UDP request ID, if this is a UDP "connection" */
    struct sockaddr_in6 request_addr;   /* udp: sender of the most recent request */
    socklen_t request_addr_size;
    unsigned char *hdrbuf;              /* udp packet headers */
    int hdrsize;                        /* number of headers' worth of space that is allocated */

    bool noreply;           /* true if the reply should not be sent */

    /* current stats command */
    struct {
        char *buffer;
        size_t size;
        size_t offset;
    } stats;

    /* Binary protocol stuff */
    /* The binary header is stored here. */
    protocol_binary_request_header binary_header;
    uint64_t cas;           /* the cas to return */
    short cmd;              /* command currently being processed */
    int opaque;
    int keylen;

    conn *next;                 /* used for generating a list of conn structures */
    LIBEVENT_THREAD *thread;    /* thread object that serves this connection */

    /* I/O function pointers for this connection. */
    ssize_t (*read)(conn *c, void *buf, size_t count);
    ssize_t (*sendmsg)(conn *c, struct msghdr *msg, int flags);
    ssize_t (*write)(conn *c, void *buf, size_t count);
};