[PATCH] : rework checks handling

From: Krzysztof Oledzki <ole#ans.pl>
Date: Fri, 11 Jan 2008 01:16:17 +0100


From eac023632cc39cc646feefd48a8bea82129cbfbc Mon Sep 17 00:00:00 2001 From: Krzysztof Piotr Oledzki <ole#ans.pl> Date: Fri, 11 Jan 2008 01:11:22 +0100
Subject: [MEDIUM]: rework checks handling

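This RFC-quality patch adds two new variables: fastinter and downinter. The interval between checks now depends on the server state:

 - fully up (health at its maximum): inter is used, as before;
 - fully down (not running, health at zero): downinter is used;
 - any other (transitional) state, going up or going down: fastinter is used.

When fastinter or downinter is not set (0), inter is used instead.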

It allows setting something like:

        server sr6 127.0.51.61:80 cookie s6 check inter 10000 downinter 20000 fastinter 500 fall 3 weight 40

In the above example haproxy uses 10000ms between checks, but as soon as one check fails, fastinter (500ms) is used. If the server is down, downinter (20000ms) is used, or fastinter (500ms) once one check passes. Fastinter is also used when haproxy starts.

BTW: I think we need some improvements to the parser, as adding additional keywords is getting too complicated. I'll try to prepare an RFC patch soon.

A new "timeout.check" variable was added; if set, haproxy uses it as an additional read timeout, but only after a connection has already been established. I was thinking about using "timeout.server" here, but most people set that one with an additional reserve and still want checks to kick out laggy servers. Please also note that in most cases a check request is much simpler and faster to handle than a normal request, so this timeout should be smaller.

I also changed the timeout used for connection establishment from "inter" to "timeout.connect", as in most cases "inter" is _much_ too long for this purpose.

After all these changes I was brave enough to add my copyright info to src/server.c, as I'm the author of both functions located in this file. Hope it is OK.

I'm going to add some documentation about fastinter/downinter and timeout.check if this patch is acceptable, and of course in the final version I'll comment out all the debug fprintfs I introduced. ;)

---
 include/proto/server.h |    1 +
 include/types/proxy.h  |    1 +
 include/types/server.h |    2 +-
 src/cfgparse.c         |   25 ++++++++++++++++++++++++-
 src/checks.c           |   38 +++++++++++++++++++++++---------------
 src/proxy.c            |    6 +++++-
 src/server.c           |   18 ++++++++++++++++++
 7 files changed, 73 insertions(+), 18 deletions(-)

diff --git a/include/proto/server.h b/include/proto/server.h
index 27e4f36..f3b5e16 100644
--- a/include/proto/server.h
+++ b/include/proto/server.h

@@ -32,6 +32,7 @@
#include <proto/queue.h> int srv_downtime(struct server *s); +int srv_getinter(struct server *s); #endif /* _PROTO_SERVER_H */ diff --git a/include/types/proxy.h b/include/types/proxy.h index 5d397d4..dc62cfe 100644 --- a/include/types/proxy.h +++ b/include/types/proxy.h
@@ -178,6 +178,7 @@ struct proxy {
struct timeval server; /* server I/O timeout (in milliseconds) */ struct timeval appsession; /* appsession cookie expiration */ struct timeval httpreq; /* maximum time for complete HTTP request */ + struct timeval check; /* maximum time for complete check */ } timeout; char *id; /* proxy id */ struct list pendconns; /* pending connections with no server assigned yet */ diff --git a/include/types/server.h b/include/types/server.h index 483e260..3ea86ad 100644 --- a/include/types/server.h +++ b/include/types/server.h
@@ -93,7 +93,7 @@ struct server {
short check_port; /* the port to use for the health checks */ int health; /* 0->rise-1 = bad; rise->rise+fall-1 = good */ int rise, fall; /* time in iterations */ - int inter; /* time in milliseconds */ + int inter, fastinter, downinter; /* checks: time in milliseconds */ int slowstart; /* slowstart time in seconds (ms in the conf) */ int result; /* health-check result : SRV_CHK_* */ int curfd; /* file desc used for current test, or -1 if not in test */ diff --git a/src/cfgparse.c b/src/cfgparse.c index 0c89c5b..4fcc981 100644 --- a/src/cfgparse.c +++ b/src/cfgparse.c
@@ -679,6 +679,7 @@ int cfg_parse_listen(const char *file, int linenum, char **args, int inv)
if (curproxy->cap & PR_CAP_BE) { curproxy->timeout.connect = defproxy.timeout.connect; curproxy->timeout.server = defproxy.timeout.server; + curproxy->timeout.check = defproxy.timeout.check; curproxy->timeout.queue = defproxy.timeout.queue; curproxy->timeout.tarpit = defproxy.timeout.tarpit; curproxy->source_addr = defproxy.source_addr;
@@ -1507,6 +1508,8 @@ int cfg_parse_listen(const char *file, int linenum, char **args, int inv)
newsrv->curfd = -1; /* no health-check in progress */ newsrv->inter = DEF_CHKINTR; + newsrv->fastinter = 0; /* 0 => use newsrv->inter instead */ + newsrv->downinter = 0; /* 0 => use newsrv->inter instead */ newsrv->rise = DEF_RISETIME; newsrv->fall = DEF_FALLTIME; newsrv->health = newsrv->rise; /* up, but will fall down at first failure */
@@ -1540,6 +1543,26 @@ int cfg_parse_listen(const char *file, int linenum, char **args, int inv)
newsrv->inter = val; cur_arg += 2; } + else if (!strcmp(args[cur_arg], "fastinter")) { + const char *err = parse_time_err(args[cur_arg + 1], &val, TIME_UNIT_MS); + if (err) { + Alert("parsing [%s:%d]: unexpected character '%c' in 'fastinter' argument of server %s.\n", + file, linenum, *err, newsrv->id); + return -1; + } + newsrv->fastinter = val; + cur_arg += 2; + } + else if (!strcmp(args[cur_arg], "downinter")) { + const char *err = parse_time_err(args[cur_arg + 1], &val, TIME_UNIT_MS); + if (err) { + Alert("parsing [%s:%d]: unexpected character '%c' in 'downinter' argument of server %s.\n", + file, linenum, *err, newsrv->id); + return -1; + } + newsrv->downinter = val; + cur_arg += 2; + } else if (!strcmp(args[cur_arg], "addr")) { newsrv->check_addr = *str2sa(args[cur_arg + 1]); cur_arg += 2;
@@ -1642,7 +1665,7 @@ int cfg_parse_listen(const char *file, int linenum, char **args, int inv)
} #endif else { - Alert("parsing [%s:%d] : server %s only supports options 'backup', 'cookie', 'check', 'inter', 'rise', 'fall', 'addr', 'port', 'source', 'minconn', 'maxconn', 'maxqueue', 'slowstart' and 'weight'.\n", + Alert("parsing [%s:%d] : server %s only supports options 'backup', 'cookie', 'check', 'inter', 'fastinter', 'downinter', 'rise', 'fall', 'addr', 'port', 'source', 'minconn', 'maxconn', 'maxqueue', 'slowstart' and 'weight'.\n", file, linenum, newsrv->id); return -1; } diff --git a/src/checks.c b/src/checks.c index c631946..7bd54c2 100644 --- a/src/checks.c +++ b/src/checks.c
@@ -173,6 +173,7 @@ static int event_srv_chk_w(int fd)
struct task *t = fdtab[fd].owner; struct server *s = t->context; + fprintf(stderr, "event_srv_chk_w, state=%ld\n", unlikely(fdtab[fd].state)); if (unlikely(fdtab[fd].state == FD_STERROR || (fdtab[fd].ev & FD_POLL_ERR))) goto out_error;
@@ -200,6 +201,12 @@ static int event_srv_chk_w(int fd)
ret = send(fd, s->proxy->check_req, s->proxy->check_len, MSG_DONTWAIT | MSG_NOSIGNAL); #endif if (ret == s->proxy->check_len) { + /* we allow up to <timeout.check> if nonzero or <timeout.server> for a responce */ + fprintf(stderr, "event_srv_chk_w, ms=%lu\n", + __tv_to_ms(&s->proxy->timeout.check)); + if (__tv_to_ms(&s->proxy->timeout.check)) + tv_add(&t->expire, &now, &s->proxy->timeout.check); + EV_FD_SET(fd, DIR_RD); /* prepare for reading reply */ goto out_nowake; }
@@ -495,8 +502,9 @@ void process_chk(struct task *t, struct timeval *next)
#ifdef DEBUG_FULL assert (!EV_FD_ISSET(fd, DIR_RD)); #endif - /* FIXME: we allow up to <inter> for a connection to establish, but we should use another parameter */ - tv_ms_add(&t->expire, &now, s->inter); + fprintf(stderr, "process_chk: 4+, %lu\n", __tv_to_ms(&s->proxy->timeout.connect)); + /* we allow up to <timeout.connect> for a connection to establish */ + tv_add(&t->expire, &now, &s->proxy->timeout.connect); task_queue(t); /* restore t to its place in the task list */ *next = t->expire; return;
@@ -524,10 +532,10 @@ void process_chk(struct task *t, struct timeval *next)
else set_server_down(s); - //fprintf(stderr, "process_chk: 7\n"); - /* FIXME: we allow up to <inter> for a connection to establish, but we should use another parameter */ + fprintf(stderr, "process_chk: 7, %lu\n", __tv_to_ms(&s->proxy->timeout.connect)); + /* we allow up to <timeout.connect> for a connection to establish */ while (tv_isle(&t->expire, &now)) - tv_ms_add(&t->expire, &t->expire, s->inter); + tv_add(&t->expire, &t->expire, &s->proxy->timeout.connect); goto new_chk; } else {
@@ -662,11 +670,11 @@ void process_chk(struct task *t, struct timeval *next)
rv = 0; if (global.spread_checks > 0) { - rv = s->inter * global.spread_checks / 100; + rv = srv_getinter(s) * global.spread_checks / 100; rv -= (int) (2 * rv * (rand() / (RAND_MAX + 1.0))); - //fprintf(stderr, "process_chk(%p): (%d+/-%d%%) random=%d\n", s, s->inter, global.spread_checks, rv); + //fprintf(stderr, "process_chk(%p): (%d+/-%d%%) random=%d\n", s, srv_getinter(s), global.spread_checks, rv); } - tv_ms_add(&t->expire, &now, s->inter + rv); + tv_ms_add(&t->expire, &now, srv_getinter(s) + rv); goto new_chk; } else if ((s->result & SRV_CHK_ERROR) || tv_isle(&t->expire, &now)) {
@@ -683,11 +691,11 @@ void process_chk(struct task *t, struct timeval *next)
rv = 0; if (global.spread_checks > 0) { - rv = s->inter * global.spread_checks / 100; + rv = srv_getinter(s) * global.spread_checks / 100; rv -= (int) (2 * rv * (rand() / (RAND_MAX + 1.0))); - //fprintf(stderr, "process_chk(%p): (%d+/-%d%%) random=%d\n", s, s->inter, global.spread_checks, rv); + //fprintf(stderr, "process_chk(%p): (%d+/-%d%%) random=%d\n", s, srv_getinter(s), global.spread_checks, rv); } - tv_ms_add(&t->expire, &now, s->inter + rv); + tv_ms_add(&t->expire, &now, srv_getinter(s) + rv); goto new_chk; } /* if result is unknown and there's no timeout, we have to wait again */
@@ -723,9 +731,9 @@ int start_checks() {
if (!(s->state & SRV_CHECKED)) continue; - if ((s->inter >= SRV_CHK_INTER_THRES) && - (!mininter || mininter > s->inter)) - mininter = s->inter; + if ((srv_getinter(s) >= SRV_CHK_INTER_THRES) && + (!mininter || mininter > srv_getinter(s))) + mininter = srv_getinter(s); nbchk++; }
@@ -759,7 +767,7 @@ int start_checks() {
/* check this every ms */ tv_ms_add(&t->expire, &now, - ((mininter && mininter >= s->inter) ? mininter : s->inter) * srvpos / nbchk); + ((mininter && mininter >= srv_getinter(s)) ? mininter : srv_getinter(s)) * srvpos / nbchk); task_queue(t); srvpos++; diff --git a/src/proxy.c b/src/proxy.c index 7019606..281ee8e 100644 --- a/src/proxy.c +++ b/src/proxy.c
@@ -118,6 +118,10 @@ int proxy_parse_timeout(const char **args, struct proxy *proxy,
tv = &proxy->timeout.connect; td = &defpx->timeout.connect; cap = PR_CAP_BE; + } else if (!strcmp(args[0], "check")) { + tv = &proxy->timeout.check; + td = &defpx->timeout.check; + cap = PR_CAP_BE; } else if (!strcmp(args[0], "appsession")) { tv = &proxy->timeout.appsession; td = &defpx->timeout.appsession;
@@ -128,7 +132,7 @@ int proxy_parse_timeout(const char **args, struct proxy *proxy,
cap = PR_CAP_BE; } else { snprintf(err, errlen, - "timeout '%s': must be 'client', 'server', 'connect', " + "timeout '%s': must be 'client', 'server', 'connect', 'check', " "'appsession', 'queue', 'http-request' or 'tarpit'", args[0]); return -1; diff --git a/src/server.c b/src/server.c index 8b0fa14..2efb578 100644 --- a/src/server.c +++ b/src/server.c
@@ -2,6 +2,7 @@
* Server management functions. * * Copyright 2000-2006 Willy Tarreau <w#1wt.eu> + * Copyright 2007-2008 Krzysztof Piotr Oledzki <ole#ans.pl> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License
@@ -25,6 +26,23 @@ int srv_downtime(struct server *s) {
return now.tv_sec - s->last_change + s->down_time; } +int srv_getinter(struct server *s) { + + if ((s->state & SRV_CHECKED) && (s->health == s->rise + s->fall - 1)) { + fprintf(stderr, "srv_getinter(%s/%s): s->inter=%d\n", s->proxy->id, s->id, s->inter); + return s->inter; + } + + if (!(s->state & SRV_RUNNING) && s->health==0) { + fprintf(stderr, "srv_getinter(%s/%s): s->downinter=%d\n", s->proxy->id, s->id, s->downinter); + return (s->downinter)?(s->downinter):(s->inter); + } + + fprintf(stderr, "srv_getinter(%s/%s): s->fastinter=%d\n", s->proxy->id, s->id, s->fastinter); + return (s->fastinter)?(s->fastinter):(s->inter); +} + + /* * Local variables: * c-indent-level: 8 -- 1.5.3.7
Received on 2008/01/11 01:16

This archive was generated by hypermail 2.2.0 : 2008/01/11 01:30 CET