[PATCH] : Count retries and redispatches also for servers + extend logs + %d->%u cleanup

From: Krzysztof Oledzki <ole#ans.pl>
Date: Tue, 25 Dec 2007 04:49:32 +0100


From c71a482f5c303392d06c228d868a4898efde07df Mon Sep 17 00:00:00 2001 From: Krzysztof Piotr Oledzki <ole#ans.pl> Date: Tue, 25 Dec 2007 04:01:58 +0100
Subject: [MINOR]: Count retries and redispatches also for servers + extend logs + %d->%u cleanup

This patch extends a little previously added functionality to also count retries and redispatches for servers. Now it is possible to know which server causes redispatches as it is not always the same that takes most retries.

I also extended logs to show how many retries were done and if redispatching was necessary ('+'). Currently I'm using "s->conn_retries>0" logic to match redispatched connections but in the future it may be required to add for example additional SN_REDISP flag.

The documentation about logs was also fixed a little (sorry, english only), as current version uses totally different format. BTW: examples are still outdated, maybe next time...

Finally, I changed %d -> %u for retries/redispatches as those variables are declared as unsigned.

---
 doc/haproxy-en.txt     |   46 +++++++++++++++++++++++++---------------------
 include/types/server.h |    2 +-
 src/backend.c          |    4 +++-
 src/dumpstats.c        |   12 ++++++------
 src/log.c              |    4 +++-
 src/proto_http.c       |    8 ++++++--
 6 files changed, 44 insertions(+), 32 deletions(-)

diff --git a/doc/haproxy-en.txt b/doc/haproxy-en.txt
index 73da3d9..a18a14d 100644
--- a/doc/haproxy-en.txt
+++ b/doc/haproxy-en.txt

@@ -1679,33 +1679,37 @@ Example of HTTP logging :
option dontlognull log 192.168.2.200 local3 ->>> haproxy[674]: 127.0.0.1:33319 [15/Oct/2003:08:31:57] relais-http Srv1 9/0/7/147/723 200 243 - - ---- 2/3/3 0/0 "HEAD / HTTP/1.0" +>>> haproxy[674]: 127.0.0.1:33319 [15/Oct/2003:08:31:57] relais-http Srv1 9/0/7/147/723 200 243 - - ---- 34/34/15/8/3 0/0 "HEAD / HTTP/1.0" More complete example - haproxy[18989]: 10.0.0.1:34552 [15/Oct/2003:15:26:31] relais-http Srv1 3183/-1/-1/-1/11215 503 0 - - SC-- 137/202/205 0/0 {w.ods.org|Mozilla} {} "HEAD / HTTP/1.0" + haproxy[18989]: 10.0.0.1:34552 [15/Oct/2003:15:26:31] relais-http Srv1 3183/-1/-1/-1/11215 503 0 - - SC-- 205/202/150/137/4+ 0/0 {w.ods.org|Mozilla} {} "HEAD / HTTP/1.0" - Field Format Example + Field Format Example - 1 process_name '[' pid ']:' haproxy[18989]: - 2 client_ip ':' client_port 10.0.0.1:34552 - 3 '[' date ']' [15/Oct/2003:15:26:31] - 4 listener_name relais-http - 5 server_name Srv1 - 6 Tq '/' Tw '/' Tc '/' Tr '/' Tt 3183/-1/-1/-1/11215 - 7 HTTP_return_code 503 - 8 bytes_read 0 - 9 captured_request_cookie - - 10 captured_response_cookie - - 11 termination_state SC-- - 12 srv_conn '/' listener_conn '/' process_conn 137/202/205 - 13 position in srv_queue / listener_queue 0/0 - 14 '{' captured_request_headers '}' {w.ods.org|Mozilla} - 15 '{' captured_response_headers '}' {} - 16 '"' HTTP_request '"' "HEAD / HTTP/1.0" + 1 process_name '[' pid ']:' haproxy[18989]: + 2 client_ip ':' client_port 10.0.0.1:34552 + 3 '[' date ']' [15/Oct/2003:15:26:31] + 4 listener_name relais-http + 5 server_name Srv1 + 6 Tq '/' Tw '/' Tc '/' Tr '/' Tt 3183/-1/-1/-1/11215 + 7 HTTP_return_code 503 + 8 bytes_read 0 + 9 captured_request_cookie - + 10 captured_response_cookie - + 11 termination_state SC-- + 12 actconn '/' feconn '/' beconn '/' srv_conn '/' retries 205/202/150/137/4+ + 13 position in srv_queue / listener_queue 0/0 + 14 '{' captured_request_headers '}' {w.ods.org|Mozilla} + 15 '{' captured_response_headers '}' {} + 16 '"' HTTP_request '"' "HEAD / HTTP/1.0" Note for log parsers: the URI is ALWAYS the end of the line starting with the first double quote '"'. +The retries count may have additional '+' sign means that the connection was +redispatched from one server to another where retries limit (retries 4) +was depleted. + The problem when logging at end of connection is that you have no clue about what is happening during very long sessions. To workaround this problem, a new option 'logasap' has been introduced in 1.1.28/1.2.1. When specified, the
@@ -1716,7 +1720,7 @@ case, the number of bytes reported is the number of header bytes sent to the
client. In order to avoid confusion with normal logs, the total time field and the -number of bytes are prefixed with a '+' sign which mean that real numbers are +number of bytes are prefixed with a '+' sign which means that real numbers are certainly bigger. Example :
@@ -1729,7 +1733,7 @@ Example :
option logasap log 192.168.2.200 local3 ->>> haproxy[674]: 127.0.0.1:33320 [15/Oct/2003:08:32:17] relais-http Srv1 9/10/7/14/+30 200 +243 - - ---- 1/1/3 1/0 "GET /image.iso HTTP/1.0" +>>> haproxy[674]: 127.0.0.1:33320 [15/Oct/2003:08:32:17] relais-http Srv1 9/10/7/14/+30 200 +243 - - ---- 3/1/1/1/0 1/0 "GET /image.iso HTTP/1.0" 4.2.3) Timing events -------------------- diff --git a/include/types/server.h b/include/types/server.h index 5c47d17..483e260 100644 --- a/include/types/server.h +++ b/include/types/server.h
@@ -113,7 +113,7 @@ struct server {
time_t last_change; /* last time, when the state was changed */ unsigned failed_conns, failed_resp; /* failed connect() and responses */ - unsigned retries; /* retried connections */ + unsigned retries, redispatches; /* retried and redispatched connections */ unsigned failed_secu; /* blocked responses because of security concerns */ unsigned cum_sess; /* cumulated number of sessions really sent to this server */ unsigned cum_lbconn; /* cumulated number of sessions directed by load balancing */ diff --git a/src/backend.c b/src/backend.c index 6ba3937..f96858d 100644 --- a/src/backend.c +++ b/src/backend.c
@@ -1419,8 +1419,10 @@ int srv_retryable_connect(struct session *t)
if (may_dequeue_tasks(t->srv, t->be)) task_wakeup(t->srv->queue_mgt); - if (t->srv) + if (t->srv) { t->srv->failed_conns++; + t->srv->redispatches++; + } t->be->redispatches++; t->flags &= ~(SN_DIRECT | SN_ASSIGNED | SN_ADDR_SET); diff --git a/src/dumpstats.c b/src/dumpstats.c index f752271..27e15f7 100644 --- a/src/dumpstats.c +++ b/src/dumpstats.c
@@ -740,7 +740,7 @@ int stats_dump_proxy(struct session *s, struct proxy *px, struct uri_auth *uri,
/* errors : request, connect, response */ "<td align=right></td><td align=right>%d</td><td align=right>%d</td>\n" /* warnings: retries, redispatches */ - "<td align=right>%d</td><td align=right></td>" + "<td align=right>%u</td><td align=right>%u</td>" "", (sv->state & SRV_BACKUP) ? "backup" : "active", sv_state, sv->id,
@@ -750,7 +750,7 @@ int stats_dump_proxy(struct session *s, struct proxy *px, struct uri_auth *uri,
sv->bytes_in, sv->bytes_out, sv->failed_secu, sv->failed_conns, sv->failed_resp, - sv->retries); + sv->retries, sv->redispatches); /* status */ chunk_printf(&msg, sizeof(trash), "<td nowrap>");
@@ -817,7 +817,7 @@ int stats_dump_proxy(struct session *s, struct proxy *px, struct uri_auth *uri,
/* errors : request, connect, response */ ",%d,%d," /* warnings: retries, redispatches */ - "%d,," + "%u,%u," "", px->id, sv->id, sv->nbpend, sv->nbpend_max,
@@ -825,7 +825,7 @@ int stats_dump_proxy(struct session *s, struct proxy *px, struct uri_auth *uri,
sv->bytes_in, sv->bytes_out, sv->failed_secu, sv->failed_conns, sv->failed_resp, - sv->retries); + sv->retries, sv->redispatches); /* status */ chunk_printf(&msg, sizeof(trash),
@@ -901,7 +901,7 @@ int stats_dump_proxy(struct session *s, struct proxy *px, struct uri_auth *uri,
/* errors : request, connect, response */ "<td align=right></td><td align=right>%d</td><td align=right>%d</td>\n" /* warnings: retries, redispatches */ - "<td align=right>%d</td><td align=right>%d</td>" + "<td align=right>%u</td><td align=right>%u</td>" /* backend status: reflect backend status (up/down): we display UP * if the backend has known working servers or if it has no server at * all (eg: for stats). Then we display the total weight, number of
@@ -943,7 +943,7 @@ int stats_dump_proxy(struct session *s, struct proxy *px, struct uri_auth *uri,
/* errors : request, connect, response */ ",%d,%d," /* warnings: retries, redispatches */ - "%d,%d," + "%u,%u," /* backend status: reflect backend status (up/down): we display UP * if the backend has known working servers or if it has no server at * all (eg: for stats). Then we display the total weight, number of diff --git a/src/log.c b/src/log.c index c7ea25c..b9a9778 100644 --- a/src/log.c +++ b/src/log.c
@@ -356,7 +356,7 @@ void tcp_sess_log(struct session *s)
send_log(prx_log, LOG_INFO, "%s:%d [%02d/%s/%04d:%02d:%02d:%02d.%03d]" " %s %s/%s %d/%d/%s%d %s%lld" - " %c%c %d/%d/%d/%d %d/%d\n", + " %c%c %d/%d/%d/%d/%u%s %d/%d\n", pn, (s->cli_addr.ss_family == AF_INET) ? ntohs(((struct sockaddr_in *)&s->cli_addr)->sin_port) :
@@ -371,6 +371,8 @@ void tcp_sess_log(struct session *s)
sess_term_cond[(s->flags & SN_ERR_MASK) >> SN_ERR_SHIFT], sess_fin_state[(s->flags & SN_FINST_MASK) >> SN_FINST_SHIFT], actconn, fe->feconn, be->beconn, s->srv ? s->srv->cur_sess : 0, + (s->conn_retries>0)?(be->conn_retries - s->conn_retries):be->conn_retries, + (s->conn_retries==-1)?"+":"", s->logs.srv_queue_size, s->logs.prx_queue_size); s->logs.logwait = 0; diff --git a/src/proto_http.c b/src/proto_http.c index f387d0b..2c794ca 100644 --- a/src/proto_http.c +++ b/src/proto_http.c
@@ -769,7 +769,7 @@ static void http_sess_log(struct session *s)
send_log(prx_log, LOG_INFO, "%s:%d [%02d/%s/%04d:%02d:%02d:%02d.%03d]" " %s %s/%s %d/%d/%d/%d/%s%d %d %s%lld" - " %s %s %c%c%c%c %d/%d/%d/%d %d/%d%s\n", + " %s %s %c%c%c%c %d/%d/%d/%d/%u%s %d/%d%s\n", pn, (s->cli_addr.ss_family == AF_INET) ? ntohs(((struct sockaddr_in *)&s->cli_addr)->sin_port) :
@@ -791,6 +791,8 @@ static void http_sess_log(struct session *s)
(be->options & PR_O_COOK_ANY) ? sess_cookie[(txn->flags & TX_CK_MASK) >> TX_CK_SHIFT] : '-', (be->options & PR_O_COOK_ANY) ? sess_set_cookie[(txn->flags & TX_SCK_MASK) >> TX_SCK_SHIFT] : '-', actconn, fe->feconn, be->beconn, s->srv ? s->srv->cur_sess : 0, + (s->conn_retries>0)?(be->conn_retries - s->conn_retries):be->conn_retries, + (s->conn_retries==-1)?"+":"", s->logs.srv_queue_size, s->logs.prx_queue_size, tmpline); s->logs.logwait = 0;
@@ -2505,8 +2507,10 @@ int process_srv(struct session *t)
if (may_dequeue_tasks(t->srv, t->be)) task_wakeup(t->srv->queue_mgt); - if (t->srv) + if (t->srv) { t->srv->failed_conns++; + t->srv->redispatches++; + } t->be->redispatches++; t->flags &= ~(SN_DIRECT | SN_ASSIGNED | SN_ADDR_SET); -- 1.5.3.7
Received on 2007/12/25 04:49

This archive was generated by hypermail 2.2.0 : 2007/12/25 05:00 CET