From 677e385fec445987e76da9270a982bdf3d909a92 Mon Sep 17 00:00:00 2001
From: Yves Rutschle <git1@rutschle.net>
Date: Mon, 13 Aug 2018 22:29:09 +0200
Subject: [PATCH] new probing algorithm

---
 ChangeLog | 17 ++++++++++++++
 probe.c   | 67 +++++++++++++++++++++++++++++--------------------------
 t         |  6 ++---
 test.cfg  |  2 +-
 4 files changed, 56 insertions(+), 36 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index a4f1d58..6289c91 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,20 @@
+vNEXT:
+	New probing method:
+	Before, probes were tried in order, repeating on the
+	same probe as long it returned PROBE_AGAIN before
+	moving to the next one. This means a probe which
+	requires a lot of data (i.e. returne PROBE_AGAIN for
+	a long time) could prevent sucessful matches from 
+	subsequent probes. The configuration file needed to
+	take that into account.
+
+	Now, all probes are tried each time new data is
+	found. If any probe matches, use it. If at least one
+	probe requires more data, wait for more. If all
+	probes failed, connect to the last one. So the only
+	thing to know when writing the configuration file is
+	that 'anyprot' needs to be last.
+
 v1.19: 20JAN2018
 	Added 'syslog_facility' configuration option to
 	specify where to log.
diff --git a/probe.c b/probe.c
index 91fe88d..b9f7e21 100644
--- a/probe.c
+++ b/probe.c
@@ -180,13 +180,16 @@ static int is_tinc_protocol( const char *p, int len, struct proto *proto)
  * */
 static int is_xmpp_protocol( const char *p, int len, struct proto *proto)
 {
+    if (memmem(p, len, "jabber", 6))
+        return PROBE_MATCH;
+
     /* sometimes the word 'jabber' shows up late in the initial string,
        sometimes after a newline. this makes sure we snarf the entire preamble
        and detect it. (fixed for adium/pidgin) */
     if (len < 50)
         return PROBE_AGAIN;
 
-    return memmem(p, len, "jabber", 6) ? 1 : 0;
+    return PROBE_NEXT;
 }
 
 static int probe_http_method(const char *p, int len, const char *opt)
@@ -372,8 +375,8 @@ static int regex_probe(const char *p, int len, struct proto *proto)
 int probe_client_protocol(struct connection *cnx)
 {
     char buffer[BUFSIZ];
-    struct proto *p;
-    int n;
+    struct proto *p, *last_p;
+    int n, res, again = 0;
 
     n = read(cnx->q[0].fd, buffer, sizeof(buffer));
     /* It's possible that read() returns an error, e.g. if the client
@@ -382,40 +385,40 @@ int probe_client_protocol(struct connection *cnx)
      * function does not have to deal with a specific  failure condition (the
      * connection will just fail later normally). */
 
-    /* Dump hex values of the packet */
-    if (verbose>1) {
-        fprintf(stderr, "hexdump of incoming packet:\n");
-        hexdump(buffer, n);
-    }
-
     if (n > 0) {
-        int res = PROBE_NEXT;
-
-        defer_write(&cnx->q[1], buffer, n);
-
-        for (p = cnx->proto; p && res == PROBE_NEXT; p = p->next) {
-            char* probe_str[3] = {"PROBE_NEXT",
-                                 "PROBE_MATCH",
-                                 "PROBE_AGAIN"};
-            if (! p->probe) continue;
-            if (verbose) fprintf(stderr, "probing for %s...", p->description);
-
-            cnx->proto = p;
-            res = p->probe(cnx->q[1].begin_deferred_data, cnx->q[1].deferred_data_size, p);
-            if (verbose) fprintf(stderr, "%s\n", probe_str[res]);
+        if (verbose > 1) {
+            fprintf(stderr, "hexdump of incoming packet:\n");
+            hexdump(buffer, n);
         }
-        if (res != PROBE_NEXT)
-            return res;
+        defer_write(&cnx->q[1], buffer, n);
     }
 
-    if (verbose) 
-        fprintf(stderr, 
-                "all probes failed, connecting to first protocol: %s\n", 
-                protocols->description);
+    for (p = cnx->proto; p; p = p->next) {
+        char* probe_str[3] = {"PROBE_NEXT", "PROBE_MATCH", "PROBE_AGAIN"};
+        if (! p->probe) continue;
 
-    /* If none worked, return the first one affected (that's completely
-     * arbitrary) */
-    cnx->proto = protocols;
+        /* Don't probe last protocol if it is anyprot (and store last protocol) */
+        if (! p->next) {
+            last_p = p;
+            if (!strcmp(p->description, "anyprot"))
+                break;
+        }
+
+        res = p->probe(cnx->q[1].begin_deferred_data, cnx->q[1].deferred_data_size, p);
+        if (verbose) fprintf(stderr, "probing for %s: %s\n", p->description, probe_str[res]);
+
+        if (res == PROBE_MATCH) {
+            cnx->proto = p;
+            return PROBE_MATCH;
+        }
+        if (res == PROBE_AGAIN)
+            again++;
+    }
+    if (again)
+        return PROBE_AGAIN;
+
+    /* Everything failed: match the last one */
+    cnx->proto = last_p;
     return PROBE_MATCH;
 }
 
diff --git a/t b/t
index 46d2011..f7284a9 100755
--- a/t
+++ b/t
@@ -70,7 +70,7 @@ sub test_probes {
             'tinc' => { data => "0 hello" },
             'xmpp' => {data => "I should get a real jabber connection initialisation here" },
             'adb' => { data => "CNXN....................host:..." },
-            'anyprot' => {data => "hello, this needs to be longer than the longest probe that returns PROBE_AGAIN" },
+            'anyprot' => {data => "hello anyprot this needs to be longer than xmpp and adb which expect about 50 characters, which I all have to write before the timeout!" },
         );
 
         my $cnx = new IO::Socket::INET(PeerHost => "localhost:$sslh_port");
@@ -94,8 +94,8 @@ sub test_probes {
             print "Received: protocol $prefix data [$data]\n";
             close $cnx;
 
-            is($prefix, $p->{name});
-            is($data, $protocols{$p->{name}}->{data});
+            is($prefix, $p->{name}, "probe $p->{name} connected correctly");
+            is($data, $protocols{$p->{name}}->{data}, "data shoveled correctly");
         }
     }
 }
diff --git a/test.cfg b/test.cfg
index f5e7bf4..8704e39 100644
--- a/test.cfg
+++ b/test.cfg
@@ -6,7 +6,7 @@ foreground: true;
 inetd: false;
 numeric: false;
 transparent: false;
-timeout: 4;   # Probe test writes slowly
+timeout: 10;   # Probe test writes slowly
 pidfile: "/tmp/sslh_test.pid";
 
 syslog_facility: "auth";