initial hash object with testing framework

This commit is contained in:
yrutschle 2022-03-30 22:56:41 +02:00
parent 9c3274359a
commit 454a261c95
39 changed files with 1202 additions and 0 deletions

259
hash.c Normal file
View File

@ -0,0 +1,259 @@
/*
* a fixed-sized hash
*
# Copyright (C) 2022 Yves Rutschle
#
# This program is free software; you can redistribute it
# and/or modify it under the terms of the GNU General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be
# useful, but WITHOUT ANY WARRANTY; without even the implied
# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
# PURPOSE. See the GNU General Public License for more
# details.
#
# The full text for the General Public License is here:
# http://www.gnu.org/licenses/gpl.html
#
# */
/* * The hash is open-addressing, linear search, robin-hood insertion, with
* backward shift deletion and moving floor.
* https://codecapsule.com/2013/11/11/robin-hood-hashing/
* https://codecapsule.com/2013/11/17/robin-hood-hashing-backward-shift-deletion/
* This means items are reordered upon insertion and deletion, and the hash
* is well-ordered at all times with no tombstones.
*
* Items that 'wrap' around push the 'floor' up. Searching for low items will
* therefore start from the floor up.
*
* Each pointer is either:
* - to a connection struct
* - FREE (NULL) if not allocated
*
* */
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include "gap.h"
#include "hash.h"
/* Table geometry. These are enumeration constants rather than
 * 'static const int' objects: ISO C does not treat a const-qualified variable
 * as a constant expression, so '1 << h_keylen' is not a portable initializer
 * for an object with static storage duration. */
enum {
    h_keylen = 5,                /* How many bits in the hash key? (hash is 2^h_keylen big) */
    hash_size = (1 << h_keylen)  /* Number of slots: 5 => 32 */
};
/* Value held by a slot that contains no item */
static void* const FREE = NULL;
/* State of one hash table. The definition lives in this file only; hash.h
 * exposes 'hash' as an incomplete type. */
struct hash {
int item_cnt; /* Number of items in the hash */
int floor; /* Where is the highest key. Or the lowest insert point */
gap_array* data; /* Slot storage: created with gap_init(), accessed with gap_get()/gap_set() */
hash_make_key_fn hash_make_key; /* Callback returning the index an item wants */
hash_cmp_item_fn cmp_item; /* Callback comparing two items; hash_find_index() treats 0 as a match */
};
typedef struct hash hash;
/* Allocate and initialise an empty hash.
 * make_key: callback returning the index an item wants
 * cmp_item: callback comparing two items, returning 0 when they match
 * Returns the new hash, or NULL if it could not be allocated.
 * */
hash* hash_init(hash_make_key_fn make_key, hash_cmp_item_fn cmp_item)
{
    hash* h = malloc(sizeof(*h));
    if (!h) return NULL;

    h->data = gap_init(hash_size);
    if (!h->data) {
        /* NOTE(review): assumes gap_init() reports failure by returning NULL
         * (confirm against gap.c). Without this check the half-built hash
         * would be handed to callers and the NULL storage dereferenced on the
         * first access. */
        free(h);
        return NULL;
    }

    h->item_cnt = 0;
    h->floor = 0;
    h->hash_make_key = make_key;
    h->cmp_item = cmp_item;

    return h;
}
/* Index of the slot that comes after slot i, going back to 0 once the end of
 * the table is passed. h is not consulted: the table size is a file-level
 * constant. */
static int hash_next_index(hash* h, int i)
{
    int following = i + 1;

    (void)h;
    return following % hash_size;
}
/* Look up an item and return the index of the slot holding it, or -1 if it is
 * not in the hash.
 * item is a probe object: the make_key callback applied to it gives the index
 * where the search starts (raised to the floor if it is below it), and the
 * cmp_item callback must return 0 when it is compared with the stored object
 * being looked for.
 * The scan stops at the first free slot, or after a bounded number of probes
 * so that a full table cannot loop forever.
 * */
int hash_find_index(hash* h, hash_item item)
{
    int pos = h->hash_make_key(item);
    int probes = 0;

    if (pos < h->floor)
        pos = h->floor;

    for (;;) {
        hash_item candidate = gap_get(h->data, pos);
#ifdef DEBUG
        fprintf(stderr, "searching %d\n", pos);
#endif
        if (candidate == FREE)
            return -1;

        /* Give up once more probes than there are slots have been made */
        if (probes > hash_size)
            return -1;
        probes++;

        if (h->cmp_item(candidate, item) == 0)
            return pos;

        pos = hash_next_index(h, pos);
    }
}
/* Decide whether the bubble (the element currently being carried upwards by
 * an insertion) should take the place of the element at the current slot.
 * index: current insertion index
 * wanted_index: index wanted by the element stored at the current index
 * bubble_index: index wanted by the bubble
 * floor: floor of the hash when the insertion started
 * wrapped: whether the insertion already wrapped around the end of the table
 *
 * At the floor the answer is 'wrapped': a bubble arriving there after wrapping
 * is a 'highest' element pushing the floor up and takes the slot, whereas a
 * bubble inserted directly at the floor keeps moving up.
 * Anywhere else the bubble takes the slot when it wants a strictly lower index
 * than the element stored there.
 */
static int i_should_swap(int index, int wanted_index, int bubble_index, int floor, int wrapped)
{
    int swap = (index == floor) ? wrapped : (bubble_index < wanted_index);

#if DEBUG
    fprintf(stderr, "i_should_swap(%d, %d, %d, %d) = %d\n", index, wanted_index, bubble_index, floor, swap);
#endif
    return swap;
}
/* Insert an item in the hash.
 * The item starts at the index given by the make_key callback (or at the
 * floor if that index is below it) and is carried upwards slot by slot. When
 * i_should_swap() says so, the carried item is stored in the current slot and
 * the item that was there is carried on instead. The first free slot receives
 * whatever is being carried at that point.
 * Each time the walk wraps around to index 0, the floor is raised by one.
 * Returns 0 on success, -1 if the hash is full (nothing is modified then).
 * */
int hash_insert(hash* h, hash_item new)
{
    gap_array* slots = h->data;
    int bubble_wanted_index = h->hash_make_key(new);
    int index = bubble_wanted_index;

    if (h->item_cnt == hash_size)
        return -1;

    if (index < h->floor) index = h->floor;

    hash_item curr_item = gap_get(slots, index);
    int orig_floor = h->floor; /* swap decisions use the floor as it was before this insertion */
    int wrapped = 0;

    /* Terminates: the table is not full, so a free slot is always reached */
    while (1) {
        if (curr_item == FREE) {
#if DEBUG
            fprintf(stderr, "final insert at %d\n", index);
#endif
            gap_set(slots, index, new);
            h->item_cnt++;
            return 0;
        }

        int curr_wanted_index = h->hash_make_key(curr_item);

        if (i_should_swap(index, curr_wanted_index, bubble_wanted_index, orig_floor, wrapped)) {
            gap_set(slots, index, new);
#if DEBUG
            /* struct hash_item is opaque at this point, so only the item's
             * address can be printed */
            fprintf(stderr, "intermediate insert [%p] at %d\n", (void*)new, index);
#endif
            /* The evicted item becomes the bubble */
            new = curr_item;
            bubble_wanted_index = curr_wanted_index;
        }

        index = hash_next_index(h, index);
        curr_item = gap_get(slots, index);

        if (index == 0) {
            /* Wrapped around the end of the table */
            h->floor++;
            wrapped = 1;
        }
    }
}
/* Says whether item, found in slot index, sits exactly at the index it wants.
 * A NULL item is reported as not being in the right place. */
static int next_in_right_place(hash* h, hash_item item, int index)
{
    if (item == NULL)
        return 0;

    return h->hash_make_key(item) == index;
}
/* Remove an item from the hash (backward shift deletion).
 * The item is located with hash_find_index(). The items following it are then
 * moved down by one slot each, until the next slot is free or holds an item
 * that already sits at the index it wants; the last vacated slot is marked
 * free.
 * If any slot visited during the shift lies below the floor, the floor is
 * lowered by one.
 * Returns 0 on success, -1 if the item is not in the hash.
 * */
int hash_remove(hash* h, hash_item item)
{
    gap_array* slots = h->data;
    int pos = hash_find_index(h, item);

    if (pos == -1)
        return -1; /* Tried to remove something that isn't there */

    int touched_below_floor = 0; /* Set when the shift goes through a slot below the floor */

    for (;;) {
        if (pos < h->floor)
            touched_below_floor = 1;

        int succ_pos = hash_next_index(h, pos);
        hash_item succ = gap_get(slots, succ_pos);

        /* Nothing left to pull down: stop shifting */
        if ((succ == FREE) || next_in_right_place(h, succ, succ_pos))
            break;

        gap_set(slots, pos, succ);
        pos = succ_pos;
#if DEBUG
        fprintf(stderr, "index %d floor %d\n", pos, h->floor);
#endif
    }

    h->item_cnt--;
    if (touched_below_floor)
        h->floor--;
    gap_set(slots, pos, FREE);
    return 0;
}
#include <stdio.h>
#include <string.h>
/* Item layout assumed by hash_dump() below.
 * NOTE(review): hashtest/htest.c declares a struct with the same name and
 * fields; presumably the two have to be kept in sync by hand -- confirm. */
#define STR_LENGTH 16
struct hash_item {
int wanted_index; /* Not read by hash_dump(), which gets the index through the make_key callback */
char str[STR_LENGTH]; /* Text printed by hash_dump() for the item */
};
/* Write the whole table to a file, one line per slot (for development only).
 * h: hash to dump
 * filename: file to create or overwrite
 * The output is a <hash> header line carrying the floor and item count, then
 * for each slot its index, the index wanted by the item stored there and the
 * item's string (0 and an empty string for a free slot), then a closing line.
 * Exits the program with status 1 if the file cannot be opened.
 * */
void hash_dump(hash* h, char* filename)
{
    FILE* out = fopen(filename, "w");

    if (!out) {
        perror(filename);
        exit(1);
    }

    fprintf(out, "<hash floor=%d elem=%d>\n", h->floor, h->item_cnt);

    for (int i = 0; i < hash_size; i++) {
        hash_item item = gap_get(h->data, i);
        int idx = item ? h->hash_make_key(item) : 0;
        const char* text = item ? item->str : "";

        /* The precision bounds the read to the item's buffer, so a string
         * that fills all STR_LENGTH bytes without a terminator is still
         * printed safely. */
        fprintf(out, "\t%d:%d:%.*s\n", i, idx, STR_LENGTH, text);
    }

    fprintf(out, "</hash>\n");

    /* fclose() flushes: a failure here means the dump is incomplete */
    if (fclose(out) != 0)
        perror(filename);
}

16
hash.h Normal file
View File

@ -0,0 +1,16 @@
#ifndef HASH_H
#define HASH_H

/* Opaque hash table; the structure is defined in hash.c */
typedef struct hash hash;

/* Handle on an item stored in the hash. struct hash_item is defined by the
 * code that uses the hash. */
typedef struct hash_item* hash_item;

/* Function that returns the index an item wants in the hash */
typedef int (*hash_make_key_fn)(hash_item item);

/* Function that compares two items: returns 0 if they are the same */
typedef int (*hash_cmp_item_fn)(hash_item item1, hash_item item2);

/* Create an empty hash using the given callbacks. Returns NULL if the hash
 * could not be allocated. */
hash* hash_init(hash_make_key_fn make_key, hash_cmp_item_fn cmp_item);

/* Return the index of the slot holding an item that compares equal to item,
 * or -1 if there is none. */
int hash_find_index(hash* h, hash_item item);

/* Insert an item. Returns 0 on success, -1 if the hash is full. */
int hash_insert(hash* h, hash_item new);

/* Remove an item. Returns 0 on success, -1 if the item is not in the hash. */
int hash_remove(hash* h, hash_item item);

/* Write the content of the hash to the named file */
void hash_dump(hash* h, char* filename); /* For development only */

#endif /* HASH_H */

6
hashtest/Makefile Normal file
View File

@ -0,0 +1,6 @@
# Objects linked into the test driver: hash.o and gap.o from the parent
# directory, plus the driver itself.
OBJ=../hash.o ../gap.o htest.o
# Link the test driver. No compile rules are given here, so the objects are
# presumably built by make's built-in rules.
htest: $(OBJ)
$(CC) -o htest $(OBJ)

8
hashtest/delete.tst Normal file
View File

@ -0,0 +1,8 @@
# Basic delete
a 10 aa
a 10 ab
a 10 ac
a 20 ba
a 21 bb
d 21 bb

34
hashtest/delete.tst.ref Normal file
View File

@ -0,0 +1,34 @@
<hash floor=0 elem=4>
0:0:
1:0:
2:0:
3:0:
4:0:
5:0:
6:0:
7:0:
8:0:
9:0:
10:10:aa
11:10:ab
12:10:ac
13:0:
14:0:
15:0:
16:0:
17:0:
18:0:
19:0:
20:20:ba
21:0:
22:0:
23:0:
24:0:
25:0:
26:0:
27:0:
28:0:
29:0:
30:0:
31:0:
</hash>

View File

@ -0,0 +1,9 @@
# Delete inside a block with nothing after
a 10 aa
a 10 ab
a 12 ac
a 13 ad
a 14 ae
d 14 ae

View File

@ -0,0 +1,34 @@
<hash floor=0 elem=4>
0:0:
1:0:
2:0:
3:0:
4:0:
5:0:
6:0:
7:0:
8:0:
9:0:
10:10:aa
11:10:ab
12:12:ac
13:13:ad
14:0:
15:0:
16:0:
17:0:
18:0:
19:0:
20:0:
21:0:
22:0:
23:0:
24:0:
25:0:
26:0:
27:0:
28:0:
29:0:
30:0:
31:0:
</hash>

View File

@ -0,0 +1,9 @@
# wrap-around and delete below floor
a 2 ba
a 30 aa
a 30 ab
a 30 ac
a 30 ad
a 2 bb
d 30 ab

View File

@ -0,0 +1,34 @@
<hash floor=1 elem=5>
0:30:ad
1:0:
2:2:ba
3:2:bb
4:0:
5:0:
6:0:
7:0:
8:0:
9:0:
10:0:
11:0:
12:0:
13:0:
14:0:
15:0:
16:0:
17:0:
18:0:
19:0:
20:0:
21:0:
22:0:
23:0:
24:0:
25:0:
26:0:
27:0:
28:0:
29:0:
30:30:aa
31:30:ac
</hash>

View File

@ -0,0 +1,10 @@
# delete in a discontinuous block
a 10 aa
a 11 ab
a 12 ac
a 14 ad
a 10 bc
d 11 ab

View File

@ -0,0 +1,34 @@
<hash floor=0 elem=4>
0:0:
1:0:
2:0:
3:0:
4:0:
5:0:
6:0:
7:0:
8:0:
9:0:
10:10:aa
11:10:bc
12:12:ac
13:0:
14:14:ad
15:0:
16:0:
17:0:
18:0:
19:0:
20:0:
21:0:
22:0:
23:0:
24:0:
25:0:
26:0:
27:0:
28:0:
29:0:
30:0:
31:0:
</hash>

11
hashtest/delete_empty.tst Normal file
View File

@ -0,0 +1,11 @@
# Delete an unexisting element. And on an empty hash
a 10 aa
d 10 ab
d 12 bc
# Empty for real
d 10 aa
d 10 aa

View File

@ -0,0 +1,34 @@
<hash floor=0 elem=0>
0:0:
1:0:
2:0:
3:0:
4:0:
5:0:
6:0:
7:0:
8:0:
9:0:
10:0:
11:0:
12:0:
13:0:
14:0:
15:0:
16:0:
17:0:
18:0:
19:0:
20:0:
21:0:
22:0:
23:0:
24:0:
25:0:
26:0:
27:0:
28:0:
29:0:
30:0:
31:0:
</hash>

39
hashtest/delete_full.tst Normal file
View File

@ -0,0 +1,39 @@
# delete on a full hash
# First, fill the hash :-)
a 0 aa
a 1 ab
a 2 ac
a 3 ad
a 4 ae
a 5 af
a 6 ag
a 7 ah
a 8 ai
a 9 af
a 10 ba
a 11 bb
a 12 bc
a 13 bd
a 14 be
a 15 bf
a 16 bg
a 17 bh
a 18 bi
a 19 bj
a 20 ca
a 21 cb
a 22 cd
a 23 ce
a 24 cf
a 25 cg
a 26 ch
a 27 ci
a 28 cj
a 29 ck
a 30 da
a 31 db
d 21 cb

View File

@ -0,0 +1,34 @@
<hash floor=0 elem=31>
0:0:aa
1:1:ab
2:2:ac
3:3:ad
4:4:ae
5:5:af
6:6:ag
7:7:ah
8:8:ai
9:9:af
10:10:ba
11:11:bb
12:12:bc
13:13:bd
14:14:be
15:15:bf
16:16:bg
17:17:bh
18:18:bi
19:19:bj
20:20:ca
21:0:
22:22:cd
23:23:ce
24:24:cf
25:25:cg
26:26:ch
27:27:ci
28:28:cj
29:29:ck
30:30:da
31:31:db
</hash>

View File

@ -0,0 +1,10 @@
# Delete inside a block with something discontinuous
a 10 aa
a 10 ab
a 12 ac
a 13 ad
a 14 ae
# ab shifts, ac and next doesn't
d 10 aa

View File

@ -0,0 +1,34 @@
<hash floor=0 elem=4>
0:0:
1:0:
2:0:
3:0:
4:0:
5:0:
6:0:
7:0:
8:0:
9:0:
10:10:ab
11:0:
12:12:ac
13:13:ad
14:14:ae
15:0:
16:0:
17:0:
18:0:
19:0:
20:0:
21:0:
22:0:
23:0:
24:0:
25:0:
26:0:
27:0:
28:0:
29:0:
30:0:
31:0:
</hash>

8
hashtest/delete_wrap.tst Normal file
View File

@ -0,0 +1,8 @@
# Basic delete when wrapping, between wrap and floor
a 30 aa
a 30 ab
a 30 ac
a 30 ba
a 30 bb
d 30 ac

View File

@ -0,0 +1,34 @@
<hash floor=2 elem=4>
0:30:ba
1:30:bb
2:0:
3:0:
4:0:
5:0:
6:0:
7:0:
8:0:
9:0:
10:0:
11:0:
12:0:
13:0:
14:0:
15:0:
16:0:
17:0:
18:0:
19:0:
20:0:
21:0:
22:0:
23:0:
24:0:
25:0:
26:0:
27:0:
28:0:
29:0:
30:30:aa
31:30:ab
</hash>

View File

@ -0,0 +1,10 @@
# Delete inside a block with wrapping, with something after
a 30 aa
a 30 ab
a 30 ac
a 1 ad
a 3 ae
# shift ad but not ae
d 14 ae

View File

@ -0,0 +1,34 @@
<hash floor=1 elem=5>
0:30:ac
1:1:ad
2:0:
3:3:ae
4:0:
5:0:
6:0:
7:0:
8:0:
9:0:
10:0:
11:0:
12:0:
13:0:
14:0:
15:0:
16:0:
17:0:
18:0:
19:0:
20:0:
21:0:
22:0:
23:0:
24:0:
25:0:
26:0:
27:0:
28:0:
29:0:
30:30:aa
31:30:ab
</hash>

View File

@ -0,0 +1,8 @@
# delete before wrap
a 30 aa
a 30 ab
a 30 ac
a 30 ad
# shift ac and ad
d 30 ab

View File

@ -0,0 +1,34 @@
<hash floor=1 elem=3>
0:30:ad
1:0:
2:0:
3:0:
4:0:
5:0:
6:0:
7:0:
8:0:
9:0:
10:0:
11:0:
12:0:
13:0:
14:0:
15:0:
16:0:
17:0:
18:0:
19:0:
20:0:
21:0:
22:0:
23:0:
24:0:
25:0:
26:0:
27:0:
28:0:
29:0:
30:30:aa
31:30:ac
</hash>

View File

@ -0,0 +1,11 @@
# Delete with wrapping in discontinuous group
a 30 aa
a 30 ab
a 30 ac
a 31 ad
a 2 ba
a 3 bb
# shift ac and ad but not ba and bb
d 30 ab

View File

@ -0,0 +1,34 @@
<hash floor=1 elem=5>
0:31:ad
1:0:
2:2:ba
3:3:bb
4:0:
5:0:
6:0:
7:0:
8:0:
9:0:
10:0:
11:0:
12:0:
13:0:
14:0:
15:0:
16:0:
17:0:
18:0:
19:0:
20:0:
21:0:
22:0:
23:0:
24:0:
25:0:
26:0:
27:0:
28:0:
29:0:
30:30:aa
31:30:ac
</hash>

BIN
hashtest/htest Executable file

Binary file not shown.

105
hashtest/htest.c Normal file
View File

@ -0,0 +1,105 @@
/* Wee testing program for the hash code:
* htest <script> <dump>
*
* scripts are a list of operations:
* a $index $string
* => add an element at specified index
* d $index $string
* => remove an element
* s $index $string
* => prints the actual element index, if it's there
*
* The hash is dumped to the dump file at each iteration.
*/
#include <stdlib.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>
#include "../hash.h"
/* Size of the string buffer of an item, terminator included */
#define STR_LENGTH 16
/* Items stored in the hash by this test program */
struct hash_item {
int wanted_index; /* Index read from the script; returned as-is by hash_make_key() */
char str[STR_LENGTH]; /* String read from the script; cmp_item() compares items on it */
};
/* Comparison callback given to hash_init(): items are compared on their
 * strings only. Returns the strcmp() result, i.e. 0 when the strings are
 * equal. */
static int cmp_item(hash_item item1, hash_item item2)
{
    const char* lhs = item1->str;
    const char* rhs = item2->str;

    return strcmp(lhs, rhs);
}
/* Key callback given to hash_init(): the wanted index is the one stored in
 * the item, unchanged. */
static int hash_make_key(hash_item item)
{
    int key = (*item).wanted_index;

    return key;
}
static void htest_next_key(FILE* f, char* action, int* key, char str[STR_LENGTH])
{
int res = 0;
while ((res != 3) && (res != EOF))
res = fscanf(f, "%c %d %s\n", action, key, str);
if (res == EOF) exit(0);
}
/* Test driver: htest <script file> <dump file>
 * Reads operations from the script one at a time, applies each to a hash and
 * dumps the hash to the dump file after every operation. The program ends
 * when htest_next_key() reaches the end of the script (exit status 0) or on a
 * 'q' operation (exit status 1).
 */
int main(int argc, char* argv[])
{
    if (argc != 3) {
        fprintf(stderr, "Usage: htest <script file> <dump file>\n");
        exit(1);
    }
    char* script_file = argv[1];
    char* dump_file = argv[2];

    hash* h = hash_init(&hash_make_key, &cmp_item);
    if (!h) {
        fprintf(stderr, "htest: cannot allocate hash\n");
        exit(1);
    }

    FILE* f = fopen(script_file, "r");
    if (!f) {
        perror(script_file);
        exit(1);
    }

    int line = 0;
    while (1) {
        hash_item item = malloc(sizeof(*item));
        if (!item) {
            perror("malloc");
            exit(1);
        }
        char action = ' ';
        int item_stored = 0; /* Set once the hash holds on to 'item' */

        line++;
        htest_next_key(f, &action, &item->wanted_index, item->str);
        fprintf(stderr, "action %d: %c %d %s\n", line, action, item->wanted_index, item->str);

        switch (action) {
        case 'a': /* add */
            fprintf(stderr, "inserting [%s] at %d\n", item->str, item->wanted_index);
            item_stored = (hash_insert(h, item) == 0);
            break;
        case 'd': /* del */
            fprintf(stderr, "removing [%s] at %d\n", item->str, item->wanted_index);
            hash_remove(h, item);
            break;
        case 's': { /* search */
            fprintf(stderr, "searching\n");
            int i = hash_find_index(h, item);
            fprintf(stderr, "searching %d[%s]: %d\n", item->wanted_index, item->str, i);
            break;
        }
        case 'q': /* quit */
            exit(1);
        default: /* unknown operation: nothing to do but dump */
            break;
        }

        hash_dump(h, dump_file);

        /* For delete and search, 'item' is only a probe; a rejected insert
         * does not keep it either. The object a delete takes out of the hash
         * is not handed back by hash_remove(), so it cannot be freed here. */
        if (!item_stored)
            free(item);
    }
    return 0;
}

6
hashtest/insert.tst Normal file
View File

@ -0,0 +1,6 @@
# Basic insertions
a 10 aa
a 10 ab
a 10 ac
a 20 ba
a 21 bb

34
hashtest/insert.tst.ref Normal file
View File

@ -0,0 +1,34 @@
<hash floor=0 elem=5>
0:0:
1:0:
2:0:
3:0:
4:0:
5:0:
6:0:
7:0:
8:0:
9:0:
10:10:aa
11:10:ab
12:10:ac
13:0:
14:0:
15:0:
16:0:
17:0:
18:0:
19:0:
20:20:ba
21:21:bb
22:0:
23:0:
24:0:
25:0:
26:0:
27:0:
28:0:
29:0:
30:0:
31:0:
</hash>

View File

@ -0,0 +1,8 @@
# insert and bubble with single empty space
a 10 aa
a 11 ab
a 12 ac
a 14 ad
a 10 bc

View File

@ -0,0 +1,34 @@
<hash floor=0 elem=5>
0:0:
1:0:
2:0:
3:0:
4:0:
5:0:
6:0:
7:0:
8:0:
9:0:
10:10:aa
11:10:bc
12:11:ab
13:12:ac
14:14:ad
15:0:
16:0:
17:0:
18:0:
19:0:
20:0:
21:0:
22:0:
23:0:
24:0:
25:0:
26:0:
27:0:
28:0:
29:0:
30:0:
31:0:
</hash>

40
hashtest/insert_full.tst Normal file
View File

@ -0,0 +1,40 @@
# Insert on a full hash
# First, fill the hash :-)
a 0 aa
a 1 ab
a 2 ac
a 3 ad
a 4 ae
a 5 af
a 6 ag
a 7 ah
a 8 ai
a 9 af
a 10 ba
a 11 bb
a 12 bc
a 13 bd
a 14 be
a 15 bf
a 16 bg
a 17 bh
a 18 bi
a 19 bj
a 20 ca
a 21 cb
a 22 cd
a 23 ce
a 24 cf
a 25 cg
a 26 ch
a 27 ci
a 28 cj
a 29 ck
a 30 da
a 31 db
# it's full!
a 20 zz
a 31 za

View File

@ -0,0 +1,34 @@
<hash floor=0 elem=32>
0:0:aa
1:1:ab
2:2:ac
3:3:ad
4:4:ae
5:5:af
6:6:ag
7:7:ah
8:8:ai
9:9:af
10:10:ba
11:11:bb
12:12:bc
13:13:bd
14:14:be
15:15:bf
16:16:bg
17:17:bh
18:18:bi
19:19:bj
20:20:ca
21:21:cb
22:22:cd
23:23:ce
24:24:cf
25:25:cg
26:26:ch
27:27:ci
28:28:cj
29:29:ck
30:30:da
31:31:db
</hash>

View File

@ -0,0 +1,7 @@
# wrap-around and insert at full floor
a 2 ba
a 30 aa
a 30 ab
a 30 ac
a 30 ad
a 2 bb

View File

@ -0,0 +1,34 @@
<hash floor=2 elem=6>
0:30:ac
1:30:ad
2:2:ba
3:2:bb
4:0:
5:0:
6:0:
7:0:
8:0:
9:0:
10:0:
11:0:
12:0:
13:0:
14:0:
15:0:
16:0:
17:0:
18:0:
19:0:
20:0:
21:0:
22:0:
23:0:
24:0:
25:0:
26:0:
27:0:
28:0:
29:0:
30:30:aa
31:30:ab
</hash>

7
hashtest/insert_wrap.tst Normal file
View File

@ -0,0 +1,7 @@
# wrap-around and insert above floor
a 30 aa
a 30 ab
a 30 ac
a 30 ad
a 0 ba
a 0 bb

View File

@ -0,0 +1,34 @@
<hash floor=2 elem=6>
0:30:ac
1:30:ad
2:0:ba
3:0:bb
4:0:
5:0:
6:0:
7:0:
8:0:
9:0:
10:0:
11:0:
12:0:
13:0:
14:0:
15:0:
16:0:
17:0:
18:0:
19:0:
20:0:
21:0:
22:0:
23:0:
24:0:
25:0:
26:0:
27:0:
28:0:
29:0:
30:30:aa
31:30:ab
</hash>

41
hashtest/mkrand.pl Executable file
View File

@ -0,0 +1,41 @@
#! /usr/bin/perl
# Creates a script of random accesses and deletes
use strict;
# Counter behind the generated names; incremented by every call to mkstr
my $i = 0;
# Return the next two-letter lowercase name ("ab", "ac", ...).
# Arguments, if any, are ignored.
sub mkstr {
    $i += 1;
    my $first = chr(ord('a') + ($i / 26) % 26);
    my $second = chr(ord('a') + $i % 26);
    return $first . $second;
}
# "index name" entries that were added and not deleted since
my @elems;
# Print an 'a' operation with a random index in 0..31 and a fresh name, and
# record the entry so that del_elem can pick it later.
sub add_elem {
    my $val = int(rand(32));
    my $str = mkstr($val);
    my $entry = "$val $str";
    push @elems, $entry;
    print "a $entry\n";
}
# Take a randomly chosen entry out of @elems and print the matching 'd'
# operation.
sub del_elem {
    my $victim = rand @elems;
    my $remove = splice(@elems, $victim, 1);
    print "d $remove\n";
}
# Produce operations forever: only add while fewer than 5 entries are
# recorded, only delete while more than 28 are, and otherwise pick either
# with equal probability.
while (1) {
    my $count = @elems;
    if ($count < 5) {
        add_elem();
    } elsif ($count > 28) {
        del_elem();
    } elsif (rand() < .5) {
        add_elem();
    } else {
        del_elem();
    }
}

30
hashtest/run Executable file
View File

@ -0,0 +1,30 @@
#! /usr/bin/perl -w
# This runs all the tests.
# Tests scripts are in *.tst files.
# Corresponding output is put in *.out.
# Reference output is put in *.ref.
# Any discrepancy will be reported!
use strict;
# One [file name, verdict] pair per test script
my @res;
# Run htest on every *.tst file of the current directory, writing the dump to
# <file>.out, then compare it with <file>.ref. A test is OK when diff exits
# with status 0.
# The file list comes from glob() rather than from parsing the output of 'ls',
# and commands are run in list form so that file names never go through the
# shell.
foreach my $fn (sort glob("*.tst")) {
    my @cmd = ("./htest", $fn, "$fn.out");
    print "@cmd\n";
    system(@cmd);
    my $res = system("diff", "-u", "$fn.ref", "$fn.out");
    push @res, [$fn, ($res == 0 ? "OK" : "*KO*")];
}
# Report line layout: test file name left-justified, verdict right-justified.
# A format declared without a name applies to STDOUT.
format =
@<<<<<<<<<<<<<<<<<<<<<<<<<< @>>>
$_->[0], $_->[1]
.
#format_name STDOUT test_result;
# Print one report line per test; the format reads the current entry from $_
map { write; } @res;