This file is indexed.

/usr/include/dirsrv/sds.h is in 389-ds-base-dev 1.3.7.10-1ubuntu1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

   1
   2
   3
   4
   5
   6
   7
   8
   9
  10
  11
  12
  13
  14
  15
  16
  17
  18
  19
  20
  21
  22
  23
  24
  25
  26
  27
  28
  29
  30
  31
  32
  33
  34
  35
  36
  37
  38
  39
  40
  41
  42
  43
  44
  45
  46
  47
  48
  49
  50
  51
  52
  53
  54
  55
  56
  57
  58
  59
  60
  61
  62
  63
  64
  65
  66
  67
  68
  69
  70
  71
  72
  73
  74
  75
  76
  77
  78
  79
  80
  81
  82
  83
  84
  85
  86
  87
  88
  89
  90
  91
  92
  93
  94
  95
  96
  97
  98
  99
 100
 101
 102
 103
 104
 105
 106
 107
 108
 109
 110
 111
 112
 113
 114
 115
 116
 117
 118
 119
 120
 121
 122
 123
 124
 125
 126
 127
 128
 129
 130
 131
 132
 133
 134
 135
 136
 137
 138
 139
 140
 141
 142
 143
 144
 145
 146
 147
 148
 149
 150
 151
 152
 153
 154
 155
 156
 157
 158
 159
 160
 161
 162
 163
 164
 165
 166
 167
 168
 169
 170
 171
 172
 173
 174
 175
 176
 177
 178
 179
 180
 181
 182
 183
 184
 185
 186
 187
 188
 189
 190
 191
 192
 193
 194
 195
 196
 197
 198
 199
 200
 201
 202
 203
 204
 205
 206
 207
 208
 209
 210
 211
 212
 213
 214
 215
 216
 217
 218
 219
 220
 221
 222
 223
 224
 225
 226
 227
 228
 229
 230
 231
 232
 233
 234
 235
 236
 237
 238
 239
 240
 241
 242
 243
 244
 245
 246
 247
 248
 249
 250
 251
 252
 253
 254
 255
 256
 257
 258
 259
 260
 261
 262
 263
 264
 265
 266
 267
 268
 269
 270
 271
 272
 273
 274
 275
 276
 277
 278
 279
 280
 281
 282
 283
 284
 285
 286
 287
 288
 289
 290
 291
 292
 293
 294
 295
 296
 297
 298
 299
 300
 301
 302
 303
 304
 305
 306
 307
 308
 309
 310
 311
 312
 313
 314
 315
 316
 317
 318
 319
 320
 321
 322
 323
 324
 325
 326
 327
 328
 329
 330
 331
 332
 333
 334
 335
 336
 337
 338
 339
 340
 341
 342
 343
 344
 345
 346
 347
 348
 349
 350
 351
 352
 353
 354
 355
 356
 357
 358
 359
 360
 361
 362
 363
 364
 365
 366
 367
 368
 369
 370
 371
 372
 373
 374
 375
 376
 377
 378
 379
 380
 381
 382
 383
 384
 385
 386
 387
 388
 389
 390
 391
 392
 393
 394
 395
 396
 397
 398
 399
 400
 401
 402
 403
 404
 405
 406
 407
 408
 409
 410
 411
 412
 413
 414
 415
 416
 417
 418
 419
 420
 421
 422
 423
 424
 425
 426
 427
 428
 429
 430
 431
 432
 433
 434
 435
 436
 437
 438
 439
 440
 441
 442
 443
 444
 445
 446
 447
 448
 449
 450
 451
 452
 453
 454
 455
 456
 457
 458
 459
 460
 461
 462
 463
 464
 465
 466
 467
 468
 469
 470
 471
 472
 473
 474
 475
 476
 477
 478
 479
 480
 481
 482
 483
 484
 485
 486
 487
 488
 489
 490
 491
 492
 493
 494
 495
 496
 497
 498
 499
 500
 501
 502
 503
 504
 505
 506
 507
 508
 509
 510
 511
 512
 513
 514
 515
 516
 517
 518
 519
 520
 521
 522
 523
 524
 525
 526
 527
 528
 529
 530
 531
 532
 533
 534
 535
 536
 537
 538
 539
 540
 541
 542
 543
 544
 545
 546
 547
 548
 549
 550
 551
 552
 553
 554
 555
 556
 557
 558
 559
 560
 561
 562
 563
 564
 565
 566
 567
 568
 569
 570
 571
 572
 573
 574
 575
 576
 577
 578
 579
 580
 581
 582
 583
 584
 585
 586
 587
 588
 589
 590
 591
 592
 593
 594
 595
 596
 597
 598
 599
 600
 601
 602
 603
 604
 605
 606
 607
 608
 609
 610
 611
 612
 613
 614
 615
 616
 617
 618
 619
 620
 621
 622
 623
 624
 625
 626
 627
 628
 629
 630
 631
 632
 633
 634
 635
 636
 637
 638
 639
 640
 641
 642
 643
 644
 645
 646
 647
 648
 649
 650
 651
 652
 653
 654
 655
 656
 657
 658
 659
 660
 661
 662
 663
 664
 665
 666
 667
 668
 669
 670
 671
 672
 673
 674
 675
 676
 677
 678
 679
 680
 681
 682
 683
 684
 685
 686
 687
 688
 689
 690
 691
 692
 693
 694
 695
 696
 697
 698
 699
 700
 701
 702
 703
 704
 705
 706
 707
 708
 709
 710
 711
 712
 713
 714
 715
 716
 717
 718
 719
 720
 721
 722
 723
 724
 725
 726
 727
 728
 729
 730
 731
 732
 733
 734
 735
 736
 737
 738
 739
 740
 741
 742
 743
 744
 745
 746
 747
 748
 749
 750
 751
 752
 753
 754
 755
 756
 757
 758
 759
 760
 761
 762
 763
 764
 765
 766
 767
 768
 769
 770
 771
 772
 773
 774
 775
 776
 777
 778
 779
 780
 781
 782
 783
 784
 785
 786
 787
 788
 789
 790
 791
 792
 793
 794
 795
 796
 797
 798
 799
 800
 801
 802
 803
 804
 805
 806
 807
 808
 809
 810
 811
 812
 813
 814
 815
 816
 817
 818
 819
 820
 821
 822
 823
 824
 825
 826
 827
 828
 829
 830
 831
 832
 833
 834
 835
 836
 837
 838
 839
 840
 841
 842
 843
 844
 845
 846
 847
 848
 849
 850
 851
 852
 853
 854
 855
 856
 857
 858
 859
 860
 861
 862
 863
 864
 865
 866
 867
 868
 869
 870
 871
 872
 873
 874
 875
 876
 877
 878
 879
 880
 881
 882
 883
 884
 885
 886
 887
 888
 889
 890
 891
 892
 893
 894
 895
 896
 897
 898
 899
 900
 901
 902
 903
 904
 905
 906
 907
 908
 909
 910
 911
 912
 913
 914
 915
 916
 917
 918
 919
 920
 921
 922
 923
 924
 925
 926
 927
 928
 929
 930
 931
 932
 933
 934
 935
 936
 937
 938
 939
 940
 941
 942
 943
 944
 945
 946
 947
 948
 949
 950
 951
 952
 953
 954
 955
 956
 957
 958
 959
 960
 961
 962
 963
 964
 965
 966
 967
 968
 969
 970
 971
 972
 973
 974
 975
 976
 977
 978
 979
 980
 981
 982
 983
 984
 985
 986
 987
 988
 989
 990
 991
 992
 993
 994
 995
 996
 997
 998
 999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
/* BEGIN COPYRIGHT BLOCK
 * Copyright (c) 2016, William Brown <william at blackhats dot net dot au>
 * Copyright (c) 2017, Red Hat, Inc
 * All rights reserved.
 *
 * License: GPL (version 3 or any later version).
 * See LICENSE for details.
 * END COPYRIGHT BLOCK **/

#pragma once

/**
 * \defgroup sds_misc SDS Miscelaneous
 * Miscelaneous functions for SDS. This may be replaced by a PAL from ns-slapd soon.
 */

/**
 * \defgroup sds_queue SDS Queue
 * Unprotected queue for void types
 */

/**
 * \defgroup sds_tqueue SDS Thread Safe Queue
 * Thread safe queue, protected by platform mutexs.
 */

/**
 * \defgroup sds_lqueue SDS Lock Free Queue
 * Thread safe lock free queue. Falls back to platform mutex if cpu intrinsics are not present.
 */

/**
 * \defgroup sds_bptree SDS B+Tree
 * Unprotected B+Tree for void types. Supports fast set operations, search, insert, delete and build.
 *
 * This structure should be considered for all locations where unprotected AVL treels, or array based
 * binary search trees are used.
 */

/**
 * \defgroup sds_bptree_cow SDS Copy on Write B+Tree
 * Thread safe Copy of Write B+Tree. Supports parallel read transactions and serialised writes with ref counted members.
 *
 * This is a very important structure, and is the primary reason to consume this library. There are a number of important properties
 * that this tree provides.
 *
 * A key concept of this structure is that all values and keys contained have their lifetimes bound to the life of the tree and it's
 * nodes. This is due to the transactional nature of the structure. Consider a value that has been inserted into the tree. We take a
 * read transaction of the tree in thread A. Thread B now deletes the value that was inserted. Because A still refers to the value,
 * it is not freed until thread A closes it's read transaction. This guarantees that all read transactions are always valid, while
 * also maintaining that memory can be freed correctly. This has many applications including the Directory Server plugin subsystem.
 * We can use this property to guarantee a plugin exists for the life of an operation, and we only free it once all consumers have
 * completed their operations with it.
 *
 * Another key aspect of this tree is that write transactions *do not* block read transactions. Reads can be completed in parallel
 * to writes with complete safety. Write operations are serialised in this structure. This makes the structure extremely effective
 * for long read operations with many queries, without interrupting other threads.
 *
 * Along with the value and key ownership properties, we can guarantee that the content of the tree within a read transaction
 * will *not* be removed or added and will be consistent within the tree (however, we can't guarantee a user doesn't change the value.)
 *
 * As a comparison, the single thread tree is on paper, much faster than the COW version due to less memory operations. However in a
 * multi thread use case, the COW version is signifigantly faster *and* safer than a mutex protected single thread tree.
 *
 * This tree can be used as a form of object manager for C, as well as a way to allow async operations between threads while maintaining,
 * thread safety across an application. This assumes that the operations are largely parallel and batch read oriented over write heavy small
 * transactions.
 */

/** \addtogroup sds_misc
 * @{
 */

/**
 * sds_result encapsulates the set of possible success or failures an operation
 * may have during processing. You must always check this value from function
 * calls.
 */
typedef enum _sds_result {
    /**
     * SDS_SUCCESS 0 represents that the operation had no errors in processing.
     */
    SDS_SUCCESS = 0,
    /**
     * SDS_UNKNOWN_ERROR 1 represents that an unexpected issue was encountered,
     * you should raise a bug about this if you encounter it!
     */
    SDS_UNKNOWN_ERROR = 1,
    /**
     * SDS_NULL_POINTER 2 represents that a NULL pointer was provided to a function
     * that expects a true pointer to valid data. It may also represent that
     * the internals of a datastructure have been corrupted with an unexpected NULL
     * pointer, and the operation can not proceed.
     */
    SDS_NULL_POINTER = 2,
    /**
     * SDS_TEST_FAILED 3 represents that a test case has failed. You may need to
     * go back to the test case to see the originating error.
     */
    SDS_TEST_FAILED = 3,
    /**
     * SDS_DUPLICATE_KEY 4 represents that the key value already exists in the
     * datastructure. This is generally seen during insertion operations.
     */
    SDS_DUPLICATE_KEY = 4,
    /**
     * SDS_CHECKSUM_FAILURE 5 represents that the in memory data does not pass
     * checksum validation. This may be due to the fault of the application or
     * hardware errors.
     */
    SDS_CHECKSUM_FAILURE = 5,
    /**
     * SDS_INVALID_NODE_ID 6 represents that the internal data of a datastructure
     * node contains invalid identifying information.
     */
    SDS_INVALID_NODE_ID = 6,
    /**
     * SDS_INVALID_KEY 7 represents that a key is invalid in the state of this node.
     * generally this is due to a bug in the datastructure code where keys are
     * out of order, or incorrectly nulled.
     */
    SDS_INVALID_KEY = 7,
    /**
     * SDS_INVALID_VALUE_SIZE 8 represents that the size of the value provided
     * does not hold true for certain assumptions. For example, a null pointer
     * with a non zero size, or a pointer with a zero size.
     */
    SDS_INVALID_VALUE_SIZE = 8,
    /**
     * SDS_INVALID_POINTER 9 represents that within the datastructure an invalid
     * pointer structure has been created that may cause further dataloss or
     * corruption. This could be due to error in the datastructure code, or
     * memory issues.
     */
    SDS_INVALID_POINTER = 9,
    /**
     * SDS_INVALID_NODE 10 represents that certain datastructure properties of
     * this node have not been upheld. For example, branches should never contain
     * data in a b+tree, or that the capacity of the node is incorrect and a
     * split or merge should have occured.
     */
    SDS_INVALID_NODE = 10,
    /**
     * SDS_INVALID_KEY_ORDER 11 indicates that the ordering of keys within a
     * structure is incorrect, and may cause data loss.
     */
    SDS_INVALID_KEY_ORDER = 11,
    /**
     * SDS_RETRY 12 represents that the previous operation should be attempted
     * again. This is only for INTERNAL use. You will never see this returned.
     */
    SDS_RETRY = 12,
    /**
     * SDS_KEY_PRESENT 13 is a SUCCESS condition, similar to SDS_SUCCESS. This
     * differs from SDS_SUCCESS in that it shows that a key value exists within
     * some datastructure, and you must explcitly check for this case (vs
     * SDS_KEY_NOT_PRESENT).
     */
    SDS_KEY_PRESENT = 13,
    /**
     * SDS_KEY_NOT_PRESENT 14 is a SUCCESS condition, similar to SDS_SUCCESS. This
     * differs from SDS_SUCCESS in that is show that a key value does not exist
     * with some datastructure, but the operation was still a SUCCESS in completion.
     * You must explicitly check for this case to ensure you know the true result
     * of a function.
     */
    SDS_KEY_NOT_PRESENT = 14,
    /**
     * SDS_INCOMPATIBLE_INSTANCE 15 represents that during a set operation, you
     * are attempting to manipulate datastructures with potentiall different key
     * and value types. This would result in datacorruption or undefined behaviour
     * so instead, we return a pre-emptive error.
     */
    SDS_INCOMPATIBLE_INSTANCE = 15,
    /**
     * SDS_LIST_EXHAUSTED 16 represents that there are no more elements in this
     * datastructure to be consumed during an iteration.
     */
    SDS_LIST_EXHAUSTED = 16,
    /**
     * SDS_INVALID_TXN 17 represents that a transaction identifier is invalid
     * for the requested operation. IE using a read only transaction to attempt
     * a write.
     */
    SDS_INVALID_TXN = 17,
} sds_result;

/**
 * sds_malloc wraps the system memory allocator with an OOM check. During debugging
 * this call guarantees that memory is zeroed before use.
 *
 * \param size the amount of memory to allocate in bytes.
 * \retval pointer to the allocated memory.
 */
void *sds_malloc(size_t size);
/**
 * sds_calloc wraps the system calloc call with an OOM check. This call like calloc,
 * guarantees the returned memory is zeroed before usage.
 *
 * \param size the amount of memory to allocate in bytes.
 * \retval pointer to the allocated memory.
 */
void *sds_calloc(size_t size);
/**
 * sds_memalign wraps the posix_memalign function with an OOM and alignment check. During debugging
 * this call guarantees that memory is zeroed before use.
 *
 * \param size the amount of memory to allocate in bytes.
 * \param alignment the size to align to in bytes. Must be a power of 2.
 * \retval pointer to the allocated memory.
 */
void *sds_memalign(size_t size, size_t alignment);
/**
 * sds_free wraps the system free call with a null check.
 *
 * \param ptr the memory to free.
 */
void sds_free(void *ptr);

/**
 * sds_crc32c uses the crc32c algorithm to create a verification checksum of data.
 * This checksum is for data verification, not cryptographic purposes. It is used
 * largely in debugging to find cases when bytes in structures are updated incorrectly,
 * or to find memory bit flips during operation. If avaliable, this will use the
 * intel sse4 crc32c hardware acceleration.
 *
 * \param crc The running CRC value. Initially should be 0. If in doubt, use 0.
 * \param data Pointer to the data to checksum.
 * \param length number of bytes to validate.
 * \retval crc The crc of this data. May be re-used in subsequent sds_crc32c calls
 * for certain datatypes.
 */
uint32_t sds_crc32c(uint32_t crc, const unsigned char *data, size_t length);

/**
 * sds_siphash13 provides an implementation of the siphash algorithm for use in
 * hash based datastructures. It is chosen due to is resilance against hash
 * attacks, such that it makes it higly secure as a choice for a hashmap.
 *
 * \param src The data to hash
 * \param src_sz The size of the data to hash
 * \param key The security key to mix with the hash. This should be randomised once
 * at startup and stored for use with further operations.
 * \retval The uint64_t representing the hash of the data.
 */
uint64_t sds_siphash13(const void *src, size_t src_sz, const char key[16]);

/**
 * sds_uint64_t_compare takes two void *, and treats them as uint64_t *.
 *
 * \param a The first uint64_t *.
 * \param b The second uint64_t *.
 * \retval result of the comparison.
 */
int64_t sds_uint64_t_compare(void *a, void *b);
/**
 * Free the allocated uint64_t * from sds_uint64_t_alloc.
 *
 * \param key Do nothing to the key.
 */
void sds_uint64_t_free(void *key);
/**
 * sds_uint64_t_dup Returns a copy of the uint64_t * to the caller.
 *
 * \param key The uint64_t * to copy.
 * \retval result the copy of the value.
 */
void *sds_uint64_t_dup(void *key);
/**
 * sds_strcmp exists to wrap the system strcmp call with the correct types needed
 * for libsds datastructures to operate correctly.
 *
 * \param a Pointer to the first string.
 * \param b Pointer to the second string.
 * \retval Difference between the values. 0 indicates they are the same.
 */
int64_t sds_strcmp(void *a, void *b);
/**
 * sds_strdup exists to wrap the system strdup call with the correct types needed for
 * libsds datastructures to operate correctly.
 *
 * \param key Pointer to the string to duplicate.
 * \retval Pointer to the newly allocated string.
 */
void *sds_strdup(void *key);


/**
 * @}
 */
/* end sds_misc */

/** \addtogroup sds_bptree_cow
 * @{
 */

/**
 * sds_txt_state lists the possible states of a transaction.
 */
typedef enum _sds_txn_state {
    /**
     * SDS_TXN_READ 0 This is a read only transaction. Not mutable actions may
     * be taken.
     */
    SDS_TXN_READ = 0,
    /**
     * SDS_TXN_WRITE 1 This is a read and write transaction. You may perform
     * all read operations, and also all write operations.
     */
    SDS_TXN_WRITE = 1,
} sds_txn_state;

/**
 * sds_bptree_node_list stores a linked list of sds_bptree_nodes for tracking.
 * Internally this is used extensively for transaction references.
 */
typedef struct _sds_bptree_node_list
{
    /**
     * checksum of the node list item.
     */
    uint32_t checksum;
    /**
     * Pointer to the node item.
     */
    struct _sds_bptree_node *node;
    /**
     * Pointer to the next node list item. NULL indicates list termination.
     */
    struct _sds_bptree_node_list *next;
} sds_bptree_node_list;

/**
 * sds_bptree_transaction Manages the content and lifetime of nodes within the tree. It is the basis of our
 * garbage collection system, using atomic reference counts to synchronise our behaviour.
 */
typedef struct _sds_bptree_transaction
{
    /**
     * Checksum of the data in this structure.
     */
    uint32_t checksum;
    /**
     * Reference count to the number of consumers of this transaction. This is
     * atomically updated. If this is the "active" master transaction, this is
     * guaranteed to be 1 or greater. When a new write transaction is commited,
     * if this reference count falls to 0, this transaction is implicitly
     * destroy.
     */
    uint32_t reference_count;
    /**
     * The state of the transaction. All transactions start as SDS_WRITE_TXN,
     * and upon commit move to the SDS_READ_TXN state.
     */
    sds_txn_state state;
    /**
     * Pointer to the cow b+tree instance that created us.
     */
    struct _sds_bptree_cow_instance *binst;
    /**
     * Pointer to the b+tree instance that this transaction holds.
     */
    struct _sds_bptree_instance *bi;
    /**
     * The unique identifier of this transaction.
     */
    uint64_t txn_id;
    /**
     * The list of nodes that this transaction "owns". When the reference count
     * moves to 0, these nodes will be freed. This list is created during a write
     * transaction of "items that will not be needed when we are removed.".
     *
     * IE, when we have a txn A, then a new transaction B is made, we "copy" node
     * 1a to node 1b. Node 1b is "created" by txn B, and node 1a is "owend" by
     * txn A, because it's the last transaction that depends on this nodes existance.
     */
    sds_bptree_node_list *owned;
    /**
     * The list of nodes that this transaction "created". This is used during an
     * abort of the txn to roll back any changes that we made.
     */
    sds_bptree_node_list *created;
    /**
     * The current root node. Each transaction owns a unique root node which
     * anchors the branches of the copy on write structure.
     */
    struct _sds_bptree_node *root;
    /**
     * The previous transaction that we are derived from.
     */
    struct _sds_bptree_transaction *parent_txn;
    /**
     * The next transaction that derives from us.
     */
    struct _sds_bptree_transaction *child_txn;
} sds_bptree_transaction;

/**
 * @}
 */
/* end sds_bptree_cow */


/** \addtogroup sds_bptree
 * @{
 */

/**
 * SDS_BPTREE_DEFAULT_CAPACITY 5 represements that there are 5 values held per
 * in the B+Tree and COW B+Tree. A signifigant amount of testing went into the
 * tuning of this value for best performance.
 */
#define SDS_BPTREE_DEFAULT_CAPACITY 3
/**
 * SDS_BPTREE_HALF_CAPACITY 3 is pre-calculated as "ceiling(default capacity / 2)".
 * This is used to assert when node splits and merges should be taken.
 */
#define SDS_BPTREE_HALF_CAPACITY 2
/**
 * SDS_BPTREE_BRANCH 6 indicates that each node may potentially have up to 6 child
 * nodes. This yields a broad tree, that requires fewer cache misses to travese
 * (despite the higher number of comparisons to search).
 */
#define SDS_BPTREE_BRANCH 4

/**
 * This is the core of the B+Tree structures. This node represents the branches
 * and leaves of the structure.
 */
typedef struct _sds_bptree_node
{
#ifdef SDS_DEBUG
    /**
     * checksum of the structure data to detect errors. Must be the first element
     * in the struct.
     */
    uint32_t checksum;
#endif
    /**
     * Statically sized array of pointers to the keys of this structure.
     */
    void *keys[SDS_BPTREE_DEFAULT_CAPACITY];
    /**
     * Statically sized array of pointers to values. This is tagged by the level
     * flag. If level is 0, this is a set of values that have been inserted
     * by the consumer. If the level is > 0, this is the pointers to further
     * node structs.
     *
     * In a leaf, this is [value, value, value, value, value, link -> ]
     *
     * In a non-leaf, this is [link, link, link, link, link, link]
     */
    void *values[SDS_BPTREE_BRANCH];
    /**
     * The number of values currently stored in this structure.
     */
    uint32_t item_count;
    /**
     * This number of "rows" above the leaves this node is. 0 represents a true
     * leaf node, anything greater is a branch.
     */
    uint32_t level;
    /* Put these last because they are only used in the insert / delete path, not the search.
     * This way the keys and values are always in the cache associated with the ptr.
     */
    /**
     * The id of the transaction that created this node. This is used so that
     * within a transaction, we don't double copy values if we already copied
     * them.
     */
    uint64_t txn_id;
    /**
     * Back reference to our parent. This is faster than creating a traversal
     * list during each insertion (by a large factor).
     */
    struct _sds_bptree_node *parent;
} sds_bptree_node;

/**
 * The instance of the B+Tree. Stores references to function pointers for
 * manipulation of keys and values within the tree. Maintains the root checksum
 * which estabilshes the "root of trust" to all other nodes in the tree.
 */
typedef struct _sds_bptree_instance
{
    /**
     * checksum of the instance data.
     */
    uint32_t checksum;
    /**
     * This flag determines if we maintain and update checksum values during
     * tree operations, and that we verify these during the verification operation.
     */
    uint16_t offline_checksumming;
    /**
     * This flag determines if we verify all checksums during the read and write
     * paths of the code. Adds a large performance overhead, but guarantees
     * the data in the tree is "consistent".
     */
    uint16_t search_checksumming;
    /**
     * Internal tracking id for tree display.
     */
    uint16_t print_iter;

    /**
     * Pointer to the current tree root node.
     */
    sds_bptree_node *root; /* The current root node of the DB */

    /**
     * This function should be able to free values of the type that will be
     * inserted into the tree.
     */
    void (*value_free_fn)(void *value);
    /**
     * This function should be able to duplicate value of the type that will be
     * inserted into the tree. Values may be freed and duplicated in an order
     * you do not expect, so blindly returning a pointer to the same data may
     * not be wise if free then destroys it.
     */
    void *(*value_dup_fn)(void *value);
    /**
     * Key comparison function. This must return an int64_t of the difference in
     * the values. A result of < 0 indicates that a is less than b. A result of
     * 0 indicates that the values of a and b are identical. A result of > 0
     * indicates that a is greater than b.
     */
    int64_t (*key_cmp_fn)(void *a, void *b); /* Comparison function pointer */
    /**
     * Key free function. This must free the pointer provided by key.
     */
    void (*key_free_fn)(void *key);
    /**
     * Key duplication function. This duplicates the value of key and returns a
     * new pointer to it. This is used extensively in the tree structure, and
     * the key or result may be freed out of order, so don't blindly return the
     * same data.
     */
    void *(*key_dup_fn)(void *key);
} sds_bptree_instance;

/**
 * @}
 */
/* end sds_bptree */


/** \addtogroup sds_bptree_cow
 * @{
 */

/**
 * The copy on write tree instance. This stores the current active reading
 * transaction, number of active transactions, and pointers to the root of the
 * b+tree.
 *
 * This instance is the most important part of the tree and provides us with memory safety. Once we
 * have memory safety, individual transactions are free to garbage collect on their own. Garbage collection
 * is coordinated from the instance.
 *
 * ## MEMORY SAFTEY
 *
 * Within the transaction, we have a small number of important fields. These are:
 *
 * - read_lock
 * - write_lock
 * - tail_txn
 *
 * ### Read Transaction Begin
 *
 * At the beginning of a read transaction, we take a rolock on read_lock. This ensures that on
 * our working thread, that the correct sequence of barriers for memory consistency of the tree
 * is issued.
 *
 * Within the read_lock, we use cpu atomics to increment the transaction reference count by 1.
 * We then release the read_lock.
 *
 * In this way, we allow multiple read transactions to be initiated at the same time, with the
 * only serialisation point being the atomic increment.
 *
 * While the read transaction is "active" (IE the transaction that is listed in binst, where new
 * transactions will reference), the reference count will never go below 1.
 *
 * ### Write Transaction
 *
 * At the beginning of a write transaction, we take the write_lock mutex. This guarantees we are
 * the only writer in the tree at a point in time. Due to the way mutexes work, this guarantees
 * our memory barries of the tree are issued, so we see all other writes that have occured.
 *
 * During a write, we store a list of all nodes that were cloned: Both so that we know who we cloned
 * *from* but aso what nodes we *created*.
 *
 * ### Write Abort
 *
 * During a write abort, we release the write lock immediately. We then use the created node list, and
 * free all contents as they were never made active.
 *
 * ### Write commit
 *
 * During a commit, we prepare the transaction by changing it's state flags, and setting it's reference
 * count atomically. We relinquish the created list, because we have no need to free the nodes that are
 * about to become part of the tree. However, we update the previous transaction with the "owned" list,
 * as these are nodes we cloned from: IE these are nodes that serve no *future* role in the tree, so the
 * previous transaction is the last point where they are valid and needed - The last transaction now must
 * clean up after itself when it's ready to GC!
 *
 * We set our reference count to 2, to indicate that a following node (the previous active transaction)
 * relies on us, and that we are also active.
 *
 * We now take the write variant of the rw read_lock. This waits for all in progress read transactions to
 * begin, and we take exclusive access of this lock. We then pivot the older transaction out with our
 * transaction. The read_lock is released, and read transactions may now resume.
 *
 * At this point, we release the write_lock, and a new write may begin.
 *
 * We decrement the reference count of the previous transaction which may cause it to drop to 0. This is
 * the equivalent of a read transaction close. After this point, the previous transactions reference
 * count will only decrement, as previous holders close their transactions. Increment only occurs from the
 * active transaction!
 *
 * ### Read transaction close.
 *
 * When we close a read transaction, we require no locks. We use cpu atomic to set and fetch the
 * reference count.
 *
 * If the reference count remains positive, we complete, and continue.
 *
 * If the reference count drops to 0, we take a reference to the next transaction in the series,
 * and we free our node. We then decrement the reference counter of the next transaction and check
 * the result.
 *
 * Either, the reference count is > 0 because it is the active transaction *or* there is a current
 * read transaction.
 *
 * OR, the reference count reaches 0 because there are no active transactions, so we can free this node.
 * in the same operation, we then decrement the reference count to the next node in the series,
 * and we repeat this til we reach a node with a positive reference count.
 *
 * Due to the design of this system, we are guarantee no thread races, as one and only one thread
 * can ever set the reference count to 0: either the closing read, or the closing previous transaction.
 * This gives us complete thread safety, but without the need for a mutex to ensure this.
 *
 * ### Usage of the memory safe properties.
 *
 * Due to the way the barriers are issued, when you have a read transaction or a write transaction, you can
 * guarantee that the tree memory is consistent and correct, and that all values will remain alive until
 * you close the transaction. This means you can use this to allow parallel tasks in complex cases. Directory
 * Server has such a case with the plugin subsystem. Consider every operation needs a consistent list of plugins.
 *
 * You build a tree of the plugin references  (dlopen pointers, reference count, pblock) and store them.
 *
 * When you begin the operation you take a read transaction of this. This means every plugin in the operation is
 * guaranteed to be in the tree and held open for the duration of this operation. During the read, if another thread
 * commits the delete on a plugin, this drops the ref count, but not below 0 - the operation can continue safely.
 *
 * Once the operation is complete, the transaction is closed. At this point, the vacuum runs during close, which would
 * trigger the value free function. This would now actually close the plugin pointers, values. Additionally, the correct use
 * of the transaction guarantees the memory content of the plugin is correct due to the barriers inside of the transaction.
 *
 */
typedef struct _sds_bptree_cow_instance
{
    /**
     * checksum of the instance data.
     */
    uint32_t checksum;
    /**
     * Pointer to the current b+tree instance. This contains our function pointers
     * for free, dup of key and values.
     */
    sds_bptree_instance *bi;
    /**
     * The pointer to the current "latest" commited transaction. When a new read
     * transaction is taken, this is the value that is returned (until a new
     * write transaction is commited.)
     */
    sds_bptree_transaction *txn;
    /**
     * The pointer to the current "oldest" alive transaction. This is needed for
     * correct cleanup without corrupting intermediate trees.
     *
     * Consider we have a set of transactions, such as A -> B -> C. Between each
     * a set of changes has occured. We assume that if we have branches in these,
     * then during a cow operation they can be updated. So we end up in this
     * situation.
     *
     *        [ A ]         [ B ]         [ C ]
     *          |             |             |
     *      [ root a ]    [ root b ]    [ root c ]
     *       /     \\       /     \\       /     \\
     *     [ a ]  [ x ]  [ a ]  [ b ]  [ c ]  [ b ]
     *
     * Now, when we clone txn B, because C performed a cow on the leaf A, txn B
     * "owns" leaf A.
     * At this point, leaf A will be freed, which frees it from underneath
     * transaction A. We have corrupted the tree of a previous node!
     *
     * To prevent this, we only truly free a transaction when the ref count is 0
     * AND it's the last transaction in the list of live transactions. This way, B
     * even though ref count reached 0, would not be freed til A is also freed.
     *
     */
    sds_bptree_transaction *tail_txn;
    /**
     * The number of active transactions in the system.
     */
    uint64_t txn_count;
    /**
     * The transaction read lock. This is used to prevent corruption of the txn
     * pointer during a read or write transaction acquisition.
     */
    pthread_rwlock_t *read_lock;
    /**
     * The transaction write lock. During a write transaction, this is held for
     * the duration to allow only a single writer.
     */
    pthread_mutex_t *write_lock;
} sds_bptree_cow_instance;

/**
 * @}
 */
/* end sds_bptree_cow */

/* Queue operations */

/** \addtogroup sds_queue
 * @{
 */

/**
 * An internal queue node element.
 */
typedef struct _sds_queue_node
{
    /**
     * The data this node holds.
     */
    void *element;
    /**
     * The pointer to the next element of the queue.
     */
    struct _sds_queue_node *next;
    /**
     * The pointer to the previous element of the queue.
     */
    struct _sds_queue_node *prev;
} sds_queue_node;

/**
 * A queue that is internally a doubly linked list.
 */
typedef struct _sds_queue
{
    /**
     * The pointer to the current active head node. This is the "next" node that
     * will be dequeued and acted upon during the dequeue (pop) operation.
     */
    struct _sds_queue_node *head;
    /**
     * The tail of the queue. This is the "last" value that was inserted.
     */
    struct _sds_queue_node *tail;
    /**
     * If there are remaining nodes when the queue is destroyed, the queue will
     * be drained and this free function called on the value of within.
     */
    void (*value_free_fn)(void *value);
} sds_queue;

/**
 * Initialise a queue suitable for access within a single thread. Free function
 * must be set.
 *
 * \param q_ptr A pointer to the struct pointer you wish to allocate. This may be
 * on the heap or stack.
 * \param value_free_fn A function pointer to free values that are enqueued to
 * this structure.
 * \retval sds_result to indicate the status of the operation.
 */
sds_result sds_queue_init(sds_queue **q_ptr, void (*value_free_fn)(void *value));
/**
 * Enqueue an item to the tail of this queue. Equivalent to "push".
 *
 * \param q The struct pointer to the queue you wish to push to.
 * \param elem The data to enqueue.
 * \retval sds_result to indicate the status of the operation.
 */
sds_result sds_queue_enqueue(sds_queue *q, void *elem);
/**
 * Dequeue an item from the head of the queue. Equivalent to "pop".
 *
 * \param q The struct pointer to the queue you wish to pop from.
 * \param elem a pointer to a location which will be over-ridden with the data that
 * is dequeued. This may be NULLed, even during an error.
 * \retval sds_result to indicate the status of the operation.
 */
sds_result sds_queue_dequeue(sds_queue *q, void **elem);
/**
 * Indicate you are complete with the queue. Free and delete any remaining
 * internal structures, and free and still queued nodes. You must *always*
 * call this function to dispose of this structure.
 *
 * \param q The struct pointer to the queue you wish to dispose of.
 * \retval sds_result to indicate the status of the operation.
 */
sds_result sds_queue_destroy(sds_queue *q);

/**
 * @}
 */
/* end sds_queue */

/** \addtogroup sds_tqueue
 * @{
 */

/**
 * Implement a thread safe wrapper around an sds queue. This guarantees multithread
 * safety to the operations of the queue.
 */
typedef struct _sds_tqueue
{
    /**
     * Pointer to the underlying queue structure.
     */
    sds_queue *uq;
    /**
     * Lock that protects queue operations.
     */
    pthread_mutex_t lock;
} sds_tqueue;
/* Thread safe queue operations. */

/**
 * Initialise a queue suitable for access across multiple threads. Free function
 * must be set.
 *
 * \param q_ptr A pointer to the struct pointer you wish to allocate. This may be
 * on the heap or stack.
 * \param value_free_fn A function pointer to free values that are enqueued to
 * this structure.
 * \retval sds_result to indicate the status of the operation.
 */
sds_result sds_tqueue_init(sds_tqueue **q_ptr, void (*value_free_fn)(void *value));
/**
 * Enqueue an item to the tail of this queue. Equivalent to "push".
 * The safety of this operation is implicit, you do not need any other mutexes
 * to protect this operation.
 *
 * \param q The struct pointer to the queue you wish to push to.
 * \param elem The data to enqueue.
 * \retval sds_result to indicate the status of the operation.
 */
sds_result sds_tqueue_enqueue(sds_tqueue *q, void *elem);
/**
 * Dequeue an item from the head of the queue. Equivalent to "pop".
 * The safety of this operation is implicit, you do not need any other mutexes
 * to protect this operation.
 *
 * \param q The struct pointer to the queue you wish to pop from.
 * \param elem a pointer to a location which will be over-ridden with the data that
 * is dequeued. This may be NULLed, even during an error.
 * \retval sds_result to indicate the status of the operation.
 */
sds_result sds_tqueue_dequeue(sds_tqueue *q, void **elem);
/**
 * Indicate you are complete with the queue. Free and delete any remaining
 * internal structures, and free and still queued nodes. You must *always*
 * call this function to dispose of this structure.
 *
 * \param q The struct pointer to the queue you wish to dispose of.
 * \retval sds_result to indicate the status of the operation.
 */
sds_result sds_tqueue_destroy(sds_tqueue *q);

/**
 * @}
 */
/* end sds_tqueue */

/** \addtogroup sds_lqueue
 * @{
 */
/* Lock free queue operations. */

#ifdef ATOMIC_QUEUE_OPERATIONS
/* Mask the definition of the queue */
struct _sds_lqueue;
/**
 * Structure that provides a wrapper to the liblfds queue. This *must* be allocated
 * with memalign to a 128 byte boundary! The SDS library will *correctly* do this
 * for you!
 */
typedef struct _sds_lqueue sds_lqueue;
#else
/**
 * Type definition of lock free queue to the mutex queue in the case that cpu
 * intrinsics are not available, or the operating system is unsupported.
 */
typedef struct _sds_tqueue sds_lqueue;
#endif

/**
 * Initialise a queue suitable for access across multiple threads. Free function
 * must be set. This utilises hardware intrinsics to provide atomicity that may
 * be faster in highly contended applications. Use this structure in place of a
 * tqueue only if you know that content of the queue will be high.
 *
 * \param q_ptr A pointer to the struct pointer you wish to allocate. This may be
 * on the heap or stack.
 * \param value_free_fn A function pointer to free values that are enqueued to
 * this structure.
 * \retval sds_result to indicate the status of the operation.
 */
sds_result sds_lqueue_init(sds_lqueue **q_ptr, void (*value_free_fn)(void *value));
/**
 * Initialise this thread ready to use the lqueue. This is *critical*, and accessing
 * the queue without calling tprep may result in undefined behaviour.
 */
sds_result sds_lqueue_tprep(sds_lqueue *q);
/**
 * Enqueue an item to the tail of this queue. Equivalent to "push".
 * The safety of this operation is implicit, you do not need any other mutexes
 * to protect this operation.
 *
 * \param q The struct pointer to the queue you wish to push to.
 * \param elem The data to enqueue.
 * \retval sds_result to indicate the status of the operation.
 */
sds_result sds_lqueue_enqueue(sds_lqueue *q, void *elem);
/**
 * Dequeue an item from the head of the queue. Equivalent to "pop".
 * The safety of this operation is implicit, you do not need any other mutexes
 * to protect this operation.
 *
 * \param q The struct pointer to the queue you wish to pop from.
 * \param elem a pointer to a location which will be over-ridden with the data that
 * is dequeued. This may be NULLed, even during an error.
 * \retval sds_result to indicate the status of the operation.
 */
sds_result sds_lqueue_dequeue(sds_lqueue *q, void **elem);
/**
 * Indicate you are complete with the queue. Free and delete any remaining
 * internal structures, and free and still queued nodes. You must *always*
 * call this function to dispose of this structure.
 *
 * Important to note, is that all thread consumers of this queue must be stopped
 * before you call lqueue destroy. This is your responsibility to ensure this.
 *
 * \param q The struct pointer to the queue you wish to dispose of.
 * \retval sds_result to indicate the status of the operation.
 */
sds_result sds_lqueue_destroy(sds_lqueue *q);

/**
 * @}
 */
/* end sds_lqueue */

/*
 * How does the COW locking work? It depends on which operation is occuring.
 *
 * If we are beginning a search, first we take the read_lock. We then increment
 * txn ref_count, and release the lock after taking a reference.
 *
 * If we are doing a write, we take the write_lock, perform the operation. If
 * we roll back, release the write_lock.
 * If we commit, take the read_lock, now update the current active txn. Dec the
 * ref count to the former transaction.
 */

/** \addtogroup sds_bptree
 * @{
 */

/**
 * Initialise an sds b+tree for usage.
 *
 * \param binst_ptr Pointer to a struct pointer for the instance to initialise.
 * \param checksumming Flag to enable online and search checksumming. 0 to disable.
 * \param key_cmp_fn Key comparison function.
 * \param value_free_fn Function to free values that are assigned to this structure.
 * \param key_free_fn Function to free keys that are inside of this structure.
 * \param key_dup_fn Function to duplication keys within the structure.
 * \retval Result of the operation as sds_result.
 */
sds_result sds_bptree_init(sds_bptree_instance **binst_ptr, uint16_t checksumming, int64_t (*key_cmp_fn)(void *a, void *b), void (*value_free_fn)(void *value), void (*key_free_fn)(void *key), void *(*key_dup_fn)(void *key));
/**
 * Bulk load data into a b+tree instance. The existing data in the b+tree instance
 * will be destroyed during the operation. Keys *must* be sorted prior to calling
 * this function. The index of items in values must match the corresponding key
 * in keys, or be NULL. values may be NULL, which is assumed that all keys have null
 * values associated. Count is the number of elements in keys and values. Key values
 * are owned by the tree from this point, you MUST NOT free them. They must be able to
 * be freed with your key_free_fn.
 *
 * If you must load a large amount of data to the tree, this operation is
 * signifigantly faster than repeated calls to insert, but relies on you using
 * an appropriate qsort function.
 *
 * \param binst Instance to purge and load.
 * \param keys Array of sorted keys to load.
 * \param values Array of sorted values to load.
 * \param count Number of values in the arrays.
 * \retval Result of the operation as sds_result.
 */
sds_result sds_bptree_load(sds_bptree_instance *binst, void **keys, void **values, size_t count);

/* Operations */

/**
 * Destroy the instance. This frees all remaining values and keys from the structure.
 * After this is called, the bptree may not be accessed.
 *
 * \param binst Instance to destroy.
 * \retval Result of the operation as sds_result.
 */
sds_result sds_bptree_destroy(sds_bptree_instance *binst);
/**
 * Search the instance for a key. Returns a SDS_KEY_PRESENT or SDS_KEY_NOT_PRESENT.
 *
 * \param binst Instance to search.
 * \param key Key to search for.
 * \retval Result of the operation as sds_result.
 */
sds_result sds_bptree_search(sds_bptree_instance *binst, void *key);
/**
 * Retrieve the value associated with key from this structure.
 *
 * \param binst Instance to retrieve the value from.
 * \param key Key to search for.
 * \param target Destination for the value to end up in. May be NULLed even if the
 * key is not found.
 * \retval Result of the operation as sds_result.
 */
sds_result sds_bptree_retrieve(sds_bptree_instance *binst, void *key, void **target);
/**
 * Delete a key and value from the tree. This implies that the values are freed
 * as part of this process.
 *
 * \param binst Instance to remove the key and value from.
 * \param key Key to delete, along with it's data.
 * \retval Result of the operation as sds_result.
 */
sds_result sds_bptree_delete(sds_bptree_instance *binst, void *key);
/**
 * Insert the key and value to the tree. If the key already exists, this operation
 * will fail. The key is duplicated on insert, and the duplicated key's life is bound
 * to the tree. This allows you to use stack values as keys, as we duplicate them
 * on insert.
 *
 * \param binst Instance to insert into.
 * \param key Key to insert.
 * \param value Value to insert associated with key. May be NULL.
 * \retval Result of the operation as sds_result.
 */
sds_result sds_bptree_insert(sds_bptree_instance *binst, void *key, void *value);
/**
 * Verify the contents of the tree are correct and sane. If checksumming is enabled,
 * this will validate all checksums. This is generally useful for debugging only
 * or if you think you have some data issue related to your key comparison function.
 *
 * \param binst Instance to verify.
 * \retval Result of the operation as sds_result.
 */
sds_result sds_bptree_verify(sds_bptree_instance *binst);

/* Set manipulation */

/**
 * Map a function over the instance. This does not create a new instance, you
 * are expected to use the function to hand the data out to some other function.
 *
 * WARNING! This function will probably change soon!
 *
 * \param binst The instance to map over.
 * \param fn The function to be applied to each key-value pair.
 * \retval Result of the operation as sds_result.
 */
sds_result sds_bptree_map(sds_bptree_instance *binst, void (*fn)(void *k, void *v));
/**
 * From instance a, and instance b, create a new insance that contains the
 * keys and values where keys exist in a or b but not both.
 *
 * \param binst_a Instance A
 * \param binst_b Instance B
 * \param binst_difference Output for a new instance containing clones of different elements.
 * \retval Result of the operation as sds_result.
 */
sds_result sds_bptree_difference(sds_bptree_instance *binst_a, sds_bptree_instance *binst_b, sds_bptree_instance **binst_difference);
/**
 * Union the sets A and B, and return a new instance that contains keys and values
 * that are present in either or both.
 *
 * \param binst_a Instance A
 * \param binst_b Instance B
 * \param binst_union Output for a new instance containing the elements from both sets.
 * \retval Result of the operation as sds_result.
 */
sds_result sds_bptree_union(sds_bptree_instance *binst_a, sds_bptree_instance *binst_b, sds_bptree_instance **binst_union);
/**
 * Intersect the sets A and B, and return the elements that are present in both
 * sets (but not either).
 *
 * \param binst_a Instance A
 * \param binst_b Instance B
 * \param binst_intersect Output for a new instance containing the elements that are intersecting.
 * \retval Result of the operation as sds_result.
 */
sds_result sds_bptree_intersect(sds_bptree_instance *binst_a, sds_bptree_instance *binst_b, sds_bptree_instance **binst_intersect);
/**
 * Return the set where elements that exist in A but not B only are returned.
 *
 * \param binst_a Instance A
 * \param binst_b Instance B
 * \param  binst_compliment Output for a new instance that contains elements unique to A.
 * \retval Result of the operation as sds_result.
 */
sds_result sds_bptree_compliment(sds_bptree_instance *binst_a, sds_bptree_instance *binst_b, sds_bptree_instance **binst_compliment);
/**
 * Filter the set by applying a predicate, and return a new set containing the matching elements.
 *
 * \param binst_a Instance A
 * \param fn Predicate to apply. If this function returns 0 the element is excluded. All other values include the key/value.
 * \param binst_subset Output for a new instance containing the matched elements.
 * \retval Result of the operation as sds_result.
 */
sds_result sds_bptree_filter(sds_bptree_instance *binst_a, int64_t (*fn)(void *k, void *v), sds_bptree_instance **binst_subset);

/**
 * @}
 */
/* end sds_bptree */

/*
sds_result sds_bptree_subset(struct sds_bptree_instance *binst_a, struct sds_bptree_instance *binst_b);
*/

/** \addtogroup sds_bptree_cow
 * @{
 */

// Similar for the COW versions

/**
 * Initialise an sds cow b+tree for usage. This allocates space for the tree
 * and bootstraps the initial blank transaction.
 *
 * \param binst_ptr The pointer you wish to have filled with the cow b+tree instance.
 * \param checksumming During DEBUG, this flag enables tree content checksumming.
 * \param key_cmp_fn Comparison function for keys in the tree.
 * \param value_free_fn During operation, the values assigned to this tree are owned by the instance.
 *               This allows us to free the values when required.
 * \param value_dup_fn During a copy on write operation, we need to be able to copy the values in the tree.
 * \param key_free_fn Free the key value
 * \param key_dup_fn Duplicate a key value
 * \retval Result of the operation as sds_result.
 */
sds_result sds_bptree_cow_init(sds_bptree_cow_instance **binst_ptr, uint16_t checksumming, int64_t (*key_cmp_fn)(void *a, void *b), void (*value_free_fn)(void *value), void *(*value_dup_fn)(void *key), void (*key_free_fn)(void *key), void *(*key_dup_fn)(void *key));
/**
 * Destroy an instance. This will destroy all open transactions and free all
 * tree elements.
 *
 * \param binst  The cow b+tree to destroy.
 * \retval Result of the operation as sds_result.
 */
sds_result sds_bptree_cow_destroy(sds_bptree_cow_instance *binst);
/**
 * Verify an instance holds under a number of properties. This should only be used
 * in debbuging issues. If you find an issue, add it to the test cases!
 *
 * \param binst The cow b+tree to verify.
 * \retval Result of the operation as sds_result.
 */
sds_result sds_bptree_cow_verify(sds_bptree_cow_instance *binst);

/**
 * Begin a read only transaction on the tree. This guarantees the memory consistency
 * of all values in the tree for the duration of the operation.
 *
 * \param binst The cow b+tree to start a transaction in.
 * \param btxn The pointer to a transaction to be allocated.
 * \retval Result of the operation as sds_result.
 */
sds_result sds_bptree_cow_rotxn_begin(sds_bptree_cow_instance *binst, sds_bptree_transaction **btxn);
/**
 * Complete a read only transaction. After you have closed the transaction, it may
 * not be used again. This may trigger a garbage collection. After you close the
 * transaction, you may NOT reference any elements you viewed in the tree. Given
 * that there is no penalty to holding this open, just keep the transaction open
 * til you are sure you do not need the values any longer.
 *
 * \param btxn The transaction to close.
 * \retval Result of the operation as sds_result.
 */
sds_result sds_bptree_cow_rotxn_close(sds_bptree_transaction **btxn);
/**
 * Begin a write transaction. This allows you to alter the content of the tree.
 * Due to the exclusive nature of this transaction, it is best if you are able
 * to keep this transaction for the "minimal" time possible as it is serialised.
 * Changes made in a write transaction are *guaranteed* to have no impact on any
 * types currently in a read transaction.
 *
 * \param binst The b+tree to begin a write transaction in.
 * \param btxn The pointer to a location for the transaction to be created into.
 * \retval Result of the operation as sds_result.
 */
sds_result sds_bptree_cow_wrtxn_begin(sds_bptree_cow_instance *binst, sds_bptree_transaction **btxn);
/**
 * Abort and roll back the changes made during a write transaction. This operation
 * is guaranteed safe to all other transactions, including future writes. After the
 * abort function is called, you must *NOT* access this transaction again.
 *
 * \param btxn The transaction to abort and destroy.
 * \retval Result of the operation as sds_result.
 */
sds_result sds_bptree_cow_wrtxn_abort(sds_bptree_transaction **btxn);
/**
 * Commit the transaction. After this operation, the transaction must not be accessed
 * again. All changes to the tree are now visible after this call is made, and new
 * read transactions will have this view. Commit does not affect pre-existing
 * read transactions data.
 *
 * \param btxn The transaction to commit.
 * \retval Result of the operation as sds_result.
 */
sds_result sds_bptree_cow_wrtxn_commit(sds_bptree_transaction **btxn);

/**
 * Search a tree with a valid transaction reference. This returns KEY_PRESENT
 * or KEY_NOT_PRESENT if the search suceeds or not. Search may operation on a valid
 * uncommited write transaction, or a read transaction.
 *
 * \param btxn The transaction point to search.
 * \param key The key to search for.
 * \retval Result of the operation as sds_result.
 */
sds_result sds_bptree_cow_search(sds_bptree_transaction *btxn, void *key);
/**
 * Search a tree with a valid transaction reference. This returns KEY_PRESENT
 * or KEY_NOT_PRESENT if the search suceeds or not. Additionally, the value
 * attached to key is placed into the pointer for target. Search may operation on a valid
 * uncommited write transaction, or a read transaction.
 *
 * \param btxn The transaction point to search.
 * \param key The key to search for.
 * \param target The pointer where value will be placed on sucessful search. NULL is a
 *  valid value, so be sure to check the result.
 * \retval Result of the operation as sds_result.
 */
sds_result sds_bptree_cow_retrieve(sds_bptree_transaction *btxn, void *key, void **target);
/**
 * Delete key and the associated data from the tree. This operates only on a valid
 * write transaction, and changes made are not reflected until a commit is made.
 * existing reads will never see this change til they close and open new transactions.
 *
 * \param btxn The write transaction to operate on.
 * \param key The key to delete, along with associated data. If you have called retrieve
 * on this key, the key and value must not be accessed after this point within this
 * transactions lifetime as the values may be invalidated.
 * \retval Result of the operation as sds_result.
 */
sds_result sds_bptree_cow_delete(sds_bptree_transaction *btxn, void *key);
/**
 * Insert a key and associated value to the tree via a valid write transaction.
 * values and keys inserted to the tree now completely belong to the tree, and may
 * be duplicated or freed at any time. After you have given a key and value to the
 * tree, you must only access them via the retrieve interface in valid scenarios.
 *
 * \param btxn The write transaction to operate on.
 * \param key The key to insert. If a duplicate key is detected, and error is returned.
 * \param value The value to insert. May be NULL.
 * \retval Result of the operation as sds_result.
 */
sds_result sds_bptree_cow_insert(sds_bptree_transaction *btxn, void *key, void *value);

/**
 * Update a key to have a new associated value within a valid write transaction.
 * This is more efficient than delete -> insert, so for updates it's preferred.
 * This is needed in the case that you have previous read transactions, and want
 * to alter a value, without affecting the read. You would use this by calling
 * retrieve, copying the value, then calling update on the b+tree.
 *
 * \param btxn The write transaction to operate on.
 * \param key The key to update. If the key does not exist, fall back to insert.
 * \param value The value to update. May be NULL.
 * \retval Result of the operation as sds_result.
 */

sds_result sds_bptree_cow_update(sds_bptree_transaction *btxn, void *key, void *value);

/**
 * Search atomic functions as search, but implies a single short lived read transaction.
 *
 * If you have multiple searches to make, it is better to use a read transaction due to
 * the memory design of the transaction. Multiple atomics may cause contention on
 * certain parts of the transaction code.
 *
 * \param binst The cow b+tree to search.
 * \param key The key to search for.
 * \retval Result of the operation as sds_result.
 */
sds_result sds_bptree_cow_search_atomic(sds_bptree_cow_instance *binst, void *key);
/**
 * Retrieve atomic functions as retrieve, but implies a single short lived read transaction. Calling this implies that you *must* free tha value returned to you.
 *
 * If you have multiple searches to make, it is better to use a read transaction due to
 * the memory design of the transaction. Multiple atomics may cause contention on
 * certain parts of the transaction code.
 *
 * \param binst The cow b+tree to search.
 * \param key The key to search for.
 * \param target The value retrieved. You must free this after use, with the same free function as the binst holds.
 * \retval Result of the operation as sds_result.
 */
sds_result sds_bptree_cow_retrieve_atomic(sds_bptree_cow_instance *binst, void *key, void **target);
/**
 * Delete atomic functions as delete, but implise a single short livied write transaction, and commit phase.
 *
 * If you have multiple searches to make, it is better to use a write transaction due to
 * the memory design of the transaction. Multiple atomics may cause contention on
 * certain parts of the transaction code.
 *
 * \param binst The cow b+tree to delete from.
 * \param key The key to delete. This removes the associated value.
 * \retval Result of the operation as sds_result.
 */
sds_result sds_bptree_cow_delete_atomic(sds_bptree_cow_instance *binst, void *key);
/**
 * Insert atomic functions as insert, but implies a single short lived write transaction and commit phase.
 *
 * If you have multiple searches to make, it is better to use a write transaction due to
 * the memory design of the transaction. Multiple atomics may cause contention on
 * certain parts of the transaction code.
 *
 * \param binst The cow b+tree to insert to.
 * \param key The key to insert.
 * \param value The value to insert associated with key.
 * \retval Result of the operation as sds_result.
 */
sds_result sds_bptree_cow_insert_atomic(sds_bptree_cow_instance *binst, void *key, void *value);


/**
 * @}
 */
/* end sds_bptree_cow */

#define HT_SLOTS 16

typedef enum _sds_ht_slot_state {
    SDS_HT_EMPTY = 0,
    SDS_HT_VALUE = 1,
    SDS_HT_BRANCH = 2,
} sds_ht_slot_state;

typedef struct _sds_ht_value
{
    uint32_t checksum;
    void *key;
    void *value;
    // may make this a LL of values later for collisions
} sds_ht_value;

typedef struct _sds_ht_slot
{
    sds_ht_slot_state state;
    union
    {
        sds_ht_value *value;
        struct _sds_ht_node *node;
    } slot;
} sds_ht_slot;

typedef struct _sds_ht_node
{
    uint32_t checksum;
    uint64_t txn_id;
    uint_fast32_t count;
#ifdef SDS_DEBUG
    uint64_t depth;
#endif
    struct _sds_ht_node *parent;
    size_t parent_slot;
    sds_ht_slot slots[HT_SLOTS];
} sds_ht_node;

typedef struct _sds_ht_instance
{
    uint32_t checksum;
    char hkey[16];
    sds_ht_node *root;
    int64_t (*key_cmp_fn)(void *a, void *b);
    uint64_t (*key_size_fn)(void *key);
    void *(*key_dup_fn)(void *key);
    void (*key_free_fn)(void *key);
    void *(*value_dup_fn)(void *value);
    void (*value_free_fn)(void *value);
} sds_ht_instance;

uint64_t sds_uint64_t_size(void *key);

sds_result
sds_ht_init(sds_ht_instance **ht_ptr,
            int64_t (*key_cmp_fn)(void *a, void *b),
            void (*value_free_fn)(void *value),
            void *(*key_dup_fn)(void *key),
            void (*key_free_fn)(void *key),
            uint64_t (*key_size_fn)(void *key));

sds_result sds_ht_insert(sds_ht_instance *ht_ptr, void *key, void *value);
sds_result sds_ht_search(sds_ht_instance *ht_ptr, void *key, void **value);
sds_result sds_ht_delete(sds_ht_instance *ht_ptr, void *key);
sds_result sds_ht_verify(sds_ht_instance *ht_ptr);
sds_result sds_ht_destroy(sds_ht_instance *ht_ptr);