/usr/include/dirsrv/sds.h is in 389-ds-base-dev 1.3.7.10-1ubuntu1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 | /* BEGIN COPYRIGHT BLOCK
* Copyright (c) 2016, William Brown <william at blackhats dot net dot au>
* Copyright (c) 2017, Red Hat, Inc
* All rights reserved.
*
* License: GPL (version 3 or any later version).
* See LICENSE for details.
* END COPYRIGHT BLOCK **/
#pragma once
/**
* \defgroup sds_misc SDS Miscelaneous
* Miscelaneous functions for SDS. This may be replaced by a PAL from ns-slapd soon.
*/
/**
* \defgroup sds_queue SDS Queue
* Unprotected queue for void types
*/
/**
* \defgroup sds_tqueue SDS Thread Safe Queue
* Thread safe queue, protected by platform mutexs.
*/
/**
* \defgroup sds_lqueue SDS Lock Free Queue
* Thread safe lock free queue. Falls back to platform mutex if cpu intrinsics are not present.
*/
/**
* \defgroup sds_bptree SDS B+Tree
* Unprotected B+Tree for void types. Supports fast set operations, search, insert, delete and build.
*
* This structure should be considered for all locations where unprotected AVL treels, or array based
* binary search trees are used.
*/
/**
* \defgroup sds_bptree_cow SDS Copy on Write B+Tree
* Thread safe Copy of Write B+Tree. Supports parallel read transactions and serialised writes with ref counted members.
*
* This is a very important structure, and is the primary reason to consume this library. There are a number of important properties
* that this tree provides.
*
* A key concept of this structure is that all values and keys contained have their lifetimes bound to the life of the tree and it's
* nodes. This is due to the transactional nature of the structure. Consider a value that has been inserted into the tree. We take a
* read transaction of the tree in thread A. Thread B now deletes the value that was inserted. Because A still refers to the value,
* it is not freed until thread A closes it's read transaction. This guarantees that all read transactions are always valid, while
* also maintaining that memory can be freed correctly. This has many applications including the Directory Server plugin subsystem.
* We can use this property to guarantee a plugin exists for the life of an operation, and we only free it once all consumers have
* completed their operations with it.
*
* Another key aspect of this tree is that write transactions *do not* block read transactions. Reads can be completed in parallel
* to writes with complete safety. Write operations are serialised in this structure. This makes the structure extremely effective
* for long read operations with many queries, without interrupting other threads.
*
* Along with the value and key ownership properties, we can guarantee that the content of the tree within a read transaction
* will *not* be removed or added and will be consistent within the tree (however, we can't guarantee a user doesn't change the value.)
*
* As a comparison, the single thread tree is on paper, much faster than the COW version due to less memory operations. However in a
* multi thread use case, the COW version is signifigantly faster *and* safer than a mutex protected single thread tree.
*
* This tree can be used as a form of object manager for C, as well as a way to allow async operations between threads while maintaining,
* thread safety across an application. This assumes that the operations are largely parallel and batch read oriented over write heavy small
* transactions.
*/
/** \addtogroup sds_misc
* @{
*/
/**
* sds_result encapsulates the set of possible success or failures an operation
* may have during processing. You must always check this value from function
* calls.
*/
typedef enum _sds_result {
/**
* SDS_SUCCESS 0 represents that the operation had no errors in processing.
*/
SDS_SUCCESS = 0,
/**
* SDS_UNKNOWN_ERROR 1 represents that an unexpected issue was encountered,
* you should raise a bug about this if you encounter it!
*/
SDS_UNKNOWN_ERROR = 1,
/**
* SDS_NULL_POINTER 2 represents that a NULL pointer was provided to a function
* that expects a true pointer to valid data. It may also represent that
* the internals of a datastructure have been corrupted with an unexpected NULL
* pointer, and the operation can not proceed.
*/
SDS_NULL_POINTER = 2,
/**
* SDS_TEST_FAILED 3 represents that a test case has failed. You may need to
* go back to the test case to see the originating error.
*/
SDS_TEST_FAILED = 3,
/**
* SDS_DUPLICATE_KEY 4 represents that the key value already exists in the
* datastructure. This is generally seen during insertion operations.
*/
SDS_DUPLICATE_KEY = 4,
/**
* SDS_CHECKSUM_FAILURE 5 represents that the in memory data does not pass
* checksum validation. This may be due to the fault of the application or
* hardware errors.
*/
SDS_CHECKSUM_FAILURE = 5,
/**
* SDS_INVALID_NODE_ID 6 represents that the internal data of a datastructure
* node contains invalid identifying information.
*/
SDS_INVALID_NODE_ID = 6,
/**
* SDS_INVALID_KEY 7 represents that a key is invalid in the state of this node.
* generally this is due to a bug in the datastructure code where keys are
* out of order, or incorrectly nulled.
*/
SDS_INVALID_KEY = 7,
/**
* SDS_INVALID_VALUE_SIZE 8 represents that the size of the value provided
* does not hold true for certain assumptions. For example, a null pointer
* with a non zero size, or a pointer with a zero size.
*/
SDS_INVALID_VALUE_SIZE = 8,
/**
* SDS_INVALID_POINTER 9 represents that within the datastructure an invalid
* pointer structure has been created that may cause further dataloss or
* corruption. This could be due to error in the datastructure code, or
* memory issues.
*/
SDS_INVALID_POINTER = 9,
/**
* SDS_INVALID_NODE 10 represents that certain datastructure properties of
* this node have not been upheld. For example, branches should never contain
* data in a b+tree, or that the capacity of the node is incorrect and a
* split or merge should have occured.
*/
SDS_INVALID_NODE = 10,
/**
* SDS_INVALID_KEY_ORDER 11 indicates that the ordering of keys within a
* structure is incorrect, and may cause data loss.
*/
SDS_INVALID_KEY_ORDER = 11,
/**
* SDS_RETRY 12 represents that the previous operation should be attempted
* again. This is only for INTERNAL use. You will never see this returned.
*/
SDS_RETRY = 12,
/**
* SDS_KEY_PRESENT 13 is a SUCCESS condition, similar to SDS_SUCCESS. This
* differs from SDS_SUCCESS in that it shows that a key value exists within
* some datastructure, and you must explcitly check for this case (vs
* SDS_KEY_NOT_PRESENT).
*/
SDS_KEY_PRESENT = 13,
/**
* SDS_KEY_NOT_PRESENT 14 is a SUCCESS condition, similar to SDS_SUCCESS. This
* differs from SDS_SUCCESS in that is show that a key value does not exist
* with some datastructure, but the operation was still a SUCCESS in completion.
* You must explicitly check for this case to ensure you know the true result
* of a function.
*/
SDS_KEY_NOT_PRESENT = 14,
/**
* SDS_INCOMPATIBLE_INSTANCE 15 represents that during a set operation, you
* are attempting to manipulate datastructures with potentiall different key
* and value types. This would result in datacorruption or undefined behaviour
* so instead, we return a pre-emptive error.
*/
SDS_INCOMPATIBLE_INSTANCE = 15,
/**
* SDS_LIST_EXHAUSTED 16 represents that there are no more elements in this
* datastructure to be consumed during an iteration.
*/
SDS_LIST_EXHAUSTED = 16,
/**
* SDS_INVALID_TXN 17 represents that a transaction identifier is invalid
* for the requested operation. IE using a read only transaction to attempt
* a write.
*/
SDS_INVALID_TXN = 17,
} sds_result;
/**
* sds_malloc wraps the system memory allocator with an OOM check. During debugging
* this call guarantees that memory is zeroed before use.
*
* \param size the amount of memory to allocate in bytes.
* \retval pointer to the allocated memory.
*/
void *sds_malloc(size_t size);
/**
* sds_calloc wraps the system calloc call with an OOM check. This call like calloc,
* guarantees the returned memory is zeroed before usage.
*
* \param size the amount of memory to allocate in bytes.
* \retval pointer to the allocated memory.
*/
void *sds_calloc(size_t size);
/**
* sds_memalign wraps the posix_memalign function with an OOM and alignment check. During debugging
* this call guarantees that memory is zeroed before use.
*
* \param size the amount of memory to allocate in bytes.
* \param alignment the size to align to in bytes. Must be a power of 2.
* \retval pointer to the allocated memory.
*/
void *sds_memalign(size_t size, size_t alignment);
/**
* sds_free wraps the system free call with a null check.
*
* \param ptr the memory to free.
*/
void sds_free(void *ptr);
/**
* sds_crc32c uses the crc32c algorithm to create a verification checksum of data.
* This checksum is for data verification, not cryptographic purposes. It is used
* largely in debugging to find cases when bytes in structures are updated incorrectly,
* or to find memory bit flips during operation. If avaliable, this will use the
* intel sse4 crc32c hardware acceleration.
*
* \param crc The running CRC value. Initially should be 0. If in doubt, use 0.
* \param data Pointer to the data to checksum.
* \param length number of bytes to validate.
* \retval crc The crc of this data. May be re-used in subsequent sds_crc32c calls
* for certain datatypes.
*/
uint32_t sds_crc32c(uint32_t crc, const unsigned char *data, size_t length);
/**
* sds_siphash13 provides an implementation of the siphash algorithm for use in
* hash based datastructures. It is chosen due to is resilance against hash
* attacks, such that it makes it higly secure as a choice for a hashmap.
*
* \param src The data to hash
* \param src_sz The size of the data to hash
* \param key The security key to mix with the hash. This should be randomised once
* at startup and stored for use with further operations.
* \retval The uint64_t representing the hash of the data.
*/
uint64_t sds_siphash13(const void *src, size_t src_sz, const char key[16]);
/**
* sds_uint64_t_compare takes two void *, and treats them as uint64_t *.
*
* \param a The first uint64_t *.
* \param b The second uint64_t *.
* \retval result of the comparison.
*/
int64_t sds_uint64_t_compare(void *a, void *b);
/**
* Free the allocated uint64_t * from sds_uint64_t_alloc.
*
* \param key Do nothing to the key.
*/
void sds_uint64_t_free(void *key);
/**
* sds_uint64_t_dup Returns a copy of the uint64_t * to the caller.
*
* \param key The uint64_t * to copy.
* \retval result the copy of the value.
*/
void *sds_uint64_t_dup(void *key);
/**
* sds_strcmp exists to wrap the system strcmp call with the correct types needed
* for libsds datastructures to operate correctly.
*
* \param a Pointer to the first string.
* \param b Pointer to the second string.
* \retval Difference between the values. 0 indicates they are the same.
*/
int64_t sds_strcmp(void *a, void *b);
/**
* sds_strdup exists to wrap the system strdup call with the correct types needed for
* libsds datastructures to operate correctly.
*
* \param key Pointer to the string to duplicate.
* \retval Pointer to the newly allocated string.
*/
void *sds_strdup(void *key);
/**
* @}
*/
/* end sds_misc */
/** \addtogroup sds_bptree_cow
* @{
*/
/**
* sds_txt_state lists the possible states of a transaction.
*/
typedef enum _sds_txn_state {
/**
* SDS_TXN_READ 0 This is a read only transaction. Not mutable actions may
* be taken.
*/
SDS_TXN_READ = 0,
/**
* SDS_TXN_WRITE 1 This is a read and write transaction. You may perform
* all read operations, and also all write operations.
*/
SDS_TXN_WRITE = 1,
} sds_txn_state;
/**
* sds_bptree_node_list stores a linked list of sds_bptree_nodes for tracking.
* Internally this is used extensively for transaction references.
*/
typedef struct _sds_bptree_node_list
{
/**
* checksum of the node list item.
*/
uint32_t checksum;
/**
* Pointer to the node item.
*/
struct _sds_bptree_node *node;
/**
* Pointer to the next node list item. NULL indicates list termination.
*/
struct _sds_bptree_node_list *next;
} sds_bptree_node_list;
/**
* sds_bptree_transaction Manages the content and lifetime of nodes within the tree. It is the basis of our
* garbage collection system, using atomic reference counts to synchronise our behaviour.
*/
typedef struct _sds_bptree_transaction
{
/**
* Checksum of the data in this structure.
*/
uint32_t checksum;
/**
* Reference count to the number of consumers of this transaction. This is
* atomically updated. If this is the "active" master transaction, this is
* guaranteed to be 1 or greater. When a new write transaction is commited,
* if this reference count falls to 0, this transaction is implicitly
* destroy.
*/
uint32_t reference_count;
/**
* The state of the transaction. All transactions start as SDS_WRITE_TXN,
* and upon commit move to the SDS_READ_TXN state.
*/
sds_txn_state state;
/**
* Pointer to the cow b+tree instance that created us.
*/
struct _sds_bptree_cow_instance *binst;
/**
* Pointer to the b+tree instance that this transaction holds.
*/
struct _sds_bptree_instance *bi;
/**
* The unique identifier of this transaction.
*/
uint64_t txn_id;
/**
* The list of nodes that this transaction "owns". When the reference count
* moves to 0, these nodes will be freed. This list is created during a write
* transaction of "items that will not be needed when we are removed.".
*
* IE, when we have a txn A, then a new transaction B is made, we "copy" node
* 1a to node 1b. Node 1b is "created" by txn B, and node 1a is "owend" by
* txn A, because it's the last transaction that depends on this nodes existance.
*/
sds_bptree_node_list *owned;
/**
* The list of nodes that this transaction "created". This is used during an
* abort of the txn to roll back any changes that we made.
*/
sds_bptree_node_list *created;
/**
* The current root node. Each transaction owns a unique root node which
* anchors the branches of the copy on write structure.
*/
struct _sds_bptree_node *root;
/**
* The previous transaction that we are derived from.
*/
struct _sds_bptree_transaction *parent_txn;
/**
* The next transaction that derives from us.
*/
struct _sds_bptree_transaction *child_txn;
} sds_bptree_transaction;
/**
* @}
*/
/* end sds_bptree_cow */
/** \addtogroup sds_bptree
* @{
*/
/**
* SDS_BPTREE_DEFAULT_CAPACITY 5 represements that there are 5 values held per
* in the B+Tree and COW B+Tree. A signifigant amount of testing went into the
* tuning of this value for best performance.
*/
#define SDS_BPTREE_DEFAULT_CAPACITY 3
/**
* SDS_BPTREE_HALF_CAPACITY 3 is pre-calculated as "ceiling(default capacity / 2)".
* This is used to assert when node splits and merges should be taken.
*/
#define SDS_BPTREE_HALF_CAPACITY 2
/**
* SDS_BPTREE_BRANCH 6 indicates that each node may potentially have up to 6 child
* nodes. This yields a broad tree, that requires fewer cache misses to travese
* (despite the higher number of comparisons to search).
*/
#define SDS_BPTREE_BRANCH 4
/**
* This is the core of the B+Tree structures. This node represents the branches
* and leaves of the structure.
*/
typedef struct _sds_bptree_node
{
#ifdef SDS_DEBUG
/**
* checksum of the structure data to detect errors. Must be the first element
* in the struct.
*/
uint32_t checksum;
#endif
/**
* Statically sized array of pointers to the keys of this structure.
*/
void *keys[SDS_BPTREE_DEFAULT_CAPACITY];
/**
* Statically sized array of pointers to values. This is tagged by the level
* flag. If level is 0, this is a set of values that have been inserted
* by the consumer. If the level is > 0, this is the pointers to further
* node structs.
*
* In a leaf, this is [value, value, value, value, value, link -> ]
*
* In a non-leaf, this is [link, link, link, link, link, link]
*/
void *values[SDS_BPTREE_BRANCH];
/**
* The number of values currently stored in this structure.
*/
uint32_t item_count;
/**
* This number of "rows" above the leaves this node is. 0 represents a true
* leaf node, anything greater is a branch.
*/
uint32_t level;
/* Put these last because they are only used in the insert / delete path, not the search.
* This way the keys and values are always in the cache associated with the ptr.
*/
/**
* The id of the transaction that created this node. This is used so that
* within a transaction, we don't double copy values if we already copied
* them.
*/
uint64_t txn_id;
/**
* Back reference to our parent. This is faster than creating a traversal
* list during each insertion (by a large factor).
*/
struct _sds_bptree_node *parent;
} sds_bptree_node;
/**
* The instance of the B+Tree. Stores references to function pointers for
* manipulation of keys and values within the tree. Maintains the root checksum
* which estabilshes the "root of trust" to all other nodes in the tree.
*/
typedef struct _sds_bptree_instance
{
/**
* checksum of the instance data.
*/
uint32_t checksum;
/**
* This flag determines if we maintain and update checksum values during
* tree operations, and that we verify these during the verification operation.
*/
uint16_t offline_checksumming;
/**
* This flag determines if we verify all checksums during the read and write
* paths of the code. Adds a large performance overhead, but guarantees
* the data in the tree is "consistent".
*/
uint16_t search_checksumming;
/**
* Internal tracking id for tree display.
*/
uint16_t print_iter;
/**
* Pointer to the current tree root node.
*/
sds_bptree_node *root; /* The current root node of the DB */
/**
* This function should be able to free values of the type that will be
* inserted into the tree.
*/
void (*value_free_fn)(void *value);
/**
* This function should be able to duplicate value of the type that will be
* inserted into the tree. Values may be freed and duplicated in an order
* you do not expect, so blindly returning a pointer to the same data may
* not be wise if free then destroys it.
*/
void *(*value_dup_fn)(void *value);
/**
* Key comparison function. This must return an int64_t of the difference in
* the values. A result of < 0 indicates that a is less than b. A result of
* 0 indicates that the values of a and b are identical. A result of > 0
* indicates that a is greater than b.
*/
int64_t (*key_cmp_fn)(void *a, void *b); /* Comparison function pointer */
/**
* Key free function. This must free the pointer provided by key.
*/
void (*key_free_fn)(void *key);
/**
* Key duplication function. This duplicates the value of key and returns a
* new pointer to it. This is used extensively in the tree structure, and
* the key or result may be freed out of order, so don't blindly return the
* same data.
*/
void *(*key_dup_fn)(void *key);
} sds_bptree_instance;
/**
* @}
*/
/* end sds_bptree */
/** \addtogroup sds_bptree_cow
* @{
*/
/**
* The copy on write tree instance. This stores the current active reading
* transaction, number of active transactions, and pointers to the root of the
* b+tree.
*
* This instance is the most important part of the tree and provides us with memory safety. Once we
* have memory safety, individual transactions are free to garbage collect on their own. Garbage collection
* is coordinated from the instance.
*
* ## MEMORY SAFTEY
*
* Within the transaction, we have a small number of important fields. These are:
*
* - read_lock
* - write_lock
* - tail_txn
*
* ### Read Transaction Begin
*
* At the beginning of a read transaction, we take a rolock on read_lock. This ensures that on
* our working thread, that the correct sequence of barriers for memory consistency of the tree
* is issued.
*
* Within the read_lock, we use cpu atomics to increment the transaction reference count by 1.
* We then release the read_lock.
*
* In this way, we allow multiple read transactions to be initiated at the same time, with the
* only serialisation point being the atomic increment.
*
* While the read transaction is "active" (IE the transaction that is listed in binst, where new
* transactions will reference), the reference count will never go below 1.
*
* ### Write Transaction
*
* At the beginning of a write transaction, we take the write_lock mutex. This guarantees we are
* the only writer in the tree at a point in time. Due to the way mutexes work, this guarantees
* our memory barries of the tree are issued, so we see all other writes that have occured.
*
* During a write, we store a list of all nodes that were cloned: Both so that we know who we cloned
* *from* but aso what nodes we *created*.
*
* ### Write Abort
*
* During a write abort, we release the write lock immediately. We then use the created node list, and
* free all contents as they were never made active.
*
* ### Write commit
*
* During a commit, we prepare the transaction by changing it's state flags, and setting it's reference
* count atomically. We relinquish the created list, because we have no need to free the nodes that are
* about to become part of the tree. However, we update the previous transaction with the "owned" list,
* as these are nodes we cloned from: IE these are nodes that serve no *future* role in the tree, so the
* previous transaction is the last point where they are valid and needed - The last transaction now must
* clean up after itself when it's ready to GC!
*
* We set our reference count to 2, to indicate that a following node (the previous active transaction)
* relies on us, and that we are also active.
*
* We now take the write variant of the rw read_lock. This waits for all in progress read transactions to
* begin, and we take exclusive access of this lock. We then pivot the older transaction out with our
* transaction. The read_lock is released, and read transactions may now resume.
*
* At this point, we release the write_lock, and a new write may begin.
*
* We decrement the reference count of the previous transaction which may cause it to drop to 0. This is
* the equivalent of a read transaction close. After this point, the previous transactions reference
* count will only decrement, as previous holders close their transactions. Increment only occurs from the
* active transaction!
*
* ### Read transaction close.
*
* When we close a read transaction, we require no locks. We use cpu atomic to set and fetch the
* reference count.
*
* If the reference count remains positive, we complete, and continue.
*
* If the reference count drops to 0, we take a reference to the next transaction in the series,
* and we free our node. We then decrement the reference counter of the next transaction and check
* the result.
*
* Either, the reference count is > 0 because it is the active transaction *or* there is a current
* read transaction.
*
* OR, the reference count reaches 0 because there are no active transactions, so we can free this node.
* in the same operation, we then decrement the reference count to the next node in the series,
* and we repeat this til we reach a node with a positive reference count.
*
* Due to the design of this system, we are guarantee no thread races, as one and only one thread
* can ever set the reference count to 0: either the closing read, or the closing previous transaction.
* This gives us complete thread safety, but without the need for a mutex to ensure this.
*
* ### Usage of the memory safe properties.
*
* Due to the way the barriers are issued, when you have a read transaction or a write transaction, you can
* guarantee that the tree memory is consistent and correct, and that all values will remain alive until
* you close the transaction. This means you can use this to allow parallel tasks in complex cases. Directory
* Server has such a case with the plugin subsystem. Consider every operation needs a consistent list of plugins.
*
* You build a tree of the plugin references (dlopen pointers, reference count, pblock) and store them.
*
* When you begin the operation you take a read transaction of this. This means every plugin in the operation is
* guaranteed to be in the tree and held open for the duration of this operation. During the read, if another thread
* commits the delete on a plugin, this drops the ref count, but not below 0 - the operation can continue safely.
*
* Once the operation is complete, the transaction is closed. At this point, the vacuum runs during close, which would
* trigger the value free function. This would now actually close the plugin pointers, values. Additionally, the correct use
* of the transaction guarantees the memory content of the plugin is correct due to the barriers inside of the transaction.
*
*/
typedef struct _sds_bptree_cow_instance
{
/**
* checksum of the instance data.
*/
uint32_t checksum;
/**
* Pointer to the current b+tree instance. This contains our function pointers
* for free, dup of key and values.
*/
sds_bptree_instance *bi;
/**
* The pointer to the current "latest" commited transaction. When a new read
* transaction is taken, this is the value that is returned (until a new
* write transaction is commited.)
*/
sds_bptree_transaction *txn;
/**
* The pointer to the current "oldest" alive transaction. This is needed for
* correct cleanup without corrupting intermediate trees.
*
* Consider we have a set of transactions, such as A -> B -> C. Between each
* a set of changes has occured. We assume that if we have branches in these,
* then during a cow operation they can be updated. So we end up in this
* situation.
*
* [ A ] [ B ] [ C ]
* | | |
* [ root a ] [ root b ] [ root c ]
* / \\ / \\ / \\
* [ a ] [ x ] [ a ] [ b ] [ c ] [ b ]
*
* Now, when we clone txn B, because C performed a cow on the leaf A, txn B
* "owns" leaf A.
* At this point, leaf A will be freed, which frees it from underneath
* transaction A. We have corrupted the tree of a previous node!
*
* To prevent this, we only truly free a transaction when the ref count is 0
* AND it's the last transaction in the list of live transactions. This way, B
* even though ref count reached 0, would not be freed til A is also freed.
*
*/
sds_bptree_transaction *tail_txn;
/**
* The number of active transactions in the system.
*/
uint64_t txn_count;
/**
* The transaction read lock. This is used to prevent corruption of the txn
* pointer during a read or write transaction acquisition.
*/
pthread_rwlock_t *read_lock;
/**
* The transaction write lock. During a write transaction, this is held for
* the duration to allow only a single writer.
*/
pthread_mutex_t *write_lock;
} sds_bptree_cow_instance;
/**
* @}
*/
/* end sds_bptree_cow */
/* Queue operations */
/** \addtogroup sds_queue
* @{
*/
/**
* An internal queue node element.
*/
typedef struct _sds_queue_node
{
/**
* The data this node holds.
*/
void *element;
/**
* The pointer to the next element of the queue.
*/
struct _sds_queue_node *next;
/**
* The pointer to the previous element of the queue.
*/
struct _sds_queue_node *prev;
} sds_queue_node;
/**
* A queue that is internally a doubly linked list.
*/
typedef struct _sds_queue
{
/**
* The pointer to the current active head node. This is the "next" node that
* will be dequeued and acted upon during the dequeue (pop) operation.
*/
struct _sds_queue_node *head;
/**
* The tail of the queue. This is the "last" value that was inserted.
*/
struct _sds_queue_node *tail;
/**
* If there are remaining nodes when the queue is destroyed, the queue will
* be drained and this free function called on the value of within.
*/
void (*value_free_fn)(void *value);
} sds_queue;
/**
* Initialise a queue suitable for access within a single thread. Free function
* must be set.
*
* \param q_ptr A pointer to the struct pointer you wish to allocate. This may be
* on the heap or stack.
* \param value_free_fn A function pointer to free values that are enqueued to
* this structure.
* \retval sds_result to indicate the status of the operation.
*/
sds_result sds_queue_init(sds_queue **q_ptr, void (*value_free_fn)(void *value));
/**
* Enqueue an item to the tail of this queue. Equivalent to "push".
*
* \param q The struct pointer to the queue you wish to push to.
* \param elem The data to enqueue.
* \retval sds_result to indicate the status of the operation.
*/
sds_result sds_queue_enqueue(sds_queue *q, void *elem);
/**
* Dequeue an item from the head of the queue. Equivalent to "pop".
*
* \param q The struct pointer to the queue you wish to pop from.
* \param elem a pointer to a location which will be over-ridden with the data that
* is dequeued. This may be NULLed, even during an error.
* \retval sds_result to indicate the status of the operation.
*/
sds_result sds_queue_dequeue(sds_queue *q, void **elem);
/**
* Indicate you are complete with the queue. Free and delete any remaining
* internal structures, and free and still queued nodes. You must *always*
* call this function to dispose of this structure.
*
* \param q The struct pointer to the queue you wish to dispose of.
* \retval sds_result to indicate the status of the operation.
*/
sds_result sds_queue_destroy(sds_queue *q);
/**
* @}
*/
/* end sds_queue */
/** \addtogroup sds_tqueue
* @{
*/
/**
* Implement a thread safe wrapper around an sds queue. This guarantees multithread
* safety to the operations of the queue.
*/
typedef struct _sds_tqueue
{
/**
* Pointer to the underlying queue structure.
*/
sds_queue *uq;
/**
* Lock that protects queue operations.
*/
pthread_mutex_t lock;
} sds_tqueue;
/* Thread safe queue operations. */
/**
* Initialise a queue suitable for access across multiple threads. Free function
* must be set.
*
* \param q_ptr A pointer to the struct pointer you wish to allocate. This may be
* on the heap or stack.
* \param value_free_fn A function pointer to free values that are enqueued to
* this structure.
* \retval sds_result to indicate the status of the operation.
*/
sds_result sds_tqueue_init(sds_tqueue **q_ptr, void (*value_free_fn)(void *value));
/**
* Enqueue an item to the tail of this queue. Equivalent to "push".
* The safety of this operation is implicit, you do not need any other mutexes
* to protect this operation.
*
* \param q The struct pointer to the queue you wish to push to.
* \param elem The data to enqueue.
* \retval sds_result to indicate the status of the operation.
*/
sds_result sds_tqueue_enqueue(sds_tqueue *q, void *elem);
/**
* Dequeue an item from the head of the queue. Equivalent to "pop".
* The safety of this operation is implicit, you do not need any other mutexes
* to protect this operation.
*
* \param q The struct pointer to the queue you wish to pop from.
* \param elem a pointer to a location which will be over-ridden with the data that
* is dequeued. This may be NULLed, even during an error.
* \retval sds_result to indicate the status of the operation.
*/
sds_result sds_tqueue_dequeue(sds_tqueue *q, void **elem);
/**
* Indicate you are complete with the queue. Free and delete any remaining
* internal structures, and free and still queued nodes. You must *always*
* call this function to dispose of this structure.
*
* \param q The struct pointer to the queue you wish to dispose of.
* \retval sds_result to indicate the status of the operation.
*/
sds_result sds_tqueue_destroy(sds_tqueue *q);
/**
* @}
*/
/* end sds_tqueue */
/** \addtogroup sds_lqueue
* @{
*/
/* Lock free queue operations. */
#ifdef ATOMIC_QUEUE_OPERATIONS
/* Mask the definition of the queue */
struct _sds_lqueue;
/**
* Structure that provides a wrapper to the liblfds queue. This *must* be allocated
* with memalign to a 128 byte boundary! The SDS library will *correctly* do this
* for you!
*/
typedef struct _sds_lqueue sds_lqueue;
#else
/**
* Type definition of lock free queue to the mutex queue in the case that cpu
* intrinsics are not available, or the operating system is unsupported.
*/
typedef struct _sds_tqueue sds_lqueue;
#endif
/**
* Initialise a queue suitable for access across multiple threads. Free function
* must be set. This utilises hardware intrinsics to provide atomicity that may
* be faster in highly contended applications. Use this structure in place of a
* tqueue only if you know that content of the queue will be high.
*
* \param q_ptr A pointer to the struct pointer you wish to allocate. This may be
* on the heap or stack.
* \param value_free_fn A function pointer to free values that are enqueued to
* this structure.
* \retval sds_result to indicate the status of the operation.
*/
sds_result sds_lqueue_init(sds_lqueue **q_ptr, void (*value_free_fn)(void *value));
/**
* Initialise this thread ready to use the lqueue. This is *critical*, and accessing
* the queue without calling tprep may result in undefined behaviour.
*/
sds_result sds_lqueue_tprep(sds_lqueue *q);
/**
* Enqueue an item to the tail of this queue. Equivalent to "push".
* The safety of this operation is implicit, you do not need any other mutexes
* to protect this operation.
*
* \param q The struct pointer to the queue you wish to push to.
* \param elem The data to enqueue.
* \retval sds_result to indicate the status of the operation.
*/
sds_result sds_lqueue_enqueue(sds_lqueue *q, void *elem);
/**
* Dequeue an item from the head of the queue. Equivalent to "pop".
* The safety of this operation is implicit, you do not need any other mutexes
* to protect this operation.
*
* \param q The struct pointer to the queue you wish to pop from.
* \param elem a pointer to a location which will be over-ridden with the data that
* is dequeued. This may be NULLed, even during an error.
* \retval sds_result to indicate the status of the operation.
*/
sds_result sds_lqueue_dequeue(sds_lqueue *q, void **elem);
/**
* Indicate you are complete with the queue. Free and delete any remaining
* internal structures, and free and still queued nodes. You must *always*
* call this function to dispose of this structure.
*
* Important to note, is that all thread consumers of this queue must be stopped
* before you call lqueue destroy. This is your responsibility to ensure this.
*
* \param q The struct pointer to the queue you wish to dispose of.
* \retval sds_result to indicate the status of the operation.
*/
sds_result sds_lqueue_destroy(sds_lqueue *q);
/**
* @}
*/
/* end sds_lqueue */
/*
* How does the COW locking work? It depends on which operation is occuring.
*
* If we are beginning a search, first we take the read_lock. We then increment
* txn ref_count, and release the lock after taking a reference.
*
* If we are doing a write, we take the write_lock, perform the operation. If
* we roll back, release the write_lock.
* If we commit, take the read_lock, now update the current active txn. Dec the
* ref count to the former transaction.
*/
/** \addtogroup sds_bptree
* @{
*/
/**
* Initialise an sds b+tree for usage.
*
* \param binst_ptr Pointer to a struct pointer for the instance to initialise.
* \param checksumming Flag to enable online and search checksumming. 0 to disable.
* \param key_cmp_fn Key comparison function.
* \param value_free_fn Function to free values that are assigned to this structure.
* \param key_free_fn Function to free keys that are inside of this structure.
* \param key_dup_fn Function to duplication keys within the structure.
* \retval Result of the operation as sds_result.
*/
sds_result sds_bptree_init(sds_bptree_instance **binst_ptr, uint16_t checksumming, int64_t (*key_cmp_fn)(void *a, void *b), void (*value_free_fn)(void *value), void (*key_free_fn)(void *key), void *(*key_dup_fn)(void *key));
/**
* Bulk load data into a b+tree instance. The existing data in the b+tree instance
* will be destroyed during the operation. Keys *must* be sorted prior to calling
* this function. The index of items in values must match the corresponding key
* in keys, or be NULL. values may be NULL, which is assumed that all keys have null
* values associated. Count is the number of elements in keys and values. Key values
* are owned by the tree from this point, you MUST NOT free them. They must be able to
* be freed with your key_free_fn.
*
* If you must load a large amount of data to the tree, this operation is
* signifigantly faster than repeated calls to insert, but relies on you using
* an appropriate qsort function.
*
* \param binst Instance to purge and load.
* \param keys Array of sorted keys to load.
* \param values Array of sorted values to load.
* \param count Number of values in the arrays.
* \retval Result of the operation as sds_result.
*/
sds_result sds_bptree_load(sds_bptree_instance *binst, void **keys, void **values, size_t count);
/* Operations */
/**
* Destroy the instance. This frees all remaining values and keys from the structure.
* After this is called, the bptree may not be accessed.
*
* \param binst Instance to destroy.
* \retval Result of the operation as sds_result.
*/
sds_result sds_bptree_destroy(sds_bptree_instance *binst);
/**
* Search the instance for a key. Returns a SDS_KEY_PRESENT or SDS_KEY_NOT_PRESENT.
*
* \param binst Instance to search.
* \param key Key to search for.
* \retval Result of the operation as sds_result.
*/
sds_result sds_bptree_search(sds_bptree_instance *binst, void *key);
/**
* Retrieve the value associated with key from this structure.
*
* \param binst Instance to retrieve the value from.
* \param key Key to search for.
* \param target Destination for the value to end up in. May be NULLed even if the
* key is not found.
* \retval Result of the operation as sds_result.
*/
sds_result sds_bptree_retrieve(sds_bptree_instance *binst, void *key, void **target);
/**
* Delete a key and value from the tree. This implies that the values are freed
* as part of this process.
*
* \param binst Instance to remove the key and value from.
* \param key Key to delete, along with it's data.
* \retval Result of the operation as sds_result.
*/
sds_result sds_bptree_delete(sds_bptree_instance *binst, void *key);
/**
* Insert the key and value to the tree. If the key already exists, this operation
* will fail. The key is duplicated on insert, and the duplicated key's life is bound
* to the tree. This allows you to use stack values as keys, as we duplicate them
* on insert.
*
* \param binst Instance to insert into.
* \param key Key to insert.
* \param value Value to insert associated with key. May be NULL.
* \retval Result of the operation as sds_result.
*/
sds_result sds_bptree_insert(sds_bptree_instance *binst, void *key, void *value);
/**
* Verify the contents of the tree are correct and sane. If checksumming is enabled,
* this will validate all checksums. This is generally useful for debugging only
* or if you think you have some data issue related to your key comparison function.
*
* \param binst Instance to verify.
* \retval Result of the operation as sds_result.
*/
sds_result sds_bptree_verify(sds_bptree_instance *binst);
/* Set manipulation */
/**
* Map a function over the instance. This does not create a new instance, you
* are expected to use the function to hand the data out to some other function.
*
* WARNING! This function will probably change soon!
*
* \param binst The instance to map over.
* \param fn The function to be applied to each key-value pair.
* \retval Result of the operation as sds_result.
*/
sds_result sds_bptree_map(sds_bptree_instance *binst, void (*fn)(void *k, void *v));
/**
* From instance a, and instance b, create a new insance that contains the
* keys and values where keys exist in a or b but not both.
*
* \param binst_a Instance A
* \param binst_b Instance B
* \param binst_difference Output for a new instance containing clones of different elements.
* \retval Result of the operation as sds_result.
*/
sds_result sds_bptree_difference(sds_bptree_instance *binst_a, sds_bptree_instance *binst_b, sds_bptree_instance **binst_difference);
/**
* Union the sets A and B, and return a new instance that contains keys and values
* that are present in either or both.
*
* \param binst_a Instance A
* \param binst_b Instance B
* \param binst_union Output for a new instance containing the elements from both sets.
* \retval Result of the operation as sds_result.
*/
sds_result sds_bptree_union(sds_bptree_instance *binst_a, sds_bptree_instance *binst_b, sds_bptree_instance **binst_union);
/**
* Intersect the sets A and B, and return the elements that are present in both
* sets (but not either).
*
* \param binst_a Instance A
* \param binst_b Instance B
* \param binst_intersect Output for a new instance containing the elements that are intersecting.
* \retval Result of the operation as sds_result.
*/
sds_result sds_bptree_intersect(sds_bptree_instance *binst_a, sds_bptree_instance *binst_b, sds_bptree_instance **binst_intersect);
/**
* Return the set where elements that exist in A but not B only are returned.
*
* \param binst_a Instance A
* \param binst_b Instance B
* \param binst_compliment Output for a new instance that contains elements unique to A.
* \retval Result of the operation as sds_result.
*/
sds_result sds_bptree_compliment(sds_bptree_instance *binst_a, sds_bptree_instance *binst_b, sds_bptree_instance **binst_compliment);
/**
* Filter the set by applying a predicate, and return a new set containing the matching elements.
*
* \param binst_a Instance A
* \param fn Predicate to apply. If this function returns 0 the element is excluded. All other values include the key/value.
* \param binst_subset Output for a new instance containing the matched elements.
* \retval Result of the operation as sds_result.
*/
sds_result sds_bptree_filter(sds_bptree_instance *binst_a, int64_t (*fn)(void *k, void *v), sds_bptree_instance **binst_subset);
/**
* @}
*/
/* end sds_bptree */
/*
sds_result sds_bptree_subset(struct sds_bptree_instance *binst_a, struct sds_bptree_instance *binst_b);
*/
/** \addtogroup sds_bptree_cow
* @{
*/
// Similar for the COW versions
/**
* Initialise an sds cow b+tree for usage. This allocates space for the tree
* and bootstraps the initial blank transaction.
*
* \param binst_ptr The pointer you wish to have filled with the cow b+tree instance.
* \param checksumming During DEBUG, this flag enables tree content checksumming.
* \param key_cmp_fn Comparison function for keys in the tree.
* \param value_free_fn During operation, the values assigned to this tree are owned by the instance.
* This allows us to free the values when required.
* \param value_dup_fn During a copy on write operation, we need to be able to copy the values in the tree.
* \param key_free_fn Free the key value
* \param key_dup_fn Duplicate a key value
* \retval Result of the operation as sds_result.
*/
sds_result sds_bptree_cow_init(sds_bptree_cow_instance **binst_ptr, uint16_t checksumming, int64_t (*key_cmp_fn)(void *a, void *b), void (*value_free_fn)(void *value), void *(*value_dup_fn)(void *key), void (*key_free_fn)(void *key), void *(*key_dup_fn)(void *key));
/**
* Destroy an instance. This will destroy all open transactions and free all
* tree elements.
*
* \param binst The cow b+tree to destroy.
* \retval Result of the operation as sds_result.
*/
sds_result sds_bptree_cow_destroy(sds_bptree_cow_instance *binst);
/**
* Verify an instance holds under a number of properties. This should only be used
* in debbuging issues. If you find an issue, add it to the test cases!
*
* \param binst The cow b+tree to verify.
* \retval Result of the operation as sds_result.
*/
sds_result sds_bptree_cow_verify(sds_bptree_cow_instance *binst);
/**
* Begin a read only transaction on the tree. This guarantees the memory consistency
* of all values in the tree for the duration of the operation.
*
* \param binst The cow b+tree to start a transaction in.
* \param btxn The pointer to a transaction to be allocated.
* \retval Result of the operation as sds_result.
*/
sds_result sds_bptree_cow_rotxn_begin(sds_bptree_cow_instance *binst, sds_bptree_transaction **btxn);
/**
* Complete a read only transaction. After you have closed the transaction, it may
* not be used again. This may trigger a garbage collection. After you close the
* transaction, you may NOT reference any elements you viewed in the tree. Given
* that there is no penalty to holding this open, just keep the transaction open
* til you are sure you do not need the values any longer.
*
* \param btxn The transaction to close.
* \retval Result of the operation as sds_result.
*/
sds_result sds_bptree_cow_rotxn_close(sds_bptree_transaction **btxn);
/**
* Begin a write transaction. This allows you to alter the content of the tree.
* Due to the exclusive nature of this transaction, it is best if you are able
* to keep this transaction for the "minimal" time possible as it is serialised.
* Changes made in a write transaction are *guaranteed* to have no impact on any
* types currently in a read transaction.
*
* \param binst The b+tree to begin a write transaction in.
* \param btxn The pointer to a location for the transaction to be created into.
* \retval Result of the operation as sds_result.
*/
sds_result sds_bptree_cow_wrtxn_begin(sds_bptree_cow_instance *binst, sds_bptree_transaction **btxn);
/**
* Abort and roll back the changes made during a write transaction. This operation
* is guaranteed safe to all other transactions, including future writes. After the
* abort function is called, you must *NOT* access this transaction again.
*
* \param btxn The transaction to abort and destroy.
* \retval Result of the operation as sds_result.
*/
sds_result sds_bptree_cow_wrtxn_abort(sds_bptree_transaction **btxn);
/**
* Commit the transaction. After this operation, the transaction must not be accessed
* again. All changes to the tree are now visible after this call is made, and new
* read transactions will have this view. Commit does not affect pre-existing
* read transactions data.
*
* \param btxn The transaction to commit.
* \retval Result of the operation as sds_result.
*/
sds_result sds_bptree_cow_wrtxn_commit(sds_bptree_transaction **btxn);
/**
* Search a tree with a valid transaction reference. This returns KEY_PRESENT
* or KEY_NOT_PRESENT if the search suceeds or not. Search may operation on a valid
* uncommited write transaction, or a read transaction.
*
* \param btxn The transaction point to search.
* \param key The key to search for.
* \retval Result of the operation as sds_result.
*/
sds_result sds_bptree_cow_search(sds_bptree_transaction *btxn, void *key);
/**
* Search a tree with a valid transaction reference. This returns KEY_PRESENT
* or KEY_NOT_PRESENT if the search suceeds or not. Additionally, the value
* attached to key is placed into the pointer for target. Search may operation on a valid
* uncommited write transaction, or a read transaction.
*
* \param btxn The transaction point to search.
* \param key The key to search for.
* \param target The pointer where value will be placed on sucessful search. NULL is a
* valid value, so be sure to check the result.
* \retval Result of the operation as sds_result.
*/
sds_result sds_bptree_cow_retrieve(sds_bptree_transaction *btxn, void *key, void **target);
/**
* Delete key and the associated data from the tree. This operates only on a valid
* write transaction, and changes made are not reflected until a commit is made.
* existing reads will never see this change til they close and open new transactions.
*
* \param btxn The write transaction to operate on.
* \param key The key to delete, along with associated data. If you have called retrieve
* on this key, the key and value must not be accessed after this point within this
* transactions lifetime as the values may be invalidated.
* \retval Result of the operation as sds_result.
*/
sds_result sds_bptree_cow_delete(sds_bptree_transaction *btxn, void *key);
/**
* Insert a key and associated value to the tree via a valid write transaction.
* values and keys inserted to the tree now completely belong to the tree, and may
* be duplicated or freed at any time. After you have given a key and value to the
* tree, you must only access them via the retrieve interface in valid scenarios.
*
* \param btxn The write transaction to operate on.
* \param key The key to insert. If a duplicate key is detected, and error is returned.
* \param value The value to insert. May be NULL.
* \retval Result of the operation as sds_result.
*/
sds_result sds_bptree_cow_insert(sds_bptree_transaction *btxn, void *key, void *value);
/**
* Update a key to have a new associated value within a valid write transaction.
* This is more efficient than delete -> insert, so for updates it's preferred.
* This is needed in the case that you have previous read transactions, and want
* to alter a value, without affecting the read. You would use this by calling
* retrieve, copying the value, then calling update on the b+tree.
*
* \param btxn The write transaction to operate on.
* \param key The key to update. If the key does not exist, fall back to insert.
* \param value The value to update. May be NULL.
* \retval Result of the operation as sds_result.
*/
sds_result sds_bptree_cow_update(sds_bptree_transaction *btxn, void *key, void *value);
/**
* Search atomic functions as search, but implies a single short lived read transaction.
*
* If you have multiple searches to make, it is better to use a read transaction due to
* the memory design of the transaction. Multiple atomics may cause contention on
* certain parts of the transaction code.
*
* \param binst The cow b+tree to search.
* \param key The key to search for.
* \retval Result of the operation as sds_result.
*/
sds_result sds_bptree_cow_search_atomic(sds_bptree_cow_instance *binst, void *key);
/**
* Retrieve atomic functions as retrieve, but implies a single short lived read transaction. Calling this implies that you *must* free tha value returned to you.
*
* If you have multiple searches to make, it is better to use a read transaction due to
* the memory design of the transaction. Multiple atomics may cause contention on
* certain parts of the transaction code.
*
* \param binst The cow b+tree to search.
* \param key The key to search for.
* \param target The value retrieved. You must free this after use, with the same free function as the binst holds.
* \retval Result of the operation as sds_result.
*/
sds_result sds_bptree_cow_retrieve_atomic(sds_bptree_cow_instance *binst, void *key, void **target);
/**
* Delete atomic functions as delete, but implise a single short livied write transaction, and commit phase.
*
* If you have multiple searches to make, it is better to use a write transaction due to
* the memory design of the transaction. Multiple atomics may cause contention on
* certain parts of the transaction code.
*
* \param binst The cow b+tree to delete from.
* \param key The key to delete. This removes the associated value.
* \retval Result of the operation as sds_result.
*/
sds_result sds_bptree_cow_delete_atomic(sds_bptree_cow_instance *binst, void *key);
/**
* Insert atomic functions as insert, but implies a single short lived write transaction and commit phase.
*
* If you have multiple searches to make, it is better to use a write transaction due to
* the memory design of the transaction. Multiple atomics may cause contention on
* certain parts of the transaction code.
*
* \param binst The cow b+tree to insert to.
* \param key The key to insert.
* \param value The value to insert associated with key.
* \retval Result of the operation as sds_result.
*/
sds_result sds_bptree_cow_insert_atomic(sds_bptree_cow_instance *binst, void *key, void *value);
/**
* @}
*/
/* end sds_bptree_cow */
#define HT_SLOTS 16
typedef enum _sds_ht_slot_state {
SDS_HT_EMPTY = 0,
SDS_HT_VALUE = 1,
SDS_HT_BRANCH = 2,
} sds_ht_slot_state;
typedef struct _sds_ht_value
{
uint32_t checksum;
void *key;
void *value;
// may make this a LL of values later for collisions
} sds_ht_value;
typedef struct _sds_ht_slot
{
sds_ht_slot_state state;
union
{
sds_ht_value *value;
struct _sds_ht_node *node;
} slot;
} sds_ht_slot;
typedef struct _sds_ht_node
{
uint32_t checksum;
uint64_t txn_id;
uint_fast32_t count;
#ifdef SDS_DEBUG
uint64_t depth;
#endif
struct _sds_ht_node *parent;
size_t parent_slot;
sds_ht_slot slots[HT_SLOTS];
} sds_ht_node;
typedef struct _sds_ht_instance
{
uint32_t checksum;
char hkey[16];
sds_ht_node *root;
int64_t (*key_cmp_fn)(void *a, void *b);
uint64_t (*key_size_fn)(void *key);
void *(*key_dup_fn)(void *key);
void (*key_free_fn)(void *key);
void *(*value_dup_fn)(void *value);
void (*value_free_fn)(void *value);
} sds_ht_instance;
uint64_t sds_uint64_t_size(void *key);
sds_result
sds_ht_init(sds_ht_instance **ht_ptr,
int64_t (*key_cmp_fn)(void *a, void *b),
void (*value_free_fn)(void *value),
void *(*key_dup_fn)(void *key),
void (*key_free_fn)(void *key),
uint64_t (*key_size_fn)(void *key));
sds_result sds_ht_insert(sds_ht_instance *ht_ptr, void *key, void *value);
sds_result sds_ht_search(sds_ht_instance *ht_ptr, void *key, void **value);
sds_result sds_ht_delete(sds_ht_instance *ht_ptr, void *key);
sds_result sds_ht_verify(sds_ht_instance *ht_ptr);
sds_result sds_ht_destroy(sds_ht_instance *ht_ptr);
|