This file is indexed.

/usr/include/vspline/eval.h is in vspline-dev 0.3.1-1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

   1
   2
   3
   4
   5
   6
   7
   8
   9
  10
  11
  12
  13
  14
  15
  16
  17
  18
  19
  20
  21
  22
  23
  24
  25
  26
  27
  28
  29
  30
  31
  32
  33
  34
  35
  36
  37
  38
  39
  40
  41
  42
  43
  44
  45
  46
  47
  48
  49
  50
  51
  52
  53
  54
  55
  56
  57
  58
  59
  60
  61
  62
  63
  64
  65
  66
  67
  68
  69
  70
  71
  72
  73
  74
  75
  76
  77
  78
  79
  80
  81
  82
  83
  84
  85
  86
  87
  88
  89
  90
  91
  92
  93
  94
  95
  96
  97
  98
  99
 100
 101
 102
 103
 104
 105
 106
 107
 108
 109
 110
 111
 112
 113
 114
 115
 116
 117
 118
 119
 120
 121
 122
 123
 124
 125
 126
 127
 128
 129
 130
 131
 132
 133
 134
 135
 136
 137
 138
 139
 140
 141
 142
 143
 144
 145
 146
 147
 148
 149
 150
 151
 152
 153
 154
 155
 156
 157
 158
 159
 160
 161
 162
 163
 164
 165
 166
 167
 168
 169
 170
 171
 172
 173
 174
 175
 176
 177
 178
 179
 180
 181
 182
 183
 184
 185
 186
 187
 188
 189
 190
 191
 192
 193
 194
 195
 196
 197
 198
 199
 200
 201
 202
 203
 204
 205
 206
 207
 208
 209
 210
 211
 212
 213
 214
 215
 216
 217
 218
 219
 220
 221
 222
 223
 224
 225
 226
 227
 228
 229
 230
 231
 232
 233
 234
 235
 236
 237
 238
 239
 240
 241
 242
 243
 244
 245
 246
 247
 248
 249
 250
 251
 252
 253
 254
 255
 256
 257
 258
 259
 260
 261
 262
 263
 264
 265
 266
 267
 268
 269
 270
 271
 272
 273
 274
 275
 276
 277
 278
 279
 280
 281
 282
 283
 284
 285
 286
 287
 288
 289
 290
 291
 292
 293
 294
 295
 296
 297
 298
 299
 300
 301
 302
 303
 304
 305
 306
 307
 308
 309
 310
 311
 312
 313
 314
 315
 316
 317
 318
 319
 320
 321
 322
 323
 324
 325
 326
 327
 328
 329
 330
 331
 332
 333
 334
 335
 336
 337
 338
 339
 340
 341
 342
 343
 344
 345
 346
 347
 348
 349
 350
 351
 352
 353
 354
 355
 356
 357
 358
 359
 360
 361
 362
 363
 364
 365
 366
 367
 368
 369
 370
 371
 372
 373
 374
 375
 376
 377
 378
 379
 380
 381
 382
 383
 384
 385
 386
 387
 388
 389
 390
 391
 392
 393
 394
 395
 396
 397
 398
 399
 400
 401
 402
 403
 404
 405
 406
 407
 408
 409
 410
 411
 412
 413
 414
 415
 416
 417
 418
 419
 420
 421
 422
 423
 424
 425
 426
 427
 428
 429
 430
 431
 432
 433
 434
 435
 436
 437
 438
 439
 440
 441
 442
 443
 444
 445
 446
 447
 448
 449
 450
 451
 452
 453
 454
 455
 456
 457
 458
 459
 460
 461
 462
 463
 464
 465
 466
 467
 468
 469
 470
 471
 472
 473
 474
 475
 476
 477
 478
 479
 480
 481
 482
 483
 484
 485
 486
 487
 488
 489
 490
 491
 492
 493
 494
 495
 496
 497
 498
 499
 500
 501
 502
 503
 504
 505
 506
 507
 508
 509
 510
 511
 512
 513
 514
 515
 516
 517
 518
 519
 520
 521
 522
 523
 524
 525
 526
 527
 528
 529
 530
 531
 532
 533
 534
 535
 536
 537
 538
 539
 540
 541
 542
 543
 544
 545
 546
 547
 548
 549
 550
 551
 552
 553
 554
 555
 556
 557
 558
 559
 560
 561
 562
 563
 564
 565
 566
 567
 568
 569
 570
 571
 572
 573
 574
 575
 576
 577
 578
 579
 580
 581
 582
 583
 584
 585
 586
 587
 588
 589
 590
 591
 592
 593
 594
 595
 596
 597
 598
 599
 600
 601
 602
 603
 604
 605
 606
 607
 608
 609
 610
 611
 612
 613
 614
 615
 616
 617
 618
 619
 620
 621
 622
 623
 624
 625
 626
 627
 628
 629
 630
 631
 632
 633
 634
 635
 636
 637
 638
 639
 640
 641
 642
 643
 644
 645
 646
 647
 648
 649
 650
 651
 652
 653
 654
 655
 656
 657
 658
 659
 660
 661
 662
 663
 664
 665
 666
 667
 668
 669
 670
 671
 672
 673
 674
 675
 676
 677
 678
 679
 680
 681
 682
 683
 684
 685
 686
 687
 688
 689
 690
 691
 692
 693
 694
 695
 696
 697
 698
 699
 700
 701
 702
 703
 704
 705
 706
 707
 708
 709
 710
 711
 712
 713
 714
 715
 716
 717
 718
 719
 720
 721
 722
 723
 724
 725
 726
 727
 728
 729
 730
 731
 732
 733
 734
 735
 736
 737
 738
 739
 740
 741
 742
 743
 744
 745
 746
 747
 748
 749
 750
 751
 752
 753
 754
 755
 756
 757
 758
 759
 760
 761
 762
 763
 764
 765
 766
 767
 768
 769
 770
 771
 772
 773
 774
 775
 776
 777
 778
 779
 780
 781
 782
 783
 784
 785
 786
 787
 788
 789
 790
 791
 792
 793
 794
 795
 796
 797
 798
 799
 800
 801
 802
 803
 804
 805
 806
 807
 808
 809
 810
 811
 812
 813
 814
 815
 816
 817
 818
 819
 820
 821
 822
 823
 824
 825
 826
 827
 828
 829
 830
 831
 832
 833
 834
 835
 836
 837
 838
 839
 840
 841
 842
 843
 844
 845
 846
 847
 848
 849
 850
 851
 852
 853
 854
 855
 856
 857
 858
 859
 860
 861
 862
 863
 864
 865
 866
 867
 868
 869
 870
 871
 872
 873
 874
 875
 876
 877
 878
 879
 880
 881
 882
 883
 884
 885
 886
 887
 888
 889
 890
 891
 892
 893
 894
 895
 896
 897
 898
 899
 900
 901
 902
 903
 904
 905
 906
 907
 908
 909
 910
 911
 912
 913
 914
 915
 916
 917
 918
 919
 920
 921
 922
 923
 924
 925
 926
 927
 928
 929
 930
 931
 932
 933
 934
 935
 936
 937
 938
 939
 940
 941
 942
 943
 944
 945
 946
 947
 948
 949
 950
 951
 952
 953
 954
 955
 956
 957
 958
 959
 960
 961
 962
 963
 964
 965
 966
 967
 968
 969
 970
 971
 972
 973
 974
 975
 976
 977
 978
 979
 980
 981
 982
 983
 984
 985
 986
 987
 988
 989
 990
 991
 992
 993
 994
 995
 996
 997
 998
 999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
/************************************************************************/
/*                                                                      */
/*    vspline - a set of generic tools for creation and evaluation      */
/*              of uniform b-splines                                    */
/*                                                                      */
/*            Copyright 2015 - 2017 by Kay F. Jahnke                    */
/*                                                                      */
/*    The git repository for this software is at                        */
/*                                                                      */
/*    https://bitbucket.org/kfj/vspline                                 */
/*                                                                      */
/*    Please direct questions, bug reports, and contributions to        */
/*                                                                      */
/*    kfjahnke+vspline@gmail.com                                        */
/*                                                                      */
/*    Permission is hereby granted, free of charge, to any person       */
/*    obtaining a copy of this software and associated documentation    */
/*    files (the "Software"), to deal in the Software without           */
/*    restriction, including without limitation the rights to use,      */
/*    copy, modify, merge, publish, distribute, sublicense, and/or      */
/*    sell copies of the Software, and to permit persons to whom the    */
/*    Software is furnished to do so, subject to the following          */
/*    conditions:                                                       */
/*                                                                      */
/*    The above copyright notice and this permission notice shall be    */
/*    included in all copies or substantial portions of the             */
/*    Software.                                                         */
/*                                                                      */
/*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND    */
/*    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES   */
/*    OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND          */
/*    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT       */
/*    HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,      */
/*    WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING      */
/*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR     */
/*    OTHER DEALINGS IN THE SOFTWARE.                                   */
/*                                                                      */
/************************************************************************/

/*! \file eval.h

    \brief code to evaluate uniform b-splines

    This body of code contains class evaluator and auxilliary classes which are
    needed for it's smooth operation.

    The evaluation is a reasonably straightforward process: A subset of the coefficient
    array, containing coefficients 'near' the point of interest, is picked out, and
    a weighted summation over this subset produces the result of the evaluation.
    The complex bit is to have the right coefficients in the first place
    (this is what prefiltering does), and to use the appropriate weights on
    the coefficient window. For b-splines, there is an efficient method to
    calculate the weights by means of a matrix multiplication, which is easily
    extended to handle b-spline derivatives as well. Since this code lends itself
    to a generic implementation, and it can be parametrized by the spline's order,
    and since the method performs well, I use it here in preference to the code
    which Thevenaz uses (which is, for the orders of splines it encompasses, the matrix
    multiplication written out with a few optimizations, like omitting multiplications
    with zero, and slightly more concise calculation of powers)
    
    Evaluation of a b-spline is, compared to prefiltering, more computationally intensive
    and less memory-bound, so the profit from vectorization, especially for float data,
    is more pronounced here than for the prefiltering. On my system, I found single-precision
    operation was about three to four times as fast as unvectorized code (AVX2).
    
    The central class of this file is class evaluator. evaluator objects are set up to
    provide evaluation of a specific b-spline. Once they are set up they don't change and
    effectively become pure functors with several overloaded evaluation methods for different
    constellations of parameters. The evaluation methods typically take their arguments per
    reference. The details of the evaluation variants, together with explanations of
    specializations used for extra speed, can be found with the individual evaluation
    routines.
    
    What do I mean by the term 'pure' functor? It's a concept from functional programming.
    It means that calling the functor will not have any effect on the functor itself - it
    can't change once it has been constructed. This has several nice effects: it can
    potentially be optimized very well, it is thread-safe, and it will play well with
    functional programming concepts - and it's conceptionally appealing.
    
    Code using class evaluator will probably use it at some core place where it is
    part of some processing pipeline. An example would be an image processing program:
    one might have some outer loop generating arguments (typically simdized types)
    which are processed one after the other to yield a result. The processing will
    typically have several stages, like coordinate generation and transformations,
    then use class evaluator to pick an interpolated intermediate result, which is
    further processed by, say, colour or data type manipulations before finally
    being stored in some target container. The whole processing pipeline can be
    coded to become a single functor, with one of class evaluator's eval-type
    routines embedded somewhere in the middle, and all that's left is code to
    efficiently handle the source and destination to provide arguments to the
    pipeline - like the code in remap.h. And since the code in remap.h is made to
    provide the data feeding and storing, the only coding needed is for the pipeline,
    which is where the 'interesting' stuff happens.
    
    The code in this file concludes by providing factory functions to obtain
    evaluators for vspline::bspline objects. These factory functions produce
    objects which are type-erased (see vspline::grok_type) wrappers around the
    evaluators, which hide what's inside and simply provide evaluation of the
    spline at given coordinates. These objects also provide operator(), so that
    they can be used like functions. If Vc is used and the spline's data type
    is vectorizable, evaluation of vectorized coordinates is also supported and
    will produce vectorized values. Since the objects produced by the factory
    functions are derived from vspline::unary_functor, they can be fed to the
    functions in transform.h, like any other vspline::unary_functor.
*/

#ifndef VSPLINE_EVAL_H
#define VSPLINE_EVAL_H

#include "bspline.h"
#include "unary_functor.h"
#include "map.h"

namespace vspline {

/// is_singular tests if a type is either a plain fundamental or a Vc SIMD type.
/// The second possibility is only considered if Vc is used at all.
/// This test serves to differentiate between nD values like vigra TinyVectors
/// which fail the test and singular values, which pass. Note that this test
/// fails vigra::TinyVector < T , 1 > even though one might consider it 'singular'.

template < class T >
using is_singular = typename
  std::conditional
  <    std::is_fundamental < T > :: value
#ifdef USE_VC
    || Vc::is_simd_vector < T > :: value
#endif
    ,
    std::true_type ,
    std::false_type
  > :: type ;

/// next we have coordinate splitting functions. For odd splines, coordinates
/// are split into an integral part and a remainder, which is used for weight
/// generation.
/// we have two variants for odd_split and the dispatch below them.
/// Note how the initial call to std::floor produces a real type, which
/// is used to subtract from 'v', yielding the remainder in 'fv'. Only after
/// having used this real representation of the integral part, it is cast
/// to an integral type by assigning it to 'iv'. This is the most efficient
/// route, better than producing an integral-typed integral part directly
/// and subtracting that from 'v', which would require another conversion.
/// Technically, one might consider this 'split' as a remainder division by 1.

template < typename ic_t , typename rc_t >
void odd_split ( rc_t v , ic_t& iv , rc_t& fv , std::true_type )
{
  rc_t fl_i = std::floor ( v ) ;
  fv = v - fl_i ;
  iv = ic_t ( fl_i )  ;
}

template < typename ic_t , typename rc_t >
void odd_split ( rc_t v , ic_t& iv , rc_t& fv , std::false_type )
{
  for ( int d = 0 ; d < vigra::ExpandElementResult < rc_t > :: size ; d++ )
    odd_split ( v[d] , iv[d] , fv[d] , std::true_type() ) ;
}

template < typename ic_t , typename rc_t >
void odd_split ( rc_t v , ic_t& iv , rc_t& fv )
{
  odd_split ( v , iv , fv , is_singular<rc_t>() ) ;
}

/// for even splines, the integral part is obtained by rounding. when the
/// result of rounding is subtracted from the original coordinate, a value
/// between -0.5 and 0.5 is obtained which is used for weight generation.
/// we have two variants for even_split and the dispatch below.

// TODO: there is a problem here: the lower limit for an even spline
// is -0.5, which should be rounded *towards* zero, but std::round rounds
// away from zero. The same applies to the upper limit, which should
// also be rounded towards zero, not away from it. Currently I am working
// around the issue by increasing the spline's headroom by 1 for even splines,
// but I'd like to be able to use rounding towards zero. It might be argued,
// though, that potentially producing out-of-range access by values which
// are only just outside the range is cutting it a bit fine and the extra
// headroom for even splines makes the code more robust, so accepting the
// extra headroom would be just as acceptable as the widened right brace for
// some splines which saves checking the incoming coordinate against
// the upper limit. The code increasing the headroom is in bspline.h,
// in bspline's constructor just befor the call to setup_metrics.

template < typename ic_t , typename rc_t >
void even_split ( rc_t v , ic_t& iv , rc_t& fv , std::true_type )
{
  rc_t fl_i = std::round ( v ) ;
  fv = v - fl_i ;
  iv = ic_t ( fl_i ) ;
}

template < typename ic_t , typename rc_t >
void even_split ( rc_t v , ic_t& iv , rc_t& fv , std::false_type )
{
  for ( int d = 0 ; d < vigra::ExpandElementResult < rc_t > :: size ; d++ )
    even_split ( v[d] , iv[d] , fv[d] , std::true_type() ) ;
}

template < typename ic_t , typename rc_t >
void even_split ( rc_t v , ic_t& iv , rc_t& fv )
{
  even_split ( v , iv , fv , is_singular<rc_t>() ) ;
}

#ifdef USE_VC

/// for TinyVectors of Vc SIMD vector types (Vc::Vector, Vc::SimdArray),
/// we enable operator+= and operator*=
/// This allows us to write concise code which does not need
/// to iterate over the TinyVector's elements and bypasses
/// vigra's trait system, which we'd otherwise have to supply
/// with NumericTraits and PromoteTraits for the SIMD types.

template < typename vector_type , int N >
TinyVector
< typename std::enable_if
  < Vc::is_simd_vector < vector_type > :: value ,
    vector_type
  > :: type ,
  N
>
& operator+= ( TinyVector < vector_type , N > & tv ,
               const TinyVector < vector_type , N > & other )
{
  for ( int i = 0 ; i < N ; i++ )
    tv[i] += other[i] ;
  return tv ;
}

template < typename vector_type , int N >
TinyVector
< typename std::enable_if
  < Vc::is_simd_vector < vector_type > :: value ,
    vector_type
  > :: type ,
  N
>
& operator*=
  ( TinyVector < vector_type , N > & tv ,
    const TinyVector < vector_type , N > & other )
{
  for ( int i = 0 ; i < N ; i++ )
    tv[i] *= other[i] ;
  return tv ;
}

/// now we also need the broadcast operation of TinyVector<T,N> *= T
/// for it all to pan out. We refrain from defining the reverse operation
/// and the plain op types for now, but we widen the spec a little to
/// allow multiplication with another vectorized type, so that mixed
/// float/double operations become possible.

template < typename vector_type , typename vt2 , int N >
TinyVector
< typename std::enable_if
  < Vc::is_simd_vector < vector_type > :: value ,
    vector_type
  > :: type ,
  N
>
& operator*= ( TinyVector < vector_type , N > & tv ,
               const vt2 & other )
{
  for ( int i = 0 ; i < N ; i++ )
    tv[i] *= other ;
  return tv ;
}

#endif

/// a 'weight matrix' is used to calculate a set of weights for a given remainder
/// part of a real coordinate. The 'weight matrix' is multipled with a vector
/// containing the power series of the given remainder, yielding a set of weights
/// to apply to a window of b-spline coefficients.
///
/// The routine 'calculate_weight_matrix' originates from vigra. I took the original
/// routine BSplineBase<spline_order, T>::calculateWeightMatrix() from vigra and
/// changed it in several ways:
///
/// - the spline degree is now a runtime parameter, not a template argument
///
/// - the derivative degree is passed in as an additional parameter, directly
///   yielding the appropriate weight matrix needed to calculate a b-spline's derivative
///   with only a slight modification to the original code
///
/// - the code uses my modified bspline basis function which takes the degree as a
///   run time parameter instead of a template argument and works with integral
///   operands and precalculated values, which makes it very fast, even for high
///   spline degrees. bspline_basis() is in basis.h.

template < class target_type >
MultiArray < 2 , target_type > calculate_weight_matrix
( int degree , int derivative )
{
  const int order = degree + 1 ;
  
  // guard against impossible parameters
  
  if ( derivative >= order )
    return MultiArray < 2 , target_type >() ;

  // allocate space for the weight matrix
  
  MultiArray < 2 , target_type > res ( order , order - derivative ) ;
  
  long double faculty = 1.0 ;
  
  for ( int row = 0 ; row < order - derivative ; row++ )
  {
    if ( row > 1 )
      faculty *= row ;

    int x = degree / 2 ; // (note: integer division)

    // we store to a MultiArray, which is row-major, so storing as we do
    // places the results in memory in the precise order in which we want to
    // use them in the weight calculation.
    // note how we pass x to bspline_basis() as an integer. This way, we pick
    // a very efficient version of the basis function which only evaluates at
    // whole numbers. This basis function version does hardly any calculations
    // but instead relies on precalculated values. see bspline_basis in prefilter.h
    // note: with large degrees (20+), this still takes a fair amount of time, but
    // rather seconds than minutes with the standard routine.
    
    for ( int column = 0 ; column < order ; ++column , --x )
      res ( column , row )
      =   bspline_basis<long double> ( x , degree , row + derivative )
        / faculty;
  }

  return res;
}

/// this functor calculates weights for a b-spline or it's derivatives.
/// with d == 0, the weights are calculated for plain evaluation.
/// Initially I implemented weight_matrix as a static member, hoping the code
/// would perform better, but I could not detect significant benefits. Passing
/// the derivative to the constructor gives more flexibility and less type
/// proliferation.

template < typename ele_type >
struct bspline_derivative_weights
{
  MultiArray < 2 , ele_type > weight_matrix ;
  int degree ;
  int derivative ;
  int columns ;
  int rows ;

  // we need a default constructor to create TinyVectors of this type
  // note that a default-constructed functor must not be called!
  
  bspline_derivative_weights()
  { } ;

  /// bspline_derivative_weights' constructor takes the desired b-spline degree
  /// and the desired derivative, defaulting to 0.

  bspline_derivative_weights ( int _degree , int _derivative = 0 )
  : weight_matrix ( calculate_weight_matrix < ele_type >
                    ( _degree , _derivative )
                  ) ,
    degree ( _degree ) ,
    derivative ( _derivative ) ,
    columns ( _degree + 1 ) , 
    rows ( _degree + 1 - _derivative )
    { } ;
  
  /// operator() deposits weights for the given delta at the location 'result'
  /// points to. target_type and delta_type may be fundamentals or simdized types.
    
  template < class target_type , class delta_type >
  void operator() ( target_type* result , const delta_type & delta ) const
  {
    target_type power ( delta ) ;
    auto factor_it = weight_matrix.begin() ;
    auto end = weight_matrix.end() ;

    // the result is initialized with the first row of the 'weight matrix'.
    // We save ourselves multiplying it with delta^0.
 
    for ( int c = 0 ; c < columns ; c++ )
    {
      result[c] = *factor_it ;
      ++factor_it ;
    }
    
    if ( degree )
    {
      for ( ; ; )
      {
        for ( int c = 0 ; c < columns ; c++ )
        {
          result[c] += power * *factor_it ;
          ++factor_it ;
        }
        if ( factor_it == end )
        {
          // avoid next multiplication if exhausted, break now
          break ;
        }
         // otherwise produce next power(s) of delta(s)
        power *= target_type ( delta ) ;
      }
    }
  }
} ;

/// class evaluator encodes evaluation of a B-spline. Technically, a vspline::evaluator
/// is a vspline::unary_functor, which has the specific capability of evaluating a
/// specific uniform b-spline. This makes it a candidate to be passed to the functions
/// in transform.h, like remap() and transform(), and it also makes it suitable for
/// vspline's functional constructs like chaining, mapping, etc.
///
/// While creating and using vspline::evaluators is simple enough, especially from
/// vspline::bspline objects, there are also factory functions producing objects capable
/// of evaluating a b-spline. These objects are wrappers around a vspline::evaluator,
/// please see the factory functions make_evaluator() and make_safe_evaluator() at the
/// end of this file.
///
/// If you don't want to concern yourself with the details, the easiest way is to
/// have a bspline object handy and use one of the factory functions, assigning the
/// resulting functor to an auto variable:
///
/// // given a vspline::bspline object 'bspl'
/// // create an object which can evaluate the spline
/// auto ev = vspline::make_safe_evaluator ( bspl ) ;
/// // which can be used like this:
/// auto value = ev ( some_real_coordinate ) ;
///
/// The evaluation relies on 'braced' coefficients, as they are normally provided by
/// a vspline::bspline object (the exception being bspline objects created with
/// UNBRACED or MANUAL strategy). While the most general constructor will accept
/// a raw pointer to coefficients (enclosed in the necessary 'brace'), this will rarely
/// be used, and an evaluator will be constructed from a bspline object. In the most
/// trivial case there are only two things which need to be done:
///
/// The specific type of evaluator has to be established by providing the relevant template
/// arguments. Here, we need two types: the 'coordinate type' and the 'value type'.
///
/// - The coordinate type is encoded as a vigra::TinyVector of some real data type - if you're
/// doing image processing, the typical type would be a vigra::TinyVector < float , 2 >.
/// fundamental real types are also accepted (for 1D splines)
///
/// - The value type has to be either an elementary real data type such as 'float' or 'double',
/// or a vigra::TinyVector of such an elementary type. Other data types which can be handled
/// by vigra's ExpandElementResult mechanism should also work. When processing colour images,
/// your value type would typically be a vigra::TinyVector < float , 3 >.
///
/// Note that class evaluator operates with 'native' spline coordinates, which run with
/// the coefficient array's core shape, so typically from 0 to M-1 for a 1D spline over
/// M values. Access with different coordinates is most easily done by 'chaining' class
/// evaluator objects to other vspline::unary_functor objects providing coordinate
/// translation, see unary_functor.h, map.h and domain.h.
///
/// While the template arguments specify coordinate and value type for unvectorized
/// operation, the types for vectorized operation are inferred from them, using vspline's
/// vector_traits mechanism. The width of SIMD vectors to be used has to be established.
/// This is not mandatory - if omitted, a default value is picked.
///
/// With the evaluator's type established, an evaluator of this type can be constructed by
/// passing a vspline::bspline object to the constructor. Naturally, the bspline object has
/// to contain data of the same value type, and the spline has to have the same number of
/// dimensions as the coordinate type. Alternatively, coefficients can be passed in as a
/// pointer into a field of suitably braced coefficients.
/// 
/// I have already hinted at the evaluation process used, but here it is again in a nutshell:
/// The coordinate at which the spline is to be evaluated is split into it's integral part
/// and a remaining fraction. The integral part defines the location where a window from the
/// coefficient array is taken, and the fractional part defines the weights to use in calculating
/// a weighted sum over this window. This weighted sum represents the result of the evaluation.
/// Coordinate splitting is done with the method split(), which picks the appropriate variant
/// (different code is used for odd and even splines)
///
/// The generation of the weights to be applied to the window of coefficients is performed
/// by employing the weight functors above. What's left to do is to bring all the components
/// together, which happens in class evaluator. The workhorse code in the subclass _eval
/// takes care of performing the necessary operations recursively over the dimensions of the
/// spline.
///
/// a vspline::evaluator is technically a vspline::unary_functor. This way, it can be directly
/// used by constructs like vspline::chain and has a consistent interface which allows code
/// using evaluators to query it's specifics. Since evaluation uses no conditionals on the
/// data path, the whole process can be formulated as a set of templated member functions
/// using vectorized types or unvectorized types, so the code itself is vector-agnostic.
/// This makes for a nicely compact body of code inside class evaluator, at the cost of
/// having to provide a bit of collateral code to make data access syntactically uniform.
///
/// There is a variety of overloads of class evaluator's eval() method available. Some of
/// these overloads go beyond the 'usual' eval routines, like evaluation with known weights,
/// sidestepping the weight generation from coordinates.
///
/// The evaluation strategy is to have all dependencies of the evaluation except for the actual
/// coordinates taken care of by the constructor - and immutable for the evaluator's lifetime.
/// The resulting object has no state which is modified after construction, making it thread-safe.
/// It also constitutes a 'pure' functor in a functional-programming sense, because it has
/// no mutable state and no side-effects, as can be seen by the fact that the eval methods
/// are all marked const.
///
/// The eval() overloads form a hierarchy, as evaluation progresses from accepting unsplit real
/// coordinates to offsets and weights. This allows calling code to handle parts of the
/// delegation hierarchy itself, only using an evaluator at a specific level.
///
/// By providing the evaluation in this way, it becomes easy for calling code to integrate
/// the evaluation into more complex functors. Consider, for example, code
/// which generates coordinates with a functor, then evaluates a b-spline at these coordinates,
/// and finally subjects the resultant values to some postprocessing. All these processing
/// steps can be bound into a single functor, and the calling code can be reduced to polling
/// this functor until it has provided the desired number of output values.
///
/// While the 'unspecialized' evaluator will try and do 'the right thing' by using general
/// purpose code fit for all eventualities, for time-critical operation there is a
/// specialization which can be used to make the code faster:
///
/// - template argument 'specialize' can be set to 0 to forcibly use (more efficient) nearest
/// neighbour interpolation, which has the same effect as simply running with degree 0 but avoids
/// code which isn't needed for nearest neighbour interpolation (like the application of weights,
/// which is futile under the circumstances, the weight always being 1.0).
/// specialize can also be set to 1 for explicit n-linear interpolation. Any other value will
/// result in the general-purpose code being used.
///
/// Note that, contrary to my initial implementation, all forms of coordinate mapping were
/// removed from class evaluator. The 'mapping' which is left is, more aptly, called
/// 'splitting', since the numeric value of the incoming coordinate is never modified.
/// Folding arbitrary coordinates into the spline's defined range now has to be done
/// externally, typically by wrapping class evaluator together with some coordinate
/// modification code into a combined vspline::unary_functor. map.h provides code for
/// common mappings, see there. Arbitrary manipulation of incoming and outgoing data
/// can be realized by using vspline::chain in unary_functor.h, see there.
///
/// Note how the default number of vector elements is fixed by picking the number of ele_type
/// which vspline::vector_traits considers appropriate. There should rarely be a need to
/// choose a different number of vector elements: evaluation will often be the most
/// computationally intensive part of a processing chain, and therefore this choice is
/// sensible. But it's not mandatory. Just keep in mind that when building processing
/// pipelines, all their elements must use the same vectorization width.

// TODO: evaluator uses a great many typedefs which is a bit over the top, even though
// it was helpful in the development process.

template < typename _coordinate_type , // nD real coordinate
           typename _value_type ,      // type of coefficient/result
           // nr. of vector elements
           int _vsize = vspline::vector_traits < _value_type > :: size ,
           // per default, don't specialize for degree 0 or 1 spline
           int specialize = -1
         >
class evaluator
: public unary_functor < _coordinate_type , _value_type , _vsize >
{
  
public:

  // we want to access facilites of the base class (vspline::unary_functor<...>)
  // so we use a typedef for the base class.

  typedef unary_functor < _coordinate_type , _value_type , _vsize > base_type ;

  typedef _value_type value_type ;           // === base_type::out_type
  typedef _coordinate_type coordinate_type ; // === base_type::in_type

  // we are relying on base_type to provide several types:
  
  typedef typename base_type::in_ele_type rc_type ;   // elementary type of a coordinate
  typedef typename base_type::out_ele_type ele_type ; // elementary type of value_type

  using base_type::dim_in ;
  using base_type::dim_out ;
  using base_type::vsize ;
  
  enum { dimension = base_type::dim_in }  ;
  enum { level = dimension - 1 }  ;
  enum { channels = base_type::dim_out } ;

  // types for nD integral indices, these are not in base_type
  
  typedef int ic_type ;            // we're only using int for indices
  
  typedef vigra::TinyVector < ic_type , dimension > nd_ic_type ;

  typedef vigra::TinyVector < rc_type , dimension > nd_rc_type ;

  /// view_type is used for a view to the coefficient array

  typedef MultiArrayView < dimension , value_type >                view_type ;

  /// type used for nD array coordinates, array shapes

  typedef typename view_type::difference_type                      shape_type ;

  typedef vigra::TinyVector < int , dimension >                    derivative_spec_type ;

  typedef typename MultiArrayView < 1 , ic_type > :: const_iterator offset_iterator ;
  
#ifdef USE_VC

  // types for vectorized evaluation.

  using typename base_type::in_v ;
  using typename base_type::in_ele_v ;
  using typename base_type::out_v ;
  using typename base_type::out_ele_v ;

  typedef typename base_type::out_ele_v ele_v ;

  typedef typename vector_traits < ic_type , vsize > :: ele_v ic_v ;
  typedef typename vector_traits < nd_ic_type , vsize > :: type nd_ic_v ;
  typedef typename vector_traits < nd_rc_type , vsize > :: type nd_rc_v ;
  
#endif // USE_VC

private:
  
  // Initially I was using a template argument for this flag, but it turned out
  // that using a const bool set at construction time performed just as well.
  // Since this makes using class evaluator easier, I have chosen to go this way.

  const bool even_spline_degree ;    ///< flag containing the 'evenness' of the spline
  
  const ele_type * const cf_ebase ;     ///< coefficient base adress in terms of elementary type
  const shape_type cf_estride ;         ///< strides in terms of elementary type
  MultiArray < 1 , ic_type > eoffsets ; ///< offsets in terms of elementary type
  vigra::TinyVector < bspline_derivative_weights < ele_type > , dimension > wgt ;
  const int spline_degree ;
  const int spline_order ;
  const int window_size ;
  
  int min_offset , max_offset ;
  bool have_min_max_offset ;

public:

  /// split function. This function is used to split incoming real coordinates
  /// into an integral and a remainder part, which are used at the core of the
  /// evaluation. selection of even or odd splitting is done via the const bool
  /// flag 'even_spline_degree' My initial implementation had this flag as a
  /// template argument, but this way it's more flexible and there seems to
  /// be no runtime penalty. This method delegates to the free function templates
  /// even_split and odd_split, respectively, which are defined above class evaluator.

  template < class IT , class RT >
  void split ( const RT& input , IT& select , RT& tune ) const
  {
    if ( even_spline_degree )
      even_split < IT , RT > ( input , select , tune ) ;
    else
      odd_split < IT , RT > ( input , select , tune ) ;
  }

  const int & get_order() const
  {
    return spline_order ;
  }

  const int & get_degree() const
  {
    return spline_degree ;
  }

  const shape_type & get_estride() const
  {
    return cf_estride ;
  }

  /// this constructor is the most flexible variant and will ultimately be called
  /// by all other constructor overloads. This constructor will not usually be
  /// called directly - most code will use the overloads taking vspline::bspline
  /// objects. This constructor takes four arguments:
  ///
  /// - A pointer into the coefficient array, expressed as a pointer to the elementary
  /// type of the spline's value_type. So if the spline contains float RGB pixels,
  /// the elementary type is plain float. The pointer is expected to point to the
  /// coefficient which coincides with the origin of the data the spline was built
  /// over. Note that usually this is *not* simply the beginning of the coefficient
  /// array, since the coeffcicent array is 'braced' - surrounded by additional
  /// coefficients to allow evaluation without bounds checking. When generating
  /// this pointer from a bspline object, what's passed here is the base address
  /// of the coefficient array's 'core', which is the part of the coeffcicent
  /// array corresponding 1:1 with the data the spline was built over.
  ///
  /// - The stride of the data, expressed in units of the elementary type.
  /// If the data are float RGB pixels and held in a MultiArray, this will be
  /// thrice the MultiArray's stride.
  ///
  /// - The degree of the spline (3 == cubic spline)
  ///
  /// - Specification of the desired derivatives of the spline,
  /// defaults to 0 (plain evaluation).
  ///
  /// The calling code must not(!) deallocate the coefficients while the evaluator is
  /// in use. The coefficients are accessed via the pointer passed in, they are not
  /// copied.
  ///
  /// Note that if you have an array of coefficients which is not braced, you can still
  /// construct an evaluator from it, but it will be your responsibility to avoid
  /// out-of-bounds access to coefficients: there will be no safeguard, the evaluation
  /// code will not check incoming coordinates for validity, and even seemingly safe
  /// operations near the margins may fail because the evaluation code makes assumptions
  /// about the size of the brace which may not be obvious. If you want to go down this
  /// path, please refer to the bracing-related code in class bspline. There are two
  /// functions, get_left_brace_size() and get_right_brace_size(), which calculate
  /// the sizes of the braces the evaluation code expects. The easiest path to take
  /// would be to obtain the brace sized for your specific spline type and extract
  /// a MultiArrayView to the central part of your coefficient array which leaves
  /// a margin of just this size:
  ///
  /// // given a coefficient array cf in a MultiArrayView
  /// // and the left and right brace size lbsz and rbsz
  ///
  /// auto core = cf.subarray ( lbsz , cf.shape() - rbsz ) ;
  ///
  /// then you can pass core.data() to this constructor and evaluate with coordinates
  /// modified to reflect the smaller size of the coefficient subarray you're using
  /// instead of the 'full' coefficient array. There are situations where such a course
  /// of action is sensible, for example: you might have image data in memory which you
  /// want to display quickly using bilinear interpolation, not caring if a bit of the
  /// margin is not shown. Since you are using bilinear interpolation, you need no
  /// prefiltering and can access the image data directly.
  ///
  /// Note how this constructor can be used to create an evaluator for
  /// a 'shifted' spline by simply passing a spline_degree which is different
  /// to what was used for prefiltering. This is, of course, only safe if a 
  /// lower degree is used or the coefficient array has sufficient headroom
  /// to avoid out-of-bounds access. See the discussion of shifting in the
  /// documentation of the constructor overload taking a bspline object.
  ///
  /// So, in a nutshell, if you use this constructor variant, you are supposed to
  /// know what you're doing. Most user code will use the 'safe' path and come from
  /// a bspline object.
  
  evaluator ( const ele_type * const _cf_ebase ,
              const shape_type & _cf_estride ,
              int _spline_degree ,
              derivative_spec_type _derivative  = derivative_spec_type ( 0 ) )
  : cf_ebase ( _cf_ebase ) ,
    cf_estride ( _cf_estride ) ,
    spline_degree ( _spline_degree ) ,
    even_spline_degree ( ! ( _spline_degree & 1 ) ) ,
    spline_order ( _spline_degree + 1 ) ,
    wgt { bspline_derivative_weights < ele_type > ( _spline_degree , 0 ) } ,
    window_size ( std::pow ( _spline_degree + 1 , int(dimension) ) )
  {
    // if any derivatives are used, reinitalize the weight functors where
    // a derivative other than 0 is requested. TODO potentially slightly wasteful,
    // if no degree-0 evaluation is done or duplicate weight functors are produced.
    // But setting up the weight functors is only expensive for high-degree splines,
    // so I consider this a corner case and ignore it for now.

    for ( int d = 0 ; d < dimension ; d++ )
    {
      if ( _derivative[d] )
      {
        wgt[d] = bspline_derivative_weights < ele_type >
          ( _spline_degree , _derivative[d] ) ;
      }
    }
        
    // The evaluation forms a weighted sum over a window into the coeffcicent array.
    // The sequence of offsets we calculate here is the set of pointer differences
    // from the first element in that window to all elements in the window. It's
    // another way of coding this window, where all index calculations have already
    // been done beforehand rather than performing them during the traversal of the
    // window by means of stride/shape arithmetic. Coding the window in this fashion
    // also makes it easy to vectorize the code.
    
    eoffsets = MultiArray < 1 , ptrdiff_t > ( window_size ) ;
    
    // we want to iterate over all nD indexes in a window which has equal extent
    // of spline_order in all directions (like the reconstruction filter's kernel),
    // relative to a point which is spline_degree/2 away from the origin along
    // every axis. This sounds complicated but is really quite simple: For a cubic
    // b-spline over 2D data we'd get
    
    // (-1,-1) , (0,-1), (1,-1), (2,-1)
    // (-1, 0) , (0, 0), (1, 0), (2, 0)
    // (-1, 1) , (0, 1), (1, 1), (2, 1)
    // (-1, 2) , (0, 2), (1, 2), (2, 2)
    
    // for the indexes, which are subsequently multiplied with the strides and
    // summed up to obtain 1D offsets instead of nD coordinates. So if the coefficient
    // array has strides (10,100) and the coefficients are single-channel, the sequence
    // of offsets generated is
    
    // -110, -100, -90, -80,  // which is -1 * 10 + -1 * 100, 0 * 10 + -1 * 100 ...
    //  -10,    0,  10,  20,
    //   90,  100, 110, 120,
    //  190,  200, 210, 220
  
    shape_type window_shape ( spline_order ) ;
    vigra::MultiCoordinateIterator<dimension> mci ( window_shape ) ;
    
    // we want to save the offsets in 'eoffsets'
    
    auto target = eoffsets.begin() ;

    for ( int i = 0 ; i < window_size ; i++ )
    {
      // offsets are calculated by multiplying indexes with the coefficient array's
      // strides and summing up. So now we have offsets instead of nD indices, and
      // using these offsets saves us index calculations during evaluation.
      // Note how we subtract spline_degree/2 to obtain indexes which are relative
      // to the window's center.
      *target = sum ( ( *mci - spline_degree / 2 ) * cf_estride ) ;
      ++mci ;
      ++target ;
    }
  } ;

  /// construct an evaluator from a bspline object
  ///
  /// when using the higher-level interface to vspline's facilities - via class
  /// bspline - the bspline object provides the coefficients. The derivative
  /// specification is passed in just as for the general constructor. Note that
  /// the derivative specification can be individually chosen for each axis.
  /// This constructor delegates to the one above, 'unpacking' the information
  /// in the bspline object to provide the 'raw' pointer and stride it needs.
  ///
  /// Note how the pointer to the bspline object's 'core' is cast to ele_type,
  /// the elementary type of the spline's value_type, and how the stride is also
  /// modified to refer to ele_type for class evaluator's internal use.
  ///
  /// This constructor optionally takes a third argument 'shift', which
  /// indicates that the coefficient data in the spline should be interpreted
  /// as if they were coefficients of a different-degree spline, where the
  /// value of 'shift' gives the value of this difference. This may or may
  /// not be technically possible. User code is advised to check the
  /// feasability of a shift by calling the spline's shiftable() method
  /// before constructing the evaluator - this constructor will throw an
  /// exception if the spline can't be used with the requested shift.
  /// creating an evaluator with shift has the same effect as shifting
  /// the spline and then creating en evaluator from it, but it does not
  /// change the bspline object, which is often desirable.
  
  evaluator ( const bspline < value_type , dimension > & bspl ,
              derivative_spec_type _derivative = derivative_spec_type ( 0 ) ,
              int shift = 0
            )
  : evaluator ( (ele_type*) ( bspl.core.data() ) ,
                channels * bspl.core.stride() ,
                bspl.spline_degree + shift ,
                _derivative )
  {
    if ( bspl.spline_degree > 1 && ! bspl.braced )
      throw not_supported
       ( "for spline degree > 1: evaluation needs braced coefficients" ) ;

    // while the general constructor above has already been called,
    // we haven't yet made certain that a requested shift has resulted
    // in a valid evaluator. We check this now and throw an exception
    // if the shift was illegal.

    if ( ! bspl.shiftable ( shift ) )
      throw not_supported
       ( "insufficient frame size. the requested shift can not be performed." ) ;
      
// #define ASSURE_IN_BOUNDS // should not be defined in production code
    
    // as we are creating the evaluator from a bspline object,
    // we can make sure that evalation will not access the spline
    // outside it's specification. The code we use to calculate
    // the bracing makes sure we are on the safe side even if
    // access is just out of bounds. The following bit of code
    // doublechecks that this is the case by looking at the offsets
    // at which the spline would be accessed when doing an evaluation
    // just outside the bounds, emitting a message if this would
    // land outside the coefficient array.
    // The warning criterion is even narrower, it warns also if
    // there are superfluous coefficients which will not be used
    // even if the spline is evaluated at these extreme positions,
    // hence the test for !=, instead of >, or <, respectively
    
#ifdef ASSURE_IN_BOUNDS
    
    min_offset =   (ele_type*) ( bspl.container.data() )
                 - cf_ebase ;
    
    max_offset =   (ele_type*) (   bspl.container.data() 
                                 + sum (   bspl.container.stride()
                                         * (   bspl.container.shape()
                                             - shape_type(1) ) ) )
                 - cf_ebase ;
                 
    have_min_max_offset = true ;
    
    auto it_first_ofs = eoffsets.begin() ;
    auto first_ofs = *it_first_ofs ;

    auto low = bspl.lower_limit() - .0001 ;
    shape_type ls ;
    split ( low , ls , low ) ;
    int lofs = sum ( ls * bspl.core.stride() ) ;
    lofs *= channels ;
    lofs += first_ofs ;
    
    if ( lofs < min_offset )
      std::cerr << "WARNING: min_ofs " << min_offset
                << " low ofs " << lofs
                << " " << bc_name[bspl.bcv[0]]
                << " " << bspl.spline_degree
                << std::endl ;

    auto it_last_ofs = eoffsets.end() ;
    it_last_ofs-- ;
    auto last_ofs = *it_last_ofs ;
    
    auto high = bspl.upper_limit() + .0001 ;
    shape_type hs ;
    split ( high , hs , high ) ;
    int hofs = sum ( hs * bspl.core.stride() ) ;
    hofs *= channels ;
    hofs += last_ofs ;
    
    if ( hofs > max_offset )
      std::cerr << "WARNING: max_ofs " << max_offset
                << " high ofs " << hofs
                << " " << bc_name[bspl.bcv[0]]
                << " " << bspl.spline_degree
                << " last_ofs " << last_ofs
                << " high " << high
                << std::endl ;
                
#endif

  } ;

  /// obtain_weights calculates the weights to be applied to a section
  /// of the coefficients from  the fractional parts of the split coordinates.
  /// What is calculated here is the evaluation of the spline's basis function
  /// at dx, dx+1 , dx+2..., but doing it naively is computationally expensive,
  /// as the evaluation of the spline's basis function at arbitrary values has
  /// to look at the value, find out the right interval, and then calculate
  /// the value with the appropriate function. But we always have to calculate
  /// the basis function for *all* intervals anyway, and the method used here
  /// performs this tasks efficiently using a vector/matrix multiplication.
  /// If the spline is more than 1-dimensional, we need a set of weights for
  /// every dimension. The weights are accessed through a 2D MultiArrayView.
  /// For every dimension, there are spline_order weights.
  
  template < typename nd_rc_type , typename weight_type >
  void obtain_weights ( const MultiArrayView < 2 , weight_type > & weight ,
                        const nd_rc_type& c ) const
  {
    auto ci = c.cbegin() ;
    for ( int axis = 0 ; axis < dimension ; ++ci , ++axis )
      wgt[axis] ( weight.data() + axis * spline_order , *ci ) ;
  }

  /// obtain weight for a single axis

  template < typename rc_type , typename weight_type >
  void obtain_weights ( weight_type * p_weight ,
                        const int & axis ,
                        const rc_type& c ) const
  {
    wgt[axis] ( p_weight , c ) ;
  }

private:
  
  // next we have collateral code which we keep private in class evaluator.
  // TODO some of the above could also be private.
  
  // to be able to use the same code to access the coefficients, no matter if
  // the operation is vectorized or not, we provide a set of 'load' functions
  // which encapsulate the memory access. This allows us to uniformly handle
  // vectorized and unvectorized data.

  /// load function for single T

  template < typename T >
  static inline void load ( T & target , const T * const mem , int index )
  {
    target = mem [ index ] ;
  }

  /// load function for TinyVectors of T
  
  template < typename T , int N >
  static inline void load ( TinyVector < T , N > & target , const T * const mem , int index )
  {
    for ( int i = 0 ; i < N ; i++ )
      target[i] = mem [ index + i ] ;
  }

  /// load function for std::complex<T>
  
  template < typename T >
  static inline void load ( std::complex<T> & target , const T * const mem , int index )
  {
    target = std::complex<T> ( mem [ index ] , mem [ index + 1 ] ) ;
  }
  
  // TODO: can we generalize load for aggregates?

#ifdef USE_VC

  /// load function for single SIMD types. Here, instead of the single index
  /// in the load function above, we accept gather indexes for a SIMD gather
  /// operation.

  template < typename vector_type >
  typename std::enable_if < Vc::is_simd_vector < vector_type > :: value > :: type
  static inline
  load ( vector_type & target ,
        const typename vector_type::value_type * mem ,
        const typename vector_type::IndexType & indexes )
  {
    target.gather ( mem , indexes ) ;
  }

  /// load function for TinyVectors of SIMD types
  
  template < typename vector_type , int N >
  typename std::enable_if < Vc::is_simd_vector < vector_type > :: value > :: type
  static inline
  load ( TinyVector < vector_type , N > & target ,
         const typename vector_type::value_type * mem ,
         const typename vector_type::IndexType & indexes )
  {
    for ( int e = 0 ; e < N ; e++ )
      target[e].gather ( mem + e , indexes ) ;
  }

#endif

  /// _eval is the workhorse routine and implements the recursive arithmetic
  /// needed to evaluate the spline. First the weights for the current dimension
  /// are obtained from the weights object passed in. Once the weights are known,
  /// they are successively multiplied with the results of recursively calling
  /// _eval for the next lower dimension and the products are summed up to produce
  /// the return value. The scheme of using a recursive evaluation has several
  /// benefits: - it needs no explicit intermediate storage of partial sums
  /// (uses stack instead) and it makes the process dimension-agnostic in an
  /// elegant way. therefore, the code is also thread-safe. note that this routine
  /// is used for operation on braced splines, with the sequence of offsets to be
  /// visited fixed at the evaluator's construction.
  
  template < class dtype , int level , class wtype , class otype >
  struct _eval
  {
    dtype operator() ( const ele_type * const & ebase ,
                       const otype & origin ,
                       offset_iterator & ofs ,
                       const MultiArrayView < 2 , wtype > & weight
                     ) const
    {
      dtype sum = dtype() ; ///< to accumulate the result
      dtype subsum ;        ///< to pick up the result of the recursive call

      for ( int i = 0 ; i < weight.shape ( 0 ) ; i++ )
      {
        subsum = _eval < dtype , level - 1 , wtype , otype >()
                  ( ebase , origin , ofs , weight ) ;
                 
        subsum *= weight [ vigra::Shape2 ( i , level ) ] ;
        sum += subsum ;
      }
      return sum ;
    }
  } ;

  /// at level 0 the recursion ends, now we finally apply the weights for axis 0
  /// to the window of coefficients. Note how ofs is passed in per reference. This
  /// looks wrong, but it's necessary: When, in the course of the recursion, the
  /// level 0 routine is called again, it needs to access the next bunch of
  /// spline_order coefficients.
  ///
  /// Just incrementing the reference saves us incrementing higher up.
  /// This is the point where we access the spline's coefficients. Since _eval works
  /// for vectorized and unvectorized code alike, this access is coded as a call to
  /// 'load' which provides a uniform syntax for the memory access. The implementation
  /// of the load routines used here is just above.
  
  template < class dtype , class wtype , class otype >
  struct _eval < dtype , 0 , wtype , otype >
  {
    dtype operator() ( const ele_type * const & ebase ,
                       const otype & origin ,
                       offset_iterator & ofs ,
                       const MultiArrayView < 2 , wtype > & weight
                     ) const
    {
      dtype sum = dtype() ;
      dtype help ;

      for ( int i = 0 ; i < weight.shape ( 0 ) ; i++ )
      {
        load ( help , ebase , origin + *ofs ) ;
        help *= weight [ vigra::Shape2 ( i , 0 ) ] ;
        sum += help ;
        ++ofs ;
      }
      return sum ;
    }
    
  } ;

  /// specialized code for degree-1 b-splines, aka linear interpolation
  /// here, there is no gain to be had from working with precomputed per-axis weights,
  /// the weight generation is trivial. So the specialization given here is faster:
  
  template < class dtype , int level , class wtype , class otype >
  struct _eval_linear
  {
    dtype operator() ( const ele_type * const & ebase ,  ///< base adresses of components
                       const otype& origin ,        // offsets to evaluation window origins
                       offset_iterator & ofs ,     // offsets to coefficients inside this window
                       const wtype& tune ) const       // weights to apply
    {
      dtype sum ;    ///< to accumulate the result
      dtype subsum ;

      sum = _eval_linear < dtype , level - 1 , wtype , otype >()
              ( ebase , origin , ofs , tune ) ;
              
      sum *= ( 1 - tune [ level ] ) ;
           
      subsum = _eval_linear < dtype , level - 1 , wtype , otype >()
                ( ebase , origin , ofs , tune );
      
      subsum *= tune [ level ] ;
      sum += subsum ;
      
      return sum ;
    }  
  } ;

  /// again, level 0 terminates the recursion, again accessing the spline's
  /// coefficients with the 'load' function defined above.
  
  template < class dtype , class wtype , class otype >
  struct _eval_linear < dtype , 0 , wtype , otype >
  {
    dtype operator() ( const ele_type * const & ebase ,  ///< base adresses of components
                       const otype& origin ,        // offsets to evaluation window origins
                       offset_iterator & ofs ,     // offsets to coefficients inside this window
                       const wtype& tune ) const       // weights to apply
    {
      dtype sum , help ;
      auto o1 = *ofs ;
      ++ofs ;
      auto o2 = *ofs ;
      ++ofs ;

      load ( sum , ebase , origin + o1 ) ;
      sum *= ( 1 - tune [ 0 ] ) ;
      
      load ( help , ebase , origin + o2 ) ;
      help *= tune [ 0 ] ;
      sum += help ;
      
      return sum ;
    }
  } ;

public:
  
  /// the 'innermost' eval routine is called with offset(s) and weights. This
  /// routine is separate because it is used from outside (namely by grid_eval).
  /// In this final delegate we call the workhorse code in class _eval 
  
  // TODO: what if the spline is degree 0 or 1? for these cases, grid_eval
  // should not pick this general-purpose routine
  
  template < class IC , class ELE , class OUT >
  void eval ( const IC& select ,  // offsets to lower corners of the subarrays
              const MultiArrayView < 2 , ELE > & weight , // vectorized weights
              OUT & result ) const
  {
    // we need an instance of this iterator because it's passed into _v_eval by reference
    // and manipulated by the code there:
    
    offset_iterator ofs = eoffsets.begin() ;
    
    // now we can call the recursive _v_eval routine yielding the result
    
    result = _eval < OUT , level , ELE , IC >()
               ( cf_ebase , select , ofs , weight ) ;
  }
  
  /// 'penultimate' eval starts from (an) offset(s) to (a) coefficient window(s); here
  /// the nD integral index(es) to the coefficient window(s) has/have already been
  /// 'condensed' into (a) 1D offset(s) into the coefficient array's memory.
  /// Here we have the specializations affected by the template argument 'specialize'
  /// which activates more efficient code for degree 0 (nearest neighbour) and degree 1
  /// (linear interpolation) splines. I draw the line here; one might add further
  /// specializations, but from degree 2 onwards the weights are reused several times
  /// so looking them up in a small table (as the general-purpose code for unspecialized
  /// operation does) should be more efficient (TODO test).
  ///
  /// we have three variants, depending on 'specialize'. first is the specialization
  /// for nearest-neighbour interpolation, which doesn't delegate further, since the
  /// result can be obtained directly by gathering from the coefficients:
  
  // dispatch for neraest-neighbour interpolation (degree 0)
  
  template < class IC , class NDRC , class OUT >
  void eval ( const IC& select ,  // offsets to coefficient windows
              const NDRC& tune ,  // fractional parts of the coordinates
              OUT & result ,      // target
              std::integral_constant < int , 0 > ) const
  {
    load ( result , cf_ebase , select ) ;
  }

  /// eval dispatch for linear interpolation (degree 1)
  
  template < class IC , class NDRC , class OUT >
  void eval ( const IC& select ,  // offsets to coefficient windows
              const NDRC& tune ,  // fractional parts of the coordinates
              OUT & result ,      // target
              std::integral_constant < int , 1 > ) const
  {
    offset_iterator ofs = eoffsets.begin() ;
  
    // we move here from coordinates to weights. First we derive
    // component_t, which is either OUT itself for single channel
    // value_types or OUT's value_type if it is a TinyVector, as
    // would be the case for multichannel data.

    typedef typename
      std::conditional < is_singular < OUT > :: value ,
                         vigra::TinyVector < OUT , 1 > ,
                         OUT
                       > :: type :: value_type component_t ;

    // for further processing, we wrap component_t in a TinyVector,
    // even if we're using 1D coordinates. The resulting type, weight_t,
    // has the same structure as 'tune', but it has the value_type's
    // fundametal type.
                       
    typedef vigra::TinyVector < component_t , dim_in > weight_t ;

    result = _eval_linear < OUT , level , weight_t , IC >()
              ( cf_ebase , select , ofs , weight_t(tune) ) ;
   }

  // eval dispatch for arbitrary spline degrees
  
  template < int arbitrary_spline_degree , class IC , class NDRC , class OUT >
  void eval ( const IC& select ,  // offset(s) to coefficient window(s)
              const NDRC& tune ,  // fractional parts of the coordinates
              OUT & result ,      // target
              std::integral_constant < int , arbitrary_spline_degree > ) const
  {
    // we move here from coordinates to weights. First we derive
    // component_t, which is either OUT itself for single channel
    // value_types or OUT's value_type if it is a TinyVector, as
    // would be the case for multichannel data.
    
    typedef typename
      std::conditional < is_singular < OUT > :: value ,
                         vigra::TinyVector < OUT , 1 > ,
                         OUT
                       > :: type :: value_type component_t ;
    
    // now 'weight' is a 2D vigra::MultiArray of this singular type:
                       
    MultiArray < 2 , component_t > weight
      ( vigra::Shape2 ( spline_order , dimension ) ) ;
    
    // get a set of weights for the real remainder of the coordinate
    
    obtain_weights ( weight , tune ) ;
    
    // and delegate to the variant taking the weights

    eval ( select , weight , result ) ;
  }

  /// This variant of eval() takes unsplit coordinates. This is the eval variant which
  /// implements what is 'expected' of a vspline::unary_functor instantiated with the
  /// given input and output type, while the variants above, to which this one
  /// delegates, are additional entry points specific to class evaluator. Of course,
  /// the way class unary_functor is set up, the routines that a class derived from
  /// unary_functor provides is arbitrary, but the 'expectation' is conventional. 
  ///
  /// depending on 'specialize' we dispatch to one of the three specializations above.
  ///
  /// typewise, we have a wide scope here: IN may be a singular fundamental, a singular
  /// simdized type or a TinyVector of either. Same holds true for OUT. To be able to
  /// handle the call sequence consistently, we transform singular types to TinyVectors
  /// of one element. Doing so, a 'coordinate' is always recognizable as such by the
  /// fact that it is a TinyVector. 'condensed' coordinates (aka offsets), on the other
  /// hand, are not, so we can tell even a 1D coordinate from an offset. 
  
  template < class IN , class OUT >
  void eval ( const IN & input ,    // number of dimensions * coordinate vectors
              OUT & result )  const // number of channels * value vectors
  {
    // type IN may be singular or a TinyVector, we want a TinyVector,
    // possibly with only one element, so we 'wrap' input, so that we have
    // a definite TinyVector, containing 'input', the coordinate where we 
    // want to evaluate the spline.
    
    auto _input = wrap ( input ) ;
    typedef decltype ( _input ) nd_rc_t ;
    typedef typename nd_rc_t::value_type rc_t ;
    
#ifdef USE_VC
    
    // deduce an SIMD index type with vsize vector elements
    
    typedef typename vspline::vector_traits
                     < ic_type , vsize > :: type ic_v ;
    
    // produce nD index type, depending on incoming coordinate
    // being a simdized type or not
                     
    typedef typename std::conditional
      < Vc::is_simd_vector < rc_t > :: value ,
        vigra::TinyVector < ic_v , dimension > ,
        vigra::TinyVector < ic_type , dimension >
      > :: type nd_ic_t ;

#else
  
   // without Vc, there are no alternatives.

   typedef vigra::TinyVector < ic_type , dimension > nd_ic_t ;
   
#endif
   
    // two variables of these two types take the result of splitting 'input'
    // into it's integral and remainder parts

    nd_ic_t select ;
    nd_rc_t tune ;
    
    split ( _input , select , tune ) ;
    
    // now we define the type for a single component of the integral type.
    // This is the target for converting the nD index(es) to (an) offset(s)
    
    typedef typename nd_ic_t::value_type IC ;
    
    // condense nD index(es) into offset(s) by multiplying with the per-axis
    // strides and summing up. Note how we use 'expanded' strides which express
    // the strides in terms of the elementary type rather than value_type itself,
    // which may be a TinyVector of the elementary type.
    
    IC origin = select[0] * ic_type ( cf_estride [ 0 ] ) ;
    for ( int d = 1 ; d < dimension ; d++ )
      origin += select[d] * ic_type ( cf_estride [ d ] ) ;
    
    // pass on to eval overload taking the offset, dispatching on 'specialize'

    eval ( origin , tune , result ,
           std::integral_constant < int , specialize > () ) ;
  }
} ;

namespace detail
{

/// helper object to create a type-erased vspline::evaluator for
/// a given bspline object. The evaluator is specialized to the
/// spline's degree, so that degree-0 splines are evaluated with
/// nearest neighbour interpolation, degree-1 splines with linear
/// interpolation, and all other splines with general b-spline
/// evaluation. The resulting vspline::evaluator is 'grokked' to
/// erase it's type to make it easier to handle on the receiving
/// side.

template < typename spline_type ,
           typename rc_type ,
           int _vsize >
struct build_ev
{
  vspline::grok_type < bspl_coordinate_type < spline_type , rc_type > ,
                       bspl_value_type < spline_type > ,
                       _vsize >
  operator() ( const spline_type & bspl )
  {
    typedef bspl_coordinate_type < spline_type , rc_type > crd_t ;
    typedef bspl_value_type < spline_type > value_t ;

    if ( bspl.spline_degree == 0 )    
      return vspline::grok
            ( vspline::evaluator
                < crd_t , value_t , _vsize , 0 > ( bspl )
            ) ;
    else if ( bspl.spline_degree == 1 )    
      return vspline::grok
            ( vspline::evaluator
                < crd_t , value_t , _vsize , 1 > ( bspl )
            ) ;
    else  
      return vspline::grok
            ( vspline::evaluator
                < crd_t , value_t , _vsize , -1 > ( bspl )
            ) ;
  }
} ;

/// helper object to create a vspline::mapper object with gate types
/// matching a bspline's boundary conditions and extents matching the
/// spline's lower and upper limits. Please note that these limits
/// depend on the boundary conditions and are not always simply
/// 0 and N-1, as they are for, say, mirror boundary conditions.
/// see lower_limit() and upper_limit() in vspline::bspline.
///
/// gate types are inferred from boundary conditions like this:
/// PERIODIC -> periodic_gate
/// MIRROR, REFLECT, SPHERICAL -> mirror_gate
/// all other boundary conditions -> clamp_gate
///
/// The mapper object is chained to an evaluator, resulting in
/// a functor providing safe access to the evaluator. The functor
/// is subsequently 'grokked' to produce a uniform return type.

template < int level ,
           typename spline_type ,
           typename rc_type ,
           int _vsize ,
           class ... gate_types >
struct build_safe_ev
{
  vspline::grok_type < bspl_coordinate_type < spline_type , rc_type > ,
                       bspl_value_type < spline_type > ,
                       _vsize >
  operator() ( const spline_type & bspl , gate_types ... gates )
  {
    // find out the spline's lower and upper limit for the current level

    rc_type lower ( bspl.lower_limit ( level ) ) ;
    rc_type upper ( bspl.upper_limit ( level ) ) ;

    // depending on the spline's boundary condition for the current
    // level, construct an appropriate gate object and recurse to
    // the next level

    switch ( bspl.bcv [ level ] )
    {
      case vspline::PERIODIC:
      {
        auto gt = vspline::periodic < rc_type , _vsize >
                  ( lower , upper ) ;
        return build_safe_ev < level - 1 , spline_type , rc_type , _vsize ,
                              decltype ( gt ) , gate_types ... >()
              ( bspl , gt , gates ... ) ; 
        break ;
      }
      case vspline::MIRROR:
      case vspline::REFLECT:
      case vspline::SPHERICAL:
      {
        auto gt = vspline::mirror < rc_type , _vsize >
                  ( lower , upper ) ;
        return build_safe_ev < level - 1 , spline_type , rc_type , _vsize ,
                              decltype ( gt ) , gate_types ... >()
              ( bspl , gt , gates ... ) ; 
        break ;
      }
      default:
      {
        auto gt = vspline::clamp < rc_type , _vsize >
                  ( lower , upper , lower , upper ) ;
        return build_safe_ev < level - 1 , spline_type , rc_type , _vsize ,
                              decltype ( gt ) , gate_types ... >()
              ( bspl , gt , gates ... ) ; 
        break ;
      }
    }
  } 
} ;

/// at level -1, there are no more axes to deal with, here the recursion
/// ends and the actual mapper object is created. Specializing on the
/// spline's degree (0, 1, or indeterminate), an evaluator is created
/// and chained to the mapper object. The resulting functor is grokked
/// to produce a uniform return type, which is returned to the caller.

template < typename spline_type ,
           typename rc_type ,
           int _vsize ,
           class ... gate_types >
struct build_safe_ev < -1 , spline_type , rc_type , _vsize , gate_types ... >
{
  vspline::grok_type < bspl_coordinate_type < spline_type , rc_type > ,
                       bspl_value_type < spline_type > ,
                       _vsize >
  operator() ( const spline_type & bspl , gate_types ... gates )
  {
    typedef bspl_coordinate_type < spline_type , rc_type > crd_t ;
    typedef bspl_value_type < spline_type > value_t ;

    if ( bspl.spline_degree == 0 )    
      return vspline::grok
            (   vspline::mapper
                  < crd_t , _vsize , gate_types... > ( gates ... )
              + vspline::evaluator
                  < crd_t , value_t , _vsize , 0 > ( bspl )
            ) ;
    else if ( bspl.spline_degree == 1 )    
      return vspline::grok
            (   vspline::mapper
                  < crd_t , _vsize , gate_types... > ( gates ... )
              + vspline::evaluator
                  < crd_t , value_t , _vsize , 1 > ( bspl )
            ) ;
    else  
      return vspline::grok
            (   vspline::mapper
                  < crd_t , _vsize , gate_types... > ( gates ... )
              + vspline::evaluator
                  < crd_t , value_t , _vsize , -1 > ( bspl )
            ) ;
  }
} ;

} ; // namespace detail

/// make_evaluator is a factory function, producing a functor
/// which provides access to an evaluator object. Evaluation
/// using the resulting object is *not* intrinsically safe,
/// it's the user's responsibility not to pass coordinates
/// which are outside the spline's defined range. If you need
/// safe access, see 'make_safe_evaluator' below. Not safe in
/// this context means that evaluation at out-of-bounds
/// locations will result in a memory fault or produce
/// undefined results. Note that vspline's bspline objects
/// are set up as to allow evaluation at the lower and upper
/// limit of the spline and will also tolerate values 'just
/// outside' the bounds to guard against quantization errors.
/// see vspline::bspline for details.
///
/// The evaluator will be specialized to the spline's degree:
/// degree 0 splines will be evaluated with nearest neighbour,
/// degree 1 splines with linear interpolation, all other splines
/// will use general b-spline evaluation.
///
/// This function returns the evaluator wrapped in an object which
/// hides it's type. This object only 'knows' what coordinates it
/// can take and what values it will produce. The extra level of
/// indirection costs a bit of performance, but having a common type
/// simplifies handling. The wrapped evaluator also provides operator().
///
/// So, if you have a vspline::bspline object 'bspl', you can use this
/// factory function like this:
///
/// auto ev = make_evaluator ( bspl ) ;
/// typedef typename decltype(ev)::in_type coordinate_type ;
/// coordinate_type c ;
/// auto result = ev ( c ) ;
///
/// make_evaluator requires one template argument: spline_type, the
/// type of the vspline::bspline object you want to have evaluated.
/// Optionally, you can specify the elementary type for coordinates
/// (use either float or double) and the vectorization width. The
/// latter will only have an effect if Vc is used (-DUSE_VC) and
/// the spline's data type can be vectorized. Otherwise evaluation
/// will 'collapse' to using unvectorized code. Per default, the
/// vectorization width will be inferred from the spline's value_type
/// by querying vspline::vector_traits, which tries to provide a
/// 'good' choice. Note that a specific evaluator will only be capable
/// of processing vectorized coordinates of precisely the _vsize it
/// has been created with.

template < class spline_type ,
           typename rc_type = float , // watch out, default not very precise!
           int _vsize = vspline::vector_traits
                         < typename spline_type::value_type > :: size >
vspline::grok_type < bspl_coordinate_type < spline_type , rc_type > ,
                     typename spline_type::value_type ,
                     _vsize >
make_evaluator ( const spline_type & bspl )
{
  // first we figure out if the spline's data type can be vectorized.
  // If so, we'll accept any _vsize template argument, but if not, we
  // only accept _vsize == 1. If there is a mismatch, we fail a
  // static_assert to provide a reasonably meaningful message.

  typedef typename spline_type::value_type value_type ;
  typedef typename vigra::ExpandElementResult < value_type > :: type ele_type ;
  enum { vsize = is_vectorizable < ele_type > :: value ? _vsize : 1 } ;
  
  static_assert
  ( vsize == _vsize ,
    "only _vsize 1 acceptable: can't provide vectorization for the spline's data type" ) ;
  
  // if all is well, we build the evaluator

  return detail::build_ev < spline_type ,
                            rc_type ,
                            _vsize >() ( bspl ) ;
}

/// make_safe_evaluator is a factory function, producing a functor
/// which provides safe access to an evaluator object. This functor
/// will map incoming coordinates into the spline's defined range,
/// as given by the spline with it's lower_limit and upper_limit
/// functions, honoring the bspline objects's boundary conditions.
/// So if, for example, the spline is periodic, all incoming
/// coordinates are valid and will be mapped to the first period.
/// Note the use of lower_limit and upper_limit. These values
/// also depend on the spline's boundary conditions, please see
/// class vspline::bspline for details. If there is no way to
/// meaningfully fold the coordinate into the defined range, the
/// coordinate is clamped to the nearest limit.
///
/// The evaluator will be specialized to the spline's degree:
/// degree 0 splines will be evaluated with nearest neighbour,
/// degree 1 splines with linear interpolation, all other splines
/// will use general b-spline evaluation.
///
/// This function returns the functor wrapped in an object which
/// hides it's type. This object only 'knows' what coordinates it
/// can take and what values it will produce. The extra level of
/// indirection costs a bit of performance, but having a common type
/// simplifies handling: the type returned by this function only
/// depends on the spline's data type, the coordinate type and,
/// optionally, the vectorization width.

template < class spline_type ,
           typename rc_type = float , // watch out, default not very precise!
           int _vsize = vspline::vector_traits
                         < typename spline_type::value_type > :: size >
vspline::grok_type < bspl_coordinate_type < spline_type , rc_type > ,
                     typename spline_type::value_type ,
                     _vsize >
make_safe_evaluator ( const spline_type & bspl )
{
  // first we figure out if the spline's data type can be vectorized.
  // If so, we'll accept any _vsize template argument, but if not, we
  // only accept _vsize == 1. If there is a mismatch, we fail a
  // static_assert to provide a reasonably meaningful message.

  typedef typename spline_type::value_type value_type ;
  typedef typename vigra::ExpandElementResult < value_type > :: type ele_type ;
  
  enum { vsize = is_vectorizable < ele_type > :: value ? _vsize : 1 } ;
  
  static_assert
  ( vsize == _vsize ,
    "only _vsize 1 acceptable: can't provide vectorization for the spline's data type" ) ;

  // if all is well, we build the evaluator

  return detail::build_safe_ev < spline_type::dimension - 1 ,
                                 spline_type ,
                                 rc_type ,
                                 _vsize >() ( bspl ) ;
} ;

} ; // end of namespace vspline

#endif // VSPLINE_EVAL_H