This file is indexed.

/usr/share/perl5/LaTeXML/Post/MakeIndex.pm is in latexml 0.8.2-1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
# /=====================================================================\ #
# |  LaTeXML::Post::MakeIndex                                           | #
# | Make an index from scanned indexmark's                              | #
# |=====================================================================| #
# | Part of LaTeXML:                                                    | #
# |  Public domain software, produced as part of work done by the       | #
# |  United States Government & not subject to copyright in the US.     | #
# |---------------------------------------------------------------------| #
# | Bruce Miller <bruce.miller@nist.gov>                        #_#     | #
# | http://dlmf.nist.gov/LaTeXML/                              (o o)    | #
# \=========================================================ooo==U==ooo=/ #
package LaTeXML::Post::MakeIndex;
use strict;
use warnings;
use LaTeXML::Util::Pathname;
use LaTeXML::Common::XML;
use charnames qw(:full);
use Unicode::Normalize;
use LaTeXML::Post;
use Text::Unidecode;
use base qw(LaTeXML::Post::Collector);

# Options:
#   permuted : Generates a permuted index
#              The phrases (separated by ! in LaTeX) within each \index entry
#              are permuted before adding to the index tree.
#   split  : whether to split the index into separate pages by initial.
# Constructor: defers to the base class (LaTeXML::Post::Collector) and then
# records the two index-specific options, 'permuted' and 'split', on the object.
sub new {
  my ($class, %options) = @_;
  my $self = $class->SUPER::new(%options);
  foreach my $option (qw(permuted split)) {
    $$self{$option} = $options{$option}; }
  return $self; }

# Select what this processor works on: whatever $doc->findnode returns
# for the XPath '//ltx:index'.
sub toProcess {
  my ($self, $doc) = @_;
  my $xpath = '//ltx:index';
  return $doc->findnode($xpath); }

# Fill in the index of $doc.
#   $doc   : the document being post-processed
#   $index : the ltx:index node to populate
# Returns the list of resulting documents: just ($doc) when there are no index
# entries; the rescanned $doc when not splitting; or the rescanned documents
# produced by makeSubCollectionDocuments when the 'split' option is set.
sub process {
  my ($self, $doc, $index) = @_;
  my @results = ($doc);
  $doc->addDate();
  my ($tree, $allphrases) = $self->build_tree($doc, $index);
  if ($tree && $$self{split}) {
    # One (key => indexlist) pair per top-level subtree of the split tree.
    my @pages = ();
    foreach my $initial (keys %{ $$tree{subtrees} }) {
      push(@pages,
        $initial => $self->makeIndexList($doc, $allphrases, $$tree{subtrees}{$initial})); }
    my @subdocs = $self->makeSubCollectionDocuments($doc, $index, @pages);
    @results = map { $self->rescan($_) } @subdocs; }
  elsif ($tree) {
    # Unsplit: the whole index list goes directly into the ltx:index node.
    $doc->addNodes($index, $self->makeIndexList($doc, $allphrases, $tree));
    @results = ($self->rescan($doc)); }
  return @results; }

# ================================================================================
# Data generated:
#  $tree : tree representation of the index.
#  $allphrases : used for inferring the connection from see-also phrases to normal index entries.
# ================================================================================
# Build the tree of index entries from the database records whose key starts
# with "INDEX:".
#   $index : the ltx:index node; its xml:id becomes the id of the tree root.
# Returns ($tree, $allphrases), or (undef, undef) when the database holds no
# such records.  Records without any phrases are reported with a warning and
# skipped.  With the 'permuted' option, each record is added once for every
# cyclic permutation of its phrases.
sub build_tree {
  my ($self, $doc, $index) = @_;
  my @keys = grep { /^INDEX:/ } $$self{db}->getKeys;
  return (undef, undef) unless @keys;
  NoteProgress(" [" . scalar(@keys) . " entries]");

  my $id         = $index->getAttribute('xml:id');
  my $allphrases = {};    # Keep a hash of all phrase textContent=>id encountered (for seealso)
  my $tree       = { subtrees => {}, referrers => {}, id => $id, parent => undef };
  foreach my $key (@keys) {
    my $entry   = $$self{db}->lookup($key);
    my $phrases = $entry->getValue('phrases');
    my @phrases = @$phrases;
    if (!scalar(@phrases)) {
      Warn('expected', $key, undef, "Missing phrases in indexmark: '$key'");
      next; }

    # Either every cyclic rotation of the phrases, or just the phrases as given.
    my @orderings = ($$self{permuted} ? cyclic_permute(@phrases) : ([@phrases]));
    foreach my $ordering (@orderings) {
      $self->add_entry($doc, $allphrases, $tree, $entry, @$ordering); } }
  return ($tree, $allphrases); }

# NOTE: We're building ID's for each entry, of the form idx.key.key...
# I'd like to insert the initial in the case of split index: idx.A.key.key...
# But this makes it impossible to predict the id of a phrase key, w/o knowing
# whether the index has been split!
# OTOH, leaving it out risks that a single letter entry, say "A", will have the
# same id as the A page! (or maybe not if the key is downcased....)
# Add one index entry (a sequence of phrase nodes) to $tree.
# When the 'split' option is set, the entry is filed under an extra level keyed
# by $doc->initial() of the first phrase's key; that level is created on demand
# and shares the id of $tree.  Otherwise the entry goes directly under $tree.
sub add_entry {
  my ($self, $doc, $allphrases, $tree, $entry, @phrases) = @_;
  my $target = $tree;
  # NOTE: Still need option for splitting!
  # We'll just prefix a level for the initial...
  if ($$self{split}) {
    my $init = $doc->initial($phrases[0]->getAttribute('key'));
    $target = ($$tree{subtrees}{$init}
        ||= { phrase => $init, subtrees => {}, referrers => {}, id => $$tree{id}, parent => $tree }); }
  add_rec($doc, $allphrases, $target, $entry, @phrases);
  return; }

# Walk down from $tree, one level per phrase, creating any missing subtree on
# the way, then attach the see_also and referrers data of $entry to the node
# reached.  Each newly created node also registers its dotted key path and its
# space-separated phrase text (both as-is and lowercased) in %$allphrases,
# mapping them to the node's id.
sub add_rec {
  my ($doc, $allphrases, $tree, $entry, @phrases) = @_;
  my $node = $tree;
  foreach my $phrase (@phrases) {
    my $key   = $phrase->getAttribute('key');
    my $keyid = getIndexKeyID($key);
    my $child = $$node{subtrees}{$key};
    if (!$child) {
      my $id      = $$node{id} . '.' . $keyid;
      my $fullkey = ($$node{key} ? "$$node{key}." : '') . $key;
      # phrasetext is for see & seealso lookup
      my $phrasetext = getIndexContentKey($phrase);
      # Store multi-level phrase as well, using space to separate levels
      my $fullphrasetext = ($$node{fullphrasetext} ? $$node{fullphrasetext} . ' ' : '')
        . $phrasetext;
      foreach my $name ($fullkey, lc($fullkey), $fullphrasetext, lc($fullphrasetext)) {
        $$allphrases{$name} = $id; }

      my $phrasecopy = $doc->cloneNode($phrase);
      $child = $$node{subtrees}{$key} = {
        key            => $fullkey,
        id             => $id,
        phrase         => $phrasecopy,
        phrasetext     => $phrasetext,
        fullphrasetext => $fullphrasetext,
        subtrees       => {},
        referrers      => {},
        parent         => $node }; }
    $node = $child; }

  # All phrases consumed: $node is the entry's own node.
  if (my $seealso = $entry->getValue('see_also')) {
    $$node{see_also} = $seealso; }
  if (my $refs = $entry->getValue('referrers')) {
    foreach my $refid (keys %$refs) {
      $$node{referrers}{$refid} = $$refs{$refid}; } }
  return; }

# Produce the normalized text of a phrase.
# $node may be an object (its textContent is used) or a plain string.
# Leading and trailing whitespace is removed, internal runs of whitespace
# collapse to a single space, and trailing '.', ',' and ';' are dropped.
sub getIndexContentKey {
  my ($node) = @_;
  my $string = (ref $node ? $node->textContent : $node);
  for ($string) {
    s/^\s+//s;
    s/\s+$//s;
    s/\s+/ /gs;
    s/\s*[\.\,\;]+$//s; }    # Remove trailing punctuation
  return $string; }

# This generates the ID from the node
# By stripping non-letters, this strips out TOO MUCH; esp. greek etc, in math disappears
# Should we keep unicode? (any compatibility issues there?)
# Should we just strip to rfc spec?
# Should we get the TeX for math?
sub getIndexKeyID {
  my ($key) = @_;
  # Trim leading/trailing whitespace, in any case
  $key =~ s/^\s+//s;
  $key =~ s/\s+$//s;
  # We don't want accented chars (do we?) but we need to decompose the accents
  # before transliterating with unidecode.
  my $decomposed = NFD($key);
  my $ascii      = unidecode($decomposed);
  # Only ASCII letters and digits survive into the id.
  $ascii =~ s/[^a-zA-Z0-9]//g;
## Shouldn't be case insensitive?
##  $ascii =~ tr|A-Z|a-z|;
  return $ascii; }

# ================================================================================
# Generate permutations of indexing phrases.
# Return every ordering of the given items, each as an array reference.
# A list of zero or one item yields a single ordering holding those items.
sub permute {
  my (@l) = @_;
  return ([@l]) unless scalar(@l) > 1;
  my @orderings = ();
  foreach my $i (0 .. $#l) {
    # Take item $i as the head; the others keep their relative order.
    my @others = @l;
    my ($head) = splice(@others, $i, 1);
    push(@orderings, permute_aux($head, @others)); }
  return @orderings; }

# Return every ordering of ($first, @rest) that starts with $first.
sub permute_aux {
  my ($first, @rest) = @_;
  my @orderings = ();
  foreach my $tail (permute(@rest)) {
    push(@orderings, [$first, @$tail]); }
  return @orderings; }

# Or would cyclic permutations be more appropriate?
#  We could get odd orderings, if authors aren't consistent,
# but would avoid silly redundancies in small top-level listings.
# Return the cyclic rotations of the given items, each as an array reference,
# starting with the original order and moving the leading item to the end at
# each step.  A list of zero or one item yields a single rotation.
sub cyclic_permute {
  my (@l) = @_;
  return ([@l]) unless scalar(@l) > 1;
  my @rotations = ();
  my @current   = @l;
  foreach my $step (1 .. scalar(@l)) {
    push(@rotations, [@current]);
    push(@current, shift(@current)); }
  return @rotations; }

# ================================================================================
# Formatting the resulting index tree.

# Format the subtrees of $tree as an ltx:indexlist array structure, holding one
# entry per subtree in the order given by $doc->unisort of the subtree keys.
# Returns the empty list when $tree has no subtrees.
sub makeIndexList {
  my ($self, $doc, $allphrases, $tree) = @_;
  my $subtrees = $$tree{subtrees};
  my @keys     = $doc->unisort(keys %$subtrees);
  return () unless @keys;
  my @entries = ();
  foreach my $key (@keys) {
    push(@entries, $self->makeIndexEntry($doc, $allphrases, $$subtrees{$key})); }
  return ['ltx:indexlist', {}, @entries]; }

# Format one node of the index tree as an ltx:indexentry array structure:
# the phrase, then (if any) an ltx:indexrefs holding the combined referrer links
# followed by the see-also items, then the nested index list of its subtrees.
# A see-also item that cannot be linked to a known phrase is emitted as plain
# text; a warning is issued unless the item itself contains an ltx:ref.
sub makeIndexEntry {
  my ($self, $doc, $allphrases, $tree) = @_;
  my @links = ();
  my $refs  = $$tree{referrers};
  if (keys %$refs) {
    push(@links, ['ltx:text', {}, ' '], $self->combineIndexEntries($doc, $refs)); }

  foreach my $see (@{ $$tree{see_also} || [] }) {
    push(@links, ', ');    # if @links;
    my $name = $see->getAttribute('name');
    if ($name) {
      push(@links, ['ltx:text', { font => 'italic' }, $name, ' ']); }
    my $phrase   = getIndexContentKey($see);
    my @seelinks = $self->seealsoSearch($doc, $allphrases, $tree, $see);
    if (@seelinks) {
      push(@links, @seelinks);
      next; }
    # No matching entry was found: fall back to the see-also's own content.
    if (!$doc->findnodes("descendant-or-self::ltx:ref", $see)) {
      Warn('expected', $phrase, undef,
        "Missing index see-also term '$phrase'", "(seen under $$tree{key})"); }
    push(@links, ['ltx:text', {}, $see->childNodes]); }

  my @indexrefs = (@links ? (['ltx:indexrefs', {}, @links]) : ());
  return ['ltx:indexentry', { 'xml:id' => $$tree{id} },
    ['ltx:indexphrase', {}, $doc->trimChildNodes($$tree{phrase})],
    @indexrefs,
    $self->makeIndexList($doc, $allphrases, $tree)]; }

# Sort comparator (reads the sort variables $a and $b) that keeps strings
# differing only in case next to each other: compare lowercased first, and
# break ties with a plain string comparison.
# Really, it's only used for id's... (would like an id-sort!)
sub alphacmp {
  my $folded = lc($a) cmp lc($b);
  return $folded if $folded;
  return $a cmp $b; }

# Combine a set of referrers into a list of links for the document; this
# corresponds to the list of page numbers in a printed index.
#   $refs : hash mapping referrer id => hash of flags/styles for that id.
#           The keys 'rangestart' and 'rangeend' mark the end points of a range;
#           the remaining keys are treated as style names.
# Returns the links separated by ", " (see conjoin).  A range is emitted as a
# single ltx:text holding the start link, an em-dash, and the end link.
# We want the links in document order; the ids are sorted with alphacmp,
# which is presumably a stand-in for that (NOTE(review): confirm).
sub combineIndexEntries {
  my ($self, $doc, $refs) = @_;
  my @ids = sort alphacmp keys %$refs;
  #   my @ids = $doc->unisort(keys %$refs);
  my @links = ();
  while (@ids) {
    my $id    = shift(@ids);
    my $entry = $$refs{$id};
    if ($$entry{rangestart}) {
      my $startid = $id;
      my $endid   = $id;
      # Nesting depth of open ranges: a further rangestart opens another level,
      # a rangeend closes one; the range is complete when the depth is back to 0.
      # If the ids run out first, the range ends at the last id seen.
      my $lvl = 1;
      while (@ids) {
        $endid = shift(@ids);
        $lvl++ if $$refs{$endid}{rangestart};
        $lvl-- if $$refs{$endid}{rangeend};
        last unless $lvl; }
      push(@links,
        ['ltx:text', {},
          $self->makeIndexRefs($doc, $startid,
            grep { $_ ne 'rangestart' } sort keys %$entry),
          "\x{2014}",
          $self->makeIndexRefs($doc, $endid,
            grep { $_ ne 'rangeend' } sort keys %{ $$refs{$endid} })]); }
    else {
      push(@links, $self->makeIndexRefs($doc, $id, sort keys %$entry)); } }
  return conjoin(@links); }

# Make a single ref to a "page" (the node with id $id), in a particular style.
# Only the first of @styles is used: given that sorted styles gives
# bold, italic, normal, let's just do the first.
# With no style, or the style 'normal', the bare ltx:ref is returned;
# otherwise it is wrapped in an ltx:text carrying that style as its font.
sub makeIndexRefs {
  my ($self, $doc, $id, @styles) = @_;
  my $ref   = ['ltx:ref', { idref => $id, show => 'typerefnum' }];
  my $style = $styles[0] || 'normal';
  return $ref if $style eq 'normal';
  return ['ltx:text', { font => $styles[0] }, $ref]; }

#======================================================================
# Dealing with See & Seealso entries.
# A LOTTA work, for such a little thing!

# Regular index entries, possibly with several levels, are somewhat
# structured & formalized: they need to match so that entries can be combined;
# they also optionally allow for a sort (& comparison) key.
# Seealso entries, are not so structured; there's no provision for a sort key.
# They USUALLY will refer to another regular index entry, but aren't required to.
# It is nice to LINK such a seealso entry to the corresponding regular entry, if possible!

# So, we go on a fishing expedition to find possible phrases.
# I find several idioms in use (but perhaps biased by DLMF):
# (1) "topic" may refer to top-level "topic", or one within the current subtree
# (2) "topic1, topic2 and topic3" may refer to a single entry, or may refer to 3 separate entries.
# (3) "topic1 topic2" or "topic1, topic2" may refer to a single entry, or may refer
#     to a 2 level entry like \index(topic1|topic2)
# And finally, case & plural differences may indicate distinct top-level concepts,
# or may simply be insignificant variations in phrasing!

# Try to turn the see-also node $see into links to known index entries:
# partition it into candidate phrases and delimiters, then search for those
# starting from $contexttree.  Returns whatever seealsoSearch_rec returns.
sub seealsoSearch {
  my ($self, $doc, $allphrases, $contexttree, $see) = @_;
  my @parts = seealsoPartition($doc, $see);
  return seealsoSearch_rec($doc, $allphrases, $contexttree, @parts); }

# @parts are alternating (potential) term, (potential) delimiter, ...
# each being a [key, xml...] pair.
# Returns the list of links (interleaved with the xml of the delimiters that
# separate them) when every term could be resolved, or nothing otherwise.
sub seealsoSearch_rec {
  my ($doc, $allphrases, $contexttree, @parts) = @_;
  my $nparts = scalar(@parts);
  return () if $nparts < 1;

  if ($nparts < 3) {
    # Single term? (w/ possible trailing punct) just look it up
    my $link = lookupSeealsoPhrase($doc, $allphrases, $contexttree, $parts[0]);
    return unless $link;
    return ($link, ($parts[1] ? cdr($parts[1]) : ())); }

  # Try the first delimiter "literally" (possibly reinterpreted by lookupSeealsoPhrase):
  # join term, delimiter, term into a single term, and recurse,
  # so that all alternatives of the next delimiter will be considered.
  my @joined = seealsoSearch_rec($doc, $allphrases, $contexttree,
    seealsoJoin(@parts[0 .. 2]), @parts[3 .. $#parts]);
  return @joined if @joined;

  # Otherwise try the delimiter as a separator between individual entries;
  # and recurse to handle the next delimiter.
  my $link = lookupSeealsoPhrase($doc, $allphrases, $contexttree, $parts[0]);
  return unless $link;
  my @rest = seealsoSearch_rec($doc, $allphrases, $contexttree, @parts[2 .. $#parts]);
  return unless @rest;
  return ($link, cdr($parts[1]), @rest); }

# Return the key (the first element) of a [key, xml...] pair.
# The pair is the sub's first argument, $_[0]; it must not be confused with
# $$_[0], which would dereference the global $_ instead.
sub car {
  my ($pair) = @_;
  return $$pair[0]; }

# Return the xml items (everything after the key) of a [key, xml...] pair.
sub cdr {
  my ($pair) = @_;
  my @items = @$pair;
  shift(@items);    # discard the key
  return @items; }

# Reassemble a partition (list of [key,xml] pairs) into a single such pair:
# the keys are concatenated and normalized by getIndexContentKey,
# and the xml items of all the pairs are appended in order.
sub seealsoJoin {
  my (@parts) = @_;
  my $text = '';
  my @xml  = ();
  foreach my $part (@parts) {
    $text .= $$part[0];
    push(@xml, cdr($part)); }
  return [getIndexContentKey($text), @xml]; }

# Look for a single phrase either within one of the levels of the current
# $contexttree, or at top-level. Try it as-is, or ignoring commas, plurals
# and/or case-differences, or with commas taken as level separators.
#   $pair : [phrase, xml...]
# Returns an ltx:ref array structure to the first match, wrapping the xml
# of the pair; returns nothing when no variant matches.
sub lookupSeealsoPhrase {
  my ($doc, $allphrases, $contexttree, $pair) = @_;
  my ($phrase, @xml) = @$pair;
  # concoct various phrases to search for
  my $pnc = $phrase;
  $pnc =~ s/,\s*/ /sg;    # Ignore punct?
  my $ps = $phrase;
  $ps =~ s/(\w+)s\b/$1/sg;    # Ignore plurals?
  my $psnc = $ps;
  $psnc =~ s/,\s*/ /sg;    # Ignore punct AND plurals?
  my $pnlvl = $phrase;
  $pnlvl =~ s/,\s*/./sg;    # Convert punct to levels?

  # Each variant is tried as-is and then lowercased, in this order.
  my @trials = map { ($_, lc($_)) } ($phrase, $pnc, $ps, $psnc, $pnlvl);
  foreach my $trial (@trials) {
    # Innermost context first, then each enclosing level up to the root.
    for (my $t = $contexttree; $t; $t = $$t{parent}) {
      my $prefix = ($$t{fullphrasetext} ? $$t{fullphrasetext} . " " : '');
      my $id     = $$allphrases{ $prefix . $trial };
      return ['ltx:ref', { idref => $id }, @xml] if $id; } }
  return; }

# Partition a seealso (xml) phrase into a sequence of alternating
#   candidate index phrases (which will be looked up)
#   candidate delimiters (which potentially split the phrase or not;
#      see discussion above about delimiters).
# This is analogous to a simple split, but that
# (a) the argument is XML, with the pieces of delimiter ("," space, "and"...)
#    potentially distributed amongst distinct xml elements, due to styling.
# (b) we want to preserve the XML associated with each phrase & delimiter
#    in order to fill-in and separate the resulting ltx:ref's
# Messy!
# Returns a list of [key, xml...] pairs, alternating candidate phrases and
# candidate delimiters; returns the empty list when $see yields no chunks.
sub seealsoPartition {
  my ($doc, $see) = @_;
  my @parts = seealsoPartition_aux($doc, $see);
  # An empty see-also produces no chunks at all.  Return nothing in that case,
  # rather than a list holding a single undef: lookupSeealsoPhrase would try to
  # dereference that undef as a pair.
  return () unless @parts;

  # Combine adjacent conjunctions & punctuation chunks
  my @result = (shift(@parts));
  while (@parts) {
    my $next    = shift(@parts);
    my $prev_is = ($result[-1][0] =~ /^,?\s*(?:,|\.|\s+|\band\s+also|\band|\bor)\s*$/);
    my $next_is = ($$next[0] =~ /^(?:,|\.|\s+|and\b|or\b)/);
    # If either BOTH or NEITHER prev & next are delimiters, combine them:
    # append the key and the xml of $next to the previous chunk.
    if (!($prev_is xor $next_is)) {
      my ($k, @x) = @$next;
      $result[-1][0] .= $k;
      push(@{ $result[-1] }, @x); }
    else {
      push(@result, $next); } }

  # Now merge any adjacent phrase and/or space chunks into candidate phrase
  @parts  = @result;
  @result = (shift(@parts));
  while (@parts) {
    my $next = shift(@parts);
    # If next is pure space, combine with prev AND following!
    # (a pure-space chunk at the very end is kept as a chunk of its own)
    if (($$next[0] =~ /^\s+$/s) && scalar(@parts)) {
      my ($k1, @x1) = @$next;
      my ($k2, @x2) = @{ shift(@parts) };
      $result[-1][0] .= $k1 . $k2;
      push(@{ $result[-1] }, @x1, @x2); }
    else {
      push(@result, $next); } }
  return @result; }

# Recursively split the XML $see into pure phrase or delimiter chunks.
# we'll still need to combine adjacent chunks appropriately (see seealsoPartition).
# Returns a list of [key, xml] pairs, one per chunk:
#  * text nodes are cut into delimiters (",", ".", whitespace, "and also",
#    "and", "or") and words (runs of anything but comma, period, whitespace);
#  * ltx:text and ltx:emph elements are split recursively, each resulting chunk
#    being re-wrapped in a copy of the element;
#  * any other element is kept whole as a single chunk;
#  * nodes that are neither text nor element are ignored.
sub seealsoPartition_aux {
  my ($doc, $see) = @_;
  my @result = ();
  foreach my $ch ($see->childNodes) {
    my $t = $ch->nodeType;
    if ($t == XML_TEXT_NODE) {
      my $string = $ch->textContent;
      # Loop until the text is really exhausted: testing the string for truth
      # would stop early (and lose the chunk) when what remains is "0".
      while (defined($string) && ($string ne '')) {
        if ($string =~ s/^(,|\.|\s+|and\s+also\b|and\b|or\b)//) {
          my $delimiter = $1;
          push(@result, [$delimiter, $delimiter]); }
        elsif ($string =~ s/^([^,\.\s]+)//) {
          my $word = $1;
          push(@result, [getIndexContentKey($word), $word]); } } }
    elsif ($t == XML_ELEMENT_NODE) {
      my $tag = $doc->getQName($ch);
      if ($tag =~ /^(ltx:text|ltx:emph)$/) {
        # NOTE(review): this uses each item returned by ->attributes both as the
        # hash key and as the name passed to getAttribute; confirm that those
        # items stringify to the attribute name.
        my $attr = { map { ($_ => $ch->getAttribute($_)) } $ch->attributes };
        push(@result, map { [$$_[0], [$tag, $attr, cdr($_)]] } seealsoPartition_aux($doc, $ch)); }
      else {
        push(@result, [getIndexContentKey($ch), $ch]); } } }
  return @result; }

# ================================================================================
# Return the given items with the string ", " inserted between each
# adjacent pair; an empty list stays empty.
sub conjoin {
  my (@items) = @_;
  my @result = ();
  foreach my $item (@items) {
    push(@result, ", ") if @result;
    push(@result, $item); }
  return @result; }

# ================================================================================
1;