This file is indexed.

/usr/include/tesseract/char_bigrams.h is in libtesseract-dev 3.02.01-6.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
/**********************************************************************
 * File:        char_bigrams.h
 * Description: Declaration of a Character Bigrams Class
 * Author:    Ahmad Abdulkader
 * Created:   2007
 *
 * (C) Copyright 2008, Google Inc.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *
 **********************************************************************/

// The CharBigrams class represents the interface to the character bigram
// table used by Cube.
// A CharBigrams object can be constructed from the Char Bigrams file.
// Given a sequence of characters, the "Cost" method returns the Char Bigram
// cost of the string according to the table.

#ifndef CHAR_BIGRAMS_H
#define CHAR_BIGRAMS_H

#include <string>
#include "char_set.h"

namespace tesseract {

// Structure representing a single bigram value, i.e. one entry of a
// CharBigram's `bigram` array.
// NOTE(review): the code that fills these fields is not in this header; the
// per-field notes below are inferred from the names and should be confirmed
// against the implementation.
struct Bigram {
  int cnt;   // presumably the number of times this character pair was seen
  int cost;  // presumably the cost assigned to this character pair
};

// Structure representing the char bigram array of characters
// following a specific character.
// NOTE(review): this header does not show how the array is allocated or
// indexed; the per-field notes below are assumptions to confirm against the
// implementation.
struct CharBigram {
  int total_cnt;     // presumably the sum of `cnt` over all entries of `bigram`
  char_32 max_char;  // presumably the largest following-character code stored
  Bigram *bigram;    // raw pointer to the entries; presumably indexed by the
                     // following character's code, 0..max_char -- confirm
};

// Structure representing the whole bigram table: one CharBigram per leading
// character.
// NOTE(review): allocation, indexing and ownership of `char_bigram` are not
// visible in this header; the per-field notes below are assumptions to
// confirm against the implementation.
struct CharBigramTable {
  int total_cnt;           // presumably the total count over the whole table
  int worst_cost;          // presumably the highest (worst) cost in the table
  char_32 max_char;        // presumably the largest leading-character code
  CharBigram *char_bigram; // raw pointer to the per-character rows; presumably
                           // indexed by the leading character's code -- confirm
};

// Interface to the character bigram table. Instances are obtained through
// the static Create() factory; Cost() is the public query.
//
// NOTE(review): the class stores a CharBigramTable by value, and that struct
// holds raw pointers. No copy constructor or assignment operator is declared
// here, so the compiler-generated ones would copy those pointers. If the
// destructor frees the table, copying an instance would be unsafe -- confirm
// against the implementation.
class CharBigrams {
 public:
  // Constructor and destructor; their definitions are not in this header.
  CharBigrams();
  ~CharBigrams();
  // Construct the CharBigrams class from a file.
  // Takes the data file path and the language as strings and returns a
  // pointer to a CharBigrams object.
  // NOTE(review): presumably the caller owns the returned object and NULL
  // signals failure -- confirm against the implementation.
  // NOTE(review): `string` is used unqualified; this header includes <string>
  // but has no using-declaration, so the name presumably comes in through
  // char_set.h -- confirm.
  static CharBigrams *Create(const string &data_file_path,
                             const string &lang);
  // Top-level function to return the mean character bigram cost of a
  // sequence of characters.  If char_set is not NULL, use
  // tesseract functions to return a case-invariant cost.
  // This avoids unnecessarily penalizing all-one-case words or
  // capitalized words (first-letter upper-case and remaining letters
  // lower-case).
  // NOTE(review): str is presumably a zero-terminated char_32 string, since
  // no length is passed -- confirm.
  int Cost(const char_32 *str, CharSet *char_set) const;

 protected:
  // Returns the character bigram cost of two characters.
  int PairCost(char_32 ch1, char_32 ch2) const;
  // Returns the mean character bigram cost of a sequence of
  // characters. Adds a space at the beginning and end to account for
  // cost of starting and ending characters.
  int MeanCostWithSpaces(const char_32 *char_32_ptr) const;

 private:
  // Only words this length or greater qualify for case-invariant character
  // bigram cost.
  static const int kMinLengthCaseInvariant = 4;


  // The bigram table queried by the cost functions above.
  // NOTE(review): presumably populated by Create() -- confirm.
  CharBigramTable bigram_table_;
};
}

#endif  // CHAR_BIGRAMS_H