The following issues were found:

unittest/lstm_test.cc
3 issues
syntax error
Error

Line: 50

              }

// Color learns almost as fast as normalized grey/2D.
TEST_F(LSTMTrainerTest, ColorTest) {
  // A basic single-layer, single direction LSTM.
  SetupTrainerEng("[1,32,0,3 S4,2 L2xy16 Ct1,1,16 S8,1 Lbx100 O1c1]", "2D-color-lstm", true, true);
  double lstm_uni_err = TrainIterations(kTrainerIterations);
  EXPECT_LT(lstm_uni_err, 85);
  //  EXPECT_GT(lstm_uni_err, 66);

            

Reported by Cppcheck.

char - Statically-sized arrays can be improperly restricted, leading to potential overflows or other issues
Security

Line: 192 Column: 9 CWE codes: 119 120
Suggestion: Perform bounds checking, use functions that limit length, or ensure that the size is larger than the maximum possible length

                // Number of layers.
  const size_t kNumLayers = 8;
  // Expected layer names.
  const char *kLayerIds[kNumLayers] = {":0", ":1:0", ":1:1", ":2", ":3:0", ":4:0", ":4:1:0", ":5"};
  const char *kLayerNames[kNumLayers] = {"Input",  "Convolve",  "ConvNL", "Maxpool",
                                         "Lfys32", "Lbx128LTR", "Lbx128", "Output"};
  // Expected number of weights.
  const int kNumWeights[kNumLayers] = {0,
                                       0,

            

Reported by FlawFinder.

char - Statically-sized arrays can be improperly restricted, leading to potential overflows or other issues
Security

Line: 193 Column: 9 CWE codes: 119 120
Suggestion: Perform bounds checking, use functions that limit length, or ensure that the size is larger than the maximum possible length

                const size_t kNumLayers = 8;
  // Expected layer names.
  const char *kLayerIds[kNumLayers] = {":0", ":1:0", ":1:1", ":2", ":3:0", ":4:0", ":4:1:0", ":5"};
  const char *kLayerNames[kNumLayers] = {"Input",  "Convolve",  "ConvNL", "Maxpool",
                                         "Lfys32", "Lbx128LTR", "Lbx128", "Output"};
  // Expected number of weights.
  const int kNumWeights[kNumLayers] = {0,
                                       0,
                                       16 * (25 + 1),

            

Reported by FlawFinder.

src/ccstruct/ratngs.cpp
3 issues
strlen - Does not handle strings that are not \0-terminated; if given one it may perform an over-read (it could cause a crash if unprotected)
Security

Line: 239 Column: 24 CWE codes: 126

               */
void WERD_CHOICE::init(const char *src_string, const char *src_lengths, float src_rating,
                       float src_certainty, uint8_t src_permuter) {
  int src_string_len = strlen(src_string);
  if (src_string_len == 0) {
    this->init(8);
  } else {
    this->init(src_lengths ? strlen(src_lengths) : src_string_len);
    length_ = reserved_;

            

Reported by FlawFinder.

strlen - Does not handle strings that are not \0-terminated; if given one it may perform an over-read (it could cause a crash if unprotected)
Security

Line: 243 Column: 30 CWE codes: 126

                if (src_string_len == 0) {
    this->init(8);
  } else {
    this->init(src_lengths ? strlen(src_lengths) : src_string_len);
    length_ = reserved_;
    int offset = 0;
    for (int i = 0; i < length_; ++i) {
      int unichar_length = src_lengths ? src_lengths[i] : 1;
      unichar_ids_[i] = unicharset_->unichar_to_id(src_string + offset, unichar_length);

            

Reported by FlawFinder.

strlen - Does not handle strings that are not \0-terminated; if given one it may perform an over-read (it could cause a crash if unprotected)
Security

Line: 437 Column: 34 CWE codes: 126

                  const char *ch = unicharset_->id_to_unichar_ext(unichar_ids_[i]);
    *word_str += ch;
    if (word_lengths_str != nullptr) {
      *word_lengths_str += (char)strlen(ch);
    }
  }
}

/**

            

Reported by FlawFinder.

unittest/bitvector_test.cc
3 issues
syntax error
Error

Line: 116

              };

// Tests the sieve of Eratosthenes as a way of testing set/reset and I/O.
TEST_F(BitVectorTest, Primes) {
  BitVector map;
  ComputePrimes(&map);
  TestPrimes(map);
  // It still works if we use the copy constructor.
  BitVector map2(map);

            

Reported by Cppcheck.

fopen - Check when opening files - can an attacker redirect it (via symlinks), force the opening of special file type (e.g., device files), move things around to create a race condition, control its ancestors, or change its contents?
Security

Line: 129 Column: 14 CWE codes: 362

                TestPrimes(map3);
  // Test file i/o too.
  std::string filename = OutputNameToPath("primesbitvector");
  FILE *fp = fopen(filename.c_str(), "wb");
  ASSERT_TRUE(fp != nullptr);
  EXPECT_TRUE(map.Serialize(fp));
  fclose(fp);
  fp = fopen(filename.c_str(), "rb");
  ASSERT_TRUE(fp != nullptr);

            

Reported by FlawFinder.

fopen - Check when opening files - can an attacker redirect it (via symlinks), force the opening of special file type (e.g., device files), move things around to create a race condition, control its ancestors, or change its contents?
Security

Line: 133 Column: 8 CWE codes: 362

                ASSERT_TRUE(fp != nullptr);
  EXPECT_TRUE(map.Serialize(fp));
  fclose(fp);
  fp = fopen(filename.c_str(), "rb");
  ASSERT_TRUE(fp != nullptr);
  BitVector read_map;
  EXPECT_TRUE(read_map.DeSerialize(false, fp));
  fclose(fp);
  TestPrimes(read_map);

            

Reported by FlawFinder.

src/lstm/tfnetwork.cpp
3 issues
memcpy - Does not check for buffer overflows when copying to destination
Security

Line: 49 Column: 3 CWE codes: 120
Suggestion: Make sure destination can always hold the source data

                model_proto_.SerializeToString(&proto_str);
  // TODO: optimize and avoid copy from proto_str to data.
  std::vector<char> data(proto_str.size());
  memcpy(&data[0], proto_str.data(), proto_str.size());
  return fp->Serialize(data);
}

// Reads from the given file. Returns false in case of error.
// Should be overridden by subclasses, but NOT called by their DeSerialize.

            

Reported by FlawFinder.

memcpy - Does not check for buffer overflows when copying to destination
Security

Line: 79 Column: 3 CWE codes: 120
Suggestion: Make sure destination can always hold the source data

                Tensor input_tensor(tensorflow::DT_FLOAT, shape);
  // The flat() member gives a 1d array, with a data() member to get the data.
  auto eigen_tensor = input_tensor.flat<float>();
  memcpy(eigen_tensor.data(), input.f(0), input.Width() * depth * sizeof(input.f(0)[0]));
  // Add the tensor to the vector of inputs.
  tf_inputs.emplace_back(model_proto_.image_input(), input_tensor);

  // Provide tensors giving the width and/or height of the image if they are
  // required. Some tf ops require a separate tensor with knowledge of the

            

Reported by FlawFinder.

memcpy - Does not check for buffer overflows when copying to destination
Security

Line: 119 Column: 3 CWE codes: 120
Suggestion: Make sure destination can always hold the source data

                ASSERT_HOST(output_depth == output_shape_.depth());
  output->Resize2d(false, output_steps, output_depth);
  auto eigen_output = output_tensor.flat<float>();
  memcpy(output->f(0), eigen_output.data(), output_steps * output_depth * sizeof(output->f(0)[0]));
}

int TFNetwork::InitFromProto() {
  spec_ = model_proto_.spec();
  input_shape_.SetShape(model_proto_.batch_size(), std::max(0, model_proto_.y_size()),

            

Reported by FlawFinder.

src/training/pango/ligature_table.cpp
3 issues
char - Statically-sized arrays can be improperly restricted, leading to potential overflows or other issues
Security

Line: 119 Column: 3 CWE codes: 119 120
Suggestion: Perform bounds checking, use functions that limit length, or ensure that the size is larger than the maximum possible length

                std::string result;
  UNICHAR::const_iterator it_begin = UNICHAR::begin(str.c_str(), str.length());
  UNICHAR::const_iterator it_end = UNICHAR::end(str.c_str(), str.length());
  char tmp[5];
  int len;
  for (UNICHAR::const_iterator it = it_begin; it != it_end; ++it) {
    len = it.get_utf8(tmp);
    tmp[len] = '\0';
    auto lig_it = lig_to_norm_table_.find(tmp);

            

Reported by FlawFinder.

char - Statically-sized arrays can be improperly restricted, leading to potential overflows or other issues
Security

Line: 138 Column: 3 CWE codes: 119 120
Suggestion: Perform bounds checking, use functions that limit length, or ensure that the size is larger than the maximum possible length

                std::string result;
  UNICHAR::const_iterator it_begin = UNICHAR::begin(str.c_str(), str.length());
  UNICHAR::const_iterator it_end = UNICHAR::end(str.c_str(), str.length());
  char tmp[5];
  int len;
  int norm_ind;
  for (UNICHAR::const_iterator it = it_begin; it != it_end; ++it) {
    len = it.get_utf8(tmp);
    tmp[len] = '\0';

            

Reported by FlawFinder.

strlen - Does not handle strings that are not \0-terminated; if given one it may perform an over-read (it could cause a crash if unprotected)
Security

Line: 102 Column: 25 CWE codes: 126

                  // Add custom extra ligatures.
    for (int i = 0; UNICHARSET::kCustomLigatures[i][0] != nullptr; ++i) {
      norm_to_lig_table_[UNICHARSET::kCustomLigatures[i][0]] = UNICHARSET::kCustomLigatures[i][1];
      int norm_length = strlen(UNICHARSET::kCustomLigatures[i][0]);
      if (min_norm_length_ == 0 || norm_length < min_norm_length_) {
        min_norm_length_ = norm_length;
      }
      if (norm_length > max_norm_length_) {
        max_norm_length_ = norm_length;

            

Reported by FlawFinder.

src/lstm/generate_lut.py
3 issues
Missing module docstring
Error

Line: 1 Column: 1

              #!/usr/bin/env python3

# Create C/C++ code for two lookup tables.

import math

# Size of static tables.
kTableSize = 4096
# Scale factor for float arg to int index.

            

Reported by Pylint.

Constant name "kTableSize" doesn't conform to UPPER_CASE naming style
Error

Line: 8 Column: 1

              import math

# Size of static tables.
kTableSize = 4096
# Scale factor for float arg to int index.
kScaleFactor = 256.0

print("// Generated code with lookup tables")
print('#include "functions.h"')

            

Reported by Pylint.

Constant name "kScaleFactor" doesn't conform to UPPER_CASE naming style
Error

Line: 10 Column: 1

              # Size of static tables.
kTableSize = 4096
# Scale factor for float arg to int index.
kScaleFactor = 256.0

print("// Generated code with lookup tables")
print('#include "functions.h"')
print("namespace tesseract {")


            

Reported by Pylint.

unittest/indexmapbidi_test.cc
3 issues
syntax error
Error

Line: 74

              };

// Tests the sieve of Eratosthenes as a way of testing setup.
TEST_F(IndexMapBiDiTest, Primes) {
  IndexMapBiDi map;
  ComputePrimes(&map);
  TestPrimes(map);
  // It still works if we assign it to another.
  IndexMapBiDi map2;

            

Reported by Cppcheck.

fopen - Check when opening files - can an attacker redirect it (via symlinks), force the opening of special file type (e.g., device files), move things around to create a race condition, control its ancestors, or change its contents?
Security

Line: 88 Column: 14 CWE codes: 362

                TestPrimes(base_map);
  // Test file i/o too.
  std::string filename = OutputNameToPath("primesmap");
  FILE *fp = fopen(filename.c_str(), "wb");
  CHECK(fp != nullptr);
  EXPECT_TRUE(map.Serialize(fp));
  fclose(fp);
  fp = fopen(filename.c_str(), "rb");
  CHECK(fp != nullptr);

            

Reported by FlawFinder.

fopen - Check when opening files - can an attacker redirect it (via symlinks), force the opening of special file type (e.g., device files), move things around to create a race condition, control its ancestors, or change its contents?
Security

Line: 92 Column: 8 CWE codes: 362

                CHECK(fp != nullptr);
  EXPECT_TRUE(map.Serialize(fp));
  fclose(fp);
  fp = fopen(filename.c_str(), "rb");
  CHECK(fp != nullptr);
  IndexMapBiDi read_map;
  EXPECT_TRUE(read_map.DeSerialize(false, fp));
  fclose(fp);
  TestPrimes(read_map);

            

Reported by FlawFinder.

src/ccutil/unicity_table.h
2 issues
read - Check buffer boundaries if used in a loop including recursive loops
Security

Line: 119 Column: 8 CWE codes: 120 20

                bool write(FILE *f, std::function<bool(FILE *, const T &)> cb) const {
    return table_.write(f, cb);
  }
  bool read(tesseract::TFile *f, std::function<bool(tesseract::TFile *, T *)> cb) {
    return table_.read(f, cb);
  }

private:
  GenericVector<T> table_;

            

Reported by FlawFinder.

read - Check buffer boundaries if used in a loop including recursive loops
Security

Line: 120 Column: 19 CWE codes: 120 20

                  return table_.write(f, cb);
  }
  bool read(tesseract::TFile *f, std::function<bool(tesseract::TFile *, T *)> cb) {
    return table_.read(f, cb);
  }

private:
  GenericVector<T> table_;
};

            

Reported by FlawFinder.

src/ccmain/recogtraining.cpp
2 issues
fopen - Check when opening files - can an attacker redirect it (via symlinks), force the opening of special file type (e.g., device files), move things around to create a race condition, control its ancestors, or change its contents?
Security

Line: 50 Column: 23 CWE codes: 362

                  output_fname[lastdot - output_fname.c_str()] = '\0';
  }
  output_fname += ".txt";
  FILE *output_file = fopen(output_fname.c_str(), "a+");
  if (output_file == nullptr) {
    tprintf("Error: Could not open file %s\n", output_fname.c_str());
    ASSERT_HOST(output_file);
  }
  return output_file;

            

Reported by FlawFinder.

fopen - Check when opening files - can an attacker redirect it (via symlinks), force the opening of special file type (e.g., device files), move things around to create a race condition, control its ancestors, or change its contents?
Security

Line: 95 Column: 20 CWE codes: 362

                }
  box_fname += ".box";
  // ReadNextBox() will close box_file
  FILE *box_file = fopen(box_fname.c_str(), "r");
  if (box_file == nullptr) {
    tprintf("Error: Could not open file %s\n", box_fname.c_str());
    ASSERT_HOST(box_file);
  }


            

Reported by FlawFinder.

src/ccmain/paragraphs.cpp
2 issues
char - Statically-sized arrays can be improperly restricted, leading to potential overflows or other issues
Security

Line: 517 Column: 3 CWE codes: 119 120
Suggestion: Perform bounds checking, use functions that limit length, or ensure that the size is larger than the maximum possible length

              
void RowScratchRegisters::AppendDebugInfo(const ParagraphTheory &theory,
                                          std::vector<std::string> &dbg) const {
  char s[30];
  snprintf(s, sizeof(s), "[%3d,%3d;%3d,%3d]", lmargin_, lindent_, rindent_, rmargin_);
  dbg.emplace_back(s);
  std::string model_string;
  model_string += static_cast<char>(GetLineType());
  model_string += ":";

            

Reported by FlawFinder.

strlen - Does not handle strings that are not \0-terminated; if given one it may perform an over-read (it could cause a crash if unprotected)
Security

Line: 2499 Column: 25 CWE codes: 126

                }
  info->text = "";
  const std::unique_ptr<const char[]> text(it.GetUTF8Text(RIL_TEXTLINE));
  int trailing_ws_idx = strlen(text.get()); // strip trailing space
  while (trailing_ws_idx > 0 &&
         // isspace() only takes ASCII
         isascii(text[trailing_ws_idx - 1]) && isspace(text[trailing_ws_idx - 1])) {
    trailing_ws_idx--;
  }

            

Reported by FlawFinder.