The following issues were found:

unittest/unicharcompress_test.cc
1 issue
syntax error
Error

Line: 165

                int encoded_null_char_;
};

TEST_F(UnicharcompressTest, DoesChinese) {
  LOG(INFO) << "Testing chi_tra";
  LoadUnicharset("chi_tra.unicharset");
  ExpectCorrect("chi_tra");
  LOG(INFO) << "Testing chi_sim";
  LoadUnicharset("chi_sim.unicharset");

            

Reported by Cppcheck.

unittest/unicharset_test.cc
1 issue
syntax error
Error

Line: 29

                }
};

TEST(UnicharsetTest, Basics) {
  // This test verifies basic insertion, unichar_to_id, and encode.
  UNICHARSET u;
  u.unichar_insert("a");
  EXPECT_EQ(u.size(), 4);
  u.unichar_insert("f");

            

Reported by Cppcheck.

unittest/validate_grapheme_test.cc
1 issue
syntax error
Error

Line: 31

                EXPECT_EQ(glyphs[2], std::string("\u0c0e"));
}

TEST(ValidateGraphemeTest, SingleConsonantOK) {
  std::string str = "\u0cb9"; // HA
  std::vector<std::string> glyphs;
  EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(UnicodeNormMode::kNFC, OCRNorm::kNone,
                                           GraphemeNormMode::kCombined, true, str.c_str(), &glyphs))
      << PrintString32WithUnicodes(str);

            

Reported by Cppcheck.

unittest/validate_indic_test.cc
1 issue
syntax error
Error

Line: 37

              }

// Only one dependent vowel is allowed.
TEST(ValidateIndicTest, OnlyOneDependentVowel) {
  std::string str = "\u0d15\u0d3e\u0d42"; // KA AA UU
  std::string dest;
  EXPECT_FALSE(NormalizeUTF8String(UnicodeNormMode::kNFC, OCRNorm::kNone, GraphemeNorm::kNormalize,
                                   str.c_str(), &dest))
      << PrintString32WithUnicodes(str);

            

Reported by Cppcheck.

unittest/validate_khmer_test.cc
1 issue
syntax error
Error

Line: 31

              }

// Test some random Khmer words with dotted circles.
TEST(ValidateKhmerTest, BadKhmerWords) {
  std::string result;
  // Multiple dependent vowels not allowed
  std::string str = "\u1796\u17b6\u17b7";
  EXPECT_FALSE(NormalizeUTF8String(UnicodeNormMode::kNFC, OCRNorm::kNone, GraphemeNorm::kNormalize,
                                   str.c_str(), &result));

            

Reported by Cppcheck.

unittest/validate_myanmar_test.cc
1 issue
syntax error
Error

Line: 27

              }

// Test some random Myanmar words with dotted circles.
TEST(ValidateMyanmarTest, BadMyanmarWords) {
  std::string str = "က်န္းမာေရး";
  std::vector<std::string> glyphs;
  EXPECT_FALSE(NormalizeCleanAndSegmentUTF8(UnicodeNormMode::kNFC, OCRNorm::kNone,
                                            GraphemeNormMode::kCombined, true, str.c_str(),
                                            &glyphs));

            

Reported by Cppcheck.

include/tesseract/unichar.h
1 issue
char - Statically-sized arrays can be improperly restricted, leading to potential overflows or other issues
Security

Line: 171 Column: 3 CWE codes: 119 120
Suggestion: Perform bounds checking, use functions that limit length, or ensure that the size is larger than the maximum possible length

                // A UTF-8 representation of 1 or more Unicode characters.
  // The last element (chars[UNICHAR_LEN - 1]) is a length if
  // its value < UNICHAR_LEN, otherwise it is a genuine character.
  char chars[UNICHAR_LEN]{};
};

} // namespace tesseract

#endif // TESSERACT_CCUTIL_UNICHAR_H_

            

Reported by FlawFinder.