The following issues were found:
unittest/unicharcompress_test.cc
1 issue
Line: 165
int encoded_null_char_;
};
TEST_F(UnicharcompressTest, DoesChinese) {
LOG(INFO) << "Testing chi_tra";
LoadUnicharset("chi_tra.unicharset");
ExpectCorrect("chi_tra");
LOG(INFO) << "Testing chi_sim";
LoadUnicharset("chi_sim.unicharset");
Reported by Cppcheck.
unittest/unicharset_test.cc
1 issue
Line: 29
}
};
TEST(UnicharsetTest, Basics) {
// This test verifies basic insertion, unichar_to_id, and encode.
UNICHARSET u;
u.unichar_insert("a");
EXPECT_EQ(u.size(), 4);
u.unichar_insert("f");
Reported by Cppcheck.
unittest/validate_grapheme_test.cc
1 issue
Line: 31
EXPECT_EQ(glyphs[2], std::string("\u0c0e"));
}
TEST(ValidateGraphemeTest, SingleConsonantOK) {
std::string str = "\u0cb9"; // HA
std::vector<std::string> glyphs;
EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(UnicodeNormMode::kNFC, OCRNorm::kNone,
GraphemeNormMode::kCombined, true, str.c_str(), &glyphs))
<< PrintString32WithUnicodes(str);
Reported by Cppcheck.
unittest/validate_indic_test.cc
1 issue
Line: 37
}
// Only one dependent vowel is allowed.
TEST(ValidateIndicTest, OnlyOneDependentVowel) {
std::string str = "\u0d15\u0d3e\u0d42"; // KA AA UU
std::string dest;
EXPECT_FALSE(NormalizeUTF8String(UnicodeNormMode::kNFC, OCRNorm::kNone, GraphemeNorm::kNormalize,
str.c_str(), &dest))
<< PrintString32WithUnicodes(str);
Reported by Cppcheck.
unittest/validate_khmer_test.cc
1 issue
Line: 31
}
// Test some random Khmer words with dotted circles.
TEST(ValidateKhmerTest, BadKhmerWords) {
std::string result;
// Multiple dependent vowels not allowed
std::string str = "\u1796\u17b6\u17b7";
EXPECT_FALSE(NormalizeUTF8String(UnicodeNormMode::kNFC, OCRNorm::kNone, GraphemeNorm::kNormalize,
str.c_str(), &result));
Reported by Cppcheck.
unittest/validate_myanmar_test.cc
1 issue
Line: 27
}
// Test some random Myanmar words with dotted circles.
TEST(ValidateMyanmarTest, BadMyanmarWords) {
std::string str = "က်န္းမာေရး";
std::vector<std::string> glyphs;
EXPECT_FALSE(NormalizeCleanAndSegmentUTF8(UnicodeNormMode::kNFC, OCRNorm::kNone,
GraphemeNormMode::kCombined, true, str.c_str(),
&glyphs));
Reported by Cppcheck.
include/tesseract/unichar.h
1 issue
Line: 171
Column: 3
CWE codes:
119
120
Suggestion:
Perform bounds checking, use functions that limit length, or ensure that the size is larger than the maximum possible length.
// A UTF-8 representation of 1 or more Unicode characters.
// The last element (chars[UNICHAR_LEN - 1]) is a length if
// its value < UNICHAR_LEN, otherwise it is a genuine character.
char chars[UNICHAR_LEN]{};
};
} // namespace tesseract
#endif // TESSERACT_CCUTIL_UNICHAR_H_
Reported by FlawFinder.