The following issues were found
src/training/unicharset/normstrngs.cpp
5 issues
Line: 234
Column: 38
CWE codes:
126
}
bool IsUTF8Whitespace(const char *text) {
return SpanUTF8Whitespace(text) == strlen(text);
}
unsigned int SpanUTF8Whitespace(const char *text) {
int n_white = 0;
for (UNICHAR::const_iterator it = UNICHAR::begin(text, strlen(text));
Reported by FlawFinder.
Line: 239
Column: 58
CWE codes:
126
unsigned int SpanUTF8Whitespace(const char *text) {
int n_white = 0;
for (UNICHAR::const_iterator it = UNICHAR::begin(text, strlen(text));
it != UNICHAR::end(text, strlen(text)); ++it) {
if (!IsWhitespace(*it)) {
break;
}
n_white += it.utf8_len();
Reported by FlawFinder.
Line: 240
Column: 33
CWE codes:
126
unsigned int SpanUTF8Whitespace(const char *text) {
int n_white = 0;
for (UNICHAR::const_iterator it = UNICHAR::begin(text, strlen(text));
it != UNICHAR::end(text, strlen(text)); ++it) {
if (!IsWhitespace(*it)) {
break;
}
n_white += it.utf8_len();
}
Reported by FlawFinder.
Line: 251
Column: 58
CWE codes:
126
unsigned int SpanUTF8NotWhitespace(const char *text) {
int n_notwhite = 0;
for (UNICHAR::const_iterator it = UNICHAR::begin(text, strlen(text));
it != UNICHAR::end(text, strlen(text)); ++it) {
if (IsWhitespace(*it)) {
break;
}
n_notwhite += it.utf8_len();
Reported by FlawFinder.
Line: 252
Column: 33
CWE codes:
126
unsigned int SpanUTF8NotWhitespace(const char *text) {
int n_notwhite = 0;
for (UNICHAR::const_iterator it = UNICHAR::begin(text, strlen(text));
it != UNICHAR::end(text, strlen(text)); ++it) {
if (IsWhitespace(*it)) {
break;
}
n_notwhite += it.utf8_len();
}
Reported by FlawFinder.
src/ccutil/genericvector.h
5 issues
Line: 203
Column: 5
CWE codes:
120
Suggestion:
Make sure destination can always hold the source data
// operator=() for each element like double_the_size() does.
static T *double_the_size_memcpy(int current_size, T *data) {
T *data_new = new T[current_size * 2];
memcpy(data_new, data, sizeof(T) * current_size);
delete[] data;
return data_new;
}
// Reverses the elements of the vector.
Reported by FlawFinder.
Line: 287
Column: 14
CWE codes:
362
// returning false on error.
inline bool LoadDataFromFile(const char *filename, GenericVector<char> *data) {
bool result = false;
FILE *fp = fopen(filename, "rb");
if (fp != nullptr) {
fseek(fp, 0, SEEK_END);
auto size = std::ftell(fp);
fseek(fp, 0, SEEK_SET);
// Trying to open a directory on Linux sets size to LONG_MAX. Catch it here.
Reported by FlawFinder.
Line: 307
Column: 14
CWE codes:
362
// The default FileWriter writes the vector of char to the filename file,
// returning false on error.
inline bool SaveDataToFile(const GenericVector<char> &data, const char *filename) {
FILE *fp = fopen(filename, "wb");
if (fp == nullptr) {
return false;
}
bool result = static_cast<int>(fwrite(&data[0], 1, data.size(), fp)) == data.size();
fclose(fp);
Reported by FlawFinder.
Line: 170
Column: 8
CWE codes:
120
20
// Returns false on error or if the callback returns false.
// DEPRECATED. Use [De]Serialize[Classes] instead.
bool write(FILE *f, std::function<bool(FILE *, const T &)> cb) const;
bool read(TFile *f, std::function<bool(TFile *, T *)> cb);
// Writes a vector of simple types to the given file. Assumes that bitwise
// read/write of T will work. Returns false in case of error.
// TODO(rays) Change all callers to use TFile and remove deprecated methods.
bool Serialize(FILE *fp) const;
bool Serialize(TFile *fp) const;
Reported by FlawFinder.
Line: 744
Column: 24
CWE codes:
120
20
}
template <typename T>
bool GenericVector<T>::read(TFile *f, std::function<bool(TFile *, T *)> cb) {
int32_t reserved;
if (f->FReadEndian(&reserved, sizeof(reserved), 1) != 1) {
return false;
}
reserve(reserved);
Reported by FlawFinder.
src/ccstruct/boxread.cpp
5 issues
Line: 62
Column: 20
CWE codes:
362
FILE *OpenBoxFile(const char *fname) {
std::string filename = BoxFileName(fname);
FILE *box_file = nullptr;
if (!(box_file = fopen(filename.c_str(), "rb"))) {
CANTOPENFILE.error("read_next_box", TESSEXIT, "Can't open box file %s", filename.c_str());
}
return box_file;
}
Reported by FlawFinder.
Line: 156
Column: 3
CWE codes:
119
120
Suggestion:
Perform bounds checking, use functions that limit length, or ensure that the size is larger than the maximum possible length
bool ReadNextBox(int target_page, int *line_number, FILE *box_file, std::string &utf8_str,
TBOX *bounding_box) {
int page = 0;
char buff[kBoxReadBufSize]; // boxfile read buffer
char *buffptr = buff;
while (fgets(buff, sizeof(buff) - 1, box_file)) {
(*line_number)++;
Reported by FlawFinder.
Line: 202
Column: 3
CWE codes:
119
120
Suggestion:
Perform bounds checking, use functions that limit length, or ensure that the size is larger than the maximum possible length
TBOX *bounding_box) {
*bounding_box = TBOX(); // Initialize it to empty.
utf8_str = "";
char uch[kBoxReadBufSize];
const char *buffptr = boxfile_str;
// Read the unichar without messing up on Tibetan.
// According to issue 253 the utf-8 surrogates 85 and A0 are treated
// as whitespace by sscanf, so it is more reliable to just find
// ascii space and tab.
Reported by FlawFinder.
Line: 245
Column: 5
CWE codes:
120
}
// Test for long space-delimited string label.
if (strcmp(uch, kMultiBlobLabelCode) == 0 && (buffptr = strchr(buffptr, '#')) != nullptr) {
strncpy(uch, buffptr + 1, kBoxReadBufSize - 1);
uch[kBoxReadBufSize - 1] = '\0'; // Prevent buffer overrun.
chomp_string(uch);
uch_len = strlen(uch);
}
// Validate UTF8 by making unichars with it.
Reported by FlawFinder.
Line: 248
Column: 15
CWE codes:
126
strncpy(uch, buffptr + 1, kBoxReadBufSize - 1);
uch[kBoxReadBufSize - 1] = '\0'; // Prevent buffer overrun.
chomp_string(uch);
uch_len = strlen(uch);
}
// Validate UTF8 by making unichars with it.
int used = 0;
while (used < uch_len) {
tesseract::UNICHAR ch(uch + used, uch_len - used);
Reported by FlawFinder.
src/ccutil/unichar.cpp
5 issues
Line: 57
Column: 3
CWE codes:
120
Suggestion:
Make sure destination can always hold the source data
break; // Illegal surrogate
}
}
memcpy(chars, utf8_str, total_len);
if (total_len < UNICHAR_LEN) {
chars[UNICHAR_LEN - 1] = total_len;
while (total_len < UNICHAR_LEN - 1) {
chars[total_len++] = 0;
}
Reported by FlawFinder.
Line: 137
Column: 3
CWE codes:
120
Suggestion:
Make sure destination can always hold the source data
char *UNICHAR::utf8_str() const {
int len = utf8_len();
char *str = new char[len + 1];
memcpy(str, chars, len);
str[len] = 0;
return str;
}
// Get the number of bytes in the first character of the given utf8 string.
Reported by FlawFinder.
Line: 144
Column: 16
CWE codes:
119
120
Suggestion:
Perform bounds checking, use functions that limit length, or ensure that the size is larger than the maximum possible length
// Get the number of bytes in the first character of the given utf8 string.
int UNICHAR::utf8_step(const char *utf8_str) {
static const char utf8_bytes[256] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
Reported by FlawFinder.
Line: 191
Column: 3
CWE codes:
120
utf8_output[0] = ' ';
return 1;
}
strncpy(utf8_output, it_, len);
return len;
}
int UNICHAR::const_iterator::utf8_len() const {
ASSERT_HOST(it_ != nullptr);
Reported by FlawFinder.
Line: 221
Column: 27
CWE codes:
126
// Returns an empty vector if the input contains invalid UTF-8.
/* static */
std::vector<char32> UNICHAR::UTF8ToUTF32(const char *utf8_str) {
const int utf8_length = strlen(utf8_str);
std::vector<char32> unicodes;
unicodes.reserve(utf8_length);
const_iterator end_it(end(utf8_str, utf8_length));
for (const_iterator it(begin(utf8_str, utf8_length)); it != end_it; ++it) {
if (it.is_legal()) {
Reported by FlawFinder.
src/ccmain/tessedit.cpp
5 issues
Line: 128
CWE codes:
476
// language-specific variables from [lang].traineddata file, so that custom
// config files can override values in [lang].traineddata file.
for (int i = 0; i < configs_size; ++i) {
read_config_file(configs[i], set_params_constraint);
}
// Set params specified in vars_vec (done after setting params from config
// files, so that params in vars_vec can override those from files).
if (vars_vec != nullptr && vars_values != nullptr) {
Reported by Cppcheck.
Line: 51
Column: 13
CWE codes:
362
path += "configs/";
path += filename;
FILE *fp;
if ((fp = fopen(path.c_str(), "rb")) != nullptr) {
fclose(fp);
} else {
path = datadir;
path += "tessconfigs/";
path += filename;
Reported by FlawFinder.
Line: 57
Column: 15
CWE codes:
362
path = datadir;
path += "tessconfigs/";
path += filename;
if ((fp = fopen(path.c_str(), "rb")) != nullptr) {
fclose(fp);
} else {
path = filename;
}
}
Reported by FlawFinder.
Line: 143
Column: 25
CWE codes:
362
}
if (!tessedit_write_params_to_file.empty()) {
FILE *params_file = fopen(tessedit_write_params_to_file.c_str(), "wb");
if (params_file != nullptr) {
ParamUtils::PrintParams(params_file, this->params());
fclose(params_file);
} else {
tprintf("Failed to open %s for writing params.\n", tessedit_write_params_to_file.c_str());
Reported by FlawFinder.
Line: 262
Column: 15
CWE codes:
126
++start;
}
// Find the index of the end of the lang code in string start.
int end = strlen(start);
const char *plus = strchr(start, '+');
if (plus != nullptr && plus - start < end) {
end = plus - start;
}
std::string lang_code(start);
Reported by FlawFinder.
src/classify/intproto.cpp
4 issues
Line: 1180
Column: 23
CWE codes:
190
Suggestion:
If source untrusted, check both minimum and maximum, even if the input had no minus sign (large numbers can roll over into negative number; consider saving to an unsigned value if that is intended)
if (ev_type == SVET_POPUP) {
if (ev->command_id == IDA_SHAPE_INDEX) {
if (shape_table_ != nullptr) {
*shape_id = atoi(ev->parameter);
*adaptive_on = false;
*pretrained_on = true;
if (*shape_id >= 0 && *shape_id < shape_table_->NumShapes()) {
int font_id;
shape_table_->GetFirstUnicharAndFont(*shape_id, &unichar_id, &font_id);
Reported by FlawFinder.
Line: 850
Column: 27
CWE codes:
120
20
}
if (version_id >= 4) {
using namespace std::placeholders; // for _1, _2
this->fontinfo_table_.read(fp, std::bind(read_info, _1, _2));
if (version_id >= 5) {
this->fontinfo_table_.read(fp, std::bind(read_spacing_info, _1, _2));
}
this->fontset_table_.read(fp, [](auto *f, auto *fs) { return f->DeSerialize(*fs); } );
}
Reported by FlawFinder.
Line: 852
Column: 29
CWE codes:
120
20
using namespace std::placeholders; // for _1, _2
this->fontinfo_table_.read(fp, std::bind(read_info, _1, _2));
if (version_id >= 5) {
this->fontinfo_table_.read(fp, std::bind(read_spacing_info, _1, _2));
}
this->fontset_table_.read(fp, [](auto *f, auto *fs) { return f->DeSerialize(*fs); } );
}
return (Templates);
Reported by FlawFinder.
Line: 854
Column: 26
CWE codes:
120
20
if (version_id >= 5) {
this->fontinfo_table_.read(fp, std::bind(read_spacing_info, _1, _2));
}
this->fontset_table_.read(fp, [](auto *f, auto *fs) { return f->DeSerialize(*fs); } );
}
return (Templates);
} /* ReadIntTemplates */
Reported by FlawFinder.
src/training/common/commandlineflags.cpp
4 issues
Line: 132
Column: 34
CWE codes:
126
static void PrintCommandLineFlags() {
const char *kFlagNamePrefix = "FLAGS_";
const int kFlagNamePrefixLen = strlen(kFlagNamePrefix);
for (auto &param : GlobalParams()->int_params) {
if (!strncmp(param->name_str(), kFlagNamePrefix, kFlagNamePrefixLen)) {
printf(" --%s %s (type:int default:%d)\n",
param->name_str() + kFlagNamePrefixLen,
param->info_str(), int32_t(*param));
Reported by FlawFinder.
Line: 223
Column: 14
CWE codes:
126
int32_t int_val;
if (IntFlagExists(lhs.c_str(), &int_val)) {
if (rhs != nullptr) {
if (!strlen(rhs)) {
// Bad input of the format --int_flag=
tprintf("ERROR: Bad argument: %s\n", (*argv)[i]);
exit(1);
}
if (!SafeAtoi(rhs, &int_val)) {
Reported by FlawFinder.
Line: 253
Column: 14
CWE codes:
126
double double_val;
if (DoubleFlagExists(lhs.c_str(), &double_val)) {
if (rhs != nullptr) {
if (!strlen(rhs)) {
// Bad input of the format --double_flag=
tprintf("ERROR: Bad argument: %s\n", (*argv)[i]);
exit(1);
}
if (!SafeAtod(rhs, &double_val)) {
Reported by FlawFinder.
Line: 287
Column: 14
CWE codes:
126
// --flag form
bool_val = true;
} else {
if (!strlen(rhs)) {
// Bad input of the format --bool_flag=
tprintf("ERROR: Bad argument: %s\n", (*argv)[i]);
exit(1);
}
if (!strcmp(rhs, "false") || !strcmp(rhs, "0")) {
Reported by FlawFinder.
src/ccstruct/pageres.cpp
4 issues
Line: 1015
Column: 67
CWE codes:
126
UNICHAR_ID WERD_RES::BothQuotes(UNICHAR_ID id1, UNICHAR_ID id2) {
const char *ch = uch_set->id_to_unichar(id1);
const char *next_ch = uch_set->id_to_unichar(id2);
if (is_simple_quote(ch, strlen(ch)) && is_simple_quote(next_ch, strlen(next_ch))) {
return uch_set->unichar_to_id("\"");
}
return INVALID_UNICHAR_ID;
}
Reported by FlawFinder.
Line: 1015
Column: 27
CWE codes:
126
UNICHAR_ID WERD_RES::BothQuotes(UNICHAR_ID id1, UNICHAR_ID id2) {
const char *ch = uch_set->id_to_unichar(id1);
const char *next_ch = uch_set->id_to_unichar(id2);
if (is_simple_quote(ch, strlen(ch)) && is_simple_quote(next_ch, strlen(next_ch))) {
return uch_set->unichar_to_id("\"");
}
return INVALID_UNICHAR_ID;
}
Reported by FlawFinder.
Line: 1036
Column: 26
CWE codes:
126
UNICHAR_ID WERD_RES::BothHyphens(UNICHAR_ID id1, UNICHAR_ID id2) {
const char *ch = uch_set->id_to_unichar(id1);
const char *next_ch = uch_set->id_to_unichar(id2);
if (strlen(ch) == 1 && strlen(next_ch) == 1 && (*ch == '-' || *ch == '~') &&
(*next_ch == '-' || *next_ch == '~')) {
return uch_set->unichar_to_id("-");
}
return INVALID_UNICHAR_ID;
}
Reported by FlawFinder.
Line: 1036
Column: 7
CWE codes:
126
UNICHAR_ID WERD_RES::BothHyphens(UNICHAR_ID id1, UNICHAR_ID id2) {
const char *ch = uch_set->id_to_unichar(id1);
const char *next_ch = uch_set->id_to_unichar(id2);
if (strlen(ch) == 1 && strlen(next_ch) == 1 && (*ch == '-' || *ch == '~') &&
(*next_ch == '-' || *next_ch == '~')) {
return uch_set->unichar_to_id("-");
}
return INVALID_UNICHAR_ID;
}
Reported by FlawFinder.
src/ccutil/serialis.cpp
4 issues
Line: 34
Column: 14
CWE codes:
362
// returning false on error.
bool LoadDataFromFile(const char *filename, std::vector<char> *data) {
bool result = false;
FILE *fp = fopen(filename, "rb");
if (fp != nullptr) {
fseek(fp, 0, SEEK_END);
auto size = std::ftell(fp);
fseek(fp, 0, SEEK_SET);
// Trying to open a directory on Linux sets size to LONG_MAX. Catch it here.
Reported by FlawFinder.
Line: 54
Column: 14
CWE codes:
362
// The default FileWriter writes the vector of char to the filename file,
// returning false on error.
bool SaveDataToFile(const std::vector<char> &data, const char *filename) {
FILE *fp = fopen(filename, "wb");
if (fp == nullptr) {
return false;
}
bool result = static_cast<int>(fwrite(&data[0], 1, data.size(), fp)) == data.size();
fclose(fp);
Reported by FlawFinder.
Line: 164
Column: 3
CWE codes:
120
Suggestion:
Make sure destination can always hold the source data
is_writing_ = false;
swap_ = false;
data_->resize(size); // TODO: optimize no init
memcpy(&(*data_)[0], data, size);
return true;
}
bool TFile::Open(FILE *fp, int64_t end_offset) {
offset_ = 0;
Reported by FlawFinder.
Line: 236
Column: 5
CWE codes:
120
Suggestion:
Make sure destination can always hold the source data
}
}
if (required_size > 0 && buffer != nullptr) {
memcpy(buffer, &(*data_)[offset_], required_size);
}
offset_ += required_size;
return required_size / size;
}
Reported by FlawFinder.
src/textord/devanagari_processing.cpp
4 issues
Line: 419
Column: 10
CWE codes:
676
Suggestion:
Use getrlimit(2), setrlimit(2), and sysconf(3) instead
int thresh = (topline_onpixel_count * 70) / 100;
int ulimit = topline_ylevel;
int llimit = topline_ylevel;
while (ulimit > 0 && hist_horiz.hist()[ulimit] >= thresh) {
--ulimit;
}
while (llimit < pixGetHeight(word_pix) && hist_horiz.hist()[llimit] >= thresh) {
++llimit;
}
Reported by FlawFinder.
Line: 419
Column: 42
CWE codes:
676
Suggestion:
Use getrlimit(2), setrlimit(2), and sysconf(3) instead
int thresh = (topline_onpixel_count * 70) / 100;
int ulimit = topline_ylevel;
int llimit = topline_ylevel;
while (ulimit > 0 && hist_horiz.hist()[ulimit] >= thresh) {
--ulimit;
}
while (llimit < pixGetHeight(word_pix) && hist_horiz.hist()[llimit] >= thresh) {
++llimit;
}
Reported by FlawFinder.
Line: 420
Column: 7
CWE codes:
676
Suggestion:
Use getrlimit(2), setrlimit(2), and sysconf(3) instead
int ulimit = topline_ylevel;
int llimit = topline_ylevel;
while (ulimit > 0 && hist_horiz.hist()[ulimit] >= thresh) {
--ulimit;
}
while (llimit < pixGetHeight(word_pix) && hist_horiz.hist()[llimit] >= thresh) {
++llimit;
}
Reported by FlawFinder.
Line: 427
Column: 23
CWE codes:
676
Suggestion:
Use getrlimit(2), setrlimit(2), and sysconf(3) instead
}
if (shirorekha_top) {
*shirorekha_top = ulimit;
}
if (shirorekha_bottom) {
*shirorekha_bottom = llimit;
}
if (shirorekha_ylevel) {
Reported by FlawFinder.