Foxit PDF SDK
fs_ocr.h
Go to the documentation of this file.
1 #if (defined(_WIN32) || defined(_WIN64)) || (defined(__linux__) && defined(__x86_64__) && !defined(__ANDROID__))
2 
16 #ifndef FS_OCR_H_
17 #define FS_OCR_H_
18 
19 #include "common/fs_common.h"
20 #include "pdf/fs_pdfdoc.h"
21 #include "pdf/fs_pdfpage.h"
22 
28 namespace foxit {
32 namespace addon {
36 namespace ocr {
48 class OCREngine FS_FINAL : public Object {  // Engine-level OCR control; every member visible in this listing is static.
49  public:
69  static ErrorCode Initialize(const wchar_t* ocr_resource_path);  // Initialize OCR engine; the outcome is reported as an ErrorCode.
70 
99  static ErrorCode Initialize(const wchar_t* ocr_resource_path, bool is_shared_cpu_cores_mode);  // Overload of Initialize with an extra flag. NOTE(review): the meaning of is_shared_cpu_cores_mode is not shown in this listing -- confirm in the SDK reference.
100 
109  static void Release();  // Release OCR engine.
110 
122  static void SetLogFile(const char* log_file_path);  // Set log file for OCR engine (narrow-character path).
123 
135  static void SetLogFile(const wchar_t* log_file_path);  // Overload of SetLogFile taking a wide-character path.
136 
156  static void SetLanguages(const wchar_t* languages);  // Set the names of the languages to include in the language database used for OCR.
157 };
158 
160 class OCRConfig FS_FINAL : public Object {  // Set of boolean OCR options. NOTE: this listing omits several lines of the class (signatures and member declarations); see the notes below.
161  public:
166  :is_detect_pictures(true)  // Initializer list of the default constructor OCRConfig(); its signature (listing line 165) is not included in this extract.
167  ,is_remove_noise(true)
168  ,is_correct_skew(true)
170 
179  this->is_detect_pictures = is_detect_pictures;  // Body of OCRConfig(bool, bool, bool, bool) (signature on listing line 178, not included): each argument is stored in the member of the same name.
180  this->is_remove_noise = is_remove_noise;
181  this->is_correct_skew = is_correct_skew;
182  this->is_enable_text_extraction_mode = is_enable_text_extraction_mode;
183  }
184 
196  this->is_detect_pictures = is_detect_pictures;  // Body of Set(bool, bool, bool, bool) (signature on listing line 195, not included): same member-wise stores as the constructor above.
197  this->is_remove_noise = is_remove_noise;
198  this->is_correct_skew = is_correct_skew;
199  this->is_enable_text_extraction_mode = is_enable_text_extraction_mode;
200  }
201 
209  OCRConfig& operator=(const OCRConfig& other) {  // Assign operator. Listing lines 210-213 are not included here (presumably the member-wise copies -- confirm against the header).
214  return (*this);  // Returns a reference to this object.
215  }
216 
224  bool operator!=(const OCRConfig& other) {  // Not equal operator. NOTE(review): not const-qualified, so it cannot be called on a const OCRConfig.
226  return true;  // Guarded by the condition on listing line 225, which is missing from this extract.
227  return false;
228  }
229 
237 
247 
256 
269 };  // The data members (bool is_detect_pictures, is_remove_noise, is_correct_skew, is_enable_text_extraction_mode) are declared on listing lines 236, 246, 255 and 268, which this extract omits.
270 
272 class OCRSettingData FS_FINAL : public Object {  // Per-document OCR settings: document, page range, editable flag and OCRConfig. NOTE: this listing omits several lines of the class; see the notes below.
273  public:
278 
288  this->pdf_doc = pdf_doc;  // Body of OCRSettingData(pdf::PDFDoc, const common::Range&, bool, const OCRConfig&) (signature on listing line 287, not included): each argument is stored in the member of the same name.
289  this->page_range = page_range;
290  this->is_editable = is_editable;
291  this->ocr_config = ocr_config;
292  }
293 
305  this->pdf_doc = pdf_doc;  // Body of Set(pdf::PDFDoc, const common::Range&, bool, const OCRConfig&) (signature on listing line 304, not included): same member-wise stores as the constructor above.
306  this->page_range = page_range;
307  this->is_editable = is_editable;
308  this->ocr_config = ocr_config;
309  }
310 
319  pdf_doc = data.pdf_doc;  // Body of the assign operator, OCRSettingData& operator=(const OCRSettingData& data) (signature on listing line 318, not included): copies all four members from data.
320  page_range = data.page_range;
321  is_editable = data.is_editable;
322  ocr_config = data.ocr_config;
323  return (*this);  // Returns a reference to this object.
324  }
325 
333  bool operator!=(const OCRSettingData& data) {  // Not equal operator. NOTE(review): not const-qualified, so it cannot be called on a const OCRSettingData.
334  if (pdf_doc != data.pdf_doc || page_range != data.page_range || is_editable != data.is_editable || ocr_config != data.ocr_config)  // True as soon as any one of the four members differs.
335  return true;
336  return false;
337  }
338 
341 
344 
350 
353 };  // The data members (pdf::PDFDoc pdf_doc, common::Range page_range, bool is_editable, OCRConfig ocr_config) are declared on listing lines 340, 343, 349 and 352, which this extract omits.
354 
357 
358 
363 class OCRSuspectInfo FS_FINAL : public Object {  // Describes suspicious words after OCR recognition. The member declarations are not included in this extract.
364  public:
367 
370 
373 };  // Omitted members: int page_index (listing line 366, the index of page), foxit::RectF words_rect (line 369, box rectangle in PDF coordinate system), WString suspect_words (line 372).
374 
377 
378 
384 class OCR FS_FINAL : public Base {  // Declares the OCR entry points for PDF pages and documents, plus a query for suspect information.
385  public:
389  OCR();  // Default constructor.
390 
396  OCR(const OCR& other);  // Copy constructor.
397 
398  // Users are strongly advised NOT to use this constructor; otherwise unexpected behavior may occur.
399  explicit OCR(FS_HANDLE handle);  // Constructs from a raw FS_HANDLE (a void* handle type).
401  ~OCR();  // Destructor.
402 
410  OCR& operator = (const OCR& other);  // Assign operator.
418  bool operator == (const OCR& other) const;  // Equal operator.
426  bool operator != (const OCR& other) const;  // Not equal operator.
427 
435  bool IsEmpty() const;  // NOTE(review): presumably reports whether this object is empty; exact semantics are not shown in this listing.
436 
449  void OCRPDFPage(pdf::PDFPage pdf_page, bool is_editable);  // OCR a single PDF page. is_editable presumably selects whether the OCR result is editable -- confirm in the SDK reference.
450 
464  void OCRPDFPage(pdf::PDFPage pdf_page, bool is_editable, const OCRConfig& config);  // Overload of OCRPDFPage that additionally takes an OCRConfig.
465 
478  void OCRPDFDocument(pdf::PDFDoc pdf_doc, bool is_editable);  // OCR a PDF document; parameters mirror OCRPDFPage.
479 
493  void OCRPDFDocument(pdf::PDFDoc pdf_doc, bool is_editable, const OCRConfig& config);  // Overload of OCRPDFDocument that additionally takes an OCRConfig.
494 
495 #if (defined(_WIN32) || defined(_WIN64)) || (defined(__linux__) && defined(__x86_64__) && !defined(__ANDROID__))  // Same condition as the guard on the file's first line, so it always holds inside this header.
496 
512  void OCRPDFDocuments(const ocr::OCRSettingDataArray& settingdata_array);  // Takes an array of OCRSettingData. NOTE(review): the OCRSettingDataArray definition is not visible in this listing.
513 #endif
514 
524  OCRSuspectInfoArray GetOCRSuspectsInfo(pdf::PDFDoc ocred_pdf_doc);  // Returns an OCRSuspectInfoArray for the given document. NOTE(review): the parameter name suggests the document must already have been OCRed -- confirm.
525 };
526 
527 } // namespace ocr
528 } // namespace addon
529 } // namespace foxit
530 
531 #endif // FS_OCR_H_
532 
533 #endif // #if (defined(_WIN32) || defined(_WIN64)) || (defined(__linux__) && defined(__x86_64__) && !defined(__ANDROID__))
Definition: fs_ocr.h:160
bool is_detect_pictures
Decide whether to detect pictures. true means the pictures will be detected during analysis process....
Definition: fs_ocr.h:236
Definition: fs_common.h:1273
CFX_Object Object
Object type.
Definition: fs_basictypes.h:221
bool is_editable
Decide whether the OCR result is editable. true means the OCR result is editable. false means the OCR...
Definition: fs_ocr.h:349
Header file for PDF document related definitions and classes.
static void SetLogFile(const char *log_file_path)
Set log file for OCR engine.
OCRSettingData(pdf::PDFDoc pdf_doc, const common::Range &page_range, bool is_editable, const OCRConfig &ocr_config)
Constructor, with parameters.
Definition: fs_ocr.h:287
WIDE STRING CLASS.
Definition: fx_string.h:1461
Definition: fs_pdfdoc.h:648
OCRConfig()
Constructor.
Definition: fs_ocr.h:165
void Set(pdf::PDFDoc pdf_doc, const common::Range &page_range, bool is_editable, const OCRConfig &ocr_config)
Set value.
Definition: fs_ocr.h:304
static ErrorCode Initialize(const wchar_t *ocr_resource_path)
Initialize OCR engine.
OCRSettingData()
Constructor.
Definition: fs_ocr.h:277
WString suspect_words
Suspicious words after OCR recognition.
Definition: fs_ocr.h:372
bool operator==(const char *str1, const CFX_ByteString &str2)
Check if two byte strings are equal.
Definition: fs_basictypes.h:128
OCRSettingData & operator=(const OCRSettingData &data)
Assign operator.
Definition: fs_ocr.h:318
Definition: fs_ocr.h:48
Definition: fs_ocr.h:384
ErrorCode
Enumeration for error code.
Definition: fs_basictypes.h:237
OCRConfig & operator=(const OCRConfig &other)
Assign operator.
Definition: fs_ocr.h:209
foxit::RectF words_rect
The box rectangle, in PDF coordinate system for suspicious words.
Definition: fs_ocr.h:369
pdf::PDFDoc pdf_doc
A valid PDF document that needs to be OCRed.
Definition: fs_ocr.h:340
Definition: fs_pdfpage.h:412
void * FS_HANDLE
Handle type.
Definition: fs_basictypes.h:214
int page_index
The index of page.
Definition: fs_ocr.h:366
Header file for common definitions and classes.
bool operator!=(const OCRSettingData &data)
Not equal operator.
Definition: fs_ocr.h:333
OCRConfig(bool is_detect_pictures, bool is_remove_noise, bool is_correct_skew, bool is_enable_text_extraction_mode)
Constructor, with parameters.
Definition: fs_ocr.h:178
Definition: fs_ocr.h:363
Definition: fs_basictypes.h:443
bool operator!=(const OCRConfig &other)
Not equal operator.
Definition: fs_ocr.h:224
Header file for PDF page related definitions and classes.
static void SetLanguages(const wchar_t *languages)
Set the names of the languages to be included in the language database used for OCR.
bool is_correct_skew
Decide whether to enable skew correction. true means to enable skew correction. false means not to en...
Definition: fs_ocr.h:255
Foxit namespace.
Definition: fs_pdf3d.h:27
void Set(bool is_detect_pictures, bool is_remove_noise, bool is_correct_skew, bool is_enable_text_extraction_mode)
Set value.
Definition: fs_ocr.h:195
bool is_remove_noise
Decide whether to remove noise of the image of PDF. It can be useful if the image of the PDF contains...
Definition: fs_ocr.h:246
bool is_enable_text_extraction_mode
Decide whether to enable text extraction mode.
Definition: fs_ocr.h:268
Definition: fs_ocr.h:272
common::Range page_range
The range of pages that need to be OCRed.
Definition: fs_ocr.h:343
OCRConfig ocr_config
The OCRConfig object.
Definition: fs_ocr.h:352
static void Release()
Release OCR engine.
Definition: fx_coordinates.h:771