Foxit PDF SDK
fs_search.h
Go to the documentation of this file.
1 
15 #ifndef FS_SEARCH_H_
16 #define FS_SEARCH_H_
17 
18 #include "common/fs_common.h"
19 #include "pdf/annots/fs_annot.h"
20 #include "pdf/fs_pdfpage.h"
21 #ifndef _FX_NO_XFA_
22 #include "addon/xfa/fs_xfa.h"
23 #endif // #ifndef _FX_NO_XFA_
24 
30 namespace foxit {
34 namespace pdf {
// Body of foxit::pdf::SearchCancelCallback. The index places the class definition at
// listing line 39; that line (the class header) is absent from this extract.
40 {
41  public:
    // A callback function used to check whether to cancel the searching process or not.
    // Pure virtual: an implementation must be supplied by a derived class.
    // NOTE(review): presumably a true return value requests cancellation -- confirm.
48  virtual bool NeedToCancelNow() = 0;
49 };
50 
// Value-type record describing one character of a text page: font, character flag,
// font size, origin position, two bounding boxes and a matrix.
// NOTE(review): this extract dropped several listing lines of the class (90, 103-104,
// 110, 135, 211, 219, 241, 246, 251 among them); the notes below mark each gap.
52 class TextPageCharInfo FS_FINAL : public Object {
53  public:
    // Enumeration for PDF textpage character flag.
    // The index also lists e_Generated (listing line 65), e_UnUnicode (67) and
    // e_ComboWord (71); those enumerator lines and their values are not visible here.
59  typedef enum _TextCharFlag {
    // Character flag: Unknown.
61  e_Unknown = -1,
    // Character flag: Normal.
63  e_Normal = 0,
    // Character flag: Hyphen.
69  e_Hyphen = 3,
72  } TextCharFlag;
73 
74 
    // Constructor, with parameters. Stores each argument in the member of the same name.
    // Listing line 90 (the first line of the signature) is absent; per the index the
    // leading parameters are: const common::Font& font, TextCharFlag flag, float font_size,
    // float origin_x, float origin_y.
91  const RectF& char_box, const RectF& char_outbox, const Matrix& matrix) {
92  this->font = font;
93  this->flag = flag;
94  this->font_size = font_size;
95  this->origin_x = origin_x;
96  this->origin_y = origin_y;
97  this->char_box = char_box;
98  this->char_outbox = char_outbox;
99  this->matrix = matrix;
100  }
101 
    // Default constructor (index: listing line 103). Listing lines 103-104 are absent;
    // the visible initializers set font_size, origin_x and origin_y to 0.
105  , font_size(0)
106  , origin_x(0)
107  , origin_y(0) {}
108 
    // The destructor (index: listing line 110) is absent from this extract.
111 
    // Constructor, with another character information object.
    // Copies all eight members from char_info one by one.
117  TextPageCharInfo(const TextPageCharInfo& char_info) {
118  this->font = char_info.font;
119  this->flag = char_info.flag;
120  this->font_size = char_info.font_size;
121  this->origin_x = char_info.origin_x;
122  this->origin_y = char_info.origin_y;
123  this->char_box = char_info.char_box;
124  this->char_outbox = char_info.char_outbox;
125  this->matrix = char_info.matrix;
126  }
127 
    // Assign operator. Listing line 135 (the signature) is absent; per the index it is
    // TextPageCharInfo& operator=(const TextPageCharInfo& char_info).
    // Copies all eight members and returns a reference to this object.
136  this->font = char_info.font;
137  this->flag = char_info.flag;
138  this->font_size = char_info.font_size;
139  this->origin_x = char_info.origin_x;
140  this->origin_y = char_info.origin_y;
141  this->char_box = char_info.char_box;
142  this->char_outbox = char_info.char_outbox;
143  this->matrix = char_info.matrix;
144  return *this;
145  }
146 
    // Equal operator.
    // Returns true when font, flag, char_box, char_outbox and matrix compare equal and
    // font_size, origin_x and origin_y each differ by no more than FLT_EPSILON.
    // The float comparison is an absolute tolerance, not a relative one.
154  bool operator == (const TextPageCharInfo& char_info) const {
155  if (font != char_info.font || flag != char_info.flag || fabs(font_size - char_info.font_size) > FLT_EPSILON ||
156  fabs(origin_x - char_info.origin_x) > FLT_EPSILON || fabs(origin_y - char_info.origin_y) > FLT_EPSILON ||
157  char_box != char_info.char_box || char_outbox != char_info.char_outbox || matrix != char_info.matrix)
158  return false;
159 
160  return true;
161  }
162 
    // Not equal operator.
    // Evaluates the same condition as operator== and returns the opposite result;
    // the condition is duplicated here, so keep the two in sync when editing.
170  bool operator != (const TextPageCharInfo& char_info) const{
171  if (font != char_info.font || flag != char_info.flag || fabs(font_size - char_info.font_size) > FLT_EPSILON ||
172  fabs(origin_x - char_info.origin_x) > FLT_EPSILON || fabs(origin_y - char_info.origin_y) > FLT_EPSILON ||
173  char_box != char_info.char_box || char_outbox != char_info.char_outbox || matrix != char_info.matrix)
174  return true;
175 
176  return false;
177  }
178 
    // Set value. Overwrites all eight members with the given arguments; the assignments
    // are the same as in the parameterized constructor.
196  void Set(const common::Font& font, TextCharFlag flag, float font_size, float origin_x, float origin_y,
197  const RectF& char_box, const RectF& char_outbox, const Matrix& matrix) {
198  this->font = font;
199  this->flag = flag;
200  this->font_size = font_size;
201  this->origin_x = origin_x;
202  this->origin_y = origin_y;
203  this->char_box = char_box;
204  this->char_outbox = char_outbox;
205  this->matrix = matrix;
206  }
207 
    // Data members. The index lists `common::Font font` at listing line 211 and
    // `TextCharFlag flag` at 219; both lines are absent from this extract.
212 
220 
    // Font size for character.
226  float font_size;
227 
    // The x-coordinate of the origin position.
231  float origin_x;
232 
    // The y-coordinate of the origin position.
236  float origin_y;
237 
    // The index lists `RectF char_box` (241, glyph bounding box in page space),
    // `RectF char_outbox` (246, typographic bounding box in page space) and
    // `Matrix matrix` (251, the matrix of the character); those lines are absent here.
242 
247 
252 };
253 
// Declarations of foxit::pdf::TextPage: text content of a PDF page, constructed from a
// parsed PDF page. Only declarations are visible here; no member is defined in this listing.
269 class TextPage FS_FINAL : public Base {
270  public:
    // Enumeration for parsing flags used for text page.
    // The enumerator lines are absent from this extract; the index lists e_ParseTextNormal
    // (278), e_ParseTextOutputHyphen (280) and e_ParseTextUseStreamOrder (282).
276  typedef enum _TextParseFlags {
283  } TextParseFlags;
284 
    // Enumeration for text order flag which is used when getting text content of a PDF page.
    // The enumerator lines are absent; the index lists e_TextStreamOrder (292) and
    // e_TextDisplayOrder (294).
290  typedef enum _TextOrderFlag {
295  } TextOrderFlag;
296 
297 
    // Constructor, from a parsed PDF page. `flags` defaults to e_ParseTextNormal.
305  explicit TextPage(const PDFPage& page, int flags = foxit::pdf::TextPage::e_ParseTextNormal);
306 
    // Destructor.
308  ~TextPage();
    // Copy constructor (takes another TextPage by const reference).
314  TextPage(const TextPage& other);
    // Assign operator.
322  TextPage& operator = (const TextPage& other);
323 
    // Equal operator.
331  bool operator == (const TextPage& other) const;
    // Not equal operator.
339  bool operator != (const TextPage& other) const;
340 
    // Check whether current object is empty or not.
348  bool IsEmpty() const;
349 
    // Get the count of all the characters.
355  int GetCharCount() const;
356 
    // Get character information of a specific character.
366  TextPageCharInfo GetCharInfo(int char_index);
367 
    // Get all the characters within a range specified by a start index and count.
    // NOTE(review): count defaults to -1; presumably that means "to the end" -- confirm.
382  WString GetChars(int start = 0, int count = -1) const;
383 
    // Get the character index at or around a specified position on the page,
    // in PDF coordinate system.
396  int GetIndexAtPos(float x, float y, float tolerance) const;
397 
    // Get the text within a rectangle, in PDF coordinate system.
405  WString GetTextInRect(const RectF& rect) const;
406 
    // Get the page text, in the order selected by `flag`.
415  WString GetText(TextOrderFlag flag) const;
416 
    // Get the character range of a word at or around a specified position on the page.
431  common::Range GetWordAtPos(float x, float y, float tolerance) const;
432 
    // Count the text rectangles within a range specified by a start index and count.
444  int GetTextRectCount(int start = 0, int count = -1);
445 
    // Get the text rectangle by the index.
455  RectF GetTextRect(int rect_index) const;
456 
    // Get the text trend (as rotation) of a specified rectangle.
467  common::Rotation GetBaselineRotation(int rect_index);
468 
    // The index also lists RectFArray GetTextRectArrayByRect(const RectF& rect); its
    // declaration is absent from this extract (presumably it sat near this gap).
477 
    // Get the character index range of all text rectangles within the specified
    // rectangle region.
485  common::Range GetCharRange(const RectF& rect);
486 
    // The index also lists WString GetTextUnderAnnot(annots::Annot& annot) const; its
    // declaration is absent from this extract (presumably it sat near this gap).
501 
502  // Users are strongly advised NOT to use this constructor; otherwise unexpected behavior may occur.
503  explicit TextPage(FS_HANDLE handle = NULL);
504 };
505 
// Declarations of foxit::pdf::TextSearch: keyword search that can be constructed over a
// PDF document, an XFA document, a text page or an annotation. Only declarations are
// visible here; no member is defined in this listing.
517 class TextSearch FS_FINAL : public Base {
518  public:
    // Enumeration for searching flags.
    // The enumerator lines are absent from this extract; the index lists e_SearchNormal
    // (526), e_SearchMatchCase (528), e_SearchMatchWholeWord (530), e_SearchConsecutive
    // (532) and e_SearchNotMatchFullWidth (534).
524  typedef enum _SearchFlags {
535  } SearchFlags;
536 
537 
    // Constructor, for a PDF document. `cancel` defaults to NULL and `flags` defaults to
    // the TextPage parsing flag e_ParseTextNormal.
553  explicit TextSearch(const PDFDoc& document, SearchCancelCallback* cancel = NULL, int flags = foxit::pdf::TextPage::e_ParseTextNormal);
554 
555  #ifndef _FX_NO_XFA_
556 
    // Constructor taking an XFA document; only declared when _FX_NO_XFA_ is not defined.
567  explicit TextSearch(const foxit::addon::xfa::XFADoc& xfa_document, foxit::pdf::SearchCancelCallback* cancel = NULL);
568 #endif // #ifndef _FX_NO_XFA_
569 
    // Constructor taking a text page.
574  explicit TextSearch(const foxit::pdf::TextPage& text_page);
575 
    // Constructor taking an annotation.
584  explicit TextSearch(const foxit::pdf::annots::Annot& annot);
585 
    // Destructor.
587  ~TextSearch();
    // Copy constructor (takes another TextSearch by const reference).
593  TextSearch(const TextSearch& other);
    // Assign operator.
601  TextSearch& operator = (const TextSearch& other);
602 
    // Equal operator.
610  bool operator == (const TextSearch& other) const;
    // Not equal operator.
618  bool operator != (const TextSearch& other) const;
619 
    // Check whether current object is empty or not.
627  bool IsEmpty() const;
628 
    // Set keywords to search.
636  bool SetPattern(const wchar_t* key_words);
637 
    // Set starting page index.
653  bool SetStartPage(int page_index);
654 
    // Set ending page index.
670  bool SetEndPage(int page_index);
671 
    // Set starting character index, from where the search process is to be started.
693  bool SetStartCharacter(int char_index);
694 
    // Set search flags.
    // NOTE(review): presumably a combination of SearchFlags values -- confirm.
706  bool SetSearchFlags(uint32 search_flags);
707 
    // Search for next matched pattern.
714  bool FindNext();
715 
    // Search for previous matched pattern.
722  bool FindPrev();
723 
    // Get the rectangles of current match pattern.
729  RectFArray GetMatchRects() const;
730 
    // Get the page index, to which current match belongs.
739  int GetMatchPageIndex() const;
740 
    // The index also lists WString GetMatchSentence(), int GetMatchSentenceStartIndex()
    // and int GetMatchSentenceEndIndex(); their declarations are absent from this extract
    // (presumably they sat in the three gaps below).
747 
758 
769 
    // Get the index of the first character of current match pattern, based on current
    // match page.
776  int GetMatchStartCharIndex() const;
777 
    // Get the index of the last character of current match pattern, based on current
    // match page.
784  int GetMatchEndCharIndex() const;
785 
786  // Users are strongly advised NOT to use this constructor; otherwise unexpected behavior may occur.
787  explicit TextSearch(FS_HANDLE handle = NULL);
788 };
789 
// Declarations of foxit::pdf::TextLink. The cross-reference index in this extract has no
// entries for this class, so the notes below state only what the signatures show.
// NOTE(review): presumably represents one hyperlink found in page text -- confirm.
798 class TextLink FS_FINAL : public Base{
799  public:
    // Destructor.
801  ~TextLink();
    // Copy constructor (takes another TextLink by const reference).
807  TextLink(const TextLink& other);
    // Assignment operator; returns a reference to a TextLink.
815  TextLink& operator = (const TextLink& other);
816 
    // Equality comparison with another TextLink.
824  bool operator == (const TextLink& other) const;
    // Inequality comparison with another TextLink.
832  bool operator != (const TextLink& other) const;
833 
    // NOTE(review): presumably reports whether this object is empty, as the identically
    // declared TextPage::IsEmpty is described in the index -- confirm.
841  bool IsEmpty() const;
842 
    // Returns a WString; presumably the URI of this link -- confirm.
851  WString GetURI();
852 
    // Returns an int; presumably the index of the link's first character -- confirm.
858  int GetStartCharIndex();
859 
    // Returns an int; presumably the index of the link's last character -- confirm.
865  int GetEndCharIndex();
866 
873  // Users are strongly advised NOT to use this constructor; otherwise unexpected behavior may occur.
874  explicit TextLink(FS_HANDLE handle = NULL);
875 
876 };
877 
// Declarations of foxit::pdf::PageTextLinks, constructed from a TextPage and handing out
// TextLink objects by index. The cross-reference index in this extract has no entries for
// this class, so the notes below state only what the signatures show.
882 class PageTextLinks FS_FINAL : public Base{
883  public:
    // Constructor taking a text page.
889  explicit PageTextLinks(const TextPage& page);
    // Copy constructor (takes another PageTextLinks by const reference).
895  PageTextLinks(const PageTextLinks& other);
    // Assignment operator; returns a reference to a PageTextLinks.
903  PageTextLinks& operator = (const PageTextLinks& other);
    // Equality comparison with another PageTextLinks.
911  bool operator == (const PageTextLinks& other) const ;
    // Inequality comparison with another PageTextLinks.
919  bool operator != (const PageTextLinks& other) const ;
920 
    // NOTE(review): presumably reports whether this object is empty, as the identically
    // declared TextPage::IsEmpty is described in the index -- confirm.
928  bool IsEmpty() const;
    // Destructor.
930  ~PageTextLinks();
931 
    // Returns an int; presumably the number of text links available -- confirm.
937  int GetTextLinkCount();
938 
    // Returns a TextLink for the given index.
    // NOTE(review): valid range is presumably 0 to GetTextLinkCount()-1 -- confirm.
947  TextLink GetTextLink(int index);
948 
949  // Users are strongly advised NOT to use this constructor; otherwise unexpected behavior may occur.
950  explicit PageTextLinks(FS_HANDLE handle = NULL);
951 };
952 } // namespace pdf
953 } // namespace foxit
954 #endif // FS_SEARCH_H_
955 
foxit::pdf::TextPage::TextParseFlags
TextParseFlags
Enumeration for parsing flags used for text page.
Definition: fs_search.h:276
foxit::pdf::TextSearch::e_SearchMatchCase
If set, match the case of keyword when searching.
Definition: fs_search.h:528
foxit::pdf::TextPage::IsEmpty
bool IsEmpty() const
Check whether current object is empty or not.
foxit::pdf::TextPage::e_TextStreamOrder
If this is set, that means to get text content of a PDF page by the stream order.
Definition: fs_search.h:292
foxit::pdf::TextPageCharInfo::origin_x
float origin_x
The x-coordinate of the origin position.
Definition: fs_search.h:231
foxit::pdf::TextPageCharInfo::char_box
RectF char_box
The glyph bounding box in page space.
Definition: fs_search.h:241
foxit::FS_HANDLE
void * FS_HANDLE
Handle type.
Definition: fs_basictypes.h:214
foxit::pdf::TextPage::e_ParseTextOutputHyphen
Parse the text content of a PDF page with outputting the hyphen on a line feed.
Definition: fs_search.h:280
foxit::pdf::TextPage::GetTextRectArrayByRect
RectFArray GetTextRectArrayByRect(const RectF &rect)
Get the array of all text rectangles within the specified rectangle region.
foxit::pdf::annots::Annot
Definition: fs_annot.h:965
foxit::pdf::TextPageCharInfo::font
common::Font font
A font for character.
Definition: fs_search.h:211
foxit::pdf::TextPage::GetCharCount
int GetCharCount() const
Get the count of all the characters.
foxit::pdf::TextSearch::SetEndPage
bool SetEndPage(int page_index)
Set ending page index.
foxit::pdf::TextSearch::operator!=
bool operator!=(const TextSearch &other) const
Not equal operator.
foxit::Object
CFX_Object Object
Object type.
Definition: fs_basictypes.h:217
foxit::pdf::TextPageCharInfo::flag
TextCharFlag flag
Flags to indicate which properties of textpage character flag are meaningful.
Definition: fs_search.h:219
foxit::pdf::TextSearch::GetMatchStartCharIndex
int GetMatchStartCharIndex() const
Get the index of the first character of current match pattern, based on current match page.
foxit::pdf::TextPageCharInfo::matrix
Matrix matrix
The matrix of the character.
Definition: fs_search.h:251
foxit::pdf::TextPage::operator=
TextPage & operator=(const TextPage &other)
Assign operator.
foxit::pdf::SearchCancelCallback::NeedToCancelNow
virtual bool NeedToCancelNow()=0
A callback function used to check whether to cancel the searching process or not.
foxit::pdf::TextPageCharInfo::origin_y
float origin_y
The y-coordinate of the origin position.
Definition: fs_search.h:236
foxit::pdf::TextPage::GetTextRect
RectF GetTextRect(int rect_index) const
Get the text rectangle by the index.
foxit::pdf::TextPage::e_TextDisplayOrder
If this is set, that means to get text content of a PDF page by the display order.
Definition: fs_search.h:294
foxit::pdf::TextPage
Definition: fs_search.h:269
foxit::pdf::TextPage::GetWordAtPos
common::Range GetWordAtPos(float x, float y, float tolerance) const
Get the character range of a word at or around a specified position on the page, in PDF coordinate sy...
foxit::pdf::TextPageCharInfo::e_Hyphen
Character flag: Hyphen.
Definition: fs_search.h:69
fs_common.h
Header file for common definitions and classes.
foxit::pdf::TextSearch::GetMatchRects
RectFArray GetMatchRects() const
Get the rectangles of current match pattern.
CFX_ArrayTemplate< RectF >
foxit::pdf::TextPageCharInfo::e_Generated
Character flag: Generated.
Definition: fs_search.h:65
foxit::pdf::TextPageCharInfo::~TextPageCharInfo
~TextPageCharInfo()
Destructor.
Definition: fs_search.h:110
foxit::pdf::TextPageCharInfo::char_outbox
RectF char_outbox
The typographic (display and printing) bounding box in page space.
Definition: fs_search.h:246
foxit::pdf::TextPage::GetBaselineRotation
common::Rotation GetBaselineRotation(int rect_index)
Get the text trend (as rotation) of a specified rectangle.
foxit::pdf::TextPage::GetCharRange
common::Range GetCharRange(const RectF &rect)
Get the character index range of all text rectangles within the specified rectangle region.
foxit::pdf::TextSearch::GetMatchEndCharIndex
int GetMatchEndCharIndex() const
Get the index of the last character of current match pattern, based on current match page.
foxit::pdf::TextSearch::operator==
bool operator==(const TextSearch &other) const
Equal operator.
foxit::pdf::TextPageCharInfo
Definition: fs_search.h:52
foxit::pdf::TextPage::e_ParseTextUseStreamOrder
Parse the text content of a PDF page by the stream order.
Definition: fs_search.h:282
foxit::pdf::TextPage::GetText
WString GetText(TextOrderFlag flag) const
Get the page text.
foxit::pdf::TextSearch::SetStartCharacter
bool SetStartCharacter(int char_index)
Set starting character index, from where the search process is to be started.
foxit::pdf::TextSearch::FindPrev
bool FindPrev()
Search for previous matched pattern.
foxit::pdf::TextPageCharInfo::TextPageCharInfo
TextPageCharInfo(const TextPageCharInfo &char_info)
Constructor, with another character information object.
Definition: fs_search.h:117
foxit::pdf::TextPageCharInfo::font_size
float font_size
Font size for character.
Definition: fs_search.h:226
fs_xfa.h
Header file for XFA related definitions and functions.
foxit::pdf::TextPageCharInfo::operator=
TextPageCharInfo & operator=(const TextPageCharInfo &char_info)
Assign operator.
Definition: fs_search.h:135
foxit::pdf::TextPageCharInfo::e_UnUnicode
Character flag: UnUnicode.
Definition: fs_search.h:67
foxit::pdf::TextPage::GetChars
WString GetChars(int start=0, int count=-1) const
Get all the characters within a range specified by a start index and count.
foxit::pdf::TextSearch::SetPattern
bool SetPattern(const wchar_t *key_words)
Set keywords to search.
foxit::pdf::TextSearch::GetMatchPageIndex
int GetMatchPageIndex() const
Get the page index, to which current match belongs.
foxit::pdf::TextSearch::SetStartPage
bool SetStartPage(int page_index)
Set starting page index.
foxit::pdf::TextPage::GetIndexAtPos
int GetIndexAtPos(float x, float y, float tolerance) const
Get the character index at or around a specified position on the page, in PDF coordinate system.
foxit::common::Rotation
Rotation
Enumeration for rotation.
Definition: fs_common.h:57
foxit::pdf::TextPageCharInfo::e_Normal
Character flag: Normal.
Definition: fs_search.h:63
foxit::pdf::TextSearch::e_SearchMatchWholeWord
If set, match the whole word of keyword when searching.
Definition: fs_search.h:530
foxit::pdf::TextSearch::e_SearchConsecutive
If set, match the key word consecutively when searching. For example, "CC" will be matched twice in "...
Definition: fs_search.h:532
foxit::pdf::TextPageCharInfo::TextCharFlag
TextCharFlag
Enumeration for PDF textpage character flag.
Definition: fs_search.h:59
fs_pdfpage.h
Header file for PDF page related definitions and classes.
foxit
Foxit namespace.
Definition: fs_taggedpdf.h:27
foxit::pdf::TextPage::e_ParseTextNormal
Parse the text content of a PDF page by normalizing characters based on their positions in the PDF pa...
Definition: fs_search.h:278
foxit::pdf::TextPage::TextPage
TextPage(const PDFPage &page, int flags=foxit::pdf::TextPage::e_ParseTextNormal)
Constructor, from a parsed PDF page.
foxit::addon::xfa::XFADoc
Definition: fs_xfa.h:898
foxit::pdf::TextSearch::operator=
TextSearch & operator=(const TextSearch &other)
Assign operator.
foxit::pdf::TextPage::GetTextInRect
WString GetTextInRect(const RectF &rect) const
Get the text within a rectangle, in PDF coordinate system.
foxit::pdf::TextPageCharInfo::TextPageCharInfo
TextPageCharInfo(const common::Font &font, TextCharFlag flag, float font_size, float origin_x, float origin_y, const RectF &char_box, const RectF &char_outbox, const Matrix &matrix)
Constructor, with parameters.
Definition: fs_search.h:90
foxit::pdf::TextSearch::FindNext
bool FindNext()
Search for next matched pattern.
foxit::pdf::TextPageCharInfo::TextPageCharInfo
TextPageCharInfo()
Constructor.
Definition: fs_search.h:103
foxit::pdf::TextSearch::IsEmpty
bool IsEmpty() const
Check whether current object is empty or not.
NULL
#define NULL
The null-pointer value.
Definition: fx_system.h:780
foxit::pdf::TextSearch::e_SearchNormal
No special searching options.
Definition: fs_search.h:526
CFX_FloatRect
Definition: fx_coordinates.h:771
foxit::pdf::TextPageCharInfo::Set
void Set(const common::Font &font, TextCharFlag flag, float font_size, float origin_x, float origin_y, const RectF &char_box, const RectF &char_outbox, const Matrix &matrix)
Set value.
Definition: fs_search.h:196
foxit::pdf::TextSearch::SearchFlags
SearchFlags
Enumeration for searching flags.
Definition: fs_search.h:524
foxit::pdf::TextSearch
Definition: fs_search.h:517
foxit::pdf::PDFDoc
Definition: fs_pdfdoc.h:613
foxit::pdf::PDFPage
Definition: fs_pdfpage.h:411
foxit::pdf::SearchCancelCallback
Definition: fs_search.h:39
foxit::pdf::TextPageCharInfo::e_Unknown
Character flag: Unknown.
Definition: fs_search.h:61
foxit::pdf::TextSearch::TextSearch
TextSearch(const PDFDoc &document, SearchCancelCallback *cancel=0, int flags=foxit::pdf::TextPage::e_ParseTextNormal)
Constructor, for a PDF document.
foxit::pdf::TextPageCharInfo::e_ComboWord
Character flag: ComboWord.
Definition: fs_search.h:71
foxit::pdf::TextSearch::SetSearchFlags
bool SetSearchFlags(uint32 search_flags)
Set search flags.
foxit::pdf::TextPage::~TextPage
~TextPage()
Destructor.
foxit::common::Font
Definition: fs_common.h:1344
CFX_Matrix
Definition: fx_coordinates.h:1076
foxit::pdf::TextSearch::e_SearchNotMatchFullWidth
If set, ignore full-width characters and treat all characters as standard ASCII or standard-width ...
Definition: fs_search.h:534
fs_annot.h
Header file for annotation related definitions and classes.
foxit::pdf::TextSearch::GetMatchSentenceEndIndex
int GetMatchSentenceEndIndex()
Get the index of the last character of current matched pattern, based on the matched sentence.
foxit::pdf::TextPageCharInfo::operator==
bool operator==(const TextPageCharInfo &char_info) const
Equal operator.
Definition: fs_search.h:154
CFX_WideString
WIDE STRING CLASS.
Definition: fx_string.h:1452
foxit::pdf::TextSearch::GetMatchSentence
WString GetMatchSentence()
Get the sentence that contains current match pattern.
foxit::common::Range
Definition: fs_common.h:1189
foxit::pdf::TextPage::GetTextUnderAnnot
WString GetTextUnderAnnot(annots::Annot &annot) const
Get the page text which intersects with a specified annotation.
foxit::pdf::TextPage::GetTextRectCount
int GetTextRectCount(int start=0, int count=-1)
Count the text rectangles within a range specified by a start index and count.
foxit::uint32
FX_UINT32 uint32
32-bit unsigned integer.
Definition: fs_basictypes.h:196
foxit::pdf::TextPage::operator!=
bool operator!=(const TextPage &other) const
Not equal operator.
foxit::pdf::TextSearch::GetMatchSentenceStartIndex
int GetMatchSentenceStartIndex()
Get the index of the first character of current matched pattern, based on the matched sentence.
foxit::pdf::TextSearch::~TextSearch
~TextSearch()
Destructor.
foxit::Base
Definition: fs_basictypes.h:419
foxit::pdf::TextPage::operator==
bool operator==(const TextPage &other) const
Equal operator.
foxit::pdf::TextPage::TextOrderFlag
TextOrderFlag
Enumeration for text order flag which is used when getting text content of a PDF page.
Definition: fs_search.h:290
foxit::pdf::TextPage::GetCharInfo
TextPageCharInfo GetCharInfo(int char_index)
Get character information of a specific character.
foxit::pdf::TextPageCharInfo::operator!=
bool operator!=(const TextPageCharInfo &char_info) const
Not equal operator.
Definition: fs_search.h:170