OCR.NET for Android 1.0 » Developer.Team

OCR.NET for Android 1.0

OCR.NET for Android 1.0
OCR.NET for Android 1.0 | 16 Mb


Optical character recognition library for Android. Use OCR.NET for Android to retrieve text from images.
uses Tesseract OCR engine and Leptonica image processing library
supports Android armeabi-v7a devices
source code included in full version
royalty free distribution in applications

Constructors

public Ocr(Activity activity);

Initializes a new instance of the Ocr class.

Properties

public bool Active {get; set;}

Set Active property to true to initialize Tesseract library.

public string[] AvailableLanguages {get;}

Returns the available languages.

public int Confidence {get;}

Average word confidence for Tesseract page result.

public Rectangle[] ConnectedComponents {get;}

Returns the location of the selected individual connected (text) components.

public string DataPath {get; set;}

Path to tessdata folder containing Tesseract language data.

public float DeskewAngle {get;}

Deskew angle.

public EngineMode EngineMode {get; set;}

public enum EngineMode
{
  TesseractOnly, // use Tesseract engine only (the fastest)
  CubeOnly, // use Cube engine only (better accuracy)
  Combined, // use both engines combined (the best accuracy)
  Default // use default engine, which is the Tesseract engine only
} 

Chooses OCR engine.

public IntPtr Handle {get;}

Handle of Tesseract engine.

public static string ImagelibVersions {get;}

Returns versions of image libraries used by Leptonica.

public string InitLanguages {get;}

Returns the languages string used in the last valid initialization. Languages loaded as dependencies of other loaded languages aren't included in this property.

public Language Language {get; set;}

public enum Language
{
  Default, Custom,
  Arabic, Bangla, Bulgarian, Catalan, Cherokee, ChineseSimplified, ChineseTraditional, Czech, Danish, Dutch, English, Finnish, French, German, Greek, Hebrew, Hindi, Hungarian, Indonesian, Italian, Japanese, Korean, Latvian, Lithuanian, Norwegian, Polish, Portuguese, Quechua, Romanian, Russian, Serbian, Slovak, Slovenian, Spanish, Swedish, Tagalog, Thai, Turkish, Ukrainian, Vietnamese
} 

Language used by Tesseract engine. Language data are available at http://code.google.com/p/tesseract-ocr/source/checkout.

 

public string LanguageCode {get; set;}

Custom language code or multiple language codes used by Tesseract engine. To specify multiple languages, separate language codes by '+' character: eng+slk+rus.

public string[] LoadedLanguages {get;}

Returns the loaded languages. Includes all languages loaded by the last Tesseract initialization, including those loaded as dependencies of other loaded languages.

public int PageCount {get;}

Returns number of pages in multipage TIFF image.

public int PageNumber {get; set;}

Page number in a multipage TIFF image that will be used for OCR.

public PageOrientation PageOrientation {get;}

public enum PageOrientation { Up, Right, Down, Left }

Page orientation.

public PageSegmentation PageSegmentation {get; set;}

public enum PageSegmentation
{
  OSDOnly, // orientation and script detection only
  AutoOSD, // automatic page segmentation with orientation and script detection (OSD)
  AutoOnly, // automatic page segmentation, but no OSD nor OCR
  Auto, // automatic page segmentation, but no OSD
  SingleColumn, // assume a single column of text of variable sizes
  SingleVerticalBlock, // assume a single uniform block of vertically aligned text
  SingleBlock, // assume a single uniform block of text
  SingleLine, // treat the image as a single text line
  SingleWord, // treat the image as a single word
  CircleWord, // treat the image as a single word in a circle
  SingleChar, // treat the image as a single character
  SparseText, // find as much text as possible in no particular order
  SparseTextOSD // sparse text with orientation and script detection (OSD)
} 

Selects page layout analysis mode.

public Paragraph[] Paragraphs {get;}

public class Paragraph
{
  public Rectangle Location {get;} // left, top, right and bottom coordinates
  public string Text {get;} // recognized text
  public float Confidence {get;} // mean confidence interpreted as a percent probability (0.0 - 100.0)
  public int FirstLineIndent {get;} // indent of the first line
  public float DeskewAngle {get;} // deskew angle
  public ParagraphJustification Justification {get;} // text justification
  public PageOrientation Orientation {get;} // orientation
  public WritingDirection WritingDirection {get;} // writing direction
  public TextLineOrder TextLineOrder {get;} // line order
  public bool IsListItem {get;} // list item
  public bool IsCrown {get;} // crown
  public bool IsLeftToRight {get;} // LTR direction
} 

public enum ParagraphJustification { Unknown, Left, Center, Right } 

Provides detailed information of the selected paragraph.

public Image Picture {get; set;}

Picture used for OCR.

public string PictureFileName {get; set;}

Picture file used for OCR. The picture file is not loaded by component, but directly used by image libraries. GIF, JPEG, PNG and TIFF image formats are supported.

public int PictureHeight {get; set;}

Height of the part of picture used for OCR.

public int PictureLeft {get; set;}

Left origin of the part of picture used for OCR.

public int PictureTop {get; set;}

Top origin of the part of picture used for OCR.

 

public int PictureWidth {get; set;}

Width of the part of picture used for OCR.

public Rectangle[] Regions {get;}

Returns the location of regions (result of page layout analysis).

public Region[] RegionDetails {get;}

public class Region
{
  public Rectangle Location {get;} // left, top, right and bottom coordinates
  public string Text {get;} // recognized text
  public float Confidence {get;} // mean confidence interpreted as a percent probability (0.0 - 100.0)
  public float DeskewAngle {get;} // deskew angle
  public PageOrientation Orientation {get;} // orientation
  public WritingDirection WritingDirection {get;} // writing direction
  public TextLineOrder TextLineOrder {get;} // line order
} 

Provides detailed information of the selected region.

public int Resolution {get; set;}

Resolution of the source image in pixels per inch, so font size information can be calculated in results.

public Rectangle[] Strips {get;}

Returns the location of selected textline or strip of image regions.

public Symbol[] Symbols {get;}

public class Symbol
{
  public Rectangle Location {get;} // left, top, right and bottom coordinates
  public string Text {get;} // recognized text
  public float Confidence {get;} // mean confidence interpreted as a percent probability (0.0 - 100.0)
} 

Provides detailed information of the selected symbol.

public string Text {get;}

Recognized text.

public TextLineOrder TextLineOrder {get;}

public enum TextLineOrder { LeftToRight, RightToLeft, TopToBottom }

Text lines are read in the given sequence.

public Rectangle[] TextLines {get;}

Returns the location of textlines.

public TextLine[] TextLineDetails {get;}

public class TextLine
{
  public Rectangle Location {get;} // left, top, right and bottom coordinates
  public string Text {get;} // recognized text
  public float Confidence {get;} // mean confidence interpreted as a percent probability (0.0 - 100.0)
  public float DeskewAngle {get;} // deskew angle
  public PageOrientation Orientation {get;} // orientation
  public WritingDirection WritingDirection {get;} // writing direction
  public TextLineOrder TextLineOrder {get;} // line order
} 

Provides detailed information of the selected textline.

 

public byte[] Unlv {get;}

Recognized text in UTF-8 UNLV format.

public string UnlvText {get;}

Recognized text in UNLV format.

public byte[] Utf8Text {get;}

Recognized UTF8 text.

public static string Version {get;}

Returns version of Tesseract OCR engine.

public WordDetail[] WordDetails {get;}

public class WordDetail
{
  public Rectangle Location { get; private set; } // left, top, right and bottom coordinates
  public string Text { get; private set; } // recognized text
  public float Confidence { get; private set; } // mean confidence interpreted as a percent probability (0.0 - 100.0)
  public WordFlag Flags { get; private set; } // flags
  public string FontName { get; private set; } // font name
  public int PointSize { get; private set; } // font size in printer points (1/72 inch)
} 

[Flags]
public enum WordFlag
{
  Bold, // word is bold 
  Italic, // word is italic 
  Underlined, // word is underlined 
  Monospace, // word uses monospace font 
  Serif, // word uses serif font 
  SmallCaps, // word uses small caps 
  DropCap, // word is dropcap 
  Subscript, // word is subscript 
  Superscript, // word is superscript 
  Numeric, // word is numeric 
  FromDictionary // word was found in dictionary 
} 

Provides detailed information of the recognized word. Returns the location, text, confidence and other information of the selected word.

public Rectangle[] Words {get;}

Returns the location of recognized words.

public WritingDirection WritingDirection {get;}

public enum WritingDirection { LeftToRight, RightToLeft, TopToBottom }

Writing direction.

Methods

public bool AdaptToWord(PageSegmentation pageSegmentation, string word);

Applies the given word to the adaptive classifier. See Tesseract documentation for further information.

public void AddInitParameter(string name, string value);

Adds a variable used for Tesseract initialization.

public void ClearAdaptiveClassifier();

Call this function between pages to free up memory and forget adaptive data.

public void ClearInitParameters();

Clears variables used for Tesseract initialization.

public void ClearPersistentCache();

Clears any library-level memory caches. There are a variety of expensive-to-load constant data structures (mostly language dictionaries) in Tesseract that are cached globally. This procedure allows the clearing of these caches.

public bool GetBoolParameter(string name);

Get Tesseract boolean variable.

public string GetBoxText();

Recognized text with box information. Each line consists of text, left position, top position, right position, bottom position and page number separated by spaces.

public double GetDoubleParameter(string name);

Get Tesseract double variable.

public string GetHtmlText();

Recognized text in HTML format.

public int GetIntParameter(string name);

Get Tesseract integer variable.

public string GetStringParameter(string name);

Get Tesseract string variable.

public bool GetTextDirection(out int offset, out float slope);

Retrieves offset and slope of lines. See Tesseract documentation for further information.

public string GetTsvText();

Recognized text in tab-separated values (TSV) format.

public string GetUniChar(int id);

Returns the string form of the specified unichar.

public bool IsValidCharacter(char character);

Returns true if the specified character is defined in the UniCharset.

public bool IsValidWord(string word);

Check whether a word is valid according to Tesseract's language model.

public bool PrintVariables(string fileName);

Print Tesseract variables to the specified file.

public void ReadConfigFile(string fileName);

Read a config file containing a set of parameter name-value pairs.

public void Recognize();

Initiates OCR recognition.

public void SetBoolParameter(string name, bool value);

Set Tesseract boolean variable.

public void SetDoubleParameter(string name, double value);

Set Tesseract double variable.

public void SetIntParameter(string name, int value);

Set Tesseract integer variable.

public void SetStringParameter(string name, string value);

Set Tesseract string variable.

Events

public event ProgressEventHandler Progress;

Progress notification

public delegate void ProgressEventHandler(object sender, ProgressEventArgs e);

public class ProgressEventArgs : EventArgs

public bool Cancel { get; set; }

Request to cancel recognition process.

public int Progress { get; }

Percent complete. Value range is 0 to 100.

public int WordCount { get; }

Returns number of found words.


Restriction: If you have a LIFETIME V.I.P membership, please mail us for the password.

Only for V.I.P
Warning! You are not allowed to view this text.