Skip to content

Commit a97a653

Browse files
committed
898833 - Added a sample for extracting text using OCR
1 parent 7109c44 commit a97a653

18 files changed

+593
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
2+
Microsoft Visual Studio Solution File, Format Version 12.00
3+
# Visual Studio Version 17
4+
VisualStudioVersion = 17.12.35707.178 d17.12
5+
MinimumVisualStudioVersion = 10.0.40219.1
6+
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "WinForms_TextExtractionByOCR", "WinForms_TextExtractionByOCR\WinForms_TextExtractionByOCR.csproj", "{1B09455E-6F67-4155-AFE2-EB421BAB1190}"
7+
EndProject
8+
Global
9+
GlobalSection(SolutionConfigurationPlatforms) = preSolution
10+
Debug|Any CPU = Debug|Any CPU
11+
Release|Any CPU = Release|Any CPU
12+
EndGlobalSection
13+
GlobalSection(ProjectConfigurationPlatforms) = postSolution
14+
{1B09455E-6F67-4155-AFE2-EB421BAB1190}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
15+
{1B09455E-6F67-4155-AFE2-EB421BAB1190}.Debug|Any CPU.Build.0 = Debug|Any CPU
16+
{1B09455E-6F67-4155-AFE2-EB421BAB1190}.Release|Any CPU.ActiveCfg = Release|Any CPU
17+
{1B09455E-6F67-4155-AFE2-EB421BAB1190}.Release|Any CPU.Build.0 = Release|Any CPU
18+
EndGlobalSection
19+
GlobalSection(SolutionProperties) = preSolution
20+
HideSolutionNode = FALSE
21+
EndGlobalSection
22+
EndGlobal
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
<?xml version="1.0" encoding="utf-8" ?>
2+
<configuration>
3+
<startup>
4+
<supportedRuntime version="v4.0" sku=".NETFramework,Version=v4.8" />
5+
</startup>
6+
</configuration>

How-to/How-to-extract-Text-using-OCR/ExtractingSample/WinForms_TextExtractionByOCR/Form1.Designer.cs

+40
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
using Syncfusion.Windows.PdfViewer;
2+
using System;
3+
using System.Collections.Generic;
4+
using System.ComponentModel;
5+
using System.Data;
6+
using System.Drawing;
7+
using System.Linq;
8+
using System.Text;
9+
using System.Threading.Tasks;
10+
using System.Windows.Forms;
11+
using System.Windows.Forms.Integration;
12+
13+
namespace WinForms_TextExtractionByOCR
14+
{
15+
/// <summary>
/// Main window of the sample. It opens maximized and fills its client area with an
/// <see cref="ElementHost"/> whose child is the <see cref="PdfViewer"/> user control.
/// </summary>
public partial class Form1 : Form
{
    ElementHost elementHost = new ElementHost();
    PdfViewer pdfViewer;

    /// <summary>
    /// Runs the designer initialization, maximizes the window, and embeds the viewer.
    /// </summary>
    public Form1()
    {
        InitializeComponent();
        WindowState = FormWindowState.Maximized;
        AttachViewerHost();
    }

    /// <summary>
    /// Creates the viewer control, places it inside the element host, and adds the
    /// host to this form so that it occupies the whole client area.
    /// </summary>
    private void AttachViewerHost()
    {
        pdfViewer = new PdfViewer();

        elementHost.Dock = DockStyle.Fill;
        elementHost.Child = pdfViewer;

        Controls.Add(elementHost);
    }
}
30+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
<UserControl xmlns:PdfViewer="clr-namespace:Syncfusion.Windows.PdfViewer;assembly=Syncfusion.PdfViewer.WPF"
2+
x:Class="WinForms_TextExtractionByOCR.PdfViewer"
3+
xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation"
4+
xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml"
5+
xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"
6+
xmlns:d="http://schemas.microsoft.com/expression/blend/2008"
7+
xmlns:local="clr-namespace:WinForms_TextExtractionByOCR"
8+
mc:Ignorable="d"
9+
d:DesignHeight="450" d:DesignWidth="800">
10+
<Grid>
11+
<Grid.RowDefinitions>
12+
<RowDefinition Height="30"/>
13+
<RowDefinition/>
14+
</Grid.RowDefinitions>
15+
<Button x:Name="Rectangle" Content="Rectangle" Width="60" Click="Rectangle_Click"/>
16+
<PdfViewer:PdfViewerControl Grid.Row="1" x:Name="pdfViewer"/>
17+
</Grid>
18+
</UserControl>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
using Syncfusion.OCRProcessor;
2+
using Syncfusion.Pdf.Parsing;
3+
using System;
4+
using System.Collections.Generic;
5+
using System.Drawing;
6+
using System.Drawing.Imaging;
7+
using System.Linq;
8+
using System.Text;
9+
using System.Threading.Tasks;
10+
using System.Windows;
11+
using System.Windows.Controls;
12+
using System.Windows.Data;
13+
using System.Windows.Documents;
14+
using System.Windows.Input;
15+
using System.Windows.Media;
16+
using System.Windows.Media.Imaging;
17+
using System.Windows.Navigation;
18+
using System.Windows.Shapes;
19+
20+
namespace WinForms_TextExtractionByOCR
21+
{
22+
/// <summary>
23+
/// Interaction logic for PdfViewer.xaml
24+
/// </summary>
25+
public partial class PdfViewer : UserControl
26+
{
27+
string file;
28+
string tessaractBinariesPath;
29+
string tessDataPath;
30+
RectangleF bounds;
31+
/// <summary>
/// Initializes the control's components and loads the sample PDF into the viewer.
/// </summary>
public PdfViewer()
{
    InitializeComponent();

    // The sample document is referenced by a relative path; the number of parent
    // directory hops depends on which target framework the sample was built for.
#if NETCOREAPP
    const string sampleDocument = "../../../Data/F#.pdf";
#else
    const string sampleDocument = "../../Data/F#.pdf";
#endif

    file = sampleDocument;
    pdfViewer.Load(file);
}
41+
42+
/// <summary>
/// Click handler of the "Rectangle" button: switches the viewer into rectangle
/// annotation mode and listens for shape-annotation changes.
/// </summary>
/// <param name="sender">The button that raised the event.</param>
/// <param name="e">Routed event data (not used).</param>
private void Rectangle_Click(object sender, RoutedEventArgs e)
{
    pdfViewer.AnnotationMode = Syncfusion.Windows.PdfViewer.PdfDocumentView.PdfViewerAnnotationMode.Rectangle;

    // Detach before attaching so that clicking the button several times leaves exactly one
    // subscription. Without this, every click adds another one and the handler (and its OCR
    // pass) runs once per earlier click for each rectangle drawn. Removing a handler that
    // is not subscribed is a harmless no-op.
    pdfViewer.ShapeAnnotationChanged -= PdfViewer_ShapeAnnotationChanged;
    pdfViewer.ShapeAnnotationChanged += PdfViewer_ShapeAnnotationChanged;
}
47+
48+
/// <summary>
/// Runs OCR on the region covered by a newly added shape annotation.
/// The current page is exported as an image, cropped to the annotation bounds, and the
/// crop is handed to the OCR processor.
/// </summary>
/// <param name="sender">The viewer that raised the event.</param>
/// <param name="e">Describes the annotation change; only the Add action is processed.</param>
private void PdfViewer_ShapeAnnotationChanged(object sender, Syncfusion.Windows.PdfViewer.ShapeAnnotationChangedEventArgs e)
{
    if (e.Action != Syncfusion.Windows.PdfViewer.AnnotationChangedAction.Add)
    {
        return;
    }

#if NETCOREAPP
    tessaractBinariesPath = "../../../Tesseract binaries";
    tessDataPath = @"../../../Tessdata/";
#else
    tessaractBinariesPath = "../../Tesseract binaries";
    tessDataPath = @"../../Tessdata/";
#endif
    bounds = e.NewBounds;

    using (OCRProcessor processor = new OCRProcessor(tessaractBinariesPath))
    {
        //Language to process the OCR
        processor.Settings.Language = Languages.English;

        // CurrentPageIndex is decremented before being passed to ExportAsImage, as in the
        // original sample. The using block releases the page bitmap even when cropping or
        // OCR throws.
        using (Bitmap image = GetBitmap(pdfViewer.ExportAsImage(pdfViewer.CurrentPageIndex - 1)))
        {
            // Bitmap.Clone fails when the rectangle reaches outside the bitmap or is empty,
            // so restrict the crop to the part of the annotation that lies on the image.
            // NOTE(review): this assumes NewBounds and the exported image share the same
            // coordinate scale — confirm against the viewer documentation.
            RectangleF cropArea = RectangleF.Intersect(bounds, new RectangleF(0, 0, image.Width, image.Height));
            if (cropArea.Width < 1 || cropArea.Height < 1)
            {
                return;
            }

            using (Bitmap clonedImage = image.Clone(cropArea, System.Drawing.Imaging.PixelFormat.Format32bppArgb))
            {
                // ocrText holds the recognized text of the selected region.
                string ocrText = processor.PerformOCR(clonedImage, tessDataPath);
            }
        }
    }
}
75+
76+
/// <summary>
/// Copies a WPF <see cref="BitmapSource"/> into a new GDI+ <see cref="Bitmap"/> with
/// 32-bit premultiplied ARGB pixels.
/// </summary>
/// <param name="source">The image to copy.</param>
/// <returns>A new bitmap of the same pixel size; the caller must dispose it.</returns>
/// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
Bitmap GetBitmap(BitmapSource source)
{
    if (source == null)
    {
        throw new ArgumentNullException(nameof(source));
    }

    // The destination buffer is Format32bppPArgb, whose memory layout corresponds to WPF's
    // Pbgra32. CopyPixels copies raw bytes without converting, so any other source format
    // must be converted first or the colours come out wrong.
    BitmapSource pixels = source;
    if (source.Format != PixelFormats.Pbgra32)
    {
        pixels = new FormatConvertedBitmap(source, PixelFormats.Pbgra32, null, 0);
    }

    Bitmap bmp = new Bitmap(
        pixels.PixelWidth,
        pixels.PixelHeight,
        System.Drawing.Imaging.PixelFormat.Format32bppPArgb);
    try
    {
        BitmapData data = bmp.LockBits(
            new System.Drawing.Rectangle(System.Drawing.Point.Empty, bmp.Size),
            ImageLockMode.WriteOnly,
            System.Drawing.Imaging.PixelFormat.Format32bppPArgb);
        try
        {
            // Int32Rect.Empty selects the whole source image.
            pixels.CopyPixels(
                Int32Rect.Empty,
                data.Scan0,
                data.Height * data.Stride,
                data.Stride);
        }
        finally
        {
            // Always release the lock, even if the copy fails.
            bmp.UnlockBits(data);
        }
    }
    catch
    {
        // Do not leak the GDI+ bitmap when it cannot be filled.
        bmp.Dispose();
        throw;
    }

    return bmp;
}
94+
}
95+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.Linq;
4+
using System.Threading.Tasks;
5+
using System.Windows.Forms;
6+
using WinForms_TextExtractionByOCR;
7+
8+
namespace WindWinForms_TextExtractionByOCR_NETowsFormsApp1
9+
{
10+
/// <summary>
/// Hosts the process entry point of the sample application.
/// </summary>
internal static class Program
{
    /// <summary>
    /// The main entry point for the application: configures Windows Forms rendering
    /// and runs the message loop with <see cref="Form1"/> as the main window.
    /// </summary>
    [STAThread]
    static void Main()
    {
        Application.EnableVisualStyles();
        Application.SetCompatibleTextRenderingDefault(false);

        Form1 mainForm = new Form1();
        Application.Run(mainForm);
    }
}
23+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
using System.Reflection;
2+
using System.Runtime.CompilerServices;
3+
using System.Runtime.InteropServices;
4+
5+
// General Information about an assembly is controlled through the following
6+
// set of attributes. Change these attribute values to modify the information
7+
// associated with an assembly.
8+
[assembly: AssemblyTitle("WinForms_TextExtractionByOCR")]
9+
[assembly: AssemblyDescription("")]
10+
[assembly: AssemblyConfiguration("")]
11+
[assembly: AssemblyCompany("")]
12+
[assembly: AssemblyProduct("WinForms_TextExtractionByOCR")]
13+
[assembly: AssemblyCopyright("Copyright © 2025")]
14+
[assembly: AssemblyTrademark("")]
15+
[assembly: AssemblyCulture("")]
16+
17+
// Setting ComVisible to false makes the types in this assembly not visible
18+
// to COM components. If you need to access a type in this assembly from
19+
// COM, set the ComVisible attribute to true on that type.
20+
[assembly: ComVisible(false)]
21+
22+
// The following GUID is for the ID of the typelib if this project is exposed to COM
23+
[assembly: Guid("1b09455e-6f67-4155-afe2-eb421bab1190")]
24+
25+
// Version information for an assembly consists of the following four values:
26+
//
27+
// Major Version
28+
// Minor Version
29+
// Build Number
30+
// Revision
31+
//
32+
[assembly: AssemblyVersion("1.0.0.0")]
33+
[assembly: AssemblyFileVersion("1.0.0.0")]

How-to/How-to-extract-Text-using-OCR/ExtractingSample/WinForms_TextExtractionByOCR/Properties/Resources.Designer.cs

+63
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)