|
|
|
สอบถามว่า ต้องเขียน Code อย่างไร จึงจะทำให้ระบบ Tesseract js อ่านภาษาไทยแบบ OCR ได้ดีเทียบเท่า Google Cloud Vision API ครับ |
|
|
|
|
|
|
|
สอบถามว่า ต้องเขียน Code อย่างไร จึงจะทำให้ระบบ Tesseract js อ่านภาษาไทยแบบ OCR ได้ดีเทียบเท่า Google Cloud Vision API เนื่องจากได้ลองเขียน Code เรื่องระบบ Tesseract js กับการอ่านภาษาไทยแบบ OCR ตาม Link
https://webunique.in.th/blog/2016/11/23/การแปลงรูปภาพข้อความเป/
Code ทั้งหมด มีดังนี้
1. index.html
<!DOCTYPE html>
<head>
<title>OCR แปลงรูปภาพข้อความเป็นข้อความที่แก้ไขได้</title>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1">
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.9.1/jquery.min.js"></script>
<link rel="stylesheet" href="css/bootstrap.min.css" >
<link rel="stylesheet" href="css/bootstrap-theme.min.css" >
<script src="js/bootstrap.min.js"></script>
<script src="js/tesseract.js"></script>
<script>
$(document).ready(function(){
createorc($("#dataface").attr("src"));
/**
 * Reads the file picked in a jQuery-wrapped file input as a data URL and
 * passes that data URL to createorc().
 *
 * @param {Object} input jQuery-style collection whose first element is the
 *   file input to read from.
 */
function readURL(input) {
  var field = input[0];
  // Nothing selected: leave the current preview and text untouched.
  if (field.value == "") {
    return;
  }
  var fileReader = new FileReader();
  fileReader.onload = function (loadEvent) {
    createorc(loadEvent.target.result);
  };
  fileReader.readAsDataURL(field.files[0]);
}
/**
 * Shows `result` in the #preview pane, runs Tesseract OCR on it using the
 * language selected in the .langdetect dropdown, and writes the recognized
 * text into .showtxt. The #loading overlay is shown on progress events and
 * hidden when the job finishes, whether it succeeded or failed.
 *
 * @param {string} result Image source (URL or data URL) to preview and OCR.
 */
function createorc(result){
  var img = '<img id="dataface" src="'+result+'" style="max-width:100%;">';
  $("#preview").html(img).promise().done(function(){
    Tesseract.recognize(result,{lang: $(".langdetect").val()})
    .progress(function (p) { $("#loading").show(); })
    .then(data => {
      // OCR output is arbitrary text: escape it before inserting it as HTML.
      var safeText = data.text
        .replace(/&/g, "&amp;")
        .replace(/</g, "&lt;")
        .replace(/>/g, "&gt;");
      // Match real line breaks (\n); a bare /n/ would replace every letter "n".
      $(".showtxt").html(safeText.replace(/\n/g, "<br />"));
    })
    .catch(err => {
      // Report failures instead of silently leaving the result box empty.
      console.error('OCR failed', err);
    })
    .finally(e => {
      $("#loading").hide();
    });
  });
}
$( "#imagebroswer" ).change(function() {
readURL($(this));
});
});
</script>
<style>
#loading {
display:none;
top: 0;
left: 0;
background-color: rgba(255,255,255,.8);
min-width: 100%;
min-height: 100%;
height: auto;
position: fixed;
z-index: 100000000;
}
.abx{
position: absolute;
width:100%;
text-align: center;
top:25%;
}
</style>
</head>
<body>
<div id="loading">
<div class="abx">
<div align="center">กำลังประมวลผล</div>
</div>
</div>
<div class="container" style="padding:25px;">
<div class="row">
<h3>OCR แปลงรูปภาพข้อความเป็นข้อความที่แก้ไขได้</h3>
<div class="form-group">
<div class="col-md-3">
<label for="">เลือกภาษา</label>
<select class="form-control langdetect" >
<option value='eng' selected> English </option>
<option value='tha'> Thai </option>
</select>
</div>
<div class="col-md-3">
<label for="">เลือกรูป</label>
<input class="form-control" id="imagebroswer" type="file">
</div>
</div>
</div>
<div style="">
<div class="row" >
<div id="preview" class="col-md-6">
<img id="dataface" src="txtsample.png" style="max-width:100%;">
</div>
<div class="col-md-6">
<div class="showtxt " style="padding:25px 25px;border:1px solid #000;min-height:200px;">
</div>
</div>
</div>
</div>
</div>
</body>
</html>
2. tesseract.js (อยู่ใน Folder js ครับ)
(function(f){if(typeof exports==="object"&&typeof module!=="undefined"){module.exports=f()}else if(typeof define==="function"&&define.amd){define([],f)}else{var g;if(typeof window!=="undefined"){g=window}else if(typeof global!=="undefined"){g=global}else if(typeof self!=="undefined"){g=self}else{g=this}g.Tesseract = f()}})(function(){var define,module,exports;return (function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw f.code="MODULE_NOT_FOUND",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;o<r.length;o++)s(r[o]);return s})({1:[function(require,module,exports){
// shim for using process in browser
var process = module.exports = {};
// cached from whatever global is present so that test runners that stub it
// don't break things. But we need to wrap it in a try catch in case it is
// wrapped in strict mode code which doesn't define any globals. It's inside a
// function because try/catches deoptimize in certain engines.
var cachedSetTimeout;
var cachedClearTimeout;
// Stand-in used when no setTimeout function was found at load time; calling it always throws.
function defaultSetTimout() {
throw new Error('setTimeout has not been defined');
}
// Stand-in used when no clearTimeout function was found at load time; calling it always throws.
function defaultClearTimeout () {
throw new Error('clearTimeout has not been defined');
}
// Capture the timer functions that exist at load time. Each one falls back to
// its throwing stand-in when the global is not a function or reading it throws.
(function () {
try {
if (typeof setTimeout === 'function') {
cachedSetTimeout = setTimeout;
} else {
cachedSetTimeout = defaultSetTimout;
}
} catch (e) {
cachedSetTimeout = defaultSetTimout;
}
try {
if (typeof clearTimeout === 'function') {
cachedClearTimeout = clearTimeout;
} else {
cachedClearTimeout = defaultClearTimeout;
}
} catch (e) {
cachedClearTimeout = defaultClearTimeout;
}
} ())
// Schedules `fun` with a zero delay through the cached setTimeout and returns
// whatever that call returns. If a plain call throws, the call is retried
// with an explicit `this`.
function runTimeout(fun) {
if (cachedSetTimeout === setTimeout) {
// normal environments in sane situations
return setTimeout(fun, 0);
}
// if setTimeout wasn't available at load time but was defined later
if ((cachedSetTimeout === defaultSetTimout || !cachedSetTimeout) && setTimeout) {
cachedSetTimeout = setTimeout;
return setTimeout(fun, 0);
}
try {
// when somebody has replaced setTimeout, but no I.E. madness
return cachedSetTimeout(fun, 0);
} catch(e){
try {
// When we are in I.E. but the script has been evaled, so I.E. doesn't trust the global object when called normally
return cachedSetTimeout.call(null, fun, 0);
} catch(e){
// Same as above, but for a version of I.E. that must have the global object as 'this'; hopefully our context is correct, otherwise it will throw a global error
return cachedSetTimeout.call(this, fun, 0);
}
}
}
// Cancels the timer identified by `marker` through the cached clearTimeout.
// If a plain call throws, the call is retried with an explicit `this`.
function runClearTimeout(marker) {
if (cachedClearTimeout === clearTimeout) {
// normal environments in sane situations
return clearTimeout(marker);
}
// if clearTimeout wasn't available at load time but was defined later
if ((cachedClearTimeout === defaultClearTimeout || !cachedClearTimeout) && clearTimeout) {
cachedClearTimeout = clearTimeout;
return clearTimeout(marker);
}
try {
// when somebody has replaced clearTimeout, but no I.E. madness
return cachedClearTimeout(marker);
} catch (e){
try {
// When we are in I.E. but the script has been evaled, so I.E. doesn't trust the global object when called normally
return cachedClearTimeout.call(null, marker);
} catch (e){
// Same as above, but for a version of I.E. that must have the global object as 'this'; hopefully our context is correct, otherwise it will throw a global error.
// Some versions of I.E. have different rules for clearTimeout vs setTimeout
return cachedClearTimeout.call(this, marker);
}
}
}
var queue = [];
var draining = false;
var currentQueue;
var queueIndex = -1;
// Timeout callback armed by drainQueue(). drainQueue() cancels it once a drain
// finishes, so it only has work to do when a drain was left unfinished.
function cleanUpNextTick() {
if (!draining || !currentQueue) {
// No drain in progress, or no batch left over from one: nothing to recover.
return;
}
draining = false;
if (currentQueue.length) {
// Put the unfinished batch back in front of anything queued since.
// queueIndex is left as-is in this branch.
queue = currentQueue.concat(queue);
} else {
queueIndex = -1;
}
if (queue.length) {
drainQueue();
}
}
// Runs every queued Item batch by batch, including Items that are queued while
// an earlier batch is running. Does nothing if a drain is already in progress.
function drainQueue() {
if (draining) {
return;
}
// Arm cleanUpNextTick; it is cancelled at the bottom once the drain completes.
var timeout = runTimeout(cleanUpNextTick);
draining = true;
var len = queue.length;
while(len) {
// Swap in a fresh queue so that work enqueued by callbacks lands in the next pass.
currentQueue = queue;
queue = [];
while (++queueIndex < len) {
if (currentQueue) {
currentQueue[queueIndex].run();
}
}
queueIndex = -1;
len = queue.length;
}
currentQueue = null;
draining = false;
runClearTimeout(timeout);
}
// Queue `fun` to run asynchronously; every argument after `fun` is forwarded to it.
process.nextTick = function (fun) {
    var extraCount = arguments.length - 1;
    var args = new Array(extraCount);
    for (var k = 0; k < extraCount; k++) {
        args[k] = arguments[k + 1];
    }
    queue.push(new Item(fun, args));
    // Only the first entry of an idle queue needs to schedule a drain.
    if (!draining && queue.length === 1) {
        runTimeout(drainQueue);
    }
};
// Queue entry pairing a callback with the argument list it will be called with.
// A constructor is used because v8 likes predictable objects.
function Item(fun, array) {
this.fun = fun;
this.array = array;
}
// Invoke the stored callback with the stored arguments, with `this` set to null.
Item.prototype.run = function () {
this.fun.apply(null, this.array);
};
process.title = 'browser';
process.browser = true;
process.env = {};
process.argv = [];
process.version = ''; // empty string to avoid regexp issues
process.versions = {};
function noop() {}
process.on = noop;
process.addListener = noop;
process.once = noop;
process.off = noop;
process.removeListener = noop;
process.removeAllListeners = noop;
process.emit = noop;
process.binding = function (name) {
throw new Error('process.binding is not supported');
};
process.cwd = function () { return '/' };
process.chdir = function (dir) {
throw new Error('process.chdir is not supported');
};
process.umask = function() { return 0; };
},{}],2:[function(require,module,exports){
module.exports={
"name": "tesseract.js",
"version": "1.0.10",
"description": "Pure Javascript Multilingual OCR",
"main": "src/index.js",
"scripts": {
"test": "echo "Error: no test specified" & exit 1",
"start": "watchify src/index.js -t [ envify --NODE_ENV development ] -t [ babelify --presets [ es2015 ] ] -o dist/tesseract.dev.js --standalone Tesseract & watchify src/browser/worker.js -t [ envify --NODE_ENV development ] -t [ babelify --presets [ es2015 ] ] -o dist/worker.dev.js & http-server -p 7355",
"build": "browserify src/index.js -t [ babelify --presets [ es2015 ] ] -o dist/tesseract.js --standalone Tesseract && browserify src/browser/worker.js -t [ babelify --presets [ es2015 ] ] -o dist/worker.js",
"release": "npm run build && git commit -am 'new release' && git push && git tag `jq -r '.version' package.json` && git push origin --tags && npm publish"
},
"browser": {
"./src/node/index.js": "./src/browser/index.js"
},
"author": "",
"license": "Apache-2.0",
"devDependencies": {
"babel-preset-es2015": "^6.16.0",
"babelify": "^7.3.0",
"browserify": "^13.1.0",
"envify": "^3.4.1",
"http-server": "^0.9.0",
"pako": "^1.0.3",
"watchify": "^3.7.0"
},
"dependencies": {
"file-type": "^3.8.0",
"is-url": "^1.2.2",
"jpeg-js": "^0.2.0",
"level-js": "^2.2.4",
"node-fetch": "^1.6.3",
"object-assign": "^4.1.0",
"png.js": "^0.2.1",
"tesseract.js-core": "^1.0.2"
},
"repository": {
"type": "git",
"url": "https://github.com/naptha/tesseract.js.git"
},
"bugs": {
"url": "https://github.com/naptha/tesseract.js/issues"
},
"homepage": "https://github.com/naptha/tesseract.js"
}
},{}],3:[function(require,module,exports){
(function (process){
'use strict';
// Default locations of the OCR core script and the language data. workerPath
// is filled in below depending on the build mode. These defaults are exported
// so the entry module can merge caller-supplied options over them.
// NOTE(review): every default points at cdn.rawgit.com - confirm that host
// still serves these files; otherwise supply workerPath/corePath/langPath
// explicitly through Tesseract.create({...}).
var defaultOptions = {
// workerPath: 'https://cdn.rawgit.com/naptha/tesseract.js/0.2.0/dist/worker.js',
corePath: 'https://cdn.rawgit.com/naptha/tesseract.js-core/0.1.0/index.js',
langPath: 'https://cdn.rawgit.com/naptha/tessdata/gh-pages/3.02/'
};
if (process.env.NODE_ENV === "development") {
console.debug('Using Development Configuration');
// Development: load the worker from the current host, with a random query string appended.
defaultOptions.workerPath = location.protocol + '//' + location.host + '/dist/worker.dev.js?nocache=' + Math.random().toString(36).slice(3);
} else {
// Otherwise: load the worker build whose tag matches this package's version.
var version = require('../../package.json').version;
defaultOptions.workerPath = 'https://cdn.rawgit.com/naptha/tesseract.js/' + version + '/dist/worker.js';
}
exports.defaultOptions = defaultOptions;
exports.spawnWorker = function spawnWorker(instance, workerOptions) {
if (window.Blob && window.URL) {
var blob = new Blob(['importScripts("' + workerOptions.workerPath + '");']);
var worker = new Worker(window.URL.createObjectURL(blob));
} else {
var worker = new Worker(workerOptions.workerPath);
}
worker.onmessage = function (e) {
var packet = e.data;
instance._recv(packet);
};
return worker;
};
// Terminates the Web Worker held in instance.worker.
exports.terminateWorker = function (instance) {
instance.worker.terminate();
};
// Runs packet.payload.image through loadImage() and, once that calls back,
// posts the packet to the instance's worker.
exports.sendPacket = function sendPacket(instance, packet) {
loadImage(packet.payload.image, function (img) {
// Replace the caller-supplied image with what loadImage() produced before posting.
packet.payload.image = img;
instance.worker.postMessage(packet);
});
};
/**
 * Reduces any supported image input, step by step, to a value that can be
 * posted to the worker, then passes it to `cb`.
 *
 * Handled inputs, each converted to the next form and re-dispatched:
 *   - string starting with '#': looked up with document.querySelector
 *   - string containing 'blob:' or 'data:': loaded into an Image element
 *   - any other string: fetched as a Blob with XMLHttpRequest; if an
 *     http(s) request errors it is retried once through crossorigin.me
 *   - File: read as a data URL
 *   - Blob: turned into an object URL
 *   - canvas element (has getContext): its 2D context
 *   - IMG / VIDEO element: drawn onto a new canvas
 *   - 2D context (has getImageData): the canvas's full image data
 *   - anything else: passed to `cb` unchanged
 *
 * @param {*} image Image input in one of the forms above.
 * @param {Function} cb Called with the final value; may be called asynchronously.
 * @throws {TypeError} If `image` is null or undefined (for example a selector
 *   that matched nothing).
 */
function loadImage(image, cb) {
    if (image == null) {
        throw new TypeError('loadImage: no image was given (or the selector matched nothing)');
    }
    if (typeof image === 'string') {
        if (/^#/.test(image)) {
            // element css selector
            return loadImage(document.querySelector(image), cb);
        } else if (/(blob|data):/.test(image)) {
            // data url
            var im = new Image();
            // Attach handlers before setting src so the load event cannot be missed.
            im.onload = function (e) {
                return loadImage(im, cb);
            };
            im.onerror = function (e) {
                console.error('Unable to load image from blob/data URL');
            };
            im.src = image;
            return;
        } else {
            var xhr = new XMLHttpRequest();
            xhr.open('GET', image, true);
            xhr.responseType = "blob";
            xhr.onload = function (e) {
                return loadImage(xhr.response, cb);
            };
            xhr.onerror = function (e) {
                // Retry absolute http(s) URLs once through the CORS proxy;
                // never re-proxy a URL that already goes through it.
                if (/^https?:\/\//.test(image) && !/^https:\/\/crossorigin\.me/.test(image)) {
                    console.debug('Attempting to load image with CORS proxy');
                    loadImage('https://crossorigin.me/' + image, cb);
                } else {
                    // Report the failure instead of leaving the job hanging silently.
                    console.error('Unable to load image', image);
                }
            };
            xhr.send(null);
            return;
        }
    } else if (image instanceof File) {
        // files
        var fr = new FileReader();
        fr.onload = function (e) {
            return loadImage(fr.result, cb);
        };
        fr.readAsDataURL(image);
        return;
    } else if (image instanceof Blob) {
        return loadImage(URL.createObjectURL(image), cb);
    } else if (image.getContext) {
        // canvas element
        return loadImage(image.getContext('2d'), cb);
    } else if (image.tagName == "IMG" || image.tagName == "VIDEO") {
        // image element or video element
        var c = document.createElement('canvas');
        c.width = image.naturalWidth || image.videoWidth;
        c.height = image.naturalHeight || image.videoHeight;
        var ctx = c.getContext('2d');
        ctx.drawImage(image, 0, 0);
        return loadImage(ctx, cb);
    } else if (image.getImageData) {
        // canvas context
        var data = image.getImageData(0, 0, image.canvas.width, image.canvas.height);
        return loadImage(data, cb);
    } else {
        return cb(image);
    }
    // Safety net: every branch above must return.
    throw new Error('Missing return in loadImage cascade');
}
}).call(this,require('_process'))
},{"../../package.json":2,"_process":1}],4:[function(require,module,exports){
"use strict";
// The result of dump.js is a big JSON tree
// which can be easily serialized (for instance
// to be sent from a webworker to the main app
// or through Node's IPC), but we want
// a (circular) DOM-like interface for walking
// through the data.
module.exports = function circularize(page) {
page.paragraphs = [];
page.lines = [];
page.words = [];
page.symbols = [];
page.blocks.forEach(function (block) {
block.page = page;
block.lines = [];
block.words = [];
block.symbols = [];
block.paragraphs.forEach(function (para) {
para.block = block;
para.page = page;
para.words = [];
para.symbols = [];
para.lines.forEach(function (line) {
line.paragraph = para;
line.block = block;
line.page = page;
line.symbols = [];
line.words.forEach(function (word) {
word.line = line;
word.paragraph = para;
word.block = block;
word.page = page;
word.symbols.forEach(function (sym) {
sym.word = word;
sym.line = line;
sym.paragraph = para;
sym.block = block;
sym.page = page;
sym.line.symbols.push(sym);
sym.paragraph.symbols.push(sym);
sym.block.symbols.push(sym);
sym.page.symbols.push(sym);
});
word.paragraph.words.push(word);
word.block.words.push(word);
word.page.words.push(word);
});
line.block.lines.push(line);
line.page.lines.push(line);
});
para.page.paragraphs.push(para);
});
});
return page;
};
},{}],5:[function(require,module,exports){
'use strict';
var _createClass = function () { function defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i]; descriptor.enumerable = descriptor.enumerable || false; descriptor.configurable = true; if ("value" in descriptor) descriptor.writable = true; Object.defineProperty(target, descriptor.key, descriptor); } } return function (Constructor, protoProps, staticProps) { if (protoProps) defineProperties(Constructor.prototype, protoProps); if (staticProps) defineProperties(Constructor, staticProps); return Constructor; }; }();
function _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError("Cannot call a class as a function"); } }
var adapter = require('../node/index.js');
var jobCounter = 0;
// TesseractJob: handle for one queued OCR request. It collects then / catch /
// progress / finally callbacks and fires them when _handle() receives the
// matching packet. All four registration methods return the job itself, so
// calls can be chained, but the job is not a real Promise.
module.exports = function () {
function TesseractJob(instance) {
_classCallCheck(this, TesseractJob);
// Counter plus a short random hex suffix.
this.id = 'Job-' + ++jobCounter + '-' + Math.random().toString(16).slice(3, 8);
this._instance = instance;
// Callback lists. _resolve and _reject are later replaced by the settled data (see _handle).
this._resolve = [];
this._reject = [];
this._progress = [];
this._finally = [];
}
_createClass(TesseractJob, [{
key: 'then',
// Registers a resolve callback (and optionally a reject callback).
value: function then(resolve, reject) {
if (this._resolve.push) {
// Still a callback list: the job has not resolved yet.
this._resolve.push(resolve);
} else {
// _resolve now holds the result data: call back immediately with it.
resolve(this._resolve);
}
if (reject) this.catch(reject);
return this;
}
}, {
key: 'catch',
// Registers a reject callback; same list-or-data check as then().
value: function _catch(reject) {
if (this._reject.push) {
this._reject.push(reject);
} else {
reject(this._reject);
}
return this;
}
}, {
key: 'progress',
// Registers a callback for 'progress' packets.
value: function progress(fn) {
this._progress.push(fn);
return this;
}
}, {
key: 'finally',
// Registers a callback that runs after the resolve or reject callbacks.
value: function _finally(fn) {
this._finally.push(fn);
return this;
}
}, {
key: '_send',
// Hands a packet tagged with this job's id to the adapter for delivery.
value: function _send(action, payload) {
adapter.sendPacket(this._instance, {
jobId: this.id,
action: action,
payload: payload
});
}
}, {
key: '_handle',
// Dispatches one packet addressed to this job according to packet.status.
value: function _handle(packet) {
var data = packet.data;
var runFinallyCbs = false;
if (packet.status === 'resolve') {
// With no listeners registered, the result is logged instead.
if (this._resolve.length === 0) console.log(data);
this._resolve.forEach(function (fn) {
var ret = fn(data);
if (ret && typeof ret.then == 'function') {
console.warn('TesseractJob instances do not chain like ES6 Promises. To convert it into a real promise, use Promise.resolve.');
}
});
// Keep the result so that later then() calls fire immediately.
this._resolve = data;
this._instance._dequeue();
runFinallyCbs = true;
} else if (packet.status === 'reject') {
// With no listeners registered, the error is logged instead.
if (this._reject.length === 0) console.error(data);
this._reject.forEach(function (fn) {
return fn(data);
});
// Keep the error so that later catch() calls fire immediately.
this._reject = data;
this._instance._dequeue();
runFinallyCbs = true;
} else if (packet.status === 'progress') {
this._progress.forEach(function (fn) {
return fn(data);
});
} else {
console.warn('Message type unknown', packet.status);
}
if (runFinallyCbs) {
this._finally.forEach(function (fn) {
return fn(data);
});
}
}
}]);
return TesseractJob;
}();
},{"../node/index.js":3}],6:[function(require,module,exports){
'use strict';
var _createClass = function () { function defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i]; descriptor.enumerable = descriptor.enumerable || false; descriptor.configurable = true; if ("value" in descriptor) descriptor.writable = true; Object.defineProperty(target, descriptor.key, descriptor); } } return function (Constructor, protoProps, staticProps) { if (protoProps) defineProperties(Constructor.prototype, protoProps); if (staticProps) defineProperties(Constructor, staticProps); return Constructor; }; }();
function _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError("Cannot call a class as a function"); } }
var adapter = require('./node/index.js');
var circularize = require('./common/circularize.js');
var TesseractJob = require('./common/job');
var version = require('../package.json').version;
/**
 * Builds a TesseractWorker whose options are the adapter defaults overlaid
 * with the optional first argument, then attaches `create` and `version` to it.
 *
 * @returns {TesseractWorker} The new worker front-end.
 */
function create() {
    var overrides = arguments[0] !== undefined ? arguments[0] : {};
    var mergedOptions = Object.assign({}, adapter.defaultOptions, overrides);
    var instance = new TesseractWorker(mergedOptions);
    instance.create = create;
    instance.version = version;
    return instance;
}
/**
 * Front-end for one OCR web worker. Jobs created by recognize() / detect()
 * are queued and started one at a time; packets coming back from the worker
 * are routed to the job that is currently running.
 */
var TesseractWorker = function () {
    function TesseractWorker(workerOptions) {
        _classCallCheck(this, TesseractWorker);
        this.worker = null; // spawned lazily by _delay()
        this.workerOptions = workerOptions;
        this._currentJob = null;
        this._queue = [];
    }
    _createClass(TesseractWorker, [{
        key: 'recognize',
        // Queues an OCR job for `image`. `options` may be a language string or
        // an options object; lang defaults to 'eng'. Returns the TesseractJob.
        value: function recognize(image) {
            var _this = this;
            var options = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {};
            return this._delay(function (job) {
                if (typeof options === 'string') options = { lang: options };
                options.lang = options.lang || 'eng';
                job._send('recognize', { image: image, options: options, workerOptions: _this.workerOptions });
            });
        }
    }, {
        key: 'detect',
        // Queues a 'detect' job for `image`. Returns the TesseractJob.
        value: function detect(image) {
            var _this2 = this;
            var options = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {};
            return this._delay(function (job) {
                job._send('detect', { image: image, options: options, workerOptions: _this2.workerOptions });
            });
        }
    }, {
        key: 'terminate',
        // Stops the underlying worker, if any; a later job spawns a new one.
        value: function terminate() {
            if (this.worker) adapter.terminateWorker(this);
            this.worker = null;
        }
    }, {
        key: '_delay',
        // Creates a job, queues `fn` to start it, and starts it right away
        // when no other job is running.
        value: function _delay(fn) {
            var _this3 = this;
            if (!this.worker) this.worker = adapter.spawnWorker(this, this.workerOptions);
            var job = new TesseractJob(this);
            this._queue.push(function (e) {
                _this3._queue.shift();
                _this3._currentJob = job;
                fn(job);
            });
            if (!this._currentJob) this._dequeue();
            return job;
        }
    }, {
        key: '_dequeue',
        // Marks the current job finished and starts the next queued one, if any.
        value: function _dequeue() {
            this._currentJob = null;
            if (this._queue.length) {
                this._queue[0]();
            }
        }
    }, {
        key: '_recv',
        // Routes a packet from the worker to the running job.
        value: function _recv(packet) {
            if (packet.status === 'resolve' && packet.action === 'recognize') {
                packet.data = circularize(packet.data);
            }
            // _currentJob is null between jobs; a packet arriving then must be
            // reported as unknown rather than throwing on `.id`.
            if (this._currentJob && this._currentJob.id === packet.jobId) {
                this._currentJob._handle(packet);
            } else {
                console.warn('Job ID ' + packet.jobId + ' not known.');
            }
        }
    }]);
    return TesseractWorker;
}();
module.exports = create();
},{"../package.json":2,"./common/circularize.js":4,"./common/job":5,"./node/index.js":3}]},{},[6])(6)
});
3. Link สำหรับ Download Source Code ทั้งหมดครับ - https://webunique.in.th/blog/simple/ocr.zip ซึ่งมีโครงสร้างของไฟล์ทั้งหมด ดังนี้ครับ
แล้วติดปัญหาในการ OCR คือ ได้ลองตัวอย่างรูปฉลากสินค้า ตาม Link ตัวอย่างรูป
https://www.beartai.com/wp-content/uploads/2019/06/WaiWai_02-768x512.jpg
และทำการ Crop เอาเฉพาะส่วนผสมของอาหาร ตามรูปด้านล่างครับ
แล้วทำการทดสอบ Code ในส่วนระบบ Tesseract js แล้วเจอปัญหาในเรื่อง OCR ออกมาได้ข้อมูลไม่ถูกต้อง และ OCR ได้เฉพาะแบบเลือกทีละ 1 ภาษาเท่านั้น ตามรูปด้านล่างครับ
จึงสอบถามว่า ต้องแก้ไข Code อย่างไร จึงจะ OCR ด้วยระบบ Tesseract js แล้ว อ่านค่าได้ถูกต้อง และอ่านค่าได้พร้อมกันทั้ง 2 ภาษา ตามรูปที่ต้องการอ่านค่าได้ถูกต้อง ด้านล่างครับ
Tag : PHP, HTML, HTML5, JavaScript, XAMPP, Windows
|
|
|
|
|
|
Date :
2020-06-11 20:21:06 |
By :
doanga2007 |
View :
2906 |
Reply :
6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
ตอบความคิดเห็นที่ : 1 เขียนโดย : PhrayaDev เมื่อวันที่ 2020-06-11 23:29:10
รายละเอียดของการตอบ ::
ได้ทดลองการเขียน Code ความคิดเห็นของ "พระยาเทพ" แล้ว ผลสรุป คือ ไม่แสดงข้อความใดๆ หลังจาก OCR ออกมาเลย จากตัวเลือก English + Thai ตามรูปด้านล่างครับ
ก่อนหน้าที่จะ เขียน Code ความคิดเห็นของ "พระยาเทพ" นั้น ยังสามารถ OCR แยกภาษา ตามตัวเลือกได้ตามปกติ ตามรูปด้านล่างครับ
และภาพตัวอย่างที่ใช้ในการทดสอบครั้งนี้ครับ
Code ที่ปรับปรุงทั้งหมดครับ
1. index.html
<!DOCTYPE html>
<head>
<title>OCR แปลงรูปภาพข้อความเป็นข้อความที่แก้ไขได้</title>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1">
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.9.1/jquery.min.js"></script>
<link rel="stylesheet" href="css/bootstrap.min.css" >
<link rel="stylesheet" href="css/bootstrap-theme.min.css" >
<script src="js/bootstrap.min.js"></script>
<script src="js/tesseract.js"></script>
<script>
$(document).ready(function(){
createorc($("#dataface").attr("src"));
/**
 * Reads the file picked in a jQuery-wrapped file input as a data URL and
 * passes that data URL to createorc().
 *
 * @param {Object} input jQuery-style collection whose first element is the
 *   file input to read from.
 */
function readURL(input) {
  var field = input[0];
  // Nothing selected: leave the current preview and text untouched.
  if (field.value == "") {
    return;
  }
  var fileReader = new FileReader();
  fileReader.onload = function (loadEvent) {
    createorc(loadEvent.target.result);
  };
  fileReader.readAsDataURL(field.files[0]);
}
/**
 * Shows `result` in the #preview pane, runs Tesseract OCR on it, and puts the
 * recognized text (with line breaks removed) into the .addvalue4 textarea.
 * The language comes from the .langdetect dropdown and falls back to
 * 'eng+tha'.
 *
 * NOTE(review): the bundled tesseract.js is 1.0.10. Confirm that this version
 * accepts combined language codes such as 'eng+tha'; if it only loads a
 * single traineddata file per job, the request fails and the failure is now
 * reported in the console instead of being swallowed.
 *
 * @param {string} result Image source (URL or data URL) to preview and OCR.
 */
function createorc(result){
  var img = '<img id="dataface" src="'+result+'" style="max-width:100%;">';
  $("#preview").html(img).promise().done(function(){
    var lang = $(".langdetect").val() || 'eng+tha';
    Tesseract.recognize(result,{lang: lang})
    .progress(function (p) { $("#loading").show(); })
    .then(data => {
      // .addvalue4 is a textarea: set its value, so the OCR text is treated
      // as plain text rather than parsed as HTML.
      $(".addvalue4").val(data.text.replace(/\n/g, ""));
    })
    .catch(err => {
      // Report failures instead of silently leaving the textarea empty.
      console.error('OCR failed for lang "' + lang + '"', err);
    })
    .finally(e => {
      $("#loading").hide();
    });
  });
}
$( "#imagebroswer" ).change(function() {
readURL($(this));
});
});
</script>
<style>
#loading {
display:none;
top: 0;
left: 0;
background-color: rgba(255,255,255,.9);
min-width: 100%;
min-height: 100%;
height: auto;
position: fixed;
z-index: 100000000;
}
.abx{
position: absolute;
width:100%;
text-align: center;
top:25%;
}
</style>
</head>
<body>
<div id="loading">
<div class="abx">
<div align="center" style="font-size: 50px;"><strong>กำลัง Load รูปภาพ เพื่อประมวลผล</strong></div><br><br>
<div align="center"><img src="loading.gif"></div>
</div>
</div>
<div class="container" style="padding:25px;">
<div class="row">
<h3>OCR แปลงรูปภาพข้อความเป็นข้อความที่แก้ไขได้</h3>
<div class="form-group">
<div class="col-md-3">
<label for="">เลือกภาษา</label>
<select class="form-control langdetect" >
<option value='eng+tha'> English + Thai </option>
</select>
</div>
<div class="col-md-3">
<label for="">เลือกรูป</label>
<input class="form-control" id="imagebroswer" type="file">
</div>
</div>
</div>
<div style="">
<div class="row">
<div id="preview" class="col-md-6">
<img id="dataface" src="txtsample.png" style="max-width:100%;">
</div>
<div class="col-md-6">
<textarea rows="1" cols="15" class="addvalue4 " name="addvalue4" style="border:none;">
</textarea>
</div>
</div>
</div>
</div>
</body>
</html>
2. tesseract.js (อยู่ใน Folder js ครับ)
(function(f){if(typeof exports==="object"&&typeof module!=="undefined"){module.exports=f()}else if(typeof define==="function"&&define.amd){define([],f)}else{var g;if(typeof window!=="undefined"){g=window}else if(typeof global!=="undefined"){g=global}else if(typeof self!=="undefined"){g=self}else{g=this}g.Tesseract = f()}})(function(){var define,module,exports;return (function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw f.code="MODULE_NOT_FOUND",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;o<r.length;o++)s(r[o]);return s})({1:[function(require,module,exports){
// shim for using process in browser
var process = module.exports = {};
// cached from whatever global is present so that test runners that stub it
// don't break things. But we need to wrap it in a try catch in case it is
// wrapped in strict mode code which doesn't define any globals. It's inside a
// function because try/catches deoptimize in certain engines.
var cachedSetTimeout;
var cachedClearTimeout;
function defaultSetTimout() {
throw new Error('setTimeout has not been defined');
}
function defaultClearTimeout () {
throw new Error('clearTimeout has not been defined');
}
(function () {
try {
if (typeof setTimeout === 'function') {
cachedSetTimeout = setTimeout;
} else {
cachedSetTimeout = defaultSetTimout;
}
} catch (e) {
cachedSetTimeout = defaultSetTimout;
}
try {
if (typeof clearTimeout === 'function') {
cachedClearTimeout = clearTimeout;
} else {
cachedClearTimeout = defaultClearTimeout;
}
} catch (e) {
cachedClearTimeout = defaultClearTimeout;
}
} ())
function runTimeout(fun) {
if (cachedSetTimeout === setTimeout) {
//normal enviroments in sane situations
return setTimeout(fun, 0);
}
// if setTimeout wasn't available but was latter defined
if ((cachedSetTimeout === defaultSetTimout || !cachedSetTimeout) && setTimeout) {
cachedSetTimeout = setTimeout;
return setTimeout(fun, 0);
}
try {
// when when somebody has screwed with setTimeout but no I.E. maddness
return cachedSetTimeout(fun, 0);
} catch(e){
try {
// When we are in I.E. but the script has been evaled so I.E. doesn't trust the global object when called normally
return cachedSetTimeout.call(null, fun, 0);
} catch(e){
// same as above but when it's a version of I.E. that must have the global object for 'this', hopfully our context correct otherwise it will throw a global error
return cachedSetTimeout.call(this, fun, 0);
}
}
}
function runClearTimeout(marker) {
if (cachedClearTimeout === clearTimeout) {
//normal enviroments in sane situations
return clearTimeout(marker);
}
// if clearTimeout wasn't available but was latter defined
if ((cachedClearTimeout === defaultClearTimeout || !cachedClearTimeout) && clearTimeout) {
cachedClearTimeout = clearTimeout;
return clearTimeout(marker);
}
try {
// when when somebody has screwed with setTimeout but no I.E. maddness
return cachedClearTimeout(marker);
} catch (e){
try {
// When we are in I.E. but the script has been evaled so I.E. doesn't trust the global object when called normally
return cachedClearTimeout.call(null, marker);
} catch (e){
// same as above but when it's a version of I.E. that must have the global object for 'this', hopfully our context correct otherwise it will throw a global error.
// Some versions of I.E. have different rules for clearTimeout vs setTimeout
return cachedClearTimeout.call(this, marker);
}
}
}
var queue = [];
var draining = false;
var currentQueue;
var queueIndex = -1;
function cleanUpNextTick() {
if (!draining || !currentQueue) {
return;
}
draining = false;
if (currentQueue.length) {
queue = currentQueue.concat(queue);
} else {
queueIndex = -1;
}
if (queue.length) {
drainQueue();
}
}
function drainQueue() {
if (draining) {
return;
}
var timeout = runTimeout(cleanUpNextTick);
draining = true;
var len = queue.length;
while(len) {
currentQueue = queue;
queue = [];
while (++queueIndex < len) {
if (currentQueue) {
currentQueue[queueIndex].run();
}
}
queueIndex = -1;
len = queue.length;
}
currentQueue = null;
draining = false;
runClearTimeout(timeout);
}
process.nextTick = function (fun) {
var args = new Array(arguments.length - 1);
if (arguments.length > 1) {
for (var i = 1; i < arguments.length; i++) {
args[i - 1] = arguments[i];
}
}
queue.push(new Item(fun, args));
if (queue.length === 1 && !draining) {
runTimeout(drainQueue);
}
};
// v8 likes predictible objects
function Item(fun, array) {
this.fun = fun;
this.array = array;
}
Item.prototype.run = function () {
this.fun.apply(null, this.array);
};
process.title = 'browser';
process.browser = true;
process.env = {};
process.argv = [];
process.version = ''; // empty string to avoid regexp issues
process.versions = {};
function noop() {}
process.on = noop;
process.addListener = noop;
process.once = noop;
process.off = noop;
process.removeListener = noop;
process.removeAllListeners = noop;
process.emit = noop;
process.binding = function (name) {
throw new Error('process.binding is not supported');
};
process.cwd = function () { return '/' };
process.chdir = function (dir) {
throw new Error('process.chdir is not supported');
};
process.umask = function() { return 0; };
},{}],2:[function(require,module,exports){
module.exports={
"name": "tesseract.js",
"version": "1.0.10",
"description": "Pure Javascript Multilingual OCR",
"main": "src/index.js",
"scripts": {
"test": "echo \"Error: no test specified\" & exit 1",
"start": "watchify src/index.js -t [ envify --NODE_ENV development ] -t [ babelify --presets [ es2015 ] ] -o dist/tesseract.dev.js --standalone Tesseract & watchify src/browser/worker.js -t [ envify --NODE_ENV development ] -t [ babelify --presets [ es2015 ] ] -o dist/worker.dev.js & http-server -p 7355",
"build": "browserify src/index.js -t [ babelify --presets [ es2015 ] ] -o dist/tesseract.js --standalone Tesseract && browserify src/browser/worker.js -t [ babelify --presets [ es2015 ] ] -o dist/worker.js",
"release": "npm run build && git commit -am 'new release' && git push && git tag `jq -r '.version' package.json` && git push origin --tags && npm publish"
},
"browser": {
"./src/node/index.js": "./src/browser/index.js"
},
"author": "",
"license": "Apache-2.0",
"devDependencies": {
"babel-preset-es2015": "^6.16.0",
"babelify": "^7.3.0",
"browserify": "^13.1.0",
"envify": "^3.4.1",
"http-server": "^0.9.0",
"pako": "^1.0.3",
"watchify": "^3.7.0"
},
"dependencies": {
"file-type": "^3.8.0",
"is-url": "^1.2.2",
"jpeg-js": "^0.2.0",
"level-js": "^2.2.4",
"node-fetch": "^1.6.3",
"object-assign": "^4.1.0",
"png.js": "^0.2.1",
"tesseract.js-core": "^1.0.2"
},
"repository": {
"type": "git",
"url": "https://github.com/naptha/tesseract.js.git"
},
"bugs": {
"url": "https://github.com/naptha/tesseract.js/issues"
},
"homepage": "https://github.com/naptha/tesseract.js"
}
},{}],3:[function(require,module,exports){
(function (process){
'use strict';
// Default locations of the Tesseract core script and the language data.
// workerPath is decided below from the build mode and added afterwards.
var defaultOptions = {
    corePath: 'https://cdn.rawgit.com/naptha/tesseract.js-core/0.1.0/index.js',
    langPath: 'https://cdn.rawgit.com/naptha/tessdata/gh-pages/3.02/'
};
var resolvedWorkerPath;
if (process.env.NODE_ENV !== "development") {
    // Non-development build: worker script URL is pinned to the package version.
    var releaseVersion = require('../../package.json').version;
    resolvedWorkerPath = 'https://cdn.rawgit.com/naptha/tesseract.js/' + releaseVersion + '/dist/worker.js';
} else {
    console.debug('Using Development Configuration');
    // Development build: load the worker from the current host; the random
    // query suffix makes every page load request a distinct URL.
    resolvedWorkerPath = location.protocol + '//' + location.host + '/dist/worker.dev.js?nocache=' + Math.random().toString(36).slice(3);
}
defaultOptions.workerPath = resolvedWorkerPath;
exports.defaultOptions = defaultOptions;
exports.spawnWorker = function spawnWorker(instance, workerOptions) {
if (window.Blob && window.URL) {
var blob = new Blob(['importScripts("' + workerOptions.workerPath + '");']);
var worker = new Worker(window.URL.createObjectURL(blob));
} else {
var worker = new Worker(workerOptions.workerPath);
}
worker.onmessage = function (e) {
var packet = e.data;
instance._recv(packet);
};
return worker;
};
exports.terminateWorker = function (instance) {
instance.worker.terminate();
};
exports.sendPacket = function sendPacket(instance, packet) {
loadImage(packet.payload.image, function (img) {
packet.payload.image = img;
instance.worker.postMessage(packet);
});
};
/**
 * Converts any supported image source into a value that can be posted to the
 * worker, then passes it to cb. Each branch reduces the input one step and
 * recurses until a terminal value is reached:
 *
 *   "#selector" -> element; URL string -> Blob (via XHR); File -> data URL;
 *   Blob -> object URL; blob:/data: URL -> <img>; <img>/<video> -> 2d context;
 *   <canvas> -> 2d context; 2d context -> getImageData() result;
 *   anything else is handed to cb unchanged.
 *
 * @param {*} image - image source in one of the forms above.
 * @param {function(*)} cb - receives the normalised image.
 * @throws {TypeError} if image is null/undefined or a "#selector" matches no element.
 */
function loadImage(image, cb) {
    if (image === null || image === undefined) {
        // Fail with a descriptive message instead of a property-access crash further down.
        throw new TypeError('loadImage: no image given (received ' + image + ')');
    }
    if (typeof image === 'string') {
        if (/^\#/.test(image)) {
            // element css selector
            var element = document.querySelector(image);
            if (!element) {
                throw new TypeError('loadImage: no element matches selector "' + image + '"');
            }
            return loadImage(element, cb);
        } else if (/(blob|data)\:/.test(image)) {
            // blob: or data: url
            var im = new Image();
            // Handlers are attached before src is set so no event can be missed.
            im.onload = function () {
                return loadImage(im, cb);
            };
            im.onerror = function () {
                console.error('loadImage: the image could not be decoded');
            };
            im.src = image;
            return;
        } else {
            var xhr = new XMLHttpRequest();
            xhr.open('GET', image, true);
            xhr.responseType = "blob";
            xhr.onload = function () {
                return loadImage(xhr.response, cb);
            };
            xhr.onerror = function () {
                // Retry absolute http(s) URLs once through the CORS proxy.
                if (/^https?:\/\//.test(image) && !/^https:\/\/crossorigin.me/.test(image)) {
                    console.debug('Attempting to load image with CORS proxy');
                    loadImage('https://crossorigin.me/' + image, cb);
                }
            };
            xhr.send(null);
            return;
        }
    } else if (image instanceof File) {
        // files
        var fr = new FileReader();
        fr.onload = function () {
            return loadImage(fr.result, cb);
        };
        fr.readAsDataURL(image);
        return;
    } else if (image instanceof Blob) {
        var objectUrl = URL.createObjectURL(image);
        return loadImage(objectUrl, function (img) {
            // The pixels have been extracted by now; release the object URL.
            URL.revokeObjectURL(objectUrl);
            cb(img);
        });
    } else if (image.getContext) {
        // canvas element
        return loadImage(image.getContext('2d'), cb);
    } else if (image.tagName == "IMG" || image.tagName == "VIDEO") {
        // image element or video element: paint it on a canvas of the same size
        var c = document.createElement('canvas');
        c.width = image.naturalWidth || image.videoWidth;
        c.height = image.naturalHeight || image.videoHeight;
        var ctx = c.getContext('2d');
        ctx.drawImage(image, 0, 0);
        return loadImage(ctx, cb);
    } else if (image.getImageData) {
        // canvas context
        var data = image.getImageData(0, 0, image.canvas.width, image.canvas.height);
        return loadImage(data, cb);
    } else {
        return cb(image);
    }
}
}).call(this,require('_process'))
},{"../../package.json":2,"_process":1}],4:[function(require,module,exports){
"use strict";
// The result of dump.js is a big JSON tree
// which can be easily serialized (for instance
// to be sent from a webworker to the main app
// or through Node's IPC), but we want
// a (circular) DOM-like interface for walking
// through the data.
module.exports = function circularize(page) {
page.paragraphs = [];
page.lines = [];
page.words = [];
page.symbols = [];
page.blocks.forEach(function (block) {
block.page = page;
block.lines = [];
block.words = [];
block.symbols = [];
block.paragraphs.forEach(function (para) {
para.block = block;
para.page = page;
para.words = [];
para.symbols = [];
para.lines.forEach(function (line) {
line.paragraph = para;
line.block = block;
line.page = page;
line.symbols = [];
line.words.forEach(function (word) {
word.line = line;
word.paragraph = para;
word.block = block;
word.page = page;
word.symbols.forEach(function (sym) {
sym.word = word;
sym.line = line;
sym.paragraph = para;
sym.block = block;
sym.page = page;
sym.line.symbols.push(sym);
sym.paragraph.symbols.push(sym);
sym.block.symbols.push(sym);
sym.page.symbols.push(sym);
});
word.paragraph.words.push(word);
word.block.words.push(word);
word.page.words.push(word);
});
line.block.lines.push(line);
line.page.lines.push(line);
});
para.page.paragraphs.push(para);
});
});
return page;
};
},{}],5:[function(require,module,exports){
'use strict';
var _createClass = function () { function defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i]; descriptor.enumerable = descriptor.enumerable || false; descriptor.configurable = true; if ("value" in descriptor) descriptor.writable = true; Object.defineProperty(target, descriptor.key, descriptor); } } return function (Constructor, protoProps, staticProps) { if (protoProps) defineProperties(Constructor.prototype, protoProps); if (staticProps) defineProperties(Constructor, staticProps); return Constructor; }; }();
function _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError("Cannot call a class as a function"); } }
var adapter = require('../node/index.js');
var jobCounter = 0;
module.exports = function () {
function TesseractJob(instance) {
_classCallCheck(this, TesseractJob);
this.id = 'Job-' + ++jobCounter + '-' + Math.random().toString(16).slice(3, 8);
this._instance = instance;
this._resolve = [];
this._reject = [];
this._progress = [];
this._finally = [];
}
_createClass(TesseractJob, [{
key: 'then',
value: function then(resolve, reject) {
if (this._resolve.push) {
this._resolve.push(resolve);
} else {
resolve(this._resolve);
}
if (reject) this.catch(reject);
return this;
}
}, {
key: 'catch',
value: function _catch(reject) {
if (this._reject.push) {
this._reject.push(reject);
} else {
reject(this._reject);
}
return this;
}
}, {
key: 'progress',
value: function progress(fn) {
this._progress.push(fn);
return this;
}
}, {
key: 'finally',
value: function _finally(fn) {
this._finally.push(fn);
return this;
}
}, {
key: '_send',
value: function _send(action, payload) {
adapter.sendPacket(this._instance, {
jobId: this.id,
action: action,
payload: payload
});
}
}, {
key: '_handle',
value: function _handle(packet) {
var data = packet.data;
var runFinallyCbs = false;
if (packet.status === 'resolve') {
if (this._resolve.length === 0) console.log(data);
this._resolve.forEach(function (fn) {
var ret = fn(data);
if (ret && typeof ret.then == 'function') {
console.warn('TesseractJob instances do not chain like ES6 Promises. To convert it into a real promise, use Promise.resolve.');
}
});
this._resolve = data;
this._instance._dequeue();
runFinallyCbs = true;
} else if (packet.status === 'reject') {
if (this._reject.length === 0) console.error(data);
this._reject.forEach(function (fn) {
return fn(data);
});
this._reject = data;
this._instance._dequeue();
runFinallyCbs = true;
} else if (packet.status === 'progress') {
this._progress.forEach(function (fn) {
return fn(data);
});
} else {
console.warn('Message type unknown', packet.status);
}
if (runFinallyCbs) {
this._finally.forEach(function (fn) {
return fn(data);
});
}
}
}]);
return TesseractJob;
}();
},{"../node/index.js":3}],6:[function(require,module,exports){
'use strict';
var _createClass = function () { function defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i]; descriptor.enumerable = descriptor.enumerable || false; descriptor.configurable = true; if ("value" in descriptor) descriptor.writable = true; Object.defineProperty(target, descriptor.key, descriptor); } } return function (Constructor, protoProps, staticProps) { if (protoProps) defineProperties(Constructor.prototype, protoProps); if (staticProps) defineProperties(Constructor, staticProps); return Constructor; }; }();
function _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError("Cannot call a class as a function"); } }
var adapter = require('./node/index.js');
var circularize = require('./common/circularize.js');
var TesseractJob = require('./common/job');
var version = require('../package.json').version;
/**
 * Builds a TesseractWorker whose options are the adapter defaults overridden
 * by the optional first argument, and attaches `create` and `version` to it.
 *
 * @returns {TesseractWorker} the configured worker wrapper.
 */
function create() {
    var overrides = arguments[0] === undefined ? {} : arguments[0];
    var mergedOptions = Object.assign({}, adapter.defaultOptions, overrides);
    var instance = new TesseractWorker(mergedOptions);
    instance.create = create;
    instance.version = version;
    return instance;
}
/**
 * TesseractWorker: owns one lazily spawned worker and a FIFO queue of jobs;
 * at most one job is in flight at a time.
 */
var TesseractWorker = function () {
    /**
     * @param {Object} workerOptions - options forwarded to the adapter and sent with each job.
     */
    function TesseractWorker(workerOptions) {
        _classCallCheck(this, TesseractWorker);
        this.worker = null;
        this.workerOptions = workerOptions;
        this._currentJob = null;
        this._queue = [];
    }
    _createClass(TesseractWorker, [{
        key: 'recognize',
        // Queues a 'recognize' job. `options` may be a language string or an
        // options object; lang falls back to 'eng'.
        value: function recognize(image) {
            var _this = this;
            var options = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {};
            return this._delay(function (job) {
                // Work on a copy so the caller's options object is never mutated.
                var jobOptions = typeof options === 'string' ? { lang: options } : Object.assign({}, options);
                jobOptions.lang = jobOptions.lang || 'eng';
                job._send('recognize', { image: image, options: jobOptions, workerOptions: _this.workerOptions });
            });
        }
    }, {
        key: 'detect',
        // Queues a 'detect' job.
        value: function detect(image) {
            var _this2 = this;
            var options = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {};
            return this._delay(function (job) {
                job._send('detect', { image: image, options: options, workerOptions: _this2.workerOptions });
            });
        }
    }, {
        key: 'terminate',
        // Stops the worker and drops the in-flight and queued jobs. Without
        // the reset a job interrupted by terminate() would stay "current"
        // forever and no later job could ever start.
        value: function terminate() {
            if (this.worker) adapter.terminateWorker(this);
            this.worker = null;
            this._currentJob = null;
            this._queue = [];
        }
    }, {
        key: '_delay',
        // Creates a job, queues fn(job) and starts it right away when idle.
        value: function _delay(fn) {
            var _this3 = this;
            if (!this.worker) this.worker = adapter.spawnWorker(this, this.workerOptions);
            var job = new TesseractJob(this);
            this._queue.push(function () {
                _this3._queue.shift();
                _this3._currentJob = job;
                fn(job);
            });
            if (!this._currentJob) this._dequeue();
            return job;
        }
    }, {
        key: '_dequeue',
        // Marks the instance idle and starts the next queued job, if any.
        value: function _dequeue() {
            this._currentJob = null;
            if (this._queue.length) {
                this._queue[0]();
            }
        }
    }, {
        key: '_recv',
        // Routes a packet from the worker to the job it belongs to.
        value: function _recv(packet) {
            // A packet can arrive when no job is current (for example after the
            // job settled or after terminate()); warn instead of crashing.
            if (!this._currentJob || this._currentJob.id !== packet.jobId) {
                console.warn('Job ID ' + packet.jobId + ' not known.');
                return;
            }
            if (packet.status === 'resolve' && packet.action === 'recognize') {
                packet.data = circularize(packet.data);
            }
            this._currentJob._handle(packet);
        }
    }]);
    return TesseractWorker;
}();
module.exports = create();
},{"../package.json":2,"./common/circularize.js":4,"./common/job":5,"./node/index.js":3}]},{},[6])(6)
});
3. Link สำหรับ Download Source Code ทั้งหมดครับ - https://webunique.in.th/blog/simple/ocr.zip ซึ่งมีโครงสร้างของไฟล์ทั้งหมด ดังนี้ครับ
|
ประวัติการแก้ไข 2020-06-16 10:48:29 2020-06-17 14:07:14
|
|
|
|
Date :
2020-06-16 10:43:41 |
By :
doanga2007 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
เอา งานเข้าล่ะ ลองอัปเดต Tesseract เป็น version ล่าสุดหรือยังครับ
|
|
|
|
|
Date :
2020-06-16 15:06:57 |
By :
PhrayaDev |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tesseract.js v2 อ่านไทยได้แย่ลงครับ ลองแล้ว ไม่แนะนำ
|
|
|
|
|
Date :
2020-06-16 16:11:26 |
By :
mr.v |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
อืม...V.2 ห่วยจริง (ทดสอบแค่แบบเบสิก)
เมื่อเทียบกับ V1 แตกต่างกันขนาดไหน...ไม่รู้ (ยังไม่ลอง V1)
แต่ข้อดีของ V2 คือ สามารถสแกนแบบหลายภาษาได้
โค้ดพื้นฐานที่ใช้ทดสอบ Tesseract.js V2 (ไม่ทำงานใน Microsoft Browser)
Code (JavaScript)
<!doctype html>
<html>
<head>
<title>ทดสอบ Tesseract.js 2</title>
</head>
<body>
<p id="status"></p>
<!-- The package specifier in these URLs had been replaced by "[email protected]" (email obfuscation), which broke them.
     The exact version originally posted is unknown; the 2.x range is used because this sample targets Tesseract.js v2. -->
<script src='https://unpkg.com/tesseract.js@2/dist/tesseract.min.js'></script>
<!-- <script src="https://cdn.jsdelivr.net/npm/tesseract.js@2/dist/tesseract.min.js"></script> -->
<script>
// Minimal Tesseract.js v2 demo: recognise one remote image (English + Thai)
// and show the progress status, then the recognised text, in #status.
var sta = document.getElementById("status");
// Keep the line breaks of the recognised text visible inside the <p>.
sta.style.whiteSpace = 'pre-wrap';
const recogImage = 'https://i.imgur.com/imFDi3l.png';
/*
Alternative test images:
https://i.imgur.com/atVCSZv.png
https://i.imgur.com/4Ixp69R.jpg
*/
const worker = Tesseract.createWorker({
    // textContent: status strings are plain text, never markup
    logger: (m) => { sta.textContent = m.status; }
});
Tesseract.setLogging(true);
work().catch((err) => console.error(err));
/**
 * Loads the worker and the language data, runs OCR on recogImage and writes
 * the recognised text into #status. The worker is terminated even on failure.
 */
async function work() {
    try {
        await worker.load();
        await worker.loadLanguage('eng+tha');
        await worker.initialize('eng+tha');
        // worker.detect(recogImage) can be called here instead to inspect the detection result.
        const result = await worker.recognize(recogImage);
        // OCR output is untrusted text; it must not be parsed as HTML.
        sta.textContent = result.data.text;
    } catch (err) {
        console.error(err);
        sta.textContent = 'OCR failed: ' + (err && err.message ? err.message : err);
    } finally {
        await worker.terminate();
    }
}
// https://www.thaicreate.com/php/forum/135293.html
</script>
</body>
</html>
https://www.w3schools.com/code/tryit.asp?filename=GFVGTF07J4WU
แต่ก็ยังสรุปไม่ได้ว่า ห่วยเพราะการตั้งค่า หรือ ตัว library
การใช้งาน API ในเชิงลึกยังมีอะไรที่ให้เลือกใช้อีกเยอะ (ส่วนรายละเอียด ยังไม่ลอง)
https://github.com/naptha/tesseract.js/blob/master/docs/api.md
เช่น อาจเปลี่ยนไปใช้ datafile ของเวอร์ชันเก่า ด้วย langPath option ฯลฯ
https://tesseract-ocr.github.io/tessdoc/Data-Files
https://golb.hplar.ch/2019/07/ocr-with-tesseractjs.html
|
|
|
|
|
Date :
2020-06-17 17:44:34 |
By :
PhrayaDev |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
โค้ดต่อไปนี้ ใช้กับ Tesseract.js v4
Code
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Tesseract.js</title>
<style>
.color-legend-sample {
display: inline-block;
height: 20px;
width: 20px;
}
.container {
align-items: start;
column-gap: 20px;
display: flex;
flex-direction: row;
}
.container .column {
width: 50%;
}
.preview-image-container {
text-align: center;
}
.result-box {
border: 1px solid #ccc;
margin-bottom: 10px;
padding: 10px;
}
.recognized-result {
margin-bottom: 10px;
margin-left: 0;
margin-right: 0;
margin-top: 0;
padding: 0;
}
h2.progress-result-heading,
h2.recognized-result {
border-bottom: 1px solid #ccc;
}
#file-details {
text-align: center;
}
#file-details > *:hover {
text-decoration: underline;
}
#file-details,
#form,
.preview-image-container {
margin-bottom: 10px;
}
#preview-image {
height: auto;
max-width: 100%;
width: auto;
}
#progress-result > div {
column-gap: 10px;
display: flex;
flex-direction: row;
margin-bottom: 10px;
}
#progress-result .event-progress {
flex-grow: 1;
}
#progress-result .status-text {
width: 300px;
}
#recognized-result-text {
white-space: pre-wrap;
word-break: break-all;
word-wrap: break-word;
}
</style>
</head>
<body>
<div class="container">
<div class="column">
<form id="form">
<!-- accept: comma-separated list of allowed file extensions -->
<input id="input-file" type="file" accept=".bmp,.gif,.jpe,.jpeg,.jpg,.png,.webp">
</form>
<div class="preview-image-container">
<canvas id="preview-image" height="10" width="10"></canvas>
</div>
<div id="file-details"></div>
</div>
<div class="column">
<div id="result-column"></div>
</div>
</div>
<script type="application/javascript" src="dist/tesseract.min.js"></script>
<script type="application/javascript">
/**
 * Shows one Tesseract progress event in the result column.
 *
 * Creates the "Progress" heading and the progress box on first use, adds one
 * row (status text + <progress>) per distinct status, and updates that row's
 * progress value.
 *
 * @param {Object} event - logger event; status, progress and userJobId are read.
 */
function displayProgress(event) {
    const resultColumn = document.getElementById('result-column');

    // Inserts `html` at the end of the column unless `selector` already matches.
    const ensureInColumn = (selector, html) => {
        if (!resultColumn.querySelector(selector)) {
            resultColumn.insertAdjacentHTML('beforeend', html);
        }
    };
    ensureInColumn(
        '#progress-result-heading',
        '<h2 id="progress-result-heading" class="progress-result-heading">Progress</h2>'
    );
    ensureInColumn(
        '#progress-result',
        `<div id="progress-result" class="result-box" data-user-job-id="${event.userJobId}"></div>`
    );

    const progressBox = resultColumn.querySelector('#progress-result');

    // Derive an element id from the status text: whitespace runs become "-",
    // a parenthesised suffix is dropped, then one trailing "-" is removed.
    const statusId = event.status
        .replace(/([\s]+)/g, '-')
        .replace(/\(([\w \-]+)\)/g, '')
        .replace(/\-$/, '');
    console.log(`event status:${statusId} (${event.status}) => ${event.progress}`);

    const rowSelector = `#${statusId}`;
    if (!progressBox.querySelector(rowSelector)) {
        const rowHtml = [
            `<div id="${statusId}">`,
            `<span class="status-text">${event.status}</span>`,
            '<progress class="event-progress" value="0" max="1"></progress>',
            '</div>',
        ].join('');
        progressBox.insertAdjacentHTML('beforeend', rowHtml);
    }

    const progressBar = progressBox.querySelector(`${rowSelector} .event-progress`);
    progressBar.value = event.progress;
}// displayProgress
/**
 * Renders a recognition result into the result column and outlines the
 * detected blocks, paragraphs and lines on the preview canvas.
 *
 * Recognised text is HTML-escaped before insertion, because OCR output is
 * arbitrary text and characters such as "<" or "&" must be shown literally.
 * data.hocr is markup by design and is inserted as HTML.
 *
 * @param {Object} resultData - value resolved by worker.recognize(); its
 *   data.text, data.hocr, data.paragraphs, data.blocks and data.lines are read.
 */
function displayResult(resultData) {
    console.debug('recognized data: ', resultData);
    const data = resultData.data;
    const resultColumn = document.getElementById('result-column');

    // Escapes the characters that are significant in HTML text and attributes.
    const htmlEntities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
    const escapeHtml = (text) => String(text).replace(/[&<>"']/g, (ch) => htmlEntities[ch]);

    if (!resultColumn.querySelector('h2.recognized-result')) {
        resultColumn.insertAdjacentHTML('beforeend', '<h2 class="recognized-result">Recognized result</h2>');
    }
    if (!resultColumn.querySelector('#recognized-result-text')) {
        resultColumn.insertAdjacentHTML('beforeend', '<h3 class="recognized-result">data.text</h3><pre id="recognized-result-text" class="result-box">' + escapeHtml(data.text) + '</pre>');
    }
    if (!resultColumn.querySelector('#recognized-result-hocr')) {
        resultColumn.insertAdjacentHTML('beforeend', '<h3 class="recognized-result">data.hocr</h3><div id="recognized-result-hocr" class="result-box">' + data.hocr + '</div>');
    }
    if (!resultColumn.querySelector('#recognized-result-generated-html')) {
        // One <p> per paragraph; line breaks inside a paragraph become <br>.
        const output = data.paragraphs
            .map((paragraph) => {
                const paragraphText = paragraph.lines.map((line) => line.text).join('').trim();
                return '<p>' + escapeHtml(paragraphText).replace(/(?:\r\n|\r|\n)/g, '<br>') + '</p>';
            })
            .join('');
        resultColumn.insertAdjacentHTML(
            'beforeend',
            '<h3 class="recognized-result">Custom generated HTML</h3>' +
            '<div id="recognized-result-generated-html" class="result-box">' +
            output +
            '</div>'
        );
    }

    resultColumn.insertAdjacentHTML('beforeend', '<h3 class="recognized-result">colors</h3><div id="color-legend" class="result-box"></div>');
    const colorLegend = resultColumn.querySelector('#color-legend');
    // Adds one legend row: a colour swatch followed by its label.
    const addLegendRow = (color, label) => {
        colorLegend.insertAdjacentHTML('beforeend', '<div class="color-legend-row"><span class="color-legend-sample" style="background-color: ' + color + ';"></span> ' + label + '</div>');
    };

    const blockColor = 'rgba(0, 0, 255, 0.2)';
    addLegendRow(blockColor, 'Blocks');
    data.blocks.forEach((block) => {
        drawCanvasBounding(block.bbox.x0, block.bbox.y0, block.bbox.x1, block.bbox.y1, blockColor, 10);
    });

    const paragraphColor = 'rgba(255, 255, 0, 0.4)';
    addLegendRow(paragraphColor, 'Paragraphs');
    data.paragraphs.forEach((paragraph) => {
        drawCanvasBounding(paragraph.bbox.x0, paragraph.bbox.y0, paragraph.bbox.x1, paragraph.bbox.y1, paragraphColor, 5);
    });

    const lineBoxColor = 'rgba(255, 0, 0, 0.8)';
    const baselineColor = 'rgba(0, 255, 0, 0.8)';
    addLegendRow(lineBoxColor, 'Lines box');
    addLegendRow(baselineColor, 'Lines base line');
    data.lines.forEach((line) => {
        drawCanvasBounding(line.bbox.x0, line.bbox.y0, line.bbox.x1, line.bbox.y1, lineBoxColor);
        drawCanvasBounding(line.baseline.x0, line.baseline.y0, line.baseline.x1, line.baseline.y1, baselineColor, 1, 'line');
    });
}// displayResult
/**
 * Strokes either a rectangle or a straight segment on the preview canvas.
 *
 * @param {number} x0 - start x (left edge for a box).
 * @param {number} y0 - start y (top edge for a box).
 * @param {number} x1 - end x (right edge for a box).
 * @param {number} y1 - end y (bottom edge for a box).
 * @param {string} strokeStyle - canvas stroke style.
 * @param {number} [lineWidth=1] - stroke width.
 * @param {string} [drawType='box'] - 'box' or 'line'; other values draw nothing.
 */
function drawCanvasBounding(x0, y0, x1, y1, strokeStyle, lineWidth = 1, drawType = 'box') {
    const context = document.getElementById('preview-image').getContext('2d');
    context.beginPath();
    context.strokeStyle = strokeStyle;
    context.lineWidth = lineWidth;
    switch (drawType) {
        case 'box': {
            const boxWidth = x1 - x0;
            const boxHeight = y1 - y0;
            context.moveTo(0, 0);
            context.rect(x0, y0, boxWidth, boxHeight);
            break;
        }
        case 'line':
            context.moveTo(x0, y0);
            context.lineTo(x1, y1);
            break;
        default:
            // unknown draw type: nothing is added to the path
            break;
    }
    context.closePath();
    context.stroke();
}// drawCanvasBounding
/**
 * Wires the file input: when a file is chosen, shows its details, draws it
 * on the preview canvas at natural size and then dispatches the custom
 * 'my.rendered_image' event on document so that OCR can start.
 */
function inputChangePreview() {
    const inputFile = document.getElementById('input-file');
    const canvas = document.getElementById('preview-image');

    // Appends <span class="className">text</span> to container. The text is
    // set through textContent so a file name is never parsed as HTML.
    const appendDetail = (container, className, text, leadingSpace = false) => {
        const span = document.createElement('span');
        span.className = className;
        span.textContent = text;
        if (leadingSpace) {
            container.append(' ');
        }
        container.append(span);
    };

    inputFile.addEventListener('change', (event) => {
        const thisFile = event.target.files[0];

        // display file details.
        const fileDetails = document.getElementById('file-details');
        fileDetails.innerHTML = '';// reset.
        if (!thisFile) {
            // The file dialog was cancelled: there is nothing to preview.
            return;
        }
        appendDetail(fileDetails, 'file-name', thisFile.name);
        appendDetail(fileDetails, 'file-size', thisFile.size + ' bytes', true);

        // render preview
        const context = canvas.getContext('2d');
        const imageObj = new Image();
        const objectUrl = URL.createObjectURL(thisFile);
        imageObj.onload = () => {
            // The image is decoded; the temporary object URL can be released.
            URL.revokeObjectURL(objectUrl);
            const width = imageObj.naturalWidth;
            const height = imageObj.naturalHeight;
            canvas.height = height;
            canvas.width = width;
            context.drawImage(imageObj, 0, 0, width, height);
            appendDetail(fileDetails, 'file-dimension', width + '×' + height + ' pixels', true);

            // dispatch custom event to start OCR.
            const eventObj = new Event('my.rendered_image', {bubbles: true, cancelable: true});
            document.dispatchEvent(eventObj);
        };
        imageObj.onerror = (errorEvent) => {
            URL.revokeObjectURL(objectUrl);
            context.clearRect(0, 0, canvas.width, canvas.height);// reset.
            console.error('img error:', errorEvent);
        };
        imageObj.src = objectUrl;
    });
}// inputChangePreview
/**
 * Listens for the custom 'my.rendered_image' event and runs OCR (English +
 * Thai) on the currently selected file, then renders the result.
 *
 * The worker is terminated whether recognition succeeds or fails, and a
 * failure is reported in the console and in the result column instead of
 * surfacing as an unhandled promise rejection.
 */
function listenRenderedImageEvent() {
    document.addEventListener('my.rendered_image', async () => {
        const inputFile = document.getElementById('input-file');
        const resultColumn = document.getElementById('result-column');
        resultColumn.innerHTML = '';// reset.

        const imageFile = inputFile.files[0];
        if (!imageFile) {
            // No file is selected any more; nothing to recognise.
            return;
        }

        const { createWorker } = Tesseract; // https://github.com/naptha/tesseract.js/blob/master/docs/api.md
        let worker = null;
        let data;
        try {
            worker = await createWorker({
                corePath: 'dist/tesseract-core.wasm.js',
                workerPath: 'dist/worker.min.js',
                logger: (progressEvent) => {
                    displayProgress(progressEvent);
                }
            });
            await worker.loadLanguage('eng+tha');
            await worker.initialize('eng+tha');
            await worker.setParameters({
                preserve_interword_spaces: '1',
            });
            data = await worker.recognize(
                imageFile,
                {},
                {
                    // @link https://github.com/naptha/tesseract.js/blob/master/src/createWorker.js#L132
                    tsv: false,
                }
            );
        } catch (error) {
            console.error('OCR failed:', error);
            resultColumn.textContent = 'OCR failed: ' + (error && error.message ? error.message : error);
            return;
        } finally {
            // Runs on success and on failure (also before the return above).
            if (worker) {
                await worker.terminate();
            }
        }
        displayResult(data);
    });
}// listenRenderedImageEvent
/**
 * Resets the form with id "form" to its initial state.
 */
function resetForm() {
    const form = document.getElementById('form');
    form.reset();
}// resetForm
// Once the DOM is parsed: reset the form, then register the file-input and
// OCR listeners, in that order.
window.addEventListener('DOMContentLoaded', () => {
    const initialisers = [resetForm, inputChangePreview, listenRenderedImageEvent];
    for (const initialise of initialisers) {
        initialise();
    }
});
</script>
</body>
</html>
แกะเอาเองนะ
|
ประวัติการแก้ไข 2022-12-03 16:14:53 2022-12-03 16:15:14 2022-12-03 16:39:27
|
|
|
|
Date :
2022-12-03 16:14:35 |
By :
mr.v |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Load balance : Server 01
|