生活紀錄: 10月 2018

2018年10月26日星期五

Tesseract OSD_example.cpp

// 參照 Tesseract API for VS2017

// 決定頁面，行到行，字到字的方向

#include "pch.h"
#include <iostream>

#include <tesseract/baseapi.h>
#include <leptonica/allheaders.h>

int main()
{
// 頁面方向
static const char* const sOrientation[] {
"PAGE_UP",
"PAGE_RIGHT",
"PAGE_DOWN",
"PAGE_LEFT",
};
// 字到字的方向
static const char* const sWritingDirection[] {
"LEFT_TO_RIGHT",
"RIGHT_TO_LEFT",
"TOP_TO_BOTTOM",
};
// 行到行的方向
static const char* const sTextlineOrder[]{
"LEFT_TO_RIGHT",
"RIGHT_TO_LEFT",
"TOP_TO_BOTTOM",
};

Pix *image = pixRead("D:\\TensorFlow\\OCR\\aaa.png");
tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();
if (api->Init(NULL, "eng")) {
std::cerr << "Could not initialize tesseract.\n";
}
api->SetPageSegMode(tesseract::PSM_AUTO_OSD);
api->SetImage(image);
api->Recognize(0);
tesseract::PageIterator* it = api->AnalyseLayout();
tesseract::PageIteratorLevel level = tesseract::RIL_WORD;
if (it != 0) {
do {
tesseract::Orientation orientation;
tesseract::WritingDirection direction;
tesseract::TextlineOrder order;
float deskew_angle;

it->Orientation(&orientation, &direction, &order, &deskew_angle);
printf("Orientation: %s;\nWritingDirection: %s\nTextlineOrder: %s\n" \
"Deskew angle: %.4f\n",
sOrientation[orientation], sWritingDirection[direction],
sTextlineOrder[order], deskew_angle);
int left, top, right, bottom;
it->BoundingBox(level, &left, &top, &right, &bottom);
printf("BoundingBox: (%d, %d) (%d, %d)\n",
left, top, right, bottom);
} while (it->Next(level));
}

api->End();
pixDestroy(&image);
}

Tesseract ResultIterator.cpp

// 參照 Tesseract API for VS2017

// 依據字符辨識，列出所有候選字

#include "pch.h"

#include <iostream>

#include <tesseract/baseapi.h>

#include <leptonica/allheaders.h>

int main()

{

Pix *image = pixRead("D:\\TensorFlow\\OCR\\aaa.png");

tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();

if (api->Init(NULL, "eng")) {

std::cerr << "Could not initialize tesseract.\n";

}

api->SetPageSegMode(tesseract::PSM_AUTO_OSD);

api->SetImage(image);

api->Recognize(0);

tesseract::ResultIterator* ri = api->GetIterator();

tesseract::PageIteratorLevel level = tesseract::RIL_SYMBOL;

//tesseract::PageIteratorLevel level = tesseract::RIL_TEXTLINE;

if (ri != 0) {

do {

const char* word = ri->GetUTF8Text(level);

float conf = ri->Confidence(level);

int x1, y1, x2, y2;

ri->BoundingBox(level, &x1, &y1, &x2, &y2);

printf("word: '%s'; \tconf: %.2f; BoundingBox: %d,%d,%d,%d;\n",

word, conf, x1, y1, x2, y2);

if (level = tesseract::RIL_SYMBOL) {

// 列出所有可能的候選字

tesseract::ChoiceIterator ci(*ri);

do {

const char* choice = ci.GetUTF8Text();

printf("\t\t%s conf: %f\n", choice, ci.Confidence());

} while (ci.Next());

printf("---------------------------------------------\n");

}

delete[] word;

} while (ri->Next(level));

}

api->End();

pixDestroy(&image);

}

Tesseract GetComponentImages.cpp

// 參照 Tesseract API for VS2017
// 依據行辨識

int main()

{

Pix *image = pixRead("D:\\temp\\OpenCV_err.png");

tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();

if (api->Init(NULL, "eng")) {

std::cerr << "Could not initialize tesseract.\n";

}

api->SetImage(image);

Boxa* boxes = api->GetComponentImages(tesseract::RIL_TEXTLINE, true, NULL, NULL);

fprintf(stdout, "Found %d textline image components.\n", boxes->n);

for (int i = 0; i < boxes->n; i++) {

BOX* box = boxaGetBox(boxes, i, L_CLONE);

api->SetRectangle(box->x, box->y, box->w, box->h);

char* ocrResult = api->GetUTF8Text();

int conf = api->MeanTextConf();

fprintf(stdout, "Box[%d]: x=%d, y=%d, w=%d, h=%d, confidence: %d, text: %s",

i, box->x, box->y, box->w, box->h, conf, ocrResult);

delete[] ocrResult;

}

api->End();

pixDestroy(&image);

}

Tesseract BasicExample.cpp

// 參照 Tesseract API for VS2017
// 整張圖片一次辨識
#include "pch.h"
#include <iostream>

#include <tesseract/baseapi.h>
#include <leptonica/allheaders.h>

int main()
{
char *outText = NULL;
tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();
// 英文加中文
if (api->Init(NULL, "eng+chi_tra")) {
std::cerr << "Could not initialize tesseract.\n";
}
Pix *image = pixRead("D:\\TensorFlow\\OCR\\bbb.png");
// 以下兩行，可擇一或都不執行
api->SetPageSegMode(tesseract::PSM_SINGLE_BLOCK); // 預設值
//api->SetPageSegMode(tesseract::PSM_SINGLE_LINE);
api->SetImage(image);
//api->SetRectangle(40, 5, 150, 30);
outText = api->GetUTF8Text();
// 要顯示中文要經很多轉換
int len = ::MultiByteToWideChar(CP_UTF8, NULL, outText, -1, NULL, 0);
wchar_t* wszString = new wchar_t[len + 1];
::MultiByteToWideChar(CP_UTF8, NULL, outText, -1, wszString, len);
wszString[len] = '\0';
len = ::WideCharToMultiByte(CP_ACP, 0, wszString, -1, NULL, 0, NULL, NULL);
char* szBig5 = new char[len + 1];
::WideCharToMultiByte(CP_ACP, 0, wszString, -1, szBig5, len, NULL, NULL);
szBig5[len] = '\0';

std::cout << "=======================\n";
std::cout << outText << "\n";
std::cout << "=======================\n";
std::cout << wszString << "\n";
std::cout << "=======================\n";
std::cout << szBig5 << "\n";
std::cout << "=======================\n";
api->End();
if (outText) delete[] outText;
if (wszString) delete[] wszString;
pixDestroy(&image);
}

Tesseract API for VS2017

參照 Tesseract OCR 安裝

c:\users\userName\.cppan\storage 目錄下搜尋 leptonica
可以找到 C:\Users\userName\.cppan\storage\src\8f\a3\90d7\src 目錄

拷貝所有檔案至 C:\Program Files\tesseract\include\leptonica

VS2013 不能編譯, VS2017 才可以

Property Pages/Platform 選 x64

Property Pages/Configuration 選 Debug

Property Pages/Configuration Properties/Debugging/Environment

PATH=%PATH%;D:\TensorFlow\OCR\tesseract\win64\bin\Debug

Property Pages/Configuration Properties/C/C++/General/Additional Include Directories

新增 C:\Program Files\tesseract\include

Property Pages/Configuration Properties/Linker/General/Additional Library Directories

新增 D:\TensorFlow\OCR\tesseract\win64\Debug

Property Pages/Configuration Properties/Linker/Input/Additional Dependencies

新增 tesseract40d.lib

新增 pvt.cppan.demo.danbloomberg.leptonica-1.76.0.lib

Property Pages/Platform 選 x64

Property Pages/Configuration 選 Release

Property Pages/Configuration Properties/Debugging/Environment

PATH=%PATH%;C:\Program Files\tesseract\bin

Property Pages/Configuration Properties/C/C++/General/Additional Include Directories

新增 C:\Program Files\tesseract\include

Property Pages/Configuration Properties/Linker/General/Additional Library Directories

新增 C:\Program Files\tesseract\lib

Property Pages/Configuration Properties/Linker/Input/Additional Dependencies

新增 tesseract40.lib

新增 pvt.cppan.demo.danbloomberg.leptonica-1.76.0.lib

執行 Debug 程式時，若是使用 Release 的 lib, delete GetUTF8Text() 產生的記憶體
會產生 Exception

2018年10月19日星期五

Tesseract OCR

https://digi.bib.uni-mannheim.de/tesseract/
可以下載安裝版
雖然它只可以執行，不能開發程式，但還是先安裝，因為要使用它的 tessdata
等用完再移除吧
https://github.com/UB-Mannheim/tesseract/wiki/Windows-build
有一些安裝檔如何產生的說明，但它是利用 Linux 跨平台編譯產生的

使用 Vcpkg
開啟 PowerShell
git clone https://github.com/Microsoft/vcpkg.git vcpkg
cd vcpkg
.\bootstrap-vcpkg.bat
產生 vcpkg.exe
.\vcpkg install tesseract:x64-windows
產生 installed\x64-windows\tools\tesseract
.\vcpkg install tesseract:x64-windows-static
產生 installed\x64-windows-static\tools\tesseract
.\vcpkg install tesseract:x86-windows-static
有 include, dll, lib, 但卻是 3.05 版

使用 cmake, cppan, vs2017
原先使用之前的 cmake(3.10版), 一直失敗, 更新成 cmake(3.12版)才成功
下載 cppan
cppan 會使用 c:\users\userName\.cppan 目錄，若有失敗要重新開始，刪除這個目錄
設定 PATH 到 cmake 和 cppan
開啟 PowerShell

git clone https://github.com/tesseract-ocr/tesseract tesseract

cd tesseract
mkdir win64
cd win64
PS D:\Tesseract\tesseract\win64> $env:Path += ";D:\Tesseract\cppan-master-Windows-client;C:\Program Files\CMake\bin"
PS D:\Tesseract\tesseract\win64> $env:path.split(";")
cppan ..
cmake .. -G "Visual Studio 15 2017 Win64"
開啟 vs2017

開啟 tesseract\win64\tesseract.sln

先編譯 "CPPAN Targets/Service/cppan-d-b-d" 專案，會產生錯誤

最主要為程式內含有錯誤的字元

開啟這些檔案，另存新檔，選擇 Save 旁邊的小按鈕，選擇 Save with encoding

Encoding 選擇 Unicode (UTF-8 with signature)

ALL_BUILD 可以成功，接著 build INSTALL

此時會產生 MSB3073 setlocal 錯誤

主要是因為沒有權限安裝程式到 C:\Program Files\tesseract

使用 Administrator 身分重新開啟 vs2017

重新 build 即可

增加中文字(含手寫)的支援

到 https://github.com/tesseract-ocr/tessdata 下載 tessdata

但是我不知道要下載那些檔案，乾脆使用安裝檔內的 tessdata

設定環境變數 TESSDATA_PREFIX=C:\Program Files\tesseract\tessdata

發現在部分電腦上速度會非常慢，可關閉 openmp 改善
修改 project libtesseract 和 tesseract 的 property
C/C++/Language/Open MP Support: No(/openmp-)

2018年10月6日星期六

build darknet yolo

git clone https://github.com/AlexeyAB/darknet.git

下載 CUDA Toolkit 9.1
https://developer.nvidia.com/cuda-toolkit-archive
安裝失敗，請參考下列步驟
https://yingrenn.blogspot.com/2018/07/cuda.html

下載 cuDNN, 請選擇 cuDNN v7.0 for CUDA 9.1 for 正確 Windows 版本
https://developer.nvidia.com/rdp/cudnn-archive

使用 VS2015 開啟 D:\Tensorflow\Yolo\darknet\build\darknet\darknet.sln
切換 Win32 到 x64
Project/darknet properties/
輸入 Configuration Properties/"CUDA C/C++"/CUDA Toolkit Custom Dir
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.1
輸入 Additional Include Directories
D:\Tensorflow\Yolo\opencv\build\include
D:\CUDNN\cudnn-9.1-windows7-x64-v7\cuda\include
輸入 Additional Library Directories
D:\Tensorflow\Yolo\opencv\build\x64\vc14\lib
D:\CUDNN\cudnn-9.1-windows7-x64-v7\cuda\lib\x64

2018年10月5日星期五

git 學習紀錄

Git 最適合管理單純文字檔；MS Word, pdf, 圖片或執行檔等非文字檔雖然也能放入版本庫，但無法比對內容差異或自動合併

安裝

# 如果你的 Linux 是 Ubuntu:

$ sudo apt-get install git-all

# 如果你的 Linux 是 Fedora:

$ sudo yum install git-all

Untracked: 在工作目錄內，尚未被追蹤或不需追蹤的檔案

Unmodified: 在工作目錄內，尚未被修改的檔案

Modified: 在工作目錄內，已經被修改的檔案

Staged: 在工作目錄內，被 add 到版本庫，尚未被 commit

設定使用者和email

$ git config --global user.name "UserName"

$ git config --global user.email "username@email.com"

在工作目錄上建立版本庫，會產生 ".git" 目錄

$ git init

查詢版本庫狀態

$ git status

將檔案放入版本庫的 stage 內

$ git add file.py

將資料夾中所有未被添加的檔案，放入版本庫的 stage 內

$ git add .

確認提交

$ git commit -m "修改說明"

確認提交(已被追蹤的檔案可省掉 git add；新檔案仍須先 git add)

$ git commit -am "修改說明"

查詢紀錄

$ git log

查詢紀錄，每個 commit 一行

$ git log --oneline

查詢紀錄，每個 commit 一行，並顯示 branch

$ git log --oneline --graph

查詢這次還沒add(unstaged)的修改部分和上個已經commit或已經add(staged)的文件差異

$ git diff

查詢已經add(staged)的修改部分和上個已經commit的文件差異

$ git diff --cached

查詢工作目錄目前的修改(包含已經add(staged)和還沒add(unstaged)的部分)和上個已經commit的文件差異

$ git diff HEAD

checkout 某一(id)版本，到工作目錄(HEAD 指到 id)

$ git checkout id

checkout 最新版本，到工作目錄(HEAD 指到最新 id)

$ git checkout master

checkout 某一(id)版本的 file.py

$ git checkout id -- file.py

查看所有 HEAD 改動

$ git reflog

直接修改上個 commit，不說明

$ git commit --amend --no-edit

reset 回到上一次的 commit

$ git reset --hard HEAD

reset 回到上上一次的 commit

$ git reset --hard HEAD^

reset 回到某一次 commit

$ git reset --hard id

reset 可搭配三種參數 soft, mixed(default), 以及 hard

soft 只回復 Repository

mixed 回復 staged 和 Repository

hard 回復 staged 和 Repository 和工作目錄(通常使用這個)

查詢分支

$ git branch

查詢本地和遠端的分支

$ git branch -a

查詢遠端的分支

$ git branch -r

建立分支 test

$ git branch test

checkout 分支 test

$ git checkout test

建立分支 test 並且切換(checkout)到 test

$ git checkout -b test

合併分支

先切回 master

$ git checkout master

將 test 合併至 master

$ git merge --no-ff -m "合併說明" test

合併有衝突時，先修改檔案，再 commit, 衝突就解決了

$ git commit -am "解決說明"

暫存修改

$ git stash

查詢暫存

$ git stash list

回復暫存

$ git stash pop

新增遠端節點 origin

$ git remote add origin https://github.com/UserName/git-name.git

推送本地的 master 分支到 origin

$ git push -u origin master

推送本地的 test 分支到 origin

$ git push -u origin test

本地端修改

$ git commit -am "修改說明"

推送修改到 origin

$ git push -u origin master
取回最新的遠端資料到本地

$ git pull origin master

訂閱：文章 (Atom)

網頁

2018年10月26日 星期五

2018年10月19日 星期五

2018年10月6日 星期六

2018年10月5日 星期五

2018年10月26日星期五

2018年10月19日星期五

2018年10月6日星期六

2018年10月5日星期五