From cb41796a5e0573bf3676b5c54fcc12c6dd42f9fb Mon Sep 17 00:00:00 2001 From: Mario Domenech Goulart Date: Mon, 26 May 2014 09:59:00 -0300 Subject: contrib/tesseract-langs.sh: add script to generate recipes for tesseract languages This script writes language recipes for tesseract. It downloads the listing of available languages and language tarballs from the official site and writes language recipes tesseract-lang-<lang>_<version>.bb for each language. Signed-off-by: Mario Domenech Goulart Signed-off-by: Martin Jansa --- contrib/tesseract-langs.sh | 92 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) create mode 100755 contrib/tesseract-langs.sh (limited to 'contrib') diff --git a/contrib/tesseract-langs.sh b/contrib/tesseract-langs.sh new file mode 100755 index 0000000000..50873c139b --- /dev/null +++ b/contrib/tesseract-langs.sh @@ -0,0 +1,92 @@ +#! /bin/sh + +# Copyright (C) 2014, O.S. Systems Software Ltda. All Rights Reserved +# Released under the MIT license (see meta-openembedded layer's COPYING.MIT) + +PV='3.02' + +# Sometimes the software package has a minor version, but language +# packages have not. 
Example: +# software package: tesseract-ocr-3.02.02.tar.gz +# language package: tesseract-ocr-3.02.por.tar.gz +MINOR_PV=02 + +recipes_dir=$1 + +usage() { + echo "Usage: `basename $0` [ ]" +} + +if [ -z "$recipes_dir" ]; then + usage + exit 1 +fi +mkdir -p "$recipes_dir" + +file_list_uri='https://code.google.com/p/tesseract-ocr/downloads/list' +file_list=`mktemp` + +remove_dl_dir= +if [ -z "$2" ]; then + remove_dl_dir=1 + dl_dir=`mktemp -d` +else + dl_dir="$2" +fi + +mkdir -p $dl_dir + +tesseract_langs() { + wget -q -O "$file_list" "$file_list_uri" + + grep -E 'a href="detail\?name=tesseract-ocr-'${PV}'\.[^\.]+.tar.gz&can=2&q=">' "$file_list" | \ + sed -r -e 's/.*tesseract-ocr-'${PV}'\.*([^\.]+)\.tar\.gz.*/\1/' | \ + grep -Ev '('${MINOR_PV}'|'${MINOR_PV}'-doc-html)' | \ + sort -u +} + +download_lang_files() { + local langs="$1" + local uri + for lang in $langs; do + if [ ! -e "$dl_dir/tesseract-ocr-${PV}.${lang}.tar.gz" ]; then + uri="https://tesseract-ocr.googlecode.com/files/tesseract-ocr-${PV}.${lang}.tar.gz" + echo "Downloading $uri" + wget -q -P "$dl_dir" "$uri" + fi + done +} + +create_recipe() { + local lang=$1 + local tarball + + tarball="$dl_dir/tesseract-ocr-${PV}.${lang}.tar.gz" + + md5sum=`md5sum $tarball | awk '{print $1}'` + sha256sum=`sha256sum $tarball | awk '{print $1}'` + + cat > $recipes_dir/tesseract-lang-`echo ${lang} | sed s/_/-/g`_${PV}.bb <