diff options
| -rwxr-xr-x | contrib/tesseract-langs.sh | 92 |
1 files changed, 92 insertions, 0 deletions
diff --git a/contrib/tesseract-langs.sh b/contrib/tesseract-langs.sh new file mode 100755 index 0000000000..50873c139b --- /dev/null +++ b/contrib/tesseract-langs.sh | |||
| @@ -0,0 +1,92 @@ | |||
| 1 | #! /bin/sh | ||
| 2 | |||
| 3 | # Copyright (C) 2014, O.S. Systems Software Ltda. All Rights Reserved | ||
| 4 | # Released under the MIT license (see meta-openembedded layer's COPYING.MIT) | ||
| 5 | |||
# Upstream release series used in the tarball names handled by this script.
PV="3.02"

# The software package may carry an additional minor version that the
# language packages do not have. Example:
#   software package: tesseract-ocr-3.02.02.tar.gz
#   language package: tesseract-ocr-3.02.por.tar.gz
MINOR_PV="02"

# First command-line argument: directory that receives the recipes.
recipes_dir="$1"
| 15 | |||
# Print a one-line usage summary for this script on stdout.
usage() {
    # Strip the directory part with a parameter expansion instead of an
    # unquoted `basename $0`, which breaks when the script path contains
    # whitespace.
    printf 'Usage: %s <recipes dir> [ <download dir> ]\n' "${0##*/}"
}
| 19 | |||
# The recipes directory is mandatory; print the usage text and stop when
# it was not given.
if [ -z "$recipes_dir" ]; then
    usage
    exit 1
fi
mkdir -p -- "$recipes_dir" || exit 1

file_list_uri='https://code.google.com/p/tesseract-ocr/downloads/list'
# Scratch file that receives the downloaded file listing.
file_list=$(mktemp) || exit 1

remove_dl_dir=
dl_dir=

# Remove the scratch file, and the download directory when this script
# created it, on every exit path (including early failures).
cleanup_tmp() {
    rm -f -- "$file_list"
    if [ -n "$remove_dl_dir" ] && [ -n "$dl_dir" ]; then
        rm -rf -- "$dl_dir"
    fi
}
trap cleanup_tmp EXIT

# Use the download directory given as second argument; without one, work
# in a temporary directory and flag it for removal.
if [ -z "$2" ]; then
    remove_dl_dir=1
    dl_dir=$(mktemp -d) || exit 1
else
    dl_dir="$2"
fi

mkdir -p -- "$dl_dir" || exit 1
| 38 | |||
# Print the language codes for which the upstream download listing offers
# a tesseract-ocr-${PV}.<lang>.tar.gz tarball, one per line, sorted and
# without duplicates.
#
# Globals:  PV, MINOR_PV, file_list_uri (read); the listing is written to
#           the file named by file_list.
# Returns:  non-zero when the listing cannot be downloaded.
tesseract_langs() {
    local pv_re minor_re

    wget -q -O "$file_list" "$file_list_uri" || return 1

    # The version strings are spliced into regular expressions, so their
    # dots have to be escaped to match literally.
    pv_re=$(printf '%s\n' "$PV" | sed 's/\./\\./g')
    minor_re=$(printf '%s\n' "$MINOR_PV" | sed 's/\./\\./g')

    # 1. keep the listing lines that link to a versioned tarball,
    # 2. reduce each line to the component between version and .tar.gz,
    # 3. drop the entries that are exactly the minor version or its
    #    -doc-html variant (anchored, so language codes that merely contain
    #    the minor version are kept).
    grep -E 'a href="detail\?name=tesseract-ocr-'"${pv_re}"'\.[^.]+\.tar\.gz&can=2&q=">' "$file_list" \
        | sed -E -e 's/.*tesseract-ocr-'"${pv_re}"'\.([^.]+)\.tar\.gz.*/\1/' \
        | grep -Ev '^'"${minor_re}"'(-doc-html)?$' \
        | sort -u
}
| 47 | |||
# Download the language tarballs that are not yet present in $dl_dir.
#
# Arguments: $1 - whitespace-separated list of language codes
# Globals:   PV, dl_dir (read)
# Outputs:   one "Downloading <uri>" line per fetched tarball on stdout;
#            failures are reported on stderr
# Returns:   non-zero if at least one download failed
download_lang_files() {
    local langs="$1"
    local lang uri tarball status=0

    # Word-splitting of $langs is intentional: one language code per word.
    for lang in $langs; do
        tarball="$dl_dir/tesseract-ocr-${PV}.${lang}.tar.gz"
        if [ ! -e "$tarball" ]; then
            uri="https://tesseract-ocr.googlecode.com/files/tesseract-ocr-${PV}.${lang}.tar.gz"
            echo "Downloading $uri"
            # Fetch under a scratch name first: a failed or interrupted
            # transfer must not leave a truncated tarball behind, because
            # the existence check above would accept it on the next run.
            if ! wget -q -O "$tarball.part" "$uri" ||
               ! mv -- "$tarball.part" "$tarball"; then
                echo "Failed to download $uri" >&2
                rm -f -- "$tarball.part"
                status=1
            fi
        fi
    done

    return "$status"
}
| 59 | |||
# Write the recipe file for one language package into $recipes_dir.
#
# The recipe is named tesseract-lang-<lang>_${PV}.bb, with underscores in
# the language code replaced by dashes, and records the MD5 and SHA-256
# checksums of the already downloaded tarball.
#
# Arguments: $1 - language code
# Globals:   PV, dl_dir, recipes_dir (read)
# Returns:   non-zero when the tarball is missing or cannot be checksummed
create_recipe() {
    local lang=$1
    local tarball recipe md5 sha256

    tarball="$dl_dir/tesseract-ocr-${PV}.${lang}.tar.gz"

    # Without the tarball the recipe would be written with empty checksums.
    if [ ! -r "$tarball" ]; then
        echo "Cannot read $tarball" >&2
        return 1
    fi

    # Feed the file on stdin so unusual characters in the path cannot
    # affect the output format of the checksum tools.
    md5=$(md5sum < "$tarball" | awk '{print $1}')
    sha256=$(sha256sum < "$tarball" | awk '{print $1}')
    if [ -z "$md5" ] || [ -z "$sha256" ]; then
        echo "Cannot compute checksums of $tarball" >&2
        return 1
    fi

    recipe="$recipes_dir/tesseract-lang-$(printf '%s\n' "$lang" | sed 's/_/-/g')_${PV}.bb"

    cat > "$recipe" <<EOF
# Copyright (C) 2014, O.S. Systems Software Ltda. All Rights Reserved
# Released under the MIT license (see meta-openembedded layer's COPYING.MIT)

TESSERACT_LANG = "$lang"

require tesseract-lang.inc

SRC_URI[md5sum] = "${md5}"
SRC_URI[sha256sum] = "${sha256}"
EOF
}
| 81 | |||
| 82 | |||
# Main flow: discover the available languages, fetch their tarballs and
# write one recipe per language.
LANGS=$(tesseract_langs)

status=0
if [ -z "$LANGS" ]; then
    # An empty list means the listing could not be fetched or parsed;
    # report it instead of silently producing no recipes.
    echo "No language packages found at $file_list_uri" >&2
    status=1
else
    download_lang_files "$LANGS"

    # Word-splitting of $LANGS is intentional: one language code per word.
    for lang in $LANGS; do
        create_recipe "$lang" || status=1
    done
fi

# Delete the download directory only when this script created it.
if [ -n "$remove_dl_dir" ] && [ -n "$dl_dir" ]; then
    rm -rf -- "$dl_dir"
fi
rm -f -- "$file_list"

exit "$status"
