ftra
I wrote this Forth utility in order to translate substrings inside a string.
Source code
ftra.fs
.( ftra.fs ) cr
\ -----------------------------
\ Copyright (C) 2005,2007,2013 Marcos Cruz (programandala.net)
\ License: http://programandala.net/license
\ Text strings translation tool
\ Program written in Forth; developed with Forth 5mx and Gforth.
\ History
\ ------------------------------------------
\ 2005-11-01
\ Start. Algorithm to translate chars into strings.
\ 2005-11-10
\ Rewrite. New improved algorithm to translate strings into strings.
\ 2005-11-11
\ Bugs fixed. It seems it works.
\ 2007-05-08
\ 'translate-more?' factored out from 'tra'.
\ Bug fixed in 'tra' :
\ The translation halted when the last word in the text had to be translated but
\ it was shorter than the first word in the translation table.
\ The best order for the elements in the translation table is not solved yet.
\ Provisional solution: remove that condition.
\ 2013-12-10
\ Source code style and stack notation updated.
\ '-left' removed, '/string' does exactly the same.
\ '.tra-table-element' factored from '.tra-table'.
\ Some comments fixed.
\ ------------------------------------------
marker ftra
variable tra-table \ address of the current translation table
variable tra-pointer \ pointer to the current string in the current translation table
: tra-pointer0 ( -- )
\ Init the translation pointer.
tra-table @ tra-pointer !
;
: tra-table: ( "<spaces>name" -- )
\ Start the translation table definition.
create does> ( -- ) ( dfa ) tra-table ! tra-pointer0
;
: ;tra-table ( -- )
\ Finish the translation table definition.
0 dup c, c, align
;
: c>s, ( b -- )
\ Compile a one char string into the data space.
1 c, c,
;
variable translation \ address of the translation
variable >translation \ pointer to the next char in the translation
: translation+! ( ca len -- )
\ Add a string to the current translation.
dup >r >translation @ swap cmove \ move chars
r> dup >translation +! \ update char pointer
translation @ dup C@ rot + swap C! \ update string count
;
: translate-this? ( ca1 len1 ca2 len2 -- ca1 len1 ca2 len2 wf )
\ Is ca2 len2 a prefix of ca1 len1?
\ ca1 len1 = remaining text to be translated
\ ca2 len2 = text to translate, from the translation table
dup >r 2over 2over 2swap drop r> compare 0=
;
: tra-pointer@ ( -- ca len )
\ Fetch the string pointed.
tra-pointer @ count
;
: tra-pointer+ ( -- )
\ Update tra-pointer to the next string in the translation table.
tra-pointer dup @ dup C@ 1+ swap + swap !
;
: translation0 ( -- )
\ Init the translation.
here
dup translation ! \ address
0 over c! \ char count
1+ >translation ! \ pointer to the first char
;
: translate-more? ( ca1 len1 -- ca1 len1 ca2 len2 wf )
\ Keep on translating?
\ ca1 len1 = remaining text to be translated
\ ca2 len2 = text to translate, from the translation table
\ wf = translate the text?
[ false ] [if]
\ Original version with 3 conditions, and debug code.
\ dup
dup 0<> \ is there text to translate?
\ cr dup if ." C1" then \ xxx informer
>r tra-pointer@
( ca1 len1 ca2 len2 -- )
\ ca1 len1 = remaining text to be translated
\ ca2 len2 = text to translate, from the translation table
dup 0<> \ is there a possible translation?
\ dup if ." C2" then \ xxx informer
r> and
\ over r> <= \ is the possible translation shorter than the text to translate?
\ dup if ." C3" then \ xxx informer
\ and
[else]
\ 2007-05-08 New version with only 2 conditions.
dup 0<> >r \ is there text to translate?
tra-pointer@ dup 0<> \ is there a possible translation?
r> and
[then]
;
: tra ( ca1 len1 -- ca2 len2 )
\ Translate a string using the current translation table.
\ ca1 len1 = text to be translated
\ ca2 len2 = translated text
\ 2dup cr type ." --> ? " \ xxx informer
translation0
begin
dup
\ more and \ xxx informer
while tra-pointer0
begin translate-more?
while
\ cr ." translate more: " 2over type ." --> " 2dup type \ xxx informer
tra-pointer+ translate-this?
if
\ cr ." translate this:" 2over type ." --> " 2dup type \ xxx informer
nip /string tra-pointer@ translation+! tra-pointer0
else
\ cr ." don't translate this: " 2over type ." --> " 2dup type \ xxx informer
2drop tra-pointer+
then
repeat 2drop dup
if
over 1 translation+! 1 /string \ char not translated
then
repeat 2drop translation @ count
\ 2dup cr ." RESULT: " type \ xxx informer
;
: .tra-table-element ( ca len -- )
\ Print the content of an element of the current translation table
cr type ." > " tra-pointer+
tra-pointer@ type tra-pointer+
;
: .tra-table ( -- )
\ Print the content of the current translation table
tra-pointer0
begin tra-pointer@ dup
while .tra-table-element
repeat 2drop
;
.( ftra.fs compiled ) cr
ftra-utf8.fs
\ ****************************
CR .( ftra-utf8 )
\ ****************************
\ Copyright (C) 2005 Marcos Cruz (http://programandala.net)
\ License: http://programandala.net/license
\ Translation table for ftra:
\
\ Some translations from the Windows character set (windows-1252;
\ Unofficial synonyms include cp-1252 and WinLatin1) into HTML
\ entities.
\ Special translations provided by Jukka "Yucca" Korpela in his web
\ page.
\ Note: code positions 128·-·159 have been reserved for eventual use
\ as control codes.
[undefined] ftra [IF]
include ftra.fs
[THEN]
tra-table: utf8
S" ‚" s, S" ‚" s, \ Jukka
S" ƒ" s, S" ƒ" s, \ Jukka
S" „" s, S" „" s, \ Jukka
S" …" s, S" …" s, \ Jukka
S" †" s, S" †" s, \ Jukka
S" ‡" s, S" ‡" s, \ Jukka
S" ˆ" s, S" ˆ" s, \ Jukka
S" ‰" s, S" ‰" s, \ Jukka
S" Š" s, S" Š" s, \ Jukka
S" ‹" s, S" ‹" s, \ Jukka
S" Œ" s, S" Œ" s, \ Jukka
S" ‘" s, S" ‘" s, \ Jukka
S" ’" s, S" ߣ" s, \ Jukka
S" “" s, S" “" s, \ Jukka
S" ”" s, S" ”" s, \ Jukka
S" •" s, S" ߦ" s, \ Jukka
S" –" s, S" –" s, \ Jukka
S" —" s, S" —" s, \ Jukka
S" ˜" s, S" ˜" s, \ Jukka
S" ™" s, S" ™" s, \ Jukka
S" š" s, S" š" s, \ Jukka
S" ›" s, S" ›" s, \ Jukka
S" œ" s, S" œ" s, \ Jukka
S" Ÿ" s, S" Ÿ" s, \ Jukka
S" ¡" s, S" ¡" s,
S" ª" s, S" ª" s,
S" «" s, S" «" s,
S" ·" s, S" ·" s,
S" º" s, S" º" s,
S" »" s, S" »" s,
S" ¿" s, S" ¿" s,
S" À" s, S" À" s,
S" Á" s, S" Á" s,
S" Ç" s, S" Ç" s,
S" È" s, S" È" s,
S" É" s, S" É" s,
S" Í" s, S" Í" s,
S" Ñ" s, S" Ñ" s,
S" Ó" s, S" Ó" s,
S" Ú" s, S" Ú" s,
S" Ü" s, S" Ü" s,
S" à" s, S" à" s,
S" á" s, S" á" s,
S" ä" s, S" ä" s,
S" ç" s, S" ç" s,
S" é" s, S" é" s,
S" í" s, S" í" s,
S" ñ" s, S" ñ" s,
S" ó" s, S" ó" s,
S" ô" s, S" ô" s,
S" ú" s, S" ú" s,
S" ü" s, S" ü" s,
;tra-table
.( ftra-utf8 ok!)
ftra-utf8es.fs
\ ****************************
CR .( ftra_utf8es )
\ ****************************
\ Copyright (C) 2005 Marcos Cruz (http://programandala.net)
\ License: http://programandala.net/license
\ Translation table for ftra:
\ Some translations from the Windows character set
\ (windows-1252; Unofficial synonyms include cp-1252 and WinLatin1)
\ into HTML entities.
\ Sorted to translate Spanish texts more quickly.
\ Special translations provided by Jukka "Yucca" Korpela in his web page.
[undefined] ftra [IF]
include ftra.fs
[THEN]
tra-table: utf8es
\ The most used, the Spanish chars
S" á" S, S" á" S,
S" é" S, S" é" S,
S" í" S, S" í" S,
S" ó" S, S" ó" S,
S" ú" S, S" ú" S,
S" ñ" S, S" ñ" S,
S" Á" S, S" Á" S,
S" É" S, S" É" S,
S" Í" S, S" Í" S,
S" Ó" S, S" Ó" S,
S" Ú" S, S" Ú" S,
S" Ñ" S, S" Ñ" S,
S" ¿" S, S" ¿" S,
S" ¡" S, S" ¡" S,
S" ü" S, S" ü" S,
S" Ü" S, S" Ü" S,
\ Some tipographical chars
S" «" S, S" «" S,
S" ·" S, S" ·" S,
S" ª" S, S" ª" S,
S" º" S, S" º" S,
S" »" S, S" »" S,
\ Chars from other languages
S" à" S, S" à" S,
S" ä" S, S" ä" S,
S" ç" S, S" ç" S,
S" À" S, S" À" S,
S" Ç" S, S" Ç" S,
S" È" S, S" È" S,
S" ô" S, S" ô" S,
S" ò" S, S" ò" S,
S" Ò" S, S" Ò" S,
\ Special chars
S" " S, S" " S, \ 160
\ Problem chars (128-159)
false [IF]
\ Original translations for chars 128-159
S" ‚" S, S" ‚" S, \ Jukka
S" ƒ" S, S" ƒ" S, \ Jukka
S" „" S, S" „" S, \ Jukka
S" …" S, S" …" S, \ Jukka
S" †" S, S" †" S, \ Jukka
S" ‡" S, S" ‡" S, \ Jukka
S" ˆ" S, S" ˆ" S, \ Jukka
S" ‰" S, S" ‰" S, \ Jukka
S" Š" S, S" Š" S, \ Jukka
S" ‹" S, S" ‹" S, \ Jukka
S" Œ" S, S" Œ" S, \ Jukka
S" ‘" S, S" ‘" S, \ Jukka
S" ’" S, S" ߣ" S, \ Jukka
S" “" S, S" “" S, \ Jukka
S" ”" S, S" ”" S, \ Jukka
S" •" S, S" ߦ" S, \ Jukka
S" –" S, S" –" S, \ Jukka
S" —" S, S" —" S, \ Jukka
S" ˜" S, S" ˜" S, \ Jukka
S" ™" S, S" ™" S, \ Jukka
S" š" S, S" š" S, \ Jukka
S" ›" S, S" ›" S, \ Jukka
S" œ" S, S" œ" S, \ Jukka
S" Ÿ" S, S" Ÿ" S, \ Jukka
[ELSE]
\ 2006 04 03
\ Actual secure translations for chars 128-159
S" €" S, S" EUR" S, \ 128
S" ‚" S, S" '" S, 130
S" „" S, S" "" S, \ 132
S" ‹" S, S" <" S, \ 139
S" Œ" S, S" OE" S, \ 140
S" ‘" S, S" '" S, \ 145
S" ’" S, S" '" S, \ 146
S" “" S, S" "" S, \ 147
S" ”" S, S" "" S, \ 148
S" •" S, S" ·" S, \ 149
S" –" S, S" -" S, \ 150
S" —" S, S" - " S, \ 151
S" ˜" S, S" ~" S, \ 152
S" ™" S, S" (TM)" S, \ 153
S" ›" S, S" >" S, 155
S" œ" S, S" oe" S, \ 156
[THEN]
;tra-table
.( ftra_utf8es ok!)
ftra-x-alfabeto-utf8es.fs
\ ****************************
CR .( ftra-x-alfabeto-utf8 )
\ ****************************
\ Copyright (C) 2005 Marcos Cruz (http://programandala.net)
\ License: http://programandala.net/license
\ Translation table for ftra:
\ from the Esperanto x alphabet into HTML entities.
[undefined] ftra [IF]
include ftra.fs
[THEN]
tra-table: x-alfabeto-utf8
S" cx" s, S" ĉ" s,
S" gx" s, S" ĝ" s,
S" jx" s, S" ĵ" s,
S" sx" s, S" ŝ" s,
S" ux" s, S" ŭ" s,
S" Cx" s, S" Ĉ" s,
S" Gx" s, S" Ĝ" s,
S" Jx" s, S" Ĵ" s,
S" Sx" s, S" Ŝ" s,
S" Ux" s, S" Ŭ" s,
S" hx" s, S" ĥ" s,
S" Hx" s, S" Ĥ" s,
;tra-table
.( ftra_x-alfabeto-utf8 ok!)
ftra-example.fs
\ ftra-example
\ Copyright (C) 2005 Marcos Cruz (http://programandala.net)
\ License: http://programandala.net/license
\ Example translation table for ftra.
[undefined] ftra [IF]
S" ftra.fs" INCLUDED
[THEN]
: .tra 2DUP CR TYPE tra CR TYPE CR ;
tra-table: TheBeatles
\ Sintax:
\ CHAR char-from c>s, S" string-to" s,
\ CHAR char-from c>s, CHAR char-to c>s,
\ S" string from" s, S" string to" s,
\ S" string from" s, CHAR char-to c>s,
\ Note: the "from" elements have to be sorted, from longer to shorter.
S" goodbye" s, S" hello" s,
S" hello" s, S" goodbye" s,
S" stop" s, S" go, go, go" s,
S" yes" s, S" no" s,
;tra-table
TheBeatles .tra-table CR
S" I say yes, you say no." .tra
S" You say stop and I say go, go, go." .tra
S" You say goodbye and I say hello." .tra