boltwire2simplilo
Descripción del contenido de la página
Programa en SBASIC (con formato SBim) para convertir páginas wiki del formato de Boltwire al formato de Simplilo.
Escribí este programa para resolver un problema del «mundo real»: convertir cerca de doscientos ficheros de texto, fuentes de páginas de Internet en formato de wiki, desde el formato de Boltwire al formato de Simplilo, como parte del proceso de migración de una sede de un sistema a otro. Aunque la parte final de la conversión requeriría en cualquier caso una revisión manual, la mayor parte de las operaciones que había que hacer con los ficheros podía automatizarse.
Tras estimar escribir el programa en otros lenguajes (principalmente en mi favorito, Forth, pero también en Bash, en sed e incluso en PHP en un servidor local) elegí S*BASIC, en parte por el gusto de trabajar en el entorno QL y en parte por la facilidad con que permite crear aplicaciones prácticas rápidamente y por sus facilidades para el tratamiento de ficheros y cadenas (a pesar de que no dispone de expresiones regulares, que hubieran resultado muy útiles).
Inicié la codifición en SuperBASIC, después continué en SBASIC por sus diversas ventajas y finalmente usé el formato mejorado SBim.
A pesar de ser una herramienta específica para solucionar un problema muy concreto, este programa tiene el interés de demostrar cómo, gracias a su uso en emuladores de QL que permiten acceder sin restricciones al sistema de ficheros del sistema operativo anfitrión, S*BASIC es una herramienta útil que en ciertos casos puede competir con lenguajes nativos y más potentes.
Código fuente
rem boltwire2simplilo
rem Copyright (C) 2010,2011,2017 Marcos Cruz (programandala.net)
rem License: http://programandala.net/license
rem This program converts wiki pages
rem from the BoltWire format (http://boltwire.com)
rem to the Simplilo format (http://programandala.net/es.programa.simplilo),
rem including some site-specific markups and extensions.
rem This program is written in SBASIC's SBim format
rem (http://programandala.net/es.programa.sbim).
rem It has been tested on the QPC2 emulator running on Debian GNU/Linux with Wine.
' ---------------------------
' Changelog
' 2010-08-07 First draft in SuperBASIC.
' 2010-08-12 Converted to SBASIC. Partly rewritten. Commented. First version of str_replace().
' 2010-08-13 First draft of bound_replace().
' 2010-08-17 First working version of bound_replace().
' 2010-09-01 Bug fixed: images {{...}} are not changed to <<m.../>> any more; they are preserved and restored.
' 2010-09-01 Local links improved with language prefix.
' 2010-09-01 New translations: <embed> and <div>.
' 2010-09-01 Directory management is simpler now with DEV_USE.
' 2010-09-02 Bug fixed: lang markups, lang((...)), are preserved before replacing the end of the extension markups.
' 2010-11-08 Tables are translated.
' 2011-10-04 Source code converted to the SBim format.
' 2011-10-05 Bug found: pages with long file names can be opened, but not created. Testing.
' 2011-10-06 Bug fixed: some file names were too long for SMSQ/E. They have to be renamed by the host system, before and after the conversion.
' 2011-10-06 Bug fixed: In the markups for bold and italic or vice versa, the block comment marks were found first and converted.
' 2011-10-06 Bug fixed: the ")]" in "[[url|text (text)]]" was taken as the end of a "[(plugin)]".
' 2011-10-07 Deprecated metadata fields are not converted any more; metadata fields of news and articles are renamed; period is added to description metadatum if missing; translate_line$() is divided into two procs.
' 2011-10-08 Line breaks converted.
' 2012-01-26 Format of splitted lines updated to the new version of SBim.
' 2017-09-20: Update SBim line comments markup.
' ---------------------------
' Main
b2s
stop
defproc b2s
local \
base_dir$,input_dir$,output_dir$,\
input_channel%,output_channel%,list_channel%,\
normal_ink%,input_ink%,output_ink%,\
former_datad$,\
files%,\
dev_use_1$,dev_use_2$
constants
let dev_use_1$ = dev_use$(1) ' backup
let dev_use_2$ = dev_use$(2) ' backup
let base_dir$="dos1_fsm_"
dev_use 1,base_dir$&"b_"
let input_dir$ = "dev1_"
dev_use 2,base_dir$&"s_"
let output_dir$ = "dev2_"
colour_ql
let normal_ink%=7 ' white
let input_ink%=4 ' green
let output_ink%=2 ' red
ink normal_ink%
let former_datad$ = datad$
create_list
translate_files
print "Done."
print "Files translated:"!files%;"."
dev_use 1,dev_use_1$ ' restore
dev_use 2,dev_use_2$ ' restore
enddef
' ---------------------------
' Constants
defproc constants
let nl$=chr$(10) ' new line
enddef
' ---------------------------
defproc create_list
' Create a file with the list of the input files,
' open it and point to the first file in the list.
local list_file$,useless$
let list_file$ = base_dir$&"boltwire2simplilo_tmp"
list_channel% = fop_over(list_file$)
check_file_error list_channel%,list_file$
dir#list_channel%,input_dir$
bget#list_channel%\0 ' Point to the start of the file.
input#list_channel%,useless$,useless$ ' Skip the list header.
enddef
defproc translate_files
' Read every file name from the list
' and translate the file.
local file$
let files% = 0
rep
if eof(#list_channel%):exit
input#list_channel%,file$
translate_file file$(len(dev_use$(1))-4 to)
let files% = files% + 1
endrep
close#list_channel%
enddef
defproc short_file_name(file$)
' Make the given file name shorter.
' Otherwise, SMSQ/E cannot manage it.
' Obsolete, no used any more.
' The renaming is done in the host system.
' str_replace "actividades","AC",file$
' str_replace "novedades","NO",file$
' str_replace "articulos","AR",file$
' str_replace "nuestrapagina","NP",file$
' str_replace "diasdegranja","DDG",file$
' str_replace "fiestasantmiquel","FSM",file$
' str_replace "venteafreiresparragos","VAFE",file$
' str_replace "undiaenlagranga","UDELG",file$
' str_replace "manosalbarro","MAB",file$
' str_replace "eraseunavez","EUV",file$
enddef
defproc translate_file(file$)
' Translate the given file.
' file$ = file name, without path.
local \
input_file$,\ ' File name with device and path.
output_file$,\ ' File name with device and path.
line$,\
metadata_zone%,\ ' Flag: Already found the metadata zone of the input file?
content$ ' Page content, printed at the end of the process.
'Print "Translating:"!file$!"[";len(file$);"]" /7 debug
print files%+1;":"!file$
let input_file$ = input_dir$ & file$
let input_channel% = fop_in(input_file$)
'print input_channel%!"(";input_file$;")" ' debug!!!
check_file_error input_channel%,input_file$
'short_file file$ ' deprecated!!!
' Obsolete!!! Done in the host system.
' if "pasportaservo" instr file$
' let output_file$ = output_dir$ & "E." & file$ ' Esperanto (temporary mark for "eo")
' else
' let output_file$ = output_dir$ & "C." & file$ ' Castilian Spanish (temporary mark for "es")
' endif
let output_file$ = output_dir$ & file$
let output_channel% = fop_over(output_file$)
'print "Output file:"!output_file$!"[channel ";output_channel%;"]" ' debug!!
check_file_error output_channel%,output_file$
rep read_line
if eof(#input_channel%):exit read_line
input#input_channel%,line$
' input_print line$
translate_line line$
if metadata_zone%
if line$="~":next read_line
translate_metadata_line line$
' output_print line$
if len(line$):print#output_channel%,line$
else
if line$="~data~":\
let metadata_zone% = 1:\
next read_line
translate_content_line line$
let content$ = content$ & nl$ & line$
endif
endrep read_line
translate_content content$
' output_print content$
print#output_channel%\content$
close#input_channel%
close#output_channel%
enddef
defproc check_file_error(channel%,file$)
'print "Checking channel"!channel%!"and file"!file$;"." ' debug!!!
if channel%<0
print#0,"Error while opening the file "!file$
report channel%
stop
endif
enddef
defproc translate_line(line$)
' Do generic translations in the given line.
str_replace "'","'",line$,0
enddef
defproc translate_content_line(line$)
' Translate a content line.
' Line breaks:
str_replace chr$(92),"PRESERVED_LINE_BREAK",line$,0
str_replace "PRESERVED_LINE_BREAK",chr$(92)&chr$(92),line$,0
str_replace "<br>",chr$(92)&chr$(92),line$,0
str_replace "<br>",chr$(92)&chr$(92),line$,0
' Quotes:
str_replace '<<','""',line$,0
str_replace '>>','""',line$,0
str_replace '<<','""',line$,0
' Extensions:
'str_replace "[(","<<",line$,0
str_replace "))","PRESERVED_END_OF_LANG_MARKUP",line$,0
'str_replace ")]"," />>",line$,0
bound_replace "[(",")]","<<"," />>",line$,0
str_replace "PRESERVED_END_OF_LANG_MARKUP","))",line$,0
bound_replace "<embed ",">","<<include system.inc."," />>",line$,0
' Mnemonic substitutions:
bound_replace "{{","}}","PRESERVED_START_OF_IMAGE_MARKUP","PRESERVED_END_OF_IMAGE_MARKUP",line$,0
bound_replace "{","}","<<m "," />>",line$,0
bound_replace "PRESERVED_START_OF_IMAGE_MARKUP","PRESERVED_END_OF_IMAGE_MARKUP","{{","}}",line$,0
' Comments:
str_replace "/"&"/**","PRESERVED_ITALIC+BOLD",line$,0
str_replace "**/"&"/","PRESERVED_BOLD+ITALIC",line$,0
str_replace "/*","{*",line$,0
str_replace "*/","*}",line$,0
str_replace "PRESERVED_ITALIC+BOLD","/"&"/**",line$,0
str_replace "PRESERVED_BOLD+ITALIC","**/"&"/",line$,0
' Link anchors:
bound_replace "[[#desdeaqui","]]","{*desde","*}",line$,0
bound_replace "[[#hastaaqui","]]","{*hasta","*}",line$,0
bound_replace "[[#","]]","<<anchor "," />>",line$,0
' HTML tags:
bound_replace '<div class="','">',"<<div "," />>",line$,0
str_replace "</div>","<<div/>>",line$,0
' Local links:
str_replace "[[ ","[[",line$,0
str_replace "[[http","((((PROTOKOLO",line$,0 ' preserve
str_replace "[[","((((es.",line$,0
str_replace "((((","[[",line$,0 ' restore
str_replace "PROTOKOLO","http",line$,0 ' restore
' Abbrs:
str_replace '<<sigla "',"<<abbr:",line$,0
str_replace '<<acronimo "',"<<abbr:",line$,0
str_replace "<<acronimo '","<<abbr:",line$,0
bound_replace '" significado="','" ',":",":",line$,0
bound_replace "' significado='","' ",":",":",line$,0
str_replace '<<acronimo ',"<<abbr:",line$,0
bound_replace ' lengua="en:',':',":",":en:",line$,0
bound_replace " lengua='en:",':',":",":en:",line$,0
' Prices table:
if file$="productos.precios" or file$="productos"
str_replace ' producto="',":",line$,0
str_replace '" unidad="',":",line$,0
str_replace '" precio="',":",line$,0
str_replace '" otraunidad="',":",line$,0
str_replace '" precioporotraunidad="',":",line$,0
str_replace '" />>',":/>>",line$,0
endif
' Definition lists:
str_replace '<dl>',"",line$,0
str_replace '</dl>',"",line$,0
str_replace '<dt>',"**",line$,0
str_replace '</dt>',"**",line$,0
str_replace '<dd>',nl$,line$,0
str_replace '</dd>',nl$,line$,0
enddef
defproc translate_metadata_line(line$)
' Translate a metadata line.
if not len(line$):ret
' Deprecated metadata:
if "author" instr line$:let line$="":ret
if "plaintitle" instr line$:let line$="":ret
' Syntax change:
str_replace ":","=",line$,1
' Metadata that have to be renamed:
str_replace "changesummary","edit_summary",line$,1
str_replace "newstype","news_type",line$,1
str_replace "newsstartdate","news_start_date",line$,1
str_replace "newsstarttime","news_start_time",line$,1
str_replace "newsenddate","news_end_date",line$,1
str_replace "newsendtime","news_end_time",line$,1
str_replace "articledate","article_date",line$,1
' Description period:
if "description" instr line$ and line$(len(line$))<>"."
let line$=line$&"."
endif
enddef
defproc translate_content(content$)
' Translate the whole content.
' Needed to translate the tables.
str_replace '[t]' & nl$ & '[r]','',content$,0
str_replace nl$ & '[r]',' |' & nl$,content$,0
str_replace nl$ & '[h]','|= ',content$,0
str_replace nl$ & '[c]','| ',content$,0
str_replace nl$ & '[t]',' |' & nl$,content$,0
enddef
defproc str_replace(search$,replace$,subject$,count%)
' Change subject$, replacing occurrences of search$ with the given replace$ value.
' Inspired by the PHP function str_replace().
' search$ : The value being searched for, otherwise known as "the needle".
' replace$ : The replacement value that replaces found search$ values.
' subject$ : The string being searched and replaced on, otherwise known as "the haystack".
' count% : If greater than 0, this will hold the maximum number of matched and replaced needles; if less than 1, all needles will be replaced.
local found_position,length
let length = len(search$)
rep
let found_position = search$ instr subject$
if found_position
let subject$ = subject$(to found_position-1) & replace$ & subject$(found_position + length to)
if count%<1
next
else
let count% = count%-1:\
if count% = 0:exit
endif
else
exit
endif
endrep
enddef
defproc bound_replace(old_start$,old_end$,new_start$,new_end$,subject$,count%)
' Unfinished!!!
' Change subject$, replacing occurrences of old_start$ with the given new_start$ value; and those of old_end$ with new_end$.
' old_start$ : The value being searched for, otherwise known as "the first needle".
' new_start$ : The replacement value that replaces found old_start$ values.
' old_end$ : The value being searched for, otherwise known as "the second needle".
' new_end$ : The replacement value that replaces found old_end$ values.
' subject$ : The string being searched and replaced on, otherwise known as "the haystack".
' count% : If greater than 0, this will hold the maximum number of matched and replaced needles; if less than 1, all needles will be replaced.
local start_position
local end_position
local search_position
local start_length,end_length
let start_length = len(old_start$)
let end_length = len(old_end$)
let search_position = 1
'print \old_start$;"*";old_end$!"->"!new_start$;"*";new_end$
'print "subject$="!subject$
rep
if search_position > len(subject$):exit
'print "search_position="!search_position
'print "subject$(search_position to)="!subject$(search_position to)
let start_position = old_start$ instr subject$(search_position to)
if not start_position
exit
else
'print "relative start_position="!start_position
let start_position = start_position + search_position - 1
'print "absolute start_position="!start_position
let end_position = old_end$ instr subject$(start_position+start_length to)
if not end_position
exit
else
'print "relative end_position="!end_position
let end_position = start_position + start_length + end_position -1
'print "absolute end_position="!end_position
let search_position = end_position + end_length
'print "search_position="!search_position
let subject$ = subject$(to start_position-1) & new_start$ & subject$(start_position+start_length to end_position-1 ) & new_end$ & subject$(search_position to)
'print "RESULT=";subject$
if count%<1
next
else
let count% = count%-1:\
if count% = 0:exit
endif
endif
endif
endrep
enddef
defproc input_print(text$)
print "<";
ink input_ink%:print !text$:ink normal_ink%
enddef
defproc output_print(text$)
print ">";
ink output_ink%:print !text$:ink normal_ink%
enddef