; $Id: dptools.el,v 1.4 2003/05/29 20:43:46 traverso Exp traverso $
;;; (load "~/bin/dptools.el")
;;;
;;; Text clean-up commands for preparing and post-processing proofread
;;; texts.  Every command works on the current buffer.
;;;
;;; Review notes:
;;;
;;; * Replacements still go through `replace-regexp' / `replace-string'
;;;   (wrapped by the dptools-- helpers below) so that their handling
;;;   of `case-fold-search' and `case-replace' stays exactly as before.
;;;
;;; * Scratch values (tob, eob, pad, ...) are bound with `let' instead
;;;   of being assigned as global variables.
;;;
;;; * Some string literals reached review with HTML-like text stripped
;;;   or decoded.  Where the surrounding names and comments determine
;;;   the intended text it has been reconstructed; where they do not,
;;;   the literal is kept as transcribed.  Both cases are tagged
;;;   NOTE(review) and should be checked against a pristine copy.

;;; internal helpers

(defun dptools--replace-regexp (regexp to-string &optional start end)
  "Replace matches of REGEXP with TO-STRING via `replace-regexp'.
START and END, when given, bound the replacement; otherwise it runs
from point onwards."
  (with-no-warnings
    (replace-regexp regexp to-string nil start end)))

(defun dptools--replace-string (from-string to-string &optional start end)
  "Replace FROM-STRING with TO-STRING via `replace-string'.
START and END, when given, bound the replacement; otherwise it runs
from point onwards."
  (with-no-warnings
    (replace-string from-string to-string nil start end)))

(defun dptools--replace-regexp-all (regexp to-string)
  "Replace matches of REGEXP with TO-STRING between `point-min' and `point-max'."
  (dptools--replace-regexp regexp to-string (point-min) (point-max)))

(defun dptools--replace-string-all (from-string to-string)
  "Replace FROM-STRING with TO-STRING between `point-min' and `point-max'."
  (dptools--replace-string from-string to-string (point-min) (point-max)))

;;; remove the space at the end of lines
(defun de-space-at-eol ()
  "Delete runs of spaces at the end of every line."
  (interactive)
  (dptools--replace-regexp-all " +$" ""))

;;; remove spaces before punctuations
(defun de-pre-punct-space ()
  "Delete spaces that precede one of the characters . , ! ? : ;"
  (interactive)
  (dptools--replace-regexp-all " +\\([\.,!\?:;]\\)" "\\1"))

;;; replace multiple spaces with one space
(defun replace-multiple-spaces ()
  "Collapse every run of two or more spaces into a single space."
  (interactive)
  ;; NOTE(review): the reviewed text showed " +" (one space); two spaces
  ;; are used here so single spaces are not rewritten to themselves.
  ;; The resulting buffer text is the same either way.
  (dptools--replace-regexp-all "  +" " "))

;;; replace some common OCR and windows particularities
(defun de-win ()
  "Normalise tabs, trailing spaces, spaced hyphens and some Windows characters."
  (interactive)
  (let ((tob (point-min-marker))
        (eob (point-max-marker)))
    ;; expand tabs
    (untabify tob eob)
    ;; remove spaces at end-of-line
    (dptools--replace-regexp " +$" "" tob eob)
    ;; replace a spaced hyphen with an em-dash
    (dptools--replace-regexp " +- +" "--" tob eob)
    ;; replace a spaced hyphen at the beginning of line with an em-dash
    (dptools--replace-regexp "^- +" "--" tob eob)
    ;; replace a spaced hyphen at the end of line with an em-dash
    (dptools--replace-regexp " -$" "--" tob eob)
    ;; replace some common chars in win codepage but not in iso-latin-1
    (dptools--replace-string "†" "{\\dag}" tob eob)
    (dptools--replace-string "‡" "{\\ddag}" tob eob)
    (dptools--replace-regexp "Œ\\([A-Z]\\)" "OE\\1" tob eob)
    (dptools--replace-regexp "Œ" "Oe" tob eob)
    (dptools--replace-regexp "œ" "oe" tob eob)
    (dptools--replace-regexp "—" "--" tob eob)
    ;; NOTE(review): kept as transcribed.  Presumably this was meant to
    ;; catch the remaining Windows-only codes 0x80-0x9F; as Unicode
    ;; characters the two endpoints are in descending order, so confirm
    ;; the intended range and the buffer's coding system.
    (dptools--replace-regexp "[€-Ÿ]" "*" tob eob)))

;;; resolve the ae ligature
(defun de-ae ()
  "Replace the ligatures Æ and æ with AE, Ae or ae."
  (interactive)
  ;; The bounds are computed here; this command previously read the
  ;; variables tob and eob that only other commands assigned.
  (let ((tob (point-min-marker))
        (eob (point-max-marker)))
    (dptools--replace-regexp "Æ\\([A-Z]\\)" "AE\\1" tob eob)
    (dptools--replace-regexp "Æ" "Ae" tob eob)
    (dptools--replace-regexp "æ" "ae" tob eob)))

;;; prepare end-of-lines hyphens:
(defun de-hyp ()
  "Join words split by a hyphen at the end of a line.
The first word of the following line is pulled up behind the hyphen
and a line break is placed after it.  A double hyphen is kept, a
single hyphen before a word matching the upper-case class is kept,
and a single hyphen before a word matching the lower-case class is
dropped.  The rules are applied in order, each over the whole buffer."
  (interactive)
  (dolist (rule '(;; the pulled-up word is alone on its line
                  ("--\n\\([a-z\"][^ \n]*\\) *\n" . "--\\1\n")
                  ("-\n\\([A-ZÁ-Ý][^ \n]*\\) *\n" . "-\\1\n")
                  ("-\n\\([a-zÀ-ÿ][^ \n]*\\) *\n" . "\\1\n")
                  ;; the pulled-up word is followed by more text
                  ("--\n\\([a-z\"][^ \n]*\\) +" . "--\\1\n")
                  ("-\n\\([A-ZÁ-Ý][^ \n]*\\) +" . "-\\1\n")
                  ("-\n\\([a-zÀ-ÿ][^ \n]*\\)[ \n]" . "\\1\n")))
    (dptools--replace-regexp-all (car rule) (cdr rule))))

;;; pre-processing
(defun insert-empty-page ()
  "Insert an (empty-page) marker at point when the buffer is nearly empty."
  (interactive)
  (when (< (point-max) 4)
    (insert "\n(empty-page)\n")))

(defun pre-dp ()
  "Run the whole pre-processing sequence on the buffer, then save it."
  (interactive)
  (insert-empty-page)
  ;; a doubled backslash becomes two asterisks
  (dptools--replace-string-all "\\\\" "**")
  (goto-char (point-max))
  (insert "\n")
  (close-html)
  (de-space-at-eol)
  (de-pre-punct-space)
  (de-win)
  (replace-multiple-spaces)
  (de-bold)
  (shrink-guillemets)
  (shrink-em-lines)
  (de-hyp)
  (save-buffer))

(defun shrink-em-lines ()
  "Delete a single space before or after a double hyphen."
  (interactive)
  (dptools--replace-string-all " --" "--")
  (dptools--replace-string-all "-- " "--"))

(defun de-bold ()
  "Delete the bold tags."
  (interactive)
  ;; NOTE(review): the pattern was empty in the reviewed text (markup
  ;; stripped); "</?b>" is a reconstruction from the command name and
  ;; the tag syntax used by html-to-block.  Confirm.
  (dptools--replace-regexp-all "</?b>" ""))

(defun de-it ()
  "Replace the italic tags with an underscore."
  (interactive)
  ;; NOTE(review): pattern reconstructed, see de-bold.
  (dptools--replace-regexp-all "</?i>" "_"))

;;; mark for TeX
(defun pp-tex ()
  "Convert the proofing markup of the buffer into the TeX-oriented markup."
  (interactive)
  (let ((tob (point-min-marker))
        (eob (point-max-marker)))
    ;; NOTE(review): pattern reconstructed, see de-bold.
    (dptools--replace-regexp "</?i>" "_" tob eob)
    ;; a run of an odd number (three or more) of underscores gets one more
    (dptools--replace-regexp "\\([^_]\\(__\\)+_\\)\\([^_]\\)" "\\1_\\3" tob eob)
    ;; page separator lines become \PG{nnn}
    (dptools--replace-regexp "\n+-+File: \\(...\\)\\.png-+\n+" "\\\\PG{\\1}\n"
                             tob eob)
    ;; */ becomes \= and /* becomes \_
    (dptools--replace-string "*/" "\\=" tob eob)
    (dptools--replace-string "/*" "\\_" tob eob)
    (dptools--replace-string "[Footnote: " "\\Footnote[" tob eob)
    (dptools--replace-string "[Sidenote: " "\\Sidenote[" tob eob)))

(defun shrink-guillemets ()
  "Delete the space just inside an opening or closing guillemet."
  (interactive)
  (dptools--replace-string-all "« " "«")
  (dptools--replace-string-all " »" "»"))

;;; pre-post-processing
(defun ppp ()
  "Run de-space-at-eol, de-pre-punct-space, de-win and pp-tex in turn."
  (interactive)
  (de-space-at-eol)
  (de-pre-punct-space)
  (de-win)
  (pp-tex))

;;; Tools for semiautomatic adjusting of page breaks

;;; adjust hyphens at end of page
(defun adjust-pb-hyphens ()
  "Query-replace words split across a page marker, from the buffer start.
The part of the word after the marker is moved in front of it."
  (interactive)
  (goto-char (point-min))
  ;; NOTE(review): the character between "-*" and "*" was a line break
  ;; in the reviewed text; a space is assumed.  Confirm.
  (query-replace-regexp
   " *-* *\\*\\\\PG{\\(...\\)}\n\\** *\\([^ ]*\\) *"
   "\\2\\\\PG{\\1}\n"))

;;; query for possible paragraphs at top of page
(defun adjust-page-indents ()
  "Query, from point, for a blank line after a closing brace at end of line.
Only lines that follow with a hyphen, capital letter, double quote or
opening guillemet are offered."
  (interactive)
  (query-replace-regexp "}\n\\([-A-Z\"«]\\)" "}\n\n\\1"))

;;; rewrapping

;;; make regions between \_ and \= unwrappable
(defun make-unfold ()
  "Protect every region between the two TeX markers against refilling.
Spaces inside a region become character 160 and every line of the
region is padded with 70 of those characters."
  (interactive)
  (goto-char (point-min))
  (let ((pad (make-string 70 160)))
    (while (search-forward "\\_" nil t)
      (let ((unfold-begin (point-marker))
            unfold-end)
        (search-forward "\\=" nil t)
        (end-of-line)
        (forward-char)
        ;; Temporary sentinel at the start of the following line,
        ;; presumably so that "$" cannot match at the region end.
        (insert "ª")
        (backward-char)
        (setq unfold-end (point-marker))
        (dptools--replace-string " " (make-string 1 160)
                                 unfold-begin unfold-end)
        (dptools--replace-regexp "$" pad unfold-begin unfold-end)
        (goto-char unfold-end)
        ;; remove the sentinel again
        (delete-char 1)))))

;;; clean unwrappable regions after wrapping
(defun clean-unfold ()
  "Undo `make-unfold'.
Character 160 becomes a space again and trailing spaces are deleted
inside every protected region."
  (interactive)
  (goto-char (point-min))
  (while (search-forward "\\_" nil t)
    (let ((unfold-begin (point-marker))
          unfold-end)
      (search-forward "\\=" nil t)
      (end-of-line)
      (forward-char)
      (setq unfold-end (point-marker))
      (backward-char)
      (dptools--replace-string (make-string 1 160) " "
                               unfold-begin unfold-end)
      (dptools--replace-regexp " +$" "" unfold-begin unfold-end))))

;;; rewrap and erase page markers
(defun pg-fill ()
  "Delete the page markers and refill the buffer, keeping protected regions.
Sets `sentence-end-double-space' to nil."
  (interactive)
  (setq sentence-end-double-space nil)
  (dptools--replace-regexp-all "\\\\PG{...}" "")
  (make-unfold)
  (fill-region (point-min-marker) (point-max-marker))
  (clean-unfold))

(defun pg-fill-2 ()
  "Like `pg-fill', but protect doubly indented lines instead of marked regions."
  (interactive)
  (goto-char (point-min))
  (setq sentence-end-double-space nil)
  (dptools--replace-regexp-all "\\\\PG{...}" "")
  (goto-char (point-min))
  (protect-doubly-indented-lines)
  (fill-region (point-min-marker) (point-max-marker))
  (unprotect-lines))

;;; alternative: protect from rewrapping indented lines
(defun protect-indented-lines ()
  "Protect every line after point that starts with a space."
  (interactive)
  (while (re-search-forward "^ " nil t)
    (protect-one-line)))

;;; alternative: protect from rewrapping doubly indented lines
(defun protect-doubly-indented-lines ()
  "Protect every line after point that starts with two spaces."
  (interactive)
  ;; NOTE(review): the reviewed text showed the same one-space pattern
  ;; as protect-indented-lines; two spaces follow the name and the
  ;; comment above.  Confirm.
  (while (re-search-forward "^  " nil t)
    (protect-one-line)))

;;; replace non-breaking spaces with usual spaces, remove trailing spaces
(defun unprotect-lines ()
  "Turn character 160 back into a space and delete trailing spaces."
  (interactive)
  (dptools--replace-string-all (make-string 1 160) " ")
  (dptools--replace-regexp-all " +$" ""))

;;; protect the current line
(defun protect-one-line ()
  "Protect the current line against refilling.
Its spaces become character 160 and the line is padded with that
character when it is shorter than 75, counting the line terminator."
  (interactive)
  (beginning-of-line)
  (let ((bol (point-marker))
        nl
        ll)
    ;; `forward-line' moves by logical lines and simply stops at the end
    ;; of the buffer, so a final line without a newline is handled too.
    (forward-line 1)
    (setq nl (point-marker))
    (backward-char)
    (setq ll (- nl bol))
    (dptools--replace-string " " (make-string 1 160) bol nl)
    (end-of-line)
    (when (< ll 75)
      (insert (make-string (- 75 ll) 160)))))

;;; remove pg-tex markups
(defun final-cleanup ()
  "Delete every line from one of the two TeX markers to its end."
  (interactive)
  (let ((tob (point-min-marker))
        (eob (point-max-marker)))
;    (replace-string (make-string 1 160) " " nil tob eob)
;    (replace-regexp " +$" "" nil tob eob)
    (dptools--replace-regexp "\\\\_.*\n" "" tob eob)
;    (replace-regexp "/\\*.*\n" "" nil tob eob)
;    (replace-regexp "\\*/.*\n" "" nil tob eob)
    (dptools--replace-regexp "\\\\=.*\n" "" tob eob)
;    (replace-string "\\Footnote[" "[Footnote: " nil tob eob)
;    (replace-string "\\Sidenote[" "[Sidenote: " nil tob eob)
    ))

;;; restore <i>...</i> in place of _
(defun re-html ()
  "Turn underscores after point into alternating opening and closing italic tags.
An underscore counts only when it is preceded by a character other
than a backslash or underscore and followed by a non-underscore."
  (interactive)
  ;; NOTE(review): the two tags were missing from the replacement
  ;; strings in the reviewed text; they are reconstructed from the
  ;; comment above.  Confirm.
  (let ((underscore "\\([^\\_]\\)_\\([^_]\\)"))
    (while (re-search-forward underscore nil t)
      ;; FIXEDCASE is t so the inserted tag is never case-converted.
      (replace-match "\\1<i>\\2" t nil)
      ;; Only replace when a closing underscore was actually found;
      ;; otherwise the match data still describes the opening one.
      (when (re-search-forward underscore nil t)
        (replace-match "\\1</i>\\2" t nil)))))

;;; common errors: no space after punctuation

;;; remove space before punctuation (interactively)
(defun query-de-pre-punct-space ()
  "Query, from point, to delete spaces before . , ! ? : ;"
  (interactive)
  (query-replace-regexp " +\\([\.,!\?:;]\\)" "\\1"))

;;; query add space after punctuation/letter
(defun add-post-space ()
  "Query, from point, to insert a space between punctuation and a letter."
  (interactive)
  ;; The group delimiters need a doubled backslash inside a Lisp string;
  ;; with a single one the pattern matched literal parentheses.
  (query-replace-regexp "\\([\.,!\?:;]\\)\\([a-zA-Z]\\)" "\\1 \\2"))

;;; replace tex accents with characters
(defun de-tex ()
  "Replace TeX accent sequences, quotes and line breaks, then save the buffer.
Each rule is applied in order over the whole buffer."
  (interactive)
  (dolist (rule '(("\\'{E}" . "É") ("\\`{E}" . "È") ("\\^{E}" . "Ê")
                  ("\\`{A}" . "À") ("\\^{A}" . "Â") ("\\c{C}" . "Ç")
                  ("\\\"{E}" . "Ë") ("\\\"{I}" . "Ï") ("\\^{I}" . "Î")
                  ("\\`{O}" . "Ò") ("\\^{O}" . "Ô") ("\\^{U}" . "Û")
                  ("\\'{e}" . "é") ("\\`{e}" . "è") ("\\^{e}" . "ê")
                  ("\\`{a}" . "à") ("\\^{a}" . "â") ("\\c{c}" . "ç")
                  ("\\\"{e}" . "ë") ("\\\"{\\i}" . "ï") ("\\^{\\i}" . "î")
                  ("\\`{o}" . "ò") ("\\^{o}" . "ô") ("\\^{u}" . "û")
                  ("\\`{u}" . "ù")))
    (dptools--replace-string-all (car rule) (cdr rule)))
  ;; TeX quotes become plain double quotes
  (dptools--replace-regexp-all "`` *" "\"")
  (dptools--replace-regexp-all " *''" "\"")
  ;; a doubled backslash becomes a line break
  (dptools--replace-string-all "\\\\" "\n")
  (save-buffer))

(defun fs-to-ss ()
  "Replace fs with ß from point onwards, then save the buffer."
  (interactive)
  (dptools--replace-string "fs" "ß")
  (save-buffer))

;;; Procedures:
;;;
;;; Use pre-dp (better with the shell command prePG) to pre-process
;;; the text files.
;;;
;;; Post-processing (with TeX):
;;;
;;; Use ppp for the automatic pre-post-processing; then use
;;; adjust-pb-hyphens and adjust-page-indents to manage page
;;; interruptions.  Now page breaks should be OK: check them all.
;;;
;;; Use add-post-space to identify and correct missing
;;; space after punctuation.
;;;
;;; Now spell-check the file, then prepare a file namefile.tex,
;;; copying pg.tex and completing the line \input namefile.txt;
;;; prepare a pdf file with pdflatex namefile, and use namefile.pdf
;;; to read the book.  Correct the source.  Repeat pdflatex to refresh
;;; the corrections.
;;;
;;; Now execute pg-fill, check all the multiple blank lines, use
;;; gutcheck to find formatting problems, and execute final-cleanup.
;;;
;;; To recover a version with <i>...</i> use re-html.
;;;
;;; this part is used to strip html markup from a FineReader file,
;;; keeping bold and italic marks.

(defun html-to-block ()
  "Turn line-break tags into newlines and bracket the italic and bold tags."
  (interactive)
  ;; NOTE(review): the pattern reached review as a bare line break
  ;; (markup rendered away); "<br>" is a reconstruction.  Confirm.
  (dptools--replace-regexp-all "<br>" "\n")
  (dptools--replace-regexp-all "<\\(/?[ib]\\)>" "[\\1]"))

(defun block-to-html ()
  "Turn bracketed italic and bold marks back into tags."
  (interactive)
  (dptools--replace-regexp-all "\\[\\(/?[ib]\\)\\]" "<\\1>"))

(defun erase-html ()
  "Delete everything that looks like a tag."
  (interactive)
  (dptools--replace-regexp-all "<[^<>]*>" ""))

(defun erase-fr-filename ()
  "Delete, on each line, the text up to and including a NNNN.htm file name."
  (interactive)
  ;; The dot is escaped so that only a literal "." before "htm" matches.
  (dptools--replace-regexp-all ".*[0-9][0-9][0-9][0-9]\\.htm" ""))

;; NOTE(review): the entity names were rendered as the characters
;; themselves in the reviewed text; this table is the standard HTML
;; Latin-1 entity set for codes 160-255 and is a reconstruction.
(defconst dptools--latin1-entity-names
  '("nbsp" "iexcl" "cent" "pound" "curren" "yen" "brvbar" "sect"
    "uml" "copy" "ordf" "laquo" "not" "shy" "reg" "macr"
    "deg" "plusmn" "sup2" "sup3" "acute" "micro" "para" "middot"
    "cedil" "sup1" "ordm" "raquo" "frac14" "frac12" "frac34" "iquest"
    "Agrave" "Aacute" "Acirc" "Atilde" "Auml" "Aring" "AElig" "Ccedil"
    "Egrave" "Eacute" "Ecirc" "Euml" "Igrave" "Iacute" "Icirc" "Iuml"
    "ETH" "Ntilde" "Ograve" "Oacute" "Ocirc" "Otilde" "Ouml" "times"
    "Oslash" "Ugrave" "Uacute" "Ucirc" "Uuml" "Yacute" "THORN" "szlig"
    "agrave" "aacute" "acirc" "atilde" "auml" "aring" "aelig" "ccedil"
    "egrave" "eacute" "ecirc" "euml" "igrave" "iacute" "icirc" "iuml"
    "eth" "ntilde" "ograve" "oacute" "ocirc" "otilde" "ouml" "divide"
    "oslash" "ugrave" "uacute" "ucirc" "uuml" "yacute" "thorn" "yuml")
  "HTML entity names for the character codes 160 through 255, in code order.")

(defun replace-html-ents ()
  "Lower-case the I and B tags and decode HTML character entities."
  (interactive)
  (let ((tob (point-min-marker))
        (eob (point-max-marker))
        (code 160))
    (dptools--replace-regexp "<\\(/?\\)I>" "<\\1i>" tob eob)
    (dptools--replace-regexp "<\\(/?\\)B>" "<\\1b>" tob eob)
    ;; NOTE(review): kept as transcribed.  As written it replaces each
    ;; run of spaces and newlines with itself; markup around the group
    ;; appears to have been lost.  Confirm.
    (dptools--replace-regexp "\\([ \n]+\\)" "\\1" tob eob)
    (dptools--replace-string "&lt;" "<" tob eob)
    (dptools--replace-string "&gt;" ">" tob eob)
    (dptools--replace-string "&quot;" "\"" tob eob)
    ;; Upper-case names come before their lower-case counterparts, as in
    ;; the original sequence of calls.
    (dolist (name dptools--latin1-entity-names)
      (dptools--replace-string (concat "&" name ";") (make-string 1 code)
                               tob eob)
      (setq code (1+ code)))
    (dptools--replace-string "&circ;" "^" tob eob)
    (dptools--replace-string "&tilde;" "~" tob eob)
    ;; the ampersand goes last so decoded text is not decoded twice
    (dptools--replace-string "&amp;" "&" tob eob)))

(defun abby-to-txt ()
  "Convert a FineReader HTML export in the buffer to text, then save it."
  (interactive)
  (html-to-block)
  (erase-html)
  (block-to-html)
  (replace-html-ents)
  (erase-fr-filename)
  (save-buffer))

(defun close-html ()
  "Swap a run of spaces, newlines and punctuation with what follows it."
  (interactive)
  ;; NOTE(review): kept as transcribed.  The second group is empty, so
  ;; as written nothing is moved; it presumably held a closing tag that
  ;; was stripped.  Confirm.
  (dptools--replace-regexp-all "\\([ \n.!?,;:]+\\)\\(\\)" "\\2\\1"))

(defun de-8 ()
  "Replace accented and other 8-bit characters with ASCII, then save the buffer.
Each rule is applied in order over the whole buffer."
  (interactive)
  (dolist (rule '((string "ç" "c")
                  (regexp "^» *" "\"")
                  (regexp "« *" "\"")
                  (regexp " *»" "\"")
                  (regexp "[à-å]" "a")
                  (regexp "[è-ë]" "e")
                  (regexp "[ì-ï]" "i")
                  (regexp "[ò-ö]" "o")
                  (regexp "[ù-ü]" "u")
                  (string "ð" "dh")
                  (string "þ" "th")
                  (string "ý" "y")
                  (string "ÿ" "y")
                  (string "ñ" "n")
                  (string "æ" "ae")
                  (string "°" "o")
                  (string "§" "s.")))
    (if (eq (car rule) 'regexp)
        (dptools--replace-regexp-all (nth 1 rule) (nth 2 rule))
      (dptools--replace-string-all (nth 1 rule) (nth 2 rule))))
  (save-buffer))

(defun de-8-de ()
  "Replace the German umlauts and ß with their two-letter ASCII spellings."
  (interactive)
  (dolist (rule '(("Ä" . "Ae") ("Ö" . "Oe") ("Ü" . "Ue")
                  ("ä" . "ae") ("ö" . "oe") ("ü" . "ue")
                  ("ß" . "ss")))
    (dptools--replace-string-all (car rule) (cdr rule))))

(defun de-8-it ()
  "Replace an accented vowel before a non-letter with the vowel and an accent mark.
Only à è é ì ù ò followed by a character outside a-z are changed."
  (interactive)
  (dolist (rule '(("à\\([^a-z]\\)" . "a`\\1")
                  ("è\\([^a-z]\\)" . "e`\\1")
                  ("é\\([^a-z]\\)" . "e'\\1")
                  ("ì\\([^a-z]\\)" . "i`\\1")
                  ("ù\\([^a-z]\\)" . "u`\\1")
                  ("ò\\([^a-z]\\)" . "o`\\1")))
    (dptools--replace-regexp-all (car rule) (cdr rule))))