#!/usr/local/bin/php -q
<?php
/*
 * striptext.phpsh
 *
 * A small filter to strip quote markers ('>' characters) off mails - or any
 * text files -, or to unwrap text inside paragraphs, which is useful if the
 * text has been wrapped for lines of maximum length 80. There's a switch to
 * output crude html, too. Requires a php binary; change the path on the
 * first line.
 *
 * Usage: ./striptext.phpsh [--dequote|--dewrap|--html] <filename>
 *            (writes output to <filename>.new)
 *    OR: ./striptext.phpsh [--dequote|--dewrap|--html] < filename
 *            (reads input from standard input and writes to standard output)
 *
 * v1.0 - Mon Sep 22 19:21:52 EEST 2003 blitzer@meizo.com
 *
 */

/**
 * Decide whether a paragraph looks like it was never hard-wrapped.
 *
 * A paragraph counts as "unwrapped" when every one of its lines is shorter
 * than 55 bytes; such paragraphs keep their own line breaks when --dewrap
 * is active.
 *
 * @param array $a Lines of one paragraph (without trailing newlines).
 * @return int 1 if all lines are shorter than 55 bytes, 0 otherwise.
 */
function is_unwrapped ($a)
{
    foreach ($a as $line) {
        if (strlen ($line) >= 55)
            return 0;
    }
    return 1;
}

$argv = $_SERVER ["argv"];

// Always initialise the switches so later tests never read undefined variables.
$html    = in_array ("--html", $argv);
$dequote = in_array ("--dequote", $argv);
$dewrap  = in_array ("--dewrap", $argv);

if (sizeof ($argv) < 2 || (!$html && !$dequote && !$dewrap)) {
    echo ("Usage: " . $argv [0] . " [--dequote|--dewrap|--html] <filename>\n" .
          "           (writes output to <filename>.new)\n" .
          "   OR: " . $argv [0] . " [--dequote|--dewrap|--html] < filename\n" .
          "           (reads input from standard input and writes to standard output)\n");
    exit;
}

// The input file is the first argument that is not a "--switch". This accepts
// both "script file --dewrap" and the documented "script --dewrap file".
// With no such argument the script works as a stdin -> stdout filter.
$filename = null;
foreach (array_slice ($argv, 1) as $arg) {
    if (strncmp ($arg, "--", 2) !== 0) {
        $filename = $arg;
        break;
    }
}

if ($filename !== null)
    $f = @fopen ($filename, "r");
else
    $f = fopen ("php://stdin", "r");

if (!$f) {
    // php://stderr works under both the CLI and the CGI binary.
    file_put_contents ("php://stderr", $argv [0] . ": cannot open '" .
        ($filename !== null ? $filename : "php://stdin") . "' for reading\n");
    exit (1);
}

// Read everything: a single fread() on a pipe returns only one chunk, and a
// fixed byte limit would silently truncate large inputs.
$data = stream_get_contents ($f);
fclose ($f);
if ($data === false)
    $data = "";

$data = str_replace ("\r\n", "\n", $data);

if ($dequote) {
    $arr = explode ("\n", $data);
    foreach ($arr as $i => $line) {
        // Remove every leading quote level ("> > text" -> "text") together
        // with the blanks/tabs around the markers.
        $arr [$i] = preg_replace ('/^(?:[ \t]*>)+[ \t]*/', '', $line);
    }
    $data = implode ("\n", $arr);
}

// Paragraphs are separated by lines that are empty or hold only blanks/tabs.
$paragraphs = preg_split ("#\n[ \t]*\n#", $data);

$ret = "";
foreach ($paragraphs as $s) {
    if (strlen ($ret))
        $ret .= "\n\n";

    $a = explode ("\n", $s);
    $unwrapped = is_unwrapped ($a);

    for ($i = 0; $i < sizeof ($a); $i++) {
        // Keep the raw line for pattern tests; $a[$i] may get HTML-escaped.
        $line = $a [$i];

        if ($html)
            $a [$i] = htmlspecialchars ($a [$i]);

        if ($dewrap) {
            // Lines that keep their own line break: anything in a paragraph
            // that was not hard-wrapped, "Header: value" style lines,
            // "------" separator lines and mbox-style "From ... hh:mm:ss" lines.
            $keep_break = $unwrapped
                || preg_match ('/^[^ :]+: /', $line)
                || preg_match ('/^------/', $line)
                || preg_match ('/^From .*@.* [0-9][0-9]:[0-9][0-9]:[0-9][0-9]/', $line);

            if ($keep_break) {
                if ($html)
                    $a [$i] .= "<br>";
                $a [$i] .= "\n";
            }
            else
                $a [$i] .= " ";
        }
    }

    if ($dewrap) {
        $s = implode ("", $a);
        $s = str_replace ("\t", " ", $s);
        // Collapse runs of spaces left over from joining the lines.
        $s = preg_replace ('/ {2,}/', ' ', $s);
    }
    else
        $s = implode ("\n", $a);

    if ($html)
        $ret .= "<p>";
    $ret .= $s;
    if ($html)
        $ret .= "\n</p>\n";
}

if ($filename !== null) {
    $out = @fopen ($filename . ".new", "w");
    if (!$out) {
        file_put_contents ("php://stderr", $argv [0] . ": cannot open '" .
            $filename . ".new' for writing\n");
        exit (1);
    }
    fwrite ($out, $ret);
    fclose ($out);
    echo ("Stripped file '" . $filename . "' into '" . $filename . ".new'\n");
}
else
    echo ($ret);

exit;
?>