/[debian]/fullquottel/trunk/fullquottel.cpp
ViewVC logotype

Contents of /fullquottel/trunk/fullquottel.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 122 - (show annotations)
Sat Feb 25 16:41:23 2006 UTC (15 years, 3 months ago) by gregoa
File size: 27372 byte(s)
[svn-inject] Forking fullquottel source to Trunk
1 /*
2 fullquottel - a program that helps to distinguish whether an email has the "tofu" style.
3 Copyright (C) 2005 Toastfreeware <toast@toastfreeware.priv.at> -
4 Philipp Spitzer and Gregor Herrmann
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21
22 /** \mainpage Documentation for fullquottel
23 *
24 * \section Introduction
25 *
26 * fullquottel is a program that helps to distinguish whether an email has the
27 * "tofu" style (tofu ... German "Text oben, Fullquote unten" for "text above,
28 * fullquote below"; cf. "top posting", "jeopardy style", ...) or not. It takes
29 * the (already decoded) email body as input (standard input) and returns one
30 * of the words: 'Goodquottel' or 'Fullquottel' on stdout.
31 *
32 *
33 * \section Details
34 *
35 * The program performs several tests to decide whether the mail is a tofu mail
36 * or not. Each test produces a score. The final sum of the individual test
37 * scores is compared to a threshold. If it is above it, the mail is
38 * classified as tofu mail ('Fullquottel' is returned). Furthermore, the
39 * score itself is returned as a number and as a row of asterisks, where each
40 * score point produces one *.
41 *
42 * Each test can be customized:
43 * - It can be turned on and off (all tests are turned on by default).
44 * - Its score can be specified (most of the tests have the score of 1.0 by
45 * default).
46 * - The individual threshold of some tests can be specified.
47 * - Some tests have additional parameters.
48 *
49 *
50 * \section doc_tests The Tests
51 *
52 * \subsection doc_ownSigTest Own Signature Test (ownSigTest)
53 *
54 * This test searches for the own email signature (default in file:
55 * ~/.signature), no matter if it is quoted or not. If the own signature is
56 * included in a reply, this is a strong sign for a tofu mail.
57 *
58 *
59 * \subsection doc_msTest Microsoft Attribution Line Test (msTest)
60 *
61 * People using MS Outlook (Express) often have the original mail at the bottom
62 * of their reply below a line like "----- Original Message -----". This test
63 * searches for this kind of lines. It is possible to compare the line number
64 * where it was found to a certain threshold. If the line number is equal to or
65 * higher than this threshold, the test is positive and the specified score is
66 * added.
67 *
68 * Why the line number? It is possible that a user replies in a non-tofu way
69 * but has this "original message" line at the very beginning of his reply.
70 *
71 *
72 * \subsection doc_quotedToAllTest Ratio Quoted Lines To All Lines Test (quotedToAllTest)
73 *
74 * This test calculates the ratio between the number of quoted lines and the
75 * number of all lines. The ratio has a value between 0 and 1. It is compared
76 * to a threshold (default: 0.5) and in case the ratio is equal to or higher
77 * than the threshold the score is added to the final score. Additionally a
78 * factor can be specified that adds an additional weighted score:
79 *
80 * score(quotedToAllTest) = ratio >= threshold ? score + ratio * factor : 0
81 *
82 *
83 * \subsection doc_bottomQuotedToAllTest Ratio Quoted Lines At Bottom Of The Mail To All Lines Test (bottomQuotedToAllTest)
84 *
85 * This test calculates the ratio between the number of quoted lines at the
86 * bottom of the mail and the number of all lines. The ratio has a value
87 * between 0 and 1. It is compared to a threshold (default: 0.5) and in case
88 * the ratio is equal to or higher than the threshold the score is added to
89 * the final score. Additionally a factor can be specified that adds an
90 * additional weighted score:
91 *
92 * score(bottomQuotedToAllTest) = ratio >= threshold ? score + ratio * factor : 0
93 *
94 *
95 * \subsection doc_bottomQuotedToQuotedTest Ratio Quoted Lines At Bottom Of The Mail To Quoted Lines Test (bottomQuotedToQuotedTest)
96 *
97 * This test calculates the ratio between the number of quoted lines at the
98 * bottom of the mail and the number of all quoted lines. The ratio has a value
99 * between 0 and 1. It is compared to a threshold (default: 0.5) and in case
100 * the ratio is equal to or higher than the threshold the score is added to
101 * the final score. Additionally a factor can be specified that adds an
102 * additional weighted score:
103 *
104 * score(bottomQuotedToQuotedTest) = ratio >= threshold ? score + ratio * factor : 0
105 *
106 *
107 * \subsection doc_singleBottomQuoteTest Single Quote Block At The Bottom of The Mail Test (singleBottomQuoteTest)
108 *
109 * This test checks if the mail has only one quote block and if this quote block is
110 * at the bottom of the mail - the classical tofu style.
111 *
112 *
113 * \section doc_links Links used for programming
114 *
115 * \subsection links_optionParser option parser
116 *
117 * - http://getpot.sourceforge.net/
118 * - http://platon.sk/projects/main_page.php?project_id=3
119 * - http://www.gubbe.ch/code/libcfgparse.php
120 * - http://stlplus.sourceforge.net/stlplus/docs/ini_manager.html
121 * - http://config-plus.sourceforge.net/
122 *
123 *
124 * \subsection links_mimeDecoder Mime decoder
125 *
126 * - http://directory.fsf.org/libs/cpp/mimetic.html
127 * - http://codesink.org/mimetic_mime_library.html#snippets
128 */
129
130 /// \cond doc_programmer
131
#include <argp.h>

#include <cctype>    // for isspace()
#include <cstdlib>   // for getenv()
#include <fstream>
#include <iomanip>   // for setprecision
#include <iostream>
#include <map>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>
142
143 using namespace std;
144
// Error codes (returned from main() as the process exit status)
const int err_noinput = 255;   ///< returned when stdin is not in a good state
const int err_nosigfile = 254; ///< returned when the signature file cannot be opened
const int err_options = 253;   ///< NOTE(review): presumably meant for option/config errors - confirm where it is used
149
/// This type represents text separated into lines.
/// Line delimiters should be preserved so that the unseparated text can be easily reconstructed.
typedef vector<string> TextLines;
/// Key/value store that may hold several values for the same key (filled from key=value configuration lines).
typedef multimap<string, string> MultiMap;
154
155
156 // Command line parsing
157 // --------------------
158
/// Program name and version. NOTE(review): presumably the global hook that argp prints for --version - see the glibc argp documentation.
const char *argp_program_version = "fullquottel 0.1.1";
/// Contact address for bug reports. NOTE(review): presumably the global hook that argp shows in its --help output - confirm.
const char *argp_program_bug_address = "<toast@toastfreeware.priv.at>";
/// One-line program description; passed as the last field of the `argp` parser definition.
static char doc[] = "fullquottel - tool for detecting full quotes in mails or postings";
/// Usage synopsis; the program reads the mail from stdin and writes the verdict to stdout, hence the shell redirections.
static char args_doc[] = "< infile > outfile";
/// Option table handed to argp.
///
/// Each row is {long name, key, argument placeholder, flags, help text, help group}.
/// Keys that are printable characters double as short options; the negative keys
/// exist only as long options and are dispatched on in parse_opt().
/// The table is terminated by an all-zero entry.
static struct argp_option argp_options[] = {
    {"scorethreshold", 't', "NUMBER", 0, "Threshold for final score that discriminates between Goodquottel and Fullquottel (default: 1.5)", 0 },
    {"quotechars", -1, "CHARS", 0, "Chars used for quoting (default: >|#)", 1 },
    {"debug", 'd', 0, 0, "Debug option (prints single test results; default: off)", 2 },
    {"ownsigtest", -10, "1|0", 0, "Turn on/off ownSigTest (default: on)", 10 },
    {"ownsigtestscore", -11, "NUMBER", 0, "Score for ownSigTest (default: 1.0)", 11 },
    {"ownsigtestfile", 'f', "SIGFILE", 0, "Signature file to test against (default: ~/.signature)", 12 },
    {"mstest", -20, "1|0", 0, "Turn on/off msTest (Microsoft attribution lines; default: on)", 20 },
    {"mstestscore", -21, "NUMBER", 0, "Score for msTest (default: 1.0)", 21 },
    // help text fixed: the German "ist" had slipped into the English sentence
    {"mstestthreshold", -22, "NUMBER", 0, "Threshold for msTest (at or below which line a Microsoft attribution line is found; default: 2)", 22 },
    {"quotedtoalltest", -30, "1|0", 0, "Turn on/off quotedToAllTest (ratio quoted lines to all; default: on)", 30 },
    {"quotedtoalltestscore", -31, "NUMBER", 0, "Score for quotedToAllTest (default: 1.0)", 31 },
    {"quotedtoalltestfactor", -32, "NUMBER", 0, "Result = score + ratio * FACTOR (default: 0)", 32 },
    {"quotedtoalltestthreshold", -33, "NUMBER", 0, "Ratio threshold for activating quotedToAllTest (default: 0.5)", 33 },
    {"bottomquotedtoalltest", -40, "1|0", 0, "Turn on/off bottomQuotedToAllTest (ratio quoted lines at bottom to all; default: on)", 40 },
    {"bottomquotedtoalltestscore", -41, "NUMBER", 0, "Score for bottomQuotedToAllTest (default: 1.0)", 41 },
    {"bottomquotedtoalltestfactor", -42, "NUMBER", 0, "Result = score + ratio * FACTOR (default: 0)", 42 },
    {"bottomquotedtoalltestthreshold", -43, "NUMBER", 0, "Ratio threshold for activating bottomQuotedToAllTest (default: 0.5)", 43 },
    {"bottomquotedtoquotedtest", -50, "1|0", 0, "Turn on/off bottomQuotedToQuotedTest (ratio quoted lines at bottom to all quoted lines; default: on)", 50 },
    {"bottomquotedtoquotedtestscore", -51, "NUMBER", 0, "Score for bottomQuotedToQuotedTest (default: 1.0)", 51 },
    {"bottomquotedtoquotedtestfactor", -52, "NUMBER", 0, "Result = score + ratio * FACTOR (default: 0)", 52 },
    {"bottomquotedtoquotedtestthreshold", -53, "NUMBER", 0, "Ratio threshold for activating bottomQuotedToQuotedTest (default: 0.5)", 53 },
    {"singlebottomquotetest", -60, "1|0", 0, "Turn on/off singleBottomQuoteTest (only one quote block, and at the bottom; default: on)", 60 },
    {"singlebottomquotetestscore", -61, "NUMBER", 0, "Score for singleBottomQuoteTest (default: 1.0)", 61 },
    // documentation-only entry (OPTION_DOC): printed in the help output, not a real option
    {"\nHINT: All long options (without leading --) can be used in ~/.fullquottelrc\n", -70, 0, OPTION_DOC, "", 70 },
    { 0, 0, 0, 0, 0, 0 }
};
190
191
/// Common state shared by all structures that describe the rating/test of a mail property.
struct RatingBase {
    bool active;   ///< switches the test on (true, the initial value) or off.
    double score;  ///< score granted when the test succeeds (initially 1).

    /// A freshly constructed rating is active and worth one score point.
    RatingBase(): active(true), score(1) {}
};
198
199
200 /// \brief structure to store the rating of an ratio test:
201 ///
202 /// resultscore = (value >= threshold) ? score + value * factor : 0;
203 struct RatioRating: public RatingBase {
204 double threshold;
205 double factor;
206 double rate(double value) const {return (active && value >= threshold) ? score + value * factor: 0;}
207 RatioRating() {threshold = 0.5; factor = 0;}
208 };
209
210
211 /// \brief Structure to store the rating of an integer test:
212 ///
213 /// resultscore = (value >= threshold) ? score : 0;
214 struct IntegerRating: public RatingBase {
215 int threshold;
216 double rate(int value) const {return (active && value >= threshold) ? score : 0;}
217 };
218
219
220 /// \brief Structure to store the rating of an integer test:
221 ///
222 /// resultscore = value ? score : 0;
223 struct BoolRating: public RatingBase {
224 double rate(bool value) const {return (active && value) ? score : 0;}
225 };
226
227
228 /// Structure for signature test options.
229 struct SigRating: public BoolRating {
230 string file; ///< signature file (with full path. ~ is possible for $HOME)
231 };
232
233
234 /// \brief Structure to store the program options.
235 ///
236 /// It stores all options and parameters that are needed to run the program. The command line options, system wide file options,
237 /// and user file options are transfered within this struct and after that, only this struct is used to access the options.
238 struct Options {
239 double scoreThreshold; ///< Threshold of the added scores of the tests, that decide, whether the mail is a tofu mail or not (default: 0.5).
240 string quoteChars; ///< Character(s) that are used to quote. The default value is ">|#".
241 bool debug; ///< If this option is set, the internal variables that show the mail statistics are print (default: false).
242 TextLines attributionLines; ///< List of Microsoft Attribution Lines (needed for the \ref doc_msTest).
243 // ratings/tests
244 SigRating ownSigTest; ///< Signature filename and rating options for the \ref doc_ownSigTest (default: ~/.signature).
245 IntegerRating msTest; ///< Rating options for the \ref doc_msTest.
246 RatioRating quotedToAllTest; ///< Rating options for the \ref doc_quotedToAllTest.
247 RatioRating bottomQuotedToAllTest; ///< Rating options for the \ref doc_bottomQuotedToAllTest.
248 RatioRating bottomQuotedToQuotedTest; ///< Rating options for the \ref doc_bottomQuotedToQuotedTest.
249 BoolRating singleBottomQuoteTest; ///< Rating options for the \ref doc_singleBottomQuoteTest.
250
251 /// the constructor initializes the options with their default values.
252 Options() {
253 scoreThreshold = 1.5;
254 quoteChars = ">|#";
255 debug = false;
256 // should we keep the following?
257 attributionLines.push_back("-----Urspr√ľngliche Nachricht-----");
258 attributionLines.push_back("-----Original Message-----");
259 attributionLines.push_back("----- Urspr√ľngliche Nachricht -----");
260 attributionLines.push_back("----- Original Message -----");
261 // ratings
262 ownSigTest.file = "~/.signature";
263 msTest.threshold = 2;
264 }
265 };
266
267
/// Looks up key in the multimap. If at least one entry exists, the value of the
/// first entry is copied to option; otherwise option is left untouched.
void setStringOptionFromMultiMap(const MultiMap& mm, const string& key, string& option) {
    const MultiMap::const_iterator hit = mm.lower_bound(key);
    if (hit == mm.end() || hit->first != key) return;  // no entry for this key
    option = hit->second;
}
274
275
276 void setDoubleOptionFromMultiMap(const MultiMap& mm, const string& key, double& option) {
277 string value;
278 setStringOptionFromMultiMap(mm, key, value);
279 if (value.empty()) return;
280 istringstream ist(value);
281 ist >> option;
282 }
283
284
285 void setIntOptionFromMultiMap(const MultiMap& mm, const string& key, int& option) {
286 string value;
287 setStringOptionFromMultiMap(mm, key, value);
288 if (value.empty()) return;
289 istringstream ist(value);
290 ist >> option;
291 }
292
293
294 void setBoolOptionFromMultiMap(const MultiMap& mm, const string& key, bool& option) {
295 string value;
296 setStringOptionFromMultiMap(mm, key, value);
297 if (value.empty()) return;
298 istringstream ist(value);
299 ist >> option;
300 }
301
302
303 /// incorporates the multimap to the options
304 void useConfig(Options& options, const MultiMap& mm) {
305 typedef MultiMap::const_iterator CI;
306
307 setDoubleOptionFromMultiMap(mm, "scorethreshold", options.scoreThreshold);
308 setStringOptionFromMultiMap(mm, "quotechars", options.quoteChars);
309 setBoolOptionFromMultiMap(mm, "debug", options.debug);
310 setBoolOptionFromMultiMap(mm, "ownsigtest", options.ownSigTest.active);
311 setDoubleOptionFromMultiMap(mm, "ownsigtestscore", options.ownSigTest.score);
312 setStringOptionFromMultiMap(mm, "ownsigtestfile", options.ownSigTest.file);
313 setBoolOptionFromMultiMap(mm, "mstest", options.msTest.active);
314 setDoubleOptionFromMultiMap(mm, "mstestscore", options.msTest.score);
315 setIntOptionFromMultiMap(mm, "mstestthreshold", options.msTest.threshold);
316 setBoolOptionFromMultiMap(mm, "quotedtoalltest", options.quotedToAllTest.active);
317 setDoubleOptionFromMultiMap(mm, "quotedtoalltestscore", options.quotedToAllTest.score);
318 setDoubleOptionFromMultiMap(mm, "quotedtoalltestthreshold", options.quotedToAllTest.threshold);
319 setDoubleOptionFromMultiMap(mm, "quotedtoalltestfactor", options.quotedToAllTest.factor);
320 setBoolOptionFromMultiMap(mm, "bottomquotedtoalltest", options.bottomQuotedToAllTest.active);
321 setDoubleOptionFromMultiMap(mm, "bottomquotedtoalltestscore", options.bottomQuotedToAllTest.score);
322 setDoubleOptionFromMultiMap(mm, "bottomquotedtoalltestthreshold", options.bottomQuotedToAllTest.threshold);
323 setDoubleOptionFromMultiMap(mm, "bottomquotedtoalltestfactor", options.bottomQuotedToAllTest.factor);
324 setBoolOptionFromMultiMap(mm, "bottomquotedtoquotedtest", options.bottomQuotedToQuotedTest.active);
325 setDoubleOptionFromMultiMap(mm, "bottomquotedtoquotedtestscore", options.bottomQuotedToQuotedTest.score);
326 setDoubleOptionFromMultiMap(mm, "bottomquotedtoquotedtestthreshold", options.bottomQuotedToQuotedTest.threshold);
327 setDoubleOptionFromMultiMap(mm, "bottomquotedtoquotedtestfactor", options.bottomQuotedToQuotedTest.factor);
328 setBoolOptionFromMultiMap(mm, "singlebottomquotetest", options.singleBottomQuoteTest.active);
329 setDoubleOptionFromMultiMap(mm, "singlebottomquotetestscore", options.singleBottomQuoteTest.score);
330
331 // add or replace attributionlines
332 bool addAttributionlines = true; // default value
333 setBoolOptionFromMultiMap(mm, "addattributionlines", addAttributionlines);
334 if (!addAttributionlines) options.attributionLines.clear();
335
336 // Attribution lines
337 pair<CI,CI> entries = mm.equal_range("attributionline");
338 for (CI i = entries.first; i != entries.second; ++i) options.attributionLines.push_back(i->second);
339 }
340
341
342 /// parse function for the program options
343 static error_t parse_opt(int key, char *arg, struct argp_state *state) {
344 struct Options *options = (Options*) state->input;
345
346 string s;
347 if (arg) s = arg;
348 istringstream ist(s);
349 switch (key) {
350 case 't': ist >> options->scoreThreshold; break;
351 case -1: options->quoteChars = arg; break;
352 case 'd': options->debug = true; break;
353 case -10: ist >> options->ownSigTest.active; break;
354 case -11: ist >> options->ownSigTest.score; break;
355 case 'f': options->ownSigTest.file = arg; break;
356 case -20: ist >> options->msTest.active; break;
357 case -21: ist >> options->msTest.score; break;
358 case -22: ist >> options->msTest.threshold; break;
359 case -30: ist >> options->quotedToAllTest.active; break;
360 case -31: ist >> options->quotedToAllTest.score; break;
361 case -32: ist >> options->quotedToAllTest.factor; break;
362 case -33: ist >> options->quotedToAllTest.threshold; break;
363 case -40: ist >> options->bottomQuotedToAllTest.active; break;
364 case -41: ist >> options->bottomQuotedToAllTest.score; break;
365 case -42: ist >> options->bottomQuotedToAllTest.factor; break;
366 case -43: ist >> options->bottomQuotedToAllTest.threshold; break;
367 case -50: ist >> options->bottomQuotedToQuotedTest.active; break;
368 case -51: ist >> options->bottomQuotedToQuotedTest.score; break;
369 case -52: ist >> options->bottomQuotedToQuotedTest.factor; break;
370 case -53: ist >> options->bottomQuotedToQuotedTest.threshold; break;
371 case -60: ist >> options->singleBottomQuoteTest.active; break;
372 case -61: ist >> options->singleBottomQuoteTest.score; break;
373
374
375 case ARGP_KEY_ARG:
376 if (state->arg_num > 0)
377 // we have no arguments
378 argp_usage (state);
379 break;
380
381 default:
382 return ARGP_ERR_UNKNOWN;
383 }
384 return 0;
385 }
386
387
/// argp parser definition: option table, option callback, usage synopsis and program description.
static struct argp argp = {argp_options, parse_opt, args_doc, doc};
390
391
392 // Helper functions
393 // ----------------
394
/// Tells whether needle occurs anywhere inside haystack (an empty needle always does).
bool isSubString(const string &needle, const string &haystack) {
    const string::size_type where = haystack.find(needle);
    return where != string::npos;
}
399
400
/// Returns a copy of text without its leading whitespace.
///
/// Whitespace is whatever isspace() reports. Each character is converted to
/// unsigned char first, because calling isspace() with a negative value (any
/// non-ASCII byte where char is signed) is undefined behaviour.
string ltrim(const string& text) {
    string::size_type i = 0;
    while (i < text.size() && isspace(static_cast<unsigned char>(text[i]))) ++i;
    return text.substr(i);
}
407
408
/// Returns a copy of text without its trailing whitespace.
///
/// Whitespace is whatever isspace() reports. Each character is converted to
/// unsigned char first, because calling isspace() with a negative value (any
/// non-ASCII byte where char is signed) is undefined behaviour.
string rtrim(const string& text) {
    string::size_type i = text.size();
    while (i != 0 && isspace(static_cast<unsigned char>(text[i-1]))) --i;
    return text.substr(0, i);
}
415
416
417 /// cuts leading and trailing whitespace
418 string trim(const string& text) {
419 return ltrim(rtrim(text));
420 }
421
422
/// Reads the stream line by line until it fails and returns the lines.
/// Every returned line ends with '\n', including a last line that had no line break in the input.
TextLines loadTextLines(istream& istr) {
    TextLines content;
    for (string line; getline(istr, line); ) {
        content.push_back(line + '\n');
    }
    return content;
}
433
434
435 /// Loads a key=value file (lines starting with # or empty lines are ignored)
436 /// Multiple values for one key are allowed
437 /// If an syntax error occurs, an exception is thrown.
438 MultiMap multiMapFromTextLines(const TextLines& lines) throw(runtime_error) {
439 MultiMap mm;
440 for (unsigned i = 0; i != lines.size(); ++i) {
441 string line = trim(lines[i]);
442 if (line.empty()) continue;
443 if (line[0] == '#') continue;
444 string::size_type pos = line.find("=");
445 if (pos == string::npos) throw std::runtime_error("hash parsing error"); // todo: better error message with line number
446 string key = rtrim(line.substr(0, pos));
447 string value = ltrim(line.substr(pos+1));
448 mm.insert(make_pair(key, value));
449 }
450 return mm;
451 }
452
453
/// Expands a leading ~ to the value of the HOME environment variable.
///
/// Only "~" on its own or "~/..." is expanded. Any other path, including the
/// "~user/..." form, is returned unchanged, because HOME is not that user's
/// home directory. If HOME is not set, the ~ is replaced by nothing.
string expandTildeToHome(string file) {
    const bool homeRelative = !file.empty() && file[0] == '~'
        && (file.size() == 1 || file[1] == '/');
    if (homeRelative) {
        const char* home = std::getenv("HOME");
        file.replace(0, 1, home ? home : "");
    }
    return file;
}
464
465
/// For testing: writes all lines to cout, one after the other.
/// With addEndl set, an endl is written after every line as well.
void showTextLines(const TextLines& tl, bool addEndl = false) {
    for (TextLines::const_iterator line = tl.begin(); line != tl.end(); ++line) {
        cout << *line;
        if (addEndl) {
            cout << endl;
        }
    }
}
473
474
/// For testing: writes every entry of the multimap to cout as key==value| on a line of its own.
void showMultiMap(const MultiMap& mm) {
    MultiMap::const_iterator entry = mm.begin();
    while (entry != mm.end()) {
        cout << entry->first << "==" << entry->second << "|" << endl;
        ++entry;
    }
}
481
482
483 /// returns true, if the line begins with the specified Quotestrings
484 bool isQuotedLine(const string &line, const string &quoteChars) {
485 string text = ltrim(line);
486 if (!text.size()) return false;
487 for (string::size_type i = 0; i != quoteChars.size(); ++i) if (quoteChars[i] == text[0]) return true;
488 return false;
489 }
490
491
492 /// cuts the signature out of the body and returns true. If the signature is not found, it returns false and leaves the body untouched.
493 bool cutSignature(TextLines& body, const string &quoteChars) {
494 for (TextLines::size_type i = body.size(); i != 0; --i) {
495 if (isQuotedLine(body[i-1], quoteChars)) return false;
496 if (body[i-1].find("-- ") == 0) {
497 // found signature
498 body.erase(body.begin()+i-1, body.end());
499 return true;
500 }
501 }
502 return false;
503 }
504
505
506 /// counts quoted lines
507 TextLines::size_type quotedLines(const TextLines &body, const string &quoteChars) {
508 TextLines::size_type result = 0;
509 for (TextLines::size_type i = 0; i != body.size(); ++i) if (isQuotedLine(body[i], quoteChars)) ++result;
510 return result;
511 }
512
513
514
515 // Score functions
516 // ---------------
517
/// Tells whether the signature occurs within the body.
///
/// The signature is found if there are consecutive body lines of which the
/// first contains signature line 0, the next contains signature line 1, and so
/// on ("contains" means substring, so a quoted signature is found as well).
/// An empty signature is never found.
bool ownSig(const TextLines &body, const TextLines &signature) {
    if (signature.empty()) return false;
    for (TextLines::size_type start = 0; start != body.size(); ++start) {
        if (body[start].find(signature[0]) == string::npos) continue;
        // first line matches here; check the remaining signature lines
        TextLines::size_type matched = 1;
        while (matched < signature.size()) {
            // body ends before the signature does: no later start can fit either
            if (start + matched == body.size()) return false;
            if (body[start + matched].find(signature[matched]) == string::npos) break;
            ++matched;
        }
        if (matched == signature.size()) return true;
    }
    return false;
}
536
537
538
539 /// returns the amount of quoted lines at the bottom of the message
540 unsigned quotedLinesBottom(const TextLines& bodyNoSig, const string& quoteChars) {
541 unsigned i;
542 unsigned e = 0; // counts the empty lines at the end of the mail
543 for (i = bodyNoSig.size(); i != 0; --i) {
544 if (trim(bodyNoSig[i-1]).size() == 0) {++e; continue;} // ignore empty lines at bottom
545 if (!isQuotedLine(bodyNoSig[i-1], quoteChars)) break;
546 }
547 return bodyNoSig.size() - i - e;
548 }
549
550
551 /// Returns the number of the quote blocks
552 unsigned quoteBlockCount(const TextLines& bodyNoSig, const string& quoteChars) {
553 unsigned result = 0;
554 bool wasQuoted = false;
555 for (unsigned i = 0; i != bodyNoSig.size(); ++i) {
556 if (isQuotedLine(bodyNoSig[i], quoteChars)) {
557 if (!wasQuoted) {
558 ++result;
559 wasQuoted = true;
560 }
561 } else wasQuoted = false;
562 }
563 return result;
564 }
565
566
/// Searches for a line from Outlook-like programs that shows the beginning of the reply
/// (like -----Original Message-----).
/// Returns the 1-based number of the first body line that contains any of the
/// attribution lines as a substring, or 0 if there is none.
unsigned microsoftAttributionLineNumber(const TextLines& bodyNoSig, const TextLines& attributionLines) {
    unsigned lineNumber = 0;
    for (TextLines::const_iterator line = bodyNoSig.begin(); line != bodyNoSig.end(); ++line) {
        ++lineNumber;
        for (TextLines::const_iterator marker = attributionLines.begin(); marker != attributionLines.end(); ++marker) {
            if (line->find(*marker) != string::npos) return lineNumber;
        }
    }
    return 0;
}
575
576
577 // Main function
578 // -------------
579
580 int main(int argc, char *argv[]) {
581 // Settings
582 Options options;
583
584 // system config file /etc/fullquottelrc
585 ifstream sysconf_stream("/etc/fullquottelrc");
586 if (sysconf_stream) {
587 TextLines tlSysconf = loadTextLines(sysconf_stream);
588 MultiMap mmSysconf = multiMapFromTextLines(tlSysconf);
589 useConfig(options, mmSysconf);
590 }
591
592 // system config file ~/.fullquottelrc
593 ifstream userconf_stream(expandTildeToHome("~/.fullquottelrc").c_str());
594 if (userconf_stream) {
595 TextLines tlUserconf = loadTextLines(userconf_stream);
596 MultiMap mmUserconf = multiMapFromTextLines(tlUserconf);
597 useConfig(options, mmUserconf);
598 }
599
600 // parse command line options
601 argp_parse (&argp, argc, argv, 0, 0, &options);
602
603 // Get message body from stdin and store it in body.
604 TextLines body;
605 if (!cin.good()) {
606 cerr << "No input on stdin." << endl;
607 return err_noinput;
608 }
609 body = loadTextLines(cin);
610
611 // load signature file
612 string sigFile = expandTildeToHome(options.ownSigTest.file);
613 ifstream sig_stream(sigFile.c_str()); // maybe use with option ios_base::binary
614 if (!sig_stream) {
615 cerr << "Sigfile " << options.ownSigTest.file << " missing or unreadable." << endl;
616 return err_nosigfile;
617 }
618 TextLines signature = loadTextLines(sig_stream);
619
620 // create a non-signature version of the mail and store it in bodyNoSig
621 TextLines bodyNoSig = body;
622 cutSignature(bodyNoSig, options.quoteChars);
623
624
625 // debug
626 // showTextLines(options.attributionLines, true);
627 // showTextLines(bodyNoSig);
628
629 // Analyze mail
630 // basic values and "simple" tests
631 bool mailOwnSig = ownSig(bodyNoSig, signature);
632 unsigned mailLines = bodyNoSig.size();
633 unsigned mailQuotedLines = quotedLines(bodyNoSig, options.quoteChars);
634 unsigned mailQuotedLinesBottom = quotedLinesBottom(bodyNoSig, options.quoteChars);
635 unsigned mailQuoteBlockCount = quoteBlockCount(bodyNoSig, options.quoteChars);
636 unsigned mailMicrosoftAttributionLineNumber = microsoftAttributionLineNumber(bodyNoSig, options.attributionLines);
637 // "combined" test
638 double mailQuotedToAll = mailLines ? (double) mailQuotedLines / (double) mailLines : 0;
639 double mailBottomQuotedToQuoted = mailQuotedLines ? (double) mailQuotedLinesBottom / (double) mailQuotedLines : 0;
640 double mailBottomQuotedToAll = mailLines ? (double) mailQuotedLinesBottom / (double) mailLines : 0;
641 bool mailSingleBottomQuote = mailQuotedLinesBottom > 0 && mailQuoteBlockCount == 1;
642
643 // Debug output
644 if (options.debug) {
645 cout << "mailOwnSig: " << mailOwnSig << endl;
646 cout << "mailLines: " << mailLines << endl;
647 cout << "mailQuotedLines: " << mailQuotedLines << endl;
648 cout << "mailQuotedLinesBottom: " << mailQuotedLinesBottom << endl;
649 cout << "mailQuoteBlockCount: " << mailQuoteBlockCount << endl;
650 cout << "mailMicrosoftAttributionLineNumber: " << mailMicrosoftAttributionLineNumber << endl;
651 cout << "mailQuotedToAll: " << mailQuotedToAll << endl;
652 cout << "mailBottomQuotedToQuoted: " << mailBottomQuotedToQuoted << endl;
653 cout << "mailBottomQuotedToAll: " << mailBottomQuotedToAll << endl;
654 cout << "mailSingleBottomQuote: " << mailSingleBottomQuote << endl;
655 }
656
657 // Scoring
658 double score = 0;
659 if (bodyNoSig.size() == 0) {
660 } else {
661 score += options.ownSigTest.rate(mailOwnSig);
662 score += options.msTest.rate(mailMicrosoftAttributionLineNumber);
663 score += options.quotedToAllTest.rate(mailQuotedToAll);
664 score += options.bottomQuotedToAllTest.rate(mailBottomQuotedToAll);
665 score += options.bottomQuotedToQuotedTest.rate(mailBottomQuotedToQuoted);
666 score += options.singleBottomQuoteTest.rate(mailSingleBottomQuote);
667 }
668 if (options.debug) {
669 cout << "Score: " << score << endl;
670 }
671 bool fullQuottel = score > options.scoreThreshold;
672 string scoreText;
673 for (int s = 1; s <= score+0.5; ++s) scoreText += '*';
674
675 // Show result
676 cout << (fullQuottel ? "Fullquottel" : "Goodquottel") << " [" << fixed << setprecision(2) << score << "] (" << scoreText << ")" << endl;
677 return 0;
678 }
679
680 /// \endcond

  ViewVC Help
Powered by ViewVC 1.1.26