/[debian]/fullquottel/trunk/fullquottel.cpp
ViewVC logotype

Annotation of /fullquottel/trunk/fullquottel.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 697 - (hide annotations)
Sun Apr 1 22:11:36 2007 UTC (14 years, 4 months ago) by gregoa
File size: 27391 byte(s)
* New upstream release (closes: #417186).
1 gregoa 120 /*
2     fullquottel - a program that helps to distinguish whether an email has the "tofu" style.
3     Copyright (C) 2005 Toastfreeware <toast@toastfreeware.priv.at> -
4     Philipp Spitzer and Gregor Herrmann
5    
6     This program is free software; you can redistribute it and/or modify
7     it under the terms of the GNU General Public License as published by
8     the Free Software Foundation; either version 2 of the License, or
9     (at your option) any later version.
10    
11     This program is distributed in the hope that it will be useful,
12     but WITHOUT ANY WARRANTY; without even the implied warranty of
13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14     GNU General Public License for more details.
15    
16     You should have received a copy of the GNU General Public License
17     along with this program; if not, write to the Free Software
18     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19     */
20    
21    
22     /** \mainpage Documentation for fullquottel
23     *
24     * \section Introduction
25     *
26     * fullquottel is a program that helps to distinguish whether an email has the
27     * "tofu" style (tofu ... german "Text oben, Fullquote unten" for "text above,
28     * fullquote below"; cf. "top posting", "jeopardy style", ...) or not. It takes
29     * the (already decoded) email body as input (standard input) and returns one
30     * of the words: 'Goodquottel' or 'Fullquottel' on stdout.
31     *
32     *
33     * \section Details
34     *
35     * The program performs several tests to decide whether the mail is a tofu mail
36     * or not. Each test produces a score. The final sum of the individual test
37     * scores is compared to a threshold. If it is above it, the mail is
38     * classified as tofu mail ('Fullquottel' is returned). Furthermore, the
39     * score itself is returned as a number and as a row of asterisks, where each
40     * score point produces one *.
41     *
42     * Each test can be customized:
43     * - It can be turned on and off (all tests are turned on by default).
44     * - Its score can be specified (most of the tests have the score of 1.0 by
45     * default).
46     * - The individual threshold of some tests can be specified.
47     * - Some tests have additional parameters.
48     *
49     *
50     * \section doc_tests The Tests
51     *
52     * \subsection doc_ownSigTest Own Signature Test (ownSigTest)
53     *
54     * This test searches for the own email signature (default in file:
55     * ~/.signature), no matter if it is quoted or not. If the own signature is
56     * included in a reply, this is a strong sign for a tofu mail.
57     *
58     *
59     * \subsection doc_msTest Microsoft Attribution Line Test (msTest)
60     *
61     * People using MS Outlook (Express) often have the original mail at the bottom
62     * of their reply below a line like "----- Original Message -----". This test
63     * searches for this kind of lines. It is possible to compare the line number
64     * where it was found to a certain threshold. If the line number is equal to or
65     * higher than this threshold, the test is positive and the specified score is
66     * added.
67     *
68     * Why the line number? It is possible that a user replies in a non-tofu way
69     * but has this "original message" line at the very beginning of his reply.
70     *
71     *
72     * \subsection doc_quotedToAllTest Ratio Quoted Lines To All Lines Test (quotedToAllTest)
73     *
74     * This test calculates the ratio between the number of quoted lines and the
75     * number of all lines. The ratio has a value between 0 and 1. It is compared
76     * to a threshold (default: 0.5) and in case the ratio is equal to or higher
77     * than the threshold the score is added to the final score. Additionally a
78     * factor can be specified that adds an additional weighted score:
79     *
80     * score(quotedToAllTest) = ratio >= threshold ? score + ratio * factor : 0
81     *
82     *
83     * \subsection doc_bottomQuotedToAllTest Ratio Quoted Lines At Bottom Of The Mail To All Lines Test (bottomQuotedToAllTest)
84     *
85     * This test calculates the ratio between the number of quoted lines at the
86     * bottom of the mail and the number of all lines. The ratio has a value
87     * between 0 and 1. It is compared to a threshold (default: 0.5) and in case
88     * the ratio is equal to or higher than the threshold the score is added to
89     * the final score. Additionally a factor can be specified that adds an
90     * additional weighted score:
91     *
92     * score(bottomQuotedToAllTest) = ratio >= threshold ? score + ratio * factor : 0
93     *
94     *
95     * \subsection doc_bottomQuotedToQuotedTest Ratio Quoted Lines At Bottom Of The Mail To Quoted Lines Test (bottomQuotedToQuotedTest)
96     *
97     * This test calculates the ratio between the number of quoted lines at the
98     * bottom of the mail and the number of all quoted lines. The ratio has a value
99     * between 0 and 1. It is compared to a threshold (default: 0.5) and in case
100     * the ratio is equal to or higher than the threshold the score is added to
101     * the final score. Additionally a factor can be specified that adds an
102     * additional weighted score:
103     *
104     * score(bottomQuotedToQuotedTest) = ratio >= threshold ? score + ratio * factor : 0
105     *
106     *
107     * \subsection doc_singleBottomQuoteTest Single Quote Block At The Bottom Of The Mail Test (singleBottomQuoteTest)
108     *
109     * This test checks whether the mail has only one quote block and whether this
110     * quote block is at the bottom of the mail - the classical tofu style.
111     *
112     *
113     * \section doc_links Links used for programming
114     *
115     * \subsection links_optionParser option parser
116     *
117     * - http://getpot.sourceforge.net/
118     * - http://platon.sk/projects/main_page.php?project_id=3
119     * - http://www.gubbe.ch/code/libcfgparse.php
120     * - http://stlplus.sourceforge.net/stlplus/docs/ini_manager.html
121     * - http://config-plus.sourceforge.net/
122     *
123     *
124     * \subsection links_mimeDecoder Mime decoder
125     *
126     * - http://directory.fsf.org/libs/cpp/mimetic.html
127     * - http://codesink.org/mimetic_mime_library.html#snippets
128     */
129    
130     /// \cond doc_programmer
131    
132 gregoa 697 #include <cstdlib>
133 gregoa 120 #include <string>
134     #include <vector>
135     #include <map>
136     #include <fstream>
137     #include <sstream>
138     #include <iostream>
139     #include <iomanip> // for setprecision
140     #include <stdexcept>
141     #include <cctype> // for isspace()
142     #include <argp.h>
143    
144     using namespace std;
145    
// Process exit codes used when the program cannot produce a classification.
const int err_noinput = 255;    // stdin is not in a good state
const int err_nosigfile = 254;  // the signature file is missing or unreadable
const int err_options = 253;    // presumably meant for option/configuration errors
150    
/// Text that has been split into lines.
/// Every element should keep its line delimiter, so that concatenating all elements
/// reconstructs the unsplit text.
typedef std::vector<std::string> TextLines;

/// Key/value store in which one key may carry several values.
typedef std::multimap<std::string, std::string> MultiMap;
155    
156    
157     // Command line parsing
158     // --------------------
159    
// Strings picked up by argp for --version and for the bug address in --help.
const char *argp_program_version = "fullquottel 0.1.2";
const char *argp_program_bug_address = "<toast@toastfreeware.priv.at>";

// Program description and usage text handed to the argp parser.
static char doc[] = "fullquottel - tool for detecting full quotes in mails or postings";
static char args_doc[] = "< infile > outfile";

// Option table. Columns: long name, key, argument name, flags, help text, group.
// The keys are the values parse_opt() switches on.
static struct argp_option argp_options[] = {
    // general options
    {"scorethreshold", 't', "NUMBER", 0, "Threshold for final score that discriminates between Goodquottel and Fullquottel (default: 1.5)", 0},
    {"quotechars", -1, "CHARS", 0, "Chars used for quoting (default: >|#)", 1},
    {"debug", 'd', 0, 0, "Debug option (prints single test results; default: off)", 2},

    // ownSigTest
    {"ownsigtest", -10, "1|0", 0, "Turn on/off ownSigTest (default: on)", 10},
    {"ownsigtestscore", -11, "NUMBER", 0, "Score for ownSigTest (default: 1.0)", 11},
    {"ownsigtestfile", 'f', "SIGFILE", 0, "Signature file to test against (default: ~/.signature)", 12},

    // msTest
    {"mstest", -20, "1|0", 0, "Turn on/off msTest (Microsoft attribution lines; default: on)", 20},
    {"mstestscore", -21, "NUMBER", 0, "Score for msTest (default: 1.0)", 21},
    {"mstestthreshold", -22, "NUMBER", 0, "Threshold for msTest (at or below which line a Microsoft attribution line ist found; default: 2)", 22},

    // quotedToAllTest
    {"quotedtoalltest", -30, "1|0", 0, "Turn on/off quotedToAllTest (ratio quoted lines to all; default: on)", 30},
    {"quotedtoalltestscore", -31, "NUMBER", 0, "Score for quotedToAllTest (default: 1.0)", 31},
    {"quotedtoalltestfactor", -32, "NUMBER", 0, "Result = score + ratio * FACTOR (default: 0)", 32},
    {"quotedtoalltestthreshold", -33, "NUMBER", 0, "Ratio threshold for activating quotedToAllTest (default: 0.5)", 33},

    // bottomQuotedToAllTest
    {"bottomquotedtoalltest", -40, "1|0", 0, "Turn on/off bottomQuotedToAllTest (ratio quoted lines at bottom to all; default: on)", 40},
    {"bottomquotedtoalltestscore", -41, "NUMBER", 0, "Score for bottomQuotedToAllTest (default: 1.0)", 41},
    {"bottomquotedtoalltestfactor", -42, "NUMBER", 0, "Result = score + ratio * FACTOR (default: 0)", 42},
    {"bottomquotedtoalltestthreshold", -43, "NUMBER", 0, "Ratio threshold for activating bottomQuotedToAllTest (default: 0.5)", 43},

    // bottomQuotedToQuotedTest
    {"bottomquotedtoquotedtest", -50, "1|0", 0, "Turn on/off bottomQuotedToQuotedTest (ratio quoted lines at bottom to all quoted lines; default: on)", 50},
    {"bottomquotedtoquotedtestscore", -51, "NUMBER", 0, "Score for bottomQuotedToQuotedTest (default: 1.0)", 51},
    {"bottomquotedtoquotedtestfactor", -52, "NUMBER", 0, "Result = score + ratio * FACTOR (default: 0)", 52},
    {"bottomquotedtoquotedtestthreshold", -53, "NUMBER", 0, "Ratio threshold for activating bottomQuotedToQuotedTest (default: 0.5)", 53},

    // singleBottomQuoteTest
    {"singlebottomquotetest", -60, "1|0", 0, "Turn on/off singleBottomQuoteTest (only one quote block, and at the bottom; default: on)", 60},
    {"singlebottomquotetestscore", -61, "NUMBER", 0, "Score for singleBottomQuoteTest (default: 1.0)", 61},

    // documentation-only entry shown in the help output
    {"\nHINT: All long options (without leading --) can be used in ~/.fullquottelrc\n", -70, 0, OPTION_DOC, "", 70},

    // terminating all-zero entry
    {0, 0, 0, 0, 0, 0}
};
191    
192    
/// Common base of the structures that hold the rating settings of a single mail test.
struct RatingBase {
    bool active;   ///< whether the test is switched on.
    double score;  ///< score that is added when the test succeeds.

    /// A test is switched on and worth a score of 1 unless configured otherwise.
    RatingBase(): active(true), score(1) {}
};
199    
200    
201     /// \brief structure to store the rating of an ratio test:
202     ///
203     /// resultscore = (value >= threshold) ? score + value * factor : 0;
204     struct RatioRating: public RatingBase {
205     double threshold;
206     double factor;
207     double rate(double value) const {return (active && value >= threshold) ? score + value * factor: 0;}
208     RatioRating() {threshold = 0.5; factor = 0;}
209     };
210    
211    
212     /// \brief Structure to store the rating of an integer test:
213     ///
214     /// resultscore = (value >= threshold) ? score : 0;
215     struct IntegerRating: public RatingBase {
216     int threshold;
217     double rate(int value) const {return (active && value >= threshold) ? score : 0;}
218     };
219    
220    
221     /// \brief Structure to store the rating of an integer test:
222     ///
223     /// resultscore = value ? score : 0;
224     struct BoolRating: public RatingBase {
225     double rate(bool value) const {return (active && value) ? score : 0;}
226     };
227    
228    
229     /// Structure for signature test options.
230     struct SigRating: public BoolRating {
231     string file; ///< signature file (with full path. ~ is possible for $HOME)
232     };
233    
234    
235     /// \brief Structure to store the program options.
236     ///
237     /// It stores all options and parameters that are needed to run the program. The command line options, system wide file options,
238     /// and user file options are transfered within this struct and after that, only this struct is used to access the options.
239     struct Options {
240     double scoreThreshold; ///< Threshold of the added scores of the tests, that decide, whether the mail is a tofu mail or not (default: 0.5).
241     string quoteChars; ///< Character(s) that are used to quote. The default value is ">|#".
242     bool debug; ///< If this option is set, the internal variables that show the mail statistics are print (default: false).
243     TextLines attributionLines; ///< List of Microsoft Attribution Lines (needed for the \ref doc_msTest).
244     // ratings/tests
245     SigRating ownSigTest; ///< Signature filename and rating options for the \ref doc_ownSigTest (default: ~/.signature).
246     IntegerRating msTest; ///< Rating options for the \ref doc_msTest.
247     RatioRating quotedToAllTest; ///< Rating options for the \ref doc_quotedToAllTest.
248     RatioRating bottomQuotedToAllTest; ///< Rating options for the \ref doc_bottomQuotedToAllTest.
249     RatioRating bottomQuotedToQuotedTest; ///< Rating options for the \ref doc_bottomQuotedToQuotedTest.
250     BoolRating singleBottomQuoteTest; ///< Rating options for the \ref doc_singleBottomQuoteTest.
251    
252     /// the constructor initializes the options with their default values.
253     Options() {
254     scoreThreshold = 1.5;
255     quoteChars = ">|#";
256     debug = false;
257     // should we keep the following?
258     attributionLines.push_back("-----Urspr√ľngliche Nachricht-----");
259     attributionLines.push_back("-----Original Message-----");
260     attributionLines.push_back("----- Urspr√ľngliche Nachricht -----");
261     attributionLines.push_back("----- Original Message -----");
262     // ratings
263     ownSigTest.file = "~/.signature";
264     msTest.threshold = 2;
265     }
266     };
267    
268    
269     /// searches for key within the multimap. If found, its value is assigned to option.
270     void setStringOptionFromMultiMap(const MultiMap& mm, const string& key, string& option) {
271     typedef MultiMap::const_iterator CI;
272     pair<CI,CI> entries = mm.equal_range(key);
273     if (entries.first != entries.second) option = entries.first->second;
274     }
275    
276    
277     void setDoubleOptionFromMultiMap(const MultiMap& mm, const string& key, double& option) {
278     string value;
279     setStringOptionFromMultiMap(mm, key, value);
280     if (value.empty()) return;
281     istringstream ist(value);
282     ist >> option;
283     }
284    
285    
286     void setIntOptionFromMultiMap(const MultiMap& mm, const string& key, int& option) {
287     string value;
288     setStringOptionFromMultiMap(mm, key, value);
289     if (value.empty()) return;
290     istringstream ist(value);
291     ist >> option;
292     }
293    
294    
295     void setBoolOptionFromMultiMap(const MultiMap& mm, const string& key, bool& option) {
296     string value;
297     setStringOptionFromMultiMap(mm, key, value);
298     if (value.empty()) return;
299     istringstream ist(value);
300     ist >> option;
301     }
302    
303    
304     /// incorporates the multimap to the options
305     void useConfig(Options& options, const MultiMap& mm) {
306     typedef MultiMap::const_iterator CI;
307    
308     setDoubleOptionFromMultiMap(mm, "scorethreshold", options.scoreThreshold);
309     setStringOptionFromMultiMap(mm, "quotechars", options.quoteChars);
310     setBoolOptionFromMultiMap(mm, "debug", options.debug);
311     setBoolOptionFromMultiMap(mm, "ownsigtest", options.ownSigTest.active);
312     setDoubleOptionFromMultiMap(mm, "ownsigtestscore", options.ownSigTest.score);
313     setStringOptionFromMultiMap(mm, "ownsigtestfile", options.ownSigTest.file);
314     setBoolOptionFromMultiMap(mm, "mstest", options.msTest.active);
315     setDoubleOptionFromMultiMap(mm, "mstestscore", options.msTest.score);
316     setIntOptionFromMultiMap(mm, "mstestthreshold", options.msTest.threshold);
317     setBoolOptionFromMultiMap(mm, "quotedtoalltest", options.quotedToAllTest.active);
318     setDoubleOptionFromMultiMap(mm, "quotedtoalltestscore", options.quotedToAllTest.score);
319     setDoubleOptionFromMultiMap(mm, "quotedtoalltestthreshold", options.quotedToAllTest.threshold);
320     setDoubleOptionFromMultiMap(mm, "quotedtoalltestfactor", options.quotedToAllTest.factor);
321     setBoolOptionFromMultiMap(mm, "bottomquotedtoalltest", options.bottomQuotedToAllTest.active);
322     setDoubleOptionFromMultiMap(mm, "bottomquotedtoalltestscore", options.bottomQuotedToAllTest.score);
323     setDoubleOptionFromMultiMap(mm, "bottomquotedtoalltestthreshold", options.bottomQuotedToAllTest.threshold);
324     setDoubleOptionFromMultiMap(mm, "bottomquotedtoalltestfactor", options.bottomQuotedToAllTest.factor);
325     setBoolOptionFromMultiMap(mm, "bottomquotedtoquotedtest", options.bottomQuotedToQuotedTest.active);
326     setDoubleOptionFromMultiMap(mm, "bottomquotedtoquotedtestscore", options.bottomQuotedToQuotedTest.score);
327     setDoubleOptionFromMultiMap(mm, "bottomquotedtoquotedtestthreshold", options.bottomQuotedToQuotedTest.threshold);
328     setDoubleOptionFromMultiMap(mm, "bottomquotedtoquotedtestfactor", options.bottomQuotedToQuotedTest.factor);
329     setBoolOptionFromMultiMap(mm, "singlebottomquotetest", options.singleBottomQuoteTest.active);
330     setDoubleOptionFromMultiMap(mm, "singlebottomquotetestscore", options.singleBottomQuoteTest.score);
331    
332     // add or replace attributionlines
333     bool addAttributionlines = true; // default value
334     setBoolOptionFromMultiMap(mm, "addattributionlines", addAttributionlines);
335     if (!addAttributionlines) options.attributionLines.clear();
336    
337     // Attribution lines
338     pair<CI,CI> entries = mm.equal_range("attributionline");
339     for (CI i = entries.first; i != entries.second; ++i) options.attributionLines.push_back(i->second);
340     }
341    
342    
343     /// parse function for the program options
344     static error_t parse_opt(int key, char *arg, struct argp_state *state) {
345     struct Options *options = (Options*) state->input;
346    
347     string s;
348     if (arg) s = arg;
349     istringstream ist(s);
350     switch (key) {
351     case 't': ist >> options->scoreThreshold; break;
352     case -1: options->quoteChars = arg; break;
353     case 'd': options->debug = true; break;
354     case -10: ist >> options->ownSigTest.active; break;
355     case -11: ist >> options->ownSigTest.score; break;
356     case 'f': options->ownSigTest.file = arg; break;
357     case -20: ist >> options->msTest.active; break;
358     case -21: ist >> options->msTest.score; break;
359     case -22: ist >> options->msTest.threshold; break;
360     case -30: ist >> options->quotedToAllTest.active; break;
361     case -31: ist >> options->quotedToAllTest.score; break;
362     case -32: ist >> options->quotedToAllTest.factor; break;
363     case -33: ist >> options->quotedToAllTest.threshold; break;
364     case -40: ist >> options->bottomQuotedToAllTest.active; break;
365     case -41: ist >> options->bottomQuotedToAllTest.score; break;
366     case -42: ist >> options->bottomQuotedToAllTest.factor; break;
367     case -43: ist >> options->bottomQuotedToAllTest.threshold; break;
368     case -50: ist >> options->bottomQuotedToQuotedTest.active; break;
369     case -51: ist >> options->bottomQuotedToQuotedTest.score; break;
370     case -52: ist >> options->bottomQuotedToQuotedTest.factor; break;
371     case -53: ist >> options->bottomQuotedToQuotedTest.threshold; break;
372     case -60: ist >> options->singleBottomQuoteTest.active; break;
373     case -61: ist >> options->singleBottomQuoteTest.score; break;
374    
375    
376     case ARGP_KEY_ARG:
377     if (state->arg_num > 0)
378     // we have no arguments
379     argp_usage (state);
380     break;
381    
382     default:
383     return ARGP_ERR_UNKNOWN;
384     }
385     return 0;
386     }
387    
388    
389     /// argp parser.
390     static struct argp argp = {argp_options, parse_opt, args_doc, doc};
391    
392    
393     // Helper functions
394     // ----------------
395    
/// Tells whether \a needle occurs anywhere within \a haystack.
bool isSubString(const std::string &needle, const std::string &haystack) {
    const std::string::size_type where = haystack.find(needle);
    return where != std::string::npos;
}
400    
401    
/// Returns a copy of \a text without its leading whitespace.
std::string ltrim(const std::string& text) {
    std::string::size_type i = 0;
    // isspace() is only defined for values representable as unsigned char (and EOF);
    // a plain char may be negative for non-ASCII bytes, hence the cast.
    while (i < text.size() && std::isspace(static_cast<unsigned char>(text[i]))) ++i;
    return text.substr(i);
}
408    
409    
/// Returns a copy of \a text without its trailing whitespace.
std::string rtrim(const std::string& text) {
    std::string::size_type i = text.size();
    // isspace() is only defined for values representable as unsigned char (and EOF);
    // a plain char may be negative for non-ASCII bytes, hence the cast.
    while (i != 0 && std::isspace(static_cast<unsigned char>(text[i-1]))) --i;
    return text.substr(0, i);
}
416    
417    
418     /// cuts leading and trailing whitespace
419     string trim(const string& text) {
420     return ltrim(rtrim(text));
421     }
422    
423    
424     /// loads a file into a TextLines class.
425     TextLines loadTextLines(istream& istr) {
426     TextLines content;
427     while (istr) {
428     string line;
429     getline(istr, line);
430     if (istr) content.push_back(line + '\n');
431     }
432     return content;
433     }
434    
435    
436     /// Loads a key=value file (lines starting with # or empty lines are ignored)
437     /// Multiple values for one key are allowed
438     /// If an syntax error occurs, an exception is thrown.
439     MultiMap multiMapFromTextLines(const TextLines& lines) throw(runtime_error) {
440     MultiMap mm;
441     for (unsigned i = 0; i != lines.size(); ++i) {
442     string line = trim(lines[i]);
443     if (line.empty()) continue;
444     if (line[0] == '#') continue;
445     string::size_type pos = line.find("=");
446     if (pos == string::npos) throw std::runtime_error("hash parsing error"); // todo: better error message with line number
447     string key = rtrim(line.substr(0, pos));
448     string value = ltrim(line.substr(pos+1));
449     mm.insert(make_pair(key, value));
450     }
451     return mm;
452     }
453    
454    
/// Expands a leading ~ to the value of $HOME.
/// Only "~" on its own and paths starting with "~/" are expanded. Other names starting
/// with ~ (such as "~user/file") are returned unchanged, because prefixing them with
/// $HOME would produce a wrong path.
/// If HOME is not set, the ~ is replaced by nothing.
std::string expandTildeToHome(std::string file) {
    const bool startsWithTilde = !file.empty() && file[0] == '~';
    const bool refersToOwnHome = startsWithTilde && (file.size() == 1 || file[1] == '/');
    if (refersToOwnHome) {
        const char* home = std::getenv("HOME");
        file.replace(0, 1, home ? home : "");
    }
    return file;
}
465    
466    
467     /// Testing purposes: Return TextLines on cout
468     void showTextLines(const TextLines& tl, bool addEndl = false) {
469     for (TextLines::size_type i = 0; i != tl.size(); ++i) {
470     cout << tl[i];
471     if (addEndl) cout << endl;
472     }
473     }
474    
475    
476     /// Testing purposes
477     void showMultiMap(const MultiMap& mm) {
478     for (MultiMap::const_iterator i = mm.begin(); i != mm.end(); ++i) {
479     cout << (*i).first << "==" << (*i).second << "|" << endl;
480     }
481     }
482    
483    
484     /// returns true, if the line begins with the specified Quotestrings
485     bool isQuotedLine(const string &line, const string &quoteChars) {
486     string text = ltrim(line);
487     if (!text.size()) return false;
488     for (string::size_type i = 0; i != quoteChars.size(); ++i) if (quoteChars[i] == text[0]) return true;
489     return false;
490     }
491    
492    
493     /// cuts the signature out of the body and returns true. If the signature is not found, it returns false and leaves the body untouched.
494     bool cutSignature(TextLines& body, const string &quoteChars) {
495     for (TextLines::size_type i = body.size(); i != 0; --i) {
496     if (isQuotedLine(body[i-1], quoteChars)) return false;
497     if (body[i-1].find("-- ") == 0) {
498     // found signature
499     body.erase(body.begin()+i-1, body.end());
500     return true;
501     }
502     }
503     return false;
504     }
505    
506    
507     /// counts quoted lines
508     TextLines::size_type quotedLines(const TextLines &body, const string &quoteChars) {
509     TextLines::size_type result = 0;
510     for (TextLines::size_type i = 0; i != body.size(); ++i) if (isQuotedLine(body[i], quoteChars)) ++result;
511     return result;
512     }
513    
514    
515    
516     // Score functions
517     // ---------------
518    
519     /// returns true if the signature is found within the body.
520     bool ownSig(const TextLines &body, const TextLines &signature) {
521     if (!signature.size()) return false;
522     TextLines::size_type bodyPos = 0;
523     while (bodyPos != body.size()) {
524     if (!isSubString(signature[0], body[bodyPos])) {++bodyPos; continue;}
525     TextLines::size_type p = bodyPos;
526     bool isSig = true;
527     for (TextLines::size_type sigPos = 1; sigPos < signature.size(); ++sigPos) {
528     ++p;
529     if (p == body.size()) return false;
530     if (!isSubString(signature[sigPos], body[p])) {isSig = false; break;}
531     }
532     if (isSig) return true;
533     ++bodyPos;
534     }
535     return false;
536     }
537    
538    
539    
540     /// returns the amount of quoted lines at the bottom of the message
541     unsigned quotedLinesBottom(const TextLines& bodyNoSig, const string& quoteChars) {
542     unsigned i;
543     unsigned e = 0; // counts the empty lines at the end of the mail
544     for (i = bodyNoSig.size(); i != 0; --i) {
545     if (trim(bodyNoSig[i-1]).size() == 0) {++e; continue;} // ignore empty lines at bottom
546     if (!isQuotedLine(bodyNoSig[i-1], quoteChars)) break;
547     }
548     return bodyNoSig.size() - i - e;
549     }
550    
551    
552     /// Returns the number of the quote blocks
553     unsigned quoteBlockCount(const TextLines& bodyNoSig, const string& quoteChars) {
554     unsigned result = 0;
555     bool wasQuoted = false;
556     for (unsigned i = 0; i != bodyNoSig.size(); ++i) {
557     if (isQuotedLine(bodyNoSig[i], quoteChars)) {
558     if (!wasQuoted) {
559     ++result;
560     wasQuoted = true;
561     }
562     } else wasQuoted = false;
563     }
564     return result;
565     }
566    
567    
568     /// Searches for a line from Outlook-like programs that shows the beginning of the reply
569     /// (like -----Original Message-----) and returns the line number if it is found, 0 otherwise.
570     unsigned microsoftAttributionLineNumber(const TextLines& bodyNoSig, const TextLines& attributionLines) {
571     for (unsigned i = 0; i != bodyNoSig.size(); ++i) {
572     for (unsigned j = 0; j != attributionLines.size(); ++j) if (bodyNoSig[i].find(attributionLines[j]) != string::npos) return i+1;
573     }
574     return 0;
575     }
576    
577    
578     // Main function
579     // -------------
580    
581     int main(int argc, char *argv[]) {
582     // Settings
583     Options options;
584    
585     // system config file /etc/fullquottelrc
586     ifstream sysconf_stream("/etc/fullquottelrc");
587     if (sysconf_stream) {
588     TextLines tlSysconf = loadTextLines(sysconf_stream);
589     MultiMap mmSysconf = multiMapFromTextLines(tlSysconf);
590     useConfig(options, mmSysconf);
591     }
592    
593     // system config file ~/.fullquottelrc
594     ifstream userconf_stream(expandTildeToHome("~/.fullquottelrc").c_str());
595     if (userconf_stream) {
596     TextLines tlUserconf = loadTextLines(userconf_stream);
597     MultiMap mmUserconf = multiMapFromTextLines(tlUserconf);
598     useConfig(options, mmUserconf);
599     }
600    
601     // parse command line options
602     argp_parse (&argp, argc, argv, 0, 0, &options);
603    
604     // Get message body from stdin and store it in body.
605     TextLines body;
606     if (!cin.good()) {
607     cerr << "No input on stdin." << endl;
608     return err_noinput;
609     }
610     body = loadTextLines(cin);
611    
612     // load signature file
613     string sigFile = expandTildeToHome(options.ownSigTest.file);
614     ifstream sig_stream(sigFile.c_str()); // maybe use with option ios_base::binary
615     if (!sig_stream) {
616     cerr << "Sigfile " << options.ownSigTest.file << " missing or unreadable." << endl;
617     return err_nosigfile;
618     }
619     TextLines signature = loadTextLines(sig_stream);
620    
621     // create a non-signature version of the mail and store it in bodyNoSig
622     TextLines bodyNoSig = body;
623     cutSignature(bodyNoSig, options.quoteChars);
624    
625    
626     // debug
627     // showTextLines(options.attributionLines, true);
628     // showTextLines(bodyNoSig);
629    
630     // Analyze mail
631     // basic values and "simple" tests
632     bool mailOwnSig = ownSig(bodyNoSig, signature);
633     unsigned mailLines = bodyNoSig.size();
634     unsigned mailQuotedLines = quotedLines(bodyNoSig, options.quoteChars);
635     unsigned mailQuotedLinesBottom = quotedLinesBottom(bodyNoSig, options.quoteChars);
636     unsigned mailQuoteBlockCount = quoteBlockCount(bodyNoSig, options.quoteChars);
637     unsigned mailMicrosoftAttributionLineNumber = microsoftAttributionLineNumber(bodyNoSig, options.attributionLines);
638     // "combined" test
639     double mailQuotedToAll = mailLines ? (double) mailQuotedLines / (double) mailLines : 0;
640     double mailBottomQuotedToQuoted = mailQuotedLines ? (double) mailQuotedLinesBottom / (double) mailQuotedLines : 0;
641     double mailBottomQuotedToAll = mailLines ? (double) mailQuotedLinesBottom / (double) mailLines : 0;
642     bool mailSingleBottomQuote = mailQuotedLinesBottom > 0 && mailQuoteBlockCount == 1;
643    
644     // Debug output
645     if (options.debug) {
646     cout << "mailOwnSig: " << mailOwnSig << endl;
647     cout << "mailLines: " << mailLines << endl;
648     cout << "mailQuotedLines: " << mailQuotedLines << endl;
649     cout << "mailQuotedLinesBottom: " << mailQuotedLinesBottom << endl;
650     cout << "mailQuoteBlockCount: " << mailQuoteBlockCount << endl;
651     cout << "mailMicrosoftAttributionLineNumber: " << mailMicrosoftAttributionLineNumber << endl;
652     cout << "mailQuotedToAll: " << mailQuotedToAll << endl;
653     cout << "mailBottomQuotedToQuoted: " << mailBottomQuotedToQuoted << endl;
654     cout << "mailBottomQuotedToAll: " << mailBottomQuotedToAll << endl;
655     cout << "mailSingleBottomQuote: " << mailSingleBottomQuote << endl;
656     }
657    
658     // Scoring
659     double score = 0;
660     if (bodyNoSig.size() == 0) {
661     } else {
662     score += options.ownSigTest.rate(mailOwnSig);
663     score += options.msTest.rate(mailMicrosoftAttributionLineNumber);
664     score += options.quotedToAllTest.rate(mailQuotedToAll);
665     score += options.bottomQuotedToAllTest.rate(mailBottomQuotedToAll);
666     score += options.bottomQuotedToQuotedTest.rate(mailBottomQuotedToQuoted);
667     score += options.singleBottomQuoteTest.rate(mailSingleBottomQuote);
668     }
669     if (options.debug) {
670     cout << "Score: " << score << endl;
671     }
672     bool fullQuottel = score > options.scoreThreshold;
673     string scoreText;
674     for (int s = 1; s <= score+0.5; ++s) scoreText += '*';
675    
676     // Show result
677     cout << (fullQuottel ? "Fullquottel" : "Goodquottel") << " [" << fixed << setprecision(2) << score << "] (" << scoreText << ")" << endl;
678     return 0;
679     }
680    
681     /// \endcond

  ViewVC Help
Powered by ViewVC 1.1.26