bibtex2bibtex: bibtex2bibtex.cc Source File

00001 /* bibtex2bibtex.cc */
00002 /* program to sort the entries in a Bibtex file into separate files according
00003    to specific criteria */
00004 
00005 /* Copyright (C) 2002 - 2009, Bernd Speiser */
00006 /* This file is part of bibtex2bibtex.
00007 
00008 bibtex2bibtex is free software; you can redistribute it and/or
00009 modify it under the terms of the GNU General Public License
00010 as published by the Free Software Foundation; either version 2
00011 of the License, or (at your option) any later version.
00012 
00013 bibtex2bibtex is distributed in the hope that it will be useful,
00014 but WITHOUT ANY WARRANTY; without even the implied warranty of
00015 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016 GNU General Public License for more details.
00017   
00018 You should have received a copy of the GNU General Public License
00019 along with this program; if not, write to the Free Software
00020 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
00021 02111-1307, USA.
00022 */
00023 
00024 #include <algorithm>
00025 #include <iostream>
00026 #include <sstream>
00027 #include <fstream>
00028 #include <vector>
00029 #include <map>
00030 #include <cstring>
00031 
00032 #include "BibtexEntry.h"
00033 
00034 using namespace bibtex2bibtex;
00035 
00036 std::vector<BibtexEntry *> read_bibtexfile (std::ifstream &);
00037 
/* Break a citation key into its components.

   Recognised shapes and the tokens they produce:
     name_rest            ->  name, rest
     name/name_rest       ->  name, name, rest
     name/name/etal_rest  ->  name, name, "etal_", rest

   For any other shape the message "illegal key structure" is written to
   std::cout and the tokens collected up to that point (possibly none)
   are returned. */
std::vector<std::string> tokenize_key (std::string key)
{
  const std::string::size_type none = std::string::npos;

  std::vector<std::string> parts;

  const std::string::size_type first_slash = key.find_first_of ("/");

  /* shape without any '/': name_rest */
  if (first_slash == none)
  {
    const std::string::size_type underscore = key.find_first_of ("_");

    if (underscore == none)
    {
      std::cout << "illegal key structure: `" << key << "'" << std::endl;

      return parts;
    }

    parts.push_back (key.substr (0, underscore));
    parts.push_back (key.substr (underscore + 1));

    return parts;
  }

  /* at least one '/': the text in front of it is the first token */
  parts.push_back (key.substr (0, first_slash));

  const std::string tail (key.substr (first_slash + 1));

  const std::string::size_type second_slash = tail.find_first_of ("/");

  /* exactly one '/': name/name_rest */
  if (second_slash == none)
  {
    const std::string::size_type underscore = tail.find_first_of ("_");

    if (underscore == none)
    {
      std::cout << "illegal key structure: `" << key << "'" << std::endl;

      return parts;
    }

    parts.push_back (tail.substr (0, underscore));
    parts.push_back (tail.substr (underscore + 1));

    return parts;
  }

  /* two '/': the third component has to start with the marker "etal_" */
  parts.push_back (tail.substr (0, second_slash));

  const std::string last (tail.substr (second_slash + 1));

  /* when no '_' is present npos + 1 wraps around to 0, which gives an
     empty marker and therefore the diagnostic below */
  const std::string::size_type marker_end = last.find_first_of ("_") + 1;

  const std::string marker (last.substr (0, marker_end));

  if (marker != "etal_")
  {
    std::cout << "illegal key structure: `" << key << "'" << std::endl;

    return parts;
  }

  parts.push_back (marker);
  parts.push_back (last.substr (marker_end));

  return parts;
}
00118 
00119 bool equal_keys (BibtexEntry *entry1, BibtexEntry *entry2)
00120 {
00121   std::vector<std::string> tokens1;
00122   std::vector<std::string> tokens2;
00123 
00124   tokens1 = tokenize_key (entry1->key ());
00125   tokens2 = tokenize_key (entry2->key ());
00126 
00127   std::vector<std::string>::iterator stringtoken;
00128 
00129   for (stringtoken = tokens1.begin(); stringtoken < tokens1.end (); 
00130                                                           stringtoken++)
00131     transform ((*stringtoken).begin (), (*stringtoken).end (), 
00132                                       (*stringtoken).begin (), tolower);
00133 
00134   for (stringtoken = tokens2.begin(); stringtoken < tokens2.end (); 
00135                                                           stringtoken++)
00136     transform ((*stringtoken).begin (), (*stringtoken).end (), 
00137                                       (*stringtoken).begin (), tolower);
00138 
00139   if (tokens1.size () != tokens2.size ())
00140     return false;
00141 
00142   else
00143   {
00144     int i = 0;
00145 
00146     for (stringtoken = tokens1.begin(); stringtoken < tokens1.end (); 
00147                                                           stringtoken++)
00148     {
00149       if ((*stringtoken) != tokens2[i])
00150         return false;
00151 
00152       i++;
00153     }
00154 
00155     return true;
00156   }
00157 }
00158 
/* Ordering predicate for "copy" labels, used as the comparator of the
   copy multimap.

   A label is looked at as three parts:
     - the text in front of the first decimal digit,
     - the integer that starts at that digit and is read from the text up
       to the next blank,
     - the remainder, starting at that blank.
   Labels are ordered by the first part, then numerically by the integer,
   then by the remainder.  A label without any digit has the integer 0 and
   an empty remainder. */
class lessCopy {
public:
  /* Returns true when s1 sorts strictly before s2.  The operator is const
     so that the object can be used through a const comparator, as the
     ordered containers require. */
  bool operator () (const std::string &s1, const std::string &s2) const
  {
    std::string prefix1;
    std::string prefix2;
    std::string suffix1;
    std::string suffix2;

    int number1 = 0;
    int number2 = 0;

    split (s1, prefix1, number1, suffix1);
    split (s2, prefix2, number2, suffix2);

    if (prefix1 != prefix2)
      return prefix1 < prefix2;

    if (number1 != number2)
      return number1 < number2;

    return suffix1 < suffix2;
  }

private:
  /* Take `label' apart into prefix, number and suffix as described in the
     class comment. */
  static void split (const std::string &label, std::string &prefix,
                                         int &number, std::string &suffix)
  {
    const std::string::size_type digit
                                 = label.find_first_of ("0123456789");

    prefix = label.substr (0, digit);

    /* Set the defaults first: extraction from an empty stream fails
       without storing anything, so the number must never be left
       uninitialised. */
    number = 0;
    suffix = std::string ();

    if (digit == std::string::npos)
      return;

    const std::string rest (label.substr (digit));

    const std::string::size_type blank = rest.find_first_of (" ");

    std::istringstream digits (rest.substr (0, blank));
    digits >> number;

    if (blank != std::string::npos)
      suffix = rest.substr (blank);
  }
};
00200 
00201 bool less_keys (BibtexEntry *entry1, BibtexEntry *entry2)
00202 {
00203   std::vector<std::string> tokens1;
00204   std::vector<std::string> tokens2;
00205 
00206   tokens1 = tokenize_key (entry1->key ());
00207   tokens2 = tokenize_key (entry2->key ());
00208 
00209   std::vector<std::string>::iterator stringtoken;
00210 
00211   for (stringtoken = tokens1.begin(); stringtoken < tokens1.end (); 
00212                                                           stringtoken++)
00213     transform ((*stringtoken).begin (), (*stringtoken).end (), 
00214                                       (*stringtoken).begin (), tolower);
00215 
00216   for (stringtoken = tokens2.begin(); stringtoken < tokens2.end (); 
00217                                                           stringtoken++)
00218     transform ((*stringtoken).begin (), (*stringtoken).end (), 
00219                                       (*stringtoken).begin (), tolower);
00220 
00221   if (tokens1[0] != tokens2[0])
00222     return tokens1[0] < tokens2[0];
00223 
00224   else
00225   {
00226     if (tokens1.size () == tokens2.size ())
00227     {
00228       if (tokens1[1] != tokens2[1])
00229         return tokens1[1] < tokens2[1];
00230 
00231       else if ((tokens1.size () == 3) && tokens1[2] != tokens2[2])
00232          return tokens1[2] < tokens2[2];
00233 
00234       else if ((tokens1.size () == 4) && (tokens1[3] != tokens2[3]))
00235          return tokens1[3] < tokens2[3];
00236 
00237       else
00238         return false;
00239     }
00240 
00241     else if (tokens1.size () > tokens2.size ())
00242     {
00243       if (tokens2.size () == 2)
00244         return false;
00245 
00246       if (tokens2.size () == 3)
00247       {
00248         if (tokens1[1] == tokens2[1])
00249           return false;
00250 
00251         else
00252           return tokens1[1] < tokens2[1];
00253       }
00254     }
00255 
00256     else if (tokens1.size () < tokens2.size ())
00257     {
00258       if (tokens1.size () == 2)
00259         return true;
00260 
00261       if (tokens1.size () == 3)
00262       {
00263         if (tokens1[1] == tokens2[1])
00264           return true;
00265 
00266         else
00267           return tokens1[1] < tokens2[1];
00268       }
00269     }
00270   }
00271 }
00272 
00273 int main (int argc, char *argv[])
00274 {
00275   if (argc == 1)
00276   {
00277     std::cout << "usage: bibtex2bibtex --version|--help         or"
00278          << std::endl
00279          << "       bibtex2bibtex [options] [-c criterion] -f inputfile"
00280          << " [-[p]m mergefile]" << std::endl;
00281 
00282     exit (1);
00283   }
00284 
00285   if (!strcmp (argv[1], "--version"))
00286   {
00287       std::cout << "bibtex2bibtex, version 1.3" << std::endl;
00288 
00289       exit (0);
00290   }
00291 
00292   if (!strcmp (argv[1], "--help"))
00293   {
00294     std::cout << "usage: bibtex2bibtex --version|--help         or"
00295          << std::endl
00296          << "       bibtex2bibtex [options] [-c criterion] -f inputfile"
00297          << " [-[p]m mergefile]" << std::endl;
00298     exit (0);
00299   }
00300 
00301   std::cout << "starting bibtex2bibtex - sorting and merging of "
00302        "BibTeX databases" << std::endl
00303        << "version 1.3, Copyright (C) 2002 - 2009, Bernd Speiser" << std::endl
00304        << "bibtex2bibtex comes with ABSOLUTELY NO WARRANTY; for details see"
00305        << std::endl
00306        << "the General Public License." << std::endl
00307        << "This is free software, and you are welcome to redistribute it"
00308        << std::endl
00309        << "under certain conditions; see the General Public License for "
00310           "details."
00311        << std::endl;
00312 
00313   bool verbose = false;
00314   bool newfile = false;
00315   bool count = false;
00316   bool duplicate_detect = false;
00317   bool generate_keys = false;
00318   bool pretend_merge = false;
00319 
00320   std::string criterion ("");
00321   std::string inputfile_name ("");
00322   std::string mergefile_name ("");
00323 
00324   std::ifstream input_file;
00325   std::ifstream merge_file;
00326 
00327   for (int i = 1; i < argc; i++)
00328   {
00329     if (!strcmp (argv[i], "--verbose") || !strcmp (argv[i], "-v"))
00330       verbose = true;
00331 
00332     else if (!strcmp (argv[i], "--genkey"))
00333       generate_keys = true;
00334 
00335     else if (!strcmp (argv[i], "--count"))
00336       count = true;
00337 
00338     else if (!strcmp (argv[i], "-n"))
00339       newfile = true;
00340 
00341     else if (!strcmp (argv[i], "-d"))
00342       duplicate_detect = true;
00343 
00344     else if (!strcmp (argv[i], "-f"))
00345     {
00346       i++;
00347 
00348       input_file.open (argv[i]);
00349 
00350       if (!input_file)
00351       {
00352         std::cout << "can't open file: " << argv[i] << std::endl;
00353 
00354         exit (1);
00355       }
00356 
00357       inputfile_name = argv[i];
00358 
00359       if (verbose)
00360         std::cout << "input file `" << inputfile_name 
00361                                              << "' opened" << std::endl;
00362     }
00363 
00364     else if ((!strcmp (argv[i], "-m")) || (!strcmp (argv[i], "-pm")))
00365     {
00366      if (!strcmp (argv[i], "-pm"))
00367        pretend_merge = true;
00368 
00369       i++;
00370 
00371       merge_file.open (argv[i]);
00372 
00373       if (!merge_file)
00374       {
00375         std::cout << "can't open file to merge: " << argv[i] << std::endl;
00376 
00377         exit (1);
00378       }
00379 
00380       mergefile_name = argv[i];
00381 
00382       if (verbose)
00383         std::cout << "merge file `" << mergefile_name 
00384                                              << "' opened" << std::endl;
00385     }
00386 
00387     else if (!strcmp (argv[i], "-c"))
00388     {
00389       i++;
00390 
00391       criterion = argv[i];
00392 
00393       if (criterion != "coworkers" && criterion != "key" 
00394                            && criterion != "source" && criterion != "copy")
00395       {
00396         std::cout << "no or illegal criterion given" << std::endl; 
00397 
00398         exit (1);
00399       }
00400     }
00401   }
00402 
00403   if (inputfile_name == "")
00404   {
00405     std::cout << "no input file given" << std::endl;
00406 
00407     exit (1);
00408   }
00409 
00410   std::vector <BibtexEntry *> entries;
00411 
00412   entries = read_bibtexfile (input_file);
00413 
00414   if (count || verbose)
00415     std::cout << entries.size () << " entries found in file " 
00416                                          << inputfile_name << std::endl;
00417 
00418   std::vector <BibtexEntry *>::const_iterator entry;
00419 
00420   if (generate_keys)
00421   {
00422     std::string newkey;
00423     std::string oldkey;
00424     std::string authors;
00425     std::string year;
00426  
00427     for (entry = entries.begin (); entry < entries.end (); entry++)
00428     {
00429       oldkey = (*entry)->key ();
00430 
00431       if (oldkey.empty ())
00432       {
00433         newkey = "";
00434 
00435         authors = (*entry)->authors ();
00436 
00437         std::string::size_type and_token;
00438         std::string::size_type komma;
00439         std::string::size_type blank;
00440         std::string::size_type begin;
00441 
00442         std::string rest = authors;
00443 
00444         if (!authors.empty ())
00445         {
00446           std::vector<std::string> names;
00447 
00448           for (;;)
00449           {
00450             and_token = rest.find (" and ");
00451             begin = rest.find_first_not_of (" ");
00452 
00453             if (and_token == std::string::npos)
00454             {
00455               names.push_back (rest);
00456     
00457               break;
00458             }
00459 
00460             else
00461             {
00462               names.push_back (rest.substr (0, and_token - begin));
00463 
00464               rest = rest.substr (and_token + 5);
00465             }
00466           }
00467 
00468           std::vector<std::string>::const_iterator name;
00469 
00470           bool first = true;
00471           int number = 0;
00472 
00473           for (name = names.begin (); name < names.end (); name++)
00474           {
00475             number++;
00476 
00477             if (number <= 2)
00478             {
00479               komma = (*name).find_first_of (",");
00480 
00481               if (komma == std::string::npos)
00482               {
00483                 blank = (*name).find_first_of (" ");
00484 
00485                 if (first)
00486                   first = false;
00487 
00488                 else
00489                   newkey += "/";
00490 
00491                 newkey += (*name).substr (blank + 1);
00492               }
00493 
00494               else
00495               {
00496                 if (first)
00497                   first = false;
00498 
00499                 else
00500                   newkey += "/";
00501 
00502                 newkey += (*name).substr (0, blank);
00503               }
00504             }
00505 
00506             else
00507             {
00508               newkey += "/etal";
00509 
00510               break;
00511             }
00512           }
00513         }
00514 
00515         year = (*entry)->year ();
00516 
00517         if (!year.empty ())
00518           newkey = newkey + "_" + year;
00519 
00520         (*entry)->key (newkey);
00521       }
00522     }
00523 
00524     std::ofstream outputfile;
00525 
00526     std::string outputfile_name;
00527 
00528     if (newfile)
00529       outputfile_name = inputfile_name + ".key";
00530 
00531     else
00532       outputfile_name = inputfile_name;
00533 
00534     outputfile.open (outputfile_name.c_str ());
00535 
00536     int entrynumber = 0;
00537 
00538     for (entry = entries.begin (); entry < entries.end (); entry++)
00539     { 
00540       if (verbose)
00541         std::cout << "writing entry " << ++entrynumber << " to file " << 
00542                                   outputfile_name.c_str () << std::endl;
00543 
00544       (*entry)->output (outputfile);
00545     }
00546   }
00547 
00548   std::string coworkers;
00549 
00550   if (criterion == "coworkers")
00551   {
00552     for (entry = entries.begin (); entry < entries.end (); entry++)
00553     {
00554       coworkers = (*entry)->coworkers ();  
00555 
00556       std::string::size_type coworker_begin;
00557       std::string::size_type coworker_end;
00558 
00559       std::string coworker;
00560 
00561       bool last = false;
00562 
00563       for (;;)
00564       {
00565         coworker_begin = coworkers.find_first_not_of (" ,");
00566 
00567         if (coworker_begin == std::string::npos)
00568           break;
00569 
00570         coworker_end = coworkers.find_first_of (",");
00571 
00572         if (coworker_end == std::string::npos)
00573         {
00574           coworker = coworkers.substr (coworker_begin);
00575 
00576           coworkers = "";
00577 
00578           last = true;;
00579         }
00580 
00581         else
00582         {
00583           coworker 
00584              = coworkers.substr 
00585                         (coworker_begin, coworker_end - coworker_begin);
00586 
00587           coworkers = coworkers.substr (++coworker_end);
00588         }
00589 
00590         std::ofstream outputfile;
00591 
00592         std::string outputfile_name = inputfile_name + "." + coworker;
00593 
00594         outputfile.open 
00595               (outputfile_name.c_str (), std::ios::out | std::ios::app);
00596 
00597         (*entry)->output (outputfile);
00598 
00599         outputfile.close ();
00600 
00601         if (last)
00602           break;
00603       }
00604     }
00605   }
00606 
00607   else if (criterion == "key")
00608   {
00609     if (merge_file)
00610     {
00611       std::vector<BibtexEntry *> merge_entries;
00612 
00613       merge_entries = read_bibtexfile (merge_file);
00614 
00615       if (count || verbose)
00616         std::cout << merge_entries.size () << " entries found in merge file "
00617                                          << mergefile_name << std::endl;
00618 
00619       std::vector<BibtexEntry *>::const_iterator merge_entry;
00620       std::vector<BibtexEntry *>::const_iterator entry;
00621 
00622       if (verbose)
00623         std::cout << "starting duplicate detection comparing input and "
00624                                             "merge files " << std::endl;
00625 
00626       std::ofstream merge_duplicatefile;
00627       int duplicate_number = 0;
00628       bool duplicatesfile_open = false;
00629 
00630       std::ofstream merge_includefile;
00631       int include_number = 0;
00632       bool includesfile_open = false;
00633 
00634       for (merge_entry = merge_entries.begin (); 
00635                       merge_entry < merge_entries.end (); merge_entry++)
00636       {
00637         bool duplicates_found;
00638 
00639         for (entry = entries.begin (); entry < entries.end (); entry++)
00640         {
00641           duplicates_found = false;
00642 
00643           if (equal_keys (*entry, *merge_entry))
00644           {
00645             duplicates_found = true;
00646 
00647             duplicate_number++;
00648 
00649             if (!duplicatesfile_open)
00650             {
00651               std::string merge_duplicatefile_name (mergefile_name);
00652 
00653               merge_duplicatefile_name += ".merge_duplicates";
00654 
00655               merge_duplicatefile.open 
00656                                     (merge_duplicatefile_name.c_str ());
00657 
00658               duplicatesfile_open = true;
00659             }
00660 
00661             std::cout << "duplicate found comparing input and merge "
00662                     " files for key " << (*merge_entry)->key () 
00663                                                             << std::endl
00664                    << "entry not merged, writing duplicate to file " 
00665                                                            << std::endl;
00666 
00667             (*merge_entry)->output (merge_duplicatefile);
00668 
00669             break;
00670           }
00671         }
00672 
00673         if (pretend_merge && !duplicates_found)
00674         {
00675           ++include_number;
00676 
00677           if (!includesfile_open)
00678           {
00679             std::string merge_includefile_name (mergefile_name);
00680 
00681             merge_includefile_name += ".merge_includes";
00682 
00683             merge_includefile.open 
00684                                   (merge_includefile_name.c_str ());
00685 
00686             includesfile_open = true;
00687           }
00688 
00689           if (verbose)
00690             std::cout << "entry with key " << (*merge_entry)->key () << 
00691                 " not found, writing to include file" << std::endl;
00692 
00693           (*merge_entry)->output (merge_includefile);
00694         }
00695    
00696         else if (!pretend_merge)
00697         {
00698           if (verbose)
00699             std::cout << "entry with key " << (*merge_entry)->key () << 
00700                          " not found, storing for output" << std::endl;
00701 
00702           if(!duplicates_found)
00703             entries.push_back (*merge_entry);
00704         }
00705       }
00706 
00707       if (verbose)
00708       {
00709         if (duplicate_number > 0)
00710           std::cout << duplicate_number 
00711             << " duplicate entries from file " 
00712                       << mergefile_name << " not merged  " << std::endl;
00713 
00714         if (include_number > 0)
00715           std::cout << include_number << " entries from file " 
00716              << mergefile_name << " found for including  " << std::endl;
00717       }
00718     }
00719 
00720     sort (entries.begin (), entries.end (), less_keys);
00721 
00722     std::vector<BibtexEntry *>::const_iterator firstentry;
00723     std::vector<BibtexEntry *>::const_iterator secondentry;
00724 
00725     if (duplicate_detect)
00726     {
00727       if (verbose)
00728         std::cout << "starting duplicate detection in file `" 
00729                                   << inputfile_name << "'" << std::endl;
00730 
00731       std::ofstream duplicatefile;
00732 
00733       bool duplicates_found;
00734 
00735       for (firstentry = entries.begin (); 
00736                           firstentry < entries.end () - 1; firstentry++)
00737       {
00738         if (verbose)
00739           std::cout << "scanning for key `" << (*firstentry)->key () 
00740                                                     << "'" << std::endl;
00741 
00742         duplicates_found = false;
00743 
00744         for (secondentry = firstentry + 1; secondentry < entries.end ();
00745                                                           secondentry++)
00746         {
00747           if (equal_keys (*firstentry, *secondentry))
00748           {
00749             if (!duplicates_found)
00750             {
00751               std::string duplicatefile_name (inputfile_name);
00752 
00753               duplicatefile_name += ".duplicates";
00754 
00755               duplicatefile.open (duplicatefile_name.c_str ());
00756 
00757               duplicates_found = true;
00758             }
00759 
00760             if (verbose)
00761               std::cout << "duplicates detected, keys " 
00762                                    << (*firstentry)->key ()<< std::endl;
00763 
00764             (*firstentry)->output (duplicatefile);
00765             (*secondentry)->output (duplicatefile);
00766 
00767           }       
00768         }
00769       }
00770     }
00771 
00772 
00773     std::ofstream outputfile;
00774 
00775     std::string outputfile_name;
00776 
00777     if (!pretend_merge)
00778     {
00779       if (newfile)
00780         outputfile_name = inputfile_name + ".sorted";
00781 
00782       else
00783         outputfile_name = inputfile_name;
00784 
00785       outputfile.open (outputfile_name.c_str ());
00786 
00787       int entrynumber = 0;
00788 
00789       for (entry = entries.begin (); entry < entries.end (); entry++)
00790       { 
00791         if (verbose)
00792           std::cout << "writing entry " << ++entrynumber << 
00793                    " to file " << outputfile_name.c_str () << std::endl;
00794 
00795         (*entry)->output (outputfile);
00796       }
00797     }
00798   }
00799 
00800   else if (criterion == "copy")
00801   {
00802     std::string copies;
00803 
00804     std::multimap<std::string, std::string, lessCopy> copymap;
00805 
00806     for (entry = entries.begin (); entry < entries.end (); entry++)
00807     {
00808       copies = (*entry)->copy ();
00809       if (verbose)
00810         std::cout << "read copy field: `" << copies << "'" << std::endl;
00811 
00812       if (copies != "")
00813       {
00814         std::string::size_type copy_begin;
00815         std::string::size_type copy_end;
00816 
00817         std::string copy;
00818 
00819         bool last = false;
00820 
00821         for (;;)
00822         {
00823           copy_begin = copies.find_first_not_of (" ;");
00824 
00825           if (copy_begin == std::string::npos)
00826             break;
00827 
00828           copy_end = copies.find_first_of (";");
00829 
00830           if (copy_end == std::string::npos)
00831           {
00832             copy = copies.substr (copy_begin);
00833 
00834             copies = "";
00835 
00836             last = true;;
00837           }
00838 
00839           else
00840           {
00841             copy 
00842                = copies.substr 
00843                           (copy_begin, copy_end - copy_begin);
00844 
00845             copies = copies.substr (++copy_end);
00846           }
00847 
00848           if (verbose)
00849             std::cout << "`" << copy << "'" << std::endl;
00850 
00851           copymap.insert (std::make_pair(copy, (*entry)->key()));
00852 
00853           if (last)
00854             break;
00855         }
00856       }
00857 
00858       else
00859         if (verbose)
00860           std::cout << " empty or missing copy field; entry ignored" 
00861                                                          << std::endl;
00862     }
00863 
00864     if (verbose)
00865       std::cout << " number of individual copy entries: " 
00866                                    << copymap.size () << std::endl;
00867 
00868     std::ofstream outputfile;
00869 
00870     std::string outputfile_name = inputfile_name + ".copies";
00871 
00872     outputfile.open 
00873                 (outputfile_name.c_str (), std::ios::out | std::ios::app);
00874 
00875     std::multimap<std::string, std::string, lessCopy>::iterator copymap_output;
00876     for (copymap_output = copymap.begin (); 
00877              copymap_output != copymap.end (); copymap_output++)
00878     {
00879       outputfile << copymap_output->first << "      " 
00880                             << copymap_output->second << std::endl;
00881     }
00882 
00883     outputfile.close ();
00884 
00885     if (duplicate_detect)
00886     {
00887       if (verbose)
00888         std::cout << "starting duplicate detection within copies"
00889                                                          << std::endl;
00890   
00891       std::ofstream duplicatefile;
00892   
00893       std::string duplicatefile_name = inputfile_name + ".duplicates";
00894   
00895       duplicatefile.open 
00896             (duplicatefile_name.c_str (), std::ios::out | std::ios::app);
00897   
00898       std::map<std::string, std::string, lessCopy>::iterator 
00899                                                             copymap_entry1;
00900       std::map<std::string, std::string, lessCopy>::iterator 
00901                                                             copymap_entry2;
00902       std::map<std::string, std::string, lessCopy>::iterator copymap_store;
00903   
00904       for (copymap_entry1 = copymap.begin (); 
00905                copymap_entry1 != copymap.end (); copymap_entry1++)
00906       {
00907         std::string copy1;
00908         if (copymap_entry1->first.find_first_of ("0123456789") 
00909                                                         != std::string::npos)
00910        {
00911          std::string part1 (copymap_entry1->first.substr(0, 
00912                           copymap_entry1->first.find_first_of ("0123456789")));
00913          std::string rest (copymap_entry1->first.substr(
00914                           copymap_entry1->first.find_first_of ("0123456789")));
00915           if((rest.find_first_of (" ")) != std::string::npos)
00916           {
00917             std::string part2 (rest.substr(0, rest.find_first_of (" ")));
00918             copy1 = part1 + part2;
00919           }
00920           else
00921            copy1 = copymap_entry1->first; 
00922         }
00923         else
00924         {
00925           copy1 = copymap_entry1->first;
00926         }
00927 
00928         copymap_store = copymap_entry1;
00929         copymap_store++;
00930         for (copymap_entry2 = copymap_store; 
00931                copymap_entry2 != copymap.end (); copymap_entry2++)
00932         {
00933           std::string copy2;
00934           if (copymap_entry2->first.find_first_of ("0123456789") 
00935                                                      != std::string::npos)
00936           {
00937             std::string part1 (copymap_entry2->first.substr(0, 
00938                           copymap_entry2->first.find_first_of ("0123456789")));
00939             std::string rest (copymap_entry2->first.substr(
00940                           copymap_entry2->first.find_first_of ("0123456789")));
00941             if((rest.find_first_of(" ")) != std::string::npos)
00942             {
00943               std::string part2 (rest.substr(0, rest.find_first_of (" ")));
00944               copy2 = part1 + part2;
00945             }
00946             else
00947              copy2 = copymap_entry2->first; 
00948           }
00949           else
00950           {
00951             copy2 = copymap_entry2->first;
00952           }
00953 
00954           if (copy1 == copy2)
00955           {
00956             duplicatefile << "duplicated copy label: " << 
00957               copy1 << " for entries " << 
00958                 copymap_entry1->second << " and " << 
00959                                    copymap_entry2->second << std::endl;
00960             break;
00961           }
00962         }
00963       }
00964     }
00965   }
00966 
00967   else if (criterion == "source")
00968   {
00969     std::string source;
00970     std::string clean_source;
00971 
00972     for (entry = entries.begin (); entry < entries.end (); entry++)
00973     {
00974       clean_source = "";
00975 
00976       source = (*entry)->source ();  
00977 
00978       std::string::size_type source_token_begin;
00979       std::string::size_type source_token_end;
00980 
00981       source_token_begin = source.find_first_not_of (" ");
00982 
00983       if (source_token_begin != std::string::npos)
00984       {
00985         for (;;)
00986         {
00987           source_token_end 
00988                        = source.find_first_of (" ", source_token_begin);
00989 
00990           if (source_token_end == std::string::npos)
00991           {
00992             clean_source += source.substr (source_token_begin);
00993 
00994             break;
00995           }
00996 
00997           else
00998           {
00999             clean_source 
01000               += source.substr (source_token_begin, 
01001                                  source_token_end - source_token_begin);
01002 
01003             source_token_begin = source_token_end + 1;
01004           }
01005         }
01006       }
01007 
01008       std::ofstream outputfile;
01009 
01010       std::string outputfile_name = inputfile_name + "." + clean_source;
01011 
01012       outputfile.open 
01013               (outputfile_name.c_str (), std::ios::out | std::ios::app);
01014 
01015       (*entry)->output (outputfile);
01016 
01017       outputfile.close ();
01018     }
01019   }
01020 
01021   exit (0);
01022 }
01023 
01024 std::ofstream & BibtexEntry::output (std::ofstream &outputfile)
01025 {
01026   outputfile << text <<std::endl;
01027 }
01028 
01029 std::string BibtexEntry::source (void)
01030 {
01031   return field ("journal");
01032 }
01033 
01034 std::string BibtexEntry::authors (void)
01035 {
01036   return field ("author");
01037 }
01038 
01039 std::string BibtexEntry::year (void)
01040 {
01041   return field ("year");
01042 }
01043 
01049 std::string BibtexEntry::copy (void)
01050 {
01051   return field ("copy");
01052 }
01053 
01054 std::string BibtexEntry::coworkers (void)
01055 {
01056   return field ("coworkers");
01057 }
01058 
01059 std::string BibtexEntry::field (std::string label)
01060 {
01061   std::string field;
01062 
01063   field = "";
01064 
01065   std::string::size_type pointer = 0;
01066 
01067   int counter;
01068 
01069   pointer = text.find (label, pointer);
01070 
01071   while (pointer != std::string::npos)
01072   {
01073     pointer += label.length();
01074     pointer = text.find_first_not_of (" \n\t", pointer);
01075 
01076     if ((pointer != std::string::npos) && (text.at(pointer) == '='))
01077     {
01078       pointer = text.find_first_not_of (" \n\t", ++pointer);
01079 
01080       counter = 0;
01081 
01082       if ((pointer != std::string::npos) && (text.at(pointer) == '{'))
01083       {
01084         std::string::size_type begin = ++pointer;
01085         ++counter;
01086         for (std::string::size_type i = pointer; ; ++i)
01087         {
01088           if (text.at(i) == '{')
01089             ++counter;
01090           else if (text.at(i) == '}')
01091             --counter;
01092           
01093           if (counter == 0)
01094           {   
01095             field = text.substr (begin, i - begin);
01096             break;
01097           }
01098         }
01099         break;
01100       }
01101       break;
01102     }
01103     break;
01104   }
01105 
01106   return field;
01107 }
01108 
01109 std::string BibtexEntry::key (void)
01110 {
01111   std::string::size_type key_line_begin;
01112   std::string::size_type key_begin;
01113   std::string::size_type key_end;
01114 
01115   std::string key;
01116 
01117   key_line_begin = text.find ("@");
01118 
01119   if (key_line_begin != std::string::npos)
01120   {
01121     std::string rest = text.substr (key_line_begin);
01122 
01123     key_begin = rest.find ("{"); 
01124     key_end = rest.find (","); 
01125 
01126     key = rest.substr (key_begin + 1, key_end - key_begin - 1);
01127   }
01128 
01129   else
01130     key = "";
01131 
01132 
01133   return key;
01134 }
01135 
01136 void BibtexEntry::key (std::string key)
01137 {
01138   std::string rest;
01139 
01140   std::string::size_type begin_key;
01141   std::string::size_type end_key;
01142 
01143   begin_key = text.find_first_of ("@");
01144   begin_key = text.find_first_of ("{", begin_key + 1);
01145 
01146   end_key = text.find_first_of (",", begin_key + 1);
01147 
01148   std::string replace_string = "{" + key + ",";
01149 
01150   text.replace 
01151           (begin_key, end_key - begin_key + 1, replace_string.c_str ());
01152 }
01153 
01154 std::vector<BibtexEntry *> read_bibtexfile (std::ifstream &file)
01155 {
01156   std::vector<BibtexEntry *> entries;
01157 
01158   BibtexEntry *newentry;
01159 
01160   const int buffersize = 1000;
01161 
01162   char buffer[buffersize];
01163 
01164   int entry_counter = 0;
01165 
01166   int line_number = 0;
01167 
01168   std::string bufferstring;
01169 
01170   while (file.getline (buffer, buffersize))
01171   {
01172     line_number++;
01173 
01174     if (strncmp (buffer, "}", 1))
01175     {
01176       bufferstring += buffer;
01177 
01178       bufferstring += '\n';
01179     }
01180 
01181     else
01182     {
01183       bufferstring += "}";
01184 
01185       newentry = new BibtexEntry (bufferstring);
01186 
01187       entries.push_back (newentry);
01188 
01189       bufferstring = "";
01190     }
01191   }
01192 
01193   return entries;
01194 }