00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
00028
00029 #include "WoSentry.h"
00030
00031 using namespace WoStransform;
00032
00033 int main (int argc, char *argv[])
00034 {
00035 if (argc == 1)
00036 {
00037 std::cout << "usage: WoStransform [options] -o output_file input_file(s)"
00038 << std::endl << "or: WoStransform --version" << std::endl;
00039
00040 exit (1);
00041 }
00042
00043 if (!strcmp (argv[1], "--version"))
00044 {
00045 std::cout << "WoStransform, version 1.3" << std::endl;
00046
00047 exit (0);
00048 }
00049
00050 std::cout << "starting WoStransform - transformation of web-of-science "
00051 << std::endl
00052 << " files into bibtex format" << std::endl
00053 << "version 1.3, Copyright (C) 2002 - 2009, Bernd Speiser" << std::endl
00054 << "WoStransform comes with ABSOLUTELY NO WARRANTY; for details see"
00055 << std::endl
00056 << "the General Public License." << std::endl
00057 << "This is free software, and you are welcome to redistribute it"
00058 << std::endl
00059 << "under certain conditions; see the General Public License for "
00060 "details."
00061 << std::endl;
00062
00063 if (argc < 4)
00064 {
00065 std::cout << "wrong number of arguments; " << std::endl <<
00066 "usage: WoStransform [options] -o output_file input_file(s)"
00067 << std::endl << "or: WoStransform --version" << std::endl;
00068
00069 exit (1);
00070 }
00071
00072 int outputfile_pointer = 0;
00073
00074 bool verbose = false;
00075
00076 for (int i = 1; i < argc; i++)
00077 {
00078 if (!strcmp (argv[i], "--verbose"))
00079 verbose = true;
00080
00081 else if (!strcmp (argv[i],"-o"))
00082 outputfile_pointer = i + 1;
00083 }
00084
00085 if (!outputfile_pointer)
00086 {
00087 std::cout << "no outputfile given; usage: WoStransform "
00088 "[options] -o output_file input_file(s) " << std::endl;
00089
00090 exit (1);
00091 }
00092
00093 std::vector <WoSentry *> entries;
00094
00095 const int buffersize = 1000;
00096
00097 char buffer[buffersize];
00098
00099 int entry_counter = 0;
00100
00101 for (int j = outputfile_pointer + 1; j < argc; j++)
00102 {
00103 std::ifstream WoSinput (argv[j]);
00104
00105 if (!WoSinput)
00106 {
00107 std::cout << "can't open file: " << argv[j] << std::endl;
00108
00109 exit (1);
00110 }
00111
00112 if (verbose)
00113 std::cout << "reading input file: `" << argv[j] << "´" << std::endl;
00114
00115 WoSinput.getline (buffer, buffersize);
00116
00117 if (strcmp (buffer, "FN ISI Export Format"))
00118 std::cout << "file `" << argv[j]
00119 << "´ not in recognized web-of-science format, skipped"
00120 << std::endl;
00121 else
00122 {
00123 int line_number = 1;
00124
00125 std::string bufferstring;
00126
00127 std::string entrytext;
00128
00129 while (WoSinput.getline (buffer, buffersize))
00130 {
00131 line_number++;
00132
00133 bufferstring = buffer;
00134
00135 bufferstring += '\n';
00136
00137 if (bufferstring.substr (0,3) == "PT ")
00138 {
00139 entrytext += bufferstring;
00140
00141 if (bufferstring.substr (3) == "Journal\n"
00142 || bufferstring.substr (3) == "J\n"
00143 || bufferstring.substr (3) == "C\n")
00144 {
00145 if (verbose)
00146 std::cout << "generating new entry of type "
00147 << bufferstring.substr
00148 (3, bufferstring.find_last_of ('\n') -
00149 bufferstring.find_first_not_of (" ", 3)) << std::endl;
00150
00151 entry_counter++;
00152
00153 WoSentry *entry = new JournalWoSentry;
00154
00155 while (WoSinput.getline (buffer, buffersize))
00156 {
00157 line_number++;
00158
00159 bufferstring = buffer;
00160
00161 bufferstring += '\n';
00162
00163 if (bufferstring.substr (0,2) == "ER")
00164 {
00165 *entry << entrytext;
00166
00167 entries.push_back (entry);
00168
00169 entrytext = "";
00170
00171 break;
00172 }
00173
00174 entrytext += bufferstring;
00175 }
00176 }
00177
00178 else if (bufferstring.substr (3) == "Book in series\n"
00179 || bufferstring.substr (3) == "S\n")
00180 {
00181 if (verbose)
00182 std::cout << "generating new entry of type "
00183 << bufferstring.substr
00184 (3, bufferstring.find_last_of ('\n') -
00185 bufferstring.find_first_not_of (" ", 3)) << std::endl;
00186
00187 WoSentry *entry = new BookinseriesWoSentry;
00188
00189 while (WoSinput.getline (buffer, buffersize))
00190 {
00191 line_number++;
00192
00193 bufferstring = buffer;
00194
00195 bufferstring += '\n';
00196
00197 if (bufferstring.substr (0,2) == "ER")
00198 {
00199 *entry << entrytext;
00200
00201 entries.push_back (entry);
00202
00203 entrytext = "";
00204
00205 break;
00206 }
00207
00208 entrytext += bufferstring;
00209 }
00210 }
00211
00212 else
00213 {
00214 std::cout << "unknown publication type `"
00215 << bufferstring.substr (3,
00216 bufferstring.find_last_of ('\n') -
00217 bufferstring.find_first_not_of (" ", 3)) <<
00218 "' in line "<< line_number << " of file " << argv[j]
00219 << std::endl;
00220 }
00221 }
00222 }
00223 }
00224 }
00225
00226 std::vector<WoSentry *>::const_iterator entrypointer;
00227
00228 std::cout << "writing to " << argv[outputfile_pointer] << std::endl;
00229
00230 std::ofstream bibtexoutput (argv[outputfile_pointer]);
00231
00232 if (!bibtexoutput)
00233 {
00234 std::cout << "can't open file: "
00235 << argv[outputfile_pointer] << std::endl;
00236 exit (1);
00237 }
00238
00239 for (entrypointer = entries.begin ();
00240 entrypointer != entries.end (); ++ entrypointer)
00241 {
00242 (*entrypointer)->deconstruct ();
00243
00244 (*entrypointer)->format_authors ();
00245
00246 (*entrypointer)->output (bibtexoutput);
00247 }
00248
00249 if (entry_counter == 1)
00250 std::cout << entry_counter << " entry generated " << std::endl;
00251
00252 else if (entry_counter > 1)
00253 std::cout << entry_counter << " entries generated " << std::endl;
00254
00255 exit (0);
00256 }
00257
00258 void JournalWoSentry::deconstruct (void)
00259 {
00260 std::string::size_type title_begin;
00261 std::string::size_type lineend;
00262
00263 title_begin = text.find ("\nTI");
00264
00265 title_begin = text.find_first_not_of (" ", title_begin + 3);
00266
00267 title = "";
00268
00269 while (title_begin != std::string::npos)
00270 {
00271 lineend = text.find ("\n", title_begin);
00272
00273 title += text.substr (title_begin, lineend - title_begin);
00274
00275 if (text.at (lineend + 1) != ' ')
00276 break;
00277
00278 title_begin = lineend + 3;
00279 }
00280
00281 std::string::size_type journal_begin;
00282
00283 journal_begin = text.find ("\nJI");
00284
00285 journal_begin = text.find_first_not_of (" ", journal_begin + 3);
00286
00287 journal = "";
00288
00289 while (journal_begin != std::string::npos)
00290 {
00291 lineend = text.find ("\n", journal_begin);
00292
00293 journal += text.substr (journal_begin, lineend - journal_begin);
00294
00295 if (text.at (lineend + 1) != ' ')
00296 break;
00297
00298 journal_begin = lineend + 3;
00299 }
00300
00301 std::string::size_type authors_begin;
00302
00303 authors_begin = text.find ("\nAU");
00304
00305 authors_begin = text.find_first_not_of (" ", authors_begin + 3);
00306
00307 authors = "";
00308
00309 while (authors_begin != std::string::npos)
00310 {
00311 lineend = text.find ("\n", authors_begin);
00312
00313 authors += text.substr (authors_begin, lineend - authors_begin);
00314
00315 authors += ";";
00316
00317 if (text.at (lineend + 1) != ' ')
00318 break;
00319
00320 authors_begin = lineend + 3;
00321 }
00322
00323 std::string::size_type printing_year_begin;
00324
00325 printing_year_begin = text.find ("\nPY");
00326
00327 printing_year_begin
00328 = text.find_first_not_of (" ", printing_year_begin + 3);
00329
00330 printing_year = "";
00331
00332 lineend = text.find ("\n", printing_year_begin);
00333
00334 printing_year +=
00335 text.substr (printing_year_begin, lineend - printing_year_begin);
00336
00337 std::string::size_type begin_page_begin;
00338
00339 begin_page_begin = text.find ("\nBP");
00340
00341 begin_page_begin
00342 = text.find_first_not_of (" ", begin_page_begin + 3);
00343
00344 begin_page = "";
00345
00346 lineend = text.find ("\n", begin_page_begin);
00347
00348 begin_page +=
00349 text.substr (begin_page_begin, lineend - begin_page_begin);
00350
00351 std::string::size_type end_page_begin;
00352
00353 end_page_begin = text.find ("\nEP");
00354
00355 end_page_begin
00356 = text.find_first_not_of (" ", end_page_begin + 3);
00357
00358 end_page = "";
00359
00360 lineend = text.find ("\n", end_page_begin);
00361
00362 end_page +=
00363 text.substr (end_page_begin, lineend - end_page_begin);
00364
00365 std::string::size_type volume_begin;
00366
00367 volume_begin = text.find ("\nVL");
00368
00369 volume = "";
00370
00371 if (volume_begin != std::string::npos)
00372 {
00373 volume_begin = text.find_first_not_of (" ", volume_begin + 3);
00374
00375 lineend = text.find ("\n", volume_begin);
00376
00377 volume += text.substr (volume_begin, lineend - volume_begin);
00378 }
00379 }
00380
00381 std::ofstream & JournalWoSentry::output (std::ofstream &outstream)
00382 {
00383 outstream << "@article{" << key << "," << std::endl;
00384
00385 outstream << "author = {" << authors << "}," << std::endl;
00386
00387 outstream << "journal = {" << journal << "}," << std::endl;
00388
00389 if (!volume.empty ())
00390 outstream << "volume = {" << volume << "}," << std::endl;
00391
00392 outstream << "pages = {" << begin_page << "~--~" << end_page
00393 << "}," << std::endl;
00394
00395 outstream << "year = {" << printing_year << "}," << std::endl;
00396
00397 outstream << "title = {" << title << "}," << std::endl;
00398
00399 outstream << "topics = {}," << std::endl;
00400 outstream << "copy = {}" << std::endl;
00401
00402 outstream << "}" << std::endl;
00403 }
00404
00405 void WoSentry::format_authors (void)
00406 {
00407 std::string buffer = authors;
00408
00409 authors = "";
00410
00411 std::string author;
00412 std::string correct_author;
00413 std::string lastname;
00414 std::string firstnames;
00415 std::string firstnames_string;
00416
00417 std::vector<std::string> lastnames;
00418
00419 std::string::size_type begin_author;
00420 std::string::size_type end_author;
00421 std::string::size_type end_authorstring;
00422 std::string::size_type end_firstname;
00423 std::string::size_type index;
00424
00425 begin_author = buffer.find_first_not_of (" ");
00426 end_author = buffer.find (";", begin_author);
00427 end_authorstring = buffer.find_last_of (";");
00428
00429 while (end_author <= end_authorstring)
00430 {
00431 author = buffer.substr (begin_author, end_author - begin_author);
00432
00433 end_firstname = author.find_last_of (" ");
00434
00435 lastname = author.substr (0, author.find_last_of (","));
00436
00437 lastnames.push_back (lastname);
00438
00439 firstnames_string
00440 = author.substr (author.find_last_of (" ") + 1) + " ";
00441
00442 firstnames
00443 = firstnames_string.substr
00444 (firstnames_string.find_first_not_of (" "));
00445
00446 std::string::iterator pointer = firstnames.begin ();
00447
00448 index = firstnames.find_first_not_of (" ");
00449
00450 int idx = 0;
00451
00452 while (firstnames.at(idx) != ' ')
00453 {
00454 firstnames.insert (++index, ".");
00455
00456 index++;
00457
00458 idx += 2;
00459 }
00460
00461 correct_author = firstnames + lastname;
00462
00463 if (authors.empty ())
00464 authors += correct_author;
00465
00466 else
00467 authors = authors + " and " + correct_author;
00468
00469 if (end_author == end_authorstring)
00470 break;
00471
00472 begin_author = buffer.find_first_not_of (" ", ++end_author);
00473 end_author = buffer.find (";", begin_author);
00474
00475 }
00476
00477 int authornumber = lastnames.size ();
00478
00479 std::vector<std::string>::const_iterator lastname_pointer;
00480
00481 lastname_pointer = lastnames.begin ();
00482
00483 if (authornumber == 1)
00484 key = *lastname_pointer + "_" + printing_year;
00485
00486 else if (authornumber == 2)
00487 key = *lastname_pointer + "/" + *(lastname_pointer + 1) + "_"
00488 + printing_year;
00489
00490 else if (authornumber > 2)
00491 key = *lastname_pointer + "/" + *(lastname_pointer + 1) + "/etal_"
00492 + printing_year;
00493
00494 else
00495 std::cout << "no author found!!!" << std::endl;
00496
00497 }
00498
00499 void BookinseriesWoSentry::deconstruct (void)
00500 {
00501 std::string::size_type title_begin;
00502 std::string::size_type lineend;
00503
00504 title_begin = text.find ("\nTI");
00505
00506 title_begin = text.find_first_not_of (" ", title_begin + 3);
00507
00508 title = "";
00509
00510 while (title_begin != std::string::npos)
00511 {
00512 lineend = text.find ("\n", title_begin);
00513
00514 title += text.substr (title_begin, lineend - title_begin);
00515
00516 if (text.at (lineend + 1) != ' ')
00517 break;
00518
00519 title_begin = lineend + 3;
00520 }
00521
00522 std::string::size_type series_begin;
00523
00524 series_begin = text.find ("\nSE");
00525
00526 series_begin = text.find_first_not_of (" ", series_begin + 3);
00527
00528 series = "";
00529
00530 while (series_begin != std::string::npos)
00531 {
00532 lineend = text.find ("\n", series_begin);
00533
00534 series += text.substr (series_begin, lineend - series_begin);
00535
00536 if (text.at (lineend + 1) != ' ')
00537 break;
00538
00539 series_begin = lineend + 3;
00540 }
00541
00542 std::string::size_type authors_begin;
00543
00544 authors_begin = text.find ("\nAU");
00545
00546 authors_begin = text.find_first_not_of (" ", authors_begin + 3);
00547
00548 authors = "";
00549
00550 while (authors_begin != std::string::npos)
00551 {
00552 lineend = text.find ("\n", authors_begin);
00553
00554 authors += text.substr (authors_begin, lineend - authors_begin);
00555
00556 authors += ";";
00557
00558 if (text.at (lineend + 1) != ' ')
00559 break;
00560
00561 authors_begin = lineend + 3;
00562 }
00563
00564 std::string::size_type printing_year_begin;
00565
00566 printing_year_begin = text.find ("\nPY");
00567
00568 printing_year_begin
00569 = text.find_first_not_of (" ", printing_year_begin + 3);
00570
00571 printing_year = "";
00572
00573 lineend = text.find ("\n", printing_year_begin);
00574
00575 printing_year +=
00576 text.substr (printing_year_begin, lineend - printing_year_begin);
00577
00578 std::string::size_type begin_page_begin;
00579
00580 begin_page_begin = text.find ("\nBP");
00581
00582 begin_page_begin
00583 = text.find_first_not_of (" ", begin_page_begin + 3);
00584
00585 begin_page = "";
00586
00587 lineend = text.find ("\n", begin_page_begin);
00588
00589 begin_page +=
00590 text.substr (begin_page_begin, lineend - begin_page_begin);
00591
00592 std::string::size_type end_page_begin;
00593
00594 end_page_begin = text.find ("\nEP");
00595
00596 end_page_begin
00597 = text.find_first_not_of (" ", end_page_begin + 3);
00598
00599 end_page = "";
00600
00601 lineend = text.find ("\n", end_page_begin);
00602
00603 end_page +=
00604 text.substr (end_page_begin, lineend - end_page_begin);
00605
00606 std::string::size_type volume_begin;
00607
00608 volume_begin = text.find ("\nVL");
00609
00610 volume = "";
00611
00612 if (volume_begin != std::string::npos)
00613 {
00614 volume_begin = text.find_first_not_of (" ", volume_begin + 3);
00615
00616 lineend = text.find ("\n", volume_begin);
00617
00618 volume += text.substr (volume_begin, lineend - volume_begin);
00619 }
00620 }
00621
00622 std::ofstream & BookinseriesWoSentry::output (std::ofstream &outstream)
00623 {
00624 outstream << "@article{" << key << "," << std::endl;
00625
00626 outstream << "author = {" << authors << "}," << std::endl;
00627
00628 outstream << "journal = {" << series << "}," << std::endl;
00629
00630 if (!volume.empty ())
00631 outstream << "volume = {" << volume << "}," << std::endl;
00632
00633 outstream << "pages = {" << begin_page << "~--~" << end_page
00634 << "}," << std::endl;
00635
00636 outstream << "year = {" << printing_year << "}," << std::endl;
00637
00638 outstream << "title = {" << title << "}," << std::endl;
00639
00640 outstream << "topics = {}," << std::endl;
00641 outstream << "copy = {}" << std::endl;
00642
00643 outstream << "}" << std::endl;
00644 }