diff options
-rw-r--r-- | Makefile | 9 | ||||
-rw-r--r-- | TODO | 5 | ||||
-rw-r--r-- | changelog | 183 | ||||
-rw-r--r-- | hadori.C | 79 | ||||
-rw-r--r-- | inode.h | 83 |
5 files changed, 234 insertions, 125 deletions
@@ -7,7 +7,7 @@ # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # -# Foobar is distributed in the hope that it will be useful, +# hadori is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. @@ -15,9 +15,9 @@ # You should have received a copy of the GNU General Public License # along with hadori. If not, see <http://www.gnu.org/licenses/>. -LDFLAGS+=-lz -lboost_program_options +LDLIBS=-lboost_program_options -lstdc++ CXXFLAGS?=-O2 -Wall -CXXFLAGS+=-std=c++0x +CXXFLAGS+=-std=c++11 CPPFLAGS+=-D_FILE_OFFSET_BITS=64 all: hadori.1 @@ -26,12 +26,13 @@ hadori.1: hadori help2man -n $< -o $@ -N ./$< hadori: hadori.o -hadori.o: hadori.C inode.h version.h +hadori.o: hadori.C version.h version.h: test ! -d .git || git describe | sed 's/^\(.*\)$$/#define HADORI_VERSION "hadori \1"/' > $@ clean: rm -f hadori hadori.o hadori.1 + test ! -d .git || git checkout -f -- version.h .PHONY: version.h clean @@ -1,5 +0,0 @@ -TODO/possible optimizations -=========================== - -* more debug output when attributes mismatch in handle_file() -* perhaps use hash_map, hash_multimap for better performance? 
diff --git a/changelog b/changelog new file mode 100644 index 0000000..c2e8b94 --- /dev/null +++ b/changelog @@ -0,0 +1,183 @@ +commit 75293a34994df27b84ee30d94b8db625b0ca40ea +Author: Timo Weingärtner <timo@tiwe.de> +Date: 2014-11-23 01:06:29 +0100 + + more const + +commit 77bcb2f6d8d0afa0465dd921fa2e7fb64f1433b6 +Author: Timo Weingärtner <timo@tiwe.de> +Date: 2014-11-21 20:30:34 +0100 + + use boost::iterator_range instead of homebrew hack + +commit 3c42fb81eb2cc9e658afc2156afa5d62880f3c7e +Author: Timo Weingärtner <timo@tiwe.de> +Date: 2013-11-17 14:38:02 +0100 + + Makefile: reset version.h on clean when in git + +commit 2d2195cf1caaac1c5a3b081fc1a49d1f7c12bc96 +Author: Timo Weingärtner <timo@tiwe.de> +Date: 2013-11-17 14:36:18 +0100 + + Makefile: C++11 has been ready for a while now + +commit 38c62a8019f1a19ec93ea16d34fa2f05c702a564 +Author: Timo Weingärtner <timo@tiwe.de> +Date: 2013-11-17 14:33:18 +0100 + + Makefile: add libstdc++ to LDLIBS + +commit 75b33a7f64d2f04efeeb09f96e9b6be716a65526 +Author: Timo Weingärtner <timo@tiwe.de> +Date: 2013-11-17 14:23:02 +0100 + + tighten type requirements for range adaptors + +commit 9933463097df9fe16e446a0744204dbe04223544 +Author: Timo Weingärtner <timo@tiwe.de> +Date: 2013-11-17 14:05:54 +0100 + + use LDLIBS instead of LDFLAGS + + LDLIBS are put behind the prerequisites by make's built-in rules so linking + won't fail with --as-needed. 
+ +commit d34f4e8e84f3c503a830d1495945f8c4da7c4abe +Author: Timo Weingärtner <timo@tiwe.de> +Date: 2013-06-17 17:43:33 +0200 + + fix copy&paste oversight in license + +commit 5d0c23bc87b5be492869616acc42e619914aae38 +Author: Timo Weingärtner <timo@tiwe.de> +Date: 2013-04-24 16:31:15 +0200 + + inode.h is not so big anymore, merge it into hadori.C + +commit 5f9ccea4152b63018095e247066524c425e7824e +Author: Timo Weingärtner <timo@tiwe.de> +Date: 2013-04-23 18:44:12 +0200 + + std::move inode f into kept, we won't use it afterwards + +commit 7bb00e45cc6fcfa85514bf4e645b0cedebbc0216 +Author: Timo Weingärtner <timo@tiwe.de> +Date: 2013-04-23 18:32:03 +0200 + + use agg init and drop unnecessary stuff + +commit 9d0562d22cef95b0db1af30e199a371670ca8050 +Author: Timo Weingärtner <timo@tiwe.de> +Date: 2013-04-23 18:28:33 +0200 + + remove hashing stuff + + the cases in which that gave a speedup are rare to nonexistent + +commit fb3fe1b085b30fcc9defa8f29477226806db085b +Author: Timo Weingärtner <timo@tiwe.de> +Date: 2012-03-04 03:08:30 +0100 + + Use initializer braces instead of make_pair + +commit 5a901f9956801b7b9ed819da8b721a58ad93d095 +Author: Timo Weingärtner <timo@tiwe.de> +Date: 2012-03-03 01:57:29 +0100 + + TODO replaced by issues on github + +commit fc2e6f497641f6189d4c46bb975c47fe5cdecc24 (tag: 0.2) +Author: Timo Weingärtner <timo@tiwe.de> +Date: 2012-02-26 16:11:21 +0100 + + Version 0.2 + +commit 8079e98267742de01442f0294d41085e6af2353a +Author: Timo Weingärtner <timo@tiwe.de> +Date: 2012-02-26 16:03:06 +0100 + + Fix version.h generation for non-git + +commit 27b8fc1395bece0dc309512a3790a4892e0d29b1 +Author: Timo Weingärtner <timo@tiwe.de> +Date: 2012-02-03 14:33:21 +0100 + + Add --version, generate version.h from git, clean manpage + +commit 894df89979fdd604588eae999f984b05e471ceb8 +Author: Timo Weingärtner <timo@tiwe.de> +Date: 2012-02-03 13:52:04 +0100 + + Output help to stdout and exit with EX_OK. 
+ +commit cee77d8be37b074fd6e039a8207e30f2f067c561 +Author: Timo Weingärtner <timo@tiwe.de> +Date: 2012-01-17 19:24:06 +0100 + + Add copyright notice and license. + +commit 8f967e3c65d1ecf6c43a6ff41b70f0fe47e64480 +Author: Timo Weingärtner <timo@tiwe.de> +Date: 2011-12-25 18:45:41 +0100 + + clarify description for --stdin + +commit a40bb6e7fd6e74e8b2d2222bba9421b580577e8e +Author: Timo Weingärtner <timo@tiwe.de> +Date: 2011-12-02 20:23:18 +0100 + + also use unordered_map for sizes + +commit cffdf8acae2b5f985eeb81cb6e0a5bbd218b9d32 +Author: Timo Weingärtner <timo@tiwe.de> +Date: 2011-12-02 19:52:02 +0100 + + use equal_range and range-for + + needs an adapter because std::multimap doesn't deliver begin() and end() for + a std::pair of its iterators. + +commit 091b33363d0c067c9071414f97fda16102bc065b +Author: Timo Weingärtner <timo@tiwe.de> +Date: 2011-12-02 19:17:47 +0100 + + use unordered_map for kept and to_link + + sorting is not needed + +commit 1c803dcad160e85fe4b8d46b322374283c0edd89 +Author: Timo Weingärtner <timo@tiwe.de> +Date: 2011-12-02 19:07:31 +0100 + + move kept, to_link, sizes as static into handle_file + + only used inside that function + +commit 568bd7e0d719d7754943c6cfebfa4e4179d62333 +Author: Timo Weingärtner <timo@tiwe.de> +Date: 2011-11-30 17:51:32 +0100 + + use more C++11 features + +commit 7349d7a5f4c305f5ba350858cc810ab79ca81bb4 +Author: Timo Weingärtner <timo@tiwe.de> +Date: 2011-11-28 01:11:16 +0100 + + forget/don't memorize inode in to_link if st_nlink == 1 + + Might have caused data loss if inode number is reallocated to a file inside + the tree(s) being scanned. + Also saves a little bit of memory. 
+ +commit f52b13a15df60b02e10f88b656192a2db5077706 +Author: Timo Weingärtner <timo@tiwe.de> +Date: 2011-11-28 01:11:16 +0100 + + TODO update + +commit 148e9eab1109789916a7b55ece957156a57ee4cf (tag: 0.1) +Author: Timo Weingärtner <timo@tiwe.de> +Date: 2011-11-06 00:00:31 +0100 + + initial commit @@ -8,7 +8,7 @@ * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * - * Foobar is distributed in the hope that it will be useful, + * hadori is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. @@ -20,12 +20,15 @@ #include <boost/program_options.hpp> namespace po = boost::program_options; +#include <boost/range/iterator_range.hpp> + #include <string> #include <vector> #include <queue> #include <unordered_map> #include <iostream> #include <sstream> +#include <fstream> #include <cstdlib> #include <cstring> @@ -35,39 +38,55 @@ namespace po = boost::program_options; #include <dirent.h> #include <sysexits.h> -#include "inode.h" #include "version.h" -// needed for equal_range and range-for -namespace std { -template<typename T> T& begin(pair<T,T> & ip) { - return ip.first; -} -template<typename T> T& end(pair<T,T> & ip) { - return ip.second; -} -} - po::variables_map config; std::ostream debug(std::clog.rdbuf()), verbose(std::clog.rdbuf()), error(std::clog.rdbuf()); +struct inode { + std::string const filename; + struct stat const stat; +}; + +inline bool compare (inode const & l, inode const & r) { + char lbuffer[1 << 14]; + char rbuffer[1 << 14]; + std::ifstream lf(l.filename.c_str()); + std::ifstream rf(r.filename.c_str()); + + while (not lf.eof()) { + lf.read(lbuffer, sizeof(lbuffer)); + rf.read(rbuffer, sizeof(rbuffer)); + if (lf.gcount() != rf.gcount()) + return false; + if (memcmp(lbuffer, rbuffer, lf.gcount())) + return false; + } + return true; +} 
+ +inline std::ostream& operator<< (std::ostream& os, inode const & i) { + os << "Inode " << i.stat.st_ino << ", represented by " << i.filename; + return os; +} + void do_link (inode const & i, std::string const & other) { if (!link(i.filename.c_str(), other.c_str())) { error << "linking " << i << " to " << other << " succeeded before unlinking (race condition)" << std::endl; exit(EX_UNAVAILABLE); } if (errno != EEXIST) { - char * errstring = strerror(errno); + char const * const errstring = strerror(errno); error << "error linking " << i << " to " << other << ": " << errstring << ", nothing bad happened." << std::endl; exit(EX_UNAVAILABLE); } if (unlink(other.c_str())) { - char * errstring = strerror(errno); + char const * const errstring = strerror(errno); error << "error unlinking " << other << " before linking " << i << " to it: " << errstring << std::endl; exit(EX_UNAVAILABLE); } if (link(i.filename.c_str(), other.c_str())) { - char * errstring = strerror(errno); + char const * const errstring = strerror(errno); error << "error linking " << i << " to " << other << ": " << errstring << ", destination filename was already unlinked." 
<< std::endl; exit(EX_UNAVAILABLE); } @@ -75,8 +94,8 @@ void do_link (inode const & i, std::string const & other) { void handle_file(std::string const & path, struct stat const & s) { static std::unordered_map<ino_t, inode const> kept; - static std::unordered_map<ino_t, ino_t> to_link; - static std::unordered_multimap<off_t, ino_t> sizes; + static std::unordered_map<ino_t, ino_t const> to_link; + static std::unordered_multimap<off_t, ino_t const> sizes; debug << "examining " << path << std::endl; if (kept.count(s.st_ino)) { @@ -91,9 +110,9 @@ void handle_file(std::string const & path, struct stat const & s) { to_link.erase(s.st_ino); return; } - inode f(path, s); + inode f{path, s}; debug << f << " is new to us" << std::endl; - for (auto const & it : sizes.equal_range(s.st_size)) { + for (auto const & it : boost::make_iterator_range(sizes.equal_range(s.st_size))) { inode const & candidate = kept.find(it.second)->second; debug << "looking if it matches " << candidate << std::endl; if (candidate.stat.st_mode != s.st_mode) @@ -105,21 +124,18 @@ void handle_file(std::string const & path, struct stat const & s) { if (not config.count("no-time")) if (candidate.stat.st_mtime != s.st_mtime) continue; - if (config.count("hash")) - if (candidate.get_adler() != f.get_adler()) - continue; if (!compare(candidate, f)) continue; verbose << "linking " << candidate << " to " << path << std::endl; if (s.st_nlink > 1) - to_link.insert(std::make_pair(s.st_ino, it.second)); + to_link.insert({s.st_ino, it.second}); if (not config.count("dry-run")) do_link(candidate, path); return; } debug << "we keep " << f << std::endl; - kept.insert(std::make_pair(s.st_ino, f)); - sizes.insert(std::make_pair(s.st_size, s.st_ino)); + kept.insert({s.st_ino, std::move(f)}); + sizes.insert({s.st_size, s.st_ino}); } void recurse (std::string const & dir, dev_t const dev) { @@ -129,7 +145,7 @@ void recurse (std::string const & dir, dev_t const dev) { std::queue<std::string> subdirs; if (!(D = 
opendir(dir.c_str()))) { - char * errstring = strerror(errno); + char const * const errstring = strerror(errno); error << "opendir(\"" << dir << "\"): " << errstring << std::endl; return; } @@ -138,7 +154,7 @@ void recurse (std::string const & dir, dev_t const dev) { path += '/'; path += d->d_name; if (lstat(path.c_str(), &s)) { - char * errstring = strerror(errno); + char const * const errstring = strerror(errno); error << "lstat(\"" << path << "\"): " << errstring << std::endl; continue; } @@ -167,7 +183,7 @@ void recurse_start (std::string const & dir) { struct stat s; if (lstat(dir.c_str(), &s)) { - char * errstring = strerror(errno); + char const * const errstring = strerror(errno); error << "lstat(\"" << dir << "\"): " << errstring << std::endl; exit(EX_NOINPUT); } @@ -184,13 +200,12 @@ void recurse_start (std::string const & dir) { handle_file(dir, s); } -int main (int const argc, char ** argv) { +int main (int const argc, char const * const * const argv) { po::options_description opts("OPTIONS"); opts.add_options() ("help,h", "print this help message") ("version,V", "print version information") ("no-time,t", "ignore mtime") - ("hash", "use adler32 hash to speed up comparing many files with same size and mostly identical content") ("dry-run,n", "don't change anything, implies --verbose") ("verbose,v", "show which files get linked") ("debug,d", "show files being examined") @@ -234,9 +249,7 @@ int main (int const argc, char ** argv) { } else { if (not config.count("stdin") and not config.count("null")) error << "no arguments supplied, assuming --stdin." << std::endl; - char delim = '\n'; - if (config.count("null")) - delim = '\0'; + char const delim = config.count("null") ? 
'\0' : '\n'; for (std::string dir; getline(std::cin, dir, delim);) recurse_start(dir); } diff --git a/inode.h b/inode.h deleted file mode 100644 index 26db923..0000000 --- a/inode.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (C) 2011 Timo Weingärtner <timo@tiwe.de> - * - * This file is part of hadori. - * - * hadori is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Foobar is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with hadori. If not, see <http://www.gnu.org/licenses/>. - */ - -#include <string> -#include <ostream> -#include <fstream> - -#include <cstring> -#include <sys/stat.h> -#include <sys/types.h> - -#include <zlib.h> - -class inode { -public: - std::string const filename; - struct stat const stat; -protected: - uLong mutable adler; - -public: - inode (std::string const &, struct stat const); - - uLong get_adler () const; - - friend bool compare (inode const &, inode const &); - friend std::ostream& operator<< (std::ostream&, inode const &); -}; - -inline inode::inode (std::string const & __filename, struct stat const __stat) : filename(__filename), stat(__stat), adler(-1) { -} - -inline uLong inode::get_adler () const { - if (adler == uLong(-1)) { - char buffer[1 << 14]; - std::ifstream f(filename.c_str()); - - adler = adler32(0L, Z_NULL, 0); - while (not f.eof()) { - f.read(buffer, sizeof(buffer)); - adler = adler32(adler, (Bytef *) buffer, f.gcount()); - } - } - return adler; -} - -inline bool compare (inode const & l, inode const & r) { - char lbuffer[1 << 14]; - char rbuffer[1 << 14]; - 
std::ifstream lf(l.filename.c_str()); - std::ifstream rf(r.filename.c_str()); - - while (not lf.eof()) { - lf.read(lbuffer, sizeof(lbuffer)); - rf.read(rbuffer, sizeof(rbuffer)); - if (lf.gcount() != rf.gcount()) - return false; - if (memcmp(lbuffer, rbuffer, lf.gcount())) - return false; - } - return true; -} - -inline std::ostream& operator<< (std::ostream& os, inode const & i) { - os << "Inode " << i.stat.st_ino << ", represented by " << i.filename; - return os; -} |