summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- Makefile 9
-rw-r--r-- TODO 5
-rw-r--r-- changelog 183
-rw-r--r-- hadori.C 79
-rw-r--r-- inode.h 83
5 files changed, 234 insertions, 125 deletions
diff --git a/Makefile b/Makefile
index a55a9dd..e8f40f6 100644
--- a/Makefile
+++ b/Makefile
@@ -7,7 +7,7 @@
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
-# Foobar is distributed in the hope that it will be useful,
+# hadori is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
@@ -15,9 +15,9 @@
# You should have received a copy of the GNU General Public License
# along with hadori. If not, see <http://www.gnu.org/licenses/>.
-LDFLAGS+=-lz -lboost_program_options
+LDLIBS=-lboost_program_options -lstdc++
CXXFLAGS?=-O2 -Wall
-CXXFLAGS+=-std=c++0x
+CXXFLAGS+=-std=c++11
CPPFLAGS+=-D_FILE_OFFSET_BITS=64
all: hadori.1
@@ -26,12 +26,13 @@ hadori.1: hadori
help2man -n $< -o $@ -N ./$<
hadori: hadori.o
-hadori.o: hadori.C inode.h version.h
+hadori.o: hadori.C version.h
version.h:
test ! -d .git || git describe | sed 's/^\(.*\)$$/#define HADORI_VERSION "hadori \1"/' > $@
clean:
rm -f hadori hadori.o hadori.1
+ test ! -d .git || git checkout -f -- version.h
.PHONY: version.h clean
diff --git a/TODO b/TODO
deleted file mode 100644
index 7019419..0000000
--- a/TODO
+++ /dev/null
@@ -1,5 +0,0 @@
-TODO/possible optimizations
-===========================
-
-* more debug output when attributes mismatch in handle_file()
-* perhaps use hash_map, hash_multimap for better performance?
diff --git a/changelog b/changelog
new file mode 100644
index 0000000..c2e8b94
--- /dev/null
+++ b/changelog
@@ -0,0 +1,183 @@
+commit 75293a34994df27b84ee30d94b8db625b0ca40ea
+Author: Timo Weingärtner <timo@tiwe.de>
+Date: 2014-11-23 01:06:29 +0100
+
+ more const
+
+commit 77bcb2f6d8d0afa0465dd921fa2e7fb64f1433b6
+Author: Timo Weingärtner <timo@tiwe.de>
+Date: 2014-11-21 20:30:34 +0100
+
+ use boost::iterator_range instead of homebrew hack
+
+commit 3c42fb81eb2cc9e658afc2156afa5d62880f3c7e
+Author: Timo Weingärtner <timo@tiwe.de>
+Date: 2013-11-17 14:38:02 +0100
+
+ Makefile: reset version.h on clean when in git
+
+commit 2d2195cf1caaac1c5a3b081fc1a49d1f7c12bc96
+Author: Timo Weingärtner <timo@tiwe.de>
+Date: 2013-11-17 14:36:18 +0100
+
+ Makefile: C++11 has been ready for a while now
+
+commit 38c62a8019f1a19ec93ea16d34fa2f05c702a564
+Author: Timo Weingärtner <timo@tiwe.de>
+Date: 2013-11-17 14:33:18 +0100
+
+ Makefile: add libstdc++ to LDLIBS
+
+commit 75b33a7f64d2f04efeeb09f96e9b6be716a65526
+Author: Timo Weingärtner <timo@tiwe.de>
+Date: 2013-11-17 14:23:02 +0100
+
+ tighten type requirements for range adaptors
+
+commit 9933463097df9fe16e446a0744204dbe04223544
+Author: Timo Weingärtner <timo@tiwe.de>
+Date: 2013-11-17 14:05:54 +0100
+
+ use LDLIBS instead of LDFLAGS
+
+ LDLIBS are put behind the prerequisites by make's built-in rules so linking
+ won't fail with --as-needed.
+
+commit d34f4e8e84f3c503a830d1495945f8c4da7c4abe
+Author: Timo Weingärtner <timo@tiwe.de>
+Date: 2013-06-17 17:43:33 +0200
+
+ fix copy&paste oversight in license
+
+commit 5d0c23bc87b5be492869616acc42e619914aae38
+Author: Timo Weingärtner <timo@tiwe.de>
+Date: 2013-04-24 16:31:15 +0200
+
+ inode.h is not so big anymore, merge it into hadori.C
+
+commit 5f9ccea4152b63018095e247066524c425e7824e
+Author: Timo Weingärtner <timo@tiwe.de>
+Date: 2013-04-23 18:44:12 +0200
+
+ std::move inode f into kept, we won't use it afterwards
+
+commit 7bb00e45cc6fcfa85514bf4e645b0cedebbc0216
+Author: Timo Weingärtner <timo@tiwe.de>
+Date: 2013-04-23 18:32:03 +0200
+
+ use agg init and drop unnecessary stuff
+
+commit 9d0562d22cef95b0db1af30e199a371670ca8050
+Author: Timo Weingärtner <timo@tiwe.de>
+Date: 2013-04-23 18:28:33 +0200
+
+ remove hashing stuff
+
+ the cases in which that gave a speedup are rare to nonexistent
+
+commit fb3fe1b085b30fcc9defa8f29477226806db085b
+Author: Timo Weingärtner <timo@tiwe.de>
+Date: 2012-03-04 03:08:30 +0100
+
+ Use initializer braces instead of make_pair
+
+commit 5a901f9956801b7b9ed819da8b721a58ad93d095
+Author: Timo Weingärtner <timo@tiwe.de>
+Date: 2012-03-03 01:57:29 +0100
+
+ TODO replaced by issues on github
+
+commit fc2e6f497641f6189d4c46bb975c47fe5cdecc24 (tag: 0.2)
+Author: Timo Weingärtner <timo@tiwe.de>
+Date: 2012-02-26 16:11:21 +0100
+
+ Version 0.2
+
+commit 8079e98267742de01442f0294d41085e6af2353a
+Author: Timo Weingärtner <timo@tiwe.de>
+Date: 2012-02-26 16:03:06 +0100
+
+ Fix version.h generation for non-git
+
+commit 27b8fc1395bece0dc309512a3790a4892e0d29b1
+Author: Timo Weingärtner <timo@tiwe.de>
+Date: 2012-02-03 14:33:21 +0100
+
+ Add --version, generate version.h from git, clean manpage
+
+commit 894df89979fdd604588eae999f984b05e471ceb8
+Author: Timo Weingärtner <timo@tiwe.de>
+Date: 2012-02-03 13:52:04 +0100
+
+ Output help to stdout and exit with EX_OK.
+
+commit cee77d8be37b074fd6e039a8207e30f2f067c561
+Author: Timo Weingärtner <timo@tiwe.de>
+Date: 2012-01-17 19:24:06 +0100
+
+ Add copyright notice and license.
+
+commit 8f967e3c65d1ecf6c43a6ff41b70f0fe47e64480
+Author: Timo Weingärtner <timo@tiwe.de>
+Date: 2011-12-25 18:45:41 +0100
+
+ clarify description for --stdin
+
+commit a40bb6e7fd6e74e8b2d2222bba9421b580577e8e
+Author: Timo Weingärtner <timo@tiwe.de>
+Date: 2011-12-02 20:23:18 +0100
+
+ also use unordered_map for sizes
+
+commit cffdf8acae2b5f985eeb81cb6e0a5bbd218b9d32
+Author: Timo Weingärtner <timo@tiwe.de>
+Date: 2011-12-02 19:52:02 +0100
+
+ use equal_range and range-for
+
+ needs an adapter because std::multimap doesn't deliver begin() and end() for
+ a std::pair of its iterators.
+
+commit 091b33363d0c067c9071414f97fda16102bc065b
+Author: Timo Weingärtner <timo@tiwe.de>
+Date: 2011-12-02 19:17:47 +0100
+
+ use unordered_map for kept and to_link
+
+ sorting is not needed
+
+commit 1c803dcad160e85fe4b8d46b322374283c0edd89
+Author: Timo Weingärtner <timo@tiwe.de>
+Date: 2011-12-02 19:07:31 +0100
+
+ move kept, to_link, sizes as static into handle_file
+
+ only used inside that function
+
+commit 568bd7e0d719d7754943c6cfebfa4e4179d62333
+Author: Timo Weingärtner <timo@tiwe.de>
+Date: 2011-11-30 17:51:32 +0100
+
+ use more C++11 features
+
+commit 7349d7a5f4c305f5ba350858cc810ab79ca81bb4
+Author: Timo Weingärtner <timo@tiwe.de>
+Date: 2011-11-28 01:11:16 +0100
+
+ forget/don't memorize inode in to_link if st_nlink == 1
+
+ Might have caused data loss if inode number is reallocated to a file inside
+ the tree(s) being scanned.
+ Also saves a little bit of memory.
+
+commit f52b13a15df60b02e10f88b656192a2db5077706
+Author: Timo Weingärtner <timo@tiwe.de>
+Date: 2011-11-28 01:11:16 +0100
+
+ TODO update
+
+commit 148e9eab1109789916a7b55ece957156a57ee4cf (tag: 0.1)
+Author: Timo Weingärtner <timo@tiwe.de>
+Date: 2011-11-06 00:00:31 +0100
+
+ initial commit
diff --git a/hadori.C b/hadori.C
index 44b6e97..b1f0e5f 100644
--- a/hadori.C
+++ b/hadori.C
@@ -8,7 +8,7 @@
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
- * Foobar is distributed in the hope that it will be useful,
+ * hadori is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
@@ -20,12 +20,15 @@
#include <boost/program_options.hpp>
namespace po = boost::program_options;
+#include <boost/range/iterator_range.hpp>
+
#include <string>
#include <vector>
#include <queue>
#include <unordered_map>
#include <iostream>
#include <sstream>
+#include <fstream>
#include <cstdlib>
#include <cstring>
@@ -35,39 +38,55 @@ namespace po = boost::program_options;
#include <dirent.h>
#include <sysexits.h>
-#include "inode.h"
#include "version.h"
-// needed for equal_range and range-for
-namespace std {
-template<typename T> T& begin(pair<T,T> & ip) {
- return ip.first;
-}
-template<typename T> T& end(pair<T,T> & ip) {
- return ip.second;
-}
-}
-
po::variables_map config;
std::ostream debug(std::clog.rdbuf()), verbose(std::clog.rdbuf()), error(std::clog.rdbuf());
+/**
+ * A file as seen by the scan: the path it was found under and its stat data.
+ *
+ * This is a plain aggregate that is brace-initialized as inode{path, statbuf},
+ * so the order of the members is part of the interface.
+ *
+ * The members are intentionally not const: a const std::string member makes
+ * the implicit move constructor copy the filename, which silently defeats
+ * std::move(inode). Immutability is expressed where the object is used
+ * instead (inode const &, or inode const as a container's mapped type).
+ */
+struct inode {
+	std::string filename;	// one path that refers to this inode
+	struct stat stat;	// stat data captured when the path was examined
+};
+
+/**
+ * Compare the contents of the files named by two inodes byte for byte.
+ *
+ * Both files are read in 16 KiB chunks. The function returns true only if
+ * both files could be opened, both were read to end-of-file without an I/O
+ * error, and every chunk was identical. Any failure to open or read a file
+ * yields false, so a file that cannot be read is never reported as a match.
+ *
+ * @param l first file to compare (only l.filename is used)
+ * @param r second file to compare (only r.filename is used)
+ * @return true if the two files have identical contents, false otherwise
+ */
+inline bool compare (inode const & l, inode const & r) {
+	char lbuffer[1 << 14];
+	char rbuffer[1 << 14];
+	// binary mode: we compare raw bytes, no newline translation wanted
+	std::ifstream lf(l.filename.c_str(), std::ios::in | std::ios::binary);
+	std::ifstream rf(r.filename.c_str(), std::ios::in | std::ios::binary);
+
+	// an unopened stream never reaches eof, so looping on eof() alone would spin forever
+	if (not lf.is_open() or not rf.is_open())
+		return false;
+
+	do {
+		lf.read(lbuffer, sizeof(lbuffer));
+		rf.read(rbuffer, sizeof(rbuffer));
+		// badbit means a real I/O error, not just a short read at end-of-file
+		if (lf.bad() or rf.bad())
+			return false;
+		if (lf.gcount() != rf.gcount())
+			return false;
+		if (memcmp(lbuffer, rbuffer, static_cast<std::size_t>(lf.gcount())))
+			return false;
+	} while (lf and rf);
+
+	// a short read sets failbit together with eofbit; failbit alone is an error
+	return lf.eof() and rf.eof();
+}
+
+// Write an inode to a stream for log messages: its inode number followed by
+// the path it is known under. Returns the stream so calls can be chained.
+inline std::ostream& operator<< (std::ostream& os, inode const & i) {
+	return os << "Inode " << i.stat.st_ino << ", represented by " << i.filename;
+}
+
void do_link (inode const & i, std::string const & other) {
if (!link(i.filename.c_str(), other.c_str())) {
error << "linking " << i << " to " << other << " succeeded before unlinking (race condition)" << std::endl;
exit(EX_UNAVAILABLE);
}
if (errno != EEXIST) {
- char * errstring = strerror(errno);
+ char const * const errstring = strerror(errno);
error << "error linking " << i << " to " << other << ": " << errstring << ", nothing bad happened." << std::endl;
exit(EX_UNAVAILABLE);
}
if (unlink(other.c_str())) {
- char * errstring = strerror(errno);
+ char const * const errstring = strerror(errno);
error << "error unlinking " << other << " before linking " << i << " to it: " << errstring << std::endl;
exit(EX_UNAVAILABLE);
}
if (link(i.filename.c_str(), other.c_str())) {
- char * errstring = strerror(errno);
+ char const * const errstring = strerror(errno);
error << "error linking " << i << " to " << other << ": " << errstring << ", destination filename was already unlinked." << std::endl;
exit(EX_UNAVAILABLE);
}
@@ -75,8 +94,8 @@ void do_link (inode const & i, std::string const & other) {
void handle_file(std::string const & path, struct stat const & s) {
static std::unordered_map<ino_t, inode const> kept;
- static std::unordered_map<ino_t, ino_t> to_link;
- static std::unordered_multimap<off_t, ino_t> sizes;
+ static std::unordered_map<ino_t, ino_t const> to_link;
+ static std::unordered_multimap<off_t, ino_t const> sizes;
debug << "examining " << path << std::endl;
if (kept.count(s.st_ino)) {
@@ -91,9 +110,9 @@ void handle_file(std::string const & path, struct stat const & s) {
to_link.erase(s.st_ino);
return;
}
- inode f(path, s);
+ inode f{path, s};
debug << f << " is new to us" << std::endl;
- for (auto const & it : sizes.equal_range(s.st_size)) {
+ for (auto const & it : boost::make_iterator_range(sizes.equal_range(s.st_size))) {
inode const & candidate = kept.find(it.second)->second;
debug << "looking if it matches " << candidate << std::endl;
if (candidate.stat.st_mode != s.st_mode)
@@ -105,21 +124,18 @@ void handle_file(std::string const & path, struct stat const & s) {
if (not config.count("no-time"))
if (candidate.stat.st_mtime != s.st_mtime)
continue;
- if (config.count("hash"))
- if (candidate.get_adler() != f.get_adler())
- continue;
if (!compare(candidate, f))
continue;
verbose << "linking " << candidate << " to " << path << std::endl;
if (s.st_nlink > 1)
- to_link.insert(std::make_pair(s.st_ino, it.second));
+ to_link.insert({s.st_ino, it.second});
if (not config.count("dry-run"))
do_link(candidate, path);
return;
}
debug << "we keep " << f << std::endl;
- kept.insert(std::make_pair(s.st_ino, f));
- sizes.insert(std::make_pair(s.st_size, s.st_ino));
+ kept.insert({s.st_ino, std::move(f)});
+ sizes.insert({s.st_size, s.st_ino});
}
void recurse (std::string const & dir, dev_t const dev) {
@@ -129,7 +145,7 @@ void recurse (std::string const & dir, dev_t const dev) {
std::queue<std::string> subdirs;
if (!(D = opendir(dir.c_str()))) {
- char * errstring = strerror(errno);
+ char const * const errstring = strerror(errno);
error << "opendir(\"" << dir << "\"): " << errstring << std::endl;
return;
}
@@ -138,7 +154,7 @@ void recurse (std::string const & dir, dev_t const dev) {
path += '/';
path += d->d_name;
if (lstat(path.c_str(), &s)) {
- char * errstring = strerror(errno);
+ char const * const errstring = strerror(errno);
error << "lstat(\"" << path << "\"): " << errstring << std::endl;
continue;
}
@@ -167,7 +183,7 @@ void recurse_start (std::string const & dir) {
struct stat s;
if (lstat(dir.c_str(), &s)) {
- char * errstring = strerror(errno);
+ char const * const errstring = strerror(errno);
error << "lstat(\"" << dir << "\"): " << errstring << std::endl;
exit(EX_NOINPUT);
}
@@ -184,13 +200,12 @@ void recurse_start (std::string const & dir) {
handle_file(dir, s);
}
-int main (int const argc, char ** argv) {
+int main (int const argc, char const * const * const argv) {
po::options_description opts("OPTIONS");
opts.add_options()
("help,h", "print this help message")
("version,V", "print version information")
("no-time,t", "ignore mtime")
- ("hash", "use adler32 hash to speed up comparing many files with same size and mostly identical content")
("dry-run,n", "don't change anything, implies --verbose")
("verbose,v", "show which files get linked")
("debug,d", "show files being examined")
@@ -234,9 +249,7 @@ int main (int const argc, char ** argv) {
} else {
if (not config.count("stdin") and not config.count("null"))
error << "no arguments supplied, assuming --stdin." << std::endl;
- char delim = '\n';
- if (config.count("null"))
- delim = '\0';
+ char const delim = config.count("null") ? '\0' : '\n';
for (std::string dir; getline(std::cin, dir, delim);)
recurse_start(dir);
}
diff --git a/inode.h b/inode.h
deleted file mode 100644
index 26db923..0000000
--- a/inode.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright (C) 2011 Timo Weingärtner <timo@tiwe.de>
- *
- * This file is part of hadori.
- *
- * hadori is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Foobar is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with hadori. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <string>
-#include <ostream>
-#include <fstream>
-
-#include <cstring>
-#include <sys/stat.h>
-#include <sys/types.h>
-
-#include <zlib.h>
-
-class inode {
-public:
- std::string const filename;
- struct stat const stat;
-protected:
- uLong mutable adler;
-
-public:
- inode (std::string const &, struct stat const);
-
- uLong get_adler () const;
-
- friend bool compare (inode const &, inode const &);
- friend std::ostream& operator<< (std::ostream&, inode const &);
-};
-
-inline inode::inode (std::string const & __filename, struct stat const __stat) : filename(__filename), stat(__stat), adler(-1) {
-}
-
-inline uLong inode::get_adler () const {
- if (adler == uLong(-1)) {
- char buffer[1 << 14];
- std::ifstream f(filename.c_str());
-
- adler = adler32(0L, Z_NULL, 0);
- while (not f.eof()) {
- f.read(buffer, sizeof(buffer));
- adler = adler32(adler, (Bytef *) buffer, f.gcount());
- }
- }
- return adler;
-}
-
-inline bool compare (inode const & l, inode const & r) {
- char lbuffer[1 << 14];
- char rbuffer[1 << 14];
- std::ifstream lf(l.filename.c_str());
- std::ifstream rf(r.filename.c_str());
-
- while (not lf.eof()) {
- lf.read(lbuffer, sizeof(lbuffer));
- rf.read(rbuffer, sizeof(rbuffer));
- if (lf.gcount() != rf.gcount())
- return false;
- if (memcmp(lbuffer, rbuffer, lf.gcount()))
- return false;
- }
- return true;
-}
-
-inline std::ostream& operator<< (std::ostream& os, inode const & i) {
- os << "Inode " << i.stat.st_ino << ", represented by " << i.filename;
- return os;
-}