summary refs log tree commit diff
path: root/hadori.C
diff options
context:
space:
mode:
Diffstat (limited to 'hadori.C')
-rw-r--r-- hadori.C 79
1 files changed, 46 insertions, 33 deletions
diff --git a/hadori.C b/hadori.C
index 44b6e97..b1f0e5f 100644
--- a/hadori.C
+++ b/hadori.C
@@ -8,7 +8,7 @@
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
- * Foobar is distributed in the hope that it will be useful,
+ * hadori is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
@@ -20,12 +20,15 @@
#include <boost/program_options.hpp>
namespace po = boost::program_options;
+#include <boost/range/iterator_range.hpp>
+
#include <string>
#include <vector>
#include <queue>
#include <unordered_map>
#include <iostream>
#include <sstream>
+#include <fstream>
#include <cstdlib>
#include <cstring>
@@ -35,39 +38,55 @@ namespace po = boost::program_options;
#include <dirent.h>
#include <sysexits.h>
-#include "inode.h"
#include "version.h"
-// needed for equal_range and range-for
-namespace std {
-template<typename T> T& begin(pair<T,T> & ip) {
- return ip.first;
-}
-template<typename T> T& end(pair<T,T> & ip) {
- return ip.second;
-}
-}
-
po::variables_map config;
std::ostream debug(std::clog.rdbuf()), verbose(std::clog.rdbuf()), error(std::clog.rdbuf());
+/**
+ * A file as tracked by hadori: one path under which it was found, plus
+ * the struct stat data recorded for that path.
+ *
+ * The members are intentionally not const-qualified. A const std::string
+ * member cannot be moved from, so handing an inode to a container with
+ * std::move() would silently fall back to copying the filename, and the
+ * type would not be assignable at all. Immutability is expressed where
+ * the object is used instead (inode const &, or a container whose mapped
+ * type is inode const).
+ */
+struct inode {
+    std::string filename;   // a path referring to this inode
+    struct stat stat;       // metadata recorded for that path
+};
+
+/**
+ * Compare the contents of the files named by two inodes, byte for byte.
+ *
+ * @param l first file (only l.filename is used)
+ * @param r second file (only r.filename is used)
+ * @return true if both files could be read completely and hold identical
+ *         bytes; false if they differ or if either file cannot be opened
+ *         or read.
+ *
+ * Both files are read in 16 KiB chunks. A file that cannot be opened or
+ * read is reported as "not equal", so callers never link on the basis of
+ * data that was not actually compared.
+ */
+inline bool compare (inode const & l, inode const & r) {
+    char lbuffer[1 << 14];
+    char rbuffer[1 << 14];
+    std::ifstream lf(l.filename.c_str(), std::ios::in | std::ios::binary);
+    std::ifstream rf(r.filename.c_str(), std::ios::in | std::ios::binary);
+
+    // An unopened stream never reaches eof(): every read() fails with
+    // gcount() == 0, which would otherwise spin forever.
+    if (not lf.is_open() or not rf.is_open())
+        return false;
+
+    do {
+        lf.read(lbuffer, sizeof(lbuffer));
+        rf.read(rbuffer, sizeof(rbuffer));
+        // badbit means a real I/O error, not merely a short final chunk.
+        if (lf.bad() or rf.bad())
+            return false;
+        std::streamsize const count = lf.gcount();
+        if (count != rf.gcount())
+            return false;
+        if (memcmp(lbuffer, rbuffer, static_cast<std::size_t>(count)))
+            return false;
+        // read() sets failbit together with eofbit on a short read, so the
+        // stream state turns false once the end of a file has been hit.
+    } while (lf and rf);
+
+    // Equal only if both streams stopped because they reached their end.
+    return lf.eof() and rf.eof();
+}
+
+// Write a short description of an inode (its inode number and the path
+// that represents it) to the given stream; returns that stream so the
+// call can be chained.
+inline std::ostream& operator<< (std::ostream& os, inode const & i) {
+    return os << "Inode " << i.stat.st_ino << ", represented by " << i.filename;
+}
+
void do_link (inode const & i, std::string const & other) {
if (!link(i.filename.c_str(), other.c_str())) {
error << "linking " << i << " to " << other << " succeeded before unlinking (race condition)" << std::endl;
exit(EX_UNAVAILABLE);
}
if (errno != EEXIST) {
- char * errstring = strerror(errno);
+ char const * const errstring = strerror(errno);
error << "error linking " << i << " to " << other << ": " << errstring << ", nothing bad happened." << std::endl;
exit(EX_UNAVAILABLE);
}
if (unlink(other.c_str())) {
- char * errstring = strerror(errno);
+ char const * const errstring = strerror(errno);
error << "error unlinking " << other << " before linking " << i << " to it: " << errstring << std::endl;
exit(EX_UNAVAILABLE);
}
if (link(i.filename.c_str(), other.c_str())) {
- char * errstring = strerror(errno);
+ char const * const errstring = strerror(errno);
error << "error linking " << i << " to " << other << ": " << errstring << ", destination filename was already unlinked." << std::endl;
exit(EX_UNAVAILABLE);
}
@@ -75,8 +94,8 @@ void do_link (inode const & i, std::string const & other) {
void handle_file(std::string const & path, struct stat const & s) {
static std::unordered_map<ino_t, inode const> kept;
- static std::unordered_map<ino_t, ino_t> to_link;
- static std::unordered_multimap<off_t, ino_t> sizes;
+ static std::unordered_map<ino_t, ino_t const> to_link;
+ static std::unordered_multimap<off_t, ino_t const> sizes;
debug << "examining " << path << std::endl;
if (kept.count(s.st_ino)) {
@@ -91,9 +110,9 @@ void handle_file(std::string const & path, struct stat const & s) {
to_link.erase(s.st_ino);
return;
}
- inode f(path, s);
+ inode f{path, s};
debug << f << " is new to us" << std::endl;
- for (auto const & it : sizes.equal_range(s.st_size)) {
+ for (auto const & it : boost::make_iterator_range(sizes.equal_range(s.st_size))) {
inode const & candidate = kept.find(it.second)->second;
debug << "looking if it matches " << candidate << std::endl;
if (candidate.stat.st_mode != s.st_mode)
@@ -105,21 +124,18 @@ void handle_file(std::string const & path, struct stat const & s) {
if (not config.count("no-time"))
if (candidate.stat.st_mtime != s.st_mtime)
continue;
- if (config.count("hash"))
- if (candidate.get_adler() != f.get_adler())
- continue;
if (!compare(candidate, f))
continue;
verbose << "linking " << candidate << " to " << path << std::endl;
if (s.st_nlink > 1)
- to_link.insert(std::make_pair(s.st_ino, it.second));
+ to_link.insert({s.st_ino, it.second});
if (not config.count("dry-run"))
do_link(candidate, path);
return;
}
debug << "we keep " << f << std::endl;
- kept.insert(std::make_pair(s.st_ino, f));
- sizes.insert(std::make_pair(s.st_size, s.st_ino));
+ kept.insert({s.st_ino, std::move(f)});
+ sizes.insert({s.st_size, s.st_ino});
}
void recurse (std::string const & dir, dev_t const dev) {
@@ -129,7 +145,7 @@ void recurse (std::string const & dir, dev_t const dev) {
std::queue<std::string> subdirs;
if (!(D = opendir(dir.c_str()))) {
- char * errstring = strerror(errno);
+ char const * const errstring = strerror(errno);
error << "opendir(\"" << dir << "\"): " << errstring << std::endl;
return;
}
@@ -138,7 +154,7 @@ void recurse (std::string const & dir, dev_t const dev) {
path += '/';
path += d->d_name;
if (lstat(path.c_str(), &s)) {
- char * errstring = strerror(errno);
+ char const * const errstring = strerror(errno);
error << "lstat(\"" << path << "\"): " << errstring << std::endl;
continue;
}
@@ -167,7 +183,7 @@ void recurse_start (std::string const & dir) {
struct stat s;
if (lstat(dir.c_str(), &s)) {
- char * errstring = strerror(errno);
+ char const * const errstring = strerror(errno);
error << "lstat(\"" << dir << "\"): " << errstring << std::endl;
exit(EX_NOINPUT);
}
@@ -184,13 +200,12 @@ void recurse_start (std::string const & dir) {
handle_file(dir, s);
}
-int main (int const argc, char ** argv) {
+int main (int const argc, char const * const * const argv) {
po::options_description opts("OPTIONS");
opts.add_options()
("help,h", "print this help message")
("version,V", "print version information")
("no-time,t", "ignore mtime")
- ("hash", "use adler32 hash to speed up comparing many files with same size and mostly identical content")
("dry-run,n", "don't change anything, implies --verbose")
("verbose,v", "show which files get linked")
("debug,d", "show files being examined")
@@ -234,9 +249,7 @@ int main (int const argc, char ** argv) {
} else {
if (not config.count("stdin") and not config.count("null"))
error << "no arguments supplied, assuming --stdin." << std::endl;
- char delim = '\n';
- if (config.count("null"))
- delim = '\0';
+ char const delim = config.count("null") ? '\0' : '\n';
for (std::string dir; getline(std::cin, dir, delim);)
recurse_start(dir);
}