/* * Copyright (C) 2011 Timo Weingärtner * * This file is part of hadori. * * hadori is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * hadori is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with hadori. If not, see . */ #include namespace po = boost::program_options; #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "version.h" static po::variables_map config; static std::ostream debug(std::clog.rdbuf()), verbose(std::clog.rdbuf()), error(std::clog.rdbuf()); struct inode { std::string const filename; struct stat const stat; }; inline bool compare (inode const & l, inode const & r) { char lbuffer[1 << 14]; char rbuffer[1 << 14]; std::ifstream lf(l.filename.c_str()); std::ifstream rf(r.filename.c_str()); while (not lf.eof()) { lf.read(lbuffer, sizeof(lbuffer)); rf.read(rbuffer, sizeof(rbuffer)); if (lf.gcount() != rf.gcount()) return false; if (memcmp(lbuffer, rbuffer, static_cast(lf.gcount()))) return false; } return true; } inline std::ostream& operator<< (std::ostream& os, inode const & i) { os << "Inode " << i.stat.st_ino << ", represented by " << i.filename; return os; } static void do_link (inode const & i, std::string const & other) { if (config.count("dry-run")) return; if (!link(i.filename.c_str(), other.c_str())) { error << "linking " << i << " to " << other << " succeeded before unlinking (race condition)" << std::endl; exit(EX_UNAVAILABLE); } if (errno != EEXIST) { char const * const errstring = strerror(errno); error << "error linking " << i << " to " << other << ": " << errstring << ", nothing bad happened." << std::endl; exit(EX_UNAVAILABLE); } if (unlink(other.c_str())) { char const * const errstring = strerror(errno); error << "error unlinking " << other << " before linking " << i << " to it: " << errstring << std::endl; exit(EX_UNAVAILABLE); } if (link(i.filename.c_str(), other.c_str())) { char const * const errstring = strerror(errno); error << "error linking " << i << " to " << other << ": " << errstring << ", destination filename was already unlinked." << std::endl; exit(EX_UNAVAILABLE); } } static void handle_file (std::string const & path, struct stat const & s) { static std::unordered_map kept; static std::unordered_map to_link; static std::unordered_multimap sizes; debug << "examining " << path << std::endl; if (kept.count(s.st_ino)) { debug << "another link to inode " << s.st_ino << " that we keep" << std::endl; return; } if (to_link.count(s.st_ino)) { inode const & target = kept.find(to_link[s.st_ino])->second; debug << "another link to inode " << s.st_ino << " that we merge with " << target << std::endl; do_link(target, path); if (s.st_nlink == 1) to_link.erase(s.st_ino); return; } inode f{path, s}; debug << f << " is new to us" << std::endl; for (auto const & it : boost::make_iterator_range(sizes.equal_range(s.st_size))) { inode const & candidate = kept.find(it.second)->second; debug << "looking if it matches " << candidate << std::endl; if (candidate.stat.st_mode != s.st_mode) continue; if (candidate.stat.st_uid != s.st_uid) continue; if (candidate.stat.st_gid != s.st_gid) continue; if (not config.count("no-time")) if (candidate.stat.st_mtime != s.st_mtime) continue; if (!compare(candidate, f)) continue; verbose << "linking " << candidate << " to " << path << std::endl; if (s.st_nlink > 1) to_link.insert({s.st_ino, it.second}); do_link(candidate, path); return; } debug << "we keep " << f << std::endl; kept.insert({s.st_ino, std::move(f)}); sizes.insert({s.st_size, s.st_ino}); } static void recurse (std::string const & dir, dev_t const dev) { DIR* D; struct dirent *d; struct stat s; std::queue subdirs; if (!(D = opendir(dir.c_str()))) { char const * const errstring = strerror(errno); error << "opendir(\"" << dir << "\"): " << errstring << std::endl; return; } while ((d = readdir(D))) { std::string path(dir); path += '/'; path += d->d_name; if (lstat(path.c_str(), &s)) { char const * const errstring = strerror(errno); error << "lstat(\"" << path << "\"): " << errstring << std::endl; continue; } if (s.st_dev != dev) { error << path << " resides on another file system, ignoring." << std::endl; continue; } if (S_ISDIR(s.st_mode)) subdirs.push(d->d_name); if (S_ISREG(s.st_mode)) handle_file(path, s); } closedir(D); // directories get handled after the parent dir is closed to prevent exhausting fds for (; !subdirs.empty(); subdirs.pop()) { if (subdirs.front() == "." || subdirs.front() == "..") continue; std::string subdir(dir); subdir += '/'; subdir += subdirs.front(); recurse(subdir, dev); } } static void recurse_start (std::string const & dir) { struct stat s; if (lstat(dir.c_str(), &s)) { char const * const errstring = strerror(errno); error << "lstat(\"" << dir << "\"): " << errstring << std::endl; exit(EX_NOINPUT); } static dev_t const dev = s.st_dev; if (dev != s.st_dev) { error << dir << " resides on another file system, ignoring." << std::endl; return; } if (S_ISDIR(s.st_mode)) recurse(dir, dev); if (S_ISREG(s.st_mode)) handle_file(dir, s); } int main (int const argc, char const * const * const argv) { po::options_description opts("Options"); opts.add_options() ("help,h", "print this help message") ("version,V", "print version information") ("no-time,t", "ignore mtime") ("dry-run,n", "don't change anything, implies --verbose") ("verbose,v", "show which files get linked") ("debug,d", "show files being examined") ("stdin,s", "read arguments from stdin, one per line; you can't combine that with arguments on the commandline") ("null,0", "implies --stdin, but use null bytes as delimiter") ; po::options_description all_opts; all_opts.add(opts); all_opts.add_options() ("args", po::value>(), "files and directories to work on") ; po::positional_options_description pos_opts; pos_opts.add("args", -1); po::store(po::command_line_parser(argc, argv).options(all_opts).positional(pos_opts).run(), config); po::notify(config); if (config.count("help")) { std::cout << "Usage: hadori [ OPTIONS ] [ ARGUMENTS ]\n"; std::cout << opts; return EX_OK; } if (config.count("version")) { std::cout << HADORI_VERSION "\n"; std::cout << "Written by Timo Weingärtner.\n"; std::cout << "Report bugs to the Debian BTS at https://bugs.debian.org/\n"; std::cout << "or by mail to timo@tiwe.de.\n"; return EX_OK; } if (not config.count("debug")) debug.rdbuf(nullptr); if (not config.count("debug") and not config.count("verbose") and not config.count("dry-run")) verbose.rdbuf(nullptr); if (config.count("args")) { if (config.count("stdin") or config.count("null")) { // not supported because we don't know which arguments to scan first error << "--stdin combined with commandline arguments, this is not supported." << std::endl; return EX_USAGE; } for(auto const & dir : config["args"].as>()) recurse_start(dir); } else { if (not config.count("stdin") and not config.count("null")) error << "no arguments supplied, assuming --stdin." << std::endl; char const delim = config.count("null") ? '\0' : '\n'; for (std::string dir; getline(std::cin, dir, delim);) recurse_start(dir); } return EX_OK; }