From 74311678ba11016ab2226f450eadd4f4f7025151 Mon Sep 17 00:00:00 2001
From: Gerd von Egidy
Date: Thu, 17 Dec 2015 23:01:03 +0100
Subject: [PATCH] implement counting hardlinked files only once in du()

---
NOTE(review): every angle-bracket span appears to be missing from this copy
(bare "#include", "std::map >", "std::vector dirents", no address in From:).
Line breaks and indentation were re-created from the hunk line counts; the
position of the blank context line in the first hunk is a guess.
Confirm against the original commit before applying.

 src/filefunc.cpp       |   76 +++++++++++++++++++++++++++++++----------------
 test/test_filefunc.cpp |   19 +++++++++++-
 2 files changed, 68 insertions(+), 27 deletions(-)

diff --git a/src/filefunc.cpp b/src/filefunc.cpp
index 8b1b2f5..e95d853 100644
--- a/src/filefunc.cpp
+++ b/src/filefunc.cpp
@@ -40,6 +40,8 @@ on this file might be covered by the GNU General Public License.
 #include
 #include
 #include
+#include
+#include
 #include
 
 #include
@@ -986,43 +988,65 @@ long long get_free_diskspace(const std::string& path)
     return free_bytes;
 }
 
+namespace
+{
+// anonymous namespace to make du_internal inaccessible from outside this file
+
+// recursive worker for du(): adds the disk usage of path and everything below it to sum; not for use elsewhere
+void du_internal(const std::string &path, long long &sum, std::map > &counted_inodes)
+{
+
+    Stat sp(path, false); // don't dereference symlinks here
+    if (!sp)
+        throw runtime_error("can't stat " + path);
+
+    // make sure we don't count hardlinked files twice
+    bool count_file=true;
+
+    // dirs can't be hardlinked; their nlink depends on their entries -> doesn't matter for us here
+    if (!sp.is_directory() && sp.nlink() > 1)
+    {
+        // see if we have already remembered this device / inode combination
+        if (counted_inodes[sp.device()].count(sp.inode()))
+            count_file=false;
+        else
+            counted_inodes[sp.device()].insert(sp.inode());
+    }
+
+    // add the space used for every kind of entry (directory, symlink or whatever):
+    // they need space on disk too; only already counted hardlinks are skipped
+
+    if (count_file)
+        sum+=sp.bytes_on_disk();
+
+    if (sp.is_directory())
+    {
+        std::vector dirents = get_dir(path, false);
+        BOOST_FOREACH(const std::string &filename, dirents)
+        {
+            // add the size of this subdir or file (recursing into subdirs)
+            du_internal(path + "/" + filename, sum, counted_inodes);
+        }
+    }
+}
+
+} // eo anon namespace
+
 /**
- * like du(1): return the number bytes used by a directory structure
+ * like du(1): return the number of bytes used by a directory structure, counting hardlinked files only once
  * @param path File or directory to start counting recursively
  * @param error Will contain the error if the return value is -1 [optional]
  * @return size in bytes on success, -1 on error
- *
- * @attention This function does currently not consider that hardlinked files need
- *            space on disk only once. But this may be added later if needed. So make
- *            sure that calling functions don't depend on either behavior!
  */
 long long du(const std::string &path, std::string *error)
 {
     long long sum = 0;
 
+    std::map > counted_inodes;
+
     try
     {
-        Stat sp(path, false); // don't dereference symlinks here
-        if (!sp)
-            throw runtime_error("can't stat " + path);
-
-        // always add the space used, even if we have a directory, symlink or whatever:
-        // they need space on disk too
-
-        sum+=sp.bytes_on_disk();
-
-        if (sp.is_directory())
-        {
-            std::vector dirents = get_dir(path, false);
-            BOOST_FOREACH(const std::string &filename, dirents)
-            {
-                // calculate size of subdir or file
-                long long rtn = du(path + "/" + filename, error);
-                if (rtn == -1)
-                    return -1;
-                sum+=rtn;
-            }
-        }
+        du_internal(path, sum, counted_inodes);
     }
     catch (exception &e)
     {
diff --git a/test/test_filefunc.cpp b/test/test_filefunc.cpp
index 65fa92d..1a51769 100644
--- a/test/test_filefunc.cpp
+++ b/test/test_filefunc.cpp
@@ -641,7 +641,24 @@ BOOST_AUTO_TEST_CASE(TestDu)
     long long duout=-1;
     string_to(dustr,duout);
 
-    BOOST_CHECK_EQUAL( duout, du(unique_dir) );
+    long long first_du=du(unique_dir);
+
+    BOOST_CHECK_EQUAL( duout, first_du );
+
+    // create hardlinks to some_file; du must report the same size as before
+    string cmd;
+    cmd=string("ln ")+some_file+" hardlink1";
+    system(cmd.c_str());
+
+    cmd=string("ln ")+some_file+" hardlink2";
+    system(cmd.c_str());
+
+    cmd=string("ln ")+some_file+" hardlink3";
+    system(cmd.c_str());
+
+    long long du_with_hardlinks=du(unique_dir);
+
+    BOOST_CHECK_EQUAL( first_du , du_with_hardlinks );
 
     // Unlink it
     BOOST_CHECK_EQUAL(true, I2n::recursive_delete(unique_dir));
-- 
1.7.1