#!/perl -w
# NAME: dirsizes03.pl
# AIM: Given a PATH, show the directories existing, and the approx. size of each
# including the number of files found
# 2016-11-21 - Add VERB?() verbosity
# 2016-09-26 - Add date to output
# 2016-09-17 - Add 'fileparse'
# 2016-08-20 - Add -o out-file option
# 27/12/2011 - Minor FIX20111227 fixes - ignore 'System Volume Information', and ret 5 elements on fail
# 13/07/2010 - invert the output
# 01/07/2010 - add adjusted size, based on block size of 4096 - closer approx of
#              space required on disk system that uses 4096 bytes as BLOCK size.
# 1/2/2009 - Minor fix of file count ($fc), especially when no subdirectories.
# 20/12/2008 - Added an -x=excludes parameters
# 20/11/2008 geoff mclane http://geoffair.net/mperl
# ###############################################################################
use strict;
use warnings;
use File::Basename; # split path ($name,$dir,$ext) = fileparse($file [, qr/\.[^.]*/] )
use File::stat;
use File::Spec; # File::Spec->rel2abs($rel); # we are IN the SLN directory, get ABSOLUTE from RELATIVE
use Cwd;
use Fcntl ':mode';

my $os = $^O;
my $perl_dir = '/home/geoff/bin';
my $PATH_SEP = '/';
my $temp_dir = '/tmp';
# Only genuine Windows-style hosts get the Windows paths. A bare /win/i test
# also matches 'darwin' (macOS), which must keep the unix defaults.
if ($os =~ /^(?:MSWin|cygwin)/i) {
    $perl_dir = 'C:\GTools\perl';
    $temp_dir = $perl_dir;
    $PATH_SEP = "\\";
}
unshift(@INC, $perl_dir);
require 'lib_utils.pl' or die "Unable to load 'lib_utils.pl' Check paths in \@INC...\n";

# log file stuff
our ($LF);
my $pgmname = $0;
if ($pgmname =~ /(\\|\/)/) {
    # keep only the last path component of the script name
    my @tmpsp = split(/(\\|\/)/,$pgmname);
    $pgmname = $tmpsp[-1];
}
my $outfile = $temp_dir.$PATH_SEP."temp.$pgmname.txt";
open_log($outfile);

my $VERS = "0.0.7 2016-11-21";
##my $VERS = "0.0.6 2016-09-17";
##my $VERS = "0.0.5 2016-08-20";
##my $VERS = "0.0.4 2011-12-27";

my $in_folder = '';
#my $in_folder = 'C:\Users\Geoff\Documents';
#my $in_folder = 'C:\Documents and Settings\Geoff McLane\My Documents';
my $tot_size = 0;
my @warnings = ();
my $file_count = 0;
my $out_file = '';
my $debug_on = 0;
my $def_dir = 'C:\Projects';
###my $def_dir = 'E:\CDROMS_01';

# ==========================================
my @dir_sizes = ();
# @dir_sizes offsets
my $DL_SIZE = 0;
my $DL_DIR = 1;
my $DL_NN = 2;
my $DL_KS = 3;
my $DL_FC = 4;
my $DL_FCNN = 5;
# added
my $DL_DCNT = 6;
my $DL_ASIZ = 7;
my $DL_TBLK = 8;
# added more
my $DL_DCNN = 9;
my $DL_ASNN = 10;
my $DL_ASKS = 11;
my $DL_TBNN = 12;
# ============================================

my $verbosity = 0;
my %excluded = ();
my $loadlog = 0;
my $block = 4096;
my $add_dir_block = 0;
my $max_file_name = 35;
my $sort_alpha = 0;

# program variables
my $g_dot_cnt = 0;

# debug
my $dbg_s01 = 0; # show prt("dir: $file $cds bytes ... as they are processed ...

# Verbosity predicates: true when $verbosity is at or above the given level.
sub VERB1() { return $verbosity >= 1; }
sub VERB2() { return $verbosity >= 2; }
sub VERB5() { return $verbosity >= 5; }
sub VERB9() { return $verbosity >= 9; }

# forward
sub process_folder($$);

# Print a warning now (via prt) and remember it in @warnings so that
# show_warnings() can repeat it at exit. One trailing newline is stripped
# before the text is stored.
sub prtw($) {
    my ($tx) = shift;
    $tx =~ s/\n$//;
    prt("$tx\n");
    push(@warnings,$tx);
}

# Replay all collected warnings. When there are none, say so only if the
# $dbg argument is true.
sub show_warnings($) {
    my ($dbg) = shift;
    if (@warnings) {
        prt( "\nGot ".scalar @warnings." WARNINGS ...\n" );
        foreach my $line (@warnings) {
            prt("$line\n" );
        }
        prt("\n");
    } elsif ($dbg) {
        prt("\nNo warnings issued.\n\n");
    }
}

# Show pending warnings, print the optional message $msg, close the log and
# exit the program with status $val.
sub pgm_exit($$) {
    my ($val,$msg) = @_;
    show_warnings( 0 );
    if (length($msg)) {
        $msg .= "\n" if ($msg !~ /\n$/);
        prt("$msg\n");
    }
    close_log($outfile,$loadlog);
    # unlink($outfile);
    exit($val);
}

# Return a copy of the path with every backslash turned into a forward slash.
sub dir2unix($) {
    my ($d) = shift;
    $d =~ s/\\/\//g;
    return $d;
}

#string dirghtml::b2ks1(double d) // b2ks1(double d)
# Convert a byte count into a short string such as "7.6GB", rounded to one
# decimal place. The unit is chosen from KB, MB, GB, TB by 1024 steps.
sub bytes2ks($) {
    my ($d) = @_;
    my $ks = ($d / 1024); #// get Ks
    my ($div, $oss);
    if ( $ks < 1024 ) {
        $div = 1;
        $oss = "KB";
    } elsif ( $ks < (1024 * 1024) ) {
        $div = 1024;
        $oss = "MB";
    } elsif ( $ks < (1024 * 1024 * 1024) ) {
        $div = (1024 * 1024);
        $oss = "GB";
    } else {
        $div = (1024 * 1024 * 1024);
        $oss = "TB";
    }
    # round half up at the first decimal, then drop the remaining digits
    my $kss = $ks / $div;
    $kss += 0.05;
    $kss *= 10;
    my $lg = int($kss);
    return( ($lg / 10) . $oss );
}

# sort comparator: numeric, smallest size first (element 0 of each row)
sub mycmp_ascend {
    return ${$a}[0] <=> ${$b}[0];
}

# sort comparator: numeric, largest size first (element 0 of each row)
sub mycmp_decend {
    return ${$b}[0] <=> ${$a}[0];
}

# sort comparator: case-insensitive by directory name.
# NOTE: despite the name this yields A..Z (ascending) order.
sub mycmp_decend_alpha {
    return lc(${$a}[$DL_DIR]) cmp lc(${$b}[$DL_DIR]);
}

# perl nice number nicenum add commas
# Insert a comma between each group of three characters, counted from the
# right: "1234567" becomes "1,234,567". Strings of three characters or fewer
# are returned unchanged.
sub get_nn_local($) {
    my ($n) = shift;
    my $len = length($n);
    return $n if ($len <= 3);
    my $mod = $len % 3;
    my @groups = ();
    push(@groups, substr( $n, 0, $mod )) if ($mod > 0); # short leading group
    for (my $pos = $mod; $pos < $len; $pos += 3) {
        push(@groups, substr( $n, $pos, 3 ));
    }
    return join(',', @groups);
}

#  0 dev      device number of filesystem
#  1 ino      inode number
#  2 mode     file mode (type and permissions)
#  3 nlink    number of (hard) links to the file
#  4 uid      numeric user ID of file's owner
#  5 gid      numeric group ID of file's owner
#  6 rdev     the device identifier (special files only)
#  7 size     total size of file, in bytes
#  8 atime    last access time in seconds since the epoch
#  9 mtime    last modify time in seconds since the epoch
# 10 ctime    inode change time in seconds since the epoch (*)
# 11 blksize  preferred block size for file system I/O
# 12 blocks   actual number of blocks allocated
# mode
# File types. Not necessarily all are available on your system.
# S_IFREG S_IFDIR S_IFLNK S_IFBLK S_IFCHR S_IFIFO S_IFSOCK S_IFWHT S_ENFMT
#
# The operators -f, -d, -l, -b, -c, -p, and -S.
# S_ISREG($mode) S_ISDIR($mode) S_ISLNK($mode)
# S_ISBLK($mode) S_ISCHR($mode) S_ISFIFO($mode) S_ISSOCK($mode)

# Return 1 when stat() succeeds on $p and reports a directory, otherwise 0.
sub is_directory {
    my ($p) = shift;
    my $sb = stat($p);
    return 0 if (!$sb);
    return S_ISDIR($sb->mode) ? 1 : 0;
}

# Debug aid: print one line describing the stat() result for path $p -
# a file-test style type tag, the size, and the raw/type/permission mode bits.
sub show_stat {
    my ($p) = shift;
    my $sb = stat($p);
    if (!$sb) {
        prt("stat $p failed\n");
        return;
    }
    my $mode = $sb->mode;
    my $size = $sb->size;
    my $type = S_IFMT($mode);
    my $perm = S_IMODE($mode);
    my $sperm = sprintf("%04o", ($mode & 0777));
    my $tstg = "";
    if (S_ISREG($mode)) {
        $tstg = '-f';
    } elsif (S_ISDIR($mode)) {
        $tstg = '-d';
    } elsif (S_ISLNK($mode)) {
        $tstg = '-l';
    } elsif (S_ISBLK($mode)) {
        $tstg = '-b';
    } elsif (S_ISCHR($mode)) {
        $tstg = '-c';
    } elsif (S_ISFIFO($mode)) {
        $tstg = '-p';
    } elsif (S_ISSOCK($mode)) {
        $tstg = '-S';
    } else {
        $tstg = '-?';
    }
    prt("stat $p [$tstg] $size - mode=$mode, type=$type, perm=$perm [$sperm]\n");
}

# Recursively total the folder $in. $lev is the recursion depth; the initial
# call passes 0.
#
# Returns a 5 element list:
#   (bytes, file count, block-adjusted bytes, sub-directory count, blocks)
# covering everything below $in. Five zeros are returned when the directory
# cannot be opened.
#
# Side effects: adds to the globals $file_count and $g_dot_cnt, and - at
# level 0 only - adds to $tot_size and pushes one row per immediate
# sub-directory, plus a final 'root' row for the loose files, onto @dir_sizes.
#
# NOTE(review): -d follows symbolic links, so a link that points back up the
# tree would recurse without end - confirm whether links should be skipped.
sub process_folder($$) {
    my ($in,$lev) = @_;
    #my $inf = dir2unix($in); # WHY DO THIS????
    my $inf = $in; # keep native
    my $dsize = 0;    # bytes, this folder and all below
    my $rsize = 0;    # bytes, files directly in this folder
    my $asize = 0;    # adjusted size, this folder and all below
    my $rasiz = 0;    # adjusted size, files directly in this folder
    my $rdcnt = 0;    # directories directly in this folder
    my $fcnt = 0;     # files, this folder and all below
    my $lfc = 0;      # files directly in this folder
    my $dcnt = 0;     # directories, this folder and all below
    my $totblks = 0;  # blocks, this folder and all below
    my $totrblks = 0; # blocks, files directly in this folder

    # A lexical handle, so that recursion can never disturb an open directory.
    my $dh;
    if (!opendir( $dh, $inf)) {
        if ($inf =~ /System\s+Volume\s+Information/i) {
            # FIX20111227 - QUIETLY ignore this known FAILURE
        } elsif ($inf =~ /RECYCLE\.BIN/) {
            # FIX20161217 - QUIETLY ignore this known FAILURE
        } else {
            prtw("WARNING: Unable to open [$inf] ... $! ...\n");
        }
        return 0,0,0,0,0; # FIX20111227 - return 5 zeros
    }

    # Unbuffered output while scanning, so the progress dots appear as they
    # are printed. The 'local' must live at sub scope: wrapped in its own
    # { } block it is undone at once and has no effect.
    local $| = 1 if (!$dbg_s01);

    my @files = readdir($dh);
    closedir($dh);

    foreach my $file (@files) {
        next if (($file eq '.')||($file eq '..'));
        my $ff = $inf.$PATH_SEP.$file;
        if (-d $ff) {
            if (defined $excluded{$file}) {
                prt( "Skipping folder [$file].\n" ) if ($verbosity > 1);
                next;
            }
            $dcnt++;
            $rdcnt++;
            if ($add_dir_block) {
                $asize += $block;
                $rasiz += $block;
            }
            #prt("Sub dir:$lev: $file $cds bytes, $fc files, $asize, $dcnt...\n");
            my ($cds,$fc,$as,$dc,$tb) = process_folder($ff,($lev + 1));
            $dsize += $cds;
            $fcnt += $fc;
            $asize += $as;
            $dcnt += $dc;
            $totblks += $tb;
            if ($lev == 0) {
                prt("dir: $file $cds bytes, $fc files, $asize, $dcnt ...\n") if ($dbg_s01);
                $tot_size += $cds;
                #                 0     1      2   3   4    5   6    7    8
                push(@dir_sizes, [$cds, $file, '', '', $fc, '', $dc, $as, $tb]);
            }
            next;
        }

        # anything that is not a directory is counted as a file
        $fcnt++;
        $lfc++;
        my $sb = stat($ff);
        if ($sb) {
            my $sz = $sb->size; # get file SIZE
            $dsize += $sz;
            $rsize += $sz;
            # blocks needed on disk - an empty file is still charged one block
            my $blks = 1;
            if ($sz != 0) {
                $blks = int($sz / $block);
                $blks++ if ($sz % $block);
            }
            my $as = ($blks * $block);
            $totblks += $blks;
            $totrblks += $blks;
            $asize += $as;
            $rasiz += $as;
            #prt("$ff $sz $as $blks $dsize $asize\n");
        } else {
            prtw("WARNING: stat of $ff FAILED!\n");
        }
        $file_count++;
        if (!$dbg_s01 && (($file_count % 1000) == 0)) {
            prt( "." ); # one progress dot per 1000 files
            $g_dot_cnt++;
        }
    }

    if ($lev == 0) {
        $tot_size += $rsize;
        prt("\n") if ($g_dot_cnt); # clear to new line if have output any DOTS...
        prt( "root: $inf ".get_nn($rsize).", total ".get_nn($tot_size)." $fcnt files...\n" ) if ($dbg_s01);
        #                 0       1       2   3   4     5  6       7       8
        push(@dir_sizes, [$rsize, 'root', '', '', $lfc, '',$rdcnt, $rasiz, $totrblks]); # add in this ROOT size to list
    }
    #prt("Returning: $dsize,$fcnt,$asize,$dcnt\n");
    return $dsize,$fcnt,$asize,$dcnt,$totblks;
}

# 0                 1     2      3     4           5     6
# zips              0   0KB      0 files (         0   0KB)
# TOTAL 8,124,689,257 7.6GB 72,685 FILES (2,037,180 7.8GB)
#
# Parse a previously written output table. Lines starting with 'DIR ' (the
# header) and empty lines are ignored; every line that splits into exactly 7
# fields yields one row [name, bytes, size-without-unit, file count]. Other
# lines are reported through prtw() and skipped.
# Returns a reference to the array of rows; the array is empty when the file
# cannot be opened.
# NOTE(review): a directory name containing blanks produces more than 7
# fields, so such lines are skipped with a warning.
sub load_last_file2($) {
    my ($inf) = @_;
    my @a = ();
    my $fh;
    if (!open($fh, '<', $inf)) {
        prtw("WARNING: Unable to open file [$inf]\n");
        return \@a;
    }
    my @lines = <$fh>;
    close($fh);
    my $lncnt = scalar @lines;
    prt("Processing $lncnt lines, from [$inf]...\n");
    my $lnn = 0;
    foreach my $line (@lines) {
        chomp $line;
        next if ($line =~ /^DIR\s/);
        $line =~ s/\(/ /g;
        $line =~ s/\)/ /g;
        $line = trim_all($line);
        $lnn++;
        next if (length($line) == 0);
        my @arr = split(/\s+/,$line);
        if (scalar @arr == 7) {
            my ($dir,$siz,$asz,$fc) = @arr[0..3];
            $siz =~ s/,//g;         # drop ALL thousands separators
            $fc =~ s/,//g;
            $asz =~ s/[A-Za-z]+$//; # drop only the unit suffix: 7.6GB -> 7.6
            push(@a,[$dir,$siz,$asz,$fc]);
        } else {
            prtw("WARNING: $lnn: $line SKIPPED\n");
        }
    }
    return \@a;
}

# TODO: Maybe compare these lines with current, and show diffs...
# Read every line of the file $inf and return a reference to the array of
# lines (line endings kept). The array is empty when the file cannot be opened.
sub load_last_file($) {
    my ($inf) = @_;
    my (@lines);
    if (open(my $fh, '<', $inf)) {
        @lines = <$fh>;
        close($fh);
        my $lncnt = scalar @lines;
        prt("Got $lncnt lines, from [$inf]...\n") if (VERB9());
    }
    return \@lines;
}

# ================================================================
# nice smooth output of the list, aligning all numbers
# This requires a first run through to get the various minimum length
# then applying those min length in the 2nd run output
# ----------------------------------------------------------------
# Sorts @dir_sizes (by size ascending, or by name when $sort_alpha is set),
# stores the formatted strings back into each row, prints a header, one line
# per row and a TOTAL line. When $out_file is set, the previous TOTAL line
# found in that file is compared with the new one, and the table is then
# written to the file via rename_2_old_bak() and write2file().
sub show_dir_sizes {
    my $max = scalar @dir_sizes;

    # could sort in other ways - ascending, or alphabetic
    #@dir_sizes = sort mycmp_decend @dir_sizes;
    #@dir_sizes = sort mycmp_ascend @dir_sizes;
    if ($sort_alpha) {
        @dir_sizes = sort mycmp_decend_alpha @dir_sizes;
    } else {
        @dir_sizes = sort mycmp_ascend @dir_sizes;
    }

    # ---------------------------------------------------
    # returns the larger of a current width and the length of a string
    my $widest = sub {
        my ($cur,$str) = @_;
        return (length($str) > $cur) ? length($str) : $cur;
    };

    # column widths: name, bytes, KB/MB.., file count, blocks, block size
    my ($mdl,$msl,$mkl,$mcl,$mtbl,$mtbkl) = (0,0,0,0,0,0);
    # grand totals: bytes, files, blocks
    my ($tot,$ftot,$tbtot) = (0,0,0);

    # first run, just to get min sizes
    foreach my $row (@dir_sizes) {
        my $siz  = $row->[$DL_SIZE];
        my $fc   = $row->[$DL_FC];
        my $tblk = $row->[$DL_TBLK];
        $tot += $siz;
        $ftot += $fc;
        $tbtot += $tblk;

        # do conversion now, and keep ASCII
        $row->[$DL_NN]   = get_nn($siz);
        $row->[$DL_KS]   = bytes2ks($siz);
        $row->[$DL_FCNN] = get_nn($fc);
        # the adjusted-size and dir-count strings are kept in the row,
        # although the table printed below does not show them
        $row->[$DL_ASNN] = get_nn($row->[$DL_ASIZ]);
        $row->[$DL_ASKS] = bytes2ks($row->[$DL_ASIZ]);
        $row->[$DL_DCNN] = get_nn($row->[$DL_DCNT]);
        $row->[$DL_TBNN] = get_nn($tblk);

        $mdl   = $widest->($mdl,   $row->[$DL_DIR]);
        $msl   = $widest->($msl,   $row->[$DL_NN]);
        $mkl   = $widest->($mkl,   $row->[$DL_KS]);
        $mcl   = $widest->($mcl,   $row->[$DL_FCNN]);
        $mtbl  = $widest->($mtbl,  $row->[$DL_TBNN]);
        $mtbkl = $widest->($mtbkl, bytes2ks($tblk * $block));
    }

    # get total lengths
    my $tot_nn   = get_nn($tot);
    my $tot_ks   = bytes2ks($tot);
    my $tot_fcnn = get_nn($ftot);
    my $tot_tbnn = get_nn($tbtot);
    my $tot_tbks = bytes2ks($tbtot * $block);
    $msl   = $widest->($msl,   $tot_nn);
    $mkl   = $widest->($mkl,   $tot_ks);
    $mcl   = $widest->($mcl,   $tot_fcnn);
    $mtbl  = $widest->($mtbl,  $tot_tbnn);
    $mtbkl = $widest->($mtbkl, $tot_tbks);
    # cap the name column; longer names are printed in full, not padded
    $mdl = $max_file_name if ($mdl > $max_file_name);

    # second run, adjusting the lengths of each output
    my $absdir = File::Spec->rel2abs($in_folder);
    my $hdr = "DIR '$absdir', sizes at ".lu_get_YYYYMMDD_hhmmss_UTC(time()).", $max lines... $tot_tbks";
    my @lines = ();
    prt("$hdr\n");
    foreach my $row (@dir_sizes) {
        my $dir  = sprintf('%-*s', $mdl,   $row->[$DL_DIR]);  # directory name
        my $nn   = sprintf('%*s',  $msl,   $row->[$DL_NN]);   # total size IN this directory
        my $ks   = sprintf('%*s',  $mkl,   $row->[$DL_KS]);
        my $fcnn = sprintf('%*s',  $mcl,   $row->[$DL_FCNN]);
        my $tbnn = sprintf('%*s',  $mtbl,  $row->[$DL_TBNN]);
        my $tbks = sprintf('%*s',  $mtbkl, bytes2ks($row->[$DL_TBLK] * $block));
        my $msg = "$dir $nn $ks $fcnn files ($tbnn $tbks)";
        push(@lines,$msg);
        prt("$msg\n");
    }

    # FINAL 'totals' output
    my $last_line = sprintf('%-*s', $mdl, "TOTAL")
        . ' ' . sprintf('%*s', $msl, $tot_nn)
        . ' ' . sprintf('%*s', $mkl, $tot_ks)
        . ' ' . sprintf('%*s', $mcl, $tot_fcnn)
        . ' FILES (' . sprintf('%*s', $mtbl, $tot_tbnn)
        . ' ' . sprintf('%*s', $mtbkl, $tot_tbks) . ')';
    prt("$last_line\n");
    push(@lines,$last_line);

    #######################################################
    if (length($out_file)) {
        my $msg = "$hdr\n";
        $msg .= join("\n",@lines)."\n";
        if (-f $out_file) {
            # search the previous file backwards for its TOTAL line
            my $ra = load_last_file($out_file);
            my $cnt = scalar @{$ra};
            while ($cnt > 0) {
                $cnt--;
                my $line = ${$ra}[$cnt];
                chomp $line;
                next if ($line !~ /^TOTAL/);
                if ($last_line eq $line) {
                    prt("Previous file '$out_file', has $cnt lines... SAME total line...\n");
                } else {
                    prt("Previous file '$out_file', has $cnt lines... last total line...\n");
                    prt("$line\n");
                }
                last;
            }
        }
        rename_2_old_bak($out_file);
        write2file($msg,$out_file);
        prt("Current table written to '$out_file'\n");
    }
    # all done
}

##### SUBS ONLY ABOVE ####
# ######### MAIN #########
# ========================
parse_args(@ARGV); # parse user input
prt( "$pgmname ... Hello, processing [$in_folder] directory ...\n" ) if ($verbosity > 0);
process_folder( $in_folder, 0 ); # accumulate global array
show_dir_sizes(); # do the display stuff
pgm_exit(0,"");

#################################################################################
#### SUBS ONLY related to user input
# =================================
# Print the command line usage text.
sub give_help {
    prt("\n");
    prt( "$pgmname [Options] folder\n" );
    prt( "Version: $VERS, Options:\n" );
    prt( " -? -h -help = This brief HELP, and exit(0).\n" );
    prt( " -l = Load log into Wordpad\n" );
    prt( " -v[vvvv] = Set verbosity. (def=$verbosity).\n" );
    prt( " -sort-alpha = List in alphabetic sequence. (def=by size).\n");
    prt( " -x=folder = Exclude this folder.\n" );
    prt( " --out (-o) = Output final list to this file.\n");
    prt("\n");
}

# Print a short description of what the program does.
sub the_idea {
    prt(" Given an input folder, search it, and all its subdirectories,\n");
    prt(" and report the total files, folders found.\n");
}

# Ensure argument exists, or die.
# Called with the remaining argument list: the first element is the option
# itself, and at least one more element must follow it.
sub require_arg {
    my ($arg, @arglist) = @_;
    mydie( "ERROR: no argument given for option '$arg' ...\n" ) if ! @arglist;
}

# Pre-scan the whole argument list for -v style options, so that verbosity is
# already in force while the other options are parsed. Each matching option
# adds the length of its text after the dashes: -v adds 1, -vvv adds 3.
sub set_verbosity {
    my (@av) = @_;
    foreach my $item (@av) {
        next if ($item !~ /^-/);
        my $arg = $item;
        $arg =~ s/^-+//; # strip all leading dashes
        if ($arg =~ /^v/) {
            $verbosity += length($arg);
            prt( "Set verbosity to [$verbosity].\n" );
        }
    }
}

# Parse the command line into the option globals ($in_folder, %excluded,
# $sort_alpha, $loadlog, $out_file, $verbosity). Help options print the usage
# and exit(0); an unknown option or a missing input folder ends the program
# through pgm_exit(1,...).
sub parse_args {
    my (@av) = @_;
    my ($arg, $ex);
    set_verbosity(@av);
    while (@av) {
        $arg = $av[0];
        if ($arg =~ /^-/) {
            $arg =~ s/^-+//; # strip all leading dashes
            if (($arg eq '?')||($arg eq 'h')||($arg eq 'help')) {
                give_help();
                the_idea();
                exit(0);
            } elsif ($arg =~ /^x=(.+)/) {
                $ex = $1;
                $excluded{$ex} = 1;
                prt( "Excluding folder [$ex] ...\n" ) if ($verbosity > 0);
            } elsif ($arg =~ /^x$/) {
                require_arg(@av);
                shift @av;
                $ex = $av[0];
                $excluded{$ex} = 1;
                prt( "Excluding folder [$ex] ...\n" ) if ($verbosity > 0);
            } elsif ($arg =~ /^sort-alpha/i) {
                $sort_alpha = 1;
                prt( "Set sort alpha...\n" ) if ($verbosity > 0);
            } elsif ($arg =~ /^l/) {
                $loadlog = 1;
                prt( "Set load log into Wordpad ...\n" ) if ($verbosity > 0);
            } elsif ($arg =~ /^v/) {
                # done first
            } elsif ($arg =~ /^o/) {
                require_arg(@av);
                shift @av;
                $ex = $av[0];
                $out_file = $ex;
                prt( "Set output file to [$ex] ...\n" ) if ($verbosity > 0);
            } else {
                pgm_exit(1, "ERROR: Unknown argument [".$av[0]."]! Use -? for HELP.\n" );
            }
        } else {
            $in_folder = $arg;
            prt( "Set IN folder to [$in_folder] ...\n" ) if ($verbosity > 0);
        }
        shift @av;
    }

    if ($debug_on && (length($in_folder) == 0) && (length($def_dir))) {
        $in_folder = $def_dir;
        prt( "DEBUG ON: Set IN folder to DEFAULT [$in_folder] ...\n" );
        #$dbg_s01 = 1;
        #$loadlog = 1;
    }
    if (length($in_folder) == 0) {
        give_help();
        pgm_exit(1,"ERROR: No input folder found in command!\n");
    }
}

# eof - dirsizes03.pl