#!/usr/bin/perl

################################################################################
# Perl script to process all .maxpat files in a MaxMSP5 project and write a
# text file listing alphabetically all the global sends and receives (including
# signal sends).  NB if some of these begin with e.g. #1 then the respective
# send/receive with the argument filled in won't be reported as matching
# (e.g. "send #1-mess" caught in another patch by "receive module1-mess" won't
# match because "module1" is a typed-in argument to the patch with the send).
# 
# To run in a terminal, make sure the script is executable (i.e. "chmod u+x
# maxsends.pl"), that the path to perl at the top of this script is correct
# ("whereis perl"), then call it with two arguments: the root path of your
# project, and the report file you want to create 
# (e.g. "max-sends.pl ~/max/myproj ~/max/myproj/globals.txt").
# 
# Michael Edwards ~ m@michael-edwards.org ~ July 12th 2010
#
################################################################################

# todo: make sure we only get file matches when the extension (.mxo also) is at
#   the end of the file name 
# if very bored, for efficiency, make the hash/array
#   args to &store and string_in_array references

# MDE Mon Sep 23 13:07:26 2013 -- todo: if we use strict, then hashes become illegal due to / char
# use strict;

# Extensions of files to be processed.
my @MaxExtensions = ("maxpat"); # old binary .mxb files are skipped
my %Sends; # hash table to hold max file which contains each send
my %SigSends; # we treat data and signal sends separately
my %Receives; 
my %SigReceives; 
# hash to hold those receives we've matched with sends, so that we can report
# those receives that have no known match.
my %MatchedReceives; 
my $NumSends = 0; # global counters for reporting
my $NumSigSends = 0;
my $NumReceives = 0;
my $NumSigReceives = 0;


&do_tree($ARGV[0], "");
&results($ARGV[1]);

# recursively go through each file/directory in the given project folder, look
# for max files and parse them, ignoring all others.
sub do_tree {
  my ($dir, $parent) = @_; 
  my $thisdir;
  my $extension;
  my $skip;
  my $file;
  my $did_file;
  my $root_to;

  # add a slash to the given path unless already given.
  $dir = trailing_slash($dir);  
  print "\nEntering '$dir'";
  opendir($thisdir, $parent . $dir)
    or die "do_tree:  Can't open directory '$dir'.\n";
  # list all files in current directory.
  my @allfiles = readdir $thisdir;
  closedir $thisdir;
  # process alphabetically
  foreach $file (sort(@allfiles)) {
    # Don't process current or parent directory etc.
    next if ($file eq "." || $file eq ".." || $file eq ".svn" 
             || $file =~ /.mxo/);
    $did_file = 0;
    # recursive call if current is a directory
    if (-d $parent . $dir . $file) {
      # print "\n  recursive on parent:" . $parent . " dir:" . $dir;
      do_tree($file, $parent . $dir);
      next;
    }
    # there's only .maxpat for now but who knows....
    foreach $extension (@MaxExtensions) {
      if ($file =~ /$extension$/) {
        print "\nProcessing '$file'";
        process_file($parent . $dir . $file);
        $did_file = 1;
        last;
      }
    }
    # warn of skipped files.
    unless ($did_file) {
      print "\n    Skipping '$file'";
    }
  }
}

# process a single file line by line, looking for sends and receives
sub process_file {
  my ($infile) = @_;
  my $line;
  # a named send can contain numbers, letters, _ - and .
  my $regexp = "(#*[a-zA-Z0-9_\.\-]+)";

  open(INPUT, $infile) 
    or die "process_file: Can't open '$infile' for reading.\n";
  while ($line = <INPUT>) {
    # sends, e.g. "s #1-waveshaper-mix"
    if ($line =~ /"s\ $regexp"/g || $line =~ /"send\ $regexp"/g ||
        # now get the sends from message boxes rather then the send object
        $line =~ /";\\r$regexp/g) {
      # so $1 is the send name, $infile is the max file it's in
      %Sends = &store($infile, $1, %Sends);
      $NumSends++;
      # send~
    } elsif ($line =~ /"send~\ $regexp"/g) {
      %SigSends = &store($infile, $1, %SigSends);
      $NumSigSends++;
      # receive
    } elsif ($line =~ /"r\ $regexp"/g || $line =~ /"receive\ $regexp"/g) {
      %Receives = &store($infile, $1, %Receives);
      $NumReceives++;
      # receive~
    } elsif ($line =~ /"receive~\ $regexp"/g) {
      %SigReceives = &store($infile, $1, %SigReceives);
      $NumSigReceives++;
    }
  }
  close INPUT;
}

# store the max file name against the send/receive name in the given hash table
sub store {
  my ($file, $global, %hash) = @_;

  # if it's not there yet we'll have to create the array, so that the appearance
  # of a single send in many files can be handled.
  if (!$hash{$global}) {
    $hash{$global} = [ $file ];
    # don't record the same send/receive in the same file
  } elsif (!(&string_in_array($file, @{ $hash{$global} }))) { 
    # if we've already seen this send/receive, add it to the front of the array
    push @{ $hash{$global} }, $file;
  }
  return %hash
}

# parent routine to print results to the text file.
sub results {
  my ($outfile) = @_;

  open(OUTPUT, ">$outfile") 
    or die "process_file: Can't open '$outfile' for writing.\n";
  # process the data and signal sends separately
  &results_aux(\%Sends, \%Receives, "send", "receive");
  &results_aux(\%SigSends, \%SigReceives, "send~", "receive~");
  my $num_sends = &hash_count(%Sends);
  my $num_sig_sends = &hash_count(%SigSends);
  my $num_receives = &hash_count(%Receives);
  my $num_sig_receives = &hash_count(%SigReceives);
  print OUTPUT "\n\nTotals: sends: $NumSends ($num_sends unique)";
  print OUTPUT "\n        send~s: $NumSigSends ($num_sig_sends unique)";
  print OUTPUT "\n        receives: $NumReceives ($num_receives unique)";
  print OUTPUT "\n        receive~s: $NumSigReceives ($num_sig_receives unique)\n";
  close OUTPUT;
}

# print results to the given text file
# the first two arguments are references to the send and receive hash tables.
# sname and rname are either send/send~ or receive/receive~ (we process the data
# and signal versions separately).
sub results_aux {
  my ($shash, $rhash, $sname, $rname) = @_;
  my $send;
  my $receive;
  my $file;
  my $tmp;

  # process each detected send
  foreach $send (sort(keys %$shash)) { # sort all send names alphabetically
    print OUTPUT "\n$send: \n  $sname:";   
    my @rfiles;
    # On the few occasions I have to parse and process text files I reach for
    # Perl.  However the following line has finally convinced me that this
    # language is for syntax-fetishists only.  I think I'd be quicker with
    # C/Lisp, given the time and frustration the following cost me.  Maybe if I
    # was doing it every day it would sink in, but either way: 4 operators to
    # get at my data???
    my @sfiles = @{ ${%$shash}{$send} };
    # a single named send can be used in many files so these will be in the
    # array associated with the send as hash key.
    foreach $file (@sfiles) { 
      print OUTPUT "\n    $file ";
    }
    # now get the files that the receive for this key is in
    $tmp = ${%$rhash}{$send};
    if ($tmp) {
      @rfiles = @{ $tmp };
      # keep track of those receives we can match to a send.  A hash is probably
      # better than an array even though we're not interested in the value (1).
      $MatchedReceives{$send} = 1;
    }
    print OUTPUT "\n  $rname:";
    # print each file in which this send is received 
    foreach $file (@rfiles) { 
      print OUTPUT "\n    $file ";
    }
  }
  # now report those receives for which there is no matched send
  print OUTPUT "\n\n$rname names for which there is no recognised $sname:";
  foreach $receive (sort(keys %$rhash)) { # sort all send names alphabetically
    # the matched send/receives were stored in the hash above, so if it's not in
    # there, report that now.
    if (!$MatchedReceives{$receive}) {
      # and print each file the unmatched receive is in of course.
      my @rfiles = @{ ${%$rhash}{$receive} };
      print OUTPUT "\n$receive";
      foreach $file (@rfiles) { 
        print OUTPUT "\n    $file ";
    }

    }
  }
}

# there must be a perl routine for this already but...find if a string is
# already in an array
sub string_in_array {
  my ($test_str, @array) = @_;
  my $result = 0;
  my $str;

  foreach $str (@array) {
    last if $result; # exit if we've already found it.
    if ($str eq $test_str) {
      $result = 1;
    }
  }
  return $result;
}

# add a slash to the given path unless already given.
sub trailing_slash {
    my ($input) = @_;

    $input .= "/" unless ($input =~ m|/$|);
    return $input;
}

# return the number of keys in a hash table.
sub hash_count {
  my (%hash) = @_;
  # is this the only way of finding out how many items are in a hash table?  
  # scalar(%Sends) doesn't seem to do it.
  my @keys = keys %hash;
  return $#keys;
}

################################################################################
# EOF max-sends.pl
