#!/usr/bin/env perl
#
# 2018/01/17 Gabriel Moreau <Gabriel.Moreau(A)univ-grenoble-alpes.fr>
#
# apt-get install libyaml-syck-perl libtemplate-perl libarchive-zip-perl
# apt-get install yamllint libyaml-shell-perl # check YAML files

use strict;
use warnings;
use version; our $VERSION = version->declare('0.0.12');

use File::Copy qw(copy);    
use YAML::Syck;
use Getopt::Long();
use Cwd();
use Template;
use Archive::Zip qw(:ERROR_CODES :CONSTANTS);


# Command line options. The --verbose flag is accepted, but no code visible
# in this file reads $verbose.
my $verbose;
Getopt::Long::GetOptions('verbose' => \$verbose);


# Dispatch table: command name given on the command line => handler.
my %CMD_DB = (
   'help'                => \&cmd_help,
   'version'             => \&cmd_version,
   'check'               => \&cmd_check,
   'dap-publish'         => \&cmd_dap_publish,
   'dap-unpublish'       => \&cmd_dap_unpublish,
   'make-zip'            => \&cmd_make_zip,
   'make-allfiles'       => \&cmd_make_allfiles,
   'make-file-author'    => \&cmd_make_file_author,
   'make-file-copyright' => \&cmd_make_file_copyright,
   'make-file-license'   => \&cmd_make_file_license,
   'list-license'        => \&cmd_list_license,
   );

################################################################
# main program
################################################################

# The first remaining argument selects the command ('help' when there is
# none); the other arguments are handed over to the command handler.
my $cmd = shift @ARGV || 'help';

# Unknown command: complain on STDERR, show the help and fail.
if (not defined $CMD_DB{$cmd}) {
   print {*STDERR} "project-meta: command $cmd not found\n\n";
   $CMD_DB{'help'}->();
   exit 1;
   }

$CMD_DB{$cmd}->(@ARGV);

exit;

################################################################
# subroutine
################################################################

# Print one check result on STDOUT as an aligned "<key> : yes|no" line.
#   $key  - label of the check, left-justified in a 35 character column
#   $test - outcome of the check, evaluated as a boolean
sub print_ok {
   my ($key, $test) = @_;

   my $answer = 'no';
   $answer = 'yes' if $test;

   printf "%-35s : %s\n", $key, $answer;
   }

################################################################

# Register every parent folder of a data-set entry in a hash.
#   $folderdb - hash reference; each parent folder path becomes a key whose
#               value is incremented
#   $folder   - path of the entry, e.g. 'a/b/c.txt' registers 'a/b' and 'a'
# Returns nothing.
#
# The path is normalised first: trailing slashes are dropped and runs of
# slashes count as a single separator. Without this an entry such as
# 'data/' or 'a//b' never gets shorter and the walk would never end.
sub addfolder2list {
   my ($folderdb, $folder) = @_;

   return if not defined $folder;

   # 'data/' names the folder 'data' itself, which has no parent to register
   $folder =~ s{/+$}{};

   # Strip the last path component until no separator is left
   while ($folder =~ s{/+[^/]+$}{}) {
      # Happens for absolute paths: the root has no usable folder name
      last if $folder eq '';

      $folderdb->{$folder}++;
      }

   return;
   }

################################################################
# command
################################################################

# Command 'help': print the one-line description of the tool followed by
# one usage line per available command on STDOUT.
sub cmd_help {
   # Commands in the order they are shown to the user
   my @commands = qw(
      help
      version
      check
      dap-publish
      dap-unpublish
      make-zip
      make-allfiles
      list-license
      make-file-license
      make-file-author
      make-file-copyright
      );

   print "project-meta - opendata project metafile manager\n",
      "\n",
      map { " project-meta $_\n" } @commands;
   }

################################################################

# Command 'version': print the program version ($VERSION, declared at the
# top of the file) on STDOUT, followed by a newline.
sub cmd_version {
   printf "%s\n", $VERSION;
   }

################################################################

# Command 'check': load PROJECT-META.yml from the current folder and print
# one "yes/no" line (see print_ok) for each of these checks:
#   - project/acronym matches the expected pattern
#   - public-dap/dap-folder is a non-empty absolute path
#   - public-dap/dap-folder does not contain the current folder path
# A missing key is treated as an empty string and therefore reported as 'no'.
sub cmd_check {
   my $meta = YAML::Syck::LoadFile("PROJECT-META.yml");

   my $acronym     = $meta->{'project'}{'acronym'}       // '';
   my $current_dir = Cwd::getcwd();
   my $dap_folder  = $meta->{'public-dap'}{'dap-folder'} // '';

   # Every test is reduced to a plain 0/1 scalar before being passed on.
   # Written inline, "A and B" after a list-operator call binds AFTER the
   # call, so B would silently be left out of the check.
   my $acronym_ok    = $acronym =~ m{\d\d\w[\w\d_/]+}                  ? 1 : 0;
   my $dap_folder_ok = ($dap_folder ne '' && $dap_folder =~ m{^/})     ? 1 : 0;

   # \Q...\E: the folder is a literal path, not a regular expression
   my $dap_is_apart  = $dap_folder !~ m{\Q$current_dir\E}              ? 1 : 0;

   print_ok('project/acronym',                  $acronym_ok);
   print_ok('public-dap/dap-folder',            $dap_folder_ok);
   print_ok('dap-folder not match current_dir', $dap_is_apart);

   #print YAML::Syck::Dump($meta);
   }

################################################################

# Command 'dap-publish': print on STDOUT the shell commands that publish
# the data set on the OpeNDAP folder with symbolic links. Nothing is
# executed here; the output is meant to be reviewed, then piped to a shell.
#
# Keys read from PROJECT-META.yml:
#   project/acronym       - name of the project folder under the DAP root
#   public-dap/dap-folder - DAP root folder
#   public-dap/data-set   - list of files or folders to publish
# AUTHORS.txt, COPYRIGHT.txt and LICENSE.txt are always added to the list.
#
# Dies when the acronym or the DAP folder is missing, because the commands
# would then target paths such as '//'.
#
# NOTE(review): paths are emitted between single quotes without escaping;
# a path that itself contains a single quote would break the commands.
sub cmd_dap_publish {
   my $meta = YAML::Syck::LoadFile("PROJECT-META.yml");
   my $current_dir = Cwd::getcwd();
   my $acronym     = $meta->{'project'}{'acronym'};
   my $dap_folder  = $meta->{'public-dap'}{'dap-folder'};
   my $data_set    = $meta->{'public-dap'}{'data-set'};

   die "Error: key project/acronym is not defined in PROJECT-META.yml"
      if not defined $acronym or $acronym eq '';
   die "Error: key public-dap/dap-folder is not defined in PROJECT-META.yml"
      if not defined $dap_folder or $dap_folder eq '';

   # Normalise the entries (no doubled or trailing slash) and remove the
   # duplicates. 'data/' must be handled like 'data': with its trailing
   # slash it would be taken for an entry inside a sub-folder 'data/'.
   my @entries;
   my %seen;
   for my $entry (@{ $data_set // [] }, 'AUTHORS.txt', 'COPYRIGHT.txt', 'LICENSE.txt') {
      next if not defined $entry;

      my $path = $entry;
      $path =~ s{/{2,}}{/}g;
      $path =~ s{/$}{};

      next if $path eq '' or $seen{$path}++;
      push @entries, $path;
      }

   # Create a list of the folders holding the entries
   my %folders;
   for my $dataset (@entries) {
      addfolder2list(\%folders, $dataset);
      }

   print "chmod o+rX,o-w '$current_dir'\n";
   print "mkdir -p '$dap_folder/$acronym'\n" if not -d "$dap_folder/$acronym";

   # Sorted keys: a parent folder always comes before its sub-folders
   for my $folder (sort keys %folders) {
      next if $folder eq '';

      print "chmod o+rX,o-w '$current_dir/$folder'\n";

      # Same rule as for the project folder: no mkdir for an existing folder
      print "mkdir '$dap_folder/$acronym/$folder'\n"
         if -d "$current_dir/$folder" and not -d "$dap_folder/$acronym/$folder";
      }

   for my $dataset (@entries) {
      if ($dataset =~ m{/}) {
         # sub-folder case
         my $folder = $dataset =~ s{/[^/]+$}{}r;
         print "chmod -R o+rX,o-w '$current_dir/$dataset'\n";
         print "ln --symbolic --target-directory '$dap_folder/$acronym/$folder/' '$current_dir/$dataset'\n";
         }
      else {
         # Root case
         print "ln --symbolic --target-directory '$dap_folder/$acronym/' '$current_dir/$dataset'\n";
         }

      }
   print "chmod -R o+rX,o-w '$dap_folder/$acronym/'\n";
   }

################################################################

# Command 'dap-unpublish': print on STDOUT the find(1) commands that list,
# then delete, the links and folders of the project under the OpeNDAP
# folder. Nothing is executed here; the output is meant to be reviewed,
# then piped to a shell.
#
# Keys read from PROJECT-META.yml:
#   project/acronym       - name of the project folder under the DAP root
#   public-dap/dap-folder - DAP root folder
#
# Dies when one of these keys is missing (an empty acronym would make the
# delete commands run over the whole DAP root) or when the DAP folder and
# the current folder contain one another.
sub cmd_dap_unpublish {
   my $meta = YAML::Syck::LoadFile("PROJECT-META.yml");
   my $current_dir = Cwd::getcwd();
   my $acronym     = $meta->{'project'}{'acronym'};
   my $dap_folder  = $meta->{'public-dap'}{'dap-folder'};

   die "Error: key project/acronym is not defined in PROJECT-META.yml"
      if not defined $acronym or $acronym eq '';
   die "Error: key public-dap/dap-folder is not defined in PROJECT-META.yml"
      if not defined $dap_folder or $dap_folder eq '';

   # Plain substring search: the paths are literal strings, so characters
   # like '+' or '(' in a folder name must not be read as regex syntax
   die "Error: DAP folder match current folder"
      if index($dap_folder, $current_dir) >= 0 or index($current_dir, $dap_folder) >= 0;

   # The \( \) grouping is required: without it -exec applies to the
   # '-type d' branch only and the links are never listed.
   # ls -d shows a folder itself instead of its content.
   print "find '$dap_folder/$acronym/' \\( -type l -o -type d \\) -exec ls -ld {} +\n";
   print "find '$dap_folder/$acronym/' -type l -delete\n";
   print "find '$dap_folder/$acronym/' -type d -delete\n";
   }

################################################################

# Command 'make-zip': create in the current folder the archive
# "<acronym>--<YYYYMMDD-HHMM>.zip" holding the data set, every entry being
# stored under the root folder "<acronym>/".
#
# Keys read from PROJECT-META.yml:
#   project/acronym     - root folder inside the archive and archive name
#   public-dap/data-set - list of files or folders to archive
# AUTHORS.txt, COPYRIGHT.txt and LICENSE.txt are always added to the list.
#
# An entry that is neither a file nor a folder is reported and skipped.
# Dies when the acronym is missing, when an existing entry cannot be added
# or when the archive cannot be written.
sub cmd_make_zip {
   my $meta = YAML::Syck::LoadFile("PROJECT-META.yml");
   my $current_dir = Cwd::getcwd();
   my $data_set    = $meta->{'public-dap'}{'data-set'};
   my $acronym     = $meta->{'project'}{'acronym'};

   die "Error: key project/acronym is not defined in PROJECT-META.yml"
      if not defined $acronym or $acronym eq '';

   push @{$data_set}, 'AUTHORS.txt', 'COPYRIGHT.txt', 'LICENSE.txt';
   {
      # Remove duplicates
      my %seen = ();
      @{$data_set} = grep { ! $seen{$_}++ } @{$data_set};
      }

   # Create a Zip file
   my $zip = Archive::Zip->new();

   for my $dataset (@{$data_set}) {
      if (-d $dataset) {
         # Folder case: addTree reports its status with an error code
         $zip->addTree($dataset, "$acronym/$dataset") == AZ_OK
            or die "Error: cannot add folder $dataset to the zip archive";
         }
      elsif (-f $dataset) {
         # File case: addFile returns the new member, nothing on failure
         $zip->addFile($dataset, "$acronym/$dataset")
            or die "Error: cannot add file $dataset to the zip archive";
         }
      else {
         # Strange case
         print "Error: entry $dataset doesn't exists\n";
         }
      }

   # Time stamp of the archive name; only minute .. year are needed
   my (undef, $min, $hour, $mday, $mon, $year) = localtime time;
   my $date = sprintf '%04i%02i%02i-%02i%02i', $year + 1900, $mon + 1, $mday, $hour, $min;

   # Save the Zip file
   unless ($zip->writeToFileNamed("$current_dir/$acronym--$date.zip") == AZ_OK) {
      die 'Error: zip write error';
      }
   }

################################################################

# Command 'make-allfiles': run the three file generators one after the
# other, in this order: make-file-author, make-file-license,
# make-file-copyright.
sub cmd_make_allfiles {
   for my $generator (\&cmd_make_file_author, \&cmd_make_file_license) {
      $generator->();
      }

   return cmd_make_file_copyright();
   }

################################################################

# Command 'make-file-author': create or update AUTHORS.txt in the current
# folder from the template AUTHORS.tt, filled with the keys
# project/acronym and project/authors of PROJECT-META.yml.
#
# An existing AUTHORS.txt is only overwritten when one of its lines says it
# was "Automatically generated ... project-meta"; a hand-written file is
# left untouched and a warning is printed.
#
# Dies when a file cannot be read or written, or when the template
# processing fails.
sub cmd_make_file_author {
   my $meta = YAML::Syck::LoadFile("PROJECT-META.yml");

   my $current_dir = Cwd::getcwd();
   my $author_file = "$current_dir/AUTHORS.txt";

   my $acronym      = $meta->{'project'}{'acronym'};
   my $authors_list = $meta->{'project'}{'authors'};

   if (-f $author_file) {
      # Test for manual or automatically generated file
      # Automatically generated file by project-meta
      my $automatic;
      open my $fh, '<', $author_file or die "Error: cannot read $author_file - $!";
      while (my $line = <$fh>) {
         next if $line !~ m/Automatically generated .* project-meta/i;

         # One marker line is enough, no need to read further
         $automatic = 1;
         last;
         }
      close $fh;

      if (not $automatic) {
         print "Warning: AUTHORS.txt already exists\n";
         return;
         }

      print "Warning: update AUTHORS.txt\n";
      }

   my $tt = Template->new(INCLUDE_PATH => '/usr/share/project-meta/template.d')
      or die Template->error;
   my $msg_format = '';
   $tt->process('AUTHORS.tt',
      {
         acronym    => $acronym,
         authorlist => $authors_list,
      }, \$msg_format) || die $tt->error;

   open my $fh, '>', $author_file or die "Error: cannot write $author_file - $!";
   print {$fh} "$msg_format\n\n";

   # Buffered write errors (disk full...) only show up when closing
   close $fh or die "Error: cannot write $author_file - $!";
   }

################################################################

# Command 'make-file-license': copy the license text selected by the key
# public-dap/data-license of PROJECT-META.yml from the project-meta license
# database to LICENSE.txt in the current folder.
#
# An existing LICENSE.txt is never overwritten (a warning is printed).
# Exits with status 1 when the license is not in the database and dies
# when the copy fails.
sub cmd_make_file_license {
   my $meta = YAML::Syck::LoadFile("PROJECT-META.yml");

   my $target = Cwd::getcwd() . '/LICENSE.txt';

   if (-f $target) {
      print "Warning: LICENSE.txt already exists\n";
      return;
      }

   my $license = $meta->{'public-dap'}{'data-license'};
   my $source  = "/usr/share/project-meta/license.d/$license.txt";

   unless (-f $source) {
      print "Error: license $license doesn't exists in project-meta database\n";
      exit 1;
      }

   copy($source, $target) or die "Error: license copy failed - $!";

   print "Info: LICENSE.txt file create\n";
   return;
   }

################################################################

# Command 'make-file-copyright': create or update COPYRIGHT.txt in the
# current folder from the template COPYRIGHT.tt, filled with these keys of
# PROJECT-META.yml: project/title, project/acronym, project/authors,
# project/short-description, public-dap/data-license and publication/doi.
#
# An existing COPYRIGHT.txt is only overwritten when one of its lines says
# it was "Automatically generated ... project-meta"; a hand-written file is
# left untouched and a warning is printed.
#
# Dies when a file cannot be read or written, or when the template
# processing fails.
sub cmd_make_file_copyright {
   my $meta = YAML::Syck::LoadFile("PROJECT-META.yml");

   my $current_dir    = Cwd::getcwd();
   my $copyright_file = "$current_dir/COPYRIGHT.txt";

   if (-f $copyright_file) {
      # Test for manual or automatically generated file
      # Automatically generated file by project-meta
      my $automatic;
      open my $fh, '<', $copyright_file or die "Error: cannot read $copyright_file - $!";
      while (my $line = <$fh>) {
         next if $line !~ m/Automatically generated .* project-meta/i;

         # One marker line is enough, no need to read further
         $automatic = 1;
         last;
         }
      close $fh;

      if (not $automatic) {
         print "Warning: COPYRIGHT.txt already exists\n";
         return;
         }

      print "Warning: update COPYRIGHT.txt\n";
      }

   my $tt = Template->new(
      INCLUDE_PATH   => '/usr/share/project-meta/template.d',
      POST_CHOMP     => 1, # Remove space and carriage return after %]
      ) or die Template->error;
   my $msg_format = '';
   $tt->process('COPYRIGHT.tt',
      {
         title       => $meta->{'project'}{'title'},
         acronym     => $meta->{'project'}{'acronym'},
         authorlist  => $meta->{'project'}{'authors'},
         description => $meta->{'project'}{'short-description'},
         license     => $meta->{'public-dap'}{'data-license'},
         doi         => $meta->{'publication'}{'doi'},
      }, \$msg_format) || die $tt->error;

   open my $fh, '>', $copyright_file or die "Error: cannot write $copyright_file - $!";
   print {$fh} "$msg_format\n\n";

   # Buffered write errors (disk full...) only show up when closing
   close $fh or die "Error: cannot write $copyright_file - $!";
   }

################################################################

# Command 'list-license': print on STDOUT, one per line and in alphabetical
# order, the name of every license of the project-meta license database,
# that is every regular file "<name>.txt" of the license folder.
# Dies when the license folder cannot be read.
sub cmd_list_license {
   my $license_dir = '/usr/share/project-meta/license.d';

   opendir my $dh, $license_dir
      or die "Error: cannot read folder $license_dir - $!";
   my @entries = readdir $dh;
   closedir $dh;

   # readdir gives no guarantee on the order: sort for a stable listing
   for my $entry (sort @entries) {
      # Keep only .txt file
      next if $entry !~ m/\.txt$/;

      # Keep only file
      next if not -f "$license_dir/$entry";

      my $license = $entry =~ s/\.txt$//r;
      print "$license\n";
      }

   return;
   }

################################################################
# documentation
################################################################

__END__

=head1 NAME

project-meta - opendata project metafile manager


=head1 USAGE

 project-meta help
 project-meta version
 project-meta check
 project-meta dap-publish
 project-meta dap-unpublish
 project-meta make-zip
 project-meta make-allfiles
 project-meta list-license
 project-meta make-file-license
 project-meta make-file-author
 project-meta make-file-copyright


=head1 DESCRIPTION

Project-Meta is a small tool to maintain a set of open data files.
In order to help you in this task, the C<project-meta> command has a set of actions
to generate and maintain many files in your dataset.

Everything is declared in the metafile F<PROJECT-META.yml>.
This YAML file must exist in your project root folder.
See L</METAFILE SPECIFICATION>.


=head1 COMMANDS

Some commands are defined in the source code but are not documented here.
These may be not well defined, not finished, not well tested...
You can read the source code and use them at your own risk
(like for all the Project-Meta code).

=head2 check

 project-meta check

Check that your F<PROJECT-META.yml> has the right keys.
If your metafile is not a valid YAML file,
you can use the C<yamllint> or C<ysh> commands to check just its format.

=head2 dap-publish

 project-meta dap-publish

Publish data on an OpeNDAP server.
Because data can be very large,
this command just creates UNIX soft links in the OpeNDAP folder to the real data.
There is no copy.
Files F<AUTHORS.txt>, F<LICENSE.txt> and F<COPYRIGHT.txt> are mandatory but could be generated (see below).
The main keys used in the F<PROJECT-META.yml> are:

=over

=item * C<project/acronym>: the project short acronym, added to the OpeNDAP root folder

=item * C<public-dap/dap-folder>: the OpeNDAP root folder

=item * C<public-dap/data-set>: a list of files or folder to push

=back

Because this command could be dangerous, it does nothing!
It prints on the terminal the shell commands to be done.
You have to verify the output before evaluating it.

 project-meta dap-publish
 project-meta dap-publish | bash

=head2 dap-unpublish

 project-meta dap-unpublish

Unpublish data from the OpeNDAP server.
In practice, it removes the links in the OpeNDAP folder for that project.
Because the command C<rm> is always dangerous,
we use here the command C<find> limited to folders and links.

Please verify the returned values before executing it with the C<-delete> option.

=head2 make-zip

 project-meta make-zip

Create a ZIP archive with the open data set.
Files F<AUTHORS.txt>, F<LICENSE.txt> and F<COPYRIGHT.txt> are mandatory but could be generated (see below).
The main keys used in the F<PROJECT-META.yml> are:

=over

=item * C<project/acronym>: the project short acronym, used as root folder

=item * C<public-dap/data-set>: a list of files or folder to push

=back

=head2 make-allfiles

 project-meta make-allfiles

Generate or update all files: F<AUTHORS.txt>, F<COPYRIGHT.txt> and F<LICENSE.txt>.
This command is just a shortcut for L</make-file-author>, L</make-file-copyright> and L</make-file-license>.


=head2 list-license

 project-meta list-license

Give the list of all the open data licenses supported by the project-meta license database.
At this time the possible licenses are:

=over

=item * L<community-data-license-agreement-permissive-v1.0|https://cdla.io/permissive-1-0/wp-content/uploads/sites/52/2017/10/CDLA-Permissive-v1.0.pdf>
        (permissive - allow users to freely share and adapt)

=item * L<community-data-license-agreement-sharing-v1.0|https://cdla.io/sharing-1-0/wp-content/uploads/sites/52/2017/10/CDLA-Sharing-v1.0.pdf>
        (copyleft - allow users to freely share and adapt while maintaining this same freedom for others)

=item * L<creative-common-attribution-v4.0|https://creativecommons.org/licenses/by/4.0/legalcode.txt>
        (permissive - allow users to freely share and adapt as long as appropriate credit is given)

=item * L<creative-common-zero-v1.0|https://creativecommons.org/publicdomain/zero/1.0/legalcode.txt>
        (like public domain)

=item * L<licence-ouverte-v2.0|https://www.etalab.gouv.fr/wp-content/uploads/2017/04/ETALAB-Licence-Ouverte-v2.0.pdf>
        (permissive with attribution - open data license of the French government)

=item * L<open-database-license-v1.0|https://opendatacommons.org/files/2018/02/odbl-10.txt>
        (copyleft - allow users to freely share, modify, and use the database while maintaining this same freedom for others)

=back

Note that these licenses are dedicated to open data.
Please do not use an open license that was designed for source code or documentation and not for open data.
Here are some links about the open data license context:

=over

=item * A good article about Community Data License Agreement and Open Data Licence in general
   L<Licenses for data|https://lwn.net/Articles/753648/> written on 9 May 2018.

=item * A French page about the French Public Open Data license
   L<https://www.etalab.gouv.fr/licence-ouverte-open-licence>.

=back

=head2 make-file-license

 project-meta make-file-license

Copy the license file from the project-meta license database into the current folder
with the file name: F<LICENSE.txt>.

The license is defined in the F<PROJECT-META.yml> specification under the key C<public-dap/data-license>.
The list of possible licenses is given by the command L</list-license>.

=head2 make-file-author

 project-meta make-file-author

Create or update the F<AUTHORS.txt> file in the current folder.
Authors data are extracted from the C<PROJECT-META.yml> file.

=head2 make-file-copyright

 project-meta make-file-copyright

Create or update the F<COPYRIGHT.txt> file in the current folder.
Authors, license and copyright data are extracted from the C<PROJECT-META.yml> file.


=head1 METAFILE SPECIFICATION

Each project must have an open data metafile describing the project: C<PROJECT-META.yml>.
The file is in YAML format because this is a human-readable text file style.
Other formats could have been Plain XML, RDF, JSON... but they are much less readable.

You can find in the project-meta software a
L<PROJECT-META.sample.yml|http://servforge.legi.grenoble-inp.fr/pub/soft-trokata/project-meta/PROJECT-META.sample.yml> example.
This one is actually the master reference specification!

Some interesting papers or links about Open Meta Data Schema:

=over

=item * L<Metadata for the open data portals|http://devinit.org/wp-content/uploads/2018/01/Metadata-for-open-data-portals.pdf>
        written in December 2016.

=item * L<Project Open Data Metadata Schema v1.1|https://project-open-data.cio.gov/v1.1/schema/> from the US government
        based on L<DCAT|http://www.w3.org/TR/vocab-dcat/>.

=item * L<Metadata Standards|http://knowhow.opendatamonitor.eu/odresearch/metadata-standards/>
        from OpenDataMonitor.

=item * L<G8 Metadata Mapping|https://github.com/project-open-data/G8_Metadata_Mapping/blob/master/index.md>
        mapping between the metadata on datasets published by G8 Members through their open data portals.

=back


=head1 KNOWN BUGS

 - keys and tags are not really checked before doing an action!


=head1 SEE ALSO

yamllint(1), ysh(1), YAML, Archive::Zip

In Debian GNU/Linux distribution, packages for C<yamllint> and C<ysh> are:

=over

=item * C<yamllint> - Linter for YAML files (Python)

=item * C<libyaml-shell-perl> - YAML test shell (Perl)

=back


Own project resources:

=over

=item * L<Web site|http://servforge.legi.grenoble-inp.fr/projects/soft-trokata/wiki/SoftWare/ProjectMeta>

=item * L<Online Manual|http://servforge.legi.grenoble-inp.fr/pub/soft-trokata/project-meta/project-meta.html>

=item * L<SVN repository|http://servforge.legi.grenoble-inp.fr/svn/soft-trokata/trunk/project-meta>

=back


=head1 AUTHOR

Written by Gabriel Moreau, LEGI UMR5519, CNRS, Grenoble - France


=head1 SPECIAL THANKS

The list of people below did not directly contribute to project-meta's source code
but provided me with some data, returned bugs
or helped me in another task like having new ideas, specifications...
Maybe I forgot your contribution in recent years,
please forgive me in advance and send me an e-mail to correct this.

Joel Sommeria, Julien Chauchat, Cyrille Bonamy, Antoine Mathieu.


=head1 LICENSE AND COPYRIGHT

License GNU GPL version 2 or later and Perl equivalent

Copyright (C) 2017-2018 Gabriel Moreau <Gabriel.Moreau(A)univ-grenoble-alpes.fr>.
