#!/usr/bin/env perl
#
# 2018/01/17 Gabriel Moreau
#
# apt-get install libyaml-syck-perl libtemplate-perl libarchive-zip-perl
# apt-get install yamllint libyaml-shell-perl # check YAML files

use strict;
use warnings;
use version; our $VERSION = version->declare('0.0.12');

use File::Copy qw(copy);
use YAML::Syck;
use Getopt::Long();
use Cwd();
use Template;
use Archive::Zip qw(:ERROR_CODES :CONSTANTS);

my ($verbose);
Getopt::Long::GetOptions(
    'verbose' => \$verbose,
);

# File-scoped settings. They are initialised here, before the main program
# runs, because the main program ends with an explicit exit.
my $META_FILE       = 'PROJECT-META.yml';
my $SHARE_DIR       = '/usr/share/project-meta';
my @MANDATORY_FILES = ('AUTHORS.txt', 'COPYRIGHT.txt', 'LICENSE.txt');

# Dispatch table: command name => handler.
my %CMD_DB = (
    'help'                => \&cmd_help,
    'version'             => \&cmd_version,
    'check'               => \&cmd_check,
    'dap-publish'         => \&cmd_dap_publish,
    'dap-unpublish'       => \&cmd_dap_unpublish,
    'make-zip'            => \&cmd_make_zip,
    'make-allfiles'       => \&cmd_make_allfiles,
    'make-file-author'    => \&cmd_make_file_author,
    'make-file-copyright' => \&cmd_make_file_copyright,
    'make-file-license'   => \&cmd_make_file_license,
    'list-license'        => \&cmd_list_license,
);

################################################################
# main program
################################################################

my $cmd = shift @ARGV || 'help';
if (defined $CMD_DB{$cmd}) {
    $CMD_DB{$cmd}->(@ARGV);
}
else {
    print {*STDERR} "project-meta: command $cmd not found\n\n";
    $CMD_DB{'help'}->();
    exit 1;
}

exit;

################################################################
# subroutine
################################################################

# Print one check result as "<key> : yes|no".
#   $key  - label of the check
#   $test - boolean result of the check
sub print_ok {
    my ($key, $test) = @_;

    printf "%-35s : %s\n", $key, $test ? 'yes' : 'no';
    return;
}

################################################################

# Register every parent folder of a relative path in a hash.
#   $folderdb - hash reference, keys are folder paths (values are counters)
#   $folder   - path of a file or folder, e.g. 'a/b/c' registers 'a/b' and 'a'
# The loop stops as soon as no trailing "/component" can be removed, so a
# path such as 'a//b' or 'a/' cannot loop forever.
sub addfolder2list {
    my ($folderdb, $folder) = @_;

    while ($folder =~ s{/[^/]+$}{}) {
        $folderdb->{$folder}++;
    }
    return;
}

################################################################

# Quote a string for a POSIX shell: wrap it in single quotes and escape
# embedded single quotes. The result is identical to the plain '...' form
# when the string contains no single quote.
sub shell_quote {
    my ($text) = @_;

    $text =~ s{'}{'\\''}g;
    return "'$text'";
}

################################################################

# Return the list of entries to publish: the 'public-dap/data-set' list of
# the metafile followed by the mandatory files, without trailing slashes,
# without empty entries and without duplicates (first occurrence wins).
sub dataset_list {
    my ($meta) = @_;

    my %seen;
    my @list;
    for my $entry (@{ $meta->{'public-dap'}{'data-set'} // [] }, @MANDATORY_FILES) {
        next if not defined $entry;
        (my $clean = $entry) =~ s{/+$}{};
        next if $clean eq '' or $seen{$clean}++;
        push @list, $clean;
    }
    return @list;
}

################################################################

# Return (acronym, dap-folder) from the metafile. Die when one of them is
# empty: the generated shell commands would otherwise target '/' or the
# whole OpeNDAP root folder.
sub dap_settings {
    my ($meta) = @_;

    my $acronym    = $meta->{'project'}{'acronym'}       // '';
    my $dap_folder = $meta->{'public-dap'}{'dap-folder'} // '';
    die "Error: project/acronym is not defined in $META_FILE\n"
        if $acronym eq '';
    die "Error: public-dap/dap-folder is not defined in $META_FILE\n"
        if $dap_folder eq '';
    return ($acronym, $dap_folder);
}

################################################################

# Return true when the file contains the project-meta marker line
# "Automatically generated ... project-meta" (case insensitive).
sub is_generated_file {
    my ($path) = @_;

    open my $fh, '<', $path or die "Error: cannot read $path - $!\n";
    my $automatic = 0;
    while (my $line = <$fh>) {
        if ($line =~ m/Automatically generated .* project-meta/i) {
            $automatic = 1;
            last;
        }
    }
    close $fh;
    return $automatic;
}

################################################################

# Tell if a generated file may be (re)written.
#   $path - full path of the target file
#   $name - short name used in messages
# Return false (after a warning) when the file exists and was written by
# hand; return true when it does not exist or was generated by project-meta.
sub can_generate {
    my ($path, $name) = @_;

    return 1 if not -f $path;
    if (not is_generated_file($path)) {
        print "Warning: $name already exists\n";
        return 0;
    }
    print "Warning: update $name\n";
    return 1;
}

################################################################

# Write a string to a file, replacing any previous content. Die on any
# I/O error, including errors that only show up when the file is closed.
sub write_file {
    my ($path, $content) = @_;

    open my $fh, '>', $path or die "Error: cannot write $path - $!\n";
    print {$fh} $content    or die "Error: cannot write $path - $!\n";
    close $fh               or die "Error: cannot close $path - $!\n";
    return;
}

################################################################
# command
################################################################

# Print the usage summary on standard output.
sub cmd_help {
    print <<'END';
project-meta - opendata project metafile manager

 project-meta help
 project-meta version
 project-meta check
 project-meta dap-publish
 project-meta dap-unpublish
 project-meta make-zip
 project-meta make-allfiles
 project-meta list-license
 project-meta make-file-license
 project-meta make-file-author
 project-meta make-file-copyright
END
    return;
}

################################################################

# Print the program version.
sub cmd_version {
    print "$VERSION\n";
    return;
}

################################################################

# Check some keys of the metafile and print one yes/no line per check.
sub cmd_check {
    my $meta        = YAML::Syck::LoadFile($META_FILE);
    my $acronym     = $meta->{'project'}{'acronym'}       // '';
    my $current_dir = Cwd::getcwd();
    my $dap_folder  = $meta->{'public-dap'}{'dap-folder'} // '';

    # Every test is forced to scalar context: a failed match in list
    # context is an empty list, not a false value. Parentheses and && keep
    # the whole condition inside the argument list.
    print_ok('project/acronym',
        scalar($acronym =~ m{\d\d\w[\w\d_/]+}));
    print_ok('public-dap/dap-folder',
        scalar($dap_folder ne '' && $dap_folder =~ m{^/}));

    # Literal substring test: a path must not be used as a regex.
    print_ok('dap-folder not match current_dir',
        index($dap_folder, $current_dir) < 0);

    #print YAML::Syck::Dump($meta);
    return;
}

################################################################

# Print (not run) the shell commands that publish the data set on the
# OpeNDAP folder with symbolic links. The output is meant to be reviewed
# and then piped to a shell.
sub cmd_dap_publish {
    my $meta        = YAML::Syck::LoadFile($META_FILE);
    my $current_dir = Cwd::getcwd();
    my ($acronym, $dap_folder) = dap_settings($meta);
    my @data_set    = dataset_list($meta);
    my $dap_root    = "$dap_folder/$acronym";

    # Create a list of the folder
    my %folders;
    for my $dataset (@data_set) {
        addfolder2list(\%folders, $dataset);
    }

    print 'chmod o+rX,o-w ', shell_quote($current_dir), "\n";
    print 'mkdir -p ', shell_quote($dap_root), "\n" if not -d $dap_root;

    # Sorted order guarantees that a parent folder comes before its children
    for my $folder (sort keys %folders) {
        print 'chmod o+rX,o-w ', shell_quote("$current_dir/$folder"), "\n";
        print 'mkdir ', shell_quote("$dap_root/$folder"), "\n"
            if -d "$current_dir/$folder";
    }

    for my $dataset (@data_set) {
        my $source = shell_quote("$current_dir/$dataset");
        if ($dataset =~ m{/}) {
            # sub-folder case
            my $folder = $dataset =~ s{/[^/]+$}{}r;
            print "chmod -R o+rX,o-w $source\n";
            print 'ln --symbolic --target-directory ',
                shell_quote("$dap_root/$folder/"), " $source\n";
        }
        else {
            # Root case
            print 'ln --symbolic --target-directory ',
                shell_quote("$dap_root/"), " $source\n";
        }
    }

    print 'chmod -R o+rX,o-w ', shell_quote("$dap_root/"), "\n";
    return;
}

################################################################

# Print (not run) the shell commands that remove the links and folders of
# the project from the OpeNDAP folder.
sub cmd_dap_unpublish {
    my $meta        = YAML::Syck::LoadFile($META_FILE);
    my $current_dir = Cwd::getcwd();
    my ($acronym, $dap_folder) = dap_settings($meta);

    # Refuse to work when one path contains the other (literal comparison)
    die "Error: DAP folder match current folder"
        if index($dap_folder, $current_dir) >= 0
        or index($current_dir, $dap_folder) >= 0;

    my $target = shell_quote("$dap_folder/$acronym/");

    # The parentheses make -exec apply to links AND folders; without them
    # -exec is bound to "-type d" only and links are never listed.
    print "find $target \\( -type l -o -type d \\) -exec ls -l {} +\n";
    print "find $target -type l -delete\n";
    print "find $target -type d -delete\n";
    return;
}

################################################################

# Create the archive "<acronym>--<YYYYMMDD-HHMM>.zip" in the current
# folder with every entry of the data set, stored under "<acronym>/".
sub cmd_make_zip {
    my $meta        = YAML::Syck::LoadFile($META_FILE);
    my $current_dir = Cwd::getcwd();
    my $acronym     = $meta->{'project'}{'acronym'};
    my @data_set    = dataset_list($meta);

    # Create a Zip file
    my $zip = Archive::Zip->new();

    for my $dataset (@data_set) {
        if (-d $dataset) {
            # Folder case
            $zip->addTree($dataset, "$acronym/$dataset");
        }
        elsif (-f $dataset) {
            # File case
            $zip->addFile($dataset, "$acronym/$dataset");
        }
        else {
            # Strange case
            print "Error: entry $dataset doesn't exists\n";
        }
    }

    # localtime fields 1..5 are: minute, hour, day, month (0-11), year-1900
    my ($min, $hour, $mday, $mon, $year) = (localtime)[1 .. 5];
    my $date = sprintf '%04i%02i%02i-%02i%02i',
        $year + 1900, $mon + 1, $mday, $hour, $min;

    # Save the Zip file
    if ($zip->writeToFileNamed("$current_dir/$acronym--$date.zip") != AZ_OK) {
        die 'Error: zip write error';
    }
    return;
}

################################################################

# Generate or update AUTHORS.txt, LICENSE.txt and COPYRIGHT.txt.
sub cmd_make_allfiles {
    cmd_make_file_author();
    cmd_make_file_license();
    cmd_make_file_copyright();
    return;
}

################################################################

# Create or update AUTHORS.txt in the current folder from the AUTHORS.tt
# template. A file written by hand is left untouched.
sub cmd_make_file_author {
    my $meta        = YAML::Syck::LoadFile($META_FILE);
    my $current_dir = Cwd::getcwd();
    my $target      = "$current_dir/AUTHORS.txt";

    return if not can_generate($target, 'AUTHORS.txt');

    my $tt = Template->new(INCLUDE_PATH => "$SHARE_DIR/template.d")
        or die Template->error(), "\n";
    my $msg_format = '';
    $tt->process('AUTHORS.tt',
        {
            acronym    => $meta->{'project'}{'acronym'},
            authorlist => $meta->{'project'}{'authors'},
        },
        \$msg_format) or die $tt->error(), "\n";

    write_file($target, "$msg_format\n\n");
    return;
}

################################################################

# Copy the license named by 'public-dap/data-license' from the license
# database to LICENSE.txt in the current folder. An existing LICENSE.txt
# is never overwritten. Exit with status 1 when the license is unknown.
sub cmd_make_file_license {
    my $meta        = YAML::Syck::LoadFile($META_FILE);
    my $current_dir = Cwd::getcwd();

    if (-f "$current_dir/LICENSE.txt") {
        print "Warning: LICENSE.txt already exists\n";
        return;
    }

    my $license = $meta->{'public-dap'}{'data-license'} // '';
    my $source  = "$SHARE_DIR/license.d/$license.txt";

    if (not -f $source) {
        print "Error: license $license doesn't exists in project-meta database\n";
        exit 1;
    }

    copy($source, "$current_dir/LICENSE.txt")
        or die "Error: license copy failed - $!";

    print "Info: LICENSE.txt file create\n";
    return;
}

################################################################

# Create or update COPYRIGHT.txt in the current folder from the
# COPYRIGHT.tt template. A file written by hand is left untouched.
sub cmd_make_file_copyright {
    my $meta        = YAML::Syck::LoadFile($META_FILE);
    my $current_dir = Cwd::getcwd();
    my $target      = "$current_dir/COPYRIGHT.txt";

    return if not can_generate($target, 'COPYRIGHT.txt');

    my $tt = Template->new(
        INCLUDE_PATH => "$SHARE_DIR/template.d",
        POST_CHOMP   => 1,    # Remove space and carriage return after %]
    ) or die Template->error(), "\n";
    my $msg_format = '';
    $tt->process('COPYRIGHT.tt',
        {
            title       => $meta->{'project'}{'title'},
            acronym     => $meta->{'project'}{'acronym'},
            authorlist  => $meta->{'project'}{'authors'},
            description => $meta->{'project'}{'short-description'},
            license     => $meta->{'public-dap'}{'data-license'},
            doi         => $meta->{'publication'}{'doi'},
        },
        \$msg_format) or die $tt->error(), "\n";

    write_file($target, "$msg_format\n\n");
    return;
}

################################################################

# Print the name of every license of the license database, one per line,
# in sorted order. A license is a regular file named "<name>.txt".
sub cmd_list_license {
    my $license_dir = "$SHARE_DIR/license.d";

    opendir my $dh, $license_dir
        or die "Error: cannot open $license_dir - $!\n";
    my @entries = sort readdir $dh;
    closedir $dh;

    for my $entry (@entries) {
        # Keep only file
        next if not -f "$license_dir/$entry";
        # Keep only .txt file
        next if $entry !~ m/\.txt$/;

        (my $license = $entry) =~ s/\.txt$//;
        print "$license\n";
    }
    return;
}

################################################################
# documentation
################################################################

__END__

=head1 NAME

project-meta - opendata project metafile manager

=head1 USAGE

 project-meta help
 project-meta version
 project-meta check
 project-meta dap-publish
 project-meta dap-unpublish
 project-meta make-zip
 project-meta make-allfiles
 project-meta list-license
 project-meta make-file-license
 project-meta make-file-author
 project-meta make-file-copyright

=head1 DESCRIPTION

Project-Meta is a small tool to maintain a set of open data files.
In order to help you in this task, the C<project-meta> command has a set of actions
to generate and maintain many files in your dataset.

Everything is declared in the metafile F<PROJECT-META.yml>.
This YAML file must exist in your root project folder.
See L</METAFILE SPECIFICATION>.

=head1 COMMANDS

Some commands are defined in the source code but are not documented here.
These could be not well defined, not finished, not well tested...
You can read the source code and use them at your own risk
(like for all the Project-Meta code).

=head2 check

 project-meta check

Check your F<PROJECT-META.yml> has the good key.
If your metafile is not a valid YAML file,
you can use C<yamllint> or C<ysh> commands to check just its format.

=head2 dap-publish

 project-meta dap-publish

Publish data on an OpeNDAP server.
Because data can be very large,
this command just creates UNIX soft links on the OpeNDAP folder to the real data.
There is no copy.
Files F<AUTHORS.txt>, F<COPYRIGHT.txt> and F<LICENSE.txt> are mandatory but could be generated (see below).
The main keys used in the F<PROJECT-META.yml> are:

=over

=item * C<project/acronym>: the project short acronym, added to the OpeNDAP root folder

=item * C<public-dap/dap-folder>: the OpeNDAP root folder

=item * C<public-dap/data-set>: a list of files or folders to push

=back

Because this command could be dangerous, it does nothing!
It prints on the terminal the shell commands to be done.
You have to verify the output before evaluating it.

 project-meta dap-publish
 project-meta dap-publish | bash

=head2 dap-unpublish

 project-meta dap-unpublish

Unpublish data from the OpeNDAP server.
In practice, it removes links in the OpeNDAP folder for that project.
Because command C<rm> is always dangerous,
we use here the command C<find> limited to folders and links.
Please verify the returned values before executing it with the C<-delete> option.

=head2 make-zip

 project-meta make-zip

Create a ZIP archive with the open data set.
Files F<AUTHORS.txt>, F<COPYRIGHT.txt> and F<LICENSE.txt> are mandatory but could be generated (see below).
The main keys used in the F<PROJECT-META.yml> are:

=over

=item * C<project/acronym>: the project short acronym, used as root folder

=item * C<public-dap/data-set>: a list of files or folders to push

=back

=head2 make-allfiles

 project-meta make-allfiles

Generate or update all files: F<AUTHORS.txt>, F<COPYRIGHT.txt> and F<LICENSE.txt>.
This command is just a shortcut for L</make-file-author>,
L</make-file-license> and L</make-file-copyright>.

=head2 list-license

 project-meta list-license

Give the list of all the open data licenses supported by the project-meta license database.
At this time the possible licenses are:

=over

=item * L (permissive - allow users to freely share and adapt)

=item * L (copyleft - allow users to freely share and adapt while maintaining this same freedom for others)

=item * L (copyleft - allow users to freely share and adapt while maintaining this same freedom for others)

=item * L (like public domain)

=item * L (copyleft - opendata French government)

=item * L (copyleft - allow users to freely share, modify, and use the database while maintaining this same freedom for others)

=back

Note that these licenses are dedicated to open data.
Please do not use an open license that would have been thought for source code or documentation and not for open data.
Here are some links about open data licence context:

=over

=item * A good article about Community Data License Agreement and Open Data Licence in general L written on 9 May 2018.

=item * A French page about French Public Open Data licence L.

=back

=head2 make-file-license

 project-meta make-file-license

Copy the license file from the project-meta license database
at the current folder with the file name: F<LICENSE.txt>.

The license is defined in the F<PROJECT-META.yml> specification under the key C<public-dap/data-license>.
The list of possible licenses is given with the command L</list-license>.

=head2 make-file-author

 project-meta make-file-author

Create or update the F<AUTHORS.txt> file at the current folder.
Authors data are extracted from the C<PROJECT-META.yml> file.

=head2 make-file-copyright

 project-meta make-file-copyright

Create or update the F<COPYRIGHT.txt> file at the current folder.
Authors, license and copyright data are extracted from the C<PROJECT-META.yml> file.

=head1 METAFILE SPECIFICATION

Each project must have an open data metafile describing the project: C<PROJECT-META.yml>.
The file is in YAML format because this is a human-readable text file style.
Other formats could have been Plain XML, RDF, JSON... but they are much less readable.

You can find in the project-meta software a L example.
This one is actually the master reference specification!

Some interesting papers or links about Open Meta Data Schema:

=over

=item * L written in December 2016.

=item * L from US government based on L.

=item * L from OpenDataMonitor.

=item * L mapping between the metadata on datasets published by G8 Members through their open data portals.

=back

=head1 KNOWN BUGS

 - not really check keys and tags before doing action!

=head1 SEE ALSO

yamllint(1), ysh(1), YAML, Archive::Zip

In Debian GNU/Linux distribution, packages for C<yamllint> and C<ysh> are:

=over

=item * C<yamllint> - Linter for YAML files (Python)

=item * C<libyaml-shell-perl> - YAML test shell (Perl)

=back

Own project resources:

=over

=item * L

=item * L

=item * L

=back

=head1 AUTHOR

Written by Gabriel Moreau, LEGI UMR5519, CNRS, Grenoble - France

=head1 SPECIAL THANKS

The list of people below did not directly contribute to project-meta's source code
but provided me with some data, returned bugs
or helped me in another task like having new ideas, specifications...
Maybe I forgot your contribution in recent years,
please forgive me in advance and send me an e-mail to correct this.

Joel Sommeria, Julien Chauchat, Cyrille Bonamy, Antoine Mathieu.

=head1 LICENSE AND COPYRIGHT

License GNU GPL version 2 or later and Perl equivalent

Copyright (C) 2017-2018 Gabriel Moreau.