#!/usr/bin/env perl
#
# 2018/01/17 Gabriel Moreau
#
# apt-get install libyaml-syck-perl libtemplate-perl libarchive-zip-perl
# apt-get install yamllint libyaml-shell-perl # check YAML files

use strict;
use warnings;
use version; our $VERSION = version->declare('0.2.4');

use File::Copy qw(copy);
use YAML::Syck;
use Getopt::Long();
use Cwd();
use Template;
use Archive::Zip qw(:ERROR_CODES :CONSTANTS);

# Version of the PROJECT-META.yml layout this program works with.
our $CFG_VERSION = 2;

# Dispatch table: command name given on the command line => handler.
my %CMD_DB = (
    'help'                => \&cmd_help,
    'version'             => \&cmd_version,
    'check'               => \&cmd_check,
    'dap-publish'         => \&cmd_dap_publish,
    'dap-unpublish'       => \&cmd_dap_unpublish,
    'dataset-list'        => \&cmd_dataset_list,
    'dataset-size'        => \&cmd_dataset_size,
    'make-zip'            => \&cmd_make_zip,
    'make-allfiles'       => \&cmd_make_allfiles,
    'make-file-author'    => \&cmd_make_file_author,
    'make-file-copyright' => \&cmd_make_file_copyright,
    'make-file-license'   => \&cmd_make_file_license,
    'list-license'        => \&cmd_list_license,
    'upgrade'             => \&cmd_upgrade,
);

################################################################
# main program
################################################################

# The first argument selects the command; the remaining arguments are
# handed to the command handler. No argument at all means 'help'.
my $cmd = shift(@ARGV) || 'help';
if (defined $CMD_DB{$cmd}) {
    $CMD_DB{$cmd}->(@ARGV);
}
else {
    print {*STDERR} "project-meta: command $cmd not found\n\n";
    $CMD_DB{'help'}->();
    exit 1;
}

exit;

################################################################
# subroutine
################################################################

#---------------------------------------------------------------
# Return the command name of the calling cmd_* subroutine:
# the 'cmd_' prefix is removed and '_' becomes '-'
# (cmd_dap_publish => dap-publish).
sub get_cmd_name {
    my (undef, $sub) = split /::/, (caller 1)[3];
    $sub =~ s/^cmd_//;
    $sub =~ s/_/-/g;
    return $sub;
}

#---------------------------------------------------------------
# Print one line of the check report: "<key> : yes|no".
#   $key  - label of the check
#   $test - true value prints 'yes', false (or missing) prints 'no'
sub print_ok {
    my ($key, $test) = @_;
    printf "%-35s : %s\n", $key, $test ? 'yes' : 'no';
    return;
}

#---------------------------------------------------------------
# Record every parent folder of a relative path in a hash.
#   $folderdb - hash reference, keys are folder paths (values are counters)
#   $folder   - path such as 'a/b/c.txt'; 'a/b' then 'a' are recorded
# A path without '/' has no parent folder and records nothing.
sub addfolder2list {
    my ($folderdb, $folder) = @_;

    while ($folder =~ m{/}) {
        $folder =~ s{/[^/]+$}{};
        $folderdb->{$folder}++;
    }
    return;
}

#---------------------------------------------------------------
# Convert, in place, a version 1 metadata structure to version 2.
# Keys are moved to their new location and the 'version' key is set to 2.
# Returns the same (modified) hash reference.
sub upgrade_version_1_to_2 {
    my $meta = shift;

    $meta->{'project'}{'identifier'} ||= {};
    $meta->{'project'}{'identifier'}{'acronym'} = delete $meta->{'project'}{'acronym'};

    $meta->{'project'}{'creator'}     = delete $meta->{'project'}{'authors'};
    $meta->{'project'}{'description'} = delete $meta->{'project'}{'short-description'};
    $meta->{'project'}{'rights'}      = delete $meta->{'public-dap'}{'data-license'};

    # Every publication DOI becomes a {doi => ...} entry of project/relation.
    $meta->{'project'}{'relation'} ||= [];
    for my $doi (@{$meta->{'publication'}{'doi'}}) {
        push @{$meta->{'project'}{'relation'}}, {doi => $doi};
    }
    delete $meta->{'publication'}{'doi'};

    $meta->{'version'} = 2;
    return $meta;
}

#---------------------------------------------------------------
# Load PROJECT-META.yml from the current directory and, when the file is
# older than $CFG_VERSION, upgrade the in-memory structure step by step.
#
# Returns the metadata hash reference in scalar context, and
# ($meta, $initial_version) in list context, where $initial_version is the
# version found in the file *before* any upgrade (cmd_upgrade relies on it
# to know whether a new file has to be written).
#
# Dies when the file is at a version newer than the program, or when no
# upgrade_version_N_to_M subroutine exists for a needed step.
sub load_metadata {
    my $meta = YAML::Syck::LoadFile('PROJECT-META.yml');

    # NOTE(review): a metafile without a 'version' key is treated as
    # version 1 - presumably such files predate versioning; confirm.
    my $initial_version = $meta->{'version'} // 1;

    if ($initial_version > $CFG_VERSION) {
        die "Error: config file at future version $initial_version, program only at $CFG_VERSION\n";
    }

    if ($initial_version < $CFG_VERSION) {
        print "Warning: upgrade config file from version $initial_version to last version $CFG_VERSION\n";
        for my $from ($initial_version .. $CFG_VERSION - 1) {
            my $to = $from + 1;
            # Look the upgrade routine up by name without a symbolic
            # reference (which 'use strict' forbids).
            my $upgrade = __PACKAGE__->can("upgrade_version_${from}_to_${to}")
                or die "Error: no upgrade path from config version $from to $to\n";
            $upgrade->($meta);
        }
    }

    return wantarray ? ($meta, $initial_version) : $meta;
}

################################################################
# command
################################################################

# Print the usage text. With a command name as argument, print only the
# usage line(s) of that command.
sub cmd_help {
    my ($cmd) = @_;

    my $help = <<'END';
project-meta - opendata project metafile manager

 project-meta help
 project-meta version
 project-meta check
 project-meta dap-publish [--verbose|-v] [--dataset|-d dataset]
 project-meta dap-unpublish [--verbose|-v] [--dataset|-d dataset]
 project-meta dataset-list
 project-meta dataset-size [--verbose|-v] [--dataset|-d dataset]
 project-meta make-zip [--verbose|-v] [--dataset|-d dataset]
 project-meta make-allfiles
 project-meta list-license
 project-meta make-file-license
 project-meta make-file-author
 project-meta make-file-copyright
 project-meta upgrade
END

    if (defined $cmd) {
        # Keep the usage lines of the requested command only; the command
        # name is quoted so that it is matched literally.
        my @usage = grep { m/^\s*project-meta\s+\Q$cmd\E(?:\s|$)/ } split /\n/, $help;
        print join("\n", @usage) . "\n";
    }
    else {
        print $help;
    }
    return;
}

#---------------------------------------------------------------
# Print the program version.
sub cmd_version {
    print "$VERSION\n";
    return;
}

#---------------------------------------------------------------
# Write the upgraded metadata to PROJECT-META-v<version>.yml when the file
# on disk is older than the current format. Dies if the target file
# already exists.
sub cmd_upgrade {
    my ($meta, $initial_version) = load_metadata();

    if ($initial_version < $meta->{'version'}) {
        my $next_config = "PROJECT-META-v$meta->{'version'}.yml";
        if (-e $next_config) {
            die "Error: upgrade propose config file $next_config already exists\n";
        }

        print "Warning: create new config file $next_config, please verify before using it\n";
        YAML::Syck::SaveFile($next_config, $meta);
    }
    elsif ($initial_version == $CFG_VERSION) {
        print "Warning: nothing to do, config file already at version $CFG_VERSION\n";
    }
    return;
}

#---------------------------------------------------------------
# Print a yes/no report on a few metadata keys.
sub cmd_check {
    my $meta        = load_metadata();
    my $acronym     = $meta->{'project'}{'identifier'}{'acronym'} // '';
    my $current_dir = Cwd::getcwd();
    my $dap_folder  = $meta->{'public-dap'}{'dap-folder'} // '';

    # scalar() because a failed match in list context is an empty list,
    # which would silently drop the argument.
    print_ok('project/identifier/acronym', scalar($acronym =~ m{\d\d\w[\w\d_/]+}));
    # dap-folder must be a non-empty absolute path.
    print_ok('public-dap/dap-folder', scalar($dap_folder ne '' && $dap_folder =~ m{^/}));
    # \Q...\E: the directory is matched as a literal string, not a pattern.
    print_ok('dap-folder not match current_dir', scalar($dap_folder !~ m{\Q$current_dir\E}));
    #print YAML::Syck::Dump($meta);
    return;
}

#---------------------------------------------------------------
# Print (do not execute) the shell commands that publish a dataset in the
# OpeNDAP folder through symbolic links.
# Options: --verbose|-v, --dataset|-d <name> (named dataset).
# Dies when the named dataset does not exist or when the selected dataset
# is not a list.
sub cmd_dap_publish {
    local @ARGV = @_;
    my ($verbose, $dataset_name);

    Getopt::Long::GetOptions(
        'verbose|v'   => \$verbose,
        'dataset|d=s' => \$dataset_name,
    );

    my $meta        = load_metadata();
    my $current_dir = Cwd::getcwd();
    my $acronym     = $meta->{'project'}{'identifier'}{'acronym'};
    my $dap_folder  = $meta->{'public-dap'}{'dap-folder'};
    my $data_set    = $meta->{'public-dap'}{'data-set'};

    if ($dataset_name) {
        # Named datasets only exist when data-set is a hash.
        if (ref $data_set eq 'HASH' and exists $data_set->{$dataset_name}) {
            $data_set = $data_set->{$dataset_name};
        }
        else {
            die "Error, dataset $dataset_name does'nt exists\n";
        }
    }
    die "Error: dataset is not a list of files or folders (use --dataset to select a named dataset)\n"
        if ref $data_set ne 'ARRAY';

    # The three legal files are always published. Work on a copy, without
    # duplicates, so the loaded metadata is left untouched.
    my %seen;
    my @entries = grep { !$seen{$_}++ } @{$data_set}, 'AUTHORS.txt', 'COPYRIGHT.txt', 'LICENSE.txt';

    # Create a list of the folder
    my %folders;
    addfolder2list(\%folders, $_) for @entries;

    print "chmod o+rX,o-w '$current_dir'\n";
    print "mkdir -p '$dap_folder/$acronym'\n" if not -d "$dap_folder/$acronym";
    if ($dataset_name) {
        $acronym .= "/$dataset_name";
        print "mkdir -p '$dap_folder/$acronym'\n" if not -d "$dap_folder/$acronym";
    }

    # Sorted, so that a parent folder is always created before its children.
    for my $folder (sort keys %folders) {
        print "chmod o+rX,o-w '$current_dir/$folder'\n";
        print "mkdir '$dap_folder/$acronym/$folder'\n" if -d "$current_dir/$folder";
    }

    for my $entry (@entries) {
        if ($entry =~ m{/}) {
            # sub-folder case
            my $folder = $entry =~ s{/[^/]+$}{}r;
            print "chmod -R o+rX,o-w '$current_dir/$entry'\n";
            print "ln --symbolic --target-directory '$dap_folder/$acronym/$folder/' '$current_dir/$entry'\n";
        }
        else {
            # Root case
            print "ln --symbolic --target-directory '$dap_folder/$acronym/' '$current_dir/$entry'\n";
        }
    }
    print "chmod -R o+rX,o-w '$dap_folder/$acronym/'\n";
    return;
}

#---------------------------------------------------------------
# Private helper: return the value of public-dap/data-set, or the named
# entry of it when $dataset_name is given.
# Dies when the named dataset does not exist.
sub _select_dataset {
    my ($meta, $dataset_name) = @_;

    my $data_set = $meta->{'public-dap'}{'data-set'};
    return $data_set if not $dataset_name;

    # Named datasets only exist when data-set is a hash.
    if (ref $data_set eq 'HASH' and exists $data_set->{$dataset_name}) {
        return $data_set->{$dataset_name};
    }
    die "Error, dataset $dataset_name does'nt exists\n";
}

# Private helper: return the selected dataset as a list of entries.
# Dies when the selection is not an array reference.
sub _dataset_entries {
    my ($meta, $dataset_name) = @_;

    my $data_set = _select_dataset($meta, $dataset_name);
    die "Error: dataset is not a list of files or folders (use --dataset to select a named dataset)\n"
        if ref $data_set ne 'ARRAY';
    return @{$data_set};
}

# Private helper: true when the file contains the marker line written by
# the project-meta templates ("Automatically generated ... project-meta").
sub _is_autogenerated {
    my ($path) = @_;

    open my $fh, '<', $path or die "Error: cannot read $path - $!\n";
    my $automatic = 0;
    while (my $line = <$fh>) {
        next if $line !~ m/Automatically generated .* project-meta/i;
        $automatic = 1;
        last;
    }
    close $fh;
    return $automatic;
}

# Private helper: (over)write a text file, dying on any I/O error.
sub _write_text_file {
    my ($path, $content) = @_;

    open my $fh, '>', $path or die "Error: cannot write $path - $!\n";
    print {$fh} $content or die "Error: cannot write $path - $!\n";
    # Buffered write errors only show up at close time.
    close $fh or die "Error: cannot write $path - $!\n";
    return;
}

#---------------------------------------------------------------
# Print (do not execute) the shell commands that remove the links and
# folders of a published dataset from the OpeNDAP folder.
# Options: --verbose|-v, --dataset|-d <name>.
# Dies when the DAP folder and the current folder contain each other.
sub cmd_dap_unpublish {
    local @ARGV = @_;
    my ($verbose, $dataset_name);

    Getopt::Long::GetOptions(
        'verbose|v'   => \$verbose,
        'dataset|d=s' => \$dataset_name,
    );

    my $meta        = load_metadata();
    my $current_dir = Cwd::getcwd();
    my $acronym     = $meta->{'project'}{'identifier'}{'acronym'};
    my $dap_folder  = $meta->{'public-dap'}{'dap-folder'} // '';

    # Only validates the dataset name; the entries are not needed here.
    _select_dataset($meta, $dataset_name);

    # Safety net: never emit delete commands that could reach the real
    # data. Paths are compared literally (\Q...\E); an empty dap-folder
    # is refused as well.
    die "Error: DAP folder match current folder"
        if $dap_folder eq ''
        or $dap_folder  =~ m{\Q$current_dir\E}
        or $current_dir =~ m{\Q$dap_folder\E};

    $acronym .= "/$dataset_name" if $dataset_name;

    print "find '$dap_folder/$acronym/' -type l -o -type d -exec ls -l {} +\n";
    print "find '$dap_folder/$acronym/' -type l -delete\n";
    print "find '$dap_folder/$acronym/' -type d -delete\n";
    return;
}

#---------------------------------------------------------------
# Print the names of the datasets declared under public-dap/data-set.
# Dies when no dataset is declared.
sub cmd_dataset_list {
    local @ARGV = @_;

    my $meta = load_metadata();
    die "Error: no dataset\n"
        if not defined $meta->{'public-dap'}
        or not defined $meta->{'public-dap'}{'data-set'};

    my $data_set = $meta->{'public-dap'}{'data-set'};
    if (ref($data_set) eq 'HASH') {
        print "$_\n" for sort keys %{$data_set};
    }
    else {
        print "default uname dataset\n";
    }
    return;
}

#---------------------------------------------------------------
# Print the size in megabytes (du -sm) of every dataset entry, then the
# total. Options: --verbose|-v, --dataset|-d <name>.
sub cmd_dataset_size {
    local @ARGV = @_;
    my ($verbose, $dataset_name);

    Getopt::Long::GetOptions(
        'verbose|v'   => \$verbose,
        'dataset|d=s' => \$dataset_name,
    );

    my $meta = load_metadata();

    my $total = 0;
    for my $dataset (_dataset_entries($meta, $dataset_name)) {
        # List form of the pipe open: no shell involved, so names with
        # spaces or shell metacharacters are passed to du unchanged.
        open my $du, '-|', 'du', '-sm', '--', $dataset
            or die "Error: cannot run du on $dataset - $!\n";
        my $line = <$du> // '';
        close $du;
        chomp $line;

        # "<size><blank><path>": limit the split to 2 fields so that a
        # path containing blanks stays in one piece.
        my ($size, $inode) = split /\s+/, $line, 2;
        if (not defined $size or $size !~ m/^\d+$/) {
            print {*STDERR} "Warning: cannot get size of $dataset\n";
            next;
        }
        $total += $size;
        printf "%-7i %s\n", $size, $inode;
    }
    printf "%-7i %s\n", $total, 'TOTAL';
    return;
}

#---------------------------------------------------------------
# Create <acronym>[-<dataset>]--<YYYYMMDD-hhmm>.zip in the current folder
# with every dataset entry plus AUTHORS.txt, COPYRIGHT.txt and LICENSE.txt.
# Options: --verbose|-v, --dataset|-d <name>.
# Dies when an entry cannot be added or the archive cannot be written.
sub cmd_make_zip {
    local @ARGV = @_;
    my ($verbose, $dataset_name);

    Getopt::Long::GetOptions(
        'verbose|v'   => \$verbose,
        'dataset|d=s' => \$dataset_name,
    );

    my $meta        = load_metadata();
    my $current_dir = Cwd::getcwd();
    my $acronym     = $meta->{'project'}{'identifier'}{'acronym'};

    # Work on a copy, without duplicates, so the loaded metadata is left
    # untouched.
    my %seen;
    my @entries = grep { !$seen{$_}++ }
        _dataset_entries($meta, $dataset_name), 'AUTHORS.txt', 'COPYRIGHT.txt', 'LICENSE.txt';

    # Create a Zip file
    my $zip = Archive::Zip->new();
    for my $entry (@entries) {
        if (-d $entry) {
            # Folder case
            $zip->addTree($entry, "$acronym/$entry") == AZ_OK
                or die "Error: cannot add folder $entry to zip\n";
        }
        elsif (-f $entry) {
            # File case
            defined $zip->addFile($entry, "$acronym/$entry")
                or die "Error: cannot add file $entry to zip\n";
        }
        else {
            # Strange case
            print "Error: entry $entry doesn't exists\n";
        }
    }

    # Time stamp of the archive name, local time.
    my ($min, $hour, $mday, $mon, $year) = (localtime)[1 .. 5];
    my $date = sprintf '%04i%02i%02i-%02i%02i', $year + 1900, $mon + 1, $mday, $hour, $min;

    # Save the Zip file
    my $zipname = "$acronym";
    $zipname .= "-$dataset_name" if $dataset_name;
    $zipname .= "--$date";
    if ($zip->writeToFileNamed("$current_dir/$zipname.zip") != AZ_OK) {
        die 'Error: zip write error';
    }
    return;
}

#---------------------------------------------------------------
# Generate or update AUTHORS.txt, LICENSE.txt and COPYRIGHT.txt.
sub cmd_make_allfiles {
    cmd_make_file_author();
    cmd_make_file_license();
    cmd_make_file_copyright();
    return;
}

#---------------------------------------------------------------
# Create AUTHORS.txt in the current folder from the AUTHORS.tt template.
# An existing file is only replaced when it was generated by project-meta.
sub cmd_make_file_author {
    my $meta        = load_metadata();
    my $author_file = Cwd::getcwd() . '/AUTHORS.txt';

    if (-f $author_file) {
        # Test for manual or automatically generated file
        if (not _is_autogenerated($author_file)) {
            print "Warning: AUTHORS.txt already exists\n";
            return;
        }
        print "Warning: update AUTHORS.txt\n";
    }

    my $tt = Template->new(INCLUDE_PATH => '/usr/share/project-meta/template.d')
        or die Template->error(), "\n";
    my $msg_format = '';
    $tt->process('AUTHORS.tt',
        {
            acronym    => $meta->{'project'}{'identifier'}{'acronym'},
            authorlist => $meta->{'project'}{'creator'},
        },
        \$msg_format) or die $tt->error;

    _write_text_file($author_file, "$msg_format\n\n");
    return;
}

#---------------------------------------------------------------
# Copy the license text named by project/rights from the project-meta
# license database to LICENSE.txt. An existing LICENSE.txt is never
# replaced. Exits with status 1 when the license is unknown.
sub cmd_make_file_license {
    my $meta        = load_metadata();
    my $current_dir = Cwd::getcwd();

    if (-f "$current_dir/LICENSE.txt") {
        print "Warning: LICENSE.txt already exists\n";
        return;
    }

    # An undefined key is handled like an unknown (empty) license name.
    my $license = $meta->{'project'}{'rights'} // '';
    my $source  = "/usr/share/project-meta/license.d/$license.txt";
    if ($license eq '' or not -f $source) {
        print "Error: license $license doesn't exists in project-meta database\n";
        exit 1;
    }

    copy($source, "$current_dir/LICENSE.txt")
        or die "Error: license copy failed - $!";

    print "Info: LICENSE.txt file create\n";
    return;
}

#---------------------------------------------------------------
# Create COPYRIGHT.txt in the current folder from the COPYRIGHT.tt
# template. An existing file is only replaced when it was generated by
# project-meta.
sub cmd_make_file_copyright {
    my $meta           = load_metadata();
    my $copyright_file = Cwd::getcwd() . '/COPYRIGHT.txt';

    if (-f $copyright_file) {
        # Test for manual or automatically generated file
        if (not _is_autogenerated($copyright_file)) {
            print "Warning: COPYRIGHT.txt already exists\n";
            return;
        }
        print "Warning: update COPYRIGHT.txt\n";
    }

    my $tt = Template->new(
        INCLUDE_PATH => '/usr/share/project-meta/template.d',
        POST_CHOMP   => 1, # Remove space and carriage return after %]
    ) or die Template->error(), "\n";

    # Only the first DOI found in project/relation goes into the file.
    my $doi_first = '';
    if (exists $meta->{'project'}{'relation'}) {
        for my $relation (@{$meta->{'project'}{'relation'}}) {
            next if not exists $relation->{'doi'};
            $doi_first = $relation->{'doi'};
            last;
        }
    }

    my $msg_format = '';
    $tt->process('COPYRIGHT.tt',
        {
            title       => $meta->{'project'}{'title'},
            acronym     => $meta->{'project'}{'identifier'}{'acronym'},
            authorlist  => $meta->{'project'}{'creator'},
            description => $meta->{'project'}{'description'},
            license     => $meta->{'project'}{'rights'},
            doi         => $doi_first,
        },
        \$msg_format) or die $tt->error;

    _write_text_file($copyright_file, "$msg_format\n\n");
    return;
}

#---------------------------------------------------------------
# Print, sorted, the license names available in the project-meta license
# database (the *.txt files of the license.d folder, extension removed).
sub cmd_list_license {
    my $license_dir = '/usr/share/project-meta/license.d';

    opendir my $dh, "$license_dir/" or die $!;
    my @entries = sort readdir $dh;
    closedir $dh;

    for my $license (@entries) {
        # Keep only file
        next if not -f "$license_dir/$license";
        # Keep only .txt file
        next if $license !~ m/\.txt$/;

        $license =~ s/\.txt$//;
        print "$license\n";
    }
    return;
}

################################################################
# documentation
################################################################

__END__

=head1 NAME

project-meta - opendata project metafile manager

=head1 USAGE

 project-meta help
 project-meta version
 project-meta check
 project-meta dap-publish [--verbose|-v] [--dataset|-d dataset]
 project-meta dap-unpublish [--verbose|-v] [--dataset|-d dataset]
 project-meta dataset-list
 project-meta dataset-size [--verbose|-v] [--dataset|-d dataset]
 project-meta make-zip [--verbose|-v] [--dataset|-d dataset]
 project-meta list-license
 project-meta make-file-license
 project-meta make-file-author
 project-meta make-file-copyright
 project-meta upgrade

=head1 DESCRIPTION

Project-Meta is a small tool to maintain a set of open data files.
In order to help you in this task, the C<project-meta> command has a set of
actions to generate and maintain many files in your dataset.
Everything is declared in the metafile F<PROJECT-META.yml>.
This YAML file must exist in your root project folder.
See L</METAFILE SPECIFICATION>.

=head1 COMMANDS

Some commands are defined in the source code but are not documented here.
These may be not well defined, not finished, not well tested...
You can read the source code and use them at your own risk
(like for all the Project-Meta code).

=head2 check

 project-meta check

Check that your F<PROJECT-META.yml> has the right keys.
If your metafile is not a valid YAML file,
you can use the C<yamllint> or C<ysh> commands to check just its format.

=head2 dap-publish

 project-meta dap-publish [--verbose|-v] [--dataset|-d dataset]

Publish data on an OpeNDAP server.
Because data can be very large,
this command just creates UNIX soft links in the OpeNDAP folder to the real data.
There is no copy.
Files F<AUTHORS.txt>, F<COPYRIGHT.txt> and F<LICENSE.txt> are mandatory
but could be generated (see below).
The main keys used in the F<PROJECT-META.yml> are:

=over

=item * C<project/identifier/acronym>: the project short acronym, added to the OpeNDAP root folder

=item * C<public-dap/dap-folder>: the OpeNDAP root folder

=item * C<public-dap/data-set>: a list of files or folders to push

=back

Because this command could be dangerous, it does nothing!
It prints on the terminal the shell commands to be done.
You have to verify the output before evaluating it.

 project-meta dap-publish
 project-meta dap-publish | bash

=head2 dap-unpublish

 project-meta dap-unpublish [--verbose|-v] [--dataset|-d dataset]

Unpublish data from the OpeNDAP server.
In practice, it removes the links in the OpeNDAP folder for that project.
Because the command C<rm> is always dangerous,
we use here the command C<find> limited to folders and links.
Please verify the returned values before executing it with the C<-delete> option.

=head2 dataset-list

 project-meta dataset-list

=head2 dataset-size

 project-meta dataset-size [--verbose|-v] [--dataset|-d dataset]

=head2 make-zip

 project-meta make-zip [--verbose|-v] [--dataset|-d dataset]

Create a ZIP archive with the open data set.
Files F<AUTHORS.txt>, F<COPYRIGHT.txt> and F<LICENSE.txt> are mandatory
but could be generated (see below).
The main keys used in the F<PROJECT-META.yml> are:

=over

=item * C<project/identifier/acronym>: the project short acronym, used as root folder

=item * C<public-dap/data-set>: a list of files or folders to push

=back

=head2 make-allfiles

 project-meta make-allfiles

Generate or update all files: F<AUTHORS.txt>, F<LICENSE.txt> and F<COPYRIGHT.txt>.
This command is just a shortcut for L</make-file-author>,
L</make-file-license> and L</make-file-copyright>.

=head2 list-license

 project-meta list-license

Give the list of all the open data licenses supported by the project-meta license database.
At this time the possible licenses are:

=over

=item * L
(permissive - allow users to freely share and adapt)

=item * L
(copyleft - allow users to freely share and adapt while maintaining this same freedom for others)

=item * L
(copyleft - allow users to freely share and adapt while maintaining this same freedom for others)

=item * L
(like public domain)

=item * L
(copyleft - open data of the French government)

=item * L
(copyleft - allow users to freely share, modify, and use the database while maintaining this same freedom for others)

=back

Note that these licenses are dedicated to open data.
Please do not use an open license that was designed
for source code or documentation and not for open data.
Here are some links about the open data license context:

=over

=item * A good article about the Community Data License Agreement and Open Data Licenses in general
L
written on 9 May 2018.

=item * A French page about the French Public Open Data license
L.

=back

=head2 make-file-license

 project-meta make-file-license

Copy the license file from the project-meta license database
to the current folder with the file name: F<LICENSE.txt>.

The license is defined in the F<PROJECT-META.yml> specification under the key C<project/rights>.
The list of possible licenses is given by the command L</list-license>.

=head2 make-file-author

 project-meta make-file-author

Create or update the F<AUTHORS.txt> file in the current folder.
Authors data are extracted from the C<PROJECT-META.yml> file.

=head2 make-file-copyright

 project-meta make-file-copyright

Create or update the F<COPYRIGHT.txt> file in the current folder.
Authors, license and copyright data are extracted from the C<PROJECT-META.yml> file.

=head2 upgrade

 project-meta upgrade

Upgrade the config file to the last version.
Create a file F<PROJECT-META-vN.yml> (where N is the new version number)
in the current directory if it does not exist, error otherwise.
Please manually verify this autogenerated config file before renaming and using it.

=head1 METAFILE SPECIFICATION

Each project must have an open data metafile describing the project: C<PROJECT-META.yml>.
The file is in YAML format because this is a human-readable text file style.
Other formats could have been plain XML, RDF, JSON... but they are much less readable.

You can find in the project-meta software a L example.
This one is actually the master reference specification!

Some interesting papers or links about Open Meta Data Schema:

=over

=item * L written in December 2016.

=item * L from US government based on L.

=item * L from OpenDataMonitor.

=item * L
mapping between the metadata on datasets published by G8 Members through their open data portals.

=back

=head1 KNOWN BUGS

 - keys and tags are not really checked before doing an action!

=head1 SEE ALSO

yamllint(1), ysh(1), YAML, Archive::Zip

In the Debian GNU/Linux distribution, packages for C<yamllint> and C<ysh> are:

=over

=item * C<yamllint> - Linter for YAML files (Python)

=item * C<libyaml-shell-perl> - YAML test shell (Perl)

=back

Own project resources:

=over

=item * L

=item * L

=item * L

=back

=head1 AUTHOR

Written by Gabriel Moreau

=head1 SPECIAL THANKS

The list of people below did not directly contribute to project-meta's source code
but provided me with some data, returned bugs
or helped me in another task like having new ideas, specifications...
Maybe I forgot your contribution in recent years,
please forgive me in advance and send me an e-mail to correct this.

Joel Sommeria, Julien Chauchat, Cyrille Bonamy, Antoine Mathieu.

=head1 LICENSE AND COPYRIGHT

License GNU GPL version 2 or later and Perl equivalent

Copyright (C) 2017-2019, LEGI UMR 5519 / CNRS UGA G-INP, Grenoble, France