source: trunk/project-meta/project-meta @ 398

Last change on this file since 398 was 398, checked in by g7moreau, 5 years ago
  • else -> elsif !
  • Property svn:executable set to *
File size: 21.2 KB
Line 
1#!/usr/bin/env perl
2#
3# 2018/01/17 Gabriel Moreau <Gabriel.Moreau(A)univ-grenoble-alpes.fr>
4#
5# apt-get install libyaml-syck-perl libtemplate-perl libarchive-zip-perl
6# apt-get install yamllint libyaml-shell-perl # check YAML files
7
8use strict;
9use warnings;
10use version; our $VERSION = version->declare('0.1.3');
11
12use File::Copy qw(copy);   
13use YAML::Syck;
14use Getopt::Long();
15use Cwd();
16use Template;
17use Archive::Zip qw(:ERROR_CODES :CONSTANTS);
18
# Version of the PROJECT-META.yml layout handled by this program
our $CFG_VERSION = 2;

# Global command line options (the flag is parsed here but no command
# in this file reads it)
my ($verbose);
Getopt::Long::GetOptions('verbose' => \$verbose);

# Dispatch table: command name given on the command line => handler
my %CMD_DB = (
   'help'                  => \&cmd_help,
   'version'               => \&cmd_version,
   'check'                 => \&cmd_check,
   'dap-publish'           => \&cmd_dap_publish,
   'dap-unpublish'         => \&cmd_dap_unpublish,
   'make-zip'              => \&cmd_make_zip,
   'make-allfiles'         => \&cmd_make_allfiles,
   'make-file-author'      => \&cmd_make_file_author,
   'make-file-copyright'   => \&cmd_make_file_copyright,
   'make-file-license'     => \&cmd_make_file_license,
   'list-license'          => \&cmd_list_license,
   'upgrade'               => \&cmd_upgrade,
   );

################################################################
# main program
################################################################

# The first remaining argument is the command, help is the default
my $cmd = shift @ARGV || 'help';

# Unknown command: complain on STDERR, show the help and fail
if (not defined $CMD_DB{$cmd}) {
   print {*STDERR} "project-meta: command $cmd not found\n\n";
   $CMD_DB{'help'}->();
   exit 1;
   }

# Known command: the remaining arguments are given to the handler
$CMD_DB{$cmd}->(@ARGV);

exit;
57
58################################################################
59# subroutine
60################################################################
61
# Print one line of a check report: the key name padded on 35 columns,
# followed by 'yes' when $test is true and by 'no' otherwise.
sub print_ok {
   my ($key, $test) = @_;

   my $answer = 'no';
   $answer = 'yes' if $test;

   printf "%-35s : %s\n", $key, $answer;
   }
67
68################################################################
69
# Register every parent folder of a path in a folder database.
#
#   $folderdb : hash reference, each parent folder found is used as a key
#               and its counter is incremented
#   $folder   : relative path of a file or of a folder
#
# For 'a/b/c.txt' the keys 'a/b' and 'a' are incremented.
# A path without any parent folder leaves the database untouched.
# Always returns nothing.
sub addfolder2list {
   my ($folderdb, $folder) = @_;

   # A trailing slash (folder written as 'data/') does not add a level.
   # Without this cleanup the path could never be shortened and the
   # walk would never end.
   $folder =~ s{/+\z}{};

   # Strip the last path component until no parent remains.
   # The loop ends as soon as nothing can be removed, so the walk is
   # finite whatever the input is (doubled slashes included).
   while ($folder =~ s{/+[^/]+\z}{}) {
      $folderdb->{$folder}++;
      }

   return;
   }
80
81################################################################
82
# Convert in place a metadata structure from layout version 1 to layout
# version 2 and return the same reference.
#
# Keys moved:
#   project/acronym              -> project/identifier/acronym
#   project/authors              -> project/creator
#   project/short-description    -> project/description
#   public-dap/data-license      -> project/rights
#   publication/doi (list)       -> project/relation (list of {doi => ...})
sub upgrade_version_1_to_2 {
   my $meta = shift;

   # delete returns the removed value, so each move is a single statement
   $meta->{'project'}{'identifier'} ||= {};
   $meta->{'project'}{'identifier'}{'acronym'} = delete $meta->{'project'}{'acronym'};

   # Plain renames inside the project section
   my %renamed_to = (
      'authors'           => 'creator',
      'short-description' => 'description',
      );
   for my $old_key (sort keys %renamed_to) {
      $meta->{'project'}{$renamed_to{$old_key}} = delete $meta->{'project'}{$old_key};
      }

   # The data license leaves the public-dap section
   $meta->{'project'}{'rights'} = delete $meta->{'public-dap'}{'data-license'};

   # Every publication DOI is appended to the relation list
   $meta->{'project'}{'relation'} ||= [];
   push @{$meta->{'project'}{'relation'}}, +{doi => $_}
      for @{$meta->{'publication'}{'doi'}};
   delete $meta->{'publication'}{'doi'};

   $meta->{'version'} = 2;
   return $meta;
   }
108
109################################################################
110
# Load the metafile PROJECT-META.yml from the current folder and upgrade
# it in memory to the layout version $CFG_VERSION when it is older.
#
# Returns, in list context, ($meta, $initial_version) where
# $initial_version is the version found in the file BEFORE any upgrade,
# and in scalar context only $meta.
#
# Dies when the file declares a version newer than $CFG_VERSION or when
# the upgrade procedure does not exist.
sub load_metadata {
   my $meta = YAML::Syck::LoadFile("PROJECT-META.yml");

   my $initial_version = $meta->{'version'};
   if ($initial_version < $CFG_VERSION) {
      # Warning on STDERR: the STDOUT of some commands is made of shell
      # commands that can be piped to a shell
      print {*STDERR} "Warning: upgrade config file from version $initial_version to last version $CFG_VERSION\n";

      # Look up the upgrade procedure by name. A direct call through a
      # string (&{$name}) is refused under 'use strict'.
      my $upgrade = 'upgrade_version_' . ($CFG_VERSION - 1) . '_to_' . $CFG_VERSION;
      my $upgrade_sub = __PACKAGE__->can($upgrade)
         or die "Error: upgrade procedure $upgrade not found\n";
      $upgrade_sub->($meta);

      # $initial_version is deliberately left untouched: the caller
      # compares it with $meta->{'version'} to know if an upgrade was done
      }
   elsif ($initial_version > $CFG_VERSION) {
      die "Error: config file at future version $initial_version, program only at $CFG_VERSION\n";
      }

   return wantarray ? ($meta, $initial_version) : $meta;
   }
127
128################################################################
129# command
130################################################################
131
# Command help: print the program title and the list of all commands
# on the standard output.
sub cmd_help {
   # Commands in the order they are shown to the user
   my @commands = qw(
      help
      version
      check
      dap-publish
      dap-unpublish
      make-zip
      make-allfiles
      list-license
      make-file-license
      make-file-author
      make-file-copyright
      upgrade
      );

   print "project-meta - opendata project metafile manager\n\n",
      map { " project-meta $_\n" } @commands;
   }
150
151################################################################
152
# Command version: print the program version ($VERSION) followed by
# a newline on the standard output.
sub cmd_version {
   printf "%s\n", $VERSION;
   }
156
157################################################################
158
# Command upgrade: save the metadata, once upgraded by load_metadata,
# in a new file PROJECT-META-v<version>.yml of the current folder.
# The original metafile is never modified.
# Dies when the new file already exists.
sub cmd_upgrade {
   my ($meta, $initial_version) = load_metadata();

   # No upgrade was done when loading: at most a message to print
   if ($initial_version >= $meta->{'version'}) {
      print "Warning: nothing to do, config file already at version $CFG_VERSION\n"
         if $initial_version == $CFG_VERSION;
      return;
      }

   # Never overwrite a file proposed by a previous upgrade
   my $next_config = "PROJECT-META-v$meta->{'version'}.yml";
   die "Error: upgrade propose config file $next_config already exists\n"
      if -e $next_config;

   print "Warning: create new config file $next_config, please verify before using it\n";
   YAML::Syck::SaveFile($next_config, $meta);
   }
175
176################################################################
177
# Command check: print a yes/no report about some keys of the metafile.
#
#   - project/identifier/acronym must contain two digits, one word
#     character, then at least one character among word characters,
#     digits, '_' and '/'
#   - public-dap/dap-folder must be a non-empty absolute path
#   - public-dap/dap-folder must not contain the current folder path
sub cmd_check {
   my $meta = load_metadata();

   # A missing key is reported as 'no', not as a Perl warning
   my $acronym     = $meta->{'project'}{'identifier'}{'acronym'} // '';
   my $current_dir = Cwd::getcwd();
   my $dap_folder  = $meta->{'public-dap'}{'dap-folder'} // '';

   # Each test is reduced to 1 or 0 and the calls use parentheses, so the
   # complete condition is really the second argument of print_ok
   # (a low-precedence 'and' would end the argument list too early)
   print_ok('project/identifier/acronym',       $acronym =~ m{\d\d\w[\w\d_/]+} ? 1 : 0);
   print_ok('public-dap/dap-folder',            ($dap_folder ne '' && $dap_folder =~ m{^/}) ? 1 : 0);

   # \Q...\E: the current folder is a plain string, not a regex
   print_ok('dap-folder not match current_dir', $dap_folder !~ m{\Q$current_dir\E} ? 1 : 0);

   #print YAML::Syck::Dump($meta);
   }
191
192################################################################
193
# Command dap-publish: print on the standard output the shell commands
# (chmod, mkdir, ln) that publish the data set in the OpeNDAP folder
# with symbolic links. Nothing is executed by this command itself.
sub cmd_dap_publish {
   my $meta        = load_metadata();
   my $current_dir = Cwd::getcwd();
   my $acronym     = $meta->{'project'}{'identifier'}{'acronym'};
   my $dap_folder  = $meta->{'public-dap'}{'dap-folder'};
   my $data_set    = $meta->{'public-dap'}{'data-set'};

   # The three generated files are always published, each entry is kept
   # only once (first occurrence wins, order is preserved)
   push @{$data_set}, qw(AUTHORS.txt COPYRIGHT.txt LICENSE.txt);
   my %already;
   @{$data_set} = grep { not $already{$_}++ } @{$data_set};

   # All the parent folders of the published entries
   my %folders;
   addfolder2list(\%folders, $_) for @{$data_set};

   # Root of the project inside the OpeNDAP tree
   my $publish_root = "$dap_folder/$acronym";

   print "chmod o+rX,o-w '$current_dir'\n";
   print "mkdir -p '$publish_root'\n" unless -d $publish_root;

   # Sorted order: a parent folder is always handled before its children
   for my $folder (sort keys %folders) {
      print "chmod o+rX,o-w '$current_dir/$folder'\n";
      print "mkdir '$publish_root/$folder'\n" if -d "$current_dir/$folder";
      }

   for my $entry (@{$data_set}) {
      # Root case by default: link directly under the project root
      my $link_dir = "$publish_root/";

      if ($entry =~ m{/}) {
         # Sub-folder case: link inside the matching published folder
         my $parent = $entry =~ s{/[^/]+$}{}r;
         print "chmod -R o+rX,o-w '$current_dir/$entry'\n";
         $link_dir = "$publish_root/$parent/";
         }

      print "ln --symbolic --target-directory '$link_dir' '$current_dir/$entry'\n";
      }

   print "chmod -R o+rX,o-w '$publish_root/'\n";
   }
236
237################################################################
238
# Command dap-unpublish: print on the standard output the find commands
# that list, then delete, the links and the folders of the project in
# the OpeNDAP folder. Nothing is executed by this command itself.
#
# Dies when the acronym or the OpeNDAP folder is not defined, or when
# the OpeNDAP folder and the current folder contain each other.
sub cmd_dap_unpublish {
   my $meta = load_metadata();
   my $current_dir = Cwd::getcwd();
   my $acronym     = $meta->{'project'}{'identifier'}{'acronym'};
   my $dap_folder  = $meta->{'public-dap'}{'dap-folder'};

   # With an empty value the generated 'find ... -delete' would work on
   # another tree than the project one
   die "Error: keys project/identifier/acronym and public-dap/dap-folder must be defined\n"
      if !defined $acronym || $acronym eq '' || !defined $dap_folder || $dap_folder eq '';

   # Plain substring test: paths are not regular expressions (a '.' or
   # a '+' in a folder name must not be interpreted)
   die "Error: DAP folder match current folder"
      if index($dap_folder, $current_dir) >= 0 or index($current_dir, $dap_folder) >= 0;

   print "find '$dap_folder/$acronym/' -type l -o -type d -exec ls -l {} +\n";
   print "find '$dap_folder/$acronym/' -type l -delete\n";
   print "find '$dap_folder/$acronym/' -type d -delete\n";
   }
251
252################################################################
253
# Command make-zip: create in the current folder the archive
# <acronym>--<YYYYMMDD-hhmm>.zip with the data set and the files
# AUTHORS.txt, COPYRIGHT.txt and LICENSE.txt.
# In the archive, every entry is stored under the folder <acronym>/.
# Dies when the archive cannot be written.
sub cmd_make_zip {
   my $meta        = load_metadata();
   my $current_dir = Cwd::getcwd();
   my $data_set    = $meta->{'public-dap'}{'data-set'};
   my $acronym     = $meta->{'project'}{'identifier'}{'acronym'};

   # The three generated files are always archived, each entry is kept
   # only once (first occurrence wins, order is preserved)
   push @{$data_set}, qw(AUTHORS.txt COPYRIGHT.txt LICENSE.txt);
   my %already;
   @{$data_set} = grep { not $already{$_}++ } @{$data_set};

   my $zip = Archive::Zip->new();

   for my $entry (@{$data_set}) {
      # Folder case
      if (-d $entry) {
         $zip->addTree($entry, "$acronym/$entry");
         next;
         }

      # File case
      if (-f $entry) {
         $zip->addFile($entry, "$acronym/$entry");
         next;
         }

      # Neither a folder nor a plain file: reported and skipped
      print "Error: entry $entry doesn't exists\n";
      }

   # Local time stamp, only minute, hour, day, month and year are needed
   my ($min, $hour, $mday, $mon, $year) = (localtime time)[1 .. 5];
   my $date = sprintf '%04i%02i%02i-%02i%02i', $year + 1900, $mon + 1, $mday, $hour, $min;

   # Save the Zip file
   $zip->writeToFileNamed("$current_dir/$acronym--$date.zip") == AZ_OK
      or die 'Error: zip write error';
   }
295
296################################################################
297
# Command make-allfiles: run one after the other the commands
# make-file-author, make-file-license and make-file-copyright.
sub cmd_make_allfiles {
   my @steps = (
      \&cmd_make_file_author,
      \&cmd_make_file_license,
      \&cmd_make_file_copyright,
      );

   my @result;
   @result = $_->() for @steps;
   return @result;
   }
303
304################################################################
305
# Command make-file-author: create or update the file AUTHORS.txt of
# the current folder from the template AUTHORS.tt.
#
# An existing AUTHORS.txt is rewritten only when it contains the mark
# 'Automatically generated ... project-meta'. A file written by hand is
# kept as it is, with a warning.
# Dies on template or file errors.
sub cmd_make_file_author {
   my $meta = load_metadata();

   my $current_dir  = Cwd::getcwd();
   my $authors_file = "$current_dir/AUTHORS.txt";

   my $acronym      = $meta->{'project'}{'identifier'}{'acronym'};
   my $authors_list = $meta->{'project'}{'creator'};

   if (-f $authors_file) {
      # Test for manual or automatically generated file.
      # The file is read line by line and the search stops at the
      # first mark found.
      my $automatic;
      open my $in, '<', $authors_file or die "Error: cannot read $authors_file - $!\n";
      while (my $line = <$in>) {
         next if $line !~ m/Automatically generated .* project-meta/i;
         $automatic = 1;
         last;
         }
      close $in;

      if (not $automatic) {
         print "Warning: AUTHORS.txt already exists\n";
         return;
         }

      print "Warning: update AUTHORS.txt\n";
      }

   # Template->new returns a false value on error, the message is then
   # given by the class method Template->error
   my $tt = Template->new(INCLUDE_PATH => '/usr/share/project-meta/template.d')
      or die Template->error(), "\n";
   my $msg_format = '';
   $tt->process('AUTHORS.tt',
      {
         acronym    => $acronym,
         authorlist => $authors_list,
      }, \$msg_format) || die $tt->error;

   open my $out, '>', $authors_file or die "Error: cannot write $authors_file - $!\n";
   print {$out} "$msg_format\n\n";

   # A buffered write error (disk full...) is only reported by close
   close $out or die "Error: cannot write $authors_file - $!\n";
   }
344
345################################################################
346
# Command make-file-license: copy the license named by the key
# project/rights from the project-meta license database to the file
# LICENSE.txt of the current folder.
#
# An existing LICENSE.txt is never overwritten.
# Exits with status 1 when the license is not in the database and dies
# when the copy fails.
sub cmd_make_file_license {
   my $meta = load_metadata();

   my $current_dir = Cwd::getcwd();
   my $target      = "$current_dir/LICENSE.txt";

   # Keep the file already in place
   if (-f $target) {
      print "Warning: LICENSE.txt already exists\n";
      return;
      }

   my $license = $meta->{'project'}{'rights'};
   my $source  = "/usr/share/project-meta/license.d/$license.txt";

   # Unknown license name
   unless (-f $source) {
      print "Error: license $license doesn't exists in project-meta database\n";
      exit 1;
      }

   copy($source, $target)
      or die "Error: license copy failed - $!";

   print "Info: LICENSE.txt file create\n";
   return;
   }
370
371################################################################
372
# Command make-file-copyright: create or update the file COPYRIGHT.txt
# of the current folder from the template COPYRIGHT.tt.
#
# An existing COPYRIGHT.txt is rewritten only when it contains the mark
# 'Automatically generated ... project-meta'. A file written by hand is
# kept as it is, with a warning.
# Dies on template or file errors.
sub cmd_make_file_copyright {
   my $meta = load_metadata();

   my $current_dir    = Cwd::getcwd();
   my $copyright_file = "$current_dir/COPYRIGHT.txt";

   if (-f $copyright_file) {
      # Test for manual or automatically generated file.
      # The file is read line by line and the search stops at the
      # first mark found.
      my $automatic;
      open my $in, '<', $copyright_file or die "Error: cannot read $copyright_file - $!\n";
      while (my $line = <$in>) {
         next if $line !~ m/Automatically generated .* project-meta/i;
         $automatic = 1;
         last;
         }
      close $in;

      if (not $automatic) {
         print "Warning: COPYRIGHT.txt already exists\n";
         return;
         }

      print "Warning: update COPYRIGHT.txt\n";
      }

   # Template->new returns a false value on error, the message is then
   # given by the class method Template->error
   my $tt = Template->new(
      INCLUDE_PATH   => '/usr/share/project-meta/template.d',
      POST_CHOMP     => 1, # Remove space and carriage return after %]
      ) or die Template->error(), "\n";
   my $msg_format = '';

   # The first relation with a doi key is the DOI given to the template
   my $doi_first;
   for my $doi (@{$meta->{'project'}{'relation'}}) {
      next if not exists $doi->{'doi'};
      $doi_first = $doi->{'doi'};
      last;
      }

   $tt->process('COPYRIGHT.tt',
      {
         title       => $meta->{'project'}{'title'},
         acronym     => $meta->{'project'}{'identifier'}{'acronym'},
         authorlist  => $meta->{'project'}{'creator'},
         description => $meta->{'project'}{'description'},
         license     => $meta->{'project'}{'rights'},
         doi         => $doi_first,
      }, \$msg_format) || die $tt->error;

   open my $out, '>', $copyright_file or die "Error: cannot write $copyright_file - $!\n";
   print {$out} "$msg_format\n\n";

   # A buffered write error (disk full...) is only reported by close
   close $out or die "Error: cannot write $copyright_file - $!\n";
   }
421
422################################################################
423
# Command list-license: print, one per line and without the .txt
# extension, the name of every plain .txt file of the project-meta
# license database, in the order returned by readdir.
sub cmd_list_license {
   opendir my $dh, '/usr/share/project-meta/license.d/' or die $!;

   # Keep only the plain files named *.txt, then drop the extension
   my @licenses =
      map  { s/\.txt$//r }
      grep { m/\.txt$/ and -f "/usr/share/project-meta/license.d/$_" }
      readdir $dh;

   print "$_\n" for @licenses;

   closedir $dh;
   }
438
439################################################################
440# documentation
441################################################################
442
443__END__
444
445=head1 NAME
446
447project-meta - opendata project metafile manager
448
449
450=head1 USAGE
451
452 project-meta help
453 project-meta version
454 project-meta check
455 project-meta dap-publish
456 project-meta dap-unpublish
 project-meta make-zip
 project-meta make-allfiles
458 project-meta list-license
459 project-meta make-file-license
460 project-meta make-file-author
461 project-meta make-file-copyright
462 project-meta upgrade
463
464
465=head1 DESCRIPTION
466
Project-Meta is a small tool to maintain a set of open data files.
In order to help you in this task, the C<project-meta> command has a set of actions
to generate and maintain many files in your dataset.

Everything is declared in the metafile F<PROJECT-META.yml>.
This YAML file must exist in your root project folder.
See L</METAFILE SPECIFICATION>.
474
475
476=head1 COMMANDS
477
Some commands are defined in the source code but are not documented here.
These could be not well defined, not finished, not well tested...
You can read the source code and use them at your own risk
(like for all the Project-Meta code).
482
483=head2 check
484
485 project-meta check
486
Check that your F<PROJECT-META.yml> has the right keys.
If your metafile is not a valid YAML file,
you can use the C<yamllint> or C<ysh> commands to check just its format.
490
491=head2 dap-publish
492
493 project-meta dap-publish
494
Publish data on an OpeNDAP server.
Because data can be very large,
this command just creates UNIX soft links in the OpeNDAP folder to the real data.
There is no copy.
Files F<AUTHORS.txt>, F<LICENSE.txt> and F<COPYRIGHT.txt> are mandatory but could be generated (see below).
The main keys used in the F<PROJECT-META.yml> are:
501
502=over
503
504=item * C<project/identifier/acronym>: the project short acronym, add to the OpeNDAP root folder
505
506=item * C<public-dap/dap-folder>: the OpeNDAP root folder
507
508=item * C<public-dap/data-set>: a list of files or folder to push
509
510=back
511
Because this command could be dangerous, it does nothing by itself!
It only prints on the terminal the shell commands to be run.
You have to verify the output before evaluating it.
515
516 project-meta dap-publish
517 project-meta dap-publish | bash
518
519=head2 dap-unpublish
520
521 project-meta dap-unpublish
522
Unpublish data from the OpeNDAP server.
In practice, it removes the links in the OpeNDAP folder for that project.
Because the command C<rm> is always dangerous,
we use here the command C<find> limited to folders and links.

Please verify the returned values before executing it with the C<-delete> option.
529
530=head2 make-zip
531
532 project-meta make-zip
533
534Create a ZIP archive with the open data set.
535Files F<AUTHORS.txt>, F<LICENSE.txt> and F<COPYRIGHT.txt> are mandatory but could be generated (see below).
536The main keys use in the F<PROJECT-META.yml> are:
537
538=over
539
540=item * C<project/identifier/acronym>: the project short acronym, use as root folder
541
542=item * C<public-dap/data-set>: a list of files or folder to push
543
544=back
545
546=head2 make-allfiles
547
548 project-meta make-allfiles
549
550Generate or update all files: F<AUTHORS.txt>, F<COPYRIGHT.txt> and F<LICENSE.txt>.
551This command is just a shortcut for L</make-file-author>, L</make-file-copyright> and L</make-file-license>.
552
553
554=head2 list-license
555
556 project-meta list-license
557
558Give the list of all the open data licenses supported by the project-meta license database.
559At this time the possible licenses are:
560
561=over
562
563=item * L<community-data-license-agreement-permissive-v1.0|https://cdla.io/permissive-1-0/wp-content/uploads/sites/52/2017/10/CDLA-Permissive-v1.0.pdf>
564        (permissive - allow users to freely share and adapt)
565
566=item * L<community-data-license-agreement-sharing-v1.0|https://cdla.io/sharing-1-0/wp-content/uploads/sites/52/2017/10/CDLA-Sharing-v1.0.pdf>
567        (copyleft - allow users to freely share and adapt while maintaining this same freedom for others)
568
569=item * L<creative-common-attribution-v4.0|https://creativecommons.org/licenses/by/4.0/legalcode.txt>
570        (copyleft - allow users to freely share and adapt while maintaining this same freedom for others)
571
572=item * L<creative-common-zero-v1.0|https://creativecommons.org/publicdomain/zero/1.0/legalcode.txt>
573        (like public domain)
574
575=item * L<licence-ouverte-v2.0|https://www.etalab.gouv.fr/wp-content/uploads/2017/04/ETALAB-Licence-Ouverte-v2.0.pdf>
576        (copyleft - opendata french goverment)
577
578=item * L<open-database-license-v1.0|https://opendatacommons.org/files/2018/02/odbl-10.txt>
579        (copyleft - allow users to freely share, modify, and use the database while maintaining this same freedom for others)
580
581=back
582
583Note that these licenses are dedicated to open data.
584Please do not use an open license that would have been thought for source code or documentation and not for open data.
585Here are some links about open data licence context:
586
587=over
588
589=item * A good article about Community Data License Agreement and Open Data Licence in general
590   L<Licenses for data|https://lwn.net/Articles/753648/> written on 9 May 2018.
591
592=item * A french page about French Public Open Data licence
593   L<https://www.etalab.gouv.fr/licence-ouverte-open-licence>.
594
595=back
596
597=head2 make-file-license
598
599 project-meta make-file-license
600
601Copy the license file from the project-meta license database at the current folder
602with the file name: F<LICENSE.txt>.
603
The license is defined in the F<PROJECT-META.yml> specification under the key C<project/rights>
(key C<public-dap/data-license> in version 1 of the metafile).
The list of possible licenses is given by the command L</list-license>.
606
607=head2 make-file-author
608
609 project-meta make-file-author
610
611Create or update the F<AUTHORS.txt> file at the current folder.
612Authors data are extracted from the C<PROJECT-META.yml> file.
613
614=head2 make-file-copyright
615
616 project-meta make-file-copyright
617
618Create or update the F<COPYRIGHT.txt> file at the current folder.
619Authors, license and copyright data are extracted from the C<PROJECT-META.yml> file.
620
621=head2 upgrade
622
623 project-meta upgrade
624
Upgrade the config file to the last version.
Create a file F<PROJECT-META-vVERSION.yml> in the current directory if it does not exist, error otherwise.
Please manually verify this autogenerated config file before renaming and using it.
628
629
630=head1 METAFILE SPECIFICATION
631
632Each project must have an open data metafile describing the project : C<PROJECT-META.yml>.
633The file is in YAML format because this is a human-readable text file style.
634Other formats could have been Plain XML, RDF, JSON... but they are much less readable.
635
636You can find in the project-meta software a
637L<PROJECT-META.sample.yml|http://servforge.legi.grenoble-inp.fr/pub/soft-trokata/project-meta/PROJECT-META.sample.yml> example.
638This one is actually the master reference specification!
639
Some interesting papers or links about Open Meta Data Schema:
641
642=over
643
=item * L<Metadata for the open data portals|http://devinit.org/wp-content/uploads/2018/01/Metadata-for-open-data-portals.pdf>
        written in December 2016.

=item * L<Project Open Data Metadata Schema v1.1|https://project-open-data.cio.gov/v1.1/schema/> from the US government
        based on L<DCAT|http://www.w3.org/TR/vocab-dcat/>.
649
650=item * L<Metadata Standards|http://knowhow.opendatamonitor.eu/odresearch/metadata-standards/>
651        from OpenDataMonitor.
652
653=item * L<G8 Metadata Mapping|https://github.com/project-open-data/G8_Metadata_Mapping/blob/master/index.md>
654        mapping between the metadata on datasets published by G8 Members through their open data portals.
655
656=back
657
658
659=head1 KNOWN BUGS
660
661 - not really check keys and tags before doing action!
662
663
664=head1 SEE ALSO
665
666yamllint(1), ysh(1), YAML, Archive::Zip
667
668In Debian GNU/Linux distribution, packages for C<yamllint> and C<ysh> are:
669
670=over
671
672=item * C<yamllint> - Linter for YAML files (Python)
673
674=item * C<libyaml-shell-perl> - YAML test shell (Perl)
675
676=back
677
678
679Own project ressources:
680
681=over
682
683=item * L<Web site|http://servforge.legi.grenoble-inp.fr/projects/soft-trokata/wiki/SoftWare/ProjectMeta>
684
685=item * L<Online Manual|http://servforge.legi.grenoble-inp.fr/pub/soft-trokata/project-meta/project-meta.html>
686
687=item * L<SVN repository|http://servforge.legi.grenoble-inp.fr/svn/soft-trokata/trunk/project-meta>
688
689=back
690
691
692=head1 AUTHOR
693
694Written by Gabriel Moreau, LEGI UMR5519, CNRS, Grenoble - France
695
696
697=head1 SPECIAL THANKS
698
699The list of people below did not directly contribute to project-meta's source code
700but provided me with some data, returned bugs
701or helped me in another task like having new ideas, specifications...
702Maybe I forgot your contribution in recent years,
703please forgive me in advance and send me an e-mail to correct this.
704
705Joel Sommeria, Julien Chauchat, Cyrille Bonamy, Antoine Mathieu.
706
707
708=head1 LICENSE AND COPYRIGHT
709
710License GNU GPL version 2 or later and Perl equivalent
711
712Copyright (C) 2017-2018 Gabriel Moreau <Gabriel.Moreau(A)univ-grenoble-alpes.fr>.
Note: See TracBrowser for help on using the repository browser.