source: trunk/project-meta/project-meta @ 399

Last change on this file since 399 was 399, checked in by g7moreau, 5 years ago
  • case if no doi...
  • Property svn:executable set to *
File size: 21.3 KB
Line 
1#!/usr/bin/env perl
2#
3# 2018/01/17 Gabriel Moreau <Gabriel.Moreau(A)univ-grenoble-alpes.fr>
4#
5# apt-get install libyaml-syck-perl libtemplate-perl libarchive-zip-perl
6# apt-get install yamllint libyaml-shell-perl # check YAML files
7
8use strict;
9use warnings;
10use version; our $VERSION = version->declare('0.1.4');
11
12use File::Copy qw(copy);   
13use YAML::Syck;
14use Getopt::Long();
15use Cwd();
16use Template;
17use Archive::Zip qw(:ERROR_CODES :CONSTANTS);
18
# Version of the PROJECT-META.yml format handled by this program
our $CFG_VERSION = 2;

# Global command line options
my $verbose;
Getopt::Long::GetOptions('verbose' => \$verbose);

# Dispatch table: command name given on the command line => handler
my %CMD_DB = (
   'help'                  => \&cmd_help,
   'version'               => \&cmd_version,
   'check'                 => \&cmd_check,
   'dap-publish'           => \&cmd_dap_publish,
   'dap-unpublish'         => \&cmd_dap_unpublish,
   'make-zip'              => \&cmd_make_zip,
   'make-allfiles'         => \&cmd_make_allfiles,
   'make-file-author'      => \&cmd_make_file_author,
   'make-file-copyright'   => \&cmd_make_file_copyright,
   'make-file-license'     => \&cmd_make_file_license,
   'list-license'          => \&cmd_list_license,
   'upgrade'               => \&cmd_upgrade,
   );

################################################################
# main program
################################################################

# The first remaining argument is the command, 'help' when missing
my $cmd     = shift @ARGV || 'help';
my $handler = $CMD_DB{$cmd};

if (not defined $handler) {
   # Unknown command: report on STDERR, show the help and fail
   print {*STDERR} "project-meta: command $cmd not found\n\n";
   $CMD_DB{'help'}->();
   exit 1;
   }

# Remaining arguments are handed over to the command
$handler->(@ARGV);

exit;
57
58################################################################
59# subroutine
60################################################################
61
# Print one report line "key : yes|no" on the currently selected output.
#   $key  - label of the check, left-justified on 35 columns
#   $test - result of the check, any true value is reported as 'yes'
sub print_ok {
   my ($key, $test) = @_;

   my $answer = 'no';
   $answer = 'yes' if $test;

   printf "%-35s : %s\n", $key, $answer;
   }
67
68################################################################
69
# Register every parent folder of a relative path in a hash.
#   $folderdb - hash reference, the count of each parent folder is incremented
#   $folder   - path of a file or folder, for example 'a/b/c.txt'
# With 'a/b/c.txt' the keys 'a/b' and 'a' are incremented; a path
# without any '/' leaves the hash untouched.
# Returns nothing.
sub addfolder2list {
   my ($folderdb, $folder) = @_;

   return if not defined $folder;

   # A trailing '/' (folder written as 'a/b/') must not count as a path
   # component, otherwise the path would never shrink.
   $folder =~ s{/+$}{};

   # Iteratively drop the last component until no '/' is left
   while ($folder =~ s{/+[^/]+$}{}) {
      # Absolute path: nothing left above the root
      last if $folder eq '';

      $folderdb->{$folder}++;
      }

   return;
   }
80
81################################################################
82
# Upgrade in place a metadata structure from format version 1 to version 2.
#   $meta - hash reference holding the content of a version 1 metafile
# Renamed keys:
#   project/acronym           -> project/identifier/acronym
#   project/authors           -> project/creator
#   project/short-description -> project/description
#   public-dap/data-license   -> project/rights
#   publication/doi (list)    -> project/relation (list of {doi => ...})
# A key missing in the old file is simply skipped, so that it never
# erases or creates an undefined value in the new layout.
# Returns the same hash reference, with 'version' set to 2.
sub upgrade_version_1_to_2 {
   my $meta = shift;

   $meta->{'project'}{'identifier'} ||= {};
   my $project = $meta->{'project'};

   # Do not create the 'public-dap' section when the old file has none
   my $public_dap = ref($meta->{'public-dap'}) eq 'HASH' ? $meta->{'public-dap'} : {};

   # [source hash, old key, destination hash, new key]
   my @moves = (
      [$project,    'acronym',           $project->{'identifier'}, 'acronym'],
      [$project,    'authors',           $project,                 'creator'],
      [$project,    'short-description', $project,                 'description'],
      [$public_dap, 'data-license',      $project,                 'rights'],
      );
   for my $move (@moves) {
      my ($from, $old_key, $to, $new_key) = @{$move};
      next if not exists $from->{$old_key};
      $to->{$new_key} = delete $from->{$old_key};
      }

   $project->{'relation'} ||= [];
   my $publication = $meta->{'publication'};
   if (ref($publication) eq 'HASH' and exists $publication->{'doi'}) {
      my $doi_entry = delete $publication->{'doi'};

      # Accept a list of DOI as well as a single DOI written as a scalar
      my @doi_list = ref($doi_entry) eq 'ARRAY' ? @{$doi_entry}
                   : defined $doi_entry         ? ($doi_entry)
                   :                              ();
      for my $doi (@doi_list) {
         push @{$project->{'relation'}}, {doi => $doi};
         }
      }

   $meta->{'version'} = 2;
   return $meta;
   }
108
109################################################################
110
# Load PROJECT-META.yml from the current folder and bring it, in memory
# only, to the format version $CFG_VERSION.
# Returns the metadata hash reference in scalar context, and the list
# (metadata, version found in the file before any upgrade) in list context.
# Dies when the file is not a YAML mapping, when it is at a version newer
# than the program, or when an upgrade step is missing.
sub load_metadata {
   my $meta = YAML::Syck::LoadFile("PROJECT-META.yml");
   die "Error: PROJECT-META.yml is not a YAML mapping\n" if ref($meta) ne 'HASH';

   # A file without version key is handled as the oldest format
   my $initial_version = $meta->{'version'} // 1;

   if ($initial_version > $CFG_VERSION) {
      die "Error: config file at future version $meta->{'version'}, program only at $CFG_VERSION\n";
      }

   if ($initial_version < $CFG_VERSION) {
      # On STDERR: STDOUT of some commands is meant to be piped to a shell
      print {*STDERR} "Warning: upgrade config file from version $initial_version to last version $CFG_VERSION\n";

      # Chain the upgrade steps one version at a time
      for my $from ($initial_version .. $CFG_VERSION - 1) {
         my $name = 'upgrade_version_' . $from . '_to_' . ($from + 1);

         # Look the step up by name without a symbolic reference call,
         # which is forbidden under "use strict"
         my $upgrade = __PACKAGE__->can($name)
            or die "Error: no upgrade path from config version $from\n";
         $upgrade->($meta);
         }
      }

   # $initial_version is kept untouched so that the caller can detect
   # that an upgrade took place
   return wantarray ? ($meta, $initial_version) : $meta;
   }
127
128################################################################
129# command
130################################################################
131
# Command 'help': print the one-line description of the program followed
# by the list of all the commands.
sub cmd_help {
   my @commands = qw(
      help
      version
      check
      dap-publish
      dap-unpublish
      make-zip
      make-allfiles
      list-license
      make-file-license
      make-file-author
      make-file-copyright
      upgrade
      );

   print "project-meta - opendata project metafile manager\n\n";
   print map { " project-meta $_\n" } @commands;
   }
150
151################################################################
152
# Command 'version': print the program version ($VERSION) on one line.
sub cmd_version {
   printf "%s\n", $VERSION;
   }
156
157################################################################
158
# Command 'upgrade': save the metadata upgraded by load_metadata() in a new
# file PROJECT-META-v<version>.yml, the original metafile is not modified.
# Dies if the new file already exists.
sub cmd_upgrade {
   my ($meta, $initial_version) = load_metadata();

   if ($initial_version >= $meta->{'version'}) {
      # The file was not upgraded while loading
      print "Warning: nothing to do, config file already at version $CFG_VERSION\n"
         if $initial_version == $CFG_VERSION;
      return;
      }

   # Never overwrite a previously proposed file
   my $next_config = "PROJECT-META-v$meta->{'version'}.yml";
   die "Error: upgrade propose config file $next_config already exists\n"
      if -e $next_config;

   print "Warning: create new config file $next_config, please verify before using it\n";
   YAML::Syck::SaveFile($next_config, $meta);
   return;
   }
175
176################################################################
177
# Command 'check': print a yes/no report about some keys of the metafile.
#  - project/identifier/acronym contains two digits, a word character
#    and at least one more character among word characters and '/'
#  - public-dap/dap-folder is not empty and is an absolute path
#  - public-dap/dap-folder does not contain the current folder path
sub cmd_check {
   my $meta = load_metadata();

   # Missing keys are checked as empty strings, they just report 'no'
   my $acronym     = $meta->{'project'}{'identifier'}{'acronym'} // '';
   my $current_dir = Cwd::getcwd();
   my $dap_folder  = $meta->{'public-dap'}{'dap-folder'} // '';

   # Each test is reduced to 1 or 0 before the call: a match in list
   # context returns an empty list on failure, and a low precedence 'and'
   # in an argument list without parentheses is applied to the call
   # itself, not to the argument.
   my $acronym_ok   = $acronym =~ m{\d\d\w[\w\d_/]+}                   ? 1 : 0;
   my $dapfolder_ok = ($dap_folder ne '' and $dap_folder =~ m{^/})     ? 1 : 0;
   # \Q...\E: the current folder is a literal text, not a pattern
   my $separate_ok  = $dap_folder !~ m{\Q$current_dir\E}               ? 1 : 0;

   print_ok('project/identifier/acronym',       $acronym_ok);
   print_ok('public-dap/dap-folder',            $dapfolder_ok);
   print_ok('dap-folder not match current_dir', $separate_ok);

   #print YAML::Syck::Dump($meta);
   }
191
192################################################################
193
# Command 'dap-publish': print (and never execute) the shell commands which
# publish the data set on the OpeNDAP folder through symbolic links.
# Keys read in the metafile:
#   project/identifier/acronym - sub-folder created in the OpeNDAP folder
#   public-dap/dap-folder      - root of the OpeNDAP folder
#   public-dap/data-set        - list of files or folders to publish
# AUTHORS.txt, COPYRIGHT.txt and LICENSE.txt are always added to the list.
# Dies if the acronym or the OpeNDAP folder is not defined.
sub cmd_dap_publish {
   my $meta = load_metadata();
   my $current_dir = Cwd::getcwd();
   my $acronym     = $meta->{'project'}{'identifier'}{'acronym'};
   my $dap_folder  = $meta->{'public-dap'}{'dap-folder'};

   # Without these two keys the commands would target a wrong folder
   for my $value ($acronym, $dap_folder) {
      die "Error: project/identifier/acronym and public-dap/dap-folder must be defined\n"
         if not defined $value or $value eq '';
      }

   # Quote a text as a single shell word, the output is made for a shell
   my $quote = sub {
      my ($text) = @_;
      $text =~ s{'}{'\\''}g;
      return "'$text'";
      };

   # Data set without trailing '/' (folder written as 'a/b/'), without
   # empty entry and without doublon
   my %seen;
   my @data_set;
   for my $entry (@{$meta->{'public-dap'}{'data-set'} || []}, 'AUTHORS.txt', 'COPYRIGHT.txt', 'LICENSE.txt') {
      next if not defined $entry;
      (my $dataset = $entry) =~ s{/+$}{};
      next if $dataset eq '' or $seen{$dataset}++;
      push @data_set, $dataset;
      }

   # Create a list of the parent folders
   my %folders;
   for my $dataset (@data_set) {
      addfolder2list(\%folders, $dataset);
      }

   print 'chmod o+rX,o-w ', $quote->($current_dir), "\n";
   print 'mkdir -p ', $quote->("$dap_folder/$acronym"), "\n" if not -d "$dap_folder/$acronym";

   # Sorted keys: a parent folder always comes before its sub-folders
   for my $folder (sort keys %folders) {
      print 'chmod o+rX,o-w ', $quote->("$current_dir/$folder"), "\n";
      print 'mkdir ', $quote->("$dap_folder/$acronym/$folder"), "\n" if -d "$current_dir/$folder";
      }

   for my $dataset (@data_set) {
      # Root case by default
      my $target = "$dap_folder/$acronym/";

      if ($dataset =~ m{/}) {
         # Sub-folder case: link inside the same relative folder
         my $folder = $dataset =~ s{/[^/]+$}{}r;
         print 'chmod -R o+rX,o-w ', $quote->("$current_dir/$dataset"), "\n";
         $target = "$dap_folder/$acronym/$folder/";
         }

      print 'ln --symbolic --target-directory ', $quote->($target), ' ', $quote->("$current_dir/$dataset"), "\n";
      }

   print 'chmod -R o+rX,o-w ', $quote->("$dap_folder/$acronym/"), "\n";
   }
236
237################################################################
238
# Command 'dap-unpublish': print (and never execute) the 'find' commands
# which list then delete the links and the folders of the project in the
# OpeNDAP folder.
# Keys read in the metafile:
#   project/identifier/acronym - sub-folder of the project in the OpeNDAP folder
#   public-dap/dap-folder      - root of the OpeNDAP folder
# Dies if the acronym is not defined, or if the OpeNDAP folder is empty,
# contains the current folder path or is contained in it.
sub cmd_dap_unpublish {
   my $meta = load_metadata();
   my $current_dir = Cwd::getcwd();
   my $acronym     = $meta->{'project'}{'identifier'}{'acronym'};
   my $dap_folder  = $meta->{'public-dap'}{'dap-folder'} // '';

   # Without acronym the commands would clean the whole OpeNDAP folder
   die "Error: project/identifier/acronym is not defined\n"
      if not defined $acronym or $acronym eq '';

   # Literal substring test (index), the paths are not regular expressions.
   # An empty OpeNDAP folder is a substring of anything, so it dies too.
   die "Error: DAP folder match current folder"
      if index($dap_folder, $current_dir) >= 0 or index($current_dir, $dap_folder) >= 0;

   # Quote a text as a single shell word, the output is made for a shell
   my $quote = sub {
      my ($text) = @_;
      $text =~ s{'}{'\\''}g;
      return "'$text'";
      };
   my $target = $quote->("$dap_folder/$acronym/");

   # Parentheses are needed: without them -exec applies only to '-type d'
   # and no link is listed. 'ls -d' lists a folder itself, not its content.
   print "find $target \\( -type l -o -type d \\) -exec ls -ld {} +\n";
   print "find $target -type l -delete\n";
   print "find $target -type d -delete\n";
   }
251
252################################################################
253
# Command 'make-zip': create in the current folder the archive
# <acronym>--<YYYYMMDD-HHMM>.zip with the data set of the project.
# Keys read in the metafile:
#   project/identifier/acronym - root folder inside the archive
#   public-dap/data-set        - list of files or folders to archive
# AUTHORS.txt, COPYRIGHT.txt and LICENSE.txt are always added to the list.
# An entry which cannot be added is reported and skipped.
# Dies if the archive cannot be written.
sub cmd_make_zip {
   my $meta = load_metadata();
   my $current_dir = Cwd::getcwd();
   my $acronym     = $meta->{'project'}{'identifier'}{'acronym'};

   # Data set with the mandatory files and without doublon
   my %seen;
   my @data_set = grep { not $seen{$_}++ }
      (@{$meta->{'public-dap'}{'data-set'} || []}, 'AUTHORS.txt', 'COPYRIGHT.txt', 'LICENSE.txt');

   # Create a Zip file
   my $zip = Archive::Zip->new();

   for my $dataset (@data_set) {
      if (-d $dataset) {
         # Folder case, addTree returns AZ_OK on success
         $zip->addTree($dataset, "$acronym/$dataset") == AZ_OK()
            or print "Error: cannot add folder $dataset to the archive\n";
         }
      elsif (-f $dataset) {
         # File case, addFile returns the new member or undef
         defined $zip->addFile($dataset, "$acronym/$dataset")
            or print "Error: cannot add file $dataset to the archive\n";
         }
      else {
         # Strange case
         print "Error: entry $dataset doesn't exists\n";
         }
      }

   # Time stamp of the archive: only minutes to year are needed
   my ($min, $hour, $mday, $mon, $year) = (localtime time)[1 .. 5];
   my $date = sprintf '%04i%02i%02i-%02i%02i', $year + 1900, $mon + 1, $mday, $hour, $min;

   # Save the Zip file
   unless ($zip->writeToFileNamed("$current_dir/$acronym--$date.zip") == AZ_OK()) {
      die 'Error: zip write error';
      }
   }
295
296################################################################
297
# Command 'make-allfiles': run, in this order, the commands which generate
# AUTHORS.txt, LICENSE.txt and COPYRIGHT.txt.
sub cmd_make_allfiles {
   my @generators = (
      \&cmd_make_file_author,
      \&cmd_make_file_license,
      \&cmd_make_file_copyright,
      );

   for my $generator (@generators) {
      $generator->();
      }
   }
303
304################################################################
305
# Command 'make-file-author': create or update AUTHORS.txt in the current
# folder from the template AUTHORS.tt.
# An existing AUTHORS.txt is replaced only if one of its lines holds the
# mark "Automatically generated ... project-meta", a file written by hand
# is left untouched.
sub cmd_make_file_author {
   my $meta = load_metadata();

   my $authors_file = Cwd::getcwd() . '/AUTHORS.txt';

   # Values given to the template
   my %template_vars = (
      acronym    => $meta->{'project'}{'identifier'}{'acronym'},
      authorlist => $meta->{'project'}{'creator'},
      );

   if (-f $authors_file) {
      # Count the lines with the mark of an automatically generated file
      open my $in, '<', $authors_file or die $!;
      my $automatic = grep { m/Automatically generated .* project-meta/i } <$in>;
      close $in;

      unless ($automatic) {
         print "Warning: AUTHORS.txt already exists\n";
         return;
         }

      print "Warning: update AUTHORS.txt\n";
      }

   my $tt = Template->new(INCLUDE_PATH => '/usr/share/project-meta/template.d');
   my $msg_format = '';
   $tt->process('AUTHORS.tt', \%template_vars, \$msg_format) || die $tt->error;

   open my $out, '>', $authors_file or die $!;
   print {$out} "$msg_format\n\n";
   close $out;
   }
344
345################################################################
346
# Command 'make-file-license': copy in the current folder, under the name
# LICENSE.txt, the license file named by the key project/rights.
# Nothing is done if LICENSE.txt already exists.
# Exits with status 1 if the license is not in the license database,
# dies if the copy fails.
sub cmd_make_file_license {
   my $meta = load_metadata();

   my $target = Cwd::getcwd() . '/LICENSE.txt';

   if (-f $target) {
      print "Warning: LICENSE.txt already exists\n";
      return;
      }

   # The license database holds one <license>.txt file per license
   my $license = $meta->{'project'}{'rights'};
   my $source  = "/usr/share/project-meta/license.d/$license.txt";

   unless (-f $source) {
      print "Error: license $license doesn't exists in project-meta database\n";
      exit 1;
      }

   copy($source, $target)
      or die "Error: license copy failed - $!";

   print "Info: LICENSE.txt file create\n";
   return;
   }
370
371################################################################
372
# Command 'make-file-copyright': create or update COPYRIGHT.txt in the
# current folder from the template COPYRIGHT.tt.
# An existing COPYRIGHT.txt is replaced only if one of its lines holds the
# mark "Automatically generated ... project-meta", a file written by hand
# is left untouched.
# Keys of the 'project' section given to the template: title,
# identifier/acronym, creator, description, rights and the first DOI found
# in the list 'relation' (undefined if there is none).
sub cmd_make_file_copyright {
   my $meta = load_metadata();

   my $current_dir = Cwd::getcwd();

   if (-f "$current_dir/COPYRIGHT.txt") {
      # Test for manual or automatically generated file
      # Automatically generated file by project-meta
      my $automatic;
      open my $fh, '<', "$current_dir/COPYRIGHT.txt" or die $!;
      while (my $line = <$fh>) {
         next if $line !~ m/Automatically generated .* project-meta/i;
         $automatic = 1;
         last;
         }
      close $fh;

      if (not $automatic) {
         print "Warning: COPYRIGHT.txt already exists\n";
         return;
         }

      print "Warning: update COPYRIGHT.txt\n";
      }

   my $tt = Template->new(
      INCLUDE_PATH   => '/usr/share/project-meta/template.d',
      POST_CHOMP     => 1, # Remove space and carriage return after %]
      );
   my $msg_format = '';

   # First DOI of the relation list, case without any DOI is allowed
   my $doi_first;
   my $relation = $meta->{'project'}{'relation'};
   if (ref($relation) eq 'ARRAY') {
      for my $item (@{$relation}) {
         # Skip the relations of another kind
         next if ref($item) ne 'HASH' or not exists $item->{'doi'};
         $doi_first = $item->{'doi'};
         last;
         }
      }

   $tt->process('COPYRIGHT.tt',
      {
         title       => $meta->{'project'}{'title'},
         acronym     => $meta->{'project'}{'identifier'}{'acronym'},
         authorlist  => $meta->{'project'}{'creator'},
         description => $meta->{'project'}{'description'},
         license     => $meta->{'project'}{'rights'},
         doi         => $doi_first,
      }, \$msg_format) || die $tt->error;

   open my $fh, '>', "$current_dir/COPYRIGHT.txt" or die $!;
   print {$fh} "$msg_format\n\n";
   # A buffered write error is only reported when the file is closed
   close $fh or die "Error: cannot write COPYRIGHT.txt - $!\n";
   }
423
424################################################################
425
# Command 'list-license': print, one per line and without the '.txt'
# extension, the name of each license file of the license database.
sub cmd_list_license {
   my $license_dir = '/usr/share/project-meta/license.d/';

   opendir my $dh, $license_dir or die $!;
   for my $entry (readdir $dh) {
      # Keep only the plain files with the .txt extension
      next unless -f "$license_dir$entry";
      next unless $entry =~ m/\.txt$/;

      (my $license = $entry) =~ s/\.txt$//;
      print "$license\n";
      }
   closedir $dh;
   }
440
441################################################################
442# documentation
443################################################################
444
445__END__
446
447=head1 NAME
448
449project-meta - opendata project metafile manager
450
451
452=head1 USAGE
453
454 project-meta help
455 project-meta version
456 project-meta check
457 project-meta dap-publish
458 project-meta dap-unpublish
459 project-meta make-zip
460 project-meta list-license
461 project-meta make-file-license
462 project-meta make-file-author
463 project-meta make-file-copyright
464 project-meta upgrade
465
466
467=head1 DESCRIPTION
468
Project-Meta is a small tool to maintain a set of open data files.
In order to help you in this task, the C<project-meta> command has a set of actions
to generate and maintain many files in your dataset.
472
Everything is declared in the metafile F<PROJECT-META.yml>.
This YAML file must exist in your root project folder.
475See L</METAFILE SPECIFICATION>.
476
477
478=head1 COMMANDS
479
Some commands are defined in the source code but are not documented here.
They may be not well defined, not finished, not well tested...
482You can read the source code and use them at your own risk
483(like for all the Project-Meta code).
484
485=head2 check
486
487 project-meta check
488
Check that your F<PROJECT-META.yml> has the right keys.
If your metafile is not a valid YAML file,
you can use the C<yamllint> or C<ysh> commands to check just its format.
492
493=head2 dap-publish
494
495 project-meta dap-publish
496
497Publish data on an OpeNDAP server.
Because data can be very large,
this command just creates UNIX soft links in the OpeNDAP folder pointing to the real data.
500There is no copy.
501Files F<AUTHORS.txt>, F<LICENSE.txt> and F<COPYRIGHT.txt> are mandatory but could be generated (see below).
502The main keys use in the F<PROJECT-META.yml> are:
503
504=over
505
506=item * C<project/identifier/acronym>: the project short acronym, add to the OpeNDAP root folder
507
508=item * C<public-dap/dap-folder>: the OpeNDAP root folder
509
510=item * C<public-dap/data-set>: a list of files or folder to push
511
512=back
513
Because this command could be dangerous, it does nothing by itself!
It prints on the terminal the shell commands to be run.
You have to verify the output before evaluating it.
517
518 project-meta dap-publish
519 project-meta dap-publish | bash
520
521=head2 dap-unpublish
522
523 project-meta dap-unpublish
524
525Unpublish data from the OpeNDAP server.
In practice, it removes the links in the OpeNDAP folder for that project.
Because the command C<rm> is always dangerous,
we use here the command C<find> limited to folders and links.

Please verify the returned values before executing it with the C<-delete> option.
531
532=head2 make-zip
533
534 project-meta make-zip
535
536Create a ZIP archive with the open data set.
537Files F<AUTHORS.txt>, F<LICENSE.txt> and F<COPYRIGHT.txt> are mandatory but could be generated (see below).
538The main keys use in the F<PROJECT-META.yml> are:
539
540=over
541
542=item * C<project/identifier/acronym>: the project short acronym, use as root folder
543
544=item * C<public-dap/data-set>: a list of files or folder to push
545
546=back
547
548=head2 make-allfiles
549
550 project-meta make-allfiles
551
552Generate or update all files: F<AUTHORS.txt>, F<COPYRIGHT.txt> and F<LICENSE.txt>.
553This command is just a shortcut for L</make-file-author>, L</make-file-copyright> and L</make-file-license>.
554
555
556=head2 list-license
557
558 project-meta list-license
559
560Give the list of all the open data licenses supported by the project-meta license database.
561At this time the possible licenses are:
562
563=over
564
565=item * L<community-data-license-agreement-permissive-v1.0|https://cdla.io/permissive-1-0/wp-content/uploads/sites/52/2017/10/CDLA-Permissive-v1.0.pdf>
566        (permissive - allow users to freely share and adapt)
567
568=item * L<community-data-license-agreement-sharing-v1.0|https://cdla.io/sharing-1-0/wp-content/uploads/sites/52/2017/10/CDLA-Sharing-v1.0.pdf>
569        (copyleft - allow users to freely share and adapt while maintaining this same freedom for others)
570
=item * L<creative-common-attribution-v4.0|https://creativecommons.org/licenses/by/4.0/legalcode.txt>
        (attribution - allow users to freely share and adapt provided that appropriate credit is given)
573
574=item * L<creative-common-zero-v1.0|https://creativecommons.org/publicdomain/zero/1.0/legalcode.txt>
575        (like public domain)
576
577=item * L<licence-ouverte-v2.0|https://www.etalab.gouv.fr/wp-content/uploads/2017/04/ETALAB-Licence-Ouverte-v2.0.pdf>
        (copyleft - opendata French government)
579
580=item * L<open-database-license-v1.0|https://opendatacommons.org/files/2018/02/odbl-10.txt>
581        (copyleft - allow users to freely share, modify, and use the database while maintaining this same freedom for others)
582
583=back
584
585Note that these licenses are dedicated to open data.
586Please do not use an open license that would have been thought for source code or documentation and not for open data.
587Here are some links about open data licence context:
588
589=over
590
591=item * A good article about Community Data License Agreement and Open Data Licence in general
592   L<Licenses for data|https://lwn.net/Articles/753648/> written on 9 May 2018.
593
594=item * A french page about French Public Open Data licence
595   L<https://www.etalab.gouv.fr/licence-ouverte-open-licence>.
596
597=back
598
599=head2 make-file-license
600
601 project-meta make-file-license
602
603Copy the license file from the project-meta license database at the current folder
604with the file name: F<LICENSE.txt>.
605
The license is defined in the F<PROJECT-META.yml> specification under the key C<project/rights>.
607The list of possible license is given with the command L</list-license>.
608
609=head2 make-file-author
610
611 project-meta make-file-author
612
613Create or update the F<AUTHORS.txt> file at the current folder.
614Authors data are extracted from the C<PROJECT-META.yml> file.
615
616=head2 make-file-copyright
617
618 project-meta make-file-copyright
619
620Create or update the F<COPYRIGHT.txt> file at the current folder.
621Authors, license and copyright data are extracted from the C<PROJECT-META.yml> file.
622
623=head2 upgrade
624
625 project-meta upgrade
626
627Upgrade config file to last version.
Create a file F<PROJECT-META-vVERSION.yml> in the current directory if it does not exist, error otherwise.
Please manually verify this autogenerated config file before renaming and using it.
630
631
632=head1 METAFILE SPECIFICATION
633
634Each project must have an open data metafile describing the project : C<PROJECT-META.yml>.
635The file is in YAML format because this is a human-readable text file style.
636Other formats could have been Plain XML, RDF, JSON... but they are much less readable.
637
638You can find in the project-meta software a
639L<PROJECT-META.sample.yml|http://servforge.legi.grenoble-inp.fr/pub/soft-trokata/project-meta/PROJECT-META.sample.yml> example.
640This one is actually the master reference specification!
641
Some interesting papers or links about Open Meta Data Schema:
643
644=over
645
646=item * L<Metadata for the open data portals|http://devinit.org/wp-content/uploads/2018/01/Metadata-for-open-data-portals.pdf>
        written in December 2016.
648
=item * L<Project Open Data Metadata Schema v1.1|https://project-open-data.cio.gov/v1.1/schema/> from the US government
650        based on L<DCAT|http://www.w3.org/TR/vocab-dcat/>.
651
652=item * L<Metadata Standards|http://knowhow.opendatamonitor.eu/odresearch/metadata-standards/>
653        from OpenDataMonitor.
654
655=item * L<G8 Metadata Mapping|https://github.com/project-open-data/G8_Metadata_Mapping/blob/master/index.md>
656        mapping between the metadata on datasets published by G8 Members through their open data portals.
657
658=back
659
660
661=head1 KNOWN BUGS
662
663 - not really check keys and tags before doing action!
664
665
666=head1 SEE ALSO
667
668yamllint(1), ysh(1), YAML, Archive::Zip
669
670In Debian GNU/Linux distribution, packages for C<yamllint> and C<ysh> are:
671
672=over
673
674=item * C<yamllint> - Linter for YAML files (Python)
675
676=item * C<libyaml-shell-perl> - YAML test shell (Perl)
677
678=back
679
680
Own project resources:
682
683=over
684
685=item * L<Web site|http://servforge.legi.grenoble-inp.fr/projects/soft-trokata/wiki/SoftWare/ProjectMeta>
686
687=item * L<Online Manual|http://servforge.legi.grenoble-inp.fr/pub/soft-trokata/project-meta/project-meta.html>
688
689=item * L<SVN repository|http://servforge.legi.grenoble-inp.fr/svn/soft-trokata/trunk/project-meta>
690
691=back
692
693
694=head1 AUTHOR
695
696Written by Gabriel Moreau, LEGI UMR5519, CNRS, Grenoble - France
697
698
699=head1 SPECIAL THANKS
700
701The list of people below did not directly contribute to project-meta's source code
702but provided me with some data, returned bugs
703or helped me in another task like having new ideas, specifications...
704Maybe I forgot your contribution in recent years,
705please forgive me in advance and send me an e-mail to correct this.
706
707Joel Sommeria, Julien Chauchat, Cyrille Bonamy, Antoine Mathieu.
708
709
710=head1 LICENSE AND COPYRIGHT
711
712License GNU GPL version 2 or later and Perl equivalent
713
714Copyright (C) 2017-2018 Gabriel Moreau <Gabriel.Moreau(A)univ-grenoble-alpes.fr>.
Note: See TracBrowser for help on using the repository browser.