source: trunk/project-meta/project-meta @ 384

Last change on this file since 384 was 379, checked in by g7moreau, 6 years ago
  • Update project-meta to Dublin Core. Add an upgrade command
  • Property svn:executable set to *
File size: 21.2 KB
Line 
1#!/usr/bin/env perl
2#
3# 2018/01/17 Gabriel Moreau <Gabriel.Moreau(A)univ-grenoble-alpes.fr>
4#
5# apt-get install libyaml-syck-perl libtemplate-perl libarchive-zip-perl
6# apt-get install yamllint libyaml-shell-perl # check YAML files
7
8use strict;
9use warnings;
10use version; our $VERSION = version->declare('0.1.1');
11
12use File::Copy qw(copy);   
13use YAML::Syck;
14use Getopt::Long();
15use Cwd();
16use Template;
17use Archive::Zip qw(:ERROR_CODES :CONSTANTS);
18
# Version of the PROJECT-META.yml layout handled by this program
our $CFG_VERSION = 2;

# Global options: GetOptions runs before the command word is taken from @ARGV
my $verbose;
Getopt::Long::GetOptions('verbose' => \$verbose);


# Dispatch table: command name given on the command line => handler
my %CMD_DB = (
   'help'                => \&cmd_help,
   'version'             => \&cmd_version,
   'check'               => \&cmd_check,
   'dap-publish'         => \&cmd_dap_publish,
   'dap-unpublish'       => \&cmd_dap_unpublish,
   'make-zip'            => \&cmd_make_zip,
   'make-allfiles'       => \&cmd_make_allfiles,
   'make-file-author'    => \&cmd_make_file_author,
   'make-file-copyright' => \&cmd_make_file_copyright,
   'make-file-license'   => \&cmd_make_file_license,
   'list-license'        => \&cmd_list_license,
   'upgrade'             => \&cmd_upgrade,
   );
41
42################################################################
43# main program
44################################################################
45
# The first remaining argument is the command; 'help' when none is given
my $cmd     = shift @ARGV || 'help';
my $handler = $CMD_DB{$cmd};

if (not defined $handler) {
   # Unknown command: complain on STDERR, show the usage and fail
   print {*STDERR} "project-meta: command $cmd not found\n\n";
   $CMD_DB{'help'}->();
   exit 1;
   }

$handler->(@ARGV);

exit;
57
58################################################################
59# subroutine
60################################################################
61
# Print one check result as "<key padded to 35 columns> : yes|no".
#   $key  - label of the check
#   $test - any value; its truthiness selects 'yes' or 'no'
sub print_ok {
   my ($key, $test) = @_;

   my $answer = 'no';
   $answer = 'yes' if $test;

   printf "%-35s : %s\n", $key, $answer;
   }
67
68################################################################
69
# Record every parent folder of a relative path in a hash.
#   $folderdb - hash reference; each parent folder found is used as a key
#               and its value is incremented
#   $folder   - path such as 'a/b/file.txt' (records 'a/b' and 'a')
# A path without any '/' has no parent folder and records nothing.
# Returns nothing.
sub addfolder2list {
   my ($folderdb, $folder) = @_;

   # Drop trailing slashes first: the component-stripping substitution
   # below never matches "name/", so such an entry would never shrink
   $folder =~ s{/+$}{};

   # Remove one path component per turn and count the remaining parent
   while ($folder =~ s{/[^/]+$}{}) {
      $folder =~ s{/+$}{};       # collapse doubled separators ("a//b")
      last if $folder eq '';     # parent is the root: nothing relative to record
      $folderdb->{$folder}++;
      }

   return;
   }
80
81################################################################
82
# Convert, in place, a version 1 metadata structure to the version 2 layout.
#   $meta - hash reference holding the metadata tree
# Key moves performed (only when the version 1 key is present, so that a
# value already stored under the version 2 name is never overwritten):
#   project/acronym            -> project/identifier/acronym
#   project/authors            -> project/creator
#   project/short-description  -> project/description
#   public-dap/data-license    -> project/rights
#   publication/doi            -> project/relation (one {doi => ...} per entry)
# Sets $meta->{'version'} to 2 and returns the same reference.
sub upgrade_version_1_to_2 {
   my $meta = shift;

   my $project = $meta->{'project'} ||= {};
   $project->{'identifier'} ||= {};

   if (exists $project->{'acronym'}) {
      $project->{'identifier'}{'acronym'} = delete $project->{'acronym'};
      }

   # Plain renames inside the project section
   my %renamed = (
      'authors'           => 'creator',
      'short-description' => 'description',
      );
   for my $old (sort keys %renamed) {
      next if not exists $project->{$old};
      $project->{$renamed{$old}} = delete $project->{$old};
      }

   my $dap = $meta->{'public-dap'};
   if (ref $dap eq 'HASH' and exists $dap->{'data-license'}) {
      $project->{'rights'} = delete $dap->{'data-license'};
      }

   $project->{'relation'} ||= [];
   my $publication = $meta->{'publication'};
   if (ref $publication eq 'HASH' and exists $publication->{'doi'}) {
      my $doi_list = delete $publication->{'doi'};

      # Accept a list of DOI as well as a single scalar DOI
      my @dois = ref $doi_list eq 'ARRAY' ? @{$doi_list}
               : defined $doi_list        ? ($doi_list)
               :                            ();
      push @{$project->{'relation'}}, map { +{doi => $_} } @dois;
      }

   $meta->{'version'} = 2;
   return $meta;
   }
108
109################################################################
110
# Load PROJECT-META.yml from the current directory and bring it, in memory,
# to the layout version $CFG_VERSION.
# Returns ($meta, $initial_version) in list context and $meta alone in
# scalar context; $initial_version is the version found in the file.
# Dies when the file has a version newer than this program, or when no
# upgrade routine exists for one of the needed steps.
sub load_metadata {
   my $meta = YAML::Syck::LoadFile("PROJECT-META.yml");
   die "Error: PROJECT-META.yml is empty or is not a YAML mapping\n" if ref $meta ne 'HASH';

   # A file without a 'version' key is handled as version 1, the oldest
   # layout an upgrade routine exists for
   my $initial_version = $meta->{'version'} // 1;

   if ($initial_version < $CFG_VERSION) {
      print "Warning: upgrade config file from version $initial_version to last version $CFG_VERSION\n";

      # Apply every step in turn: upgrade_version_1_to_2, then 2_to_3...
      for my $from ($initial_version .. $CFG_VERSION - 1) {
         my $to      = $from + 1;
         # can() gives a real code reference, a symbolic call by name
         # is refused under 'use strict'
         my $upgrade = __PACKAGE__->can("upgrade_version_${from}_to_${to}")
            or die "Error: no upgrade routine from config version $from to $to\n";
         $upgrade->($meta);
         }
      }
   elsif ($initial_version > $CFG_VERSION) {
      die "Error: config file at future version $meta->{'version'}, program only at $CFG_VERSION\n"
      }

   return wantarray ? ($meta, $initial_version) : $meta;
   }
126
127################################################################
128# command
129################################################################
130
# Command 'help': print the one-line description and the list of commands.
sub cmd_help {
   my @commands = qw(
      help
      version
      check
      dap-publish
      dap-unpublish
      make-zip
      make-allfiles
      list-license
      make-file-license
      make-file-author
      make-file-copyright
      upgrade
      );

   print join '',
      "project-meta - opendata project metafile manager\n",
      "\n",
      map { " project-meta $_\n" } @commands;
   }
149
150################################################################
151
# Command 'version': print the program version followed by a newline.
sub cmd_version {
   printf "%s\n", $VERSION;
   }
155
156################################################################
157
# Command 'upgrade': write the upgraded metadata to a new file
# PROJECT-META-v<version>.yml, leaving PROJECT-META.yml untouched.
# Dies when the target file already exists; only prints a message when
# the config file is already at version $CFG_VERSION.
sub cmd_upgrade {
   my ($meta, $initial_version) = load_metadata();

   if ($initial_version < $meta->{'version'}) {
      # load_metadata changed the in-memory version: save that result
      my $next_config = "PROJECT-META-v$meta->{'version'}.yml";
      if (-e $next_config) {
         die "Error: upgrade propose config file $next_config already exists\n";
         }

      print "Warning: create new config file $next_config, please verify before using it\n";
      YAML::Syck::SaveFile($next_config, $meta);
      }
   elsif ($initial_version == $CFG_VERSION) {
      print "Warning: nothing to do, config file already at version $CFG_VERSION\n";
      }

   return;
   }
174
175################################################################
176
# Command 'check': print a yes/no line for a few sanity tests on the
# metadata (acronym shape, absolute DAP folder, DAP folder not containing
# the current directory path).
sub cmd_check {
   my $meta = load_metadata();

   # Missing keys are checked as empty strings so they report 'no'
   # instead of raising "uninitialized value" warnings
   my $acronym     = $meta->{'project'}{'identifier'}{'acronym'} // '';
   my $current_dir = Cwd::getcwd();
   my $dap_folder  = $meta->{'public-dap'}{'dap-folder'} // '';

   # scalar() keeps each test a single boolean argument; '&&' (not 'and')
   # is needed so both conditions belong to the argument of print_ok
   print_ok('project/identifier/acronym',       scalar($acronym =~ m{\d\d\w[\w\d_/]+}));
   print_ok('public-dap/dap-folder',            scalar($dap_folder ne '' && $dap_folder =~ m{^/}));
   # \Q...\E: the directory name is compared literally, not as a pattern
   print_ok('dap-folder not match current_dir', scalar($dap_folder !~ m{\Q$current_dir\E}));

   #print YAML::Syck::Dump($meta);
   }
190
191################################################################
192
# Command 'dap-publish': print (never run) the shell commands that open
# the permissions of the data set and link it under
# <dap-folder>/<acronym>/.
sub cmd_dap_publish {
   my $meta        = load_metadata();
   my $current_dir = Cwd::getcwd();
   my $acronym     = $meta->{'project'}{'identifier'}{'acronym'};
   my $dap_folder  = $meta->{'public-dap'}{'dap-folder'};
   my $data_set    = $meta->{'public-dap'}{'data-set'};
   my $dap_root    = "$dap_folder/$acronym";

   # The three generated files are always part of the published set
   push @{$data_set}, 'AUTHORS.txt', 'COPYRIGHT.txt', 'LICENSE.txt';

   # Keep the first occurrence of each entry, in the original order
   my (%seen, @entries);
   for my $entry (@{$data_set}) {
      push @entries, $entry if not $seen{$entry}++;
      }

   # Collect every parent folder of the entries
   my %folders;
   addfolder2list(\%folders, $_) for @entries;

   print "chmod o+rX,o-w '$current_dir'\n";
   print "mkdir -p '$dap_root'\n" unless -d $dap_root;
   for my $folder (sort keys %folders) {
      print "chmod o+rX,o-w '$current_dir/$folder'\n";
      print "mkdir '$dap_root/$folder'\n" if -d "$current_dir/$folder";
      }

   for my $entry (@entries) {
      # Entries at the root are linked directly under the project folder
      my $target = "$dap_root/";

      if ($entry =~ m{/}) {
         # Entry in a sub-folder: link it inside the matching DAP sub-folder
         (my $parent = $entry) =~ s{/[^/]+$}{};
         print "chmod -R o+rX,o-w '$current_dir/$entry'\n";
         $target = "$dap_root/$parent/";
         }

      print "ln --symbolic --target-directory '$target' '$current_dir/$entry'\n";
      }

   print "chmod -R o+rX,o-w '$dap_root/'\n";
   }
235
236################################################################
237
# Command 'dap-unpublish': print (never run) the find commands that list,
# then delete, the links and folders under <dap-folder>/<acronym>/.
# Dies when the DAP folder or the acronym is missing, or when the DAP
# folder path and the current directory path contain one another.
sub cmd_dap_unpublish {
   my $meta = load_metadata();
   my $current_dir = Cwd::getcwd();
   my $acronym     = $meta->{'project'}{'identifier'}{'acronym'};
   my $dap_folder  = $meta->{'public-dap'}{'dap-folder'};

   # Without both values the printed 'find ... -delete' commands would
   # target '//' or the whole DAP folder instead of this project only
   die "Error: public-dap/dap-folder is not defined\n"
      if not defined $dap_folder or $dap_folder eq '';
   die "Error: project/identifier/acronym is not defined\n"
      if not defined $acronym or $acronym eq '';

   # Literal substring tests: a path must never be used as a regex pattern
   die "Error: DAP folder match current folder"
      if index($dap_folder, $current_dir) >= 0 or index($current_dir, $dap_folder) >= 0;

   print "find '$dap_folder/$acronym/' -type l -o -type d -exec ls -l {} \+\n";
   print "find '$dap_folder/$acronym/' -type l -delete\n";
   print "find '$dap_folder/$acronym/' -type d -delete\n";
   }
250
251################################################################
252
# Command 'make-zip': archive the data set into
# <current dir>/<acronym>--<YYYYMMDD-HHMM>.zip, every member being stored
# under the <acronym>/ folder. Dies when the archive cannot be written.
sub cmd_make_zip {
   my $meta        = load_metadata();
   my $current_dir = Cwd::getcwd();
   my $acronym     = $meta->{'project'}{'identifier'}{'acronym'};
   my $data_set    = $meta->{'public-dap'}{'data-set'};

   # The three generated files are always part of the archive
   push @{$data_set}, 'AUTHORS.txt', 'COPYRIGHT.txt', 'LICENSE.txt';

   # Keep the first occurrence of each entry, in the original order
   my %seen;
   my @entries = grep { not $seen{$_}++ } @{$data_set};

   my $zip = Archive::Zip->new();
   for my $entry (@entries) {
      my $member = "$acronym/$entry";

      if    (-d $entry) { $zip->addTree($entry, $member); }
      elsif (-f $entry) { $zip->addFile($entry, $member); }
      else              { print "Error: entry $entry doesn't exists\n"; }
      }

   # Time stamp of the archive name, local time down to the minute
   my ($min, $hour, $mday, $mon, $year) = (localtime time)[1 .. 5];
   my $date = sprintf '%04i%02i%02i-%02i%02i', $year + 1900, $mon + 1, $mday, $hour, $min;

   $zip->writeToFileNamed("$current_dir/$acronym--$date.zip") == AZ_OK
      or die 'Error: zip write error';
   }
294
295################################################################
296
# Command 'make-allfiles': run the three file generators in the order
# author, license, copyright.
sub cmd_make_allfiles {
   my @first_steps = (\&cmd_make_file_author, \&cmd_make_file_license);
   $_->() for @first_steps;

   return cmd_make_file_copyright();
   }
302
303################################################################
304
# Command 'make-file-author': generate AUTHORS.txt in the current folder
# from the AUTHORS.tt template. An existing AUTHORS.txt is replaced only
# when it carries the "Automatically generated ... project-meta" marker;
# otherwise a warning is printed and the file is left alone.
sub cmd_make_file_author {
   my $meta        = load_metadata();
   my $current_dir = Cwd::getcwd();
   my $target      = "$current_dir/AUTHORS.txt";

   if (-f $target) {
      # Count the lines carrying the marker written by project-meta
      open my $in, '<', $target or die $!;
      my $automatic = grep { m/Automatically generated .* project-meta/i } <$in>;
      close $in;

      if (not $automatic) {
         # Hand-written file: never overwrite it
         print "Warning: AUTHORS.txt already exists\n";
         return;
         }

      print "Warning: update AUTHORS.txt\n";
      }

   # Values made available to the template
   my %vars = (
      acronym    => $meta->{'project'}{'identifier'}{'acronym'},
      authorlist => $meta->{'project'}{'creator'},
      );

   my $tt = Template->new(INCLUDE_PATH => '/usr/share/project-meta/template.d');
   my $msg_format = '';
   $tt->process('AUTHORS.tt', \%vars, \$msg_format) || die $tt->error;

   open my $out, '>', $target or die $!;
   print $out "$msg_format\n\n";
   close $out;
   }
343
344################################################################
345
# Command 'make-file-license': copy the license named by the key
# project/rights from the license database to LICENSE.txt in the current
# folder. An existing LICENSE.txt is never replaced. Exits with status 1
# when the license is not in the database.
sub cmd_make_file_license {
   my $meta = load_metadata();

   my $target = Cwd::getcwd() . '/LICENSE.txt';
   if (-f $target) {
      print "Warning: LICENSE.txt already exists\n";
      return;
      }

   my $license = $meta->{'project'}{'rights'};
   my $source  = "/usr/share/project-meta/license.d/$license.txt";

   unless (-f $source) {
      print "Error: license $license doesn't exists in project-meta database\n";
      exit 1;
      }

   copy($source, $target)
      or die "Error: license copy failed - $!";

   print "Info: LICENSE.txt file create\n";
   return;
   }
369
370################################################################
371
# Command 'make-file-copyright': generate COPYRIGHT.txt in the current
# folder from the COPYRIGHT.tt template. An existing COPYRIGHT.txt is
# replaced only when it carries the "Automatically generated ...
# project-meta" marker; otherwise a warning is printed and the file is
# left alone. Dies on template or file errors.
sub cmd_make_file_copyright {
   my $meta = load_metadata();

   my $current_dir = Cwd::getcwd();

   if (-f "$current_dir/COPYRIGHT.txt") {
      # Test for manual or automatically generated file:
      # stop at the first line carrying the project-meta marker
      my $automatic;
      open my $fh, '<', "$current_dir/COPYRIGHT.txt" or die $!;
      while (my $line = <$fh>) {
         next if $line !~ m/Automatically generated .* project-meta/i;
         $automatic = 1;
         last;
         }
      close $fh;

      if (not $automatic) {
         print "Warning: COPYRIGHT.txt already exists\n";
         return;
         }

      print "Warning: update COPYRIGHT.txt\n";
      }

   my $tt = Template->new(
      INCLUDE_PATH   => '/usr/share/project-meta/template.d',
      POST_CHOMP     => 1, # Remove space and carriage return after %]
      );
   my $msg_format = '';

   # Use the first relation entry that has a DOI; a missing relation list
   # or an entry that is not a hash is simply skipped
   my $doi_first;
   for my $relation (@{$meta->{'project'}{'relation'} || []}) {
      next if ref $relation ne 'HASH' or not exists $relation->{'doi'};
      $doi_first = $relation->{'doi'};
      last;
      }

   $tt->process('COPYRIGHT.tt',
      {
         title       => $meta->{'project'}{'title'},
         acronym     => $meta->{'project'}{'identifier'}{'acronym'},
         authorlist  => $meta->{'project'}{'creator'},
         description => $meta->{'project'}{'description'},
         license     => $meta->{'project'}{'rights'},
         doi         => $doi_first,
      }, \$msg_format) || die $tt->error;

   open my $fh,  '>', "$current_dir/COPYRIGHT.txt" or die $!;
   print $fh "$msg_format\n\n";
   # Buffered write errors only show up when the handle is closed
   close $fh or die $!;
   }
420
421################################################################
422
# Command 'list-license': print, one per line, the name (without the
# .txt suffix) of every regular *.txt file of the license database.
sub cmd_list_license {
   my $license_dir = '/usr/share/project-meta/license.d';

   opendir my $dh, "$license_dir/" or die $!;

   # Keep only regular files whose name ends with .txt, then drop the suffix
   my @licenses =
      map  { s/\.txt$//r }
      grep { m/\.txt$/ and -f "$license_dir/$_" }
      readdir $dh;

   print "$_\n" for @licenses;

   closedir $dh;
   }
437
438################################################################
439# documentation
440################################################################
441
442__END__
443
444=head1 NAME
445
446project-meta - opendata project metafile manager
447
448
449=head1 USAGE
450
451 project-meta help
452 project-meta version
453 project-meta check
454 project-meta dap-publish
455 project-meta dap-unpublish
 project-meta make-zip
 project-meta make-allfiles
 project-meta list-license
458 project-meta make-file-license
459 project-meta make-file-author
460 project-meta make-file-copyright
461 project-meta upgrade
462
463
464=head1 DESCRIPTION
465
Project-Meta is a small tool to maintain a set of open data files.
To help you with this task, the C<project-meta> command provides a set of actions
to generate and maintain many of the files in your dataset.
469
Everything is declared in the metafile F<PROJECT-META.yml>.
This YAML file must exist in the root folder of your project.
See L</METAFILE SPECIFICATION>.
473
474
475=head1 COMMANDS
476
Some commands are defined in the source code but are not documented here.
These may not be well defined, finished or well tested...
You can read the source code and use them at your own risk
(as with all the Project-Meta code).
481
482=head2 check
483
484 project-meta check
485
Check that your F<PROJECT-META.yml> has the right keys.
If your metafile is not a valid YAML file,
you can use the C<yamllint> or C<ysh> commands to check just its format.
489
490=head2 dap-publish
491
492 project-meta dap-publish
493
Publish data on an OpeNDAP server.
Because data can be very large,
this command just creates UNIX soft links in the OpeNDAP folder to the real data.
There is no copy.
Files F<AUTHORS.txt>, F<LICENSE.txt> and F<COPYRIGHT.txt> are mandatory but can be generated (see below).
The main keys used in the F<PROJECT-META.yml> are:
500
501=over
502
503=item * C<project/identifier/acronym>: the project short acronym, add to the OpeNDAP root folder
504
505=item * C<public-dap/dap-folder>: the OpeNDAP root folder
506
507=item * C<public-dap/data-set>: a list of files or folder to push
508
509=back
510
Because this command could be dangerous, it does nothing by itself!
It prints on the terminal the shell commands to be run.
You have to verify the output before evaluating it.
514
515 project-meta dap-publish
516 project-meta dap-publish | bash
517
518=head2 dap-unpublish
519
520 project-meta dap-unpublish
521
Unpublish data from the OpeNDAP server.
In practice, it removes the links in the OpeNDAP folder for that project.
Because the command C<rm> is always dangerous,
we use here the command C<find> limited to folders and links.

Please verify the returned values before executing it with the C<-delete> option.
528
529=head2 make-zip
530
531 project-meta make-zip
532
Create a ZIP archive with the open data set.
Files F<AUTHORS.txt>, F<LICENSE.txt> and F<COPYRIGHT.txt> are mandatory but can be generated (see below).
The main keys used in the F<PROJECT-META.yml> are:
536
537=over
538
539=item * C<project/identifier/acronym>: the project short acronym, use as root folder
540
541=item * C<public-dap/data-set>: a list of files or folder to push
542
543=back
544
545=head2 make-allfiles
546
547 project-meta make-allfiles
548
549Generate or update all files: F<AUTHORS.txt>, F<COPYRIGHT.txt> and F<LICENSE.txt>.
550This command is just a shortcut for L</make-file-author>, L</make-file-copyright> and L</make-file-license>.
551
552
553=head2 list-license
554
555 project-meta list-license
556
557Give the list of all the open data licenses supported by the project-meta license database.
558At this time the possible licenses are:
559
560=over
561
562=item * L<community-data-license-agreement-permissive-v1.0|https://cdla.io/permissive-1-0/wp-content/uploads/sites/52/2017/10/CDLA-Permissive-v1.0.pdf>
563        (permissive - allow users to freely share and adapt)
564
565=item * L<community-data-license-agreement-sharing-v1.0|https://cdla.io/sharing-1-0/wp-content/uploads/sites/52/2017/10/CDLA-Sharing-v1.0.pdf>
566        (copyleft - allow users to freely share and adapt while maintaining this same freedom for others)
567
568=item * L<creative-common-attribution-v4.0|https://creativecommons.org/licenses/by/4.0/legalcode.txt>
569        (copyleft - allow users to freely share and adapt while maintaining this same freedom for others)
570
571=item * L<creative-common-zero-v1.0|https://creativecommons.org/publicdomain/zero/1.0/legalcode.txt>
572        (like public domain)
573
574=item * L<licence-ouverte-v2.0|https://www.etalab.gouv.fr/wp-content/uploads/2017/04/ETALAB-Licence-Ouverte-v2.0.pdf>
        (copyleft - French government open data)
576
577=item * L<open-database-license-v1.0|https://opendatacommons.org/files/2018/02/odbl-10.txt>
578        (copyleft - allow users to freely share, modify, and use the database while maintaining this same freedom for others)
579
580=back
581
Note that these licenses are dedicated to open data.
Please do not use an open license that was designed for source code or documentation rather than for open data.
Here are some links about the open data license context:
585
586=over
587
588=item * A good article about Community Data License Agreement and Open Data Licence in general
589   L<Licenses for data|https://lwn.net/Articles/753648/> written on 9 May 2018.
590
591=item * A french page about French Public Open Data licence
592   L<https://www.etalab.gouv.fr/licence-ouverte-open-licence>.
593
594=back
595
596=head2 make-file-license
597
598 project-meta make-file-license
599
Copy the license file from the project-meta license database to the current folder
with the file name: F<LICENSE.txt>.

The license is defined in the F<PROJECT-META.yml> specification under the key C<project/rights>.
The list of possible licenses is given by the command L</list-license>.
605
606=head2 make-file-author
607
608 project-meta make-file-author
609
610Create or update the F<AUTHORS.txt> file at the current folder.
611Authors data are extracted from the C<PROJECT-META.yml> file.
612
613=head2 make-file-copyright
614
615 project-meta make-file-copyright
616
617Create or update the F<COPYRIGHT.txt> file at the current folder.
618Authors, license and copyright data are extracted from the C<PROJECT-META.yml> file.
619
620=head2 upgrade
621
622 project-meta upgrade
623
Upgrade the config file to the last version.
Create a file F<PROJECT-META-vVERSION.yml> in the current directory if it does not exist yet, error otherwise.
Please manually verify this autogenerated config file before renaming and using it.
627
628
629=head1 METAFILE SPECIFICATION
630
631Each project must have an open data metafile describing the project : C<PROJECT-META.yml>.
632The file is in YAML format because this is a human-readable text file style.
633Other formats could have been Plain XML, RDF, JSON... but they are much less readable.
634
635You can find in the project-meta software a
636L<PROJECT-META.sample.yml|http://servforge.legi.grenoble-inp.fr/pub/soft-trokata/project-meta/PROJECT-META.sample.yml> example.
637This one is actually the master reference specification!
638
Some interesting papers or links about Open Meta Data Schema:
640
641=over
642
643=item * L<Metadata for the open data portals|http://devinit.org/wp-content/uploads/2018/01/Metadata-for-open-data-portals.pdf>
        written in December 2016.
645
=item * L<Project Open Data Metadata Schema v1.1|https://project-open-data.cio.gov/v1.1/schema/> from the US government
647        based on L<DCAT|http://www.w3.org/TR/vocab-dcat/>.
648
649=item * L<Metadata Standards|http://knowhow.opendatamonitor.eu/odresearch/metadata-standards/>
650        from OpenDataMonitor.
651
652=item * L<G8 Metadata Mapping|https://github.com/project-open-data/G8_Metadata_Mapping/blob/master/index.md>
653        mapping between the metadata on datasets published by G8 Members through their open data portals.
654
655=back
656
657
658=head1 KNOWN BUGS
659
 - keys and tags are not really checked before doing an action!
661
662
663=head1 SEE ALSO
664
665yamllint(1), ysh(1), YAML, Archive::Zip
666
667In Debian GNU/Linux distribution, packages for C<yamllint> and C<ysh> are:
668
669=over
670
671=item * C<yamllint> - Linter for YAML files (Python)
672
673=item * C<libyaml-shell-perl> - YAML test shell (Perl)
674
675=back
676
677
Own project resources:
679
680=over
681
682=item * L<Web site|http://servforge.legi.grenoble-inp.fr/projects/soft-trokata/wiki/SoftWare/ProjectMeta>
683
684=item * L<Online Manual|http://servforge.legi.grenoble-inp.fr/pub/soft-trokata/project-meta/project-meta.html>
685
686=item * L<SVN repository|http://servforge.legi.grenoble-inp.fr/svn/soft-trokata/trunk/project-meta>
687
688=back
689
690
691=head1 AUTHOR
692
693Written by Gabriel Moreau, LEGI UMR5519, CNRS, Grenoble - France
694
695
696=head1 SPECIAL THANKS
697
698The list of people below did not directly contribute to project-meta's source code
699but provided me with some data, returned bugs
700or helped me in another task like having new ideas, specifications...
701Maybe I forgot your contribution in recent years,
702please forgive me in advance and send me an e-mail to correct this.
703
704Joel Sommeria, Julien Chauchat, Cyrille Bonamy, Antoine Mathieu.
705
706
707=head1 LICENSE AND COPYRIGHT
708
709License GNU GPL version 2 or later and Perl equivalent
710
711Copyright (C) 2017-2018 Gabriel Moreau <Gabriel.Moreau(A)univ-grenoble-alpes.fr>.
Note: See TracBrowser for help on using the repository browser.