source: trunk/project-meta/project-meta @ 404

Last change on this file since 404 was 404, checked in by g7moreau, 5 years ago
  • Add command dataset-size
  • Property svn:executable set to *
File size: 21.7 KB
Line 
1#!/usr/bin/env perl
2#
3# 2018/01/17 Gabriel Moreau <Gabriel.Moreau(A)univ-grenoble-alpes.fr>
4#
5# apt-get install libyaml-syck-perl libtemplate-perl libarchive-zip-perl
6# apt-get install yamllint libyaml-shell-perl # check YAML files
7
8use strict;
9use warnings;
10use version; our $VERSION = version->declare('0.1.6');
11
12use File::Copy qw(copy);   
13use YAML::Syck;
14use Getopt::Long();
15use Cwd();
16use Template;
17use Archive::Zip qw(:ERROR_CODES :CONSTANTS);
18
19our $CFG_VERSION = 2;
20
# Global command line options. GetOptions removes the options it
# recognises from @ARGV, so the command name can be read afterwards.
#   --verbose : flag stored in $verbose
my ($verbose);
Getopt::Long::GetOptions(
   'verbose' => \$verbose,
   ) or exit 1; # GetOptions has already reported the invalid option on STDERR
25
26
# Dispatch table: command name given on the command line => handler sub.
my %CMD_DB = (
   # General purpose commands
   'help'                => \&cmd_help,
   'version'             => \&cmd_version,
   'check'               => \&cmd_check,
   'upgrade'             => \&cmd_upgrade,

   # OpeNDAP publication and data set commands
   'dap-publish'         => \&cmd_dap_publish,
   'dap-unpublish'       => \&cmd_dap_unpublish,
   'dataset-size'        => \&cmd_dataset_size,
   'make-zip'            => \&cmd_make_zip,

   # Generated files and license database commands
   'make-allfiles'       => \&cmd_make_allfiles,
   'make-file-author'    => \&cmd_make_file_author,
   'make-file-copyright' => \&cmd_make_file_copyright,
   'make-file-license'   => \&cmd_make_file_license,
   'list-license'        => \&cmd_list_license,
   );
42
43################################################################
44# main program
45################################################################
46
# The command name is the first remaining argument ('help' when it is
# absent or false); the other arguments are passed to the handler.
my $cmd = shift @ARGV || 'help';
my $handler = $CMD_DB{$cmd};

if (not defined $handler) {
   # Unknown command: report it, show the usage and fail
   print {*STDERR} "project-meta: command $cmd not found\n\n";
   $CMD_DB{'help'}->();
   exit 1;
   }

$handler->(@ARGV);

exit;
58
59################################################################
60# subroutine
61################################################################
62
# Print one "key : yes|no" report line on the currently selected handle.
#   $key  : label, left-justified and padded to 35 characters
#   $test : any value, reported as 'yes' when true and 'no' otherwise
sub print_ok {
   my ($key, $test) = @_;

   my $answer = 'no';
   $answer = 'yes' if $test;

   printf "%-35s : %s\n", $key, $answer;
   }
68
69################################################################
70
# Register every parent folder of a path in a counting hash.
#   $folderdb : hash reference, each parent folder found is incremented
#   $folder   : relative path of a file or folder ('a/b/c' registers
#               'a/b' and 'a')
# A path without any '/' registers nothing. Returns nothing.
sub addfolder2list {
   my ($folderdb, $folder) = @_;

   return if not defined $folder;

   # Drop trailing slashes first: a path such as 'a/b/' has no last
   # component to remove and used to recurse forever.
   $folder =~ s{/+\z}{};

   # Remove the last component (and any run of slashes before it) until
   # no separator is left, counting each remaining parent folder.
   while ($folder =~ s{/+[^/]+\z}{}) {
      $folderdb->{$folder}++;
      }

   return;
   }
81
82################################################################
83
# Convert, in place, a version 1 metadata structure to version 2.
#   $meta : hash reference of the loaded metafile
# Moved keys:
#   project/acronym           -> project/identifier/acronym
#   project/authors           -> project/creator
#   project/short-description -> project/description
#   public-dap/data-license   -> project/rights
#   publication/doi (list)    -> appended to project/relation as {doi => ...}
# Returns the same hash reference, with 'version' set to 2.
sub upgrade_version_1_to_2 {
   my $meta = shift;

   my $project     = ($meta->{'project'}     //= {});
   my $public_dap  = ($meta->{'public-dap'}  //= {});
   my $publication = ($meta->{'publication'} //= {});

   # The acronym moves under the identifier sub-section
   my $identifier = ($project->{'identifier'} ||= {});
   $identifier->{'acronym'} = delete $project->{'acronym'};

   # Plain renames inside the project section (old key => new key)
   my %new_key_of = (
      'authors'           => 'creator',
      'short-description' => 'description',
      );
   for my $old_key (sort keys %new_key_of) {
      $project->{$new_key_of{$old_key}} = delete $project->{$old_key};
      }

   # The data license is now a property of the project
   $project->{'rights'} = delete $public_dap->{'data-license'};

   # Every publication DOI becomes one relation entry
   my $relation = ($project->{'relation'} ||= []);
   push @{$relation}, map { +{doi => $_} } @{ $publication->{'doi'} // [] };
   delete $publication->{'doi'};

   $meta->{'version'} = 2;
   return $meta;
   }
109
110################################################################
111
# Load PROJECT-META.yml from the current directory and upgrade the
# structure, in memory only, to the program config version $CFG_VERSION.
#
# Returns:
#   list context   : ($meta, $initial_version), where $initial_version is
#                    the version found in the file BEFORE any upgrade
#   scalar context : $meta only
# Dies when the file is at a version newer than $CFG_VERSION or when no
# upgrade subroutine exists for one of the required steps.
sub load_metadata {
   my $meta = YAML::Syck::LoadFile("PROJECT-META.yml");

   # A file without a version key is handled as version 1; it already
   # took the upgrade branch before, since undef compares as less than
   # $CFG_VERSION.
   my $initial_version = $meta->{'version'} // 1;

   if ($initial_version > $CFG_VERSION) {
      die "Error: config file at future version $meta->{'version'}, program only at $CFG_VERSION\n"
      }

   if ($initial_version < $CFG_VERSION) {
      print "Warning: upgrade config file from version $initial_version to last version $CFG_VERSION\n";

      # Apply each upgrade step in turn. The subroutine is looked up by
      # name with can(): calling a plain string as a code reference is
      # fatal under "use strict".
      for my $from ($initial_version .. $CFG_VERSION - 1) {
         my $next    = $from + 1;
         my $upgrade = __PACKAGE__->can("upgrade_version_${from}_to_${next}")
            or die "Error: no upgrade available from config version $from to $next\n";
         $upgrade->($meta);
         }
      }

   # $initial_version is deliberately left untouched here: cmd_upgrade
   # compares it with the upgraded $meta->{'version'}.
   return wantarray ? ($meta, $initial_version) : $meta;
   }
128
129################################################################
130# command
131################################################################
132
# Command help: print the program title followed by one usage line per
# command on standard output.
sub cmd_help {
   my @commands = qw(
      help
      version
      check
      dap-publish
      dap-unpublish
      dataset-size
      make-zip
      make-allfiles
      list-license
      make-file-license
      make-file-author
      make-file-copyright
      upgrade
      );

   print "project-meta - opendata project metafile manager\n\n",
      map { " project-meta $_\n" } @commands;
   }
152
153################################################################
154
# Command version: print the program version ($VERSION) on one line.
sub cmd_version {
   printf "%s\n", $VERSION;
   }
158
159################################################################
160
# Command upgrade: save the upgraded metadata in a new file named
# PROJECT-META-v<version>.yml, next to the original metafile.
# Dies when that file already exists; the original file is not modified.
sub cmd_upgrade {
   my ($meta, $initial_version) = load_metadata();

   if ($initial_version >= $meta->{'version'}) {
      # No upgrade happened while loading
      print "Warning: nothing to do, config file already at version $CFG_VERSION\n"
         if $initial_version == $CFG_VERSION;
      return;
      }

   my $next_config = "PROJECT-META-v$meta->{'version'}.yml";
   die "Error: upgrade propose config file $next_config already exists\n"
      if -e $next_config;

   print "Warning: create new config file $next_config, please verify before using it\n";
   YAML::Syck::SaveFile($next_config, $meta);
   }
177
178################################################################
179
# Command check: print a yes/no report about some keys of the metafile.
#   project/identifier/acronym : must match two digits, a word character
#                                and at least one more character
#   public-dap/dap-folder      : must be a non empty absolute path
#   dap-folder                 : must not contain the current directory
sub cmd_check {
   my $meta = load_metadata();

   # Missing keys are checked as empty strings, so they are reported
   # as 'no' without warnings.
   my $acronym     = $meta->{'project'}{'identifier'}{'acronym'} // '';
   my $current_dir = Cwd::getcwd();
   my $dap_folder  = $meta->{'public-dap'}{'dap-folder'} // '';

   # Each test is forced to scalar context: in list context a failed
   # match returns an empty list. && is used instead of 'and', whose low
   # precedence would end the argument list of print_ok too early.
   print_ok('project/identifier/acronym',       scalar($acronym =~ m{\d\d\w[\w\d_/]+}));
   print_ok('public-dap/dap-folder',            scalar($dap_folder ne '' && $dap_folder =~ m{^/}));
   # Literal substring test: a path must not be used as a regex
   print_ok('dap-folder not match current_dir', index($dap_folder, $current_dir) < 0);

   #print YAML::Syck::Dump($meta);
   }
193
194################################################################
195
# Command dap-publish: print (without running them) the shell commands
# that publish the data set in the OpeNDAP folder through symbolic links.
# Metafile keys used: project/identifier/acronym, public-dap/dap-folder
# and public-dap/data-set. AUTHORS.txt, COPYRIGHT.txt and LICENSE.txt are
# always added to the data set.
sub cmd_dap_publish {
   my $meta = load_metadata();
   my $current_dir = Cwd::getcwd();
   my $acronym     = $meta->{'project'}{'identifier'}{'acronym'};
   my $dap_folder  = $meta->{'public-dap'}{'dap-folder'};
   my $data_set    = $meta->{'public-dap'}{'data-set'};

   # Data set plus the mandatory files, first occurrence only
   my %seen;
   my @entries = grep { not $seen{$_}++ }
      @{ $data_set // [] }, 'AUTHORS.txt', 'COPYRIGHT.txt', 'LICENSE.txt';

   # Every parent folder of the entries
   my %folders;
   addfolder2list(\%folders, $_) for @entries;

   my $target = "$dap_folder/$acronym";
   my @script;

   # Open the project tree and mirror its folders in the DAP folder
   push @script, "chmod o+rX,o-w '$current_dir'\n";
   push @script, "mkdir -p '$target'\n" unless -d $target;
   for my $folder (sort keys %folders) {
      push @script, "chmod o+rX,o-w '$current_dir/$folder'\n";
      push @script, "mkdir '$target/$folder'\n" if -d "$current_dir/$folder";
      }

   # One symbolic link per entry, created in the matching DAP folder
   for my $entry (@entries) {
      my $link_dir = "$target/";
      if ($entry =~ m{/}) {
         # Entry in a sub-folder: link it from the mirrored sub-folder
         my $parent = $entry =~ s{/[^/]+$}{}r;
         push @script, "chmod -R o+rX,o-w '$current_dir/$entry'\n";
         $link_dir = "$target/$parent/";
         }
      push @script, "ln --symbolic --target-directory '$link_dir' '$current_dir/$entry'\n";
      }

   push @script, "chmod -R o+rX,o-w '$target/'\n";

   print @script;
   }
238
239################################################################
240
# Command dap-unpublish: print (without running them) the find commands
# that list, then delete, the links and folders of this project in the
# OpeNDAP folder.
# Dies when the DAP folder and the current folder contain one another,
# which also covers an empty or missing public-dap/dap-folder key.
sub cmd_dap_unpublish {
   my $meta = load_metadata();
   my $current_dir = Cwd::getcwd();
   my $acronym     = $meta->{'project'}{'identifier'}{'acronym'};
   my $dap_folder  = $meta->{'public-dap'}{'dap-folder'} // '';

   # Literal substring tests: used as regexes, the paths had their
   # metacharacters interpreted and an empty dap-folder gave an empty
   # pattern.
   die "Error: DAP folder match current folder"
      if index($dap_folder, $current_dir) >= 0
      or index($current_dir, $dap_folder) >= 0;

   print "find '$dap_folder/$acronym/' -type l -o -type d -exec ls -l {} +\n";
   print "find '$dap_folder/$acronym/' -type l -delete\n";
   print "find '$dap_folder/$acronym/' -type d -delete\n";
   }
253
254################################################################
255
# Command dataset-size: run "du -sm" on every entry of the metafile key
# public-dap/data-set, from the current directory. The output of du goes
# straight to standard output.
sub cmd_dataset_size {
   my $meta     = load_metadata();
   my $data_set = $meta->{'public-dap'}{'data-set'} // [];

   # Flush our own output immediately so it cannot end up after the
   # output of the du child processes.
   local $| = 1;

   for my $dataset (@{$data_set}) {
      # List form of system: the entry name is never parsed by a shell
      system('du', '-sm', '--', $dataset) == 0
         or print {*STDERR} "Error: cannot get the size of $dataset\n";
      }

   return;
   }
263
264################################################################
# Command make-zip: create <acronym>--<YYYYMMDD-HHMM>.zip in the current
# directory with every entry of public-dap/data-set plus AUTHORS.txt,
# COPYRIGHT.txt and LICENSE.txt, all stored under the folder <acronym>/.
# Entries that are neither a folder nor a plain file are reported and
# skipped. Dies when the archive cannot be written.
sub cmd_make_zip {
   my $meta = load_metadata();
   my $current_dir = Cwd::getcwd();
   my $data_set    = $meta->{'public-dap'}{'data-set'};
   my $acronym     = $meta->{'project'}{'identifier'}{'acronym'};

   # Data set plus the mandatory files, first occurrence only
   my %seen;
   my @entries = grep { not $seen{$_}++ }
      @{ $data_set // [] }, 'AUTHORS.txt', 'COPYRIGHT.txt', 'LICENSE.txt';

   # Fill the archive
   my $zip = Archive::Zip->new();
   for my $entry (@entries) {
      my $member = "$acronym/$entry";
      if (-d $entry) {
         $zip->addTree($entry, $member);
         }
      elsif (-f $entry) {
         $zip->addFile($entry, $member);
         }
      else {
         print "Error: entry $entry doesn't exists\n";
         }
      }

   # Local time stamp, minute precision, used in the archive name
   my ($min, $hour, $mday, $mon, $year) = (localtime time)[1 .. 5];
   my $date = sprintf '%04i%02i%02i-%02i%02i',
      $year + 1900, $mon + 1, $mday, $hour, $min;

   # Save the archive
   $zip->writeToFileNamed("$current_dir/$acronym--$date.zip") == AZ_OK
      or die 'Error: zip write error';
   }
306
307################################################################
308
# Command make-allfiles: run, in this order, the author, license and
# copyright file generators.
sub cmd_make_allfiles {
   my @generators = (
      \&cmd_make_file_author,
      \&cmd_make_file_license,
      \&cmd_make_file_copyright,
      );

   for my $generator (@generators) {
      $generator->();
      }
   }
314
315################################################################
316
# Command make-file-author: generate AUTHORS.txt in the current directory
# from the AUTHORS.tt template, with the metafile keys
# project/identifier/acronym and project/creator.
# An existing AUTHORS.txt is overwritten only when one of its lines says
# it was automatically generated by project-meta.
sub cmd_make_file_author {
   my $meta = load_metadata();

   my $current_dir  = Cwd::getcwd();
   my $author_file  = "$current_dir/AUTHORS.txt";

   my $acronym      = $meta->{'project'}{'identifier'}{'acronym'};
   my $authors_list = $meta->{'project'}{'creator'};

   if (-f $author_file) {
      # Count the lines carrying the automatic generation marker
      open my $in, '<', $author_file or die $!;
      my $automatic = grep { m/Automatically generated .* project-meta/i } <$in>;
      close $in;

      if (not $automatic) {
         # Manually written file: keep it
         print "Warning: AUTHORS.txt already exists\n";
         return;
         }

      print "Warning: update AUTHORS.txt\n";
      }

   # Render the template in memory
   my $tt = Template->new(INCLUDE_PATH => '/usr/share/project-meta/template.d');
   my %vars = (
      acronym    => $acronym,
      authorlist => $authors_list,
      );
   my $msg_format = '';
   $tt->process('AUTHORS.tt', \%vars, \$msg_format) || die $tt->error;

   open my $out, '>', $author_file or die $!;
   print {$out} $msg_format, "\n\n";
   close $out;
   }
355
356################################################################
357
# Command make-file-license: copy the license named by the metafile key
# project/rights from the project-meta license database to LICENSE.txt
# in the current directory.
# An existing LICENSE.txt is never overwritten. Exits with status 1 when
# the license is not in the database; dies when the copy fails.
sub cmd_make_file_license {
   my $meta = load_metadata();

   my $license_file = Cwd::getcwd() . '/LICENSE.txt';

   if (-f $license_file) {
      print "Warning: LICENSE.txt already exists\n";
      return;
      }

   my $license = $meta->{'project'}{'rights'};
   my $source  = "/usr/share/project-meta/license.d/$license.txt";

   unless (-f $source) {
      print "Error: license $license doesn't exists in project-meta database\n";
      exit 1;
      }

   copy($source, $license_file)
      or die "Error: license copy failed - $!";

   print "Info: LICENSE.txt file create\n";
   return;
   }
381
382################################################################
383
# Command make-file-copyright: generate COPYRIGHT.txt in the current
# directory from the COPYRIGHT.tt template, with the title, acronym,
# creator, description and rights of the project, plus the first DOI
# found in project/relation (empty string when there is none).
# An existing COPYRIGHT.txt is overwritten only when one of its lines
# says it was automatically generated by project-meta.
sub cmd_make_file_copyright {
   my $meta = load_metadata();

   my $copyright_file = Cwd::getcwd() . '/COPYRIGHT.txt';

   if (-f $copyright_file) {
      # Count the lines carrying the automatic generation marker
      open my $in, '<', $copyright_file or die $!;
      my $automatic = grep { m/Automatically generated .* project-meta/i } <$in>;
      close $in;

      if (not $automatic) {
         # Manually written file: keep it
         print "Warning: COPYRIGHT.txt already exists\n";
         return;
         }

      print "Warning: update COPYRIGHT.txt\n";
      }

   my $project = $meta->{'project'};

   # First relation entry that has a 'doi' key
   my ($doi_entry) = grep { exists $_->{'doi'} } @{ $project->{'relation'} // [] };
   my $doi_first   = defined $doi_entry ? $doi_entry->{'doi'} : '';

   my $tt = Template->new(
      INCLUDE_PATH   => '/usr/share/project-meta/template.d',
      POST_CHOMP     => 1, # Remove space and carriage return after %]
      );
   my %vars = (
      title       => $project->{'title'},
      acronym     => $project->{'identifier'}{'acronym'},
      authorlist  => $project->{'creator'},
      description => $project->{'description'},
      license     => $project->{'rights'},
      doi         => $doi_first,
      );
   my $msg_format = '';
   $tt->process('COPYRIGHT.tt', \%vars, \$msg_format) || die $tt->error;

   open my $out, '>', $copyright_file or die $!;
   print {$out} $msg_format, "\n\n";
   close $out;
   }
434
435################################################################
436
# Command list-license: print, one per line and in sorted order, the
# name of every license of the project-meta license database, that is
# every plain file *.txt of the license folder without its extension.
# Dies when the license folder cannot be read.
sub cmd_list_license {
   my $license_dir = '/usr/share/project-meta/license.d';

   opendir my $dh, $license_dir
      or die "Error: cannot read license folder $license_dir - $!\n";
   my @entries = readdir $dh;
   closedir $dh;

   # readdir returns the entries in no particular order: sort them so
   # the listing is the same on every run
   for my $entry (sort @entries) {
      # Keep only .txt file
      next if $entry !~ m/\.txt$/;

      # Keep only file
      next if not -f "$license_dir/$entry";

      my $license = $entry =~ s/\.txt$//r;
      print "$license\n";
      }
   }
451
452################################################################
453# documentation
454################################################################
455
456__END__
457
458=head1 NAME
459
460project-meta - opendata project metafile manager
461
462
463=head1 USAGE
464
465 project-meta help
466 project-meta version
467 project-meta check
468 project-meta dap-publish
469 project-meta dap-unpublish
470 project-meta dataset-size
 project-meta make-zip
 project-meta make-allfiles
472 project-meta list-license
473 project-meta make-file-license
474 project-meta make-file-author
475 project-meta make-file-copyright
476 project-meta upgrade
477
478
479=head1 DESCRIPTION
480
481Project-Meta is a small tool to maintain a set of open data files.
In order to help you in this task, the C<project-meta> command has a set of actions
to generate and maintain many files in your dataset.
484
Everything is declared in the metafile F<PROJECT-META.yml>.
This YAML file must exist in your project root folder.
487See L</METAFILE SPECIFICATION>.
488
489
490=head1 COMMANDS
491
Some commands are defined in the source code but are not documented here.
These may be not well defined, not finished, not well tested...
494You can read the source code and use them at your own risk
495(like for all the Project-Meta code).
496
497=head2 check
498
499 project-meta check
500
Check that your F<PROJECT-META.yml> has the right keys.
If your metafile is not a valid YAML file,
you can use the C<yamllint> or C<ysh> commands to check just its format.
504
505=head2 dap-publish
506
507 project-meta dap-publish
508
509Publish data on an OpeNDAP server.
Because data can be very large,
this command just creates UNIX soft links in the OpeNDAP folder to the real data.
512There is no copy.
513Files F<AUTHORS.txt>, F<LICENSE.txt> and F<COPYRIGHT.txt> are mandatory but could be generated (see below).
514The main keys use in the F<PROJECT-META.yml> are:
515
516=over
517
518=item * C<project/identifier/acronym>: the project short acronym, add to the OpeNDAP root folder
519
520=item * C<public-dap/dap-folder>: the OpeNDAP root folder
521
522=item * C<public-dap/data-set>: a list of files or folder to push
523
524=back
525
Because this command could be dangerous, it does nothing by itself!
It prints on the terminal the shell commands to be run.
You have to verify the output before evaluating it.
529
530 project-meta dap-publish
531 project-meta dap-publish | bash
532
533=head2 dap-unpublish
534
535 project-meta dap-unpublish
536
537Unpublish data from the OpeNDAP server.
In practice, it removes the links in the OpeNDAP folder for that project.
539Because command C<rm> is always dangerous,
540we use here the command C<find> limited to folder and link.
541
Please verify the returned values before executing it with the C<-delete> option.
543
544=head2 dataset-size
545
546 project-meta dataset-size
547
548=head2 make-zip
549
550 project-meta make-zip
551
552Create a ZIP archive with the open data set.
553Files F<AUTHORS.txt>, F<LICENSE.txt> and F<COPYRIGHT.txt> are mandatory but could be generated (see below).
554The main keys use in the F<PROJECT-META.yml> are:
555
556=over
557
558=item * C<project/identifier/acronym>: the project short acronym, use as root folder
559
560=item * C<public-dap/data-set>: a list of files or folder to push
561
562=back
563
564=head2 make-allfiles
565
566 project-meta make-allfiles
567
568Generate or update all files: F<AUTHORS.txt>, F<COPYRIGHT.txt> and F<LICENSE.txt>.
569This command is just a shortcut for L</make-file-author>, L</make-file-copyright> and L</make-file-license>.
570
571
572=head2 list-license
573
574 project-meta list-license
575
576Give the list of all the open data licenses supported by the project-meta license database.
577At this time the possible licenses are:
578
579=over
580
581=item * L<community-data-license-agreement-permissive-v1.0|https://cdla.io/permissive-1-0/wp-content/uploads/sites/52/2017/10/CDLA-Permissive-v1.0.pdf>
582        (permissive - allow users to freely share and adapt)
583
584=item * L<community-data-license-agreement-sharing-v1.0|https://cdla.io/sharing-1-0/wp-content/uploads/sites/52/2017/10/CDLA-Sharing-v1.0.pdf>
585        (copyleft - allow users to freely share and adapt while maintaining this same freedom for others)
586
587=item * L<creative-common-attribution-v4.0|https://creativecommons.org/licenses/by/4.0/legalcode.txt>
588        (copyleft - allow users to freely share and adapt while maintaining this same freedom for others)
589
590=item * L<creative-common-zero-v1.0|https://creativecommons.org/publicdomain/zero/1.0/legalcode.txt>
591        (like public domain)
592
593=item * L<licence-ouverte-v2.0|https://www.etalab.gouv.fr/wp-content/uploads/2017/04/ETALAB-Licence-Ouverte-v2.0.pdf>
        (copyleft - opendata french government)
595
596=item * L<open-database-license-v1.0|https://opendatacommons.org/files/2018/02/odbl-10.txt>
597        (copyleft - allow users to freely share, modify, and use the database while maintaining this same freedom for others)
598
599=back
600
601Note that these licenses are dedicated to open data.
602Please do not use an open license that would have been thought for source code or documentation and not for open data.
603Here are some links about open data licence context:
604
605=over
606
607=item * A good article about Community Data License Agreement and Open Data Licence in general
608   L<Licenses for data|https://lwn.net/Articles/753648/> written on 9 May 2018.
609
610=item * A french page about French Public Open Data licence
611   L<https://www.etalab.gouv.fr/licence-ouverte-open-licence>.
612
613=back
614
615=head2 make-file-license
616
617 project-meta make-file-license
618
619Copy the license file from the project-meta license database at the current folder
620with the file name: F<LICENSE.txt>.
621
The license is defined in the F<PROJECT-META.yml> specification under the key C<project/rights>
(C<public-dap/data-license> in version 1 metafiles).
623The list of possible license is given with the command L</list-license>.
624
625=head2 make-file-author
626
627 project-meta make-file-author
628
629Create or update the F<AUTHORS.txt> file at the current folder.
630Authors data are extracted from the C<PROJECT-META.yml> file.
631
632=head2 make-file-copyright
633
634 project-meta make-file-copyright
635
636Create or update the F<COPYRIGHT.txt> file at the current folder.
637Authors, license and copyright data are extracted from the C<PROJECT-META.yml> file.
638
639=head2 upgrade
640
641 project-meta upgrade
642
643Upgrade config file to last version.
Create a file F<PROJECT-META-vVERSION.yml> in the current directory if it does not exist, error otherwise.
Please manually verify this autogenerated config file before renaming and using it.
646
647
648=head1 METAFILE SPECIFICATION
649
650Each project must have an open data metafile describing the project : C<PROJECT-META.yml>.
651The file is in YAML format because this is a human-readable text file style.
652Other formats could have been Plain XML, RDF, JSON... but they are much less readable.
653
654You can find in the project-meta software a
655L<PROJECT-META.sample.yml|http://servforge.legi.grenoble-inp.fr/pub/soft-trokata/project-meta/PROJECT-META.sample.yml> example.
656This one is actually the master reference specification!
657
Some interesting papers or links about Open Meta Data Schema:
659
660=over
661
662=item * L<Metadata for the open data portals|http://devinit.org/wp-content/uploads/2018/01/Metadata-for-open-data-portals.pdf>
        written in December 2016.
664
=item * L<Project Open Data Metadata Schema v1.1|https://project-open-data.cio.gov/v1.1/schema/> from US government
666        based on L<DCAT|http://www.w3.org/TR/vocab-dcat/>.
667
668=item * L<Metadata Standards|http://knowhow.opendatamonitor.eu/odresearch/metadata-standards/>
669        from OpenDataMonitor.
670
671=item * L<G8 Metadata Mapping|https://github.com/project-open-data/G8_Metadata_Mapping/blob/master/index.md>
672        mapping between the metadata on datasets published by G8 Members through their open data portals.
673
674=back
675
676
677=head1 KNOWN BUGS
678
679 - not really check keys and tags before doing action!
680
681
682=head1 SEE ALSO
683
684yamllint(1), ysh(1), YAML, Archive::Zip
685
686In Debian GNU/Linux distribution, packages for C<yamllint> and C<ysh> are:
687
688=over
689
690=item * C<yamllint> - Linter for YAML files (Python)
691
692=item * C<libyaml-shell-perl> - YAML test shell (Perl)
693
694=back
695
696
Own project resources:
698
699=over
700
701=item * L<Web site|http://servforge.legi.grenoble-inp.fr/projects/soft-trokata/wiki/SoftWare/ProjectMeta>
702
703=item * L<Online Manual|http://servforge.legi.grenoble-inp.fr/pub/soft-trokata/project-meta/project-meta.html>
704
705=item * L<SVN repository|http://servforge.legi.grenoble-inp.fr/svn/soft-trokata/trunk/project-meta>
706
707=back
708
709
710=head1 AUTHOR
711
712Written by Gabriel Moreau, LEGI UMR5519, CNRS, Grenoble - France
713
714
715=head1 SPECIAL THANKS
716
717The list of people below did not directly contribute to project-meta's source code
718but provided me with some data, returned bugs
719or helped me in another task like having new ideas, specifications...
720Maybe I forgot your contribution in recent years,
721please forgive me in advance and send me an e-mail to correct this.
722
723Joel Sommeria, Julien Chauchat, Cyrille Bonamy, Antoine Mathieu.
724
725
726=head1 LICENSE AND COPYRIGHT
727
728License GNU GPL version 2 or later and Perl equivalent
729
730Copyright (C) 2017-2018 Gabriel Moreau <Gabriel.Moreau(A)univ-grenoble-alpes.fr>.
Note: See TracBrowser for help on using the repository browser.