[CDBI] Re: Record versioning

Ryan Tate lists at ryantate.com
Tue Mar 14 20:04:24 GMT 2006


On 3/14/06, Edward J. Sabol <sabol at alderaan.gsfc.nasa.gov> wrote:
> Ryan Tate's posting probably had the most information on how to implement
> such a system.

My system is fairly crude and I have only used it with one record type.

But I did develop a My::DBI::Versioned subclass in case I wanted to do
this again in the future.

Again, I have only implemented this once and it is very rough. Also, I
have not included some of the fancy joins I do elsewhere to, for
example, list all current objects. It would be useful to have a
retrieve_all_latest_versions method.

Also, this can be tricky going. If you have associations between your
object and, say, certain users, those associations are usually tied to
the PK of the base object. So in many use scenarios, like a
comprehensive and up to date listing including associated users, you
will need to be working with both the PK of the base version and of
the latest version. Also, you won't be able to delete the base version
without relinking the associations. And even then there will be some
hairy self-joins to sort out. (I'll see if I can extract mine from
existing code when I have time.)

But in case it is of use, here is my Versioned subclass, with some
edits to remove particular quirks (e.g. I have changed the package
name to something easily understood). Criticism (including "don't go
any further down this path, back out before it is too late") welcomed:



package My::DBI::Versioned;
use strict;
#My::DBI is a fairly normal Class::DBI subclass
use base 'My::DBI';
use Algorithm::Diff qw(diff traverse_sequences);

###Easy management of versions as rows in same table as original.
#Child classes must have some column referencing its own class
#through a has_a relationship. For example:
#my $Package = __PACKAGE__
#__PACKAGE__->has_a( version_of => $Package );
#They will also likely want a has_many relationship with themselves:
#__PACKAGE__->has_many( versions => $Package);


sub base_called{
  my $caller = shift;
  my $class = ref $caller || $caller;
  my $called = $class->meta_info('version', 'base_called');
  return $called if $called;
  my $has_as = $class->meta_info('has_a');
  ($called) = grep { $has_as->{$_}->foreign_class eq $class } keys %$has_as;
  die "Versioned class must have self-referencing has_a relationship"
unless $called;
  $class->_extend_meta('version', 'base_called', $called);
  return $called;
}

sub versions_called{
  my $caller = shift;
  my $class = ref $caller || $caller;
  my $called = $class->meta_info('version', 'versions_called');
  return $called if $called;
  my $has_manys = $class->meta_info('has_many');
  ($called) = grep { $has_manys->{$_}->foreign_class eq $class } keys
%$has_manys;
  if ($called) {
    $class->_extend_meta('version', 'versions_called', $called);
    return $called;
  }
  else {
    die "Versioned class must have self-referencing has_a
relationship" unless $class->base_ca\
lled;
    $class->has_many( versions => $class );
    $class->_extend_meta('version', 'versions_called', 'versions');
    return 'versions';
  }
}

sub create_version{
  my $self = shift;
  my $new = shift || {};
  my $version = $self->as_hashref;
  my $primary_key = $self->primary_key;
  delete $version->{$primary_key};
  my $base = $self->base_called;
  #No versions of versions, so we try $self->$base first
  #in case $self is itself a version
  $version->{$base} = $self->$base || $self->$primary_key;
  $version->{$_} = $new->{$_} foreach keys %$new;
  my $class = ref $self;
  $class->create($version);
}

sub base_version{
  my $self = shift;
  if ($self->is_version) {
    my $class = ref $self;
    my $base = $self->base_called;
    return $self->$base;
  } else {
    return $self;
  }
}

sub latest_version{
  my $self = shift;
  my @versions = $self->latest_versions;
  return $self unless @versions;
  return shift @versions;
}

sub latest_versions{
  my $self = shift;
  my $sort_key = shift;
  reverse $self->sorted_versions($sort_key);
}

sub sorted_versions{
  my $self = shift;
  my $sort_key = shift;
  $sort_key = $self->primary_key unless $sort_key;
  my $versions = $self->versions_called;
  my @versions = sort { $a->$sort_key <=> $b->$sort_key }
$self->base_version->$versions;
  return @versions;
}

sub is_version{
  my $self = shift;
  my $base = $self->base_called;
  $self->$base;
}


sub version_trail{
  my $self = shift;
  my $sort_key = shift;
  my @versions = $self->latest_versions($sort_key);
  push @versions, $self->base_version;
  return @versions;
}

sub create_version_or_update{
  my $self = shift;
  my $changes = shift or die 'No changes supplied';

  if ($self->new_version_needed($changes, @_)) {
    return $self->create_version($changes);
  }
  else {
    $self->$_($changes->{$_}) foreach keys %$changes;
    $self->update;
    return $self;
  }
}

#Override in subclass.
#Generic version provided as example, likely
#less than ideal in most cases.
sub new_version_needed{
  my $self = shift;
  my $changes = shift or die 'No changes';
  foreach my $changed (keys %$changes) {
    return 1 if $self->orig_loss($self->$changed, $changes->{$changed}) > 0;
    return 1 if $self->diff_factor($self->$changed, $changes->{$changed}) > 2;
  }
  return 0;
}

sub orig_loss{
  my $self = shift;
  my $original_text = shift or die 'No text';
  my $replacement_text = shift or die 'No replacement text';
  my $losses = 0;
  traverse_sequences([ split //, $original_text ],
                     [ split //, $replacement_text ],
                     {DISCARD_A => sub { $losses++ }}
                    );
  my $factor = ($losses/length $original_text) * 100;
  return $factor;
}


sub diff_factor{
  my $self = shift;
  my $original_text = shift or die 'No text';
  my $replacement_text = shift or die 'No replacement text';
  my $changes = $self->diff_changes($original_text, $replacement_text, @_);
  my $factor = ($changes/length $original_text) * 100;
  return $factor;
}

sub diff_changes{
  my $self = shift;
  my $original_text = shift or die 'No text';
  my $replacement_text = shift or die 'No replacement text';

  my @diffs = diff([ split //, $original_text ],
                   [ split //, $replacement_text ]);

  my %changes;

  foreach my $hunk (@diffs) {
    foreach my $change (@$hunk) {
      #rewrite of character only counts as one change
      $changes{$change->[1]} = 1;
    }
  }
  return scalar keys %changes;
}

sub primary_key{
  my $caller = shift;
  my $class = ref $caller || $caller;
  my ($primary_key, $extra_pk) = $class->primary_columns;
  #Can't do multiple pks because has_many can't handle multiple pks
  die "Multiple primary keys not supported for versioning" if $extra_pk;
  return $primary_key;
}

sub as_hashref{
  my $self = shift;
  my %row = map { $_ => $self->$_ } $self->columns;
  \%row;
}


1;




More information about the ClassDBI mailing list