From 710e02b70cb2530802812577229cd62a50da8090 Mon Sep 17 00:00:00 2001
From: Dirk Koopman <djk@tobit.co.uk>
Date: Wed, 5 Jan 2022 18:41:15 +0000
Subject: [PATCH] EXTENSIVE user file and route cleanup, see Changes

---
 Changes              |  32 ++++++++++++++
 perl/DXProtHandle.pm |  71 ++++++++++++++++++++++++------
 perl/DXUser.pm       | 101 ++++++++++++++++++++++++++++++++++---------
 perl/DXUtil.pm       |   5 ++-
 4 files changed, 173 insertions(+), 36 deletions(-)

diff --git a/Changes b/Changes
index 93ff18bb..dc40b96a 100644
--- a/Changes
+++ b/Changes
@@ -1,3 +1,35 @@
+05Jan22=======================================================================
+1. Mark nodes that send PC92 K records as spider. These will include VE7CC
+   nodes. NOTE: there appear to be user records marked as user or other sorts
+   of node, which (now) are actually spider (compatible) nodes and will be
+   marked accordingly.
+2. Adjust nodes currently marked as spider nodes, but are sending versions 
+   not in the spider range of versions on PC92 A records as AK1A.
+3. Try to undo some damage where users have been autocreated with similar
+   attributes as nodes (locked out with privilege set to 1). This will 
+   slowly fix this problem over time, but see item 4 for a 'big bang' 
+   approach.
+4. It has come to my attention that there are a large number of users (of 
+   all sorts) that have incompatible SSIDs. See 03Jan22/4 for details. 
+
+   These are now being scrubbed out of the users file and also will present
+   as their normalised selves. If a -0* SSID is encountered then, if the 
+   normalised version of that call is not present, it will be renamed to 
+   that normalised call. If the normalised version of that user record is
+   already present, the un-normalised user record (-0*) will be removed. 
+5. Make export_users do a batch clean (as in 3. above) and also get rid of
+   (default) 12+ year old unaccessed user records and (default) 2+ year old 
+   "empty" records (with no qra/latlog/qth or handle).
+ 
+   NOTE: if you do an manual export_users (as opposed to the automatic one
+   done once a week), do not be alarmed by the number of old (i.e. more than
+   12 years old) callsigns that it will get rid of. In my case it was about 
+   ~2/5th of the users file. Still left me with over 100,000 "active" users.
+
+   In you are a bit twitchy about this, the code will copy the current
+   user_json and user_json.ooooo to user_json.keep and user_json.backstop
+   respectively. These files will never be overwritten unless you remove one
+   or both, when they will be regenerated on the next export_user. 
 04Jan22=======================================================================
 1. Fix issue in the RBN (and probably other places) with callsigns that
    contain trailing / in callsigns like: OH0K/6, K2PO/7 etc.
diff --git a/perl/DXProtHandle.pm b/perl/DXProtHandle.pm
index 979a619a..1ba996c8 100644
--- a/perl/DXProtHandle.pm
+++ b/perl/DXProtHandle.pm
@@ -785,6 +785,15 @@ sub check_add_user
 		$user->put;				# just to make sure it gets written away!!!
 		dbg("DXProt: PC92 new user record for $call created");
 	}
+
+	# this is to fix a problem I introduced some build ago by using this function for users
+	# whereas it was only being used for nodes.
+	if ($user->is_user && $user->lockout && $user->priv == 1) {
+		$user->priv(0);
+		$user->lockout(0);
+		dbg("DXProt: PC92 user record for $call depriv'd and unlocked");
+		$user->put;
+	}
 	return $user;
 }
 
@@ -1546,8 +1555,13 @@ sub _decode_pc92_call
 	my $is_node = $flag & 4;
 	my $is_extnode = $flag & 2;
 	my $here = $flag & 1;
-	my $ip	= $part[3];
-	$ip ||= $part[1] if $part[1] && $part[1] !~ /^\d+$/ && ($part[1] =~ /^(?:\d+\.)+/ || $part[1] =~ /^(?:(?:[abcdef\d]+)?,)+/);
+	my $ip;
+	if ($part[1] =~ /[,.]/) {
+		$ip = $part[1];
+		$part[1] = $part[2] = 0;
+	} elsif ($part[3] =~ /[,.]/) {
+		$ip = $part[3];
+	}
 	$ip =~ s/,/:/g if $ip;
 	return ($call, $is_node, $is_extnode, $here, $part[1], $part[2], $ip);
 }
@@ -1604,6 +1618,17 @@ sub _add_thingy
 		if ($ncall ne $call) {
 			my $user;
 			my $r;
+
+			# normalise call, delete any unnormalised calls in the users file.
+			# then ignore this thingy
+			my $normcall = normalise_call($call);
+			if ($normcall ne $call) {
+				next if DXChannel::get($call);
+				$user = DXUser::get($call);
+				dbg("DXProt::_add_thingy call $call normalised to $normcall, deleting spurious user $call");
+				$user->del;
+			    $call = $normcall; # this is safe because a route add will ignore duplicates
+			}
 			
 			if ($is_node) {
 				dbg("ROUTE: added node $call to $ncall") if isdbg('routelow');
@@ -1611,12 +1636,19 @@ sub _add_thingy
 				@rout = $parent->add($call, $version, Route::here($here), $ip);
 				$r = Route::Node::get($call);
 				$r->PC92C_dxchan($dxchan->call, $hops) if $r;
-				if ($version) {
+				if ($version && $version =~ /\d+/) {
 					my $old = $user->sort;
-					if ($version >= 5455 && defined $build && $build > 0 || $version >= 3000 ) {
+					if ($user->is_ak1a && (($version >= 5455 && defined $build && $build > 0) || ($version >= 3000 && $version <= 3500)) ) {
 						$user->sort('S');
-						dbg("PCProt::_add_thingy node $call sort ($old) updated to " . $user->sort) if isdbg('route');
-					} 
+						$build //= 0;
+						dbg("PCProt::_add_thingy node $call v: $version b: $build sort ($old) updated to " . $user->sort);
+					} elsif ($user->is_spider && ($version < 3000 || ($version > 4000 && $version < 5455)) && $version =~ /^\d+$/) {
+						unless ($version == 5000 && $build == 0) {
+							$user->sort('A');
+							$build //= 0;
+							dbg("PCProt::_add_thingy node $call v: $version b: $build sort ($old) downgraded to " . $user->sort);
+						}
+					}
 				}
 			} else {
 				dbg("ROUTE: added user $call to $ncall") if isdbg('routelow');
@@ -1838,7 +1870,7 @@ sub pc92_handle_first_slot
 	}
 	$parent->here(Route::here($here));
 	$parent->version($version || $pc19_version) if $version;
-	$parent->build($build) if $build;
+	$build =~ s/^0\.//, $parent->build($build) if $build;
 	$parent->PC92C_dxchan($self->{call}, $hops) unless $self->{call} eq $parent->call;
 	return ($parent, @radd);
 }
@@ -1956,13 +1988,24 @@ sub handle_92
 
 			($parent, $add) = $self->pc92_handle_first_slot(\@ent, $parent, $t, $hops);
 			return unless $parent; # dupe
-
+			
 			push @radd, $add if $add;
 			$parent->reset_obs;
-			$parent->version($ent[4]) if $ent[4];
-			if ($ent[5]) {
-				$ent[5] =~ s/^0.//;
-				$parent->build($ent[5]);
+			my $call = $parent->call;
+			my $version = $ent[4] // 0;
+			my $build = $ent[5] // 0;
+			$build =~ s/^0\.//;
+			my $oldbuild = $parent->build // 0;
+			$oldbuild =~ s/^0\.//;
+			my $oldversion = $parent->version // 0;
+			my $user = check_add_user($parent->call, 'S');
+			my $oldsort = $user->sort;
+			if ($oldsort ne 'S' || $oldversion != $version || $build != $oldbuild) {
+				dbg("PCProt PC92 K node $call updated version: $version (was $oldversion) build: $build (was $oldbuild) sort: 'S' (was $oldsort)");
+				$user->sort('S');
+				$user->version($parent->version($version));
+				$user->build($parent->build($build));
+				$user->put;
 			}
 
 			dbg("ROUTE: reset obscount on $parent->{call} now " . $parent->obscount) if isdbg('obscount');
@@ -2006,11 +2049,11 @@ sub handle_92
 			my $dxc;
 			next unless $_ && @$_;
 			if ($_->[0] eq $main::mycall) {
-				LogDbg('err', "PCPROT: $self->{call} : type $sort $_->[0] refers to me, ignored");
+				dbg("PCPROT: $self->{call} : type $sort $_->[0] refers to me, ignored") if isdbg('route');
 				next;
 			}
 			if ($_->[0] eq $main::myalias && $_->[1] || $_->[0] eq $main::mycall && $_->[1] == 0) {
-				LogDbg('err',"PCPROT: $self->{call} : type $sort $_->[0] changing type to " . $_->[1]?"Node":"User" . ", ignored");
+				LogDbg('err',"PCPROT: $self->{call} : type $sort $_->[0] trying to change type to " . $_->[1]?"Node":"User" . ", ignored");
 				next;
 			}
 			
diff --git a/perl/DXUser.pm b/perl/DXUser.pm
index 4af135e7..a4283c33 100644
--- a/perl/DXUser.pm
+++ b/perl/DXUser.pm
@@ -24,7 +24,7 @@ use DXJSON;
 
 use strict;
 
-use vars qw(%u $dbm $filename %valid $lastoperinterval $lasttime $lru $lrusize $tooold $v3);
+use vars qw(%u $dbm $filename %valid $lastoperinterval $lasttime $lru $lrusize $tooold $veryold $v3);
 
 %u = ();
 $dbm = undef;
@@ -32,7 +32,8 @@ $filename = undef;
 $lastoperinterval = 60*24*60*60;
 $lasttime = 0;
 $lrusize = 5000;
-$tooold = 86400 * 365;		# this marks an old user who hasn't given enough info to be useful
+$tooold = 86400 * 365 * 2;		# this marks an old user who hasn't given enough info to be useful
+$veryold = $tooold * 6;	        # Ancient default 12 years
 $v3 = 0;
 our $maxconnlist = 3;			# remember this many connection time (duration) [start, end] pairs
 
@@ -211,7 +212,6 @@ sub new
 #	confess "can't create existing call $call in User\n!" if $u{$call};
 
 	my $self = $pkg->alloc($call);
-	$self->{lastseen} = $main::systime;
 	$self->put;
 	return $self;
 }
@@ -229,7 +229,6 @@ sub get
 	# is it in the LRU cache?
 	my $ref = $lru->get($call);
 	if ($ref && ref $ref eq 'DXUser') {
-		$ref->{lastseen} = $main::systime;
 		return $ref;
 	}
 	
@@ -251,7 +250,6 @@ sub get
 			}
 			return undef;
 		}
-		$ref->{lastseen} = $main::systime;
 		$lru->put($call, $ref);
 		return $ref;
 	}
@@ -300,9 +298,9 @@ sub put
 	my $call = $self->{call};
 
 	$dbm->del($call);
-	delete $self->{annok} if $self->{annok};
-	delete $self->{dxok} if $self->{dxok};
-
+	delete $self->{annok};
+	delete $self->{dxok};
+	$self->{lastseen} = $main::systime;
 	$lru->put($call, $self);
 	my $ref = $self->encode;
 	$dbm->put($call, $ref);
@@ -343,8 +341,8 @@ sub close
 	my $self = shift;
 	my $startt = shift;
 	my $ip = shift;
-	$self->{lastseen} = $self->{lastin} = $main::systime;
 	# add a record to the connect list
+	$self->{lastin} = $main::systime;
 	my $ref = [$startt || $self->{startt}, $main::systime];
 	push @$ref, $ip if $ip;
 	push @{$self->{connlist}}, $ref;
@@ -383,16 +381,27 @@ sub export
 	my $fn = $name ne 'user_json' ? $name : "$main::local_data/$name";                       # force use of local
 	
 	# save old ones
+	copy $fn, "$fn.keep" unless -e "$fn.keep";
+	copy "$fn.ooooo", "$fn.backstop" unless -e "$fn,backstop";
+
 	move "$fn.oooo", "$fn.ooooo" if -e "$fn.oooo";
 	move "$fn.ooo", "$fn.oooo" if -e "$fn.ooo";
 	move "$fn.oo", "$fn.ooo" if -e "$fn.oo";
 	move "$fn.o", "$fn.oo" if -e "$fn.o";
 	move "$fn", "$fn.o" if -e "$fn";
 
+	
 	my $ta = [gettimeofday];
 	my $count = 0;
 	my $err = 0;
 	my $del = 0;
+	my $spurious = 0;
+	my $unlocked = 0;
+	my $old =  0;
+	my $ancient =  0;
+	my $nodes = 0;
+	my $renamed = 0;
+		
 	my $fh = new IO::File ">$fn" or return "cannot open $fn ($!)";
 	if ($fh) {
 		my $key = 0;
@@ -484,30 +493,80 @@ print "There are $count user records and $err errors in $diff mS\n";
 			my $ref;
 			eval {$ref = decode($val); };
 			if ($ref) {
-				my $t = $ref->{lastin} || 0;
-				if ($ref->is_user && !$ref->{priv} && $main::systime > $t + $tooold) {
-					unless ($ref->{lat} && $ref->{long} || $ref->{qth} || $ref->{qra}) {
+				my $t = $ref->{lastseen} if exists $ref->{lastseen};
+				$t ||= $ref->{lastin} if exists $ref->{lastin};
+				$t ||= $ref->{lastoper} if exists $ref->{lastoper};
+				$t //= 0;
+				
+				if ($ref->is_user) {
+					if ($ref->{priv} == 0 && $main::systime > $t + $tooold) {
+						unless (($ref->{lat} && $ref->{long}) || $ref->{qth} || $ref->{name} || $ref->{qra}) {
+							LogDbg('DXCommand', sprintf("$ref->{call} deleted, empty and too Old at %s", difft($t, ' ')));
+							++$del;
+							++$old;
+							eval {$dbm->del($key)};
+							dbg(carp("Export Error2: delete '$key' => '$val' $@")) if $@;
+							next;
+						}
+					}
+					if ($main::systime > $t + $veryold) {
+						LogDbg('DXCommand', sprintf("$ref->{call} deleted, POSITIVELY ANCIENT at %s", difft($t, ' ')));
+						++$del;
+						++$ancient;
+						eval {$dbm->del($key)};
+						dbg(carp("Export Error2: delete '$key' => '$val' $@")) if $@;
+						next;
+					}
+					if ($ref->{lockout} == 1 && $ref->{priv} == 1) {
+						LogDbg('DXCommand', "$ref->{call} depriv'd and unlocked");
+						$ref->{lockout} = $ref->{priv} = 0;
+						$ref->put;
+						++$unlocked;
+					}
+					if ($ref->is_node && $main::systime > $t + $veryold) {
+						LogDbg('DXCommand', sprintf("NODE $ref->{call} deleted (%s) old", difft($t, ' ')));
+						++$del;
+						++$nodes;
 						eval {$dbm->del($key)};
 						dbg(carp("Export Error2: delete '$key' => '$val' $@")) if $@;
-						LogDbg('DXCommand', "$ref->{call} deleted, too old");
-						$del++;
+						next;
+					}
+					
+					my $normcall = normalise_call($key);
+					if ($normcall ne $key) {
+						# if the normalised call does not exist, create it from the duff call.
+						my $nref = DXUser::get_current($normcall);
+						unless ($nref) {
+							$ref->{call} = $normcall;
+							$ref->put;
+							LogDbg('DXCommand', "DXProt: spurious call $key normalises to $normcall renaming $key -> $normcall");
+							++$renamed;
+						} 
+						LogDbg('DXCommand', "DXProt: spurious call $key (should be $normcall), removing");
+						eval {$dbm->del($key)};
+						dbg(carp("Export Error1: delete $key => '$val' $@")) if $@;
+						++$spurious;
+						++$del;
 						next;
 					}
 				}
-				# only store users that are reasonably active or have useful information
-				print $fh "$key\t" . encode($ref) . "\n";
-				++$count;
 			} else {
 				LogDbg('DXCommand', "Export Error3: '$key'\t" . carp($val) ."\n$@");
 				eval {$dbm->del($key)};
 				dbg(carp("Export Error3: delete '$key' => '$val' $@")) if $@;
 				++$err;
+				next;
 			}
-		} 
-        $fh->close;
-    }
+			
+			# only store users that are reasonably active or have useful information
+			print $fh "$key\t" . encode($ref) . "\n";
+			++$count;
+		}
+	} 
+	$fh->close;
+
 	my $diff = _diffms($ta);
-	my $s = qq{Exported users to $fn - $count Users $del Deleted $err Errors in $diff mS ('sh/log Export' for details)};
+	my $s = qq{Exported users to $fn - $count Users,  $del Deleted ($old empty \& too old, $ancient ancient, $nodes nodes, $spurious spurious), $renamed renamed, $unlocked Unlocked, $err Errors in $diff mS ('sh/log Export' for details)};
 	LogDbg('command', $s);
 	return $s;
 }
diff --git a/perl/DXUtil.pm b/perl/DXUtil.pm
index b8ff16d1..51272d74 100644
--- a/perl/DXUtil.pm
+++ b/perl/DXUtil.pm
@@ -565,8 +565,11 @@ sub difft
 		}
 	}
 	return '-(ve)' if $t < 0;
-	my ($d,$h,$m,$s);
+	my ($y,$d,$h,$m,$s);
 	my $out = '';
+	$y = int $t / (86400*365);
+	$out .= sprintf ("%s${y}y", $adds?' ':'') if $y;
+	$t -= $y * 86400 * 365;
 	$d = int $t / 86400;
 	$out .= sprintf ("%s${d}d", $adds?' ':'') if $d;
 	$t -= $d * 86400;
-- 
2.43.0