Vprr File Remover.pl

Embed Size (px)

Citation preview

  • 8/3/2019 Vprr File Remover.pl

    1/10

    #!/usr/bin/perl -w

    ########################################################################################## Name: vprr_file_remover.pl## Revision: 0.01 2009/02/12# Initial Revision.#########################################################################################

    use strict;use warnings;

    use Getopt::Long qw(GetOptions);

    use Pod::Usage qw(pod2usage);use File::Basename qw(basename);use File::Copy qw(move);use DBI;use IO::Handle;use Data::Dumper;

    #
    # Turn on autoflush
    #
    BEGIN { $| = 1 }

    #
    # Make %ENV safer: pin PATH to the system directories and drop the
    # variables a shell could use to alter how commands are resolved or run.
    #
    $ENV{'PATH'} = '/bin:/usr/bin';
    delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};

    #
    # Clear possible tainted script name: reduce $0 to its basename and keep
    # only the leading run of word characters, dots and dashes.
    #
    $0 = basename($0);
    {
        no locale;
        ($0) = ($0 =~ m/^([\w\.\-]+)/);
    }

    #
    # Globals
    #
    # These are file-scoped lexicals shared by the subroutines defined below.
    # The former "use vars" declaration of the same names was dropped: it was
    # immediately shadowed by these "my" declarations and had no effect.
    #
    my %opt        = ();    # raw command line option values (filled by init)
    my %config     = ();    # run-time settings derived from the options
    my @error_msgs = ();    # initialization error messages

    our $VERSION = 0.01;

    ####################################################################################################
    #
    # usage_error($message)
    #
    # Reports a command line usage problem and terminates the script.
    #
    # Parameters:
    #   $message - text to report; nothing is printed when it is undefined.
    #
    # Never returns: the process always exits with status 1.
    #
    ####################################################################################################
    sub usage_error {
        my ($message) = @_;

        if (defined $message) {
            print STDERR "USAGE ERROR: $message\n";
        }

        exit 1;
    }

    ####################################################################################################
    #
    # The init() subroutine is used to set up database connection and capture any of the options
    # from the command line.
    #
    # Parameters:
    #   $config - hash ref that receives the run-time settings: count_run,
    #             ifile, vprr_dir, outfile, target_dir (not set for a count
    #             run), log_filename and dbh.
    #   $errors - optional array ref; every initialization failure message
    #             that is logged is also appended to it.
    #
    # Returns 1 on success and 0 when the default target directory cannot be
    # created or the database connection fails. Invalid command line input
    # does not return: it terminates the script through usage_error().
    # The --version and --help options print their output and exit 0.
    #
    ####################################################################################################
    sub init {
        my ($config, $errors) = @_;

        # Set the log file name before anything can fail. do_log() skips the
        # log file until its 'log_filename' setting is defined, so setting it
        # late meant the mkdir/chmod failures below were never written there.
        # NOTE(review): do_log() reads the file-level %config, so this relies
        # on the caller passing a reference to that same hash.
        $config->{'log_filename'} = basename($0, '.pl') . '.log';

        # Log a failure, hand it back to the caller's list, and yield the
        # failure return value.
        my $fail = sub {
            my ($msg) = @_;
            do_log($msg);
            push @{$errors}, $msg if ref($errors) eq 'ARRAY';
            return 0;
        };

        #
        # Parse the command line for options
        #
        GetOptions('verbose|n'      => \$opt{'n'},
                   'count_run|c'    => \$opt{'c'},
                   'ifile|f=s'      => \$opt{'f'},
                   'vprr_dir|d=s'   => \$opt{'d'},
                   'target_dir|t=s' => \$opt{'t'},
                   'version|v'      => \$opt{'v'},
                   'help|h'         => \$opt{'h'})
            or usage_error("Failed to process command line options.");

        #
        # Print VERSION number for this script and exit if 'version' option is detected.
        #
        if ($opt{'v'}) {
            print STDOUT "$0 Version $VERSION\n";
            exit(0);
        }

        #
        # Print perldoc for this script and exit if 'help' option is detected.
        #
        pod2usage({-exitval => 0, -verbose => 2}) if $opt{'h'};

        #
        # Check --count_run option.
        #
        $config->{'count_run'} = defined($opt{'c'}) ? 1 : 0;

        #
        # Check if ifile option is provided.
        #
        usage_error("The -ifile option is required.") if !defined($opt{'f'});
        $config->{'ifile'} = $opt{'f'};
        $config->{'ifile'} =~ s/\s//g;    # strips ALL whitespace, not just the ends

        #
        # Check Input file
        #
        usage_error("Input file $config->{'ifile'} does not exist.") if !-e $config->{'ifile'};

        #
        # Default VPRR directory to /ftp/edgar/vprr if --vprr_dir option is not provided.
        #
        $config->{'vprr_dir'} = $opt{'d'} ? $opt{'d'} : '/ftp/edgar/vprr';

        #
        # Validate the VPRR directory
        #
        _require_readable_dir($config->{'vprr_dir'});

        #
        # Set up the output filename
        #
        my ($mday, $mon, $year) = (localtime(time()))[3, 4, 5];
        $year += 1900;
        $mon++;
        $config->{'outfile'} = basename($0, '.pl') . '.'
                             . sprintf("%04d-%02d-%02d", $year, $mon, $mday)
                             . '.out';

        # A count run never moves files, so it needs no target directory.
        if (!$config->{'count_run'}) {
            #
            # Default target directory to /ftp/edgar/vprr/vprr_removed/YYYYMMDD
            # if --target_dir option is not provided.
            #
            if (!defined($opt{'t'})) {
                $config->{'target_dir'} = '/ftp/edgar/vprr/vprr_removed/'
                                        . sprintf("%04d%02d%02d", $year, $mon, $mday);

                if (!-d $config->{'target_dir'}) {
                    if (!mkdir($config->{'target_dir'})) {
                        return $fail->("ERROR: Failed to create target directory $config->{'target_dir'} $!");
                    }
                    elsif (chmod(0700, $config->{'target_dir'}) != 1) {
                        return $fail->("ERROR: Failed to chmod on target directory $config->{'target_dir'} $!");
                    }
                }
            }
            else {
                $config->{'target_dir'} = $opt{'t'};
            }

            #
            # Validate the target directory
            #
            _require_readable_dir($config->{'target_dir'});
        }

        #
        # Connect to MySQL
        #
        my $dbiconnect = "DBI:mysql:edgar;mysql_read_default_file=/home/mysql/.my.cnf.nobody";
        $config->{'dbh'} = DBI->connect($dbiconnect, "", "", {RaiseError => 0, AutoCommit => 1});

        if (!defined($config->{'dbh'})) {
            return $fail->("ERROR: Failed to connect to database");
        }

        return 1;
    }

    #
    # _require_readable_dir($dir)
    #
    # Terminates the script through usage_error() unless $dir is an existing
    # directory that can be opened for reading.
    #
    sub _require_readable_dir {
        my ($dir) = @_;

        usage_error("Directory $dir does not exist.") if !-d $dir;

        opendir(my $dh, $dir)
            or usage_error("Directory $dir is not accessable.");
        closedir($dh);

        return;
    }

    ####################################################################################################
    #
    # The do_log() subroutine is used to generate debug message.
    #
    # Parameters:
    #   $msg    - the message text.
    #   $errors - optional array ref; when supplied, $msg is appended to it.
    #
    # The message is prefixed with a timestamp, the script name, the script
    # version and the process id. It is printed to STDOUT when the verbose
    # option is set, and appended to the log file once the 'log_filename'
    # setting is defined. A log file that cannot be opened is silently skipped
    # so that logging never aborts the run.
    #
    ####################################################################################################
    sub do_log {
        my ($msg, $errors) = @_;

        my ($sec, $min, $hour, $mday, $mon, $year) = localtime(time);
        my $dts = sprintf("%04d-%02d-%02d %02d:%02d:%02d",
                          $year + 1900, $mon + 1, $mday, $hour, $min, $sec);
        my $line = "$dts:$0 $VERSION $$ $msg\n";

        print STDOUT $line if defined($opt{'n'});

        # Collect the message for the caller. The original test was inverted
        # (!defined), so a supplied array was never filled.
        push(@{$errors}, $msg) if defined($errors);

        return if !defined($config{'log_filename'});

        # Three-argument open: the file name can never be read as a mode.
        open(my $log_fh, '>>', $config{'log_filename'}) or return;
        print {$log_fh} $line;
        close($log_fh);

        return;
    }

    ####################################################################################################
    #
    # The find_link() subroutine is used to search the database for the accession number of a specific
    # film number. It will build a directory path based on the accession number it found.
    #
    # Parameters:
    #   $film_number - film number to look up in the filing_values table.
    #   $config      - hash ref providing 'dbh' (the database handle) and
    #                  'vprr_dir' (the root directory of the built path).
    #
    # Returns "<vprr_dir>/<SubDir>/<accession_number>" for the last row that
    # matches, or undef when no row matches or the query cannot be prepared or
    # executed (database failures are logged through do_log()).
    #
    ####################################################################################################
    sub find_link {
        my ($film_number, $config) = @_;

        # The film number is bound as a placeholder value rather than being
        # interpolated into the statement text.
        my $sql = 'SELECT '
                . ' SUBSTRING(accession_number, 12, 2) as SubDir,'
                . ' accession_number '
                . 'FROM'
                . ' filing_values '
                . 'WHERE'
                . ' film_number = ?';

        my $sth = $config->{'dbh'}->prepare($sql);
        if (!defined($sth)) {
            do_log("ERROR: Can't prepare statement ($DBI::errstr)");
            return;
        }

        if (!defined($sth->execute($film_number))) {
            do_log("ERROR: Can't execute statement $sql ($DBI::errstr)");
            $sth->finish;
            return;
        }

        # Stays undef when nothing matches, so the caller's "no accession
        # number" check fires; an empty string would slip past that check.
        my $link;
        while (my @row = $sth->fetchrow_array) {
            $link = "$config->{'vprr_dir'}/$row[0]/$row[1]";
        }
        $sth->finish;

        return $link;
    }

    ####################################################################################################
    #
    # Main program.
    #
    # Reads film numbers from the input file, one per line. For every valid
    # film number whose PDF exists under the VPRR directory, the PDF is moved
    # to the target directory and the symbolic link that find_link() builds a
    # path for is removed. One tab separated result line per film number is
    # written to the output file. In count run mode nothing is moved; only the
    # totals are reported.
    #
    ####################################################################################################

    #
    # _move_pdf($pdf_file, $film_number, $out_fh)
    #
    # Moves $pdf_file into the target directory as "<film_number>.pdf".
    # Returns 1 on success. On failure the error is logged, a "Failed to move
    # file" line is written to $out_fh, and 0 is returned.
    #
    sub _move_pdf {
        my ($pdf_file, $film_number, $out_fh) = @_;

        return 1 if move($pdf_file, "$config{'target_dir'}/$film_number.pdf");

        do_log("ERROR: Failed to move file $pdf_file to $config{'target_dir'}. $!");
        print {$out_fh} "$film_number\tFailed to move file\n";

        return 0;
    }

    do_log("Program starts processing..........");

    #
    # Initialize this script.
    #
    if (init(\%config, \@error_msgs)) {
        do_log('Configuration = ' . Dumper(\%config));

        my ($out_fh, $in_fh);

        #
        # Open the output file
        #
        if (!open($out_fh, '>', $config{'outfile'})) {
            do_log("Failed to open output file ($config{'outfile'}). $!");
        }
        #
        # Open the input file
        #
        elsif (!open($in_fh, '<', $config{'ifile'})) {
            do_log("Failed to open input file ($config{'ifile'}). $!");
            close($out_fh);    # do not leak the output handle opened above
        }
        else {
            # The whole file is read at once because the reported total
            # counts every input line, including blank and invalid ones.
            my @film_numbers = <$in_fh>;
            close($in_fh);

            my $total_count         = @film_numbers;
            my $doesnot_exist_count = 0;

            FILM_NUMBER:
            foreach my $f (@film_numbers) {
                chomp $f;
                $f =~ s/^\s+//;
                $f =~ s/\s+$//;
                next FILM_NUMBER if length($f) == 0;

                #
                # Invalid film number: it must be exactly eight digits. The
                # first four select the PDF sub directory.
                #
                my ($sub_dir) = $f =~ /\A(\d{4})\d{4}\z/;
                if (!defined($sub_dir)) {
                    do_log("WARNING: Invalid film number $f");
                    print {$out_fh} "$f\tInvalid film number\n";
                    next FILM_NUMBER;
                }

                my $pdf_file = "$config{'vprr_dir'}/0000/$sub_dir/$f.pdf";

                #
                # See if PDF file exists on the system disk
                #
                if (!-e $pdf_file) {
                    $doesnot_exist_count++;
                    do_log("WARNING: No PDF file ($pdf_file) found for film number $f");
                    print {$out_fh} "$f\tNo PDF file found\n";
                    next FILM_NUMBER;
                }

                #
                # Doing a count run: nothing is moved or removed
                #
                next FILM_NUMBER if $config{'count_run'};

                my $link = find_link($f, \%config);

                #
                # No Symbolic link string built
                #
                if (!defined($link)) {
                    do_log("WARNING: No accession number found for film number $f");
                    print {$out_fh} "$f\tSuccessfully removed with no link $pdf_file\n"
                        if _move_pdf($pdf_file, $f, $out_fh);
                    next FILM_NUMBER;
                }

                #
                # Symbolic link does not exist
                #
                if (!-l $link) {
                    do_log("WARNING: No symbolic link ($link) found for film number $f");
                    print {$out_fh} "$f\tSuccessfully removed with no link $pdf_file\n"
                        if _move_pdf($pdf_file, $f, $out_fh);
                    next FILM_NUMBER;
                }

                #
                # Move the PDF file out to the target directory. The link is
                # left alone when the move fails: removing it anyway would
                # orphan a PDF that is still in place and would write a second
                # result line for the same film number.
                #
                next FILM_NUMBER if !_move_pdf($pdf_file, $f, $out_fh);

                #
                # Remove the symbolic link
                #
                if (unlink($link) == 1) {
                    do_log("INFO: Successfully removed film number $f");
                    print {$out_fh} "$f\tSuccessfully removed $link $pdf_file\n";
                }
                #
                # Failed to remove the symbolic link
                #
                else {
                    do_log("ERROR: Failed to remove symbolic link $link. $!");
                    print {$out_fh} "$f\tFailed to remove symbolic link\n";
                }
            }

            if ($config{'count_run'}) {
                print {$out_fh} "Total film numbers=$total_count\n";
                print {$out_fh} "Does Not Exist Count=$doesnot_exist_count\n";
            }

            # Buffered write errors only surface when the handle is closed.
            close($out_fh)
                or do_log("ERROR: Failed to close output file ($config{'outfile'}). $!");
        }

        #
        # Disconnect from MySQL
        #
        $config{'dbh'}->disconnect() if defined($config{'dbh'});
    }
    #
    # Failed to initialize: the collected error messages are logged. No
    # notification email is sent from this script.
    #
    else {
        my $error_msgs = join("\n", @error_msgs);
        do_log("Failed to initialize ($error_msgs)");
    }

    do_log("Program is exiting........");

    __END__

    =head1 NAME

    vprr_file_remover.pl - VPRR PDF Files Remover.

    =head1 SYNOPSIS

    vprr_file_remover.pl {options}

    =head1 DESCRIPTION

    This script is used to generate a list of accession numbers that associate with multiple PDF files in the database. The report will be mailed out to DMZ OPS.

    =head1 OPTIONS

    -h, --help : Display this help and exit

    -v, --version : Display the current version of this script

    -f, --ifile : Input file name

    -t, --target_dir : Specify the directory where to move the PDF files to

    -d, --vprr_dir : Specify the directory of the VPRR root directory

    -c, --count_run : Get the count for no PDF files

    -n, --verbose : Run in verbose mode