DOKK / manpages / debian 12 / libgenome-model-tools-music-perl / Genome::Model::Tools::Music::Smg.3pm.en
Genome::Model::Tools::Music::Smg(3pm) User Contributed Perl Documentation Genome::Model::Tools::Music::Smg(3pm)

EOS
); }

sub _doc_authors {
return <<EOS
Qunyuan Zhang, Ph.D.
Cyriac Kandoth, Ph.D.
Nathan D. Dees, Ph.D. EOS }

sub execute {
my $self = shift;
my $gene_mr_file = $self->gene_mr_file;
my $output_file = $self->output_file;
my $output_file_detailed = $output_file . "_detailed";
my $max_fdr = $self->max_fdr;
my $skip_low_mr_genes = $self->skip_low_mr_genes;
my $bmr_modifier_file = $self->bmr_modifier_file;
my $processors = $self->processors;

    # Check on all the input data before starting work
    print STDERR "Gene mutation rate file not found or is empty: $gene_mr_file\n" unless( -s $gene_mr_file );
    print STDERR "BMR modifier file not found or is empty: $bmr_modifier_file\n" unless( !defined $bmr_modifier_file || -s $bmr_modifier_file );
    return undef unless( -s $gene_mr_file && ( !defined $bmr_modifier_file || -s $bmr_modifier_file ));
    # If BMR modifiers were provided, then load them, and create another gene_mr_file with modified BMRs
    if( defined $bmr_modifier_file ) {
        my $inBmrModFh = IO::File->new( $bmr_modifier_file ) or die "Couldn't open $bmr_modifier_file. $!\n";
        my %bmr_modifier = ();
        while( my $line = $inBmrModFh->getline ) {
            next if( $line =~ m/^#/ );
            chomp( $line );
            my ( $gene, $modifier ) = split( /\t/, $line );
            ( $modifier > 0 ) or die "$modifier is an invalid bmr-modifier. Please fix values in $bmr_modifier_file.\n";
            $bmr_modifier{$gene} = $modifier;
        }
        $inBmrModFh->close;
        my $new_gene_mr_file = Genome::Sys->create_temp_file_path;
        ( $new_gene_mr_file ) or die "Couldn't create a temp file. $!";
        my $inMrFh = IO::File->new( $gene_mr_file ) or die "Couldn't open $gene_mr_file. $!\n";
        my $outMrFh = IO::File->new( $new_gene_mr_file, ">" ) or die "Couldn't open $new_gene_mr_file. $!\n";
        while( my $line = $inMrFh->getline ) {
            if( $line =~ m/^#/ ) {
                $outMrFh->print( $line );
                next;
            }
            chomp( $line );
            my ( $gene, $type, $covd_bps, $mut_cnt, $bmr ) = split( /\t/, $line );
            $bmr = $bmr * $bmr_modifier{$gene} if( defined $bmr_modifier{$gene} );
            $outMrFh->print( "$gene\t$type\t$covd_bps\t$mut_cnt\t$bmr\n" );
        }
        $outMrFh->close;
        $inMrFh->close;
        $gene_mr_file = $new_gene_mr_file;
    }
    # Collect per-gene mutation rates for reporting in results later
    my ( %gene_muts, %gene_bps, %mut_classes_hash );
    my $inMrFh = IO::File->new( $gene_mr_file ) or die "Couldn't open $gene_mr_file. $!\n";
    while( my $line = $inMrFh->getline ) {
        next if( $line =~ m/^#/ );
        my ( $gene, $type, $covd_bps, $mut_cnt, undef ) = split( /\t/, $line );
        # Warn user about cases where there could be fewer covered bps than mutations detected
        ( $mut_cnt <= $covd_bps ) or warn "More $type seen in $gene than there are bps with sufficient coverage!\n";
        if( $type eq "Overall" or $type eq "Indels" or $type eq "Truncations" ) {
            $gene_muts{$gene}{$type} = $mut_cnt;
            $gene_bps{$gene} = $covd_bps;
            $mut_classes_hash{$type} = 1 unless( $type eq "Overall" );
        }
        elsif( $type =~ m/(Transitions|Transversions)$/ ) {
            $gene_muts{$gene}{SNVs} += $mut_cnt;
            $mut_classes_hash{SNVs} = 1;
        }
        else {
            die "Unrecognized mutation class in gene-mr-file. $!\n";
        }
    }
    $inMrFh->close;
    my @mut_classes = sort keys %mut_classes_hash;
    # Create a temporary intermediate file to hold the p-values
    my $pval_file = Genome::Sys->create_temp_file_path;
    ( $pval_file ) or die "Couldn't create a temp file. $!";
    # Call R for Fisher combined test, Likelihood ratio test, and convolution test on each gene
    my $smg_cmd = "R --slave --args < " . __FILE__ . ".R $gene_mr_file $pval_file smg_test $processors $skip_low_mr_genes";
    WIFEXITED( system $smg_cmd ) or croak "Couldn't run: $smg_cmd ($?)";
    # Call R for calculating FDR on the p-values calculated in the SMG test
    my $fdr_cmd = "R --slave --args < " . __FILE__ . ".R $pval_file $output_file_detailed calc_fdr $processors $skip_low_mr_genes";
    WIFEXITED( system $fdr_cmd ) or croak "Couldn't run: $fdr_cmd ($?)";
    # Parse the R output to identify the SMGs (significant by at least 2 of 3 tests)
    my $smgFh = IO::File->new( $output_file_detailed ) or die "Couldn't open $output_file_detailed. $!\n";
    my ( @newLines, @smgLines );
    my $header = "#Gene\t" . join( "\t", @mut_classes );
    $header .= "\tTot Muts\tCovd Bps\tMuts pMbp\tP-value FCPT\tP-value LRT\tP-value CT\tFDR FCPT\tFDR LRT\tFDR CT\n";
    while( my $line = $smgFh->getline ) {
        chomp( $line );
        if( $line =~ m/^Gene\tp.fisher\tp.lr\tp.convol\tfdr.fisher\tfdr.lr\tfdr.convol$/ ) {
            push( @newLines, $header );
            push( @smgLines, $header );
        }
        else {
            my ( $gene, @pq_vals ) = split( /\t/, $line );
            my ( $p_fcpt, $p_lrt, $p_ct, $q_fcpt, $q_lrt, $q_ct ) = @pq_vals;
            my @mut_cnts;
            foreach( @mut_classes ) {
                # If a mutation count is a fraction, round down the digits after the decimal point
                push( @mut_cnts, (( $gene_muts{$gene}{$_} =~ m/\./ ) ? sprintf( "%.2f", $gene_muts{$gene}{$_} ) : $gene_muts{$gene}{$_} ));
            }
            my $mut_per_mbp = ( $gene_bps{$gene} ? sprintf( "%.2f", ( $gene_muts{$gene}{Overall} / $gene_bps{$gene} * 1000000 )) : 0 );
            push( @newLines, join( "\t", $gene, @mut_cnts, $gene_muts{$gene}{Overall}, $gene_bps{$gene}, $mut_per_mbp, @pq_vals ) . "\n" );
            # If the FDR of at least two of these tests is less than the maximum allowed, we consider it an SMG
            if(( $q_fcpt <= $max_fdr && $q_lrt <= $max_fdr ) || ( $q_fcpt <= $max_fdr && $q_ct <= $max_fdr ) ||
               ( $q_lrt <= $max_fdr && $q_ct <= $max_fdr )) {
                push( @smgLines, join( "\t", $gene, @mut_cnts, $gene_muts{$gene}{Overall}, $gene_bps{$gene}, $mut_per_mbp, @pq_vals ) . "\n" );
            }
        }
    }
    $smgFh->close;
    # Add per-gene SNV and Indel counts to the detailed R output, and make the header friendlier
    my $outDetFh = IO::File->new( $output_file_detailed, ">" ) or die "Couldn't open $output_file_detailed. $!\n";
    $outDetFh->print( @newLines );
    $outDetFh->close;
    # Do the same for only the genes that we consider SMGs
    my $outFh = IO::File->new( $output_file, ">" ) or die "Couldn't open $output_file. $!\n";
    $outFh->print( @smgLines );
    $outFh->close;
    return 1;
}

1;

2020-11-06 perl v5.30.3