Changeset 538 for trunk

Show
Ignore:
Timestamp:
07/04/11 02:03:20 (11 months ago)
Author:
AlexanderPico
Message:

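Additional tweaks to GO (Gene Ontology) handling: use the GO term name as the entry description when one is available, skip GOslim (BP/CC/MF) entries whose ID has already been seen, compare the "collection" token of the database name with the string operator `eq` instead of numeric `==`, and return early when the array table has no entry for the current species.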

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • trunk/dbbuilder/src/org/bridgedb/extract/Ensembl_ETL_Device_v18_local.pl

    r537 r538  
    433433my $dbname = $dbas[0]->dbc->dbname();        # e.g., core_mus_musculus_42_36c 
    434434my @split_dbname = split(/_/, $dbname); 
    435 if ($split_dbname[2] == "collection"){       # shift array elements for "collections" 
     435if ($split_dbname[2] eq "collection"){       # shift array elements for "collections" 
    436436        splice(@split_dbname,1,1); 
    437437} 
     
    17631763            $ADMIN_Xrefs{$dbe_dbname}[10] = "\'Y\'"; # collected 
    17641764            if (!${$seen{GeneOntology}{$dbe_primary_id}}++){ 
    1765                 # Get GO term annotations using $go_adaptor 
    17661765                my $name = mysql_quotes(""); 
    17671766                my $namespace = mysql_quotes(""); 
     1767                # Get GO term annotations using $go_adaptor 
    17681768                if ($go_adaptor){ 
    1769                         my $acc = $dbe_primary_id; 
    1770                         $acc =~ s/\'//g; # strip single quotes to use as variable 
    1771                         my $term = $go_adaptor->fetch_by_accession($acc); 
    1772                         my $name = mysql_quotes($term->name()); #e.g., plasma membrane 
    1773                         my $namespace = mysql_quotes($term->namespace()); # e.g., cellular component 
    1774                 }        
    1775                 $$GeneTables{GeneOntology}{$count.$dot.$subcount{GeneOntology}} = [$dbe_primary_id, $name, $namespace]; 
     1769                  my $acc = $dbe_primary_id; 
     1770                  $acc =~ s/\'//g; # strip single quotes to use as variable 
     1771                  my $term = $go_adaptor->fetch_by_accession($acc); 
     1772                  my $name = mysql_quotes($term->name()); #e.g., plasma membrane 
     1773                  my $namespace = mysql_quotes($term->namespace()); # e.g., cellular_component 
     1774                  $dbe_description = $name unless ($name eq "" || !$name); 
     1775                } 
     1776                $$GeneTables{GeneOntology}{$count.$dot.$subcount{GeneOntology}} = [$dbe_primary_id, $dbe_description, $namespace]; 
    17761777                $$Ensembl_GeneTables{GeneOntology}{$count.$dot.$subcount{GeneOntology}} = [$gene_stable_id, $dbe_primary_id]; 
    1777                 $$Attributes{GeneOntology}{$count.$dot.$subcount{GeneOntology}.$dot.'1'} = [$dbe_primary_id, mysql_quotes( $$GeneTables{GeneOntology}{'NAME'}[1]), mysql_quotes('Description'), $name];  
     1778                $$Attributes{GeneOntology}{$count.$dot.$subcount{GeneOntology}.$dot.'1'} = [$dbe_primary_id, mysql_quotes( $$GeneTables{GeneOntology}{'NAME'}[1]), mysql_quotes('Description'), $dbe_description];  
    17781779                ++$subcount{GeneOntology}; 
    17791780            } 
     
    17891790                  my $namespace = mysql_quotes($term->namespace()); # e.g., cellular_component 
    17901791                  if ($namespace =~ /\'biological_process\'/){ 
     1792                    if (!${$seen{GOslimBP}{$dbe_primary_id}}++){ 
    17911793                        $$GeneTables{GOslimBP}{$count.$dot.$subcount{GOslimBP}} = [$dbe_primary_id, $name]; 
    17921794                        $$Ensembl_GeneTables{GOslimBP}{$count.$dot.$subcount{GOslimBP}} = [$gene_stable_id, $dbe_primary_id]; 
    17931795                        $$Attributes{GOslimBP}{$count.$dot.$subcount{GOslimBP}.$dot.'1'} = [$dbe_primary_id, mysql_quotes( $$GeneTables{GOslimBP}{'NAME'}[1]), mysql_quotes('Description'), $name];      
    17941796                        ++$subcount{GOslimBP}; 
     1797                    } 
    17951798                  } elsif ($namespace =~ /\'cellular_component\'/){ 
     1799                    if (!${$seen{GOslimCC}{$dbe_primary_id}}++){ 
    17961800                        $$GeneTables{GOslimCC}{$count.$dot.$subcount{GOslimCC}} = [$dbe_primary_id, $name]; 
    17971801                        $$Ensembl_GeneTables{GOslimCC}{$count.$dot.$subcount{GOslimCC}} = [$gene_stable_id, $dbe_primary_id]; 
    17981802                        $$Attributes{GOslimCC}{$count.$dot.$subcount{GOslimCC}.$dot.'1'} = [$dbe_primary_id, mysql_quotes( $$GeneTables{GOslimCC}{'NAME'}[1]), mysql_quotes('Description'), $name]; 
    17991803                        ++$subcount{GOslimCC}; 
     1804                    } 
    18001805                  } elsif ($namespace =~ /\'molecular_function\'/){ 
     1806                    if (!${$seen{GOslimMF}{$dbe_primary_id}}++){ 
    18011807                        $$GeneTables{GOslimMF}{$count.$dot.$subcount{GOslimMF}} = [$dbe_primary_id, $name]; 
    18021808                        $$Ensembl_GeneTables{GOslimMF}{$count.$dot.$subcount{GOslimMF}} = [$gene_stable_id, $dbe_primary_id]; 
    18031809                        $$Attributes{GOslimMF}{$count.$dot.$subcount{GOslimMF}.$dot.'1'} = [$dbe_primary_id, mysql_quotes( $$GeneTables{GOslimMF}{'NAME'}[1]), mysql_quotes('Description'), $name]; 
    18041810                        ++$subcount{GOslimMF}; 
     1811                    } 
    18051812                  } else { 
    18061813                        #garbage? 
     
    21802187  my %arrayTable = getArrayTable(); 
    21812188 
     2189if (!defined $arrayTable{$genus_species} ){ 
     2190        return; 
     2191} 
     2192 
    21822193  foreach my $key ( keys %$GeneTables) { 
    21832194      $subcount{$key} = 1;