- Timestamp:
- 08/19/09 05:53:32 (3 years ago)
- Files:
-
- 1 moved
Legend:
- Unmodified
- Added
- Removed
-
trunk/corelib/src/org/bridgedb/webservice/biomart/IDMapperBiomart.java
r156 r161 16 16 // 17 17 18 package org.bridgedb.webservice; 19 20 import java.io.BufferedReader; 18 package org.bridgedb.webservice.biomart; 19 21 20 import java.io.IOException; 22 21 23 22 import java.util.HashMap; 24 23 import java.util.HashSet; 25 import java.util.Iterator;26 24 import java.util.Map; 27 25 import java.util.Set; 28 import java.util.Vector;29 30 import javax.xml.parsers.ParserConfigurationException;31 26 32 27 import org.bridgedb.AbstractIDMapperCapabilities; … … 37 32 import org.bridgedb.IDMapperException; 38 33 import org.bridgedb.Xref; 39 import org.bridgedb.file.IDMappingReaderFromDelimitedReader; 40 import org.bridgedb.webservice.biomart.Attribute; 41 import org.bridgedb.webservice.biomart.BiomartStub; 42 import org.bridgedb.webservice.biomart.Filter; 43 import org.bridgedb.webservice.biomart.XMLQueryBuilder; 44 45 import org.xml.sax.SAXException; 34 import org.bridgedb.webservice.IDMapperWebservice; 35 import org.bridgedb.webservice.biomart.util.BiomartClient; 46 36 47 37 /** … … 61 51 /** {@inheritDoc} */ 62 52 public IDMapper connect(String location) throws IDMapperException { 63 // e.g.: transitivity=false,id-type-filter=true@dataset=oanatinus_gene_ensembl53 // e.g.: id-type-filter=true@dataset=oanatinus_gene_ensembl 64 54 // e.g.: http://www.biomart.org/biomart/martservice?dataset=oanatinus_gene_ensembl 65 String baseURL = BiomartStub.defaultBaseURL; 66 boolean transitivity = false; 55 String baseURL = BiomartClient.defaultBaseURL; 67 56 boolean idTypeFilter = true; 68 57 … … 78 67 79 68 if (config!=null) { 80 String transitivityTag = "transitivity=";81 idx = config.indexOf(transitivityTag);82 if (idx!=-1) {83 String tran = config.substring(idx+transitivityTag.length());84 if (tran.toLowerCase().startsWith("true")) {85 transitivity = true;86 } else if (tran.toLowerCase().startsWith("false")) {87 transitivity = false;88 } else {89 throw new IDMapperException(90 "transivity can only be true or false");91 }92 }93 94 95 69 String idTypeFilterTag = "id-type-filter="; 96 70 idx = config.indexOf(idTypeFilterTag); … … 117 91 String martTag = "mart="; 118 92 idx = param.indexOf(martTag); 119 String mart Name= param.substring(idx+martTag.length());120 121 idx = mart Name.indexOf("&");93 String mart = param.substring(idx+martTag.length()); 94 95 idx = mart.indexOf("&"); 122 96 if (idx>-1) { 123 mart Name = martName.substring(0,idx);97 mart = mart.substring(0,idx); 124 98 } 125 99 126 100 String datasetTag = "dataset="; 127 101 idx = param.indexOf(datasetTag); 128 String dataset Name= param.substring(idx+datasetTag.length());129 130 idx = dataset Name.indexOf("&");102 String dataset = param.substring(idx+datasetTag.length()); 103 104 idx = dataset.indexOf("&"); 131 105 if (idx>-1) { 132 dataset Name = datasetName.substring(0,idx);133 } 134 135 return new IDMapperBiomart(mart Name, datasetName, baseURL,136 idTypeFilter , transitivity);137 } 138 } 139 140 private String mart Name;141 private String dataset Name;106 dataset = dataset.substring(0,idx); 107 } 108 109 return new IDMapperBiomart(mart, dataset, baseURL, 110 idTypeFilter); 111 } 112 } 113 114 private String mart; 115 private String dataset; 142 116 private BiomartStub stub; 143 private boolean transitivity;144 117 private boolean idOnlyForTgtDataSource; 145 118 146 119 private String baseURL; 147 120 148 private Map<DataSource, Filter> mapSrcDSFilter;149 private Map<DataSource, Attribute> mapSrcDSAttr;121 private Set<DataSource> supportedSrcDs; 122 private Set<DataSource> supportedTgtDs; 150 123 151 124 /** 152 125 * Transitivity is unsupported.ID only. ID only for target data sources. 153 126 * Use default url of BiMart. 154 * @param mart Namename of mart155 * @param dataset Namename of dataset127 * @param mart name of mart 128 * @param dataset name of dataset 156 129 * @throws IDMapperException if failed to link to the dataset 157 130 */ 158 public IDMapperBiomart(String martName, String datasetName) throws IDMapperException { 159 this(martName, datasetName, null); 131 public IDMapperBiomart(String mart, String dataset) 132 throws IDMapperException { 133 this(mart, dataset, null); 160 134 } 161 135 162 136 /** 163 137 * Use default url of BiMart. 164 * @param mart Namename of mart165 * @param dataset Namename of dataset138 * @param mart name of mart 139 * @param dataset name of dataset 166 140 * @param idOnlyForTgtDataSource id-only option, filter data source ends 167 141 * with 'ID' or 'Accession'. … … 169 143 * @throws IDMapperException if failed to link to the dataset 170 144 */ 171 public IDMapperBiomart(String mart Name, String datasetName, boolean idOnlyForTgtDataSource,172 boolean transitivity) throws IDMapperException {173 this(mart Name, datasetName, null, idOnlyForTgtDataSource, transitivity);145 public IDMapperBiomart(String mart, String dataset, 146 boolean idOnlyForTgtDataSource) throws IDMapperException { 147 this(mart, dataset, null, idOnlyForTgtDataSource); 174 148 } 175 149 176 150 /** 177 151 * Transitivity is unsupported.ID only. ID only for target data sources. 178 * @param mart Namename of mart179 * @param dataset Namename of dataset152 * @param mart name of mart 153 * @param dataset name of dataset 180 154 * @param baseURL base url of BioMart 181 155 * @throws IDMapperException if failed to link to the dataset 182 156 */ 183 public IDMapperBiomart(String mart Name, String datasetName, String baseURL)157 public IDMapperBiomart(String mart, String dataset, String baseURL) 184 158 throws IDMapperException { 185 this(martName, datasetName, baseURL, true); 186 } 187 188 /** 189 * Transitivity is unsupported. 190 * @param martName name of mart 191 * @param datasetName name of dataset 192 * @param baseURL base url of BioMart 193 * @param idOnlyForTgtDataSource id-only option, filter data source ends 194 * with 'ID' or 'Accession'. 195 * @throws IDMapperException if failed to link to the dataset 196 */ 197 public IDMapperBiomart(String martName, String datasetName, String baseURL, 198 boolean idOnlyForTgtDataSource) throws IDMapperException { 199 this(martName, datasetName, baseURL, idOnlyForTgtDataSource, false); 159 this(mart, dataset, baseURL, true); 200 160 } 201 161 … … 203 163 * Construct from a dataset, a database, id-only option and transitivity 204 164 * option. 205 * @param mart Namename of mart206 * @param dataset Namename of dataset165 * @param mart name of mart 166 * @param dataset name of dataset 207 167 * @param baseURL base url of BioMart 208 168 * @param idOnlyForTgtDataSource id-only option, filter data source ends … … 211 171 * @throws IDMapperException if failed to link to the dataset 212 172 */ 213 public IDMapperBiomart(String mart Name, String datasetName, String baseURL,214 boolean idOnlyForTgtDataSource , boolean transitivity) throws IDMapperException {215 this.mart Name = martName;216 this.dataset Name = datasetName;173 public IDMapperBiomart(String mart, String dataset, String baseURL, 174 boolean idOnlyForTgtDataSource) throws IDMapperException { 175 this.mart = mart; 176 this.dataset = dataset; 217 177 if (baseURL!=null) { 218 178 this.baseURL = baseURL; 219 179 } else { 220 this.baseURL = Biomart Stub.defaultBaseURL;180 this.baseURL = BiomartClient.defaultBaseURL; 221 181 } 222 182 … … 227 187 } 228 188 229 try { 230 if (!stub.getRegistry().containsKey(martName)) { 231 throw new IDMapperException("Mart not exist."); 232 } 233 234 if (!stub.getAvailableDatasets(martName).contains(stub.getDataset(datasetName))) { 235 throw new IDMapperException("dataset not exist."); 236 } 237 } catch (IOException e) { 238 throw new IDMapperException(e); 239 } catch (ParserConfigurationException e) { 240 throw new IDMapperException(e); 241 } catch (SAXException e) { 242 throw new IDMapperException(e); 243 } 244 245 setIDOnlyForTgtDataSource(idOnlyForTgtDataSource); 246 setTransitivity(transitivity); 247 248 mapSrcDSFilter = new HashMap<DataSource, Filter>(); 249 mapSrcDSAttr = new HashMap<DataSource, Attribute>(); 189 if (!stub.availableMarts().contains(mart)) { 190 throw new IDMapperException("Mart not exist."); 191 } 192 193 if (!stub.availableDatasets(mart).contains(dataset)) { 194 throw new IDMapperException("dataset not exist."); 195 } 196 197 this.idOnlyForTgtDataSource = idOnlyForTgtDataSource; 198 199 supportedSrcDs = this.getSupportedSrcDataSources(); 200 supportedTgtDs = this.getSupportedTgtDataSources(); 250 201 251 202 cap = new BiomartCapabilities(); … … 253 204 254 205 /** 255 * Filter target datasource ending with "ID" or "Accession".256 * @param idOnlyForTgtDataSource ID-only if true257 */258 public void setIDOnlyForTgtDataSource(boolean idOnlyForTgtDataSource) {259 this.idOnlyForTgtDataSource = idOnlyForTgtDataSource;260 }261 262 /**263 206 * 264 207 * @return true if ID-only for target data sources. … … 269 212 270 213 /** 271 * Set transitivity support.272 * @param transitivity support transitivity if true.273 */274 public void setTransitivity(final boolean transitivity) {275 this.transitivity = transitivity;276 }277 278 /**279 * Get transitivity support.280 * @return true if support transitivity; false otherwise.281 */282 public boolean getTransitivity() {283 return transitivity;284 }285 286 /**287 * Set base url of BioMart.288 * @param baseURL URL of BioMart.289 * @throws IDMapperException if failed to read local resources.290 */291 public void setBaseURL(final String baseURL) throws IDMapperException {292 try {293 stub = BiomartStub.getInstance(baseURL);294 } catch (IOException e) {295 throw new IDMapperException(e);296 }297 this.baseURL = baseURL;298 }299 300 /**301 214 * 302 215 * @return base URL of BioMart. … … 308 221 /** 309 222 * 310 * @param mart mart name311 */312 public void setMart(final String mart) {313 this.martName = mart;314 }315 316 /**317 *318 223 * @return mart name 319 224 */ 320 225 public String getMart() { 321 return martName; 322 } 323 324 /** 325 * Set dataset. 326 * @param dataset dataset from BioMart 327 */ 328 public void setDataset(final String dataset) { 329 this.datasetName = dataset; 226 return mart; 330 227 } 331 228 … … 335 232 */ 336 233 public String getDataset() { 337 return dataset Name;234 return dataset; 338 235 } 339 236 … … 353 250 if (srcXrefs==null) { 354 251 throw new java.lang.IllegalArgumentException( 355 "srcXrefs or tgtDataSources cannot be null"); 356 } 357 358 Map<Xref, Set<Xref>> result = new HashMap(); 359 360 // remove unsupported source datasources 361 Set<DataSource> supportedSrcDatasources 362 = cap.getSupportedSrcDataSources(); 363 Map<DataSource, String> queryFilters = getQueryFilters(srcXrefs); 364 Iterator<DataSource> it = queryFilters.keySet().iterator(); 365 while (it.hasNext()) { 366 DataSource ds = it.next(); 367 if (!supportedSrcDatasources.contains(ds)) { 368 it.remove(); 369 } 370 } 371 if (queryFilters.isEmpty()) { 372 return result; 373 } 374 375 // remove unsupported target datasources 376 Set<DataSource> supportedTgtDatasources = cap.getSupportedTgtDataSources(); 377 Vector<DataSource> tgtDss; 378 if (tgtDataSources == null) 379 tgtDss = new Vector(supportedSrcDatasources); 380 else 381 { 382 tgtDss = new Vector(tgtDataSources); 383 tgtDss.retainAll(supportedTgtDatasources); 384 } 385 if (tgtDss.isEmpty()) { 386 return result; 387 } 388 389 for (Map.Entry<DataSource, String> filter : queryFilters.entrySet()) { 390 DataSource srcDs = filter.getKey(); 391 392 String srcAttr = mapSrcDSFilter.get(srcDs).getName(); 393 Attribute[] attrs = getAttributes(tgtDss, srcAttr); 394 395 Map<String, String> queryFilter = new HashMap(1); 396 queryFilter.put(srcAttr, filter.getValue()); 397 398 String query = XMLQueryBuilder.getQueryString(datasetName, attrs, queryFilter); 399 400 BufferedReader bfr = null; 401 try { 402 bfr = stub.sendQuery(query); 403 if (!bfr.ready()) 404 throw new IDMapperException("Query failed"); 405 } catch (IOException e) { 406 throw new IDMapperException(e); 407 } 408 409 if (bfr==null) { 410 return result; 411 } 412 413 IDMappingReaderFromDelimitedReader reader 414 = new IDMappingReaderFromDelimitedReader(bfr, 415 "\\t", null, transitivity); 416 417 Vector<DataSource> dss = new Vector(tgtDss.size()+1); 418 dss.addAll(tgtDss); 419 dss.add(srcDs); 420 reader.setDataSources(dss); 421 422 Map<Xref,Set<Xref>> mapXrefs = reader.getIDMappings(); 423 if (mapXrefs==null) { 424 return result; 425 } 426 427 for (Xref srcXref : srcXrefs) { 428 Set<Xref> refs = mapXrefs.get(srcXref); 429 if (refs==null) continue; 430 431 Set<Xref> tgtRefs = result.get(srcXref); 432 if (tgtRefs==null) { 433 tgtRefs = new HashSet(); 434 result.put(srcXref, tgtRefs); 252 "srcXrefs or tgtDataSources cannot be null."); 253 } 254 255 Map<Xref, Set<Xref>> result = new HashMap<Xref, Set<Xref>>(); 256 257 // source datasources 258 Map<String, Map<String, Xref>> mapSrcTypeIDXrefs = new HashMap(); 259 for (Xref xref : srcXrefs) { 260 DataSource ds = xref.getDataSource(); 261 if (!supportedSrcDs.contains(ds)) continue; 262 263 String src = ds.getFullName(); 264 Map<String, Xref> ids = mapSrcTypeIDXrefs.get(src); 265 if (ids==null) { 266 ids = new HashMap(); 267 mapSrcTypeIDXrefs.put(src, ids); 268 } 269 ids.put(xref.getId(), xref); 270 } 271 272 // supported tgt datasources 273 Set<String> tgtTypes = new HashSet(); 274 for (DataSource ds : tgtDataSources) { 275 if (supportedTgtDs.contains(ds)) { 276 tgtTypes.add(ds.getFullName()); 277 } 278 } 279 String[] tgts = tgtTypes.toArray(new String[0]); 280 281 for (Map.Entry<String, Map<String, Xref>> entry : 282 mapSrcTypeIDXrefs.entrySet()) { 283 String src = entry.getKey(); 284 Set<String> ids = entry.getValue().keySet(); 285 Map<String,Set<String>[]> res = 286 stub.translate(mart, dataset , src, tgts, ids); 287 288 289 for (Map.Entry<String,Set<String>[]> entryRes : res.entrySet()) { 290 String srcId = entryRes.getKey(); 291 Set<String>[] tgtIds = entryRes.getValue(); 292 if (tgtIds==null) { // source xref not exist 293 continue; 435 294 } 436 295 437 for (Xref tgtXref : refs) { 438 if (tgtDataSources.contains(tgtXref.getDataSource())) { 439 tgtRefs.add(tgtXref); 296 Xref srcXref = mapSrcTypeIDXrefs.get(src).get(srcId); 297 298 Set<Xref> tgtXrefs = new HashSet(); 299 for (int itgt=0; itgt<tgts.length; itgt++) { 300 for (String tgtId : tgtIds[itgt]) { 301 Xref tgtXref = new Xref(tgtId, 302 DataSource.getByFullName(tgts[itgt])); 303 tgtXrefs.add(tgtXref); 440 304 } 441 305 } 442 } 443 } 306 307 result.put(srcXref, tgtXrefs); 308 } 309 } 444 310 445 311 return result; 446 }447 448 /**449 * Create filters from the source xrefs.450 * @param srcXrefs source xrefs451 * @return map from data source to IDs452 */453 protected Map<DataSource,String> getQueryFilters(Set<Xref> srcXrefs) {454 Map<DataSource, Set<String>> mapNameValue = new HashMap();455 for (Xref xref : srcXrefs) {456 DataSource ds = xref.getDataSource();457 Set<String> ids = mapNameValue.get(ds);458 if (ids==null) {459 ids = new HashSet();460 mapNameValue.put(ds, ids);461 }462 ids.add(xref.getId());463 }464 465 Map<DataSource,String> filters = new HashMap();466 for (Map.Entry<DataSource, Set<String>> entry : mapNameValue.entrySet()) {467 DataSource ds = entry.getKey();468 StringBuilder value = new StringBuilder();469 for (String str : entry.getValue()) {470 value.append(str);471 value.append(",");472 }473 474 int len = value.length();475 if (len>0) {476 value.deleteCharAt(len-1);477 }478 479 filters.put(ds, value.toString());480 }481 482 return filters;483 }484 485 /**486 * This code is bollowed from IDMapperClient from Cytoscape.487 * @param tgtDataSources target data sources488 * @param filterName filter name489 * @return attributes490 */491 protected Attribute[] getAttributes(Vector<DataSource> tgtDataSources,492 String filterName) {493 int n = tgtDataSources.size();494 Attribute[] attrs = new Attribute[n+1];495 496 int iattr = 0;497 for (DataSource ds : tgtDataSources) {498 //attrs[iattr++] = new Attribute(ds.getFullName());499 attrs[iattr++] = this.mapSrcDSAttr.get(ds);500 }501 502 // Database-specific modification.503 // This is not the best way, but cannot provide universal solution.504 Attribute attr;505 if (datasetName.contains("REACTOME")) {506 attr = stub.filterToAttributeName(datasetName, "REACTOME", filterName);507 } else if (datasetName.contains("UNIPROT")) {508 attr = stub.filterToAttributeName(datasetName, "UNIPROT", filterName);509 } else if (datasetName.contains("VARIATION")) {510 attr = stub.getAttribute(datasetName, filterName + "_stable_id");511 } else {512 attr = stub.getAttribute(datasetName, filterName);513 }514 515 attrs[n] = attr;516 517 return attrs;518 312 } 519 313 … … 535 329 * free text search is not supported for BioMart-based IDMapper. 536 330 */ 537 public Set<Xref> freeSearch (String text, int limit) throws IDMapperException { 331 public Set<Xref> freeSearch (String text, int limit) 332 throws IDMapperException { 538 333 throw new UnsupportedOperationException(); 539 334 } … … 544 339 * @throws IOException if failed to read the filters 545 340 */ 546 protected Set<DataSource> getSupportedSrcDataSources() throws IOException { 341 protected Set<DataSource> getSupportedSrcDataSources() 342 throws IDMapperException { 547 343 Set<DataSource> dss = new HashSet(); 548 Map<String, Filter> filters = stub.getFilters(datasetName); 549 for (Filter filter : filters.values()) { 550 //String fullName = filter.getDisplayName()+" ("+filter.getName()+")"; 551 String fullName = filter.getDisplayName(); 552 if (fullName.endsWith("(s)")) { 553 fullName = fullName.substring(0, fullName.length()-3); 554 } 555 //TODO: mapping to bridgedb system code 556 DataSource ds = DataSource.getByFullName(fullName); 557 dss.add(ds); 558 mapSrcDSFilter.put(ds, filter); 344 Set<String> filters = stub.availableFilters(mart, dataset); 345 for (String filter : filters) { 346 DataSource ds = DataSource.getByFullName(filter); 347 if (ds!=null) { 348 dss.add(ds); 349 } 559 350 } 560 351 return dss; … … 566 357 * @throws IOException if failed to read the filters 567 358 */ 568 protected Set<DataSource> getSupportedTgtDataSources() throws IOException {569 Map<String, Attribute> attributeVals = stub.getAttributes(datasetName);359 protected Set<DataSource> getSupportedTgtDataSources() 360 throws IDMapperException { 570 361 Set<DataSource> dss = new HashSet(); 571 for (Attribute attr : attributeVals.values()) { 572 String displayName = attr.getDisplayName(); 573 String name = attr.getName(); 574 if (idOnlyForTgtDataSource) { 575 if (!displayName.endsWith("ID") 576 && !displayName.endsWith("Accession") 577 && !name.endsWith("id") 578 && !name.endsWith("accession")) { 579 continue; 580 } 581 } 582 //String fullName = displayName + " ("+name+")"; 583 String fullName = displayName; 584 //TODO: mapping to bridgedb system code 585 DataSource ds = DataSource.getByFullName(fullName); 586 dss.add(ds); 587 mapSrcDSAttr.put(ds, attr); 362 Set<String> attributes = stub.availableAttributes(mart, dataset, 363 idOnlyForTgtDataSource); 364 365 for (String attr : attributes) { 366 DataSource ds = DataSource.getByFullName(attr); 367 if (ds!=null) { 368 dss.add(ds); 369 } 588 370 } 589 371 return dss; … … 603 385 /** {@inheritDoc} */ 604 386 public Set<DataSource> getSupportedSrcDataSources() throws IDMapperException { 605 try { 606 return IDMapperBiomart.this.getSupportedSrcDataSources(); 607 } catch (IOException ex) { 608 throw new IDMapperException(ex); 609 } 387 return IDMapperBiomart.this.supportedSrcDs; 610 388 } 611 389 612 390 /** {@inheritDoc} */ 613 391 public Set<DataSource> getSupportedTgtDataSources() throws IDMapperException { 614 try { 615 return IDMapperBiomart.this.getSupportedTgtDataSources(); 616 } catch (IOException ex) { 617 throw new IDMapperException(ex); 618 } 392 return IDMapperBiomart.this.supportedTgtDs; 619 393 } 620 394
