root/trunk/org.bridgedb/src/org/bridgedb/DataSource.java
@
324
| Revision 324, 12.5 KB (checked in by martijn, 6 months ago) | |
|---|---|
|
|
| Line | |
|---|---|
| 1 | // BridgeDb, |
| 2 | // An abstraction layer for identifier mapping services, both local and online. |
| 3 | // Copyright 2006-2009 BridgeDb developers |
| 4 | // |
| 5 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 6 | // you may not use this file except in compliance with the License. |
| 7 | // You may obtain a copy of the License at |
| 8 | // |
| 9 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | // |
| 11 | // Unless required by applicable law or agreed to in writing, software |
| 12 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 | // See the License for the specific language governing permissions and |
| 15 | // limitations under the License. |
| 16 | // |
| 17 | package org.bridgedb; |
| 18 | |
| 19 | import java.io.UnsupportedEncodingException; |
| 20 | import java.net.URLEncoder; |
| 21 | import java.util.ArrayList; |
| 22 | import java.util.HashMap; |
| 23 | import java.util.HashSet; |
| 24 | import java.util.List; |
| 25 | import java.util.Map; |
| 26 | import java.util.Set; |
| 27 | |
| 28 | /** |
| 29 | contains information about a certain DataSource, such as |
| 30 | <ul> |
| 31 | <li>It's full name ("Ensembl") |
| 32 | <li>It's system code ("En") |
| 33 | <li>It's main url ("http://www.ensembl.org") |
| 34 | <li>Id-specific url's ("http://www.ensembl.org/Homo_sapiens/Gene/Summary?g=" + id) |
| 35 | </ul> |
| 36 | The DataSource class uses the extensible enum pattern. |
| 37 | You can't instantiate DataSources directly, instead you have to use one of |
| 38 | the constants from the org.bridgedb.bio module such as BioDataSource.ENSEMBL, |
| 39 | or the "getBySystemcode" or "getByFullname" methods. |
| 40 | These methods return a predefined DataSource object if it exists. |
| 41 | If a predefined DataSource for a requested SystemCode doesn't exists, |
| 42 | a new one springs to life automatically. This can be used |
| 43 | when the user requests new, unknown data sources. If you call |
| 44 | getBySystemCode twice with the same argument, it is guaranteed |
| 45 | that you get the same return object. However, there is no way |
| 46 | to combine a new DataSource with a new FullName unless you use |
| 47 | the "register" method. |
| 48 | <p> |
| 49 | This way any number of pre-defined DataSources can be used, |
| 50 | but plugins can define new ones and you can |
| 51 | handle unknown data sources in the same |
| 52 | way as predefined ones. |
| 53 | <p> |
| 54 | Definitions for common DataSources can be found in {@link org.bridgedb.bio.BioDataSource}. |
| 55 | */ |
| 56 | public final class DataSource |
| 57 | { |
| 58 | private static Map<String, DataSource> bySysCode = new HashMap<String, DataSource>(); |
| 59 | private static Map<String, DataSource> byFullName = new HashMap<String, DataSource>(); |
| 60 | private static Set<DataSource> registry = new HashSet<DataSource>(); |
| 61 | |
| 62 | private String sysCode = null; |
| 63 | private String fullName = null; |
| 64 | private String mainUrl = null; |
| 65 | private String prefix = ""; |
| 66 | private String postfix = ""; |
| 67 | private Object organism = null; |
| 68 | private String idExample = null; |
| 69 | private boolean isPrimary = true; |
| 70 | private String type = "unknown"; |
| 71 | private String urnBase = ""; |
| 72 | |
| 73 | /** |
| 74 | * Constructor is private, so that we don't |
| 75 | * get any standalone DataSources. |
| 76 | * DataSources should be obtained from |
| 77 | * {@link getByFullName} or {@link getBySystemCode}. Information about |
| 78 | * DataSources can be added with {@link register} |
| 79 | */ |
| 80 | private DataSource () {} |
| 81 | |
| 82 | /** |
| 83 | * Turn id into url pointing to info page on the web, e.g. "http://www.ensembl.org/get?id=ENSG..." |
| 84 | * @param id identifier to use in url |
| 85 | * @return Url |
| 86 | */ |
| 87 | public String getUrl(String id) |
| 88 | { |
| 89 | return prefix + id + postfix; |
| 90 | } |
| 91 | |
| 92 | /** |
| 93 | * returns full name of DataSource e.g. "Ensembl". |
| 94 | * May return null if only the system code is known. |
| 95 | * Also used as identifier in GPML |
| 96 | * @return full name of DataSource |
| 97 | */ |
| 98 | public String getFullName() |
| 99 | { |
| 100 | return fullName; |
| 101 | } |
| 102 | |
| 103 | /** |
| 104 | * returns GenMAPP SystemCode, e.g. "En". May return null, |
| 105 | * if only the full name is known. |
| 106 | * Also used as identifier in |
| 107 | * <ol> |
| 108 | * <li>Gdb databases, |
| 109 | * <li>Gex databases. |
| 110 | * <li>Imported data |
| 111 | * <li>the Mapp format. |
| 112 | * </ol> |
| 113 | * We should try not to use the system code anywhere outside |
| 114 | * these 4 uses. |
| 115 | * @return systemcode, a short unique code. |
| 116 | */ |
| 117 | public String getSystemCode() |
| 118 | { |
| 119 | return sysCode; |
| 120 | } |
| 121 | |
| 122 | /** |
| 123 | * Return the main Url for this datasource, |
| 124 | * that can be used to refer to the datasource in general. |
| 125 | * (e.g. http://www.ensembl.org/) |
| 126 | * |
| 127 | * May return null in case the main url is unknown. |
| 128 | * @return main url |
| 129 | */ |
| 130 | public String getMainUrl() |
| 131 | { |
| 132 | return mainUrl; |
| 133 | } |
| 134 | |
| 135 | /** |
| 136 | * @return type of entity that this DataSource describes, for example |
| 137 | * "metabolite", "gene", "protein" or "probe" |
| 138 | */ |
| 139 | public String getType() |
| 140 | { |
| 141 | return type; |
| 142 | } |
| 143 | |
| 144 | /** |
| 145 | * Creates a global identifier. |
| 146 | * It uses the MIRIAM data type list |
| 147 | * to create a MIRIAM URI like "urn:miriam:uniprot:P12345", |
| 148 | * or if this DataSource is not included |
| 149 | * in the MIRIAM data types list, a bridgedb URI. |
| 150 | * @param id Id to generate URN from. |
| 151 | * @return the URN. |
| 152 | */ |
| 153 | public String getURN(String id) |
| 154 | { |
| 155 | String idPart = ""; |
| 156 | try |
| 157 | { |
| 158 | idPart = URLEncoder.encode(id, "UTF-8"); |
| 159 | } catch (UnsupportedEncodingException ex) { idPart = id; } |
| 160 | return urnBase + ":" + idPart; |
| 161 | } |
| 162 | |
| 163 | /** |
| 164 | * Uses builder pattern to set optional attributes for a DataSource. For example, this allows you to use the |
| 165 | * following code: |
| 166 | * <pre> |
| 167 | * DataSource.register("X", "Affymetrix") |
| 168 | * .mainUrl("http://www.affymetrix.com") |
| 169 | * .type("probe") |
| 170 | * .primary(false); |
| 171 | * </pre> |
| 172 | */ |
| 173 | public static final class Builder |
| 174 | { |
| 175 | private final DataSource current; |
| 176 | |
| 177 | /** |
| 178 | * Create a Builder for a DataSource. Note that an existing DataSource is |
| 179 | * modified rather than creating a new one. |
| 180 | * This constructor should only be called by the register method. |
| 181 | * @param current the DataSource to be modified |
| 182 | */ |
| 183 | private Builder(DataSource current) |
| 184 | { |
| 185 | this.current = current; |
| 186 | } |
| 187 | |
| 188 | /** |
| 189 | * @return the DataSource under construction |
| 190 | */ |
| 191 | public DataSource asDataSource() |
| 192 | { |
| 193 | return current; |
| 194 | } |
| 195 | |
| 196 | /** |
| 197 | * |
| 198 | * @param urlPattern is a template for generating valid URL's for identifiers. |
| 199 | * The pattern should contain the substring "$ID", which will be replaced by the actual identifier. |
| 200 | * @return the same Builder object so you can chain setters |
| 201 | */ |
| 202 | public Builder urlPattern (String urlPattern) |
| 203 | { |
| 204 | if (urlPattern == null || "".equals (urlPattern)) |
| 205 | { |
| 206 | current.prefix = ""; |
| 207 | current.postfix = ""; |
| 208 | } |
| 209 | else |
| 210 | { |
| 211 | int pos = urlPattern.indexOf("$ID"); |
| 212 | if (pos == -1) throw new IllegalArgumentException("Url maker pattern for " + current + "' should have $ID in it"); |
| 213 | current.prefix = urlPattern.substring(0, pos); |
| 214 | current.postfix = urlPattern.substring(pos + 3); |
| 215 | } |
| 216 | return this; |
| 217 | } |
| 218 | |
| 219 | /** |
| 220 | * @param mainUrl url of homepage |
| 221 | * @return the same Builder object so you can chain setters |
| 222 | */ |
| 223 | public Builder mainUrl (String mainUrl) |
| 224 | { |
| 225 | current.mainUrl = mainUrl; |
| 226 | return this; |
| 227 | } |
| 228 | |
| 229 | |
| 230 | /** |
| 231 | * @param idExample an example id from this system |
| 232 | * @return the same Builder object so you can chain setters |
| 233 | */ |
| 234 | public Builder idExample (String idExample) |
| 235 | { |
| 236 | current.idExample = idExample; |
| 237 | return this; |
| 238 | } |
| 239 | |
| 240 | /** |
| 241 | * @param isPrimary secondary id's such as EC numbers, Gene Ontology or vendor-specific systems occur in data or linkouts, |
| 242 | * but their use in pathways is discouraged |
| 243 | * @return the same Builder object so you can chain setters |
| 244 | */ |
| 245 | public Builder primary (boolean isPrimary) |
| 246 | { |
| 247 | current.isPrimary = isPrimary; |
| 248 | return this; |
| 249 | } |
| 250 | |
| 251 | /** |
| 252 | * @param type the type of datasource, for example "protein", "gene", "metabolite" |
| 253 | * @return the same Builder object so you can chain setters |
| 254 | */ |
| 255 | public Builder type (String type) |
| 256 | { |
| 257 | current.type = type; |
| 258 | return this; |
| 259 | } |
| 260 | |
| 261 | /** |
| 262 | * @param organism organism for which this system code is suitable, or null for any / not applicable |
| 263 | * @return the same Builder object so you can chain setters |
| 264 | */ |
| 265 | public Builder organism (Object organism) |
| 266 | { |
| 267 | current.organism = organism; |
| 268 | return this; |
| 269 | } |
| 270 | |
| 271 | /** |
| 272 | * @param base for urn generation, for example "urn:miriam:uniprot" |
| 273 | * @return the same Builder object so you can chain setters |
| 274 | */ |
| 275 | public Builder urnBase (String base) |
| 276 | { |
| 277 | current.urnBase = base; |
| 278 | return this; |
| 279 | } |
| 280 | } |
| 281 | |
| 282 | /** |
| 283 | * Register a new DataSource with (optional) detailed information. |
| 284 | * This can be used by other modules to define new DataSources. |
| 285 | * @param sysCode short unique code between 1-4 letters, originally used by GenMAPP |
| 286 | * @param fullName full name used in GPML. Must be 20 or less characters |
| 287 | * @return Builder that can be used for adding detailed information. |
| 288 | */ |
| 289 | public static Builder register(String sysCode, String fullName) |
| 290 | { |
| 291 | DataSource current = null; |
| 292 | if (fullName == null && sysCode == null) throw new NullPointerException(); |
| 293 | // if (fullName != null && fullName.length() > 20) |
| 294 | // { |
| 295 | // throw new IllegalArgumentException("full Name '" + fullName + "' must be 20 or less characters"); |
| 296 | // } |
| 297 | |
| 298 | if (byFullName.containsKey(fullName)) |
| 299 | { |
| 300 | current = byFullName.get(fullName); |
| 301 | } |
| 302 | else if (bySysCode.containsKey(sysCode)) |
| 303 | { |
| 304 | current = bySysCode.get(sysCode); |
| 305 | } |
| 306 | else |
| 307 | { |
| 308 | current = new DataSource (); |
| 309 | registry.add (current); |
| 310 | } |
| 311 | |
| 312 | current.sysCode = sysCode; |
| 313 | current.fullName = fullName; |
| 314 | |
| 315 | if (isSuitableKey(sysCode)) |
| 316 | bySysCode.put(sysCode, current); |
| 317 | if (isSuitableKey(fullName)) |
| 318 | byFullName.put(fullName, current); |
| 319 | |
| 320 | return new Builder(current); |
| 321 | } |
| 322 | |
| 323 | /** |
| 324 | * Helper method to determine if a String is allowed as key for bySysCode and byFullname hashes. |
| 325 | * Null values and empty strings are not allowed. |
| 326 | * @param key key to check. |
| 327 | * @return true if the key is allowed |
| 328 | */ |
| 329 | private static boolean isSuitableKey(String key) |
| 330 | { |
| 331 | return !(key == null || "".equals(key)); |
| 332 | } |
| 333 | |
| 334 | |
| 335 | /** |
| 336 | * @param systemCode short unique code to query for |
| 337 | * @return pre-existing DataSource object by system code, |
| 338 | * if it exists, or creates a new one. |
| 339 | */ |
| 340 | public static DataSource getBySystemCode(String systemCode) |
| 341 | { |
| 342 | if (!bySysCode.containsKey(systemCode) && isSuitableKey(systemCode)) |
| 343 | { |
| 344 | register (systemCode, null); |
| 345 | } |
| 346 | return bySysCode.get(systemCode); |
| 347 | } |
| 348 | |
| 349 | /** |
| 350 | * returns pre-existing DataSource object by |
| 351 | * full name, if it exists, |
| 352 | * or creates a new one. |
| 353 | * @param fullName full name to query for |
| 354 | * @return DataSource |
| 355 | */ |
| 356 | public static DataSource getByFullName(String fullName) |
| 357 | { |
| 358 | if (!byFullName.containsKey(fullName) && isSuitableKey(fullName)) |
| 359 | { |
| 360 | register (null, fullName); |
| 361 | } |
| 362 | return byFullName.get(fullName); |
| 363 | } |
| 364 | |
| 365 | /** |
| 366 | get all registered datasoures as a set. |
| 367 | @return set of all registered DataSources |
| 368 | */ |
| 369 | static public Set<DataSource> getDataSources() |
| 370 | { |
| 371 | return registry; |
| 372 | } |
| 373 | |
| 374 | /** |
| 375 | * returns a filtered subset of available datasources. |
| 376 | * @param primary Filter for specified primary-ness. If null, don't filter on primary-ness. |
| 377 | * @param metabolite Filter for specified metabolite-ness. If null, don't filter on metabolite-ness. |
| 378 | * @param o Filter for specified organism. If null, don't filter on organism. |
| 379 | * @return filtered set. |
| 380 | */ |
| 381 | static public Set<DataSource> getFilteredSet (Boolean primary, Boolean metabolite, Object o) |
| 382 | { |
| 383 | final Set<DataSource> result = new HashSet<DataSource>(); |
| 384 | for (DataSource ds : registry) |
| 385 | { |
| 386 | if ( |
| 387 | (primary == null || ds.isPrimary() == primary) && |
| 388 | (metabolite == null || ds.isMetabolite() == metabolite) && |
| 389 | (o == null || ds.organism == null || o == ds.organism)) |
| 390 | { |
| 391 | result.add (ds); |
| 392 | } |
| 393 | } |
| 394 | return result; |
| 395 | } |
| 396 | |
| 397 | /** |
| 398 | * Get a list of all non-null full names. |
| 399 | * <p> |
| 400 | * Warning: the ordering of this list is undefined. |
| 401 | * Two subsequent calls may give different results. |
| 402 | * @return List of full names |
| 403 | */ |
| 404 | static public List<String> getFullNames() |
| 405 | { |
| 406 | final List<String> result = new ArrayList<String>(); |
| 407 | result.addAll (byFullName.keySet()); |
| 408 | return result; |
| 409 | } |
| 410 | /** |
| 411 | * The string representation of a DataSource is equal to |
| 412 | * it's full name. (e.g. "Ensembl") |
| 413 | * @return String representation |
| 414 | */ |
| 415 | public String toString() |
| 416 | { |
| 417 | return fullName; |
| 418 | } |
| 419 | |
| 420 | /** |
| 421 | * @return example Xref, mostly for testing purposes |
| 422 | */ |
| 423 | public Xref getExample () |
| 424 | { |
| 425 | return new Xref (idExample, this); |
| 426 | } |
| 427 | |
| 428 | /** |
| 429 | * @return if this is a primary DataSource or not. Primary DataSources |
| 430 | * are preferred when annotating models. |
| 431 | * |
| 432 | * A DataSource is primary if it is not of type probe, |
| 433 | * so that means e.g. Affymetrix or Agilent probes are not primary. All |
| 434 | * gene, protein and metabolite identifiers are primary. |
| 435 | */ |
| 436 | public boolean isPrimary() |
| 437 | { |
| 438 | return isPrimary; |
| 439 | } |
| 440 | |
| 441 | /** |
| 442 | * @return if this DataSource describes metabolites or not. |
| 443 | */ |
| 444 | public boolean isMetabolite() |
| 445 | { |
| 446 | return type.equals ("metabolite"); |
| 447 | } |
| 448 | |
| 449 | /** |
| 450 | * @return Organism that this DataSource describes, or null if multiple / not applicable. |
| 451 | */ |
| 452 | public Object getOrganism() |
| 453 | { |
| 454 | return organism; |
| 455 | } |
| 456 | |
| 457 | } |
Note: See TracBrowser
for help on using the browser.
