root/trunk/org.bridgedb/src/org/bridgedb/DataSource.java @ 324

Revision 324, 12.5 KB (checked in by martijn, 6 months ago)

Fixes NPE in DataSource.isMetabolite()

  • Property svn:eol-style set to native
Line 
1// BridgeDb,
2// An abstraction layer for identifier mapping services, both local and online.
3// Copyright 2006-2009 BridgeDb developers
4//
5// Licensed under the Apache License, Version 2.0 (the "License");
6// you may not use this file except in compliance with the License.
7// You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS,
13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14// See the License for the specific language governing permissions and
15// limitations under the License.
16//
17package org.bridgedb;
18
19import java.io.UnsupportedEncodingException;
20import java.net.URLEncoder;
21import java.util.ArrayList;
22import java.util.HashMap;
23import java.util.HashSet;
24import java.util.List;
25import java.util.Map;
26import java.util.Set;
27
28/**
29contains information about a certain DataSource, such as
30<ul>
31<li>It's full name ("Ensembl")
32<li>It's system code ("En")
33<li>It's main url ("http://www.ensembl.org")
34<li>Id-specific url's ("http://www.ensembl.org/Homo_sapiens/Gene/Summary?g=" + id)
35</ul>
36The DataSource class uses the extensible enum pattern.
37You can't instantiate DataSources directly, instead you have to use one of
38the constants from the org.bridgedb.bio module such as BioDataSource.ENSEMBL,
39or the "getBySystemcode" or "getByFullname" methods.
40These methods return a predefined DataSource object if it exists.
41If a predefined DataSource for a requested SystemCode doesn't exists,
42a new one springs to life automatically. This can be used
43when the user requests new, unknown data sources. If you call
44getBySystemCode twice with the same argument, it is guaranteed
45that you get the same return object. However, there is no way
46to combine a new DataSource with a new FullName unless you use
47the "register" method.
48<p>
49This way any number of pre-defined DataSources can be used,
50but plugins can define new ones and you can
51handle unknown data sources in the same
52way as predefined ones.
53<p>
54Definitions for common DataSources can be found in {@link org.bridgedb.bio.BioDataSource}.
55*/
56public final class DataSource
57{
58        private static Map<String, DataSource> bySysCode = new HashMap<String, DataSource>();
59        private static Map<String, DataSource> byFullName = new HashMap<String, DataSource>();
60        private static Set<DataSource> registry = new HashSet<DataSource>();
61       
62        private String sysCode = null;
63        private String fullName = null;
64        private String mainUrl = null;
65        private String prefix = "";
66        private String postfix = "";
67        private Object organism = null;
68        private String idExample = null;
69        private boolean isPrimary = true;
70        private String type = "unknown";
71        private String urnBase = "";
72       
73        /**
74         * Constructor is private, so that we don't
75         * get any standalone DataSources.
76         * DataSources should be obtained from
77         * {@link getByFullName} or {@link getBySystemCode}. Information about
78         * DataSources can be added with {@link register}
79         */
80        private DataSource () {}
81       
82        /**
83         * Turn id into url pointing to info page on the web, e.g. "http://www.ensembl.org/get?id=ENSG..."
84         * @param id identifier to use in url
85         * @return Url
86         */
87        public String getUrl(String id)
88        {
89                return prefix + id + postfix;
90        }
91                               
92        /**
93         * returns full name of DataSource e.g. "Ensembl".
94         * May return null if only the system code is known.
95         * Also used as identifier in GPML
96         * @return full name of DataSource
97         */
98        public String getFullName()
99        {
100                return fullName;
101        }
102       
103        /**
104         * returns GenMAPP SystemCode, e.g. "En". May return null,
105         * if only the full name is known.
106         * Also used as identifier in
107         * <ol>
108         * <li>Gdb databases,
109         * <li>Gex databases.
110         * <li>Imported data
111         * <li>the Mapp format.
112         * </ol>
113         * We should try not to use the system code anywhere outside
114         * these 4 uses.
115         * @return systemcode, a short unique code.
116         */
117        public String getSystemCode()
118        {
119                return sysCode;
120        }
121       
122        /**
123         * Return the main Url for this datasource,
124         * that can be used to refer to the datasource in general.
125         * (e.g. http://www.ensembl.org/)
126         *
127         * May return null in case the main url is unknown.
128         * @return main url
129         */
130        public String getMainUrl()
131        {       
132                return mainUrl;
133        }
134
135        /**
136         * @return type of entity that this DataSource describes, for example
137         *   "metabolite", "gene", "protein" or "probe"
138         */
139        public String getType()
140        {
141                return type;
142        }
143       
144        /**
145         * Creates a global identifier.
146         * It uses the MIRIAM data type list
147         * to create a MIRIAM URI like "urn:miriam:uniprot:P12345",
148         * or if this DataSource is not included
149         * in the MIRIAM data types list, a bridgedb URI.
150         * @param id Id to generate URN from.
151         * @return the URN.
152         */
153        public String getURN(String id)
154        {
155                String idPart = "";
156                try
157                {
158                        idPart = URLEncoder.encode(id, "UTF-8");
159                } catch (UnsupportedEncodingException ex) { idPart = id; }
160                return urnBase + ":" + idPart;
161        }
162       
163        /**
164         * Uses builder pattern to set optional attributes for a DataSource. For example, this allows you to use the
165         * following code:
166         * <pre>
167         * DataSource.register("X", "Affymetrix")
168         *     .mainUrl("http://www.affymetrix.com")
169         *     .type("probe")
170         *     .primary(false);
171         * </pre>
172         */
173        public static final class Builder
174        {
175                private final DataSource current;
176               
177                /**
178                 * Create a Builder for a DataSource. Note that an existing DataSource is
179                 * modified rather than creating a new one.
180                 * This constructor should only be called by the register method.
181                 * @param current the DataSource to be modified
182                 */
183                private Builder(DataSource current)
184                {
185                        this.current = current;
186                }
187               
188                /**
189                 * @return the DataSource under construction
190                 */
191                public DataSource asDataSource()
192                {
193                        return current;
194                }
195               
196                /**
197                 *
198                 * @param urlPattern is a template for generating valid URL's for identifiers.
199                 *      The pattern should contain the substring "$ID", which will be replaced by the actual identifier.
200                 * @return the same Builder object so you can chain setters
201                 */
202                public Builder urlPattern (String urlPattern)
203                {
204                        if (urlPattern == null || "".equals (urlPattern))
205                        {
206                                current.prefix = "";
207                                current.postfix = "";
208                        }
209                        else
210                        {
211                                int pos = urlPattern.indexOf("$ID");
212                                if (pos == -1) throw new IllegalArgumentException("Url maker pattern for " + current + "' should have $ID in it");
213                                current.prefix = urlPattern.substring(0, pos);
214                                current.postfix = urlPattern.substring(pos + 3);
215                        }
216                        return this;
217                }
218               
219                /**
220                 * @param mainUrl url of homepage
221                 * @return the same Builder object so you can chain setters
222                 */
223                public Builder mainUrl (String mainUrl)
224                {
225                        current.mainUrl = mainUrl;
226                        return this;
227                }
228
229
230                /**
231                 * @param idExample an example id from this system
232                 * @return the same Builder object so you can chain setters
233                 */
234                public Builder idExample (String idExample)
235                {
236                        current.idExample = idExample;
237                        return this;
238                }
239               
240                /**
241                 * @param isPrimary secondary id's such as EC numbers, Gene Ontology or vendor-specific systems occur in data or linkouts,
242                 *      but their use in pathways is discouraged
243                 * @return the same Builder object so you can chain setters
244                 */
245                public Builder primary (boolean isPrimary)
246                {
247                        current.isPrimary = isPrimary;
248                        return this;
249                }
250               
251                /**
252                 * @param type the type of datasource, for example "protein", "gene", "metabolite"
253                 * @return the same Builder object so you can chain setters
254                 */
255                public Builder type (String type)
256                {
257                        current.type = type;
258                        return this;
259                }
260               
261                /**
262                 * @param organism organism for which this system code is suitable, or null for any / not applicable
263                 * @return the same Builder object so you can chain setters
264                 */
265                public Builder organism (Object organism)
266                {
267                        current.organism = organism;
268                        return this;
269                }
270               
271                /**
272                 * @param base for urn generation, for example "urn:miriam:uniprot"
273                 * @return the same Builder object so you can chain setters
274                 */
275                public Builder urnBase (String base)
276                {
277                        current.urnBase = base;
278                        return this;
279                }
280        }
281       
282        /**
283         * Register a new DataSource with (optional) detailed information.
284         * This can be used by other modules to define new DataSources.
285         * @param sysCode short unique code between 1-4 letters, originally used by GenMAPP
286         * @param fullName full name used in GPML. Must be 20 or less characters
287         * @return Builder that can be used for adding detailed information.
288         */
289        public static Builder register(String sysCode, String fullName)
290        {
291                DataSource current = null;
292                if (fullName == null && sysCode == null) throw new NullPointerException();
293//              if (fullName != null && fullName.length() > 20)
294//              {
295//                      throw new IllegalArgumentException("full Name '" + fullName + "' must be 20 or less characters");
296//              }
297               
298                if (byFullName.containsKey(fullName))
299                {
300                        current = byFullName.get(fullName);
301                }
302                else if (bySysCode.containsKey(sysCode))
303                {
304                        current = bySysCode.get(sysCode);
305                }
306                else
307                {
308                        current = new DataSource ();
309                        registry.add (current);
310                }
311               
312                current.sysCode = sysCode;
313                current.fullName = fullName;
314
315                if (isSuitableKey(sysCode))
316                        bySysCode.put(sysCode, current);
317                if (isSuitableKey(fullName))
318                        byFullName.put(fullName, current);
319               
320                return new Builder(current);
321        }
322       
323        /**
324         * Helper method to determine if a String is allowed as key for bySysCode and byFullname hashes.
325         * Null values and empty strings are not allowed.
326         * @param key key to check.
327         * @return true if the key is allowed
328         */
329        private static boolean isSuitableKey(String key)
330        {
331                return !(key == null || "".equals(key));
332        }
333       
334       
335        /**
336         * @param systemCode short unique code to query for
337         * @return pre-existing DataSource object by system code,
338         *      if it exists, or creates a new one.
339         */
340        public static DataSource getBySystemCode(String systemCode)
341        {
342                if (!bySysCode.containsKey(systemCode) && isSuitableKey(systemCode))
343                {
344                        register (systemCode, null);
345                }
346                return bySysCode.get(systemCode);
347        }
348       
349        /**
350         * returns pre-existing DataSource object by
351         * full name, if it exists,
352         * or creates a new one.
353         * @param fullName full name to query for
354         * @return DataSource
355         */
356        public static DataSource getByFullName(String fullName)
357        {
358                if (!byFullName.containsKey(fullName) && isSuitableKey(fullName))
359                {
360                        register (null, fullName);
361                }
362                return byFullName.get(fullName);
363        }
364       
365        /**
366                get all registered datasoures as a set.
367                @return set of all registered DataSources
368        */ 
369        static public Set<DataSource> getDataSources()
370        {
371                return registry;
372        }
373       
374        /**
375         * returns a filtered subset of available datasources.
376         * @param primary Filter for specified primary-ness. If null, don't filter on primary-ness.
377         * @param metabolite Filter for specified metabolite-ness. If null, don't filter on metabolite-ness.
378         * @param o Filter for specified organism. If null, don't filter on organism.
379         * @return filtered set.
380         */
381        static public Set<DataSource> getFilteredSet (Boolean primary, Boolean metabolite, Object o)
382        {
383                final Set<DataSource> result = new HashSet<DataSource>();
384                for (DataSource ds : registry)
385                {
386                        if (
387                                        (primary == null || ds.isPrimary() == primary) &&
388                                        (metabolite == null || ds.isMetabolite() == metabolite) &&
389                                        (o == null || ds.organism == null || o == ds.organism))
390                        {
391                                result.add (ds);
392                        }
393                }
394                return result;
395        }
396       
397        /**
398         * Get a list of all non-null full names.
399         * <p>
400         * Warning: the ordering of this list is undefined.
401         * Two subsequent calls may give different results.
402         * @return List of full names
403         */
404        static public List<String> getFullNames()
405        {
406                final List<String> result = new ArrayList<String>();
407                result.addAll (byFullName.keySet());
408                return result;
409        }
410        /**
411         * The string representation of a DataSource is equal to
412         * it's full name. (e.g. "Ensembl")
413         * @return String representation
414         */
415        public String toString()
416        {
417                return fullName;
418        }
419       
420        /**
421         * @return example Xref, mostly for testing purposes
422         */
423        public Xref getExample ()
424        {
425                return new Xref (idExample, this);
426        }
427       
428        /**
429         * @return if this is a primary DataSource or not. Primary DataSources
430         * are preferred when annotating models.
431         *
432         * A DataSource is primary if it is not of type probe,
433         * so that means e.g. Affymetrix or Agilent probes are not primary. All
434         * gene, protein and metabolite identifiers are primary.
435         */
436        public boolean isPrimary()
437        {
438                return isPrimary;
439        }
440       
441        /**
442         * @return if this DataSource describes metabolites or not.
443         */
444        public boolean isMetabolite()
445        {
446                return type.equals ("metabolite");
447        }
448
449        /**
450         * @return Organism that this DataSource describes, or null if multiple / not applicable.
451         */
452        public Object getOrganism()
453        {
454                return organism;
455        }
456
457}
Note: See TracBrowser for help on using the browser.