From eb1a37142515018a96b0eda1cd11429eaa4dc728 Mon Sep 17 00:00:00 2001 From: cfrainay <clement.frainay@inrae.fr> Date: Thu, 3 Feb 2022 18:12:59 +0100 Subject: [PATCH 1/7] [Toolbox] create subnetwork extraction from reaction graph --- .../ExtractSubReactionNetwork.java | 142 ++++++++++++++++++ 1 file changed, 142 insertions(+) create mode 100644 met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/ExtractSubReactionNetwork.java diff --git a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/ExtractSubReactionNetwork.java b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/ExtractSubReactionNetwork.java new file mode 100644 index 000000000..9faa1976c --- /dev/null +++ b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/ExtractSubReactionNetwork.java @@ -0,0 +1,142 @@ +package fr.inrae.toulouse.metexplore.met4j_toolbox.networkAnalysis; + +import fr.inrae.toulouse.metexplore.met4j_core.biodata.BioMetabolite; +import fr.inrae.toulouse.metexplore.met4j_core.biodata.BioReaction; +import fr.inrae.toulouse.metexplore.met4j_core.biodata.BioNetwork; +import fr.inrae.toulouse.metexplore.met4j_core.biodata.collection.BioCollection; +import fr.inrae.toulouse.metexplore.met4j_graph.computation.connect.KShortestPath; +import fr.inrae.toulouse.metexplore.met4j_graph.computation.connect.ShortestPath; +import fr.inrae.toulouse.metexplore.met4j_graph.computation.connect.SteinerTreeApprox; +import fr.inrae.toulouse.metexplore.met4j_graph.computation.connect.weighting.DefaultWeightPolicy; +import fr.inrae.toulouse.metexplore.met4j_graph.computation.connect.weighting.DegreeWeightPolicy; +import fr.inrae.toulouse.metexplore.met4j_graph.computation.connect.weighting.WeightsFromFile; +import fr.inrae.toulouse.metexplore.met4j_graph.core.BioPath; +import fr.inrae.toulouse.metexplore.met4j_graph.core.GraphFactory; +import 
fr.inrae.toulouse.metexplore.met4j_graph.core.WeightingPolicy; +import fr.inrae.toulouse.metexplore.met4j_graph.core.compound.CompoundGraph; +import fr.inrae.toulouse.metexplore.met4j_graph.core.reaction.CompoundEdge; +import fr.inrae.toulouse.metexplore.met4j_graph.core.reaction.ReactionGraph; +import fr.inrae.toulouse.metexplore.met4j_graph.core.compound.ReactionEdge; +import fr.inrae.toulouse.metexplore.met4j_graph.io.Bionetwork2BioGraph; +import fr.inrae.toulouse.metexplore.met4j_graph.io.ExportGraph; +import fr.inrae.toulouse.metexplore.met4j_graph.io.NodeMapping; +import fr.inrae.toulouse.metexplore.met4j_io.jsbml.reader.JsbmlReader; +import fr.inrae.toulouse.metexplore.met4j_io.jsbml.reader.Met4jSbmlReaderException; +import fr.inrae.toulouse.metexplore.met4j_mapping.Mapper; +import fr.inrae.toulouse.metexplore.met4j_toolbox.generic.AbstractMet4jApplication; +import org.kohsuke.args4j.Option; + +import java.io.IOException; +import java.util.HashSet; +import java.util.List; + +public class ExtractSubReactionNetwork extends AbstractMet4jApplication { + + @Option(name = "-i", usage = "input SBML file", required = true) + public String inputPath = null; + @Option(name = "-s", usage = "input sources txt file", required = true) + public String sourcePath = null; + @Option(name = "-t", usage = "input targets txt file", required = true) + public String targetPath = null; + @Option(name = "-o", usage = "output gml file", required = true) + public String outputPath = null; + + + @Option(name = "-sc", aliases = {"--side"}, usage = "an optional file containing list of side compounds to ignore") + public String sideCompoundFile = null; + + @Option(name = "-cw", aliases = {"--customWeights"}, usage = "an optional file containing weights for compound pairs", forbids = {"-dw", "-sw"}) + public String weightFile = null; + + //@Option(name = "-u", aliases = {"--undirected"}, usage = "Ignore reaction direction") + //public Boolean undirected = false; + + @Option(name = "-k", 
usage = "Extract k-shortest paths", forbids = {"-st"}) + public int k = 1; + @Option(name = "-st", aliases = {"--steinertree"}, usage = "Extract Steiner Tree", forbids = {"-k"}) + public boolean st = false; + + + public void run() throws IOException, Met4jSbmlReaderException { + //import network + JsbmlReader reader = new JsbmlReader(this.inputPath, false); + BioNetwork network = reader.read(); + + //Graph processing: import side compounds + System.err.println("importing side compounds..."); + Mapper<BioMetabolite> mapper = new Mapper<>(network,BioNetwork::getMetabolitesView).skipIfNotFound(); + BioCollection<BioMetabolite> sideCpds = mapper.map(sideCompoundFile); + if(mapper.getNumberOfSkippedEntries()>0) System.err.println(mapper.getNumberOfSkippedEntries() + " side compounds not found in network."); + System.err.println(sideCpds.size() + " side compounds ignored during graph build."); + + //get sources and targets + System.err.println("extracting sources and targets"); + Mapper<BioReaction> rmapper = new Mapper<>(network,BioNetwork::getReactionsView).skipIfNotFound(); + HashSet<BioReaction> sources = new HashSet<>(rmapper.map(sourcePath)); + if(rmapper.getNumberOfSkippedEntries()>0) System.err.println(rmapper.getNumberOfSkippedEntries() + " source not found in network."); + HashSet<BioReaction> targets = new HashSet<>(rmapper.map(targetPath)); + if(rmapper.getNumberOfSkippedEntries()>0) System.err.println(rmapper.getNumberOfSkippedEntries() + " target not found in network."); + + //Create compound graph + Bionetwork2BioGraph builder = new Bionetwork2BioGraph(network); + ReactionGraph graph = builder.getReactionGraph(sideCpds); + + //Graph processing: set weights [optional] + WeightingPolicy<BioReaction, CompoundEdge, ReactionGraph> wp = new DefaultWeightPolicy<>(); + if (weightFile != null) { + wp = new WeightsFromFile(weightFile, true); + } + wp.setWeight(graph); + + //extract sub-network + GraphFactory<BioReaction, CompoundEdge, ReactionGraph> factory = new 
GraphFactory<>() { + @Override + public ReactionGraph createGraph() { + return new ReactionGraph(); + } + }; + ReactionGraph subnet; + if (st) { + SteinerTreeApprox<BioReaction, CompoundEdge, ReactionGraph> stComp = new SteinerTreeApprox<>(graph); + List<CompoundEdge> stEdges = stComp.getSteinerTreeList(sources, targets, (weightFile != null)); + subnet = factory.createGraphFromEdgeList(stEdges); + } else if (k > 1) { + KShortestPath<BioReaction, CompoundEdge, ReactionGraph> kspComp = new KShortestPath<>(graph); + List<BioPath<BioReaction, CompoundEdge>> kspPath = kspComp.getKShortestPathsUnionList(sources, targets, k); + subnet = factory.createGraphFromPathList(kspPath); + } else { + ShortestPath<BioReaction, CompoundEdge, ReactionGraph> spComp = new ShortestPath<>(graph); + List<BioPath<BioReaction, CompoundEdge>> spPath = spComp.getShortestPathsUnionList(sources, targets); + subnet = factory.createGraphFromPathList(spPath); + } + + //export sub-network + ExportGraph.toGmlWithAttributes(subnet, outputPath); + + } + + public static void main(String[] args) throws IOException, Met4jSbmlReaderException { + ExtractSubReactionNetwork app = new ExtractSubReactionNetwork(); + app.parseArguments(args); + app.run(); + } + + @Override + public String getLabel() { + return this.getClass().getSimpleName(); + } + + @Override + public String getLongDescription() { + return this.getShortDescription() + "\n" + + "The subnetwork correspond to part of the network that connects reactions from the first list to reactions from the second list.\n" + + "Sources and targets list can have elements in common. The connecting part can be defined as the union of shortest or k-shortest paths between sources and targets, " + + "or the Steiner tree connecting them. Contrary to Compound graph, reaction graph often lacks weighting policy for edge relevance. 
In order to ensure appropriate " + + "network density, a list of side compounds to ignore for linking reactions must be provided"; + } + + @Override + public String getShortDescription() { + return "Create a subnetwork from a GSMN in SBML format, and two files containing lists of reactions of interests ids, one per row, plus one file of the same format containing side compounds ids."; + } +} -- GitLab From 8055b9d1c6b9f09d6d56030b00592691d9e3ba6b Mon Sep 17 00:00:00 2001 From: cfrainay <clement.frainay@inrae.fr> Date: Fri, 4 Feb 2022 11:43:41 +0100 Subject: [PATCH 2/7] fix cli option --- .../networkAnalysis/ExtractSubReactionNetwork.java | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/ExtractSubReactionNetwork.java b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/ExtractSubReactionNetwork.java index 9faa1976c..46892348b 100644 --- a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/ExtractSubReactionNetwork.java +++ b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/ExtractSubReactionNetwork.java @@ -45,12 +45,9 @@ public class ExtractSubReactionNetwork extends AbstractMet4jApplication { @Option(name = "-sc", aliases = {"--side"}, usage = "an optional file containing list of side compounds to ignore") public String sideCompoundFile = null; - @Option(name = "-cw", aliases = {"--customWeights"}, usage = "an optional file containing weights for compound pairs", forbids = {"-dw", "-sw"}) + @Option(name = "-cw", aliases = {"--customWeights"}, usage = "an optional file containing weights for reactions pairs") public String weightFile = null; - //@Option(name = "-u", aliases = {"--undirected"}, usage = "Ignore reaction direction") - //public Boolean undirected = false; - @Option(name = "-k", usage = "Extract k-shortest paths", forbids = {"-st"}) public 
int k = 1; @Option(name = "-st", aliases = {"--steinertree"}, usage = "Extract Steiner Tree", forbids = {"-k"}) -- GitLab From 5ed81cc165c81ccb2e3c3f06c0afd41814245960 Mon Sep 17 00:00:00 2001 From: Ludovic Cottret <ludovic.cottret@inra.fr> Date: Tue, 8 Feb 2022 09:02:56 +0100 Subject: [PATCH 3/7] grammar/spelling/typo fix --- .../networkAnalysis/ExtractSubReactionNetwork.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/ExtractSubReactionNetwork.java b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/ExtractSubReactionNetwork.java index 46892348b..8325de502 100644 --- a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/ExtractSubReactionNetwork.java +++ b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/ExtractSubReactionNetwork.java @@ -126,7 +126,7 @@ public class ExtractSubReactionNetwork extends AbstractMet4jApplication { @Override public String getLongDescription() { return this.getShortDescription() + "\n" + - "The subnetwork correspond to part of the network that connects reactions from the first list to reactions from the second list.\n" + + "The subnetwork corresponds to part of the network that connects reactions from the first list to reactions from the second list.\n" + "Sources and targets list can have elements in common. The connecting part can be defined as the union of shortest or k-shortest paths between sources and targets, " + "or the Steiner tree connecting them. Contrary to Compound graph, reaction graph often lacks weighting policy for edge relevance. 
In order to ensure appropriate " + "network density, a list of side compounds to ignore for linking reactions must be provided"; -- GitLab From 50b421abebc63ff4776c469d2ab1100a7eb7ef8a Mon Sep 17 00:00:00 2001 From: Ludovic Cottret <ludovic.cottret@inra.fr> Date: Tue, 8 Feb 2022 09:03:20 +0100 Subject: [PATCH 4/7] grammar/spelling/typo fix --- .../networkAnalysis/ExtractSubReactionNetwork.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/ExtractSubReactionNetwork.java b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/ExtractSubReactionNetwork.java index 8325de502..0fa763913 100644 --- a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/ExtractSubReactionNetwork.java +++ b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/ExtractSubReactionNetwork.java @@ -128,7 +128,7 @@ public class ExtractSubReactionNetwork extends AbstractMet4jApplication { return this.getShortDescription() + "\n" + "The subnetwork corresponds to part of the network that connects reactions from the first list to reactions from the second list.\n" + "Sources and targets list can have elements in common. The connecting part can be defined as the union of shortest or k-shortest paths between sources and targets, " + - "or the Steiner tree connecting them. Contrary to Compound graph, reaction graph often lacks weighting policy for edge relevance. In order to ensure appropriate " + + "or the Steiner tree connecting them. Contrary to compound graph, reaction graph often lacks weighting policy for edge relevance. 
In order to ensure appropriate " + "network density, a list of side compounds to ignore for linking reactions must be provided"; } -- GitLab From c79859d2ea0357301a9346ad023b583d5b36de92 Mon Sep 17 00:00:00 2001 From: cfrainay <clement.frainay@inrae.fr> Date: Mon, 21 Feb 2022 16:37:51 +0100 Subject: [PATCH 5/7] fix typo --- .../networkAnalysis/ExtractSubReactionNetwork.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/ExtractSubReactionNetwork.java b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/ExtractSubReactionNetwork.java index 0fa763913..c6f7b4840 100644 --- a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/ExtractSubReactionNetwork.java +++ b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/ExtractSubReactionNetwork.java @@ -74,7 +74,7 @@ public class ExtractSubReactionNetwork extends AbstractMet4jApplication { HashSet<BioReaction> targets = new HashSet<>(rmapper.map(targetPath)); if(rmapper.getNumberOfSkippedEntries()>0) System.err.println(rmapper.getNumberOfSkippedEntries() + " target not found in network."); - //Create compound graph + //Create reaction graph Bionetwork2BioGraph builder = new Bionetwork2BioGraph(network); ReactionGraph graph = builder.getReactionGraph(sideCpds); -- GitLab From 15e4f7bf5a2f2fa59b710e65d140cce37bab1af9 Mon Sep 17 00:00:00 2001 From: cfrainay <clement.frainay@inrae.fr> Date: Tue, 22 Feb 2022 13:59:33 +0100 Subject: [PATCH 6/7] makes side compound definition mandatory avoid latency during reaction graph build and traversal due to over-density --- .../networkAnalysis/ExtractSubReactionNetwork.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/ExtractSubReactionNetwork.java 
b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/ExtractSubReactionNetwork.java index c6f7b4840..d5e04ede5 100644 --- a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/ExtractSubReactionNetwork.java +++ b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/ExtractSubReactionNetwork.java @@ -40,10 +40,9 @@ public class ExtractSubReactionNetwork extends AbstractMet4jApplication { public String targetPath = null; @Option(name = "-o", usage = "output gml file", required = true) public String outputPath = null; - - - @Option(name = "-sc", aliases = {"--side"}, usage = "an optional file containing list of side compounds to ignore") + @Option(name = "-sc", aliases = {"--side"}, usage = "a file containing list of side compounds to ignore", required = true) public String sideCompoundFile = null; + @Option(name = "-cw", aliases = {"--customWeights"}, usage = "an optional file containing weights for reactions pairs") public String weightFile = null; -- GitLab From a8bd7fac4fe452338fbe544793c39075c6a5cba0 Mon Sep 17 00:00:00 2001 From: cfrainay <clement.frainay@inrae.fr> Date: Tue, 22 Feb 2022 16:52:38 +0100 Subject: [PATCH 7/7] Improve doc, mention weight file --- .../networkAnalysis/ExtractSubReactionNetwork.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/ExtractSubReactionNetwork.java b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/ExtractSubReactionNetwork.java index d5e04ede5..64fab85fd 100644 --- a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/ExtractSubReactionNetwork.java +++ b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/ExtractSubReactionNetwork.java @@ -128,7 +128,7 @@ public class ExtractSubReactionNetwork extends 
AbstractMet4jApplication { "The subnetwork corresponds to part of the network that connects reactions from the first list to reactions from the second list.\n" + "Sources and targets list can have elements in common. The connecting part can be defined as the union of shortest or k-shortest paths between sources and targets, " + "or the Steiner tree connecting them. Contrary to compound graph, reaction graph often lacks weighting policy for edge relevance. In order to ensure appropriate " + - "network density, a list of side compounds to ignore for linking reactions must be provided"; + "network density, a list of side compounds to ignore for linking reactions must be provided. An optional edge weight file, if available, can also be used."; } @Override -- GitLab