<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Archiving and Interchange DTD with MathML3 v1.2 20190208//EN" "JATS-archivearticle1-mathml3.dtd">
<article xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="1.2"><front><journal-meta><journal-id journal-id-type="nlm-ta">elife</journal-id><journal-id journal-id-type="publisher-id">eLife</journal-id><journal-title-group><journal-title>eLife</journal-title></journal-title-group><issn publication-format="electronic" pub-type="epub">2050-084X</issn><publisher><publisher-name>eLife Sciences Publications, Ltd</publisher-name></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">70056</article-id><article-id pub-id-type="doi">10.7554/eLife.70056</article-id><article-categories><subj-group subj-group-type="display-channel"><subject>Tools and Resources</subject></subj-group><subj-group subj-group-type="heading"><subject>Plant Biology</subject></subj-group></article-categories><title-group><article-title>Increased signal-to-noise ratios within experimental field trials by regressing spatially distributed soil properties as principal components</article-title></title-group><contrib-group><contrib contrib-type="author" id="author-239286"><name><surname>Berry</surname><given-names>Jeffrey C</given-names></name><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="other" rid="fund1"/><xref ref-type="fn" rid="con1"/><xref ref-type="fn" rid="conf1"/></contrib><contrib contrib-type="author" id="author-224030"><name><surname>Qi</surname><given-names>Mingsheng</given-names></name><contrib-id authenticated="true" contrib-id-type="orcid">https://orcid.org/0000-0003-2448-1227</contrib-id><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="fn" rid="con2"/><xref ref-type="fn" rid="conf2"/></contrib><contrib contrib-type="author" id="author-239287"><name><surname>Sonawane</surname><given-names>Balasaheb V</given-names></name><xref ref-type="aff" rid="aff2">2</xref><xref ref-type="other" rid="fund1"/><xref ref-type="fn" rid="con3"/><xref ref-type="fn" rid="conf1"/></contrib><contrib contrib-type="author" id="author-239285"><name><surname>Sheflin</surname><given-names>Amy</given-names></name><xref ref-type="aff" rid="aff3">3</xref><xref ref-type="fn" rid="con4"/><xref ref-type="fn" rid="conf2"/></contrib><contrib contrib-type="author" id="author-196784"><name><surname>Cousins</surname><given-names>Asaph</given-names></name><xref ref-type="aff" rid="aff2">2</xref><xref ref-type="fn" rid="con5"/><xref ref-type="fn" rid="conf2"/></contrib><contrib contrib-type="author" id="author-239284"><name><surname>Prenni</surname><given-names>Jessica</given-names></name><xref ref-type="aff" rid="aff3">3</xref><xref ref-type="fn" rid="con6"/><xref ref-type="fn" rid="conf2"/></contrib><contrib contrib-type="author" id="author-239288"><name><surname>Schachtman</surname><given-names>Daniel P</given-names></name><contrib-id authenticated="true" contrib-id-type="orcid">https://orcid.org/0000-0003-1807-4369</contrib-id><xref ref-type="aff" rid="aff4">4</xref><xref ref-type="fn" rid="con7"/><xref ref-type="fn" rid="conf2"/></contrib><contrib contrib-type="author" id="author-239283"><name><surname>Liu</surname><given-names>Peng</given-names></name><xref ref-type="aff" rid="aff5">5</xref><xref ref-type="fn" rid="con8"/><xref ref-type="fn" rid="conf2"/></contrib><contrib contrib-type="author" corresp="yes" id="author-189625"><name><surname>Bart</surname><given-names>Rebecca S</given-names></name><contrib-id authenticated="true" contrib-id-type="orcid">https://orcid.org/0000-0003-1378-3481</contrib-id><email>rbart@danforthcenter.org</email><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="other" rid="fund1"/><xref ref-type="fn" rid="con9"/><xref ref-type="fn" rid="conf3"/></contrib><aff id="aff1"><label>1</label><institution-wrap><institution-id institution-id-type="ror">https://ror.org/000cyem11</institution-id><institution>Donald Danforth Plant Science Center</institution></institution-wrap><addr-line><named-content content-type="city">St. Louis</named-content></addr-line><country>United States</country></aff><aff id="aff2"><label>2</label><institution-wrap><institution-id institution-id-type="ror">https://ror.org/05dk0ce17</institution-id><institution>School of Biological Sciences, Washington State University</institution></institution-wrap><addr-line><named-content content-type="city">Pullman</named-content></addr-line><country>United States</country></aff><aff id="aff3"><label>3</label><institution-wrap><institution-id institution-id-type="ror">https://ror.org/03k1gpj17</institution-id><institution>Department of Biochemistry and Molecular Biology, Colorado State University</institution></institution-wrap><addr-line><named-content content-type="city">Fort Collins</named-content></addr-line><country>United States</country></aff><aff id="aff4"><label>4</label><institution-wrap><institution-id institution-id-type="ror">https://ror.org/043mer456</institution-id><institution>Department of Agronomy and Horticulture, University of Nebraska-Lincoln</institution></institution-wrap><addr-line><named-content content-type="city">Lincoln</named-content></addr-line><country>United States</country></aff><aff id="aff5"><label>5</label><institution-wrap><institution-id institution-id-type="ror">https://ror.org/04rswrd78</institution-id><institution>Department of Statistics, Iowa State University</institution></institution-wrap><addr-line><named-content content-type="city">Ames</named-content></addr-line><country>United States</country></aff></contrib-group><contrib-group content-type="section"><contrib contrib-type="editor"><name><surname>Whiteman</surname><given-names>Noah K</given-names></name><role>Reviewing Editor</role><aff><institution-wrap><institution-id institution-id-type="ror">https://ror.org/01an7q238</institution-id><institution>University of California, Berkeley</institution></institution-wrap><country>United States</country></aff></contrib><contrib contrib-type="senior_editor"><name><surname>Schuman</surname><given-names>Meredith C</given-names></name><role>Senior Editor</role><aff><institution-wrap><institution-id institution-id-type="ror">https://ror.org/02crff812</institution-id><institution>University of Zurich</institution></institution-wrap><country>Switzerland</country></aff></contrib></contrib-group><pub-date publication-format="electronic" date-type="publication"><day>12</day><month>07</month><year>2022</year></pub-date><pub-date pub-type="collection"><year>2022</year></pub-date><volume>11</volume><elocation-id>e70056</elocation-id><history><date date-type="received" iso-8601-date="2021-05-05"><day>05</day><month>05</month><year>2021</year></date><date date-type="accepted" iso-8601-date="2022-06-29"><day>29</day><month>06</month><year>2022</year></date></history><pub-history><event><event-desc>This manuscript was published as a preprint at bioRxiv.</event-desc><date date-type="preprint" iso-8601-date="2021-04-30"><day>30</day><month>04</month><year>2021</year></date><self-uri content-type="preprint" xlink:href="https://doi.org/10.1101/2021.04.29.441834"/></event></pub-history><permissions><copyright-statement>&#169; 2022, Berry et al</copyright-statement><copyright-year>2022</copyright-year><copyright-holder>Berry et al</copyright-holder><ali:free_to_read/><license xlink:href="http://creativecommons.org/licenses/by/4.0/"><ali:license_ref>http://creativecommons.org/licenses/by/4.0/</ali:license_ref><license-p>This article is distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License</ext-link>, which permits unrestricted use and redistribution provided that the original author and source are credited.</license-p></license></permissions><self-uri content-type="pdf" xlink:href="elife-70056-v1.pdf"/><self-uri content-type="figures-pdf" xlink:href="elife-70056-figures-v1.pdf"/><abstract><p>Environmental variability poses a major challenge to any field study. Researchers attempt to mitigate this challenge through replication. Thus, the ability to detect experimental signals is determined by the degree of replication and the amount of environmental variation, noise, within the experimental system. A major source of noise in field studies comes from the natural heterogeneity of soil properties which create microtreatments throughout the field. In addition, the variation within different soil properties is often nonrandomly distributed across a field. We explore this challenge through a sorghum field trial dataset with accompanying plant, microbiome, and soil property data. Diverse sorghum genotypes and two watering regimes were applied in a split-plot design. We describe a process of identifying, estimating, and controlling for the effects of spatially distributed soil properties on plant traits and microbial communities using minimal degrees of freedom. Importantly, this process provides a method with which sources of environmental variation in field data can be identified and adjusted, improving our ability to resolve effects of interest and to quantify subtle phenotypes.</p></abstract><kwd-group kwd-group-type="author-keywords"><kwd>sorghum</kwd><kwd>field research</kwd><kwd>microbiome</kwd><kwd>drought</kwd></kwd-group><kwd-group kwd-group-type="research-organism"><title>Research organism</title><kwd>Other</kwd></kwd-group><funding-group><award-group id="fund1"><funding-source><institution-wrap><institution-id institution-id-type="FundRef">http://dx.doi.org/10.13039/100000015</institution-id><institution>U.S. Department of Energy</institution></institution-wrap></funding-source><award-id>DE_SC0014395</award-id><principal-award-recipient><name><surname>Berry</surname><given-names>Jeffrey C</given-names></name><name><surname>Qi</surname><given-names>Mingsheng</given-names></name><name><surname>Sonawane</surname><given-names>Balasaheb V</given-names></name><name><surname>Sheflin</surname><given-names>Amy</given-names></name><name><surname>Cousins</surname><given-names>Asaph</given-names></name><name><surname>Prenni</surname><given-names>Jessica</given-names></name><name><surname>Schachtman</surname><given-names>Daniel P</given-names></name><name><surname>Liu</surname><given-names>Peng</given-names></name><name><surname>Bart</surname><given-names>Rebecca S</given-names></name></principal-award-recipient></award-group><funding-statement>The funders had no role in study design, data collection, and interpretation, or the decision to submit the work for publication.</funding-statement></funding-group><custom-meta-group><custom-meta specific-use="meta-only"><meta-name>Author impact statement</meta-name><meta-value>A method to reduce the noise present in field experiments by accounting for heterogeneity in field soils.</meta-value></custom-meta></custom-meta-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Environmental variation makes the real world a noisy place to conduct science. In the context of experimental agriculture fields, variation in topography may result in uneven water moisture accumulation. Similarly, soil nutrients such as nitrogen and phosphate are often nonuniformly distributed across a field. These unintended and often unknown sources of environmental variation may significantly affect the experimental results. The traditional approach to mitigate this variability is through experimental designs that include replicate blocks (<xref ref-type="bibr" rid="bib20">Piepho et al., 2013</xref>; <xref ref-type="bibr" rid="bib7">Fisher, 1925</xref>). While helpful for controlling for variation that is relatively uniform within the blocks, true biological signal may still be masked by other experimental noise that is heterogeneous within blocks.</p><p>Analytical approaches have been used to parameterize spatial variation within a field using traditional mixed-effect modeling. These methods come in two general flavors: estimating spatial-covariance structures and spatial-smoothing using splines (<xref ref-type="bibr" rid="bib25">Rodr&#237;guez-&#193;lvarez et al., 2016</xref>; <xref ref-type="bibr" rid="bib19">Piepho et al., 2008</xref>; <xref ref-type="bibr" rid="bib32">Velazco et al., 2017</xref>). The former is older and canonical but requires advanced statistical knowledge to interpret the results. The latter is newer and easier to use courtesy of advancements in computation. Spatial-smoothing has been shown to effectively account for spatial variations in uniform barley fields and promotes genetic heritability in simulation studies (<xref ref-type="bibr" rid="bib25">Rodr&#237;guez-&#193;lvarez et al., 2016</xref>). While spatial-smoothing using splines does effectively address spatial variation of a trait in a field, traditional parameterizations using spatial-covariance structures do so as well and further provide intuitive metrics on the type and shape of the structure. Estimating and accounting for spatial structure have proven useful for a variety of biological systems including nematodes (<xref ref-type="bibr" rid="bib23">Quist et al., 2019</xref>), microbiomes (<xref ref-type="bibr" rid="bib8">Franklin and Mills, 2003</xref>), forestry (<xref ref-type="bibr" rid="bib14">Ohashi and Gyokusen, 2007</xref>; <xref ref-type="bibr" rid="bib12">M&#246;tt&#246;nen et al., 1999</xref>; <xref ref-type="bibr" rid="bib1">Bai et al., 2012</xref>), and ionomics QTL mapping (<xref ref-type="bibr" rid="bib16">Pauli et al., 2018</xref>). These previous studies have used spatial-covariance estimation methods to identify and associate spatial effects on various traits of interest. These methods are also the backbone of geospatial statistics where the goal is to interpolate values between sampling points (<xref ref-type="bibr" rid="bib15">Olea, 2018</xref>).</p><p>Another challenge facing field studies is to identify which factors of a multivariate dataset influence the traits of interest and then adjust for the effects from all covariates while maintaining sufficient degrees of freedom for statistical inference. This challenge is similar to the challenge associated with genome-wide association studies (GWAS) that must handle population structure. In GWAS studies, phylogenetic relatedness is managed by principal component analysis (<xref ref-type="bibr" rid="bib21">Price et al., 2006</xref>). Principal components (PCs) capture axes of most variation and effectively reduce a complex multivariate dataset down to only a few independent vectors of greatest importance. PCs have also previously been used for dimension reduction to investigate how various soil nutrients affect natural selection on plant roots (<xref ref-type="bibr" rid="bib13">Murren et al., 2020</xref>).</p><p>Here, we combine approaches from geospatial statistics with dimension reduction to deal with environmental variation in field studies. Specifically, we estimated spatial-covariance structures for each factor and then accounted for these effects using principal component regression. Applying this approach reduced experimental noise due to environmental variation and revealed previously hidden associations in a field study. This field study with 24 varieties of sorghum and 2 watering treatments, well-watered and water stressed, arranged in a split-plot design with 8 replicate blocks, was completed in 2017. Several types of data were collected including, but not limited to, plant harvest traits (height, fresh and dry weight, and panicle size), soil property composition (calcium, magnesium, nitrate, organic matter, pH, phosphate, potassium, salinity, sodium, sulfate, and total cations), three microbiome samplings for each plot (root, rhizosphere, and soil), leaf traits (specific leaf area, C and N content, and stable isotopes of C and N), and root metabolomic profile. In this study, the soil chemical and physical properties were used as the multicovariates that exhibited spatial-covariance structure and subsequently created microtreatment effects throughout the field that are associated with plant traits. We demonstrate that accounting for these effects via residuals of principal component regression is an effective method to improve the detection of effects of treatment factors and reduce the noise caused by spatial variation within a field.</p></sec><sec id="s2" sec-type="results"><title>Results</title><sec id="s2-1"><title>Geospatial statistics interpolates soil property composition throughout the field</title><p>We previously described a field-level experiment in which sorghum and its associated microbiome were evaluated across two different watering treatments (<xref ref-type="bibr" rid="bib22">Qi et al., 2022</xref>). As is typical of field studies, the collected data showed significant variability across all measured parameters. We also measured several different soil properties at multiple points throughout the field including organic matter, pH, phosphate, nitrate, sum of cations, calcium, magnesium, potassium, sodium, sulfate, and salinity compositions. We hypothesized that variation across the field in these soil properties may explain some of the variability in the other measurements. Here, using the field and microbiome datasets from <xref ref-type="bibr" rid="bib22">Qi et al., 2022</xref>, we describe and evaluate a general method for reducing noise and apply this method to additional metabolomics and stable isotope datasets.</p><p>First, because only a limited set of points across the field were sampled for soil properties, there was a need to estimate the missing values (<xref ref-type="fig" rid="fig1">Figure 1A</xref>). We suspected that sample proximity would introduce correlation within the measured soil properties. To assess this type of spatial correlation, we employed techniques from geospatial statistics to capture the correlation structure of any pair of samples in the field. Of the 12 properties tested, 6 properties exhibited evidence of spatial distributions (p value &lt;0.05): salinity (mmho), nitrate (ppm), sulfate (ppm), calcium (ppm), magnesium (ppm), and phosphate (ppm) (see Methods: Statistical testing for evidence of spatial structure). For these six soil properties we estimated the missing values throughout the field. Interpolation of values between sampling points was performed by leveraging spatial correlation structure to predict unobserved values, a process called kriging (see Methods: Geospatial interpolation methods) (<xref ref-type="table" rid="table1">Table 1</xref>). To test the kriging accuracy, we performed a leave-one-out cross-validation for each soil property. Through this analysis we observed that the error of the predictions, when scaled to unit variance of the observations, exhibit distributions that resemble the expected standard <italic>Z</italic>-distribution (<xref ref-type="fig" rid="fig1s1">Figure 1&#8212;figure supplement 1</xref>). The ratio of the partial sill to the nugget is a proxy for the magnitude of the variance that is attributable to the spatial structure. Comparing this ratio for nitrate and phosphate shows that the phosphate spatial correlation was much stronger than the nitrate spatial correlation (<xref ref-type="fig" rid="fig1">Figure 1</xref>). Calcium also exhibited correlation structure of distances larger than nitrate, but much smaller than phosphate (<xref ref-type="fig" rid="fig1">Figure 1</xref>). To visualize the spatial structure for each soil property, the kriged values of each property were centered around the mean and scaled to unit variance (<xref ref-type="fig" rid="fig1">Figure 1</xref>). This analysis revealed that the soil properties exhibited different topographies across the field. For example, phosphate levels were high in a band across the center of the field while nitrate and calcium levels were more variable with several high and low spots (<xref ref-type="fig" rid="fig1">Figure 1</xref>). We also considered correlation between the different soil properties and observed several correlation blocks, implying similar spatial structures (<xref ref-type="fig" rid="fig1s2">Figure 1&#8212;figure supplement 2</xref>).</p><fig-group><fig id="fig1" position="float"><label>Figure 1.</label><caption><title>Graphical depictions of field layout where each cell is a plot in the field.</title><p>Water treatment is specified on right; WS = water stressed, WW = well-watered. Eight split-plot replicate blocks are denoted in gray vertical bars. Color scale represents data with genotype and treatment removed. Green indicates larger than average, white indicates approximately average, and magenta indicates below average values. (<bold>A</bold>) Nitrate values are shown for each cell (outlined in gray) that were sampled for soil property analysis. (<bold>B</bold>) Kriged nitrate values to estimate nitrate levels in unsampled plots. (<bold>C, D</bold>) Kriged values for phosphate and calcium. Variogram fit of spatial model is indicated with model type, nugget, partial sill, and range.</p></caption><graphic mimetype="image" mime-subtype="jpeg" xlink:href="elife-70056.xml.media/fig1.jpg"/></fig><fig id="fig1s1" position="float" specific-use="child-fig"><label>Figure 1&#8212;figure supplement 1.</label><caption><title>Leave-one-out cross-validation of soil property observations.</title><p>Shown is the difference between the predicted and the observed values for each sample normalized to the standard deviation of the observations, <italic>x</italic>-axis, and their respective frequency, <italic>y</italic>-axis.</p></caption><graphic mimetype="image" mime-subtype="jpeg" xlink:href="elife-70056.xml.media/fig1-figsupp1.jpg"/></fig><fig id="fig1s2" position="float" specific-use="child-fig"><label>Figure 1&#8212;figure supplement 2.</label><caption><title>Pearson correlations between all pairwise soil properties.</title><p>Negative correlations are depicted as shades of red, and positive correlations are depicted as blue. Soil properties are ordered by hierarchical clustering based on Euclidean distance and complete agglomeration.</p></caption><graphic mimetype="image" mime-subtype="jpeg" xlink:href="elife-70056.xml.media/fig1-figsupp2.jpg"/></fig></fig-group><table-wrap id="table1" position="float"><label>Table 1.</label><caption><title>Shown are sum-of-square errors for each soil property (columns) and each spatial model tested (rows).</title><p>Cells that have asterisks are those models that have minimal errors and are chosen to be the best-fit model for kriging for the respective soil property.</p></caption><table frame="hsides" rules="groups"><thead><tr><th align="left" valign="top"/><th align="left" valign="top">Salinity</th><th align="left" valign="top">Nitrate</th><th align="left" valign="top">Sulfate</th><th align="left" valign="top">Calcium</th><th align="left" valign="top">Magnesium</th><th align="left" valign="top">Phosphate</th></tr></thead><tbody><tr><td align="left" valign="top">Nugget only</td><td align="char" char="hyphen" valign="top">1.92115E&#8722;05</td><td align="char" char="." valign="top">203.668</td><td align="char" char="." valign="top">10645.6</td><td align="char" char="." valign="top">833211000</td><td align="char" char="." valign="top">2261360</td><td align="char" char="." valign="top">149,688</td></tr><tr><td align="left" valign="top">Exponential</td><td align="char" char="hyphen" valign="top">1.1769E&#8722;06</td><td align="char" char="." valign="top">41.2651</td><td align="char" char="." valign="top">1909.53</td><td align="char" char="." valign="top">17886300000</td><td align="char" char="." valign="top">65136300</td><td align="char" char="." valign="top">27556.2</td></tr><tr><td align="left" valign="top">Spherical</td><td align="char" char="hyphen" valign="top">2.03099E&#8722;06</td><td align="char" char="." valign="top">25.5247</td><td align="char" char="." valign="top">2058.42</td><td align="char" char="." valign="top">244155000*</td><td align="char" char="." valign="top">1038010*</td><td align="char" char="." valign="top">32351.9</td></tr><tr><td align="left" valign="top">Gaussian</td><td align="char" char="hyphen" valign="top">2.16269E&#8722;06</td><td align="char" char="." valign="top">22.3183*</td><td align="char" char="." valign="top">289,060</td><td align="char" char="." valign="top">17669600000</td><td align="char" char="." valign="top">64365000</td><td align="char" char="." valign="top">100,531</td></tr><tr><td align="left" valign="top">Matern</td><td align="char" char="." valign="top">0.00000113273*</td><td align="char" char="." valign="top">23.0069</td><td align="char" char="." valign="top">1682.03*</td><td align="char" char="." valign="top">266373000</td><td align="char" char="." valign="top">1087980</td><td align="char" char="." valign="top">18961.6</td></tr><tr><td align="left" valign="top">Stein&#8217;s Matern</td><td align="char" char="hyphen" valign="top">1.13273E&#8722;06</td><td align="char" char="." valign="top">23.0069</td><td align="char" char="." valign="top">290,357</td><td align="char" char="." valign="top">17886300000</td><td align="char" char="." valign="top">65136300</td><td align="char" char="." valign="top">18827.8*</td></tr></tbody></table></table-wrap></sec><sec id="s2-2"><title>Soil property variation influences plant phenotypes and microbiome composition</title><p>The above analyses clearly showed that soil properties were variable across the field site. However, it was not clear whether the observed variation was large enough to affect plant-associated phenotypes or the microbiome. To address this, we used constrained analysis of principal coordinates (CAP). With CAP, it is possible to identify specific effects on a multidimensional dataset while acknowledging variation due to other effects. For example, to understand if and how soil property variation affected microbiome composition, we first had to control for the effects of the watering treatments, the different genotypes and their interactions (see Methods: Statistical testing for phenotype&#8211;property associations). First, CAP was computed and a permutation analysis of variance (ANOVA), using 999 iterations, was performed on each soil property to identify if there is an interaction with the different compartments on the overall microbiome composition and resulted in all but one soil property, sulfate, showing an interaction effect (<xref ref-type="table" rid="table2">Table 2</xref>). Following the interaction effect permutation ANOVA, post hoc permutation ANOVAs, using 999 permutations, were performed to identify specific soil property influence on the three compartments independently. From this analysis, we observed that the root microbiome was invariant to all soil property variations. In contrast, the rhizosphere and soil microbiomes were influenced by the variation in several soil properties (<xref ref-type="fig" rid="fig2">Figure 2A</xref>). Additionally, there were some soil properties (salinity, sulfate, and calcium) whose variation affected either the rhizosphere or soil microbiomes but not both. This suggests that microbiome compartments are differentially sensitive to different types of soil property variation. CAP and permutation ANOVA were also applied to annotated root metabolomic profiles. In contrast to the large effects seen in the microbiomes, no soil property variances were associated with changes in the metabolomic profile (<xref ref-type="fig" rid="fig2s1">Figure 2&#8212;figure supplement 1</xref>). This may reflect the relative stability of the metabolites identified from gas chromatography&#8211;mass spectrometry (GC&#8211;MS) which mostly represents primary metabolites, or the sensitivity of measurements.</p><table-wrap id="table2" position="float"><label>Table 2.</label><caption><title>Shown for each soil property that exhibits significant spatial distribution (see Methods: Statistical testing for evidence of spatial structure) (first column) is the interaction effect with the different microbiome tissue compartments on the overall microbiome composition (p value &lt;0.05 indicates significant interaction, PERMANOVA from model: Composition ~ Property:Compartment).</title></caption><table frame="hsides" rules="groups"><thead><tr><th align="left" valign="top">Soil property</th><th align="left" valign="top">p value</th></tr></thead><tbody><tr><td align="left" valign="top">Sulfate (ppm)</td><td align="char" char="." valign="top">0.24</td></tr><tr><td align="left" valign="top">Salinity (mmho/cm)</td><td align="char" char="." valign="top">0.011</td></tr><tr><td align="left" valign="top">Phosphate (ppm)</td><td align="char" char="." valign="top">0.001</td></tr><tr><td align="left" valign="top">Nitrate (ppm)</td><td align="char" char="." valign="top">0.001</td></tr><tr><td align="left" valign="top">Magnesium (ppm)</td><td align="char" char="." valign="top">0.003</td></tr><tr><td align="left" valign="top">Calcium (ppm)</td><td align="char" char="." valign="top">0.005</td></tr></tbody></table></table-wrap><fig-group><fig id="fig2" position="float"><label>Figure 2.</label><caption><title>Association of soil property variations with multiple phenotypes.</title><p>Six soil properties were assessed for effect on root microbiome and plant phenotypes using permutation ANOVA. Cells are colored by &#8722;log10 p value of the effect. (<bold>A</bold>) Effect of each soil property on microbiome beta diversity from three root compartments: root (endosphere), RHZ (rhizosphere), and soil (bulk soil), while constraining on genotype and treatment. Effect of each soil nutrient on the height and weight (<bold>B</bold>) and leaf &#948;<sup>15</sup>N (<bold>C</bold>) using type III sum of squares while including treatment, genotype, and interaction as additional fixed effects. (<bold>D</bold>) Example effect of kriged phosphate, <italic>x</italic>-axis, on plant height, adjusted for genotype and treatment, <italic>y</italic>-axis.</p></caption><graphic mimetype="image" mime-subtype="jpeg" xlink:href="elife-70056.xml.media/fig2.jpg"/></fig><fig id="fig2s1" position="float" specific-use="child-fig"><label>Figure 2&#8212;figure supplement 1.</label><caption><title>Same analysis as <xref ref-type="fig" rid="fig2">Figure 2A-C</xref>, but here are shown the phenotypes that did not demonstrate soil property associations.</title></caption><graphic mimetype="image" mime-subtype="jpeg" xlink:href="elife-70056.xml.media/fig2-figsupp1.jpg"/></fig></fig-group><p>While microbiome and metabolite data are highly multivariate, plant phenotypes, composed of individual phenotypic traits, are much less so and are therefore suited to univariate statistical analyses. Just as the microbiome has the potential to be influenced by the soil property variations, the same could be true for the univariate phenotypic traits. Similarly, the effects due to the experimental design must be acknowledged to more precisely evaluate the association a given property has on the phenotypic trait. Mixed-effect models were created for each soil property&#8211;phenotypic trait pair with treatment, genotype, and their interaction as fixed effects and random effect for the split-plot replicates each having multivariate-normal spatial correlation structure (see Methods: Statistical testing for phenotype&#8211;property associations; <xref ref-type="disp-formula" rid="equ5">Equation 4</xref>). The precise effect a given soil property has on a phenotypic trait was evaluated using type III sum of squares to account for the other sources of variance (treatment, genotype, and the interaction) on the phenotypic trait. Traditional <italic>F</italic>-test from the ANOVA revealed that plant height and total fresh weight are influenced by soil phosphate and magnesium variation, respectively (<xref ref-type="fig" rid="fig2">Figure 2B</xref>). Similar modeling of the leaf traits indicated the soil phosphate variation is significantly associated with &#7839;<sup>15</sup>N (<xref ref-type="fig" rid="fig2">Figure 2C</xref>). Many other phenotypic traits were examined and did not have statistically significant associations with the variation in soil properties (<xref ref-type="fig" rid="fig2s1">Figure 2&#8212;figure supplement 1</xref>). Closer examination showed that soil phosphate levels are mildly inversely correlated to plant height (<xref ref-type="fig" rid="fig2">Figure 2D</xref>). This supports the hypothesis that excess phosphate inhibits plant growth and development (<xref ref-type="bibr" rid="bib29">Shukla et al., 2017</xref>; <xref ref-type="bibr" rid="bib31">Song et al., 2016</xref>) and suggests that the levels found in the center of the field were too high for optimum sorghum growth.</p></sec><sec id="s2-3"><title>Statistical approach to place field noise into PCs</title><p>We have shown that many of the soil properties exhibit spatial distribution and influence various plant and microbiome traits. Therefore, to understand the effects of treatment and genotype on the phenotypic traits more precisely, we must first account for the effects of the soil properties. The replication in our study was not sufficient to include all the soil properties as covariates to account for their influence &#8211; this would require a degree of freedom for every soil property. A generalized approach to overcome limited sample size is reducing the dimensionality of the covariates using principal component analysis and regressing against the first several PCs, known as principal component regression. In this approach, the PCs retain a percentage of the influence from the individual properties and can be used as a proxy to adjust for as much variation as possible.</p><p>To adjust for the spatial effect of soil properties unrelated to genotype and treatment, we used the observed soil properties, not the kriged values, and fit linear models for each soil property with fixed effects being treatment, genotype, and their interaction. The residuals from these linear models were the adjusted soil properties unrelated to genotype and treatment. These residuals were then processed with principal component analysis. To test the success of accounting for our experimental design, we investigated clustering within the first two PCs and observed that indeed the treatment and genotype effects were accounted for (<xref ref-type="fig" rid="fig3">Figure 3A</xref>) as there were no obvious treatment or genotype effects remaining in these PCs. We selected the first three PCs as regressors which represented approximately 66% of the total variance in the soil property data (<xref ref-type="fig" rid="fig3">Figure 3B</xref>). Next, we visualized the contribution of each soil property within each PC (<xref ref-type="fig" rid="fig3">Figure 3C</xref>). PC1 is primarily represented by calcium, magnesium, potassium, sodium, and sum of cations. PC2 is primarily represented by sulfate, salinity, and pH. PC3 is primarily represented by phosphate and nitrate. Then, we used kriging (see Methods: Geospatial interpolation methods) to interpolate the missing values for the rest of the field. Since many soil properties exhibited spatial distributions (<xref ref-type="fig" rid="fig1">Figure 1</xref>), we expected that the PCs would also display a spatial distribution. Indeed, the spatial distribution of the kriged first PC resembles the calcium distribution which emphasizes the contribution of that property (<xref ref-type="fig" rid="fig1">Figures 1</xref> and <xref ref-type="fig" rid="fig3">3D</xref>). In summary, through this approach, we revealed specific sources of field-level noise and successfully captured a significant proportion of the field-level variation into three PCs.</p><fig id="fig3" position="float"><label>Figure 3.</label><caption><title>Variation in soil properties can be collapsed into principal components.</title><p>(<bold>A</bold>) First two principal components of soil property residuals as <italic>x</italic>- and <italic>y</italic>-axis, respectively, colored by genotype and shaped by treatment. (<bold>B</bold>) Scree plot of the first 10 principal components. Shown is the percent variance explained of the total property variance by each component. Dashed line is at 10% variance explained. (<bold>C</bold>) For the first three components, colored is the contribution of each soil property to its respective variance explained within each component. (<bold>D</bold>) Spatial distribution of kriged PC1. Each cell colored by scaling the values to unit variance. Variogram fit with nugget, partial sill, and range displayed.</p></caption><graphic mimetype="image" mime-subtype="jpeg" xlink:href="elife-70056.xml.media/fig3.jpg"/></fig></sec><sec id="s2-4"><title>Using PCs to denoise field data</title><p>Having reduced the soil property influences into a limited number of PCs, next we aimed to account for these unintended influences on our phenotypes of interest. Similar to how we removed the experimental design effects, we created mixed-effect models for each of our phenotypes as the dependent variable with multivariate normal spatial structure within each split-plot replicate, and the first three PCs as fixed effect independent variables (see Methods: Principal component regression; <xref ref-type="disp-formula" rid="equ6">Equation 5</xref>). For the univariate phenotypes, we extracted model residuals (<xref ref-type="disp-formula" rid="equ2">Equation 2</xref>) to generate soil property invariant phenotypes. In the microbiomes, residuals were not comparable to the original counts, therefore we back transformed the model estimates to create an adjusted count for each operational taxonomic unit (OTU) (see Methods: Principal component regression; <xref ref-type="disp-formula" rid="equ7">Equation 6</xref>).</p><p>In the microbiome datasets, we expected the variance within each of the compartments to decrease and the difference between compartments to be more obvious. To test this, we combined the original observed counts and the adjusted counts for microbes that were able to be modeled and performed principal component analysis. We observed larger distances between microbiome compartments using the adjusted counts versus the observed counts (<xref ref-type="fig" rid="fig4">Figure 4A</xref>). This indicates the sources of variation from the soil properties were better controlled thereby increasing the differentiation between compartments. Within each compartment, the root microbiome was least affected by soil factors which was demonstrated by the small distance between the observed and adjusted counts, followed by rhizosphere with a larger separation, and soil being the farthest and most affected (<xref ref-type="fig" rid="fig4">Figure 4A</xref>). The adjusted counts produced clusters that were larger than their respective original counts, again indicating sources of variation were addressed so that the within compartment effects were better elucidated (<xref ref-type="fig" rid="fig4">Figure 4A</xref>). Next, we examined the experimental design effects within each compartment using variance components (PERMANOVA) and found mirroring results (<xref ref-type="fig" rid="fig4">Figure 4B</xref>). The experimental design was not differentially resolved in the root microbiome after principal component regression; however, the rhizosphere and soil microbiomes saw an increase in 3% and 2%, respectively. Noting that treatment was the effect of largest change, we sought to visualize treatment effect changes within each of the tissue compartments. Given the strong compartment differentiation, identifying clusters within a compartment required principal component analysis to be performed on each compartment individually. In the rhizosphere, we observed treatment differentiation using both the observed and adjusted counts; however, the distance between the centers of each cluster was larger after adjustment further indicating within-group variation was reduced (<xref ref-type="fig" rid="fig4s1">Figure 4&#8212;figure supplement 1</xref>).</p><fig-group><fig id="fig4" position="float"><label>Figure 4.</label><caption><title>Accounting for influence from soil property variance within microbiome data reveals plant phenotypes that correlate with microbe abundance.</title><p>(<bold>A</bold>) Principal component analysis on the combined raw and residual microbiome tables. Shown are the first two components with their respective variance explained. Samples are colored by tissue type and shaped by original or residual values. Gray points are the centers of each respective cluster, and gray lines connect the centers of each cluster. (<bold>B</bold>) Partial correlations of experimental design variables in each microbiome compartment&#8217;s composition before and after principal component regression. (<bold>C</bold>) Observed plant height values, <italic>x</italic>-axis, and the change in that value as a result of the adjustment, <italic>y</italic>-axis. (<bold>D</bold>) Similar to C, shown are the fresh weight values and their respective changes. In both C and D, horizontal green dashed lines represent the 95% confidence interval for the change in observation. Light gray dots are within the interval, and dark gray dots are outside of the interval. (<bold>E</bold>) Partial correlations of experimental design variables in leaf &#948;<sup>15</sup>N before and after principal component regression. (<bold>F, G</bold>) For only water-stressed samples and only the soil microbiome, plant height, <italic>y</italic>-axis, and operational taxonomic unit (OTU) abundance of Microvirga, <italic>x</italic>-axis, before and after principal component regression. Shown in G, is the fit of a change-point model where the red line is no change before threshold, the vertical dashed line, and the blue line is a linear fit after the threshold.</p></caption><graphic mimetype="image" mime-subtype="jpeg" xlink:href="elife-70056.xml.media/fig4.jpg"/></fig><fig id="fig4s1" position="float" specific-use="child-fig"><label>Figure 4&#8212;figure supplement 1.</label><caption><title>Principal component analysis on the root (<bold>A</bold>), rhizosphere (<bold>B</bold>), and soil (<bold>C</bold>) samples using the combined raw and residual microbiome tables.</title><p>Left panel uses the original counts and right uses adjusted counts (see Methods: Principal component regression). Each panel has colors corresponding to the treatment for each sample.</p></caption><graphic mimetype="image" mime-subtype="jpeg" xlink:href="elife-70056.xml.media/fig4-figsupp1.jpg"/></fig><fig id="fig4s2" position="float" specific-use="child-fig"><label>Figure 4&#8212;figure supplement 2.</label><caption><title>Effect of applying principal component regression on plant morphology.</title><p>(<bold>A</bold>) Boxplots, overlaying violin plots, of plant height, <italic>y</italic>-axis, against the watering treatments (WS: drought, WW: well-watered), <italic>x</italic>-axis. (<bold>B</bold>) Same as A, but displaying the residuals of plant height, <italic>y</italic>-axis, from principal component regression model using dimension reduced soil properties as covariates. See <xref ref-type="bibr" rid="bib22">Qi et al., 2022</xref> for additional details and analysis on these data. (<bold>C</bold>) Variance components, <italic>y</italic>-axis, of each harvest trait measured, <italic>x</italic>-axis, for both original data and adjusted data (_resid).</p></caption><graphic mimetype="image" mime-subtype="jpeg" xlink:href="elife-70056.xml.media/fig4-figsupp2.jpg"/></fig></fig-group><p>To assess how principal component regression affected the univariate plant phenotypes, we compared the data before and after principal component regression. Prior to removing noise from soil properties, plant height and fresh weight both showed drought effects, and after performing principal component regression, these effects were maintained (<xref ref-type="fig" rid="fig4s2">Figure 4&#8212;figure supplement 2</xref>). Further, by plotting the original values against the change for each value after adjustment, we showed that this correction method is equitable for all plant sizes; in other words, short and tall plants were not overly adjusted either positively or negatively (<xref ref-type="fig" rid="fig4">Figure 4</xref>). Additionally, the variation attributable to soil properties was as much as &#177;10% for fresh weight and &#177;6% for plant height for some individual observations. Additionally, variance components were computed for all harvest traits but both fresh weight and plant height did not demonstrate an increase in experimental design resolution (<xref ref-type="fig" rid="fig4s2">Figure 4&#8212;figure supplement 2</xref>). Lastly, leaf delta &#948;<sup>15</sup>N showed an association with phosphate (<xref ref-type="fig" rid="fig2">Figure 2C</xref>), and variance components for the experimental design effects found that the <italic>R</italic><sup>2</sup> between the unadjusted and adjusted increased by approximately 5% (<xref ref-type="fig" rid="fig4">Figure 4E</xref>). This is similar to the changes in the explained variance in rhizosphere and soil microbiome compositions.</p><p>Microbiome analyses often have less-than-ideal replication and therefore elimination of confounding variations is crucial in identifying effects of interest, such as identifying plant-growth promoting microbes. For instance, change-point models can identify microbial impacts on plant phenotypes once the abundance of the microbe reaches a particular level (<xref ref-type="bibr" rid="bib22">Qi et al., 2022</xref>). We show that in water-stressed samples before the adjustment on both plant height and microbe abundance of <italic>Microvirga</italic> in the soil microbiome, we do not see any evidence of an association. However, after accounting for the variance from the soil properties, there is a strong positive correlation between plant phenotype and OTU abundance (false discovery rate (FDR)-adjusted p value &lt;0.05) (<xref ref-type="fig" rid="fig4">Figure 4F, G</xref>). This further emphasizes that if an experimental factor is expected to have a small effect size, that it would be lost to noise. We observe an order of magnitude more significant OTUs in the change-point associations before principal component regression versus after (<xref ref-type="table" rid="table3">Table 3</xref>). Adjusting the soil property effect or other nuisance factors in experimental design can lead to increased power of detecting biological signals.</p><table-wrap id="table3" position="float"><label>Table 3.</label><caption><title>Shown for each microbiome compartment are the number of operational taxonomic units (OTUs) that showed significant association (p value &lt;0.05) in their abundance to the respective phenotype, either positive or negative, both before (original) and after (PC123 Adj) principal component regression.</title><p>The intersection column shows the number of OTUs shared between these two sets of counts.</p></caption><table frame="hsides" rules="groups"><thead><tr><th align="left" valign="bottom">Compartment</th><th align="left" valign="bottom">Phenotype</th><th align="left" valign="bottom">Original</th><th align="left" valign="bottom">PC123 Adj</th><th align="left" valign="bottom">Intersection</th></tr></thead><tbody><tr><td align="left" valign="bottom">Soil</td><td align="left" valign="bottom">Dry weight</td><td align="char" char="." valign="bottom">7950</td><td align="char" char="." valign="bottom">445</td><td align="char" char="." valign="bottom">155</td></tr><tr><td align="left" valign="bottom">RHZ</td><td align="left" valign="bottom">Dry weight</td><td align="char" char="." valign="bottom">5619</td><td align="char" char="." valign="bottom">897</td><td align="char" char="." valign="bottom">333</td></tr><tr><td align="left" valign="bottom">Root</td><td align="left" valign="bottom">Dry weight</td><td align="char" char="." valign="bottom">3320</td><td align="char" char="." valign="bottom">231</td><td align="char" char="." valign="bottom">96</td></tr><tr><td align="left" valign="bottom">Soil</td><td align="left" valign="bottom">Plant height</td><td align="char" char="." valign="bottom">7991</td><td align="char" char="." valign="bottom">342</td><td align="char" char="." valign="bottom">137</td></tr><tr><td align="left" valign="bottom">RHZ</td><td align="left" valign="bottom">Plant height</td><td align="char" char="." valign="bottom">5614</td><td align="char" char="." valign="bottom">854</td><td align="char" char="." valign="bottom">329</td></tr><tr><td align="left" valign="bottom">Root</td><td align="left" valign="bottom">Plant height</td><td align="char" char="." valign="bottom">3397</td><td align="char" char="." valign="bottom">245</td><td align="char" char="." valign="bottom">97</td></tr></tbody></table></table-wrap></sec></sec><sec id="s3" sec-type="discussion"><title>Discussion</title><p>Large-scale trials within complex environments are an important component of many biological subdisciplines. Because of environmental variability, these experiments must include high levels of replication and even so, results often fail to repeat in subsequent trials. For agricultural field studies a major source of variation is heterogeneous soil property distributions that create their own microtreatments and are covariates to planned experimental designs. Because these microtreatments are often unknown, and therefore not accounted for, they show up as experimental noise and may lead to false inferences. For instance, nitrogen is known to affect plant growth (size, color, yield, etc.) (<xref ref-type="bibr" rid="bib33">Veley et al., 2017</xref>; <xref ref-type="bibr" rid="bib5">Chapin, 1987</xref>). If nitrogen is unevenly distributed across a field experiment aimed at characterizing biomass among diverse genotypes, the variability in nitrogen may confound the experiment. In this study, by intentionally measuring multiple soil properties across the field experiment, we were able to account for this known variation through PCs and gain novel biological insight into possible field relevant interactions between plants and microbes (<xref ref-type="bibr" rid="bib22">Qi et al., 2022</xref>).</p><p>After accounting for environmental variation, we observe a reduced total number of OTUs that correlate with plant phenotypes (<xref ref-type="table" rid="table3">Table 3</xref>). While the total number is smaller, we also observe new &#8216;hits&#8217; that were not observed before accounting for environmental factors. Additional work will be required to fully understand these patterns but here we offer a few theoretical explanations. We note that accounting for the environmental variation reduced our degrees of freedom that may reduce the power of detecting some associations. More importantly, after adjusting for environmental effect, we have focused our analysis on conditional associations instead of marginal associations. <italic>Marginal associations</italic> measure whether two variables change in the same or opposite directions simultaneously, regardless of other factors that may affect those two variables. <italic>Conditional associations</italic>, on the other hand, adjust for the impacts of other variables, and measure the relationship between two variables that cannot be explained by other variables. In a scenario in which the environment affects a microbe abundance and a plant phenotype, this may be observed as marginal associations between the two even though the microbe abundance and plant phenotype are not directly associated with each other. It is possible that many of the &#8216;hits&#8217; before environmental factor adjustment are results of such marginal associations. By accounting for environmental factors, we focus on conditional associations that measure the rarer direct or possibly causal interactions between OTUs and plants. Nevertheless, future experimental work will be required to refine the list of candidate microbes.</p><p>While the approach described here represents a major advance forward, we acknowledge several opportunities for further improvement. For example, the soil property data were collected at just one time point, approximately 1 month prior to harvest. While we expect some soil properties to remain constant over the course of the experiment, others likely fluctuate (e.g., water content). Future studies might gather soil property composition at multiple time points, including before planting and at the time of phenotyping, to generate paired data. Additionally, advancements have been made in spatiotemporal modeling using Bayesian hierarchical modeling with time as an autoregressor (<xref ref-type="bibr" rid="bib6">Finley et al., 2015</xref>; <xref ref-type="bibr" rid="bib27">Rushworth et al., 2014</xref>) which may prove powerful if soil property composition were densely sampled over time. We also note that replication remains a crucial aspect of these types of experiments. In this analysis, we used three degrees of freedom by including the first three PCs of the soil property data in a regression to account for their contributions on the phenotypes. We chose to only include PCs that have at least 10% variance explained and only include up to replication number minus three so the experimental design effects could still be estimated. In our case, had a fourth PC shown a significant source of variance, degrees of freedom would have become limiting. On the other hand, had we included additional replication, it may have been possible to correct for covariates such as the soil properties by directly regressing on the properties themselves, rather than using a dimension reduction technique.</p><p>A significant advantage of the method described here, is that the sources of variation (soil properties) are known. This allows the researcher to examine how these fluctuations might influence experimental results and may also lead to additional biological insight. However, we note that it is not possible to measure every potential source of variation. In many cases, it may prove useful to apply an approach that is agnostic to the source of variation, such as the SpATS method described previously (<xref ref-type="bibr" rid="bib32">Velazco et al., 2017</xref>).</p><p>We note that in these datasets, the stable isotopes and primary metabolite profiles are invariant to the measured soil properties. For the isotopes, this may be indicative of stability relative to the fluctuations in the properties across the field, but it is possible that if the soil property variations were larger, then a relationship might be established. The metabolite profile used in these analyses is only those captured from GC/MS and mostly consist of primary metabolites such as sugars, organic and amino acids, small phenolics, and fatty acids. It may be true that secondary metabolites that were not examined in these analyses may associate with the soil property variations. We observe that a relatively small amount of variation in plant height and weight were attributed to the soil property composition, other types of data, particularly microbiome composition, were much more susceptible. Microbiome quantification has been shifting from using OTUs to amplicon sequence variants (ASVs) which are designed to identify and retain more specific bacterial identification. Some microbes were overtly sparse across the samples and principal component regression could not successfully estimate model parameters. The microbiome table generation pipeline used for this field allows for the identification of very sparse microbes by way of using 99.5% identity clustering which results in 23,617 OTUs detected. After applying principal component regression, approximately 25% of the microbes were successfully modeled and retained. The methods proposed here should also be applicable to those types of tables as well with one caveat: ASV tables are sparser than OTU tables. While the methods here are zero-inflated, it is likely the percent of ASVs not successfully modeled would be larger than if using classical OTU techniques.</p><p>In conclusion, here we demonstrate the impact of spatially distributed soil property variations on several phenotypes of interest and present principal component regression as a method to alleviate the effects analytically. We observed that the microbiome communities were heavily influenced by the soil properties while the plant phenotypes were more resilient but nonetheless affected. Identifying sources of variation and removing their influence enhances the ability to resolve other effects of interest and enables more honest, reliable, and believable quantification of subtle phenotypes.</p></sec><sec id="s4" sec-type="materials|methods"><title>Materials and methods</title><p>In a related research manuscript (<xref ref-type="bibr" rid="bib22">Qi et al., 2022</xref>), we describe analysis of the field dataset and include additional details on experimental design (sorghum varieties, watering regimes, and field layout). Microbes were not intentionally added to these experiments, but instead were allowed to evolve naturally from the environment. For clarity, we summarize key details here, as well. In short, we planted 24 varieties of sorghum in a split-plot design with 8 replicate blocks with 2 watering treatments per block (WW: well-watered and WS: drought). End of season harvesting procedures, microbiome sampling, DNA extraction, and sequencing are also fully described in <xref ref-type="bibr" rid="bib22">Qi et al., 2022</xref>.</p><sec id="s4-1"><title>Processing amplicon reads with VSEARCH and OTU table QC</title><p>Three microbiomes were collected for each plant: root endophytes, rhizosphere, and bulk soil, and all samples were sent for 16S PE amplicon sequencing at JGI (see <xref ref-type="bibr" rid="bib22">Qi et al., 2022</xref> for extraction and sequencing methods). What follows is the VSEARCH (v2.9.0) (<xref ref-type="bibr" rid="bib26">Rognes et al., 2016</xref>) workflow for taking the reads for each sample and processing them to curate the OTU table: merge paired ends, merge all samples, fastq filter, sequence dereplication, cluster unique sequences, remove chimeras, and read quantification. Merging paired ends had the following parameters: max diffs = 10, max diff percentage = 90, min merge length = 230, max merge length = 540. Samples were then combined into a single fasta file. Fastq filtering had the following parameters: maxee = 1, strip left = 19, strip right = 20, fastq max n&#8217;s=0, fasta width = 0. Dereplication had the following parameters: min unique size = 1, fasta width = 0. OTU clustering had the following parameters: id percentage = 0.995, strand = both. Removing chimeras had the following parameters: fasta width = 0. Read quantification had the following parameters: id percentage = 0.9. These steps were combined all together in a directed acyclic graph (DAG) workflow and executed on a HTCondor high-throughput computation cluster. A total of 171,273 OTUs were detected and of those 114,179 had quantification across all 1280 samples. OTU table quality control was done in two steps: samples were removed if the total number of reads quantified across all OTUs was less than 10,000. In addition, OTUs were removed if the total number of reads quantified across all samples was less than 100 or greater than 200,000. After applying this filter 92,385 OTUs and 1280 samples remained. Of the OTUs removed only 422 had counts larger than 200,000 indicating the majority of the OTUs removed were rare and would not have enough information to perform proper statistical analysis. Once OTUs and samples that did not meet the quality control filters were removed, each OTU count in a sample was scaled proportionally to the same number, max number of reads per sample, so that all samples had the same number of OTU counts quantified.</p></sec><sec id="s4-2"><title>Geospatial interpolation methods</title><p>One major assumption of fitting spatial models is that the distribution is stationary, meaning that the mean and covariance between any two samples are the same throughout the grid. However, this field trial included two treatment factors (watering treatments and sorghum genotypes) which may have directly affected the measured soil properties. Thus, to satisfy the stationary assumption, we needed to account for any influence on the soil properties from the two treatment factors and/or their interaction. To do this, we fitted linear models including treatment factors and their interaction for each soil property as follows:<disp-formula id="equ1"><label>(1)</label><mml:math id="m1"><mml:mi>P</mml:mi><mml:mi>r</mml:mi><mml:mi>o</mml:mi><mml:mi>p</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi><mml:mi>t</mml:mi><mml:mi>y</mml:mi><mml:mo>=</mml:mo><mml:mi>i</mml:mi><mml:mi>n</mml:mi><mml:mi>t</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi><mml:mi>c</mml:mi><mml:mi>e</mml:mi><mml:mi>p</mml:mi><mml:mi>t</mml:mi><mml:mo>+</mml:mo><mml:mi>T</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>a</mml:mi><mml:mi>t</mml:mi><mml:mi>m</mml:mi><mml:mi>e</mml:mi><mml:mi>n</mml:mi><mml:mi>t</mml:mi><mml:mo>+</mml:mo><mml:mi>G</mml:mi><mml:mi>e</mml:mi><mml:mi>n</mml:mi><mml:mi>o</mml:mi><mml:mi>t</mml:mi><mml:mi>y</mml:mi><mml:mi>p</mml:mi><mml:mi>e</mml:mi><mml:mo>+</mml:mo><mml:mi>T</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>a</mml:mi><mml:mi>t</mml:mi><mml:mi>m</mml:mi><mml:mi>e</mml:mi><mml:mi>n</mml:mi><mml:mi>t</mml:mi><mml:mi>*</mml:mi><mml:mi>G</mml:mi><mml:mi>e</mml:mi><mml:mi>n</mml:mi><mml:mi>o</mml:mi><mml:mi>t</mml:mi><mml:mi>y</mml:mi><mml:mi>p</mml:mi><mml:mi>e</mml:mi><mml:mo>+</mml:mo><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>s</mml:mi><mml:mi>i</mml:mi><mml:mi>d</mml:mi><mml:mi>u</mml:mi><mml:mi>a</mml:mi><mml:mi>l</mml:mi><mml:mi> </mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi><mml:mi>r</mml:mi><mml:mi>o</mml:mi><mml:mi>r</mml:mi></mml:math></disp-formula></p><p>Then we calculated residuals for each property to generate stationary soil properties by subtracting the observed values by the predicted values based on model (1), as follows:<disp-formula id="equ2"><label>(2)</label><mml:math id="m2"><mml:mi>R</mml:mi><mml:mi>e</mml:mi><mml:mi>s</mml:mi><mml:mi>i</mml:mi><mml:mi>d</mml:mi><mml:mi>u</mml:mi><mml:mi>a</mml:mi><mml:mi>l</mml:mi><mml:mi>s</mml:mi><mml:mi> </mml:mi><mml:mo>=</mml:mo><mml:mi> </mml:mi><mml:mi>P</mml:mi><mml:mi>r</mml:mi><mml:mi>o</mml:mi><mml:mi>p</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi><mml:mi>t</mml:mi><mml:msub><mml:mrow><mml:mi>y</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>-</mml:mo><mml:mi>P</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>d</mml:mi><mml:mi>i</mml:mi><mml:mi>c</mml:mi><mml:mi>t</mml:mi><mml:mi>e</mml:mi><mml:mi>d</mml:mi><mml:mo>(</mml:mo><mml:mi>P</mml:mi><mml:mi>r</mml:mi><mml:mi>o</mml:mi><mml:mi>p</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi><mml:mi>t</mml:mi><mml:msub><mml:mrow><mml:mi>y</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:math></disp-formula></p><p>The residuals were used to estimate variances at multiple distances to produce a variogram and then, several spatial models were fitted to the data. The spatial models that were considered include (1) No Structure, (2) Exponential, (3) Spherical, (4) Gaussian, (5) Matern, and (6) Stein.</p><p>This was fully implemented in the R package &#8216;gstat&#8217; using the fit.variogram() function, and full spatial model parameterizations can be found (<xref ref-type="bibr" rid="bib17">Pebesma and Wesseling, 1998</xref>; <xref ref-type="bibr" rid="bib18">Pebesma, 2004</xref>; <xref ref-type="bibr" rid="bib17">Pebesma and Wesseling, 1998</xref>; <xref ref-type="bibr" rid="bib18">Pebesma, 2004</xref>). The best fitting model was selected as the one that produced the minimal sum-of-squares errors. Given a spatial model, there are three metrics that describe a spatial fit and help us understand how the sampled points are correlated. The nugget is the estimated variance between two adjacent samples and represents the noise of the data. The range is the distance at which the change in variance with respect to distance first becomes zero and represents how far away sampled points demonstrate the correlation structure. Finally, the partial sill is the variance at the range minus the nugget variance. Since each spatial model is fully parameterized by only these three coefficients, model complexity is not a factor in estimating the model sum of squares to identify the best fitting parameterization.</p><p>With the best spatial model, we estimated spatial weights as a function of the distance between two sampling points and used these weights to predict values at nonsampled positions by kriging, a method to interpolate by using a weighted average of the observed values in the neighborhood of nonsampled position. More specifically, we applied ordinary kriging and let the sum of spatial weights to be one so that ordinary kriging is unbiased (<xref ref-type="bibr" rid="bib15">Olea, 2018</xref>).<disp-formula id="equ3"><label>(3)</label><mml:math id="m3"><mml:mi>R</mml:mi><mml:mi>e</mml:mi><mml:mi>s</mml:mi><mml:mi>i</mml:mi><mml:mi>d</mml:mi><mml:mi>u</mml:mi><mml:mi>a</mml:mi><mml:msub><mml:mrow><mml:mi>l</mml:mi></mml:mrow><mml:mrow><mml:mi>k</mml:mi><mml:mi>r</mml:mi><mml:mi>i</mml:mi><mml:mi>g</mml:mi></mml:mrow></mml:msub><mml:mfenced separators="|"><mml:mrow><mml:mi>p</mml:mi><mml:mi>o</mml:mi><mml:mi>s</mml:mi><mml:mi>i</mml:mi><mml:mi>t</mml:mi><mml:mi>i</mml:mi><mml:mi>o</mml:mi><mml:msub><mml:mrow><mml:mi>n</mml:mi></mml:mrow><mml:mrow><mml:mn>0</mml:mn></mml:mrow></mml:msub></mml:mrow></mml:mfenced><mml:mo>=</mml:mo><mml:mi> </mml:mi><mml:mrow><mml:msubsup><mml:mo>&#8721;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:msubsup><mml:mrow><mml:mi> </mml:mi></mml:mrow></mml:mrow><mml:msub><mml:mrow><mml:mi>&#955;</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mi>R</mml:mi><mml:mi>e</mml:mi><mml:mi>s</mml:mi><mml:mi>i</mml:mi><mml:mi>d</mml:mi><mml:mi>u</mml:mi><mml:mi>a</mml:mi><mml:mi>l</mml:mi><mml:mo>(</mml:mo><mml:mi>p</mml:mi><mml:mi>o</mml:mi><mml:mi>s</mml:mi><mml:mi>i</mml:mi><mml:mi>t</mml:mi><mml:mi>i</mml:mi><mml:mi>o</mml:mi><mml:msub><mml:mrow><mml:mi>n</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:math></disp-formula><disp-formula id="equ4"><mml:math id="m4"><mml:mrow><mml:mrow><mml:mi mathvariant="normal">w</mml:mi><mml:mi mathvariant="normal">h</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">e</mml:mi></mml:mrow><mml:mtext>&#160;</mml:mtext><mml:munderover><mml:mo>&#8721;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:munderover><mml:mtext>&#160;</mml:mtext><mml:msub><mml:mi>&#955;</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:math></disp-formula></p><p>Kriging was performed using the krige() function in the R package &#8216;gstat&#8217;.</p></sec><sec id="s4-3"><title>Statistical testing for evidence of spatial structure</title><p>Using the residuals of the soil properties from <xref ref-type="disp-formula" rid="equ2">Equation 2</xref> (see Methods: Geospatial interpolation methods), multiple models were created with different spatial-covariance structures: intercept-only (no structure), spherical, exponential, Gaussian, linear, and rational-quadratic. A soil property is considered to exhibit evidence of spatial structure if any of five spatial covariance structure models are significantly more likely than the intercept-only model using likelihood ratio test (p value &lt;0.05).</p></sec><sec id="s4-4"><title>Statistical testing for phenotype&#8211;property associations</title><p>Using only the soil properties that were determined to have significant spatial structure (see Methods: Statistical testing for evidence of spatial structure), the kriged soil property (<xref ref-type="disp-formula" rid="equ3">Equation 3</xref>) was computed and used in the following association model for the mean structure:<disp-formula id="equ5"><label>(4)</label><mml:math id="m5"><mml:mi>f</mml:mi><mml:mo>(</mml:mo><mml:mi>P</mml:mi><mml:mi>h</mml:mi><mml:mi>e</mml:mi><mml:mi>n</mml:mi><mml:mi>o</mml:mi><mml:mi>t</mml:mi><mml:mi>y</mml:mi><mml:mi>p</mml:mi><mml:msub><mml:mrow><mml:mi>e</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mo>)</mml:mo><mml:mi> </mml:mi><mml:mo>=</mml:mo><mml:mi> </mml:mi><mml:mi>i</mml:mi><mml:mi>n</mml:mi><mml:mi>t</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi><mml:mi>c</mml:mi><mml:mi>e</mml:mi><mml:mi>p</mml:mi><mml:mi>t</mml:mi><mml:mi> </mml:mi><mml:mo>+</mml:mo><mml:mi> </mml:mi><mml:mi>T</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>a</mml:mi><mml:mi>t</mml:mi><mml:mi>m</mml:mi><mml:mi>e</mml:mi><mml:mi>n</mml:mi><mml:mi>t</mml:mi><mml:mo>+</mml:mo><mml:mi>G</mml:mi><mml:mi>e</mml:mi><mml:mi>n</mml:mi><mml:mi>o</mml:mi><mml:mi>t</mml:mi><mml:mi>y</mml:mi><mml:mi>p</mml:mi><mml:mi>e</mml:mi><mml:mo>+</mml:mo><mml:mi>T</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>a</mml:mi><mml:mi>t</mml:mi><mml:mi>m</mml:mi><mml:mi>e</mml:mi><mml:mi>n</mml:mi><mml:mi>t</mml:mi><mml:mi>*</mml:mi><mml:mi>G</mml:mi><mml:mi>e</mml:mi><mml:mi>n</mml:mi><mml:mi>o</mml:mi><mml:mi>t</mml:mi><mml:mi>y</mml:mi><mml:mi>p</mml:mi><mml:mi>e</mml:mi><mml:mo>+</mml:mo><mml:mi>P</mml:mi><mml:mi>r</mml:mi><mml:mi>o</mml:mi><mml:mi>p</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi><mml:mi>t</mml:mi><mml:msub><mml:mrow><mml:mi>y</mml:mi></mml:mrow><mml:mrow><mml:mi>k</mml:mi><mml:mi>r</mml:mi><mml:mi>i</mml:mi><mml:mi>g</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:mi>Z</mml:mi><mml:mo>+</mml:mo><mml:mi> </mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi><mml:mi>r</mml:mi><mml:mi>o</mml:mi><mml:mi>r</mml:mi></mml:math></disp-formula></p><p>where f() is the transformation function, <italic>Z</italic> is the multivariate normal spatial distribution nested within each split-plot replicate. In the case of plant height, fresh weight, and &#948;<sup>13</sup>C, the f() is the identity function and significant association is determined by computing a chi-square statistic based on type III sum of squares for the Property_krig. In the metabolite and microbiome estimation, f() is a Canberra distance transformation and significant association is determined by using constrained analysis of principal coordinates using the capscale() function in the vegan R package and constraining on all terms except Property_krig, which has its effect on metabolite composition estimated using permutation ANOVA with 999 iterations.</p></sec><sec id="s4-5"><title>Principal component regression</title><p>Starting with the stationary soil properties, that is, the residuals obtained from <xref ref-type="disp-formula" rid="equ2">Equation 2</xref>, principal component analysis was performed using the PCA() function in the FactoMineR R package, and the first three components were extracted. Each PC was then kriged using the same methods described previously in the method section of geospatial interpolation methods using <xref ref-type="disp-formula" rid="equ3">Equation 3</xref>. The kriged PCs were then used to create the following principal component regression model:<disp-formula id="equ6"><label>(5)</label><mml:math id="m6"><mml:mi>g</mml:mi><mml:mo>(</mml:mo><mml:mi>E</mml:mi><mml:mo>(</mml:mo><mml:mi>P</mml:mi><mml:mi>h</mml:mi><mml:mi>e</mml:mi><mml:mi>n</mml:mi><mml:mi>o</mml:mi><mml:mi>t</mml:mi><mml:mi>y</mml:mi><mml:mi>p</mml:mi><mml:mi>e</mml:mi><mml:mo>)</mml:mo><mml:mo>)</mml:mo><mml:mi> </mml:mi><mml:mo>=</mml:mo><mml:mi> </mml:mi><mml:mi>&#945;</mml:mi><mml:mo>+</mml:mo><mml:msub><mml:mrow><mml:mi>&#946;</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mi>P</mml:mi><mml:mi>C</mml:mi><mml:msub><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>k</mml:mi><mml:mi>r</mml:mi><mml:mi>i</mml:mi><mml:mi>g</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mrow><mml:mi>&#946;</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mi>P</mml:mi><mml:mi>C</mml:mi><mml:msub><mml:mrow><mml:mn>2</mml:mn></mml:mrow><mml:mrow><mml:mi>k</mml:mi><mml:mi>r</mml:mi><mml:mi>i</mml:mi><mml:mi>g</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mrow><mml:mi>&#946;</mml:mi></mml:mrow><mml:mrow><mml:mn>3</mml:mn></mml:mrow></mml:msub><mml:mi>P</mml:mi><mml:mi>C</mml:mi><mml:msub><mml:mrow><mml:mn>3</mml:mn></mml:mrow><mml:mrow><mml:mi>k</mml:mi><mml:mi>r</mml:mi><mml:mi>i</mml:mi><mml:mi>g</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:mi>Z</mml:mi></mml:math></disp-formula></p><p>where g() is the link function for the expected value of the phenotype, <italic>&#945;</italic> is the model intercept, each <italic>&#946;</italic> is the regression coefficient for the respective term, <italic>Z</italic> is the multivariate normal spatial distribution nested within each split-plot replicate. In the case of plant height, fresh weight, and &#948;<sup>13</sup>C, the g() is the identity function. For the microbiome data, each tissue compartment was performed independently with <xref ref-type="disp-formula" rid="equ6">Equation 5</xref>, and g() is the log link to the zero-inflated negative-binomial distribution implemented in the R package NBZIMM R package with the glmm.zinb() function. The metabolites were not modeled.</p><p>Soil property invariant data were created for the univariate phenotypes by extracting model residuals similar to <xref ref-type="disp-formula" rid="equ2">Equation 2</xref>. For each microbiome compartment, every OTU was modeled independently and an adjusted count-like value was created by dividing the original count by the exponentiated regression coefficients for each PC as follows:<disp-formula id="equ7"><label>(6)</label><mml:math id="m7"><mml:mi>A</mml:mi><mml:mi>d</mml:mi><mml:mi>j</mml:mi><mml:mi>u</mml:mi><mml:mi>s</mml:mi><mml:mi>t</mml:mi><mml:mi>e</mml:mi><mml:mi>d</mml:mi><mml:mi> </mml:mi><mml:mi>O</mml:mi><mml:mi>T</mml:mi><mml:msub><mml:mrow><mml:mi>U</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mi> </mml:mi><mml:mo>=</mml:mo><mml:mi> </mml:mi><mml:mi>O</mml:mi><mml:mi>b</mml:mi><mml:mi>s</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi><mml:mi>v</mml:mi><mml:mi>e</mml:mi><mml:mi>d</mml:mi><mml:mi> </mml:mi><mml:mi>O</mml:mi><mml:mi>T</mml:mi><mml:msub><mml:mrow><mml:mi>U</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mi> </mml:mi><mml:mo>/</mml:mo><mml:mi> </mml:mi><mml:mi>e</mml:mi><mml:mi>x</mml:mi><mml:mi>p</mml:mi><mml:mo>(</mml:mo><mml:msub><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mn>3</mml:mn></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:math></disp-formula></p><p>where each <italic>b</italic> coefficient is an estimated beta coefficient by fitting model (5).</p></sec><sec id="s4-6"><title>Soil property composition sampling and processing</title><p>Selected plants were excavated using a shovel to a depth of 12&#8211;14 inches. The soil (approximately 200 g) from the excavated root ball was shaken off into a wash pan in the field, homogenized and collected into a quart-size Ziploc bag. In addition to the collection of roots for microbiome analysis a subset of roots were collected for metabolite analysis as described in <xref ref-type="bibr" rid="bib28">Sheflin et al., 2019</xref>. The soil used for chemical and physical analysis was stored in the Ziploc bags at 4&#176;C and sent to Ward labs for analysis of pH, buffer pH, sum of cations , base saturation (%), soluble salts, organic matter, nitrate&#8211;nitrogen, phosphorus, potassium, calcium, magnesium, sodium, sulfur, zinc, iron, manganese, and copper.</p></sec><sec id="s4-7"><title>Root metabolomics sampling and processing</title><sec id="s4-7-1"><title>Nontargeted metabolite profiling using GC&#8211;MS</title><p>Metabolite extraction was conducted by weighing out 19&#8211;21 mg of each freeze-dried sorghum root and placing them into clean 2 ml autosampler glass vials (VWR, Radnor, PA, USA). Automated control of sample extraction (i.e., solvent proportions, solvent volumes, sample agitation, and supernatant transfers) was accomplished using a standalone Gerstel MultiPurpose Sampler (MPS). Samples were extracted by adding 770 &#956;l of methyl-tert-butyl-ether (MTBE) and 385 &#956;l to each vial and vortexing on the MPS at room temperature for 30 min. To separate organic and aqueous layers, 640 &#956;l of water was added to the remaining extract and vortexed for 15 min. Samples were then centrifuged for 25 min at 3500 rpm at 4&#176;C. The organic layer was extracted twice by transferring into a new 2 ml autosampler vial without disturbing the lower layer then adding 600 &#956;l of MTBE and transferring again. The aqueous layer was also extracted twice by transferring out of the vial into a new 2 ml autosampler vial without disturbing the pellet then adding 300 &#956;l of methanol and 300 &#956;l of acetonitrile, vortexing for 3 min and transferring again. The aqueous layer was completely dried under N gas, resuspended in 300 &#956;l of 75% methanol. 20 &#181;l of the aqueous layer from each sample was transferred to another set of glass vials, centrifuged for 2 min at 3500 rpm and then dried under N<sub>2</sub> (g) for 30 min. Dried samples were stored at &#8722;80&#176;C until derivatization. Derivatization (methoximation and silylation) took place immediately prior to running the samples. Dried down samples were allowed to warm to room temperature and then resuspended in 50 &#181;l of methoxyamine HCl (prewarmed to 60&#176;C) and centrifuged for 30 s. Samples were then incubated at 60&#176;C for 45 min, followed by a brief vortex, sonication for 10 min and an additional incubation at 60&#176;C for 45 min. Following this, the samples were centrifuged before receiving 50 &#181;l of <italic>N</italic>-methyl-<italic>N</italic>-(trimethylsilyl)trifluoroacetamide (MSTFA) + 1% trimethylchlorosilane (TMCS) (Thermo Fisher Scientific, Waltham, MA, USA), briefly vortexed and incubated at 60&#176;C for 40 min, as described previously (<xref ref-type="bibr" rid="bib4">Chaparro et al., 2018</xref>). Samples were loaded (~100 &#181;l) into glass inserts within glass autosampler vials and centrifuged for 30 s prior to GC&#8211;MS analysis. In addition, a pooled extract was created by combining equal volumes of each sample into one glass vial for use as a consistent representative quality control sample (QC).</p><p>GC&#8211;MS analysis was performed using a Trace 1310 GC coupled to a Thermo ISQ mass spectrometer (Thermo Scientific). Derivatized samples (1 &#181;l) were injected in a 1:10 split ratio. Metabolites were separated with a 30 m TG-5MS column (Thermo Scientific, 0.25 mm i.d. 0.25 &#956;m film thickness). The GC program began at 80&#176;C for 0.5 min and ramped to 330&#176;C at a rate of 15&#176;C per min and ended with an 8 min hold at a 1.2 ml min<sup>&#8722;1</sup> helium gas flow rate. The inlet temperature was held at 285&#176;C and the transfer line was held at 260&#176;C. Masses between 50 and 650 <italic>m</italic>/<italic>z</italic> were scanned at 5 scans/s after electron impact ionization.</p><p>Metabolomic data processing was conducted as previously described (<xref ref-type="bibr" rid="bib4">Chaparro et al., 2018</xref>). GC&#8211;MS files were converted to.cdf format and processed by XCMS in R (<xref ref-type="bibr" rid="bib30">Smith et al., 2006</xref>; <xref ref-type="bibr" rid="bib11">Mahieu et al., 2016</xref>; <xref ref-type="bibr" rid="bib24">R Core Team, 2015</xref>). All samples were normalized to the total ion current. RAMClustR was used to deconvolute peaks into spectral clusters for metabolite annotation (<xref ref-type="bibr" rid="bib2">Broeckling et al., 2014</xref>). RAMSearch (<xref ref-type="bibr" rid="bib3">Broeckling et al., 2016</xref>) was used to match metabolites using retention time, retention index, and matching mass spectra data with external databases including Golm Metabolome Database (<xref ref-type="bibr" rid="bib9">Hummel et al., 2007</xref>; <xref ref-type="bibr" rid="bib10">Hummel et al., 2013</xref>) and NIST (<xref ref-type="bibr" rid="bib3">Broeckling et al., 2016</xref>).</p></sec></sec><sec id="s4-8"><title>Leaf traits analyses</title><p>The middle portion (10&#8211;12 cm long) of the uppermost fully expanded leaf from individual plants was harvested in a coin envelope for the analysis of specific leaf area, C and N content, and stable isotopes of C and N. Leaf samples were oven-dried at 65&#176;C to a constant mass and ca. 2.5 mg of the dry leaf was subsample using a custom-made leaf punch system in a tin capsule. The leaf punch provided the leaf area of subsample, which were weighed to estimate specific leaf area. The N, C and &#948;<sup>15</sup>N and &#948;<sup>13</sup>C concentrations of dry leaf were determined by combusting encapsulated samples in an elemental analyzer (ECS 4010, Costech Analytical Technologies) coupled to a continuous flow isotope ratio mass spectrometer (Delta XP, Finnigan MAT) at the Stable Isotope Core Laboratory, Washington State University.</p></sec><sec id="s4-9"><title>Software used and data availability</title><p>All analyses herein were performed in R using the following packages: raster(3.4.5), ggplot2(3.3.3), deldir(0.1.21), vegan(2.5.5), plyr(1.8.4), gridExtra(2.3), reshape(1.4.3), FactoMineR(2.4), factoextra(1.0.7), chngpt(2019.3.12), stringr(1.4.0), gstat(2.0.6), sp(1.4.2), scales(1.0.0), lme4(1.1.21), nlme(3.1.140), parallel(3.5.2), and patchwork(1.1.1). All data and scripts used to create all figures and perform all analyses can be found at <ext-link ext-link-type="uri" xlink:href="https://zenodo.org/record/4715924">https://zenodo.org/record/4715924</ext-link>.</p></sec></sec></body><back><sec sec-type="additional-information" id="s5"><title>Additional information</title><fn-group content-type="competing-interest"><title>Competing interests</title><fn fn-type="COI-statement" id="conf1"><p>No competing interests declared</p></fn><fn fn-type="COI-statement" id="conf2"><p>No competing interests declared</p></fn><fn fn-type="COI-statement" id="conf3"><p>Reviewing editor, eLife</p></fn></fn-group><fn-group content-type="author-contribution"><title>Author contributions</title><fn fn-type="con" id="con1"><p>Conceptualization, Data curation, Formal analysis, Methodology, Validation, Visualization, Writing &#8211; original draft, Writing &#8211; review and editing</p></fn><fn fn-type="con" id="con2"><p>Formal analysis, Investigation, Writing &#8211; review and editing</p></fn><fn fn-type="con" id="con3"><p>Resources, Writing &#8211; review and editing</p></fn><fn fn-type="con" id="con4"><p>Data curation, Resources</p></fn><fn fn-type="con" id="con5"><p>Funding acquisition, Resources, Writing &#8211; review and editing</p></fn><fn fn-type="con" id="con6"><p>Data curation, Resources, Supervision</p></fn><fn fn-type="con" id="con7"><p>Conceptualization, Data curation, Funding acquisition, Methodology, Project administration, Resources, Supervision, Writing &#8211; review and editing</p></fn><fn fn-type="con" id="con8"><p>Conceptualization, Formal analysis, Investigation, Methodology, Supervision, Writing &#8211; review and editing</p></fn><fn fn-type="con" id="con9"><p>Conceptualization, Funding acquisition, Project administration, Supervision, Writing &#8211; original draft, Writing &#8211; review and editing</p></fn></fn-group></sec><sec sec-type="supplementary-material" id="s6"><title>Additional files</title><supplementary-material id="transrepform"><label>Transparent reporting form</label><media xlink:href="elife-70056-transrepform1-v1.pdf" mimetype="application" mime-subtype="pdf"/></supplementary-material></sec><sec sec-type="data-availability" id="s7"><title>Data availability</title><p>The 16S microbiome sequencing was done at JGI and was previously published (<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1101/2021.04.13.437608">https://doi.org/10.1101/2021.04.13.437608</ext-link>). Raw sequence data are available for download through the JGI user portal: Author: Daniel Schachtman; Title: "Systems Analysis of the Physiological and Molecular Mechanisms of Sorghum Nitrogen Use Efficiency, Water Use Efficiency and Interactions with the Soil Microbiome" at the following url: <ext-link ext-link-type="uri" xlink:href="https://genome.jgi.doe.gov/portal/pages/projectStatus.jsf?db=EneSor2017_itags_14">https://genome.jgi.doe.gov/portal/pages/projectStatus.jsf?db=EneSor2017_itags_14</ext-link>. Data are listed under the following title: Energy Sorghum Plate 2017_"62-98" itags. Alternatively, all raw data are available for download from the project website: <ext-link ext-link-type="uri" xlink:href="http://shiny.danforthcenter.org/sorghum_systems/">http://shiny.danforthcenter.org/sorghum_systems/</ext-link>. Additional data and code associated with this manuscript are available here: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.5281/zenodo.4715924">https://doi.org/10.5281/zenodo.4715924</ext-link>.</p><p>The following dataset was generated:</p><p><element-citation publication-type="data" specific-use="isSupplementedBy" id="dataset1"><person-group person-group-type="author"><name><surname>Berry</surname><given-names>JC </given-names></name></person-group><year iso-8601-date="2021">2021</year><data-title>Increased signal to noise ratios within experimental field trials by regressing spatially distributed soil properties as principal components</data-title><source>Zenodo</source><pub-id pub-id-type="doi">10.5281/zenodo.4715924</pub-id></element-citation></p><p>The following previously published datasets were used:</p><p><element-citation publication-type="data" specific-use="references" id="dataset2"><person-group person-group-type="author"><name><surname>Qi</surname><given-names>M</given-names></name></person-group><year iso-8601-date="2021">2021</year><data-title>Interacting beneficial and detrimental responses to drought in the sorghum microbiome</data-title><source>NCBI BioProject</source><pub-id pub-id-type="accession" xlink:href="https://www.ncbi.nlm.nih.gov/bioproject/PRJNA720397">PRJNA720397</pub-id></element-citation></p><p><element-citation publication-type="data" specific-use="references" id="dataset3"><person-group person-group-type="author"><name><surname>Schachtman</surname><given-names>D</given-names></name></person-group><year iso-8601-date="2021">2021</year><data-title>Systems Analysis of the Physiological and Molecular Mechanisms of Sorghum Nitrogen Use Efficiency, Water Use Efficiency and Interactions with the Soil Microbiome</data-title><source>JGI</source><pub-id pub-id-type="accession" xlink:href="https://genome.jgi.doe.gov/portal/pages/projectStatus.jsf?db=EneSor2017_itags_14">1191873</pub-id></element-citation></p></sec><ack id="ack"><title>Acknowledgements</title><p>Funding: This work was supported by the US Department of Energy award DE_SC0014395.</p></ack><ref-list><title>References</title><ref id="bib1"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Bai</surname><given-names>E</given-names></name><name><surname>Boutton</surname><given-names>TW</given-names></name><name><surname>Liu</surname><given-names>F</given-names></name><name><surname>Wu</surname><given-names>XB</given-names></name><name><surname>Hallmark</surname><given-names>CT</given-names></name><name><surname>Archer</surname><given-names>SR</given-names></name></person-group><year iso-8601-date="2012">2012</year><article-title>Spatial variation of soil &#948;13C and its relation to carbon input and soil texture in a subtropical lowland woodland</article-title><source>Soil Biology and Biochemistry</source><volume>44</volume><fpage>102</fpage><lpage>112</lpage><pub-id pub-id-type="doi">10.1016/j.soilbio.2011.09.013</pub-id></element-citation></ref><ref id="bib2"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Broeckling</surname><given-names>CD</given-names></name><name><surname>Afsar</surname><given-names>FA</given-names></name><name><surname>Neumann</surname><given-names>S</given-names></name><name><surname>Ben-Hur</surname><given-names>A</given-names></name><name><surname>Prenni</surname><given-names>JE</given-names></name></person-group><year iso-8601-date="2014">2014</year><article-title>RAMClust: A novel feature clustering method enables spectral-matching-based annotation for metabolomics data</article-title><source>Analytical Chemistry</source><volume>86</volume><fpage>6812</fpage><lpage>6817</lpage><pub-id pub-id-type="doi">10.1021/ac501530d</pub-id><pub-id pub-id-type="pmid">24927477</pub-id></element-citation></ref><ref id="bib3"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Broeckling</surname><given-names>CD</given-names></name><name><surname>Ganna</surname><given-names>A</given-names></name><name><surname>Layer</surname><given-names>M</given-names></name><name><surname>Brown</surname><given-names>K</given-names></name><name><surname>Sutton</surname><given-names>B</given-names></name><name><surname>Ingelsson</surname><given-names>E</given-names></name><name><surname>Peers</surname><given-names>G</given-names></name><name><surname>Prenni</surname><given-names>JE</given-names></name></person-group><year iso-8601-date="2016">2016</year><article-title>Enabling efficient and confident annotation of LC-MS metabolomics data through MS1 spectrum and time pPrediction</article-title><source>Analytical Chemistry</source><volume>88</volume><fpage>9226</fpage><lpage>9234</lpage><pub-id pub-id-type="doi">10.1021/acs.analchem.6b02479</pub-id><pub-id pub-id-type="pmid">27560453</pub-id></element-citation></ref><ref id="bib4"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Chaparro</surname><given-names>JM</given-names></name><name><surname>Holm</surname><given-names>DG</given-names></name><name><surname>Broeckling</surname><given-names>CD</given-names></name><name><surname>Prenni</surname><given-names>JE</given-names></name><name><surname>Heuberger</surname><given-names>AL</given-names></name></person-group><year iso-8601-date="2018">2018</year><article-title>Metabolomics and ionomics of potato tuber reveals an influence of cultivar and market class on human nutrients and bioactive compounds</article-title><source>Frontiers in Nutrition</source><volume>5</volume><elocation-id>e36</elocation-id><pub-id pub-id-type="doi">10.3389/fnut.2018.00036</pub-id><pub-id pub-id-type="pmid">29876353</pub-id></element-citation></ref><ref id="bib5"><element-citation publication-type="book"><person-group person-group-type="author"><name><surname>Chapin</surname><given-names>FS</given-names></name></person-group><year iso-8601-date="1987">1987</year><chapter-title>Adaptations and Physiological Responses of Wild Plants to Nutrient Stress</chapter-title><person-group person-group-type="editor"><name><surname>Bassam</surname><given-names>N</given-names></name><name><surname>Dambroth</surname><given-names>M</given-names></name><name><surname>Loughman</surname><given-names>BC</given-names></name></person-group><source>In Genetic Aspects of Plant Mineral Nutrition</source><publisher-loc>Dordrecht</publisher-loc><publisher-name>Springer Netherlands</publisher-name><fpage>15</fpage><lpage>25</lpage><pub-id pub-id-type="doi">10.1007/978-94-009-2053-8</pub-id></element-citation></ref><ref id="bib6"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Finley</surname><given-names>AO</given-names></name><name><surname>Banerjee</surname><given-names>S</given-names></name><name><surname>Gelfand</surname><given-names>AE</given-names></name></person-group><year iso-8601-date="2015">2015</year><article-title>SpBayes for large univariate and multivariate point-referenced spatio-temporal data models</article-title><source>Journal of Statistical Software</source><volume>63</volume><fpage>1</fpage><lpage>28</lpage><pub-id pub-id-type="doi">10.18637/jss.v063.i13</pub-id></element-citation></ref><ref id="bib7"><element-citation publication-type="book"><person-group person-group-type="author"><name><surname>Fisher</surname><given-names>RA</given-names></name></person-group><year iso-8601-date="1925">1925</year><source>Statistical Methods for Research Workers</source><publisher-name>Springer</publisher-name><pub-id pub-id-type="doi">10.1007/978-1-4612-4380-9_6</pub-id></element-citation></ref><ref id="bib8"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Franklin</surname><given-names>RB</given-names></name><name><surname>Mills</surname><given-names>AL</given-names></name></person-group><year iso-8601-date="2003">2003</year><article-title>Multi-scale variation in spatial heterogeneity for microbial community structure in an eastern Virginia agricultural field</article-title><source>FEMS Microbiology Ecology</source><volume>44</volume><fpage>335</fpage><lpage>346</lpage><pub-id pub-id-type="doi">10.1016/S0168-6496(03)00074-6</pub-id><pub-id pub-id-type="pmid">12830827</pub-id></element-citation></ref><ref id="bib9"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hummel</surname><given-names>J</given-names></name><name><surname>Niemann</surname><given-names>M</given-names></name><name><surname>Wienkoop</surname><given-names>S</given-names></name><name><surname>Schulze</surname><given-names>W</given-names></name><name><surname>Steinhauser</surname><given-names>D</given-names></name><name><surname>Selbig</surname><given-names>J</given-names></name><name><surname>Walther</surname><given-names>D</given-names></name><name><surname>Weckwerth</surname><given-names>W</given-names></name></person-group><year iso-8601-date="2007">2007</year><article-title>ProMEX: A mass spectral reference database for proteins and protein phosphorylation sites</article-title><source>BMC Bioinformatics</source><volume>8</volume><elocation-id>216</elocation-id><pub-id pub-id-type="doi">10.1186/1471-2105-8-216</pub-id><pub-id pub-id-type="pmid">17587460</pub-id></element-citation></ref><ref id="bib10"><element-citation publication-type="book"><person-group person-group-type="author"><name><surname>Hummel</surname><given-names>J</given-names></name><name><surname>Strehmel</surname><given-names>N</given-names></name><name><surname>B&#246;lling</surname><given-names>C</given-names></name><name><surname>Schmidt</surname><given-names>S</given-names></name><name><surname>Walther</surname><given-names>D</given-names></name><name><surname>Kopka</surname><given-names>J</given-names></name></person-group><year iso-8601-date="2013">2013</year><chapter-title>Mass Spectral Search and Analysis Using the Golm Metabolome Database</chapter-title><person-group person-group-type="editor"><name><surname>Kahl</surname><given-names>G</given-names></name><name><surname>Weckwerth</surname><given-names>W</given-names></name></person-group><source>In The Handbook of Plant Metabolomics</source><publisher-loc>Weinheim, Germany</publisher-loc><publisher-name>Wiley-VCH Verlag GmbH &amp; Co. KGaA</publisher-name><fpage>321</fpage><lpage>343</lpage></element-citation></ref><ref id="bib11"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Mahieu</surname><given-names>NG</given-names></name><name><surname>Genenbacher</surname><given-names>JL</given-names></name><name><surname>Patti</surname><given-names>GJ</given-names></name></person-group><year iso-8601-date="2016">2016</year><article-title>A roadmap for the XCMS family of software solutions in metabolomics</article-title><source>Current Opinion in Chemical Biology</source><volume>30</volume><fpage>87</fpage><lpage>93</lpage><pub-id pub-id-type="doi">10.1016/j.cbpa.2015.11.009</pub-id><pub-id pub-id-type="pmid">26673825</pub-id></element-citation></ref><ref id="bib12"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>M&#246;tt&#246;nen</surname><given-names>M</given-names></name><name><surname>J&#228;rvinen</surname><given-names>E</given-names></name><name><surname>Hokkanen</surname><given-names>TJ</given-names></name><name><surname>Kuuluvainen</surname><given-names>T</given-names></name><name><surname>Ohtonen</surname><given-names>R</given-names></name></person-group><year iso-8601-date="1999">1999</year><article-title>Spatial distribution of soil ergosterol in the organic layer of a mature Scots pine (Pinus sylvestris L.) forest</article-title><source>Soil Biology and Biochemistry</source><volume>31</volume><fpage>503</fpage><lpage>516</lpage><pub-id pub-id-type="doi">10.1016/S0038-0717(98)00122-9</pub-id></element-citation></ref><ref id="bib13"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Murren</surname><given-names>CJ</given-names></name><name><surname>Alt</surname><given-names>CHS</given-names></name><name><surname>Kohler</surname><given-names>C</given-names></name><name><surname>Sancho</surname><given-names>G</given-names></name></person-group><year iso-8601-date="2020">2020</year><article-title>Natural variation on whole-plant form in the wild is influenced by multivariate soil nutrient characteristics: natural selection acts on root traits</article-title><source>American Journal of Botany</source><volume>107</volume><fpage>319</fpage><lpage>328</lpage><pub-id pub-id-type="doi">10.1002/ajb2.1420</pub-id><pub-id pub-id-type="pmid">32002983</pub-id></element-citation></ref><ref id="bib14"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ohashi</surname><given-names>M</given-names></name><name><surname>Gyokusen</surname><given-names>K</given-names></name></person-group><year iso-8601-date="2007">2007</year><article-title>Temporal change in spatial variability of soil respiration on a slope of Japanese cedar (Cryptomeria japonica D. Don) forest</article-title><source>Soil Biology and Biochemistry</source><volume>39</volume><fpage>1130</fpage><lpage>1138</lpage><pub-id pub-id-type="doi">10.1016/j.soilbio.2006.12.021</pub-id></element-citation></ref><ref id="bib15"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Olea</surname><given-names>RA</given-names></name></person-group><year iso-8601-date="2018">2018</year><article-title>A practical primer on geostatistics</article-title><source>US Geological Survey</source><elocation-id>e1103</elocation-id><pub-id pub-id-type="doi">10.3133/ofr20091103</pub-id></element-citation></ref><ref id="bib16"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Pauli</surname><given-names>D</given-names></name><name><surname>Ziegler</surname><given-names>G</given-names></name><name><surname>Ren</surname><given-names>M</given-names></name><name><surname>Jenks</surname><given-names>MA</given-names></name><name><surname>Hunsaker</surname><given-names>DJ</given-names></name><name><surname>Zhang</surname><given-names>M</given-names></name><name><surname>Baxter</surname><given-names>I</given-names></name><name><surname>Gore</surname><given-names>MA</given-names></name></person-group><year iso-8601-date="2018">2018</year><article-title>Multivariate analysis of the cotton seed ionome reveals a shared genetic architecture</article-title><source>G3: Genes, Genomes, Genetics</source><volume>8</volume><fpage>1147</fpage><lpage>1160</lpage><pub-id pub-id-type="doi">10.1534/g3.117.300479</pub-id><pub-id pub-id-type="pmid">29437829</pub-id></element-citation></ref><ref id="bib17"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Pebesma</surname><given-names>EJ</given-names></name><name><surname>Wesseling</surname><given-names>CG</given-names></name></person-group><year iso-8601-date="1998">1998</year><article-title>Gstat: A program for geostatistical modelling, prediction and simulation</article-title><source>Computers &amp; Geosciences</source><volume>24</volume><fpage>17</fpage><lpage>31</lpage><pub-id pub-id-type="doi">10.1016/S0098-3004(97)00082-4</pub-id></element-citation></ref><ref id="bib18"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Pebesma</surname><given-names>EJ</given-names></name></person-group><year iso-8601-date="2004">2004</year><article-title>Multivariable geostatistics in S: the gstat package</article-title><source>Computers &amp; Geosciences</source><volume>30</volume><fpage>683</fpage><lpage>691</lpage><pub-id pub-id-type="doi">10.1016/j.cageo.2004.03.012</pub-id></element-citation></ref><ref id="bib19"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Piepho</surname><given-names>H-P</given-names></name><name><surname>Richter</surname><given-names>C</given-names></name><name><surname>Williams</surname><given-names>E</given-names></name></person-group><year iso-8601-date="2008">2008</year><article-title>Nearest neighbour adjustment and linear variance models in plant breeding trials</article-title><source>Biometrical Journal. Biometrische Zeitschrift</source><volume>50</volume><fpage>164</fpage><lpage>189</lpage><pub-id pub-id-type="doi">10.1002/bimj.200710414</pub-id><pub-id pub-id-type="pmid">18383446</pub-id></element-citation></ref><ref id="bib20"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Piepho</surname><given-names>HP</given-names></name><name><surname>M&#246;hring</surname><given-names>J</given-names></name><name><surname>Williams</surname><given-names>ER</given-names></name></person-group><year iso-8601-date="2013">2013</year><article-title>Why Randomize Agricultural Experiments?</article-title><source>Journal of Agronomy and Crop Science</source><volume>199</volume><fpage>374</fpage><lpage>383</lpage><pub-id pub-id-type="doi">10.1111/jac.12026</pub-id></element-citation></ref><ref id="bib21"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Price</surname><given-names>AL</given-names></name><name><surname>Patterson</surname><given-names>NJ</given-names></name><name><surname>Plenge</surname><given-names>RM</given-names></name><name><surname>Weinblatt</surname><given-names>ME</given-names></name><name><surname>Shadick</surname><given-names>NA</given-names></name><name><surname>Reich</surname><given-names>D</given-names></name></person-group><year iso-8601-date="2006">2006</year><article-title>Principal components analysis corrects for stratification in genome-wide association studies</article-title><source>Nature Genetics</source><volume>38</volume><fpage>904</fpage><lpage>909</lpage><pub-id pub-id-type="doi">10.1038/ng1847</pub-id><pub-id pub-id-type="pmid">16862161</pub-id></element-citation></ref><ref id="bib22"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Qi</surname><given-names>M</given-names></name><name><surname>Berry</surname><given-names>JC</given-names></name><name><surname>Veley</surname><given-names>KW</given-names></name><name><surname>O&#8217;Connor</surname><given-names>L</given-names></name><name><surname>Finkel</surname><given-names>OM</given-names></name><name><surname>Salas-Gonz&#225;lez</surname><given-names>I</given-names></name><name><surname>Kuhs</surname><given-names>M</given-names></name><name><surname>Jupe</surname><given-names>J</given-names></name><name><surname>Holcomb</surname><given-names>E</given-names></name><name><surname>Glavina Del Rio</surname><given-names>T</given-names></name><name><surname>Creech</surname><given-names>C</given-names></name><name><surname>Liu</surname><given-names>P</given-names></name><name><surname>Tringe</surname><given-names>SG</given-names></name><name><surname>Dangl</surname><given-names>JL</given-names></name><name><surname>Schachtman</surname><given-names>DP</given-names></name><name><surname>Bart</surname><given-names>RS</given-names></name></person-group><year iso-8601-date="2022">2022</year><article-title>Identification of beneficial and detrimental bacteria impacting sorghum responses to drought using multi-scale and multi-system microbiome comparisons</article-title><source>The ISME Journal</source><elocation-id>e012454</elocation-id><pub-id pub-id-type="doi">10.1038/s41396-022-01245-4</pub-id><pub-id pub-id-type="pmid">35523959</pub-id></element-citation></ref><ref id="bib23"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Quist</surname><given-names>CW</given-names></name><name><surname>Gort</surname><given-names>G</given-names></name><name><surname>Mooijman</surname><given-names>P</given-names></name><name><surname>Brus</surname><given-names>DJ</given-names></name><name><surname>van den Elsen</surname><given-names>S</given-names></name><name><surname>Kostenko</surname><given-names>O</given-names></name><name><surname>Vervoort</surname><given-names>M</given-names></name><name><surname>Bakker</surname><given-names>J</given-names></name><name><surname>van der Putten</surname><given-names>WH</given-names></name><name><surname>Helder</surname><given-names>J</given-names></name></person-group><year iso-8601-date="2019">2019</year><article-title>Spatial distribution of soil nematodes relates to soil organic matter and life strategy</article-title><source>Soil Biology and Biochemistry</source><volume>136</volume><elocation-id>e107542</elocation-id><pub-id pub-id-type="doi">10.1016/j.soilbio.2019.107542</pub-id></element-citation></ref><ref id="bib24"><element-citation publication-type="web"><person-group person-group-type="author"><collab>R Core Team</collab></person-group><year iso-8601-date="2015">2015</year><article-title>R: A Language and Environment for Statistical Computing</article-title><ext-link ext-link-type="uri" xlink:href="https://www.R-project.org/">https://www.R-project.org/</ext-link><date-in-citation iso-8601-date="2022-06-23">June 23, 2022</date-in-citation></element-citation></ref><ref id="bib25"><element-citation publication-type="preprint"><person-group person-group-type="author"><name><surname>Rodr&#237;guez-&#193;lvarez</surname><given-names>MX</given-names></name><name><surname>Boer</surname><given-names>MP</given-names></name><name><surname>Fred</surname><given-names>A</given-names></name><name><surname>Eeuwijk</surname><given-names>V</given-names></name><name><surname>Eilers</surname><given-names>PHC</given-names></name></person-group><year iso-8601-date="2016">2016</year><article-title>Spatial Models for Field Trials</article-title><source>arXiv</source><ext-link ext-link-type="uri" xlink:href="http://arxiv.org/abs/1607.08255">http://arxiv.org/abs/1607.08255</ext-link></element-citation></ref><ref id="bib26"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Rognes</surname><given-names>T</given-names></name><name><surname>Flouri</surname><given-names>T</given-names></name><name><surname>Nichols</surname><given-names>B</given-names></name><name><surname>Quince</surname><given-names>C</given-names></name><name><surname>Mah&#233;</surname><given-names>F</given-names></name></person-group><year iso-8601-date="2016">2016</year><article-title>VSEARCH: A versatile open source tool for metagenomics</article-title><source>PeerJ</source><volume>4</volume><elocation-id>e2584</elocation-id><pub-id pub-id-type="doi">10.7717/peerj.2584</pub-id><pub-id pub-id-type="pmid">27781170</pub-id></element-citation></ref><ref id="bib27"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Rushworth</surname><given-names>A</given-names></name><name><surname>Lee</surname><given-names>D</given-names></name><name><surname>Mitchell</surname><given-names>R</given-names></name></person-group><year iso-8601-date="2014">2014</year><article-title>A spatio-temporal model for estimating the long-term effects of air pollution on respiratory hospital admissions in Greater London</article-title><source>Spatial and Spatio-Temporal Epidemiology</source><volume>10</volume><fpage>29</fpage><lpage>38</lpage><pub-id pub-id-type="doi">10.1016/j.sste.2014.05.001</pub-id><pub-id pub-id-type="pmid">25113589</pub-id></element-citation></ref><ref id="bib28"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Sheflin</surname><given-names>AM</given-names></name><name><surname>Chiniquy</surname><given-names>D</given-names></name><name><surname>Yuan</surname><given-names>C</given-names></name><name><surname>Goren</surname><given-names>E</given-names></name><name><surname>Kumar</surname><given-names>I</given-names></name><name><surname>Braud</surname><given-names>M</given-names></name><name><surname>Brutnell</surname><given-names>T</given-names></name><name><surname>Eveland</surname><given-names>AL</given-names></name><name><surname>Tringe</surname><given-names>S</given-names></name><name><surname>Liu</surname><given-names>P</given-names></name><name><surname>Kresovich</surname><given-names>S</given-names></name><name><surname>Marsh</surname><given-names>EL</given-names></name><name><surname>Schachtman</surname><given-names>DP</given-names></name><name><surname>Prenni</surname><given-names>JE</given-names></name></person-group><year iso-8601-date="2019">2019</year><article-title>Metabolomics of sorghum roots during nitrogen stress reveals compromised metabolic capacity for salicylic acid biosynthesis</article-title><source>Plant Direct</source><volume>3</volume><elocation-id>e00122</elocation-id><pub-id pub-id-type="doi">10.1002/pld3.122</pub-id><pub-id pub-id-type="pmid">31245765</pub-id></element-citation></ref><ref id="bib29"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Shukla</surname><given-names>D</given-names></name><name><surname>Rinehart</surname><given-names>CA</given-names></name><name><surname>Sahi</surname><given-names>SV</given-names></name></person-group><year iso-8601-date="2017">2017</year><article-title>Comprehensive study of excess phosphate response reveals ethylene mediated signaling that negatively regulates plant growth and development</article-title><source>Scientific Reports</source><volume>7</volume><elocation-id>3074</elocation-id><pub-id pub-id-type="doi">10.1038/s41598-017-03061-9</pub-id><pub-id pub-id-type="pmid">28596610</pub-id></element-citation></ref><ref id="bib30"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Smith</surname><given-names>CA</given-names></name><name><surname>Want</surname><given-names>EJ</given-names></name><name><surname>O&#8217;Maille</surname><given-names>G</given-names></name><name><surname>Abagyan</surname><given-names>R</given-names></name><name><surname>Siuzdak</surname><given-names>G</given-names></name></person-group><year iso-8601-date="2006">2006</year><article-title>XCMS: processing mass spectrometry data for metabolite profiling using nonlinear peak alignment, matching, and identification</article-title><source>Analytical Chemistry</source><volume>78</volume><fpage>779</fpage><lpage>787</lpage><pub-id pub-id-type="doi">10.1021/ac051437y</pub-id><pub-id pub-id-type="pmid">16448051</pub-id></element-citation></ref><ref id="bib31"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Song</surname><given-names>L</given-names></name><name><surname>Yu</surname><given-names>H</given-names></name><name><surname>Dong</surname><given-names>J</given-names></name><name><surname>Che</surname><given-names>X</given-names></name><name><surname>Jiao</surname><given-names>Y</given-names></name><name><surname>Liu</surname><given-names>D</given-names></name></person-group><year iso-8601-date="2016">2016</year><article-title>The molecular mechanism of ethylene-mediated root hair development induced by phosphate starvation</article-title><source>PLOS Genetics</source><volume>12</volume><elocation-id>e1006194</elocation-id><pub-id pub-id-type="doi">10.1371/journal.pgen.1006194</pub-id><pub-id pub-id-type="pmid">27427911</pub-id></element-citation></ref><ref id="bib32"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Velazco</surname><given-names>JG</given-names></name><name><surname>Rodr&#237;guez-&#193;lvarez</surname><given-names>MX</given-names></name><name><surname>Boer</surname><given-names>MP</given-names></name><name><surname>Jordan</surname><given-names>DR</given-names></name><name><surname>Eilers</surname><given-names>PHC</given-names></name><name><surname>Malosetti</surname><given-names>M</given-names></name><name><surname>van Eeuwijk</surname><given-names>FA</given-names></name></person-group><year iso-8601-date="2017">2017</year><article-title>Modelling spatial trends in sorghum breeding field trials using a two-dimensional P-spline mixed model</article-title><source>TAG. Theoretical and Applied Genetics. Theoretische Und Angewandte Genetik</source><volume>130</volume><fpage>1375</fpage><lpage>1392</lpage><pub-id pub-id-type="doi">10.1007/s00122-017-2894-4</pub-id><pub-id pub-id-type="pmid">28374049</pub-id></element-citation></ref><ref id="bib33"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Veley</surname><given-names>KM</given-names></name><name><surname>Berry</surname><given-names>JC</given-names></name><name><surname>Fentress</surname><given-names>SJ</given-names></name><name><surname>Schachtman</surname><given-names>DP</given-names></name><name><surname>Baxter</surname><given-names>I</given-names></name><name><surname>Bart</surname><given-names>R</given-names></name></person-group><year iso-8601-date="2017">2017</year><article-title>High-throughput profiling and analysis of plant responses over time to abiotic stress</article-title><source>Plant Direct</source><volume>1</volume><elocation-id>e00023</elocation-id><pub-id pub-id-type="doi">10.1002/pld3.23</pub-id><pub-id pub-id-type="pmid">31245669</pub-id></element-citation></ref></ref-list></back><sub-article article-type="editor-report" id="sa0"><front-stub><article-id pub-id-type="doi">10.7554/eLife.70056.sa0</article-id><title-group><article-title>Editor's evaluation</article-title></title-group><contrib-group><contrib contrib-type="author"><name><surname>Whiteman</surname><given-names>Noah K</given-names></name><role specific-use="editor">Reviewing Editor</role><aff><institution-wrap><institution-id institution-id-type="ror">https://ror.org/01an7q238</institution-id><institution>University of California, Berkeley</institution></institution-wrap><country>United States</country></aff></contrib></contrib-group><related-object id="sa0ro1" object-id-type="id" object-id="10.1101/2021.04.29.441834" link-type="continued-by" xlink:href="https://sciety.org/articles/activity/10.1101/2021.04.29.441834"/></front-stub><body><p>In this study, the authors took an experimental, empirical approach to tackle the thorny problem of micro-scale variation in soil properties within and among field plots in confounding statistical analyses. The issue is that in field experiments, small variation in one or more soil property variables can obscure true effects of experimental variables on plant phenotypes. The main result is that without their framework they would not have found the association between water treatment, plant growth and Microvirga bacterial abundance, it would have been lost to the noise inherent in these kind of large-scale experiments with relatively modest degrees of freedom. Overall, the PC-based approach to de-noise these kinds of datasets provides an important advance by pulling out subtle phenotypic effects in field trials.</p></body></sub-article><sub-article article-type="decision-letter" id="sa1"><front-stub><article-id pub-id-type="doi">10.7554/eLife.70056.sa1</article-id><title-group><article-title>Decision letter</article-title></title-group><contrib-group content-type="section"><contrib contrib-type="editor"><name><surname>Whiteman</surname><given-names>Noah K</given-names></name><role>Reviewing Editor</role><aff><institution-wrap><institution-id institution-id-type="ror">https://ror.org/01an7q238</institution-id><institution>University of California, Berkeley</institution></institution-wrap><country>United States</country></aff></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name><surname>Gloss</surname><given-names>Andrew</given-names></name><role>Reviewer</role><aff><institution-wrap><institution-id institution-id-type="ror">https://ror.org/0190ak572</institution-id><institution>New York University</institution></institution-wrap><country>United States</country></aff></contrib></contrib-group></front-stub><body><boxed-text id="sa2-box1"><p>Our editorial process produces two outputs: (i) <ext-link ext-link-type="uri" xlink:href="https://sciety.org/articles/activity/10.1101/2021.04.29.441834">public reviews</ext-link> designed to be posted alongside <ext-link ext-link-type="uri" xlink:href="https://www.biorxiv.org/content/10.1101/2021.04.29.441834v1">the preprint</ext-link> for the benefit of readers; (ii) feedback on the manuscript for the authors, including requests for revisions, shown below. We also include an acceptance summary that explains what the editors found interesting or important about the work.</p></boxed-text><p><bold>Decision letter after peer review:</bold></p><p>Thank you for submitting your article "Increased signal to noise ratios within experimental field trials by regressing spatially distributed soil properties as principal components." for consideration by <italic>eLife</italic>. Your article has now been reviewed by 3 peer reviewers, one of whom is a member of our Board of Reviewing Editors, and the evaluation has been overseen by Meredith Schuman as the Senior Editor. The following individual involved in review of your submission has agreed to reveal their identity: Andrew Gloss (Reviewer #2).</p><p>The reviewers have discussed their reviews with one another, and the Reviewing Editor has drafted this to help you prepare a revised submission.</p><p>Essential revisions:</p><p>1) Each of the reviewers' specific concerns below should be addressed prior to resubmission of a revised manuscript for publication consideration. A major concern was a lack of detail on many aspects of the statistical model, the location of the raw data and ability of others to re-run the code. This should be the focus of the author's should they consider revising and resubmitting this manuscript.</p><p>2) Because the manuscript is, essentially a methods development report, it is of the utmost importance that the authors revise this manuscript in alignment with those goals. It should be straightforward, not difficult, for others to implement the tools developed.</p><p><italic>Reviewer #1 (Recommendations for the authors):</italic></p><p>I would like to see the raw data and methods for all of these experiments uploaded/appended as supplementary information here--it isn't satisfactory that one should turn to a preprint for this information--it should all be in the present manuscript. In that vein, I was also confused by whether microbes were a treatment or a passive variable here in this study (e.g., I had a hard time differentiating between the Qi et al., biorxiv manuscript and this one).</p><p><italic>Reviewer #2 (Recommendations for the authors):</italic></p><p>Overall, I found the approach exciting, and the paper makes a good case for its use in future experiments! However, some work is needed to make the presentation more complete in a few different ways, as numbered below. When revising, please consider the following sentence-by-sentence and analysis-by-analysis: is the approach fully described in the methods? Are the necessary results provided so a reader could evaluate support for the statement rather than taking the written interpretation at face value? (e.g., L245: readers should be able to view the results, rather than relying on the vague assurance that the results that aren't shown are "similar" to the one that is. This paper could and probably should have more extensive supplementary results to achieve this).</p><p>1. Not all aspects of the method's implementation are described thoroughly in the paper, in both practical and conceptual senses. This includes both the specific formulations of each analysis -- with a careful description of the model inputs, outputs, specification, and software packages -- and very clear explanations of the goal of each step and how the approach achieves it. The incompleteness of these details made it difficult to fully evaluate, and could pose obstacles to its reliable implementation and interpretation by other researchers. In a paper presenting a methodological approach and arguing for its broader use, clearly walking readers through these steps is essential. Addressing this will require revisions to the methods and results alike. While I do appreciate the brevity, other papers implementing spatial models for environmental variation -- such as Pauli et al., 2018 (G3, doi: 10.1534/g3.117.300479) or Velazco et al., 2017 (Theor. Appl. Genet; doi: 10.1007/s00122-017-2894-4) -- walk the reader through their approaches more thoroughly.</p><p>2. Results are presented incompletely. For example, Figure 4E shows how PC regression affected variance partitioning among explanatory variables and unexplained noise, but only for one dependent variable. This really should be conducted for every dependent variable included in the study. When conducting many tests, a seemingly compelling result can arise even by chance, so it's difficult to know how to interpret a single strong result that is hand-picked to be presented. Similarly, the effect of spatial modeling should be presented across <italic>all</italic> OTUs considered in the study, not just one with a particularly strong effect in the desired direction. Otherwise, one is left wondering if this is a chance result (since re-fitting a model on adjusted data will always alter it) that only arose because so many OTUs were tested -- and whether patterns in the opposite direction, where a significant effect emerged only in the unadjusted phenotypes, also were observed. Note that multiple test corrections (likely FDR) should be presented when applicable as well throughout the paper, especially for the large number of OTU tests that must've been conducted but not shown. It seems like the results should pass this scrutiny, but it must be applied nonetheless!</p><p>3. How the approach builds on or is unique from previous studies and approaches, and ensuing strengths and weaknesses to be expected as a result, is not sufficiently described; see Public Review for further details.</p><p>Specific comments:</p><p>I don't quite follow how separation of cluster centers in the PCA plots (Figures 4A-B) suggests that environmental noise has been reduced and treatment signals have been boosted. In these figures, the centers for each condition do appear further apart, but the spread of points within each condition have also increased. If both spread and centers increase, does this actually reflect better separation of conditions, or just a re-scaling of the overall plot? Does variance partitioning (explained vs. unexplained variance) and the significance of the condition effect in a statistical model actually improve? (Also, I'm confused by L223-224 -- wouldn't weakening a true treatment effect also enlarge clusters, albeit in a different way by drawing points toward the center of the plot?)</p><p>It would be very helpful to walk through the spatial model selection more -- a bit on the different spatial covariance structures that were tested in particular. The test results used for model selection should also be more fully presented, including parameters relevant to the likelihood ratio tests that were conducted (e.g., degrees of freedom for each comparison, log likelihood scores for each model and associated p-value, etc).</p><p>L143-147: Microbiomes of different compartments being affected by different soil elements seems plausible, but this could also just reflect false negatives, a common problem when simply "tallying up" the tests significant at some threshold. As a result of experimental noise (or differences in the amount of unexplained variance within specific compartments), it's possible that an element might have significant effects only in one compartment even if it affects all of them. Evaluating a model of microbiome_composition ~ element * compartment is needed to test this, paying attention the significance of the interaction term.</p><p>Table S1: How is model complexity taken into account? Typically, a more complex model will always have reduced error. Do the model comparisons penalize for increasing model complexity?</p><p>I was unable to access the scripts on Zenodo, but maybe that was user error on my end!</p><p><italic>Reviewer #3 (Recommendations for the authors):</italic></p><p>1. It might be helpful to describe the details of the statistical model.</p><p>2. To help readers employ the proposed tool in their studies, it is valuable to include the step-by-step procedure of the proposed strategy, such as selection of variables, number of PCs to include in the model etc.</p><p>3. For the proposed tool, will the correlations between the phenotypes affect the results/performance?</p><p>4. Page 10, line 151: GCMS should be "GC/MS" or "GC-MS"?</p></body></sub-article><sub-article article-type="reply" id="sa2"><front-stub><article-id pub-id-type="doi">10.7554/eLife.70056.sa2</article-id><title-group><article-title>Author response</article-title></title-group></front-stub><body><disp-quote content-type="editor-comment"><p>Essential revisions:</p><p>1) Each of the reviewers' specific concerns below should be addressed prior to resubmission of a revised manuscript for publication consideration. A major concern was a lack of detail on many aspects of the statistical model, the location of the raw data and ability of others to re-run the code. This should be the focus of the author's should they consider revising and resubmitting this manuscript.</p><p>2) Because the manuscript is, essentially a methods development report, it is of the utmost importance that the authors revise this manuscript in alignment with those goals. It should be straightforward, not difficult, for others to implement the tools developed.</p></disp-quote><p>We agree completely and appreciate the editor and reviewer suggestions to help us achieve that goal. With this in mind, we have extensively revised the methods section and particularly appreciate Reviewer #2 pointing us towards examples of previous papers. In addition to revising the methods section, we have addressed the other points raised by each reviewer below.</p><disp-quote content-type="editor-comment"><p>Reviewer #1 (Recommendations for the authors):</p><p>I would like to see the raw data and methods for all of these experiments uploaded/appended as supplementary information here--it isn't satisfactory that one should turn to a preprint for this information--it should all be in the present manuscript. In that vein, I was also confused by whether microbes were a treatment or a passive variable here in this study (e.g., I had a hard time differentiating between the Qi et al., biorxiv manuscript and this one).</p></disp-quote><p>We thank the reviewer for these comments and for the very nice summary of our work. We have revised this submission to include zenodo links to the raw data, scripts and methods, as requested (Line 529). In this paper, the microbiome is a passive variable (not an active treatment that we applied to the field). We have clarified this in line 343. We have also edited the text in many places to clarify the relationship between this methods paper, and the related research paper (Qi, M. et al., 2021). We are also happy to share that the Qi et al., manuscript was recently accepted at ISME J. As soon as we have an updated doi, we will update that in this manuscript, as well. While both papers use the field plant data and the microbiome data, the two papers each describe additional unique datasets and report different results.</p><disp-quote content-type="editor-comment"><p>Reviewer #2 (Recommendations for the authors):</p><p>Overall, I found the approach exciting, and the paper makes a good case for its use in future experiments! However, some work is needed to make the presentation more complete in a few different ways, as numbered below. When revising, please consider the following sentence-by-sentence and analysis-by-analysis: is the approach fully described in the methods? Are the ecessaryy results provided so a reader could evaluate support for the statement rather than taking the written interpretation at face value? (e.g., L245: readers should be able to view the results, rather than relying on the vague assurance that the results that aren&#8217;t shown are &#8220;similar&#8221; to the one that is. This paper could and probably should have more extensive supplementary results to achieve this).</p></disp-quote><p>We thank the reviewer for the encouraging words, thorough evaluation of our work and many helpful suggestions for how to improve the manuscript. We have extensively revised the manuscript as described for each point below.</p><disp-quote content-type="editor-comment"><p>1. Not all aspects of the method&#8217;s implementation are described thoroughly in the paper, in both practical and conceptual senses. This includes both the specific formulations of each analysis &#8211; with a careful description of the model inputs, outputs, specification, and software packages &#8211; and very clear explanations of the goal of each step and how the approach achieves it. The incompleteness of these details made it difficult to fully evaluate, and could pose obstacles to its reliable implementation and interpretation by other researchers. In a paper presenting a methodological approach and arguing for its broader use, clearly walking readers through these steps is essential. Addressing this will require revisions to the methods and results alike. While I do appreciate the brevity, other papers implementing spatial models for environmental variation &#8211; such as Pauli et al., 2018 (G3, doi: 10.1534/g3.117.300479) or Velazco et al., 2017 (Theor. Appl. Genet; doi: 10.1007/s00122-017-2894-4) -- walk the reader through their approaches more thoroughly.</p></disp-quote><p>Thank you for this concern and also for the suggestions. We have extensively revised the methods sections to include full model parameterizations and descriptions. We also thank the reviewer for pointing out additional references that should be included. In addition to citing these previously described approaches in the intro, we have revised the discussion to include a &#8216;compare and contrast&#8217; between these previous approaches and our method.</p><disp-quote content-type="editor-comment"><p>2. Results are presented incompletely. For example, Figure 4E shows how PC regression affected variance partitioning among explanatory variables and unexplained noise, but only for one dependent variable. This really should be conducted for every dependent variable included in the study. When conducting many tests, a seemingly compelling result can arise even by chance, so it's difficult to know how to interpret a single strong result that is hand-picked to be presented.</p></disp-quote><p>Thank you for the suggestion regarding adding more variance components plots to the manuscript. We have exchanged the principal components plot (Figure 4B) with variance components to show the effects of the principal component regression on the microbiome composition. The PCA plot is now moved to supplemental in addition to showing the same treatment effect in the other two compartments, soil and root (Figure 4&#8212;figure supplement 1). All harvest data is now included in Figure 4&#8212;figure supplement 2, as requested.</p><disp-quote content-type="editor-comment"><p>Similarly, the effect of spatial modeling should be presented across all OTUs considered in the study, not just one with a particularly strong effect in the desired direction. Otherwise, one is left wondering if this is a chance result (since re-fitting a model on adjusted data will always alter it) that only arose because so many OTUs were tested -- and whether patterns in the opposite direction, where a significant effect emerged only in the unadjusted phenotypes, also were observed. Note that multiple test corrections (likely FDR) should be presented when applicable as well throughout the paper, especially for the large number of OTU tests that must've been conducted but not shown. It seems like the results should pass this scrutiny, but it must be applied nonetheless!</p></disp-quote><p>We agree with the reviewer&#8217;s concern regarding representativeness of the results. Following your suggestion, we have conducted tests for all OTUs and added additional text to the manuscript reflecting the result with control of FDR (line 266). We also have added supplemental figures showing the results of the change-points before and after (Figure 4&#8212;figure supplement 4).</p><disp-quote content-type="editor-comment"><p>3. How the approach builds on or is unique from previous studies and approaches, and ensuing strengths and weaknesses to be expected as a result, is not sufficiently described; see Public Review for further details.</p></disp-quote><p>Again, we thank the reviewer for encouraging us to more fully place this work in the context of the field. We have revised both the introduction and the discussion with this goal in mind.</p><disp-quote content-type="editor-comment"><p>Specific comments:</p><p>I don't quite follow how separation of cluster centers in the PCA plots (Figures 4A-B) suggests that environmental noise has been reduced and treatment signals have been boosted. In these figures, the centers for each condition do appear further apart, but the spread of points within each condition have also increased. If both spread and centers increase, does this actually reflect better separation of conditions, or just a re-scaling of the overall plot? Does variance partitioning (explained vs. unexplained variance) and the significance of the condition effect in a statistical model actually improve? (Also, I'm confused by L223-224 -- wouldn't weakening a true treatment effect also enlarge clusters, albeit in a different way by drawing points toward the center of the plot?)</p></disp-quote><p>Thank you for giving this part considerable thought. Since the microbiome counts are brought back to the original count space after doing the soil property adjustment, we feel that there are negligible re-scaling effects. For instance, we would not expect all counts to increase thus forcing the clusters further apart simply due to scale. OTUs are adjusted upwards in some samples and down in others. However, considering this and other comments from the reviewers, we decided to move previous panel 4B to supplemental in exchange for a variance component analysis on the compositions.</p><disp-quote content-type="editor-comment"><p>It would be very helpful to walk through the spatial model selection more -- a bit on the different spatial covariance structures that were tested in particular. The test results used for model selection should also be more fully presented, including parameters relevant to the likelihood ratio tests that were conducted (e.g., degrees of freedom for each comparison, log likelihood scores for each model and associated p-value, etc).</p></disp-quote><p>We leverage an existing R package, gstat, and all possible types of models (nugget, exponential, spherical, Gaussian, Matern, and Stein) were evaluated. Full parameterizations of these can be found in their manuscripts (Pebesma and Wesseling 1998; Pebesma 2004)(Pebesma 1998, Pebema 2004). These models are fully parameterized by the same three coefficients (nugget, sill, and range) and we have revised the text to clarify their explicit definitions (methods) as well as added in an additional citation. As suggested, we have extensively revised the methods section to more fully walk the reader through the process.</p><disp-quote content-type="editor-comment"><p>L143-147: Microbiomes of different compartments being affected by different soil elements seems plausible, but this could also just reflect false negatives, a common problem when simply "tallying up" the tests significant at some threshold. As a result of experimental noise (or differences in the amount of unexplained variance within specific compartments), it's possible that an element might have significant effects only in one compartment even if it affects all of them. Evaluating a model of microbiome_composition ~ element * compartment is needed to test this, paying attention the significance of the interaction term.</p></disp-quote><p>We appreciate the amount of thought that went into this suggestion and how interpretable the results are from a biological point of view. From a statistical standpoint, a model that includes a compartment interaction term allows different compartments react differently to the changing element values. As suggested, we have completed the analysis including an interaction term and have included an additional table that directly assesses the effect of the interaction term (Figure 2&#8212;figure supplement 1). We have also updated the text to include this analysis in the results. The statistical test to decide if interaction is significant is limiting in its interpretability. We therefore also performed post-hoc tests to investigate more directly which compartments are affected.</p><disp-quote content-type="editor-comment"><p>Table S1: How is model complexity taken into account? Typically, a more complex model will always have reduced error. Do the model comparisons penalize for increasing model complexity?</p></disp-quote><p>This is an absolutely accurate statement and something geospatial statistics has aimed to alleviate. For all the different types of spatial models considered here, each one of them is fully parameterized by the same three coefficients (nugget, sill, and range) so from a degrees of freedom and error point of view, they are all equally complex. We identify which spatial model is &#8220;best&#8221; by finding the model that produces the smallest error, and since the complexities are the same, this metric is suitable for this determination. We have added a sentence in the methods of the manuscript to bring more clarity to the errors of each type of model.</p><disp-quote content-type="editor-comment"><p>I was unable to access the scripts on Zenodo, but maybe that was user error on my end!</p></disp-quote><p>We have updated the link and verified that others can access the site (the previous link may have been locked!). Hopefully it now works for you, as well. If not, please just let <italic>eLife</italic> staff know and we&#8217;ll sort it out.</p><disp-quote content-type="editor-comment"><p>Reviewer #3 (Recommendations for the authors):</p><p>1. It might be helpful to describe the details of the statistical model.</p></disp-quote><p>Thank you for this suggestion. We have extensively revised the methods sections, including specifying model equations, to give a more systematic look at exactly what was done. We also have included all scripts and raw data in a zenodo repository.</p><disp-quote content-type="editor-comment"><p>2. To help readers employ the proposed tool in their studies, it is valuable to include the step-by-step procedure of the proposed strategy, such as selection of variables, number of PCs to include in the model etc.</p></disp-quote><p>We think the use of the word &#8216;tool&#8217; was misleading since we are not providing a deployable R package for general use. Therefore, we have changed this word to &#8216;method&#8217;. Regardless, we like this suggestion and so have updated the text in the Discussion section to highlight what we feel is the balancing act between the number of PC&#8217;s to use vs the number of degrees of freedom required to adjust. This is an important concept, thank you for the suggestion.</p><disp-quote content-type="editor-comment"><p>3. For the proposed tool, will the correlations between the phenotypes affect the results/performance?</p></disp-quote><p>Good question. In this manuscript we are only considering phenotypes one at a time to assess for soil property variations and removing those effects. However, in our related paper (Qi et al.,), after de-noising the data, we leveraged correlations between experimental systems and between plant phenotypes to further exclude potential &#8216;false positives.&#8217;</p><disp-quote content-type="editor-comment"><p>4. Page 10, line 151: GCMS should be "GC/MS" or "GC-MS"?</p></disp-quote><p>Corrected.</p></body></sub-article></article>