import React, { useState, useEffect, useRef } from 'react';
import './Placeholder.css';
import './CustomWaitingList.css';
import './Science.css'
import FlowFieldWords from './flow_full';
import DataTable from 'react-data-table-component';
import styled from 'styled-components';
import { MathJax, MathJaxContext } from 'better-react-mathjax';




// Example GWAS summary-statistic rows for the "Example of GWAS Summary Statistics" table.
// Each tuple is [SNP, effect allele, other allele, beta, p-value]; every value is kept
// as a string so the table shows it exactly as written here.
const data = [
    ['rs12345352', 'C', 'G', '-0.02351', '3.15E-33'],
    ['rs34340947', 'A', 'G', '0.00731', '6.94E-07'],
    ['rs2271593', 'T', 'C', '0.00965', '3.02E-11'],
    ['rs141896074', 'T', 'G', '0.01941', '8.42E-08'],
    ['rs12030919', 'T', 'C', '0.00627', '8.66E-06'],
    ['rs77363699', 'A', 'T', '-0.01328', '1.12E-09'],
    ['rs115801630', 'T', 'C', '0.01396', '4.14E-07'],
    ['rs3795156', 'A', 'G', '0.00548', '2.66E-07'],
].map(([SNP, EffectAllele, A2, BETA, P]) => ({ SNP, EffectAllele, A2, BETA, P }));

// Column definitions for the GWAS summary-statistics example table.
// Each pair is [header label, key of the row field shown in that column].
const columns = [
    ['SNP', 'SNP'],
    ['Effect allele', 'EffectAllele'],
    ['Other allele', 'A2'],
    ['Beta', 'BETA'],
    ['P', 'P'],
].map(([name, field]) => ({ name, selector: (row) => row[field] }));



/**
 * Shared inline-style objects for the section components in this file.
 * Each top-level key is a flat map of camelCased CSS properties intended to be
 * passed directly to a React `style` prop.
 *
 * Inline styles cannot express pseudo-classes or pseudo-elements, so every entry
 * holds plain property/value pairs only. The former nested `':hover'` and
 * `'::before'` objects had no effect when used as inline styles and were removed.
 *
 * NOTE(review): components in this file read `styles.referenceNumber`, which is
 * not defined here, so that style resolves to `undefined` (no styling).
 * `referenceText` may be the key that was intended — confirm before wiring it up.
 */
const styles = {
    // Outer card wrapping a whole section.
    container: {
        color: '#333',
        backgroundColor: '#f8f8f8',
        padding: '20px',
        maxWidth: '80vw',
        margin: '40px auto',
        borderRadius: '8px',
        boxShadow: '0 4px 6px rgba(0, 0, 0, 0.1)',
        fontFamily: '"Helvetica Neue", Helvetica, Arial, sans-serif',
    },
    // Centered section title.
    header: {
        color: '#2c3e50',
        textAlign: 'center',
        marginBottom: '30px',
    },
    // White content panel with a blue accent border.
    // (`position: 'center'` was dropped: it is not a valid CSS `position` value.)
    section: {
        background: '#fff',
        maxWidth: '80vw',
        margin: 'auto',
        padding: '20px',
        borderRadius: '8px',
        marginBottom: '20px',
        borderLeft: '5px solid #3498db',
    },
    subHeader: {
        color: '#2c3e50',
        fontSize: '22px',
        fontWeight: '600',
        marginBottom: '10px',
        borderBottom: '2px solid #3498db',
        paddingBottom: '5px',
    },
    paragraph: {
        color: '#555',
        fontSize: '16px',
        lineHeight: '1.6',
        textAlign: 'justify',
        maxWidth: '80vw',
        marginBottom: '15px',
    },
    // Bulleted list. The original literal declared conflicting duplicates
    // (`listStyle: 'none'` vs `listStyleType: 'disc'`, `marginLeft` twice,
    // `paddingLeft` vs `padding`); only the values that actually won are kept.
    list: {
        listStyleType: 'disc',
        marginLeft: '20px',
        padding: '0',
    },
    listItem: {
        marginBottom: '15px',
        paddingLeft: '20px',
        position: 'relative',
        fontSize: '16px',
        lineHeight: '1.6',
        color: '#555',
    },
    // Inline citation link such as "[link]".
    reference: {
        color: '#3498db',
        textDecoration: 'none',
        fontSize: '0.85em',
        marginLeft: '5px',
        position: 'relative',
        top: '-2px',
    },
    referenceText: {
        fontWeight: 'bold',
        color: '#34495e',
        fontSize: '0.7em',
        position: 'relative',
        top: '-5px',
        left: '2px',
    },
    // Wrapper for "Further reading" style link groups.
    link: {
        color: '#3498db',
        textDecoration: 'none',
        fontWeight: '600',
    },
    // Plain hyperlink look. Previously nested inside `link`, where it was unreachable
    // as `styles.href`.
    href: {
        color: 'blue',
        textDecoration: 'underline',
    },
    // Figure caption. Previously nested inside `link`, which left `styles.figcaption`
    // undefined for every caption that references it.
    figcaption: {
        color: '#666',
        fontSize: '0.7em',
        textAlign: 'center',
        marginTop: '5px',
        fontStyle: 'italic',
        fontWeight: 'normal',
    },
};


// Style overrides handed to <DataTable customStyles={...}> so that long cell and
// header text wraps instead of stretching a column past 150px.
const customStyles = (() => {
    // Wrapping rules shared by body cells and header cells.
    const wrapLongText = {
        whiteSpace: 'normal',
        wordBreak: 'break-word',
    };
    const columnMaxWidth = '150px';

    return {
        cells: {
            style: {
                ...wrapLongText,
                overflow: 'hidden',
                textOverflow: 'ellipsis',
                maxWidth: columnMaxWidth,
            },
        },
        headCells: {
            style: {
                ...wrapLongText,
                maxWidth: columnMaxWidth,
            },
        },
    };
})();

// Configuration object passed to <MathJaxContext config={...}>.
// `loader.load` names the MathJax components requested here: TeX input and SVG output.
const config = {
    loader: {
        load: [
            'input/tex',
            'output/svg',
        ],
    },
};
  


  const columnsMath = [
    {
      name: 'Method',
      selector: row => row.name,
      sortable: true,
      wrap: true,
      grow: 0.1, // adjust the grow factor as needed
    },
    {
      name: 'Distribution of SNP Effects',
      cell: row => <MathJax>{row.distribution}</MathJax>,
      ignoreRowClick: true,
      allowOverflow: true,
      wrap: true,
      grow: 4.5, // this column might contain larger content
    },
    {
      name: 'Tuning Sample',
      selector: row => row.tuningSample,
      sortable: true,
      wrap: true,
      grow: 0.9,
    },
    {
      name: 'Predefined Parameters',
      selector: row => row.predefinedParameters,
      wrap: true,
      grow: 1,
    },
    {
      name: 'Parameters Estimated in Tuning Sample',
      selector: row => row.estimatedParameters,
      wrap: true,
      grow: 1,
    },
  ];
  

  // Rows for the "SNP Methods Analysis" table, one per polygenic-score method.
  //
  // Fields (read by the column definitions in columnsMath):
  //   id                   - row number, 1..10
  //   name                 - method name
  //   distribution         - text/TeX describing the assumed distribution of SNP effects
  //   tuningSample         - 'Yes' / 'No'
  //   predefinedParameters - text/TeX, '—' when there are none
  //   estimatedParameters  - text/TeX, '—' when there are none
  //
  // TeX is delimited with \( ... \) (inline) or \[ ... \] (display). Backslashes are
  // doubled because these are JavaScript string/template literals, so `\\beta` is the
  // two-character sequence `\beta` at runtime. Whitespace inside the multi-line
  // template literals is part of the string value.
  const dataMath = [
    {
      id: 1,
      name: 'PC+T',
      distribution: 'None',
      tuningSample: 'Yes',
      predefinedParameters: '—',
      estimatedParameters: 'p-value threshold',
    },
    {
      id: 2,
      name: 'SBLUP',
      distribution: '\\( \\beta \\sim N(0, \\frac{\\sigma_g^2}{m}) \\)',
      tuningSample: 'No',
      predefinedParameters: `LD radius in kb, (\\( \\gamma \\))`,
      estimatedParameters: '—',
    },
    {
      id: 3,
      name: 'LDpred2-Inf',
      distribution: '\\( \\beta \\sim N(0, \\frac{\\sigma_g^2}{m}) \\)',
      tuningSample: 'No',
      predefinedParameters: '\\( h_g^2 \\), LD radius in cM or kb',
      estimatedParameters: '—',
    },
    {
      id: 4,
      name: 'LDpred-funct',
      distribution: '\\( \\beta \\sim N(0, c\\sigma_g^2) \\)',
      tuningSample: 'No',
      predefinedParameters: '\\( h_g^2 \\), LD radius in number of SNPs',
      estimatedParameters: '—',
    },
    {
      id: 5,
      name: 'LDpred2',
      distribution: '\\( \\beta_k \\sim N \\left( 0, \\frac{h_g^2}{m} \\right) \\), with probability of \\( \\pi \\), and 0 with probability of \\( 1 - \\pi \\)',
      tuningSample: 'Yes',
      predefinedParameters: '\\( h_g^2 \\), \\( \\pi \\) software default values, LD radius in cM or kb',
      estimatedParameters: 'sparsity, \\( \\pi \\)',
    },
    {
      id: 6,
      name: 'Lassosum',
      distribution: '\\( f(\\beta) = y^T y + (1 - s) \\beta^T X_r^T X_r \\beta - 2 \\beta^T X^T y + s \\beta^T \\beta + 2\\lambda \\|\\beta\\|^{1}_{1} \\)',
      tuningSample: 'Yes',
      predefinedParameters: 'LD blocks',
      estimatedParameters: '\\( \\lambda \\), s',
    },
    {
      id: 7,
      name: 'PRS-CS',
      distribution: '\\( \\beta_j \\sim N(0, \\frac{\\sigma^2}{n} \\psi_j) \\)',
      tuningSample: 'Yes',
      predefinedParameters: `a = 1, b = 0.5, sample size LD Blocks`,
      estimatedParameters: '\\( \\phi \\)',
    },
    {
      id: 8,
      name: 'PRS-CS-auto',
      distribution: 'Same as PRS-CS, but estimates \\( \\phi \\) from the discovery GWAS.',
      tuningSample: 'No',
      predefinedParameters: 'a = 1, b = 0.5, sample size LD Blocks',
      estimatedParameters: '—',
    },
    {
      id: 9,
      name: 'SBayesR',
      // Display-math block: a `cases` environment listing the mixture components.
      distribution: `\\[
        \\beta_j \\mid \\pi, \\sigma^2_{\\beta} \\sim
        \\begin{cases}
        0, & \\text{with probability of } \\pi_1 \\\\
        N(0, \\gamma_{2}^2\\sigma^2_{\\beta}), & \\text{with probability of } \\pi_2 \\\\
        \\vdots \\\\
        N(0, \\gamma_{C}^2\\sigma^2_{\\beta}), & \\text{with probability of } 1 - \\sum_{c=1}^{C-1} \\pi_c
        \\end{cases}
      \\]`,
      tuningSample: 'No',
      predefinedParameters: 'LD radius in cM or kb, C = 4, \\( \\gamma \\) software default values',
      estimatedParameters: '—',
    },
    {
      id: 10,
      name: 'MegaPRS',
      // Display-math block: a `matrix` with one line per candidate model
      // (Lasso, ridge regression, BOLT-LMM); the last line nests a `cases` environment.
      distribution: `\\[
        \\begin{matrix}
        \\text{Lasso: } & \\beta_j \\sim DE(\\lambda \\sigma_j) \\\\
        \\text{Ridge regression: } & \\beta_j \\sim N (0, v \\sigma_j^2) \\\\
        \\text{BOLT-LMM: } & \\beta_j 
        \\begin{cases}
        N (0, (1 - f_2) / \\pi \\sigma_j^2 ), & \\text{with probability of } \\pi \\\\
        N (0, (f_2) / (1 - \\pi) \\sigma_j^2 ), & \\text{with probability of } 1 - \\pi
        \\end{cases}
        \\end{matrix}
      \\]`,
      tuningSample: 'Yes',
      predefinedParameters: 'LD radius in cM or kb, Parameters used in BLD-LDAK, Grid search parameter values for each method',
      estimatedParameters: 'The tuning cohort is used to estimate the parameters that maximize prediction for each model, and from these the model that maximizes prediction is selected.',
    },
];

    const DistributionsDescription = () => {
        const smallTextStyle = {
            fontSize: '10px' // You can adjust the size as needed
        };
    
        return (
          <div>
            <MathJaxContext config={config}>
                <div style={smallTextStyle}>
                    <strong>Distributions:</strong><br />
                    <MathJax inline>{"N: normal distribution; "}</MathJax>
                    <MathJax inline>{"\\( \\chi^2 \\): chi-squared distribution; "}</MathJax>
                    <MathJax inline>{"Dir: Dirichlet distribution; "}</MathJax>
                    <MathJax inline>{"DE: double exponential distribution; "}</MathJax>
                    <MathJax inline>{"\\( \\mathbb{I}[1] \\): the \\( \\chi^2 \\) (SNP-based heritability) is a predefined parameter, it is estimated from the discovery GWAS, where discovery GWAS is the genome-wide set of association statistics (SNP identification number, reference allele, frequency of reference allele, association effect size for reference allele, standard error of effect size, association p value, sample size). Bold indicates matrix notation, and italic indicates scalar notation. All methods require a reference sample with genotypes to model LD between SNPs."}</MathJax><br /><br />
                    <MathJax inline>{"cm: centimorgan; "}</MathJax>
                    <MathJax inline>{"GWAS: genome-wide association study; "}</MathJax>
                    <MathJax inline>{"kb: kilobase pair; "}</MathJax>
                    <MathJax inline>{"LD: linkage disequilibrium; "}</MathJax>
                    <MathJax inline>{"SNP: single nucleotide polymorphism."}</MathJax>
                </div>
            </MathJaxContext>
            </div>
        );
    };
    
    export default DistributionsDescription;





    const CompressedSensingSection = () => {
        const activeSNPsData = [
            { phenotype: 'Height', activeSNPs: 22000 },
            { phenotype: 'Heel Bone Density', activeSNPs: 15000 },
            { phenotype: 'BMI', activeSNPs: 22000 },
            { phenotype: 'Educational Attainment', activeSNPs: 17000 },
            { phenotype: 'Apolipoprotein A', activeSNPs: 15000 },
            { phenotype: 'Apolipoprotein B', activeSNPs: 9000 },
            { phenotype: 'Cholesterol', activeSNPs: 10000 },
            { phenotype: 'Direct Bilirubin', activeSNPs: 4000 },
            { phenotype: 'HDL Cholesterol', activeSNPs: 17000 },
            { phenotype: 'Lipoprotein A', activeSNPs: 3000 },
            { phenotype: 'Platelet Count', activeSNPs: 15000 },
            { phenotype: 'Total Bilirubin', activeSNPs: 5000 },
            { phenotype: 'Total Protein', activeSNPs: 15000 },
            { phenotype: 'Triglycerides', activeSNPs: 11000 }
          ];
          
          const columns = [
            { name: 'Phenotype', selector: row => row.phenotype },
            { name: 'Active SNPs', selector: row => row.activeSNPs },
          ];
          
      
        return (
          <div style={styles.container}>
            <h3 style={styles.subHeader}>Compressed Sensing</h3>
            <MathJaxContext>
              <p style={styles.paragraph}>
                Compressed Sensing (CS) is instrumental in genome-wide association studies (GWAS) for signal recovery when the number of single-nucleotide polymorphisms (SNPs) surpasses the sample size. It proposes that identifying sparse markers relative to sample size is feasible using efficient algorithms, facilitating phenotype prediction even with an overwhelming number of SNPs.
              </p>
              <p style={styles.paragraph}>
                As per CS, the transition from inadequate to complete marker selection correlates with sample size growth, contingent on trait heritability. Traits with complete heritability (1) exhibit a stark transition, unlike those with lower heritability, indicating non-linear growth in prediction accuracy with sample size increases.
              </p>
              <MathJax>
                <p style={styles.paragraph}>
                  The concept's utility lies in gauging the distance from accurate trait prediction, answering queries like the requisite sample size for accurate disease prediction. 'Active' SNPs, indicating a phenotype's polygenic nature, are crucial for model accuracy. The following table delineates the estimated number of active SNPs across various phenotypes:
                </p>
              </MathJax>
            </MathJaxContext>
            <DataTable
              columns={columns}
              data={activeSNPsData}
              noHeader
              customStyles={customStyles}
              />
          </div>
        );
      };







// Component for 'How does a GWAS work?'
const HowGWASWorks = () => (
    <div style={styles.container}>
      <h2 style={styles.header}>How does a GWAS work?</h2>
      <section style={styles.section}>
    <p style={styles.paragraph}>
      A GWAS, or Genome-Wide Association Study, identifies a set of genetic variants in different individuals to see if any variant is associated with a trait. These associations are typically weak but can inform us about important biological pathways and are useful when analyzed in aggregate.
      <sup style={styles.referenceNumber}>
        <a href="https://pubmed.ncbi.nlm.nih.gov/20647212" style={styles.reference}>[link]</a>
        <a href="https://pubmed.ncbi.nlm.nih.gov/20300123" style={styles.reference}>[link]</a>
      </sup>
    </p>
    <p style={styles.paragraph}>
      GWAS compare the DNA of participants with varying phenotypes for a particular trait or disease. This might include people with a disease (cases) and similar people without the disease (controls), or those with different phenotypes for a specific trait, like blood pressure. GWAS identify SNPs and other DNA variants associated with a disease but cannot specify which genes are causal on their own.
      <sup style={styles.referenceNumber}>
        <a href="https://pubmed.ncbi.nlm.nih.gov/20647212" style={styles.reference}>[link]</a>
        <a href="https://pubmed.ncbi.nlm.nih.gov/18349094" style={styles.reference}>[link]</a>
        <a href="#cite_note-5" style={styles.reference}>[link]</a>
      </sup>
    </p>
    <p style={styles.paragraph}>
      A key step in most GWAS is the imputation of genotypes at SNPs not on the genotype chip used in the study. This process increases the number of SNPs that can be tested for association and facilitates meta-analysis of GWAS across distinct cohorts.
      <sup style={styles.referenceNumber}>
        <a href="#cite_note-6" style={styles.reference}>[link]</a>
      </sup>
    </p>
    <p style={styles.paragraph}>
      Factors like sex, age, genotyping array, and ancestry are controlled for when discovering variants, as many genetic variations are associated with geography. This process is known as controlling for population stratification.
      <sup style={styles.referenceNumber}>
        <a href="https://pubmed.ncbi.nlm.nih.gov/18758442" style={styles.reference}>[link]</a>
        <a href="#cite_note-11" style={styles.reference}>[link]</a>
      </sup>
    </p>
    <p style={styles.paragraph}>
      Summary statistics from GWAS are used to make a polygenic score. These statistics include the SNP rsID, the effect allele, the non-effect allele, the beta coefficient or odds ratio, and the p-value.
    </p>
  <DataTable
      title="Example of GWAS Summary Statistics"
      columns={columns}
      data={data}
    />
</section>

</div>
)








// Component for 'Current GWAS Trends'
const CurrentGWASTrends = () => (
    <div style={styles.container}>
      <h2 style={styles.header}>Current GWAS Trends</h2>
      <section style={styles.section}>
        <p style={styles.paragraph}>
          Genome-Wide Association Studies (GWAS) have evolved significantly since their inception. A key trend has been the expansion of sample sizes, with some studies in 2018 reaching over 1 million participants, such as the study on educational attainment 
          <a href="https://www.nature.com/articles/s41588-018-0147-3" style={styles.reference}>[link]</a>, and others in 2022 encompassing up to 3 million individuals 
          <a href="https://www.nature.com/articles/s41588-022-01016-z" style={styles.reference}>[link]</a>. This growth aims to enhance the detection of risk-SNPs with smaller effect sizes and lower allele frequencies. Concurrently, there has been a shift towards more narrowly defined phenotypes to improve the specificity of findings 
          <a href="https://pubmed.ncbi.nlm.nih.gov/19901186" style={styles.reference}>[link]</a>. Another innovative approach in GWAS is the use of participants who are first-degree relatives of individuals with a specific disease, offering a unique perspective in genetic research 
          <a href="https://www.nature.com/articles/ng.3766" style={styles.reference}>[link]</a>.
        </p>
        <p style={styles.paragraph}>
          Most SNP variations identified in GWAS are associated with only a minor increased risk of disease. The challenge remains in detecting the contribution of very rare mutations not included in standard arrays. To address this, biobanks with higher genotype depth are being developed to find rare variant associations, contributing to solving the puzzle of missing heritability.
        </p>
        <p style={styles.paragraph}>
          In the realm of polygenic scores, calculated from GWAS data, there's a focus on combining GWAS summary statistics with individual genome data. The polygenic score, a key metric, is derived by summing the beta coefficients for matching rsIDs post-imputation and dividing by the number of variants used. This score is then translated into a percentile for comparative analysis. Techniques such as pruning and thresholding, and clumping and thresholding, are employed to refine the predictive power of these scores. However, these methods might limit the comprehensiveness of SNP data, underscoring the importance of complete summary statistics and effective imputation methods for enhanced accuracy.
        </p>
        <p style={styles.paragraph}>
            <b>Summary images from academic research:</b>
        </p>
        <figure>
      <img src="https://upload.wikimedia.org/wikipedia/commons/thumb/f/ff/Minor_allele_frequency_versus_effect_size.png/299px-Minor_allele_frequency_versus_effect_size.png" alt="Minor allele frequency versus effect size" />
      <p style={{ color: '#666', fontSize: '1em', textAlign: 'left', marginTop: '5px', fontStyle: 'italic', fontWeight: 'normal' }}>
        The rarer the variant, the larger the effect size
      </p>
    </figure>

    <figure>
      <img src="//static.miraheze.org/genomicswiki/thumb/7/7f/Howtomake.png/278px-Howtomake.png" alt="Steps to make a polygenic score" />
      <p style={{ color: '#666', fontSize: '1em', textAlign: 'left', marginTop: '5px', fontStyle: 'italic', fontWeight: 'normal' }}>
        Steps to make a polygenic score
      </p>
    </figure>

    <figure>
      <img src="//static.miraheze.org/genomicswiki/thumb/3/34/Image.png/390px-Image.png" alt="The GWAS to polygenic score process" />
      <p style={{ color: '#666', fontSize: '1em', textAlign: 'left', marginTop: '5px', fontStyle: 'italic', fontWeight: 'normal' }}>
        The GWAS to polygenic score process
      </p>
    </figure>
      </section>
    </div>
  );

  





// Component for 'Polygenic Scores from GWAS Data'
const PolygenicScoresFromGWAS = () => (
    <div style={{
      color: '#333',
      backgroundColor: '#f8f8f8',
      padding: '20px',
      maxWidth: '80vw',
      margin: '40px auto',
      borderRadius: '8px',
      boxShadow: '0 4px 6px rgba(0, 0, 0, 0.1)',
      fontFamily: '"Helvetica Neue", Helvetica, Arial, sans-serif'
    }}>
      <h2 style={{
        color: '#2c3e50',
        textAlign: 'center',
        marginBottom: '30px'
      }}>
        Polygenic Scores from GWAS Data
      </h2>
      <section style={{
        background: '#fff',
        maxWidth: '80vw',
        margin: 'auto',
        padding: '20px',
        borderRadius: '8px',
        marginBottom: '20px',
        borderLeft: '5px solid #3498db',
      }}>
        <p style={{
          color: '#555',
          fontSize: '16px',
          lineHeight: '1.6',
          textAlign: 'justify',
          maxWidth: '80vw',
          marginBottom: '15px',
        }}>
          Polygenic scores can be calculated by combining GWAS sumstats (rsID and beta coefficient) with an individual’s genome data. Summary statistics can range from just a few SNPs to millions of SNPs. A polygenic score is the sum of beta coefficients for matching rsIDs after imputation, divided by the number of variants actually used in the calculation. To translate the polygenic score into a percentile, many scores from different people must be calculated, like using 1000 genomes data to create a distribution.
        </p>
        <figure>
          <img src="//static.miraheze.org/genomicswiki/thumb/7/7f/Howtomake.png/278px-Howtomake.png" alt="Steps to make a polygenic score" />
          <p style={{
            color: '#666',
            fontSize: '0.9em',
            textAlign: 'left',
            marginTop: '5px',
            fontStyle: 'italic',
            fontWeight: 'normal',
          }}>Steps to make a polygenic score</p>
        </figure>
        <figure>
          <img src="//static.miraheze.org/genomicswiki/thumb/3/34/Image.png/390px-Image.png" alt="The GWAS to polygenic score process" />
          <p style={{
            color: '#666',
            fontSize: '0.9em',
            textAlign: 'left',
            marginTop: '5px',
            fontStyle: 'italic',
            fontWeight: 'normal',
          }}>The GWAS to polygenic score process</p>
        </figure>
        <figure>
          <img src="//static.miraheze.org/genomicswiki/thumb/d/d7/Pt.png/184px-Pt.png" alt="Pruning and thresholding methods" />
          <p style={{
            color: '#666',
            fontSize: '0.9em',
            textAlign: 'left',
            marginTop: '5px',
            fontStyle: 'italic',
            fontWeight: 'normal',
          }}>These methods might pick the purple SNP, and ignore the rest</p>
        </figure>
        <ul style={{
          listStyle: 'none',
          paddingLeft: '0',
          marginLeft: '20px',
          padding: '0',
        }}>
          <li style={{
            marginBottom: '15px',
            paddingLeft: '20px',
            position: 'relative',
            fontSize: '16px',
            lineHeight: '1.6',
            color: '#555',
          }}>Thresholding: removing loci based on high levels of pairwise LD in a particular small chromosomal/SNP region is pruning.</li>
          <li style={{
            marginBottom: '15px',
            paddingLeft: '20px',
            position: 'relative',
            fontSize: '16px',
            lineHeight: '1.6',
            color: '#555',
          }}>Pruning: removing one of two SNPs if the squared correlation (r2) between allelic values at two loci is higher than a certain value, e.g., 0.9.</li>
          <li style={{
            marginBottom: '15px',
            paddingLeft: '20px',
            position: 'relative',
            fontSize: '16px',
            lineHeight: '1.6',
            color: '#555',
          }}>Clumping is when only the most significant SNP is reported.</li>
        </ul>
        <p style={{
          color: '#555',
          fontSize: '16px',
          lineHeight: '1.6',
          textAlign: 'justify',
          maxWidth: '80vw',
          marginBottom: '15px',
        }}>
          These approaches are significant for GWASs trying to discover biological pathways. For predicting phenotypes though, we want more information. This is important because many GWAS summary statistics are already pruned or clumped, and thresholded.
        </p>
        <p style={{
          color: '#555',
          fontSize: '16px',
          lineHeight: '1.6',
          textAlign: 'justify',
          maxWidth: '80vw',
          marginBottom: '15px',
        }}>
          To increase SNP overlap between GWAS and the individual genome, imputation must be conducted. There are many free methods to do this, e.g., BEAGLE, the Michigan Imputation Server (Minimac4), or others.
        </p>
      </section>
    </div>
  );








  
  // Component for 'Improving polygenic scores'
  const ImprovingPolygenicScores = () => (
    <div style={styles.container}>
      <h2 style={styles.header}>Improving Polygenic Scores</h2>
      <section style={styles.section}>
        <p style={styles.paragraph}>
          Methods that improve polygenic scores can involve optimizing GWAS summary statistics, or bypassing the GWAS step to directly model individual-level data (SNP | phenotype).
          Techniques include MTAG, SBayesR, PRS-CS-auto, LDpred2,
          <a href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4596916/" style={styles.reference}>[link]</a>
          <a href="https://academic.oup.com/bioinformatics/article/36/22-23/5424/6039173" style={styles.reference}>[link]</a> PUMA-CUBS, and genomic LASSO. Additional approaches involve creating a per-locus Bayesian prior effect based on variables like genomic features. Some LASSO derivatives include lassosum, 
          <a href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC8775060/" > smoothedLasso</a>, and TLP.
        </p>
        <p style={styles.paragraph}>
          For head-to-head comparisons of PRS optimization methods, visit
          <a href="https://journals.plos.org/plosgenetics/article?id=10.1371/journal.pgen.1009021" > this link </a>
          and
          <a href="https://academic.oup.com/bioinformatics/article/35/20/4038/5419857" > this link</a>.
        </p>


        <MathJaxContext config={config}>
            <DataTable
            title="SNP Methods Analysis"
            columns={columnsMath}
            data={dataMath}
            paginationPerPage={20}
            defaultSortFieldId={1}
            pagination
            customStyles={customStyles}
            />
        </MathJaxContext>
        <DistributionsDescription style={{ fontSize: '4px' }}>
        </DistributionsDescription>

        <p style={styles.paragraph}>
        <a href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC8500913/">Table source</a>
        </p>
        <br></br>        <br></br>

        <figure>
          <img src="//static.miraheze.org/genomicswiki/thumb/7/75/41588_2023_1443_Fig1_HTML.webp/300px-41588_2023_1443_Fig1_HTML.webp.png" alt="Incorporation of genomic features" />
          <figcaption style={styles.figcaption}>Incorporation of genomic features leads to better power and more accurate models</figcaption>
        </figure>
      </section>
    </div>
  );
  
  
  const MultiTraitApproaches = () => (
    <div style={styles.container}>
      <h2 style={styles.header}>Multi-trait approaches</h2>
      <section style={styles.section}>
        <p style={styles.paragraph}>
          Multi-Trait Analysis of GWAS (MTAG) is a robust method that leverages the genetic correlations between multiple traits to enhance the accuracy of predictions. This approach is particularly effective in scenarios where traits are genetically correlated. For instance, if traits A and B, and A and C are genetically correlated, and B and C are well-studied with extensive GWAS data, this information can be instrumental in understanding trait A. MTAG facilitates the use of GWAS or polygenic risk score (PRS) formation with individual-level data and can also be applied using only summary statistics by informing single nucleotide polymorphism (SNP) weights of PGS A based on the summary statistics of B and C, combined with linkage disequilibrium (LD) info from a reference panel. Additionally, the Multitrait Deep Learning Model, which employs a multilayer perceptron (MLP) topology with multiple hidden layers and output neurons, offers a sophisticated framework for analyzing multiple traits. However, it's important to note that low genetic correlation between traits might lead to reduced accuracy if the genetic correlation is inaccurately estimated<a href="https://www.nature.com/articles/s41467-017-02769-6" style={styles.reference}>[link]</a>.
        </p>
      </section>
    </div>
  );
  
  const BayesianApproaches = () => (
    <div style={styles.container}>
      <h2 style={styles.header}>Bayesian approaches</h2>
      <section style={styles.section}>
        <p style={styles.paragraph}>
          Bayesian approaches in genomics offer a nuanced perspective by evaluating the conditional probability of model parameters given observed data. This method synthesizes the likelihood of data with prior beliefs about parameter values. Commonly used prior types include Gaussian, scaled-t, and double exponential, which vary in levels of shrinkage. The posterior distribution, crucial in Bayesian analysis, is derived by combining the likelihood with the prior probability of parameter sets. The Markov Chain Monte Carlo (MCMC) method is then employed to efficiently sample from this posterior distribution, enabling a stochastic exploration of the parameter space. The versatility of Bayesian approaches allows for a broad comparison of various polygenic score methods, enhancing the understanding and application of genomic predictions.
        </p>
        <figure>
          <img src="https://web.stat.tamu.edu/~yni/publication/j_gwas/featured.jpg" alt="Polygenic Score Method" />
        </figure>
        <p style={styles.figcaption}>Image from Tian Ge et al. 2019</p>

      </section>
    </div>
  );
  
  
  const DiseasePRSStudies = () => (
    <div style={styles.container}>
      <h2 style={styles.header}>Useful example studies regarding disease polygenic scores</h2>
      <h5 style={{ color: 'gray', fontSize: 'small', textAlign: 'center' }}>
        <i>These studies are interesting, but are not necessarily used by PolyCypher</i>
        </h5>

      <section style={styles.section}>
        {/* Coronary Artery Disease Section */}
        <div style={styles.container}>
        <h3 style={styles.subHeader}>Coronary artery disease</h3>
      <p style={styles.paragraph}>
        The most powerful study of coronary artery disease polygenic scores is a multi-ancestry analysis demonstrating the effectiveness of polygenic scores. It shows that CAD PRS outperforms many traditional risk factors and is significantly predictive for a large portion of the population. Specifically, being in the top 3% of the CAD PRS distribution is as predictively useful as a CAD event. The odds ratio (OR) per standard deviation (SD) for predicting prevalent CAD is 2.14, with variations across different ancestries. Even after adjusting for various clinical risk factors, the OR/SD remains at 2.07. Notably, 3% of the population has a PRS as informative as a CAD event, and individuals at the extremes of the polygenic score spectrum show significant differences in CAD prevalence. Further, a high PRS could help identify individuals for cholesterol-lowering therapies as an adjunct to current guidelines. More detailed information can be found in the study linked <a href="https://www.nature.com/articles/s41591-023-02429-x" style={styles.reference}>[here]</a>.
      </p>
      {/* Images and Captions */}
      <figure>
        <img src="//static.miraheze.org/genomicswiki/thumb/6/68/CVD_PRS_CAD.png/661px-CVD_PRS_CAD.png" alt="CAD PRS Study Figures" />
        <figcaption style={styles.figcaption}>The figures from this study show that CAD PRS is better than many other risk factors, lots of people have clinically relevant CAD risk by PRS, and being in the top 3% of the CAD PRS distribution is as predictively useful as a CAD event.</figcaption>
      </figure>
      <figure>
        <img src="//static.miraheze.org/genomicswiki/thumb/9/96/Orsdcad.png/300px-Orsdcad.png" alt="OR/SD of CAD PRS" />
        <figcaption style={styles.figcaption}>The OR/SD of CAD PRS is better than traditional risk factors.</figcaption>
      </figure>
      <figure>
        <img src="//static.miraheze.org/genomicswiki/thumb/6/62/Cadprspredpce.png/495px-Cadprspredpce.png" alt="CAD PRS and PCE" />
        <figcaption style={styles.figcaption}>CAD PRS is a useful addition to the American College of Cardiology/American Heart Association Pooled Cohort Equations (PCE) in predicting 10-year risk of CAD.</figcaption>
      </figure>
      <figure>
        <img src="//static.miraheze.org/genomicswiki/thumb/3/3c/Hazardratio.png/351px-Hazardratio.png" alt="Hazards Ratio of CAD PRS" />
        <figcaption style={styles.figcaption}>Hazards ratio per standard deviation in individuals without prior CAD, adjusted for age, sex, genotyping array, and 10 PCs.</figcaption>
      </figure>
      {/* Further Reading Links */}
      <div style={styles.link}>
        <p>Further reading:</p>
        <ul style={styles.list}>
          <li><a href="https://www.annualreviews.org/doi/abs/10.1146/annurev-med-042921-112629" style={styles.reference}>Advances and Applications of Polygenic Scores for Coronary Artery Disease</a></li>
          <li><a href="https://www.sciencedirect.com/science/article/pii/S1098360023000023" style={styles.reference}>Returning integrated genomic risk and clinical recommendations: The eMERGE study</a></li>
          <li><a href="https://www.nejm.org/doi/full/10.1056/nejmoa1605086" style={styles.reference}>Genetic Risk, Adherence to a Healthy Lifestyle, and Coronary Disease</a></li>
          <li><a href="https://www.ahajournals.org/doi/full/10.1161/CIRCULATIONAHA.116.024436" style={styles.reference}>Polygenic Risk Score Identifies Subgroup With Higher Burden of Atherosclerosis</a></li>
          <li><a href="https://www.jacc.org/doi/abs/10.1016/j.jacc.2020.04.027" style={styles.reference}>Limitations of Contemporary Guidelines for Managing Patients at High Genetic Risk of Coronary Artery Disease</a></li>
        </ul>
      </div>
      <p style={styles.paragraph}>
        Additionally, cardiovascular disease polygenic scores can be pivotal in determining who should undergo comprehensive cardiovascular disease risk screening. This approach could substantially reduce the number of screenings needed to prevent a cardiovascular disease event. Further information on this can be found <a href="https://www.ahajournals.org/doi/10.1161/JAHA.122.029296" style={styles.reference}>[here]</a>.
      </p>
  </div>
  
        {/* Type 2 Diabetes Section */}
        <div style={styles.container}>
        <h3 style={styles.subHeader}>Type 2 Diabetes</h3>
      <p style={styles.paragraph}>
        The most notable study in type 2 diabetes highlights that the prevalence of T2D increases sharply in the top quantile of the Polygenic Score (PGS) distribution. This results in an odds ratio (OR) of 19.16 for the top 20% of the population compared to the rest, independent of age, sex, BMI, physical activity, and family risk. The genotypes for this study were derived using the Illumina Omni 2.5 and Illumina Omni Express arrays, with over 40 million SNPs available for each participant. The study, referencing Khera et al. 2018, found a non-linear association between PGS and T2D risk. The predictive power of PGS for T2D shows an AUC of 0.869 for prevalent and 0.613 for incident cases. More details on this study can be found <a href="https://www.nature.com/articles/s41598-023-31496-w" style={styles.reference}>[here]</a>.
      </p>
      {/* Images and Captions */}
      <figure>
        <img src="//static.miraheze.org/genomicswiki/thumb/2/2e/Diabprs.png/587px-Diabprs.png" alt="Type 2 Diabetes PRS Study" />
        <figcaption style={styles.figcaption}>Very good case/control discrimination in T2D risk prediction using PRS.</figcaption>
      </figure>
      <figure>
        <img src="//static.miraheze.org/genomicswiki/thumb/1/10/Td2.png/262px-Td2.png" alt="Diabetes Risk and PRS" />
        <figcaption style={styles.figcaption}>Diabetes risk increases quickly with higher PRS.</figcaption>
      </figure>
      <figure>
        <img src="//static.miraheze.org/genomicswiki/thumb/8/8f/T2d_risk.png/613px-T2d_risk.png" alt="T2D Risk and PRS" />
        <figcaption style={styles.figcaption}>Illustration of T2D risk associated with varying levels of PRS.</figcaption>
      </figure>
      {/* Further Reading Links */}
      <div style={styles.link}>
        <p>Further reading:</p>
        <ul style={styles.list}>
          <li><a href="https://www.nature.com/articles/s41588-018-0183-z" style={styles.reference}>Khera et al. 2018 study</a></li>
          <li><a href="https://github.com/lukfor/pgs-calc" style={styles.reference}>PGS-Calc for calculating Polygenic Scores</a></li>
        </ul>
      </div>
  </div>
  
  <div style={styles.container}>
            {/* Alzheimer’s Disease Section */}
    <h3 style={styles.subHeader}>Alzheimer’s disease</h3>
    <p style={styles.paragraph}>
      A significant study on Alzheimer’s disease found that the onset time differs by 10 years between the highest and lowest polygenic risk score (PRS) deciles (HR = 3.34). This study demonstrates the potential of PRS in predicting Alzheimer's disease, especially when ApoE effects are excluded. For more details, see the full study <a href="https://journals.plos.org/plosmedicine/article?id=10.1371/journal.pmed.1002258" style={styles.reference}>[here]</a>.
    </p>
    <figure>
      <img src="//static.miraheze.org/genomicswiki/thumb/1/19/Noapoe.png/300px-Noapoe.png" alt="2017 Alzheimer's PRS" />
      <figcaption style={styles.figcaption}>2017 Alzheimer's PRS</figcaption>
    </figure>
    <figure>
      <img src="//static.miraheze.org/genomicswiki/thumb/1/18/80th.png/300px-80th.png" alt="80th percentile Alzheimer's PRS" />
      <figcaption style={styles.figcaption}>Being in the 80th percentile is about as bad as having ApoE e4</figcaption>
    </figure>
    <p>Additional studies:</p>
    <ul style={styles.list}>
      <li><a href="https://agsjournals.onlinelibrary.wiley.com/doi/10.1111/jgs.16406" style={styles.reference}>Study on Alzheimer's disease</a></li>
      <li><a href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6414493/" style={styles.reference}>PMC Article on Alzheimer's disease</a></li>
    </ul>
    <figure>
      <img src="//static.miraheze.org/genomicswiki/thumb/d/da/Alz.png/300px-Alz.png" alt="Alzheimer's disease case/controls by PRS" />
      <figcaption style={styles.figcaption}>Alzheimer's disease case/controls by PRS</figcaption>
    </figure>
    </div>

    {/* Stroke Section */}
    <div style={styles.container}>
    <h3 style={styles.subHeader}>Stroke</h3>
    <p style={styles.paragraph}>
      Research on stroke shows the effectiveness of PRS in predicting stroke risk. A notable study using 3.2 million SNPs in the UK Biobank demonstrates that PRS is more predictive of ischemic stroke risk than traditional risk factors like family history, blood pressure, and BMI. The detailed study can be accessed <a href="https://www.nature.com/articles/s41586-022-05165-3" style={styles.reference}>[here]</a>.
    </p>
    <figure>
      <img src="//static.miraheze.org/genomicswiki/thumb/c/c1/Byancestry.png/300px-Byancestry.png" alt="PRS effectiveness by ancestry" />
      <figcaption style={styles.figcaption}>PRS effectiveness by ancestry</figcaption>
    </figure>
    <figure>
      <img src="//static.miraheze.org/genomicswiki/thumb/7/7e/Best_stroke_predictor.png/520px-Best_stroke_predictor.png" alt="Stroke risk modified by PRS" />
      <figcaption style={styles.figcaption}>Stroke risk is modified by PRS (purple denotes European ancestry)</figcaption>
    </figure>
    <p>Additional study:</p>
    <a href="https://www.nature.com/articles/s41467-019-13848-1" style={styles.reference}>Nature Article on CVD PRS and stroke risk</a>
    <figure>
      <img src="//static.miraheze.org/genomicswiki/thumb/1/14/Example_of_informative.png/352px-Example_of_informative.png" alt="Polygenic analyses for stroke" />
      <figcaption style={styles.figcaption}>Polygenic analyses can identify what is physiologically associated with (or sometimes what causes) a disease</figcaption>
    </figure>
    </div>
      </section>
    </div>
  );
  
  
  
  const TermsAndConcepts = () => (
    <div style={styles.container}>
      <h2 style={styles.header}>Terms and concepts</h2>
      <section style={styles.section}>
        <h3 style={styles.subHeader}>Heritability and SNP Heritability</h3>
        <p style={styles.paragraph}>
          Heritability measures the proportion of variance in a trait explained by genetic factors. Its estimation, often done through twin studies, sets the upper limit for the predictive power of genetic risk scoring. If heritability is low, the utility of polygenic scores in predicting the trait diminishes. More details can be found in this <a href="https://www.nature.com/articles/ng.3285" style={styles}>study</a>.
        </p>
        <p style={styles.paragraph}>
          SNP heritability, in contrast, quantifies the variance explained by known SNPs, not accounting for rare variants or non-additive effects. It represents the maximum variance some polygenic scoring methods can explain given a specific SNP array. A low SNP heritability suggests limited utility in developing a polygenic score for a particular trait.
        </p>
  
        <h3 style={styles.subHeader}>Linkage Disequilibrium (LD)</h3>
        <p style={styles.paragraph}>
          LD describes the correlation between genetic variants that are close together on a chromosome and thus inherited together. This phenomenon is crucial for the imputation process and for pruning in genetic studies.
        </p>
  
        <h3 style={styles.subHeader}>Additive Model</h3>
        <p style={styles.paragraph}>
        The additive model, fundamental to polygenic scores and most GWAS, assumes that the effects of alleles can be cumulatively summed. This model is highly effective in simplifying the complex interplay of genetics in traits and diseases. The basic premise is that possessing two copies of allele Z results in twice the effect of one copy, and the combined effect of alleles Z and Y is their additive sum. This is referenced in <a href="https://www.medterms.com/script/main/art.asp?articlekey=25508" style={styles.reference}>[Rieger et al., 1968]</a>. While the model does not explicitly include dominance or epistasis, it remains a robust assumption for most genetic studies. GWAS often lack the power to accurately assess gene-by-gene (GxG) interactions or dominance effects. Furthermore, non-additive contributions to non-Mendelian phenotypes are generally minimal, making the additive model a practical choice in these contexts. Detailed insights into this can be found in studies by <a href="https://doi.org/10.1016/j.ajhg.2021.03.018" style={styles.reference}>Pazokitoroudi et al., 2021</a> and <a href="https://doi.org/10.1534/genetics.114.165282" style={styles.reference}>Mäki-Tanila and Hill, 2014</a>. Nevertheless, there is potential for non-linear effects to be relevant in certain polygenic scores, as explored in <a href="https://www.nature.com/articles/s42003-022-03812-z" style={styles}>this study</a>.
        </p>

  
        <h3 style={styles.subHeader}>Ancestry Group and Population Stratification</h3>
        <p style={styles.paragraph}>
          Ancestry groups can exhibit distinct LD patterns, affecting the transferability and accuracy of polygenic scores across different populations. The transferability of scores can vary significantly based on the phenotype under study.
        </p>
  
        <h3 style={styles.subHeader}>Deep Learning in Genomics</h3>
        <p style={styles.paragraph}>
        A deep learning model is characterized by its input layer, output layer, and various hidden layers. The data flows from the input to output through these hidden layers, undergoing nonlinear transformations. Connection strengths between nodes are weighted; weights are based off of training data. To prevent overfitting, the model uses regularization techniques like weight constraints and dropout methods. The former imposes penalties on larger weights, while the latter temporarily removes a random subset of neurons during training, reducing the model's sensitivity to specific weights. Hyperparameters, including the number of hidden layers, dropout rate, and number of units, affect the model's performance. Common hyperparameter tuning approaches include grid search, random search, Latin hypercube sampling, optimization, and newer kernel-based methods.
        </p>
  
        <h3 style={styles.subHeader}>Genomic LASSO</h3>
        <p style={styles.paragraph}>
        Genomic LASSO uses LASSO regression (Least Absolute Shrinkage and Selection Operator) on SNPs. LASSO minimizes the mean squared error term (the distance between predicted and actual values) but with a penalty term (L1) so there is "shrinkage" behavior, pushing less important predictors towards zero faster. The degree of shrinkage is controlled by the hyperparameter λ. The solution to the optimization problem is found using a soft-thresholding function, which sets to zero those predictors that do not meet a certain threshold. The value of λ is initially chosen to be the maximum value that results in all predictors being zero, and it is then decreased gradually, allowing more SNPs to have non-zero effect sizes.

        This can be useful to predict how many SNPs are needed to accurately recover the true non-zero predictors (i.e., ~perfect prediction). The Donoho-Tanner phase transition applies: we should expect to recover the true signal when the number of samples is approximately 30 to 100 times the number of non-zero SNPs.

        To add other variables like age or sex, a separate least squares regression can be performed and the results added to the LASSO predictor scores if there are AUC improvements when simultaneously regressing using the polygenic score (PGS), age, and sex.
        </p>
  

        <CompressedSensingSection />
  
        <h3 style={styles.subHeader}>Pleiotropy</h3>
        <p style={styles.paragraph}>
          Pleiotropy occurs when a single gene or gene set influences multiple phenotypic traits.
        </p>
      </section>
    </div>
  );



// Component for 'Clinical perception of polygenic scores'
const ClinicalPerception = () => (
    <div style={styles.container}>
      <h2 style={styles.header}>Clinical perception of polygenic scores</h2>
      <section style={styles.section}>
        <p style={styles.paragraph}>
          The clinical perception of polygenic scores (PRS) is predominantly positive, with cost being the primary barrier to wider implementation. Studies, such as <a href="https://europepmc.org/article/med/36807341" style={styles.reference}>this one</a>, highlight the potential and challenges of PRS in clinical settings. Their utility in enhancing clinical decision-making and patient care is increasingly recognized.
        </p>
        <figure>
          <img src="https://static.miraheze.org/genomicswiki/e/ee/Pgsuseful.png" alt="Polygenic scores are generally perceived as useful" style={{ width: '70%', position: 'center', border: '1px solid #ccc', display: 'block', marginLeft: 'auto', marginRight: 'auto' }} />
          <figcaption style={{ textAlign: 'center', ...styles.figcaption }}>Polygenic scores are generally perceived as useful</figcaption>
        </figure>
        <figure>
          <img src="https://static.miraheze.org/genomicswiki/c/c8/Screen_Shot_2023-07-24_at_5.13.05_AM.png" alt="There's more support for more action as opposed to less" style={{ width: '70%', border: '1px solid #ccc', display: 'block', marginLeft: 'auto', marginRight: 'auto' }} />
          <figcaption style={ {textAlign: 'center', ...styles.figcaption} }>There's more support for <i>more</i> action as opposed to less</figcaption>
        </figure>
        <figure>
          <img src="https://static.miraheze.org/genomicswiki/0/0e/Cost_is_by_far_the_largest_barrier_to_PRS_implementation.png" alt="Cost is by far the largest barrier to PRS implementation" style={{ width: '70%', border: '1px solid #ccc', display: 'block', marginLeft: 'auto', marginRight: 'auto' }} />
          <figcaption style={ {textAlign: 'center', ...styles.figcaption} }>Cost is by far the largest barrier to PRS implementation</figcaption>
        </figure>
      </section>
    </div>
  );
  
  

  
  
  
  // Component for 'Caveats and limitations'
  const CaveatsAndLimitations = () => (
    <div style={styles.container}>
      <h2 style={styles.header}>Caveats and limitations</h2>
      <section style={styles.section}>
        <p style={styles.paragraph}>
        Polygenic scores are known to have different accuracy in different geographical populations or different environments. Further, even if a polygenic score is equally accurate across ancestries, the score threshold to make clinical actionability relevant can differ by group (i.e., South Asians may have a lower threshold for relevance to cardiovascular disease).

        If the reference rate of disease cases differs significantly from the sample under study, it could lead to misestimating risk. Different studies might also employ validation cohorts with varying characteristics and model covariates, meaning score accuracy may not be comparable.

        Polygenic scores are always probabilistic. The risk conferred by a polygenic score increase should ideally be assessed in different cohorts.

        In a clinical context, it is difficult to choose between old, replicated polygenic scores, and newer, more accurate polygenic scores.

        A polygenic score can only be as good as the heritability of the phenotype.


        </p>
      </section>
    </div>
  );





  export {
    HowGWASWorks, CurrentGWASTrends, PolygenicScoresFromGWAS, ImprovingPolygenicScores,
    MultiTraitApproaches, BayesianApproaches, DiseasePRSStudies, TermsAndConcepts,
    ClinicalPerception, CaveatsAndLimitations
  };