ASCAT Copy-Number Segmentation¶

Allele-specific copy number analysis of tumors (ASCAT) is a widely used method for analyzing copy number alterations in cancer genomes. This page presents ASCAT's simulated example data for sample S96 and reproduces ASCAT's segmentation plots with GenomeSpy. The visualization adds zooming, panning, and tooltips, which make it easier to assess the segmentation of the raw data and the estimated copy-number changes. The same example is also available in the Observable notebook ASCAT Copy-Number Segmentation, where it is structured a bit differently.

For the core purity/ploidy fit and rounding step, see the companion ASCAT Algorithm in GenomeSpy page.

{
  "description": [
    "ASCAT segmentation and copy number estimates",
    "Loo P Van, Nordgard SH, Lingjærde OC, et al.",
    "Allele-specific copy number analysis of tumors",
    "Proc Natl Acad Sci. 2010;107(39):16910-16915. doi:10.1073/pnas.1009843107"
  ],

  "assembly": "hg18",

  "data": {
    "url": "https://data.genomespy.app/sample-data/ASCAT/segments_S96.tsv"
  },

  "resolve": { "axis": { "x": "shared" } },

  "encoding": {
    "x": {
      "chrom": "chr",
      "pos": "startpos",
      "type": "locus",
      "scale": { "type": "locus" }
    },
    "x2": {
      "chrom": "chr",
      "pos": "endpos",
      "offset": 1
    }
  },

  "vconcat": [
    {
      "name": "copyNumberTrack",

      "title": {
        "text": "Allele-specific copy numbers",
        "style": "overlay"
      },

      "layer": [
        {
          "title": "nMinor",
          "mark": {
            "type": "rule",
            "minLength": 2.0,
            "yOffset": -3.0
          },
          "encoding": {
            "y": {
              "field": "nMinor",
              "type": "quantitative",
              "scale": {
                "domain": [0, 6],
                "padding": 0.04,
                "clamp": true
              },
              "axis": {
                "tickMinStep": 1.0
              }
            },
            "size": { "value": 5 },
            "color": { "value": "#88d27a" }
          }
        },
        {
          "title": "nMajor",
          "mark": {
            "type": "rule",
            "minLength": 2.0,
            "yOffset": 3.0
          },
          "encoding": {
            "y": {
              "field": "nMajor",
              "type": "quantitative",
              "scale": {
                "domain": [0, 6]
              }
            },
            "size": { "value": 5 },
            "color": {
              "field": "nMajor",
              "type": "quantitative",
              "scale": {
                "domain": [0, 6, 16],
                "range": ["#f06850", "#f06850", "#5F0F0F"]
              }
            }
          }
        }
      ]
    },

    {
      "name": "logRTrack",

      "layer": [
        {
          "data": {
            "url": "https://data.genomespy.app/sample-data/ASCAT/raw_S96.tsv"
          },

          "title": "Single probe",

          "mark": {
            "type": "point",
            "size": { "expr": "min(10 * pow(zoomLevel, 1.5), 200)" }
          },

          "encoding": {
            "x": {
              "chrom": "chr",
              "pos": "pos",
              "type": "locus"
            },
            "y": { "field": "logR", "type": "quantitative", "title": null },
            "color": { "value": "#7090c0" },
            "opacity": { "value": 0.25 },
            "strokeWidth": { "value": 0 }
          }
        },
        {
          "title": "Mean LogR",
          "mark": {
            "type": "rule",
            "minLength": 3.0
          },
          "encoding": {
            "y": {
              "field": "logRMean",
              "type": "quantitative",
              "title": "LogR"
            },
            "size": { "value": 3 },
            "color": { "value": "black" }
          }
        }
      ]
    },

    {
      "name": "bafTrack",

      "layer": [
        {
          "data": {
            "url": "https://data.genomespy.app/sample-data/ASCAT/raw_S96.tsv"
          },

          "transform": [{ "type": "filter", "expr": "datum.baf !== null" }],

          "title": "Single probe",

          "mark": {
            "type": "point",
            "size": { "expr": "min(10 * pow(zoomLevel, 1.5), 200)" }
          },

          "encoding": {
            "x": {
              "chrom": "chr",
              "pos": "pos",
              "type": "locus"
            },
            "y": { "field": "baf", "type": "quantitative", "title": null },
            "color": { "value": "#7090c0" },
            "opacity": { "value": 0.3 },
            "strokeWidth": { "value": 0 }
          }
        },
        {
          "title": "Mean BAF",
          "mark": {
            "type": "rule",
            "minLength": 3.0
          },
          "encoding": {
            "y": {
              "field": "bafMean",
              "type": "quantitative",
              "scale": { "domain": [0, 1] },
              "title": "B-allele frequency"
            },
            "size": { "value": 3 },
            "color": { "value": "black" }
          }
        },
        {
          "title": "Mean BAF",
          "mark": {
            "type": "rule",
            "minLength": 3.0
          },
          "encoding": {
            "y": {
              "expr": "1 - datum.bafMean",
              "type": "quantitative",
              "title": null
            },
            "size": { "value": 3 },
            "color": { "value": "black" }
          }
        }
      ]
    }
  ],

  "background": "#fafafa",

  "config": {
    "axisX": {
      "grid": false,
      "chromGrid": true,
      "orient": "bottom"
    },
    "axisY": {
      "grid": true,
      "gridColor": "#f8f8f8"
    },
    "view": {
      "fill": "white",
      "stroke": "#c8c8c8",
      "shadowBlur": 8,
      "shadowColor": "black",
      "shadowOpacity": 0.1
    }
  }
}

The example shows simulated example data for sample S96 from "Allele-specific copy number analysis of tumors" by Van Loo et al.

What to notice¶

The view is built as vertically concatenated tracks that share the same genomic x-axis, so each locus stays aligned across all panels. The top track shows allele-specific copy-number estimates, with the minor and major alleles offset slightly to avoid overlap. The middle track overlays raw LogR probe values with the segmented mean, and the bottom track does the same for B-allele frequency, including the mirrored 1 - BAF line.

Data wrangling¶

Even though ASCAT computes start and end positions for the segments in the copy number profiles, it does not provide them for the raw allele-specific copy number segmentations. The preprocessing script below runs ASCAT, computes segmented LogR and BAF means, and writes two TSV files for a single sample: one for the segments and one for the raw LogR and BAF SNPs.

library(ASCAT)
library(dplyr)
library(magrittr)
library(readr)
library(tibble)

# Choose a sample id. The same number is used below both as a column/list
# index into the ASCAT objects and, prefixed with "S", as the sample name in
# the segment table.
# NOTE(review): this assumes that sample "S<n>" is the n-th sample in the
# input files -- confirm before reusing the script with other data.
sampleId <- 96

# Run ASCAT analysis.
ascat.bc <- ascat.loadData("Tumor_LogR.txt", "Tumor_BAF.txt",
                           "Germline_LogR.txt", "Germline_BAF.txt")
ascat.bc <- ascat.aspcf(ascat.bc)
ascat.output <- ascat.runAscat(ascat.bc)

# Segmented BAF of the selected sample. It carries rownames (SNP ids), i.e.
# it has dimensions; it is flattened to a plain vector below so that it does
# not end up as a matrix column in the tibble.
# NOTE(review): presumably a one-column matrix -- confirm.
bafSegmented <- ascat.bc$Tumor_BAF_segmented[[sampleId]]

# Join SNP positions to the segmented LogR and BAF values. SNPs without a
# segmented value get NA.
segmentedSNPs <- as_tibble(ascat.bc$SNPpos, rownames = "SNP") %>%
  rename(chr = chrs) %>%
  left_join(tibble(SNP = rownames(ascat.bc$Tumor_LogR_segmented),
                   logR = ascat.bc$Tumor_LogR_segmented[, sampleId]),
            by = "SNP") %>%
  left_join(tibble(SNP = rownames(bafSegmented),
                   baf = as.vector(bafSegmented)),
            by = "SNP")

# Pick the segments of the selected sample and enumerate them.
segments <- ascat.output$segments %>%
  filter(sample == paste0("S", sampleId)) %>%
  mutate(segmentId = row_number())

# Assign each SNP to the first segment that contains it. Because segmentId is
# the row number of `segments`, the row index returned by which() is the
# segment id. SNPs that fall into no segment get NA (instead of the Inf plus
# warning that min() on an empty match would produce).
segmentedSNPs$segmentId <- vapply(
  seq_len(nrow(segmentedSNPs)),
  function(i) {
    hits <- which(
      segmentedSNPs$pos[i] >= segments$startpos &
        segmentedSNPs$pos[i] <= segments$endpos &
        segmentedSNPs$chr[i] == segments$chr
    )
    if (length(hits) > 0) hits[1] else NA_integer_
  },
  integer(1)
)

# Per-segment means of the segmented LogR and BAF values and the number of
# SNPs assigned to each segment.
segmentSummary <- segmentedSNPs %>%
  group_by(segmentId) %>%
  summarise(logRMean = mean(logR, na.rm = TRUE),
            bafMean = mean(baf, na.rm = TRUE),
            nProbes = n())

# Join the segments with the LogR and BAF means and write them to a file.
segments %>%
  left_join(segmentSummary, by = "segmentId") %>%
  select(-segmentId) %>%
  mutate(across(where(is.numeric), ~ round(.x, digits = 3))) %>%
  write_tsv(paste0("ascat_segments_S", sampleId, ".tsv"), na = "")

# Write the raw data. Raw BAF is kept only for SNPs that have a segmented BAF
# value; ASCAT segments BAF only for the informative, germline heterozygous
# SNPs, so BAF is blanked for all other SNPs.
segmentedSNPs %>%
  mutate(segmentedBaf = baf) %>%
  select(-logR, -segmentId, -baf) %>%
  left_join(tibble(SNP = rownames(ascat.bc$Tumor_LogR),
                   logR = ascat.bc$Tumor_LogR[, sampleId]),
            by = "SNP") %>%
  left_join(tibble(SNP = rownames(ascat.bc$Tumor_BAF),
                   baf = ascat.bc$Tumor_BAF[, sampleId]),
            by = "SNP") %>%
  mutate(baf = ifelse(is.na(segmentedBaf), NA, baf)) %>%
  select(-segmentedBaf) %>%
  write_tsv(paste0("ascat_raw_S", sampleId, ".tsv"), na = "")

The first five rows from the produced files:

`ascat_segments_S96.tsv`¶

chr	startpos	endpos	nMajor	nMinor	logRMean	bafMean
1	1695590	116624361	2	0	-0.133	0.218
1	116976886	120138178	2	2	0.18	0.5
1	143133910	147896005	4	1	0.373	0.301
1	147970991	244820741	3	1	0.219	0.325
2	385195	3254139	2	0	-0.109	0.244

`ascat_raw_S96.tsv`¶

SNP	chr	pos	logR	baf
SNP1	1	1695590	-0.0589	0.2464
SNP2	1	2189662	0.0293	0.2013
SNP3	1	2393282	-0.2291
SNP4	1	2414781	-0.2221	0.7504
SNP5	1	2516275	-0.0379

ASCAT Copy-Number Segmentation¶

What to notice¶

Data wrangling¶

ascat_segments_S96.tsv¶

ascat_raw_S96.tsv¶

`ascat_segments_S96.tsv`¶

`ascat_raw_S96.tsv`¶