scrublet
Identify and predict doublets in single-cell RNA sequencing data by analyzing transcriptomes, enabling accurate downstream analysis with configurable parameters for simulation and detection.
Instructions
Predict doublets using Scrublet
Input Schema
Name | Required | Description | Default |
---|---|---|---|
adata_sim | No | Optional path to AnnData object with simulated doublets. | |
batch_key | No | Key in adata.obs for batch information. | |
expected_doublet_rate | No | Estimated doublet rate for the experiment. | 0.05 |
get_doublet_neighbor_parents | No | Return parent transcriptomes that generated doublet neighbors. | false |
knn_dist_metric | No | Distance metric used when finding nearest neighbors. | euclidean |
log_transform | No | Whether to log-transform the data prior to PCA. | false |
mean_center | No | Center data such that each gene has mean of 0. | true |
n_neighbors | No | Number of neighbors used to construct KNN graph. | |
n_prin_comps | No | Number of principal components used for embedding. | 30 |
normalize_variance | No | Normalize data such that each gene has variance of 1. | true |
sim_doublet_ratio | No | Number of doublets to simulate relative to observed transcriptomes. | 2 |
stdev_doublet_rate | No | Uncertainty in the expected doublet rate. | 0.02 |
synthetic_doublet_umi_subsampling | No | Rate for sampling UMIs when creating synthetic doublets. | 1 |
threshold | No | Doublet score threshold for calling a transcriptome a doublet. | |
use_approx_neighbors | No | Use approximate nearest neighbor method (annoy). | |
Input Schema (JSON Schema)
{
"description": "Input schema for the scrublet doublet prediction tool.",
"properties": {
"adata_sim": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"description": "Optional path to AnnData object with simulated doublets.",
"title": "Adata Sim"
},
"batch_key": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"description": "Key in adata.obs for batch information.",
"title": "Batch Key"
},
"expected_doublet_rate": {
"default": 0.05,
"description": "Estimated doublet rate for the experiment.",
"maximum": 1,
"minimum": 0,
"title": "Expected Doublet Rate",
"type": "number"
},
"get_doublet_neighbor_parents": {
"default": false,
"description": "Return parent transcriptomes that generated doublet neighbors.",
"title": "Get Doublet Neighbor Parents",
"type": "boolean"
},
"knn_dist_metric": {
"default": "euclidean",
"description": "Distance metric used when finding nearest neighbors.",
"title": "Knn Dist Metric",
"type": "string"
},
"log_transform": {
"default": false,
"description": "Whether to log-transform the data prior to PCA.",
"title": "Log Transform",
"type": "boolean"
},
"mean_center": {
"default": true,
"description": "Center data such that each gene has mean of 0.",
"title": "Mean Center",
"type": "boolean"
},
"n_neighbors": {
"anyOf": [
{
"exclusiveMinimum": 0,
"type": "integer"
},
{
"type": "null"
}
],
"default": null,
"description": "Number of neighbors used to construct KNN graph.",
"title": "N Neighbors"
},
"n_prin_comps": {
"default": 30,
"description": "Number of principal components used for embedding.",
"exclusiveMinimum": 0,
"title": "N Prin Comps",
"type": "integer"
},
"normalize_variance": {
"default": true,
"description": "Normalize data such that each gene has variance of 1.",
"title": "Normalize Variance",
"type": "boolean"
},
"sim_doublet_ratio": {
"default": 2,
"description": "Number of doublets to simulate relative to observed transcriptomes.",
"exclusiveMinimum": 0,
"title": "Sim Doublet Ratio",
"type": "number"
},
"stdev_doublet_rate": {
"default": 0.02,
"description": "Uncertainty in the expected doublet rate.",
"maximum": 1,
"minimum": 0,
"title": "Stdev Doublet Rate",
"type": "number"
},
"synthetic_doublet_umi_subsampling": {
"default": 1,
"description": "Rate for sampling UMIs when creating synthetic doublets.",
"exclusiveMinimum": 0,
"maximum": 1,
"title": "Synthetic Doublet Umi Subsampling",
"type": "number"
},
"threshold": {
"anyOf": [
{
"maximum": 1,
"minimum": 0,
"type": "number"
},
{
"type": "null"
}
],
"default": null,
"description": "Doublet score threshold for calling a transcriptome a doublet.",
"title": "Threshold"
},
"use_approx_neighbors": {
"anyOf": [
{
"type": "boolean"
},
{
"type": "null"
}
],
"default": null,
"description": "Use approximate nearest neighbor method (annoy).",
"title": "Use Approx Neighbors"
}
},
"title": "ScrubletModel",
"type": "object"
}