Data file

Schema for submitting metadata for a data file.

{
    "title": "Data file",
    "description": "Schema for submitting metadata for a data file.",
    "id": "/profiles/file.json",
    "$schema": "http://json-schema.org/draft-04/schema#",
    "type": "object",
    "anyOf": [
        {
            "required": [
                "dataset",
                "file_format",
                "output_type",
                "award",
                "lab",
                "md5sum"
            ]
        },
        {
            "required": [
                "dataset",
                "file_format",
                "output_type",
                "award",
                "lab",
                "no_file_available"
            ]
        }
    ],
    "identifyingProperties": [
        "uuid",
        "accession",
        "aliases"
    ],
    "additionalProperties": false,
    "mixinProperties": [
        {
            "$ref": "mixins.json#/notes"
        },
        {
            "$ref": "mixins.json#/submitted"
        },
        {
            "$ref": "mixins.json#/attribution"
        },
        {
            "$ref": "mixins.json#/accessioned_status"
        },
        {
            "$ref": "mixins.json#/schema_version"
        },
        {
            "$ref": "mixins.json#/aliases"
        },
        {
            "$ref": "mixins.json#/accession"
        },
        {
            "$ref": "mixins.json#/uuid"
        },
        {
            "$ref": "mixins.json#/submitter_comment"
        }
    ],
    "facets": {
        "status": {
            "title": "File status"
        },
        "output_category": {
            "title": "Content category"
        },
        "output_type": {
            "title": "Content type"
        },
        "file_format": {
            "title": "File format"
        },
        "file_format_type": {
            "title": "Specific file format type"
        },
        "award.project": {
            "title": "Project"
        },
        "assembly": {
            "title": "Mapping assembly"
        },
        "lab.title": {
            "title": "Lab"
        }
    },
    "dependencies": {
        "no_file_available": {
            "comment": "md5sum is required for all files unless the file is not available from the portal. file_size is required for files that are present on the portal and have one of the statuses: in progress, revoked, archived or released.",
            "oneOf": [
                {
                    "properties": {
                        "no_file_available": {
                            "enum": [
                                true
                            ]
                        }
                    }
                },
                {
                    "allOf": [
                        {
                            "required": [
                                "md5sum"
                            ],
                            "properties": {
                                "no_file_available": {
                                    "enum": [
                                        false
                                    ]
                                }
                            }
                        },
                        {
                            "oneOf": [
                                {
                                    "not": {
                                        "required": [
                                            "status"
                                        ]
                                    },
                                    "properties": {
                                        "no_file_available": {
                                            "enum": [
                                                false
                                            ]
                                        }
                                    }
                                },
                                {
                                    "required": [
                                        "file_size",
                                        "status"
                                    ],
                                    "properties": {
                                        "status": {
                                            "enum": [
                                                "in progress",
                                                "revoked",
                                                "archived",
                                                "released"
                                            ]
                                        },
                                        "no_file_available": {
                                            "enum": [
                                                false
                                            ]
                                        }
                                    }
                                },
                                {
                                    "required": [
                                        "status"
                                    ],
                                    "properties": {
                                        "status": {
                                            "enum": [
                                                "uploading",
                                                "upload failed",
                                                "deleted",
                                                "replaced",
                                                "content error"
                                            ]
                                        },
                                        "no_file_available": {
                                            "enum": [
                                                false
                                            ]
                                        }
                                    }
                                }
                            ]
                        }
                    ]
                }
            ]
        },
        "paired_with": {
            "comment": "Only paired-ended files should have a paired_with value.",
            "required": [
                "run_type"
            ],
            "properties": {
                "run_type": {
                    "enum": [
                        "paired-ended"
                    ]
                }
            }
        },
        "run_type": {
            "comment": "Only paired-ended files should have paired_end values.",
            "oneOf": [
                {
                    "properties": {
                        "run_type": {
                            "enum": [
                                "single-ended"
                            ]
                        }
                    }
                },
                {
                    "required": [
                        "paired_end"
                    ],
                    "properties": {
                        "run_type": {
                            "enum": [
                                "paired-ended"
                            ]
                        },
                        "paired_end": {
                            "enum": [
                                "1",
                                "2",
                                "1,2"
                            ]
                        }
                    }
                }
            ]
        },
        "external_accession": {
            "comment": "A file with an external accession should not get an ENCODE accession.",
            "not": {
                "required": [
                    "accession"
                ]
            }
        },
        "revoke_detail": {
            "comment": "Only revoked files can have revoke_detail specified.",
            "properties": {
                "status": {
                    "enum": [
                        "revoked"
                    ]
                }
            }
        },
        "mapped_run_type": {
            "comment": "Only bam files can have mapped_run_type specified.",
            "properties": {
                "file_format": {
                    "enum": [
                        "bam"
                    ]
                }
            }
        },
        "mapped_read_length": {
            "comment": "Only bam files can have mapped_read_length specified.",
            "properties": {
                "file_format": {
                    "enum": [
                        "bam"
                    ]
                }
            }
        },
        "paired_end": {
            "comment": "Files with paired_end value 2 require a paired_with value, and files with paired_end specified require run_type to be specified as paired-ended.",
            "oneOf": [
                {
                    "required": [
                        "paired_with",
                        "run_type"
                    ],
                    "properties": {
                        "paired_end": {
                            "enum": [
                                "2"
                            ]
                        },
                        "run_type": {
                            "enum": [
                                "paired-ended"
                            ]
                        }
                    }
                },
                {
                    "required": [
                        "run_type"
                    ],
                    "properties": {
                        "paired_end": {
                            "enum": [
                                "1"
                            ]
                        },
                        "run_type": {
                            "enum": [
                                "paired-ended"
                            ]
                        }
                    }
                },
                {
                    "required": [
                        "run_type"
                    ],
                    "properties": {
                        "file_format": {
                            "enum": [
                                "sra"
                            ]
                        },
                        "paired_end": {
                            "enum": [
                                "1,2"
                            ]
                        },
                        "run_type": {
                            "enum": [
                                "paired-ended"
                            ]
                        }
                    }
                }
            ]
        },
        "output_type": {
            "comment": "Files with output_type reads and file format fastq, fasta, csfasta, csqual and sra require read_length to be specified.",
            "oneOf": [
                {
                    "required": [
                        "read_length"
                    ],
                    "properties": {
                        "file_format": {
                            "enum": [
                                "fastq",
                                "fasta",
                                "csfasta",
                                "csqual",
                                "sra"
                            ]
                        },
                        "output_type": {
                            "enum": [
                                "reads"
                            ]
                        }
                    }
                },
                {
                    "not": {
                        "properties": {
                            "output_type": {
                                "enum": [
                                    "reads"
                                ]
                            }
                        }
                    }
                }
            ]
        },
        "file_format": {
            "comment": "Fastq and sra files require run_type and replicate but should not have assembly. Raw data files require platform to be specified. Processed files require assembly to be specified.",
            "oneOf": [
                {
                    "allOf": [
                        {
                            "required": [
                                "replicate",
                                "run_type"
                            ],
                            "properties": {
                                "file_format": {
                                    "enum": [
                                        "fastq",
                                        "sra"
                                    ]
                                }
                            }
                        },
                        {
                            "not": {
                                "required": [
                                    "assembly"
                                ],
                                "properties": {
                                    "file_format": {
                                        "enum": [
                                            "fastq",
                                            "sra"
                                        ]
                                    }
                                }
                            }
                        },
                        {
                            "required": [
                                "platform"
                            ],
                            "properties": {
                                "file_format": {
                                    "enum": [
                                        "sra",
                                        "fastq",
                                        "csfasta",
                                        "csqual",
                                        "rcc",
                                        "idat",
                                        "CEL"
                                    ]
                                }
                            }
                        }
                    ]
                },
                {
                    "required": [
                        "file_format_type",
                        "assembly"
                    ],
                    "properties": {
                        "file_format": {
                            "enum": [
                                "gff",
                                "bed",
                                "bigBed"
                            ]
                        }
                    }
                },
                {
                    "required": [
                        "assembly"
                    ],
                    "properties": {
                        "file_format": {
                            "enum": [
                                "bam",
                                "sam",
                                "gtf",
                                "bigWig"
                            ]
                        }
                    }
                },
                {
                    "not": {
                        "properties": {
                            "file_format": {
                                "enum": [
                                    "fastq",
                                    "gff",
                                    "gtf",
                                    "bed",
                                    "bigBed",
                                    "bam",
                                    "sam",
                                    "sra",
                                    "bigWig"
                                ]
                            }
                        }
                    }
                }
            ]
        },
        "content_error_detail": {
            "comment": "Specification of status of content error is required if content_error_detail is specified",
            "required": [
                "status"
            ],
            "properties": {
                "status": {
                    "enum": [
                        "content error"
                    ]
                }
            }
        },
        "status": {
            "comment": "file_size is required in files with statuses in progress, revoked, archived and released unless the file is not available from the portal. content_error_detail can be specified in files with status deleted and is required in files with the status content error.",
            "allOf": [
                {
                    "oneOf": [
                        {
                            "anyOf": [
                                {
                                    "required": [
                                        "no_file_available"
                                    ],
                                    "properties": {
                                        "status": {
                                            "enum": [
                                                "in progress",
                                                "revoked",
                                                "archived",
                                                "released"
                                            ]
                                        },
                                        "no_file_available": {
                                            "enum": [
                                                true
                                            ]
                                        }
                                    }
                                },
                                {
                                    "required": [
                                        "file_size"
                                    ],
                                    "properties": {
                                        "status": {
                                            "enum": [
                                                "in progress",
                                                "revoked",
                                                "archived",
                                                "released"
                                            ]
                                        }
                                    }
                                }
                            ]
                        },
                        {
                            "properties": {
                                "status": {
                                    "enum": [
                                        "uploading",
                                        "upload failed",
                                        "deleted",
                                        "replaced",
                                        "content error"
                                    ]
                                }
                            }
                        }
                    ]
                },
                {
                    "oneOf": [
                        {
                            "required": [
                                "content_error_detail"
                            ],
                            "properties": {
                                "status": {
                                    "enum": [
                                        "content error"
                                    ]
                                }
                            }
                        },
                        {
                            "not": {
                                "oneOf": [
                                    {
                                        "required": [
                                            "content_error_detail"
                                        ],
                                        "properties": {
                                            "status": {
                                                "enum": [
                                                    "uploading",
                                                    "uploaded",
                                                    "upload failed",
                                                    "format check failed",
                                                    "in progress",
                                                    "replaced",
                                                    "revoked",
                                                    "archived",
                                                    "released"
                                                ]
                                            }
                                        }
                                    },
                                    {
                                        "properties": {
                                            "status": {
                                                "enum": [
                                                    "content error"
                                                ]
                                            }
                                        }
                                    }
                                ]
                            }
                        }
                    ]
                }
            ]
        }
    },
    "properties": {
        "submitter_comment": {
            "title": "Submitter comment",
            "type": "string",
            "description": "Additional information specified by the submitter to be displayed as a comment on the portal.",
            "pattern": "^(\\S+(\\s|\\S)*\\S+|\\S)$"
        },
        "uuid": {
            "readonly": true,
            "description": "The unique identifier associated with every object.",
            "format": "uuid",
            "type": "string",
            "comment": "Do not submit. The uuid is set by the server.",
            "serverDefault": "uuid4",
            "requestMethod": "POST",
            "title": "UUID",
            "permission": "import_items"
        },
        "accession": {
            "readonly": true,
            "description": "A unique identifier to be used to reference the object prefixed with ENC.",
            "format": "accession",
            "type": "string",
            "serverDefault": "accession",
            "comment": "Do not submit. The accession is assigned by the server.",
            "title": "Accession",
            "permission": "import_items",
            "accessionType": "FF"
        },
        "aliases": {
            "default": [],
            "uniqueItems": true,
            "description": "Lab specific identifiers to reference an object.",
            "comment": "The purpose of this field is to provide a link into the lab LIMS and to facilitate shared objects.",
            "title": "Lab aliases",
            "type": "array",
            "items": {
                "uniqueKey": "alias",
                "description": "A lab specific identifier to reference an object.",
                "comment": "Current convention is colon separated lab name and lab identifier. (e.g. john-doe:42).",
                "title": "Lab alias",
                "type": "string",
                "pattern": "^(?:alexander-hoffmann|alexander-rudensky|alexander-urban|ali-mortazavi|alkes-price|andrew-fire|anshul-kundaje|anton-valouev|barbara-wold|bill-noble|bin-yu|bing-ren|bradley-bernstein|brenton-graveley|charles-gersbach|chris-burge|christina-leslie|colin-dewey|david-gifford|david-gilbert|douglas-black|elliott-margulies|emery-bresnick|encode-awg|encode-consortium|encode-processing-pipeline|erez-lieberman|eric-lecuyer|eric-mendehall|ewan-birney|feng-yue|gene-yeo|george-stamatoyannopoulos|greg-cooper|gregory-crawford|guo-cheng-yuan|haiyan-huang|haiyuan-yu|howard-chang|j-michael-cherry|jason-ernst|jason-lieb|jay-shendure|jennifer-harrow|jeremy-luban|job-dekker|joe-ecker|john-lis|john-rinn|john-stamatoyannopoulos|jonathan-pritchard|joseph-costello|kenneth-offit|kevin-struhl|kevin-white|ladeana-hillier|laura-elnitski|len-pennacchio|leonard-lipovich|manolis-kellis|manuel-garber|maria-ciofani|mark-gerstein|mats-ljungman|matteo-pellegrini|michael-bassik|michael-beer|michael-hoffman|michael-snyder|morgan-giddings|nadav-ahituv|pardis-sabeti|paul-khavari|peggy-farnham|peter-bickel|peter-park|piero-carninci|rafael-irizarry|richard-myers|roadmap-epigenomics|rob-spitale|robert-klein|robert-waterston|roderic-guigo|ross-hardison|scott-tenenbaum|sherman-weissman|souma-raychaudhuri|stephen-smale|sunduz-keles|susan-celniker|thomas-gingeras|thomas-tullius|tim-reddy|timothy-hubbard|ting-wang|tommi-jaakkola|unknown|valerie-reinke|vishwanath-iyer|w-james-kent|wei-wang|will-greenleaf|xiang-dong-fu|xiaole-shirley|xinshu-xiao|yi-xing|yijun-ruan|yin-shen|yoav-gilad|zhiping-weng|brian-oliver|david-macalpine|hugo-bellen|peter-cherbas|terry-orr-weaver|abby-dernburg|anthony-hyman|arshad-desai|david-miller|eric-lai|fabio-piano|frank-slack|gary-karpen|gregory-hannon|james-posakony|john-kim|julie-ahringer|kamran-ahmad|kris-gunsalus|lincoln-stein|michael-brent|michael-maccoss|mitzi-kuroda|nikolaus-rajewsky|norbert-perrimon|philip-green|sarah-elgin|steven-henikoff|steven-russell|susan-strome|vincenzo-pirrotta|MitaniLab|UofC-HGAC|wesley-hung|encode|modern|dnanexus|modencode|gencode|ggr|cgc|bloomington|dssc|kyoto|gtex|pgp|biochain|promocell|nichd):[a-zA-Z\\d_$.+!*,()'-]+(?:\\s[a-zA-Z\\d_$.+!*,()'-]+)*$"
            }
        },
        "schema_version": {
            "default": "11",
            "pattern": "^\\d+(\\.\\d+)*$",
            "description": "The version of the JSON schema that the server uses to validate the object.",
            "requestMethod": [],
            "title": "Schema Version",
            "comment": "Do not submit. The version used to validate the object is set by the server. The default should be set to the current version.",
            "type": "string"
        },
        "status": {
            "enum": [
                "uploading",
                "upload failed",
                "in progress",
                "released",
                "archived",
                "deleted",
                "replaced",
                "revoked",
                "content error"
            ],
            "default": "uploading",
            "description": "The status of the file object.",
            "comment": "Do not submit.  This is set by admins along the process of file submission.",
            "title": "Status",
            "permission": "import_items",
            "type": "string",
            "readonly": true
        },
        "lab": {
            "linkTo": "Lab",
            "description": "Lab associated with the submission.",
            "comment": "Required. See lab.json for list of available identifiers.",
            "title": "Lab",
            "linkSubmitsFor": true,
            "type": "string"
        },
        "award": {
            "title": "Grant",
            "type": "string",
            "linkTo": "Award",
            "description": "Grant associated with the submission.",
            "comment": "Required. See award.json for list of available identifiers."
        },
        "date_created": {
            "readonly": true,
            "rdfs:subPropertyOf": "dc:created",
            "description": "The date the object was created.",
            "format": "date-time",
            "anyOf": [
                {
                    "format": "date-time"
                },
                {
                    "format": "date"
                }
            ],
            "type": "string",
            "comment": "Do not submit. The date the object is created is assigned by the server.",
            "title": "Date created",
            "serverDefault": "now",
            "permission": "import_items"
        },
        "submitted_by": {
            "readonly": true,
            "linkTo": "User",
            "rdfs:subPropertyOf": "dc:creator",
            "comment": "Do not submit. The user that created the object is assigned by the server.",
            "title": "Submitted by",
            "permission": "import_items",
            "serverDefault": "userid",
            "type": "string"
        },
        "notes": {
            "readonly": true,
            "pattern": "^(\\S+(\\s|\\S)*\\S+|\\S)$",
            "description": "DCC internal notes.",
            "comment": "Do not submit. A place for the DCC to keep information that does not have a place in the schema.",
            "title": "Notes",
            "permission": "import_items",
            "type": "string"
        },
        "alternate_accessions": {
            "readonly": true,
            "default": [],
            "description": "Accessions previously assigned to objects that have been merged with this object.",
            "comment": "Do not submit. Only admins are allowed to set or update this value.",
            "title": "Alternate accessions",
            "permission": "import_items",
            "type": "array",
            "items": {
                "title": "Alternate Accession",
                "description": "An accession previously assigned to an object that has been merged with this object.",
                "comment": "Only accessions of objects that have status equal to replaced will work here.",
                "type": "string",
                "format": "accession",
                "pattern": "^(ENCFF\\d{3}[A-Z]{3})$|^(TSTFF\\d{6})$"
            }
        },
        "external_accession": {
            "readonly": true,
            "uniqueKey": "accession",
            "description": "For files that were not accessioned at our site but have an accession elsewhere.",
            "comment": "Do not submit. Only admins are allowed to set or update this value.",
            "title": "External accession",
            "permission": "import_items",
            "type": "string"
        },
        "read_count": {
            "readonly": true,
            "description": "Number of reads in fastq file.",
            "comment": "Do not submit. This value is calculated and posted by checkfiles script upon submission.",
            "title": "Read count",
            "permission": "import_items",
            "type": "integer"
        },
        "file_format": {
            "oneOf": [
                {
                    "comment": "This field is required.",
                    "enum": [
                        "bam",
                        "bed",
                        "bedpe",
                        "bigBed",
                        "bigWig",
                        "CEL",
                        "chain",
                        "csv",
                        "fasta",
                        "fastq",
                        "gff",
                        "gtf",
                        "hdf5",
                        "hic",
                        "idat",
                        "PWM",
                        "rcc",
                        "sam",
                        "tagAlign",
                        "tar",
                        "tsv",
                        "vcf",
                        "wig"
                    ]
                },
                {
                    "comment": "Historical file formats, not valid for new submissions.",
                    "permission": "import_items",
                    "enum": [
                        "2bit",
                        "btr",
                        "csfasta",
                        "csqual",
                        "sra"
                    ]
                }
            ],
            "title": "File format",
            "type": "string"
        },
        "file_format_type": {
            "oneOf": [
                {
                    "enum": [
                        "bed12",
                        "bed3",
                        "bed3+",
                        "bed5",
                        "bed6",
                        "bed6+",
                        "bed9",
                        "bed9+",
                        "bedGraph",
                        "bedLogR",
                        "bedMethyl",
                        "broadPeak",
                        "candidate enhancer predictions",
                        "enhancer predictions",
                        "enhancerAssay",
                        "gappedPeak",
                        "gff2",
                        "gff3",
                        "idr_peak",
                        "narrowPeak",
                        "tss_peak"
                    ]
                },
                {
                    "comment": "Historical file formats, not valid for new submissions.",
                    "permission": "import_items",
                    "enum": [
                        "bedExonScore",
                        "bedRnaElements",
                        "mango",
                        "modPepMap",
                        "openChromCombinedPeaks",
                        "pepMap",
                        "peptideMapping",
                        "shortFrags",
                        "unknown"
                    ]
                }
            ],
            "title": "Specific file format type",
            "type": "string",
            "description": "Files of type bed and gff require further specification"
        },
        "file_format_specifications": {
            "title": "File format specifications documents",
            "type": "array",
            "description": "Text or .as files that further explain the file format",
            "uniqueItems": true,
            "items": {
                "comment": "See document.json for a list of available identifiers.",
                "type": "string",
                "linkTo": "Document"
            }
        },
        "restricted": {
            "readonly": true,
            "description": "A flag to indicate whether this file is subject to restricted access",
            "comment": "Do not submit. This value can only be set by admins.",
            "title": "Restricted file",
            "permission": "import_items",
            "type": "boolean"
        },
        "no_file_available": {
            "readonly": true,
            "default": false,
            "description": "A flag to indicate that this file is not available to download. It may also not exist if it is the result of an unproductive analysis.",
            "comment": "Do not submit. This value can only be set by admins.",
            "title": "No file available",
            "permission": "import_items",
            "type": "boolean"
        },
        "submitted_file_name": {
            "title": "Original file name",
            "type": "string",
            "description": "The local file name used at time of submission.",
            "comment": "Although this is not required, it is highly recommended."
        },
        "md5sum": {
            "pattern": "[a-f\\d]{32}|[A-F\\d]{32}",
            "description": "The md5sum of the file being transferred.",
            "comment": "Required for files with no_file_available = false. Warning: This value can vary for files of the same content gzipped at different times.",
            "format": "hex",
            "title": "MD5sum",
            "type": "string",
            "maxLength": 32
        },
        "content_md5sum": {
            "readonly": true,
            "description": "The MD5sum of the uncompressed file.",
            "format": "hex",
            "type": "string",
            "pattern": "[a-f\\d]{32}|[A-F\\d]{32}",
            "comment": "Do not submit. This is only relevant for gzipped files. This value is calculated and posted by checkfiles script upon submission.",
            "title": "Content MD5sum",
            "permission": "import_items",
            "maxLength": 32
        },
        "fastq_signature": {
            "readonly": true,
            "description": "Fastq file flowcell based unique signature to reference a file.",
            "type": "array",
            "items": {
                "title": "Flowcell based fastq signature part",
                "description": "Fastq file flowcell based unique signature to reference a file.",
                "comment": "Current convention is based on the Illumina unique sequence identifier, colon separated flowcell, lane, read_end(1/2) and barcode.",
                "type": "string",
                "pattern": "^[a-zA-Z0-9_-]+:[0-9]+:[12]:.*:.*$"
            },
            "default": [],
            "uniqueItems": true,
            "comment": "Do not submit. This value is calculated and posted by checkfiles script upon submission.",
            "title": "Fastq file flowcell based signature",
            "permission": "import_items"
        },
        "file_size": {
            "title": "File size",
            "type": "integer",
            "description": "File size specified in bytes.",
            "comment": "Required."
        },
        "platform": {
            "title": "Platform",
            "type": "string",
            "linkTo": "Platform",
            "description": "The measurement device used to collect data.",
            "comment": "This field is required for csfasta, csqual, rcc, idat and CEL file formats. See platform.json for identifier format."
        },
        "read_length": {
            "title": "Read length",
            "type": "integer",
            "description": "For high-throughput sequencing, the number of contiguous nucleotides determined by sequencing.",
            "comment": "This field is required for fastq, fasta, csfasta, csqual and sra file formats containing sequencing reads."
        },
        "run_type": {
            "enum": [
                "single-ended",
                "paired-ended"
            ],
            "comment": "This field is required for sra and fastq files.",
            "type": "string",
            "description": "Indicates if file is part of a single or paired end sequencing run",
            "title": "Run type for sequencing files"
        },
        "paired_end": {
            "enum": [
                "1",
                "2",
                "1,2"
            ],
            "comment": "This field is required if run_type is paired-ended.",
            "type": "string",
            "description": "Which read of the pair the file represents (in case of paired end sequencing run)",
            "title": "Paired End Identifier"
        },
        "paired_with": {
            "title": "File pairing",
            "type": "string",
            "linkTo": "File",
            "description": "The paired end fastq that corresponds with this file.",
            "comment": "This is required for files with paired_end 2 and is calculated for files with paired_end 1."
        },
        "mapped_read_length": {
            "readonly": true,
            "description": "The length of the reads actually mapped, if the original read length was clipped.",
            "comment": "This is used when the original sequenced read length needed to be adjusted to match a replicate, or to match experiments and controls.",
            "title": "Mapped read length",
            "permission": "import_items",
            "type": "integer"
        },
        "mapped_run_type": {
            "enum": [
                "single-ended",
                "paired-ended"
            ],
            "description": "The mapped run type of the alignment file which may differ from the fastqs it is derived from.",
            "comment": "When the control and experiment run types do not match, one may be changed in order to meet pipeline requirements.",
            "title": "Mapped run type for bam files",
            "permission": "import_items",
            "type": "string",
            "readonly": true
        },
        "flowcell_details": {
            "title": "Flowcells",
            "default": [],
            "type": "array",
            "description": "For high-throughput sequencing, the flowcells used for the sequencing of the replicate.",
            "items": {
                "title": "Flowcell details",
                "type": "object",
                "additionalProperties": false,
                "properties": {
                    "machine": {
                        "title": "Machine Name",
                        "description": "The lab specific name of the machine used.",
                        "type": "string"
                    },
                    "flowcell": {
                        "title": "Flowcell ID",
                        "type": "string"
                    },
                    "lane": {
                        "title": "Lane",
                        "type": "string"
                    },
                    "barcode": {
                        "title": "Barcode",
                        "type": "string"
                    },
                    "barcode_in_read": {
                        "title": "Barcode in read",
                        "description": "The read the barcode is located on.",
                        "type": "string",
                        "enum": [
                            "1",
                            "2"
                        ]
                    },
                    "barcode_position": {
                        "title": "Barcode position",
                        "description": "The 1-based start position of the barcode in 5->3 orientation.",
                        "type": "integer"
                    },
                    "chunk": {
                        "title": "Chunk",
                        "description": "The file chunk label as assigned by Illumina software when splitting up a fastq into specified chunk sizes.",
                        "comment": "This label is used to re-assemble the chunks into the original file in the correct order.",
                        "type": "string"
                    }
                }
            }
        },
        "output_type": {
            "oneOf": [
                {
                    "enum": [
                        "alignments",
                        "blacklisted regions",
                        "candidate enhancers",
                        "candidate promoters",
                        "candidate regulatory elements",
                        "chromatin interactions",
                        "conservative idr thresholded peaks",
                        "control normalized signal",
                        "DHS peaks",
                        "enhancer validation",
                        "enrichment",
                        "exon quantifications",
                        "female genome index",
                        "female genome reference",
                        "filtered indels",
                        "filtered peaks",
                        "filtered regions",
                        "filtered SNPs",
                        "fold change over control",
                        "gene quantifications",
                        "genome compartments",
                        "genome index",
                        "genome reference",
                        "hotspots",
                        "idat green channel",
                        "idat red channel",
                        "intensity values",
                        "library fraction",
                        "long range chromatin interactions",
                        "male genome index",
                        "male genome reference",
                        "maternal haplotype mapping",
                        "maternal variant calls",
                        "methylation state at CHG",
                        "methylation state at CHH",
                        "methylation state at CpG",
                        "microRNA quantifications",
                        "minus strand signal of all reads",
                        "minus strand signal of unique reads",
                        "mitochondria blacklisted regions",
                        "motif model",
                        "nested topologically associated domains",
                        "normalized signal of all reads",
                        "open chromatin regions",
                        "optimal idr thresholded peaks",
                        "paternal haplotype mapping",
                        "paternal variant calls",
                        "peaks",
                        "phased mapping",
                        "phased variant calls",
                        "plus strand signal of all reads",
                        "plus strand signal of unique reads",
                        "predicted enhancers",
                        "pseudoreplicated idr thresholded peaks",
                        "raw minus strand signal",
                        "raw normalized signal",
                        "raw plus strand signal",
                        "raw signal",
                        "read-depth normalized signal",
                        "reads",
                        "reference variants",
                        "relative replication signal",
                        "replicated peaks",
                        "replication timing profile",
                        "reporter code counts",
                        "representative dnase hypersensitivity sites",
                        "RNA-binding protein associated mRNAs",
                        "rRNA reference",
                        "semi-automated genome annotation",
                        "signal of all reads",
                        "signal of unique reads",
                        "signal p-value",
                        "spike-in sequence",
                        "splice junctions",
                        "stable peaks",
                        "topologically associated domains",
                        "transcript quantifications",
                        "transcription start sites",
                        "transcriptome alignments",
                        "transcriptome index",
                        "transcriptome reference",
                        "TSS reference",
                        "unfiltered alignments",
                        "variant calls"
                    ]
                },
                {
                    "comment": "Historical output types, not valid for new submissions.",
                    "permission": "import_items",
                    "enum": [
                        "base overlap signal",
                        "clusters",
                        "contigs",
                        "copy number variation",
                        "distal peaks",
                        "filtered modified peptide quantification",
                        "filtered peptide quantification",
                        "filtered transcribed fragments",
                        "HMM predicted chromatin state",
                        "minus strand signal",
                        "minus strand transcription start sites",
                        "miRNA reference",
                        "percentage normalized signal",
                        "plus strand signal",
                        "plus strand transcription start sites",
                        "predicted forebrain enhancers",
                        "predicted heart enhancers",
                        "predicted transcription start sites",
                        "predicted whole brain enhancers",
                        "primer sequence",
                        "proximal peaks",
                        "raw data",
                        "reference",
                        "rejected reads",
                        "sequence alignability",
                        "sequence uniqueness",
                        "signal",
                        "snRNA reference",
                        "spike-in alignments",
                        "summed densities signal",
                        "transcribed fragments",
                        "tRNA reference",
                        "unfiltered modified peptide quantification",
                        "unfiltered peptide quantification",
                        "validation",
                        "valleys",
                        "wavelet-smoothed signal"
                    ]
                }
            ],
            "title": "Output type",
            "type": "string",
            "description": "A description of the file's purpose or contents."
        },
        "derived_from": {
            "title": "Derived from",
            "type": "array",
            "description": "The files participating as inputs into software to produce this output file.",
            "uniqueItems": true,
            "items": {
                "comment": "See file.json for a list of available identifiers.",
                "type": "string",
                "linkTo": "File"
            }
        },
        "controlled_by": {
            "description": "The files which should be used as a control for this file.",
            "uniqueItems": true,
            "comment": "This is used by the pipeline to find ChIP-seq controls.",
            "title": "Controlled by",
            "type": "array",
            "items": {
                "comment": "See file.json for a list of available identifiers.",
                "type": "string",
                "linkTo": "File"
            }
        },
        "supersedes": {
            "description": "The files that this file supersedes.",
            "uniqueItems": true,
            "comment": "The files in this list should be obsolete and the superseding file object should be used instead.",
            "title": "Supersedes",
            "type": "array",
            "items": {
                "comment": "The files in this list should be archived or revoked.",
                "type": "string",
                "linkTo": "File"
            }
        },
        "dataset": {
            "title": "Dataset",
            "type": "string",
            "linkTo": "Dataset",
            "description": "The experiment or dataset the file belongs to.",
            "comment": "Required. See dataset.json for available identifiers."
        },
        "replicate": {
            "title": "Replicate",
            "type": "string",
            "linkTo": "Replicate",
            "description": "The experimental replicate designation for the file.",
            "comment": "This field is required for sra and fastq files. See replicate.json for available identifiers."
        },
        "assembly": {
            "enum": [
                "ce10",
                "ce11",
                "dm3",
                "dm6",
                "GRCh38-minimal",
                "GRCh38",
                "hg19",
                "J02459.1",
                "mm10-minimal",
                "mm10",
                "mm9"
            ],
            "comment": "Required property for bam, sam, gtf, gff, bed and bigBed file format files. This property should not be submitted with sra or fastq files.",
            "type": "string",
            "description": "Genome assembly that files were mapped to.",
            "title": "Mapping assembly"
        },
        "genome_annotation": {
            "enum": [
                "ENSEMBL V65",
                "M2",
                "M3",
                "M4",
                "M7",
                "miRBase V21",
                "None",
                "V10",
                "V19",
                "V22",
                "V24",
                "V3c",
                "V7",
                "WS235",
                "WS245"
            ],
            "comment": "Applies to files created using annotations.",
            "type": "string",
            "description": "Genome annotation that file was generated with.",
            "title": "Genome annotation"
        },
        "dbxrefs": {
            "description": "Identifiers from external resources that may have 1-to-1 or 1-to-many relationships with ENCODE objects.",
            "rdfs:subPropertyOf": "rdfs:seeAlso",
            "type": "array",
            "items": {
                "title": "External identifier",
                "description": "Identifier from an external resource that may have 1-to-1 or 1-to-many relationships with ENCODE objects.",
                "type": "string",
                "pattern": "^(SRA:(SRR|SRX)\\d+)$"
            },
            "default": [],
            "uniqueItems": true,
            "comment": "Do not submit.  This is for admins to make virtual file objects for files at external resources.",
            "title": "External identifiers",
            "@type": "@id"
        },
        "step_run": {
            "title": "Analysis step run",
            "type": "string",
            "linkTo": "AnalysisStepRun",
            "description": "The run instance of the step used to generate the file.",
            "comment": "This may be linked to an individual instance at DNANexus or it may be virtual and have many files linked to it."
        },
        "content_error_detail": {
            "readonly": true,
            "description": "Explanation of why the file failed the automated content checks.",
            "comment": "Do not submit.  This is posted by the checkfiles script. It is required when status is 'content error'",
            "title": "Content error detail",
            "permission": "import_items",
            "type": "string"
        },
        "revoke_detail": {
            "readonly": true,
            "description": "Explanation of why the file was revoked.",
            "comment": "Do not submit. The field would be posted by DCC.",
            "title": "Revoke error detail",
            "permission": "import_items",
            "type": "string"
        },
        "output_category": {
            "enum": [
                "raw data",
                "alignment",
                "signal",
                "annotation",
                "quantification",
                "reference"
            ],
            "description": "The overall category of the file content.",
            "comment": "Do not submit.  This field is calculated from output_type_output_category.",
            "title": "Output category",
            "type": "string",
            "calculatedProperty": true
        },
        "href": {
            "title": "Download URL",
            "type": "string",
            "description": "The download path for S3 to obtain the actual file.",
            "calculatedProperty": true,
            "comment": "Do not submit. This is issued by the server."
        },
        "technical_replicates": {
            "description": "The technical replicate numbers associated with this file.",
            "comment": "Do not submit.  This field is calculated through the derived_from relationship back to the raw data.",
            "title": "Technical replicates",
            "type": "array",
            "calculatedProperty": true,
            "items": {
                "title": "Technical replicate number",
                "type": "string",
                "description": "The identifying number of each relevant technical replicate"
            }
        },
        "quality_metrics": {
            "title": "QC Metric",
            "comment": "Do not submit. Values in the list are reverse links of a quality metric with this file in quality_metric_of field.",
            "type": "array",
            "description": "The list of QC metric objects associated with this file.",
            "items": {
                "type": [
                    "string",
                    "object"
                ],
                "linkFrom": "QualityMetric.quality_metric_of"
            }
        },
        "@type": {
            "title": "Type",
            "type": "array",
            "calculatedProperty": true,
            "items": {
                "type": "string"
            }
        },
        "upload_credentials": {
            "title": "Upload Credentials",
            "type": "object",
            "description": "The upload credentials for S3 to submit the file content.",
            "calculatedProperty": true,
            "comment": "Do not submit. This is issued by the server."
        },
        "analysis_step_version": {
            "linkTo": "AnalysisStepVersion",
            "description": "The step version of the pipeline from which this file is an output.",
            "comment": "Do not submit.  This field is calculated from step_run.",
            "title": "Analysis Step Version",
            "type": "string",
            "calculatedProperty": true
        },
        "biological_replicates": {
            "description": "The biological replicate numbers associated with this file.",
            "comment": "Do not submit.  This field is calculated through the derived_from relationship back to the raw data.",
            "title": "Biological replicates",
            "type": "array",
            "calculatedProperty": true,
            "items": {
                "title": "Biological replicate number",
                "type": "integer",
                "description": "The identifying number of each relevant biological replicate"
            }
        },
        "file_type": {
            "title": "File type",
            "type": "string",
            "description": "The concatenation of file_format and file_format_type",
            "calculatedProperty": true,
            "comment": "Do not submit. This field is calculated from file_format and file_format_type."
        },
        "title": {
            "title": "Title",
            "type": "string",
            "description": "The title of the file, either the accession or the external_accession.",
            "calculatedProperty": true,
            "comment": "Do not submit. This is a calculated property"
        },
        "read_length_units": {
            "enum": [
                "nt"
            ],
            "description": "The units for read length.",
            "comment": "Do not submit. This is a fixed value.",
            "title": "Read length units",
            "type": "string",
            "calculatedProperty": true
        },
        "@id": {
            "title": "ID",
            "type": "string",
            "calculatedProperty": true
        },
        "superseded_by": {
            "title": "Superseded by",
            "comment": "Do not submit. Values in the list are reverse links of a file that supersedes.",
            "type": "array",
            "description": "The file(s) that supersede this file (i.e. are more preferable to use).",
            "items": {
                "type": [
                    "string",
                    "object"
                ],
                "linkFrom": "File.supersedes"
            }
        }
    },
    "columns": {
        "title": {
            "title": "Title"
        },
        "accession": {
            "title": "Accession"
        },
        "dataset": {
            "title": "Dataset"
        },
        "assembly": {
            "title": "Mapping assembly"
        },
        "technical_replicates": {
            "title": "Technical replicates"
        },
        "biological_replicates": {
            "title": "Biological replicates"
        },
        "file_format": {
            "title": "File Format"
        },
        "file_type": {
            "title": "File type"
        },
        "file_format_type": {
            "title": "Specific file format type"
        },
        "file_size": {
            "title": "File size"
        },
        "href": {
            "title": "Download URL"
        },
        "derived_from": {
            "title": "Derived from"
        },
        "genome_annotation": {
            "title": "Genome annotation"
        },
        "replicate.library.accession": {
            "title": "Library"
        },
        "paired_end": {
            "title": "Paired End Identifier"
        },
        "paired_with": {
            "title": "File pairing"
        },
        "run_type": {
            "title": "Run type for sequencing files"
        },
        "read_length": {
            "title": "Read length"
        },
        "mapped_read_length": {
            "title": "Mapped read length"
        },
        "read_length_units": {
            "title": "Read length units"
        },
        "output_category": {
            "title": "Data category"
        },
        "output_type": {
            "title": "Data type"
        },
        "quality_metrics": {
            "title": "QC Metric"
        },
        "lab": {
            "title": "Lab"
        },
        "award.project": {
            "title": "BioProject"
        },
        "step_run": {
            "title": "Analysis step run"
        },
        "date_created": {
            "title": "Date added"
        },
        "analysis_step_version": {
            "title": "Analysis Step Version"
        },
        "restricted": {
            "title": "Restricted file"
        },
        "status": {
            "title": "File status"
        }
    },
    "output_type_output_category": {
        "idat green channel": "raw data",
        "idat red channel": "raw data",
        "intensity values": "raw data",
        "reads": "raw data",
        "rejected reads": "raw data",
        "raw data": "raw data",
        "reporter code counts": "raw data",
        "alignments": "alignment",
        "unfiltered alignments": "alignment",
        "transcriptome alignments": "alignment",
        "spike-in alignments": "alignment",
        "maternal haplotype mapping": "alignment",
        "paternal haplotype mapping": "alignment",
        "phased mapping": "alignment",
        "minus strand signal of all reads": "signal",
        "plus strand signal of all reads": "signal",
        "signal of all reads": "signal",
        "normalized signal of all reads": "signal",
        "raw minus strand signal": "signal",
        "raw plus strand signal": "signal",
        "raw signal": "signal",
        "raw normalized signal": "signal",
        "read-depth normalized signal": "signal",
        "control normalized signal": "signal",
        "minus strand signal of unique reads": "signal",
        "plus strand signal of unique reads": "signal",
        "signal of unique reads": "signal",
        "signal p-value": "signal",
        "fold change over control": "signal",
        "minus strand signal": "signal",
        "plus strand signal": "signal",
        "signal": "signal",
        "base overlap signal": "signal",
        "percentage normalized signal": "signal",
        "summed densities signal": "signal",
        "wavelet-smoothed signal": "signal",
        "relative replication signal": "signal",
        "enrichment": "quantification",
        "library fraction": "quantification",
        "exon quantifications": "quantification",
        "gene quantifications": "quantification",
        "microRNA quantifications": "quantification",
        "transcript quantifications": "quantification",
        "methylation state at CpG": "quantification",
        "methylation state at CHG": "quantification",
        "methylation state at CHH": "quantification",
        "filtered modified peptide quantification": "quantification",
        "unfiltered modified peptide quantification": "quantification",
        "filtered peptide quantification": "quantification",
        "unfiltered peptide quantification": "quantification",
        "replication timing profile": "quantification",
        "motif model": "quantification",
        "hotspots": "annotation",
        "long range chromatin interactions": "annotation",
        "chromatin interactions": "annotation",
        "topologically associated domains": "annotation",
        "nested topologically associated domains": "annotation",
        "genome compartments": "annotation",
        "open chromatin regions": "annotation",
        "filtered peaks": "annotation",
        "filtered regions": "annotation",
        "DHS peaks": "annotation",
        "peaks": "annotation",
        "replicated peaks": "annotation",
        "RNA-binding protein associated mRNAs": "annotation",
        "splice junctions": "annotation",
        "copy number variation": "annotation",
        "clusters": "annotation",
        "contigs": "annotation",
        "transcribed fragments": "annotation",
        "filtered transcribed fragments": "annotation",
        "valleys": "annotation",
        "blacklisted regions": "annotation",
        "mitochondria blacklisted regions": "annotation",
        "distal peaks": "annotation",
        "proximal peaks": "annotation",
        "optimal idr thresholded peaks": "annotation",
        "conservative idr thresholded peaks": "annotation",
        "pseudoreplicated idr thresholded peaks": "annotation",
        "stable peaks": "annotation",
        "predicted forebrain enhancers": "annotation",
        "predicted heart enhancers": "annotation",
        "predicted whole brain enhancers": "annotation",
        "predicted enhancers": "annotation",
        "candidate enhancers": "annotation",
        "candidate promoters": "annotation",
        "predicted transcription start sites": "annotation",
        "transcription start sites": "annotation",
        "candidate regulatory elements": "annotation",
        "representative dnase hypersensitivity sites": "annotation",
        "variant calls": "annotation",
        "maternal variant calls": "annotation",
        "paternal variant calls": "annotation",
        "phased variant calls": "annotation",
        "filtered SNPs": "annotation",
        "filtered indels": "annotation",
        "transcriptome reference": "reference",
        "transcriptome index": "reference",
        "genome index": "reference",
        "tRNA reference": "reference",
        "miRNA reference": "reference",
        "snRNA reference": "reference",
        "rRNA reference": "reference",
        "TSS reference": "reference",
        "reference variants": "reference",
        "genome reference": "reference",
        "female genome reference": "reference",
        "female genome index": "reference",
        "male genome reference": "reference",
        "male genome index": "reference",
        "primer sequence": "reference",
        "spike-in sequence": "reference",
        "reference": "reference",
        "sequence alignability": "reference",
        "sequence uniqueness": "reference",
        "enhancer validation": "validation",
        "validation": "validation",
        "HMM predicted chromatin state": "annotation",
        "semi-automated genome annotation": "annotation"
    },
    "file_format_file_extension": {
        "2bit": ".2bit",
        "CEL": ".cel.gz",
        "bam": ".bam",
        "bed": ".bed.gz",
        "bedpe": ".bedpe.gz",
        "chain": ".chain",
        "bigBed": ".bigBed",
        "bigWig": ".bigWig",
        "btr": ".btr",
        "csfasta": ".csfasta.gz",
        "csqual": ".csqual.gz",
        "fasta": ".fasta.gz",
        "fastq": ".fastq.gz",
        "gff": ".gff.gz",
        "gtf": ".gtf.gz",
        "hic": ".hic",
        "hdf5": ".h5",
        "idat": ".idat",
        "PWM": ".pwm",
        "rcc": ".rcc",
        "sra": ".sra",
        "tagAlign": ".tagAlign.gz",
        "tar": ".tar.gz",
        "tsv": ".tsv",
        "csv": ".csv",
        "vcf": ".vcf.gz",
        "wig": ".wig.gz",
        "sam": ".sam.gz"
    },
    "changelog": "/profiles/changelogs/file.md",
    "@type": [
        "JSONSchema"
    ]
}