cli_add
Add URLs to ArchiveBox for web archiving, with options to tag content, control crawl depth, update existing snapshots, and configure extraction methods.
Instructions
Execute the `archivebox add` command.
Input Schema
| Name | Required | Description | Default |
|---|---|---|---|
| urls | Yes | List of URLs to archive | |
| tag | No | Comma-separated tags | |
| depth | No | Crawl depth | 0 |
| update | No | Update existing snapshots | false |
| update_all | No | Update all snapshots | false |
| index_only | No | Index without archiving | false |
| overwrite | No | Overwrite existing files | false |
| init | No | Initialize collection if needed | false |
| extractors | No | Comma-separated list of extractors to use | |
| parser | No | Parser type | auto |
| extra_data | No | Additional parameters as a dictionary | |
Input Schema (JSON Schema)
{
"properties": {
"depth": {
"default": 0,
"description": "Crawl depth",
"type": "integer"
},
"extra_data": {
"anyOf": [
{
"additionalProperties": true,
"type": "object"
},
{
"type": "null"
}
],
"default": null,
"description": "Additional parameters as a dictionary"
},
"extractors": {
"default": "",
"description": "Comma-separated list of extractors to use",
"type": "string"
},
"index_only": {
"default": false,
"description": "Index without archiving",
"type": "boolean"
},
"init": {
"default": false,
"description": "Initialize collection if needed",
"type": "boolean"
},
"overwrite": {
"default": false,
"description": "Overwrite existing files",
"type": "boolean"
},
"parser": {
"default": "auto",
"description": "Parser type",
"type": "string"
},
"tag": {
"default": "",
"description": "Comma-separated tags",
"type": "string"
},
"update": {
"default": false,
"description": "Update existing snapshots",
"type": "boolean"
},
"update_all": {
"default": false,
"description": "Update all snapshots",
"type": "boolean"
},
"urls": {
"description": "List of URLs to archive",
"items": {
"type": "string"
},
"type": "array"
}
},
"required": [
"urls"
],
"type": "object"
}