cli_update
Update ArchiveBox snapshots by archiving new URLs or re-processing existing ones with configurable filters for timestamp, status, and extractors.
Instructions
Execute the `archivebox update` command.
Input Schema
| Name | Required | Description | Default |
|---|---|---|---|
| resume | No | Resume from timestamp | 0 |
| only_new | No | Update only new snapshots | true |
| index_only | No | Index without archiving | false |
| overwrite | No | Overwrite existing files | false |
| after | No | Filter snapshots after timestamp | 0 |
| before | No | Filter snapshots before timestamp | 999999999999999 |
| status | No | Filter by status | unarchived |
| filter_type | No | Filter type | substring |
| filter_patterns | No | List of filter patterns | |
| extractors | No | Comma-separated list of extractors | |
| extra_data | No | Additional parameters as a dictionary | |
Input Schema (JSON Schema)
{
"properties": {
"after": {
"anyOf": [
{
"type": "number"
},
{
"type": "null"
}
],
"default": 0,
"description": "Filter snapshots after timestamp"
},
"before": {
"anyOf": [
{
"type": "number"
},
{
"type": "null"
}
],
"default": 999999999999999,
"description": "Filter snapshots before timestamp"
},
"extra_data": {
"anyOf": [
{
"additionalProperties": true,
"type": "object"
},
{
"type": "null"
}
],
"default": null,
"description": "Additional parameters as a dictionary"
},
"extractors": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": "",
"description": "Comma-separated list of extractors"
},
"filter_patterns": {
"anyOf": [
{
"items": {
"type": "string"
},
"type": "array"
},
{
"type": "null"
}
],
"default": null,
"description": "List of filter patterns"
},
"filter_type": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": "substring",
"description": "Filter type"
},
"index_only": {
"default": false,
"description": "Index without archiving",
"type": "boolean"
},
"only_new": {
"default": true,
"description": "Update only new snapshots",
"type": "boolean"
},
"overwrite": {
"default": false,
"description": "Overwrite existing files",
"type": "boolean"
},
"resume": {
"anyOf": [
{
"type": "number"
},
{
"type": "null"
}
],
"default": 0,
"description": "Resume from timestamp"
},
"status": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": "unarchived",
"description": "Filter by status"
}
},
"type": "object"
}