/**
* Statistical ensemble generator for the Random Tree Model (RTM)
* Builds many stochastic tree instances per narrative to capture population-level variance
* @module core/rtm-ensemble
*/
import {
Narrative,
RandomTree,
RTMParameters,
RTMEnsemble,
EnsembleStatistics,
TemporalScale,
Clause
} from '../types/rtm.js';
import { RTMTreeBuilder } from './rtm-builder.js';
import { RTMTraversal } from './rtm-traversal.js';
import { calculateScalingExponent } from './rtm-math.js';
import pLimit from 'p-limit';
/**
* Generates and analyzes statistical ensembles of Random Trees
*/
export class RTMEnsembleGenerator {
private limit = pLimit(5); // Concurrent tree generation limit
constructor(private parameters: RTMParameters) {}
/**
* Generate an ensemble of Random Trees for a narrative
* @param narrative - Source narrative
* @param ensembleSize - Number of trees to generate
* @returns Complete ensemble with statistics
*/
async generateEnsemble(
narrative: Narrative,
ensembleSize: number
): Promise<RTMEnsemble> {
if (ensembleSize < 1) {
throw new Error('ensembleSize must be at least 1');
}
// Create clause map for traversal
const clauseMap = new Map(
narrative.clauses.map(c => [c.id, c])
);
// Generate trees in parallel with concurrency limit
const treePromises = Array.from({ length: ensembleSize }, () =>
this.limit(() => this.generateSingleTree(narrative))
);
const trees = await Promise.all(treePromises);
// Calculate ensemble statistics
const statistics = this.calculateEnsembleStatistics(trees, clauseMap);
return {
narrativeId: narrative.id,
trees,
parameters: this.parameters,
statistics
};
}
/**
* Generate a single Random Tree
* @param narrative - Source narrative
* @returns Random tree instance
*/
private async generateSingleTree(narrative: Narrative): Promise<RandomTree> {
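// Each call is expected to produce an independent stochastic realization;
// the randomness is assumed to live inside RTMTreeBuilder.buildTree.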
const builder = new RTMTreeBuilder(this.parameters);
return builder.buildTree(narrative);
}
/**
* Calculate statistical properties of the ensemble
* @param trees - Array of trees
* @param clauseMap - Map of clause IDs to clauses
* @returns Ensemble statistics
*/
private calculateEnsembleStatistics(
trees: RandomTree[],
clauseMap: Map<string, Clause>
): EnsembleStatistics {
const recallLengths: number[] = [];
const compressionByScale: Record<TemporalScale, number[]> = {
'clause': [],
'sentence': [],
'paragraph': [],
'section': [],
'chapter': [],
'document': []
};
// Analyze each tree
trees.forEach(tree => {
const traversal = new RTMTraversal(tree, clauseMap);
// Simulate recall at max depth
const result = traversal.traverseToDepth(this.parameters.maxRecallDepth);
recallLengths.push(result.totalClauses);
// Collect compression ratios by scale
tree.nodes.forEach(node => {
compressionByScale[node.temporalScale].push(node.compressionRatio);
});
});
// Calculate mean and std of recall lengths
const meanRecallLength = recallLengths.reduce((a, b) => a + b, 0) / recallLengths.length;
const variance = recallLengths.reduce(
(sum, len) => sum + Math.pow(len - meanRecallLength, 2),
0
) / recallLengths.length;
const stdRecallLength = Math.sqrt(variance);
// Calculate scaling exponent if enough data
const allCompressionRatios = Object.values(compressionByScale)
.flat()
.filter(r => r > 0);
const scalingExponent = allCompressionRatios.length > 10
? calculateScalingExponent(allCompressionRatios)
: undefined;
return {
meanRecallLength,
stdRecallLength,
compressionDistribution: compressionByScale,
scalingExponent
};
}
/**
* Analyze variance across ensemble
* @param ensemble - Complete ensemble
* @returns Variance analysis
*/
analyzeEnsembleVariance(ensemble: RTMEnsemble): {
structuralVariance: number;
depthVariance: number;
branchingVariance: number;
} {
const structures = ensemble.trees.map(tree => {
const stats = this.getTreeStructureStats(tree);
return {
nodeCount: stats.totalNodes,
maxDepth: stats.maxDepth,
avgBranching: stats.avgBranchingFactor
};
});
// Calculate variances
const avgNodeCount = structures.reduce((sum, s) => sum + s.nodeCount, 0) / structures.length;
const structuralVariance = structures.reduce(
(sum, s) => sum + Math.pow(s.nodeCount - avgNodeCount, 2),
0
) / structures.length;
const avgDepth = structures.reduce((sum, s) => sum + s.maxDepth, 0) / structures.length;
const depthVariance = structures.reduce(
(sum, s) => sum + Math.pow(s.maxDepth - avgDepth, 2),
0
) / structures.length;
const avgBranching = structures.reduce((sum, s) => sum + s.avgBranching, 0) / structures.length;
const branchingVariance = structures.reduce(
(sum, s) => sum + Math.pow(s.avgBranching - avgBranching, 2),
0
) / structures.length;
return {
structuralVariance,
depthVariance,
branchingVariance
};
}
/**
* Get structural statistics for a tree
* @param tree - Random tree
* @returns Tree statistics
*/
private getTreeStructureStats(tree: RandomTree) {
const nodes = Array.from(tree.nodes.values());
const leafNodes = nodes.filter(n => n.children.length === 0);
const maxDepth = nodes.reduce((max, node) => Math.max(max, node.level), 0);
const nodesWithChildren = nodes.filter(n => n.children.length > 0);
const avgBranchingFactor = nodesWithChildren.length > 0
? nodesWithChildren.reduce((sum, n) => sum + n.children.length, 0) / nodesWithChildren.length
: 0;
return {
totalNodes: nodes.length,
maxDepth,
avgBranchingFactor,
leafNodes: leafNodes.length
};
}
/**
* Test for scale invariance in the ensemble
* @param ensemble - Complete ensemble
* @param minNarrativeLength - Minimum narrative length, in clauses, required to assess scale invariance
* @returns Scale invariance test results
*/
testScaleInvariance(
ensemble: RTMEnsemble,
minNarrativeLength: number = 1000
): {
isScaleInvariant: boolean;
scalingExponent?: number;
confidence: number;
} {
// Check narrative length, taken here as the clause count attached to the root node
const narrativeLength = ensemble.trees[0]?.nodes.get(
ensemble.trees[0].rootNodeId
)?.clauses.length || 0;
if (narrativeLength < minNarrativeLength) {
return {
isScaleInvariant: false,
confidence: 0
};
}
// Require a computed scaling exponent (a value of 0 still counts as present)
const exponent = ensemble.statistics.scalingExponent;
if (exponent === undefined) {
return {
isScaleInvariant: false,
confidence: 0
};
}
// Calculate confidence based on variance in compression ratios
const allRatios = Object.values(ensemble.statistics.compressionDistribution)
.flat()
.filter(r => r > 0);
const meanRatio = allRatios.reduce((a, b) => a + b, 0) / allRatios.length;
const variance = allRatios.reduce(
(sum, r) => sum + Math.pow(r - meanRatio, 2),
0
) / allRatios.length;
// Heuristic: confidence = exp(-variance / mean), approaching 1 as the relative spread of compression ratios shrinks
const confidence = Math.exp(-variance / meanRatio);
return {
isScaleInvariant: Math.abs(exponent) > 0.1,
scalingExponent: exponent,
confidence
};
}
}
/**
* Create default RTM parameters
* @returns Default parameters based on cognitive science findings
*/
export function createDefaultParameters(): RTMParameters {
return {
maxBranchingFactor: 4, // ~4 chunks per node, consistent with working-memory capacity (Cowan's "magical number four")
maxRecallDepth: 6, // Maximum depth of simulated recall traversal
compressionTargets: {
'clause': 1.0,
'sentence': 1.5,
'paragraph': 2.5,
'section': 4.0,
'chapter': 8.0,
'document': 16.0
},
minSegmentSize: 1
};
}
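/**
 * Illustrative sketch (not part of the original API): wires the generator,
 * default parameters, and the scale-invariance test together for one
 * narrative. Assumes the `Narrative` was produced elsewhere (e.g. by a
 * clause segmenter); the default ensemble size of 100 is an arbitrary
 * example value, not a recommendation.
 */
export async function analyzeNarrativeExample(
narrative: Narrative,
ensembleSize: number = 100
): Promise<{
ensemble: RTMEnsemble;
isScaleInvariant: boolean;
confidence: number;
}> {
// Build the ensemble with the cognitive-science-motivated defaults above
const generator = new RTMEnsembleGenerator(createDefaultParameters());
const ensemble = await generator.generateEnsemble(narrative, ensembleSize);
// Report whether the compression statistics look scale-invariant
const { isScaleInvariant, confidence } = generator.testScaleInvariance(ensemble);
return { ensemble, isScaleInvariant, confidence };
}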