Data Model – Canonical Patient Bundle

Overview

The Canonical Patient Bundle is the single source of truth for all patient oncology data in Entheory.AI. It is a comprehensive JSON document that aggregates data from multiple hospital systems into a unified, validated structure.


Design Principles

  1. Single Source of Truth: One bundle per patient contains ALL clinical data
  2. Append-Only: Historical data never deleted, only marked as superseded
  3. Provenance: Every data element includes source system and timestamp
  4. Validation: Strictly validated against JSON schema before persistence
  5. FHIR-Aligned: Structure maps cleanly to FHIR R4 resources

File Organization

src/data/patients/
  ├─ ABHA-12345678901/
  │    ├─ bundle.json              # Current canonical bundle
  │    ├─ bundle.backup.json        # Previous version (for rollback)
  │    └─ bundle.pending.json       # Partial data (validation failed)
  ├─ ABHA-98765432102/
  │    └─ bundle.json
  └─ ...

src/data/processed_patients.json   # Aggregated cache for patient list

Bundle Schema (Top-Level)

/**
 * Top-level shape of one patient's canonical bundle document.
 *
 * Groups bundle metadata, the clinical data sections, and a bundle-level
 * provenance record. The section types referenced here are declared
 * separately.
 */
interface PatientBundle {
  // Metadata
  bundleVersion: string;           // "1.0"
  patientId: string;               // Internal ID: "case_001"
  abhaId: string;                  // ABHA ID: "ABHA-12345678901"
  lastUpdated: string;             // ISO 8601 timestamp

  // Core Data Sections
  demographics: Demographics;
  cancer: CancerData;
  vitals: Vitals[];
  labs: LabResult[];
  imaging: ImagingStudy[];
  pathology: PathologyReport[];
  // NOTE(review): unlike the neighbouring sections this is a single, required
  // object rather than an array — confirm that is intended for patients with
  // no (or more than one) genomic test.
  genomics: GenomicsData;
  therapy: TreatmentLine[];
  medications: Medication[];
  allergies: Allergy[];
  documents: Document[];           // OCR outputs
  transcripts: Transcript[];       // ASR outputs
  timeline: TimelineEvent[];

  // Provenance & Audit
  provenance: Provenance;
}

Section Schemas

1. Demographics

/**
 * Patient identity and administrative details: name, date of birth, gender,
 * contact details, identifiers, and optional emergency-contact and insurance
 * information.
 */
interface Demographics {
  name: {
    firstName: string;
    middleName?: string;
    lastName: string;
    fullName: string;             // Computed: for display
  };

  dateOfBirth: string;            // YYYY-MM-DD
  age: number;                    // Computed from DOB
  gender: "Male" | "Female" | "Other" | "Unknown";

  // Required container, but every member is optional, so an empty object
  // satisfies the type.
  // NOTE(review): the simplified example bundle omits this key — confirm
  // whether it should be optional.
  contactInfo: {
    phone?: string;
    email?: string;
    address?: {
      line1: string;
      line2?: string;
      city: string;
      state: string;
      pincode: string;
      country: "India";           // MVP: India only
    };
  };

  identifiers: {
    abhaId: string;               // Primary
    hospitalMrn: string;          // Hospital Medical Record Number
    aadharLastFour?: string;      // Last 4 digits only (privacy)
  };

  emergencyContact?: {
    name: string;
    relationship: string;
    phone: string;
  };

  insurance?: {
    type: "CGHS" | "ECHS" | "Ayushman Bharat" | "Private" | "Self-Pay";
    policyNumber?: string;
    validUntil?: string;
  };
}

2. Cancer Data (Core)

/**
 * Core oncology diagnosis record: primary site, histology, staging, grade,
 * metastatic spread, key biomarkers and performance status.
 */
interface CancerData {
  /** Primary tumour site, e.g. "Breast", "Lung", "Colorectal". */
  primarySite: string;
  /** Histologic type, e.g. "Invasive Ductal Carcinoma", "Adeno". */
  histology: string;

  /** Staging details; each component is optional. */
  stage: {
    /** Clinical stage, e.g. "IIA", "IIIB". */
    clinical?: string;
    /** Pathologic stage. */
    pathologic?: string;
    /** TNM components. */
    tnm?: {
      /** e.g. "T2" */
      t: string;
      /** e.g. "N1" */
      n: string;
      /** e.g. "M0" */
      m: string;
    };
  };

  /** Tumour grade. */
  grade?: "G1" | "G2" | "G3" | "GX";

  /** Date of diagnosis, YYYY-MM-DD. */
  diagnosisDate: string;
  /** How the diagnosis was established. */
  diagnosisSource: "Biopsy" | "Imaging" | "Clinical" | "Unknown";

  /** Metastatic disease, if recorded. */
  metastases?: {
    present: boolean;
    /** e.g. ["Liver", "Bone"] */
    sites: string[];
  };

  /** Key prognostic/predictive markers, looked up by a free-form string key. */
  biomarkers?: Record<
    string,
    {
      /** e.g. "ER", "PR", "HER2", "PD-L1" */
      name: string;
      /** e.g. "Positive", "Negative", "95%" */
      value: string;
      date: string;
      /** Report ID or system the value came from. */
      source: string;
    }
  >;

  /** Functional status assessment. */
  performanceStatus?: {
    /** ECOG scale. */
    ecog?: 0 | 1 | 2 | 3 | 4 | 5;
    /** Karnofsky score, 0-100. */
    karnofsky?: number;
    assessmentDate: string;
  };
}

3. Vitals

/**
 * One recorded set of vital-sign measurements.
 *
 * Only `vitalId`, `date` and `source` are required; every individual
 * measurement is optional.
 */
interface Vitals {
  vitalId: string;                // "vital_2024-12-03_001"
  date: string;                   // YYYY-MM-DD
  time?: string;                  // HH:MM

  weight?: {
    value: number;
    unit: "kg";
  };

  height?: {
    value: number;
    unit: "cm";
  };

  bmi?: number;                   // Computed: weight(kg) / (height(m)^2)

  bloodPressure?: {
    systolic: number;             // mmHg
    diastolic: number;
  };

  heartRate?: number;             // bpm
  respiratoryRate?: number;       // breaths/min
  // Temperature is the only measurement here that carries a choice of unit.
  temperature?: {
    value: number;
    unit: "C" | "F";
  };

  spO2?: number;                  // 0-100%

  source: string;                 // "EMR-HIMS", "Manual Entry"
  recordedBy?: string;            // Nurse/physician name
}

4. Lab Results

/**
 * A single laboratory test result, with an optional comparison (`delta`)
 * against an earlier result and the provenance of the inbound data.
 */
interface LabResult {
  labId: string;                  // "lab_2024-12-03_oru_123"
  testName: string;               // "Hemoglobin", "Creatinine"
  loincCode?: string;             // LOINC code if available

  // Numeric for quantitative tests, string for qualitative ones; consumers
  // must narrow with `typeof` before doing arithmetic.
  value: number | string;         // 12.5 or "Positive"
  unit?: string;                  // "g/dL", "mg/dL"

  referenceRange?: string;        // "12-16 g/dL"
  status: "normal" | "abnormal" | "critical" | "unknown";
  abnormalFlag?: "H" | "L" | "HH" | "LL";  // High, Low, Critical High/Low

  date: string;                   // YYYY-MM-DD
  time?: string;

  category: "Hematology" | "Chemistry" | "Tumor Marker" | "Coagulation" | "Urinalysis" | "Other";

  delta?: {                       // Change from previous value
    value: number;
    percent: number;
    direction: "up" | "down" | "stable";
    comparedTo: string;           // labId of previous test
  };

  provenance: {
    source: string;               // "LIS-HOSPITALX"
    hl7MessageId?: string;        // Control ID from HL7 message
    receivedAt: string;
  };
}

Example Lab Categories:

  • Hematology: CBC, WBC, Hemoglobin, Platelets
  • Chemistry: BUN, Creatinine, ALT, AST, Bilirubin, Albumin
  • Tumor Markers: CA-125, CA 15-3, CEA, PSA, AFP, CA 19-9
  • Coagulation: PT, INR, aPTT
  • Urinalysis: Specific gravity, protein, glucose

5. Imaging Studies

/**
 * One imaging study: modality, body part, the radiologist's findings,
 * optional structured measurements, links to the stored assets, and the
 * provenance of the record.
 */
interface ImagingStudy {
  /** DICOM Study Instance UID. */
  studyId: string;
  studyDate: string;
  modality: "CT" | "MRI" | "PET" | "X-Ray" | "Ultrasound" | "Mammography" | "PET-CT";

  /** e.g. "Chest", "Abdomen", "Brain" */
  bodyPart: string;
  /** e.g. "CT Chest with Contrast" */
  description?: string;

  /** Radiologist impression as free text. */
  findings?: string;

  /** Structured measurements, when available. */
  measurements?: {
    /** RECIST target lesions. */
    recist?: {
      targetLesions: Array<{
        lesionId: string;
        location: string;
        /** Diameter in mm. */
        diameter: number;
      }>;
      sumOfDiameters: number;
    };

    /** PET SUVmax value. */
    suvmax?: number;
  };

  /** Links to the DICOM files. */
  assets?: {
    manifestPath?: string;
    assetCount?: number;
    /** Fraction of assets available, 0.0-1.0. */
    completeness: number;
    /** Link to the PACS viewer. */
    viewerUrl?: string;
  };

  status: "complete" | "incomplete" | "preliminary";

  provenance: {
    /** e.g. "PACS-HOSPITALX" */
    source: string;
    receivedAt: string;
  };
}

6. Pathology Reports

/**
 * A pathology report: specimen details, diagnosis, optional biomarker
 * results, margin and lymph-node findings, the free-text report, and the
 * provenance of the record.
 */
interface PathologyReport {
  reportId: string;
  reportDate: string;
  reportType: "Biopsy" | "Surgical Pathology" | "Cytology";

  specimen: {
    /** e.g. "Breast lumpectomy" */
    type: string;
    /** e.g. "Left breast, upper outer quadrant" */
    site: string;
    collectionDate?: string;
  };

  diagnosis: {
    /** e.g. "Invasive Ductal Carcinoma" */
    primary: string;
    histology?: string;
    grade?: "G1" | "G2" | "G3" | "GX";
  };

  /** Biomarker results, looked up by a free-form string key. */
  biomarkers?: Record<
    string,
    {
      /** e.g. "ER", "PR", "HER2", "Ki67" */
      name: string;
      /** e.g. "Positive (95%)" */
      result: string;
      /** e.g. "IHC", "FISH" */
      method?: string;
    }
  >;

  margins?: {
    status: "Negative" | "Positive" | "Close";
    /** e.g. "2mm" */
    closestMargin?: string;
  };

  lymphNodes?: {
    examined: number;
    positive: number;
  };

  /** Free text from the pathologist. */
  fullReportText?: string;

  provenance: {
    /** e.g. "Pathology-HOSPITALX" */
    source: string;
    pathologist?: string;
    receivedAt: string;
  };
}

7. Genomics Data

/**
 * Results of a genomic test: somatic mutations, genomic biomarkers (TMB,
 * MSI, PD-L1), copy-number variations, a pointer to the full report, and the
 * provenance of the record.
 */
interface GenomicsData {
  testDate: string;
  testType: "NGS Panel" | "WES" | "WGS" | "Single Gene" | "Other";
  /** e.g. "FoundationOne CDx", "OncoPanel" */
  panelName?: string;

  somaticMutations?: Array<{
    /** e.g. "EGFR", "KRAS", "TP53" */
    gene: string;
    /** HGVS notation, e.g. "NM_005228.3:c.2235_2249del" */
    variant: string;
    /** Variant Allele Frequency (0-100%). */
    vaf: number;
    classification: "Pathogenic" | "Likely Pathogenic" | "VUS" | "Likely Benign" | "Benign";
    /** Clinically actionable per guidelines. */
    actionable: boolean;
  }>;

  biomarkers?: {
    /** Tumor Mutational Burden. */
    tmb?: {
      /** mutations/megabase */
      value: number;
      interpretation: "High" | "Intermediate" | "Low";
    };

    /** Microsatellite Instability. */
    msi?: {
      status: "MSI-H" | "MSS";
    };

    pdl1?: {
      /** Tumor Proportion Score (%). */
      tps?: number;
      /** Combined Positive Score. */
      cps?: number;
      method: "IHC 22C3" | "IHC SP263" | "Other";
    };
  };

  copyNumberVariations?: Array<{
    gene: string;
    type: "Amplification" | "Deletion";
    copyNumber?: number;
  }>;

  /** Path to the JSON or PDF report. */
  fullReportPath?: string;

  provenance: {
    /** e.g. "Genomics-Lab-X" */
    source: string;
    receivedAt: string;
  };
}

8. Treatment Lines

/**
 * One line of therapy: intent, regimen, dates, status, cycle counts,
 * response assessment, adverse events, and the provenance of the record.
 */
interface TreatmentLine {
  lineId: string;
  /** Ordinal of the line: 1st line, 2nd line, etc. */
  lineNumber: number;

  intent: "Curative" | "Adjuvant" | "Neoadjuvant" | "Palliative";

  /** e.g. "FOLFOX", "Pembrolizumab", "AC-T" */
  regimenName: string;
  /** e.g. ["5-FU", "Oxaliplatin", "Leucovorin"] */
  drugs?: string[];

  startDate: string;
  endDate?: string;

  status: "Planned" | "Ongoing" | "Completed" | "Discontinued";
  /** e.g. "Toxicity", "Progression", "Patient Request" */
  discontinuationReason?: string;

  cycles?: {
    planned?: number;
    completed?: number;
  };

  response?: {
    assessment: "CR" | "PR" | "SD" | "PD" | "Not Assessed";
    assessmentDate: string;
    method: "RECIST" | "Clinical" | "Imaging";
  };

  adverseEvents?: Array<{
    event: string;
    /** CTCAE grade. */
    grade: 1 | 2 | 3 | 4 | 5;
    date: string;
  }>;

  provenance: {
    source: string;
    enteredBy?: string;
  };
}

9. Medications & Allergies

/**
 * A medication entry for the patient, covering both anti-cancer drugs and
 * supportive/comorbidity medication (see `category`).
 *
 * Only `medicationId`, `name`, `category` and `status` are required.
 */
interface Medication {
  medicationId: string;
  name: string;
  genericName?: string;

  dosage?: string;                // "500mg"
  route?: "Oral" | "IV" | "IM" | "Subcutaneous" | "Other";
  frequency?: string;             // "BID", "TID", "weekly"

  category: "Chemotherapy" | "Targeted" | "Immunotherapy" | "Supportive" | "Comorbidity";

  startDate?: string;
  endDate?: string;
  status: "Active" | "Discontinued" | "On Hold";

  prescribedBy?: string;
}

/**
 * A recorded allergy: the allergen, the observed reaction, and its severity.
 */
interface Allergy {
  allergyId: string;
  allergen: string;               // Drug name or substance

  reaction?: string;              // "Anaphylaxis", "Rash", "Nausea"
  severity: "Mild" | "Moderate" | "Severe" | "Life-Threatening";

  onsetDate?: string;
  reportedBy?: string;
}

10. Documents (OCR Outputs)

/**
 * An uploaded clinical document together with its OCR output.
 *
 * Note: this interface shares its name with the DOM's global `Document`
 * type. Inside an ES module this declaration shadows the global; in a
 * non-module script it would merge with it instead.
 */
interface Document {
  documentId: string;
  type: "Discharge Summary" | "Lab Report" | "Imaging Report" | "Consult Note" | "Other";

  filePath: string;               // S3 path to original PDF
  hash: string;                   // SHA-256 hash of file

  extractedText: string;          // OCR output
  // NOTE(review): Transcript.language uses "en-IN" for English while this
  // uses "en" — confirm the difference is intentional.
  language: "en" | "hi-IN";

  ocrEngine: string;              // "tesseract-5.3.0"
  ocrMode?: string;               // "lstm"
  confidence: number;             // 0.0-1.0

  uploadDate: string;
  uploadedBy?: string;

  needsReview: boolean;           // True if confidence <0.70
}

11. Transcripts (ASR Outputs)

/**
 * An audio recording together with its ASR (speech-to-text) output and the
 * patient's consent flag.
 */
interface Transcript {
  transcriptId: string;
  /** S3 path to the audio file. */
  audioPath: string;

  /** Full transcribed text. */
  transcript: string;
  language: "en-IN" | "hi-IN";

  /** e.g. "whisper-large-v3" */
  asrModel: string;
  confidence: number;

  /** e.g. "00:04:32" */
  duration: string;
  recordingDate: string;

  /** Whether patient consent was obtained. */
  consentFlag: boolean;

  /** Optional word-level timestamps. */
  timestamps?: Array<{
    start: number;
    end: number;
    word: string;
  }>;
}

12. Timeline Events

/**
 * A single entry on the patient timeline, optionally linked to the
 * underlying record (lab, imaging study, etc.) it was derived from.
 */
interface TimelineEvent {
  eventId: string;
  eventType: "Diagnosis" | "Admission" | "Discharge" | "Procedure" | "Treatment Start" | "Lab" | "Imaging" | "Note";

  date: string;
  time?: string;

  title: string;                  // "Chemotherapy Cycle 1 Started"
  description?: string;

  // linkedDataId and linkedDataType are independently optional; the type
  // does not force them to be supplied together.
  linkedDataId?: string;          // Reference to labs[].labId, imaging[].studyId, etc.
  // NOTE(review): "treatment" presumably points at therapy[].lineId — confirm.
  linkedDataType?: "lab" | "imaging" | "pathology" | "genomics" | "treatment" | "document";

  source: string;
}

13. Provenance & Audit

/**
 * Bundle-level audit record: creation and last-update times, the source
 * systems that contributed data, and an optional history of changes.
 */
interface Provenance {
  /** When the bundle was first created. */
  createdAt: string;
  /** When the bundle was most recently updated. */
  lastUpdated: string;

  dataSources: Array<{
    /** e.g. "EMR-HOSPITALX", "LIS-HOSPITALX" */
    name: string;
    type: "HL7" | "FHIR" | "JSON" | "Manual Entry";
    lastSync?: string;
    /** How many records came from this source. */
    recordCount?: number;
  }>;

  updateHistory?: Array<{
    timestamp: string;
    /** System or user that made the change. */
    updatedBy: string;
    changeType: "Ingestion" | "Manual Edit" | "Correction";
    changeDescription: string;
  }>;
}

Validation Rules

Required Fields

  • bundleVersion, patientId, abhaId, lastUpdated
  • demographics.name.fullName
  • demographics.dateOfBirth
  • demographics.gender
  • cancer.primarySite, cancer.diagnosisDate

Data Integrity

  • All dates in YYYY-MM-DD format
  • All timestamps in ISO 8601 format
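  • ABHA ID: 12-digit numeric string (NOTE: the sample IDs in this document, e.g. "ABHA-12345678901", use an "ABHA-" prefix followed by 11 digits — confirm the canonical format and length)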
  • Email: Valid email format (if provided)
  • Phone: 10-digit Indian mobile format (if provided)

Referential Integrity

  • timeline[].linkedDataId must reference a valid item in the array identified by timeline[].linkedDataType (e.g. labs[].labId, imaging[].studyId)
  • labs[].delta.comparedTo must reference valid labId

Example Bundle (Simplified)

{
  "bundleVersion": "1.0",
  "patientId": "case_001",
  "abhaId": "ABHA-12345678901",
  "lastUpdated": "2024-12-03T10:15:30Z",

  "demographics": {
    "name": {
      "firstName": "Priya",
      "lastName": "Sharma",
      "fullName": "Priya Sharma"
    },
    "dateOfBirth": "1975-06-15",
    "age": 49,
    "gender": "Female",
    "identifiers": {
      "abhaId": "ABHA-12345678901",
      "hospitalMrn": "MRN-H1-98765"
    }
  },

  "cancer": {
    "primarySite": "Breast",
    "histology": "Invasive Ductal Carcinoma",
    "stage": {
      "pathologic": "IIB",
      "tnm": { "t": "T2", "n": "N1", "m": "M0" }
    },
    "grade": "G2",
    "diagnosisDate": "2024-01-15",
    "biomarkers": {
      "ER": { "name": "ER", "value": "Positive (95%)", "date": "2024-01-20", "source": "path_report_001" },
      "PR": { "name": "PR", "value": "Positive (80%)", "date": "2024-01-20", "source": "path_report_001" },
      "HER2": { "name": "HER2", "value": "Negative", "date": "2024-01-20", "source": "path_report_001" }
    }
  },

  "labs": [
    {
      "labId": "lab_2024-12-01_001",
      "testName": "Hemoglobin",
      "value": 12.8,
      "unit": "g/dL",
      "referenceRange": "12-16 g/dL",
      "status": "normal",
      "date": "2024-12-01",
      "category": "Hematology",
      "delta": {
        "value": -0.2,
        "percent": -1.5,
        "direction": "down",
        "comparedTo": "lab_2024-11-15_001"
      },
      "provenance": {
        "source": "LIS-HOSPITALX",
        "hl7MessageId": "ORU-20241201-001",
        "receivedAt": "2024-12-01T08:30:00Z"
      }
    }
  ],

  "therapy": [
    {
      "lineId": "tx_line_1",
      "lineNumber": 1,
      "intent": "Adjuvant",
      "regimenName": "AC-T",
      "drugs": ["Doxorubicin", "Cyclophosphamide", "Paclitaxel"],
      "startDate": "2024-03-01",
      "endDate": "2024-08-15",
      "status": "Completed",
      "cycles": { "planned": 8, "completed": 8 },
      "response": {
        "assessment": "CR",
        "assessmentDate": "2024-09-01",
        "method": "Imaging"
      }
    }
  ],

  "provenance": {
    "createdAt": "2024-01-15T12:00:00Z",
    "lastUpdated": "2024-12-03T10:15:30Z",
    "dataSources": [
      { "name": "EMR-HOSPITALX", "type": "HL7", "lastSync": "2024-12-03T10:00:00Z", "recordCount": 45 },
      { "name": "LIS-HOSPITALX", "type": "HL7", "lastSync": "2024-12-01T08:30:00Z", "recordCount": 128 },
      { "name": "PACS-HOSPITALX", "type": "JSON", "lastSync": "2024-11-28T14:00:00Z", "recordCount": 12 }
    ]
  }
}

FHIR Mapping

| Bundle Section | FHIR Resource |
|----------------|---------------|
| demographics | Patient |
| cancer | Condition |
| vitals | Observation (category: vital-signs) |
| labs | Observation (category: laboratory) |
| imaging | ImagingStudy |
| pathology | DiagnosticReport (category: PAT) |
| genomics | DiagnosticReport (category: GE) + Observation (variants) |
| therapy | MedicationStatement, Procedure |
| medications | MedicationStatement |
| allergies | AllergyIntolerance |
| timeline | (No direct mapping, derived view) |

Document Owner: Data Architect / Backend Lead
Last Updated: 2024-12-03
Related: High-Level Architecture | APIs