CROP
Projects / Parts Services / Catalog

Category Taxonomy Documentation

Unified category taxonomy for CROP parts catalog based on New Holland, Briggs & Stratton, and Ventrac standards.

Category Taxonomy Documentation

Overview

Unified category taxonomy for CROP parts catalog based on New Holland, Briggs & Stratton, and Ventrac standards.

Location: /packages/shared-types/src/categories/


Category Hierarchy

graph TD
    subgraph "Level 1 - Root Categories"
        AF[AIR FILTRATION]
        AS[ALTERNATORS & STARTERS]
        BA[BATTERIES & ACCESSORIES]
        BB[BEARINGS & BUSHINGS]
        BC[BELTS & CHAINS]
        BL[BLADES & CUTTING]
        BR[BRAKES]
        CF[CARBURETORS & FUEL]
        CT[CLUTCHES & TRANSMISSION]
        EL[ELECTRICAL]
        EP[ENGINE PARTS]
        FI[FILTERS]
        FL[FLUIDS & LUBRICANTS]
        GS[GASKETS & SEALS]
        GI[GAUGES & INSTRUMENTS]
        HW[HARDWARE]
        HY[HYDRAULICS]
        IS[IGNITION & SPARK]
        LI[LIGHTS]
        PT[PTO]
        PS[PULLEYS & SPINDLES]
        SE[SENSORS]
        ST[STEERING]
        WT[WHEELS & TIRES]
        CE[COOLING & EXHAUST]
        UN[UNCATEGORIZED]
    end

    subgraph "Level 2 - Subcategory Examples"
        AF --> AF1[Air Filters]
        AF --> AF2[Pre-Filters]

        BB --> BB1[Bearings]
        BB --> BB2[Bushings]

        FI --> FI1[Oil Filters]
        FI --> FI2[Fuel Filters]
        FI --> FI3[Hydraulic Filters]
        FI --> FI4[Cabin Filters]
        FI --> FI5[Filter Kits]

        GS --> GS1[Gaskets]
        GS --> GS2[Seals]
        GS --> GS3[O-Rings]
        GS --> GS4[Seal Kits]

        HY --> HY1[Hydraulic Pumps]
        HY --> HY2[Hydraulic Cylinders]
        HY --> HY3[Hydraulic Valves]
        HY --> HY4[Fittings & Hoses]

        EL --> EL1[Switches]
        EL --> EL2[Wiring & Connectors]
        EL --> EL3[Relays & Fuses]
    end

    style AF fill:#e1f5fe
    style FI fill:#e1f5fe
    style HY fill:#e1f5fe
    style EL fill:#e1f5fe
    style BB fill:#e1f5fe
    style GS fill:#e1f5fe
    style UN fill:#ffebee

Normalization Flow

flowchart TD
    A[Product Title] --> B{Find Keyword Match}
    B -->|Found| C[Get Category ID]
    B -->|Not Found| D[UNCATEGORIZED]

    C --> E[Calculate Confidence Score]
    E --> F{Confidence >= threshold?}
    F -->|Yes| G[Build Category Result]
    F -->|No| H[Flag for Review]

    D --> I[Set needsReview = true]
    H --> G
    I --> G

    G --> J[ProductCategoryResult]

    subgraph "Result Structure"
        J --> K["category: CategoryResult[]"]
        J --> L["category_path: string[]"]
        J --> M["category_id: string[]"]
        J --> N["category_name: string[]"]
        J --> O["breadcrumbs: string[]"]
        J --> P[uncategorized: boolean]
    end

    style A fill:#e3f2fd
    style J fill:#c8e6c9
    style D fill:#ffebee
    style H fill:#fff3e0

Keyword Matching Algorithm

flowchart LR
    A[Input Title] --> B[Normalize to lowercase]
    B --> C[Sort Keywords by Priority]
    C --> D[Iterate Keywords]

    D --> E{Title includes keyword?}
    E -->|Yes| F[Return Match]
    E -->|No| G{More keywords?}
    G -->|Yes| D
    G -->|No| H[Return null]

    F --> I[categoryId]
    F --> J[keyword]
    F --> K[priority]

    style A fill:#e3f2fd
    style F fill:#c8e6c9
    style H fill:#ffebee

Data Flow Architecture

flowchart TB
    subgraph "Source Data"
        BNS[BNS Parts CSV]
        VNT[VNT Parts CSV]
        NHL[NHL Parts DB]
    end

    subgraph "Transformers"
        BT[BNS Transformer]
        VT[VNT Transformer]
        NT[NHL Transformer]
    end

    subgraph "Category Normalization"
        CN[categorizeProduct]
        TX[TAXONOMY]
        KW[KEYWORD_MAPPINGS]
    end

    subgraph "MongoDB Collections"
        PS[parts_stage]
        PC[categories]
    end

    subgraph "Elasticsearch"
        ES[parts index]
    end

    BNS --> BT
    VNT --> VT
    NHL --> NT

    BT --> CN
    VT --> CN
    NT --> CN

    TX --> CN
    KW --> CN

    CN --> PS
    TX --> PC

    PS --> ES
    PC --> ES

    style CN fill:#c8e6c9
    style TX fill:#fff3e0
    style KW fill:#fff3e0

Category Document Schema

erDiagram
    CATEGORY {
        string id PK
        string slug
        string name
        int level
        string path
        array path_ids
        string parent_id FK
        array children
        array keywords
        string icon
        int sort_order
    }

    PRODUCT {
        string id PK
        string partNumber
        string title
        string manufacturer
    }

    PRODUCT_CATEGORY {
        string id
        string slug
        string name
        int level
        string path
        array path_ids
        boolean leaf
        float confidence
        string matchedKeyword
        boolean needsReview
    }

    CATEGORY ||--o{ CATEGORY : "parent_id"
    PRODUCT ||--|| PRODUCT_CATEGORY : "category[0]"

MongoDB Index Strategy

graph LR
    subgraph "Categories Collection Indexes"
        I1[_id - Primary]
        I2[slug - Unique]
        I3[parent_id - Reference]
        I4[level - Filter]
        I5["path_ids - Array"]
    end

    subgraph "Parts Collection Indexes"
        P1["category_id - Category filter"]
        P2["category.id - Nested"]
        P3["uncategorized - Flag"]
        P4["category.needsReview - QA"]
    end

    style I1 fill:#e8f5e9
    style I2 fill:#e8f5e9
    style P1 fill:#e3f2fd
    style P2 fill:#e3f2fd

Confidence Score Calculation

flowchart TD
    A[Start: priority value] --> B[Base = 0.5 + priority/200]
    B --> C{Word boundary match?}
    C -->|Yes| D[+0.1]
    C -->|No| E[+0.0]
    D --> F{Starts with keyword?}
    E --> F
    F -->|Yes| G[+0.05]
    F -->|No| H[+0.0]
    G --> I{Multi-word keyword?}
    H --> I
    I -->|Yes| J[+0.1]
    I -->|No| K[+0.0]
    J --> L[Cap at 1.0]
    K --> L
    L --> M[Final Confidence]

    style A fill:#e3f2fd
    style M fill:#c8e6c9

Usage Examples

Categorize Single Product

import { categorizeProduct } from '@crop/shared-types/categories';

const result = categorizeProduct('Bearing, Ball 25 mm x 62 mm');

console.log(result);
// {
//   category: [{
//     id: 'bearings',
//     name: 'Bearings',
//     path: 'BEARINGS & BUSHINGS > Bearings',
//     confidence: 0.85,
//     matchedKeyword: 'bearing'
//   }],
//   category_path: ['BEARINGS & BUSHINGS > Bearings'],
//   category_id: ['bearings'],
//   category_name: ['Bearings'],
//   breadcrumbs: ['BEARINGS & BUSHINGS', 'Bearings'],
//   uncategorized: false
// }

Batch Categorization

import { categorizeProducts, getCategorizeStats } from '@crop/shared-types/categories';

const products = [
  { title: 'SWITCH-PTO PUSH' },
  { title: 'KIT, FILTER' },
  { title: 'Bearing, Ball' },
];

const categorized = categorizeProducts(products);
const stats = getCategorizeStats(categorized);

console.log(stats);
// {
//   total: 3,
//   categorized: 3,
//   uncategorized: 0,
//   needsReview: 0,
//   byCategory: { 'pto': 1, 'filter-kits': 1, 'bearings': 1 },
//   avgConfidence: 0.87
// }

Get All Categories

import { getRootCategories, getCategoryChildren, TAXONOMY } from '@crop/shared-types/categories';

// Get root categories
const roots = getRootCategories();
console.log(roots.map(c => c.name));
// ['AIR FILTRATION', 'ALTERNATORS & STARTERS', ...]

// Get children
const filterChildren = getCategoryChildren('filters');
console.log(filterChildren.map(c => c.name));
// ['Oil Filters', 'Fuel Filters', 'Hydraulic Filters', 'Cabin Filters', 'Filter Kits']

File Structure

packages/shared-types/src/categories/
├── index.ts          # Public exports
├── taxonomy.ts       # Category definitions and schema
├── keywords.ts       # Keyword-to-category mappings
└── normalizer.ts     # Categorization functions

Stats

  • Total Categories: 56 (26 Level 1, 30 Level 2)
  • Total Keywords: 250+
  • Average Keywords per Category: 5-10

Last updated: 2025-11-24

On this page