
    h                         d dl Z d dlmZ d dlmZmZmZmZ d dlZd dl	m
Z
 d dlmZ d dlmZ d dlmZmZmZ d dlmZ dd	Z G d
 de      Zy)    N)Path)AnyDictListOptional)build_yolo_dataset)DetectionTrainer)
WorldModel)DEFAULT_CFGLOGGERRANK)de_parallelc                 4   t         dv rt        | j                  j                  j                  d   j                               D cg c]  }|j                  dd      d    }}t        | j                  j                        j                  |d       yyc c}w )	zISet up model classes and text encoder at the end of the pretrain routine.>   r   names/   r   Fcache_clip_modelN)
r   listtest_loaderdatasetdatavaluessplitr   emaset_classes)trainernamer   s      a/var/www/html/eduruby.in/venv/lib/python3.12/site-packages/ultralytics/models/yolo/world/train.pyon_pretrain_routine_endr!      sz    w378K8K8S8S8X8XY`8a8h8h8j3kl4C#A&llGKKOO$000O ls   Bc            
            e Zd ZdZeddfdeeeef      f fdZ	ddee   de
defdZdd	ed
edee   fdZdee   dee   ddfdZdee   dededeeej&                  f   fdZdeeef   deeef   fdZ xZS )WorldTrainera  
    A trainer class for fine-tuning YOLO World models on close-set datasets.

    This trainer extends the DetectionTrainer to support training YOLO World models, which combine visual and textual
    features for improved object detection and understanding. It handles text embedding generation and caching to
    accelerate training with multi-modal data.

    Attributes:
        text_embeddings (Dict[str, torch.Tensor] | None): Cached text embeddings for category names to accelerate
            training.
        model (WorldModel): The YOLO World model being trained.
        data (Dict[str, Any]): Dataset configuration containing class information.
        args (Any): Training arguments and configuration.

    Methods:
        get_model: Return WorldModel initialized with specified config and weights.
        build_dataset: Build YOLO Dataset for training or validation.
        set_text_embeddings: Set text embeddings for datasets to accelerate training.
        generate_text_embeddings: Generate text embeddings for a list of text samples.
        preprocess_batch: Preprocess a batch of images and text for YOLOWorld training.

    Examples:
        Initialize and train a YOLO World model
        >>> from ultralytics.models.yolo.world import WorldTrainer
        >>> args = dict(model="yolov8s-world.pt", data="coco8.yaml", epochs=3)
        >>> trainer = WorldTrainer(overrides=args)
        >>> trainer.train()
    N	overridesc                 >    |i }t         |   |||       d| _        y)a-  
        Initialize a WorldTrainer object with given arguments.

        Args:
            cfg (Dict[str, Any]): Configuration for the trainer.
            overrides (Dict[str, Any], optional): Configuration overrides.
            _callbacks (List[Any], optional): List of callback functions.
        N)super__init__text_embeddings)selfcfgr$   
_callbacks	__class__s       r    r'   zWorldTrainer.__init__6   s)     Ii4#    weightsverbosereturnc                     t        t        |t              r|d   n|| j                  d   t	        | j                  d   d      |xr	 t
        dk(        }|r|j                  |       | j                  dt               |S )ah  
        Return WorldModel initialized with specified config and weights.

        Args:
            cfg (Dict[str, Any] | str, optional): Model configuration.
            weights (str, optional): Path to pretrained weights.
            verbose (bool): Whether to display model info.

        Returns:
            (WorldModel): Initialized WorldModel.
        	yaml_filechannelsncP   r   )chr4   r/   r!   )	r
   
isinstancedictr   minr   loadadd_callbackr!   )r)   r*   r.   r/   models        r    	get_modelzWorldTrainer.get_modelD   ss      *3 5C3yy$499T?B'*
	
 JJw35LMr-   img_pathmodebatchc                 ,   t        t        | j                  r-t        | j                        j                  j                         nd      d      }t        | j                  ||| j                  ||dk(  ||dk(        }|dk(  r| j                  |g|       |S )a  
        Build YOLO Dataset for training or validation.

        Args:
            img_path (str): Path to the folder containing images.
            mode (str): `train` mode or `val` mode, users are able to customize different augmentations for each mode.
            batch (int, optional): Size of batches, this is for `rect`.

        Returns:
            (Any): YOLO dataset configured for training or validation.
        r       valtrain)r?   rectstridemulti_modal)	maxintr<   r   rF   r   argsr   set_text_embeddings)r)   r>   r?   r@   gsr   s         r    build_datasetzWorldTrainer.build_dataset^   s     TZZ[,33779QOQST$IIx		45=Y[imqxix
 7?$$gY6r-   datasetsc           
          i }|D ]b  }t        |d      s|j                  | j                  t        |j                        |t        |j                        j                               d || _        y)a  
        Set text embeddings for datasets to accelerate training by caching category names.

        This method collects unique category names from all datasets, then generates and caches text embeddings
        for these categories to improve training efficiency.

        Args:
            datasets (List[Any]): List of datasets from which to extract category names.
            batch (int | None): Batch size used for processing.

        Notes:
            This method collects category names from datasets that have the 'category_names' attribute,
            then uses the first dataset's image path to determine where to cache the generated text embeddings.
        category_names)	cache_dirN)	hasattrupdategenerate_text_embeddingsr   rP   r   r>   parentr(   )r)   rN   r@   r(   r   s        r    rK   z WorldTrainer.set_text_embeddingsr   st      	G7$45""--//0%4HXHXCYC`C` . 	  /r-   textsrQ   c                 N   d}|d|j                  dd      j                  dd       dz  }|j                         rat        j                  d| d       t	        j
                  || j                  	      }t        |j                               t        |      k(  r|S t        j                  d
| d       | j                  J t        | j                        j                  ||d      }t        t        ||j                  d                  }t	        j                  ||       |S )a~  
        Generate text embeddings for a list of text samples.

        Args:
            texts (List[str]): List of text samples to encode.
            batch (int): Batch size for processing.
            cache_dir (Path): Directory to save/load cached embeddings.

        Returns:
            (Dict[str, torch.Tensor]): Dictionary mapping text samples to their embeddings.
        zclip:ViT-B/32text_embeddings_:_r   z.ptzReading existed cache from '')map_locationzCaching text embeddings to 'Fr   r   )replaceexistsr   infotorchr:   devicesortedkeysr<   r   get_text_per8   zipsqueezesave)r)   rV   r@   rQ   r<   
cache_pathtxt_map	txt_featss           r    rT   z%WorldTrainer.generate_text_embeddings   s     #3EMM#s4K4S4STWY\4]3^^a!bb
KK6zl!DEjj$++FGglln%62:,a@Azz%%%

+77uW\7]	s5)"3"3A"678

7J'r-   c                    t        j                  | |      }t        t        j                  |d          }t        j                  |D cg c]  }| j                  |    c}      j                  | j                        }||j                  ddd      z  }|j                  t        |d         d|j                  d         |d<   |S c c}w )z=Preprocess a batch of images and text for YOLOWorld training.rV      r   T)pdimkeepdimrj   )r	   preprocess_batchr   	itertoolschainr`   stackr(   tora   normreshapelenshape)r)   r@   rV   textrj   s        r    rp   zWorldTrainer.preprocess_batch   s     11$> Y__eGn56KK N!5!5d!; NORRSWS^S^_		D II	&..s5>/BB	XZH[\k !Os   C)NNT)rD   N)__name__
__module____qualname____doc__r   r   r   strr   r'   boolr
   r=   rI   rM   r   rK   r   r`   TensorrT   rp   __classcell__)r,   s   @r    r#   r#      s    : 'd_c $8DcN3K $8C= $ Zd 4c  xPS} (/DI /hsm /PT /4d3i  PT Y]^achcoco^oYp 4	d38n 	c3h 	r-   r#   )r0   N)rq   pathlibr   typingr   r   r   r   r`   ultralytics.datar   ultralytics.models.yolo.detectr	   ultralytics.nn.tasksr
   ultralytics.utilsr   r   r   ultralytics.utils.torch_utilsr   r!   r#    r-   r    <module>r      s=      , ,  / ; + 7 7 5PW# Wr-   