
    iS                       d dl mZ d dlZd dlZd dlZd dlmZ d dlmZm	Z	m
Z
 d dlZd dlmZmZ  ej                  e      Zerd dlmZmZ 	 d dlmZ 	 d dlmZmZ 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 dd	Z	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 dd
Z	 	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddZ	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddZy# e$ r Y uw xY w# e$ r Y ww xY w)    )annotationsN)Path)TYPE_CHECKINGCallableLiteral)disable_datasets_cachingis_datasets_availableCrossEncoderSentenceTransformer)OVQuantizationConfig)OptimizationConfigQuantizationConfigc                F   ddl m}m} 	 ddlm}m}	m}
 ddlm} t        | |      xr# t        |       xr t        | j                  |      }t        | |      xr t        | j                  |	      }|s|st        d      |r| j                  }n| j                  }|
j                  |      t        t               r0|j"                  vrt        d      xs  t%        |             d	t'        fd
d|||d| 	       y# t        $ r t        d      w xY w)a  
    Export an optimized ONNX model from a SentenceTransformer or CrossEncoder model.

    The O1-O4 optimization levels are defined by Optimum and are documented here:
    https://huggingface.co/docs/optimum/main/en/onnxruntime/usage_guides/optimization

    The optimization levels are:

    - O1: basic general optimizations.
    - O2: basic and extended general optimizations, transformers-specific fusions.
    - O3: same as O2 with GELU approximation.
    - O4: same as O3 with mixed precision (fp16, GPU-only)

    See the following pages for more information & benchmarks:

    - `Sentence Transformer > Usage > Speeding up Inference <https://sbert.net/docs/sentence_transformer/usage/efficiency.html>`_
    - `Cross Encoder > Usage > Speeding up Inference <https://sbert.net/docs/cross_encoder/usage/efficiency.html>`_

    Args:
        model (SentenceTransformer | CrossEncoder): The SentenceTransformer or CrossEncoder model to be optimized.
            Must be loaded with `backend="onnx"`.
        optimization_config (OptimizationConfig | Literal["O1", "O2", "O3", "O4"]): The optimization configuration or level.
        model_name_or_path (str): The path or Hugging Face Hub repository name where the optimized model will be saved.
        push_to_hub (bool, optional): Whether to push the optimized model to the Hugging Face Hub. Defaults to False.
        create_pr (bool, optional): Whether to create a pull request when pushing to the Hugging Face Hub. Defaults to False.
        file_suffix (str | None, optional): The suffix to add to the optimized model file name. Defaults to None.

    Raises:
        ImportError: If the required packages `optimum` and `onnxruntime` are not installed.
        ValueError: If the provided model is not a valid SentenceTransformer or CrossEncoder model loaded with `backend="onnx"`.
        ValueError: If the provided optimization_config is not valid.

    Returns:
        None
    r   r
   )ORTModelForFeatureExtraction!ORTModelForSequenceClassificationORTOptimizer)AutoOptimizationConfigPlease install Optimum and ONNX Runtime to use this function. You can install them with pip: `pip install optimum[onnxruntime]` or `pip install optimum[onnxruntime-gpu]`mThe model must be a Transformer-based SentenceTransformer or CrossEncoder model loaded with `backend="onnx"`.z\optimization_config must be an OptimizationConfig instance or one of 'O1', 'O2', 'O3', 'O4'.N	optimizedc                ,    j                  |       S N)file_suffix)optimize)save_dirr   optimization_config	optimizers    [/var/www/html/eduruby.in/venv/lib/python3.12/site-packages/sentence_transformers/backend.py<lambda>z-export_optimized_onnx_model.<locals>.<lambda>q       ););<OQYgr);)s     export_optimized_onnx_modelonnx	export_functionexport_function_nameconfigmodel_name_or_pathpush_to_hub	create_prr   backendmodel)sentence_transformersr   r   optimum.onnxruntimer   r   r   !optimum.onnxruntime.configurationr   ImportError
isinstancelentransformers_modelr-   
ValueErrorfrom_pretrainedstr_LEVELSgetattrsave_or_push_to_hub_model)r-   r   r)   r*   r+   r   r   r   r   r   r   r   viable_st_modelviable_ce_model	ort_modelr   s    `   `         @r   r#   r#      sH   V H
uuL 	5-. 	OJ	Ou//1MN 
 !5t*U[[Rs:tO{
 	
 272J2J	7<{{	,,Y7I%s+&<&D&DDn  "8%8Rg&<>QRT!s:"-
I  
8
 	

s   D D c                   ddl m}m} 	 ddlm}m}	m}
 ddlm} t        | |      xr# t        |       xr t        | j                  |      }t        | |      xr t        | j                  |	      }|s|st        d      |r| j                  }n| j                  }|
j                  |      t        t               rTdvrt        d      d	d	 } t#        |      d
      xs) j$                  j&                  j)                          d| 'j$                  j&                  j)                          dt+        fdd|||d| 	       y	# t        $ r t        d      w xY w)a  
    Export a quantized ONNX model from a SentenceTransformer or CrossEncoder model.

    This function applies dynamic quantization, i.e. without a calibration dataset.
    Each of the default quantization configurations quantize the model to int8, allowing
    for faster inference on CPUs, but are likely slower on GPUs.

    See the following pages for more information & benchmarks:

    - `Sentence Transformer > Usage > Speeding up Inference <https://sbert.net/docs/sentence_transformer/usage/efficiency.html>`_
    - `Cross Encoder > Usage > Speeding up Inference <https://sbert.net/docs/cross_encoder/usage/efficiency.html>`_

    Args:
        model (SentenceTransformer | CrossEncoder): The SentenceTransformer or CrossEncoder model to be quantized.
            Must be loaded with `backend="onnx"`.
        quantization_config (QuantizationConfig): The quantization configuration.
        model_name_or_path (str): The path or Hugging Face Hub repository name where the quantized model will be saved.
        push_to_hub (bool, optional): Whether to push the quantized model to the Hugging Face Hub. Defaults to False.
        create_pr (bool, optional): Whether to create a pull request when pushing to the Hugging Face Hub. Defaults to False.
        file_suffix (str | None, optional): The suffix to add to the quantized model file name. Defaults to None.

    Raises:
        ImportError: If the required packages `optimum` and `onnxruntime` are not installed.
        ValueError: If the provided model is not a valid SentenceTransformer or CrossEncoder model loaded with `backend="onnx"`.
        ValueError: If the provided quantization_config is not valid.

    Returns:
        None
    r   r
   )r   r   ORTQuantizer)AutoQuantizationConfigr   r   )arm64avx2avx512avx512_vnnizqquantization_config must be an QuantizationConfig instance or one of 'arm64', 'avx2', 'avx512', or 'avx512_vnni'.NF)	is_static_
_quantizedc                ,    j                  |       S r   quantize)r   r   quantization_config	quantizers    r   r    z5export_dynamic_quantized_onnx_model.<locals>.<lambda>   r!   r"   #export_dynamic_quantized_onnx_modelr$   r%   )r.   r   r   r/   r   r   r?   r0   r@   r1   r2   r3   r4   r-   r5   r6   r7   r9   weights_dtypenamelowerr:   )r-   rK   r)   r*   r+   r   r   r   r   r   r?   r@   r;   r<   r=   quantization_config_namerL   s    `   `          @r   rM   rM   }   s   J H
uuL 	5-. 	OJ	Ou//1MN 
 !5t*U[[Rs:tO{
 	
 272J2J	7<{{	,,Y7I%s+&PP D  $7q#9 Rg&<>QR]bc!s(;(I(I(N(N(T(T(V'WWXYqXr%s,::??EEGH
SsB"-
K  
8
 	

s   E E)c
                D    ddl m}
m} 	 ddlm}m}m}m}m} t               st        d      t         |      xr# t               xr t         j                  |      }t         |
      xr t         j                  |      }|s|st        d      | |       }|r j                  }n j                  } ||      |j!                  |      t#        d	 |||fD              r!t%        d
 |||fD              st        d       fd||nd}||nd}||nd}ndt'               5  j)                  ||fd||j*                  nd|      ddd       t-        fdd|||||	d 	       y# t        $ r t        d      w xY w# 1 sw Y   ;xY w)a+	  
    Export a quantized OpenVINO model from a SentenceTransformer or CrossEncoder model.

    This function applies Post-Training Static Quantization (PTQ) using a calibration dataset, which calibrates
    quantization constants without requiring model retraining. Each default quantization configuration converts
    the model to int8 precision, enabling faster inference while maintaining accuracy.

    See the following pages for more information & benchmarks:

    - `Sentence Transformer > Usage > Speeding up Inference <https://sbert.net/docs/sentence_transformer/usage/efficiency.html>`_
    - `Cross Encoder > Usage > Speeding up Inference <https://sbert.net/docs/cross_encoder/usage/efficiency.html>`_

    Args:
        model (SentenceTransformer | CrossEncoder): The SentenceTransformer or CrossEncoder model to be quantized.
            Must be loaded with `backend="openvino"`.
        quantization_config (OVQuantizationConfig | dict | None): The quantization configuration. If None, default values are used.
        model_name_or_path (str): The path or Hugging Face Hub repository name where the quantized model will be saved.
        dataset_name(str, optional): The name of the dataset to load for calibration.
            If not specified, the `sst2` subset of the `glue` dataset will be used by default.
        dataset_config_name (str, optional): The specific configuration of the dataset to load.
        dataset_split (str, optional): The split of the dataset to load (e.g., 'train', 'test'). Defaults to None.
        column_name (str, optional): The column name in the dataset to use for calibration. Defaults to None.
        push_to_hub (bool, optional): Whether to push the quantized model to the Hugging Face Hub. Defaults to False.
        create_pr (bool, optional): Whether to create a pull request when pushing to the Hugging Face Hub. Defaults to False.
        file_suffix (str, optional): The suffix to add to the quantized model file name. Defaults to `qint8_quantized`.

    Raises:
        ImportError: If the required packages `optimum` and `openvino` are not installed.
        ValueError: If the provided model is not a valid SentenceTransformer or CrossEncoder model loaded with `backend="openvino"`.
        ValueError: If the provided quantization_config is not valid.

    Returns:
        None
    r   r
   )OVConfigOVModelForFeatureExtraction OVModelForSequenceClassificationr   OVQuantizerzPlease install datasets, optimum-intel and openvino to use this function. You can install them with pip: `pip install datasets optimum[openvino]`zaPlease install datasets to use this function. You can install it with pip: `pip install datasets`zqThe model must be a Transformer-based SentenceTransformer or CrossEncoder model loaded with `backend="openvino"`.N)rK   c              3  $   K   | ]  }|d u 
 y wN .0params     r   	<genexpr>z9export_static_quantized_openvino_model.<locals>.<genexpr>0  s     
j5
j   c              3  $   K   | ]  }|d u 
 y wrX   rY   rZ   s     r   r]   z9export_static_quantized_openvino_model.<locals>.<genexpr>0  s      w#Twr^   zEither specify all of `dataset_name`, `dataset_config_name`, `dataset_split`, and `column_name`, or leave them all unspecified.c                .    j                  | ddd      S )N
max_lengthi  T)paddingra   
truncation)	tokenizer)examplesr-   s    r   preprocess_functionzCexport_static_quantized_openvino_model.<locals>.preprocess_function7  s    x#Z^__r"   gluesst2trainsentencec                     |          S rX   rY   )re   column_namerf   s    r   r    z8export_static_quantized_openvino_model.<locals>.<lambda>B  s    1DXkEZ1[ r"   i,  )dataset_namedataset_config_namerf   num_samplesdataset_splitc                ,    j                  |       S )N)save_directory	ov_configrI   )r   calibration_datasetrs   rL   s    r   r    z8export_static_quantized_openvino_model.<locals>.<lambda>H  s    ););I *< *
 r"   &export_static_quantized_openvino_modelopenvinor%   )r.   r   r   optimum.intelrS   rT   rU   r   rV   r1   r	   r2   r3   r4   r-   r5   r6   anyallr   get_calibration_datasetro   r:   )r-   rK   r)   rm   rn   rp   rl   r*   r+   r   r   r   rS   rT   rU   r   rV   r;   r<   ov_modelrt   rs   rf   rL   s   `     `             @@@@r   ru   ru      s
   \ H
	
 	
 !"o
 	

 	5-. 	NJ	Nu//1LM 
 !5s*U[[Rr:sO
 	
 "24050H0H5:[[-@AI++H5I

j<9Lm]h*i
jjsv w(46I=Ze'fw t  N
 	
` $0#;<L1D1P-V\%2%>MGM!,!8+jK	!	# 
'??% 3 [;N;Z+77`c' @ 

 
 F"-m  
V
 	

Z
 
s   E> 3)F>FFc	                   ddl m}	m}
 |dk(  rd| d}n|dk(  rd| d}t        j                         5 } | |       |dk(  ret        |      |z  }t        j                  |d	z  |z         t        j                  |d
z  ||z  j                  d             |j                         }|dk(  rZt        |      |z  }|j                  dd       t        |      z  }||z  }t        j                  ||       |j                         }|rd}|rt        |      j                  dd      j                  dd      j                  dd      }|t        ||
      rd| d| d| d| d| d d}n!t        ||	      rd| d| d| d| d| d d}t        j                  |||dd| d ||!       nt        |      |z  }|j                  dd       t        |      z  }||z  }t        j                   ||       |dk(  rPt        |      |z  j                  d      }t        |      |z  j                  d      }t        j                   ||       d d d        y # 1 sw Y   y xY w)"Nr   r
   r$   model_z.onnxrv   openvino_model_z.xmlzopenvino_model.xmlzopenvino_model.binz.binT)parentsexist_ok (z(
	z, z,
	)z
)zGHello!

*This pull request has been automatically generated from the [`zT`](https://sbert.net/docs/package_reference/util.html#sentence_transformers.backend.zI) function from the Sentence Transformers library.*

## Config
```python
a  
```

## Tip:
Consider testing this pull request before merging by loading the model from this PR with the `revision` argument:
```python
from sentence_transformers import SentenceTransformer

# TODO: Fill in the PR number
pr_number = 2
model = SentenceTransformer(
    "z5",
    revision=f"refs/pr/{pr_number}",
    backend="z#",
    model_kwargs={"file_name": "a  "},
)

# Verify that everything works as expected
embeddings = model.encode(["The weather is lovely today.", "It's so sunny outside!", "He drove to the stadium."])
print(embeddings.shape)

similarities = model.similarity(embeddings, embeddings)
print(similarities)
```
a  
```

## Tip:
Consider testing this pull request before merging by loading the model from this PR with the `revision` argument:
```python
from sentence_transformers import CrossEncoder

# TODO: Fill in the PR number
pr_number = 2
model = CrossEncoder(
    "a
  "},
)

# Verify that everything works as expected
query = "Which planet is known as the Red Planet?"
passages = [
	"Venus is often called Earth's twin because of its similar size and proximity.",
	"Mars, known for its reddish appearance, is often referred to as the Red Planet.",
	"Jupiter, the largest planet in our solar system, has a prominent red spot.",
	"Saturn, famous for its rings, is sometimes mistaken for the Red Planet."
]

scores = model.predict([(query, passage) for passage in passages])
print(scores)
```
r-   zAdd exported z model )folder_pathpath_in_reporepo_id	repo_typecommit_messagecommit_descriptionr+   )r.   r   r   tempfileTemporaryDirectoryr   shutilmovewith_suffixas_posixmkdirreprreplacer2   huggingface_hubupload_foldercopy)r&   r'   r(   r)   r*   r+   r   r,   r-   r   r   	file_namer   dst_dirsourcedestinationr   opt_config_string
bin_sourcebin_destinations                       r   r:   r:   V  s    H&[M/		J	%k]$7			$	$	& x9(! j H~/HKK#77I9MNKK#77(Y:N9[9[\b9cd((*H f8nw.GMM$M6(^i/F!I-KKK,'')H!#$(L$8$8g$F$N$NtU\$]$e$efikp$q!=Ju6I$J.@ AU?U  Vj  k  j@ @     Y ""+ 
-+*&@  |4.@ AU?U  Vj  k  j@ @     Y ""+ -+$*&L ))$$*!!.wiwymL#5# -.8GMM$M6(^i/F!I-KKK, *$"8ny8EEfM
#'=9#<"I"I&"QJ8qx9 x9 x9s   HII )FFN)r-   "SentenceTransformer | CrossEncoderr   z4OptimizationConfig | Literal['O1', 'O2', 'O3', 'O4']r)   r7   r*   boolr+   r   r   
str | NonereturnNone)r-   r   rK   zFQuantizationConfig | Literal['arm64', 'avx2', 'avx512', 'avx512_vnni']r)   r7   r*   r   r+   r   r   r   r   r   )NNNNFFqint8_quantized)r-   r   rK   z"OVQuantizationConfig | dict | Noner)   r7   rm   r   rn   r   rp   r   rl   r   r*   r   r+   r   r   r7   r   r   )FFNr$   N)r&   r   r'   r7   r)   r7   r*   r   r+   r   r   r   r,   r7   r-   z)SentenceTransformer | CrossEncoder | None)
__future__r   loggingr   r   pathlibr   typingr   r   r   r   sentence_transformers.utilr   r	   	getLogger__name__loggerr.   r   r   rw   r   r1   r0   r   r   r#   rM   ru   r:   rY   r"   r   <module>r      s$   "     3 3  V			8	$G6\ "^-^M^ ^ 	^
 ^ ^ 
^J "Y-Y_Y Y 	Y
 Y Y 
Y@  $&* $"(z-z;z z 	z
 $z z z z z z 
zD "7;J9J9J9 	J9
 J9 J9 J9 J9 5J9E
    s$   C  C  CCCC