o
    }h                     @   s   d dl Z d dlZd dlZd dlZd dlmZmZmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZ ejejd eeZe	  G dd	 d	eZG d
d dZdS )    N)DictAnyOptional)load_dotenv)
ChatOllama)BaseCallbackHandler)retrieve_semantic_docs)levelc                   @   s"   e Zd Zdd ZdefddZdS )StreamingToSocketHandlerc                 C   s
   || _ d S Nsend_token_fn)selfr    r   &/var/www/observatorio/agentic_rag_4.py__init__   s   
z!StreamingToSocketHandler.__init__tokenc                 K   s   |  | d S r   r   )r   r   kwargsr   r   r   on_llm_new_token   s   z)StreamingToSocketHandler.on_llm_new_tokenN)__name__
__module____qualname__r   strr   r   r   r   r   r
      s    r
   c                   @   sj   e Zd ZdZddefddZdedeeef fdd	Zd
edefddZ	dede
eeef  fddZdS )SimpleDeepSeekRAGzFAgentic RAG system for DeepSeek-R1 with fixed search + answer behaviordeepseek-r1:8b
model_namec              	   C   sF   t dd | _t|| jddddt|gd| _td|  d S )	NOLLAMA_HOSTzhttp://localhost:11434g      ?x   i  T)modelbase_urltemperaturetimeoutnum_predict	streaming	callbackszInitialized DeepSeek RAG with )	osgetenvstripollama_hostr   r
   llmloggerinfo)r   stream_funcr   r   r   r   r   !   s   	zSimpleDeepSeekRAG.__init__questionreturnc              
   C   s  zd}d| d}| j |j }| |}td|  zt|}tdt|t	r0t
|nd d W n tyS } ztd|  g }W Y d }~nd }~ww d	}g }	d
}
|rt|t	sd|g}d}t|dD ]D\}}t|tr|d}|d}|dpd}t
||kr|d | d }|
|d 7 }
|d| d| d| d7 }|	||d qkd| d| d}| j |j }| |}d|r|n|||t
||	|
dW S  ty } zdt| }t| d|d dW  Y d }~S d }~ww )NiX  z
            Based on this question, and considering that this is a chat strictly about Brazilian legal decisions of 2024 on elections.
            What should I search for in a document database using the KNN search?

            Question: z

            Provide only the main search terms in Portuguese, separated by spaces.
            Write the query inside <search> </search> tags.
            zGenerated search query: zSearch returned:    z resultszSearch failed: zNo documents found. zFOUND DOCUMENTS:
numero_unicodata_publicacaoconteudo_htmlz
No contentz...z


z
Document ID: z
Date: z

Content: 
)iddateu   
            Based on the search results below, answer this question **in Brazilian Portuguese (português do Brasil)**.

            Question: z*

            Search Results:
            a  

            IMPORTANT RULES:
            - Only use information from the search results above
            - Say everything you found in the document, but priorize answering the question. 
            - Be specific about what the documents say about the topic
            - You must mention the document IDs in your answer
            - When reasoning about the documents, make sure to open and close the <think> tags
            - Explain clearly and directly, without additional labels or markings
            - ONLY send the final answer inside <answer> tags in this exact format:
            <answer>
                your detailed answer based on search results, written in Brazilian Portuguese
            </answer>

            T)successresponse
raw_outputsearch_querysearch_results_countsourcestotal_contentzQuery failed: F)r7   errorr8   )r)   invokecontentr'   _extract_search_termsr*   r+   r   
isinstancelistlen	Exceptionr>   	enumeratedictgetappend_extract_jsonr   )r   r-   CHAR_CAPsearch_promptraw_search_responser:   search_resultseformatted_resultssource_listr=   idocdoc_iddoc_dater@   answer_promptr8   json_response	error_msgr   r   r   query/   sx   

(







zSimpleDeepSeekRAG.queryraw_responsec           	   
   C   s   z_t jdd|t jd }|rt|dk r?t d|t j}|r?|d}|d}dd	 |D }|r<d
|d d W S W dS dd	 |dD }|r[t dd
|d }|d d W S |p^dW S  t	yz } zt
d|  W Y d }~dS d }~ww )N<think>.*?</think>r0   flags   z<think>(.*?)</think>r/   r4   c                 S   s4   g | ]}|  d r|dd dd   qS )z-    N(r   )r'   
startswithsplit.0liner   r   r   
<listcomp>   s    z;SimpleDeepSeekRAG._extract_search_terms.<locals>.<listcomp>    	documentoc                 S   s   g | ]
}|  r|  qS r   )r'   rc   r   r   r   rf      s    z\s+r   d   zSearch term extraction failed: )resubDOTALLr'   rD   searchgrouprb   joinrE   r*   r>   )	r   rZ   
clean_textthink_matchthinking_contentlinesterms
first_linerO   r   r   r   rA      s.   


z'SimpleDeepSeekRAG._extract_search_termstextc           	   
   C   s   zMt jdd|t jd }ddg}|D ]7}t ||t j}|D ]*}zt| }t|tr>d|v s6d|v r>|W     W S W q tj	yI   Y qw qW d S  t
yh } ztd|  W Y d }~d S d }~ww )	Nr[   r0   r\   z\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}z\{\s*"answer"[^}]+\}answerr8   zJSON extraction failed: )rk   rl   rm   r'   findalljsonloadsrB   rG   JSONDecodeErrorrE   r*   r>   )	r   rw   rq   patternspatternmatchesmatchparsedrO   r   r   r   rJ      s.   zSimpleDeepSeekRAG._extract_jsonN)r   )r   r   r   __doc__r   r   r   r   rY   rA   r   rJ   r   r   r   r   r      s    b"r   )rz   r%   loggingrk   typingr   r   r   dotenvr   langchain_community.chat_modelsr   langchain_core.callbacks.baser   data_searchr   basicConfigINFO	getLoggerr   r*   r
   r   r   r   r   r   <module>   s    
