import mime from 'mime-types';
import React, { useMemo, useRef, useState } from 'react';
import ReactPlayer from 'react-player';

import { CodeRenderer } from '@/components/code-renderer';
import { mediaMimeTypes } from '@/components/input-controls/constants';
import { FileUploader, useDocumentPaste } from '@/components/input-controls/input-controls';
import { getMediaType, useChatControls } from '@/contexts/chat-controls';
import { useSendMessage } from '@/hooks/use-send-message';
import { Chat } from '@/utils/types';
import {
    Box,
    Button,
    Checkbox,
    CheckboxGroup,
    Flex,
    FormControl,
    FormLabel,
    Image as Img,
    Stack,
    Text,
} from '@chakra-ui/react';

import cl from './perception-agent.module.css';

/**
 * Prompt fragments for each selectable perception attribute.
 *
 * Keys are the attribute ids used as checkbox `value`s in this file; each value
 * is a `"FieldName": "instruction"` pair. The fragments for the selected
 * attributes are joined with commas and wrapped in `{ ... }` when the prompt is
 * built, so together they describe the JSON object the model is asked to emit.
 */
const attributesPromptMap = {
    'spatial-relationships': `"SpatialRelationships": "A string field that explains spatial arrangement objects. Pay attention to the location of EACH OBJECT. "`,
    background: `"Background": "A string field that describes the background setting. This string should be concise."`,
    'seo-phrases': `"SeoPhrases": "A list of strings of the top multiword phrases that holistically describe the image for search and SEO purposes. Each phrase should be 2-3 words."`,
    description: `"Description": "A string field that describes the image in a few words as possible. This description must only contain factual information observed in the image. This cannot exceed 30 words."`,
    people: `"People": "A string field that describes any persons in the image. Describe what they are wearing and point out what they might be doing. If there is no one in the image keep this field blank."`,
    'object-recognition': `"ObjectRecognition": "A list of strings that identifies key objects in a few words."`,
};

/** Id of a selectable attribute, i.e. one of the keys of `attributesPromptMap`. */
type Attributes = keyof typeof attributesPromptMap;

/**
 * Perception demo panel: the user drops, pastes or uploads an image or a video,
 * picks which attributes to extract, and the model's JSON answer is rendered
 * next to the media.
 *
 * - Images are converted to a data URL and described with a single request;
 *   the answer is stored in `responses[0]`.
 * - Videos are split into 4-second segments. Segment `i` is sampled at second
 *   `i * 4 + 2`, and the answer is stored in `responses[i]`. On every progress
 *   tick the current segment and the next two are requested if missing.
 *
 * @param chat Optional chat object; only its `loading` flag is read here, to
 *             keep the result panel in its loading state.
 */
export function PerceptionAgent({ chat }: { chat?: Chat | null }) {
    const [loading, setLoading] = useState(false);
    const [media, setMedia] = useState<null | {
        url: string;
        type: 'image' | 'video';
    }>(null);
    const { showDropZones, setChatMode } = useChatControls();

    const { handleMessageSendStateLess } = useSendMessage();

    const mediaUrl = media?.url || null;
    const mediaType = media?.type || null;
    // NOTE(review): nothing in this component calls `setError` yet, so the error
    // banner below never shows. Kept so the wiring stays in place.
    const [errorMsg, setError] = useState('');
    // Model output keyed by segment index (always index 0 for images).
    const [responses, setResponses] = useState<Record<number, string>>({});
    const [currentIndex, setCurrentIndex] = useState(0);
    // The player only starts (and shows controls) once the first answer arrives.
    const [videoReady, setVideoReady] = useState(false);
    // Object URL of the active video ('' when none). Also used to detect stale streams.
    const videoUrlRef = useRef('');
    // Random token regenerated on every reset; invalidates in-flight video requests.
    const videoKeyRef = useRef('');
    // `val` is what the checkboxes show; `currentVal` is what was last applied to the media.
    const [val, setVal] = useState<Array<Attributes>>(['description', 'people', 'object-recognition']);
    const [currentVal, setCurrent] = useState<Array<Attributes>>(['description', 'people', 'object-recognition']);

    /** Revokes the active video's object URL (if any) so the browser can free the blob. */
    const releaseVideoUrl = () => {
        if (videoUrlRef.current) {
            URL.revokeObjectURL(videoUrlRef.current);
            videoUrlRef.current = '';
        }
    };

    // Release the last object URL when the component unmounts.
    React.useEffect(
        () => () => {
            if (videoUrlRef.current) {
                URL.revokeObjectURL(videoUrlRef.current);
            }
        },
        [],
    );

    useDocumentPaste((event) => {
        // Pasting into the custom URL input must not be treated as a media upload.
        if ((event.target as HTMLElement)?.id === 'custom_url_input') return;
        if (loading) return;
        if (event.clipboardData?.files && event.clipboardData?.files.length > 0) {
            const file = event.clipboardData.files[0];
            const mimeType = mime.lookup(file.name);
            if (mimeType && mediaMimeTypes.includes(mimeType)) {
                handleMedia(file);
            }
        }
    });

    const handleMediaDrop = (e: React.DragEvent) => {
        e.preventDefault();
        if (loading) return;
        // A drop may carry no items, or a non-file item (e.g. dragged text).
        const file = e.dataTransfer.items[0]?.getAsFile();
        if (file) {
            handleMedia(file);
        }
    };

    /**
     * Entry point for every upload path (paste, drop, file picker).
     * Switches the chat to perception mode and starts the image or video flow.
     * Files whose media type is neither image nor video are ignored.
     */
    const handleMedia = async (file: File, attributes?: Attributes[]) => {
        setChatMode('perception');
        const kind = getMediaType(file.name);
        if (kind === 'image') {
            setLoading(true);
            try {
                const url = await fileToBase64(file);
                // Drop the previous video (if any). Clearing the ref also makes
                // its in-flight streams stale so they cannot overwrite `responses[0]`.
                releaseVideoUrl();
                setMedia({ url, type: kind });
                await handleImage(url, kind, attributes || currentVal);
            } finally {
                // Never leave the panel stuck in the loading state if reading or sending fails.
                setLoading(false);
            }
        } else if (kind === 'video') {
            const objectUrl = URL.createObjectURL(file);
            releaseVideoUrl();
            setMedia({ url: objectUrl, type: kind });
            videoUrlRef.current = objectUrl;
            setCurrentIndex(0);
            void handleProgress({ playedSeconds: 0, reset: true });
        }
    };
    const json = extractCodeBlock((mediaType === 'image' ? responses?.[0] : responses?.[currentIndex]) ?? '{}');

    const playerRef = useRef<ReactPlayer | null>(null);
    /**
     * Called by the player on progress ticks, and manually with `reset: true`
     * after a new upload or when new attributes are applied.
     *
     * Requests a description for the current 4-second segment and the next two,
     * skipping segments that already have a response unless `reset` is set.
     */
    const handleProgress = async ({
        playedSeconds,
        attributes,
        reset = false,
    }: {
        playedSeconds: number;
        attributes?: Attributes[];
        reset?: boolean;
    }) => {
        const currentVideoUrl = videoUrlRef.current;
        if (reset) {
            videoKeyRef.current = Math.random().toString();
            setResponses({});
            setVideoReady(false);
        }

        // This allows us to ignore streams when conditions change
        const videoKey = videoKeyRef.current;
        const isCurrent = () => currentVideoUrl === videoUrlRef.current && videoKey === videoKeyRef.current;
        const index = Math.floor(playedSeconds / 4);
        setCurrentIndex(index);
        const queue = [index, index + 1, index + 2].filter((i) => !responses[i] || reset);
        if (queue.length === 0) return;
        for (const i of queue) {
            // A newer upload/reset took over while we were awaiting: stop so this
            // run does not write placeholders into the new run's responses.
            if (!isCurrent()) return;
            const isFirstRequest = i === index;
            // Ends the loading state for the segment the user is waiting on,
            // but only while this run is still the active one.
            const settleFirstRequest = () => {
                if (isFirstRequest && isCurrent()) {
                    setLoading(false);
                    setVideoReady(true);
                }
            };
            if (isFirstRequest) {
                setLoading(true);
            }
            // Placeholder marks the segment as requested so later ticks skip it.
            setResponses((v) => ({ ...v, [i]: '{}' }));
            let frame: string;
            try {
                frame = await captureFrame(currentVideoUrl, i * 4 + 2);
            } catch {
                // E.g. the sample time lies past the end of the video. Without
                // settling here the panel would stay in the loading state forever.
                settleFirstRequest();
                continue;
            }
            if (!isCurrent()) return;
            handleMessageSendStateLess(
                `
           Describe the image according to the following instructions. You must output a JSON object as defined below.

            {${(attributes || currentVal).map((v) => attributesPromptMap[v]).join(',')}}
            
            Focus solely on the objects, actions, and properties that are directly observable in the image. Avoid including any inferred or likely co-occurring elements that are not explicitly visible.

            Only start your response with \`\`\`json
        `,
                null,
                {
                    mediaUrl: frame,
                    mediaType: 'image',
                    mode: 'perception',
                },
                // Streaming callback. Returns false for stale runs — presumably
                // this tells the sender to stop streaming; verify against the hook.
                (text: string) => {
                    if (!isCurrent()) return false;
                    if (isFirstRequest) {
                        setVideoReady(true);
                    }
                    setResponses((v) => ({ ...v, [i]: text }));
                    return true;
                },
            ).then(settleFirstRequest, (err: unknown) => {
                // A failed request must still release the loading state.
                settleFirstRequest();
                console.error('Perception request failed', err);
            });
        }
    };
    /** Sends a single image to the model and streams the answer into `responses[0]`. */
    const handleImage = async (url: string, type: 'image', attributes: Attributes[]) => {
        setLoading(true);
        try {
            await handleMessageSendStateLess(
                `
            Describe the image according to the following instructions. You must output a JSON object as defined below.

            {${attributes.map((v: Attributes) => attributesPromptMap[v]).join(',')}}
            
            Focus solely on the objects, actions, and properties that are directly observable in the image. Avoid including any inferred or likely co-occurring elements that are not explicitly visible.
            `,
                null,
                {
                    mediaUrl: url,
                    mediaType: type,
                    mode: 'perception',
                },
                (text: string) => {
                    setResponses((v) => ({ ...v, [0]: text }));
                    return true;
                },
            );
        } finally {
            setLoading(false);
        }
    };
    /** Checkbox handler; `description` is mandatory and is re-added if missing. */
    const handleChange = (v: Array<string | number>) => {
        // Copy instead of pushing into the array owned by CheckboxGroup.
        const next = (v.includes('description') ? [...v] : [...v, 'description']) as Attributes[];
        if (!mediaUrl) {
            // Nothing uploaded yet: the selection takes effect immediately.
            setCurrent(next);
        }
        setVal(next);
    };

    /** Re-runs the analysis of the current media with the checkbox selection. */
    const applyAttributes = () => {
        setCurrent(val);
        if (mediaType === 'image' && mediaUrl) {
            setResponses({});
            void handleImage(mediaUrl, mediaType, val);
        } else if (mediaType === 'video' && playerRef.current) {
            void handleProgress({ playedSeconds: currentIndex * 4, attributes: val, reset: true });
        }
    };

    // True when the checkbox selection equals the applied selection (order-insensitive).
    const isSame = useMemo(
        () => currentVal.length === val.length && currentVal.every((v) => val.includes(v)),
        [currentVal, val],
    );

    return (
        <Flex
            flexDirection={['column', 'column', 'row']}
            justifyContent={'center'}
            alignItems={'center'}
            h={['auto', 'auto', '100%']}
            gap={5}
        >
            <Stack flex={'0 0 300px'}>
                <Flex
                    borderRadius={'8px'}
                    bg={'background-secondary'}
                    position={'relative'}
                    onDrop={handleMediaDrop}
                    minH={'300px'}
                    justifyContent={'center'}
                    alignItems={'center'}
                    mb={4}
                >
                    {showDropZones && !loading && (
                        <Box
                            borderRadius={'8px'}
                            border={'2px dashed'}
                            borderColor={'border-alt'}
                            position={'absolute'}
                            top={0}
                            left={0}
                            width={'100%'}
                            h={'100%'}
                            display={'flex'}
                            justifyContent={'center'}
                            alignItems={'center'}
                            bg={'background-secondary'}
                        >
                            <Text fontSize={'lg'}>Drop file here to upload</Text>
                        </Box>
                    )}
                    <Stack gap={2} alignItems={'center'} justifyContent={'center'} p={3}>
                        {mediaType === 'image' && mediaUrl && (
                            <Img boxSize={`300px`} objectFit="contain" src={mediaUrl} />
                        )}
                        {mediaType === 'video' && mediaUrl && (
                            <ReactPlayer
                                playing={videoReady}
                                muted={true}
                                ref={playerRef}
                                onProgress={handleProgress}
                                url={mediaUrl}
                                controls={videoReady}
                                width={`300px`}
                            />
                        )}

                        <FileUploader uploadMedia={handleMedia} mimetype={'photovideo'}>
                            {mediaUrl ? (
                                <Text>Upload another image or video</Text>
                            ) : (
                                <Text>
                                    Drag an image or video here
                                    <br />
                                    or
                                    <br />
                                    <Text as={'span'} textDecoration={'underline'}>
                                        Click here
                                    </Text>{' '}
                                    upload file
                                </Text>
                            )}
                        </FileUploader>
                    </Stack>
                    {errorMsg && (
                        <Text color={'red'} px={3} position={'absolute'} bottom={'-28px'} left={0}>
                            {errorMsg}
                        </Text>
                    )}
                </Flex>
                <CheckboxGroup value={val} onChange={handleChange}>
                    <Box border={'1px'} borderColor={'border-main'} borderRadius={'5px'} p={4} bg={'modal-background'}>
                        <FormControl as="fieldset">
                            <FormLabel as="legend">Attributes</FormLabel>
                            <Stack spacing={[1, 5]}>
                                <Checkbox isDisabled={true} colorScheme={'gray'} value="description">
                                    Description
                                </Checkbox>
                                <Checkbox colorScheme={'gray'} value="object-recognition">
                                    Object Recognition
                                </Checkbox>
                                <Checkbox colorScheme={'gray'} value="people">
                                    People
                                </Checkbox>
                                <Checkbox colorScheme={'gray'} value="spatial-relationships">
                                    Spatial Relationships
                                </Checkbox>
                                <Checkbox colorScheme={'gray'} value="background">
                                    Background
                                </Checkbox>
                                <Checkbox colorScheme={'gray'} value="seo-phrases">
                                    Seo Phrases
                                </Checkbox>
                                {mediaUrl && !isSame && (
                                    <Button isDisabled={loading} onClick={applyAttributes}>
                                        Apply
                                    </Button>
                                )}
                            </Stack>
                        </FormControl>
                    </Box>
                </CheckboxGroup>
            </Stack>
            {mediaType && (
                <Box flex={'1'}>
                    <CodeRenderer
                        isLoading={loading || chat?.loading}
                        message={'```json\n' + json + '\n```'}
                        innerClassname={cl.codeBlock}
                    />
                </Box>
            )}
        </Flex>
    );
}

/**
 * Captures a single frame of a video as a JPEG data URL.
 *
 * A detached `<video>` element is pointed at `videoSrc`, seeked to
 * `timeInSeconds`, drawn onto a canvas and exported. The element is released
 * again once the promise settles.
 *
 * @param videoSrc      URL of the video (e.g. an object URL).
 * @param timeInSeconds Position of the frame to capture, in seconds.
 * @returns A `data:image/jpeg` URL of the frame.
 * @throws (rejects) when the time lies past the end of the video, the video
 *         fails to load, the frame has no pixels, or the canvas cannot be
 *         created or exported.
 */
function captureFrame(videoSrc: string, timeInSeconds: number): Promise<string> {
    return new Promise((resolve, reject) => {
        const video = document.createElement('video');

        // Detach the media resource so the browser can free the decoder/buffers.
        const release = () => {
            video.removeAttribute('src');
            video.load();
        };
        const fail = (error: Error) => {
            release();
            reject(error);
        };

        // Once the duration is known, validate the requested time and seek to it.
        video.addEventListener('loadedmetadata', () => {
            if (timeInSeconds > video.duration) {
                fail(new Error('Time exceeds video duration'));
                return;
            }
            video.currentTime = timeInSeconds;
        });

        // The frame is available after the seek completes.
        video.addEventListener('seeked', () => {
            // Anything thrown in here (e.g. exporting a tainted canvas) would
            // otherwise be lost and leave the promise pending forever.
            try {
                if (!video.videoWidth || !video.videoHeight) {
                    fail(new Error('Video has no visible frame to capture'));
                    return;
                }
                const canvas = document.createElement('canvas');
                canvas.width = video.videoWidth;
                canvas.height = video.videoHeight;
                const context = canvas.getContext('2d');
                if (!context) {
                    fail(new Error('Failed to get canvas context'));
                    return;
                }
                context.drawImage(video, 0, 0, canvas.width, canvas.height);
                const base64Image = canvas.toDataURL('image/jpeg');
                release();
                resolve(base64Image);
            } catch (e) {
                fail(e instanceof Error ? e : new Error(String(e)));
            }
        });

        // Media error events carry no message; the detail lives on `video.error`.
        video.addEventListener('error', () => {
            const detail = video.error?.message || 'unknown error';
            fail(new Error(`Video failed to load: ${detail}`));
        });

        // Start loading only after all listeners are attached.
        video.src = videoSrc;
    });
}

/**
 * Pulls the JSON payload out of a (possibly still streaming) model response.
 *
 * Resolution order:
 * 1. Empty input -> `'{}'`.
 * 2. Text that already starts with `{` (ignoring leading whitespace) is
 *    returned unchanged.
 * 3. Text inside the first triple-backtick fence, when the fence is closed.
 * 4. Fence opened but not closed: text from the fence up to the last `}`.
 * 5. Fence opened, no `}` yet: everything after the opening fence, so partial
 *    output can be shown while it streams.
 * 6. No fence at all -> `'{}'`.
 *
 * @param content Raw response text.
 * @returns The extracted text; it is not validated as JSON.
 */
function extractCodeBlock(content?: string): string {
    if (!content) return '{}';

    // Already bare JSON: pass it through untouched.
    if (/^\s*\{/.test(content)) {
        return content;
    }

    // Opening fence with an optional language tag, e.g. ```json
    const openingFence = /```[a-zA-Z]*\n?/.exec(content);
    if (!openingFence) {
        return '{}';
    }
    const startIndex = openingFence.index + openingFence[0].length;

    // Closed fence: return what lies between the two fences.
    const closingFenceIndex = content.indexOf('```', startIndex);
    if (closingFenceIndex !== -1) {
        return content.slice(startIndex, closingFenceIndex).trim();
    }

    // No closing fence: cut at the last closing brace, which keeps nested
    // objects intact. Braces before the fence are ignored.
    const lastBraceIndex = content.lastIndexOf('}');
    if (lastBraceIndex >= startIndex) {
        return content.slice(startIndex, lastBraceIndex + 1).trim();
    }

    // Still streaming: return everything after the opening fence.
    return content.slice(startIndex).trim();
}

/**
 * Reads a file and returns it as a base64 `data:` URL (not a bare base64 string).
 *
 * @param file File to read.
 * @returns The data URL produced by `FileReader.readAsDataURL`.
 * @throws (rejects) with the reader's error when reading fails, or with an
 *         `Error` when the read is aborted or yields no string result.
 */
function fileToBase64(file: File): Promise<string> {
    return new Promise((resolve, reject) => {
        const reader = new FileReader();
        // `load` fires only on success; `loadend` would also fire after an
        // abort, where `result` is null.
        reader.onload = () => {
            if (typeof reader.result === 'string') {
                resolve(reader.result);
            } else {
                reject(new Error('File could not be read as a data URL'));
            }
        };
        reader.onerror = () => reject(reader.error ?? new Error('Failed to read file'));
        reader.onabort = () => reject(new Error('File read was aborted'));
        reader.readAsDataURL(file);
    });
}
