import React, { useState } from 'react';
import {
  Box, Paper, Typography, FormControl, InputLabel,
  Select, MenuItem, TextField, Button, Grid, List,
  ListItem, ListItemText, Card, CardContent, Chip,
  Stepper, Step, StepLabel, IconButton, Tabs, Tab,
  Table, TableBody, TableCell, TableContainer, TableHead, TableRow, CircularProgress,
  Checkbox
} from '@mui/material';
import HumanEvaluation from './HumanEvaluation';
import AutoEvaluation from './AutoEvaluation';
import AutoEvaluationHistory from './AutoEvaluationHistory';


/**
 * Static catalog of GPU server options.
 *
 * Each entry carries a short `id`, a display `name`, a human-readable
 * `specs` summary, a `cost` string and an `availability` label.
 * Not referenced by the code visible in this section of the module.
 */
const hardwareServers = [
  { id: 't4', name: 'NVIDIA T4', specs: '16GB VRAM, 8.1 TFLOPS', cost: '$0.35/hour', availability: 'High' },
  { id: 'a100', name: 'NVIDIA A100', specs: '80GB VRAM, 19.5 TFLOPS', cost: '$3.00/hour', availability: 'Medium' },
  { id: 'h100', name: 'NVIDIA H100', specs: '80GB VRAM, 26.2 TFLOPS', cost: '$5.00/hour', availability: 'Limited' },
];

/**
 * Static catalog of datasets handed to the evaluation tabs.
 *
 * Every entry has a numeric `id`, a `name`, a `taskType` and `type`
 * label, a `size` bucket string, a numeric `samples` count, a
 * `description` and an `updated` label.
 *
 * NOTE(review): entry 1 has an empty `taskType`, and entry 9 pairs
 * `taskType: 'QA'` with `type: 'CODE'` — confirm both are intentional.
 */
const datasets = [
  {
    id: 1,
    name: 'mlabonne/guanaco-llama2-1k',
    taskType: '',
    type: 'General',
    size: '1K',
    samples: 1000,
    description: "A subset of the OpenAssistant-Guanaco dataset, formatted to match Llama 2's prompt style.",
    updated: '2 days ago',
  },
  {
    id: 2,
    name: 'bitext/Bitext-customer-support-llm-chatbot-training-dataset',
    taskType: 'QA',
    type: 'QA',
    size: '10K-100K',
    samples: 26872,
    description: 'Contains 26,872 question-answer pairs across 27 intents in customer service, totaling approximately 3.57 million tokens.',
    updated: '1 week ago',
  },
  {
    id: 3,
    name: 'b-mc2/sql-create-context',
    taskType: 'QA',
    type: 'QA',
    size: '1K-10K',
    samples: 5000,
    description: 'A SQL-focused dataset for generating context in QA tasks.',
    updated: '3 days ago',
  },
  {
    id: 4,
    name: 'google/xtreme',
    taskType: 'QA',
    type: 'QA',
    size: '100K+',
    samples: 100000,
    description: 'A benchmark for evaluating cross-lingual generalization across 40 languages and 9 tasks.',
    updated: '1 month ago',
  },
  {
    id: 5,
    name: 'mandarjoshi/trivia_qa',
    taskType: 'QA',
    type: 'QA',
    size: '100K+',
    samples: 650000,
    description: 'A reading comprehension dataset containing over 650K question-answer pairs.',
    updated: '2 weeks ago',
  },
  {
    id: 6,
    name: 'riotu-lab/ArabicQA_2.1M',
    taskType: 'QA',
    type: 'QA',
    size: '1M+',
    samples: 2100000,
    description: 'An Arabic question-answering dataset with 2.1 million samples.',
    updated: '5 days ago',
  },
  {
    id: 7,
    name: 'llamafactory/alpaca_gpt4_en',
    taskType: 'QA',
    type: 'QA',
    size: '50K',
    samples: 50000,
    description: 'A dataset tailored for Alpaca and GPT-4 QA tasks.',
    updated: '1 day ago',
  },
  {
    id: 8,
    name: 'Quardo/gpt-4o-qa',
    taskType: 'QA',
    type: 'QA',
    size: '10K',
    samples: 10000,
    description: 'A GPT-4 fine-tuning dataset for QA tasks.',
    updated: '4 days ago',
  },
  {
    id: 9,
    name: 'LimYeri/LeetCode_Python_Solutions_v2',
    taskType: 'QA',
    type: 'CODE',
    size: '5K',
    samples: 5000,
    description: 'A dataset containing Python solutions to LeetCode problems.',
    updated: '1 week ago',
  },
];



export default function ModelEvaluation({ myModels, publicModels }) {
  const [evaluationType, setEvaluationType] = useState('human');
  const [selectedModel, setSelectedModel] = useState(null);
  
  return (
    <Box sx={{ display: 'flex', flexDirection: 'column', p: 3 }}>
      <Typography variant="h5" gutterBottom>
        Model Evaluation
      </Typography>
      <Typography variant="body2" color="text.secondary" sx={{ mb: 3 }}>
        Measure model performance metrics and evaluate responses.
      </Typography>

      <Tabs value={evaluationType} onChange={(e, val) => setEvaluationType(val)} sx={{ mb: 3 }}>
        <Tab value="human" label="Human Evaluation" />
        <Tab value="auto" label="Auto Evaluation" />
        <Tab value="autoHistory" label="Auto Evaluation History" />
      </Tabs>

      {evaluationType === 'auto' ? (
        <AutoEvaluation 
          myModels={myModels}
          publicModels={publicModels}
          datasets={datasets}
          onStartEvaluation={handleStartEvaluation}
        />
      ) : evaluationType === 'human' ? (
        <HumanEvaluation 
          models={[...myModels, ...publicModels]}
          datasets={datasets}
          selectedModel={selectedModel}
          onModelSelect={setSelectedModel}
        />
      ) : (
        <AutoEvaluationHistory />
      )}
    </Box>
  );
}

/**
 * Placeholder callback passed to AutoEvaluation as `onStartEvaluation`.
 *
 * Ignores any arguments and only writes a fixed message to the console.
 * Kept as a function declaration so it is hoisted above the component
 * that references it.
 *
 * @returns {void}
 */
function handleStartEvaluation() {
  const message = 'Starting evaluation...';
  console.log(message);
}