#' Build k-means-based starting values for a hidden Markov model
#'
#' Clusters the observations into `J` groups with `kmeans()` and converts
#' per-cluster moments into starting parameters for the requested observation
#' distribution, together with a persistent transition matrix.
#'
#' For the zero-inflated families ("zip", "zinb", "ZInormal", "ZIgamma") only
#' the non-zero observations are clustered and every zero-inflation
#' probability is started at 0.5.
#'
#' @param J Number of clusters (one per hidden state).
#' @param x Numeric vector of observations.
#' @param obsdist Observation distribution: one of "norm", "pois", "weibull",
#'   "zip", "nbinom", "zinb", "exp", "gamma", "lnorm", "ZInormal", "ZIgamma".
#' @param seed Optional value passed to `set.seed()` before clustering; when
#'   `NULL` (the default) the random number generator is left untouched.
#' @return A list with `Pi`, the J x J initial transition matrix (0.9 on the
#'   diagonal, the remaining 0.1 spread evenly over the other states; the
#'   1 x 1 matrix `1` when `J == 1`), and `obspar`, a named list of starting
#'   parameters for `obsdist`, sorted by one of the parameters.
clusterHMM <- function(J, x, obsdist, seed = NULL) {

  # Set seed if provided for reproducibility
  if (!is.null(seed)) {
    set.seed(seed)
  }

  # Input validation: ensure sufficient distinct data points for clustering
  if (length(unique(x)) < J) {
    stop("Number of distinct data points must be greater than or equal to number of clusters")
  }

  # Drop exact zeros before clustering a zero-inflated family.
  # `x[-which(x == 0)]` would return an empty vector when there are no zeros,
  # so the no-zero case is handled explicitly.
  nonzero_values <- function() {
    zero_idx <- which(x == 0)
    kept <- if (length(zero_idx) > 0) x[-zero_idx] else x
    if (length(unique(kept)) < J) {
      stop("Number of distinct non-zero data points must be greater than or equal to number of clusters")
    }
    kept
  }

  # Method-of-moments size for the negative binomial: mu^2 / (var - mu).
  # The estimator is only valid when the variance exceeds the mean; otherwise
  # (or for a singleton cluster, where var is NA) the excess variance falls
  # back to 10% of the mean, mirroring the variance floor of the ZIgamma
  # branch, so that the starting size is not negative, infinite or NA.
  nb_size <- function(mu, v) {
    excess <- v - mu
    invalid <- is.na(excess) | excess <= 0
    excess[invalid] <- 0.1 * mu[invalid]
    mu^2 / excess
  }

  # Perform initial k-means clustering on observed data. This is done for
  # every distribution (even those that re-cluster below) so that the random
  # number stream, and hence seeded results, stay the same.
  clustering <- kmeans(x, centers = J)
  means <- tapply(x, clustering$cluster, mean)
  vars <- tapply(x, clustering$cluster, var)

  # Distribution-specific parameter initialization
  if (obsdist == "norm") {
    # Normal distribution: cluster means and standard deviations,
    # ordered by increasing mean for identifiability
    idx <- order(means)
    par <- list(mean = means[idx], sd = sqrt(vars)[idx])

  } else if (obsdist == "pois") {
    # Poisson distribution: lambda equals cluster means
    idx <- order(means)
    par <- list(lambda = means[idx])

  } else if (obsdist == "weibull") {
    # Weibull distribution: method of moments for shape and scale parameters.
    # as.numeric() drops the tapply names so plain vectors are returned.
    mean_j <- as.numeric(means)
    cv <- sqrt(as.numeric(vars)) / mean_j  # Coefficient of variation
    # Empirical approximation for shape parameter
    shape0 <- (0.9874 / cv)^1.0362
    # Scale parameter from mean and gamma function
    scale0 <- mean_j / gamma(1 + 1 / shape0)
    idx <- order(scale0)
    par <- list(shape = shape0[idx], scale = scale0[idx])

  } else if (obsdist == "zip") {
    # Zero-inflated Poisson: cluster non-zero values, uniform zero-inflation
    pi0 <- rep(0.5, J)
    x2 <- nonzero_values()
    clustering2 <- kmeans(x2, centers = J, nstart = 25)
    lambda0 <- tapply(x2, clustering2$cluster, mean)
    idx <- order(lambda0)
    par <- list(lambda = lambda0[idx], pi = pi0)

  } else if (obsdist == "nbinom") {
    # Negative binomial: method of moments for size and mu, ordered by size
    mu0 <- means
    size0 <- nb_size(mu0, vars)
    idx <- order(size0)
    par <- list(size = size0[idx], mu = mu0[idx])

  } else if (obsdist == "zinb") {
    # Zero-inflated negative binomial: NB moments on the non-zero values
    pi0 <- rep(0.5, J)
    x2 <- nonzero_values()
    clustering2 <- kmeans(x2, centers = J)
    mu0 <- tapply(x2, clustering2$cluster, mean)
    vars2 <- tapply(x2, clustering2$cluster, var)
    size0 <- nb_size(mu0, vars2)
    idx <- order(size0)
    par <- list(pi = pi0, size = size0[idx], mu = mu0[idx])

  } else if (obsdist == "exp") {
    # Exponential distribution: rate as inverse of cluster means
    rate0 <- 1 / means
    idx <- order(rate0)
    par <- list(rate = rate0[idx])

  } else if (obsdist == "gamma") {
    # Gamma distribution: method of moments for shape and rate
    shape0 <- (means^2) / vars
    rate0 <- means / vars
    idx <- order(shape0)
    par <- list(shape = shape0[idx], rate = rate0[idx])

  } else if (obsdist == "lnorm") {
    # Log-normal distribution: cluster log-transformed data
    logx <- log(x)
    clustering <- kmeans(logx, J)
    meanlog0 <- tapply(logx, clustering$cluster, mean)
    sdlog0 <- tapply(logx, clustering$cluster, sd)
    idx <- order(meanlog0)
    par <- list(meanlog = meanlog0[idx], sdlog = sdlog0[idx])

  } else if (obsdist == "ZInormal") {
    # Zero-inflated Normal: cluster non-zero values, uniform zero-inflation
    pi0 <- rep(0.5, J)
    x2 <- nonzero_values()
    clustering2 <- kmeans(x2, centers = J, nstart = 25)
    mean0 <- tapply(x2, clustering2$cluster, mean)
    vars2 <- tapply(x2, clustering2$cluster, var)
    # Floor the variance; na.rm = TRUE makes the floor also apply to
    # singleton clusters, whose sample variance is NA
    sd0 <- sqrt(pmax(vars2, 0.01, na.rm = TRUE))
    idx <- order(mean0)
    par <- list(pi = pi0, mean = mean0[idx], sd = sd0[idx])

  } else if (obsdist == "ZIgamma") {
    # Zero-inflated Gamma: cluster non-zero values, uniform zero-inflation
    pi0 <- rep(0.5, J)
    x2 <- nonzero_values()
    clustering2 <- kmeans(x2, centers = J, nstart = 25)
    means2 <- tapply(x2, clustering2$cluster, mean)
    vars2 <- tapply(x2, clustering2$cluster, var)

    # Method of moments with a floored variance (also covers NA variances
    # of singleton clusters via na.rm = TRUE)
    safe_var <- pmax(vars2, means2 * 0.1, na.rm = TRUE)
    shape0 <- pmax((means2^2) / safe_var, 0.1)
    rate0 <- pmax(means2 / safe_var, 0.1)

    idx <- order(shape0)
    par <- list(pi = pi0, shape = shape0[idx], rate = rate0[idx])

  } else {
    stop("Distribution type not supported")
  }

  # Initialize transition matrix with high persistence
  # (0.9 diagonal, 0.1 / (J - 1) off-diagonal). With a single state there is
  # nowhere to switch to, so the only row-stochastic choice is 1.
  if (J == 1) {
    Pi <- matrix(1, nrow = 1, ncol = 1)
  } else {
    Pi <- matrix(0.1 / (J - 1), nrow = J, ncol = J)
    diag(Pi) <- 0.9
  }

  list(Pi = Pi, obspar = par)
}
