/**
 * Upper-cases the first UTF-16 code unit of `str` and lower-cases everything after it.
 * An empty string is returned as an empty string.
 * @param str
 */
export function capitalizeFirstLetter(str: string): string {
  if (str === "") {
    return "";
  }

  const head = str.charAt(0);
  const tail = str.substring(1);
  return `${head.toUpperCase()}${tail.toLowerCase()}`;
}

/**
 * Strips diacritics from a string by decomposing it (Unicode NFD) and then removing the
 * combining marks in the U+0300 - U+036F range. Anything that NFD does not split into a
 * base character plus such a mark is left as-is.
 * @param str
 */
export function toBasicLatin(str: string): string {
  // https://stackoverflow.com/questions/990904/remove-accents-diacritics-in-a-string-in-javascript
  // if this isn't sufficient, lodash deburr might also be a good option
  const decomposed = str.normalize("NFD");
  const combiningMarks = /[\u0300-\u036f]/g;
  return decomposed.replace(combiningMarks, "");
}

/**
 * Normalizes "no meaningful text" to undefined.
 * Returns the trimmed string when it still has content; returns undefined when the input
 * is undefined, empty, or consists only of whitespace.
 */
export function emptyToUndefined(str?: string): string | undefined {
  const trimmed = str?.trim();
  // an empty string is falsy, so this covers both the missing and the blank case
  return trimmed ? trimmed : undefined;
}

/**
 * Trims `str` and, if the result is longer than `maxLen`, shortens it and appends "...".
 * The cut is made at the last space that still leaves room for the ellipsis; when there is
 * no such space (e.g. one very long word) the text is hard-cut so that some content is kept.
 * For `maxLen` below 3 the result is just "...", which is longer than `maxLen`.
 * @param str
 * @param maxLen maximum length of the returned string, ellipsis included
 * @returns the trimmed string, or a shortened version ending in "..."
 */
export function ellipsizeSentence(str: string, maxLen: number): string {
  const trimmed = str.trim();
  if (trimmed.length <= maxLen) {
    return trimmed;
  }

  // Search the first maxLen - 2 characters for a space. The space itself is dropped, so the
  // kept text is at most maxLen - 3 characters, which leaves room for the 3-character ellipsis.
  const candidate = trimmed.substring(0, maxLen - 2);
  const end = candidate.lastIndexOf(" ");

  // No space means no word boundary to cut at: hard-cut instead of discarding all of the text.
  const cut = end === -1 ? Math.max(0, maxLen - 3) : end;
  const chunk = trimmed.substring(0, cut).trim();
  return `${chunk}...`;
}

/**
 * Returns an array containing objects with a sentence and the indices for the sentence in the original string.
 * Both indices are inclusive: `indices[0]` is the first character of the sentence and `indices[1]` the last.
 * This function simply splits on /[.][ ]{1,2}[A-Z]/ so it will not work as expected
 * if things are not formatted "normally". A non-empty string in which no sentence boundary is found is
 * returned as a single sentence; an empty string yields an empty array.
 * @param str
 * @returns one entry per sentence, in order of appearance
 */
export function splitOnSentences(str: string): Array<{ sentence: string; indices: [number, number] }> {
  // allow for 1 or 2 spaces after the period
  const regex = /[.][ ]{1,2}[A-Z]/g;
  const indices: Array<[number, number]> = [];
  let start = 0;
  let match: RegExpExecArray | null;
  while ((match = regex.exec(str)) !== null) {
    // match.index is the period, which closes the current sentence
    indices.push([start, match.index]);
    // the match ends on the capital letter that opens the next sentence
    start = regex.lastIndex - 1;
  }

  // Push whatever is left, even a single character. This also accounts for the case where no matches are found.
  if (start < str.length) {
    indices.push([start, str.length - 1]);
  }

  return indices.map(range => {
    return { sentence: str.substring(range[0], range[1] + 1), indices: range };
  });
}

/**
 * Counts the Unicode code points in a string rather than its UTF-16 code units, so a
 * character stored as a surrogate pair counts once.
 * "😂".length is 2, charLength("😂") is 1.
 * @param str
 */
export function charLength(str: string): number {
  let count = 0;
  // string iteration yields one item per code point, keeping surrogate pairs together
  for (const _codePoint of str) {
    count += 1;
  }
  return count;
}

/**
 * Splits the given string into consecutive chunks whose UTF-8 size, as computed by
 * `utf8ByteSize`, does not exceed `chunkSizeBytes`. Concatenating the chunks gives back `str`.
 * A single character that is larger than `chunkSizeBytes` on its own is emitted as its own
 * (oversized) chunk rather than being dropped or split.
 * @param str
 * @param chunkSizeBytes maximum UTF-8 byte size of each chunk
 * @returns the chunks in order; an empty array for an empty string
 */
export function chunkString(str: string, chunkSizeBytes: number): string[] {
  const chunks: string[] = [];
  let currentChunk = "";
  let currentByteSize = 0;

  // for...of walks the string by code point, so a surrogate pair is never split across chunks
  for (const char of str) {
    const charByteSize = utf8ByteSize(char);
    if (currentByteSize + charByteSize > chunkSizeBytes) {
      // The current chunk is still empty when this character alone exceeds the limit;
      // skip the flush in that case so no empty chunk ends up in the result.
      if (currentChunk) {
        chunks.push(currentChunk);
      }
      currentChunk = char;
      currentByteSize = charByteSize;
    } else {
      currentChunk += char;
      currentByteSize += charByteSize;
    }
  }

  if (currentChunk) {
    chunks.push(currentChunk);
  }

  return chunks;
}

/**
 * Walks the UTF-16 code units of the string and calculates the byte size based on UTF-8 encoding rules:
 * - Single-byte characters (standard ASCII): 1 byte.
 * - Two-byte characters (up to U+07FF): 2 bytes.
 * - Surrogate pairs (like many emojis): 4 bytes for the pair.
 * - Other characters: 3 bytes.
 * An unpaired surrogate counts as 3 bytes, the size of the U+FFFD replacement character that
 * UTF-8 encoders substitute for it, and the code unit after it is still counted normally.
 * This method provides a direct way to calculate the byte size of a string in environments without TextEncoder (i.e. React Native).
 * @param str
 * @returns the number of bytes `str` occupies when encoded as UTF-8
 */
export function utf8ByteSize(str: string): number {
  let size = 0;
  for (let i = 0; i < str.length; i++) {
    const code = str.charCodeAt(i);
    if (code <= 0x7f) {
      size += 1;
    } else if (code <= 0x7ff) {
      size += 2;
    } else if (code >= 0xd800 && code <= 0xdbff) {
      // High surrogate: only a following low surrogate makes it a real pair.
      // charCodeAt returns NaN past the end of the string, which fails both comparisons.
      const next = str.charCodeAt(i + 1);
      if (next >= 0xdc00 && next <= 0xdfff) {
        size += 4;
        i++; // Skip the low surrogate, it is part of the same character
      } else {
        size += 3;
      }
    } else {
      size += 3;
    }
  }
  return size;
}

/**
 * Formats a number using the grouping and decimal conventions of the given locale
 * (only "en-US" is accepted, e.g. 1234567 becomes "1,234,567").
 * @param number the value to format
 * @param locale the locale passed to Intl.NumberFormat
 */
export function formatNumberWithCommas(number: number, locale: "en-US" = "en-US") {
  const formatter = new Intl.NumberFormat(locale);
  return formatter.format(number);
}
