import React from 'react'

import { Text } from '@primer/components'

import Code from '../../components/Code'
import Snippet from '../../components/Snippet'

import { CATEGORY_INTRODUCE_BUGS as category, IMPACT_HIGH as impact } from '../../constants'


// Title for the example; the value is a Python file name.
export const exampleTitle = 'catalogue.py'

// "Before" snippet: reads the file while declaring the utf_8 codec.
export const exampleBefore = [
  "with open('sample-chinese.txt', encoding='utf_8') as f:",
  '    content = f.read()',
].join('\n')

// "After" snippet: identical read, but with the encoding argument set to big5.
export const exampleAfter = [
  "with open('sample-chinese.txt', encoding='big5') as f:",
  '    content = f.read()',
].join('\n')

// PascalCase identifier of this check.
export const code = 'NoIncorrectFileEncodingRead'

// Kebab-case spelling of the same identifier.
export const wordCode = 'no-incorrect-file-encoding-read'

// Path of the Open Graph image, derived from the check identifier.
export const ogImage = `/og-image/${code}.png`

// Human-readable title; the label is deliberately the very same string.
export const title = 'Use correct file encoding'

export const label = title

// External references for this check; each entry pairs a URL with its link text.
export const furtherReading = [
  {
    href: 'https://en.wikipedia.org/wiki/ISO/IEC_8859-1',
    // The standard's full name is "ISO/IEC 8859-1", matching the linked article.
    text: 'Wikipedia page for ISO/IEC 8859-1 text encoding.',
  },
]

export function Summary(props) {
  return (
    <Text as={'p'} className={props.className}>
      Specifying incorrect <Code>encoding</Code> when reading a file can cause <Code>UnicodeDecodeError</Code> if the contents of the file is incompatible with the specified encoding.
    </Text>
  )
}


export const explanation = (
  <>
    <Text as='p'>Files are stored as bytes. Therefore before we can save a Python string to disk the string must be serialising to bytes, and conversely it's necessary to decode those bytes back to string in order to read the file from disk. There are a <a href="https://docs.python.org/3/library/codecs.html#standard-encodings" target="_blank">variety of different text serialisation codecs</a> that handle this encoding and decoding, which are collectively referred to as text encoding. In order to make sense of bytes and decode them correctly it's necessary to know what text encoding was used when it was saved to disk.</Text>
    <Text as='p'>It's important to use <Code>encoding</Code> when reading and writing files, but it's more important to use <i>the correct <Code>encoding</Code></i>, as otherwise a <Code>UnicodeDecodeError</Code> will occur at runtime. It's far to easy to mistakenly assume all files are encoded as utf_8.</Text>
    <Text as='p'>Our checks can infer the encoding of the file and detect when the <Code>encoding</Code> specified is wrong and suggest the fix.</Text> 
  </>
)


export {category, impact}