venv

Commit 910d99f98b, branch master
BARRAUX Arthur, 2 years ago

Changed files (line counts are additions; BIN = binary file):
  1. venv/bin/Activate.ps1 (+247)
  2. venv/bin/activate (+83)
  3. venv/bin/activate.csh (+55)
  4. venv/bin/activate.fish (+100)
  5. venv/bin/activate.nu (+92)
  6. venv/bin/activate.ps1 (+60)
  7. venv/bin/activate_this.py (+31)
  8. venv/bin/deactivate.nu (+32)
  9. venv/bin/google-oauthlib-tool (+8)
  10. venv/bin/normalizer (+8)
  11. venv/bin/pip (+8)
  12. venv/bin/pip-3.10 (+8)
  13. venv/bin/pip3 (+8)
  14. venv/bin/pip3.10 (+8)
  15. venv/bin/pyrsa-decrypt (+8)
  16. venv/bin/pyrsa-encrypt (+8)
  17. venv/bin/pyrsa-keygen (+8)
  18. venv/bin/pyrsa-priv2pub (+8)
  19. venv/bin/pyrsa-sign (+8)
  20. venv/bin/pyrsa-verify (+8)
  21. venv/bin/python (+1)
  22. venv/bin/python3 (+1)
  23. venv/bin/python3.10 (+1)
  24. venv/bin/wheel (+8)
  25. venv/bin/wheel-3.10 (+8)
  26. venv/bin/wheel3 (+8)
  27. venv/bin/wheel3.10 (+8)
  28. venv/lib/python3.10/site-packages/__pycache__/_virtualenv.cpython-310.pyc (BIN)
  29. venv/lib/python3.10/site-packages/__pycache__/google_auth_httplib2.cpython-310.pyc (BIN)
  30. venv/lib/python3.10/site-packages/__pycache__/six.cpython-310.pyc (BIN)
  31. venv/lib/python3.10/site-packages/_distutils_hack/__init__.py (+132)
  32. venv/lib/python3.10/site-packages/_distutils_hack/override.py (+1)
  33. venv/lib/python3.10/site-packages/_virtualenv.pth (+1)
  34. venv/lib/python3.10/site-packages/_virtualenv.py (+130)
  35. venv/lib/python3.10/site-packages/apiclient/__init__.py (+27)
  36. venv/lib/python3.10/site-packages/apiclient/__pycache__/__init__.cpython-310.pyc (BIN)
  37. venv/lib/python3.10/site-packages/cachetools-5.2.0.dist-info/INSTALLER (+1)
  38. venv/lib/python3.10/site-packages/cachetools-5.2.0.dist-info/LICENSE (+20)
  39. venv/lib/python3.10/site-packages/cachetools-5.2.0.dist-info/METADATA (+146)
  40. venv/lib/python3.10/site-packages/cachetools-5.2.0.dist-info/RECORD (+12)
  41. venv/lib/python3.10/site-packages/cachetools-5.2.0.dist-info/WHEEL (+5)
  42. venv/lib/python3.10/site-packages/cachetools-5.2.0.dist-info/top_level.txt (+1)
  43. venv/lib/python3.10/site-packages/cachetools/__init__.py (+745)
  44. venv/lib/python3.10/site-packages/cachetools/__pycache__/__init__.cpython-310.pyc (BIN)
  45. venv/lib/python3.10/site-packages/cachetools/__pycache__/func.cpython-310.pyc (BIN)
  46. venv/lib/python3.10/site-packages/cachetools/__pycache__/keys.cpython-310.pyc (BIN)
  47. venv/lib/python3.10/site-packages/cachetools/func.py (+172)
  48. venv/lib/python3.10/site-packages/cachetools/keys.py (+57)
  49. venv/lib/python3.10/site-packages/certifi-2022.12.7.dist-info/INSTALLER (+1)
  50. venv/lib/python3.10/site-packages/certifi-2022.12.7.dist-info/LICENSE (+21)
  51. venv/lib/python3.10/site-packages/certifi-2022.12.7.dist-info/METADATA (+83)
  52. venv/lib/python3.10/site-packages/certifi-2022.12.7.dist-info/RECORD (+14)
  53. venv/lib/python3.10/site-packages/certifi-2022.12.7.dist-info/WHEEL (+5)
  54. venv/lib/python3.10/site-packages/certifi-2022.12.7.dist-info/top_level.txt (+1)
  55. venv/lib/python3.10/site-packages/certifi/__init__.py (+4)
  56. venv/lib/python3.10/site-packages/certifi/__main__.py (+12)
  57. venv/lib/python3.10/site-packages/certifi/__pycache__/__init__.cpython-310.pyc (BIN)
  58. venv/lib/python3.10/site-packages/certifi/__pycache__/__main__.cpython-310.pyc (BIN)
  59. venv/lib/python3.10/site-packages/certifi/__pycache__/core.cpython-310.pyc (BIN)
  60. venv/lib/python3.10/site-packages/certifi/cacert.pem (+4527)
  61. venv/lib/python3.10/site-packages/certifi/core.py (+108)
  62. venv/lib/python3.10/site-packages/certifi/py.typed (+0)
  63. venv/lib/python3.10/site-packages/charset_normalizer-2.1.1.dist-info/INSTALLER (+1)
  64. venv/lib/python3.10/site-packages/charset_normalizer-2.1.1.dist-info/LICENSE (+21)
  65. venv/lib/python3.10/site-packages/charset_normalizer-2.1.1.dist-info/METADATA (+269)
  66. venv/lib/python3.10/site-packages/charset_normalizer-2.1.1.dist-info/RECORD (+33)
  67. venv/lib/python3.10/site-packages/charset_normalizer-2.1.1.dist-info/WHEEL (+5)
  68. venv/lib/python3.10/site-packages/charset_normalizer-2.1.1.dist-info/entry_points.txt (+2)
  69. venv/lib/python3.10/site-packages/charset_normalizer-2.1.1.dist-info/top_level.txt (+1)
  70. venv/lib/python3.10/site-packages/charset_normalizer/__init__.py (+56)
  71. venv/lib/python3.10/site-packages/charset_normalizer/__pycache__/__init__.cpython-310.pyc (BIN)
  72. venv/lib/python3.10/site-packages/charset_normalizer/__pycache__/api.cpython-310.pyc (BIN)
  73. venv/lib/python3.10/site-packages/charset_normalizer/__pycache__/cd.cpython-310.pyc (BIN)
  74. venv/lib/python3.10/site-packages/charset_normalizer/__pycache__/constant.cpython-310.pyc (BIN)
  75. venv/lib/python3.10/site-packages/charset_normalizer/__pycache__/legacy.cpython-310.pyc (BIN)
  76. venv/lib/python3.10/site-packages/charset_normalizer/__pycache__/md.cpython-310.pyc (BIN)
  77. venv/lib/python3.10/site-packages/charset_normalizer/__pycache__/models.cpython-310.pyc (BIN)
  78. venv/lib/python3.10/site-packages/charset_normalizer/__pycache__/utils.cpython-310.pyc (BIN)
  79. venv/lib/python3.10/site-packages/charset_normalizer/__pycache__/version.cpython-310.pyc (BIN)
  80. venv/lib/python3.10/site-packages/charset_normalizer/api.py (+584)
  81. venv/lib/python3.10/site-packages/charset_normalizer/assets/__init__.py (+1122)
  82. venv/lib/python3.10/site-packages/charset_normalizer/assets/__pycache__/__init__.cpython-310.pyc (BIN)
  83. venv/lib/python3.10/site-packages/charset_normalizer/cd.py (+339)
  84. venv/lib/python3.10/site-packages/charset_normalizer/cli/__init__.py (+0)
  85. venv/lib/python3.10/site-packages/charset_normalizer/cli/__pycache__/__init__.cpython-310.pyc (BIN)
  86. venv/lib/python3.10/site-packages/charset_normalizer/cli/__pycache__/normalizer.cpython-310.pyc (BIN)
  87. venv/lib/python3.10/site-packages/charset_normalizer/cli/normalizer.py (+295)
  88. venv/lib/python3.10/site-packages/charset_normalizer/constant.py (+497)
  89. venv/lib/python3.10/site-packages/charset_normalizer/legacy.py (+95)
  90. venv/lib/python3.10/site-packages/charset_normalizer/md.py (+553)
  91. venv/lib/python3.10/site-packages/charset_normalizer/models.py (+401)
  92. venv/lib/python3.10/site-packages/charset_normalizer/py.typed (+0)
  93. venv/lib/python3.10/site-packages/charset_normalizer/utils.py (+424)
  94. venv/lib/python3.10/site-packages/charset_normalizer/version.py (+6)
  95. venv/lib/python3.10/site-packages/distutils-precedence.pth (+1)
  96. venv/lib/python3.10/site-packages/google/_upb/_message.abi3.so (BIN)
  97. venv/lib/python3.10/site-packages/google/api/__init__.py (+0)
  98. venv/lib/python3.10/site-packages/google/api/__pycache__/__init__.cpython-310.pyc (BIN)
  99. venv/lib/python3.10/site-packages/google/api/__pycache__/annotations_pb2.cpython-310.pyc (BIN)
  100. venv/lib/python3.10/site-packages/google/api/__pycache__/auth_pb2.cpython-310.pyc (BIN)

247
venv/bin/Activate.ps1

@@ -0,0 +1,247 @@
<#
.Synopsis
Activate a Python virtual environment for the current PowerShell session.
.Description
Pushes the python executable for a virtual environment to the front of the
$Env:PATH environment variable and sets the prompt to signify that you are
in a Python virtual environment. Makes use of the command line switches as
well as the `pyvenv.cfg` file values present in the virtual environment.
.Parameter VenvDir
Path to the directory that contains the virtual environment to activate. The
default value for this is the parent of the directory that the Activate.ps1
script is located within.
.Parameter Prompt
The prompt prefix to display when this virtual environment is activated. By
default, this prompt is the name of the virtual environment folder (VenvDir)
surrounded by parentheses and followed by a single space (i.e. '(.venv) ').
.Example
Activate.ps1
Activates the Python virtual environment that contains the Activate.ps1 script.
.Example
Activate.ps1 -Verbose
Activates the Python virtual environment that contains the Activate.ps1 script,
and shows extra information about the activation as it executes.
.Example
Activate.ps1 -VenvDir C:\Users\MyUser\Common\.venv
Activates the Python virtual environment located in the specified location.
.Example
Activate.ps1 -Prompt "MyPython"
Activates the Python virtual environment that contains the Activate.ps1 script,
and prefixes the current prompt with the specified string (surrounded in
parentheses) while the virtual environment is active.
.Notes
On Windows, it may be required to enable this Activate.ps1 script by setting the
execution policy for the user. You can do this by issuing the following PowerShell
command:
PS C:\> Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser
For more information on Execution Policies:
https://go.microsoft.com/fwlink/?LinkID=135170
#>
Param(
[Parameter(Mandatory = $false)]
[String]
$VenvDir,
[Parameter(Mandatory = $false)]
[String]
$Prompt
)
<# Function declarations --------------------------------------------------- #>
<#
.Synopsis
Remove all shell session elements added by the Activate script, including the
addition of the virtual environment's Python executable from the beginning of
the PATH variable.
.Parameter NonDestructive
If present, do not remove this function from the global namespace for the
session.
#>
function global:deactivate ([switch]$NonDestructive) {
# Revert to original values
# The prior prompt:
if (Test-Path -Path Function:_OLD_VIRTUAL_PROMPT) {
Copy-Item -Path Function:_OLD_VIRTUAL_PROMPT -Destination Function:prompt
Remove-Item -Path Function:_OLD_VIRTUAL_PROMPT
}
# The prior PYTHONHOME:
if (Test-Path -Path Env:_OLD_VIRTUAL_PYTHONHOME) {
Copy-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME -Destination Env:PYTHONHOME
Remove-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME
}
# The prior PATH:
if (Test-Path -Path Env:_OLD_VIRTUAL_PATH) {
Copy-Item -Path Env:_OLD_VIRTUAL_PATH -Destination Env:PATH
Remove-Item -Path Env:_OLD_VIRTUAL_PATH
}
# Just remove the VIRTUAL_ENV altogether:
if (Test-Path -Path Env:VIRTUAL_ENV) {
Remove-Item -Path env:VIRTUAL_ENV
}
# Just remove VIRTUAL_ENV_PROMPT altogether.
if (Test-Path -Path Env:VIRTUAL_ENV_PROMPT) {
Remove-Item -Path env:VIRTUAL_ENV_PROMPT
}
# Just remove the _PYTHON_VENV_PROMPT_PREFIX altogether:
if (Get-Variable -Name "_PYTHON_VENV_PROMPT_PREFIX" -ErrorAction SilentlyContinue) {
Remove-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Scope Global -Force
}
# Leave deactivate function in the global namespace if requested:
if (-not $NonDestructive) {
Remove-Item -Path function:deactivate
}
}
<#
.Description
Get-PyVenvConfig parses the values from the pyvenv.cfg file located in the
given folder, and returns them in a map.
For each line in the pyvenv.cfg file, if that line can be parsed into exactly
two strings separated by `=` (with any amount of whitespace surrounding the =)
then it is considered a `key = value` line. The left hand string is the key,
the right hand is the value.
If the value starts with a `'` or a `"` then the first and last character is
stripped from the value before being captured.
.Parameter ConfigDir
Path to the directory that contains the `pyvenv.cfg` file.
#>
function Get-PyVenvConfig(
[String]
$ConfigDir
) {
Write-Verbose "Given ConfigDir=$ConfigDir, obtain values in pyvenv.cfg"
# Ensure the file exists, and issue a warning if it doesn't (but still allow the function to continue).
$pyvenvConfigPath = Join-Path -Resolve -Path $ConfigDir -ChildPath 'pyvenv.cfg' -ErrorAction Continue
# An empty map will be returned if no config file is found.
$pyvenvConfig = @{ }
if ($pyvenvConfigPath) {
Write-Verbose "File exists, parse `key = value` lines"
$pyvenvConfigContent = Get-Content -Path $pyvenvConfigPath
$pyvenvConfigContent | ForEach-Object {
$keyval = $PSItem -split "\s*=\s*", 2
if ($keyval[0] -and $keyval[1]) {
$val = $keyval[1]
# Remove extraneous quotations around a string value.
if ("'""".Contains($val.Substring(0, 1))) {
$val = $val.Substring(1, $val.Length - 2)
}
$pyvenvConfig[$keyval[0]] = $val
Write-Verbose "Adding Key: '$($keyval[0])'='$val'"
}
}
}
return $pyvenvConfig
}
<# Begin Activate script --------------------------------------------------- #>
# Determine the containing directory of this script
$VenvExecPath = Split-Path -Parent $MyInvocation.MyCommand.Definition
$VenvExecDir = Get-Item -Path $VenvExecPath
Write-Verbose "Activation script is located in path: '$VenvExecPath'"
Write-Verbose "VenvExecDir Fullname: '$($VenvExecDir.FullName)"
Write-Verbose "VenvExecDir Name: '$($VenvExecDir.Name)"
# Set values required in priority: CmdLine, ConfigFile, Default
# First, get the location of the virtual environment, it might not be
# VenvExecDir if specified on the command line.
if ($VenvDir) {
Write-Verbose "VenvDir given as parameter, using '$VenvDir' to determine values"
}
else {
Write-Verbose "VenvDir not given as a parameter, using parent directory name as VenvDir."
$VenvDir = $VenvExecDir.Parent.FullName.TrimEnd("\\/")
Write-Verbose "VenvDir=$VenvDir"
}
# Next, read the `pyvenv.cfg` file to determine any required value such
# as `prompt`.
$pyvenvCfg = Get-PyVenvConfig -ConfigDir $VenvDir
# Next, set the prompt from the command line, or the config file, or
# just use the name of the virtual environment folder.
if ($Prompt) {
Write-Verbose "Prompt specified as argument, using '$Prompt'"
}
else {
Write-Verbose "Prompt not specified as argument to script, checking pyvenv.cfg value"
if ($pyvenvCfg -and $pyvenvCfg['prompt']) {
Write-Verbose " Setting based on value in pyvenv.cfg='$($pyvenvCfg['prompt'])'"
$Prompt = $pyvenvCfg['prompt'];
}
else {
Write-Verbose " Setting prompt based on parent's directory's name. (Is the directory name passed to venv module when creating the virtual environment)"
Write-Verbose " Got leaf-name of $VenvDir='$(Split-Path -Path $venvDir -Leaf)'"
$Prompt = Split-Path -Path $venvDir -Leaf
}
}
Write-Verbose "Prompt = '$Prompt'"
Write-Verbose "VenvDir='$VenvDir'"
# Deactivate any currently active virtual environment, but leave the
# deactivate function in place.
deactivate -nondestructive
# Now set the environment variable VIRTUAL_ENV, used by many tools to determine
# that there is an activated venv.
$env:VIRTUAL_ENV = $VenvDir
if (-not $Env:VIRTUAL_ENV_DISABLE_PROMPT) {
Write-Verbose "Setting prompt to '$Prompt'"
# Set the prompt to include the env name
# Make sure _OLD_VIRTUAL_PROMPT is global
function global:_OLD_VIRTUAL_PROMPT { "" }
Copy-Item -Path function:prompt -Destination function:_OLD_VIRTUAL_PROMPT
New-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Description "Python virtual environment prompt prefix" -Scope Global -Option ReadOnly -Visibility Public -Value $Prompt
function global:prompt {
Write-Host -NoNewline -ForegroundColor Green "($_PYTHON_VENV_PROMPT_PREFIX) "
_OLD_VIRTUAL_PROMPT
}
$env:VIRTUAL_ENV_PROMPT = $Prompt
}
# Clear PYTHONHOME
if (Test-Path -Path Env:PYTHONHOME) {
Copy-Item -Path Env:PYTHONHOME -Destination Env:_OLD_VIRTUAL_PYTHONHOME
Remove-Item -Path Env:PYTHONHOME
}
# Add the venv to the PATH
Copy-Item -Path Env:PATH -Destination Env:_OLD_VIRTUAL_PATH
$Env:PATH = "$VenvExecDir$([System.IO.Path]::PathSeparator)$Env:PATH"
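
For readers following the Get-PyVenvConfig logic above, the same `key = value` parsing rules look roughly like this in Python (a sketch only; read_pyvenv_cfg is a name invented for this note, not part of the venv):

def read_pyvenv_cfg(path):
    # split each line on the first '=', trim whitespace, drop one pair of surrounding quotes
    config = {}
    with open(path) as fh:
        for line in fh:
            key, sep, value = line.partition('=')
            if not sep:
                continue  # not a `key = value` line
            key, value = key.strip(), value.strip()
            if value[:1] in ('"', "'"):
                value = value[1:-1]  # strip the first and last character, as the script above does
            if key:
                config[key] = value
    return config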

83
venv/bin/activate

@@ -0,0 +1,83 @@
# This file must be used with "source bin/activate" *from bash*
# you cannot run it directly
if [ "${BASH_SOURCE-}" = "$0" ]; then
echo "You must source this script: \$ source $0" >&2
exit 33
fi
deactivate () {
unset -f pydoc >/dev/null 2>&1 || true
# reset old environment variables
# ! [ -z ${VAR+_} ] returns true if VAR is declared at all
if ! [ -z "${_OLD_VIRTUAL_PATH:+_}" ] ; then
PATH="$_OLD_VIRTUAL_PATH"
export PATH
unset _OLD_VIRTUAL_PATH
fi
if ! [ -z "${_OLD_VIRTUAL_PYTHONHOME+_}" ] ; then
PYTHONHOME="$_OLD_VIRTUAL_PYTHONHOME"
export PYTHONHOME
unset _OLD_VIRTUAL_PYTHONHOME
fi
# The hash command must be called to get it to forget past
# commands. Without forgetting past commands the $PATH changes
# we made may not be respected
hash -r 2>/dev/null
if ! [ -z "${_OLD_VIRTUAL_PS1+_}" ] ; then
PS1="$_OLD_VIRTUAL_PS1"
export PS1
unset _OLD_VIRTUAL_PS1
fi
unset VIRTUAL_ENV
if [ ! "${1-}" = "nondestructive" ] ; then
# Self destruct!
unset -f deactivate
fi
}
# unset irrelevant variables
deactivate nondestructive
VIRTUAL_ENV='/home/arthur/Documents/drive_sync/venv'
if ([ "$OSTYPE" = "cygwin" ] || [ "$OSTYPE" = "msys" ]) && $(command -v cygpath &> /dev/null) ; then
VIRTUAL_ENV=$(cygpath -u "$VIRTUAL_ENV")
fi
export VIRTUAL_ENV
_OLD_VIRTUAL_PATH="$PATH"
PATH="$VIRTUAL_ENV/bin:$PATH"
export PATH
# unset PYTHONHOME if set
if ! [ -z "${PYTHONHOME+_}" ] ; then
_OLD_VIRTUAL_PYTHONHOME="$PYTHONHOME"
unset PYTHONHOME
fi
if [ -z "${VIRTUAL_ENV_DISABLE_PROMPT-}" ] ; then
_OLD_VIRTUAL_PS1="${PS1-}"
if [ "x" != x ] ; then
PS1="() ${PS1-}"
else
PS1="(`basename \"$VIRTUAL_ENV\"`) ${PS1-}"
fi
export PS1
fi
# Make sure to unalias pydoc if it's already there
alias pydoc 2>/dev/null >/dev/null && unalias pydoc || true
pydoc () {
python -m pydoc "$@"
}
# The hash command must be called to get it to forget past
# commands. Without forgetting past commands the $PATH changes
# we made may not be respected
hash -r 2>/dev/null
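
The VIRTUAL_ENV variable exported above is what many tools key on to detect an activated venv; a common check from Python looks roughly like this (in_virtual_env is a hypothetical helper; the sys.base_prefix comparison additionally catches venvs whose interpreter is run without sourcing bin/activate):

import os
import sys

def in_virtual_env():
    # VIRTUAL_ENV is set by the activation scripts in this commit
    return bool(os.environ.get("VIRTUAL_ENV")) or sys.prefix != sys.base_prefix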

55
venv/bin/activate.csh

@@ -0,0 +1,55 @@
# This file must be used with "source bin/activate.csh" *from csh*.
# You cannot run it directly.
# Created by Davide Di Blasi <davidedb@gmail.com>.
set newline='\
'
alias deactivate 'test $?_OLD_VIRTUAL_PATH != 0 && setenv PATH "$_OLD_VIRTUAL_PATH:q" && unset _OLD_VIRTUAL_PATH; rehash; test $?_OLD_VIRTUAL_PROMPT != 0 && set prompt="$_OLD_VIRTUAL_PROMPT:q" && unset _OLD_VIRTUAL_PROMPT; unsetenv VIRTUAL_ENV; test "\!:*" != "nondestructive" && unalias deactivate && unalias pydoc'
# Unset irrelevant variables.
deactivate nondestructive
setenv VIRTUAL_ENV '/home/arthur/Documents/drive_sync/venv'
set _OLD_VIRTUAL_PATH="$PATH:q"
setenv PATH "$VIRTUAL_ENV:q/bin:$PATH:q"
if ('' != "") then
set env_name = '() '
else
set env_name = '('"$VIRTUAL_ENV:t:q"') '
endif
if ( $?VIRTUAL_ENV_DISABLE_PROMPT ) then
if ( $VIRTUAL_ENV_DISABLE_PROMPT == "" ) then
set do_prompt = "1"
else
set do_prompt = "0"
endif
else
set do_prompt = "1"
endif
if ( $do_prompt == "1" ) then
# Could be in a non-interactive environment,
# in which case, $prompt is undefined and we wouldn't
# care about the prompt anyway.
if ( $?prompt ) then
set _OLD_VIRTUAL_PROMPT="$prompt:q"
if ( "$prompt:q" =~ *"$newline:q"* ) then
:
else
set prompt = "$env_name:q$prompt:q"
endif
endif
endif
unset env_name
unset do_prompt
alias pydoc python -m pydoc
rehash

100
venv/bin/activate.fish

@@ -0,0 +1,100 @@
# This file must be used using `source bin/activate.fish` *within a running fish ( http://fishshell.com ) session*.
# Do not run it directly.
function _bashify_path -d "Converts a fish path to something bash can recognize"
set fishy_path $argv
set bashy_path $fishy_path[1]
for path_part in $fishy_path[2..-1]
set bashy_path "$bashy_path:$path_part"
end
echo $bashy_path
end
function _fishify_path -d "Converts a bash path to something fish can recognize"
echo $argv | tr ':' '\n'
end
function deactivate -d 'Exit virtualenv mode and return to the normal environment.'
# reset old environment variables
if test -n "$_OLD_VIRTUAL_PATH"
# https://github.com/fish-shell/fish-shell/issues/436 altered PATH handling
if test (echo $FISH_VERSION | head -c 1) -lt 3
set -gx PATH (_fishify_path "$_OLD_VIRTUAL_PATH")
else
set -gx PATH $_OLD_VIRTUAL_PATH
end
set -e _OLD_VIRTUAL_PATH
end
if test -n "$_OLD_VIRTUAL_PYTHONHOME"
set -gx PYTHONHOME "$_OLD_VIRTUAL_PYTHONHOME"
set -e _OLD_VIRTUAL_PYTHONHOME
end
if test -n "$_OLD_FISH_PROMPT_OVERRIDE"
and functions -q _old_fish_prompt
# Set an empty local `$fish_function_path` to allow the removal of `fish_prompt` using `functions -e`.
set -l fish_function_path
# Erase virtualenv's `fish_prompt` and restore the original.
functions -e fish_prompt
functions -c _old_fish_prompt fish_prompt
functions -e _old_fish_prompt
set -e _OLD_FISH_PROMPT_OVERRIDE
end
set -e VIRTUAL_ENV
if test "$argv[1]" != 'nondestructive'
# Self-destruct!
functions -e pydoc
functions -e deactivate
functions -e _bashify_path
functions -e _fishify_path
end
end
# Unset irrelevant variables.
deactivate nondestructive
set -gx VIRTUAL_ENV '/home/arthur/Documents/drive_sync/venv'
# https://github.com/fish-shell/fish-shell/issues/436 altered PATH handling
if test (echo $FISH_VERSION | head -c 1) -lt 3
set -gx _OLD_VIRTUAL_PATH (_bashify_path $PATH)
else
set -gx _OLD_VIRTUAL_PATH $PATH
end
set -gx PATH "$VIRTUAL_ENV"'/bin' $PATH
# Unset `$PYTHONHOME` if set.
if set -q PYTHONHOME
set -gx _OLD_VIRTUAL_PYTHONHOME $PYTHONHOME
set -e PYTHONHOME
end
function pydoc
python -m pydoc $argv
end
if test -z "$VIRTUAL_ENV_DISABLE_PROMPT"
# Copy the current `fish_prompt` function as `_old_fish_prompt`.
functions -c fish_prompt _old_fish_prompt
function fish_prompt
# Run the user's prompt first; it might depend on (pipe)status.
set -l prompt (_old_fish_prompt)
# Prompt override provided?
# If not, just prepend the environment name.
if test -n ''
printf '(%s) ' ''
else
printf '(%s) ' (basename "$VIRTUAL_ENV")
end
string join -- \n $prompt # handle multi-line prompts
end
set -gx _OLD_FISH_PROMPT_OVERRIDE "$VIRTUAL_ENV"
end

92
venv/bin/activate.nu

@@ -0,0 +1,92 @@
# This command prepares the required environment variables
def-env activate-virtualenv [] {
def is-string [x] {
($x | describe) == 'string'
}
def has-env [name: string] {
$name in (env).name
}
let is_windows = ((sys).host.name | str downcase) == 'windows'
let virtual_env = '/home/arthur/Documents/drive_sync/venv'
let bin = 'bin'
let path_sep = ':'
let path_name = if $is_windows {
if (has-env 'Path') {
'Path'
} else {
'PATH'
}
} else {
'PATH'
}
let old_path = (
if $is_windows {
if (has-env 'Path') {
$env.Path
} else {
$env.PATH
}
} else {
$env.PATH
} | if (is-string $in) {
# if Path/PATH is a string, make it a list
$in | split row $path_sep | path expand
} else {
$in
}
)
let venv_path = ([$virtual_env $bin] | path join)
let new_path = ($old_path | prepend $venv_path | str collect $path_sep)
# Creating the new prompt for the session
let virtual_prompt = if ('' == '') {
$'(char lparen)($virtual_env | path basename)(char rparen) '
} else {
'() '
}
# Back up the old prompt builder
let old_prompt_command = if (has-env 'VIRTUAL_ENV') && (has-env '_OLD_PROMPT_COMMAND') {
$env._OLD_PROMPT_COMMAND
} else {
if (has-env 'PROMPT_COMMAND') {
$env.PROMPT_COMMAND
} else {
''
}
}
# If there is no default prompt, then only the env is printed in the prompt
let new_prompt = if (has-env 'PROMPT_COMMAND') {
if ($old_prompt_command | describe) == 'block' {
{ $'($virtual_prompt)(do $old_prompt_command)' }
} else {
{ $'($virtual_prompt)($old_prompt_command)' }
}
} else {
{ $'($virtual_prompt)' }
}
# Environment variables that will be batched loaded to the virtual env
let new_env = {
$path_name : $new_path
VIRTUAL_ENV : $virtual_env
_OLD_VIRTUAL_PATH : ($old_path | str collect $path_sep)
_OLD_PROMPT_COMMAND : $old_prompt_command
PROMPT_COMMAND : $new_prompt
VIRTUAL_PROMPT : $virtual_prompt
}
# Activate the environment variables
load-env $new_env
}
# Activate the virtualenv
activate-virtualenv
alias pydoc = python -m pydoc
alias deactivate = source '/home/arthur/Documents/drive_sync/venv/bin/deactivate.nu'

60
venv/bin/activate.ps1

@@ -0,0 +1,60 @@
$script:THIS_PATH = $myinvocation.mycommand.path
$script:BASE_DIR = Split-Path (Resolve-Path "$THIS_PATH/..") -Parent
function global:deactivate([switch] $NonDestructive) {
if (Test-Path variable:_OLD_VIRTUAL_PATH) {
$env:PATH = $variable:_OLD_VIRTUAL_PATH
Remove-Variable "_OLD_VIRTUAL_PATH" -Scope global
}
if (Test-Path function:_old_virtual_prompt) {
$function:prompt = $function:_old_virtual_prompt
Remove-Item function:\_old_virtual_prompt
}
if ($env:VIRTUAL_ENV) {
Remove-Item env:VIRTUAL_ENV -ErrorAction SilentlyContinue
}
if (!$NonDestructive) {
# Self destruct!
Remove-Item function:deactivate
Remove-Item function:pydoc
}
}
function global:pydoc {
python -m pydoc $args
}
# unset irrelevant variables
deactivate -nondestructive
$VIRTUAL_ENV = $BASE_DIR
$env:VIRTUAL_ENV = $VIRTUAL_ENV
New-Variable -Scope global -Name _OLD_VIRTUAL_PATH -Value $env:PATH
$env:PATH = "$env:VIRTUAL_ENV/bin:" + $env:PATH
if (!$env:VIRTUAL_ENV_DISABLE_PROMPT) {
function global:_old_virtual_prompt {
""
}
$function:_old_virtual_prompt = $function:prompt
if ("" -ne "") {
function global:prompt {
# Add the custom prefix to the existing prompt
$previous_prompt_value = & $function:_old_virtual_prompt
("() " + $previous_prompt_value)
}
}
else {
function global:prompt {
# Add a prefix to the current prompt, but don't discard it.
$previous_prompt_value = & $function:_old_virtual_prompt
$new_prompt_value = "($( Split-Path $env:VIRTUAL_ENV -Leaf )) "
($new_prompt_value + $previous_prompt_value)
}
}
}

31
venv/bin/activate_this.py

@@ -0,0 +1,31 @@
"""Activate virtualenv for current interpreter:
Use exec(open(this_file).read(), {'__file__': this_file}).
This can be used when you must use an existing Python interpreter, not the virtualenv bin/python.
"""
import os
import site
import sys
try:
    abs_file = os.path.abspath(__file__)
except NameError:
    raise AssertionError("You must use exec(open(this_file).read(), {'__file__': this_file}))")
bin_dir = os.path.dirname(abs_file)
base = bin_dir[: -len("bin") - 1] # strip away the bin part from the __file__, plus the path separator
# prepend bin to PATH (this file is inside the bin directory)
os.environ["PATH"] = os.pathsep.join([bin_dir] + os.environ.get("PATH", "").split(os.pathsep))
os.environ["VIRTUAL_ENV"] = base # virtual env is right above bin directory
# add the virtual environments libraries to the host python import mechanism
prev_length = len(sys.path)
for lib in "../lib/python3.10/site-packages".split(os.pathsep):
    path = os.path.realpath(os.path.join(bin_dir, lib))
    site.addsitedir(path.decode("utf-8") if "" else path)
sys.path[:] = sys.path[prev_length:] + sys.path[0:prev_length]
sys.real_prefix = sys.prefix
sys.prefix = base
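
As the docstring above describes, the script is meant to be exec'd by an already-running interpreter; with the venv path recorded in this commit, that would look like:

# run from any Python interpreter to activate this venv in-process
activator = "/home/arthur/Documents/drive_sync/venv/bin/activate_this.py"
with open(activator) as f:
    exec(f.read(), {"__file__": activator})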

32
venv/bin/deactivate.nu

@@ -0,0 +1,32 @@
def-env deactivate-virtualenv [] {
def has-env [name: string] {
$name in (env).name
}
let is_windows = ((sys).host.name | str downcase) == 'windows'
let path_name = if $is_windows {
if (has-env 'Path') {
'Path'
} else {
'PATH'
}
} else {
'PATH'
}
load-env { $path_name : $env._OLD_VIRTUAL_PATH }
let-env PROMPT_COMMAND = $env._OLD_PROMPT_COMMAND
# Hiding the environment variables that were created when activating the env
hide _OLD_VIRTUAL_PATH
hide _OLD_PROMPT_COMMAND
hide VIRTUAL_ENV
hide VIRTUAL_PROMPT
}
deactivate-virtualenv
hide pydoc
hide deactivate

8
venv/bin/google-oauthlib-tool

@@ -0,0 +1,8 @@
#!/home/arthur/Documents/drive_sync/venv/bin/python
# -*- coding: utf-8 -*-
import re
import sys
from google_auth_oauthlib.tool.__main__ import main
if __name__ == '__main__':
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    sys.exit(main())
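
All of the console-script wrappers in this commit share the pattern above: import the entry point, normalize sys.argv[0], and exit with the entry point's return value. The re.sub call simply strips the Windows launcher suffixes from argv[0]; a quick illustration (strip_launcher_suffix is a name made up for this note):

import re

def strip_launcher_suffix(argv0):
    # same substitution the wrapper performs on sys.argv[0]
    return re.sub(r'(-script\.pyw|\.exe)?$', '', argv0)

print(strip_launcher_suffix('google-oauthlib-tool-script.pyw'))  # google-oauthlib-tool
print(strip_launcher_suffix('google-oauthlib-tool.exe'))         # google-oauthlib-tool
print(strip_launcher_suffix('google-oauthlib-tool'))             # unchanged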

8
venv/bin/normalizer

@@ -0,0 +1,8 @@
#!/home/arthur/Documents/drive_sync/venv/bin/python3
# -*- coding: utf-8 -*-
import re
import sys
from charset_normalizer.cli.normalizer import cli_detect
if __name__ == '__main__':
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    sys.exit(cli_detect())

8
venv/bin/pip

@@ -0,0 +1,8 @@
#!/home/arthur/Documents/drive_sync/venv/bin/python
# -*- coding: utf-8 -*-
import re
import sys
from pip._internal.cli.main import main
if __name__ == '__main__':
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    sys.exit(main())

8
venv/bin/pip-3.10

@@ -0,0 +1,8 @@
#!/home/arthur/Documents/drive_sync/venv/bin/python
# -*- coding: utf-8 -*-
import re
import sys
from pip._internal.cli.main import main
if __name__ == '__main__':
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    sys.exit(main())

8
venv/bin/pip3

@@ -0,0 +1,8 @@
#!/home/arthur/Documents/drive_sync/venv/bin/python
# -*- coding: utf-8 -*-
import re
import sys
from pip._internal.cli.main import main
if __name__ == '__main__':
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    sys.exit(main())

8
venv/bin/pip3.10

@@ -0,0 +1,8 @@
#!/home/arthur/Documents/drive_sync/venv/bin/python
# -*- coding: utf-8 -*-
import re
import sys
from pip._internal.cli.main import main
if __name__ == '__main__':
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    sys.exit(main())

8
venv/bin/pyrsa-decrypt

@@ -0,0 +1,8 @@
#!/home/arthur/Documents/drive_sync/venv/bin/python3
# -*- coding: utf-8 -*-
import re
import sys
from rsa.cli import decrypt
if __name__ == '__main__':
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    sys.exit(decrypt())

8
venv/bin/pyrsa-encrypt

@@ -0,0 +1,8 @@
#!/home/arthur/Documents/drive_sync/venv/bin/python3
# -*- coding: utf-8 -*-
import re
import sys
from rsa.cli import encrypt
if __name__ == '__main__':
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    sys.exit(encrypt())

8
venv/bin/pyrsa-keygen

@@ -0,0 +1,8 @@
#!/home/arthur/Documents/drive_sync/venv/bin/python3
# -*- coding: utf-8 -*-
import re
import sys
from rsa.cli import keygen
if __name__ == '__main__':
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    sys.exit(keygen())

8
venv/bin/pyrsa-priv2pub

@@ -0,0 +1,8 @@
#!/home/arthur/Documents/drive_sync/venv/bin/python3
# -*- coding: utf-8 -*-
import re
import sys
from rsa.util import private_to_public
if __name__ == '__main__':
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    sys.exit(private_to_public())

8
venv/bin/pyrsa-sign

@@ -0,0 +1,8 @@
#!/home/arthur/Documents/drive_sync/venv/bin/python3
# -*- coding: utf-8 -*-
import re
import sys
from rsa.cli import sign
if __name__ == '__main__':
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    sys.exit(sign())

8
venv/bin/pyrsa-verify

@@ -0,0 +1,8 @@
#!/home/arthur/Documents/drive_sync/venv/bin/python3
# -*- coding: utf-8 -*-
import re
import sys
from rsa.cli import verify
if __name__ == '__main__':
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    sys.exit(verify())

1
venv/bin/python

@@ -0,0 +1 @@
/usr/bin/python3

1
venv/bin/python3

@@ -0,0 +1 @@
python

1
venv/bin/python3.10

@@ -0,0 +1 @@
python

8
venv/bin/wheel

@@ -0,0 +1,8 @@
#!/home/arthur/Documents/drive_sync/venv/bin/python
# -*- coding: utf-8 -*-
import re
import sys
from wheel.cli import main
if __name__ == '__main__':
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    sys.exit(main())

8
venv/bin/wheel-3.10

@@ -0,0 +1,8 @@
#!/home/arthur/Documents/drive_sync/venv/bin/python
# -*- coding: utf-8 -*-
import re
import sys
from wheel.cli import main
if __name__ == '__main__':
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    sys.exit(main())

8
venv/bin/wheel3

@@ -0,0 +1,8 @@
#!/home/arthur/Documents/drive_sync/venv/bin/python
# -*- coding: utf-8 -*-
import re
import sys
from wheel.cli import main
if __name__ == '__main__':
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    sys.exit(main())

8
venv/bin/wheel3.10

@@ -0,0 +1,8 @@
#!/home/arthur/Documents/drive_sync/venv/bin/python
# -*- coding: utf-8 -*-
import re
import sys
from wheel.cli import main
if __name__ == '__main__':
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    sys.exit(main())

BIN
venv/lib/python3.10/site-packages/__pycache__/_virtualenv.cpython-310.pyc

Binary file not shown.

BIN
venv/lib/python3.10/site-packages/__pycache__/google_auth_httplib2.cpython-310.pyc

Binary file not shown.

BIN
venv/lib/python3.10/site-packages/__pycache__/six.cpython-310.pyc

Binary file not shown.

132
venv/lib/python3.10/site-packages/_distutils_hack/__init__.py

@@ -0,0 +1,132 @@
import sys
import os
import re
import importlib
import warnings
is_pypy = '__pypy__' in sys.builtin_module_names
warnings.filterwarnings('ignore',
                        r'.+ distutils\b.+ deprecated',
                        DeprecationWarning)

def warn_distutils_present():
    if 'distutils' not in sys.modules:
        return
    if is_pypy and sys.version_info < (3, 7):
        # PyPy for 3.6 unconditionally imports distutils, so bypass the warning
        # https://foss.heptapod.net/pypy/pypy/-/blob/be829135bc0d758997b3566062999ee8b23872b4/lib-python/3/site.py#L250
        return
    warnings.warn(
        "Distutils was imported before Setuptools, but importing Setuptools "
        "also replaces the `distutils` module in `sys.modules`. This may lead "
        "to undesirable behaviors or errors. To avoid these issues, avoid "
        "using distutils directly, ensure that setuptools is installed in the "
        "traditional way (e.g. not an editable install), and/or make sure "
        "that setuptools is always imported before distutils.")

def clear_distutils():
    if 'distutils' not in sys.modules:
        return
    warnings.warn("Setuptools is replacing distutils.")
    mods = [name for name in sys.modules if re.match(r'distutils\b', name)]
    for name in mods:
        del sys.modules[name]

def enabled():
    """
    Allow selection of distutils by environment variable.
    """
    which = os.environ.get('SETUPTOOLS_USE_DISTUTILS', 'stdlib')
    return which == 'local'

def ensure_local_distutils():
    clear_distutils()
    # With the DistutilsMetaFinder in place,
    # perform an import to cause distutils to be
    # loaded from setuptools._distutils. Ref #2906.
    add_shim()
    importlib.import_module('distutils')
    remove_shim()
    # check that submodules load as expected
    core = importlib.import_module('distutils.core')
    assert '_distutils' in core.__file__, core.__file__

def do_override():
    """
    Ensure that the local copy of distutils is preferred over stdlib.
    See https://github.com/pypa/setuptools/issues/417#issuecomment-392298401
    for more motivation.
    """
    if enabled():
        warn_distutils_present()
        ensure_local_distutils()

class DistutilsMetaFinder:
    def find_spec(self, fullname, path, target=None):
        if path is not None:
            return
        method_name = 'spec_for_{fullname}'.format(**locals())
        method = getattr(self, method_name, lambda: None)
        return method()

    def spec_for_distutils(self):
        import importlib.abc
        import importlib.util

        class DistutilsLoader(importlib.abc.Loader):
            def create_module(self, spec):
                return importlib.import_module('setuptools._distutils')

            def exec_module(self, module):
                pass

        return importlib.util.spec_from_loader('distutils', DistutilsLoader())

    def spec_for_pip(self):
        """
        Ensure stdlib distutils when running under pip.
        See pypa/pip#8761 for rationale.
        """
        if self.pip_imported_during_build():
            return
        clear_distutils()
        self.spec_for_distutils = lambda: None

    @staticmethod
    def pip_imported_during_build():
        """
        Detect if pip is being imported in a build script. Ref #2355.
        """
        import traceback
        return any(
            frame.f_globals['__file__'].endswith('setup.py')
            for frame, line in traceback.walk_stack(None)
        )

DISTUTILS_FINDER = DistutilsMetaFinder()

def add_shim():
    sys.meta_path.insert(0, DISTUTILS_FINDER)

def remove_shim():
    try:
        sys.meta_path.remove(DISTUTILS_FINDER)
    except ValueError:
        pass
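
Engaging the shim is a two-step affair, mirrored by override.py just below: opt in via the SETUPTOOLS_USE_DISTUTILS environment variable (the default above is 'stdlib') and call do_override(). A minimal sketch, assuming setuptools is installed as it is in this venv:

import os

# opt in to the setuptools-bundled distutils before anything imports distutils
os.environ["SETUPTOOLS_USE_DISTUTILS"] = "local"
__import__("_distutils_hack").do_override()

import distutils.core  # now served from setuptools._distutils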

1
venv/lib/python3.10/site-packages/_distutils_hack/override.py

@@ -0,0 +1 @@
__import__('_distutils_hack').do_override()

1
venv/lib/python3.10/site-packages/_virtualenv.pth

@@ -0,0 +1 @@
import _virtualenv

130
venv/lib/python3.10/site-packages/_virtualenv.py

@@ -0,0 +1,130 @@
"""Patches that are applied at runtime to the virtual environment"""
# -*- coding: utf-8 -*-
import os
import sys
VIRTUALENV_PATCH_FILE = os.path.join(__file__)
def patch_dist(dist):
    """
    Distutils allows user to configure some arguments via a configuration file:
    https://docs.python.org/3/install/index.html#distutils-configuration-files
    Some of these arguments, though, don't make sense in the context of the virtual environment files; let's fix them up.
    """
    # we cannot allow some install config as that would get packages installed outside of the virtual environment
    old_parse_config_files = dist.Distribution.parse_config_files

    def parse_config_files(self, *args, **kwargs):
        result = old_parse_config_files(self, *args, **kwargs)
        install = self.get_option_dict("install")
        if "prefix" in install:  # the prefix governs where to install the libraries
            install["prefix"] = VIRTUALENV_PATCH_FILE, os.path.abspath(sys.prefix)
        for base in ("purelib", "platlib", "headers", "scripts", "data"):
            key = "install_{}".format(base)
            if key in install:  # do not allow global configs to hijack venv paths
                install.pop(key, None)
        return result

    dist.Distribution.parse_config_files = parse_config_files

# Import hook that patches some modules to ignore configuration values that break package installation in case
# of virtual environments.
_DISTUTILS_PATCH = "distutils.dist", "setuptools.dist"
if sys.version_info > (3, 4):
    # https://docs.python.org/3/library/importlib.html#setting-up-an-importer
    class _Finder:
        """A meta path finder that allows patching the imported distutils modules"""

        fullname = None
        # lock[0] is threading.Lock(), but initialized lazily to avoid importing threading very early at startup,
        # because there are gevent-based applications that need to be first to import threading by themselves.
        # See https://github.com/pypa/virtualenv/issues/1895 for details.
        lock = []

        def find_spec(self, fullname, path, target=None):  # noqa: U100
            if fullname in _DISTUTILS_PATCH and self.fullname is None:
                # initialize lock[0] lazily
                if len(self.lock) == 0:
                    import threading

                    lock = threading.Lock()
                    # there is possibility that two threads T1 and T2 are simultaneously running into find_spec,
                    # observing .lock as empty, and further going into hereby initialization. However due to the GIL,
                    # list.append() operation is atomic and this way only one of the threads will "win" to put the lock
                    # - that every thread will use - into .lock[0].
                    # https://docs.python.org/3/faq/library.html#what-kinds-of-global-value-mutation-are-thread-safe
                    self.lock.append(lock)

                from functools import partial
                from importlib.util import find_spec

                with self.lock[0]:
                    self.fullname = fullname
                    try:
                        spec = find_spec(fullname, path)
                        if spec is not None:
                            # https://www.python.org/dev/peps/pep-0451/#how-loading-will-work
                            is_new_api = hasattr(spec.loader, "exec_module")
                            func_name = "exec_module" if is_new_api else "load_module"
                            old = getattr(spec.loader, func_name)
                            func = self.exec_module if is_new_api else self.load_module
                            if old is not func:
                                try:
                                    setattr(spec.loader, func_name, partial(func, old))
                                except AttributeError:
                                    pass  # C-Extension loaders are r/o such as zipimporter with <python 3.7
                            return spec
                    finally:
                        self.fullname = None

        @staticmethod
        def exec_module(old, module):
            old(module)
            if module.__name__ in _DISTUTILS_PATCH:
                patch_dist(module)

        @staticmethod
        def load_module(old, name):
            module = old(name)
            if module.__name__ in _DISTUTILS_PATCH:
                patch_dist(module)
            return module

    sys.meta_path.insert(0, _Finder())
else:
    # https://www.python.org/dev/peps/pep-0302/
    from imp import find_module
    from pkgutil import ImpImporter, ImpLoader

    class _VirtualenvImporter(object, ImpImporter):
        def __init__(self, path=None):
            object.__init__(self)
            ImpImporter.__init__(self, path)

        def find_module(self, fullname, path=None):
            if fullname in _DISTUTILS_PATCH:
                try:
                    return _VirtualenvLoader(fullname, *find_module(fullname.split(".")[-1], path))
                except ImportError:
                    pass
            return None

    class _VirtualenvLoader(object, ImpLoader):
        def __init__(self, fullname, file, filename, etc):
            object.__init__(self)
            ImpLoader.__init__(self, fullname, file, filename, etc)

        def load_module(self, fullname):
            module = super(_VirtualenvLoader, self).load_module(fullname)
            patch_dist(module)
            module.__loader__ = None  # distlib fallback
            return module

    sys.meta_path.append(_VirtualenvImporter())
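
The meta-path trick above (return the real spec, but wrap the loader's exec_module so a patch runs right after the module body executes) can be hard to follow inline; here is a stripped-down, self-contained sketch of the same mechanism. It is not the virtualenv code itself: json.tool is an arbitrary demo target, and PatchingFinder/patch are names invented for the example.

import sys
from functools import partial
from importlib.util import find_spec

def patch(module):
    # stand-in for patch_dist(); just report that the hook ran
    print("patched", module.__name__)

class PatchingFinder:
    target = "json.tool"  # arbitrary demo target; _virtualenv.py targets distutils.dist / setuptools.dist
    _busy = False         # guard against re-entering find_spec while we look the module up ourselves

    def find_spec(self, fullname, path, target=None):
        if fullname != self.target or self._busy:
            return None
        self._busy = True
        try:
            spec = find_spec(fullname)  # let the regular finders locate the module
        finally:
            self._busy = False
        if spec is not None and hasattr(spec.loader, "exec_module"):
            old = spec.loader.exec_module
            spec.loader.exec_module = partial(self._exec, old)
        return spec

    @staticmethod
    def _exec(old, module):
        old(module)    # run the module body as usual...
        patch(module)  # ...then apply the patch

sys.meta_path.insert(0, PatchingFinder())
import json.tool  # prints: patched json.tool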

27
venv/lib/python3.10/site-packages/apiclient/__init__.py

@@ -0,0 +1,27 @@
"""Retain apiclient as an alias for googleapiclient."""
from googleapiclient import channel, discovery, errors, http, mimeparse, model
try:
    from googleapiclient import sample_tools
except ImportError:
    # Silently ignore, because the vast majority of consumers won't use it and
    # it has deep dependence on oauth2client, an optional dependency.
    sample_tools = None
from googleapiclient import schema
_SUBMODULES = {
"channel": channel,
"discovery": discovery,
"errors": errors,
"http": http,
"mimeparse": mimeparse,
"model": model,
"sample_tools": sample_tools,
"schema": schema,
}
import sys
for module_name, module in _SUBMODULES.items():
    sys.modules["apiclient.%s" % module_name] = module
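
The net effect of the aliasing above is that apiclient and its submodule names resolve to the corresponding googleapiclient modules. A quick check (assumes google-api-python-client is installed, as it is in this venv):

import sys

import apiclient
import googleapiclient.discovery

assert apiclient.discovery is googleapiclient.discovery
assert sys.modules["apiclient.discovery"] is googleapiclient.discovery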

BIN
venv/lib/python3.10/site-packages/apiclient/__pycache__/__init__.cpython-310.pyc

Binary file not shown.

1
venv/lib/python3.10/site-packages/cachetools-5.2.0.dist-info/INSTALLER

@@ -0,0 +1 @@
pip

20
venv/lib/python3.10/site-packages/cachetools-5.2.0.dist-info/LICENSE

@@ -0,0 +1,20 @@
The MIT License (MIT)
Copyright (c) 2014-2022 Thomas Kemmer
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

146
venv/lib/python3.10/site-packages/cachetools-5.2.0.dist-info/METADATA

@@ -0,0 +1,146 @@
Metadata-Version: 2.1
Name: cachetools
Version: 5.2.0
Summary: Extensible memoizing collections and decorators
Home-page: https://github.com/tkem/cachetools/
Author: Thomas Kemmer
Author-email: tkemmer@computer.org
License: MIT
Classifier: Development Status :: 5 - Production/Stable
Classifier: Environment :: Other Environment
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: MIT License
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Requires-Python: ~=3.7
License-File: LICENSE
cachetools
========================================================================
.. image:: https://img.shields.io/pypi/v/cachetools
:target: https://pypi.org/project/cachetools/
:alt: Latest PyPI version
.. image:: https://img.shields.io/github/workflow/status/tkem/cachetools/CI
:target: https://github.com/tkem/cachetools/actions/workflows/ci.yml
:alt: CI build status
.. image:: https://img.shields.io/readthedocs/cachetools
:target: https://cachetools.readthedocs.io/
:alt: Documentation build status
.. image:: https://img.shields.io/codecov/c/github/tkem/cachetools/master.svg
:target: https://codecov.io/gh/tkem/cachetools
:alt: Test coverage
.. image:: https://img.shields.io/librariesio/sourcerank/pypi/cachetools
:target: https://libraries.io/pypi/cachetools
:alt: Libraries.io SourceRank
.. image:: https://img.shields.io/github/license/tkem/cachetools
:target: https://raw.github.com/tkem/cachetools/master/LICENSE
:alt: License
.. image:: https://img.shields.io/badge/code%20style-black-000000.svg
:target: https://github.com/psf/black
:alt: Code style: black
This module provides various memoizing collections and decorators,
including variants of the Python Standard Library's `@lru_cache`_
function decorator.
.. code-block:: python

   from cachetools import cached, LRUCache, TTLCache

   # speed up calculating Fibonacci numbers with dynamic programming
   @cached(cache={})
   def fib(n):
       return n if n < 2 else fib(n - 1) + fib(n - 2)

   # cache least recently used Python Enhancement Proposals
   @cached(cache=LRUCache(maxsize=32))
   def get_pep(num):
       url = 'http://www.python.org/dev/peps/pep-%04d/' % num
       with urllib.request.urlopen(url) as s:
           return s.read()

   # cache weather data for no longer than ten minutes
   @cached(cache=TTLCache(maxsize=1024, ttl=600))
   def get_weather(place):
       return owm.weather_at_place(place).get_weather()
For the purpose of this module, a *cache* is a mutable_ mapping_ of a
fixed maximum size. When the cache is full, i.e. by adding another
item the cache would exceed its maximum size, the cache must choose
which item(s) to discard based on a suitable `cache algorithm`_.
This module provides multiple cache classes based on different cache
algorithms, as well as decorators for easily memoizing function and
method calls.
Installation
------------------------------------------------------------------------
cachetools is available from PyPI_ and can be installed by running::
pip install cachetools
Typing stubs for this package are provided by typeshed_ and can be
installed by running::
pip install types-cachetools
Project Resources
------------------------------------------------------------------------
- `Documentation`_
- `Issue tracker`_
- `Source code`_
- `Change log`_
Related Projects
------------------------------------------------------------------------
- asyncache_: Helpers to use cachetools with async functions
- CacheToolsUtils_: Cachetools Utilities
- `kids.cache`_: Kids caching library
- shelved-cache_: Persistent cache for Python cachetools
License
------------------------------------------------------------------------
Copyright (c) 2014-2022 Thomas Kemmer.
Licensed under the `MIT License`_.
.. _@lru_cache: https://docs.python.org/3/library/functools.html#functools.lru_cache
.. _mutable: https://docs.python.org/dev/glossary.html#term-mutable
.. _mapping: https://docs.python.org/dev/glossary.html#term-mapping
.. _cache algorithm: https://en.wikipedia.org/wiki/Cache_algorithms
.. _PyPI: https://pypi.org/project/cachetools/
.. _typeshed: https://github.com/python/typeshed/
.. _Documentation: https://cachetools.readthedocs.io/
.. _Issue tracker: https://github.com/tkem/cachetools/issues/
.. _Source code: https://github.com/tkem/cachetools/
.. _Change log: https://github.com/tkem/cachetools/blob/master/CHANGELOG.rst
.. _MIT License: https://raw.github.com/tkem/cachetools/master/LICENSE
.. _asyncache: https://pypi.org/project/asyncache/
.. _CacheToolsUtils: https://pypi.org/project/CacheToolsUtils/
.. _kids.cache: https://pypi.org/project/kids.cache/
.. _shelved-cache: https://pypi.org/project/shelved-cache/
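
Beyond the decorator examples above, the cache classes can also be used directly as bounded mappings; a small illustration based on the cachetools 5.2.0 source included in this commit:

from cachetools import LRUCache

cache = LRUCache(maxsize=2)
cache["a"] = 1
cache["b"] = 2
cache["a"]      # touching "a" makes "b" the least recently used entry
cache["c"] = 3  # inserting a third item evicts "b"
assert "b" not in cache and set(cache) == {"a", "c"}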

12
venv/lib/python3.10/site-packages/cachetools-5.2.0.dist-info/RECORD

@@ -0,0 +1,12 @@
cachetools-5.2.0.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
cachetools-5.2.0.dist-info/LICENSE,sha256=diYME3Cn1B1frHGifXgfOt1dckmt-7-pMIRtLZ5H29U,1085
cachetools-5.2.0.dist-info/METADATA,sha256=cofhuzJUGMo5kVMpE1yvKFxOMAGYF9Fe14UJEtUTr4s,5124
cachetools-5.2.0.dist-info/RECORD,,
cachetools-5.2.0.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
cachetools-5.2.0.dist-info/top_level.txt,sha256=ai2FH78TGwoBcCgVfoqbzk5IQCtnDukdSs4zKuVPvDs,11
cachetools/__init__.py,sha256=rEErTnGMZszbSHz8POD5h_DJgVOuocRelxu6G8zMOlY,21785
cachetools/__pycache__/__init__.cpython-310.pyc,,
cachetools/__pycache__/func.cpython-310.pyc,,
cachetools/__pycache__/keys.cpython-310.pyc,,
cachetools/func.py,sha256=c5CAlRae2hUymZaeAJi6GEc_VpZIn6Mosi95bAlQEcs,4934
cachetools/keys.py,sha256=d-cpW252E_uV50ySlw13IevdNQnSc0MfiMViImQktRI,1613

5
venv/lib/python3.10/site-packages/cachetools-5.2.0.dist-info/WHEEL

@@ -0,0 +1,5 @@
Wheel-Version: 1.0
Generator: bdist_wheel (0.37.1)
Root-Is-Purelib: true
Tag: py3-none-any

1
venv/lib/python3.10/site-packages/cachetools-5.2.0.dist-info/top_level.txt

@@ -0,0 +1 @@
cachetools

745
venv/lib/python3.10/site-packages/cachetools/__init__.py

@@ -0,0 +1,745 @@
"""Extensible memoizing collections and decorators."""
__all__ = (
"Cache",
"FIFOCache",
"LFUCache",
"LRUCache",
"MRUCache",
"RRCache",
"TLRUCache",
"TTLCache",
"cached",
"cachedmethod",
)
__version__ = "5.2.0"
import collections
import collections.abc
import functools
import heapq
import random
import time
from . import keys
class _DefaultSize:
__slots__ = ()
def __getitem__(self, _):
return 1
def __setitem__(self, _, value):
assert value == 1
def pop(self, _):
return 1
class Cache(collections.abc.MutableMapping):
"""Mutable mapping to serve as a simple cache or cache base class."""
__marker = object()
__size = _DefaultSize()
def __init__(self, maxsize, getsizeof=None):
if getsizeof:
self.getsizeof = getsizeof
if self.getsizeof is not Cache.getsizeof:
self.__size = dict()
self.__data = dict()
self.__currsize = 0
self.__maxsize = maxsize
def __repr__(self):
return "%s(%s, maxsize=%r, currsize=%r)" % (
self.__class__.__name__,
repr(self.__data),
self.__maxsize,
self.__currsize,
)
def __getitem__(self, key):
try:
return self.__data[key]
except KeyError:
return self.__missing__(key)
def __setitem__(self, key, value):
maxsize = self.__maxsize
size = self.getsizeof(value)
if size > maxsize:
raise ValueError("value too large")
if key not in self.__data or self.__size[key] < size:
while self.__currsize + size > maxsize:
self.popitem()
if key in self.__data:
diffsize = size - self.__size[key]
else:
diffsize = size
self.__data[key] = value
self.__size[key] = size
self.__currsize += diffsize
def __delitem__(self, key):
size = self.__size.pop(key)
del self.__data[key]
self.__currsize -= size
def __contains__(self, key):
return key in self.__data
def __missing__(self, key):
raise KeyError(key)
def __iter__(self):
return iter(self.__data)
def __len__(self):
return len(self.__data)
def get(self, key, default=None):
if key in self:
return self[key]
else:
return default
def pop(self, key, default=__marker):
if key in self:
value = self[key]
del self[key]
elif default is self.__marker:
raise KeyError(key)
else:
value = default
return value
def setdefault(self, key, default=None):
if key in self:
value = self[key]
else:
self[key] = value = default
return value
@property
def maxsize(self):
"""The maximum size of the cache."""
return self.__maxsize
@property
def currsize(self):
"""The current size of the cache."""
return self.__currsize
@staticmethod
def getsizeof(value):
"""Return the size of a cache element's value."""
return 1
class FIFOCache(Cache):
"""First In First Out (FIFO) cache implementation."""
def __init__(self, maxsize, getsizeof=None):
Cache.__init__(self, maxsize, getsizeof)
self.__order = collections.OrderedDict()
def __setitem__(self, key, value, cache_setitem=Cache.__setitem__):
cache_setitem(self, key, value)
try:
self.__order.move_to_end(key)
except KeyError:
self.__order[key] = None
def __delitem__(self, key, cache_delitem=Cache.__delitem__):
cache_delitem(self, key)
del self.__order[key]
def popitem(self):
"""Remove and return the `(key, value)` pair first inserted."""
try:
key = next(iter(self.__order))
except StopIteration:
raise KeyError("%s is empty" % type(self).__name__) from None
else:
return (key, self.pop(key))
class LFUCache(Cache):
"""Least Frequently Used (LFU) cache implementation."""
def __init__(self, maxsize, getsizeof=None):
Cache.__init__(self, maxsize, getsizeof)
self.__counter = collections.Counter()
def __getitem__(self, key, cache_getitem=Cache.__getitem__):
value = cache_getitem(self, key)
if key in self: # __missing__ may not store item
self.__counter[key] -= 1
return value
def __setitem__(self, key, value, cache_setitem=Cache.__setitem__):
cache_setitem(self, key, value)
self.__counter[key] -= 1
def __delitem__(self, key, cache_delitem=Cache.__delitem__):
cache_delitem(self, key)
del self.__counter[key]
def popitem(self):
"""Remove and return the `(key, value)` pair least frequently used."""
try:
((key, _),) = self.__counter.most_common(1)
except ValueError:
raise KeyError("%s is empty" % type(self).__name__) from None
else:
return (key, self.pop(key))
class LRUCache(Cache):
"""Least Recently Used (LRU) cache implementation."""
def __init__(self, maxsize, getsizeof=None):
Cache.__init__(self, maxsize, getsizeof)
self.__order = collections.OrderedDict()
def __getitem__(self, key, cache_getitem=Cache.__getitem__):
value = cache_getitem(self, key)
if key in self: # __missing__ may not store item
self.__update(key)
return value
def __setitem__(self, key, value, cache_setitem=Cache.__setitem__):
cache_setitem(self, key, value)
self.__update(key)
def __delitem__(self, key, cache_delitem=Cache.__delitem__):
cache_delitem(self, key)
del self.__order[key]
def popitem(self):
"""Remove and return the `(key, value)` pair least recently used."""
try:
key = next(iter(self.__order))
except StopIteration:
raise KeyError("%s is empty" % type(self).__name__) from None
else:
return (key, self.pop(key))
def __update(self, key):
try:
self.__order.move_to_end(key)
except KeyError:
self.__order[key] = None
class MRUCache(Cache):
"""Most Recently Used (MRU) cache implementation."""
def __init__(self, maxsize, getsizeof=None):
Cache.__init__(self, maxsize, getsizeof)
self.__order = collections.OrderedDict()
def __getitem__(self, key, cache_getitem=Cache.__getitem__):
value = cache_getitem(self, key)
if key in self: # __missing__ may not store item
self.__update(key)
return value
def __setitem__(self, key, value, cache_setitem=Cache.__setitem__):
cache_setitem(self, key, value)
self.__update(key)
def __delitem__(self, key, cache_delitem=Cache.__delitem__):
cache_delitem(self, key)
del self.__order[key]
def popitem(self):
"""Remove and return the `(key, value)` pair most recently used."""
try:
key = next(iter(self.__order))
except StopIteration:
raise KeyError("%s is empty" % type(self).__name__) from None
else:
return (key, self.pop(key))
def __update(self, key):
try:
self.__order.move_to_end(key, last=False)
except KeyError:
self.__order[key] = None
class RRCache(Cache):
"""Random Replacement (RR) cache implementation."""
def __init__(self, maxsize, choice=random.choice, getsizeof=None):
Cache.__init__(self, maxsize, getsizeof)
self.__choice = choice
@property
def choice(self):
"""The `choice` function used by the cache."""
return self.__choice
def popitem(self):
"""Remove and return a random `(key, value)` pair."""
try:
key = self.__choice(list(self))
except IndexError:
raise KeyError("%s is empty" % type(self).__name__) from None
else:
return (key, self.pop(key))
class _TimedCache(Cache):
"""Base class for time aware cache implementations."""
class _Timer:
def __init__(self, timer):
self.__timer = timer
self.__nesting = 0
def __call__(self):
if self.__nesting == 0:
return self.__timer()
else:
return self.__time
def __enter__(self):
if self.__nesting == 0:
self.__time = time = self.__timer()
else:
time = self.__time
self.__nesting += 1
return time
def __exit__(self, *exc):
self.__nesting -= 1
def __reduce__(self):
return _TimedCache._Timer, (self.__timer,)
def __getattr__(self, name):
return getattr(self.__timer, name)
def __init__(self, maxsize, timer=time.monotonic, getsizeof=None):
Cache.__init__(self, maxsize, getsizeof)
self.__timer = _TimedCache._Timer(timer)
def __repr__(self, cache_repr=Cache.__repr__):
with self.__timer as time:
self.expire(time)
return cache_repr(self)
def __len__(self, cache_len=Cache.__len__):
with self.__timer as time:
self.expire(time)
return cache_len(self)
@property
def currsize(self):
with self.__timer as time:
self.expire(time)
return super().currsize
@property
def timer(self):
"""The timer function used by the cache."""
return self.__timer
def clear(self):
with self.__timer as time:
self.expire(time)
Cache.clear(self)
def get(self, *args, **kwargs):
with self.__timer:
return Cache.get(self, *args, **kwargs)
def pop(self, *args, **kwargs):
with self.__timer:
return Cache.pop(self, *args, **kwargs)
def setdefault(self, *args, **kwargs):
with self.__timer:
return Cache.setdefault(self, *args, **kwargs)
class TTLCache(_TimedCache):
"""LRU Cache implementation with per-item time-to-live (TTL) value."""
class _Link:
__slots__ = ("key", "expires", "next", "prev")
def __init__(self, key=None, expires=None):
self.key = key
self.expires = expires
def __reduce__(self):
return TTLCache._Link, (self.key, self.expires)
def unlink(self):
next = self.next
prev = self.prev
prev.next = next
next.prev = prev
def __init__(self, maxsize, ttl, timer=time.monotonic, getsizeof=None):
_TimedCache.__init__(self, maxsize, timer, getsizeof)
self.__root = root = TTLCache._Link()
root.prev = root.next = root
self.__links = collections.OrderedDict()
self.__ttl = ttl
def __contains__(self, key):
try:
link = self.__links[key] # no reordering
except KeyError:
return False
else:
return self.timer() < link.expires
def __getitem__(self, key, cache_getitem=Cache.__getitem__):
try:
link = self.__getlink(key)
except KeyError:
expired = False
else:
expired = not (self.timer() < link.expires)
if expired:
return self.__missing__(key)
else:
return cache_getitem(self, key)
def __setitem__(self, key, value, cache_setitem=Cache.__setitem__):
with self.timer as time:
self.expire(time)
cache_setitem(self, key, value)
try:
link = self.__getlink(key)
except KeyError:
self.__links[key] = link = TTLCache._Link(key)
else:
link.unlink()
link.expires = time + self.__ttl
link.next = root = self.__root
link.prev = prev = root.prev
prev.next = root.prev = link
def __delitem__(self, key, cache_delitem=Cache.__delitem__):
cache_delitem(self, key)
link = self.__links.pop(key)
link.unlink()
if not (self.timer() < link.expires):
raise KeyError(key)
def __iter__(self):
root = self.__root
curr = root.next
while curr is not root:
# "freeze" time for iterator access
with self.timer as time:
if time < curr.expires:
yield curr.key
curr = curr.next
def __setstate__(self, state):
self.__dict__.update(state)
root = self.__root
root.prev = root.next = root
for link in sorted(self.__links.values(), key=lambda obj: obj.expires):
link.next = root
link.prev = prev = root.prev
prev.next = root.prev = link
self.expire(self.timer())
@property
def ttl(self):
"""The time-to-live value of the cache's items."""
return self.__ttl
def expire(self, time=None):
"""Remove expired items from the cache."""
if time is None:
time = self.timer()
root = self.__root
curr = root.next
links = self.__links
cache_delitem = Cache.__delitem__
while curr is not root and not (time < curr.expires):
cache_delitem(self, curr.key)
del links[curr.key]
next = curr.next
curr.unlink()
curr = next
def popitem(self):
"""Remove and return the `(key, value)` pair least recently used that
has not already expired.
"""
with self.timer as time:
self.expire(time)
try:
key = next(iter(self.__links))
except StopIteration:
raise KeyError("%s is empty" % type(self).__name__) from None
else:
return (key, self.pop(key))
def __getlink(self, key):
value = self.__links[key]
self.__links.move_to_end(key)
return value
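A minimal sketch of how the TTLCache defined above behaves; the hand-advanced `now` clock is illustrative only (any monotonic callable can be passed as `timer`):
```python
from cachetools import TTLCache

now = 0                                   # fake clock, advanced by hand
cache = TTLCache(maxsize=2, ttl=5, timer=lambda: now)

cache["a"] = 1                            # expires at now + 5
cache["b"] = 2
cache["c"] = 3                            # cache full: evicts "a", the least recently used
assert "a" not in cache and "c" in cache

now = 6                                   # past the TTL of "b" and "c"
assert "b" not in cache                   # __contains__ compares timer() < expires
```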
class TLRUCache(_TimedCache):
"""Time aware Least Recently Used (TLRU) cache implementation."""
@functools.total_ordering
class _Item:
__slots__ = ("key", "expires", "removed")
def __init__(self, key=None, expires=None):
self.key = key
self.expires = expires
self.removed = False
def __lt__(self, other):
return self.expires < other.expires
def __init__(self, maxsize, ttu, timer=time.monotonic, getsizeof=None):
_TimedCache.__init__(self, maxsize, timer, getsizeof)
self.__items = collections.OrderedDict()
self.__order = []
self.__ttu = ttu
def __contains__(self, key):
try:
item = self.__items[key] # no reordering
except KeyError:
return False
else:
return self.timer() < item.expires
def __getitem__(self, key, cache_getitem=Cache.__getitem__):
try:
item = self.__getitem(key)
except KeyError:
expired = False
else:
expired = not (self.timer() < item.expires)
if expired:
return self.__missing__(key)
else:
return cache_getitem(self, key)
def __setitem__(self, key, value, cache_setitem=Cache.__setitem__):
with self.timer as time:
expires = self.__ttu(key, value, time)
if not (time < expires):
return # skip expired items
self.expire(time)
cache_setitem(self, key, value)
# removing an existing item would break the heap structure, so
# only mark it as removed for now
try:
self.__getitem(key).removed = True
except KeyError:
pass
self.__items[key] = item = TLRUCache._Item(key, expires)
heapq.heappush(self.__order, item)
def __delitem__(self, key, cache_delitem=Cache.__delitem__):
with self.timer as time:
# no self.expire() for performance reasons, e.g. self.clear() [#67]
cache_delitem(self, key)
item = self.__items.pop(key)
item.removed = True
if not (time < item.expires):
raise KeyError(key)
def __iter__(self):
for curr in self.__order:
# "freeze" time for iterator access
with self.timer as time:
if time < curr.expires and not curr.removed:
yield curr.key
@property
def ttu(self):
"""The local time-to-use function used by the cache."""
return self.__ttu
def expire(self, time=None):
"""Remove expired items from the cache."""
if time is None:
time = self.timer()
items = self.__items
order = self.__order
# clean up the heap if too many items are marked as removed
if len(order) > len(items) * 2:
self.__order = order = [item for item in order if not item.removed]
heapq.heapify(order)
cache_delitem = Cache.__delitem__
while order and (order[0].removed or not (time < order[0].expires)):
item = heapq.heappop(order)
if not item.removed:
cache_delitem(self, item.key)
del items[item.key]
def popitem(self):
"""Remove and return the `(key, value)` pair least recently used that
has not already expired.
"""
with self.timer as time:
self.expire(time)
try:
key = next(iter(self.__items))
except StopIteration:
raise KeyError("%s is empty" % self.__class__.__name__) from None
else:
return (key, self.pop(key))
def __getitem(self, key):
value = self.__items[key]
self.__items.move_to_end(key)
return value
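TLRUCache takes a `ttu` (time-to-use) callable instead of a fixed ttl: it receives the key, the value, and the current time and must return the absolute expiration time. A small sketch with a made-up per-value lifetime rule:
```python
from cachetools import TLRUCache

def ttu(_key, value, now):
    # illustrative rule: short values live 5 seconds, everything else 60
    return now + (5 if len(str(value)) < 3 else 60)

cache = TLRUCache(maxsize=128, ttu=ttu)
cache["x"] = "hi"      # expires 5 seconds after insertion
cache["y"] = "hello"   # expires 60 seconds after insertion
```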
def cached(cache, key=keys.hashkey, lock=None):
"""Decorator to wrap a function with a memoizing callable that saves
results in a cache.
"""
def decorator(func):
if cache is None:
def wrapper(*args, **kwargs):
return func(*args, **kwargs)
def clear():
pass
elif lock is None:
def wrapper(*args, **kwargs):
k = key(*args, **kwargs)
try:
return cache[k]
except KeyError:
pass # key not found
v = func(*args, **kwargs)
try:
cache[k] = v
except ValueError:
pass # value too large
return v
def clear():
cache.clear()
else:
def wrapper(*args, **kwargs):
k = key(*args, **kwargs)
try:
with lock:
return cache[k]
except KeyError:
pass # key not found
v = func(*args, **kwargs)
# in case of a race, prefer the item already in the cache
try:
with lock:
return cache.setdefault(k, v)
except ValueError:
return v # value too large
def clear():
with lock:
cache.clear()
wrapper.cache = cache
wrapper.cache_key = key
wrapper.cache_lock = lock
wrapper.cache_clear = clear
return functools.update_wrapper(wrapper, func)
return decorator
def cachedmethod(cache, key=keys.methodkey, lock=None):
"""Decorator to wrap a class or instance method with a memoizing
callable that saves results in a cache.
"""
def decorator(method):
if lock is None:
def wrapper(self, *args, **kwargs):
c = cache(self)
if c is None:
return method(self, *args, **kwargs)
k = key(self, *args, **kwargs)
try:
return c[k]
except KeyError:
pass # key not found
v = method(self, *args, **kwargs)
try:
c[k] = v
except ValueError:
pass # value too large
return v
def clear(self):
c = cache(self)
if c is not None:
c.clear()
else:
def wrapper(self, *args, **kwargs):
c = cache(self)
if c is None:
return method(self, *args, **kwargs)
k = key(self, *args, **kwargs)
try:
with lock(self):
return c[k]
except KeyError:
pass # key not found
v = method(self, *args, **kwargs)
# in case of a race, prefer the item already in the cache
try:
with lock(self):
return c.setdefault(k, v)
except ValueError:
return v # value too large
def clear(self):
c = cache(self)
if c is not None:
with lock(self):
c.clear()
wrapper.cache = cache
wrapper.cache_key = key
wrapper.cache_lock = lock
wrapper.cache_clear = clear
return functools.update_wrapper(wrapper, method)
return decorator
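Taken together, a typical use of the two decorators above looks roughly like this; `get_rates`, `Repo`, and `lookup` are hypothetical names, not part of the package:
```python
import threading

from cachetools import LRUCache, TTLCache, cached, cachedmethod

# module-level function: one shared cache, guarded by a lock for thread safety
@cached(cache=TTLCache(maxsize=1024, ttl=600), lock=threading.Lock())
def get_rates(base="EUR"):
    ...  # stand-in for an expensive lookup

# methods: `cache` is a callable that maps the instance to its own cache
class Repo:
    def __init__(self):
        self._cache = LRUCache(maxsize=256)

    @cachedmethod(lambda self: self._cache)
    def lookup(self, key):
        ...  # expensive work keyed by `key`
```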

BIN
venv/lib/python3.10/site-packages/cachetools/__pycache__/__init__.cpython-310.pyc

Binary file not shown.

BIN
venv/lib/python3.10/site-packages/cachetools/__pycache__/func.cpython-310.pyc

Binary file not shown.

BIN
venv/lib/python3.10/site-packages/cachetools/__pycache__/keys.cpython-310.pyc

Binary file not shown.

172
venv/lib/python3.10/site-packages/cachetools/func.py

@@ -0,0 +1,172 @@
"""`functools.lru_cache` compatible memoizing function decorators."""
__all__ = ("fifo_cache", "lfu_cache", "lru_cache", "mru_cache", "rr_cache", "ttl_cache")
import collections
import functools
import math
import random
import time
try:
from threading import RLock
except ImportError: # pragma: no cover
from dummy_threading import RLock
from . import FIFOCache, LFUCache, LRUCache, MRUCache, RRCache, TTLCache
from . import keys
_CacheInfo = collections.namedtuple(
"CacheInfo", ["hits", "misses", "maxsize", "currsize"]
)
class _UnboundCache(dict):
@property
def maxsize(self):
return None
@property
def currsize(self):
return len(self)
class _UnboundTTLCache(TTLCache):
def __init__(self, ttl, timer):
TTLCache.__init__(self, math.inf, ttl, timer)
@property
def maxsize(self):
return None
def _cache(cache, typed):
maxsize = cache.maxsize
def decorator(func):
key = keys.typedkey if typed else keys.hashkey
hits = misses = 0
lock = RLock()
def wrapper(*args, **kwargs):
nonlocal hits, misses
k = key(*args, **kwargs)
with lock:
try:
v = cache[k]
hits += 1
return v
except KeyError:
misses += 1
v = func(*args, **kwargs)
# in case of a race, prefer the item already in the cache
try:
with lock:
return cache.setdefault(k, v)
except ValueError:
return v # value too large
def cache_info():
with lock:
maxsize = cache.maxsize
currsize = cache.currsize
return _CacheInfo(hits, misses, maxsize, currsize)
def cache_clear():
nonlocal hits, misses
with lock:
try:
cache.clear()
finally:
hits = misses = 0
wrapper.cache_info = cache_info
wrapper.cache_clear = cache_clear
wrapper.cache_parameters = lambda: {"maxsize": maxsize, "typed": typed}
functools.update_wrapper(wrapper, func)
return wrapper
return decorator
def fifo_cache(maxsize=128, typed=False):
"""Decorator to wrap a function with a memoizing callable that saves
up to `maxsize` results based on a First In First Out (FIFO)
algorithm.
"""
if maxsize is None:
return _cache(_UnboundCache(), typed)
elif callable(maxsize):
return _cache(FIFOCache(128), typed)(maxsize)
else:
return _cache(FIFOCache(maxsize), typed)
def lfu_cache(maxsize=128, typed=False):
"""Decorator to wrap a function with a memoizing callable that saves
up to `maxsize` results based on a Least Frequently Used (LFU)
algorithm.
"""
if maxsize is None:
return _cache(_UnboundCache(), typed)
elif callable(maxsize):
return _cache(LFUCache(128), typed)(maxsize)
else:
return _cache(LFUCache(maxsize), typed)
def lru_cache(maxsize=128, typed=False):
"""Decorator to wrap a function with a memoizing callable that saves
up to `maxsize` results based on a Least Recently Used (LRU)
algorithm.
"""
if maxsize is None:
return _cache(_UnboundCache(), typed)
elif callable(maxsize):
return _cache(LRUCache(128), typed)(maxsize)
else:
return _cache(LRUCache(maxsize), typed)
def mru_cache(maxsize=128, typed=False):
"""Decorator to wrap a function with a memoizing callable that saves
up to `maxsize` results based on a Most Recently Used (MRU)
algorithm.
"""
if maxsize is None:
return _cache(_UnboundCache(), typed)
elif callable(maxsize):
return _cache(MRUCache(128), typed)(maxsize)
else:
return _cache(MRUCache(maxsize), typed)
def rr_cache(maxsize=128, choice=random.choice, typed=False):
"""Decorator to wrap a function with a memoizing callable that saves
up to `maxsize` results based on a Random Replacement (RR)
algorithm.
"""
if maxsize is None:
return _cache(_UnboundCache(), typed)
elif callable(maxsize):
return _cache(RRCache(128, choice), typed)(maxsize)
else:
return _cache(RRCache(maxsize, choice), typed)
def ttl_cache(maxsize=128, ttl=600, timer=time.monotonic, typed=False):
"""Decorator to wrap a function with a memoizing callable that saves
up to `maxsize` results based on a Least Recently Used (LRU)
algorithm with a per-item time-to-live (TTL) value.
"""
if maxsize is None:
return _cache(_UnboundTTLCache(ttl, timer), typed)
elif callable(maxsize):
return _cache(TTLCache(128, ttl, timer), typed)(maxsize)
else:
return _cache(TTLCache(maxsize, ttl, timer), typed)
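The decorators above mirror `functools.lru_cache`, including `cache_info()` and `cache_clear()`. A quick sketch with a hypothetical `resolve` function:
```python
from cachetools.func import ttl_cache

@ttl_cache(maxsize=256, ttl=60)
def resolve(hostname):
    ...  # stand-in for an expensive lookup

resolve("example.org")
resolve("example.org")       # second call is a cache hit
print(resolve.cache_info())  # CacheInfo(hits=1, misses=1, maxsize=256, currsize=1)
resolve.cache_clear()
```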

57
venv/lib/python3.10/site-packages/cachetools/keys.py

@@ -0,0 +1,57 @@
"""Key functions for memoizing decorators."""
__all__ = ("hashkey", "methodkey", "typedkey")
class _HashedTuple(tuple):
"""A tuple that ensures that hash() will be called no more than once
per element, since cache decorators will hash the key multiple
times on a cache miss. See also _HashedSeq in the standard
library functools implementation.
"""
__hashvalue = None
def __hash__(self, hash=tuple.__hash__):
hashvalue = self.__hashvalue
if hashvalue is None:
self.__hashvalue = hashvalue = hash(self)
return hashvalue
def __add__(self, other, add=tuple.__add__):
return _HashedTuple(add(self, other))
def __radd__(self, other, add=tuple.__add__):
return _HashedTuple(add(other, self))
def __getstate__(self):
return {}
# used for separating keyword arguments; we do not use an object
# instance here so identity is preserved when pickling/unpickling
_kwmark = (_HashedTuple,)
def hashkey(*args, **kwargs):
"""Return a cache key for the specified hashable arguments."""
if kwargs:
return _HashedTuple(args + sum(sorted(kwargs.items()), _kwmark))
else:
return _HashedTuple(args)
def methodkey(self, *args, **kwargs):
"""Return a cache key for use with cached methods."""
return hashkey(*args, **kwargs)
def typedkey(*args, **kwargs):
"""Return a typed cache key for the specified hashable arguments."""
key = hashkey(*args, **kwargs)
key += tuple(type(v) for v in args)
key += tuple(type(v) for _, v in sorted(kwargs.items()))
return key
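How the key functions above differ in practice, shown with plain literals:
```python
from cachetools.keys import hashkey, typedkey

# hashkey treats equal values as the same key, regardless of type
assert hashkey(1, 2) == hashkey(1.0, 2.0)

# typedkey appends the argument types, so int and float arguments differ
assert typedkey(1, 2) != typedkey(1.0, 2.0)

# keyword arguments are folded in after a marker, sorted by name
assert hashkey("a", x=1, y=2) == hashkey("a", y=2, x=1)
```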

1
venv/lib/python3.10/site-packages/certifi-2022.12.7.dist-info/INSTALLER

@@ -0,0 +1 @@
pip

21
venv/lib/python3.10/site-packages/certifi-2022.12.7.dist-info/LICENSE

@@ -0,0 +1,21 @@
This package contains a modified version of ca-bundle.crt:
ca-bundle.crt -- Bundle of CA Root Certificates
Certificate data from Mozilla as of: Thu Nov 3 19:04:19 2011#
This is a bundle of X.509 certificates of public Certificate Authorities
(CA). These were automatically extracted from Mozilla's root certificates
file (certdata.txt). This file can be found in the mozilla source tree:
https://hg.mozilla.org/mozilla-central/file/tip/security/nss/lib/ckfw/builtins/certdata.txt
It contains the certificates in PEM format and therefore
can be directly used with curl / libcurl / php_curl, or with
an Apache+mod_ssl webserver for SSL client authentication.
Just configure this file as the SSLCACertificateFile.#
***** BEGIN LICENSE BLOCK *****
This Source Code Form is subject to the terms of the Mozilla Public License,
v. 2.0. If a copy of the MPL was not distributed with this file, You can obtain
one at http://mozilla.org/MPL/2.0/.
***** END LICENSE BLOCK *****
@(#) $RCSfile: certdata.txt,v $ $Revision: 1.80 $ $Date: 2011/11/03 15:11:58 $

83
venv/lib/python3.10/site-packages/certifi-2022.12.7.dist-info/METADATA

@@ -0,0 +1,83 @@
Metadata-Version: 2.1
Name: certifi
Version: 2022.12.7
Summary: Python package for providing Mozilla's CA Bundle.
Home-page: https://github.com/certifi/python-certifi
Author: Kenneth Reitz
Author-email: me@kennethreitz.com
License: MPL-2.0
Project-URL: Source, https://github.com/certifi/python-certifi
Platform: UNKNOWN
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)
Classifier: Natural Language :: English
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3 :: Only
Classifier: Programming Language :: Python :: 3.6
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Requires-Python: >=3.6
License-File: LICENSE
Certifi: Python SSL Certificates
================================
Certifi provides Mozilla's carefully curated collection of Root Certificates for
validating the trustworthiness of SSL certificates while verifying the identity
of TLS hosts. It has been extracted from the `Requests`_ project.
Installation
------------
``certifi`` is available on PyPI. Simply install it with ``pip``::
$ pip install certifi
Usage
-----
To reference the installed certificate authority (CA) bundle, you can use the
built-in function::
>>> import certifi
>>> certifi.where()
'/usr/local/lib/python3.7/site-packages/certifi/cacert.pem'
Or from the command line::
$ python -m certifi
/usr/local/lib/python3.7/site-packages/certifi/cacert.pem
Enjoy!
1024-bit Root Certificates
~~~~~~~~~~~~~~~~~~~~~~~~~~
Browsers and certificate authorities have concluded that 1024-bit keys are
unacceptably weak for certificates, particularly root certificates. For this
reason, Mozilla has removed any weak (i.e. 1024-bit key) certificate from its
bundle, replacing it with an equivalent strong (i.e. 2048-bit or greater key)
certificate from the same CA. Because Mozilla removed these certificates from
its bundle, ``certifi`` removed them as well.
In previous versions, ``certifi`` provided the ``certifi.old_where()`` function
to intentionally re-add the 1024-bit roots back into your bundle. This was not
recommended in production and therefore was removed at the end of 2018.
.. _`Requests`: https://requests.readthedocs.io/en/master/
Addition/Removal of Certificates
--------------------------------
Certifi does not support any addition/removal or other modification of the
CA trust store content. This project is intended to provide a reliable and
highly portable root of trust to python deployments. Look to upstream projects
for methods to use alternate trust.

14
venv/lib/python3.10/site-packages/certifi-2022.12.7.dist-info/RECORD

@@ -0,0 +1,14 @@
certifi-2022.12.7.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
certifi-2022.12.7.dist-info/LICENSE,sha256=oC9sY4-fuE0G93ZMOrCF2K9-2luTwWbaVDEkeQd8b7A,1052
certifi-2022.12.7.dist-info/METADATA,sha256=chFpcxKhCPEQ3d8-Vz36zr2Micf1eQhKkFFk7_JvJNo,2911
certifi-2022.12.7.dist-info/RECORD,,
certifi-2022.12.7.dist-info/WHEEL,sha256=ewwEueio1C2XeHTvT17n8dZUJgOvyCWCt0WVNLClP9o,92
certifi-2022.12.7.dist-info/top_level.txt,sha256=KMu4vUCfsjLrkPbSNdgdekS-pVJzBAJFO__nI8NF6-U,8
certifi/__init__.py,sha256=bK_nm9bLJzNvWZc2oZdiTwg2KWD4HSPBWGaM0zUDvMw,94
certifi/__main__.py,sha256=xBBoj905TUWBLRGANOcf7oi6e-3dMP4cEoG9OyMs11g,243
certifi/__pycache__/__init__.cpython-310.pyc,,
certifi/__pycache__/__main__.cpython-310.pyc,,
certifi/__pycache__/core.cpython-310.pyc,,
certifi/cacert.pem,sha256=LBHDzgj_xA05AxnHK8ENT5COnGNElNZe0svFUHMf1SQ,275233
certifi/core.py,sha256=lhewz0zFb2b4ULsQurElmloYwQoecjWzPqY67P8T7iM,4219
certifi/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0

5
venv/lib/python3.10/site-packages/certifi-2022.12.7.dist-info/WHEEL

@@ -0,0 +1,5 @@
Wheel-Version: 1.0
Generator: bdist_wheel (0.37.0)
Root-Is-Purelib: true
Tag: py3-none-any

1
venv/lib/python3.10/site-packages/certifi-2022.12.7.dist-info/top_level.txt

@@ -0,0 +1 @@
certifi

4
venv/lib/python3.10/site-packages/certifi/__init__.py

@@ -0,0 +1,4 @@
from .core import contents, where
__all__ = ["contents", "where"]
__version__ = "2022.12.07"

12
venv/lib/python3.10/site-packages/certifi/__main__.py

@@ -0,0 +1,12 @@
import argparse
from certifi import contents, where
parser = argparse.ArgumentParser()
parser.add_argument("-c", "--contents", action="store_true")
args = parser.parse_args()
if args.contents:
print(contents())
else:
print(where())

BIN
venv/lib/python3.10/site-packages/certifi/__pycache__/__init__.cpython-310.pyc

Binary file not shown.

BIN
venv/lib/python3.10/site-packages/certifi/__pycache__/__main__.cpython-310.pyc

Binary file not shown.

BIN
venv/lib/python3.10/site-packages/certifi/__pycache__/core.cpython-310.pyc

Binary file not shown.

4527
venv/lib/python3.10/site-packages/certifi/cacert.pem

File diff suppressed because it is too large

108
venv/lib/python3.10/site-packages/certifi/core.py

@@ -0,0 +1,108 @@
"""
certifi.py
~~~~~~~~~~
This module returns the installation location of cacert.pem or its contents.
"""
import sys
if sys.version_info >= (3, 11):
from importlib.resources import as_file, files
_CACERT_CTX = None
_CACERT_PATH = None
def where() -> str:
# This is slightly terrible, but we want to delay extracting the file
# in cases where we're inside of a zipimport situation until someone
# actually calls where(), but we don't want to re-extract the file
# on every call of where(), so we'll do it once then store it in a
# global variable.
global _CACERT_CTX
global _CACERT_PATH
if _CACERT_PATH is None:
# This is slightly janky, the importlib.resources API wants you to
# manage the cleanup of this file, so it doesn't actually return a
# path, it returns a context manager that will give you the path
# when you enter it and will do any cleanup when you leave it. In
# the common case of not needing a temporary file, it will just
# return the file system location and the __exit__() is a no-op.
#
# We also have to hold onto the actual context manager, because
# it will do the cleanup whenever it gets garbage collected, so
# we will also store that at the global level as well.
_CACERT_CTX = as_file(files("certifi").joinpath("cacert.pem"))
_CACERT_PATH = str(_CACERT_CTX.__enter__())
return _CACERT_PATH
def contents() -> str:
return files("certifi").joinpath("cacert.pem").read_text(encoding="ascii")
elif sys.version_info >= (3, 7):
from importlib.resources import path as get_path, read_text
_CACERT_CTX = None
_CACERT_PATH = None
def where() -> str:
# This is slightly terrible, but we want to delay extracting the
# file in cases where we're inside of a zipimport situation until
# someone actually calls where(), but we don't want to re-extract
# the file on every call of where(), so we'll do it once then store
# it in a global variable.
global _CACERT_CTX
global _CACERT_PATH
if _CACERT_PATH is None:
# This is slightly janky, the importlib.resources API wants you
# to manage the cleanup of this file, so it doesn't actually
# return a path, it returns a context manager that will give
# you the path when you enter it and will do any cleanup when
# you leave it. In the common case of not needing a temporary
# file, it will just return the file system location and the
# __exit__() is a no-op.
#
# We also have to hold onto the actual context manager, because
# it will do the cleanup whenever it gets garbage collected, so
# we will also store that at the global level as well.
_CACERT_CTX = get_path("certifi", "cacert.pem")
_CACERT_PATH = str(_CACERT_CTX.__enter__())
return _CACERT_PATH
def contents() -> str:
return read_text("certifi", "cacert.pem", encoding="ascii")
else:
import os
import types
from typing import Union
Package = Union[types.ModuleType, str]
Resource = Union[str, "os.PathLike"]
# This fallback will work for Python versions prior to 3.7 that lack the
# importlib.resources module but relies on the existing `where` function
# so won't address issues with environments like PyOxidizer that don't set
# __file__ on modules.
def read_text(
package: Package,
resource: Resource,
encoding: str = 'utf-8',
errors: str = 'strict'
) -> str:
with open(where(), encoding=encoding) as data:
return data.read()
# If we don't have importlib.resources, then we will just do the old logic
# of assuming we're on the filesystem and munge the path directly.
def where() -> str:
f = os.path.dirname(__file__)
return os.path.join(f, "cacert.pem")
def contents() -> str:
return read_text("certifi", "cacert.pem", encoding="ascii")
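In practice, `where()` and `contents()` are usually fed to a TLS layer; a small sketch using the standard library's `ssl` module:
```python
import ssl

import certifi

# an SSL context that trusts exactly the Mozilla bundle shipped with certifi
ctx = ssl.create_default_context(cafile=certifi.where())

# or grab the PEM data itself, e.g. to hand to another TLS library
pem_bundle = certifi.contents()
```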

0
venv/lib/python3.10/site-packages/certifi/py.typed

1
venv/lib/python3.10/site-packages/charset_normalizer-2.1.1.dist-info/INSTALLER

@@ -0,0 +1 @@
pip

21
venv/lib/python3.10/site-packages/charset_normalizer-2.1.1.dist-info/LICENSE

@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2019 TAHRI Ahmed R.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

269
venv/lib/python3.10/site-packages/charset_normalizer-2.1.1.dist-info/METADATA

@@ -0,0 +1,269 @@
Metadata-Version: 2.1
Name: charset-normalizer
Version: 2.1.1
Summary: The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet.
Home-page: https://github.com/ousret/charset_normalizer
Author: Ahmed TAHRI @Ousret
Author-email: ahmed.tahri@cloudnursery.dev
License: MIT
Project-URL: Bug Reports, https://github.com/Ousret/charset_normalizer/issues
Project-URL: Documentation, https://charset-normalizer.readthedocs.io/en/latest
Keywords: encoding,i18n,txt,text,charset,charset-detector,normalization,unicode,chardet
Classifier: Development Status :: 5 - Production/Stable
Classifier: License :: OSI Approved :: MIT License
Classifier: Intended Audience :: Developers
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.6
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Topic :: Text Processing :: Linguistic
Classifier: Topic :: Utilities
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Typing :: Typed
Requires-Python: >=3.6.0
Description-Content-Type: text/markdown
License-File: LICENSE
Provides-Extra: unicode_backport
Requires-Dist: unicodedata2 ; extra == 'unicode_backport'
<h1 align="center">Charset Detection, for Everyone 👋 <a href="https://twitter.com/intent/tweet?text=The%20Real%20First%20Universal%20Charset%20%26%20Language%20Detector&url=https://www.github.com/Ousret/charset_normalizer&hashtags=python,encoding,chardet,developers"><img src="https://img.shields.io/twitter/url/http/shields.io.svg?style=social"/></a></h1>
<p align="center">
<sup>The Real First Universal Charset Detector</sup><br>
<a href="https://pypi.org/project/charset-normalizer">
<img src="https://img.shields.io/pypi/pyversions/charset_normalizer.svg?orange=blue" />
</a>
<a href="https://codecov.io/gh/Ousret/charset_normalizer">
<img src="https://codecov.io/gh/Ousret/charset_normalizer/branch/master/graph/badge.svg" />
</a>
<a href="https://pepy.tech/project/charset-normalizer/">
<img alt="Download Count Total" src="https://pepy.tech/badge/charset-normalizer/month" />
</a>
</p>
> A library that helps you read text from an unknown charset encoding.<br /> Motivated by `chardet`,
> I'm trying to resolve the issue by taking a new approach.
> All IANA character set names for which the Python core library provides codecs are supported.
<p align="center">
>>>>> <a href="https://charsetnormalizerweb.ousret.now.sh" target="_blank">👉 Try Me Online Now, Then Adopt Me 👈 </a> <<<<<
</p>
This project offers you an alternative to **Universal Charset Encoding Detector**, also known as **Chardet**.
| Feature | [Chardet](https://github.com/chardet/chardet) | Charset Normalizer | [cChardet](https://github.com/PyYoshi/cChardet) |
| ------------- | :-------------: | :------------------: | :------------------: |
| `Fast` | ❌<br> | ✅<br> | ✅ <br> |
| `Universal**` | ❌ | ✅ | ❌ |
| `Reliable` **without** distinguishable standards | ❌ | ✅ | ✅ |
| `Reliable` **with** distinguishable standards | ✅ | ✅ | ✅ |
| `License` | LGPL-2.1<br>_restrictive_ | MIT | MPL-1.1<br>_restrictive_ |
| `Native Python` | ✅ | ✅ | ❌ |
| `Detect spoken language` | ❌ | ✅ | N/A |
| `UnicodeDecodeError Safety` | ❌ | ✅ | ❌ |
| `Whl Size` | 193.6 kB | 39.5 kB | ~200 kB |
| `Supported Encoding` | 33 | :tada: [93](https://charset-normalizer.readthedocs.io/en/latest/user/support.html#supported-encodings) | 40
<p align="center">
<img src="https://i.imgflip.com/373iay.gif" alt="Reading Normalized Text" width="226"/><img src="https://media.tenor.com/images/c0180f70732a18b4965448d33adba3d0/tenor.gif" alt="Cat Reading Text" width="200"/>
*\*\* : They clearly use specific code for a specific encoding, even if it covers most of the encodings in use*<br>
Did you get here because of the logs? See [https://charset-normalizer.readthedocs.io/en/latest/user/miscellaneous.html](https://charset-normalizer.readthedocs.io/en/latest/user/miscellaneous.html)
## ⭐ Your support
*Fork, test-it, star-it, submit your ideas! We do listen.*
## ⚡ Performance
This package offers better performance than its counterpart Chardet. Here are some numbers.
| Package | Accuracy | Mean per file (ms) | File per sec (est) |
| ------------- | :-------------: | :------------------: | :------------------: |
| [chardet](https://github.com/chardet/chardet) | 86 % | 200 ms | 5 file/sec |
| charset-normalizer | **98 %** | **39 ms** | 26 file/sec |
| Package | 99th percentile | 95th percentile | 50th percentile |
| ------------- | :-------------: | :------------------: | :------------------: |
| [chardet](https://github.com/chardet/chardet) | 1200 ms | 287 ms | 23 ms |
| charset-normalizer | 400 ms | 200 ms | 15 ms |
Chardet's performance on larger files (1 MB+) is very poor. Expect a huge difference on large payloads.
> Stats are generated using 400+ files with default parameters. For more details on the files used, see the GHA workflows.
> And yes, these results might change at any time. The dataset can be updated to include more files.
> The actual delays depend heavily on your CPU capabilities. The factors should remain the same.
> Keep in mind that the stats are generous and that Chardet's accuracy vs. ours is measured against Chardet's initial capability
> (e.g. supported encodings). Challenge them if you want.
[cchardet](https://github.com/PyYoshi/cChardet) is a non-native (C++ binding), unmaintained, faster alternative with
better accuracy than chardet but lower than this package. If speed is the most important factor, you should try it.
## ✨ Installation
Using PyPI for the latest stable release:
```sh
pip install charset-normalizer -U
```
If you want a more up-to-date `unicodedata` than the one available in your Python setup:
```sh
pip install charset-normalizer[unicode_backport] -U
```
## 🚀 Basic Usage
### CLI
This package comes with a CLI.
```
usage: normalizer [-h] [-v] [-a] [-n] [-m] [-r] [-f] [-t THRESHOLD]
file [file ...]
The Real First Universal Charset Detector. Discover originating encoding used
on text file. Normalize text to unicode.
positional arguments:
files File(s) to be analysed
optional arguments:
-h, --help show this help message and exit
-v, --verbose Display complementary information about file if any.
Stdout will contain logs about the detection process.
-a, --with-alternative
Output complementary possibilities if any. Top-level
JSON WILL be a list.
-n, --normalize Permit to normalize input file. If not set, program
does not write anything.
-m, --minimal Only output the charset detected to STDOUT. Disabling
JSON output.
-r, --replace Replace file when trying to normalize it instead of
creating a new one.
-f, --force Replace file without asking if you are sure, use this
flag with caution.
-t THRESHOLD, --threshold THRESHOLD
Define a custom maximum amount of chaos allowed in
decoded content. 0. <= chaos <= 1.
--version Show version information and exit.
```
```bash
normalizer ./data/sample.1.fr.srt
```
:tada: Since version 1.4.0, the CLI produces an easily usable stdout result in JSON format.
```json
{
"path": "/home/default/projects/charset_normalizer/data/sample.1.fr.srt",
"encoding": "cp1252",
"encoding_aliases": [
"1252",
"windows_1252"
],
"alternative_encodings": [
"cp1254",
"cp1256",
"cp1258",
"iso8859_14",
"iso8859_15",
"iso8859_16",
"iso8859_3",
"iso8859_9",
"latin_1",
"mbcs"
],
"language": "French",
"alphabets": [
"Basic Latin",
"Latin-1 Supplement"
],
"has_sig_or_bom": false,
"chaos": 0.149,
"coherence": 97.152,
"unicode_path": null,
"is_preferred": true
}
```
### Python
*Just print out normalized text*
```python
from charset_normalizer import from_path
results = from_path('./my_subtitle.srt')
print(str(results.best()))
```
*Normalize any text file*
```python
from charset_normalizer import normalize
try:
normalize('./my_subtitle.srt') # should write to disk my_subtitle-***.srt
except IOError as e:
print('Sadly, we are unable to perform charset normalization.', str(e))
```
*Upgrade your code without effort*
```python
from charset_normalizer import detect
```
The above code will behave the same as **chardet**. We ensure that we offer the best (reasonable) BC result possible.
See the docs for advanced usage : [readthedocs.io](https://charset-normalizer.readthedocs.io/en/latest/)
## 😇 Why
When I started using Chardet, I noticed that it was not suited to my expectations, and I wanted to propose a
reliable alternative using a completely different method. Also! I never back down on a good challenge!
I **don't care** about the **originating charset** encoding, because **two different tables** can
produce **two identical rendered strings.**
What I want is to get readable text, the best I can.
In a way, **I'm brute forcing text decoding.** How cool is that? 😎
Don't confuse the package **ftfy** with charset-normalizer or chardet. ftfy's goal is to repair broken Unicode strings, whereas charset-normalizer's is to convert a raw file in an unknown encoding to Unicode.
## 🍰 How
- Discard all charset encoding tables that could not fit the binary content.
- Measure chaos, or the mess once opened (by chunks) with a corresponding charset encoding.
- Extract matches with the lowest mess detected.
- Additionally, we measure coherence / probe for a language.
**Wait a minute**, what is chaos/mess and coherence according to **YOU ?**
*Chaos :* I opened hundreds of text files, **written by humans**, with the wrong encoding table. **I observed**, then
**I established** some ground rules about **what is obvious** when **it seems like** a mess.
I know that my interpretation of what is chaotic is very subjective, feel free to contribute in order to
improve or rewrite it.
*Coherence :* For each language on earth, we have computed ranked letter-appearance occurrences (as best we can). I figured
that intel is worth something here, so I use those records against the decoded text to check whether I can detect intelligent design.
## ⚡ Known limitations
- Language detection is unreliable when the text contains two or more languages sharing identical letters (e.g. HTML with English tags plus Turkish content, both using Latin characters).
- Every charset detector heavily depends on sufficient content. In common cases, do not bother running detection on very tiny content.
## 👤 Contributing
Contributions, issues and feature requests are very much welcome.<br />
Feel free to check [issues page](https://github.com/ousret/charset_normalizer/issues) if you want to contribute.
## 📝 License
Copyright © 2019 [Ahmed TAHRI @Ousret](https://github.com/Ousret).<br />
This project is [MIT](https://github.com/Ousret/charset_normalizer/blob/master/LICENSE) licensed.
Characters frequencies used in this project © 2012 [Denny Vrandečić](http://simia.net/letters/)
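For completeness, a short sketch of the chardet-compatible `detect` entry point mentioned above (the exact confidence value will vary):
```python
from charset_normalizer import detect

result = detect("Bсеки човек има право на образование.".encode("utf_8"))
# result is a dict with the same keys chardet returns:
# {'encoding': ..., 'language': ..., 'confidence': ...}
print(result["encoding"], result["confidence"])
```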

33
venv/lib/python3.10/site-packages/charset_normalizer-2.1.1.dist-info/RECORD

@@ -0,0 +1,33 @@
../../../bin/normalizer,sha256=1mJaAAogO8Fsghs8eCHIrdVCQag4qlamZ1nhWiEi0lM,278
charset_normalizer-2.1.1.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
charset_normalizer-2.1.1.dist-info/LICENSE,sha256=6zGgxaT7Cbik4yBV0lweX5w1iidS_vPNcgIT0cz-4kE,1070
charset_normalizer-2.1.1.dist-info/METADATA,sha256=C99l12g4d1E9_UiW-mqPCWx7v2M_lYGWxy1GTOjXSsA,11942
charset_normalizer-2.1.1.dist-info/RECORD,,
charset_normalizer-2.1.1.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
charset_normalizer-2.1.1.dist-info/entry_points.txt,sha256=uYo8aIGLWv8YgWfSna5HnfY_En4pkF1w4bgawNAXzP0,76
charset_normalizer-2.1.1.dist-info/top_level.txt,sha256=7ASyzePr8_xuZWJsnqJjIBtyV8vhEo0wBCv1MPRRi3Q,19
charset_normalizer/__init__.py,sha256=jGhhf1IcOgCpZsr593E9fPvjWKnflVqHe_LwkOJjInU,1790
charset_normalizer/__pycache__/__init__.cpython-310.pyc,,
charset_normalizer/__pycache__/api.cpython-310.pyc,,
charset_normalizer/__pycache__/cd.cpython-310.pyc,,
charset_normalizer/__pycache__/constant.cpython-310.pyc,,
charset_normalizer/__pycache__/legacy.cpython-310.pyc,,
charset_normalizer/__pycache__/md.cpython-310.pyc,,
charset_normalizer/__pycache__/models.cpython-310.pyc,,
charset_normalizer/__pycache__/utils.cpython-310.pyc,,
charset_normalizer/__pycache__/version.cpython-310.pyc,,
charset_normalizer/api.py,sha256=euVPmjAMbjpqhEHPjfKtyy1mK52U0TOUBUQgM_Qy6eE,19191
charset_normalizer/assets/__init__.py,sha256=r7aakPaRIc2FFG2mw2V8NOTvkl25_euKZ3wPf5SAVa4,15222
charset_normalizer/assets/__pycache__/__init__.cpython-310.pyc,,
charset_normalizer/cd.py,sha256=Pxdkbn4cy0iZF42KTb1FiWIqqKobuz_fDjGwc6JMNBc,10811
charset_normalizer/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
charset_normalizer/cli/__pycache__/__init__.cpython-310.pyc,,
charset_normalizer/cli/__pycache__/normalizer.cpython-310.pyc,,
charset_normalizer/cli/normalizer.py,sha256=FmD1RXeMpRBg_mjR0MaJhNUpM2qZ8wz2neAE7AayBeg,9521
charset_normalizer/constant.py,sha256=NgU-pY8JH2a9lkVT8oKwAFmIUYNKOuSBwZgF9MrlNCM,19157
charset_normalizer/legacy.py,sha256=XKeZOts_HdYQU_Jb3C9ZfOjY2CiUL132k9_nXer8gig,3384
charset_normalizer/md.py,sha256=pZP8IVpSC82D8INA9Tf_y0ijJSRI-UIncZvLdfTWEd4,17642
charset_normalizer/models.py,sha256=i68YdlSLTEI3EEBVXq8TLNAbyyjrLC2OWszc-OBAk9I,13167
charset_normalizer/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
charset_normalizer/utils.py,sha256=ykOznhcAeH-ODLBWJuI7t1nbwa1SAfN_bDYTCJGyh4U,11771
charset_normalizer/version.py,sha256=_eh2MA3qS__IajlePQxKBmlw6zaBDvPYlLdEgxgIojw,79

5
venv/lib/python3.10/site-packages/charset_normalizer-2.1.1.dist-info/WHEEL

@@ -0,0 +1,5 @@
Wheel-Version: 1.0
Generator: bdist_wheel (0.37.1)
Root-Is-Purelib: true
Tag: py3-none-any

2
venv/lib/python3.10/site-packages/charset_normalizer-2.1.1.dist-info/entry_points.txt

@@ -0,0 +1,2 @@
[console_scripts]
normalizer = charset_normalizer.cli.normalizer:cli_detect

1
venv/lib/python3.10/site-packages/charset_normalizer-2.1.1.dist-info/top_level.txt

@@ -0,0 +1 @@
charset_normalizer

56
venv/lib/python3.10/site-packages/charset_normalizer/__init__.py

@@ -0,0 +1,56 @@
# -*- coding: utf-8 -*-
"""
Charset-Normalizer
~~~~~~~~~~~~~~
The Real First Universal Charset Detector.
A library that helps you read text from an unknown charset encoding.
Motivated by chardet, this package tries to resolve the issue by taking a new approach.
All IANA character set names for which the Python core library provides codecs are supported.
Basic usage:
>>> from charset_normalizer import from_bytes
>>> results = from_bytes('Bсеки човек има право на образование. Oбразованието!'.encode('utf_8'))
>>> best_guess = results.best()
>>> str(best_guess)
'Bсеки човек има право на образование. Oбразованието!'
Other methods and usages are available - see the full documentation
at <https://github.com/Ousret/charset_normalizer>.
:copyright: (c) 2021 by Ahmed TAHRI
:license: MIT, see LICENSE for more details.
"""
import logging
from .api import from_bytes, from_fp, from_path, normalize
from .legacy import (
CharsetDetector,
CharsetDoctor,
CharsetNormalizerMatch,
CharsetNormalizerMatches,
detect,
)
from .models import CharsetMatch, CharsetMatches
from .utils import set_logging_handler
from .version import VERSION, __version__
__all__ = (
"from_fp",
"from_path",
"from_bytes",
"normalize",
"detect",
"CharsetMatch",
"CharsetMatches",
"CharsetNormalizerMatch",
"CharsetNormalizerMatches",
"CharsetDetector",
"CharsetDoctor",
"__version__",
"VERSION",
"set_logging_handler",
)
# Attach a NullHandler to the top level logger by default
# https://docs.python.org/3.3/howto/logging.html#configuring-logging-for-a-library
logging.getLogger("charset_normalizer").addHandler(logging.NullHandler())

BIN
venv/lib/python3.10/site-packages/charset_normalizer/__pycache__/__init__.cpython-310.pyc

Binary file not shown.

BIN
venv/lib/python3.10/site-packages/charset_normalizer/__pycache__/api.cpython-310.pyc

Binary file not shown.

BIN
venv/lib/python3.10/site-packages/charset_normalizer/__pycache__/cd.cpython-310.pyc

Binary file not shown.

BIN
venv/lib/python3.10/site-packages/charset_normalizer/__pycache__/constant.cpython-310.pyc

Binary file not shown.

BIN
venv/lib/python3.10/site-packages/charset_normalizer/__pycache__/legacy.cpython-310.pyc

Binary file not shown.

BIN
venv/lib/python3.10/site-packages/charset_normalizer/__pycache__/md.cpython-310.pyc

Binary file not shown.

BIN
venv/lib/python3.10/site-packages/charset_normalizer/__pycache__/models.cpython-310.pyc

Binary file not shown.

BIN
venv/lib/python3.10/site-packages/charset_normalizer/__pycache__/utils.cpython-310.pyc

Binary file not shown.

BIN
venv/lib/python3.10/site-packages/charset_normalizer/__pycache__/version.cpython-310.pyc

Binary file not shown.

584
venv/lib/python3.10/site-packages/charset_normalizer/api.py

@@ -0,0 +1,584 @@
import logging
import warnings
from os import PathLike
from os.path import basename, splitext
from typing import Any, BinaryIO, List, Optional, Set
from .cd import (
coherence_ratio,
encoding_languages,
mb_encoding_languages,
merge_coherence_ratios,
)
from .constant import IANA_SUPPORTED, TOO_BIG_SEQUENCE, TOO_SMALL_SEQUENCE, TRACE
from .md import mess_ratio
from .models import CharsetMatch, CharsetMatches
from .utils import (
any_specified_encoding,
cut_sequence_chunks,
iana_name,
identify_sig_or_bom,
is_cp_similar,
is_multi_byte_encoding,
should_strip_sig_or_bom,
)
# Will most likely be controversial
# logging.addLevelName(TRACE, "TRACE")
logger = logging.getLogger("charset_normalizer")
explain_handler = logging.StreamHandler()
explain_handler.setFormatter(
logging.Formatter("%(asctime)s | %(levelname)s | %(message)s")
)
def from_bytes(
sequences: bytes,
steps: int = 5,
chunk_size: int = 512,
threshold: float = 0.2,
cp_isolation: Optional[List[str]] = None,
cp_exclusion: Optional[List[str]] = None,
preemptive_behaviour: bool = True,
explain: bool = False,
) -> CharsetMatches:
"""
Given a raw bytes sequence, return the best possible charsets usable to render str objects.
If there are no results, it is a strong indicator that the source is binary/not text.
By default, the process will extract 5 blocks of 512 bytes each to assess the mess and coherence of a given sequence,
and will give up on a particular code page after 20% of measured mess. Those criteria are customizable at will.
The preemptive behavior DOES NOT replace the traditional detection workflow; it prioritizes a particular code page
but never takes it for granted. It can improve performance.
You may want to focus your attention on some code pages and/or exclude others; use cp_isolation and cp_exclusion for that
purpose.
This function will strip the SIG in the payload/sequence every time except for UTF-16 and UTF-32.
By default the library does not set up any handler other than the NullHandler; if you set the 'explain'
toggle to True, it will alter the logger configuration to add a StreamHandler that is suitable for debugging.
A custom logging format and handler can be set manually.
"""
if not isinstance(sequences, (bytearray, bytes)):
raise TypeError(
"Expected object of type bytes or bytearray, got: {0}".format(
type(sequences)
)
)
if explain:
previous_logger_level: int = logger.level
logger.addHandler(explain_handler)
logger.setLevel(TRACE)
length: int = len(sequences)
if length == 0:
logger.debug("Encoding detection on empty bytes, assuming utf_8 intention.")
if explain:
logger.removeHandler(explain_handler)
logger.setLevel(previous_logger_level or logging.WARNING)
return CharsetMatches([CharsetMatch(sequences, "utf_8", 0.0, False, [], "")])
if cp_isolation is not None:
logger.log(
TRACE,
"cp_isolation is set. use this flag for debugging purpose. "
"limited list of encoding allowed : %s.",
", ".join(cp_isolation),
)
cp_isolation = [iana_name(cp, False) for cp in cp_isolation]
else:
cp_isolation = []
if cp_exclusion is not None:
logger.log(
TRACE,
"cp_exclusion is set. use this flag for debugging purpose. "
"limited list of encoding excluded : %s.",
", ".join(cp_exclusion),
)
cp_exclusion = [iana_name(cp, False) for cp in cp_exclusion]
else:
cp_exclusion = []
if length <= (chunk_size * steps):
logger.log(
TRACE,
"override steps (%i) and chunk_size (%i) as content does not fit (%i byte(s) given) parameters.",
steps,
chunk_size,
length,
)
steps = 1
chunk_size = length
if steps > 1 and length / steps < chunk_size:
chunk_size = int(length / steps)
is_too_small_sequence: bool = len(sequences) < TOO_SMALL_SEQUENCE
is_too_large_sequence: bool = len(sequences) >= TOO_BIG_SEQUENCE
if is_too_small_sequence:
logger.log(
TRACE,
"Trying to detect encoding from a tiny portion of ({}) byte(s).".format(
length
),
)
elif is_too_large_sequence:
logger.log(
TRACE,
"Using lazy str decoding because the payload is quite large, ({}) byte(s).".format(
length
),
)
prioritized_encodings: List[str] = []
specified_encoding: Optional[str] = (
any_specified_encoding(sequences) if preemptive_behaviour else None
)
if specified_encoding is not None:
prioritized_encodings.append(specified_encoding)
logger.log(
TRACE,
"Detected declarative mark in sequence. Priority +1 given for %s.",
specified_encoding,
)
tested: Set[str] = set()
tested_but_hard_failure: List[str] = []
tested_but_soft_failure: List[str] = []
fallback_ascii: Optional[CharsetMatch] = None
fallback_u8: Optional[CharsetMatch] = None
fallback_specified: Optional[CharsetMatch] = None
results: CharsetMatches = CharsetMatches()
sig_encoding, sig_payload = identify_sig_or_bom(sequences)
if sig_encoding is not None:
prioritized_encodings.append(sig_encoding)
logger.log(
TRACE,
"Detected a SIG or BOM mark on first %i byte(s). Priority +1 given for %s.",
len(sig_payload),
sig_encoding,
)
prioritized_encodings.append("ascii")
if "utf_8" not in prioritized_encodings:
prioritized_encodings.append("utf_8")
for encoding_iana in prioritized_encodings + IANA_SUPPORTED:
if cp_isolation and encoding_iana not in cp_isolation:
continue
if cp_exclusion and encoding_iana in cp_exclusion:
continue
if encoding_iana in tested:
continue
tested.add(encoding_iana)
decoded_payload: Optional[str] = None
bom_or_sig_available: bool = sig_encoding == encoding_iana
strip_sig_or_bom: bool = bom_or_sig_available and should_strip_sig_or_bom(
encoding_iana
)
if encoding_iana in {"utf_16", "utf_32"} and not bom_or_sig_available:
logger.log(
TRACE,
"Encoding %s wont be tested as-is because it require a BOM. Will try some sub-encoder LE/BE.",
encoding_iana,
)
continue
try:
is_multi_byte_decoder: bool = is_multi_byte_encoding(encoding_iana)
except (ModuleNotFoundError, ImportError):
logger.log(
TRACE,
"Encoding %s does not provide an IncrementalDecoder",
encoding_iana,
)
continue
try:
if is_too_large_sequence and is_multi_byte_decoder is False:
str(
sequences[: int(50e4)]
if strip_sig_or_bom is False
else sequences[len(sig_payload) : int(50e4)],
encoding=encoding_iana,
)
else:
decoded_payload = str(
sequences
if strip_sig_or_bom is False
else sequences[len(sig_payload) :],
encoding=encoding_iana,
)
except (UnicodeDecodeError, LookupError) as e:
if not isinstance(e, LookupError):
logger.log(
TRACE,
"Code page %s does not fit given bytes sequence at ALL. %s",
encoding_iana,
str(e),
)
tested_but_hard_failure.append(encoding_iana)
continue
similar_soft_failure_test: bool = False
for encoding_soft_failed in tested_but_soft_failure:
if is_cp_similar(encoding_iana, encoding_soft_failed):
similar_soft_failure_test = True
break
if similar_soft_failure_test:
logger.log(
TRACE,
"%s is deemed too similar to code page %s and was consider unsuited already. Continuing!",
encoding_iana,
encoding_soft_failed,
)
continue
r_ = range(
0 if not bom_or_sig_available else len(sig_payload),
length,
int(length / steps),
)
multi_byte_bonus: bool = (
is_multi_byte_decoder
and decoded_payload is not None
and len(decoded_payload) < length
)
if multi_byte_bonus:
logger.log(
TRACE,
"Code page %s is a multi byte encoding table and it appear that at least one character "
"was encoded using n-bytes.",
encoding_iana,
)
max_chunk_gave_up: int = int(len(r_) / 4)
max_chunk_gave_up = max(max_chunk_gave_up, 2)
early_stop_count: int = 0
lazy_str_hard_failure = False
md_chunks: List[str] = []
md_ratios = []
try:
for chunk in cut_sequence_chunks(
sequences,
encoding_iana,
r_,
chunk_size,
bom_or_sig_available,
strip_sig_or_bom,
sig_payload,
is_multi_byte_decoder,
decoded_payload,
):
md_chunks.append(chunk)
md_ratios.append(mess_ratio(chunk, threshold))
if md_ratios[-1] >= threshold:
early_stop_count += 1
if (early_stop_count >= max_chunk_gave_up) or (
bom_or_sig_available and strip_sig_or_bom is False
):
break
except UnicodeDecodeError as e: # Lazy str loading may have missed something there
logger.log(
TRACE,
"LazyStr Loading: After MD chunk decode, code page %s does not fit given bytes sequence at ALL. %s",
encoding_iana,
str(e),
)
early_stop_count = max_chunk_gave_up
lazy_str_hard_failure = True
# We might want to check the sequence again with the whole content
# Only if the initial MD tests pass
if (
not lazy_str_hard_failure
and is_too_large_sequence
and not is_multi_byte_decoder
):
try:
sequences[int(50e3) :].decode(encoding_iana, errors="strict")
except UnicodeDecodeError as e:
logger.log(
TRACE,
"LazyStr Loading: After final lookup, code page %s does not fit given bytes sequence at ALL. %s",
encoding_iana,
str(e),
)
tested_but_hard_failure.append(encoding_iana)
continue
mean_mess_ratio: float = sum(md_ratios) / len(md_ratios) if md_ratios else 0.0
if mean_mess_ratio >= threshold or early_stop_count >= max_chunk_gave_up:
tested_but_soft_failure.append(encoding_iana)
logger.log(
TRACE,
"%s was excluded because of initial chaos probing. Gave up %i time(s). "
"Computed mean chaos is %f %%.",
encoding_iana,
early_stop_count,
round(mean_mess_ratio * 100, ndigits=3),
)
# Preparing those fallbacks in case we got nothing.
if (
encoding_iana in ["ascii", "utf_8", specified_encoding]
and not lazy_str_hard_failure
):
fallback_entry = CharsetMatch(
sequences, encoding_iana, threshold, False, [], decoded_payload
)
if encoding_iana == specified_encoding:
fallback_specified = fallback_entry
elif encoding_iana == "ascii":
fallback_ascii = fallback_entry
else:
fallback_u8 = fallback_entry
continue
logger.log(
TRACE,
"%s passed initial chaos probing. Mean measured chaos is %f %%",
encoding_iana,
round(mean_mess_ratio * 100, ndigits=3),
)
if not is_multi_byte_decoder:
target_languages: List[str] = encoding_languages(encoding_iana)
else:
target_languages = mb_encoding_languages(encoding_iana)
if target_languages:
logger.log(
TRACE,
"{} should target any language(s) of {}".format(
encoding_iana, str(target_languages)
),
)
cd_ratios = []
# We shall skip the CD when it's about ASCII
# Most of the time it's not relevant to run "language-detection" on it.
if encoding_iana != "ascii":
for chunk in md_chunks:
chunk_languages = coherence_ratio(
chunk, 0.1, ",".join(target_languages) if target_languages else None
)
cd_ratios.append(chunk_languages)
cd_ratios_merged = merge_coherence_ratios(cd_ratios)
if cd_ratios_merged:
logger.log(
TRACE,
"We detected language {} using {}".format(
cd_ratios_merged, encoding_iana
),
)
results.append(
CharsetMatch(
sequences,
encoding_iana,
mean_mess_ratio,
bom_or_sig_available,
cd_ratios_merged,
decoded_payload,
)
)
if (
encoding_iana in [specified_encoding, "ascii", "utf_8"]
and mean_mess_ratio < 0.1
):
logger.debug(
"Encoding detection: %s is most likely the one.", encoding_iana
)
if explain:
logger.removeHandler(explain_handler)
logger.setLevel(previous_logger_level)
return CharsetMatches([results[encoding_iana]])
if encoding_iana == sig_encoding:
logger.debug(
"Encoding detection: %s is most likely the one as we detected a BOM or SIG within "
"the beginning of the sequence.",
encoding_iana,
)
if explain:
logger.removeHandler(explain_handler)
logger.setLevel(previous_logger_level)
return CharsetMatches([results[encoding_iana]])
if len(results) == 0:
if fallback_u8 or fallback_ascii or fallback_specified:
logger.log(
TRACE,
"Nothing got out of the detection process. Using ASCII/UTF-8/Specified fallback.",
)
if fallback_specified:
logger.debug(
"Encoding detection: %s will be used as a fallback match",
fallback_specified.encoding,
)
results.append(fallback_specified)
elif (
(fallback_u8 and fallback_ascii is None)
or (
fallback_u8
and fallback_ascii
and fallback_u8.fingerprint != fallback_ascii.fingerprint
)
or (fallback_u8 is not None)
):
logger.debug("Encoding detection: utf_8 will be used as a fallback match")
results.append(fallback_u8)
elif fallback_ascii:
logger.debug("Encoding detection: ascii will be used as a fallback match")
results.append(fallback_ascii)
if results:
logger.debug(
"Encoding detection: Found %s as plausible (best-candidate) for content. With %i alternatives.",
results.best().encoding, # type: ignore
len(results) - 1,
)
else:
logger.debug("Encoding detection: Unable to determine any suitable charset.")
if explain:
logger.removeHandler(explain_handler)
logger.setLevel(previous_logger_level)
return results
def from_fp(
fp: BinaryIO,
steps: int = 5,
chunk_size: int = 512,
threshold: float = 0.20,
cp_isolation: Optional[List[str]] = None,
cp_exclusion: Optional[List[str]] = None,
preemptive_behaviour: bool = True,
explain: bool = False,
) -> CharsetMatches:
"""
Same as the function from_bytes, but using a file pointer that is already ready.
Will not close the file pointer.
"""
return from_bytes(
fp.read(),
steps,
chunk_size,
threshold,
cp_isolation,
cp_exclusion,
preemptive_behaviour,
explain,
)
def from_path(
path: "PathLike[Any]",
steps: int = 5,
chunk_size: int = 512,
threshold: float = 0.20,
cp_isolation: Optional[List[str]] = None,
cp_exclusion: Optional[List[str]] = None,
preemptive_behaviour: bool = True,
explain: bool = False,
) -> CharsetMatches:
"""
Same as the function from_bytes, but with one extra step: opening and reading the given file path in binary mode.
Can raise IOError.
"""
with open(path, "rb") as fp:
return from_fp(
fp,
steps,
chunk_size,
threshold,
cp_isolation,
cp_exclusion,
preemptive_behaviour,
explain,
)
def normalize(
path: "PathLike[Any]",
steps: int = 5,
chunk_size: int = 512,
threshold: float = 0.20,
cp_isolation: Optional[List[str]] = None,
cp_exclusion: Optional[List[str]] = None,
preemptive_behaviour: bool = True,
) -> CharsetMatch:
"""
Take a (text-based) file path and try to create another file next to it, this time using UTF-8.
"""
warnings.warn(
"normalize is deprecated and will be removed in 3.0",
DeprecationWarning,
)
results = from_path(
path,
steps,
chunk_size,
threshold,
cp_isolation,
cp_exclusion,
preemptive_behaviour,
)
filename = basename(path)
target_extensions = list(splitext(filename))
if len(results) == 0:
raise IOError(
'Unable to normalize "{}", no encoding charset seems to fit.'.format(
filename
)
)
result = results.best()
target_extensions[0] += "-" + result.encoding # type: ignore
with open(
"{}".format(str(path).replace(filename, "".join(target_extensions))), "wb"
) as fp:
fp.write(result.output()) # type: ignore
return result # type: ignore
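A minimal usage sketch for the public helpers defined above; `./my_subtitle.srt` is a placeholder path:
```python
from charset_normalizer import from_bytes, from_path

# guess the encoding of a file on disk and read it as text
matches = from_path("./my_subtitle.srt")
best = matches.best()
if best is not None:
    print(best.encoding)
    text = str(best)        # decoded, ready-to-use str

# same idea for bytes already in memory, with debug logging enabled
result = from_bytes("café crème".encode("utf-8"), explain=True).best()
print(result.encoding if result else None)   # most likely 'utf_8'
```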

1122
venv/lib/python3.10/site-packages/charset_normalizer/assets/__init__.py

File diff suppressed because it is too large

BIN
venv/lib/python3.10/site-packages/charset_normalizer/assets/__pycache__/__init__.cpython-310.pyc

Binary file not shown.

339
venv/lib/python3.10/site-packages/charset_normalizer/cd.py

@@ -0,0 +1,339 @@
import importlib
from codecs import IncrementalDecoder
from collections import Counter
from functools import lru_cache
from typing import Counter as TypeCounter, Dict, List, Optional, Tuple
from .assets import FREQUENCIES
from .constant import KO_NAMES, LANGUAGE_SUPPORTED_COUNT, TOO_SMALL_SEQUENCE, ZH_NAMES
from .md import is_suspiciously_successive_range
from .models import CoherenceMatches
from .utils import (
is_accentuated,
is_latin,
is_multi_byte_encoding,
is_unicode_range_secondary,
unicode_range,
)
def encoding_unicode_range(iana_name: str) -> List[str]:
"""
Return the Unicode ranges associated with a single-byte code page.
"""
if is_multi_byte_encoding(iana_name):
raise IOError("Function not supported on multi-byte code page")
decoder = importlib.import_module(
"encodings.{}".format(iana_name)
).IncrementalDecoder
p: IncrementalDecoder = decoder(errors="ignore")
seen_ranges: Dict[str, int] = {}
character_count: int = 0
for i in range(0x40, 0xFF):
chunk: str = p.decode(bytes([i]))
if chunk:
character_range: Optional[str] = unicode_range(chunk)
if character_range is None:
continue
if is_unicode_range_secondary(character_range) is False:
if character_range not in seen_ranges:
seen_ranges[character_range] = 0
seen_ranges[character_range] += 1
character_count += 1
return sorted(
[
character_range
for character_range in seen_ranges
if seen_ranges[character_range] / character_count >= 0.15
]
)
def unicode_range_languages(primary_range: str) -> List[str]:
"""
Return the inferred languages associated with a Unicode range.
"""
languages: List[str] = []
for language, characters in FREQUENCIES.items():
for character in characters:
if unicode_range(character) == primary_range:
languages.append(language)
break
return languages
@lru_cache()
def encoding_languages(iana_name: str) -> List[str]:
"""
Single-byte encoding / language association. Some code pages are heavily linked to particular language(s).
This function returns that correspondence.
"""
unicode_ranges: List[str] = encoding_unicode_range(iana_name)
primary_range: Optional[str] = None
for specified_range in unicode_ranges:
if "Latin" not in specified_range:
primary_range = specified_range
break
if primary_range is None:
return ["Latin Based"]
return unicode_range_languages(primary_range)
@lru_cache()
def mb_encoding_languages(iana_name: str) -> List[str]:
"""
Multi-byte encoding / language association. Some code pages are heavily linked to particular language(s).
This function returns that correspondence.
"""
if (
iana_name.startswith("shift_")
or iana_name.startswith("iso2022_jp")
or iana_name.startswith("euc_j")
or iana_name == "cp932"
):
return ["Japanese"]
if iana_name.startswith("gb") or iana_name in ZH_NAMES:
return ["Chinese", "Classical Chinese"]
if iana_name.startswith("iso2022_kr") or iana_name in KO_NAMES:
return ["Korean"]
return []
@lru_cache(maxsize=LANGUAGE_SUPPORTED_COUNT)
def get_target_features(language: str) -> Tuple[bool, bool]:
"""
Determine the main traits of a supported language: whether it contains accented characters and whether it is purely Latin-based.
"""
target_have_accents: bool = False
target_pure_latin: bool = True
for character in FREQUENCIES[language]:
if not target_have_accents and is_accentuated(character):
target_have_accents = True
if target_pure_latin and is_latin(character) is False:
target_pure_latin = False
return target_have_accents, target_pure_latin
def alphabet_languages(
characters: List[str], ignore_non_latin: bool = False
) -> List[str]:
"""
Return the languages associated with the given characters.
"""
languages: List[Tuple[str, float]] = []
source_have_accents = any(is_accentuated(character) for character in characters)
for language, language_characters in FREQUENCIES.items():
target_have_accents, target_pure_latin = get_target_features(language)
if ignore_non_latin and target_pure_latin is False:
continue
if target_have_accents is False and source_have_accents:
continue
character_count: int = len(language_characters)
character_match_count: int = len(
[c for c in language_characters if c in characters]
)
ratio: float = character_match_count / character_count
if ratio >= 0.2:
languages.append((language, ratio))
languages = sorted(languages, key=lambda x: x[1], reverse=True)
return [compatible_language[0] for compatible_language in languages]
def characters_popularity_compare(
language: str, ordered_characters: List[str]
) -> float:
"""
Determine whether an ordered list of characters (by occurrence, from most frequent to rarest) matches a particular language.
The result is a ratio between 0. (absolutely no correspondence) and 1. (near perfect fit).
Beware that this function is not strict on the match, in order to ease detection (meaning a close match counts as 1.).
"""
if language not in FREQUENCIES:
raise ValueError("{} not available".format(language))
character_approved_count: int = 0
FREQUENCIES_language_set = set(FREQUENCIES[language])
for character in ordered_characters:
if character not in FREQUENCIES_language_set:
continue
characters_before_source: List[str] = FREQUENCIES[language][
0 : FREQUENCIES[language].index(character)
]
characters_after_source: List[str] = FREQUENCIES[language][
FREQUENCIES[language].index(character) :
]
characters_before: List[str] = ordered_characters[
0 : ordered_characters.index(character)
]
characters_after: List[str] = ordered_characters[
ordered_characters.index(character) :
]
before_match_count: int = len(
set(characters_before) & set(characters_before_source)
)
after_match_count: int = len(
set(characters_after) & set(characters_after_source)
)
if len(characters_before_source) == 0 and before_match_count <= 4:
character_approved_count += 1
continue
if len(characters_after_source) == 0 and after_match_count <= 4:
character_approved_count += 1
continue
if (
before_match_count / len(characters_before_source) >= 0.4
or after_match_count / len(characters_after_source) >= 0.4
):
character_approved_count += 1
continue
return character_approved_count / len(ordered_characters)
def alpha_unicode_split(decoded_sequence: str) -> List[str]:
"""
Given a decoded text sequence, return a list of str. Unicode range / alphabet separation.
Ex. a text containing English/Latin with a bit of Hebrew will return two items in the resulting list;
one containing the Latin letters and the other the Hebrew ones.
"""
layers: Dict[str, str] = {}
for character in decoded_sequence:
if character.isalpha() is False:
continue
character_range: Optional[str] = unicode_range(character)
if character_range is None:
continue
layer_target_range: Optional[str] = None
for discovered_range in layers:
if (
is_suspiciously_successive_range(discovered_range, character_range)
is False
):
layer_target_range = discovered_range
break
if layer_target_range is None:
layer_target_range = character_range
if layer_target_range not in layers:
layers[layer_target_range] = character.lower()
continue
layers[layer_target_range] += character.lower()
return list(layers.values())
def merge_coherence_ratios(results: List[CoherenceMatches]) -> CoherenceMatches:
"""
This function merges results previously given by the function coherence_ratio.
The return type is the same as coherence_ratio.
"""
per_language_ratios: Dict[str, List[float]] = {}
for result in results:
for sub_result in result:
language, ratio = sub_result
if language not in per_language_ratios:
per_language_ratios[language] = [ratio]
continue
per_language_ratios[language].append(ratio)
merge = [
(
language,
round(
sum(per_language_ratios[language]) / len(per_language_ratios[language]),
4,
),
)
for language in per_language_ratios
]
return sorted(merge, key=lambda x: x[1], reverse=True)
@lru_cache(maxsize=2048)
def coherence_ratio(
decoded_sequence: str, threshold: float = 0.1, lg_inclusion: Optional[str] = None
) -> CoherenceMatches:
"""
Detect ANY language that can be identified in the given sequence. The sequence will be analysed by layers.
A layer = Character extraction by alphabets/ranges.
"""
results: List[Tuple[str, float]] = []
ignore_non_latin: bool = False
sufficient_match_count: int = 0
lg_inclusion_list = lg_inclusion.split(",") if lg_inclusion is not None else []
if "Latin Based" in lg_inclusion_list:
ignore_non_latin = True
lg_inclusion_list.remove("Latin Based")
for layer in alpha_unicode_split(decoded_sequence):
sequence_frequencies: TypeCounter[str] = Counter(layer)
most_common = sequence_frequencies.most_common()
character_count: int = sum(o for c, o in most_common)
if character_count <= TOO_SMALL_SEQUENCE:
continue
popular_character_ordered: List[str] = [c for c, o in most_common]
for language in lg_inclusion_list or alphabet_languages(
popular_character_ordered, ignore_non_latin
):
ratio: float = characters_popularity_compare(
language, popular_character_ordered
)
if ratio < threshold:
continue
elif ratio >= 0.8:
sufficient_match_count += 1
results.append((language, round(ratio, 4)))
if sufficient_match_count >= 3:
break
return sorted(results, key=lambda x: x[1], reverse=True)
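A hedged sketch of calling coherence_ratio directly. It is an internal helper; the import path simply mirrors this file's location (charset_normalizer/cd.py), and the sample text is illustrative.
# Sketch: score language coherence of an already-decoded string (internal API).
from charset_normalizer.cd import coherence_ratio

text = "Ceci est un petit texte en français, avec quelques lettres accentuées : é, à, ù. " * 2
# coherence_ratio returns a list of (language, ratio) tuples, best ratio first.
for language, ratio in coherence_ratio(text):
    print(language, ratio)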

0
venv/lib/python3.10/site-packages/charset_normalizer/cli/__init__.py

BIN
venv/lib/python3.10/site-packages/charset_normalizer/cli/__pycache__/__init__.cpython-310.pyc

Binary file not shown.

BIN
venv/lib/python3.10/site-packages/charset_normalizer/cli/__pycache__/normalizer.cpython-310.pyc

Binary file not shown.

295
venv/lib/python3.10/site-packages/charset_normalizer/cli/normalizer.py

@ -0,0 +1,295 @@
import argparse
import sys
from json import dumps
from os.path import abspath
from platform import python_version
from typing import List, Optional
try:
from unicodedata2 import unidata_version
except ImportError:
from unicodedata import unidata_version
from charset_normalizer import from_fp
from charset_normalizer.models import CliDetectionResult
from charset_normalizer.version import __version__
def query_yes_no(question: str, default: str = "yes") -> bool:
"""Ask a yes/no question via input() and return their answer.
"question" is a string that is presented to the user.
"default" is the presumed answer if the user just hits <Enter>.
It must be "yes" (the default), "no" or None (meaning
an answer is required of the user).
The "answer" return value is True for "yes" or False for "no".
Credit goes to (c) https://stackoverflow.com/questions/3041986/apt-command-line-interface-like-yes-no-input
"""
valid = {"yes": True, "y": True, "ye": True, "no": False, "n": False}
if default is None:
prompt = " [y/n] "
elif default == "yes":
prompt = " [Y/n] "
elif default == "no":
prompt = " [y/N] "
else:
raise ValueError("invalid default answer: '%s'" % default)
while True:
sys.stdout.write(question + prompt)
choice = input().lower()
if default is not None and choice == "":
return valid[default]
elif choice in valid:
return valid[choice]
else:
sys.stdout.write("Please respond with 'yes' or 'no' " "(or 'y' or 'n').\n")
def cli_detect(argv: Optional[List[str]] = None) -> int:
"""
CLI assistant using ARGV and ArgumentParser
:param argv:
:return: 0 if everything is fine, anything else signals trouble
"""
parser = argparse.ArgumentParser(
description="The Real First Universal Charset Detector. "
"Discover originating encoding used on text file. "
"Normalize text to unicode."
)
parser.add_argument(
"files", type=argparse.FileType("rb"), nargs="+", help="File(s) to be analysed"
)
parser.add_argument(
"-v",
"--verbose",
action="store_true",
default=False,
dest="verbose",
help="Display complementary information about file if any. "
"Stdout will contain logs about the detection process.",
)
parser.add_argument(
"-a",
"--with-alternative",
action="store_true",
default=False,
dest="alternatives",
help="Output complementary possibilities if any. Top-level JSON WILL be a list.",
)
parser.add_argument(
"-n",
"--normalize",
action="store_true",
default=False,
dest="normalize",
help="Permit to normalize input file. If not set, program does not write anything.",
)
parser.add_argument(
"-m",
"--minimal",
action="store_true",
default=False,
dest="minimal",
help="Only output the charset detected to STDOUT. Disabling JSON output.",
)
parser.add_argument(
"-r",
"--replace",
action="store_true",
default=False,
dest="replace",
help="Replace file when trying to normalize it instead of creating a new one.",
)
parser.add_argument(
"-f",
"--force",
action="store_true",
default=False,
dest="force",
help="Replace file without asking if you are sure, use this flag with caution.",
)
parser.add_argument(
"-t",
"--threshold",
action="store",
default=0.2,
type=float,
dest="threshold",
help="Define a custom maximum amount of chaos allowed in decoded content. 0. <= chaos <= 1.",
)
parser.add_argument(
"--version",
action="version",
version="Charset-Normalizer {} - Python {} - Unicode {}".format(
__version__, python_version(), unidata_version
),
help="Show version information and exit.",
)
args = parser.parse_args(argv)
if args.replace is True and args.normalize is False:
print("Use --replace in addition of --normalize only.", file=sys.stderr)
return 1
if args.force is True and args.replace is False:
print("Use --force in addition of --replace only.", file=sys.stderr)
return 1
if args.threshold < 0.0 or args.threshold > 1.0:
print("--threshold VALUE should be between 0. AND 1.", file=sys.stderr)
return 1
x_ = []
for my_file in args.files:
matches = from_fp(my_file, threshold=args.threshold, explain=args.verbose)
best_guess = matches.best()
if best_guess is None:
print(
'Unable to identify originating encoding for "{}". {}'.format(
my_file.name,
"Maybe try increasing maximum amount of chaos."
if args.threshold < 1.0
else "",
),
file=sys.stderr,
)
x_.append(
CliDetectionResult(
abspath(my_file.name),
None,
[],
[],
"Unknown",
[],
False,
1.0,
0.0,
None,
True,
)
)
else:
x_.append(
CliDetectionResult(
abspath(my_file.name),
best_guess.encoding,
best_guess.encoding_aliases,
[
cp
for cp in best_guess.could_be_from_charset
if cp != best_guess.encoding
],
best_guess.language,
best_guess.alphabets,
best_guess.bom,
best_guess.percent_chaos,
best_guess.percent_coherence,
None,
True,
)
)
if len(matches) > 1 and args.alternatives:
for el in matches:
if el != best_guess:
x_.append(
CliDetectionResult(
abspath(my_file.name),
el.encoding,
el.encoding_aliases,
[
cp
for cp in el.could_be_from_charset
if cp != el.encoding
],
el.language,
el.alphabets,
el.bom,
el.percent_chaos,
el.percent_coherence,
None,
False,
)
)
if args.normalize is True:
if best_guess.encoding.startswith("utf") is True:
print(
'"{}" file does not need to be normalized, as it already came from unicode.'.format(
my_file.name
),
file=sys.stderr,
)
if my_file.closed is False:
my_file.close()
continue
o_: List[str] = my_file.name.split(".")
if args.replace is False:
o_.insert(-1, best_guess.encoding)
if my_file.closed is False:
my_file.close()
elif (
args.force is False
and query_yes_no(
'Are you sure to normalize "{}" by replacing it ?'.format(
my_file.name
),
"no",
)
is False
):
if my_file.closed is False:
my_file.close()
continue
try:
x_[0].unicode_path = abspath("./{}".format(".".join(o_)))
with open(x_[0].unicode_path, "w", encoding="utf-8") as fp:
fp.write(str(best_guess))
except IOError as e:
print(str(e), file=sys.stderr)
if my_file.closed is False:
my_file.close()
return 2
if my_file.closed is False:
my_file.close()
if args.minimal is False:
print(
dumps(
[el.__dict__ for el in x_] if len(x_) > 1 else x_[0].__dict__,
ensure_ascii=True,
indent=4,
)
)
else:
for my_file in args.files:
print(
", ".join(
[
el.encoding or "undefined"
for el in x_
if el.path == abspath(my_file.name)
]
)
)
return 0
if __name__ == "__main__":
cli_detect()
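cli_detect accepts an argv list, so the entry point can also be exercised programmatically; a small sketch (the file name sample.txt is hypothetical).
# Sketch: driving the CLI programmatically; equivalent in spirit to
#   python -m charset_normalizer.cli.normalizer --minimal sample.txt
from charset_normalizer.cli.normalizer import cli_detect

exit_code = cli_detect(["--minimal", "sample.txt"])  # hypothetical file
print("exit code:", exit_code)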

497
venv/lib/python3.10/site-packages/charset_normalizer/constant.py

@ -0,0 +1,497 @@
from codecs import BOM_UTF8, BOM_UTF16_BE, BOM_UTF16_LE, BOM_UTF32_BE, BOM_UTF32_LE
from encodings.aliases import aliases
from re import IGNORECASE, compile as re_compile
from typing import Dict, List, Set, Union
from .assets import FREQUENCIES
# Contains, for each eligible encoding, its SIG/BOM as a bytes item or a list of byte sequences
ENCODING_MARKS: Dict[str, Union[bytes, List[bytes]]] = {
"utf_8": BOM_UTF8,
"utf_7": [
b"\x2b\x2f\x76\x38",
b"\x2b\x2f\x76\x39",
b"\x2b\x2f\x76\x2b",
b"\x2b\x2f\x76\x2f",
b"\x2b\x2f\x76\x38\x2d",
],
"gb18030": b"\x84\x31\x95\x33",
"utf_32": [BOM_UTF32_BE, BOM_UTF32_LE],
"utf_16": [BOM_UTF16_BE, BOM_UTF16_LE],
}
TOO_SMALL_SEQUENCE: int = 32
TOO_BIG_SEQUENCE: int = int(10e6)
UTF8_MAXIMAL_ALLOCATION: int = 1112064
UNICODE_RANGES_COMBINED: Dict[str, range] = {
"Control character": range(31 + 1),
"Basic Latin": range(32, 127 + 1),
"Latin-1 Supplement": range(128, 255 + 1),
"Latin Extended-A": range(256, 383 + 1),
"Latin Extended-B": range(384, 591 + 1),
"IPA Extensions": range(592, 687 + 1),
"Spacing Modifier Letters": range(688, 767 + 1),
"Combining Diacritical Marks": range(768, 879 + 1),
"Greek and Coptic": range(880, 1023 + 1),
"Cyrillic": range(1024, 1279 + 1),
"Cyrillic Supplement": range(1280, 1327 + 1),
"Armenian": range(1328, 1423 + 1),
"Hebrew": range(1424, 1535 + 1),
"Arabic": range(1536, 1791 + 1),
"Syriac": range(1792, 1871 + 1),
"Arabic Supplement": range(1872, 1919 + 1),
"Thaana": range(1920, 1983 + 1),
"NKo": range(1984, 2047 + 1),
"Samaritan": range(2048, 2111 + 1),
"Mandaic": range(2112, 2143 + 1),
"Syriac Supplement": range(2144, 2159 + 1),
"Arabic Extended-A": range(2208, 2303 + 1),
"Devanagari": range(2304, 2431 + 1),
"Bengali": range(2432, 2559 + 1),
"Gurmukhi": range(2560, 2687 + 1),
"Gujarati": range(2688, 2815 + 1),
"Oriya": range(2816, 2943 + 1),
"Tamil": range(2944, 3071 + 1),
"Telugu": range(3072, 3199 + 1),
"Kannada": range(3200, 3327 + 1),
"Malayalam": range(3328, 3455 + 1),
"Sinhala": range(3456, 3583 + 1),
"Thai": range(3584, 3711 + 1),
"Lao": range(3712, 3839 + 1),
"Tibetan": range(3840, 4095 + 1),
"Myanmar": range(4096, 4255 + 1),
"Georgian": range(4256, 4351 + 1),
"Hangul Jamo": range(4352, 4607 + 1),
"Ethiopic": range(4608, 4991 + 1),
"Ethiopic Supplement": range(4992, 5023 + 1),
"Cherokee": range(5024, 5119 + 1),
"Unified Canadian Aboriginal Syllabics": range(5120, 5759 + 1),
"Ogham": range(5760, 5791 + 1),
"Runic": range(5792, 5887 + 1),
"Tagalog": range(5888, 5919 + 1),
"Hanunoo": range(5920, 5951 + 1),
"Buhid": range(5952, 5983 + 1),
"Tagbanwa": range(5984, 6015 + 1),
"Khmer": range(6016, 6143 + 1),
"Mongolian": range(6144, 6319 + 1),
"Unified Canadian Aboriginal Syllabics Extended": range(6320, 6399 + 1),
"Limbu": range(6400, 6479 + 1),
"Tai Le": range(6480, 6527 + 1),
"New Tai Lue": range(6528, 6623 + 1),
"Khmer Symbols": range(6624, 6655 + 1),
"Buginese": range(6656, 6687 + 1),
"Tai Tham": range(6688, 6831 + 1),
"Combining Diacritical Marks Extended": range(6832, 6911 + 1),
"Balinese": range(6912, 7039 + 1),
"Sundanese": range(7040, 7103 + 1),
"Batak": range(7104, 7167 + 1),
"Lepcha": range(7168, 7247 + 1),
"Ol Chiki": range(7248, 7295 + 1),
"Cyrillic Extended C": range(7296, 7311 + 1),
"Sundanese Supplement": range(7360, 7375 + 1),
"Vedic Extensions": range(7376, 7423 + 1),
"Phonetic Extensions": range(7424, 7551 + 1),
"Phonetic Extensions Supplement": range(7552, 7615 + 1),
"Combining Diacritical Marks Supplement": range(7616, 7679 + 1),
"Latin Extended Additional": range(7680, 7935 + 1),
"Greek Extended": range(7936, 8191 + 1),
"General Punctuation": range(8192, 8303 + 1),
"Superscripts and Subscripts": range(8304, 8351 + 1),
"Currency Symbols": range(8352, 8399 + 1),
"Combining Diacritical Marks for Symbols": range(8400, 8447 + 1),
"Letterlike Symbols": range(8448, 8527 + 1),
"Number Forms": range(8528, 8591 + 1),
"Arrows": range(8592, 8703 + 1),
"Mathematical Operators": range(8704, 8959 + 1),
"Miscellaneous Technical": range(8960, 9215 + 1),
"Control Pictures": range(9216, 9279 + 1),
"Optical Character Recognition": range(9280, 9311 + 1),
"Enclosed Alphanumerics": range(9312, 9471 + 1),
"Box Drawing": range(9472, 9599 + 1),
"Block Elements": range(9600, 9631 + 1),
"Geometric Shapes": range(9632, 9727 + 1),
"Miscellaneous Symbols": range(9728, 9983 + 1),
"Dingbats": range(9984, 10175 + 1),
"Miscellaneous Mathematical Symbols-A": range(10176, 10223 + 1),
"Supplemental Arrows-A": range(10224, 10239 + 1),
"Braille Patterns": range(10240, 10495 + 1),
"Supplemental Arrows-B": range(10496, 10623 + 1),
"Miscellaneous Mathematical Symbols-B": range(10624, 10751 + 1),
"Supplemental Mathematical Operators": range(10752, 11007 + 1),
"Miscellaneous Symbols and Arrows": range(11008, 11263 + 1),
"Glagolitic": range(11264, 11359 + 1),
"Latin Extended-C": range(11360, 11391 + 1),
"Coptic": range(11392, 11519 + 1),
"Georgian Supplement": range(11520, 11567 + 1),
"Tifinagh": range(11568, 11647 + 1),
"Ethiopic Extended": range(11648, 11743 + 1),
"Cyrillic Extended-A": range(11744, 11775 + 1),
"Supplemental Punctuation": range(11776, 11903 + 1),
"CJK Radicals Supplement": range(11904, 12031 + 1),
"Kangxi Radicals": range(12032, 12255 + 1),
"Ideographic Description Characters": range(12272, 12287 + 1),
"CJK Symbols and Punctuation": range(12288, 12351 + 1),
"Hiragana": range(12352, 12447 + 1),
"Katakana": range(12448, 12543 + 1),
"Bopomofo": range(12544, 12591 + 1),
"Hangul Compatibility Jamo": range(12592, 12687 + 1),
"Kanbun": range(12688, 12703 + 1),
"Bopomofo Extended": range(12704, 12735 + 1),
"CJK Strokes": range(12736, 12783 + 1),
"Katakana Phonetic Extensions": range(12784, 12799 + 1),
"Enclosed CJK Letters and Months": range(12800, 13055 + 1),
"CJK Compatibility": range(13056, 13311 + 1),
"CJK Unified Ideographs Extension A": range(13312, 19903 + 1),
"Yijing Hexagram Symbols": range(19904, 19967 + 1),
"CJK Unified Ideographs": range(19968, 40959 + 1),
"Yi Syllables": range(40960, 42127 + 1),
"Yi Radicals": range(42128, 42191 + 1),
"Lisu": range(42192, 42239 + 1),
"Vai": range(42240, 42559 + 1),
"Cyrillic Extended-B": range(42560, 42655 + 1),
"Bamum": range(42656, 42751 + 1),
"Modifier Tone Letters": range(42752, 42783 + 1),
"Latin Extended-D": range(42784, 43007 + 1),
"Syloti Nagri": range(43008, 43055 + 1),
"Common Indic Number Forms": range(43056, 43071 + 1),
"Phags-pa": range(43072, 43135 + 1),
"Saurashtra": range(43136, 43231 + 1),
"Devanagari Extended": range(43232, 43263 + 1),
"Kayah Li": range(43264, 43311 + 1),
"Rejang": range(43312, 43359 + 1),
"Hangul Jamo Extended-A": range(43360, 43391 + 1),
"Javanese": range(43392, 43487 + 1),
"Myanmar Extended-B": range(43488, 43519 + 1),
"Cham": range(43520, 43615 + 1),
"Myanmar Extended-A": range(43616, 43647 + 1),
"Tai Viet": range(43648, 43743 + 1),
"Meetei Mayek Extensions": range(43744, 43775 + 1),
"Ethiopic Extended-A": range(43776, 43823 + 1),
"Latin Extended-E": range(43824, 43887 + 1),
"Cherokee Supplement": range(43888, 43967 + 1),
"Meetei Mayek": range(43968, 44031 + 1),
"Hangul Syllables": range(44032, 55215 + 1),
"Hangul Jamo Extended-B": range(55216, 55295 + 1),
"High Surrogates": range(55296, 56191 + 1),
"High Private Use Surrogates": range(56192, 56319 + 1),
"Low Surrogates": range(56320, 57343 + 1),
"Private Use Area": range(57344, 63743 + 1),
"CJK Compatibility Ideographs": range(63744, 64255 + 1),
"Alphabetic Presentation Forms": range(64256, 64335 + 1),
"Arabic Presentation Forms-A": range(64336, 65023 + 1),
"Variation Selectors": range(65024, 65039 + 1),
"Vertical Forms": range(65040, 65055 + 1),
"Combining Half Marks": range(65056, 65071 + 1),
"CJK Compatibility Forms": range(65072, 65103 + 1),
"Small Form Variants": range(65104, 65135 + 1),
"Arabic Presentation Forms-B": range(65136, 65279 + 1),
"Halfwidth and Fullwidth Forms": range(65280, 65519 + 1),
"Specials": range(65520, 65535 + 1),
"Linear B Syllabary": range(65536, 65663 + 1),
"Linear B Ideograms": range(65664, 65791 + 1),
"Aegean Numbers": range(65792, 65855 + 1),
"Ancient Greek Numbers": range(65856, 65935 + 1),
"Ancient Symbols": range(65936, 65999 + 1),
"Phaistos Disc": range(66000, 66047 + 1),
"Lycian": range(66176, 66207 + 1),
"Carian": range(66208, 66271 + 1),
"Coptic Epact Numbers": range(66272, 66303 + 1),
"Old Italic": range(66304, 66351 + 1),
"Gothic": range(66352, 66383 + 1),
"Old Permic": range(66384, 66431 + 1),
"Ugaritic": range(66432, 66463 + 1),
"Old Persian": range(66464, 66527 + 1),
"Deseret": range(66560, 66639 + 1),
"Shavian": range(66640, 66687 + 1),
"Osmanya": range(66688, 66735 + 1),
"Osage": range(66736, 66815 + 1),
"Elbasan": range(66816, 66863 + 1),
"Caucasian Albanian": range(66864, 66927 + 1),
"Linear A": range(67072, 67455 + 1),
"Cypriot Syllabary": range(67584, 67647 + 1),
"Imperial Aramaic": range(67648, 67679 + 1),
"Palmyrene": range(67680, 67711 + 1),
"Nabataean": range(67712, 67759 + 1),
"Hatran": range(67808, 67839 + 1),
"Phoenician": range(67840, 67871 + 1),
"Lydian": range(67872, 67903 + 1),
"Meroitic Hieroglyphs": range(67968, 67999 + 1),
"Meroitic Cursive": range(68000, 68095 + 1),
"Kharoshthi": range(68096, 68191 + 1),
"Old South Arabian": range(68192, 68223 + 1),
"Old North Arabian": range(68224, 68255 + 1),
"Manichaean": range(68288, 68351 + 1),
"Avestan": range(68352, 68415 + 1),
"Inscriptional Parthian": range(68416, 68447 + 1),
"Inscriptional Pahlavi": range(68448, 68479 + 1),
"Psalter Pahlavi": range(68480, 68527 + 1),
"Old Turkic": range(68608, 68687 + 1),
"Old Hungarian": range(68736, 68863 + 1),
"Rumi Numeral Symbols": range(69216, 69247 + 1),
"Brahmi": range(69632, 69759 + 1),
"Kaithi": range(69760, 69839 + 1),
"Sora Sompeng": range(69840, 69887 + 1),
"Chakma": range(69888, 69967 + 1),
"Mahajani": range(69968, 70015 + 1),
"Sharada": range(70016, 70111 + 1),
"Sinhala Archaic Numbers": range(70112, 70143 + 1),
"Khojki": range(70144, 70223 + 1),
"Multani": range(70272, 70319 + 1),
"Khudawadi": range(70320, 70399 + 1),
"Grantha": range(70400, 70527 + 1),
"Newa": range(70656, 70783 + 1),
"Tirhuta": range(70784, 70879 + 1),
"Siddham": range(71040, 71167 + 1),
"Modi": range(71168, 71263 + 1),
"Mongolian Supplement": range(71264, 71295 + 1),
"Takri": range(71296, 71375 + 1),
"Ahom": range(71424, 71487 + 1),
"Warang Citi": range(71840, 71935 + 1),
"Zanabazar Square": range(72192, 72271 + 1),
"Soyombo": range(72272, 72367 + 1),
"Pau Cin Hau": range(72384, 72447 + 1),
"Bhaiksuki": range(72704, 72815 + 1),
"Marchen": range(72816, 72895 + 1),
"Masaram Gondi": range(72960, 73055 + 1),
"Cuneiform": range(73728, 74751 + 1),
"Cuneiform Numbers and Punctuation": range(74752, 74879 + 1),
"Early Dynastic Cuneiform": range(74880, 75087 + 1),
"Egyptian Hieroglyphs": range(77824, 78895 + 1),
"Anatolian Hieroglyphs": range(82944, 83583 + 1),
"Bamum Supplement": range(92160, 92735 + 1),
"Mro": range(92736, 92783 + 1),
"Bassa Vah": range(92880, 92927 + 1),
"Pahawh Hmong": range(92928, 93071 + 1),
"Miao": range(93952, 94111 + 1),
"Ideographic Symbols and Punctuation": range(94176, 94207 + 1),
"Tangut": range(94208, 100351 + 1),
"Tangut Components": range(100352, 101119 + 1),
"Kana Supplement": range(110592, 110847 + 1),
"Kana Extended-A": range(110848, 110895 + 1),
"Nushu": range(110960, 111359 + 1),
"Duployan": range(113664, 113823 + 1),
"Shorthand Format Controls": range(113824, 113839 + 1),
"Byzantine Musical Symbols": range(118784, 119039 + 1),
"Musical Symbols": range(119040, 119295 + 1),
"Ancient Greek Musical Notation": range(119296, 119375 + 1),
"Tai Xuan Jing Symbols": range(119552, 119647 + 1),
"Counting Rod Numerals": range(119648, 119679 + 1),
"Mathematical Alphanumeric Symbols": range(119808, 120831 + 1),
"Sutton SignWriting": range(120832, 121519 + 1),
"Glagolitic Supplement": range(122880, 122927 + 1),
"Mende Kikakui": range(124928, 125151 + 1),
"Adlam": range(125184, 125279 + 1),
"Arabic Mathematical Alphabetic Symbols": range(126464, 126719 + 1),
"Mahjong Tiles": range(126976, 127023 + 1),
"Domino Tiles": range(127024, 127135 + 1),
"Playing Cards": range(127136, 127231 + 1),
"Enclosed Alphanumeric Supplement": range(127232, 127487 + 1),
"Enclosed Ideographic Supplement": range(127488, 127743 + 1),
"Miscellaneous Symbols and Pictographs": range(127744, 128511 + 1),
"Emoticons range(Emoji)": range(128512, 128591 + 1),
"Ornamental Dingbats": range(128592, 128639 + 1),
"Transport and Map Symbols": range(128640, 128767 + 1),
"Alchemical Symbols": range(128768, 128895 + 1),
"Geometric Shapes Extended": range(128896, 129023 + 1),
"Supplemental Arrows-C": range(129024, 129279 + 1),
"Supplemental Symbols and Pictographs": range(129280, 129535 + 1),
"CJK Unified Ideographs Extension B": range(131072, 173791 + 1),
"CJK Unified Ideographs Extension C": range(173824, 177983 + 1),
"CJK Unified Ideographs Extension D": range(177984, 178207 + 1),
"CJK Unified Ideographs Extension E": range(178208, 183983 + 1),
"CJK Unified Ideographs Extension F": range(183984, 191471 + 1),
"CJK Compatibility Ideographs Supplement": range(194560, 195103 + 1),
"Tags": range(917504, 917631 + 1),
"Variation Selectors Supplement": range(917760, 917999 + 1),
}
UNICODE_SECONDARY_RANGE_KEYWORD: List[str] = [
"Supplement",
"Extended",
"Extensions",
"Modifier",
"Marks",
"Punctuation",
"Symbols",
"Forms",
"Operators",
"Miscellaneous",
"Drawing",
"Block",
"Shapes",
"Supplemental",
"Tags",
]
RE_POSSIBLE_ENCODING_INDICATION = re_compile(
r"(?:(?:encoding)|(?:charset)|(?:coding))(?:[\:= ]{1,10})(?:[\"\']?)([a-zA-Z0-9\-_]+)(?:[\"\']?)",
IGNORECASE,
)
IANA_SUPPORTED: List[str] = sorted(
filter(
lambda x: x.endswith("_codec") is False
and x not in {"rot_13", "tactis", "mbcs"},
list(set(aliases.values())),
)
)
IANA_SUPPORTED_COUNT: int = len(IANA_SUPPORTED)
# Pre-computed code pages that are similar, as measured by the function cp_similarity.
IANA_SUPPORTED_SIMILAR: Dict[str, List[str]] = {
"cp037": ["cp1026", "cp1140", "cp273", "cp500"],
"cp1026": ["cp037", "cp1140", "cp273", "cp500"],
"cp1125": ["cp866"],
"cp1140": ["cp037", "cp1026", "cp273", "cp500"],
"cp1250": ["iso8859_2"],
"cp1251": ["kz1048", "ptcp154"],
"cp1252": ["iso8859_15", "iso8859_9", "latin_1"],
"cp1253": ["iso8859_7"],
"cp1254": ["iso8859_15", "iso8859_9", "latin_1"],
"cp1257": ["iso8859_13"],
"cp273": ["cp037", "cp1026", "cp1140", "cp500"],
"cp437": ["cp850", "cp858", "cp860", "cp861", "cp862", "cp863", "cp865"],
"cp500": ["cp037", "cp1026", "cp1140", "cp273"],
"cp850": ["cp437", "cp857", "cp858", "cp865"],
"cp857": ["cp850", "cp858", "cp865"],
"cp858": ["cp437", "cp850", "cp857", "cp865"],
"cp860": ["cp437", "cp861", "cp862", "cp863", "cp865"],
"cp861": ["cp437", "cp860", "cp862", "cp863", "cp865"],
"cp862": ["cp437", "cp860", "cp861", "cp863", "cp865"],
"cp863": ["cp437", "cp860", "cp861", "cp862", "cp865"],
"cp865": ["cp437", "cp850", "cp857", "cp858", "cp860", "cp861", "cp862", "cp863"],
"cp866": ["cp1125"],
"iso8859_10": ["iso8859_14", "iso8859_15", "iso8859_4", "iso8859_9", "latin_1"],
"iso8859_11": ["tis_620"],
"iso8859_13": ["cp1257"],
"iso8859_14": [
"iso8859_10",
"iso8859_15",
"iso8859_16",
"iso8859_3",
"iso8859_9",
"latin_1",
],
"iso8859_15": [
"cp1252",
"cp1254",
"iso8859_10",
"iso8859_14",
"iso8859_16",
"iso8859_3",
"iso8859_9",
"latin_1",
],
"iso8859_16": [
"iso8859_14",
"iso8859_15",
"iso8859_2",
"iso8859_3",
"iso8859_9",
"latin_1",
],
"iso8859_2": ["cp1250", "iso8859_16", "iso8859_4"],
"iso8859_3": ["iso8859_14", "iso8859_15", "iso8859_16", "iso8859_9", "latin_1"],
"iso8859_4": ["iso8859_10", "iso8859_2", "iso8859_9", "latin_1"],
"iso8859_7": ["cp1253"],
"iso8859_9": [
"cp1252",
"cp1254",
"cp1258",
"iso8859_10",
"iso8859_14",
"iso8859_15",
"iso8859_16",
"iso8859_3",
"iso8859_4",
"latin_1",
],
"kz1048": ["cp1251", "ptcp154"],
"latin_1": [
"cp1252",
"cp1254",
"cp1258",
"iso8859_10",
"iso8859_14",
"iso8859_15",
"iso8859_16",
"iso8859_3",
"iso8859_4",
"iso8859_9",
],
"mac_iceland": ["mac_roman", "mac_turkish"],
"mac_roman": ["mac_iceland", "mac_turkish"],
"mac_turkish": ["mac_iceland", "mac_roman"],
"ptcp154": ["cp1251", "kz1048"],
"tis_620": ["iso8859_11"],
}
CHARDET_CORRESPONDENCE: Dict[str, str] = {
"iso2022_kr": "ISO-2022-KR",
"iso2022_jp": "ISO-2022-JP",
"euc_kr": "EUC-KR",
"tis_620": "TIS-620",
"utf_32": "UTF-32",
"euc_jp": "EUC-JP",
"koi8_r": "KOI8-R",
"iso8859_1": "ISO-8859-1",
"iso8859_2": "ISO-8859-2",
"iso8859_5": "ISO-8859-5",
"iso8859_6": "ISO-8859-6",
"iso8859_7": "ISO-8859-7",
"iso8859_8": "ISO-8859-8",
"utf_16": "UTF-16",
"cp855": "IBM855",
"mac_cyrillic": "MacCyrillic",
"gb2312": "GB2312",
"gb18030": "GB18030",
"cp932": "CP932",
"cp866": "IBM866",
"utf_8": "utf-8",
"utf_8_sig": "UTF-8-SIG",
"shift_jis": "SHIFT_JIS",
"big5": "Big5",
"cp1250": "windows-1250",
"cp1251": "windows-1251",
"cp1252": "Windows-1252",
"cp1253": "windows-1253",
"cp1255": "windows-1255",
"cp1256": "windows-1256",
"cp1254": "Windows-1254",
"cp949": "CP949",
}
COMMON_SAFE_ASCII_CHARACTERS: Set[str] = {
"<",
">",
"=",
":",
"/",
"&",
";",
"{",
"}",
"[",
"]",
",",
"|",
'"',
"-",
}
KO_NAMES: Set[str] = {"johab", "cp949", "euc_kr"}
ZH_NAMES: Set[str] = {"big5", "cp950", "big5hkscs", "hz"}
NOT_PRINTABLE_PATTERN = re_compile(r"[0-9\W\n\r\t]+")
LANGUAGE_SUPPORTED_COUNT: int = len(FREQUENCIES)
# Logging LEVEL below DEBUG
TRACE: int = 5
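An illustrative way to consult the tables above from Python. The find_range helper below is an assumption for demonstration only, not part of the library.
# Illustrative lookups against the constants defined above.
from charset_normalizer.constant import ENCODING_MARKS, UNICODE_RANGES_COMBINED


def find_range(code_point: int) -> str:
    # Linear scan over the named ranges; returns the first range containing the code point.
    for name, span in UNICODE_RANGES_COMBINED.items():
        if code_point in span:
            return name
    return "Unknown"


print(find_range(ord("é")))     # Latin-1 Supplement
print(find_range(ord("あ")))    # Hiragana
print(ENCODING_MARKS["utf_8"])  # the UTF-8 BOM/SIG, b'\xef\xbb\xbf'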

95
venv/lib/python3.10/site-packages/charset_normalizer/legacy.py

@ -0,0 +1,95 @@
import warnings
from typing import Dict, Optional, Union
from .api import from_bytes, from_fp, from_path, normalize
from .constant import CHARDET_CORRESPONDENCE
from .models import CharsetMatch, CharsetMatches
def detect(byte_str: bytes) -> Dict[str, Optional[Union[str, float]]]:
"""
chardet legacy method.
Detect the encoding of the given byte string. It should be mostly backward-compatible.
The encoding name will match chardet's own spelling whenever possible (except for encoding names it does not support).
This function is deprecated; it is kept to ease migration of existing projects, consult the documentation for
further information. Not planned for removal.
:param byte_str: The byte sequence to examine.
"""
if not isinstance(byte_str, (bytearray, bytes)):
raise TypeError( # pragma: nocover
"Expected object of type bytes or bytearray, got: "
"{0}".format(type(byte_str))
)
if isinstance(byte_str, bytearray):
byte_str = bytes(byte_str)
r = from_bytes(byte_str).best()
encoding = r.encoding if r is not None else None
language = r.language if r is not None and r.language != "Unknown" else ""
confidence = 1.0 - r.chaos if r is not None else None
# Note: CharsetNormalizer does not return 'UTF-8-SIG' because the sig gets stripped in the detection/normalization process,
# but chardet does return 'utf-8-sig' and it is a valid codec name.
if r is not None and encoding == "utf_8" and r.bom:
encoding += "_sig"
return {
"encoding": encoding
if encoding not in CHARDET_CORRESPONDENCE
else CHARDET_CORRESPONDENCE[encoding],
"language": language,
"confidence": confidence,
}
class CharsetNormalizerMatch(CharsetMatch):
pass
class CharsetNormalizerMatches(CharsetMatches):
@staticmethod
def from_fp(*args, **kwargs): # type: ignore
warnings.warn( # pragma: nocover
"staticmethod from_fp, from_bytes, from_path and normalize are deprecated "
"and scheduled to be removed in 3.0",
DeprecationWarning,
)
return from_fp(*args, **kwargs) # pragma: nocover
@staticmethod
def from_bytes(*args, **kwargs): # type: ignore
warnings.warn( # pragma: nocover
"staticmethod from_fp, from_bytes, from_path and normalize are deprecated "
"and scheduled to be removed in 3.0",
DeprecationWarning,
)
return from_bytes(*args, **kwargs) # pragma: nocover
@staticmethod
def from_path(*args, **kwargs): # type: ignore
warnings.warn( # pragma: nocover
"staticmethod from_fp, from_bytes, from_path and normalize are deprecated "
"and scheduled to be removed in 3.0",
DeprecationWarning,
)
return from_path(*args, **kwargs) # pragma: nocover
@staticmethod
def normalize(*args, **kwargs): # type: ignore
warnings.warn( # pragma: nocover
"staticmethod from_fp, from_bytes, from_path and normalize are deprecated "
"and scheduled to be removed in 3.0",
DeprecationWarning,
)
return normalize(*args, **kwargs) # pragma: nocover
class CharsetDetector(CharsetNormalizerMatches):
pass
class CharsetDoctor(CharsetNormalizerMatches):
pass
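A minimal sketch of the chardet-compatible detect helper defined above. The sample byte string is illustrative; the actual values in the returned dict depend on the input.
# Sketch: chardet-style detection on a byte string.
from charset_normalizer.legacy import detect

result = detect("Déjà vu, encore une fois.".encode("cp1252"))
# result has the keys 'encoding', 'language' and 'confidence'.
print(result["encoding"], result["language"], result["confidence"])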

553
venv/lib/python3.10/site-packages/charset_normalizer/md.py

@ -0,0 +1,553 @@
from functools import lru_cache
from typing import List, Optional
from .constant import COMMON_SAFE_ASCII_CHARACTERS, UNICODE_SECONDARY_RANGE_KEYWORD
from .utils import (
is_accentuated,
is_ascii,
is_case_variable,
is_cjk,
is_emoticon,
is_hangul,
is_hiragana,
is_katakana,
is_latin,
is_punctuation,
is_separator,
is_symbol,
is_thai,
is_unprintable,
remove_accent,
unicode_range,
)
class MessDetectorPlugin:
"""
Base abstract class used for mess detection plugins.
All detectors MUST extend and implement given methods.
"""
def eligible(self, character: str) -> bool:
"""
Determine if given character should be fed in.
"""
raise NotImplementedError # pragma: nocover
def feed(self, character: str) -> None:
"""
The main routine to be executed upon character.
Insert the logic by which the text would be considered chaotic.
"""
raise NotImplementedError # pragma: nocover
def reset(self) -> None: # pragma: no cover
"""
Reset the plugin to its initial state.
"""
raise NotImplementedError
@property
def ratio(self) -> float:
"""
Compute the chaos ratio based on what feed() has seen.
Must NOT be lower than 0.; there is no upper bound.
"""
raise NotImplementedError # pragma: nocover
class TooManySymbolOrPunctuationPlugin(MessDetectorPlugin):
def __init__(self) -> None:
self._punctuation_count: int = 0
self._symbol_count: int = 0
self._character_count: int = 0
self._last_printable_char: Optional[str] = None
self._frenzy_symbol_in_word: bool = False
def eligible(self, character: str) -> bool:
return character.isprintable()
def feed(self, character: str) -> None:
self._character_count += 1
if (
character != self._last_printable_char
and character not in COMMON_SAFE_ASCII_CHARACTERS
):
if is_punctuation(character):
self._punctuation_count += 1
elif (
character.isdigit() is False
and is_symbol(character)
and is_emoticon(character) is False
):
self._symbol_count += 2
self._last_printable_char = character
def reset(self) -> None: # pragma: no cover
self._punctuation_count = 0
self._character_count = 0
self._symbol_count = 0
@property
def ratio(self) -> float:
if self._character_count == 0:
return 0.0
ratio_of_punctuation: float = (
self._punctuation_count + self._symbol_count
) / self._character_count
return ratio_of_punctuation if ratio_of_punctuation >= 0.3 else 0.0
class TooManyAccentuatedPlugin(MessDetectorPlugin):
def __init__(self) -> None:
self._character_count: int = 0
self._accentuated_count: int = 0
def eligible(self, character: str) -> bool:
return character.isalpha()
def feed(self, character: str) -> None:
self._character_count += 1
if is_accentuated(character):
self._accentuated_count += 1
def reset(self) -> None: # pragma: no cover
self._character_count = 0
self._accentuated_count = 0
@property
def ratio(self) -> float:
if self._character_count == 0:
return 0.0
ratio_of_accentuation: float = self._accentuated_count / self._character_count
return ratio_of_accentuation if ratio_of_accentuation >= 0.35 else 0.0
class UnprintablePlugin(MessDetectorPlugin):
def __init__(self) -> None:
self._unprintable_count: int = 0
self._character_count: int = 0
def eligible(self, character: str) -> bool:
return True
def feed(self, character: str) -> None:
if is_unprintable(character):
self._unprintable_count += 1
self._character_count += 1
def reset(self) -> None: # pragma: no cover
self._unprintable_count = 0
@property
def ratio(self) -> float:
if self._character_count == 0:
return 0.0
return (self._unprintable_count * 8) / self._character_count
class SuspiciousDuplicateAccentPlugin(MessDetectorPlugin):
def __init__(self) -> None:
self._successive_count: int = 0
self._character_count: int = 0
self._last_latin_character: Optional[str] = None
def eligible(self, character: str) -> bool:
return character.isalpha() and is_latin(character)
def feed(self, character: str) -> None:
self._character_count += 1
if (
self._last_latin_character is not None
and is_accentuated(character)
and is_accentuated(self._last_latin_character)
):
if character.isupper() and self._last_latin_character.isupper():
self._successive_count += 1
# Worse if it's the same char duplicated with a different accent.
if remove_accent(character) == remove_accent(self._last_latin_character):
self._successive_count += 1
self._last_latin_character = character
def reset(self) -> None: # pragma: no cover
self._successive_count = 0
self._character_count = 0
self._last_latin_character = None
@property
def ratio(self) -> float:
if self._character_count == 0:
return 0.0
return (self._successive_count * 2) / self._character_count
class SuspiciousRange(MessDetectorPlugin):
def __init__(self) -> None:
self._suspicious_successive_range_count: int = 0
self._character_count: int = 0
self._last_printable_seen: Optional[str] = None
def eligible(self, character: str) -> bool:
return character.isprintable()
def feed(self, character: str) -> None:
self._character_count += 1
if (
character.isspace()
or is_punctuation(character)
or character in COMMON_SAFE_ASCII_CHARACTERS
):
self._last_printable_seen = None
return
if self._last_printable_seen is None:
self._last_printable_seen = character
return
unicode_range_a: Optional[str] = unicode_range(self._last_printable_seen)
unicode_range_b: Optional[str] = unicode_range(character)
if is_suspiciously_successive_range(unicode_range_a, unicode_range_b):
self._suspicious_successive_range_count += 1
self._last_printable_seen = character
def reset(self) -> None: # pragma: no cover
self._character_count = 0
self._suspicious_successive_range_count = 0
self._last_printable_seen = None
@property
def ratio(self) -> float:
if self._character_count == 0:
return 0.0
ratio_of_suspicious_range_usage: float = (
self._suspicious_successive_range_count * 2
) / self._character_count
if ratio_of_suspicious_range_usage < 0.1:
return 0.0
return ratio_of_suspicious_range_usage
class SuperWeirdWordPlugin(MessDetectorPlugin):
def __init__(self) -> None:
self._word_count: int = 0
self._bad_word_count: int = 0
self._foreign_long_count: int = 0
self._is_current_word_bad: bool = False
self._foreign_long_watch: bool = False
self._character_count: int = 0
self._bad_character_count: int = 0
self._buffer: str = ""
self._buffer_accent_count: int = 0
def eligible(self, character: str) -> bool:
return True
def feed(self, character: str) -> None:
if character.isalpha():
self._buffer += character
if is_accentuated(character):
self._buffer_accent_count += 1
if (
self._foreign_long_watch is False
and (is_latin(character) is False or is_accentuated(character))
and is_cjk(character) is False
and is_hangul(character) is False
and is_katakana(character) is False
and is_hiragana(character) is False
and is_thai(character) is False
):
self._foreign_long_watch = True
return
if not self._buffer:
return
if (
character.isspace() or is_punctuation(character) or is_separator(character)
) and self._buffer:
self._word_count += 1
buffer_length: int = len(self._buffer)
self._character_count += buffer_length
if buffer_length >= 4:
if self._buffer_accent_count / buffer_length > 0.34:
self._is_current_word_bad = True
# Words/buffers ending with an upper-case accentuated letter are so rare
# that we consider them all suspicious. Same weight as a foreign_long suspicion.
if is_accentuated(self._buffer[-1]) and self._buffer[-1].isupper():
self._foreign_long_count += 1
self._is_current_word_bad = True
if buffer_length >= 24 and self._foreign_long_watch:
self._foreign_long_count += 1
self._is_current_word_bad = True
if self._is_current_word_bad:
self._bad_word_count += 1
self._bad_character_count += len(self._buffer)
self._is_current_word_bad = False
self._foreign_long_watch = False
self._buffer = ""
self._buffer_accent_count = 0
elif (
character not in {"<", ">", "-", "=", "~", "|", "_"}
and character.isdigit() is False
and is_symbol(character)
):
self._is_current_word_bad = True
self._buffer += character
def reset(self) -> None: # pragma: no cover
self._buffer = ""
self._is_current_word_bad = False
self._foreign_long_watch = False
self._bad_word_count = 0
self._word_count = 0
self._character_count = 0
self._bad_character_count = 0
self._foreign_long_count = 0
@property
def ratio(self) -> float:
if self._word_count <= 10 and self._foreign_long_count == 0:
return 0.0
return self._bad_character_count / self._character_count
class CjkInvalidStopPlugin(MessDetectorPlugin):
"""
GB (Chinese) based encodings often render the full stop incorrectly when the content does not fit and
can be easily detected. Searching for the overuse of '丅' and '丄'.
"""
def __init__(self) -> None:
self._wrong_stop_count: int = 0
self._cjk_character_count: int = 0
def eligible(self, character: str) -> bool:
return True
def feed(self, character: str) -> None:
if character in {"丅", "丄"}:
self._wrong_stop_count += 1
return
if is_cjk(character):
self._cjk_character_count += 1
def reset(self) -> None: # pragma: no cover
self._wrong_stop_count = 0
self._cjk_character_count = 0
@property
def ratio(self) -> float:
if self._cjk_character_count < 16:
return 0.0
return self._wrong_stop_count / self._cjk_character_count
class ArchaicUpperLowerPlugin(MessDetectorPlugin):
def __init__(self) -> None:
self._buf: bool = False
self._character_count_since_last_sep: int = 0
self._successive_upper_lower_count: int = 0
self._successive_upper_lower_count_final: int = 0
self._character_count: int = 0
self._last_alpha_seen: Optional[str] = None
self._current_ascii_only: bool = True
def eligible(self, character: str) -> bool:
return True
def feed(self, character: str) -> None:
is_concerned = character.isalpha() and is_case_variable(character)
chunk_sep = is_concerned is False
if chunk_sep and self._character_count_since_last_sep > 0:
if (
self._character_count_since_last_sep <= 64
and character.isdigit() is False
and self._current_ascii_only is False
):
self._successive_upper_lower_count_final += (
self._successive_upper_lower_count
)
self._successive_upper_lower_count = 0
self._character_count_since_last_sep = 0
self._last_alpha_seen = None
self._buf = False
self._character_count += 1
self._current_ascii_only = True
return
if self._current_ascii_only is True and is_ascii(character) is False:
self._current_ascii_only = False
if self._last_alpha_seen is not None:
if (character.isupper() and self._last_alpha_seen.islower()) or (
character.islower() and self._last_alpha_seen.isupper()
):
if self._buf is True:
self._successive_upper_lower_count += 2
self._buf = False
else:
self._buf = True
else:
self._buf = False
self._character_count += 1
self._character_count_since_last_sep += 1
self._last_alpha_seen = character
def reset(self) -> None: # pragma: no cover
self._character_count = 0
self._character_count_since_last_sep = 0
self._successive_upper_lower_count = 0
self._successive_upper_lower_count_final = 0
self._last_alpha_seen = None
self._buf = False
self._current_ascii_only = True
@property
def ratio(self) -> float:
if self._character_count == 0:
return 0.0
return self._successive_upper_lower_count_final / self._character_count
@lru_cache(maxsize=1024)
def is_suspiciously_successive_range(
unicode_range_a: Optional[str], unicode_range_b: Optional[str]
) -> bool:
"""
Determine if two Unicode ranges seen next to each other can be considered suspicious.
"""
if unicode_range_a is None or unicode_range_b is None:
return True
if unicode_range_a == unicode_range_b:
return False
if "Latin" in unicode_range_a and "Latin" in unicode_range_b:
return False
if "Emoticons" in unicode_range_a or "Emoticons" in unicode_range_b:
return False
# Latin characters can be accompanied with a combining diacritical mark
# eg. Vietnamese.
if ("Latin" in unicode_range_a or "Latin" in unicode_range_b) and (
"Combining" in unicode_range_a or "Combining" in unicode_range_b
):
return False
keywords_range_a, keywords_range_b = unicode_range_a.split(
" "
), unicode_range_b.split(" ")
for el in keywords_range_a:
if el in UNICODE_SECONDARY_RANGE_KEYWORD:
continue
if el in keywords_range_b:
return False
# Japanese Exception
range_a_jp_chars, range_b_jp_chars = (
unicode_range_a
in (
"Hiragana",
"Katakana",
),
unicode_range_b in ("Hiragana", "Katakana"),
)
if (range_a_jp_chars or range_b_jp_chars) and (
"CJK" in unicode_range_a or "CJK" in unicode_range_b
):
return False
if range_a_jp_chars and range_b_jp_chars:
return False
if "Hangul" in unicode_range_a or "Hangul" in unicode_range_b:
if "CJK" in unicode_range_a or "CJK" in unicode_range_b:
return False
if unicode_range_a == "Basic Latin" or unicode_range_b == "Basic Latin":
return False
# Chinese/Japanese use dedicated range for punctuation and/or separators.
if ("CJK" in unicode_range_a or "CJK" in unicode_range_b) or (
unicode_range_a in ["Katakana", "Hiragana"]
and unicode_range_b in ["Katakana", "Hiragana"]
):
if "Punctuation" in unicode_range_a or "Punctuation" in unicode_range_b:
return False
if "Forms" in unicode_range_a or "Forms" in unicode_range_b:
return False
return True
@lru_cache(maxsize=2048)
def mess_ratio(
decoded_sequence: str, maximum_threshold: float = 0.2, debug: bool = False
) -> float:
"""
Compute a mess ratio given a decoded bytes sequence. The maximum threshold stops the computation early.
"""
detectors: List[MessDetectorPlugin] = [
md_class() for md_class in MessDetectorPlugin.__subclasses__()
]
length: int = len(decoded_sequence) + 1
mean_mess_ratio: float = 0.0
if length < 512:
intermediary_mean_mess_ratio_calc: int = 32
elif length <= 1024:
intermediary_mean_mess_ratio_calc = 64
else:
intermediary_mean_mess_ratio_calc = 128
for character, index in zip(decoded_sequence + "\n", range(length)):
for detector in detectors:
if detector.eligible(character):
detector.feed(character)
if (
index > 0 and index % intermediary_mean_mess_ratio_calc == 0
) or index == length - 1:
mean_mess_ratio = sum(dt.ratio for dt in detectors)
if mean_mess_ratio >= maximum_threshold:
break
if debug:
for dt in detectors: # pragma: nocover
print(dt.__class__, dt.ratio)
return round(mean_mess_ratio, 3)
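mess_ratio can be exercised directly on already-decoded text; a short sketch. The mojibake sample is illustrative and the exact ratios will vary.
# Sketch: chaos scoring of decoded text; lower means cleaner.
from charset_normalizer.md import mess_ratio

clean = "A simple, perfectly readable English sentence."
mojibake = "ÃƒÂ©Ã‚Â garbled Ã¢â‚¬Å“ text Ã¢â‚¬ here"
print(mess_ratio(clean))     # expected to be close to 0.0
print(mess_ratio(mojibake))  # expected to be noticeably higher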

401
venv/lib/python3.10/site-packages/charset_normalizer/models.py

@ -0,0 +1,401 @@
import warnings
from collections import Counter
from encodings.aliases import aliases
from hashlib import sha256
from json import dumps
from re import sub
from typing import (
Any,
Counter as TypeCounter,
Dict,
Iterator,
List,
Optional,
Tuple,
Union,
)
from .constant import NOT_PRINTABLE_PATTERN, TOO_BIG_SEQUENCE
from .md import mess_ratio
from .utils import iana_name, is_multi_byte_encoding, unicode_range
class CharsetMatch:
def __init__(
self,
payload: bytes,
guessed_encoding: str,
mean_mess_ratio: float,
has_sig_or_bom: bool,
languages: "CoherenceMatches",
decoded_payload: Optional[str] = None,
):
self._payload: bytes = payload
self._encoding: str = guessed_encoding
self._mean_mess_ratio: float = mean_mess_ratio
self._languages: CoherenceMatches = languages
self._has_sig_or_bom: bool = has_sig_or_bom
self._unicode_ranges: Optional[List[str]] = None
self._leaves: List[CharsetMatch] = []
self._mean_coherence_ratio: float = 0.0
self._output_payload: Optional[bytes] = None
self._output_encoding: Optional[str] = None
self._string: Optional[str] = decoded_payload
def __eq__(self, other: object) -> bool:
if not isinstance(other, CharsetMatch):
raise TypeError(
"__eq__ cannot be invoked on {} and {}.".format(
str(other.__class__), str(self.__class__)
)
)
return self.encoding == other.encoding and self.fingerprint == other.fingerprint
def __lt__(self, other: object) -> bool:
"""
Implemented to make sorted available upon CharsetMatches items.
"""
if not isinstance(other, CharsetMatch):
raise ValueError
chaos_difference: float = abs(self.chaos - other.chaos)
coherence_difference: float = abs(self.coherence - other.coherence)
# Below 1% difference --> use coherence
if chaos_difference < 0.01 and coherence_difference > 0.02:
# When having a tough decision, use the result that decoded as many multi-byte as possible.
if chaos_difference == 0.0 and self.coherence == other.coherence:
return self.multi_byte_usage > other.multi_byte_usage
return self.coherence > other.coherence
return self.chaos < other.chaos
@property
def multi_byte_usage(self) -> float:
return 1.0 - len(str(self)) / len(self.raw)
@property
def chaos_secondary_pass(self) -> float:
"""
Check once again chaos in decoded text, except this time, with full content.
Use with caution, this can be very slow.
Notice: Will be removed in 3.0
"""
warnings.warn(
"chaos_secondary_pass is deprecated and will be removed in 3.0",
DeprecationWarning,
)
return mess_ratio(str(self), 1.0)
@property
def coherence_non_latin(self) -> float:
"""
Coherence ratio on the first non-latin language detected if ANY.
Notice: Will be removed in 3.0
"""
warnings.warn(
"coherence_non_latin is deprecated and will be removed in 3.0",
DeprecationWarning,
)
return 0.0
@property
def w_counter(self) -> TypeCounter[str]:
"""
Word counter instance on decoded text.
Notice: Will be removed in 3.0
"""
warnings.warn(
"w_counter is deprecated and will be removed in 3.0", DeprecationWarning
)
string_printable_only = sub(NOT_PRINTABLE_PATTERN, " ", str(self).lower())
return Counter(string_printable_only.split())
def __str__(self) -> str:
# Lazy Str Loading
if self._string is None:
self._string = str(self._payload, self._encoding, "strict")
return self._string
def __repr__(self) -> str:
return "<CharsetMatch '{}' bytes({})>".format(self.encoding, self.fingerprint)
def add_submatch(self, other: "CharsetMatch") -> None:
if not isinstance(other, CharsetMatch) or other == self:
raise ValueError(
"Unable to add instance <{}> as a submatch of a CharsetMatch".format(
other.__class__
)
)
other._string = None # Unload RAM usage; dirty trick.
self._leaves.append(other)
@property
def encoding(self) -> str:
return self._encoding
@property
def encoding_aliases(self) -> List[str]:
"""
An encoding can be known by many names; using this could help when searching for IBM855 when it is listed as CP855.
"""
also_known_as: List[str] = []
for u, p in aliases.items():
if self.encoding == u:
also_known_as.append(p)
elif self.encoding == p:
also_known_as.append(u)
return also_known_as
@property
def bom(self) -> bool:
return self._has_sig_or_bom
@property
def byte_order_mark(self) -> bool:
return self._has_sig_or_bom
@property
def languages(self) -> List[str]:
"""
Return the complete list of possible languages found in decoded sequence.
Usually not really useful. The returned list may be empty even if the 'language' property returns something other than 'Unknown'.
"""
return [e[0] for e in self._languages]
@property
def language(self) -> str:
"""
Most probable language found in decoded sequence. If none were detected or inferred, the property will return
"Unknown".
"""
if not self._languages:
# Trying to infer the language based on the given encoding
# It's either English or we should not commit ourselves in certain cases.
if "ascii" in self.could_be_from_charset:
return "English"
# doing it there to avoid circular import
from charset_normalizer.cd import encoding_languages, mb_encoding_languages
languages = (
mb_encoding_languages(self.encoding)
if is_multi_byte_encoding(self.encoding)
else encoding_languages(self.encoding)
)
if len(languages) == 0 or "Latin Based" in languages:
return "Unknown"
return languages[0]
return self._languages[0][0]
@property
def chaos(self) -> float:
return self._mean_mess_ratio
@property
def coherence(self) -> float:
if not self._languages:
return 0.0
return self._languages[0][1]
@property
def percent_chaos(self) -> float:
return round(self.chaos * 100, ndigits=3)
@property
def percent_coherence(self) -> float:
return round(self.coherence * 100, ndigits=3)
@property
def raw(self) -> bytes:
"""
Original untouched bytes.
"""
return self._payload
@property
def submatch(self) -> List["CharsetMatch"]:
return self._leaves
@property
def has_submatch(self) -> bool:
return len(self._leaves) > 0
@property
def alphabets(self) -> List[str]:
if self._unicode_ranges is not None:
return self._unicode_ranges
# list detected ranges
detected_ranges: List[Optional[str]] = [
unicode_range(char) for char in str(self)
]
# filter and sort
self._unicode_ranges = sorted(list({r for r in detected_ranges if r}))
return self._unicode_ranges
@property
def could_be_from_charset(self) -> List[str]:
"""
The complete list of encodings that output the exact SAME str result and therefore could be the originating
encoding.
This list includes the encoding available in the 'encoding' property.
"""
return [self._encoding] + [m.encoding for m in self._leaves]
def first(self) -> "CharsetMatch":
"""
Kept for BC reasons. Will be removed in 3.0.
"""
return self
def best(self) -> "CharsetMatch":
"""
Kept for BC reasons. Will be removed in 3.0.
"""
return self
def output(self, encoding: str = "utf_8") -> bytes:
"""
Method to get the re-encoded bytes payload using the given target encoding. Defaults to UTF-8.
Encoding errors are substituted by the encoder (the 'replace' strategy), not raised.
"""
if self._output_encoding is None or self._output_encoding != encoding:
self._output_encoding = encoding
self._output_payload = str(self).encode(encoding, "replace")
return self._output_payload # type: ignore
@property
def fingerprint(self) -> str:
"""
Retrieve the unique SHA256 computed using the transformed (re-encoded) payload. Not the original one.
"""
return sha256(self.output()).hexdigest()
class CharsetMatches:
"""
Container holding every CharsetMatch item, ordered by default from most probable to least probable.
Acts like a list (iterable) but does not implement all related methods.
"""
def __init__(self, results: Optional[List[CharsetMatch]] = None):
self._results: List[CharsetMatch] = sorted(results) if results else []
def __iter__(self) -> Iterator[CharsetMatch]:
yield from self._results
def __getitem__(self, item: Union[int, str]) -> CharsetMatch:
"""
Retrieve a single item either by its position or by encoding name (an alias may be used here).
Raises KeyError upon an invalid index or an encoding not present in the results.
"""
if isinstance(item, int):
return self._results[item]
if isinstance(item, str):
item = iana_name(item, False)
for result in self._results:
if item in result.could_be_from_charset:
return result
raise KeyError
def __len__(self) -> int:
return len(self._results)
def __bool__(self) -> bool:
return len(self._results) > 0
def append(self, item: CharsetMatch) -> None:
"""
Insert a single match. It will be inserted so as to preserve the sort order.
It may also be attached as a submatch of an existing result.
"""
if not isinstance(item, CharsetMatch):
raise ValueError(
"Cannot append instance '{}' to CharsetMatches".format(
str(item.__class__)
)
)
# We should disable submatch factoring when the input payload is too large (to conserve RAM usage)
if len(item.raw) <= TOO_BIG_SEQUENCE:
for match in self._results:
if match.fingerprint == item.fingerprint and match.chaos == item.chaos:
match.add_submatch(item)
return
self._results.append(item)
self._results = sorted(self._results)
def best(self) -> Optional["CharsetMatch"]:
"""
Simply return the first match. Strict equivalent to matches[0].
"""
if not self._results:
return None
return self._results[0]
def first(self) -> Optional["CharsetMatch"]:
"""
Redundant method; simply calls best(). Kept for BC reasons.
"""
return self.best()
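# --- Editor's illustrative sketch (not part of the upstream file) ---
# Typical use of the CharsetMatches container; `match_a` and `match_b` stand for
# hypothetical CharsetMatch instances:
#
#     matches = CharsetMatches()
#     matches.append(match_a)
#     matches.append(match_b)        # merged as a submatch if fingerprint and chaos are equal
#     best_guess = matches.best()    # None when the container is empty, else matches[0]
#     by_name = matches["utf-8"]     # lookup by encoding name or alias, may raise KeyError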
CoherenceMatch = Tuple[str, float]
CoherenceMatches = List[CoherenceMatch]
class CliDetectionResult:
def __init__(
self,
path: str,
encoding: Optional[str],
encoding_aliases: List[str],
alternative_encodings: List[str],
language: str,
alphabets: List[str],
has_sig_or_bom: bool,
chaos: float,
coherence: float,
unicode_path: Optional[str],
is_preferred: bool,
):
self.path: str = path
self.unicode_path: Optional[str] = unicode_path
self.encoding: Optional[str] = encoding
self.encoding_aliases: List[str] = encoding_aliases
self.alternative_encodings: List[str] = alternative_encodings
self.language: str = language
self.alphabets: List[str] = alphabets
self.has_sig_or_bom: bool = has_sig_or_bom
self.chaos: float = chaos
self.coherence: float = coherence
self.is_preferred: bool = is_preferred
@property
def __dict__(self) -> Dict[str, Any]: # type: ignore
return {
"path": self.path,
"encoding": self.encoding,
"encoding_aliases": self.encoding_aliases,
"alternative_encodings": self.alternative_encodings,
"language": self.language,
"alphabets": self.alphabets,
"has_sig_or_bom": self.has_sig_or_bom,
"chaos": self.chaos,
"coherence": self.coherence,
"unicode_path": self.unicode_path,
"is_preferred": self.is_preferred,
}
def to_json(self) -> str:
return dumps(self.__dict__, ensure_ascii=True, indent=4)
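# --- Editor's illustrative sketch (not part of the upstream file) ---
# CliDetectionResult is a plain serialisable record used by the CLI; to_json() simply
# dumps the __dict__ property. A hypothetical construction could look like:
#
#     result = CliDetectionResult(
#         path="./sample.txt",
#         encoding="utf_8",
#         encoding_aliases=["u8", "utf8"],
#         alternative_encodings=[],
#         language="English",
#         alphabets=["Basic Latin"],
#         has_sig_or_bom=False,
#         chaos=0.0,
#         coherence=97.5,
#         unicode_path=None,
#         is_preferred=True,
#     )
#     print(result.to_json())   # pretty-printed, ASCII-escaped JSON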

0
venv/lib/python3.10/site-packages/charset_normalizer/py.typed

424
venv/lib/python3.10/site-packages/charset_normalizer/utils.py

@@ -0,0 +1,424 @@
try:
# WARNING: unicodedata2 support is going to be removed in 3.0
# Python is quickly catching up.
import unicodedata2 as unicodedata
except ImportError:
import unicodedata # type: ignore[no-redef]
import importlib
import logging
from codecs import IncrementalDecoder
from encodings.aliases import aliases
from functools import lru_cache
from re import findall
from typing import Generator, List, Optional, Set, Tuple, Union
from _multibytecodec import MultibyteIncrementalDecoder
from .constant import (
ENCODING_MARKS,
IANA_SUPPORTED_SIMILAR,
RE_POSSIBLE_ENCODING_INDICATION,
UNICODE_RANGES_COMBINED,
UNICODE_SECONDARY_RANGE_KEYWORD,
UTF8_MAXIMAL_ALLOCATION,
)
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_accentuated(character: str) -> bool:
try:
description: str = unicodedata.name(character)
except ValueError:
return False
return (
"WITH GRAVE" in description
or "WITH ACUTE" in description
or "WITH CEDILLA" in description
or "WITH DIAERESIS" in description
or "WITH CIRCUMFLEX" in description
or "WITH TILDE" in description
)
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def remove_accent(character: str) -> str:
decomposed: str = unicodedata.decomposition(character)
if not decomposed:
return character
codes: List[str] = decomposed.split(" ")
return chr(int(codes[0], 16))
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def unicode_range(character: str) -> Optional[str]:
"""
Retrieve the official Unicode range name for a single character.
"""
character_ord: int = ord(character)
for range_name, ord_range in UNICODE_RANGES_COMBINED.items():
if character_ord in ord_range:
return range_name
return None
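# --- Editor's illustrative sketch (not part of the upstream file) ---
# Expected behaviour of the helpers above, doctest style:
#
#     >>> is_accentuated("é")      # LATIN SMALL LETTER E WITH ACUTE
#     True
#     >>> remove_accent("é")       # decomposition "0065 0301" -> chr(0x65)
#     'e'
#     >>> unicode_range("a")
#     'Basic Latin'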
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_latin(character: str) -> bool:
try:
description: str = unicodedata.name(character)
except ValueError:
return False
return "LATIN" in description
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_ascii(character: str) -> bool:
try:
character.encode("ascii")
except UnicodeEncodeError:
return False
return True
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_punctuation(character: str) -> bool:
character_category: str = unicodedata.category(character)
if "P" in character_category:
return True
character_range: Optional[str] = unicode_range(character)
if character_range is None:
return False
return "Punctuation" in character_range
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_symbol(character: str) -> bool:
character_category: str = unicodedata.category(character)
if "S" in character_category or "N" in character_category:
return True
character_range: Optional[str] = unicode_range(character)
if character_range is None:
return False
return "Forms" in character_range
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_emoticon(character: str) -> bool:
character_range: Optional[str] = unicode_range(character)
if character_range is None:
return False
return "Emoticons" in character_range
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_separator(character: str) -> bool:
if character.isspace() or character in {"", "+", ",", ";", "<", ">"}:
return True
character_category: str = unicodedata.category(character)
return "Z" in character_category
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_case_variable(character: str) -> bool:
return character.islower() != character.isupper()
def is_private_use_only(character: str) -> bool:
character_category: str = unicodedata.category(character)
return character_category == "Co"
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_cjk(character: str) -> bool:
try:
character_name = unicodedata.name(character)
except ValueError:
return False
return "CJK" in character_name
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_hiragana(character: str) -> bool:
try:
character_name = unicodedata.name(character)
except ValueError:
return False
return "HIRAGANA" in character_name
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_katakana(character: str) -> bool:
try:
character_name = unicodedata.name(character)
except ValueError:
return False
return "KATAKANA" in character_name
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_hangul(character: str) -> bool:
try:
character_name = unicodedata.name(character)
except ValueError:
return False
return "HANGUL" in character_name
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_thai(character: str) -> bool:
try:
character_name = unicodedata.name(character)
except ValueError:
return False
return "THAI" in character_name
@lru_cache(maxsize=len(UNICODE_RANGES_COMBINED))
def is_unicode_range_secondary(range_name: str) -> bool:
return any(keyword in range_name for keyword in UNICODE_SECONDARY_RANGE_KEYWORD)
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_unprintable(character: str) -> bool:
return (
character.isspace() is False # includes \n \t \r \v
and character.isprintable() is False
and character != "\x1A"  # Why? It is the ASCII substitute character.
and character != "\ufeff"  # bug discovered in Python:
# Zero Width No-Break Space, located in Arabic Presentation Forms-B (Unicode 1.1), is not acknowledged as a space.
)
def any_specified_encoding(sequence: bytes, search_zone: int = 4096) -> Optional[str]:
"""
Extract, using an ASCII-only decoder, any specified encoding found in the first n bytes.
"""
if not isinstance(sequence, bytes):
raise TypeError
seq_len: int = len(sequence)
results: List[str] = findall(
RE_POSSIBLE_ENCODING_INDICATION,
sequence[: min(seq_len, search_zone)].decode("ascii", errors="ignore"),
)
if len(results) == 0:
return None
for specified_encoding in results:
specified_encoding = specified_encoding.lower().replace("-", "_")
encoding_alias: str
encoding_iana: str
for encoding_alias, encoding_iana in aliases.items():
if encoding_alias == specified_encoding:
return encoding_iana
if encoding_iana == specified_encoding:
return encoding_iana
return None
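# --- Editor's illustrative sketch (not part of the upstream file) ---
# any_specified_encoding() looks for declarations such as 'charset=' or 'encoding='
# in the first `search_zone` bytes and maps the declared name through encodings.aliases.
# For instance, an XML prolog declaring ISO-8859-1 should resolve to its canonical
# Python codec name:
#
#     payload = b'<?xml version="1.0" encoding="ISO-8859-1"?><root/>'
#     any_specified_encoding(payload)   # expected: 'latin_1'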
@lru_cache(maxsize=128)
def is_multi_byte_encoding(name: str) -> bool:
"""
Verify whether a specific encoding is a multi-byte one, based on its IANA name.
"""
return name in {
"utf_8",
"utf_8_sig",
"utf_16",
"utf_16_be",
"utf_16_le",
"utf_32",
"utf_32_le",
"utf_32_be",
"utf_7",
} or issubclass(
importlib.import_module("encodings.{}".format(name)).IncrementalDecoder,
MultibyteIncrementalDecoder,
)
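# --- Editor's illustrative sketch (not part of the upstream file) ---
# The hard-coded set covers the UTF family; anything else is probed by importing the
# codec module and checking whether its IncrementalDecoder derives from
# MultibyteIncrementalDecoder (as CJK codecs such as shift_jis or euc_jp do):
#
#     is_multi_byte_encoding("utf_8")      # True (hard-coded)
#     is_multi_byte_encoding("shift_jis")  # True (MultibyteIncrementalDecoder subclass)
#     is_multi_byte_encoding("cp1252")     # False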
def identify_sig_or_bom(sequence: bytes) -> Tuple[Optional[str], bytes]:
"""
Identify and extract any SIG/BOM at the start of the given sequence.
"""
for iana_encoding in ENCODING_MARKS:
marks: Union[bytes, List[bytes]] = ENCODING_MARKS[iana_encoding]
if isinstance(marks, bytes):
marks = [marks]
for mark in marks:
if sequence.startswith(mark):
return iana_encoding, mark
return None, b""
def should_strip_sig_or_bom(iana_encoding: str) -> bool:
return iana_encoding not in {"utf_16", "utf_32"}
def iana_name(cp_name: str, strict: bool = True) -> str:
cp_name = cp_name.lower().replace("-", "_")
encoding_alias: str
encoding_iana: str
for encoding_alias, encoding_iana in aliases.items():
if cp_name in [encoding_alias, encoding_iana]:
return encoding_iana
if strict:
raise ValueError("Unable to retrieve IANA for '{}'".format(cp_name))
return cp_name
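# --- Editor's illustrative sketch (not part of the upstream file) ---
# iana_name() normalises a user-supplied codec label to the canonical name used by
# encodings.aliases; with strict=False an unknown label is passed through (lowercased,
# hyphens replaced) instead of raising:
#
#     iana_name("UTF-8")                 # expected: 'utf_8'
#     iana_name("not-a-codec", False)    # expected: 'not_a_codec'
#     iana_name("not-a-codec")           # raises ValueError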
def range_scan(decoded_sequence: str) -> List[str]:
ranges: Set[str] = set()
for character in decoded_sequence:
character_range: Optional[str] = unicode_range(character)
if character_range is None:
continue
ranges.add(character_range)
return list(ranges)
def cp_similarity(iana_name_a: str, iana_name_b: str) -> float:
if is_multi_byte_encoding(iana_name_a) or is_multi_byte_encoding(iana_name_b):
return 0.0
decoder_a = importlib.import_module(
"encodings.{}".format(iana_name_a)
).IncrementalDecoder
decoder_b = importlib.import_module(
"encodings.{}".format(iana_name_b)
).IncrementalDecoder
id_a: IncrementalDecoder = decoder_a(errors="ignore")
id_b: IncrementalDecoder = decoder_b(errors="ignore")
character_match_count: int = 0
for i in range(255):
to_be_decoded: bytes = bytes([i])
if id_a.decode(to_be_decoded) == id_b.decode(to_be_decoded):
character_match_count += 1
return character_match_count / 254
def is_cp_similar(iana_name_a: str, iana_name_b: str) -> bool:
"""
Determine whether two code pages are at least 80% similar. The IANA_SUPPORTED_SIMILAR dict was generated using
the cp_similarity function.
"""
return (
iana_name_a in IANA_SUPPORTED_SIMILAR
and iana_name_b in IANA_SUPPORTED_SIMILAR[iana_name_a]
)
def set_logging_handler(
name: str = "charset_normalizer",
level: int = logging.INFO,
format_string: str = "%(asctime)s | %(levelname)s | %(message)s",
) -> None:
logger = logging.getLogger(name)
logger.setLevel(level)
handler = logging.StreamHandler()
handler.setFormatter(logging.Formatter(format_string))
logger.addHandler(handler)
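# --- Editor's illustrative sketch (not part of the upstream file) ---
# Attaching a stream handler to the package logger; note that calling this repeatedly
# adds one handler per call, so it is meant to be invoked once:
#
#     set_logging_handler(level=logging.DEBUG)
#     logging.getLogger("charset_normalizer").debug("verbose detection traces enabled")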
def cut_sequence_chunks(
sequences: bytes,
encoding_iana: str,
offsets: range,
chunk_size: int,
bom_or_sig_available: bool,
strip_sig_or_bom: bool,
sig_payload: bytes,
is_multi_byte_decoder: bool,
decoded_payload: Optional[str] = None,
) -> Generator[str, None, None]:
if decoded_payload and is_multi_byte_decoder is False:
for i in offsets:
chunk = decoded_payload[i : i + chunk_size]
if not chunk:
break
yield chunk
else:
for i in offsets:
chunk_end = i + chunk_size
if chunk_end > len(sequences) + 8:
continue
cut_sequence = sequences[i : i + chunk_size]
if bom_or_sig_available and strip_sig_or_bom is False:
cut_sequence = sig_payload + cut_sequence
chunk = cut_sequence.decode(
encoding_iana,
errors="ignore" if is_multi_byte_decoder else "strict",
)
# Detect and adjust a bad cut in the middle of a multi-byte sequence.
# Not the cleanest way to perform that fix, but clever enough for now.
if is_multi_byte_decoder and i > 0 and sequences[i] >= 0x80:
chunk_partial_size_chk: int = min(chunk_size, 16)
if (
decoded_payload
and chunk[:chunk_partial_size_chk] not in decoded_payload
):
for j in range(i, i - 4, -1):
cut_sequence = sequences[j:chunk_end]
if bom_or_sig_available and strip_sig_or_bom is False:
cut_sequence = sig_payload + cut_sequence
chunk = cut_sequence.decode(encoding_iana, errors="ignore")
if chunk[:chunk_partial_size_chk] in decoded_payload:
break
yield chunk
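# --- Editor's illustrative sketch (not part of the upstream file) ---
# cut_sequence_chunks() yields decoded text windows taken at the given offsets; the
# caller (the detection loop) scores each chunk for "mess" and coherence. A minimal,
# hypothetical invocation over a UTF-8 payload without a BOM could look like:
#
#     payload = "exemple de texte accentué".encode("utf_8")
#     chunks = cut_sequence_chunks(
#         payload,
#         encoding_iana="utf_8",
#         offsets=range(0, len(payload), 16),
#         chunk_size=16,
#         bom_or_sig_available=False,
#         strip_sig_or_bom=False,
#         sig_payload=b"",
#         is_multi_byte_decoder=True,
#         decoded_payload=payload.decode("utf_8"),
#     )
#     for chunk in chunks:
#         ...  # each chunk is a str window of roughly chunk_size characters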

6
venv/lib/python3.10/site-packages/charset_normalizer/version.py

@@ -0,0 +1,6 @@
"""
Expose version
"""
__version__ = "2.1.1"
VERSION = __version__.split(".")

1
venv/lib/python3.10/site-packages/distutils-precedence.pth

@@ -0,0 +1 @@
import os; var = 'SETUPTOOLS_USE_DISTUTILS'; enabled = os.environ.get(var, 'stdlib') == 'local'; enabled and __import__('_distutils_hack').add_shim();

BIN
venv/lib/python3.10/site-packages/google/_upb/_message.abi3.so

Binary file not shown.

0
venv/lib/python3.10/site-packages/google/api/__init__.py

BIN
venv/lib/python3.10/site-packages/google/api/__pycache__/__init__.cpython-310.pyc

Binary file not shown.

BIN
venv/lib/python3.10/site-packages/google/api/__pycache__/annotations_pb2.cpython-310.pyc

Binary file not shown.

BIN
venv/lib/python3.10/site-packages/google/api/__pycache__/auth_pb2.cpython-310.pyc

Binary file not shown.

Some files were not shown because too many files changed in this diff
