diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a609f3704..7fb8f9f83 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,81 +1,445 @@ name: CI -on: [push, pull_request] + +env: + all-cpython-versions: 2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, 3.10, 3.11, 3.12 + main-cpython-versions: 2.7, 3.2, 3.5, 3.9, 3.11 + pypy-versions: pypy-2.7, pypy-3.6, pypy-3.7 + cpython-versions: main + test-set: core + +on: + push: + inputs: + cpython-versions: + type: string + default: all + test-set: + type: string + default: core + pull_request: + inputs: + cpython-versions: + type: string + default: main + test-set: + type: string + default: both + workflow_dispatch: + inputs: + cpython-versions: + type: choice + description: CPython versions (main = 2.7, 3.2, 3.5, 3.9, 3.11) + options: + - all + - main + required: true + default: main + test-set: + type: choice + description: core, download + options: + - both + - core + - download + required: true + default: both + +permissions: + contents: read + jobs: + select: + name: Select tests from inputs + runs-on: ubuntu-latest + outputs: + cpython-versions: ${{ steps.run.outputs.cpython-versions }} + test-set: ${{ steps.run.outputs.test-set }} + own-pip-versions: ${{ steps.run.outputs.own-pip-versions }} + steps: + - name: Make version array + id: run + run: | + # Make a JSON Array from comma/space-separated string (no extra escaping) + json_list() { \ + ret=""; IFS="${IFS},"; set -- $*; \ + for a in "$@"; do \ + ret=$(printf '%s"%s"' "${ret}${ret:+, }" "$a"); \ + done; \ + printf '[%s]' "$ret"; } + tests="${{ inputs.test-set || env.test-set }}" + [ $tests = both ] && tests="core download" + printf 'test-set=%s\n' "$(json_list $tests)" >> "$GITHUB_OUTPUT" + versions="${{ inputs.cpython-versions || env.cpython-versions }}" + if [ "$versions" = all ]; then \ + versions="${{ env.all-cpython-versions }}"; else \ + versions="${{ env.main-cpython-versions }}"; \ + fi + printf 
'cpython-versions=%s\n' \ + "$(json_list ${versions}${versions:+, }${{ env.pypy-versions }})" >> "$GITHUB_OUTPUT" + # versions with a special get-pip.py in a per-version subdirectory + printf 'own-pip-versions=%s\n' \ + "$(json_list 2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6)" >> "$GITHUB_OUTPUT" tests: - name: Tests + name: Run tests + needs: select + permissions: + contents: read + packages: write runs-on: ${{ matrix.os }} + env: + PIP: python -m pip + PIP_DISABLE_PIP_VERSION_CHECK: true + PIP_NO_PYTHON_VERSION_WARNING: true strategy: fail-fast: true matrix: - os: [ubuntu-18.04] - # TODO: python 2.6 - python-version: [2.7, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, pypy-2.7, pypy-3.6, pypy-3.7] + os: [ubuntu-20.04] + python-version: ${{ fromJSON(needs.select.outputs.cpython-versions) }} python-impl: [cpython] - ytdl-test-set: [core, download] + ytdl-test-set: ${{ fromJSON(needs.select.outputs.test-set) }} run-tests-ext: [sh] include: - # python 3.2 is only available on windows via setup-python - os: windows-2019 - python-version: 3.2 + python-version: 3.4 python-impl: cpython - ytdl-test-set: core + ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'core') && 'core' || 'nocore' }} run-tests-ext: bat - os: windows-2019 - python-version: 3.2 + python-version: 3.4 python-impl: cpython - ytdl-test-set: download + ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'download') && 'download' || 'nodownload' }} run-tests-ext: bat # jython - - os: ubuntu-18.04 + - os: ubuntu-20.04 + python-version: 2.7 python-impl: jython - ytdl-test-set: core + ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'core') && 'core' || 'nocore' }} run-tests-ext: sh - - os: ubuntu-18.04 + - os: ubuntu-20.04 + python-version: 2.7 python-impl: jython - ytdl-test-set: download + ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'download') && 'download' || 'nodownload' }} run-tests-ext: sh steps: - - uses: actions/checkout@v2 - - name: Set up Python ${{ matrix.python-version }} 
- uses: actions/setup-python@v2 - if: ${{ matrix.python-impl == 'cpython' }} + - name: Checkout + uses: actions/checkout@v3 + #-------- Python 3 ----- + - name: Set up supported Python ${{ matrix.python-version }} + id: setup-python + if: ${{ matrix.python-impl == 'cpython' && matrix.python-version != '2.6' && matrix.python-version != '2.7' && matrix.python-version != '3.12'}} + # wrap broken actions/setup-python@v4 + uses: ytdl-org/setup-python@v1 with: python-version: ${{ matrix.python-version }} + cache-build: true + allow-build: info + - name: Locate supported Python ${{ matrix.python-version }} + if: ${{ env.pythonLocation }} + shell: bash + run: | + echo "PYTHONHOME=${pythonLocation}" >> "$GITHUB_ENV" + export expected="${{ steps.setup-python.outputs.python-path }}" + dirname() { printf '%s\n' \ + 'import os, sys' \ + 'print(os.path.dirname(sys.argv[1]))' \ + | ${expected} - "$1"; } + expd="$(dirname "$expected")" + export python="$(command -v python)" + [ "$expd" = "$(dirname "$python")" ] || echo "PATH=$expd:${PATH}" >> "$GITHUB_ENV" + [ -x "$python" ] || printf '%s\n' \ + 'import os' \ + 'exp = os.environ["expected"]' \ + 'python = os.environ["python"]' \ + 'exps = list(os.path.split(exp))' \ + 'if python and (os.path.dirname(python) == exps[0]):' \ + ' exit(0)' \ + 'exps[1] = "python" + os.path.splitext(exps[1])[1]' \ + 'python = os.path.join(*exps)' \ + 'try:' \ + ' os.symlink(exp, python)' \ + 'except AttributeError:' \ + ' os.rename(exp, python)' \ + | ${expected} - + printf '%s\n' \ + 'import sys' \ + 'print(sys.path)' \ + | ${expected} - + #-------- Python 3.12 - + - name: Set up CPython 3.12 environment + if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '3.12' }} + shell: bash + run: | + PYENV_ROOT=$HOME/.local/share/pyenv + echo "PYENV_ROOT=${PYENV_ROOT}" >> "$GITHUB_ENV" + - name: Cache Python 3.12 + id: cache312 + if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '3.12' }} + uses: actions/cache@v3 + with: + key:
python-3.12 + path: | + ${{ env.PYENV_ROOT }} + - name: Build and set up Python 3.12 + if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '3.12' && ! steps.cache312.outputs.cache-hit }} + # dl and build locally + shell: bash + run: | + # Install build environment + sudo apt-get install -y build-essential llvm libssl-dev tk-dev \ + libncursesw5-dev libreadline-dev libsqlite3-dev \ + libffi-dev xz-utils zlib1g-dev libbz2-dev liblzma-dev + # Download PyEnv from its GitHub repository. + export PYENV_ROOT=${{ env.PYENV_ROOT }} + export PATH=$PYENV_ROOT/bin:$PATH + git clone "https://github.com/pyenv/pyenv.git" "$PYENV_ROOT" + pyenv install 3.12.0b4 + - name: Locate Python 3.12 + if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '3.12' }} + shell: bash + run: | + PYTHONHOME="${{ env.PYENV_ROOT }}/versions/3.12.0b4" + echo "PYTHONHOME=$PYTHONHOME" >> "$GITHUB_ENV" + echo "PATH=${PYTHONHOME}/bin:$PATH" >> "$GITHUB_ENV" + #-------- Python 2.7 -- + - name: Set up Python 2.7 + if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '2.7' }} + # install 2.7 + shell: bash + run: | + sudo apt-get install -y python2 python-is-python2 + echo "PYTHONHOME=/usr" >> "$GITHUB_ENV" + #-------- Python 2.6 -- + - name: Set up Python 2.6 environment + if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '2.6' }} + shell: bash + run: | + openssl_name=openssl-1.0.2u + echo "openssl_name=${openssl_name}" >> "$GITHUB_ENV" + openssl_dir=$HOME/.local/opt/$openssl_name + echo "openssl_dir=${openssl_dir}" >> "$GITHUB_ENV" + PYENV_ROOT=$HOME/.local/share/pyenv + echo "PYENV_ROOT=${PYENV_ROOT}" >> "$GITHUB_ENV" + sudo apt-get install -y openssl ca-certificates + - name: Cache Python 2.6 + id: cache26 + if: ${{ matrix.python-version == '2.6' }} + uses: actions/cache@v3 + with: + key: python-2.6.9 + path: | + ${{ env.openssl_dir }} + ${{ env.PYENV_ROOT }} + - name: Build and set up Python 2.6 + if: ${{ matrix.python-impl == 'cpython' && 
matrix.python-version == '2.6' && ! steps.cache26.outputs.cache-hit }} + # dl and build locally + shell: bash + run: | + # Install build environment + sudo apt-get install -y build-essential llvm libssl-dev tk-dev \ + libncursesw5-dev libreadline-dev libsqlite3-dev \ + libffi-dev xz-utils zlib1g-dev libbz2-dev liblzma-dev + # Download and install OpenSSL 1.0.2, back in time + openssl_name=${{ env.openssl_name }} + openssl_targz=${openssl_name}.tar.gz + openssl_dir=${{ env.openssl_dir }} + openssl_inc=$openssl_dir/include + openssl_lib=$openssl_dir/lib + openssl_ssl=$openssl_dir/ssl + curl -L "https://www.openssl.org/source/$openssl_targz" -o $openssl_targz + tar -xf $openssl_targz + ( cd $openssl_name; \ + ./config --prefix=$openssl_dir --openssldir=${openssl_dir}/ssl \ + --libdir=lib -Wl,-rpath=${openssl_dir}/lib shared zlib-dynamic && \ + make && \ + make install ) + rm -rf $openssl_name + rmdir $openssl_ssl/certs && ln -s /etc/ssl/certs $openssl_ssl/certs + # Download PyEnv from its GitHub repository. 
+ export PYENV_ROOT=${{ env.PYENV_ROOT }} + export PATH=$PYENV_ROOT/bin:$PATH + git clone "https://github.com/pyenv/pyenv.git" "$PYENV_ROOT" + # Prevent pyenv build trying (and failing) to update pip + export GET_PIP=get-pip-2.6.py + echo 'import sys; sys.exit(0)' > ${GET_PIP} + GET_PIP=$(realpath $GET_PIP) + # Build and install Python + export CFLAGS="-I$openssl_inc" + export LDFLAGS="-L$openssl_lib" + export LD_LIBRARY_PATH="$openssl_lib" + pyenv install 2.6.9 + - name: Locate Python 2.6 + if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '2.6' }} + shell: bash + run: | + PYTHONHOME="${{ env.PYENV_ROOT }}/versions/2.6.9" + echo "PYTHONHOME=$PYTHONHOME" >> "$GITHUB_ENV" + echo "PATH=${PYTHONHOME}/bin:$PATH" >> "$GITHUB_ENV" + echo "LD_LIBRARY_PATH=${{ env.openssl_dir }}/lib${LD_LIBRARY_PATH:+:}${LD_LIBRARY_PATH}" >> "$GITHUB_ENV" + #-------- Jython ------ - name: Set up Java 8 if: ${{ matrix.python-impl == 'jython' }} - uses: actions/setup-java@v1 + uses: actions/setup-java@v3 with: java-version: 8 + distribution: 'zulu' + - name: Setup Jython environment + if: ${{ matrix.python-impl == 'jython' }} + shell: bash + run: | + echo "JYTHON_ROOT=${HOME}/jython" >> "$GITHUB_ENV" + echo "PIP=pip" >> "$GITHUB_ENV" + - name: Cache Jython + id: cachejy + if: ${{ matrix.python-impl == 'jython' && matrix.python-version == '2.7' }} + uses: actions/cache@v3 + with: + # 2.7.3 now available, may solve SNI issue + key: jython-2.7.1 + path: | + ${{ env.JYTHON_ROOT }} - name: Install Jython - if: ${{ matrix.python-impl == 'jython' }} + if: ${{ matrix.python-impl == 'jython' && matrix.python-version == '2.7' && ! 
steps.cachejy.outputs.cache-hit }} + shell: bash run: | - wget https://repo1.maven.org/maven2/org/python/jython-installer/2.7.1/jython-installer-2.7.1.jar -O jython-installer.jar - java -jar jython-installer.jar -s -d "$HOME/jython" - echo "$HOME/jython/bin" >> $GITHUB_PATH - - name: Install nose - if: ${{ matrix.python-impl != 'jython' }} - run: pip install nose - - name: Install nose (Jython) - if: ${{ matrix.python-impl == 'jython' }} - # Working around deprecation of support for non-SNI clients at PyPI CDN (see https://status.python.org/incidents/hzmjhqsdjqgb) + JYTHON_ROOT="${{ env.JYTHON_ROOT }}" + curl -L "https://repo1.maven.org/maven2/org/python/jython-installer/2.7.1/jython-installer-2.7.1.jar" -o jython-installer.jar + java -jar jython-installer.jar -s -d "${JYTHON_ROOT}" + echo "${JYTHON_ROOT}/bin" >> "$GITHUB_PATH" + - name: Set up cached Jython + if: ${{ steps.cachejy.outputs.cache-hit }} + shell: bash run: | - wget https://files.pythonhosted.org/packages/99/4f/13fb671119e65c4dce97c60e67d3fd9e6f7f809f2b307e2611f4701205cb/nose-1.3.7-py2-none-any.whl - pip install nose-1.3.7-py2-none-any.whl + JYTHON_ROOT="${{ env.JYTHON_ROOT }}" + echo "${JYTHON_ROOT}/bin" >> $GITHUB_PATH + - name: Install supporting Python 2.7 if possible + if: ${{ steps.cachejy.outputs.cache-hit }} + shell: bash + run: | + sudo apt-get install -y python2.7 || true + #-------- pip --------- + - name: Set up supported Python ${{ matrix.python-version }} pip + if: ${{ (matrix.python-version != '3.2' && steps.setup-python.outputs.python-path) || matrix.python-version == '2.7' }} + # This step may run in either Linux or Windows + shell: bash + run: | + echo "$PATH" + echo "$PYTHONHOME" + # curl is available on both Windows and Linux, -L follows redirects, -O gets name + python -m ensurepip || python -m pip --version || { \ + get_pip="${{ contains(needs.select.outputs.own-pip-versions, matrix.python-version) && format('{0}/', matrix.python-version) || '' }}"; \ + curl -L -O 
"https://bootstrap.pypa.io/pip/${get_pip}get-pip.py"; \ + python get-pip.py; } + - name: Set up Python 2.6 pip + if: ${{ matrix.python-version == '2.6' }} + shell: bash + run: | + python -m pip --version || { \ + curl -L -O "https://bootstrap.pypa.io/pip/2.6/get-pip.py"; \ + curl -L -O "https://files.pythonhosted.org/packages/ac/95/a05b56bb975efa78d3557efa36acaf9cf5d2fd0ee0062060493687432e03/pip-9.0.3-py2.py3-none-any.whl"; \ + python get-pip.py --no-setuptools --no-wheel pip-9.0.3-py2.py3-none-any.whl; } + # work-around to invoke pip module on 2.6: https://bugs.python.org/issue2751 + echo "PIP=python -m pip.__main__" >> "$GITHUB_ENV" + - name: Set up other Python ${{ matrix.python-version }} pip + if: ${{ matrix.python-version == '3.2' && steps.setup-python.outputs.python-path }} + shell: bash + run: | + python -m pip --version || { \ + curl -L -O "https://bootstrap.pypa.io/pip/3.2/get-pip.py"; \ + curl -L -O "https://files.pythonhosted.org/packages/b2/d0/cd115fe345dd6f07ec1c780020a7dfe74966fceeb171e0f20d1d4905b0b7/pip-7.1.2-py2.py3-none-any.whl"; \ + python get-pip.py --no-setuptools --no-wheel pip-7.1.2-py2.py3-none-any.whl; } + #-------- unittest ---- + - name: Upgrade Unittest for Python 2.6 + if: ${{ matrix.python-version == '2.6' }} + shell: bash + run: | + # Work around deprecation of support for non-SNI clients at PyPI CDN (see https://status.python.org/incidents/hzmjhqsdjqgb) + $PIP -qq show unittest2 || { \ + for u in "65/26/32b8464df2a97e6dd1b656ed26b2c194606c16fe163c695a992b36c11cdf/six-1.13.0-py2.py3-none-any.whl" \ + "f2/94/3af39d34be01a24a6e65433d19e107099374224905f1e0cc6bbe1fd22a2f/argparse-1.4.0-py2.py3-none-any.whl" \ + "c7/a3/c5da2a44c85bfbb6eebcfc1dde24933f8704441b98fdde6528f4831757a6/linecache2-1.0.0-py2.py3-none-any.whl" \ + "17/0a/6ac05a3723017a967193456a2efa0aa9ac4b51456891af1e2353bb9de21e/traceback2-1.4.0-py2.py3-none-any.whl" \ + "72/20/7f0f433060a962200b7272b8c12ba90ef5b903e218174301d0abfd523813/unittest2-1.1.0-py2.py3-none-any.whl"; do 
\ + curl -L -O "https://files.pythonhosted.org/packages/${u}"; \ + $PIP install ${u##*/}; \ + done; } + # make tests use unittest2 + for test in ./test/test_*.py ./test/helper.py; do + sed -r -i -e '/^import unittest$/s/test/test2 as unittest/' "$test" + done + #-------- nose -------- + - name: Install nose for Python ${{ matrix.python-version }} + if: ${{ (matrix.python-version != '3.2' && steps.setup-python.outputs.python-path) || (matrix.python-impl == 'cpython' && (matrix.python-version == '2.7' || matrix.python-version == '3.12')) }} + shell: bash + run: | + echo "$PATH" + echo "$PYTHONHOME" + # Use PyNose for recent Pythons instead of Nose + py3ver="${{ matrix.python-version }}" + py3ver=${py3ver#3.} + [ "$py3ver" != "${{ matrix.python-version }}" ] && py3ver=${py3ver%.*} || py3ver=0 + [ "$py3ver" -ge 9 ] && nose=pynose || nose=nose + $PIP -qq show $nose || $PIP install $nose + - name: Install nose for other Python 2 + if: ${{ matrix.python-impl == 'jython' || (matrix.python-impl == 'cpython' && matrix.python-version == '2.6') }} + shell: bash + run: | + # Work around deprecation of support for non-SNI clients at PyPI CDN (see https://status.python.org/incidents/hzmjhqsdjqgb) + $PIP -qq show nose || { \ + curl -L -O "https://files.pythonhosted.org/packages/99/4f/13fb671119e65c4dce97c60e67d3fd9e6f7f809f2b307e2611f4701205cb/nose-1.3.7-py2-none-any.whl"; \ + $PIP install nose-1.3.7-py2-none-any.whl; } + - name: Install nose for other Python 3 + if: ${{ matrix.python-version == '3.2' && steps.setup-python.outputs.python-path }} + shell: bash + run: | + $PIP -qq show nose || { \ + curl -L -O "https://files.pythonhosted.org/packages/15/d8/dd071918c040f50fa1cf80da16423af51ff8ce4a0f2399b7bf8de45ac3d9/nose-1.3.7-py3-none-any.whl"; \ + $PIP install nose-1.3.7-py3-none-any.whl; } + - name: Set up nosetest test + if: ${{ contains(needs.select.outputs.test-set, matrix.ytdl-test-set ) }} + shell: bash + run: | + # set PYTHON_VER + PYTHON_VER=${{ matrix.python-version }} + 
[ "${PYTHON_VER#*-}" != "$PYTHON_VER" ] || PYTHON_VER="${{ matrix.python-impl }}-${PYTHON_VER}" + echo "PYTHON_VER=$PYTHON_VER" >> "$GITHUB_ENV" + echo "PYTHON_IMPL=${{ matrix.python-impl }}" >> "$GITHUB_ENV" + # define a test to validate the Python version used by nosetests + printf '%s\n' \ + 'from __future__ import unicode_literals' \ + 'import sys, os, platform' \ + 'try:' \ + ' import unittest2 as unittest' \ + 'except ImportError:' \ + ' import unittest' \ + 'class TestPython(unittest.TestCase):' \ + ' def setUp(self):' \ + ' self.ver = os.environ["PYTHON_VER"].split("-")' \ + ' def test_python_ver(self):' \ + ' self.assertEqual(["%d" % v for v in sys.version_info[:2]], self.ver[-1].split(".")[:2])' \ + ' self.assertTrue(sys.version.startswith(self.ver[-1]))' \ + ' self.assertIn(self.ver[0], ",".join((sys.version, platform.python_implementation())).lower())' \ + ' def test_python_impl(self):' \ + ' self.assertIn(platform.python_implementation().lower(), (os.environ["PYTHON_IMPL"], self.ver[0]))' \ + > test/test_python.py + #-------- TESTS ------- - name: Run tests + if: ${{ contains(needs.select.outputs.test-set, matrix.ytdl-test-set ) }} continue-on-error: ${{ matrix.ytdl-test-set == 'download' || matrix.python-impl == 'jython' }} env: YTDL_TEST_SET: ${{ matrix.ytdl-test-set }} - run: ./devscripts/run_tests.${{ matrix.run-tests-ext }} + run: | + ./devscripts/run_tests.${{ matrix.run-tests-ext }} flake8: name: Linter runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Set up Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: 3.9 - name: Install flake8 run: pip install flake8 - name: Run flake8 run: flake8 . 
+ diff --git a/README.md b/README.md index 14a3d6c86..47e686f84 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ Windows users can [download an .exe file](https://yt-dl.org/latest/youtube-dl.ex You can also use pip: sudo -H pip install --upgrade youtube-dl - + This command will update youtube-dl if you have already installed it. See the [pypi page](https://pypi.python.org/pypi/youtube_dl) for more information. macOS users can install youtube-dl with [Homebrew](https://brew.sh/): @@ -563,7 +563,7 @@ The basic usage is not to set any template arguments when downloading a single f - `is_live` (boolean): Whether this video is a live stream or a fixed-length video - `start_time` (numeric): Time in seconds where the reproduction should start, as specified in the URL - `end_time` (numeric): Time in seconds where the reproduction should end, as specified in the URL - - `format` (string): A human-readable description of the format + - `format` (string): A human-readable description of the format - `format_id` (string): Format code specified by `--format` - `format_note` (string): Additional info about the format - `width` (numeric): Width of the video @@ -675,7 +675,7 @@ The general syntax for format selection is `--format FORMAT` or shorter `-f FORM **tl;dr:** [navigate me to examples](#format-selection-examples). -The simplest case is requesting a specific format, for example with `-f 22` you can download the format with format code equal to 22. You can get the list of available format codes for particular video using `--list-formats` or `-F`. Note that these format codes are extractor specific. +The simplest case is requesting a specific format, for example with `-f 22` you can download the format with format code equal to 22. You can get the list of available format codes for particular video using `--list-formats` or `-F`. Note that these format codes are extractor specific. 
You can also use a file extension (currently `3gp`, `aac`, `flv`, `m4a`, `mp3`, `mp4`, `ogg`, `wav`, `webm` are supported) to download the best quality format of a particular file extension served as a single file, e.g. `-f webm` will download the best quality format with the `webm` extension served as a single file. @@ -760,7 +760,7 @@ Videos can be filtered by their upload date using the options `--date`, `--dateb - Absolute dates: Dates in the format `YYYYMMDD`. - Relative dates: Dates in the format `(now|today)[+-][0-9](day|week|month|year)(s)?` - + Examples: ```bash @@ -1000,6 +1000,8 @@ To run the test, simply invoke your favorite test runner, or execute a test file python test/test_download.py nosetests +For Python versions 3.6 and later, you can use [pynose](https://pypi.org/project/pynose/) to implement `nosetests`. The original [nose](https://pypi.org/project/nose/) has not been upgraded for 3.10 and later. + See item 6 of [new extractor tutorial](#adding-support-for-a-new-site) for how to run extractor specific test cases. If you want to create a build of youtube-dl yourself, you'll need @@ -1091,7 +1093,7 @@ In any case, thank you very much for your contributions! ## youtube-dl coding conventions -This section introduces a guide lines for writing idiomatic, robust and future-proof extractor code. +This section introduces guidelines for writing idiomatic, robust and future-proof extractor code. Extractors are very fragile by nature since they depend on the layout of the source data provided by 3rd party media hosters out of your control and this layout tends to change. As an extractor implementer your task is not only to write code that will extract media links and metadata correctly but also to minimize dependency on the source's layout and even to make the code foresee potential future changes and be ready for that. This is important because it will allow the extractor not to break on minor layout changes thus keeping old youtube-dl versions working. 
Even though this breakage issue is easily fixed by emitting a new version of youtube-dl with a fix incorporated, all the previous versions become broken in all repositories and distros' packages that may not be so prompt in fetching the update from us. Needless to say, some non rolling release distros may never receive an update at all. @@ -1114,7 +1116,7 @@ Say you have some source dictionary `meta` that you've fetched as JSON with HTTP ```python meta = self._download_json(url, video_id) ``` - + Assume at this point `meta`'s layout is: ```python @@ -1158,7 +1160,7 @@ description = self._search_regex( ``` On failure this code will silently continue the extraction with `description` set to `None`. That is useful for metafields that may or may not be present. - + ### Provide fallbacks When extracting metadata try to do so from multiple sources. For example if `title` is present in several places, try extracting from at least some of them. This makes it more future-proof in case some of the sources become unavailable. @@ -1206,7 +1208,7 @@ r'(id|ID)=(?P\d+)' #### Make regular expressions relaxed and flexible When using regular expressions try to write them fuzzy, relaxed and flexible, skipping insignificant parts that are more likely to change, allowing both single and double quotes for quoted values and so on. - + ##### Example Say you need to extract `title` from the following HTML code: @@ -1230,7 +1232,7 @@ title = self._search_regex( webpage, 'title', group='title') ``` -Note how you tolerate potential changes in the `style` attribute's value or switch from using double quotes to single for `class` attribute: +Note how you tolerate potential changes in the `style` attribute's value or switch from using double quotes to single for `class` attribute: The code definitely should not look like: @@ -1331,27 +1333,114 @@ Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`] Use `url_or_none` for safe URL processing. 
-Use `try_get` for safe metadata extraction from parsed JSON. +Use `traverse_obj` for safe metadata extraction from parsed JSON. -Use `unified_strdate` for uniform `upload_date` or any `YYYYMMDD` meta field extraction, `unified_timestamp` for uniform `timestamp` extraction, `parse_filesize` for `filesize` extraction, `parse_count` for count meta fields extraction, `parse_resolution`, `parse_duration` for `duration` extraction, `parse_age_limit` for `age_limit` extraction. +Use `unified_strdate` for uniform `upload_date` or any `YYYYMMDD` meta field extraction, `unified_timestamp` for uniform `timestamp` extraction, `parse_filesize` for `filesize` extraction, `parse_count` for count meta fields extraction, `parse_resolution`, `parse_duration` for `duration` extraction, `parse_age_limit` for `age_limit` extraction. Explore [`youtube_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/utils.py) for more useful convenience functions. #### More examples ##### Safely extract optional description from parsed JSON + +When processing complex JSON, as often returned by site API requests or stashed in web pages for "hydration", you can use the `traverse_obj()` utility function to handle multiple fallback values and to ensure the expected type of metadata items. The function's docstring defines how the function works; also review usage in the codebase for more examples. + +In this example, a text `description`, or `None`, is pulled from the `.result.video[0].summary` member of the parsed JSON `response`, if available. + +```python +description = traverse_obj(response, ('result', 'video', 0, 'summary', T(compat_str))) +``` +`T(...)` is a shorthand for a set literal; if you do not need to support Python 2.6 (which lacks set literal syntax), `T(type_or_transformation)` could be written as a set literal `{type_or_transformation}`. + +Some extractors use the older and less capable `try_get()` function in the same way.
+ ```python description = try_get(response, lambda x: x['result']['video'][0]['summary'], compat_str) ``` ##### Safely extract more optional metadata + +In this example, various optional metadata values are extracted from the `.result.video[0]` member of the parsed JSON `response`, which is expected to be a JS object, parsed into a `dict`, with no crash if that isn't so, or if any of the target values are missing or invalid. + ```python -video = try_get(response, lambda x: x['result']['video'][0], dict) or {} +video = traverse_obj(response, ('result', 'video', 0, T(dict))) or {} +# formerly: +# video = try_get(response, lambda x: x['result']['video'][0], dict) or {} description = video.get('summary') duration = float_or_none(video.get('durationMs'), scale=1000) view_count = int_or_none(video.get('views')) ``` +#### Safely extract nested lists + +Suppose you've extracted JSON like this into a Python data structure named `media_json` using, say, the `_download_json()` or `_parse_json()` methods of `InfoExtractor`: +```json +{ + "title": "Example video", + "comment": "try extracting this", + "media": [{ + "type": "bad", + "size": 320, + "url": "https://some.cdn.site/bad.mp4" + }, { + "type": "streaming", + "url": "https://some.cdn.site/hls.m3u8" + }, { + "type": "super", + "size": 1280, + "url": "https://some.cdn.site/good.webm" + }], + "moreStuff": "more values", + ... +} +``` + +Then extractor code like this can collect the various fields of the JSON: +```python +... +from ..utils import ( + determine_ext, + int_or_none, + T, + traverse_obj, + txt_or_none, + url_or_none, +) +... + ... 
+ info_dict = {} + # extract title and description if valid and not empty + info_dict.update(traverse_obj(media_json, { + 'title': ('title', T(txt_or_none)), + 'description': ('comment', T(txt_or_none)), + })) + + # extract any recognisable media formats + fmts = [] + # traverse into "media" list, extract `dict`s with desired keys + for fmt in traverse_obj(media_json, ('media', Ellipsis, { + 'format_id': ('type', T(txt_or_none)), + 'url': ('url', T(url_or_none)), + 'width': ('size', T(int_or_none)), })): + # bad `fmt` values were `None` and removed + if 'url' not in fmt: + continue + fmt_url = fmt['url'] # known to be valid URL + ext = determine_ext(fmt_url) + if ext == 'm3u8': + fmts.extend(self._extract_m3u8_formats(fmt_url, video_id, 'mp4', fatal=False)) + else: + fmt['ext'] = ext + fmts.append(fmt) + + # sort, raise if no formats + self._sort_formats(fmts) + + info_dict['formats'] = fmts + ... +``` +The extractor raises an exception rather than random crashes if the JSON structure changes so that no formats are found. + # EMBEDDING YOUTUBE-DL youtube-dl makes the best effort to be a good command-line program, and thus should be callable from any programming language. If you encounter any problems parsing its output, feel free to [create a report](https://github.com/ytdl-org/youtube-dl/issues/new). 
diff --git a/devscripts/__init__.py b/devscripts/__init__.py new file mode 100644 index 000000000..750dbdca7 --- /dev/null +++ b/devscripts/__init__.py @@ -0,0 +1 @@ +# Empty file needed to make devscripts.utils properly importable from outside diff --git a/devscripts/bash-completion.py b/devscripts/bash-completion.py index 3d1391334..7db396a77 100755 --- a/devscripts/bash-completion.py +++ b/devscripts/bash-completion.py @@ -5,8 +5,12 @@ import os from os.path import dirname as dirn import sys -sys.path.insert(0, dirn(dirn((os.path.abspath(__file__))))) +sys.path.insert(0, dirn(dirn(os.path.abspath(__file__)))) + import youtube_dl +from youtube_dl.compat import compat_open as open + +from utils import read_file BASH_COMPLETION_FILE = "youtube-dl.bash-completion" BASH_COMPLETION_TEMPLATE = "devscripts/bash-completion.in" @@ -18,9 +22,8 @@ def build_completion(opt_parser): for option in group.option_list: # for every long flag opts_flag.append(option.get_opt_string()) - with open(BASH_COMPLETION_TEMPLATE) as f: - template = f.read() - with open(BASH_COMPLETION_FILE, "w") as f: + template = read_file(BASH_COMPLETION_TEMPLATE) + with open(BASH_COMPLETION_FILE, "w", encoding='utf-8') as f: # just using the special char filled_template = template.replace("{{flags}}", " ".join(opts_flag)) f.write(filled_template) diff --git a/devscripts/cli_to_api.py b/devscripts/cli_to_api.py index 2f4d6a458..9fb1d2ba8 100755 --- a/devscripts/cli_to_api.py +++ b/devscripts/cli_to_api.py @@ -49,15 +49,34 @@ def cli_to_api(*opts): # from https://github.com/yt-dlp/yt-dlp/issues/5859#issuecomment-1363938900 default = parsed_options([]) - diff = dict((k, v) for k, v in parsed_options(opts).items() if default[k] != v) + + def neq_opt(a, b): + if a == b: + return False + if a is None and repr(type(b)).endswith(".utils.DateRange'>"): + return '0001-01-01 - 9999-12-31' != '{0}'.format(b) + return a != b + + diff = dict((k, v) for k, v in parsed_options(opts).items() if neq_opt(default[k],
v)) if 'postprocessors' in diff: diff['postprocessors'] = [pp for pp in diff['postprocessors'] if pp not in default['postprocessors']] return diff def main(): - from pprint import pprint - pprint(cli_to_api(*sys.argv)) + from pprint import PrettyPrinter + + pprint = PrettyPrinter() + super_format = pprint.format + + def format(object, context, maxlevels, level): + if repr(type(object)).endswith(".utils.DateRange'>"): + return '{0}: {1}>'.format(repr(object)[:-2], object), True, False + return super_format(object, context, maxlevels, level) + + pprint.format = format + + pprint.pprint(cli_to_api(*sys.argv)) if __name__ == '__main__': diff --git a/devscripts/create-github-release.py b/devscripts/create-github-release.py index 2ddfa1096..320bcfc27 100644 --- a/devscripts/create-github-release.py +++ b/devscripts/create-github-release.py @@ -1,7 +1,6 @@ #!/usr/bin/env python from __future__ import unicode_literals -import io import json import mimetypes import netrc @@ -10,7 +9,9 @@ import os import re import sys -sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +dirn = os.path.dirname + +sys.path.insert(0, dirn(dirn(os.path.abspath(__file__)))) from youtube_dl.compat import ( compat_basestring, @@ -22,6 +23,7 @@ from youtube_dl.utils import ( make_HTTPS_handler, sanitized_Request, ) +from utils import read_file class GitHubReleaser(object): @@ -89,8 +91,7 @@ def main(): changelog_file, version, build_path = args - with io.open(changelog_file, encoding='utf-8') as inf: - changelog = inf.read() + changelog = read_file(changelog_file) mobj = re.search(r'(?s)version %s\n{2}(.+?)\n{3}' % version, changelog) body = mobj.group(1) if mobj else '' diff --git a/devscripts/fish-completion.py b/devscripts/fish-completion.py index 51d19dd33..ef8a39e0b 100755 --- a/devscripts/fish-completion.py +++ b/devscripts/fish-completion.py @@ -6,10 +6,13 @@ import os from os.path import dirname as dirn import sys -sys.path.insert(0, 
dirn(dirn((os.path.abspath(__file__))))) +sys.path.insert(0, dirn(dirn(os.path.abspath(__file__)))) + import youtube_dl from youtube_dl.utils import shell_quote +from utils import read_file, write_file + FISH_COMPLETION_FILE = 'youtube-dl.fish' FISH_COMPLETION_TEMPLATE = 'devscripts/fish-completion.in' @@ -38,11 +41,9 @@ def build_completion(opt_parser): complete_cmd.extend(EXTRA_ARGS.get(long_option, [])) commands.append(shell_quote(complete_cmd)) - with open(FISH_COMPLETION_TEMPLATE) as f: - template = f.read() + template = read_file(FISH_COMPLETION_TEMPLATE) filled_template = template.replace('{{commands}}', '\n'.join(commands)) - with open(FISH_COMPLETION_FILE, 'w') as f: - f.write(filled_template) + write_file(FISH_COMPLETION_FILE, filled_template) parser = youtube_dl.parseOpts()[0] diff --git a/devscripts/gh-pages/add-version.py b/devscripts/gh-pages/add-version.py index 867ea0048..b84908f85 100755 --- a/devscripts/gh-pages/add-version.py +++ b/devscripts/gh-pages/add-version.py @@ -6,16 +6,21 @@ import sys import hashlib import os.path +dirn = os.path.dirname + +sys.path.insert(0, dirn(dirn(dirn(os.path.abspath(__file__))))) + +from devscripts.utils import read_file, write_file +from youtube_dl.compat import compat_open as open if len(sys.argv) <= 1: print('Specify the version number as parameter') sys.exit() version = sys.argv[1] -with open('update/LATEST_VERSION', 'w') as f: - f.write(version) +write_file('update/LATEST_VERSION', version) -versions_info = json.load(open('update/versions.json')) +versions_info = json.loads(read_file('update/versions.json')) if 'signature' in versions_info: del versions_info['signature'] @@ -39,5 +44,5 @@ for key, filename in filenames.items(): versions_info['versions'][version] = new_version versions_info['latest'] = version -with open('update/versions.json', 'w') as jsonf: - json.dump(versions_info, jsonf, indent=4, sort_keys=True) +with open('update/versions.json', 'w', encoding='utf-8') as jsonf: + 
json.dump(versions_info, jsonf, indent=4, sort_keys=True) diff --git a/devscripts/gh-pages/generate-download.py b/devscripts/gh-pages/generate-download.py index a873d32ee..3e38e9299 100755 --- a/devscripts/gh-pages/generate-download.py +++ b/devscripts/gh-pages/generate-download.py @@ -2,14 +2,21 @@ from __future__ import unicode_literals import json +import os.path +import sys -versions_info = json.load(open('update/versions.json')) +dirn = os.path.dirname + +sys.path.insert(0, dirn(dirn((os.path.abspath(__file__))))) + +from utils import read_file, write_file + +versions_info = json.loads(read_file('update/versions.json')) version = versions_info['latest'] version_dict = versions_info['versions'][version] # Read template page -with open('download.html.in', 'r', encoding='utf-8') as tmplf: - template = tmplf.read() +template = read_file('download.html.in') template = template.replace('@PROGRAM_VERSION@', version) template = template.replace('@PROGRAM_URL@', version_dict['bin'][0]) @@ -18,5 +25,5 @@ template = template.replace('@EXE_URL@', version_dict['exe'][0]) template = template.replace('@EXE_SHA256SUM@', version_dict['exe'][1]) template = template.replace('@TAR_URL@', version_dict['tar'][0]) template = template.replace('@TAR_SHA256SUM@', version_dict['tar'][1]) -with open('download.html', 'w', encoding='utf-8') as dlf: - dlf.write(template) + +write_file('download.html', template) diff --git a/devscripts/gh-pages/update-copyright.py b/devscripts/gh-pages/update-copyright.py index 61487f925..444595c48 100755 --- a/devscripts/gh-pages/update-copyright.py +++ b/devscripts/gh-pages/update-copyright.py @@ -5,17 +5,22 @@ from __future__ import with_statement, unicode_literals import datetime import glob -import io # For Python 2 compatibility import os import re +import sys -year = str(datetime.datetime.now().year) +dirn = os.path.dirname + +sys.path.insert(0, dirn(dirn(dirn(os.path.abspath(__file__))))) + +from devscripts.utils import read_file, write_file +from 
youtube_dl import compat_str + +year = compat_str(datetime.datetime.now().year) for fn in glob.glob('*.html*'): - with io.open(fn, encoding='utf-8') as f: - content = f.read() + content = read_file(fn) newc = re.sub(r'(?P<copyright>Copyright © 2011-)(?P<year>[0-9]{4})', 'Copyright © 2011-' + year, content) if content != newc: tmpFn = fn + '.part' - with io.open(tmpFn, 'wt', encoding='utf-8') as outf: - outf.write(newc) + write_file(tmpFn, newc) os.rename(tmpFn, fn) diff --git a/devscripts/gh-pages/update-feed.py b/devscripts/gh-pages/update-feed.py index 506a62377..13a367d34 100755 --- a/devscripts/gh-pages/update-feed.py +++ b/devscripts/gh-pages/update-feed.py @@ -2,10 +2,16 @@ from __future__ import unicode_literals import datetime -import io import json +import os.path import textwrap +import sys +dirn = os.path.dirname + +sys.path.insert(0, dirn(dirn(os.path.abspath(__file__)))) + +from utils import write_file atom_template = textwrap.dedent("""\ @@ -72,5 +78,4 @@ for v in versions: entries_str = textwrap.indent(''.join(entries), '\t') atom_template = atom_template.replace('@ENTRIES@', entries_str) -with io.open('update/releases.atom', 'w', encoding='utf-8') as atom_file: - atom_file.write(atom_template) +write_file('update/releases.atom', atom_template) diff --git a/devscripts/gh-pages/update-sites.py b/devscripts/gh-pages/update-sites.py index 531c93c70..06a8a474c 100755 --- a/devscripts/gh-pages/update-sites.py +++ b/devscripts/gh-pages/update-sites.py @@ -5,15 +5,17 @@ import sys import os import textwrap +dirn = os.path.dirname + # We must be able to import youtube_dl -sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) +sys.path.insert(0, dirn(dirn(dirn(os.path.abspath(__file__))))) import youtube_dl +from devscripts.utils import read_file, write_file def main(): - with open('supportedsites.html.in', 'r', encoding='utf-8') as tmplf: - template = tmplf.read() + template = read_file('supportedsites.html.in') ie_htmls = [] for ie 
in youtube_dl.list_extractors(age_limit=None): @@ -29,8 +31,7 @@ def main(): template = template.replace('@SITES@', textwrap.indent('\n'.join(ie_htmls), '\t')) - with open('supportedsites.html', 'w', encoding='utf-8') as sitesf: - sitesf.write(template) + write_file('supportedsites.html', template) if __name__ == '__main__': diff --git a/devscripts/make_contributing.py b/devscripts/make_contributing.py index 226d1a5d6..5a9eb194f 100755 --- a/devscripts/make_contributing.py +++ b/devscripts/make_contributing.py @@ -1,10 +1,11 @@ #!/usr/bin/env python from __future__ import unicode_literals -import io import optparse import re +from utils import read_file, write_file + def main(): parser = optparse.OptionParser(usage='%prog INFILE OUTFILE') @@ -14,8 +15,7 @@ def main(): infile, outfile = args - with io.open(infile, encoding='utf-8') as inf: - readme = inf.read() + readme = read_file(infile) bug_text = re.search( r'(?s)#\s*BUGS\s*[^\n]*\s*(.*?)#\s*COPYRIGHT', readme).group(1) @@ -25,8 +25,7 @@ def main(): out = bug_text + dev_text - with io.open(outfile, 'w', encoding='utf-8') as outf: - outf.write(out) + write_file(outfile, out) if __name__ == '__main__': diff --git a/devscripts/make_issue_template.py b/devscripts/make_issue_template.py index b7ad23d83..65fa8169f 100644 --- a/devscripts/make_issue_template.py +++ b/devscripts/make_issue_template.py @@ -1,8 +1,11 @@ #!/usr/bin/env python from __future__ import unicode_literals -import io import optparse +import os.path +import sys + +from utils import read_file, read_version, write_file def main(): @@ -13,17 +16,11 @@ def main(): infile, outfile = args - with io.open(infile, encoding='utf-8') as inf: - issue_template_tmpl = inf.read() + issue_template_tmpl = read_file(infile) - # Get the version from youtube_dl/version.py without importing the package - exec(compile(open('youtube_dl/version.py').read(), - 'youtube_dl/version.py', 'exec')) + out = issue_template_tmpl % {'version': read_version()} - out = 
issue_template_tmpl % {'version': locals()['__version__']} - - with io.open(outfile, 'w', encoding='utf-8') as outf: - outf.write(out) + write_file(outfile, out) if __name__ == '__main__': main() diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py index edc19183d..5b8b123a4 100644 --- a/devscripts/make_lazy_extractors.py +++ b/devscripts/make_lazy_extractors.py @@ -1,35 +1,48 @@ from __future__ import unicode_literals, print_function from inspect import getsource -import io import os from os.path import dirname as dirn +import re import sys print('WARNING: Lazy loading extractors is an experimental feature that may not always work', file=sys.stderr) -sys.path.insert(0, dirn(dirn((os.path.abspath(__file__))))) +sys.path.insert(0, dirn(dirn(os.path.abspath(__file__)))) lazy_extractors_filename = sys.argv[1] if os.path.exists(lazy_extractors_filename): os.remove(lazy_extractors_filename) # Py2: may be confused by leftover lazy_extractors.pyc -try: - os.remove(lazy_extractors_filename + 'c') -except OSError: - pass +if sys.version_info[0] < 3: + for c in ('c', 'o'): + try: + os.remove(lazy_extractors_filename + c) + except OSError: + pass + +from devscripts.utils import read_file, write_file +from youtube_dl.compat import compat_register_utf8 + +compat_register_utf8() from youtube_dl.extractor import _ALL_CLASSES from youtube_dl.extractor.common import InfoExtractor, SearchInfoExtractor -with open('devscripts/lazy_load_template.py', 'rt') as f: - module_template = f.read() +module_template = read_file('devscripts/lazy_load_template.py') + + +def get_source(m): + return re.sub(r'(?m)^\s*#.*\n', '', getsource(m)) + module_contents = [ - module_template + '\n' + getsource(InfoExtractor.suitable) + '\n', + module_template, + get_source(InfoExtractor.suitable), + get_source(InfoExtractor._match_valid_url) + '\n', 'class LazyLoadSearchExtractor(LazyLoadExtractor):\n pass\n', # needed for suitable() methods of Youtube extractor (see #28780) 
- 'from youtube_dl.utils import parse_qs\n', + 'from youtube_dl.utils import parse_qs, variadic\n', ] ie_template = ''' @@ -62,7 +75,7 @@ def build_lazy_ie(ie, name): valid_url=valid_url, module=ie.__module__) if ie.suitable.__func__ is not InfoExtractor.suitable.__func__: - s += '\n' + getsource(ie.suitable) + s += '\n' + get_source(ie.suitable) if hasattr(ie, '_make_valid_url'): # search extractors s += make_valid_template.format(valid_url=ie._make_valid_url()) @@ -102,7 +115,17 @@ for ie in ordered_cls: module_contents.append( '_ALL_CLASSES = [{0}]'.format(', '.join(names))) -module_src = '\n'.join(module_contents) + '\n' +module_src = '\n'.join(module_contents) -with io.open(lazy_extractors_filename, 'wt', encoding='utf-8') as f: - f.write(module_src) +write_file(lazy_extractors_filename, module_src + '\n') + +# work around JVM byte code module limit in Jython +if sys.platform.startswith('java') and sys.version_info[:2] == (2, 7): + import subprocess + from youtube_dl.compat import compat_subprocess_get_DEVNULL + # if Python 2.7 is available, use it to compile the module for Jython + try: + # if Python 2.7 is available, use it to compile the module for Jython + subprocess.check_call(['python2.7', '-m', 'py_compile', lazy_extractors_filename], stdout=compat_subprocess_get_DEVNULL()) + except Exception: + pass diff --git a/devscripts/make_readme.py b/devscripts/make_readme.py index 8fbce0796..7a5b04dcc 100755 --- a/devscripts/make_readme.py +++ b/devscripts/make_readme.py @@ -1,8 +1,14 @@ from __future__ import unicode_literals -import io -import sys +import os.path import re +import sys +dirn = os.path.dirname + +sys.path.insert(0, dirn(dirn(os.path.abspath(__file__)))) + +from utils import read_file +from youtube_dl.compat import compat_open as open README_FILE = 'README.md' helptext = sys.stdin.read() @@ -10,8 +16,7 @@ helptext = sys.stdin.read() if isinstance(helptext, bytes): helptext = helptext.decode('utf-8') -with io.open(README_FILE, encoding='utf-8') as 
f: - oldreadme = f.read() +oldreadme = read_file(README_FILE) header = oldreadme[:oldreadme.index('# OPTIONS')] footer = oldreadme[oldreadme.index('# CONFIGURATION'):] @@ -20,7 +25,7 @@ options = helptext[helptext.index(' General Options:') + 19:] options = re.sub(r'(?m)^ (\w.+)$', r'## \1', options) options = '# OPTIONS\n' + options + '\n' -with io.open(README_FILE, 'w', encoding='utf-8') as f: +with open(README_FILE, 'w', encoding='utf-8') as f: f.write(header) f.write(options) f.write(footer) diff --git a/devscripts/make_supportedsites.py b/devscripts/make_supportedsites.py index 764795bc5..c424d18d7 100644 --- a/devscripts/make_supportedsites.py +++ b/devscripts/make_supportedsites.py @@ -1,17 +1,19 @@ #!/usr/bin/env python from __future__ import unicode_literals -import io import optparse -import os +import os.path import sys - # Import youtube_dl -ROOT_DIR = os.path.join(os.path.dirname(__file__), '..') -sys.path.insert(0, ROOT_DIR) +dirn = os.path.dirname + +sys.path.insert(0, dirn(dirn(os.path.abspath(__file__)))) + import youtube_dl +from utils import write_file + def main(): parser = optparse.OptionParser(usage='%prog OUTFILE.md') @@ -38,8 +40,7 @@ def main(): ' - ' + md + '\n' for md in gen_ies_md(ies)) - with io.open(outfile, 'w', encoding='utf-8') as outf: - outf.write(out) + write_file(outfile, out) if __name__ == '__main__': diff --git a/devscripts/prepare_manpage.py b/devscripts/prepare_manpage.py index 76bf873e1..0090ada3e 100644 --- a/devscripts/prepare_manpage.py +++ b/devscripts/prepare_manpage.py @@ -1,13 +1,13 @@ from __future__ import unicode_literals -import io import optparse import os.path import re +from utils import read_file, write_file + ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) README_FILE = os.path.join(ROOT_DIR, 'README.md') - PREFIX = r'''%YOUTUBE-DL(1) # NAME @@ -29,8 +29,7 @@ def main(): outfile, = args - with io.open(README_FILE, encoding='utf-8') as f: - readme = f.read() + readme = 
read_file(README_FILE) readme = re.sub(r'(?s)^.*?(?=# DESCRIPTION)', '', readme) readme = re.sub(r'\s+youtube-dl \[OPTIONS\] URL \[URL\.\.\.\]', '', readme) @@ -38,8 +37,7 @@ def main(): readme = filter_options(readme) - with io.open(outfile, 'w', encoding='utf-8') as outf: - outf.write(readme) + write_file(outfile, readme) def filter_options(readme): diff --git a/devscripts/utils.py b/devscripts/utils.py new file mode 100644 index 000000000..2d072d2e0 --- /dev/null +++ b/devscripts/utils.py @@ -0,0 +1,62 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import argparse +import functools +import os.path +import subprocess +import sys + +dirn = os.path.dirname + +sys.path.insert(0, dirn(dirn(os.path.abspath(__file__)))) + +from youtube_dl.compat import ( + compat_kwargs, + compat_open as open, +) + + +def read_file(fname): + with open(fname, encoding='utf-8') as f: + return f.read() + + +def write_file(fname, content, mode='w'): + with open(fname, mode, encoding='utf-8') as f: + return f.write(content) + + +def read_version(fname='youtube_dl/version.py'): + """Get the version without importing the package""" + exec(compile(read_file(fname), fname, 'exec')) + return locals()['__version__'] + + +def get_filename_args(has_infile=False, default_outfile=None): + parser = argparse.ArgumentParser() + if has_infile: + parser.add_argument('infile', help='Input file') + kwargs = {'nargs': '?', 'default': default_outfile} if default_outfile else {} + kwargs['help'] = 'Output file' + parser.add_argument('outfile', **compat_kwargs(kwargs)) + + opts = parser.parse_args() + if has_infile: + return opts.infile, opts.outfile + return opts.outfile + + +def compose_functions(*functions): + return lambda x: functools.reduce(lambda y, f: f(y), functions, x) + + +def run_process(*args, **kwargs): + kwargs.setdefault('text', True) + kwargs.setdefault('check', True) + kwargs.setdefault('capture_output', True) + if kwargs['text']: + kwargs.setdefault('encoding', 'utf-8') + 
kwargs.setdefault('errors', 'replace') + kwargs = compat_kwargs(kwargs) + return subprocess.run(args, **kwargs) diff --git a/devscripts/zsh-completion.py b/devscripts/zsh-completion.py index 60aaf76cc..ebd552fcb 100755 --- a/devscripts/zsh-completion.py +++ b/devscripts/zsh-completion.py @@ -7,6 +7,8 @@ import sys sys.path.insert(0, dirn(dirn((os.path.abspath(__file__))))) import youtube_dl +from utils import read_file, write_file + ZSH_COMPLETION_FILE = "youtube-dl.zsh" ZSH_COMPLETION_TEMPLATE = "devscripts/zsh-completion.in" @@ -34,15 +36,13 @@ def build_completion(opt_parser): flags = [opt.get_opt_string() for opt in opts] - with open(ZSH_COMPLETION_TEMPLATE) as f: - template = f.read() + template = read_file(ZSH_COMPLETION_TEMPLATE) template = template.replace("{{fileopts}}", "|".join(fileopts)) template = template.replace("{{diropts}}", "|".join(diropts)) template = template.replace("{{flags}}", " ".join(flags)) - with open(ZSH_COMPLETION_FILE, "w") as f: - f.write(template) + write_file(ZSH_COMPLETION_FILE, template) parser = youtube_dl.parseOpts()[0] diff --git a/test/helper.py b/test/helper.py index 883b2e877..5b7e3dfe2 100644 --- a/test/helper.py +++ b/test/helper.py @@ -1,7 +1,6 @@ from __future__ import unicode_literals import errno -import io import hashlib import json import os.path @@ -9,14 +8,17 @@ import re import types import ssl import sys +import unittest import youtube_dl.extractor from youtube_dl import YoutubeDL from youtube_dl.compat import ( + compat_open as open, compat_os_name, compat_str, ) from youtube_dl.utils import ( + IDENTITY, preferredencoding, write_string, ) @@ -27,10 +29,10 @@ def get_params(override=None): "parameters.json") LOCAL_PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "local_parameters.json") - with io.open(PARAMETERS_FILE, encoding='utf-8') as pf: + with open(PARAMETERS_FILE, encoding='utf-8') as pf: parameters = json.load(pf) if os.path.exists(LOCAL_PARAMETERS_FILE): - with 
io.open(LOCAL_PARAMETERS_FILE, encoding='utf-8') as pf: + with open(LOCAL_PARAMETERS_FILE, encoding='utf-8') as pf: parameters.update(json.load(pf)) if override: parameters.update(override) @@ -72,7 +74,8 @@ class FakeYDL(YoutubeDL): def to_screen(self, s, skip_eol=None): print(s) - def trouble(self, s, tb=None): + def trouble(self, *args, **kwargs): + s = args[0] if len(args) > 0 else kwargs.get('message', 'Missing message') raise Exception(s) def download(self, x): @@ -139,7 +142,7 @@ def expect_value(self, got, expected, field): self.assertTrue( contains_str in got, 'field %s (value: %r) should contain %r' % (field, got, contains_str)) - elif isinstance(expected, compat_str) and re.match(r'^lambda \w+:', expected): + elif isinstance(expected, compat_str) and re.match(r'lambda \w+:', expected): fn = eval(expected) suite = expected.split(':', 1)[1].strip() self.assertTrue( @@ -297,3 +300,7 @@ def http_server_port(httpd): else: sock = httpd.socket return sock.getsockname()[1] + + +def expectedFailureIf(cond): + return unittest.expectedFailure if cond else IDENTITY diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index 6d25441db..3f96645de 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -3,19 +3,37 @@ from __future__ import unicode_literals # Allow direct execution -import io import os import sys import unittest + sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from test.helper import FakeYDL, expect_dict, expect_value, http_server_port -from youtube_dl.compat import compat_etree_fromstring, compat_http_server -from youtube_dl.extractor.common import InfoExtractor -from youtube_dl.extractor import YoutubeIE, get_info_extractor -from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError import threading +from test.helper import ( + expect_dict, + expect_value, + FakeYDL, + http_server_port, +) +from youtube_dl.compat import ( + compat_etree_fromstring, 
+ compat_http_server, + compat_open as open, +) +from youtube_dl.extractor.common import InfoExtractor +from youtube_dl.extractor import ( + get_info_extractor, + YoutubeIE, +) +from youtube_dl.utils import ( + encode_data_uri, + ExtractorError, + RegexNotFoundError, + strip_jsonp, +) + TEAPOT_RESPONSE_STATUS = 418 TEAPOT_RESPONSE_BODY = "

418 I'm a teapot

" @@ -100,6 +118,71 @@ class TestInfoExtractor(unittest.TestCase): self.assertRaises(RegexNotFoundError, ie._html_search_meta, 'z', html, None, fatal=True) self.assertRaises(RegexNotFoundError, ie._html_search_meta, ('z', 'x'), html, None, fatal=True) + def test_search_nextjs_data(self): + html = ''' + + + + + + Test _search_nextjs_data() + + +
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ + + +''' + search = self.ie._search_nextjs_data(html, 'testID') + self.assertEqual(search['props']['pageProps']['video']['id'], 'testid') + + def test_search_nuxt_data(self): + html = ''' + + + + + Nuxt.js Test Page + + + + +
+

Example heading

+
+

Decoy text

+
+
+ + + + +''' + search = self.ie._search_nuxt_data(html, 'testID') + self.assertEqual(search['track']['id'], 'testid') + def test_search_json_ld_realworld(self): # https://github.com/ytdl-org/youtube-dl/issues/23306 expect_dict( @@ -348,6 +431,24 @@ class TestInfoExtractor(unittest.TestCase): }], }) + # from https://0000.studio/ + # with type attribute but without extension in URL + expect_dict( + self, + self.ie._parse_html5_media_entries( + 'https://0000.studio', + r''' + + ''', None)[0], + { + 'formats': [{ + 'url': 'https://d1ggyt9m8pwf3g.cloudfront.net/protected/ap-northeast-1:1864af40-28d5-492b-b739-b32314b1a527/archive/clip/838db6a7-8973-4cd6-840d-8517e4093c92', + 'ext': 'mp4', + }], + }) + def test_extract_jwplayer_data_realworld(self): # from http://www.suffolk.edu/sjc/ expect_dict( @@ -801,8 +902,8 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ ] for m3u8_file, m3u8_url, expected_formats in _TEST_CASES: - with io.open('./test/testdata/m3u8/%s.m3u8' % m3u8_file, - mode='r', encoding='utf-8') as f: + with open('./test/testdata/m3u8/%s.m3u8' % m3u8_file, + mode='r', encoding='utf-8') as f: formats = self.ie._parse_m3u8_formats( f.read(), m3u8_url, ext='mp4') self.ie._sort_formats(formats) @@ -1026,8 +1127,8 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ ] for mpd_file, mpd_url, mpd_base_url, expected_formats in _TEST_CASES: - with io.open('./test/testdata/mpd/%s.mpd' % mpd_file, - mode='r', encoding='utf-8') as f: + with open('./test/testdata/mpd/%s.mpd' % mpd_file, + mode='r', encoding='utf-8') as f: formats = self.ie._parse_mpd_formats( compat_etree_fromstring(f.read().encode('utf-8')), mpd_base_url=mpd_base_url, mpd_url=mpd_url) @@ -1053,8 +1154,8 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ ] for f4m_file, f4m_url, expected_formats in _TEST_CASES: - with io.open('./test/testdata/f4m/%s.f4m' % f4m_file, - mode='r', encoding='utf-8') as f: + with 
open('./test/testdata/f4m/%s.f4m' % f4m_file, + mode='r', encoding='utf-8') as f: formats = self.ie._parse_f4m_formats( compat_etree_fromstring(f.read().encode('utf-8')), f4m_url, None) @@ -1101,8 +1202,8 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ ] for xspf_file, xspf_url, expected_entries in _TEST_CASES: - with io.open('./test/testdata/xspf/%s.xspf' % xspf_file, - mode='r', encoding='utf-8') as f: + with open('./test/testdata/xspf/%s.xspf' % xspf_file, + mode='r', encoding='utf-8') as f: entries = self.ie._parse_xspf( compat_etree_fromstring(f.read().encode('utf-8')), xspf_file, xspf_url=xspf_url, xspf_base_url=xspf_url) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index f8c8e619c..d994682b2 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -10,14 +10,31 @@ import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import copy +import json -from test.helper import FakeYDL, assertRegexpMatches +from test.helper import ( + FakeYDL, + assertRegexpMatches, + try_rm, +) from youtube_dl import YoutubeDL -from youtube_dl.compat import compat_str, compat_urllib_error +from youtube_dl.compat import ( + compat_http_cookiejar_Cookie, + compat_http_cookies_SimpleCookie, + compat_kwargs, + compat_open as open, + compat_str, + compat_urllib_error, +) + from youtube_dl.extractor import YoutubeIE from youtube_dl.extractor.common import InfoExtractor from youtube_dl.postprocessor.common import PostProcessor -from youtube_dl.utils import ExtractorError, match_filter_func +from youtube_dl.utils import ( + ExtractorError, + match_filter_func, + traverse_obj, +) TEST_URL = 'http://localhost/sample.mp4' @@ -29,11 +46,14 @@ class YDL(FakeYDL): self.msgs = [] def process_info(self, info_dict): - self.downloaded_info_dicts.append(info_dict) + self.downloaded_info_dicts.append(info_dict.copy()) def to_screen(self, msg): self.msgs.append(msg) + def dl(self, *args, **kwargs): + 
assert False, 'Downloader must not be invoked for test_YoutubeDL' + def _make_result(formats, **kwargs): res = { @@ -42,8 +62,9 @@ def _make_result(formats, **kwargs): 'title': 'testttitle', 'extractor': 'testex', 'extractor_key': 'TestEx', + 'webpage_url': 'http://example.com/watch?v=shenanigans', } - res.update(**kwargs) + res.update(**compat_kwargs(kwargs)) return res @@ -681,12 +702,12 @@ class TestYoutubeDL(unittest.TestCase): class SimplePP(PostProcessor): def run(self, info): - with open(audiofile, 'wt') as f: + with open(audiofile, 'w') as f: f.write('EXAMPLE') return [info['filepath']], info def run_pp(params, PP): - with open(filename, 'wt') as f: + with open(filename, 'w') as f: f.write('EXAMPLE') ydl = YoutubeDL(params) ydl.add_post_processor(PP()) @@ -705,7 +726,7 @@ class TestYoutubeDL(unittest.TestCase): class ModifierPP(PostProcessor): def run(self, info): - with open(info['filepath'], 'wt') as f: + with open(info['filepath'], 'w') as f: f.write('MODIFIED') return [], info @@ -930,17 +951,11 @@ class TestYoutubeDL(unittest.TestCase): # Test case for https://github.com/ytdl-org/youtube-dl/issues/27064 def test_ignoreerrors_for_playlist_with_url_transparent_iterable_entries(self): - class _YDL(YDL): - def __init__(self, *args, **kwargs): - super(_YDL, self).__init__(*args, **kwargs) - - def trouble(self, s, tb=None): - pass - - ydl = _YDL({ + ydl = YDL({ 'format': 'extra', 'ignoreerrors': True, }) + ydl.trouble = lambda *_, **__: None class VideoIE(InfoExtractor): _VALID_URL = r'video:(?P\d+)' @@ -1017,5 +1032,160 @@ class TestYoutubeDL(unittest.TestCase): self.assertEqual(out_info['release_date'], '20210930') +class TestYoutubeDLCookies(unittest.TestCase): + + @staticmethod + def encode_cookie(cookie): + if not isinstance(cookie, dict): + cookie = vars(cookie) + for name, value in cookie.items(): + yield name, compat_str(value) + + @classmethod + def comparable_cookies(cls, cookies): + # Work around cookiejar cookies not being unicode strings + 
return sorted(map(tuple, map(sorted, map(cls.encode_cookie, cookies)))) + + def assertSameCookies(self, c1, c2, msg=None): + return self.assertEqual( + *map(self.comparable_cookies, (c1, c2)), + msg=msg) + + def assertSameCookieStrings(self, c1, c2, msg=None): + return self.assertSameCookies( + *map(lambda c: compat_http_cookies_SimpleCookie(c).values(), (c1, c2)), + msg=msg) + + def test_header_cookies(self): + + ydl = FakeYDL() + ydl.report_warning = lambda *_, **__: None + + def cookie(name, value, version=None, domain='', path='', secure=False, expires=None): + return compat_http_cookiejar_Cookie( + version or 0, name, value, None, False, + domain, bool(domain), bool(domain), path, bool(path), + secure, expires, False, None, None, rest={}) + + test_url, test_domain = (t % ('yt.dl',) for t in ('https://%s/test', '.%s')) + + def test(encoded_cookies, cookies, headers=False, round_trip=None, error_re=None): + def _test(): + ydl.cookiejar.clear() + ydl._load_cookies(encoded_cookies, autoscope=headers) + if headers: + ydl._apply_header_cookies(test_url) + data = {'url': test_url} + ydl._calc_headers(data) + self.assertSameCookies( + cookies, ydl.cookiejar, + 'Extracted cookiejar.Cookie is not the same') + if not headers: + self.assertSameCookieStrings( + data.get('cookies'), round_trip or encoded_cookies, + msg='Cookie is not the same as round trip') + ydl.__dict__['_YoutubeDL__header_cookies'] = [] + + try: + _test() + except AssertionError: + raise + except Exception as e: + if not error_re: + raise + assertRegexpMatches(self, e.args[0], error_re.join(('.*',) * 2)) + + test('test=value; Domain=' + test_domain, [cookie('test', 'value', domain=test_domain)]) + test('test=value', [cookie('test', 'value')], error_re='Unscoped cookies are not allowed') + test('cookie1=value1; Domain={0}; Path=/test; cookie2=value2; Domain={0}; Path=/'.format(test_domain), [ + cookie('cookie1', 'value1', domain=test_domain, path='/test'), + cookie('cookie2', 'value2', 
domain=test_domain, path='/')]) + cookie_kw = compat_kwargs( + {'domain': test_domain, 'path': '/test', 'secure': True, 'expires': '9999999999', }) + test('test=value; Domain={domain}; Path={path}; Secure; Expires={expires}'.format(**cookie_kw), [ + cookie('test', 'value', **cookie_kw)]) + test('test="value; "; path=/test; domain=' + test_domain, [ + cookie('test', 'value; ', domain=test_domain, path='/test')], + round_trip='test="value\\073 "; Domain={0}; Path=/test'.format(test_domain)) + test('name=; Domain=' + test_domain, [cookie('name', '', domain=test_domain)], + round_trip='name=""; Domain=' + test_domain) + test('test=value', [cookie('test', 'value', domain=test_domain)], headers=True) + test('cookie1=value; Domain={0}; cookie2=value'.format(test_domain), [], + headers=True, error_re='Invalid syntax') + ydl.report_warning = ydl.report_error + test('test=value', [], headers=True, error_re='Passing cookies as a header is a potential security risk') + + def test_infojson_cookies(self): + TEST_FILE = 'test_infojson_cookies.info.json' + TEST_URL = 'https://example.com/example.mp4' + COOKIES = 'a=b; Domain=.example.com; c=d; Domain=.example.com' + COOKIE_HEADER = {'Cookie': 'a=b; c=d'} + + ydl = FakeYDL() + ydl.process_info = lambda x: ydl._write_info_json('test', x, TEST_FILE) + + def make_info(info_header_cookies=False, fmts_header_cookies=False, cookies_field=False): + fmt = {'url': TEST_URL} + if fmts_header_cookies: + fmt['http_headers'] = COOKIE_HEADER + if cookies_field: + fmt['cookies'] = COOKIES + return _make_result([fmt], http_headers=COOKIE_HEADER if info_header_cookies else None) + + def test(initial_info, note): + + def failure_msg(why): + return ' when '.join((why, note)) + + result = {} + result['processed'] = ydl.process_ie_result(initial_info) + self.assertTrue(ydl.cookiejar.get_cookies_for_url(TEST_URL), + msg=failure_msg('No cookies set in cookiejar after initial process')) + ydl.cookiejar.clear() + with open(TEST_FILE) as infojson: + 
result['loaded'] = ydl.sanitize_info(json.load(infojson), True) + result['final'] = ydl.process_ie_result(result['loaded'].copy(), download=False) + self.assertTrue(ydl.cookiejar.get_cookies_for_url(TEST_URL), + msg=failure_msg('No cookies set in cookiejar after final process')) + ydl.cookiejar.clear() + for key in ('processed', 'loaded', 'final'): + info = result[key] + self.assertIsNone( + traverse_obj(info, ((None, ('formats', 0)), 'http_headers', 'Cookie'), casesense=False, get_all=False), + msg=failure_msg('Cookie header not removed in {0} result'.format(key))) + self.assertSameCookieStrings( + traverse_obj(info, ((None, ('formats', 0)), 'cookies'), get_all=False), COOKIES, + msg=failure_msg('No cookies field found in {0} result'.format(key))) + + test({'url': TEST_URL, 'http_headers': COOKIE_HEADER, 'id': '1', 'title': 'x'}, 'no formats field') + test(make_info(info_header_cookies=True), 'info_dict header cokies') + test(make_info(fmts_header_cookies=True), 'format header cookies') + test(make_info(info_header_cookies=True, fmts_header_cookies=True), 'info_dict and format header cookies') + test(make_info(info_header_cookies=True, fmts_header_cookies=True, cookies_field=True), 'all cookies fields') + test(make_info(cookies_field=True), 'cookies format field') + test({'url': TEST_URL, 'cookies': COOKIES, 'id': '1', 'title': 'x'}, 'info_dict cookies field only') + + try_rm(TEST_FILE) + + def test_add_headers_cookie(self): + def check_for_cookie_header(result): + return traverse_obj(result, ((None, ('formats', 0)), 'http_headers', 'Cookie'), casesense=False, get_all=False) + + ydl = FakeYDL({'http_headers': {'Cookie': 'a=b'}}) + ydl._apply_header_cookies(_make_result([])['webpage_url']) # Scope to input webpage URL: .example.com + + fmt = {'url': 'https://example.com/video.mp4'} + result = ydl.process_ie_result(_make_result([fmt]), download=False) + self.assertIsNone(check_for_cookie_header(result), msg='http_headers cookies in result info_dict') + 
self.assertEqual(result.get('cookies'), 'a=b; Domain=.example.com', msg='No cookies were set in cookies field') + self.assertIn('a=b', ydl.cookiejar.get_cookie_header(fmt['url']), msg='No cookies were set in cookiejar') + + fmt = {'url': 'https://wrong.com/video.mp4'} + result = ydl.process_ie_result(_make_result([fmt]), download=False) + self.assertIsNone(check_for_cookie_header(result), msg='http_headers cookies for wrong domain') + self.assertFalse(result.get('cookies'), msg='Cookies set in cookies field for wrong domain') + self.assertFalse(ydl.cookiejar.get_cookie_header(fmt['url']), msg='Cookies set in cookiejar for wrong domain') + + if __name__ == '__main__': unittest.main() diff --git a/test/test_YoutubeDLCookieJar.py b/test/test_YoutubeDLCookieJar.py index 05f48bd74..4f9dd71ae 100644 --- a/test/test_YoutubeDLCookieJar.py +++ b/test/test_YoutubeDLCookieJar.py @@ -46,6 +46,20 @@ class TestYoutubeDLCookieJar(unittest.TestCase): # will be ignored self.assertFalse(cookiejar._cookies) + def test_get_cookie_header(self): + cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/httponly_cookies.txt') + cookiejar.load(ignore_discard=True, ignore_expires=True) + header = cookiejar.get_cookie_header('https://www.foobar.foobar') + self.assertIn('HTTPONLY_COOKIE', header) + + def test_get_cookies_for_url(self): + cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/session_cookies.txt') + cookiejar.load(ignore_discard=True, ignore_expires=True) + cookies = cookiejar.get_cookies_for_url('https://www.foobar.foobar/') + self.assertEqual(len(cookies), 2) + cookies = cookiejar.get_cookies_for_url('https://foobar.foobar/') + self.assertFalse(cookies) + if __name__ == '__main__': unittest.main() diff --git a/test/test_download.py b/test/test_download.py index d50008307..e0bc8cb95 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -20,15 +20,15 @@ from test.helper import ( import hashlib -import io import json import socket import youtube_dl.YoutubeDL from 
youtube_dl.compat import ( compat_http_client, - compat_urllib_error, compat_HTTPError, + compat_open as open, + compat_urllib_error, ) from youtube_dl.utils import ( DownloadError, @@ -245,7 +245,7 @@ def generator(test_case, tname): self.assertTrue( os.path.exists(info_json_fn), 'Missing info file %s' % info_json_fn) - with io.open(info_json_fn, encoding='utf-8') as infof: + with open(info_json_fn, encoding='utf-8') as infof: info_dict = json.load(infof) expect_info_dict(self, info_dict, tc.get('info_dict', {})) finally: diff --git a/test/test_downloader_external.py b/test/test_downloader_external.py index c0239502b..029f9b05f 100644 --- a/test/test_downloader_external.py +++ b/test/test_downloader_external.py @@ -12,20 +12,65 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from test.helper import ( FakeLogger, + FakeYDL, http_server_port, try_rm, ) from youtube_dl import YoutubeDL -from youtube_dl.compat import compat_http_server -from youtube_dl.utils import encodeFilename -from youtube_dl.downloader.external import Aria2pFD +from youtube_dl.compat import ( + compat_http_cookiejar_Cookie, + compat_http_server, + compat_kwargs, +) +from youtube_dl.utils import ( + encodeFilename, + join_nonempty, +) +from youtube_dl.downloader.external import ( + Aria2cFD, + Aria2pFD, + AxelFD, + CurlFD, + FFmpegFD, + HttpieFD, + WgetFD, +) import threading -TEST_DIR = os.path.dirname(os.path.abspath(__file__)) - - TEST_SIZE = 10 * 1024 +TEST_COOKIE = { + 'version': 0, + 'name': 'test', + 'value': 'ytdlp', + 'port': None, + 'port_specified': False, + 'domain': '.example.com', + 'domain_specified': True, + 'domain_initial_dot': False, + 'path': '/', + 'path_specified': True, + 'secure': False, + 'expires': None, + 'discard': False, + 'comment': None, + 'comment_url': None, + 'rest': {}, +} + +TEST_COOKIE_VALUE = join_nonempty('name', 'value', delim='=', from_dict=TEST_COOKIE) + +TEST_INFO = {'url': 'http://www.example.com/'} + + +def 
cookiejar_Cookie(**cookie_args): + return compat_http_cookiejar_Cookie(**compat_kwargs(cookie_args)) + + +def ifExternalFDAvailable(externalFD): + return unittest.skipUnless(externalFD.available(), + externalFD.get_basename() + ' not found') + class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler): def log_message(self, format, *args): @@ -70,7 +115,7 @@ class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler): assert False, 'unrecognised server path' -@unittest.skipUnless(Aria2pFD.available(), 'aria2p module not found') +@ifExternalFDAvailable(Aria2pFD) class TestAria2pFD(unittest.TestCase): def setUp(self): self.httpd = compat_http_server.HTTPServer( @@ -111,5 +156,103 @@ class TestAria2pFD(unittest.TestCase): }) +@ifExternalFDAvailable(HttpieFD) +class TestHttpieFD(unittest.TestCase): + def test_make_cmd(self): + with FakeYDL() as ydl: + downloader = HttpieFD(ydl, {}) + self.assertEqual( + downloader._make_cmd('test', TEST_INFO), + ['http', '--download', '--output', 'test', 'http://www.example.com/']) + + # Test cookie header is added + ydl.cookiejar.set_cookie(cookiejar_Cookie(**TEST_COOKIE)) + self.assertEqual( + downloader._make_cmd('test', TEST_INFO), + ['http', '--download', '--output', 'test', + 'http://www.example.com/', 'Cookie:' + TEST_COOKIE_VALUE]) + + +@ifExternalFDAvailable(AxelFD) +class TestAxelFD(unittest.TestCase): + def test_make_cmd(self): + with FakeYDL() as ydl: + downloader = AxelFD(ydl, {}) + self.assertEqual( + downloader._make_cmd('test', TEST_INFO), + ['axel', '-o', 'test', '--', 'http://www.example.com/']) + + # Test cookie header is added + ydl.cookiejar.set_cookie(cookiejar_Cookie(**TEST_COOKIE)) + self.assertEqual( + downloader._make_cmd('test', TEST_INFO), + ['axel', '-o', 'test', '-H', 'Cookie: ' + TEST_COOKIE_VALUE, + '--max-redirect=0', '--', 'http://www.example.com/']) + + +@ifExternalFDAvailable(WgetFD) +class TestWgetFD(unittest.TestCase): + def test_make_cmd(self): + with FakeYDL() as ydl: + 
downloader = WgetFD(ydl, {}) + self.assertNotIn('--load-cookies', downloader._make_cmd('test', TEST_INFO)) + # Test cookiejar tempfile arg is added + ydl.cookiejar.set_cookie(cookiejar_Cookie(**TEST_COOKIE)) + self.assertIn('--load-cookies', downloader._make_cmd('test', TEST_INFO)) + + +@ifExternalFDAvailable(CurlFD) +class TestCurlFD(unittest.TestCase): + def test_make_cmd(self): + with FakeYDL() as ydl: + downloader = CurlFD(ydl, {}) + self.assertNotIn('--cookie', downloader._make_cmd('test', TEST_INFO)) + # Test cookie header is added + ydl.cookiejar.set_cookie(cookiejar_Cookie(**TEST_COOKIE)) + self.assertIn('--cookie', downloader._make_cmd('test', TEST_INFO)) + self.assertIn(TEST_COOKIE_VALUE, downloader._make_cmd('test', TEST_INFO)) + + +@ifExternalFDAvailable(Aria2cFD) +class TestAria2cFD(unittest.TestCase): + def test_make_cmd(self): + with FakeYDL() as ydl: + downloader = Aria2cFD(ydl, {}) + downloader._make_cmd('test', TEST_INFO) + self.assertFalse(hasattr(downloader, '_cookies_tempfile')) + + # Test cookiejar tempfile arg is added + ydl.cookiejar.set_cookie(cookiejar_Cookie(**TEST_COOKIE)) + cmd = downloader._make_cmd('test', TEST_INFO) + self.assertIn('--load-cookies=%s' % downloader._cookies_tempfile, cmd) + + +@ifExternalFDAvailable(FFmpegFD) +class TestFFmpegFD(unittest.TestCase): + _args = [] + + def _test_cmd(self, args): + self._args = args + + def test_make_cmd(self): + with FakeYDL() as ydl: + downloader = FFmpegFD(ydl, {}) + downloader._debug_cmd = self._test_cmd + info_dict = TEST_INFO.copy() + info_dict['ext'] = 'mp4' + + downloader._call_downloader('test', info_dict) + self.assertEqual(self._args, [ + 'ffmpeg', '-y', '-i', 'http://www.example.com/', + '-c', 'copy', '-f', 'mp4', 'file:test']) + + # Test cookies arg is added + ydl.cookiejar.set_cookie(cookiejar_Cookie(**TEST_COOKIE)) + downloader._call_downloader('test', info_dict) + self.assertEqual(self._args, [ + 'ffmpeg', '-y', '-cookies', TEST_COOKIE_VALUE + '; path=/; 
domain=.example.com;\r\n', + '-i', 'http://www.example.com/', '-c', 'copy', '-f', 'mp4', 'file:test']) + + if __name__ == '__main__': unittest.main() diff --git a/test/test_downloader_http.py b/test/test_downloader_http.py index 4e6d7a2a0..6af86ae48 100644 --- a/test/test_downloader_http.py +++ b/test/test_downloader_http.py @@ -88,7 +88,7 @@ class TestHttpFD(unittest.TestCase): self.assertTrue(downloader.real_download(filename, { 'url': 'http://127.0.0.1:%d/%s' % (self.port, ep), })) - self.assertEqual(os.path.getsize(encodeFilename(filename)), TEST_SIZE) + self.assertEqual(os.path.getsize(encodeFilename(filename)), TEST_SIZE, ep) try_rm(encodeFilename(filename)) def download_all(self, params): diff --git a/test/test_execution.py b/test/test_execution.py index 704e14612..9daaafa6c 100644 --- a/test/test_execution.py +++ b/test/test_execution.py @@ -8,47 +8,53 @@ import unittest import sys import os import subprocess -sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) - -from youtube_dl.utils import encodeArgument rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +sys.path.insert(0, rootDir) -try: - _DEV_NULL = subprocess.DEVNULL -except AttributeError: - _DEV_NULL = open(os.devnull, 'wb') +from youtube_dl.compat import compat_register_utf8, compat_subprocess_get_DEVNULL +from youtube_dl.utils import encodeArgument + +compat_register_utf8() + + +_DEV_NULL = compat_subprocess_get_DEVNULL() class TestExecution(unittest.TestCase): + def setUp(self): + self.module = 'youtube_dl' + if sys.version_info < (2, 7): + self.module += '.__main__' + def test_import(self): subprocess.check_call([sys.executable, '-c', 'import youtube_dl'], cwd=rootDir) def test_module_exec(self): - if sys.version_info >= (2, 7): # Python 2.6 doesn't support package execution - subprocess.check_call([sys.executable, '-m', 'youtube_dl', '--version'], cwd=rootDir, stdout=_DEV_NULL) + subprocess.check_call([sys.executable, '-m', self.module, 
'--version'], cwd=rootDir, stdout=_DEV_NULL) def test_main_exec(self): - subprocess.check_call([sys.executable, 'youtube_dl/__main__.py', '--version'], cwd=rootDir, stdout=_DEV_NULL) + subprocess.check_call([sys.executable, os.path.normpath('youtube_dl/__main__.py'), '--version'], cwd=rootDir, stdout=_DEV_NULL) def test_cmdline_umlauts(self): + os.environ['PYTHONIOENCODING'] = 'utf-8' p = subprocess.Popen( - [sys.executable, 'youtube_dl/__main__.py', encodeArgument('ä'), '--version'], + [sys.executable, '-m', self.module, encodeArgument('ä'), '--version'], cwd=rootDir, stdout=_DEV_NULL, stderr=subprocess.PIPE) _, stderr = p.communicate() self.assertFalse(stderr) def test_lazy_extractors(self): - lazy_extractors = 'youtube_dl/extractor/lazy_extractors.py' + lazy_extractors = os.path.normpath('youtube_dl/extractor/lazy_extractors.py') try: - subprocess.check_call([sys.executable, 'devscripts/make_lazy_extractors.py', lazy_extractors], cwd=rootDir, stdout=_DEV_NULL) - subprocess.check_call([sys.executable, 'test/test_all_urls.py'], cwd=rootDir, stdout=_DEV_NULL) + subprocess.check_call([sys.executable, os.path.normpath('devscripts/make_lazy_extractors.py'), lazy_extractors], cwd=rootDir, stdout=_DEV_NULL) + subprocess.check_call([sys.executable, os.path.normpath('test/test_all_urls.py')], cwd=rootDir, stdout=_DEV_NULL) finally: - for x in ['', 'c'] if sys.version_info[0] < 3 else ['']: + for x in ('', 'c') if sys.version_info[0] < 3 else ('',): try: os.remove(lazy_extractors + x) - except (IOError, OSError): + except OSError: pass diff --git a/test/test_http.py b/test/test_http.py index 487a9bc77..485c4c6fc 100644 --- a/test/test_http.py +++ b/test/test_http.py @@ -8,33 +8,163 @@ import sys import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +import contextlib +import gzip +import io +import ssl +import tempfile +import threading +import zlib + +# avoid deprecated alias assertRaisesRegexp +if hasattr(unittest.TestCase, 
'assertRaisesRegex'): + unittest.TestCase.assertRaisesRegexp = unittest.TestCase.assertRaisesRegex + +try: + import brotli +except ImportError: + brotli = None +try: + from urllib.request import pathname2url +except ImportError: + from urllib import pathname2url + +from youtube_dl.compat import ( + compat_http_cookiejar_Cookie, + compat_http_server, + compat_str as str, + compat_urllib_error, + compat_urllib_HTTPError, + compat_urllib_parse, + compat_urllib_request, +) + +from youtube_dl.utils import ( + sanitized_Request, + update_Request, + urlencode_postdata, +) + from test.helper import ( + expectedFailureIf, + FakeYDL, FakeLogger, http_server_port, ) from youtube_dl import YoutubeDL -from youtube_dl.compat import compat_http_server, compat_urllib_request -import ssl -import threading TEST_DIR = os.path.dirname(os.path.abspath(__file__)) class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler): + protocol_version = 'HTTP/1.1' + + # work-around old/new -style class inheritance + def super(self, meth_name, *args, **kwargs): + from types import MethodType + try: + super() + fn = lambda s, m, *a, **k: getattr(super(), m)(*a, **k) + except TypeError: + fn = lambda s, m, *a, **k: getattr(compat_http_server.BaseHTTPRequestHandler, m)(s, *a, **k) + self.super = MethodType(fn, self) + return self.super(meth_name, *args, **kwargs) + def log_message(self, format, *args): pass + def _headers(self): + payload = str(self.headers).encode('utf-8') + self.send_response(200) + self.send_header('Content-Type', 'application/json') + self.send_header('Content-Length', str(len(payload))) + self.end_headers() + self.wfile.write(payload) + + def _redirect(self): + self.send_response(int(self.path[len('/redirect_'):])) + self.send_header('Location', '/method') + self.send_header('Content-Length', '0') + self.end_headers() + + def _method(self, method, payload=None): + self.send_response(200) + self.send_header('Content-Length', str(len(payload or ''))) + 
self.send_header('Method', method) + self.end_headers() + if payload: + self.wfile.write(payload) + + def _status(self, status): + payload = '{0} NOT FOUND'.format(status).encode('utf-8') + self.send_response(int(status)) + self.send_header('Content-Type', 'text/html; charset=utf-8') + self.send_header('Content-Length', str(len(payload))) + self.end_headers() + self.wfile.write(payload) + + def _read_data(self): + if 'Content-Length' in self.headers: + return self.rfile.read(int(self.headers['Content-Length'])) + + def _test_url(self, path, host='127.0.0.1', scheme='http', port=None): + return '{0}://{1}:{2}/{3}'.format( + scheme, host, + port if port is not None + else http_server_port(self.server), path) + + def do_POST(self): + data = self._read_data() + if self.path.startswith('/redirect_'): + self._redirect() + elif self.path.startswith('/method'): + self._method('POST', data) + elif self.path.startswith('/headers'): + self._headers() + else: + self._status(404) + + def do_HEAD(self): + if self.path.startswith('/redirect_'): + self._redirect() + elif self.path.startswith('/method'): + self._method('HEAD') + else: + self._status(404) + + def do_PUT(self): + data = self._read_data() + if self.path.startswith('/redirect_'): + self._redirect() + elif self.path.startswith('/method'): + self._method('PUT', data) + else: + self._status(404) + def do_GET(self): + + def respond(payload=b'