diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md deleted file mode 100644 index e5405c235..000000000 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ /dev/null @@ -1,63 +0,0 @@ ---- -name: Broken site support -about: Report broken or misfunctioning site -title: '' ---- - - - - -## Checklist - - - -- [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **2021.12.17** -- [ ] I've checked that all provided URLs are alive and playable in a browser -- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped -- [ ] I've searched the bugtracker for similar issues including closed ones - - -## Verbose log - - - -``` -PASTE VERBOSE LOG HERE -``` - - -## Description - - - -WRITE DESCRIPTION HERE diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md deleted file mode 100644 index 33b01ce7f..000000000 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ /dev/null @@ -1,54 +0,0 @@ ---- -name: Site support request -about: Request support for a new site -title: '' -labels: 'site-support-request' ---- - - - - -## Checklist - - - -- [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **2021.12.17** -- [ ] I've checked that all provided URLs are alive and playable in a browser -- [ ] I've checked that none of provided URLs violate any copyrights -- [ ] I've searched the bugtracker for similar site support requests including closed ones - - -## Example URLs - - - -- Single video: https://www.youtube.com/watch?v=BaW_jenozKc -- Single video: https://youtu.be/BaW_jenozKc -- Playlist: https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc - - -## Description - - - -WRITE DESCRIPTION HERE diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md deleted file mode 100644 
index 285610cc7..000000000 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ /dev/null @@ -1,37 +0,0 @@ ---- -name: Site feature request -about: Request a new functionality for a site -title: '' ---- - - - - -## Checklist - - - -- [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **2021.12.17** -- [ ] I've searched the bugtracker for similar site feature requests including closed ones - - -## Description - - - -WRITE DESCRIPTION HERE diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md deleted file mode 100644 index af73525fb..000000000 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ /dev/null @@ -1,65 +0,0 @@ ---- -name: Bug report -about: Report a bug unrelated to any particular site or extractor -title: '' ---- - - - - -## Checklist - - - -- [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dl version **2021.12.17** -- [ ] I've checked that all provided URLs are alive and playable in a browser -- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped -- [ ] I've searched the bugtracker for similar bug reports including closed ones -- [ ] I've read bugs section in FAQ - - -## Verbose log - - - -``` -PASTE VERBOSE LOG HERE -``` - - -## Description - - - -WRITE DESCRIPTION HERE diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md deleted file mode 100644 index 42c878b83..000000000 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -name: Feature request -about: Request a new functionality unrelated to any particular site or extractor -title: '' -labels: 'request' ---- - - - - -## Checklist - - - -- [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **2021.12.17** -- [ ] I've searched the bugtracker for similar feature requests including closed ones - - -## 
Description - - - -WRITE DESCRIPTION HERE diff --git a/.github/ISSUE_TEMPLATE/6_question.md b/.github/ISSUE_TEMPLATE/6_question.md deleted file mode 100644 index 1fd7cd5dc..000000000 --- a/.github/ISSUE_TEMPLATE/6_question.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -name: Ask question -about: Ask youtube-dl related question -title: '' -labels: 'question' ---- - - - - -## Checklist - - - -- [ ] I'm asking a question -- [ ] I've looked through the README and FAQ for similar questions -- [ ] I've searched the bugtracker for similar questions including closed ones - - -## Question - - - -WRITE QUESTION HERE diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml deleted file mode 100644 index 3ba13e0ce..000000000 --- a/.github/ISSUE_TEMPLATE/config.yml +++ /dev/null @@ -1 +0,0 @@ -blank_issues_enabled: false diff --git a/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.md b/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.md deleted file mode 100644 index c7600d5b5..000000000 --- a/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.md +++ /dev/null @@ -1,63 +0,0 @@ ---- -name: Broken site support -about: Report broken or misfunctioning site -title: '' ---- - - - - -## Checklist - - - -- [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **%(version)s** -- [ ] I've checked that all provided URLs are alive and playable in a browser -- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped -- [ ] I've searched the bugtracker for similar issues including closed ones - - -## Verbose log - - - -``` -PASTE VERBOSE LOG HERE -``` - - -## Description - - - -WRITE DESCRIPTION HERE diff --git a/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.md b/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.md deleted file mode 100644 index d4988e639..000000000 --- a/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.md +++ /dev/null @@ -1,54 +0,0 @@ ---- -name: Site support request -about: Request 
support for a new site -title: '' -labels: 'site-support-request' ---- - - - - -## Checklist - - - -- [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **%(version)s** -- [ ] I've checked that all provided URLs are alive and playable in a browser -- [ ] I've checked that none of provided URLs violate any copyrights -- [ ] I've searched the bugtracker for similar site support requests including closed ones - - -## Example URLs - - - -- Single video: https://www.youtube.com/watch?v=BaW_jenozKc -- Single video: https://youtu.be/BaW_jenozKc -- Playlist: https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc - - -## Description - - - -WRITE DESCRIPTION HERE diff --git a/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.md b/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.md deleted file mode 100644 index 65f0a32f3..000000000 --- a/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.md +++ /dev/null @@ -1,37 +0,0 @@ ---- -name: Site feature request -about: Request a new functionality for a site -title: '' ---- - - - - -## Checklist - - - -- [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **%(version)s** -- [ ] I've searched the bugtracker for similar site feature requests including closed ones - - -## Description - - - -WRITE DESCRIPTION HERE diff --git a/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.md b/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.md deleted file mode 100644 index 41fb14b72..000000000 --- a/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.md +++ /dev/null @@ -1,65 +0,0 @@ ---- -name: Bug report -about: Report a bug unrelated to any particular site or extractor -title: '' ---- - - - - -## Checklist - - - -- [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dl version **%(version)s** -- [ ] I've checked that all provided URLs are alive and playable in a browser -- [ ] I've checked that all URLs and arguments 
with special characters are properly quoted or escaped -- [ ] I've searched the bugtracker for similar bug reports including closed ones -- [ ] I've read bugs section in FAQ - - -## Verbose log - - - -``` -PASTE VERBOSE LOG HERE -``` - - -## Description - - - -WRITE DESCRIPTION HERE diff --git a/.github/ISSUE_TEMPLATE_tmpl/5_feature_request.md b/.github/ISSUE_TEMPLATE_tmpl/5_feature_request.md deleted file mode 100644 index b3431a7f0..000000000 --- a/.github/ISSUE_TEMPLATE_tmpl/5_feature_request.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -name: Feature request -about: Request a new functionality unrelated to any particular site or extractor -title: '' -labels: 'request' ---- - - - - -## Checklist - - - -- [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **%(version)s** -- [ ] I've searched the bugtracker for similar feature requests including closed ones - - -## Description - - - -WRITE DESCRIPTION HERE diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md deleted file mode 100644 index 892cea0a3..000000000 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ /dev/null @@ -1,30 +0,0 @@ -## Please follow the guide below - -- You will be asked some questions, please read them **carefully** and answer honestly -- Put an `x` into all the boxes [ ] relevant to your *pull request* (like that [x]) -- Use *Preview* tab to see how your *pull request* will actually look like - ---- - -### Before submitting a *pull request* make sure you have: -- [ ] [Searched](https://github.com/ytdl-org/youtube-dl/search?q=is%3Apr&type=Issues) the bugtracker for similar pull requests -- [ ] Read [adding new extractor tutorial](https://github.com/ytdl-org/youtube-dl#adding-support-for-a-new-site) -- [ ] Read [youtube-dl coding conventions](https://github.com/ytdl-org/youtube-dl#youtube-dl-coding-conventions) and adjusted the code to meet them -- [ ] Covered the code with tests (note that PRs without tests will be REJECTED) -- [ ] 
Checked the code with [flake8](https://pypi.python.org/pypi/flake8) - -### In order to be accepted and merged into youtube-dl each piece of code must be in public domain or released under [Unlicense](http://unlicense.org/). Check one of the following options: -- [ ] I am the original author of this code and I am willing to release it under [Unlicense](http://unlicense.org/) -- [ ] I am not the original author of this code but it is in public domain or released under [Unlicense](http://unlicense.org/) (provide reliable evidence) - -### What is the purpose of your *pull request*? -- [ ] Bug fix -- [ ] Improvement -- [ ] New extractor -- [ ] New feature - ---- - -### Description of your *pull request* and other information - -Explanation of your *pull request* in arbitrary form goes here. Please make sure the description explains the purpose and effect of your *pull request* and is worded well enough to be understood. Provide as much context and examples as possible. diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml deleted file mode 100644 index 073c4458c..000000000 --- a/.github/workflows/ci.yml +++ /dev/null @@ -1,482 +0,0 @@ -name: CI - -env: - all-cpython-versions: 2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, 3.10, 3.11, 3.12 - main-cpython-versions: 2.7, 3.2, 3.5, 3.9, 3.11 - pypy-versions: pypy-2.7, pypy-3.6, pypy-3.7 - cpython-versions: main - test-set: core - # Python beta version to be built using pyenv before setup-python support - # Must also be included in all-cpython-versions - next: 3.13 - -on: - push: - # push inputs aren't known to GitHub - inputs: - cpython-versions: - type: string - default: all - test-set: - type: string - default: core - pull_request: - # pull_request inputs aren't known to GitHub - inputs: - cpython-versions: - type: string - default: main - test-set: - type: string - default: both - workflow_dispatch: - inputs: - cpython-versions: - type: choice - description: CPython versions (main = 2.7, 3.2, 3.5, 3.9, 
3.11) - options: - - all - - main - required: true - default: main - test-set: - type: choice - description: core, download - options: - - both - - core - - download - required: true - default: both - -permissions: - contents: read - -jobs: - select: - name: Select tests from inputs - runs-on: ubuntu-latest - outputs: - cpython-versions: ${{ steps.run.outputs.cpython-versions }} - test-set: ${{ steps.run.outputs.test-set }} - own-pip-versions: ${{ steps.run.outputs.own-pip-versions }} - steps: - # push and pull_request inputs aren't known to GitHub (pt3) - - name: Set push defaults - if: ${{ github.event_name == 'push' }} - env: - cpython-versions: all - test-set: core - run: | - echo "cpython-versions=${{env.cpython-versions}}" >> "$GITHUB_ENV" - echo "test_set=${{env.test_set}}" >> "$GITHUB_ENV" - - name: Get pull_request inputs - if: ${{ github.event_name == 'pull_request' }} - env: - cpython-versions: main - test-set: both - run: | - echo "cpython-versions=${{env.cpython-versions}}" >> "$GITHUB_ENV" - echo "test_set=${{env.test_set}}" >> "$GITHUB_ENV" - - name: Make version array - id: run - run: | - # Make a JSON Array from comma/space-separated string (no extra escaping) - json_list() { \ - ret=""; IFS="${IFS},"; set -- $*; \ - for a in "$@"; do \ - ret=$(printf '%s"%s"' "${ret}${ret:+, }" "$a"); \ - done; \ - printf '[%s]' "$ret"; } - tests="${{ inputs.test-set || env.test-set }}" - [ $tests = both ] && tests="core download" - printf 'test-set=%s\n' "$(json_list $tests)" >> "$GITHUB_OUTPUT" - versions="${{ inputs.cpython-versions || env.cpython-versions }}" - if [ "$versions" = all ]; then \ - versions="${{ env.all-cpython-versions }}"; else \ - versions="${{ env.main-cpython-versions }}"; \ - fi - printf 'cpython-versions=%s\n' \ - "$(json_list ${versions}${versions:+, }${{ env.pypy-versions }})" >> "$GITHUB_OUTPUT" - # versions with a special get-pip.py in a per-version subdirectory - printf 'own-pip-versions=%s\n' \ - "$(json_list 2.6, 2.7, 3.2, 3.3, 3.4, 
3.5, 3.6)" >> "$GITHUB_OUTPUT" - - tests: - name: Run tests - needs: select - permissions: - contents: read - packages: write - runs-on: ${{ matrix.os }} - env: - PIP: python -m pip - PIP_DISABLE_PIP_VERSION_CHECK: true - PIP_NO_PYTHON_VERSION_WARNING: true - strategy: - fail-fast: true - matrix: - os: [ubuntu-22.04] - python-version: ${{ fromJSON(needs.select.outputs.cpython-versions) }} - python-impl: [cpython] - ytdl-test-set: ${{ fromJSON(needs.select.outputs.test-set) }} - run-tests-ext: [sh] - include: - - os: windows-2022 - python-version: 3.4 - python-impl: cpython - ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'core') && 'core' || 'nocore' }} - run-tests-ext: bat - - os: windows-2022 - python-version: 3.4 - python-impl: cpython - ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'download') && 'download' || 'nodownload' }} - run-tests-ext: bat - # jython - - os: ubuntu-22.04 - python-version: 2.7 - python-impl: jython - ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'core') && 'core' || 'nocore' }} - run-tests-ext: sh - - os: ubuntu-22.04 - python-version: 2.7 - python-impl: jython - ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'download') && 'download' || 'nodownload' }} - run-tests-ext: sh - steps: - - name: Prepare Linux - if: ${{ startswith(matrix.os, 'ubuntu') }} - shell: bash - run: | - # apt in runner, if needed, may not be up-to-date - sudo apt-get update - - name: Checkout - uses: actions/checkout@v3 - #-------- Python 3 ----- - - name: Set up supported Python ${{ matrix.python-version }} - id: setup-python - if: ${{ matrix.python-impl == 'cpython' && matrix.python-version != '2.6' && matrix.python-version != '2.7' && matrix.python-version != env.next }} - # wrap broken actions/setup-python@v4 - # NB may run apt-get install in Linux - uses: ytdl-org/setup-python@v1 - env: - # Temporary (?) 
workaround for Python 3.5 failures - May 2024 - PIP_TRUSTED_HOST: "pypi.python.org pypi.org files.pythonhosted.org" - with: - python-version: ${{ matrix.python-version }} - cache-build: true - allow-build: info - - name: Locate supported Python ${{ matrix.python-version }} - if: ${{ env.pythonLocation }} - shell: bash - run: | - echo "PYTHONHOME=${pythonLocation}" >> "$GITHUB_ENV" - export expected="${{ steps.setup-python.outputs.python-path }}" - dirname() { printf '%s\n' \ - 'import os, sys' \ - 'print(os.path.dirname(sys.argv[1]))' \ - | ${expected} - "$1"; } - expd="$(dirname "$expected")" - export python="$(command -v python)" - [ "$expd" = "$(dirname "$python")" ] || echo "PATH=$expd:${PATH}" >> "$GITHUB_ENV" - [ -x "$python" ] || printf '%s\n' \ - 'import os' \ - 'exp = os.environ["expected"]' \ - 'python = os.environ["python"]' \ - 'exps = os.path.split(exp)' \ - 'if python and (os.path.dirname(python) == exp[0]):' \ - ' exit(0)' \ - 'exps[1] = "python" + os.path.splitext(exps[1])[1]' \ - 'python = os.path.join(*exps)' \ - 'try:' \ - ' os.symlink(exp, python)' \ - 'except AttributeError:' \ - ' os.rename(exp, python)' \ - | ${expected} - - printf '%s\n' \ - 'import sys' \ - 'print(sys.path)' \ - | ${expected} - - #-------- Python next (was 3.12) - - - name: Set up CPython 3.next environment - if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == env.next }} - shell: bash - run: | - PYENV_ROOT=$HOME/.local/share/pyenv - echo "PYENV_ROOT=${PYENV_ROOT}" >> "$GITHUB_ENV" - - name: Cache Python 3.next - id: cachenext - if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == env.next }} - uses: actions/cache@v3 - with: - key: python-${{ env.next }} - path: | - ${{ env.PYENV_ROOT }} - - name: Build and set up Python 3.next - if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == env.next && ! 
steps.cachenext.outputs.cache-hit }} - # dl and build locally - shell: bash - run: | - # Install build environment - sudo apt-get install -y build-essential llvm libssl-dev tk-dev \ - libncursesw5-dev libreadline-dev libsqlite3-dev \ - libffi-dev xz-utils zlib1g-dev libbz2-dev liblzma-dev - # Download PyEnv from its GitHub repository. - export PYENV_ROOT=${{ env.PYENV_ROOT }} - export PATH=$PYENV_ROOT/bin:$PATH - git clone "https://github.com/pyenv/pyenv.git" "$PYENV_ROOT" - pyenv install ${{ env.next }} - - name: Locate Python 3.next - if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == env.next }} - shell: bash - run: | - PYTHONHOME="$(echo "${{ env.PYENV_ROOT }}/versions/${{ env.next }}."*)" - test -n "$PYTHONHOME" - echo "PYTHONHOME=$PYTHONHOME" >> "$GITHUB_ENV" - echo "PATH=${PYTHONHOME}/bin:$PATH" >> "$GITHUB_ENV" - #-------- Python 2.7 -- - - name: Set up Python 2.7 - if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '2.7' }} - # install 2.7 - shell: bash - run: | - # Ubuntu 22.04 no longer has python-is-python2: fetch it - curl -L "http://launchpadlibrarian.net/474693132/python-is-python2_2.7.17-4_all.deb" -o python-is-python2.deb - sudo apt-get install -y python2 - sudo dpkg --force-breaks -i python-is-python2.deb - echo "PYTHONHOME=/usr" >> "$GITHUB_ENV" - #-------- Python 2.6 -- - - name: Set up Python 2.6 environment - if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '2.6' }} - shell: bash - run: | - openssl_name=openssl-1.0.2u - echo "openssl_name=${openssl_name}" >> "$GITHUB_ENV" - openssl_dir=$HOME/.local/opt/$openssl_name - echo "openssl_dir=${openssl_dir}" >> "$GITHUB_ENV" - PYENV_ROOT=$HOME/.local/share/pyenv - echo "PYENV_ROOT=${PYENV_ROOT}" >> "$GITHUB_ENV" - sudo apt-get install -y openssl ca-certificates - - name: Cache Python 2.6 - id: cache26 - if: ${{ matrix.python-version == '2.6' }} - uses: actions/cache@v3 - with: - key: python-2.6.9 - path: | - ${{ env.openssl_dir }} - ${{ 
env.PYENV_ROOT }} - - name: Build and set up Python 2.6 - if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '2.6' && ! steps.cache26.outputs.cache-hit }} - # dl and build locally - shell: bash - run: | - # Install build environment - sudo apt-get install -y build-essential llvm libssl-dev tk-dev \ - libncursesw5-dev libreadline-dev libsqlite3-dev \ - libffi-dev xz-utils zlib1g-dev libbz2-dev liblzma-dev - # Download and install OpenSSL 1.0.2, back in time - openssl_name=${{ env.openssl_name }} - openssl_targz=${openssl_name}.tar.gz - openssl_dir=${{ env.openssl_dir }} - openssl_inc=$openssl_dir/include - openssl_lib=$openssl_dir/lib - openssl_ssl=$openssl_dir/ssl - curl -L "https://www.openssl.org/source/$openssl_targz" -o $openssl_targz - tar -xf $openssl_targz - ( cd $openssl_name; \ - ./config --prefix=$openssl_dir --openssldir=${openssl_dir}/ssl \ - --libdir=lib -Wl,-rpath=${openssl_dir}/lib shared zlib-dynamic && \ - make && \ - make install ) - rm -rf $openssl_name - rmdir $openssl_ssl/certs && ln -s /etc/ssl/certs $openssl_ssl/certs - # Download PyEnv from its GitHub repository. 
- export PYENV_ROOT=${{ env.PYENV_ROOT }} - export PATH=$PYENV_ROOT/bin:$PATH - git clone "https://github.com/pyenv/pyenv.git" "$PYENV_ROOT" - # Prevent pyenv build trying (and failing) to update pip - export GET_PIP=get-pip-2.6.py - echo 'import sys; sys.exit(0)' > ${GET_PIP} - GET_PIP=$(realpath $GET_PIP) - # Build and install Python - export CFLAGS="-I$openssl_inc" - export LDFLAGS="-L$openssl_lib" - export LD_LIBRARY_PATH="$openssl_lib" - pyenv install 2.6.9 - - name: Locate Python 2.6 - if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '2.6' }} - shell: bash - run: | - PYTHONHOME="${{ env.PYENV_ROOT }}/versions/2.6.9" - echo "PYTHONHOME=$PYTHONHOME" >> "$GITHUB_ENV" - echo "PATH=${PYTHONHOME}/bin:$PATH" >> "$GITHUB_ENV" - echo "LD_LIBRARY_PATH=${{ env.openssl_dir }}/lib${LD_LIBRARY_PATH:+:}${LD_LIBRARY_PATH}" >> "$GITHUB_ENV" - #-------- Jython ------ - - name: Set up Java 8 - if: ${{ matrix.python-impl == 'jython' }} - uses: actions/setup-java@v3 - with: - java-version: 8 - distribution: 'zulu' - - name: Setup Jython environment - if: ${{ matrix.python-impl == 'jython' }} - shell: bash - run: | - echo "JYTHON_ROOT=${HOME}/jython" >> "$GITHUB_ENV" - echo "PIP=pip" >> "$GITHUB_ENV" - - name: Cache Jython - id: cachejy - if: ${{ matrix.python-impl == 'jython' && matrix.python-version == '2.7' }} - uses: actions/cache@v3 - with: - # 2.7.3 now available, may solve SNI issue - key: jython-2.7.1 - path: | - ${{ env.JYTHON_ROOT }} - - name: Install Jython - if: ${{ matrix.python-impl == 'jython' && matrix.python-version == '2.7' && ! 
steps.cachejy.outputs.cache-hit }} - shell: bash - run: | - JYTHON_ROOT="${{ env.JYTHON_ROOT }}" - curl -L "https://repo1.maven.org/maven2/org/python/jython-installer/2.7.1/jython-installer-2.7.1.jar" -o jython-installer.jar - java -jar jython-installer.jar -s -d "${JYTHON_ROOT}" - echo "${JYTHON_ROOT}/bin" >> "$GITHUB_PATH" - - name: Set up cached Jython - if: ${{ steps.cachejy.outputs.cache-hit }} - shell: bash - run: | - JYTHON_ROOT="${{ env.JYTHON_ROOT }}" - echo "${JYTHON_ROOT}/bin" >> $GITHUB_PATH - - name: Install supporting Python 2.7 if possible - if: ${{ steps.cachejy.outputs.cache-hit }} - shell: bash - run: | - sudo apt-get install -y python2.7 || true - #-------- pip --------- - - name: Set up supported Python ${{ matrix.python-version }} pip - if: ${{ (matrix.python-version != '3.2' && steps.setup-python.outputs.python-path) || matrix.python-version == '2.7' }} - # This step may run in either Linux or Windows - shell: bash - run: | - echo "$PATH" - echo "$PYTHONHOME" - # curl is available on both Windows and Linux, -L follows redirects, -O gets name - python -m ensurepip || python -m pip --version || { \ - get_pip="${{ contains(needs.select.outputs.own-pip-versions, matrix.python-version) && format('{0}/', matrix.python-version) || '' }}"; \ - curl -L -O "https://bootstrap.pypa.io/pip/${get_pip}get-pip.py"; \ - python get-pip.py --no-setuptools --no-wheel; } - - name: Set up Python 2.6 pip - if: ${{ matrix.python-version == '2.6' }} - shell: bash - run: | - python -m pip --version || { \ - curl -L -O "https://bootstrap.pypa.io/pip/2.6/get-pip.py"; \ - curl -L -O "https://files.pythonhosted.org/packages/ac/95/a05b56bb975efa78d3557efa36acaf9cf5d2fd0ee0062060493687432e03/pip-9.0.3-py2.py3-none-any.whl"; \ - python get-pip.py --no-setuptools --no-wheel pip-9.0.3-py2.py3-none-any.whl; } - # work-around to invoke pip module on 2.6: https://bugs.python.org/issue2751 - echo "PIP=python -m pip.__main__" >> "$GITHUB_ENV" - - name: Set up other Python ${{ 
matrix.python-version }} pip - if: ${{ matrix.python-version == '3.2' && steps.setup-python.outputs.python-path }} - shell: bash - run: | - python -m pip --version || { \ - curl -L -O "https://bootstrap.pypa.io/pip/3.2/get-pip.py"; \ - curl -L -O "https://files.pythonhosted.org/packages/b2/d0/cd115fe345dd6f07ec1c780020a7dfe74966fceeb171e0f20d1d4905b0b7/pip-7.1.2-py2.py3-none-any.whl"; \ - python get-pip.py --no-setuptools --no-wheel pip-7.1.2-py2.py3-none-any.whl; } - #-------- unittest ---- - - name: Upgrade Unittest for Python 2.6 - if: ${{ matrix.python-version == '2.6' }} - shell: bash - run: | - # Work around deprecation of support for non-SNI clients at PyPI CDN (see https://status.python.org/incidents/hzmjhqsdjqgb) - $PIP -qq show unittest2 || { \ - for u in "65/26/32b8464df2a97e6dd1b656ed26b2c194606c16fe163c695a992b36c11cdf/six-1.13.0-py2.py3-none-any.whl" \ - "f2/94/3af39d34be01a24a6e65433d19e107099374224905f1e0cc6bbe1fd22a2f/argparse-1.4.0-py2.py3-none-any.whl" \ - "c7/a3/c5da2a44c85bfbb6eebcfc1dde24933f8704441b98fdde6528f4831757a6/linecache2-1.0.0-py2.py3-none-any.whl" \ - "17/0a/6ac05a3723017a967193456a2efa0aa9ac4b51456891af1e2353bb9de21e/traceback2-1.4.0-py2.py3-none-any.whl" \ - "72/20/7f0f433060a962200b7272b8c12ba90ef5b903e218174301d0abfd523813/unittest2-1.1.0-py2.py3-none-any.whl"; do \ - curl -L -O "https://files.pythonhosted.org/packages/${u}"; \ - $PIP install ${u##*/}; \ - done; } - # make tests use unittest2 - for test in ./test/test_*.py ./test/helper.py; do - sed -r -i -e '/^import unittest$/s/test/test2 as unittest/' "$test" - done - #-------- nose -------- - - name: Install nose for Python ${{ matrix.python-version }} - if: ${{ (matrix.python-version != '3.2' && steps.setup-python.outputs.python-path) || (matrix.python-impl == 'cpython' && (matrix.python-version == '2.7' || matrix.python-version == env.next)) }} - shell: bash - run: | - echo "$PATH" - echo "$PYTHONHOME" - # Use PyNose for recent Pythons instead of Nose - py3ver="${{ 
matrix.python-version }}" - py3ver=${py3ver#3.} - [ "$py3ver" != "${{ matrix.python-version }}" ] && py3ver=${py3ver%.*} || py3ver=0 - [ "$py3ver" -ge 9 ] && nose=pynose || nose=nose - $PIP -qq show $nose || $PIP install $nose - - name: Install nose for other Python 2 - if: ${{ matrix.python-impl == 'jython' || (matrix.python-impl == 'cpython' && matrix.python-version == '2.6') }} - shell: bash - run: | - # Work around deprecation of support for non-SNI clients at PyPI CDN (see https://status.python.org/incidents/hzmjhqsdjqgb) - $PIP -qq show nose || { \ - curl -L -O "https://files.pythonhosted.org/packages/99/4f/13fb671119e65c4dce97c60e67d3fd9e6f7f809f2b307e2611f4701205cb/nose-1.3.7-py2-none-any.whl"; \ - $PIP install nose-1.3.7-py2-none-any.whl; } - - name: Install nose for other Python 3 - if: ${{ matrix.python-version == '3.2' && steps.setup-python.outputs.python-path }} - shell: bash - run: | - $PIP -qq show nose || { \ - curl -L -O "https://files.pythonhosted.org/packages/15/d8/dd071918c040f50fa1cf80da16423af51ff8ce4a0f2399b7bf8de45ac3d9/nose-1.3.7-py3-none-any.whl"; \ - $PIP install nose-1.3.7-py3-none-any.whl; } - - name: Set up nosetest test - if: ${{ contains(needs.select.outputs.test-set, matrix.ytdl-test-set ) }} - shell: bash - run: | - # set PYTHON_VER - PYTHON_VER=${{ matrix.python-version }} - [ "${PYTHON_VER#*-}" != "$PYTHON_VER" ] || PYTHON_VER="${{ matrix.python-impl }}-${PYTHON_VER}" - echo "PYTHON_VER=$PYTHON_VER" >> "$GITHUB_ENV" - echo "PYTHON_IMPL=${{ matrix.python-impl }}" >> "$GITHUB_ENV" - # define a test to validate the Python version used by nosetests - printf '%s\n' \ - 'from __future__ import unicode_literals' \ - 'import sys, os, platform' \ - 'try:' \ - ' import unittest2 as unittest' \ - 'except ImportError:' \ - ' import unittest' \ - 'class TestPython(unittest.TestCase):' \ - ' def setUp(self):' \ - ' self.ver = os.environ["PYTHON_VER"].split("-")' \ - ' def test_python_ver(self):' \ - ' self.assertEqual(["%d" % v for v in 
sys.version_info[:2]], self.ver[-1].split(".")[:2])' \ - ' self.assertTrue(sys.version.startswith(self.ver[-1]))' \ - ' self.assertIn(self.ver[0], ",".join((sys.version, platform.python_implementation())).lower())' \ - ' def test_python_impl(self):' \ - ' self.assertIn(platform.python_implementation().lower(), (os.environ["PYTHON_IMPL"], self.ver[0]))' \ - > test/test_python.py - #-------- TESTS ------- - - name: Run tests - if: ${{ contains(needs.select.outputs.test-set, matrix.ytdl-test-set ) }} - continue-on-error: ${{ matrix.ytdl-test-set == 'download' || matrix.python-impl == 'jython' }} - env: - YTDL_TEST_SET: ${{ matrix.ytdl-test-set }} - run: | - ./devscripts/run_tests.${{ matrix.run-tests-ext }} - flake8: - name: Linter - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: 3.9 - - name: Install flake8 - run: pip install flake8 - - name: Run flake8 - run: flake8 . - diff --git a/AUTHORS b/AUTHORS deleted file mode 100644 index 4a6d7dacd..000000000 --- a/AUTHORS +++ /dev/null @@ -1,249 +0,0 @@ -Ricardo Garcia Gonzalez -Danny Colligan -Benjamin Johnson -Vasyl' Vavrychuk -Witold Baryluk -Paweł Paprota -Gergely Imreh -Rogério Brito -Philipp Hagemeister -Sören Schulze -Kevin Ngo -Ori Avtalion -shizeeg -Filippo Valsorda -Christian Albrecht -Dave Vasilevsky -Jaime Marquínez Ferrándiz -Jeff Crouse -Osama Khalid -Michael Walter -M. Yasoob Ullah Khalid -Julien Fraichard -Johny Mo Swag -Axel Noack -Albert Kim -Pierre Rudloff -Huarong Huo -Ismael Mejía -Steffan Donal -Andras Elso -Jelle van der Waa -Marcin Cieślak -Anton Larionov -Takuya Tsuchida -Sergey M. -Michael Orlitzky -Chris Gahan -Saimadhav Heblikar -Mike Col -Oleg Prutz -pulpe -Andreas Schmitz -Michael Kaiser -Niklas Laxström -David Triendl -Anthony Weems -David Wagner -Juan C. 
Olivares -Mattias Harrysson -phaer -Sainyam Kapoor -Nicolas Évrard -Jason Normore -Hoje Lee -Adam Thalhammer -Georg Jähnig -Ralf Haring -Koki Takahashi -Ariset Llerena -Adam Malcontenti-Wilson -Tobias Bell -Naglis Jonaitis -Charles Chen -Hassaan Ali -Dobrosław Żybort -David Fabijan -Sebastian Haas -Alexander Kirk -Erik Johnson -Keith Beckman -Ole Ernst -Aaron McDaniel (mcd1992) -Magnus Kolstad -Hari Padmanaban -Carlos Ramos -5moufl -lenaten -Dennis Scheiba -Damon Timm -winwon -Xavier Beynon -Gabriel Schubiner -xantares -Jan Matějka -Mauroy Sébastien -William Sewell -Dao Hoang Son -Oskar Jauch -Matthew Rayfield -t0mm0 -Tithen-Firion -Zack Fernandes -cryptonaut -Adrian Kretz -Mathias Rav -Petr Kutalek -Will Glynn -Max Reimann -Cédric Luthi -Thijs Vermeir -Joel Leclerc -Christopher Krooss -Ondřej Caletka -Dinesh S -Johan K. Jensen -Yen Chi Hsuan -Enam Mijbah Noor -David Luhmer -Shaya Goldberg -Paul Hartmann -Frans de Jonge -Robin de Rooij -Ryan Schmidt -Leslie P. Polzer -Duncan Keall -Alexander Mamay -Devin J. Pohly -Eduardo Ferro Aldama -Jeff Buchbinder -Amish Bhadeshia -Joram Schrijver -Will W. -Mohammad Teimori Pabandi -Roman Le Négrate -Matthias Küch -Julian Richen -Ping O. -Mister Hat -Peter Ding -jackyzy823 -George Brighton -Remita Amine -Aurélio A. 
Heckert -Bernhard Minks -sceext -Zach Bruggeman -Tjark Saul -slangangular -Behrouz Abbasi -ngld -nyuszika7h -Shaun Walbridge -Lee Jenkins -Anssi Hannula -Lukáš Lalinský -Qijiang Fan -Rémy Léone -Marco Ferragina -reiv -Muratcan Simsek -Evan Lu -flatgreen -Brian Foley -Vignesh Venkat -Tom Gijselinck -Founder Fang -Andrew Alexeyew -Saso Bezlaj -Erwin de Haan -Jens Wille -Robin Houtevelts -Patrick Griffis -Aidan Rowe -mutantmonkey -Ben Congdon -Kacper Michajłow -José Joaquín Atria -Viťas Strádal -Kagami Hiiragi -Philip Huppert -blahgeek -Kevin Deldycke -inondle -Tomáš Čech -Déstin Reed -Roman Tsiupa -Artur Krysiak -Jakub Adam Wieczorek -Aleksandar Topuzović -Nehal Patel -Rob van Bekkum -Petr Zvoníček -Pratyush Singh -Aleksander Nitecki -Sebastian Blunt -Matěj Cepl -Xie Yanbo -Philip Xu -John Hawkinson -Rich Leeper -Zhong Jianxin -Thor77 -Mattias Wadman -Arjan Verwer -Costy Petrisor -Logan B -Alex Seiler -Vijay Singh -Paul Hartmann -Stephen Chen -Fabian Stahl -Bagira -Odd Stråbø -Philip Herzog -Thomas Christlieb -Marek Rusinowski -Tobias Gruetzmacher -Olivier Bilodeau -Lars Vierbergen -Juanjo Benages -Xiao Di Guan -Thomas Winant -Daniel Twardowski -Jeremie Jarosh -Gerard Rovira -Marvin Ewald -Frédéric Bournival -Timendum -gritstub -Adam Voss -Mike Fährmann -Jan Kundrát -Giuseppe Fabiano -Örn Guðjónsson -Parmjit Virk -Genki Sky -Ľuboš Katrinec -Corey Nicholson -Ashutosh Chaudhary -John Dong -Tatsuyuki Ishi -Daniel Weber -Kay Bouché -Yang Hongbo -Lei Wang -Petr Novák -Leonardo Taccari -Martin Weinelt -Surya Oktafendri -TingPing -Alexandre Macabies -Bastian de Groot -Niklas Haas -András Veres-Szentkirályi -Enes Solak -Nathan Rossi -Thomas van der Berg -Luca Cherubin -Adrian Heine \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md deleted file mode 100644 index ff40cef78..000000000 --- a/CONTRIBUTING.md +++ /dev/null @@ -1,434 +0,0 @@ -**Please include the full output of youtube-dl when run with `-v`**, i.e. 
**add** `-v` flag to **your command line**, copy the **whole** output and post it in the issue body wrapped in \`\`\` for better formatting. It should look similar to this: -``` -$ youtube-dl -v -[debug] System config: [] -[debug] User config: [] -[debug] Command-line args: [u'-v', u'https://www.youtube.com/watch?v=BaW_jenozKcj'] -[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2015.12.06 -[debug] Git HEAD: 135392e -[debug] Python version 2.6.6 - Windows-2003Server-5.2.3790-SP2 -[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 -[debug] Proxy map: {} -... -``` -**Do not post screenshots of verbose logs; only plain text is acceptable.** - -The output (including the first lines) contains important debugging information. Issues without the full output are often not reproducible and therefore do not get solved in short order, if ever. - -Please re-read your issue once again to avoid a couple of common mistakes (you can and should use this as a checklist): - -### Is the description of the issue itself sufficient? - -We often get issue reports that we cannot really decipher. While in most cases we eventually get the required information after asking back multiple times, this poses an unnecessary drain on our resources. Many contributors, including myself, are also not native speakers, so we may misread some parts. - -So please elaborate on what feature you are requesting, or what bug you want to be fixed. Make sure that it's obvious - -- What the problem is -- How it could be fixed -- How your proposed solution would look like - -If your report is shorter than two lines, it is almost certainly missing some of these, which makes it hard for us to respond to it. We're often too polite to close the issue outright, but the missing info makes misinterpretation likely. 
As a committer myself, I often get frustrated by these issues, since the only possible way for me to move forward on them is to ask for clarification over and over. - -For bug reports, this means that your report should contain the *complete* output of youtube-dl when called with the `-v` flag. The error message you get for (most) bugs even says so, but you would not believe how many of our bug reports do not contain this information. - -If your server has multiple IPs or you suspect censorship, adding `--call-home` may be a good idea to get more diagnostics. If the error is `ERROR: Unable to extract ...` and you cannot reproduce it from multiple countries, add `--dump-pages` (warning: this will yield a rather large output, redirect it to the file `log.txt` by adding `>log.txt 2>&1` to your command-line) or upload the `.dump` files you get when you add `--write-pages` [somewhere](https://gist.github.com/). - -**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like `https://www.youtube.com/watch?v=BaW_jenozKc`. There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. `https://www.youtube.com/`) is *not* an example URL. - -### Are you using the latest version? - -Before reporting any issue, type `youtube-dl -U`. This should report that you're up-to-date. About 20% of the reports we receive are already fixed, but people are using outdated versions. This goes for feature requests as well. - -### Is the issue already documented? - -Make sure that someone has not already opened the issue you're trying to open. Search at the top of the window or browse the [GitHub Issues](https://github.com/ytdl-org/youtube-dl/search?type=Issues) of this repository. If there is an issue, feel free to write something along the lines of "This affects me as well, with version 2015.01.01. Here is some more information on the issue: ...". 
While some issues may be old, a new post into them often spurs rapid activity. - -### Why are existing options not enough? - -Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/ytdl-org/youtube-dl/blob/master/README.md#options). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem. - -### Is there enough context in your bug report? - -People want to solve problems, and often think they do us a favor by breaking down their larger problems (e.g. wanting to skip already downloaded files) to a specific request (e.g. requesting us to look whether the file exists before downloading the info page). However, what often happens is that they break down the problem into two steps: One simple, and one impossible (or extremely complicated one). - -We are then presented with a very complicated request when the original problem could be solved far easier, e.g. by recording the downloaded video IDs in a separate file. To avoid this, you must include the greater context where it is non-obvious. In particular, every feature request that does not consist of adding support for a new site should contain a use case scenario that explains in what situation the missing feature would be useful. - -### Does the issue involve one problem, and one problem only? - -Some of our users seem to think there is a limit of issues they can or should open. There is no limit of issues they can or should open. While it may seem appealing to be able to dump all your issues into one ticket, that means that someone who solves one of your issues cannot mark the issue as closed. Typically, reporting a bunch of issues leads to the ticket lingering since nobody wants to attack that behemoth, until someone mercifully splits the issue into multiple ones. 
- -In particular, every site support request issue should only pertain to services at one site (generally under a common domain, but always using the same backend technology). Do not request support for vimeo user videos, White house podcasts, and Google Plus pages in the same issue. Also, make sure that you don't post bug reports alongside feature requests. As a rule of thumb, a feature request does not include outputs of youtube-dl that are not immediately related to the feature at hand. Do not post reports of a network error alongside the request for a new video service. - -### Is anyone going to need the feature? - -Only post features that you (or an incapacitated friend you can personally talk to) require. Do not post features because they seem like a good idea. If they are really useful, they will be requested by someone who requires them. - -### Is your question about youtube-dl? - -It may sound strange, but some bug reports we receive are completely unrelated to youtube-dl and relate to a different, or even the reporter's own, application. Please make sure that you are actually using youtube-dl. If you are using a UI for youtube-dl, report the bug to the maintainer of the actual application providing the UI. On the other hand, if your UI for youtube-dl fails in some way you believe is related to youtube-dl, by all means, go ahead and report the bug. - -# DEVELOPER INSTRUCTIONS - -Most users do not need to build youtube-dl and can [download the builds](https://ytdl-org.github.io/youtube-dl/download.html) or get them from their distribution. - -To run youtube-dl as a developer, you don't need to build anything either. 
Simply execute - - python -m youtube_dl - -To run the test, simply invoke your favorite test runner, or execute a test file directly; any of the following work: - - python -m unittest discover - python test/test_download.py - nosetests - -See item 6 of [new extractor tutorial](#adding-support-for-a-new-site) for how to run extractor specific test cases. - -If you want to create a build of youtube-dl yourself, you'll need - -* python -* make (only GNU make is supported) -* pandoc -* zip -* nosetests - -### Adding support for a new site - -If you want to add support for a new site, first of all **make sure** this site is **not dedicated to [copyright infringement](README.md#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. youtube-dl does **not support** such sites thus pull requests adding support for them **will be rejected**. - -After you have ensured this site is distributing its content legally, you can follow this quick list (assuming your service is called `yourextractor`): - -1. [Fork this repository](https://github.com/ytdl-org/youtube-dl/fork) -2. Check out the source code with: - - git clone git@github.com:YOUR_GITHUB_USERNAME/youtube-dl.git - -3. Start a new git branch with - - cd youtube-dl - git checkout -b yourextractor - -4. 
Start with this simple template and save it to `youtube_dl/extractor/yourextractor.py`: - - ```python - # coding: utf-8 - from __future__ import unicode_literals - - from .common import InfoExtractor - - - class YourExtractorIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)' - _TEST = { - 'url': 'https://yourextractor.com/watch/42', - 'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)', - 'info_dict': { - 'id': '42', - 'ext': 'mp4', - 'title': 'Video title goes here', - 'thumbnail': r're:^https?://.*\.jpg$', - # TODO more properties, either as: - # * A value - # * MD5 checksum; start the string with md5: - # * A regular expression; start the string with re: - # * Any Python type (for example int or float) - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - # TODO more code goes here, for example ... - title = self._html_search_regex(r'
<h1>(.+?)</h1>
', webpage, 'title') - - return { - 'id': video_id, - 'title': title, - 'description': self._og_search_description(webpage), - 'uploader': self._search_regex(r'<div[^>]+id="uploader"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False), - # TODO more properties (see youtube_dl/extractor/common.py) - } - ``` -5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/extractors.py). This makes the extractor available for use, as long as the class ends with `IE`. -6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in. -7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want. -8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart): - - $ flake8 youtube_dl/extractor/yourextractor.py - -9. Make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+. -10.
When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this: - - $ git add youtube_dl/extractor/extractors.py - $ git add youtube_dl/extractor/yourextractor.py - $ git commit -m '[yourextractor] Add new extractor' - $ git push origin yourextractor - -11. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it. - -In any case, thank you very much for your contributions! - -## youtube-dl coding conventions - -This section introduces a guide lines for writing idiomatic, robust and future-proof extractor code. - -Extractors are very fragile by nature since they depend on the layout of the source data provided by 3rd party media hosters out of your control and this layout tends to change. As an extractor implementer your task is not only to write code that will extract media links and metadata correctly but also to minimize dependency on the source's layout and even to make the code foresee potential future changes and be ready for that. This is important because it will allow the extractor not to break on minor layout changes thus keeping old youtube-dl versions working. Even though this breakage issue is easily fixed by emitting a new version of youtube-dl with a fix incorporated, all the previous versions become broken in all repositories and distros' packages that may not be so prompt in fetching the update from us. Needless to say, some non rolling release distros may never receive an update at all. - -### Mandatory and optional metafields - -For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by an [information dictionary](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303) or simply *info dict*. 
Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by youtube-dl: - - - `id` (media identifier) - - `title` (media title) - - `url` (media download URL) or `formats` - -In fact only the last option is technically mandatory (i.e. if you can't figure out the download location of the media the extraction does not make any sense). But by convention youtube-dl also treats `id` and `title` as mandatory. Thus the aforementioned metafields are the critical data that the extraction does not make any sense without and if any of them fail to be extracted then the extractor is considered completely broken. - -[Any field](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L188-L303) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields. - -#### Example - -Say you have some source dictionary `meta` that you've fetched as JSON with HTTP request and it has a key `summary`: - -```python -meta = self._download_json(url, video_id) -``` - -Assume at this point `meta`'s layout is: - -```python -{ - ... - "summary": "some fancy summary text", - ... -} -``` - -Assume you want to extract `summary` and put it into the resulting info dict as `description`. 
Since `description` is an optional meta field you should be ready that this key may be missing from the `meta` dict, so that you should extract it like: - -```python -description = meta.get('summary')  # correct -``` - -and not like: - -```python -description = meta['summary']  # incorrect -``` - -The latter will break extraction process with `KeyError` if `summary` disappears from `meta` at some later time but with the former approach extraction will just go ahead with `description` set to `None` which is perfectly fine (remember `None` is equivalent to the absence of data). - -Similarly, you should pass `fatal=False` when extracting optional data from a webpage with `_search_regex`, `_html_search_regex` or similar methods, for instance: - -```python -description = self._search_regex( -    r'<span[^>]+id="title"[^>]*>([^<]+)<', -    webpage, 'description', fatal=False) -``` - -With `fatal` set to `False` if `_search_regex` fails to extract `description` it will emit a warning and continue extraction. - -You can also pass `default=<some fallback value>`, for example: - -```python -description = self._search_regex( -    r'<span[^>]+id="title"[^>]*>([^<]+)<', -    webpage, 'description', default=None) -``` - -On failure this code will silently continue the extraction with `description` set to `None`. That is useful for metafields that may or may not be present. - -### Provide fallbacks - -When extracting metadata try to do so from multiple sources. For example if `title` is present in several places, try extracting from at least some of them. This makes it more future-proof in case some of the sources become unavailable. - -#### Example - -Say `meta` from the previous example has a `title` and you are about to extract it. Since `title` is a mandatory meta field you should end up with something like: - -```python -title = meta['title'] -``` - -If `title` disappears from `meta` in future due to some changes on the hoster's side the extraction would fail since `title` is mandatory. That's expected.
- -Assume that you have another source you can extract `title` from, for example `og:title` HTML meta of a `webpage`. In this case you can provide a fallback scenario: - -```python -title = meta.get('title') or self._og_search_title(webpage) -``` - -This code will try to extract from `meta` first and if it fails it will try extracting `og:title` from a `webpage`. - -### Regular expressions - -#### Don't capture groups you don't use - -Capturing group must be an indication that it's used somewhere in the code. Any group that is not used must be non capturing. - -##### Example - -Don't capture id attribute name here since you can't use it for anything anyway. - -Correct: - -```python -r'(?:id|ID)=(?P<id>\d+)' -``` - -Incorrect: -```python -r'(id|ID)=(?P<id>\d+)' -``` - - -#### Make regular expressions relaxed and flexible - -When using regular expressions try to write them fuzzy, relaxed and flexible, skipping insignificant parts that are more likely to change, allowing both single and double quotes for quoted values and so on. - -##### Example - -Say you need to extract `title` from the following HTML code: - -```html -<span style="position: absolute; left: 910px; width: 90px; float: right; z-index: 9999;" class="title">some fancy title</span> -``` - -The code for that task should look similar to: - -```python -title = self._search_regex( -    r'<span[^>]+class="title"[^>]*>([^<]+)', webpage, 'title') -``` - -Or even better: - -```python -title = self._search_regex( -    r'<span[^>]+class=(["\'])title\1[^>]*>(?P<title>[^<]+)', -    webpage, 'title', group='title') -``` - -Note how you tolerate potential changes in the `style` attribute's value or switch from using double quotes to single for `class` attribute: - -The code definitely should not look like: - -```python -title = self._search_regex( -    r'<span style="position: absolute; left: 910px; width: 90px; float: right; z-index: 9999;" class="title">(.*?)</span>', -    webpage, 'title', group='title') -``` - -### Long lines policy - -There is a soft limit to keep lines of code under 80 characters long.
This means it should be respected if possible and if it does not make readability and code maintenance worse. - -For example, you should **never** split long string literals like URLs or some other often copied entities over multiple lines to fit this limit: - -Correct: - -```python -'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4' -``` - -Incorrect: - -```python -'https://www.youtube.com/watch?v=FqZTN594JQw&list=' -'PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4' -``` - -### Inline values - -Extracting variables is acceptable for reducing code duplication and improving readability of complex expressions. However, you should avoid extracting variables used only once and moving them to opposite parts of the extractor file, which makes reading the linear flow difficult. - -#### Example - -Correct: - -```python -title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, 'title') -``` - -Incorrect: - -```python -TITLE_RE = r'<title>([^<]+)</title>' -# ...some lines of code... -title = self._html_search_regex(TITLE_RE, webpage, 'title') -``` - -### Collapse fallbacks - -Multiple fallback values can quickly become unwieldy. Collapse multiple fallback values into a single expression via a list of patterns. - -#### Example - -Good: - -```python -description = self._html_search_meta( -    ['og:description', 'description', 'twitter:description'], -    webpage, 'description', default=None) -``` - -Unwieldy: - -```python -description = ( -    self._og_search_description(webpage, default=None) -    or self._html_search_meta('description', webpage, default=None) -    or self._html_search_meta('twitter:description', webpage, default=None)) -``` - -Methods supporting list of patterns are: `_search_regex`, `_html_search_regex`, `_og_search_property`, `_html_search_meta`. - -### Trailing parentheses - -Always move trailing parentheses after the last argument.
- -#### Example - -Correct: - -```python - lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'], - list) -``` - -Incorrect: - -```python - lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'], - list, -) -``` - -### Use convenience conversion and parsing functions - -Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well. - -Use `url_or_none` for safe URL processing. - -Use `try_get` for safe metadata extraction from parsed JSON. - -Use `unified_strdate` for uniform `upload_date` or any `YYYYMMDD` meta field extraction, `unified_timestamp` for uniform `timestamp` extraction, `parse_filesize` for `filesize` extraction, `parse_count` for count meta fields extraction, `parse_resolution`, `parse_duration` for `duration` extraction, `parse_age_limit` for `age_limit` extraction. - -Explore [`youtube_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/utils.py) for more useful convenience functions. 
- -#### More examples - -##### Safely extract optional description from parsed JSON -```python -description = try_get(response, lambda x: x['result']['video'][0]['summary'], compat_str) -``` - -##### Safely extract more optional metadata -```python -video = try_get(response, lambda x: x['result']['video'][0], dict) or {} -description = video.get('summary') -duration = float_or_none(video.get('durationMs'), scale=1000) -view_count = int_or_none(video.get('views')) -``` - diff --git a/ChangeLog b/ChangeLog deleted file mode 100644 index 658864282..000000000 --- a/ChangeLog +++ /dev/null @@ -1,6167 +0,0 @@ -version 2021.12.17 - -Core -* [postprocessor/ffmpeg] Show ffmpeg output on error (#22680, #29336) - -Extractors -* [youtube] Update signature function patterns (#30363, #30366) -* [peertube] Only call description endpoint if necessary (#29383) -* [periscope] Pass referer to HLS requests (#29419) -- [liveleak] Remove extractor (#17625, #24222, #29331) -+ [pornhub] Add support for pornhubthbh7ap3u.onion -* [pornhub] Detect geo restriction -* [pornhub] Dismiss tbr extracted from download URLs (#28927) -* [curiositystream:collection] Extend _VALID_URL (#26326, #29117) -* [youtube] Make get_video_info processing more robust (#29333) -* [youtube] Workaround for get_video_info request (#29333) -* [bilibili] Strip uploader name (#29202) -* [youtube] Update invidious instance list (#29281) -* [umg:de] Update GraphQL API URL (#29304) -* [nrk] Switch psapi URL to https (#29344) -+ [egghead] Add support for app.egghead.io (#28404, #29303) -* [appleconnect] Fix extraction (#29208) -+ [orf:tvthek] Add support for MPD formats (#28672, #29236) - - -version 2021.06.06 - -Extractors -* [facebook] Improve login required detection -* [youporn] Fix formats and view count extraction (#29216) -* [orf:tvthek] Fix thumbnails extraction (#29217) -* [formula1] Fix extraction (#29206) -* [ard] Relax URL regular expression and fix video ids (#22724, #29091) -+ [ustream] Detect https embeds 
(#29133) -* [ted] Prefer own formats over external sources (#29142) -* [twitch:clips] Improve extraction (#29149) -+ [twitch:clips] Add access token query to download URLs (#29136) -* [youtube] Fix get_video_info request (#29086, #29165) -* [vimeo] Fix vimeo pro embed extraction (#29126) -* [redbulltv] Fix embed data extraction (#28770) -* [shahid] Relax URL regular expression (#28772, #28930) - - -version 2021.05.16 - -Core -* [options] Fix thumbnail option group name (#29042) -* [YoutubeDL] Improve extract_info doc (#28946) - -Extractors -+ [playstuff] Add support for play.stuff.co.nz (#28901, #28931) -* [eroprofile] Fix extraction (#23200, #23626, #29008) -+ [vivo] Add support for vivo.st (#29009) -+ [generic] Add support for og:audio (#28311, #29015) -* [phoenix] Fix extraction (#29057) -+ [generic] Add support for sibnet embeds -+ [vk] Add support for sibnet embeds (#9500) -+ [generic] Add Referer header for direct videojs download URLs (#2879, - #20217, #29053) -* [orf:radio] Switch download URLs to HTTPS (#29012, #29046) -- [blinkx] Remove extractor (#28941) -* [medaltv] Relax URL regular expression (#28884) -+ [funimation] Add support for optional lang code in URLs (#28950) -+ [gdcvault] Add support for HTML5 videos -* [dispeak] Improve FLV extraction (#13513, #28970) -* [kaltura] Improve iframe extraction (#28969) -* [kaltura] Make embed code alternatives actually work -* [cda] Improve extraction (#28709, #28937) -* [twitter] Improve formats extraction from vmap URL (#28909) -* [xtube] Fix formats extraction (#28870) -* [svtplay] Improve extraction (#28507, #28876) -* [tv2dk] Fix extraction (#28888) - - -version 2021.04.26 - -Extractors -+ [xfileshare] Add support for wolfstream.tv (#28858) -* [francetvinfo] Improve video id extraction (#28792) -* [medaltv] Fix extraction (#28807) -* [tver] Redirect all downloads to Brightcove (#28849) -* [go] Improve video id extraction (#25207, #25216, #26058) -* [youtube] Fix lazy extractors (#28780) -+ [bbc] Extract 
description and timestamp from __INITIAL_DATA__ (#28774) -* [cbsnews] Fix extraction for python <3.6 (#23359) - - -version 2021.04.17 - -Core -+ [utils] Add support for experimental HTTP response status code - 308 Permanent Redirect (#27877, #28768) - -Extractors -+ [lbry] Add support for HLS videos (#27877, #28768) -* [youtube] Fix stretched ratio calculation -* [youtube] Improve stretch extraction (#28769) -* [youtube:tab] Improve grid extraction (#28725) -+ [youtube:tab] Detect series playlist on playlists page (#28723) -+ [youtube] Add more invidious instances (#28706) -* [pluralsight] Extend anti-throttling timeout (#28712) -* [youtube] Improve URL to extractor routing (#27572, #28335, #28742) -+ [maoritv] Add support for maoritelevision.com (#24552) -+ [youtube:tab] Pass innertube context and x-goog-visitor-id header along with - continuation requests (#28702) -* [mtv] Fix Viacom A/B Testing Video Player extraction (#28703) -+ [pornhub] Extract DASH and HLS formats from get_media end point (#28698) -* [cbssports] Fix extraction (#28682) -* [jamendo] Fix track extraction (#28686) -* [curiositystream] Fix format extraction (#26845, #28668) - - -version 2021.04.07 - -Core -* [extractor/common] Use compat_cookies_SimpleCookie for _get_cookies -+ [compat] Introduce compat_cookies_SimpleCookie -* [extractor/common] Improve JSON-LD author extraction -* [extractor/common] Fix _get_cookies on python 2 (#20673, #23256, #20326, - #28640) - -Extractors -* [youtube] Fix extraction of videos with restricted location (#28685) -+ [line] Add support for live.line.me (#17205, #28658) -* [vimeo] Improve extraction (#28591) -* [youku] Update ccode (#17852, #28447, #28460, #28648) -* [youtube] Prefer direct entry metadata over entry metadata from playlist - (#28619, #28636) -* [screencastomatic] Fix extraction (#11976, #24489) -+ [palcomp3] Add support for palcomp3.com (#13120) -+ [arnes] Add support for video.arnes.si (#28483) -+ [youtube:tab] Add support for hashtags (#28308) - 
- -version 2021.04.01 - -Extractors -* [youtube] Setup CONSENT cookie when needed (#28604) -* [vimeo] Fix password protected review extraction (#27591) -* [youtube] Improve age-restricted video extraction (#28578) - - -version 2021.03.31 - -Extractors -* [vlive] Fix inkey request (#28589) -* [francetvinfo] Improve video id extraction (#28584) -+ [instagram] Extract duration (#28469) -* [instagram] Improve title extraction (#28469) -+ [sbs] Add support for ondemand watch URLs (#28566) -* [youtube] Fix video's channel extraction (#28562) -* [picarto] Fix live stream extraction (#28532) -* [vimeo] Fix unlisted video extraction (#28414) -* [youtube:tab] Fix playlist/community continuation items extraction (#28266) -* [ard] Improve clip id extraction (#22724, #28528) - - -version 2021.03.25 - -Extractors -+ [zoom] Add support for zoom.us (#16597, #27002, #28531) -* [bbc] Fix BBC IPlayer Episodes/Group extraction (#28360) -* [youtube] Fix default value for youtube_include_dash_manifest (#28523) -* [zingmp3] Fix extraction (#11589, #16409, #16968, #27205) -+ [vgtv] Add support for new tv.aftonbladet.se URL schema (#28514) -+ [tiktok] Detect private videos (#28453) -* [vimeo:album] Fix extraction for albums with number of videos multiple - to page size (#28486) -* [vvvvid] Fix kenc format extraction (#28473) -* [mlb] Fix video extraction (#21241) -* [svtplay] Improve extraction (#28448) -* [applepodcasts] Fix extraction (#28445) -* [rtve] Improve extraction - + Extract all formats - * Fix RTVE Infantil extraction (#24851) - + Extract is_live and series - - -version 2021.03.14 - -Core -+ Introduce release_timestamp meta field (#28386) - -Extractors -+ [southpark] Add support for southparkstudios.com (#28413) -* [southpark] Fix extraction (#26763, #28413) -* [sportdeutschland] Fix extraction (#21856, #28425) -* [pinterest] Reduce the number of HLS format requests -* [peertube] Improve thumbnail extraction (#28419) -* [tver] Improve title extraction (#28418) -* [fujitv] Fix 
HLS formats extension (#28416) -* [shahid] Fix format extraction (#28383) -+ [lbry] Add support for channel filters (#28385) -+ [bandcamp] Extract release timestamp -+ [lbry] Extract release timestamp (#28386) -* [pornhub] Detect flagged videos -+ [pornhub] Extract formats from get_media end point (#28395) -* [bilibili] Fix video info extraction (#28341) -+ [cbs] Add support for Paramount+ (#28342) -+ [trovo] Add Origin header to VOD formats (#28346) -* [voxmedia] Fix volume embed extraction (#28338) - - -version 2021.03.03 - -Extractors -* [youtube:tab] Switch continuation to browse API (#28289, #28327) -* [9c9media] Fix extraction for videos with multiple ContentPackages (#28309) -+ [bbc] Add support for BBC Reel videos (#21870, #23660, #28268) - - -version 2021.03.02 - -Extractors -* [zdf] Rework extractors (#11606, #13473, #17354, #21185, #26711, #27068, - #27930, #28198, #28199, #28274) - * Generalize cross-extractor video ids for zdf based extractors - * Improve extraction - * Fix 3sat and phoenix -* [stretchinternet] Fix extraction (#28297) -* [urplay] Fix episode data extraction (#28292) -+ [bandaichannel] Add support for b-ch.com (#21404) -* [srgssr] Improve extraction (#14717, #14725, #27231, #28238) - + Extract subtitle - * Fix extraction for new videos - * Update srf download domains -* [vvvvid] Reduce season request payload size -+ [vvvvid] Extract series sublists playlist title (#27601, #27618) -+ [dplay] Extract Ad-Free uplynk URLs (#28160) -+ [wat] Detect DRM protected videos (#27958) -* [tf1] Improve extraction (#27980, #28040) -* [tmz] Fix and improve extraction (#24603, #24687, 28211) -+ [gedidigital] Add support for Gedi group sites (#7347, #26946) -* [youtube] Fix get_video_info request - - -version 2021.02.22 - -Core -+ [postprocessor/embedthumbnail] Recognize atomicparsley binary in lowercase - (#28112) - -Extractors -* [apa] Fix and improve extraction (#27750) -+ [youporn] Extract duration (#28019) -+ [peertube] Add support for canard.tube 
(#28190) -* [youtube] Fixup m4a_dash formats (#28165) -+ [samplefocus] Add support for samplefocus.com (#27763) -+ [vimeo] Add support for unlisted video source format extraction -* [viki] Improve extraction (#26522, #28203) - * Extract uploader URL and episode number - * Report login required error - + Extract 480p formats - * Fix API v4 calls -* [ninegag] Unescape title (#28201) -* [youtube] Improve URL regular expression (#28193) -+ [youtube] Add support for redirect.invidious.io (#28193) -+ [dplay] Add support for de.hgtv.com (#28182) -+ [dplay] Add support for discoveryplus.com (#24698) -+ [simplecast] Add support for simplecast.com (#24107) -* [youtube] Fix uploader extraction in flat playlist mode (#28045) -* [yandexmusic:playlist] Request missing tracks in chunks (#27355, #28184) -+ [storyfire] Add support for storyfire.com (#25628, #26349) -+ [zhihu] Add support for zhihu.com (#28177) -* [youtube] Fix controversial videos when authenticated with cookies (#28174) -* [ccma] Fix timestamp parsing in python 2 -+ [videopress] Add support for video.wordpress.com -* [kakao] Improve info extraction and detect geo restriction (#26577) -* [xboxclips] Fix extraction (#27151) -* [ard] Improve formats extraction (#28155) -+ [canvas] Add support for dagelijksekost.een.be (#28119) - - -version 2021.02.10 - -Extractors -* [youtube:tab] Improve grid continuation extraction (#28130) -* [ign] Fix extraction (#24771) -+ [xhamster] Extract format filesize -+ [xhamster] Extract formats from xplayer settings (#28114) -+ [youtube] Add support phone/tablet JS player (#26424) -* [archiveorg] Fix and improve extraction (#21330, #23586, #25277, #26780, - #27109, #27236, #28063) -+ [cda] Detect geo restricted videos (#28106) -* [urplay] Fix extraction (#28073, #28074) -* [youtube] Fix release date extraction (#28094) -+ [youtube] Extract abr and vbr (#28100) -* [youtube] Skip OTF formats (#28070) - - -version 2021.02.04.1 - -Extractors -* [youtube] Prefer DASH formats (#28070) -* 
[azmedien] Fix extraction (#28064) - - -version 2021.02.04 - -Extractors -* [pornhub] Implement lazy playlist extraction -* [svtplay] Fix video id extraction (#28058) -+ [pornhub] Add support for authentication (#18797, #21416, #24294) -* [pornhub:user] Improve paging -+ [pornhub:user] Add support for URLs unavailable via /videos page (#27853) -+ [bravotv] Add support for oxygen.com (#13357, #22500) -+ [youtube] Pass embed URL to get_video_info request -* [ccma] Improve metadata extraction (#27994) - + Extract age limit, alt title, categories, series and episode number - * Fix timestamp multiple subtitles extraction -* [egghead] Update API domain (#28038) -- [vidzi] Remove extractor (#12629) -* [vidio] Improve metadata extraction -* [youtube] Improve subtitles extraction -* [youtube] Fix chapter extraction fallback -* [youtube] Rewrite extractor - * Improve format sorting - * Remove unused code - * Fix series metadata extraction - * Fix trailer video extraction - * Improve error reporting - + Extract video location -+ [vvvvid] Add support for youtube embeds (#27825) -* [googledrive] Report download page errors (#28005) -* [vlive] Fix error message decoding for python 2 (#28004) -* [youtube] Improve DASH formats file size extraction -* [cda] Improve birth validation detection (#14022, #27929) -+ [awaan] Extract uploader id (#27963) -+ [medialaan] Add support for DPG Media MyChannels based websites (#14871, #15597, - #16106, #16489) -* [abcnews] Fix extraction (#12394, #27920) -* [AMP] Fix upload date and timestamp extraction (#27970) -* [tv4] Relax URL regular expression (#27964) -+ [tv2] Add support for mtvuutiset.fi (#27744) -* [adn] Improve login warning reporting -* [zype] Fix uplynk id extraction (#27956) -+ [adn] Add support for authentication (#17091, #27841, #27937) - - -version 2021.01.24.1 - -Core -* Introduce --output-na-placeholder (#27896) - -Extractors -* [franceculture] Make thumbnail optional (#18807) -* [franceculture] Fix extraction (#27891, #27903) -* 
[njpwworld] Fix extraction (#27890) -* [comedycentral] Fix extraction (#27905) -* [wat] Fix format extraction (#27901) -+ [americastestkitchen:season] Add support for seasons (#27861) -+ [trovo] Add support for trovo.live (#26125) -+ [aol] Add support for yahoo videos (#26650) -* [yahoo] Fix single video extraction -* [lbry] Unescape lbry URI (#27872) -* [9gag] Fix and improve extraction (#23022) -* [americastestkitchen] Improve metadata extraction for ATK episodes (#27860) -* [aljazeera] Fix extraction (#20911, #27779) -+ [minds] Add support for minds.com (#17934) -* [ard] Fix title and description extraction (#27761) -+ [spotify] Add support for Spotify Podcasts (#27443) - - -version 2021.01.16 - -Core -* [YoutubeDL] Protect from infinite recursion due to recursively nested - playlists (#27833) -* [YoutubeDL] Ignore failure to create existing directory (#27811) -* [YoutubeDL] Raise syntax error for format selection expressions with multiple - + operators (#27803) - -Extractors -+ [animeondemand] Add support for lazy playlist extraction (#27829) -* [youporn] Restrict fallback download URL (#27822) -* [youporn] Improve height and tbr extraction (#20425, #23659) -* [youporn] Fix extraction (#27822) -+ [twitter] Add support for unified cards (#27826) -+ [twitch] Add Authorization header with OAuth token for GraphQL requests - (#27790) -* [mixcloud:playlist:base] Extract video id in flat playlist mode (#27787) -* [cspan] Improve info extraction (#27791) -* [adn] Improve info extraction -* [adn] Fix extraction (#26963, #27732) -* [youtube:search] Extract from all sections (#27604) -* [youtube:search] fix viewcount and try to extract all video sections (#27604) -* [twitch] Improve login error extraction -* [twitch] Fix authentication (#27743) -* [3qsdn] Improve extraction (#21058) -* [peertube] Extract formats from streamingPlaylists (#26002, #27586, #27728) -* [khanacademy] Fix extraction (#2887, #26803) -* [spike] Update Paramount Network feed URL (#27715) - - 
-version 2021.01.08 - -Core -* [downloader/hls] Disable decryption in tests (#27660) -+ [utils] Add a function to clean podcast URLs - -Extractors -* [rai] Improve subtitles extraction (#27698, #27705) -* [canvas] Match only supported VRT NU URLs (#27707) -+ [bibeltv] Add support for bibeltv.de (#14361) -+ [bfmtv] Add support for bfmtv.com (#16053, #26615) -+ [sbs] Add support for ondemand play and news embed URLs (#17650, #27629) -* [twitch] Drop legacy kraken API v5 code altogether and refactor -* [twitch:vod] Switch to GraphQL for video metadata -* [canvas] Fix VRT NU extraction (#26957, #27053) -* [twitch] Switch access token to GraphQL and refactor (#27646) -+ [rai] Detect ContentItem in iframe (#12652, #27673) -* [ketnet] Fix extraction (#27662) -+ [dplay] Add support for Discovery+ domains (#27680) -* [motherless] Improve extraction (#26495, #27450) -* [motherless] Fix recent videos upload date extraction (#27661) -* [nrk] Fix extraction for videos without a legalAge rating -- [googleplus] Remove extractor (#4955, #7400) -+ [applepodcasts] Add support for podcasts.apple.com (#25918) -+ [googlepodcasts] Add support for podcasts.google.com -+ [iheart] Add support for iheart.com (#27037) -* [acast] Clean podcast URLs -* [stitcher] Clean podcast URLs -+ [xfileshare] Add support for aparat.cam (#27651) -+ [twitter] Add support for summary card (#25121) -* [twitter] Try to use a Generic fallback for unknown twitter cards (#25982) -+ [stitcher] Add support for shows and show metadata extraction (#20510) -* [stv] Improve episode id extraction (#23083) - - -version 2021.01.03 - -Extractors -* [nrk] Improve series metadata extraction (#27473) -+ [nrk] Extract subtitles -* [nrk] Fix age limit extraction -* [nrk] Improve video id extraction -+ [nrk] Add support for podcasts (#27634, #27635) -* [nrk] Generalize and delegate all item extractors to nrk -+ [nrk] Add support for mp3 formats -* [nrktv] Switch to playback endpoint -* [vvvvid] Fix season metadata extraction (#18130) 
-* [stitcher] Fix extraction (#20811, #27606) -* [acast] Fix extraction (#21444, #27612, #27613) -+ [arcpublishing] Add support for arcpublishing.com (#2298, #9340, #17200) -+ [sky] Add support for Sports News articles and Brightcove videos (#13054) -+ [vvvvid] Extract akamai formats -* [vvvvid] Skip unplayable episodes (#27599) -* [yandexvideo] Fix extraction for Python 3.4 - - -version 2020.12.31 - -Core -* [utils] Accept only supported protocols in url_or_none -* [YoutubeDL] Allow format filtering using audio language (#16209) - -Extractors -+ [redditr] Extract all thumbnails (#27503) -* [vvvvid] Improve info extraction -+ [vvvvid] Add support for playlists (#18130, #27574) -+ [yandexdisk] Extract info from webpage -* [yandexdisk] Fix extraction (#17861, #27131) -* [yandexvideo] Use old API call as fallback -* [yandexvideo] Fix extraction (#25000) -- [nbc] Remove CSNNE extractor -* [nbc] Fix NBCSport VPlayer URL extraction (#16640) -+ [aenetworks] Add support for biography.com (#3863) -* [uktvplay] Match new video URLs (#17909) -* [sevenplay] Detect API errors -* [tenplay] Fix format extraction (#26653) -* [brightcove] Raise error for DRM protected videos (#23467, #27568) - - -version 2020.12.29 - -Extractors -* [youtube] Improve yt initial data extraction (#27524) -* [youtube:tab] Improve URL matching (#27559) -* [youtube:tab] Restore retry on browse requests (#27313, #27564) -* [aparat] Fix extraction (#22285, #22611, #23348, #24354, #24591, #24904, - #25418, #26070, #26350, #26738, #27563) -- [brightcove] Remove sonyliv specific code -* [piksel] Improve format extraction -+ [zype] Add support for uplynk videos -+ [toggle] Add support for live.mewatch.sg (#27555) -+ [go] Add support for fxnow.fxnetworks.com (#13972, #22467, #23754, #26826) -* [teachable] Improve embed detection (#26923) -* [mitele] Fix free video extraction (#24624, #25827, #26757) -* [telecinco] Fix extraction -* [youtube] Update invidious.snopyta.org (#22667) -* [amcnetworks] Improve auth only 
video detection (#27548) -+ [generic] Add support for VHX Embeds (#27546) - - -version 2020.12.26 - -Extractors -* [instagram] Fix comment count extraction -+ [instagram] Add support for reel URLs (#26234, #26250) -* [bbc] Switch to media selector v6 (#23232, #23933, #26303, #26432, #26821, - #27538) -* [instagram] Improve thumbnail extraction -* [instagram] Fix extraction when authenticated (#22880, #26377, #26981, - #27422) -* [spankbang:playlist] Fix extraction (#24087) -+ [spankbang] Add support for playlist videos -* [pornhub] Improve like and dislike count extraction (#27356) -* [pornhub] Fix lq formats extraction (#27386, #27393) -+ [bongacams] Add support for bongacams.com (#27440) -* [youtube:tab] Extend URL regular expression (#27501) -* [theweatherchannel] Fix extraction (#25930, #26051) -+ [sprout] Add support for Universal Kids (#22518) -* [theplatform] Allow passing geo bypass countries from other extractors -+ [wistia] Add support for playlists (#27533) -+ [ctv] Add support for ctv.ca (#27525) -* [9c9media] Improve info extraction -* [youtube] Fix automatic captions extraction (#27162, #27388) -* [sonyliv] Fix title for movies -* [sonyliv] Fix extraction (#25667) -* [streetvoice] Fix extraction (#27455, #27492) -+ [facebook] Add support for watchparty pages (#27507) -* [cbslocal] Fix video extraction -+ [brightcove] Add another method to extract policyKey -* [mewatch] Relax URL regular expression (#27506) - - -version 2020.12.22 - -Core -* [common] Remove unwanted query params from unsigned akamai manifest URLs - -Extractors -- [tastytrade] Remove extractor (#25716) -* [niconico] Fix playlist extraction (#27428) -- [everyonesmixtape] Remove extractor -- [kanalplay] Remove extractor -* [arkena] Fix extraction -* [nba] Rewrite extractor -* [turner] Improve info extraction -* [youtube] Improve xsrf token extraction (#27442) -* [generic] Improve RSS age limit extraction -* [generic] Fix RSS itunes thumbnail extraction (#27405) -+ [redditr] Extract 
duration (#27426) -- [zaq1] Remove extractor -+ [asiancrush] Add support for retrocrush.tv -* [asiancrush] Fix extraction -- [noco] Remove extractor (#10864) -* [nfl] Fix extraction (#22245) -* [skysports] Relax URL regular expression (#27435) -+ [tv5unis] Add support for tv5unis.ca (#22399, #24890) -+ [videomore] Add support for more.tv (#27088) -+ [yandexmusic] Add support for music.yandex.com (#27425) -+ [nhk:program] Add support for audio programs and program clips -+ [nhk] Add support for NHK video programs (#27230) - - -version 2020.12.14 - -Core -* [extractor/common] Improve JSON-LD interaction statistic extraction (#23306) -* [downloader/hls] Delegate manifests with media initialization to ffmpeg -+ [extractor/common] Document duration meta field for playlists - -Extractors -* [mdr] Bypass geo restriction -* [mdr] Improve extraction (#24346, #26873) -* [yandexmusic:album] Improve album title extraction (#27418) -* [eporner] Fix view count extraction and make optional (#23306) -+ [eporner] Extend URL regular expression -* [eporner] Fix hash extraction and extend _VALID_URL (#27396) -* [slideslive] Use m3u8 entry protocol for m3u8 formats (#27400) -* [twitcasting] Fix format extraction and improve info extraction (#24868) -* [linuxacademy] Fix authentication and extraction (#21129, #26223, #27402) -* [itv] Clean description from HTML tags (#27399) -* [vlive] Sort live formats (#27404) -* [hotstar] Fix and improve extraction - * Fix format extraction (#26690) - + Extract thumbnail URL (#16079, #20412) - + Add support for country specific playlist URLs (#23496) - * Select the last id in video URL (#26412) -+ [youtube] Add some invidious instances (#27373) - - -version 2020.12.12 - -Core -* [YoutubeDL] Improve thumbnail filename deducing (#26010, #27244) - -Extractors -+ [ruutu] Extract more metadata -+ [ruutu] Detect non-free videos (#21154) -* [ruutu] Authenticate format URLs (#21031, #26782) -+ [ruutu] Add support for static.nelonenmedia.fi (#25412) -+ 
[ruutu] Extend URL regular expression (#24839) -+ [facebook] Add support for archived live video URLs (#15859) -* [wdr] Improve overall extraction -+ [wdr] Extend subtitles extraction (#22672, #22723) -+ [facebook] Add support for videos attached to Relay based story pages - (#10795) -+ [wdr:page] Add support for kinder.wdr.de (#27350) -+ [facebook] Add another regular expression for handleServerJS -* [facebook] Fix embed page extraction -+ [facebook] Add support for Relay post pages (#26935) -+ [facebook] Add support for watch videos (#22795, #27062) -+ [facebook] Add support for group posts with multiple videos (#19131) -* [itv] Fix series metadata extraction (#26897) -- [itv] Remove old extraction method (#23177) -* [facebook] Redirect mobile URLs to desktop URLs (#24831, #25624) -+ [facebook] Add support for Relay based pages (#26823) -* [facebook] Try to reduce unnecessary tahoe requests -- [facebook] Remove hardcoded Chrome User-Agent (#18974, #25411, #26958, - #27329) -- [smotri] Remove extractor (#27358) -- [beampro] Remove extractor (#17290, #22871, #23020, #23061, #26099) - - -version 2020.12.09 - -Core -* [extractor/common] Fix inline HTML5 media tags processing (#27345) - -Extractors -* [youtube:tab] Improve identity token extraction (#27197) -* [youtube:tab] Make click tracking params on continuation optional -* [youtube:tab] Delegate inline playlists to tab-based playlists (#27298) -+ [tubitv] Extract release year (#27317) -* [amcnetworks] Fix free content extraction (#20354) -+ [lbry:channel] Add support for channels (#25584) -+ [lbry] Add support for short and embed URLs -* [lbry] Fix channel metadata extraction -+ [telequebec] Add support for video.telequebec.tv (#27339) -* [telequebec] Fix extraction (#25733, #26883) -+ [youtube:tab] Capture and output alerts (#27340) -* [tvplay:home] Fix extraction (#21153) -* [americastestkitchen] Fix Extraction and add support - for Cook's Country and Cook's Illustrated (#17234, #27322) -+ [slideslive] Add support 
for yoda service videos and extract subtitles - (#27323) - - -version 2020.12.07 - -Core -* [extractor/common] Extract timestamp from Last-Modified header -+ [extractor/common] Add support for dl8-* media tags (#27283) -* [extractor/common] Fix media type extraction for HTML5 media tags - in start/end form - -Extractors -* [aenetworks] Fix extraction (#23363, #23390, #26795, #26985) - * Fix Fastly format extraction - + Add support for play and watch subdomains - + Extract series metadata -* [youtube] Improve youtu.be extraction in non-existing playlists (#27324) -+ [generic] Extract RSS video description, timestamp and itunes metadata - (#27177) -* [nrk] Reduce the number of instalments and episodes requests -* [nrk] Improve extraction - * Improve format extraction for old akamai formats - + Add is_live value to entry info dict - * Request instalments only when available - * Fix skole extraction -+ [peertube] Extract fps -+ [peertube] Recognize audio-only formats (#27295) - - -version 2020.12.05 - -Core -* [extractor/common] Improve Akamai HTTP format extraction - * Allow m3u8 manifest without an additional audio format - * Fix extraction for qualities starting with a number - -Extractors -* [teachable:course] Improve extraction (#24507, #27286) -* [nrk] Improve error extraction -* [nrktv:series] Improve extraction (#21926) -* [nrktv:season] Improve extraction -* [nrk] Improve format extraction and geo-restriction detection (#24221) -* [pornhub] Handle HTTP errors gracefully (#26414) -* [nrktv] Relax URL regular expression (#27299, #26185) -+ [zdf] Extract webm formats (#26659) -+ [gamespot] Extract DASH and HTTP formats -+ [tver] Add support for tver.jp (#26662, #27284) -+ [pornhub] Add support for pornhub.org (#27276) - - -version 2020.12.02 - -Extractors -+ [tva] Add support for qub.ca (#27235) -+ [toggle] Detect DRM protected videos (#16479, #20805) -+ [toggle] Add support for new MeWatch URLs (#27256) -* [youtube:tab] Extract channels only from channels tab 
(#27266) -+ [cspan] Extract info from jwplayer data (#3672, #3734, #10638, #13030, - #18806, #23148, #24461, #26171, #26800, #27263) -* [cspan] Pass Referer header with format's video URL (#26032, #25729) -* [youtube] Improve age-gated videos extraction (#27259) -+ [mediaset] Add support for movie URLs (#27240) -* [yandexmusic] Refactor -+ [yandexmusic] Add support for artist's tracks and albums (#11887, #22284) -* [yandexmusic:track] Fix extraction (#26449, #26669, #26747, #26748, #26762) - - -version 2020.11.29 - -Core -* [YoutubeDL] Write static debug to stderr and respect quiet for dynamic debug - (#14579, #22593) - -Extractors -* [drtv] Extend URL regular expression (#27243) -* [tiktok] Fix extraction (#20809, #22838, #22850, #25987, #26281, #26411, - #26639, #26776, #27237) -+ [ina] Add support for mobile URLs (#27229) -* [pornhub] Fix like and dislike count extraction (#27227, #27234) -* [youtube] Improve yt initial player response extraction (#27216) -* [videa] Fix extraction (#25650, #25973, #26301) - - -version 2020.11.26 - -Core -* [downloader/fragment] Set final file's mtime according to last fragment's - Last-Modified header (#11718, #18384, #27138) - -Extractors -+ [spreaker] Add support for spreaker.com (#13480, #13877) -* [vlive] Improve extraction for geo-restricted videos -+ [vlive] Add support for post URLs (#27122, #27123) -* [viki] Fix video API request (#27184) -* [bbc] Fix BBC Three clip extraction -* [bbc] Fix BBC News videos extraction -+ [medaltv] Add support for medal.tv (#27149) -* [youtube] Improve music metadata and license extraction (#26013) -* [nrk] Fix extraction -* [cda] Fix extraction (#17803, #24458, #24518, #26381) - - -version 2020.11.24 - -Core -+ [extractor/common] Add generic support for akamai HTTP format extraction - -Extractors -* [youtube:tab] Fix feeds extraction (#25695, #26452) -* [youtube:favorites] Restore extractor -* [youtube:tab] Fix some weird typo (#27157) -+ [pinterest] Add support for large collections (more 
than 25 pins) -+ [franceinter] Extract thumbnail (#27153) -+ [box] Add support for box.com (#5949) -+ [nytimes] Add support for cooking.nytimes.com (#27112, #27143) -* [lbry] Relax URL regular expression (#27144) -+ [rumble] Add support for embed pages (#10785) -+ [skyit] Add support for multiple Sky Italia websites (#26629) -+ [pinterest] Add support for pinterest.com (#25747) - - -version 2020.11.21.1 - -Core -* [downloader/http] Fix crash during urlopen caused by missing reason - of URLError -* [YoutubeDL] Fix --ignore-errors for playlists with generator-based entries - of url_transparent (#27064) - -Extractors -+ [svtplay] Add support for svt.se/barnkanalen (#24817) -+ [svt] Extract timestamp (#27130) -* [svtplay] Improve thumbnail extraction (#27130) -* [youtube] Fix error reason extraction (#27081) -* [youtube] Fix like and dislike count extraction (#25977) -+ [youtube:tab] Add support for current video and fix lives extraction (#27126) -* [infoq] Fix format extraction (#25984) -* [francetv] Update to fix thumbnail URL issue (#27120) -* [youtube] Improve yt initial data extraction (#27093) -+ [discoverynetworks] Add support new TLC/DMAX URLs (#27100) -* [rai] Fix protocol relative relinker URLs (#22766) -* [rai] Fix unavailable video format detection -* [rai] Improve extraction -* [rai] Fix extraction (#27077) -* [viki] Improve format extraction -* [viki] Fix stream extraction from MPD (#27092) -* [googledrive] Fix format extraction (#26979) -+ [amara] Add support for amara.org (#20618) -* [vimeo:album] Fix extraction (#27079) -* [mtv] Fix mgid extraction (#26841) - - -version 2020.11.19 - -Core -* [extractor/common] Output error for invalid URLs in _is_valid_url (#21400, - #24151, #25617, #25618, #25586, #26068, #27072) - -Extractors -* [youporn] Fix upload date extraction -* [youporn] Make comment count optional (#26986) -* [arte] Rework extractors - * Reimplement embed and playlist extractors to delegate to the single - entrypoint artetv extractor - * 
Improve embeds detection (#27057) -+ [arte] Extract m3u8 formats (#27061) -* [mgtv] Fix format extraction (#26415) -+ [lbry] Add support for odysee.com (#26806) -* [francetv] Improve info extraction -+ [francetv] Add fallback video URL extraction (#27047) - - -version 2020.11.18 - -Extractors -* [spiegel] Fix extraction (#24206, #24767) -* [youtube] Improve extraction - + Add support for --no-playlist (#27009) - * Improve playlist and mix extraction (#26390, #26509, #26534, #27011) - + Extract playlist uploader data -* [youtube:tab] Fix view count extraction (#27051) -* [malltv] Fix extraction (#27035) -+ [bandcamp] Extract playlist description (#22684) -* [urplay] Fix extraction (#26828) -* [youtube:tab] Fix playlist title extraction (#27015) -* [youtube] Fix chapters extraction (#26005) - - -version 2020.11.17 - -Core -* [utils] Skip ! prefixed code in js_to_json - -Extractors -* [youtube:tab] Fix extraction with cookies provided (#27005) -* [lrt] Fix extraction with empty tags (#20264) -+ [ndr:embed:base] Extract subtitles (#25447, #26106) -+ [servus] Add support for pm-wissen.com (#25869) -* [servus] Fix extraction (#26872, #26967, #26983, #27000) -* [xtube] Fix extraction (#26996) -* [lrt] Fix extraction -+ [lbry] Add support for lbry.tv -+ [condenast] Extract subtitles -* [condenast] Fix extraction -* [bandcamp] Fix extraction (#26681, #26684) -* [rai] Fix RaiPlay extraction (#26064, #26096) -* [vlive] Fix extraction -* [usanetwork] Fix extraction -* [nbc] Fix NBCNews/Today/MSNBC extraction -* [cnbc] Fix extraction - - -version 2020.11.12 - -Extractors -* [youtube] Rework extractors - - -version 2020.11.01 - -Core -* [utils] Don't attempt to coerce JS strings to numbers in js_to_json (#26851) -* [downloader/http] Properly handle missing message in SSLError (#26646) -* [downloader/http] Fix access to not yet opened stream in retry - -Extractors -* [youtube] Fix JS player URL extraction -* [ytsearch] Fix extraction (#26920) -* [afreecatv] Fix typo (#26970) -* 
[23video] Relax URL regular expression (#26870) -+ [ustream] Add support for video.ibm.com (#26894) -* [iqiyi] Fix typo (#26884) -+ [expressen] Add support for di.se (#26670) -* [iprima] Improve video id extraction (#26507, #26494) - - -version 2020.09.20 - -Core -* [extractor/common] Relax interaction count extraction in _json_ld -+ [extractor/common] Extract author as uploader for VideoObject in _json_ld -* [downloader/hls] Fix incorrect end byte in Range HTTP header for - media segments with EXT-X-BYTERANGE (#14748, #24512) -* [extractor/common] Handle ssl.CertificateError in _request_webpage (#26601) -* [downloader/http] Improve timeout detection when reading block of data - (#10935) -* [downloader/http] Retry download when urlopen times out (#10935, #26603) - -Extractors -* [redtube] Extend URL regular expression (#26506) -* [twitch] Refactor -* [twitch:stream] Switch to GraphQL and fix reruns (#26535) -+ [telequebec] Add support for brightcove videos (#25833) -* [pornhub] Extract metadata from JSON-LD (#26614) -* [pornhub] Fix view count extraction (#26621, #26614) - - -version 2020.09.14 - -Core -+ [postprocessor/embedthumbnail] Add support for non jpg/png thumbnails - (#25687, #25717) - -Extractors -* [rtlnl] Extend URL regular expression (#26549, #25821) -* [youtube] Fix empty description extraction (#26575, #26006) -* [srgssr] Extend URL regular expression (#26555, #26556, #26578) -* [googledrive] Use redirect URLs for source format (#18877, #23919, #24689, - #26565) -* [svtplay] Fix id extraction (#26576) -* [redbulltv] Improve support for redbull.com TV localized URLs (#22063) -+ [redbulltv] Add support for new redbull.com TV URLs (#22037, #22063) -* [soundcloud:pagedplaylist] Reduce pagination limit (#26557) - - -version 2020.09.06 - -Core -+ [utils] Recognize wav mimetype (#26463) - -Extractors -* [nrktv:episode] Improve video id extraction (#25594, #26369, #26409) -* [youtube] Fix age gate content detection (#26100, #26152, #26311, #26384) -* 
[youtube:user] Extend URL regular expression (#26443) -* [xhamster] Improve initials regular expression (#26526, #26353) -* [svtplay] Fix video id extraction (#26425, #26428, #26438) -* [twitch] Rework extractors (#12297, #20414, #20604, #21811, #21812, #22979, - #24263, #25010, #25553, #25606) - * Switch to GraphQL - + Add support for collections - + Add support for clips and collections playlists -* [biqle] Improve video ext extraction -* [xhamster] Fix extraction (#26157, #26254) -* [xhamster] Extend URL regular expression (#25789, #25804, #25927) - - -version 2020.07.28 - -Extractors -* [youtube] Fix sigfunc name extraction (#26134, #26135, #26136, #26137) -* [youtube] Improve description extraction (#25937, #25980) -* [wistia] Restrict embed regular expression (#25969) -* [youtube] Prevent excess HTTP 301 (#25786) -+ [youtube:playlists] Extend URL regular expression (#25810) -+ [bellmedia] Add support for cp24.com clip URLs (#25764) -* [brightcove] Improve embed detection (#25674) - - -version 2020.06.16.1 - -Extractors -* [youtube] Force old layout (#25682, #25683, #25680, #25686) -* [youtube] Fix categories and improve tags extraction - - -version 2020.06.16 - -Extractors -* [youtube] Fix uploader id and uploader URL extraction -* [youtube] Improve view count extraction -* [youtube] Fix upload date extraction (#25677) -* [youtube] Fix thumbnails extraction (#25676) -* [youtube] Fix playlist and feed extraction (#25675) -+ [facebook] Add support for single-video ID links -+ [youtube] Extract chapters from JSON (#24819) -+ [kaltura] Add support for multiple embeds on a webpage (#25523) - - -version 2020.06.06 - -Extractors -* [tele5] Bypass geo restriction -+ [jwplatform] Add support for bypass geo restriction -* [tele5] Prefer jwplatform over nexx (#25533) -* [twitch:stream] Expect 400 and 410 HTTP errors from API -* [twitch:stream] Fix extraction (#25528) -* [twitch] Fix thumbnails extraction (#25531) -+ [twitch] Pass v5 Accept HTTP header (#25531) -* 
[brightcove] Fix subtitles extraction (#25540) -+ [malltv] Add support for sk.mall.tv (#25445) -* [periscope] Fix untitled broadcasts (#25482) -* [jwplatform] Improve embeds extraction (#25467) - - -version 2020.05.29 - -Core -* [postprocessor/ffmpeg] Embed series metadata with --add-metadata -* [utils] Fix file permissions in write_json_file (#12471, #25122) - -Extractors -* [ard:beta] Extend URL regular expression (#25405) -+ [youtube] Add support for more invidious instances (#25417) -* [giantbomb] Extend URL regular expression (#25222) -* [ard] Improve URL regular expression (#25134, #25198) -* [redtube] Improve formats extraction and extract m3u8 formats (#25311, - #25321) -* [indavideo] Switch to HTTPS for API request (#25191) -* [redtube] Improve title extraction (#25208) -* [vimeo] Improve format extraction and sorting (#25285) -* [soundcloud] Reduce API playlist page limit (#25274) -+ [youtube] Add support for yewtu.be (#25226) -* [mailru] Fix extraction (#24530, #25239) -* [bellator] Fix mgid extraction (#25195) - - -version 2020.05.08 - -Core -* [downloader/http] Request last data block of exact remaining size -* [downloader/http] Finish downloading once received data length matches - expected -* [extractor/common] Use compat_cookiejar_Cookie for _set_cookie to always - ensure cookie name and value are bytestrings on python 2 (#23256, #24776) -+ [compat] Introduce compat_cookiejar_Cookie -* [utils] Improve cookie files support - + Add support for UTF-8 in cookie files - * Skip malformed cookie file entries instead of crashing (invalid entry - length, invalid expires at) - -Extractors -* [youtube] Improve signature cipher extraction (#25187, #25188) -* [iprima] Improve extraction (#25138) -* [uol] Fix extraction (#22007) -+ [orf] Add support for more radio stations (#24938, #24968) -* [dailymotion] Fix typo -- [puhutv] Remove no longer available HTTP formats (#25124) - - -version 2020.05.03 - -Core -+ [extractor/common] Extract multiple JSON-LD entries -* 
[options] Clarify doc on --exec command (#19087, #24883) -* [extractor/common] Skip malformed ISM manifest XMLs while extracting - ISM formats (#24667) - -Extractors -* [crunchyroll] Fix and improve extraction (#25096, #25060) -* [youtube] Improve player id extraction -* [youtube] Use redirected video id if any (#25063) -* [yahoo] Fix GYAO Player extraction and relax URL regular expression - (#24178, #24778) -* [tvplay] Fix Viafree extraction (#15189, #24473, #24789) -* [tenplay] Relax URL regular expression (#25001) -+ [prosiebensat1] Extract series metadata -* [prosiebensat1] Improve extraction and remove 7tv.de support (#24948) -- [prosiebensat1] Remove 7tv.de support (#24948) -* [youtube] Fix DRM videos detection (#24736) -* [thisoldhouse] Fix video id extraction (#24548, #24549) -+ [soundcloud] Extract AAC format (#19173, #24708) -* [youtube] Skip broken multifeed videos (#24711) -* [nova:embed] Fix extraction (#24700) -* [motherless] Fix extraction (#24699) -* [twitch:clips] Extend URL regular expression (#24290, #24642) -* [tv4] Fix ISM formats extraction (#24667) -* [tele5] Fix extraction (#24553) -+ [mofosex] Add support for generic embeds (#24633) -+ [youporn] Add support for generic embeds -+ [spankwire] Add support for generic embeds (#24633) -* [spankwire] Fix extraction (#18924, #20648) - - -version 2020.03.24 - -Core -- [utils] Revert support for cookie files with spaces used instead of tabs - -Extractors -* [teachable] Update upskillcourses and gns3 domains -* [generic] Look for teachable embeds before wistia -+ [teachable] Extract chapter metadata (#24421) -+ [bilibili] Add support for player.bilibili.com (#24402) -+ [bilibili] Add support for new URL schema with BV ids (#24439, #24442) -* [limelight] Remove disabled API requests (#24255) -* [soundcloud] Fix download URL extraction (#24394) -+ [cbc:watch] Add support for authentication (#19160) -* [hellporno] Fix extraction (#24399) -* [xtube] Fix formats extraction (#24348) -* [ndr] Fix extraction 
(#24326) -* [nhk] Update m3u8 URL and use native HLS downloader (#24329) -- [nhk] Remove obsolete rtmp formats (#24329) -* [nhk] Relax URL regular expression (#24329) -- [vimeo] Revert fix showcase password protected video extraction (#24224) - - -version 2020.03.08 - -Core -+ [utils] Add support for cookie files with spaces used instead of tabs - -Extractors -+ [pornhub] Add support for pornhubpremium.com (#24288) -- [youtube] Remove outdated code and unnecessary requests -* [youtube] Improve extraction in 429 HTTP error conditions (#24283) -* [nhk] Update API version (#24270) - - -version 2020.03.06 - -Extractors -* [youtube] Fix age-gated videos support without login (#24248) -* [vimeo] Fix showcase password protected video extraction (#24224) -* [pornhub] Improve title extraction (#24184) -* [peertube] Improve extraction (#23657) -+ [servus] Add support for new URL schema (#23475, #23583, #24142) -* [vimeo] Fix subtitles URLs (#24209) - - -version 2020.03.01 - -Core -* [YoutubeDL] Force redirect URL to unicode on python 2 -- [options] Remove duplicate short option -v for --version (#24162) - -Extractors -* [xhamster] Fix extraction (#24205) -* [franceculture] Fix extraction (#24204) -+ [telecinco] Add support for article opening videos -* [telecinco] Fix extraction (#24195) -* [xtube] Fix metadata extraction (#21073, #22455) -* [youjizz] Fix extraction (#24181) -- Remove no longer needed compat_str around geturl -* [pornhd] Fix extraction (#24128) -+ [teachable] Add support for multiple videos per lecture (#24101) -+ [wistia] Add support for multiple generic embeds (#8347, #11385) -* [imdb] Fix extraction (#23443) -* [tv2dk:bornholm:play] Fix extraction (#24076) - - -version 2020.02.16 - -Core -* [YoutubeDL] Fix playlist entry indexing with --playlist-items (#10591, - #10622) -* [update] Fix updating via symlinks (#23991) -+ [compat] Introduce compat_realpath (#23991) - -Extractors -+ [npr] Add support for streams (#24042) -+ [24video] Add support for 
porn.24video.net (#23779, #23784) -- [jpopsuki] Remove extractor (#23858) -* [nova] Improve extraction (#23690) -* [nova:embed] Improve (#23690) -* [nova:embed] Fix extraction (#23672) -+ [abc:iview] Add support for 720p (#22907, #22921) -* [nytimes] Improve format sorting (#24010) -+ [toggle] Add support for mewatch.sg (#23895, #23930) -* [thisoldhouse] Fix extraction (#23951) -+ [popcorntimes] Add support for popcorntimes.tv (#23949) -* [sportdeutschland] Update to new API -* [twitch:stream] Lowercase channel id for stream request (#23917) -* [tv5mondeplus] Fix extraction (#23907, #23911) -* [tva] Relax URL regular expression (#23903) -* [vimeo] Fix album extraction (#23864) -* [viewlift] Improve extraction - * Fix extraction (#23851) - + Add support for authentication - + Add support for more domains -* [svt] Fix series extraction (#22297) -* [svt] Fix article extraction (#22897, #22919) -* [soundcloud] Improve private playlist/set tracks extraction (#3707) - - -version 2020.01.24 - -Extractors -* [youtube] Fix sigfunc name extraction (#23819) -* [stretchinternet] Fix extraction (#4319) -* [voicerepublic] Fix extraction -* [azmedien] Fix extraction (#23783) -* [businessinsider] Fix jwplatform id extraction (#22929, #22954) -+ [24video] Add support for 24video.vip (#23753) -* [ivi:compilation] Fix entries extraction (#23770) -* [ard] Improve extraction (#23761) - * Simplify extraction - + Extract age limit and series - * Bypass geo-restriction -+ [nbc] Add support for nbc multi network URLs (#23049) -* [americastestkitchen] Fix extraction -* [zype] Improve extraction - + Extract subtitles (#21258) - + Support URLs with alternative keys/tokens (#21258) - + Extract more metadata -* [orf:tvthek] Improve geo restricted videos detection (#23741) -* [soundcloud] Restore previews extraction (#23739) - - -version 2020.01.15 - -Extractors -* [yourporn] Fix extraction (#21645, #22255, #23459) -+ [canvas] Add support for new API endpoint (#17680, #18629) -* [ndr:base:embed] 
Improve thumbnails extraction (#23731) -+ [vodplatform] Add support for embed.kwikmotion.com domain -+ [twitter] Add support for promo_video_website cards (#23711) -* [orf:radio] Clean description and improve extraction -* [orf:fm4] Fix extraction (#23599) -* [safari] Fix kaltura session extraction (#23679, #23670) -* [lego] Fix extraction and extract subtitle (#23687) -* [cloudflarestream] Improve extraction - + Add support for bytehighway.net domain - + Add support for signed URLs - + Extract thumbnail -* [naver] Improve extraction - * Improve geo-restriction handling - + Extract automatic captions - + Extract uploader metadata - + Extract VLive HLS formats - * Improve metadata extraction -- [pandatv] Remove extractor (#23630) -* [dctp] Fix format extraction (#23656) -+ [scrippsnetworks] Add support for www.discovery.com videos -* [discovery] Fix anonymous token extraction (#23650) -* [nrktv:seriebase] Fix extraction (#23625, #23537) -* [wistia] Improve format extraction and extract subtitles (#22590) -* [vice] Improve extraction (#23631) -* [redtube] Detect private videos (#23518) - - -version 2020.01.01 - -Extractors -* [brightcove] Invalidate policy key cache on failing requests -* [pornhub] Improve locked videos detection (#22449, #22780) -+ [pornhub] Add support for m3u8 formats -* [pornhub] Fix extraction (#22749, #23082) -* [brightcove] Update policy key on failing requests -* [spankbang] Improve removed video detection (#23423) -* [spankbang] Fix extraction (#23307, #23423, #23444) -* [soundcloud] Automatically update client id on failing requests -* [prosiebensat1] Improve geo restriction handling (#23571) -* [brightcove] Cache brightcove player policy keys -* [teachable] Fail with error message if no video URL found -* [teachable] Improve locked lessons detection (#23528) -+ [scrippsnetworks] Add support for Scripps Networks sites (#19857, #22981) -* [mitele] Fix extraction (#21354, #23456) -* [soundcloud] Update client id (#23516) -* [mailru] Relax URL 
regular expressions (#23509) - - -version 2019.12.25 - -Core -* [utils] Improve str_to_int -+ [downloader/hls] Add ability to override AES decryption key URL (#17521) - -Extractors -* [mediaset] Fix parse formats (#23508) -+ [tv2dk:bornholm:play] Add support for play.tv2bornholm.dk (#23291) -+ [slideslive] Add support for url and vimeo service names (#23414) -* [slideslive] Fix extraction (#23413) -* [twitch:clips] Fix extraction (#23375) -+ [soundcloud] Add support for token protected embeds (#18954) -* [vk] Improve extraction - * Fix User Videos extraction (#23356) - * Extract all videos for lists with more than 1000 videos (#23356) - + Add support for video albums (#14327, #14492) -- [kontrtube] Remove extractor -- [videopremium] Remove extractor -- [musicplayon] Remove extractor (#9225) -+ [ufctv] Add support for ufcfightpass.imgdge.com and - ufcfightpass.imggaming.com (#23343) -+ [twitch] Extract m3u8 formats frame rate (#23333) -+ [imggaming] Add support for playlists and extract subtitles -+ [ufcarabia] Add support for UFC Arabia (#23312) -* [ufctv] Fix extraction -* [yahoo] Fix gyao brightcove player id (#23303) -* [vzaar] Override AES decryption key URL (#17521) -+ [vzaar] Add support for AES HLS manifests (#17521, #23299) -* [nrl] Fix extraction -* [teachingchannel] Fix extraction -* [nintendo] Fix extraction and partially add support for Nintendo Direct - videos (#4592) -+ [ooyala] Add better fallback values for domain and streams variables -+ [youtube] Add support for youtubekids.com (#23272) -* [tv2] Detect DRM protection -+ [tv2] Add support for katsomo.fi and mtv.fi (#10543) -* [tv2] Fix tv2.no article extraction -* [msn] Improve extraction - + Add support for YouTube and NBCSports embeds - + Add support for articles with multiple videos - * Improve AOL embed support - * Improve format extraction -* [abcotvs] Relax URL regular expression and improve metadata extraction - (#18014) -* [channel9] Reduce response size -* [adobetv] Improve extraction - * Use 
OnDemandPagedList for list extractors - * Reduce show extraction requests - * Extract original video format and subtitles - + Add support for adobe tv embeds - - -version 2019.11.28 - -Core -+ [utils] Add generic caesar cipher and rot47 -* [utils] Handle rd-suffixed day parts in unified_strdate (#23199) - -Extractors -* [vimeo] Improve extraction - * Fix review extraction - * Fix ondemand extraction - * Make password protected player case as an expected error (#22896) - * Simplify channel based extractors code -- [openload] Remove extractor (#11999) -- [verystream] Remove extractor -- [streamango] Remove extractor (#15406) -* [dailymotion] Improve extraction - * Extract http formats included in m3u8 manifest - * Fix user extraction (#3553, #21415) - + Add support for User Authentication (#11491) - * Fix password protected videos extraction (#23176) - * Respect age limit option and family filter cookie value (#18437) - * Handle video url playlist query param - * Report allowed countries for geo-restricted videos -* [corus] Improve extraction - + Add support for Series Plus, W Network, YTV, ABC Spark, disneychannel.com - and disneylachaine.ca (#20861) - + Add support for self hosted videos (#22075) - * Detect DRM protection (#14910, #9164) -* [vivo] Fix extraction (#22328, #22279) -+ [bitchute] Extract upload date (#22990, #23193) -* [soundcloud] Update client id (#23214) - - -version 2019.11.22 - -Core -+ [extractor/common] Clean jwplayer description HTML tags -+ [extractor/common] Add data, headers and query to all major extract formats - methods - -Extractors -* [chaturbate] Fix extraction (#23010, #23012) -+ [ntvru] Add support for non relative file URLs (#23140) -* [vk] Fix wall audio thumbnails extraction (#23135) -* [ivi] Fix format extraction (#21991) -- [comcarcoff] Remove extractor -+ [drtv] Add support for new URL schema (#23059) -+ [nexx] Add support for Multi Player JS Setup (#23052) -+ [teamcoco] Add support for new videos (#23054) -* [soundcloud] Check 
if the soundtrack has downloads left (#23045) -* [facebook] Fix posts video data extraction (#22473) -- [addanime] Remove extractor -- [minhateca] Remove extractor -- [daisuki] Remove extractor -* [seeker] Fix extraction -- [revision3] Remove extractors -* [twitch] Fix video comments URL (#18593, #15828) -* [twitter] Improve extraction - + Add support for generic embeds (#22168) - * Always extract http formats for native videos (#14934) - + Add support for Twitter Broadcasts (#21369) - + Extract more metadata - * Improve VMap format extraction - * Unify extraction code for both twitter statuses and cards -+ [twitch] Add support for Clip embed URLs -* [lnkgo] Fix extraction (#16834) -* [mixcloud] Improve extraction - * Improve metadata extraction (#11721) - * Fix playlist extraction (#22378) - * Fix user mixes extraction (#15197, #17865) -+ [kinja] Add support for Kinja embeds (#5756, #11282, #22237, #22384) -* [onionstudios] Fix extraction -+ [hotstar] Pass Referer header to format requests (#22836) -* [dplay] Minimize response size -+ [patreon] Extract uploader_id and filesize -* [patreon] Minimize response size -* [roosterteeth] Fix login request (#16094, #22689) - - -version 2019.11.05 - -Extractors -+ [scte] Add support for learning.scte.org (#22975) -+ [msn] Add support for Vidible and AOL embeds (#22195, #22227) -* [myspass] Fix video URL extraction and improve metadata extraction (#22448) -* [jamendo] Improve extraction - * Fix album extraction (#18564) - * Improve metadata extraction (#18565, #21379) -* [mediaset] Relax URL guid matching (#18352) -+ [mediaset] Extract unprotected M3U and MPD manifests (#17204) -* [telegraaf] Fix extraction -+ [bellmedia] Add support for marilyn.ca videos (#22193) -* [stv] Fix extraction (#22928) -- [iconosquare] Remove extractor -- [keek] Remove extractor -- [gameone] Remove extractor (#21778) -- [flipagram] Remove extractor -- [bambuser] Remove extractor -* [wistia] Reduce embed extraction false positives -+ [wistia] Add 
support for inline embeds (#22931) -- [go90] Remove extractor -* [kakao] Remove raw request -+ [kakao] Extract format total bitrate -* [daum] Fix VOD and Clip extraction (#15015) -* [kakao] Improve extraction - + Add support for embed URLs - + Add support for Kakao Legacy vid based embed URLs - * Only extract fields used for extraction - * Strip description and extract tags -* [mixcloud] Fix cloudcast data extraction (#22821) -* [yahoo] Improve extraction - + Add support for live streams (#3597, #3779, #22178) - * Bypass cookie consent page for european domains (#16948, #22576) - + Add generic support for embeds (#20332) -* [tv2] Fix and improve extraction (#22787) -+ [tv2dk] Add support for TV2 DK sites -* [onet] Improve extraction - + Add support for onet100.vod.pl - + Extract m3u8 formats - * Correct audio only format info -* [fox9] Fix extraction - - -version 2019.10.29 - -Core -* [utils] Actualize major IPv4 address blocks per country - -Extractors -+ [go] Add support for abc.com and freeform.com (#22823, #22864) -+ [mtv] Add support for mtvjapan.com -* [mtv] Fix extraction for mtv.de (#22113) -* [videodetective] Fix extraction -* [internetvideoarchive] Fix extraction -* [nbcnews] Fix extraction (#12569, #12576, #21703, #21923) -- [hark] Remove extractor -- [tutv] Remove extractor -- [learnr] Remove extractor -- [macgamestore] Remove extractor -* [la7] Update Kaltura service URL (#22358) -* [thesun] Fix extraction (#16966) -- [makertv] Remove extractor -+ [tenplay] Add support for 10play.com.au (#21446) -* [soundcloud] Improve extraction - * Improve format extraction (#22123) - + Extract uploader_id and uploader_url (#21916) - + Extract all known thumbnails (#19071, #20659) - * Fix extraction for private playlists (#20976) - + Add support for playlist embeds (#20976) - * Skip preview formats (#22806) -* [dplay] Improve extraction - + Add support for dplay.fi, dplay.jp and es.dplay.com (#16969) - * Fix it.dplay.com extraction (#22826) - + Extract creator, 
tags and thumbnails - * Handle playback API call errors -+ [discoverynetworks] Add support for dplay.co.uk -* [vk] Improve extraction - + Add support for Odnoklassniki embeds - + Extract more videos from user lists (#4470) - + Fix wall post audio extraction (#18332) - * Improve error detection (#22568) -+ [odnoklassniki] Add support for embeds -* [puhutv] Improve extraction - * Fix subtitles extraction - * Transform HLS URLs to HTTP URLs - * Improve metadata extraction -* [ceskatelevize] Skip DRM media -+ [facebook] Extract subtitles (#22777) -* [globo] Handle alternative hash signing method - - -version 2019.10.22 - -Core -* [utils] Improve subtitles_filename (#22753) - -Extractors -* [facebook] Bypass download rate limits (#21018) -+ [contv] Add support for contv.com -- [viewster] Remove extractor -* [xfileshare] Improve extractor (#17032, #17906, #18237, #18239) - * Update the list of domains - + Add support for aa-encoded video data - * Improve jwplayer format extraction - + Add support for Clappr sources -* [mangomolo] Fix video format extraction and add support for player URLs -* [audioboom] Improve metadata extraction -* [twitch] Update VOD URL matching (#22395, #22727) -- [mit] Remove support for video.mit.edu (#22403) -- [servingsys] Remove extractor (#22639) -* [dumpert] Fix extraction (#22428, #22564) -* [atresplayer] Fix extraction (#16277, #16716) - - -version 2019.10.16 - -Core -* [extractor/common] Make _is_valid_url more relaxed - -Extractors -* [vimeo] Improve album videos id extraction (#22599) -+ [globo] Extract subtitles (#22713) -* [bokecc] Improve player params extraction (#22638) -* [nexx] Handle result list (#22666) -* [vimeo] Fix VHX embed extraction -* [nbc] Switch to graphql API (#18581, #22693, #22701) -- [vessel] Remove extractor -- [promptfile] Remove extractor (#6239) -* [kaltura] Fix service URL extraction (#22658) -* [kaltura] Fix embed info strip (#22658) -* [globo] Fix format extraction (#20319) -* [redtube] Improve metadata 
extraction (#22492, #22615) -* [pornhub:uservideos:upload] Fix extraction (#22619) -+ [telequebec:squat] Add support for squat.telequebec.tv (#18503) -- [wimp] Remove extractor (#22088, #22091) -+ [gfycat] Extend URL regular expression (#22225) -+ [chaturbate] Extend URL regular expression (#22309) -* [peertube] Update instances (#22414) -+ [telequebec] Add support for coucou.telequebec.tv (#22482) -+ [xvideos] Extend URL regular expression (#22471) -- [youtube] Remove support for invidious.enkirton.net (#22543) -+ [openload] Add support for oload.monster (#22592) -* [nrktv:seriebase] Fix extraction (#22596) -+ [youtube] Add support for yt.lelux.fi (#22597) -* [orf:tvthek] Make manifest requests non fatal (#22578) -* [teachable] Skip login when already logged in (#22572) -* [viewlift] Improve extraction (#22545) -* [nonktube] Fix extraction (#22544) - - -version 2019.09.28 - -Core -* [YoutubeDL] Honour all --get-* options with --flat-playlist (#22493) - -Extractors -* [vk] Fix extraction (#22522) -* [heise] Fix kaltura embeds extraction (#22514) -* [ted] Check for resources validity and extract subtitled downloads (#22513) -+ [youtube] Add support for - owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya.b32.i2p (#22292) -+ [nhk] Add support for clips -* [nhk] Fix video extraction (#22249, #22353) -* [byutv] Fix extraction (#22070) -+ [openload] Add support for oload.online (#22304) -+ [youtube] Add support for invidious.drycat.fr (#22451) -* [jwplatform] Do not match video URLs (#20596, #22148) -* [youtube:playlist] Unescape playlist uploader (#22483) -+ [bilibili] Add support for audio albums and songs (#21094) -+ [instagram] Add support for tv URLs -+ [mixcloud] Allow uppercase letters in format URLs (#19280) -* [brightcove] Delegate all supported legacy URLs to new extractor (#11523, - #12842, #13912, #15669, #16303) -* [hotstar] Use native HLS downloader by default -+ [hotstar] Extract more formats (#22323) -* [9now] Fix extraction (#22361) -* [zdf] Bypass geo 
restriction -+ [tv4] Extract series metadata -* [tv4] Fix extraction (#22443) - - -version 2019.09.12.1 - -Extractors -* [youtube] Remove quality and tbr for itag 43 (#22372) - - -version 2019.09.12 - -Extractors -* [youtube] Quick extraction tempfix (#22367, #22163) - - -version 2019.09.01 - -Core -+ [extractor/generic] Add support for squarespace embeds (#21294, #21802, - #21859) -+ [downloader/external] Respect mtime option for aria2c (#22242) - -Extractors -+ [xhamster:user] Add support for user pages (#16330, #18454) -+ [xhamster] Add support for more domains -+ [verystream] Add support for woof.tube (#22217) -+ [dailymotion] Add support for lequipe.fr (#21328, #22152) -+ [openload] Add support for oload.vip (#22205) -+ [bbccouk] Extend URL regular expression (#19200) -+ [youtube] Add support for invidious.nixnet.xyz and yt.elukerio.org (#22223) -* [safari] Fix authentication (#22161, #22184) -* [usanetwork] Fix extraction (#22105) -+ [einthusan] Add support for einthusan.ca (#22171) -* [youtube] Improve unavailable message extraction (#22117) -+ [piksel] Extract subtitles (#20506) - - -version 2019.08.13 - -Core -* [downloader/fragment] Fix ETA calculation of resumed download (#21992) -* [YoutubeDL] Check annotations availability (#18582) - -Extractors -* [youtube:playlist] Improve flat extraction (#21927) -* [youtube] Fix annotations extraction (#22045) -+ [discovery] Extract series meta field (#21808) -* [youtube] Improve error detection (#16445) -* [vimeo] Fix album extraction (#1933, #15704, #15855, #18967, #21986) -+ [roosterteeth] Add support for watch URLs -* [discovery] Limit video data by show slug (#21980) - - -version 2019.08.02 - -Extractors -+ [tvigle] Add support for HLS and DASH formats (#21967) -* [tvigle] Fix extraction (#21967) -+ [yandexvideo] Add support for DASH formats (#21971) -* [discovery] Use API call for video data extraction (#21808) -+ [mgtv] Extract format_note (#21881) -* [tvn24] Fix metadata extraction (#21833, #21834) -* 
[dlive] Relax URL regular expression (#21909) -+ [openload] Add support for oload.best (#21913) -* [youtube] Improve metadata extraction for age gate content (#21943) - - -version 2019.07.30 - -Extractors -* [youtube] Fix and improve title and description extraction (#21934) - - -version 2019.07.27 - -Extractors -+ [yahoo:japannews] Add support for yahoo.co.jp (#21698, #21265) -+ [discovery] Add support go.discovery.com URLs -* [youtube:playlist] Relax video regular expression (#21844) -* [generic] Restrict --default-search schemeless URLs detection pattern - (#21842) -* [vrv] Fix CMS signing query extraction (#21809) - - -version 2019.07.16 - -Extractors -+ [asiancrush] Add support for yuyutv.com, midnightpulp.com and cocoro.tv - (#21281, #21290) -* [kaltura] Check source format URL (#21290) -* [ctsnews] Fix YouTube embeds extraction (#21678) -+ [einthusan] Add support for einthusan.com (#21748, #21775) -+ [youtube] Add support for invidious.mastodon.host (#21777) -+ [gfycat] Extend URL regular expression (#21779, #21780) -* [youtube] Restrict is_live extraction (#21782) - - -version 2019.07.14 - -Extractors -* [porn91] Fix extraction (#21312) -+ [yandexmusic] Extract track number and disk number (#21421) -+ [yandexmusic] Add support for multi disk albums (#21420, #21421) -* [lynda] Handle missing subtitles (#20490, #20513) -+ [youtube] Add more invidious instances to URL regular expression (#21694) -* [twitter] Improve uploader id extraction (#21705) -* [spankbang] Fix and improve metadata extraction -* [spankbang] Fix extraction (#21763, #21764) -+ [dlive] Add support for dlive.tv (#18080) -+ [livejournal] Add support for livejournal.com (#21526) -* [roosterteeth] Fix free episode extraction (#16094) -* [dbtv] Fix extraction -* [bellator] Fix extraction -- [rudo] Remove extractor (#18430, #18474) -* [facebook] Fallback to twitter:image meta for thumbnail extraction (#21224) -* [bleacherreport] Fix Bleacher Report CMS extraction -* [espn] Fix fivethirtyeight.com 
extraction -* [5tv] Relax video URL regular expression and support https URLs -* [youtube] Fix is_live extraction (#21734) -* [youtube] Fix authentication (#11270) - - -version 2019.07.12 - -Core -+ [adobepass] Add support for AT&T U-verse (mso ATT) (#13938, #21016) - -Extractors -+ [mgtv] Pass Referer HTTP header for format URLs (#21726) -+ [beeg] Add support for api/v6 v2 URLs without t argument (#21701) -* [voxmedia:volume] Improve vox embed extraction (#16846) -* [funnyordie] Move extraction to VoxMedia extractor (#16846) -* [gameinformer] Fix extraction (#8895, #15363, #17206) -* [funk] Fix extraction (#17915) -* [packtpub] Relax lesson URL regular expression (#21695) -* [packtpub] Fix extraction (#21268) -* [philharmoniedeparis] Relax URL regular expression (#21672) -* [peertube] Detect embed URLs in generic extraction (#21666) -* [mixer:vod] Relax URL regular expression (#21657, #21658) -+ [lecturio] Add support for id based URLs (#21630) -+ [go] Add site info for disneynow (#21613) -* [ted] Restrict info regular expression (#21631) -* [twitch:vod] Actualize m3u8 URL (#21538, #21607) -* [vzaar] Fix videos with empty title (#21606) -* [tvland] Fix extraction (#21384) -* [arte] Clean extractor (#15583, #21614) - - -version 2019.07.02 - -Core -+ [utils] Introduce random_user_agent and use as default User-Agent (#21546) - -Extractors -+ [vevo] Add support for embed.vevo.com URLs (#21565) -+ [openload] Add support for oload.biz (#21574) -* [xiami] Update API base URL (#21575) -* [yourporn] Fix extraction (#21585) -+ [acast] Add support for URLs with episode id (#21444) -+ [dailymotion] Add support for DM.player embeds -* [soundcloud] Update client id - - -version 2019.06.27 - -Extractors -+ [go] Add support for disneynow.com (#21528) -* [mixer:vod] Relax URL regular expression (#21531, #21536) -* [drtv] Relax URL regular expression -* [fusion] Fix extraction (#17775, #21269) -- [nfb] Remove extractor (#21518) -+ [beeg] Add support for api/v6 v2 URLs (#21511) -+ 
[brightcove:new] Add support for playlists (#21331) -+ [openload] Add support for oload.life (#21495) -* [vimeo:channel,group] Make title extraction non fatal -* [vimeo:likes] Implement extractor in terms of channel extractor (#21493) -+ [pornhub] Add support for more paged video sources -+ [pornhub] Add support for downloading single pages and search pages (#15570) -* [pornhub] Rework extractors (#11922, #16078, #17454, #17936) -+ [youtube] Add another signature function pattern -* [tf1] Fix extraction (#21365, #21372) -* [crunchyroll] Move Accept-Language workaround to video extractor since - it causes playlists not to list any videos -* [crunchyroll:playlist] Fix and relax title extraction (#21291, #21443) - - -version 2019.06.21 - -Core -* [utils] Restrict parse_codecs and add theora as known vcodec (#21381) - -Extractors -* [youtube] Update signature function patterns (#21469, #21476) -* [youtube] Make --write-annotations non fatal (#21452) -+ [sixplay] Add support for rtlmost.hu (#21405) -* [youtube] Hardcode codec metadata for av01 video only formats (#21381) -* [toutv] Update client key (#21370) -+ [biqle] Add support for new embed domain -* [cbs] Improve DRM protected videos detection (#21339) - - -version 2019.06.08 - -Core -* [downloader/common] Improve rate limit (#21301) -* [utils] Improve strip_or_none -* [extractor/common] Strip src attribute for HTML5 entries code (#18485, - #21169) - -Extractors -* [ted] Fix playlist extraction (#20844, #21032) -* [vlive:playlist] Fix video extraction when no playlist is found (#20590) -+ [vlive] Add CH+ support (#16887, #21209) -+ [openload] Add support for oload.website (#21329) -+ [tvnow] Extract HD formats (#21201) -+ [redbulltv] Add support for rrn:content URLs (#21297) -* [youtube] Fix average rating extraction (#21304) -+ [bitchute] Extract HTML5 formats (#21306) -* [cbsnews] Fix extraction (#9659, #15397) -* [vvvvid] Relax URL regular expression (#21299) -+ [prosiebensat1] Add support for new API (#21272) -+ 
[vrv] Extract adaptive_hls formats (#21243) -* [viki] Switch to HTTPS (#21001) -* [LiveLeak] Check if the original videos exist (#21206, #21208) -* [rtp] Fix extraction (#15099) -* [youtube] Improve DRM protected videos detection (#1774) -+ [srgssrplay] Add support for popupvideoplayer URLs (#21155) -+ [24video] Add support for porno.24video.net (#21194) -+ [24video] Add support for 24video.site (#21193) -- [pornflip] Remove extractor -- [criterion] Remove extractor (#21195) -* [pornhub] Use HTTPS (#21061) -* [bitchute] Fix uploader extraction (#21076) -* [streamcloud] Reduce waiting time to 6 seconds (#21092) -- [novamov] Remove extractors (#21077) -+ [openload] Add support for oload.press (#21135) -* [vivo] Fix extraction (#18906, #19217) - - -version 2019.05.20 - -Core -+ [extractor/common] Move workaround for applying first Set-Cookie header - into a separate _apply_first_set_cookie_header method - -Extractors -* [safari] Fix authentication (#21090) -* [vk] Use _apply_first_set_cookie_header -* [vrt] Fix extraction (#20527) -+ [canvas] Add support for vrtnieuws and sporza site ids and extract - AES HLS formats -+ [vrv] Extract captions (#19238) -* [tele5] Improve video id extraction -* [tele5] Relax URL regular expression (#21020, #21063) -* [svtplay] Update API URL (#21075) -+ [yahoo:gyao] Add X-User-Agent header to dam proxy requests (#21071) - - -version 2019.05.11 - -Core -* [utils] Transliterate "þ" as "th" (#20897) - -Extractors -+ [cloudflarestream] Add support for videodelivery.net (#21049) -+ [byutv] Add support for DVR videos (#20574, #20676) -+ [gfycat] Add support for URLs with tags (#20696, #20731) -+ [openload] Add support for verystream.com (#20701, #20967) -* [youtube] Use sp field value for signature field name (#18841, #18927, - #21028) -+ [yahoo:gyao] Extend URL regular expression (#21008) -* [youtube] Fix channel id extraction (#20982, #21003) -+ [sky] Add support for news.sky.com (#13055) -+ [youtube:entrylistbase] Retry on 5xx HTTP errors 
(#20965) -+ [francetvinfo] Extend video id extraction (#20619, #20740) -* [4tube] Update token hosts (#20918) -* [hotstar] Move to API v2 (#20931) -* [fox] Fix API error handling under python 2 (#20925) -+ [redbulltv] Extend URL regular expression (#20922) - - -version 2019.04.30 - -Extractors -* [openload] Use real Chrome versions (#20902) -- [youtube] Remove info el for get_video_info request -* [youtube] Improve extraction robustness -- [dramafever] Remove extractor (#20868) -* [adn] Fix subtitle extraction (#12724) -+ [ccc] Extract creator (#20355) -+ [ccc:playlist] Add support for media.ccc.de playlists (#14601, #20355) -+ [sverigesradio] Add support for sverigesradio.se (#18635) -+ [cinemax] Add support for cinemax.com -* [sixplay] Try extracting non-DRM protected manifests (#20849) -+ [youtube] Extract Youtube Music Auto-generated metadata (#20599, #20742) -- [wrzuta] Remove extractor (#20684, #20801) -* [twitch] Prefer source format (#20850) -+ [twitcasting] Add support for private videos (#20843) -* [reddit] Validate thumbnail URL (#20030) -* [yandexmusic] Fix track URL extraction (#20820) - - -version 2019.04.24 - -Extractors -* [youtube] Fix extraction (#20758, #20759, #20761, #20762, #20764, #20766, - #20767, #20769, #20771, #20768, #20770) -* [toutv] Fix extraction and extract series info (#20757) -+ [vrv] Add support for movie listings (#19229) -+ [youtube] Print error when no data is available (#20737) -+ [soundcloud] Add support for new rendition and improve extraction (#20699) -+ [ooyala] Add support for geo verification proxy -+ [nrl] Add support for nrl.com (#15991) -+ [vimeo] Extract live archive source format (#19144) -+ [vimeo] Add support for live streams and improve info extraction (#19144) -+ [ntvcojp] Add support for cu.ntv.co.jp -+ [nhk] Extract RTMPT format -+ [nhk] Add support for audio URLs -+ [udemy] Add another course id extraction pattern (#20491) -+ [openload] Add support for oload.services (#20691) -+ [openload] Add support for 
openloed.co (#20691, #20693) -* [bravotv] Fix extraction (#19213) - - -version 2019.04.17 - -Extractors -* [openload] Randomize User-Agent (#20688) -+ [openload] Add support for oladblock domains (#20471) -* [adn] Fix subtitle extraction (#12724) -+ [aol] Add support for localized websites -+ [yahoo] Add support for GYAO episode URLs -+ [yahoo] Add support for streaming.yahoo.co.jp (#5811, #7098) -+ [yahoo] Add support for gyao.yahoo.co.jp -* [aenetworks] Fix history topic extraction and extract more formats -+ [cbs] Extract smpte and vtt subtitles -+ [streamango] Add support for streamcherry.com (#20592) -+ [yourporn] Add support for sxyprn.com (#20646) -* [mgtv] Fix extraction (#20650) -* [linkedin:learning] Use urljoin for form action URL (#20431) -+ [gdc] Add support for kaltura embeds (#20575) -* [dispeak] Improve mp4 bitrate extraction -* [kaltura] Sanitize embed URLs -* [jwplatform] Do not match manifest URLs (#20596) -* [aol] Restrict URL regular expression and improve format extraction -+ [tiktok] Add support for new URL schema (#20573) -+ [stv:player] Add support for player.stv.tv (#20586) - - -version 2019.04.07 - -Core -+ [downloader/external] Pass rtmp_conn to ffmpeg - -Extractors -+ [ruutu] Add support for audio podcasts (#20473, #20545) -+ [xvideos] Extract all thumbnails (#20432) -+ [platzi] Add support for platzi.com (#20562) -* [dvtv] Fix extraction (#18514, #19174) -+ [vrv] Add basic support for individual movie links (#19229) -+ [bfi:player] Add support for player.bfi.org.uk (#19235) -* [hbo] Fix extraction and extract subtitles (#14629, #13709) -* [youtube] Extract srv[1-3] subtitle formats (#20566) -* [adultswim] Fix extraction (#18025) -* [teamcoco] Fix extraction and add support for subdomains (#17099, #20339) -* [adn] Fix subtitle compatibility with ffmpeg -* [adn] Fix extraction and add support for positioning styles (#20549) -* [vk] Use unique video id (#17848) -* [newstube] Fix extraction -* [rtl2] Actualize extraction -+ [adobeconnect] Add 
support for adobeconnect.com (#20283) -+ [gaia] Add support for authentication (#14605) -+ [mediasite] Add support for dashed ids and named catalogs (#20531) - - -version 2019.04.01 - -Core -* [utils] Improve int_or_none and float_or_none (#20403) -* Check for valid --min-sleep-interval when --max-sleep-interval is specified - (#20435) - -Extractors -+ [weibo] Extend URL regular expression (#20496) -+ [xhamster] Add support for xhamster.one (#20508) -+ [mediasite] Add support for catalogs (#20507) -+ [teamtreehouse] Add support for teamtreehouse.com (#9836) -+ [ina] Add support for audio URLs -* [ina] Improve extraction -* [cwtv] Fix episode number extraction (#20461) -* [npo] Improve DRM detection -+ [pornhub] Add support for DASH formats (#20403) -* [svtplay] Update API endpoint (#20430) - - -version 2019.03.18 - -Core -* [extractor/common] Improve HTML5 entries extraction -+ [utils] Introduce parse_bitrate -* [update] Hide update URLs behind redirect -* [extractor/common] Fix url meta field for unfragmented DASH formats (#20346) - -Extractors -+ [yandexvideo] Add extractor -* [openload] Improve embed detection -+ [corus] Add support for bigbrothercanada.ca (#20357) -+ [orf:radio] Extract series (#20012) -+ [cbc:watch] Add support for gem.cbc.ca (#20251, #20359) -- [anysex] Remove extractor (#19279) -+ [ciscolive] Add support for new URL schema (#20320, #20351) -+ [youtube] Add support for invidiou.sh (#20309) -- [anitube] Remove extractor (#20334) -- [ruleporn] Remove extractor (#15344, #20324) -* [npr] Fix extraction (#10793, #13440) -* [biqle] Fix extraction (#11471, #15313) -* [viddler] Modernize -* [moevideo] Fix extraction -* [primesharetv] Remove extractor -* [hypem] Modernize and extract more metadata (#15320) -* [veoh] Fix extraction -* [escapist] Modernize -- [videomega] Remove extractor (#10108) -+ [beeg] Add support for beeg.porn (#20306) -* [vimeo:review] Improve config url extraction and extract original format - (#20305) -* [fox] Detect geo 
restriction and authentication errors (#20208) - - -version 2019.03.09 - -Core -* [extractor/common] Use compat_etree_Element -+ [compat] Introduce compat_etree_Element -* [extractor/common] Fallback url to base URL for DASH formats -* [extractor/common] Do not fail on invalid data while parsing F4M manifest - in non fatal mode -* [extractor/common] Return MPD manifest as format's url meta field (#20242) -* [utils] Strip #HttpOnly_ prefix from cookies files (#20219) - -Extractors -* [francetv:site] Relax video id regular expression (#20268) -* [toutv] Detect invalid login error -* [toutv] Fix authentication (#20261) -+ [urplay] Extract timestamp (#20235) -+ [openload] Add support for oload.space (#20246) -* [facebook] Improve uploader extraction (#20250) -* [bbc] Use compat_etree_Element -* [crunchyroll] Use compat_etree_Element -* [npo] Improve ISM extraction -* [rai] Improve extraction (#20253) -* [paramountnetwork] Fix mgid extraction (#20241) -* [libsyn] Improve extraction (#20229) -+ [youtube] Add more invidious instances to URL regular expression (#20228) -* [spankbang] Fix extraction (#20023) -* [espn] Extend URL regular expression (#20013) -* [sixplay] Handle videos with empty assets (#20016) -+ [vimeo] Add support for Vimeo Pro portfolio protected videos (#20070) - - -version 2019.03.01 - -Core -+ [downloader/external] Add support for rate limit and retries for wget -* [downloader/external] Fix infinite retries for curl (#19303) - -Extractors -* [npo] Fix extraction (#20084) -* [francetv:site] Extend video id regex (#20029, #20071) -+ [periscope] Extract width and height (#20015) -* [servus] Fix extraction (#19297) -* [bbccouk] Make subtitles non fatal (#19651) -* [metacafe] Fix family filter bypass (#19287) - - -version 2019.02.18 - -Extractors -* [tvp:website] Fix and improve extraction -+ [tvp] Detect unavailable videos -* [tvp] Fix description extraction and make thumbnail optional -+ [linuxacademy] Add support for linuxacademy.com (#12207) -* 
[bilibili] Update keys (#19233) -* [udemy] Extend URL regular expressions (#14330, #15883) -* [udemy] Update User-Agent and detect captcha (#14713, #15839, #18126) -* [noovo] Fix extraction (#19230) -* [rai] Relax URL regular expression (#19232) -+ [vshare] Pass Referer to download request (#19205, #19221) -+ [openload] Add support for oload.live (#19222) -* [imgur] Use video id as title fallback (#18590) -+ [twitch] Add new source format detection approach (#19193) -* [tvplayhome] Fix video id extraction (#19190) -* [tvplayhome] Fix episode metadata extraction (#19190) -* [rutube:embed] Fix extraction (#19163) -+ [rutube:embed] Add support private videos (#19163) -+ [soundcloud] Extract more metadata -+ [trunews] Add support for trunews.com (#19153) -+ [linkedin:learning] Extract chapter_number and chapter_id (#19162) - - -version 2019.02.08 - -Core -* [utils] Improve JSON-LD regular expression (#18058) -* [YoutubeDL] Fallback to ie_key of matching extractor while making - download archive id when no explicit ie_key is provided (#19022) - -Extractors -+ [malltv] Add support for mall.tv (#18058, #17856) -+ [spankbang:playlist] Add support for playlists (#19145) -* [spankbang] Extend URL regular expression -* [trutv] Fix extraction (#17336) -* [toutv] Fix authentication (#16398, #18700) -* [pornhub] Fix tags and categories extraction (#13720, #19135) -* [pornhd] Fix formats extraction -+ [pornhd] Extract like count (#19123, #19125) -* [radiocanada] Switch to the new media requests (#19115) -+ [teachable] Add support for courses.workitdaily.com (#18871) -- [vporn] Remove extractor (#16276) -+ [soundcloud:pagedplaylist] Add ie and title to entries (#19022, #19086) -+ [drtuber] Extract duration (#19078) -* [soundcloud] Fix paged playlists extraction, add support for albums and update client id -* [soundcloud] Update client id -* [drtv] Improve preference (#19079) -+ [openload] Add support for openload.pw and oload.pw (#18930) -+ [openload] Add support for oload.info 
(#19073) -* [crackle] Authorize media detail request (#16931) - - -version 2019.01.30.1 - -Core -* [postprocessor/ffmpeg] Fix avconv processing broken in #19025 (#19067) - - -version 2019.01.30 - -Core -* [postprocessor/ffmpeg] Do not copy Apple TV chapter tracks while embedding - subtitles (#19024, #19042) -* [postprocessor/ffmpeg] Disable "Last message repeated" messages (#19025) - -Extractors -* [yourporn] Fix extraction and extract duration (#18815, #18852, #19061) -* [drtv] Improve extraction (#19039) - + Add support for EncryptedUri videos - + Extract more metadata - * Fix subtitles extraction -+ [fox] Add support for locked videos using cookies (#19060) -* [fox] Fix extraction for free videos (#19060) -+ [zattoo] Add support for tv.salt.ch (#19059) - - -version 2019.01.27 - -Core -+ [extractor/common] Extract season in _json_ld -* [postprocessor/ffmpeg] Fallback to ffmpeg/avconv for audio codec detection - (#681) - -Extractors -* [vice] Fix extraction for locked videos (#16248) -+ [wakanim] Detect DRM protected videos -+ [wakanim] Add support for wakanim.tv (#14374) -* [usatoday] Fix extraction for videos with custom brightcove partner id - (#18990) -* [drtv] Fix extraction (#18989) -* [nhk] Extend URL regular expression (#18968) -* [go] Fix Adobe Pass requests for Disney Now (#18901) -+ [openload] Add support for oload.club (#18969) - - -version 2019.01.24 - -Core -* [YoutubeDL] Fix negation for string operators in format selection (#18961) - - -version 2019.01.23 - -Core -* [utils] Fix urljoin for paths with non-http(s) schemes -* [extractor/common] Improve jwplayer relative URL handling (#18892) -+ [YoutubeDL] Add negation support for string comparisons in format selection - expressions (#18600, #18805) -* [extractor/common] Improve HLS video-only format detection (#18923) - -Extractors -* [crunchyroll] Extend URL regular expression (#18955) -* [pornhub] Bypass scrape detection (#4822, #5930, #7074, #10175, #12722, - #17197, #18338 #18842, #18899) -+ 
[vrv] Add support for authentication (#14307) -* [videomore:season] Fix extraction -* [videomore] Improve extraction (#18908) -+ [tnaflix] Pass Referer in metadata request (#18925) -* [radiocanada] Relax DRM check (#18608, #18609) -* [vimeo] Fix video password verification for videos protected by - Referer HTTP header -+ [hketv] Add support for hkedcity.net (#18696) -+ [streamango] Add support for fruithosts.net (#18710) -+ [instagram] Add support for tags (#18757) -+ [odnoklassniki] Detect paid videos (#18876) -* [ted] Correct acodec for HTTP formats (#18923) -* [cartoonnetwork] Fix extraction (#15664, #17224) -* [vimeo] Fix extraction for password protected player URLs (#18889) - - -version 2019.01.17 - -Extractors -* [youtube] Extend JS player signature function name regular expressions - (#18890, #18891, #18893) - - -version 2019.01.16 - -Core -+ [test/helper] Add support for maxcount and count collection len checkers -* [downloader/hls] Fix uplynk ad skipping (#18824) -* [postprocessor/ffmpeg] Improve ffmpeg version parsing (#18813) - -Extractors -* [youtube] Skip unsupported adaptive stream type (#18804) -+ [youtube] Extract DASH formats from player response (#18804) -* [funimation] Fix extraction (#14089) -* [skylinewebcams] Fix extraction (#18853) -+ [curiositystream] Add support for non app URLs -+ [bitchute] Check formats (#18833) -* [wistia] Extend URL regular expression (#18823) -+ [playplustv] Add support for playplus.com (#18789) - - -version 2019.01.10 - -Core -* [extractor/common] Use episode name as title in _json_ld -+ [extractor/common] Add support for movies in _json_ld -* [postprocessor/ffmpeg] Embed subtitles with non-standard language codes - (#18765) -+ [utils] Add language codes replaced in 1989 revision of ISO 639 - to ISO639Utils (#18765) - -Extractors -* [youtube] Extract live HLS URL from player response (#18799) -+ [outsidetv] Add support for outsidetv.com (#18774) -* [jwplatform] Use JW Platform Delivery API V2 and add support for 
more URLs -+ [fox] Add support National Geographic (#17985, #15333, #14698) -+ [playplustv] Add support for playplus.tv (#18789) -* [globo] Set GLBID cookie manually (#17346) -+ [gaia] Add support for gaia.com (#14605) -* [youporn] Fix title and description extraction (#18748) -+ [hungama] Add support for hungama.com (#17402, #18771) -* [dtube] Fix extraction (#18741) -* [tvnow] Fix and rework extractors and prepare for a switch to the new API - (#17245, #18499) -* [carambatv:page] Fix extraction (#18739) - - -version 2019.01.02 - -Extractors -* [discovery] Use geo verification headers (#17838) -+ [packtpub] Add support for subscription.packtpub.com (#18718) -* [yourporn] Fix extraction (#18583) -+ [acast:channel] Add support for play.acast.com (#18587) -+ [extractors] Add missing age limits (#18621) -+ [rmcdecouverte] Add support for live stream -* [rmcdecouverte] Bypass geo restriction -* [rmcdecouverte] Update URL regular expression (#18595, 18697) -* [manyvids] Fix extraction (#18604, #18614) -* [bitchute] Fix extraction (#18567) - - -version 2018.12.31 - -Extractors -+ [bbc] Add support for another embed pattern (#18643) -+ [npo:live] Add support for npostart.nl (#18644) -* [beeg] Fix extraction (#18610, #18626) -* [youtube] Unescape HTML for series (#18641) -+ [youtube] Extract more format metadata -* [youtube] Detect DRM protected videos (#1774) -* [youtube] Relax HTML5 player regular expressions (#18465, #18466) -* [youtube] Extend HTML5 player regular expression (#17516) -+ [liveleak] Add support for another embed type and restore original - format extraction -+ [crackle] Extract ISM and HTTP formats -+ [twitter] Pass Referer with card request (#18579) -* [mediasite] Extend URL regular expression (#18558) -+ [lecturio] Add support for lecturio.de (#18562) -+ [discovery] Add support for Scripps Networks watch domains (#17947) - - -version 2018.12.17 - -Extractors -* [ard:beta] Improve geo restricted videos extraction -* [ard:beta] Fix subtitles extraction 
-* [ard:beta] Improve extraction robustness -* [ard:beta] Relax URL regular expression (#18441) -* [acast] Add support for embed.acast.com and play.acast.com (#18483) -* [iprima] Relax URL regular expression (#18515, #18540) -* [vrv] Fix initial state extraction (#18553) -* [youtube] Fix mark watched (#18546) -+ [safari] Add support for learning.oreilly.com (#18510) -* [youtube] Fix multifeed extraction (#18531) -* [lecturio] Improve subtitles extraction (#18488) -* [uol] Fix format URL extraction (#18480) -+ [ard:mediathek] Add support for classic.ardmediathek.de (#18473) - - -version 2018.12.09 - -Core -* [YoutubeDL] Keep session cookies in cookie file between runs -* [YoutubeDL] Recognize session cookies with expired set to 0 (#12929) - -Extractors -+ [teachable] Add support for teachable platform sites (#5451, #18150, #18272) -+ [aenetworks] Add support for historyvault.com (#18460) -* [imgur] Improve gallery and album detection and extraction (#9133, #16577, - #17223, #18404) -* [iprima] Relax URL regular expression (#18453) -* [hotstar] Fix video data extraction (#18386) -* [ard:mediathek] Fix title and description extraction (#18349, #18371) -* [xvideos] Switch to HTTPS (#18422, #18427) -+ [lecturio] Add support for lecturio.com (#18405) -+ [nrktv:series] Add support for extra materials -* [nrktv:season,series] Fix extraction (#17159, #17258) -* [nrktv] Relax URL regular expression (#18304, #18387) -* [yourporn] Fix extraction (#18424, #18425) -* [tbs] Fix info extraction (#18403) -+ [gamespot] Add support for review URLs - - -version 2018.12.03 - -Core -* [utils] Fix random_birthday to generate existing dates only (#18284) - -Extractors -+ [tiktok] Add support for tiktok.com (#18108, #18135) -* [pornhub] Use actual URL host for requests (#18359) -* [lynda] Fix authentication (#18158, #18217) -* [gfycat] Update API endpoint (#18333, #18343) -+ [hotstar] Add support for alternative app state layout (#18320) -* [azmedien] Fix extraction (#18334, #18336) -+ 
[vimeo] Add support for VHX (Vimeo OTT) (#14835) -* [joj] Fix extraction (#18280, #18281) -+ [wistia] Add support for fast.wistia.com (#18287) - - -version 2018.11.23 - -Core -+ [setup.py] Add more relevant classifiers - -Extractors -* [mixcloud] Fallback to hardcoded decryption key (#18016) -* [nbc:news] Fix article extraction (#16194) -* [foxsports] Fix extraction (#17543) -* [loc] Relax regular expression and improve formats extraction -+ [ciscolive] Add support for ciscolive.cisco.com (#17984) -* [nzz] Relax kaltura regex (#18228) -* [sixplay] Fix formats extraction -* [bitchute] Improve title extraction -* [kaltura] Limit requested MediaEntry fields -+ [americastestkitchen] Add support for zype embeds (#18225) -+ [pornhub] Add pornhub.net alias -* [nova:embed] Fix extraction (#18222) - - -version 2018.11.18 - -Extractors -+ [wwe] Extract subtitles -+ [wwe] Add support for playlists (#14781) -+ [wwe] Add support for wwe.com (#14781, #17450) -* [vk] Detect geo restriction (#17767) -* [openload] Use original host during extraction (#18211) -* [atvat] Fix extraction (#18041) -+ [rte] Add support for new API endpoint (#18206) -* [tnaflixnetwork:embed] Fix extraction (#18205) -* [picarto] Use API and add token support (#16518) -+ [zype] Add support for player.zype.com (#18143) -* [vivo] Fix extraction (#18139) -* [ruutu] Update API endpoint (#18138) - - -version 2018.11.07 - -Extractors -+ [youtube] Add another JS signature function name regex (#18091, #18093, - #18094) -* [facebook] Fix tahoe request (#17171) -* [cliphunter] Fix extraction (#18083) -+ [youtube:playlist] Add support for invidio.us (#18077) -* [zattoo] Arrange API hosts for derived extractors (#18035) -+ [youtube] Add fallback metadata extraction from videoDetails (#18052) - - -version 2018.11.03 - -Core -* [extractor/common] Ensure response handle is not prematurely closed before - it can be read if it matches expected_status (#17195, #17846, #17447) - -Extractors -* [laola1tv:embed] Set correct 
stream access URL scheme (#16341) -+ [ehftv] Add support for ehftv.com (#15408) -* [azmedien] Adopt to major site redesign (#17745, #17746) -+ [twitcasting] Add support for twitcasting.tv (#17981) -* [orf:tvthek] Fix extraction (#17737, #17956, #18024) -+ [openload] Add support for oload.fun (#18045) -* [njpwworld] Fix authentication (#17427) -+ [linkedin:learning] Add support for linkedin.com/learning (#13545) -* [theplatform] Improve error detection (#13222) -* [cnbc] Simplify extraction (#14280, #17110) -+ [cbnc] Add support for new URL schema (#14193) -* [aparat] Improve extraction and extract more metadata (#17445, #18008) -* [aparat] Fix extraction - - -version 2018.10.29 - -Core -+ [extractor/common] Add validation for JSON-LD URLs - -Extractors -+ [sportbox] Add support for matchtv.ru -* [sportbox] Fix extraction (#17978) -* [screencast] Fix extraction (#14590, #14617, #17990) -+ [openload] Add support for oload.icu -+ [ivi] Add support for ivi.tv -* [crunchyroll] Improve extraction failsafeness (#17991) -* [dailymail] Fix formats extraction (#17976) -* [viewster] Reduce format requests -* [cwtv] Handle API errors (#17905) -+ [rutube] Use geo verification headers (#17897) -+ [brightcove:legacy] Add fallbacks to brightcove:new (#13912) -- [tv3] Remove extractor (#10461, #15339) -* [ted] Fix extraction for HTTP and RTMP formats (#5941, #17572, #17894) -+ [openload] Add support for oload.cc (#17823) -+ [patreon] Extract post_file URL (#17792) -* [patreon] Fix extraction (#14502, #10471) - - -version 2018.10.05 - -Extractors -* [pluralsight] Improve authentication (#17762) -* [dailymotion] Fix extraction (#17699) -* [crunchyroll] Switch to HTTPS for RpcApi (#17749) -+ [philharmoniedeparis] Add support for pad.philharmoniedeparis.fr (#17705) -* [philharmoniedeparis] Fix extraction (#17705) -+ [jamendo] Add support for licensing.jamendo.com (#17724) -+ [openload] Add support for oload.cloud (#17710) -* [pluralsight] Fix subtitles extraction (#17726, #17728) -+ 
[vimeo] Add another config regular expression (#17690) -* [spike] Fix Paramount Network extraction (#17677) -* [hotstar] Fix extraction (#14694, #14931, #17637) - - -version 2018.09.26 - -Extractors -* [pluralsight] Fix subtitles extraction (#17671) -* [mediaset] Improve embed support (#17668) -+ [youtube] Add support for invidio.us (#17613) -+ [zattoo] Add support for more zattoo platform sites -* [zattoo] Fix extraction (#17175, #17542) - - -version 2018.09.18 - -Core -+ [extractor/common] Introduce channel meta fields - -Extractors -* [adobepass] Don't pollute default headers dict -* [udemy] Don't pollute default headers dict -* [twitch] Don't pollute default headers dict -* [youtube] Don't pollute default query dict (#17593) -* [crunchyroll] Prefer hardsubless formats and formats in locale language -* [vrv] Make format ids deterministic -* [vimeo] Fix ondemand playlist extraction (#14591) -+ [pornhub] Extract upload date (#17574) -+ [porntube] Extract channel meta fields -+ [vimeo] Extract channel meta fields -+ [youtube] Extract channel meta fields (#9676, #12939) -* [porntube] Fix extraction (#17541) -* [asiancrush] Fix extraction (#15630) -+ [twitch:clips] Extend URL regular expression (#17559) -+ [vzaar] Add support for HLS -* [tube8] Fix metadata extraction (#17520) -* [eporner] Extract JSON-LD (#17519) - - -version 2018.09.10 - -Core -+ [utils] Properly recognize AV1 codec (#17506) - -Extractors -+ [iprima] Add support for prima.iprima.cz (#17514) -+ [tele5] Add support for tele5.de (#7805, #7922, #17331, #17414) -* [nbc] Fix extraction of percent encoded URLs (#17374) - - -version 2018.09.08 - -Extractors -* [youtube] Fix extraction (#17457, #17464) -+ [pornhub:uservideos] Add support for new URLs (#17388) -* [iprima] Confirm adult check (#17437) -* [slideslive] Make check for video service name case-insensitive (#17429) -* [radiojavan] Fix extraction (#17151) -* [generic] Skip unsuccessful jwplayer extraction (#16735) - - -version 2018.09.01 - -Core -* 
[utils] Skip remote IP addresses non matching to source address' IP version - when creating a connection (#13422, #17362) - -Extractors -+ [ard] Add support for one.ard.de (#17397) -* [niconico] Fix extraction on python3 (#17393, #17407) -* [ard] Extract f4m formats -* [crunchyroll] Parse vilos media data (#17343) -+ [ard] Add support for Beta ARD Mediathek -+ [bandcamp] Extract more metadata (#13197) -* [internazionale] Fix extraction of non-available-abroad videos (#17386) - - -version 2018.08.28 - -Extractors -+ [youtube:playlist] Add support for music album playlists (OLAK5uy_ prefix) - (#17361) -* [bitchute] Fix extraction by pass custom User-Agent (#17360) -* [webofstories:playlist] Fix extraction (#16914) -+ [tvplayhome] Add support for new tvplay URLs (#17344) -+ [generic] Allow relative src for videojs embeds (#17324) -+ [xfileshare] Add support for vidto.se (#17317) -+ [vidzi] Add support for vidzi.nu (#17316) -+ [nova:embed] Add support for media.cms.nova.cz (#17282) - - -version 2018.08.22 - -Core -* [utils] Use pure browser header for User-Agent (#17236) - -Extractors -+ [kinopoisk] Add support for kinopoisk.ru (#17283) -+ [yourporn] Add support for yourporn.sexy (#17298) -+ [go] Add support for disneynow.go.com (#16299, #17264) -+ [6play] Add support for play.rtl.hr (#17249) -* [anvato] Fallback to generic API key for access-key-to-API-key lookup - (#16788, #17254) -* [lci] Fix extraction (#17274) -* [bbccouk] Extend id URL regular expression (#17270) -* [cwtv] Fix extraction (#17256) -* [nova] Fix extraction (#17241) -+ [generic] Add support for expressen embeds -* [raywenderlich] Adapt to site redesign (#17225) -+ [redbulltv] Add support redbull.com tv URLs (#17218) -+ [bitchute] Add support for bitchute.com (#14052) -+ [clyp] Add support for token protected media (#17184) -* [imdb] Fix extension extraction (#17167) - - -version 2018.08.04 - -Extractors -* [funk:channel] Improve byChannelAlias extraction (#17142) -* [twitch] Fix authentication 
(#17024, #17126) -* [twitch:vod] Improve URL regular expression (#17135) -* [watchbox] Fix extraction (#17107) -* [pbs] Fix extraction (#17109) -* [theplatform] Relax URL regular expression (#16181, #17097) -+ [viqeo] Add support for viqeo.tv (#17066) - - -version 2018.07.29 - -Extractors -* [crunchyroll:playlist] Restrict URL regular expression (#17069, #17076) -+ [pornhub] Add support for subtitles (#16924, #17088) -* [ceskatelevize] Use https for API call (#16997, #16999) -* [dailymotion:playlist] Fix extraction (#16894) -* [ted] Improve extraction -* [ted] Fix extraction for videos without nativeDownloads (#16756, #17085) -* [telecinco] Fix extraction (#17080) -* [mitele] Reduce number of requests -* [rai] Return non HTTP relinker URL intact (#17055) -* [vk] Fix extraction for inline only videos (#16923) -* [streamcloud] Fix extraction (#17054) -* [facebook] Fix tahoe player extraction with authentication (#16655) -+ [puhutv] Add support for puhutv.com (#12712, #16010, #16269) - - -version 2018.07.21 - -Core -+ [utils] Introduce url_or_none -* [utils] Allow JSONP without function name (#17028) -+ [extractor/common] Extract DASH and MSS formats from SMIL manifests - -Extractors -+ [bbc] Add support for BBC Radio Play pages (#17022) -* [iwara] Fix download URLs (#17026) -* [vrtnu] Relax title extraction and extract JSON-LD (#17018) -+ [viu] Pass Referer and Origin headers and area id (#16992) -+ [vimeo] Add another config regular expression (#17013) -+ [facebook] Extract view count (#16942) -* [dailymotion] Improve description extraction (#16984) -* [slutload] Fix and improve extraction (#17001) -* [mediaset] Fix extraction (#16977) -+ [theplatform] Add support for theplatform TLD customization (#16977) -* [imgur] Relax URL regular expression (#16987) -* [pornhub] Improve extraction and extract all formats (#12166, #15891, #16262, - #16959) - - -version 2018.07.10 - -Core -* [utils] Share JSON-LD regular expression -* [downloader/dash] Improve error handling 
(#16927) - -Extractors -+ [nrktv] Add support for new season and serie URL schema -+ [nrktv] Add support for new episode URL schema (#16909) -+ [frontendmasters] Add support for frontendmasters.com (#3661, #16328) -* [funk] Fix extraction (#16918) -* [watchbox] Fix extraction (#16904) -* [dplayit] Sort formats -* [dplayit] Fix extraction (#16901) -* [youtube] Improve login error handling (#13822) - - -version 2018.07.04 - -Core -* [extractor/common] Properly escape % in MPD templates (#16867) -* [extractor/common] Use source URL as Referer for HTML5 entries (16849) -* Prefer ffmpeg over avconv by default (#8622) - -Extractors -* [pluralsight] Switch to graphql (#16889, #16895, #16896, #16899) -* [lynda] Simplify login and improve error capturing (#16891) -+ [go90] Add support for embed URLs (#16873) -* [go90] Detect geo restriction error and pass geo verification headers - (#16874) -* [vlive] Fix live streams extraction (#16871) -* [npo] Fix typo (#16872) -+ [mediaset] Add support for new videos and extract all formats (#16568) -* [dctptv] Restore extraction based on REST API (#16850) -* [svt] Improve extraction and add support for pages (#16802) -* [porncom] Fix extraction (#16808) - - -version 2018.06.25 - -Extractors -* [joj] Relax URL regular expression (#16771) -* [brightcove] Workaround sonyliv DRM protected videos (#16807) -* [motherless] Fix extraction (#16786) -* [itv] Make SOAP request non fatal and extract metadata from webpage (#16780) -- [foxnews:insider] Remove extractor (#15810) -+ [foxnews] Add support for iframe embeds (#15810, #16711) - - -version 2018.06.19 - -Core -+ [extractor/common] Introduce expected_status in _download_* methods - for convenient accept of HTTP requests failed with non 2xx status codes -+ [compat] Introduce compat_integer_types - -Extractors -* [peertube] Improve generic support (#16733) -+ [6play] Use geo verification headers -* [rtbf] Fix extraction for python 3.2 -* [vgtv] Improve HLS formats extraction -+ [vgtv] Add 
support for www.aftonbladet.se/tv URLs -* [bbccouk] Use expected_status -* [markiza] Expect 500 HTTP status code -* [tvnow] Try all clear manifest URLs (#15361) - - -version 2018.06.18 - -Core -* [downloader/rtmp] Fix downloading in verbose mode (#16736) - -Extractors -+ [markiza] Add support for markiza.sk (#16750) -* [wat] Try all supported adaptive URLs -+ [6play] Add support for rtlplay.be and extract hd usp formats -+ [rtbf] Add support for audio and live streams (#9638, #11923) -+ [rtbf] Extract HLS, DASH and all HTTP formats -+ [rtbf] Extract subtitles -+ [rtbf] Fixup specific HTTP URLs (#16101) -+ [expressen] Add support for expressen.se -* [vidzi] Fix extraction (#16678) -* [pbs] Improve extraction (#16623, #16684) -* [bilibili] Restrict cid regular expression (#16638, #16734) - - -version 2018.06.14 - -Core -* [downloader/http] Fix retry on error when streaming to stdout (#16699) - -Extractors -+ [discoverynetworks] Add support for disco-api videos (#16724) -+ [dailymotion] Add support for password protected videos (#9789) -+ [abc:iview] Add support for livestreams (#12354) -* [abc:iview] Fix extraction (#16704) -+ [crackle] Add support for sonycrackle.com (#16698) -+ [tvnet] Add support for tvnet.gov.vn (#15462) -* [nrk] Update API hosts and try all previously known ones (#16690) -* [wimp] Fix Youtube embeds extraction - - -version 2018.06.11 - -Extractors -* [npo] Extend URL regular expression and add support for npostart.nl (#16682) -+ [inc] Add support for another embed schema (#16666) -* [tv4] Fix format extraction (#16650) -+ [nexx] Add support for free cdn (#16538) -+ [pbs] Add another cove id pattern (#15373) -+ [rbmaradio] Add support for 192k format (#16631) - - -version 2018.06.04 - -Extractors -+ [camtube] Add support for camtube.co -+ [twitter:card] Extract guest token (#16609) -+ [chaturbate] Use geo verification headers -+ [bbc] Add support for bbcthree (#16612) -* [youtube] Move metadata extraction after video availability check -+ 
[youtube] Extract track and artist -+ [safari] Add support for new URL schema (#16614) -* [adn] Fix extraction - - -version 2018.06.02 - -Core -* [utils] Improve determine_ext - -Extractors -+ [facebook] Add support for tahoe player videos (#15441, #16554) -* [cbc] Improve extraction (#16583, #16593) -* [openload] Improve ext extraction (#16595) -+ [twitter:card] Add support for another endpoint (#16586) -+ [openload] Add support for oload.win and oload.download (#16592) -* [audimedia] Fix extraction (#15309) -+ [francetv] Add support for sport.francetvinfo.fr (#15645) -* [mlb] Improve extraction (#16587) -- [nhl] Remove old extractors -* [rbmaradio] Check formats availability (#16585) - - -version 2018.05.30 - -Core -* [downloader/rtmp] Generalize download messages and report time elapsed - on finish -* [downloader/rtmp] Gracefully handle live streams interrupted by user - -Extractors -* [teamcoco] Fix extraction for full episodes (#16573) -* [spiegel] Fix info extraction (#16538) -+ [apa] Add support for apa.at (#15041, #15672) -+ [bellmedia] Add support for bnnbloomberg.ca (#16560) -+ [9c9media] Extract MPD formats and subtitles -* [cammodels] Use geo verification headers -+ [ufctv] Add support for authentication (#16542) -+ [cammodels] Add support for cammodels.com (#14499) -* [utils] Fix style id extraction for namespaced id attribute in dfxp2srt - (#16551) -* [soundcloud] Detect format extension (#16549) -* [cbc] Fix playlist title extraction (#16502) -+ [tumblr] Detect and report sensitive media (#13829) -+ [tumblr] Add support for authentication (#15133) - - -version 2018.05.26 - -Core -* [utils] Improve parse_age_limit - -Extractors -* [audiomack] Stringify video id (#15310) -* [izlesene] Fix extraction (#16233, #16271, #16407) -+ [indavideo] Add support for generic embeds (#11989) -* [indavideo] Fix extraction (#11221) -* [indavideo] Sign download URLs (#16174) -+ [peertube] Add support for PeerTube based sites (#16301, #16329) -* [imgur] Fix extraction 
(#16537) -+ [hidive] Add support for authentication (#16534) -+ [nbc] Add support for stream.nbcsports.com (#13911) -+ [viewlift] Add support for hoichoi.tv (#16536) -* [go90] Extract age limit and detect DRM protection(#10127) -* [viewlift] fix extraction for snagfilms.com (#15766) -* [globo] Improve extraction (#4189) - * Add support for authentication - * Simplify URL signing - * Extract DASH and MSS formats -* [leeco] Fix extraction (#16464) -* [teamcoco] Add fallback for format extraction (#16484) -* [teamcoco] Improve URL regular expression (#16484) -* [imdb] Improve extraction (#4085, #14557) - - -version 2018.05.18 - -Extractors -* [vimeo:likes] Relax URL regular expression and fix single page likes - extraction (#16475) -* [pluralsight] Fix clip id extraction (#16460) -+ [mychannels] Add support for mychannels.com (#15334) -- [moniker] Remove extractor (#15336) -* [pbs] Fix embed data extraction (#16474) -+ [mtv] Add support for paramountnetwork.com and bellator.com (#15418) -* [youtube] Fix hd720 format position -* [dailymotion] Remove fragment part from m3u8 URLs (#8915) -* [3sat] Improve extraction (#15350) - * Extract all formats - * Extract more format metadata - * Improve format sorting - * Use hls native downloader - * Detect and bypass geo-restriction -+ [dtube] Add support for d.tube (#15201) -* [options] Fix typo (#16450) -* [youtube] Improve format filesize extraction (#16453) -* [youtube] Make uploader extraction non fatal (#16444) -* [youtube] Fix extraction for embed restricted live streams (#16433) -* [nbc] Improve info extraction (#16440) -* [twitch:clips] Fix extraction (#16429) -* [redditr] Relax URL regular expression (#16426, #16427) -* [mixcloud] Bypass throttling for HTTP formats (#12579, #16424) -+ [nick] Add support for nickjr.de (#13230) -* [teamcoco] Fix extraction (#16374) - - -version 2018.05.09 - -Core -* [YoutubeDL] Ensure ext exists for automatic captions -* Introduce --geo-bypass-ip-block - -Extractors -+ [udemy] Extract 
asset captions -+ [udemy] Extract stream URLs (#16372) -+ [businessinsider] Add support for businessinsider.com (#16387, #16388, #16389) -+ [cloudflarestream] Add support for cloudflarestream.com (#16375) -* [watchbox] Fix extraction (#16356) -* [discovery] Extract Affiliate/Anonymous Auth Token from cookies (#14954) -+ [itv:btcc] Add support for itv.com/btcc (#16139) -* [tunein] Use live title for live streams (#16347) -* [itv] Improve extraction (#16253) - - -version 2018.05.01 - -Core -* [downloader/fragment] Restart download if .ytdl file is corrupt (#16312) -+ [extractor/common] Extract interaction statistic -+ [utils] Add merge_dicts -+ [extractor/common] Add _download_json_handle - -Extractors -* [kaltura] Improve iframe embeds detection (#16337) -+ [udemy] Extract outputs renditions (#16289, #16291, #16320, #16321, #16334, - #16335) -+ [zattoo] Add support for zattoo.com and mobiltv.quickline.com (#14668, #14676) -* [yandexmusic] Convert release_year to int -* [udemy] Override _download_webpage_handle instead of _download_webpage -* [xiami] Override _download_webpage_handle instead of _download_webpage -* [yandexmusic] Override _download_webpage_handle instead of _download_webpage -* [youtube] Correctly disable polymer on all requests (#16323, #16326) -* [generic] Prefer enclosures over links in RSS feeds (#16189) -+ [redditr] Add support for old.reddit.com URLs (#16274) -* [nrktv] Update API host (#16324) -+ [imdb] Extract all formats (#16249) -+ [vimeo] Extract JSON-LD (#16295) -* [funk:channel] Improve extraction (#16285) - - -version 2018.04.25 - -Core -* [utils] Fix match_str for boolean meta fields -+ [Makefile] Add support for pandoc 2 and disable smart extension (#16251) -* [YoutubeDL] Fix typo in media extension compatibility checker (#16215) - -Extractors -+ [openload] Recognize IPv6 stream URLs (#16136, #16137, #16205, #16246, - #16250) -+ [twitch] Extract is_live according to status (#16259) -* [pornflip] Relax URL regular expression (#16258) -- 
[etonline] Remove extractor (#16256) -* [breakcom] Fix extraction (#16254) -+ [youtube] Add ability to authenticate with cookies -* [youtube:feed] Implement lazy playlist extraction (#10184) -+ [svt] Add support for TV channel live streams (#15279, #15809) -* [ccma] Fix video extraction (#15931) -* [rentv] Fix extraction (#15227) -+ [nick] Add support for nickjr.nl (#16230) -* [extremetube] Fix metadata extraction -+ [keezmovies] Add support for generic embeds (#16134, #16154) -* [nexx] Extract new azure URLs (#16223) -* [cbssports] Fix extraction (#16217) -* [kaltura] Improve embeds detection (#16201) -* [instagram:user] Fix extraction (#16119) -* [cbs] Skip DRM asset types (#16104) - - -version 2018.04.16 - -Extractors -* [smotri:broadcast] Fix extraction (#16180) -+ [picarto] Add support for picarto.tv (#6205, #12514, #15276, #15551) -* [vine:user] Fix extraction (#15514, #16190) -* [pornhub] Relax URL regular expression (#16165) -* [cbc:watch] Re-acquire device token when expired (#16160) -+ [fxnetworks] Add support for https theplatform URLs (#16125, #16157) -+ [instagram:user] Add request signing (#16119) -+ [twitch] Add support for mobile URLs (#16146) - - -version 2018.04.09 - -Core -* [YoutubeDL] Do not save/restore console title while simulate (#16103) -* [extractor/common] Relax JSON-LD context check (#16006) - -Extractors -+ [generic] Add support for tube8 embeds -+ [generic] Add support for share-videos.se embeds (#16089, #16115) -* [odnoklassniki] Extend URL regular expression (#16081) -* [steam] Bypass mature content check (#16113) -+ [acast] Extract more metadata -* [acast] Fix extraction (#16118) -* [instagram:user] Fix extraction (#16119) -* [drtuber] Fix title extraction (#16107, #16108) -* [liveleak] Extend URL regular expression (#16117) -+ [openload] Add support for oload.xyz -* [openload] Relax stream URL regular expression -* [openload] Fix extraction (#16099) -+ [svtplay:series] Add support for season URLs -+ [svtplay:series] Add support 
for series (#11130, #16059) - - -version 2018.04.03 - -Extractors -+ [tvnow] Add support for shows (#15837) -* [dramafever] Fix authentication (#16067) -* [afreecatv] Use partial view only when necessary (#14450) -+ [afreecatv] Add support for authentication (#14450) -+ [nationalgeographic] Add support for new URL schema (#16001, #16054) -* [xvideos] Fix thumbnail extraction (#15978, #15979) -* [medialaan] Fix vod id (#16038) -+ [openload] Add support for oload.site (#16039) -* [naver] Fix extraction (#16029) -* [dramafever] Partially switch to API v5 (#16026) -* [abc:iview] Unescape title and series meta fields (#15994) -* [videa] Extend URL regular expression (#16003) - - -version 2018.03.26.1 - -Core -+ [downloader/external] Add elapsed time to progress hook (#10876) -* [downloader/external,fragment] Fix download finalization when writing file - to stdout (#10809, #10876, #15799) - -Extractors -* [vrv] Fix extraction on python2 (#15928) -* [afreecatv] Update referrer (#15947) -+ [24video] Add support for 24video.sexy (#15973) -* [crackle] Bypass geo restriction -* [crackle] Fix extraction (#15969) -+ [lenta] Add support for lenta.ru (#15953) -+ [instagram:user] Add pagination (#15934) -* [youku] Update ccode (#15939) -* [libsyn] Adapt to new page structure - - -version 2018.03.20 - -Core -* [extractor/common] Improve thumbnail extraction for HTML5 entries -* Generalize XML manifest processing code and improve XSPF parsing -+ [extractor/common] Add _download_xml_handle -+ [extractor/common] Add support for relative URIs in _parse_xspf (#15794) - -Extractors -+ [7plus] Extract series metadata (#15862, #15906) -* [9now] Bypass geo restriction (#15920) -* [cbs] Skip unavailable assets (#13490, #13506, #15776) -+ [canalc2] Add support for HTML5 videos (#15916, #15919) -+ [ceskatelevize] Add support for iframe embeds (#15918) -+ [prosiebensat1] Add support for galileo.tv (#15894) -+ [generic] Add support for xfileshare embeds (#15879) -* [bilibili] Switch to v2 
playurl API -* [bilibili] Fix and improve extraction (#15048, #15430, #15622, #15863) -* [heise] Improve extraction (#15496, #15784, #15026) -* [instagram] Fix user videos extraction (#15858) - - -version 2018.03.14 - -Extractors -* [soundcloud] Update client id (#15866) -+ [tennistv] Add support for tennistv.com -+ [line] Add support for tv.line.me (#9427) -* [xnxx] Fix extraction (#15817) -* [njpwworld] Fix authentication (#15815) - - -version 2018.03.10 - -Core -* [downloader/hls] Skip uplynk ad fragments (#15748) - -Extractors -* [pornhub] Don't override session cookies (#15697) -+ [raywenderlich] Add support for videos.raywenderlich.com (#15251) -* [funk] Fix extraction and rework extractors (#15792) -* [nexx] Restore reverse engineered approach -+ [heise] Add support for kaltura embeds (#14961, #15728) -+ [tvnow] Extract series metadata (#15774) -* [ruutu] Continue formats extraction on NOT-USED URLs (#15775) -* [vrtnu] Use redirect URL for building video JSON URL (#15767, #15769) -* [vimeo] Modernize login code and improve error messaging -* [archiveorg] Fix extraction (#15770, #15772) -+ [hidive] Add support for hidive.com (#15494) -* [afreecatv] Detect deleted videos -* [afreecatv] Fix extraction (#15755) -* [vice] Fix extraction and rework extractors (#11101, #13019, #13622, #13778) -+ [vidzi] Add support for vidzi.si (#15751) -* [npo] Fix typo - - -version 2018.03.03 - -Core -+ [utils] Add parse_resolution -Revert respect --prefer-insecure while updating - -Extractors -+ [yapfiles] Add support for yapfiles.ru (#15726, #11085) -* [spankbang] Fix formats extraction (#15727) -* [adn] Fix extraction (#15716) -+ [toggle] Extract DASH and ISM formats (#15721) -+ [nickelodeon] Add support for nickelodeon.com.tr (#15706) -* [npo] Validate and filter format URLs (#15709) - - -version 2018.02.26 - -Extractors -* [udemy] Use custom User-Agent (#15571) - - -version 2018.02.25 - -Core -* [postprocessor/embedthumbnail] Skip embedding when there aren't any - thumbnails 
(#12573) -* [extractor/common] Improve jwplayer subtitles extraction (#15695) - -Extractors -+ [vidlii] Add support for vidlii.com (#14472, #14512, #14779) -+ [streamango] Capture and output error messages -* [streamango] Fix extraction (#14160, #14256) -+ [telequebec] Add support for emissions (#14649, #14655) -+ [telequebec:live] Add support for live streams (#15688) -+ [mailru:music] Add support for mail.ru/music (#15618) -* [aenetworks] Switch to akamai HLS formats (#15612) -* [ytsearch] Fix flat title extraction (#11260, #15681) - - -version 2018.02.22 - -Core -+ [utils] Fixup some common URL typos in sanitize_url (#15649) -* Respect --prefer-insecure while updating (#15497) - -Extractors -* [vidio] Fix HLS URL extraction (#15675) -+ [nexx] Add support for arc.nexx.cloud URLs -* [nexx] Switch to arc API (#15652) -* [redtube] Fix duration extraction (#15659) -+ [sonyliv] Respect referrer (#15648) -+ [brightcove:new] Use referrer for formats' HTTP headers -+ [cbc] Add support for olympics.cbc.ca (#15535) -+ [fusion] Add support for fusion.tv (#15628) -* [npo] Improve quality metadata extraction -* [npo] Relax URL regular expression (#14987, #14994) -+ [npo] Capture and output error message -+ [pornhub] Add support for channels (#15613) -* [youtube] Handle shared URLs with generic extractor (#14303) - - -version 2018.02.11 - -Core -+ [YoutubeDL] Add support for filesize_approx in format selector (#15550) - -Extractors -+ [francetv] Add support for live streams (#13689) -+ [francetv] Add support for zouzous.fr and ludo.fr (#10454, #13087, #13103, - #15012) -* [francetv] Separate main extractor and rework others to delegate to it -* [francetv] Improve manifest URL signing (#15536) -+ [francetv] Sign m3u8 manifest URLs (#15565) -+ [veoh] Add support for embed URLs (#15561) -* [afreecatv] Fix extraction (#15556) -* [periscope] Use accessVideoPublic endpoint (#15554) -* [discovery] Fix auth request (#15542) -+ [6play] Extract subtitles (#15541) -* [newgrounds] Fix 
metadata extraction (#15531) -+ [nbc] Add support for stream.nbcolympics.com (#10295) -* [dvtv] Fix live streams extraction (#15442) - - -version 2018.02.08 - -Extractors -+ [myvi] Extend URL regular expression -+ [myvi:embed] Add support for myvi.tv embeds (#15521) -+ [prosiebensat1] Extend URL regular expression (#15520) -* [pokemon] Relax URL regular expression and extend title extraction (#15518) -+ [gameinformer] Use geo verification headers -* [la7] Fix extraction (#15501, #15502) -* [gameinformer] Fix brightcove id extraction (#15416) -+ [afreecatv] Pass referrer to video info request (#15507) -+ [telebruxelles] Add support for live streams -* [telebruxelles] Relax URL regular expression -* [telebruxelles] Fix extraction (#15504) -* [extractor/common] Respect secure schemes in _extract_wowza_formats - - -version 2018.02.04 - -Core -* [downloader/http] Randomize HTTP chunk size -+ [downloader/http] Add ability to pass downloader options via info dict -* [downloader/http] Fix 302 infinite loops by not reusing requests -+ Document http_chunk_size - -Extractors -+ [brightcove] Pass embed page URL as referrer (#15486) -+ [youtube] Enforce using chunked HTTP downloading for DASH formats - - -version 2018.02.03 - -Core -+ Introduce --http-chunk-size for chunk-based HTTP downloading -+ Add support for IronPython -* [downloader/ism] Fix Python 3.2 support - -Extractors -* [redbulltv] Fix extraction (#15481) -* [redtube] Fix metadata extraction (#15472) -* [pladform] Respect platform id and extract HLS formats (#15468) -- [rtlnl] Remove progressive formats (#15459) -* [6play] Do no modify asset URLs with a token (#15248) -* [nationalgeographic] Relax URL regular expression -* [dplay] Relax URL regular expression (#15458) -* [cbsinteractive] Fix data extraction (#15451) -+ [amcnetworks] Add support for sundancetv.com (#9260) - - -version 2018.01.27 - -Core -* [extractor/common] Improve _json_ld for articles -* Switch codebase to use compat_b64decode -+ [compat] Add 
compat_b64decode - -Extractors -+ [seznamzpravy] Add support for seznam.cz and seznamzpravy.cz (#14102, #14616) -* [dplay] Bypass geo restriction -+ [dplay] Add support for disco-api videos (#15396) -* [youtube] Extract precise error messages (#15284) -* [teachertube] Capture and output error message -* [teachertube] Fix and relax thumbnail extraction (#15403) -+ [prosiebensat1] Add another clip id regular expression (#15378) -* [tbs] Update tokenizer url (#15395) -* [mixcloud] Use compat_b64decode (#15394) -- [thesixtyone] Remove extractor (#15341) - - -version 2018.01.21 - -Core -* [extractor/common] Improve jwplayer DASH formats extraction (#9242, #15187) -* [utils] Improve scientific notation handling in js_to_json (#14789) - -Extractors -+ [southparkdk] Add support for southparkstudios.nu -+ [southpark] Add support for collections (#14803) -* [franceinter] Fix upload date extraction (#14996) -+ [rtvs] Add support for rtvs.sk (#9242, #15187) -* [restudy] Fix extraction and extend URL regular expression (#15347) -* [youtube:live] Improve live detection (#15365) -+ [springboardplatform] Add support for springboardplatform.com -* [prosiebensat1] Add another clip id regular expression (#15290) -- [ringtv] Remove extractor (#15345) - - -version 2018.01.18 - -Extractors -* [soundcloud] Update client id (#15306) -- [kamcord] Remove extractor (#15322) -+ [spiegel] Add support for nexx videos (#15285) -* [twitch] Fix authentication and error capture (#14090, #15264) -* [vk] Detect more errors due to copyright complaints (#15259) - - -version 2018.01.14 - -Extractors -* [youtube] Fix live streams extraction (#15202) -* [wdr] Bypass geo restriction -* [wdr] Rework extractors (#14598) -+ [wdr] Add support for wdrmaus.de/elefantenseite (#14598) -+ [gamestar] Add support for gamepro.de (#3384) -* [viafree] Skip rtmp formats (#15232) -+ [pandoratv] Add support for mobile URLs (#12441) -+ [pandoratv] Add support for new URL format (#15131) -+ [ximalaya] Add support for 
ximalaya.com (#14687) -+ [digg] Add support for digg.com (#15214) -* [limelight] Tolerate empty pc formats (#15150, #15151, #15207) -* [ndr:embed:base] Make separate formats extraction non fatal (#15203) -+ [weibo] Add extractor (#15079) -+ [ok] Add support for live streams -* [canalplus] Fix extraction (#15072) -* [bilibili] Fix extraction (#15188) - - -version 2018.01.07 - -Core -* [utils] Fix youtube-dl under PyPy3 on Windows -* [YoutubeDL] Output python implementation in debug header - -Extractors -+ [jwplatform] Add support for multiple embeds (#15192) -* [mitele] Fix extraction (#15186) -+ [motherless] Add support for groups (#15124) -* [lynda] Relax URL regular expression (#15185) -* [soundcloud] Fallback to avatar picture for thumbnail (#12878) -* [youku] Fix list extraction (#15135) -* [openload] Fix extraction (#15166) -* [lynda] Skip invalid subtitles (#15159) -* [twitch] Pass video id to url_result when extracting playlist (#15139) -* [rtve.es:alacarta] Fix extraction of some new URLs -* [acast] Fix extraction (#15147) - - -version 2017.12.31 - -Core -+ [extractor/common] Add container meta field for formats extracted - in _parse_mpd_formats (#13616) -+ [downloader/hls] Use HTTP headers for key request -* [common] Use AACL as the default fourcc when AudioTag is 255 -* [extractor/common] Fix extraction of DASH formats with the same - representation id (#15111) - -Extractors -+ [slutload] Add support for mobile URLs (#14806) -* [abc:iview] Bypass geo restriction -* [abc:iview] Fix extraction (#14711, #14782, #14838, #14917, #14963, #14985, - #15035, #15057, #15061, #15071, #15095, #15106) -* [openload] Fix extraction (#15118) -- [sandia] Remove extractor -- [collegerama] Remove extractor -+ [mediasite] Add support for sites based on Mediasite Video Platform (#5428, - #11185, #14343) -+ [ufctv] Add support for ufc.tv (#14520) -* [pluralsight] Fix missing first line of subtitles (#11118) -* [openload] Fallback on f-page extraction (#14665, #14879) -* 
[vimeo] Improve password protected videos extraction (#15114) -* [aws] Fix canonical/signed headers generation on python 2 (#15102) - - -version 2017.12.28 - -Extractors -+ [internazionale] Add support for internazionale.it (#14973) -* [playtvak] Relax video regular expression and make description optional - (#15037) -+ [filmweb] Add support for filmweb.no (#8773, #10368) -+ [23video] Add support for 23video.com -+ [espn] Add support for fivethirtyeight.com (#6864) -+ [umg:de] Add support for universal-music.de (#11582, #11584) -+ [espn] Add support for espnfc and extract more formats (#8053) -* [youku] Update ccode (#14880) -+ [openload] Add support for oload.stream (#15070) -* [youku] Fix list extraction (#15065) - - -version 2017.12.23 - -Core -* [extractor/common] Move X-Forwarded-For setup code into _request_webpage -+ [YoutubeDL] Add support for playlist_uploader and playlist_uploader_id in - output template (#11427, #15018) -+ [extractor/common] Introduce uploader, uploader_id and uploader_url - meta fields for playlists (#11427, #15018) -* [downloader/fragment] Encode filename of fragment being removed (#15020) -+ [utils] Add another date format pattern (#14999) - -Extractors -+ [kaltura] Add another embed pattern for entry_id -+ [7plus] Add support for 7plus.com.au (#15043) -* [animeondemand] Relax login error regular expression -+ [shahid] Add support for show pages (#7401) -+ [youtube] Extract uploader, uploader_id and uploader_url for playlists - (#11427, #15018) -* [afreecatv] Improve format extraction (#15019) -+ [cspan] Add support for audio only pages and catch page errors (#14995) -+ [mailru] Add support for embed URLs (#14904) -* [crunchyroll] Future-proof XML element checks (#15013) -* [cbslocal] Fix timestamp extraction (#14999, #15000) -* [discoverygo] Correct TTML subtitle extension -* [vk] Make view count optional (#14979) -* [disney] Skip Apple FairPlay formats (#14982) -* [voot] Fix format extraction (#14758) - - -version 2017.12.14 - -Core 
-* [postprocessor/xattr] Clarify NO_SPACE message (#14970) -* [downloader/http] Return actual download result from real_download (#14971) - -Extractors -+ [itv] Extract more subtitles and duration -* [itv] Improve extraction (#14944) -+ [byutv] Add support for geo restricted videos -* [byutv] Fix extraction (#14966, #14967) -+ [bbccouk] Fix extraction for 320k HLS streams -+ [toutv] Add support for special video URLs (#14179) -* [discovery] Fix free videos extraction (#14157, #14954) -* [tvnow] Fix extraction (#7831) -+ [nickelodeon:br] Add support for nickelodeon brazil websites (#14893) -* [nick] Improve extraction (#14876) -* [tbs] Fix extraction (#13658) - - -version 2017.12.10 - -Core -+ [utils] Add sami mimetype to mimetype2ext - -Extractors -* [culturebox] Improve video id extraction (#14947) -* [twitter] Improve extraction (#14197) -+ [udemy] Extract more HLS formats -* [udemy] Improve course id extraction (#14938) -+ [stretchinternet] Add support for portal.stretchinternet.com (#14576) -* [ellentube] Fix extraction (#14407, #14570) -+ [raiplay:playlist] Add support for playlists (#14563) -* [sonyliv] Bypass geo restriction -* [sonyliv] Extract higher quality formats (#14922) -* [fox] Extract subtitles -+ [fox] Add support for Adobe Pass authentication (#14205, #14489) -- [dailymotion:cloud] Remove extractor (#6794) -* [xhamster] Fix thumbnail extraction (#14780) -+ [xhamster] Add support for mobile URLs (#14780) -* [generic] Don't pass video id as mpd id while extracting DASH (#14902) -* [ard] Skip invalid stream URLs (#14906) -* [porncom] Fix metadata extraction (#14911) -* [pluralsight] Detect agreement request (#14913) -* [toutv] Fix login (#14614) - - -version 2017.12.02 - -Core -+ [downloader/fragment] Commit part file after each fragment -+ [extractor/common] Add durations for DASH fragments with bare SegmentURLs -+ [extractor/common] Add support for DASH manifests with SegmentLists with - bare SegmentURLs (#14844) -+ [utils] Add hvc1 codec code to 
parse_codecs - -Extractors -* [xhamster] Fix extraction (#14884) -* [youku] Update ccode (#14872) -* [mnet] Fix format extraction (#14883) -+ [xiami] Add Referer header to API request -* [mtv] Correct scc extension in extracted subtitles (#13730) -* [vvvvid] Fix extraction for kenc videos (#13406) -+ [br] Add support for BR Mediathek videos (#14560, #14788) -+ [daisuki] Add support for motto.daisuki.com (#14681) -* [odnoklassniki] Fix API metadata request (#14862) -* [itv] Fix HLS formats extraction -+ [pbs] Add another media id regular expression - - -version 2017.11.26 - -Core -* [extractor/common] Use final URL when dumping request (#14769) - -Extractors -* [fczenit] Fix extraction -- [firstpost] Remove extractor -* [freespeech] Fix extraction -* [nexx] Extract more formats -+ [openload] Add support for openload.link (#14763) -* [empflix] Relax URL regular expression -* [empflix] Fix extraction -* [tnaflix] Don't modify download URLs (#14811) -- [gamersyde] Remove extractor -* [francetv:generationwhat] Fix extraction -+ [massengeschmacktv] Add support for Massengeschmack TV -* [fox9] Fix extraction -* [faz] Fix extraction and add support for Perform Group embeds (#14714) -+ [performgroup] Add support for performgroup.com -+ [jwplatform] Add support for iframes (#14828) -* [culturebox] Fix extraction (#14827) -* [youku] Fix extraction; update ccode (#14815) -* [livestream] Make SMIL extraction non fatal (#14792) -+ [drtuber] Add support for mobile URLs (#14772) -+ [spankbang] Add support for mobile URLs (#14771) -* [instagram] Fix description, timestamp and counters extraction (#14755) - - -version 2017.11.15 - -Core -* [common] Skip Apple FairPlay m3u8 manifests (#14741) -* [YoutubeDL] Fix playlist range optimization for --playlist-items (#14740) - -Extractors -* [vshare] Capture and output error message -* [vshare] Fix extraction (#14473) -* [crunchyroll] Extract old RTMP formats -* [tva] Fix extraction (#14736) -* [gamespot] Lower preference of HTTP formats 
(#14652) -* [instagram:user] Fix extraction (#14699) -* [ccma] Fix typo (#14730) -- Remove sensitive data from logging in messages -* [instagram:user] Fix extraction (#14699) -+ [gamespot] Add support for article URLs (#14652) -* [gamespot] Skip Brightcove Once HTTP formats (#14652) -* [cartoonnetwork] Update tokenizer_src (#14666) -+ [wsj] Recognize another URL pattern (#14704) -* [pandatv] Update API URL and sign format URLs (#14693) -* [crunchyroll] Use old login method (#11572) - - -version 2017.11.06 - -Core -+ [extractor/common] Add protocol for f4m formats -* [f4m] Prefer baseURL for relative URLs (#14660) -* [extractor/common] Respect URL query in _extract_wowza_formats (#14645) - -Extractors -+ [hotstar:playlist] Add support for playlists (#12465) -* [hotstar] Bypass geo restriction (#14672) -- [22tracks] Remove extractor (#11024, #14628) -+ [skysport] Add support for ooyala videos protected with embed_token (#14641) -* [gamespot] Extract formats referenced with new data fields (#14652) -* [spankbang] Detect unavailable videos (#14644) - - -version 2017.10.29 - -Core -* [extractor/common] Prefix format id for audio only HLS formats -+ [utils] Add support for zero years and months in parse_duration - -Extractors -* [egghead] Fix extraction (#14388) -+ [fxnetworks] Extract series metadata (#14603) -+ [younow] Add support for younow.com (#9255, #9432, #12436) -* [dctptv] Fix extraction (#14599) -* [youtube] Restrict embed regular expression (#14600) -* [vimeo] Restrict iframe embed regular expression (#14600) -* [soundgasm] Improve extraction (#14588) -- [myvideo] Remove extractor (#8557) -+ [nbc] Add support for classic-tv videos (#14575) -+ [vrtnu] Add support for cookies authentication and simplify (#11873) -+ [canvas] Add support for vrt.be/vrtnu (#11873) -* [twitch:clips] Fix title extraction (#14566) -+ [ndtv] Add support for sub-sites (#14534) -* [dramafever] Fix login error message extraction -+ [nick] Add support for more nickelodeon sites (no, dk, se, 
ch, fr, es, pt, - ro, hu) (#14553) - - -version 2017.10.20 - -Core -* [downloader/fragment] Report warning instead of error on inconsistent - download state -* [downloader/hls] Fix total fragments count when ad fragments exist - -Extractors -* [parliamentliveuk] Fix extraction (#14524) -* [soundcloud] Update client id (#14546) -+ [servus] Add support for servus.com (#14362) -+ [unity] Add support for unity3d.com (#14528) -* [youtube] Replace youtube redirect URLs in description (#14517) -* [pbs] Restrict direct video URL regular expression (#14519) -* [drtv] Respect preference for direct HTTP formats (#14509) -+ [eporner] Add support for embed URLs (#14507) -* [arte] Capture and output error message -* [niconico] Improve uploader metadata extraction robustness (#14135) - - -version 2017.10.15.1 - -Core -* [downloader/hls] Ignore anvato ad fragments (#14496) -* [downloader/fragment] Output ad fragment count - -Extractors -* [scrippsnetworks:watch] Bypass geo restriction -+ [anvato] Add ability to bypass geo restriction -* [redditr] Fix extraction for URLs with query (#14495) - - -version 2017.10.15 - -Core -+ [common] Add support for jwplayer youtube embeds - -Extractors -* [scrippsnetworks:watch] Fix extraction (#14389) -* [anvato] Process master m3u8 manifests -* [youtube] Fix relative URLs in description -* [spike] Bypass geo restriction -+ [howstuffworks] Add support for more domains -* [infoq] Fix http format downloading -+ [rtlnl] Add support for another type of embeds -+ [onionstudios] Add support for bulbs-video embeds -* [udn] Fix extraction -* [shahid] Fix extraction (#14448) -* [kaltura] Ignore Widevine encrypted video (.wvm) (#14471) -* [vh1] Fix extraction (#9613) - - -version 2017.10.12 - -Core -* [YoutubeDL] Improve _default_format_spec (#14461) - -Extractors -* [steam] Fix extraction (#14067) -+ [funk] Add support for funk.net (#14464) -+ [nexx] Add support for shortcuts and relax domain id extraction -+ [voxmedia] Add support for recode.net (#14173) 
-+ [once] Add support for vmap URLs -+ [generic] Add support for channel9 embeds (#14469) -* [tva] Fix extraction (#14328) -+ [tubitv] Add support for new URL format (#14460) -- [afreecatv:global] Remove extractor -- [youtube:shared] Remove extractor (#14420) -+ [slideslive] Add support for slideslive.com (#2680) -+ [facebook] Support thumbnails (#14416) -* [vvvvid] Fix episode number extraction (#14456) -* [hrti:playlist] Relax URL regular expression -* [wdr] Relax media link regular expression (#14447) -* [hrti] Relax URL regular expression (#14443) -* [fox] Delegate extraction to uplynk:preplay (#14147) -+ [youtube] Add support for hooktube.com (#14437) - - -version 2017.10.07 - -Core -* [YoutubeDL] Ignore duplicates in --playlist-items -* [YoutubeDL] Fix out of range --playlist-items for iterable playlists and - reduce code duplication (#14425) -+ [utils] Use cache in OnDemandPagedList by default -* [postprocessor/ffmpeg] Convert to opus using libopus (#14381) - -Extractors -* [reddit] Sort formats (#14430) -* [lnkgo] Relax URL regular expression (#14423) -* [pornflip] Extend URL regular expression (#14405, #14406) -+ [xtube] Add support for embed URLs (#14417) -+ [xvideos] Add support for embed URLs and improve extraction (#14409) -* [beeg] Fix extraction (#14403) -* [tvn24] Relax URL regular expression (#14395) -* [nbc] Fix extraction (#13651, #13715, #14137, #14198, #14312, #14314, #14378, - #14392, #14414, #14419, #14431) -+ [ketnet] Add support for videos without direct sources (#14377) -* [canvas] Generalize mediazone.vrt.be extractor and rework canvas and een -+ [afreecatv] Add support for adult videos (#14376) - - -version 2017.10.01 - -Core -* [YoutubeDL] Document youtube_include_dash_manifest - -Extractors -+ [tvp] Add support for new URL schema (#14368) -+ [generic] Add support for single format Video.js embeds (#14371) -* [yahoo] Bypass geo restriction for brightcove (#14210) -* [yahoo] Use extracted brightcove account id (#14210) -* 
[rtve:alacarta] Fix extraction (#14290) -+ [yahoo] Add support for custom brightcove embeds (#14210) -+ [generic] Add support for Video.js embeds -+ [gfycat] Add support for /gifs/detail URLs (#14322) -* [generic] Fix infinite recursion for twitter:player URLs (#14339) -* [xhamsterembed] Fix extraction (#14308) - - -version 2017.09.24 - -Core -+ [options] Accept lrc as a subtitle conversion target format (#14292) -* [utils] Fix handling raw TTML subtitles (#14191) - -Extractors -* [24video] Fix timestamp extraction and make non fatal (#14295) -+ [24video] Add support for 24video.adult (#14295) -+ [kakao] Add support for tv.kakao.com (#12298, #14007) -+ [twitter] Add support for URLs without user id (#14270) -+ [americastestkitchen] Add support for americastestkitchen.com (#10764, - #13996) -* [generic] Fix support for multiple HTML5 videos on one page (#14080) -* [mixcloud] Fix extraction (#14088, #14132) -+ [lynda] Add support for educourse.ga (#14286) -* [beeg] Fix extraction (#14275) -* [nbcsports:vplayer] Correct theplatform URL (#13873) -* [twitter] Fix duration extraction (#14141) -* [tvplay] Bypass geo restriction -+ [heise] Add support for YouTube embeds (#14109) -+ [popcorntv] Add support for popcorntv.it (#5914, #14211) -* [viki] Update app data (#14181) -* [morningstar] Relax URL regular expression (#14222) -* [openload] Fix extraction (#14225, #14257) -* [noovo] Fix extraction (#14214) -* [dailymotion:playlist] Relax URL regular expression (#14219) -+ [twitch] Add support for go.twitch.tv URLs (#14215) -* [vgtv] Relax URL regular expression (#14223) - - -version 2017.09.15 - -Core -* [downloader/fragment] Restart inconsistent incomplete fragment downloads - (#13731) -* [YoutubeDL] Download raw subtitles files (#12909, #14191) - -Extractors -* [condenast] Fix extraction (#14196, #14207) -+ [orf] Add support for f4m stories -* [tv4] Relax URL regular expression (#14206) -* [animeondemand] Bypass geo restriction -+ [animeondemand] Add support for flash 
videos (#9944) - - -version 2017.09.11 - -Extractors -* [rutube:playlist] Fix suitable (#14166) - - -version 2017.09.10 - -Core -+ [utils] Introduce bool_or_none -* [YoutubeDL] Ensure dir existence for each requested format (#14116) - -Extractors -* [fox] Fix extraction (#14147) -* [rutube] Use bool_or_none -* [rutube] Rework and generalize playlist extractors (#13565) -+ [rutube:playlist] Add support for playlists (#13534, #13565) -+ [radiocanada] Add fallback for title extraction (#14145) -* [vk] Use dedicated YouTube embeds extraction routine -* [vice] Use dedicated YouTube embeds extraction routine -* [cracked] Use dedicated YouTube embeds extraction routine -* [chilloutzone] Use dedicated YouTube embeds extraction routine -* [abcnews] Use dedicated YouTube embeds extraction routine -* [youtube] Separate methods for embeds extraction -* [redtube] Fix formats extraction (#14122) -* [arte] Relax unavailability check (#14112) -+ [manyvids] Add support for preview videos from manyvids.com (#14053, #14059) -* [vidme:user] Relax URL regular expression (#14054) -* [bpb] Fix extraction (#14043, #14086) -* [soundcloud] Fix download URL with private tracks (#14093) -* [aliexpress:live] Add support for live.aliexpress.com (#13698, #13707) -* [viidea] Capture and output lecture error message (#14099) -* [radiocanada] Skip unsupported platforms (#14100) - - -version 2017.09.02 - -Extractors -* [youtube] Force old layout for each webpage (#14068, #14072, #14074, #14076, - #14077, #14079, #14082, #14083, #14094, #14095, #14096) -* [youtube] Fix upload date extraction (#14065) -+ [charlierose] Add support for episodes (#14062) -+ [bbccouk] Add support for w-prefixed ids (#14056) -* [googledrive] Extend URL regular expression (#9785) -+ [googledrive] Add support for source format (#14046) -* [pornhd] Fix extraction (#14005) - - -version 2017.08.27.1 - -Extractors - -* [youtube] Fix extraction with --youtube-skip-dash-manifest enabled (#14037) - - -version 2017.08.27 - -Core -+ 
[extractor/common] Extract height and format id for HTML5 videos (#14034) -* [downloader/http] Rework HTTP downloader (#506, #809, #2849, #4240, #6023, - #8625, #9483) - * Simplify code and split into separate routines to facilitate maintaining - * Make retry mechanism work on errors during actual download not only - during connection establishment phase - * Retry on ECONNRESET and ETIMEDOUT during reading data from network - * Retry on content too short - * Show error description on retry - -Extractors -* [generic] Lower preference for extraction from LD-JSON -* [rai] Fix audio formats extraction (#14024) -* [youtube] Fix controversy videos extraction (#14027, #14029) -* [mixcloud] Fix extraction (#14015, #14020) - - -version 2017.08.23 - -Core -+ [extractor/common] Introduce _parse_xml -* [extractor/common] Make HLS and DASH extraction in _parse_html5_media_entries - non fatal (#13970) -* [utils] Fix unescapeHTML for malformed string like "&a"" (#13935) - -Extractors -* [cbc:watch] Bypass geo restriction (#13993) -* [toutv] Relax DRM check (#13994) -+ [googledrive] Add support for subtitles (#13619, #13638) -* [pornhub] Relax uploader regular expression (#13906, #13975) -* [bandcamp:album] Extract track titles (#13962) -+ [bbccouk] Add support for events URLs (#13893) -+ [liveleak] Support multi-video pages (#6542) -+ [liveleak] Support another liveleak embedding pattern (#13336) -* [cda] Fix extraction (#13935) -+ [laola1tv] Add support for tv.ittf.com (#13965) -* [mixcloud] Fix extraction (#13958, #13974, #13980, #14003) - - -version 2017.08.18 - -Core -* [YoutubeDL] Sanitize byte string format URLs (#13951) -+ [extractor/common] Add support for float durations in _parse_mpd_formats - (#13919) - -Extractors -* [arte] Detect unavailable videos (#13945) -* [generic] Convert redirect URLs to unicode strings (#13951) -* [udemy] Fix paid course detection (#13943) -* [pluralsight] Use RPC API for course extraction (#13937) -+ [clippit] Add support for clippituser.tv 
-+ [qqmusic] Support new URL schemes (#13805) -* [periscope] Renew HLS extraction (#13917) -* [mixcloud] Extract decrypt key - - -version 2017.08.13 - -Core -* [YoutubeDL] Make sure format id is not empty -* [extractor/common] Make _family_friendly_search optional -* [extractor/common] Respect source's type attribute for HTML5 media (#13892) - -Extractors -* [pornhub:playlistbase] Skip videos from drop-down menu (#12819, #13902) -+ [fourtube] Add support for pornerbros.com (#6022) -+ [fourtube] Add support for porntube.com (#7859, #13901) -+ [fourtube] Add support for fux.com -* [limelight] Improve embeds detection (#13895) -+ [reddit] Add support for v.redd.it and reddit.com (#13847) -* [aparat] Extract all formats (#13887) -* [mixcloud] Fix play info decryption (#13885) -+ [generic] Add support for vzaar embeds (#13876) - - -version 2017.08.09 - -Core -* [utils] Skip missing params in cli_bool_option (#13865) - -Extractors -* [xxxymovies] Fix title extraction (#13868) -+ [nick] Add support for nick.com.pl (#13860) -* [mixcloud] Fix play info decryption (#13867) -* [20min] Fix embeds extraction (#13852) -* [dplayit] Fix extraction (#13851) -+ [niconico] Support videos with multiple formats (#13522) -+ [niconico] Support HTML5-only videos (#13806) - - -version 2017.08.06 - -Core -* Use relative paths for DASH fragments (#12990) - -Extractors -* [pluralsight] Fix format selection -- [mpora] Remove extractor (#13826) -+ [voot] Add support for voot.com (#10255, #11644, #11814, #12350, #13218) -* [vlive:channel] Limit number of videos per page to 100 (#13830) -* [podomatic] Extend URL regular expression (#13827) -* [cinchcast] Extend URL regular expression -* [yandexdisk] Relax URL regular expression (#13824) -* [vidme] Extract DASH and HLS formats -- [teamfour] Remove extractor (#13782) -* [pornhd] Fix extraction (#13783) -* [udemy] Fix subtitles extraction (#13812) -* [mlb] Extend URL regular expression (#13740, #13773) -+ [pbs] Add support for new URL schema (#13801) -* [nrktv] 
Update API host (#13796) - - -version 2017.07.30.1 - -Core -* [downloader/hls] Use redirect URL as manifest base (#13755) -* [options] Correctly hide login info from debug outputs (#13696) - -Extractors -+ [watchbox] Add support for watchbox.de (#13739) -- [clipfish] Remove extractor -+ [youjizz] Fix extraction (#13744) -+ [generic] Add support for another ooyala embed pattern (#13727) -+ [ard] Add support for lives (#13771) -* [soundcloud] Update client id -+ [soundcloud:trackstation] Add support for track stations (#13733) -* [svtplay] Use geo verification proxy for API request -* [svtplay] Update API URL (#13767) -+ [yandexdisk] Add support for yadi.sk (#13755) -+ [megaphone] Add support for megaphone.fm -* [amcnetworks] Make rating optional (#12453) -* [cloudy] Fix extraction (#13737) -+ [nickru] Add support for nickelodeon.ru -* [mtv] Improve thumbnail extraction -* [nick] Automate geo-restriction bypass (#13711) -* [niconico] Improve error reporting (#13696) - - -version 2017.07.23 - -Core -* [YoutubeDL] Improve default format specification (#13704) -* [YoutubeDL] Do not override id, extractor and extractor_key for - url_transparent entities -* [extractor/common] Fix playlist_from_matches - -Extractors -* [itv] Fix production id extraction (#13671, #13703) -* [vidio] Make duration non fatal and fix typo -* [mtv] Skip missing video parts (#13690) -* [sportbox:embed] Fix extraction -+ [npo] Add support for npo3.nl URLs (#13695) -* [dramafever] Remove video id from title (#13699) -+ [egghead:lesson] Add support for lessons (#6635) -* [funnyordie] Extract more metadata (#13677) -* [youku:show] Fix playlist extraction (#13248) -+ [dispeak] Recognize sevt subdomain (#13276) -* [adn] Improve error reporting (#13663) -* [crunchyroll] Relax series and season regular expression (#13659) -+ [spiegel:article] Add support for nexx iframe embeds (#13029) -+ [nexx:embed] Add support for iframe embeds -* [nexx] Improve JS embed extraction -+ [pearvideo] Add support for 
pearvideo.com (#13031) - - -version 2017.07.15 - -Core -* [YoutubeDL] Don't expand environment variables in meta fields (#13637) - -Extractors -* [spiegeltv] Delegate extraction to nexx extractor (#13159) -+ [nexx] Add support for nexx.cloud (#10807, #13465) -* [generic] Fix rutube embeds extraction (#13641) -* [karrierevideos] Fix title extraction (#13641) -* [youtube] Don't capture YouTube Red ad for creator meta field (#13621) -* [slideshare] Fix extraction (#13617) -+ [5tv] Add another video URL pattern (#13354, #13606) -* [drtv] Make HLS and HDS extraction non fatal -* [ted] Fix subtitles extraction (#13628, #13629) -* [vine] Make sure the title won't be empty -+ [twitter] Support HLS streams in vmap URLs -+ [periscope] Support pscp.tv URLs in embedded frames -* [twitter] Extract mp4 urls via mobile API (#12726) -* [niconico] Fix authentication error handling (#12486) -* [giantbomb] Extract m3u8 formats (#13626) -+ [vlive:playlist] Add support for playlists (#13613) - - -version 2017.07.09 - -Core -+ [extractor/common] Add support for AMP tags in _parse_html5_media_entries -+ [utils] Support attributes with no values in get_elements_by_attribute - -Extractors -+ [dailymail] Add support for embeds -+ [joj] Add support for joj.sk (#13268) -* [abc.net.au:iview] Extract more formats (#13492, #13489) -* [egghead:course] Fix extraction (#6635, #13370) -+ [cjsw] Add support for cjsw.com (#13525) -+ [eagleplatform] Add support for referrer protected videos (#13557) -+ [eagleplatform] Add support for another embed pattern (#13557) -* [veoh] Extend URL regular expression (#13601) -* [npo:live] Fix live stream id extraction (#13568, #13605) -* [googledrive] Fix height extraction (#13603) -+ [dailymotion] Add support for new layout (#13580) -- [yam] Remove extractor -* [xhamster] Extract all formats and fix duration extraction (#13593) -+ [xhamster] Add support for new URL schema (#13593) -* [espn] Extend URL regular expression (#13244, #13549) -* [kaltura] Fix typo in 
subtitles extraction (#13569) -* [vier] Adapt extraction to redesign (#13575) - - -version 2017.07.02 - -Core -* [extractor/common] Improve _json_ld - -Extractors -+ [thisoldhouse] Add more fallbacks for video id -* [thisoldhouse] Fix video id extraction (#13540, #13541) -* [xfileshare] Extend format regular expression (#13536) -* [ted] Fix extraction (#13535) -+ [tastytrade] Add support for tastytrade.com (#13521) -* [dplayit] Relax video id regular expression (#13524) -+ [generic] Extract more generic metadata (#13527) -+ [bbccouk] Capture and output error message (#13501, #13518) -* [cbsnews] Relax video info regular expression (#13284, #13503) -+ [facebook] Add support for plugin video embeds and multiple embeds (#13493) -* [soundcloud] Switch to https for API requests (#13502) -* [pandatv] Switch to https for API and download URLs -+ [pandatv] Add support for https URLs (#13491) -+ [niconico] Support sp subdomain (#13494) - - -version 2017.06.25 - -Core -+ [adobepass] Add support for DIRECTV NOW (mso ATTOTT) (#13472) -* [YoutubeDL] Skip malformed formats for better extraction robustness - -Extractors -+ [wsj] Add support for barrons.com (#13470) -+ [ign] Add another video id pattern (#13328) -+ [raiplay:live] Add support for live streams (#13414) -+ [redbulltv] Add support for live videos and segments (#13486) -+ [onetpl] Add support for videos embedded via pulsembed (#13482) -* [ooyala] Make more robust -* [ooyala] Skip empty format URLs (#13471, #13476) -* [hgtv.com:show] Fix typo - - -version 2017.06.23 - -Core -* [adobepass] Fix extraction on older python 2.6 - -Extractors -* [youtube] Adapt to new automatic captions rendition (#13467) -* [hgtv.com:show] Relax video config regular expression (#13279, #13461) -* [drtuber] Fix formats extraction (#12058) -* [youporn] Fix upload date extraction -* [youporn] Improve formats extraction -* [youporn] Fix title extraction (#13456) -* [googledrive] Fix formats sorting (#13443) -* [watchindianporn] Fix extraction 
(#13411, #13415) -+ [vimeo] Add fallback mp4 extension for original format -+ [ruv] Add support for ruv.is (#13396) -* [viu] Fix extraction on older python 2.6 -* [pandora.tv] Fix upload_date extraction (#12846) -+ [asiancrush] Add support for asiancrush.com (#13420) - - -version 2017.06.18 - -Core -* [downloader/common] Use utils.shell_quote for debug command line -* [utils] Use compat_shlex_quote in shell_quote -* [postprocessor/execafterdownload] Encode command line (#13407) -* [compat] Fix compat_shlex_quote on Windows (#5889, #10254) -* [postprocessor/metadatafromtitle] Fix missing optional meta fields processing - in --metadata-from-title (#13408) -* [extractor/common] Fix json dumping with --geo-bypass -+ [extractor/common] Improve jwplayer subtitles extraction -+ [extractor/common] Improve jwplayer formats extraction (#13379) - -Extractors -* [polskieradio] Fix extraction (#13392) -+ [xfileshare] Add support for fastvideo.me (#13385) -* [bilibili] Fix extraction of videos with double quotes in titles (#13387) -* [4tube] Fix extraction (#13381, #13382) -+ [disney] Add support for disneychannel.de (#13383) -* [npo] Improve URL regular expression (#13376) -+ [corus] Add support for showcase.ca -+ [corus] Add support for history.ca (#13359) - - -version 2017.06.12 - -Core -* [utils] Handle compat_HTMLParseError in extract_attributes (#13349) -+ [compat] Introduce compat_HTMLParseError -* [utils] Improve unified_timestamp -* [extractor/generic] Ensure format id is unicode string -* [extractor/common] Return unicode string from _match_id -+ [YoutubeDL] Sanitize more fields (#13313) - -Extractors -+ [xfileshare] Add support for rapidvideo.tv (#13348) -* [xfileshare] Modernize and pass Referer -+ [rutv] Add support for testplayer.vgtrk.com (#13347) -+ [newgrounds] Extract more metadata (#13232) -+ [newgrounds:playlist] Add support for playlists (#10611) -* [newgrounds] Improve formats and uploader extraction (#13346) -* [msn] Fix formats extraction -* [turbo] 
Ensure format id is string -* [sexu] Ensure height is int -* [jove] Ensure comment count is int -* [golem] Ensure format id is string -* [gfycat] Ensure filesize is int -* [foxgay] Ensure height is int -* [flickr] Ensure format id is string -* [sohu] Fix numeric fields -* [safari] Improve authentication detection (#13319) -* [liveleak] Ensure height is int (#13313) -* [streamango] Make title optional (#13292) -* [rtlnl] Improve URL regular expression (#13295) -* [tvplayer] Fix extraction (#13291) - - -version 2017.06.05 - -Core -* [YoutubeDL] Don't emit ANSI escape codes on Windows (#13270) - -Extractors -+ [bandcamp:weekly] Add support for bandcamp weekly (#12758) -* [pornhub:playlist] Fix extraction (#13281) -- [godtv] Remove extractor (#13175) -* [safari] Fix typo (#13252) -* [youtube] Improve chapters extraction (#13247) -* [1tv] Lower preference for HTTP formats (#13246) -* [francetv] Relax URL regular expression -* [drbonanza] Fix extraction (#13231) -* [packtpub] Fix authentication (#13240) - - -version 2017.05.29 - -Extractors -* [youtube] Fix DASH MPD extraction for videos with non-encrypted format URLs - (#13211) -* [xhamster] Fix uploader and like/dislike count extraction (#13216)) -+ [xhamster] Extract categories (#11728) -+ [abcnews] Add support for embed URLs (#12851) -* [gaskrank] Fix extraction (#12493) -* [medialaan] Fix videos with missing videoUrl (#12774) -* [dvtv] Fix playlist support -+ [dvtv] Add support for DASH and HLS formats (#3063) -+ [beam:vod] Add support for beam.pro/mixer.com VODs (#13032)) -* [cbsinteractive] Relax URL regular expression (#13213) -* [adn] Fix formats extraction -+ [youku] Extract more metadata (#10433) -* [cbsnews] Fix extraction (#13205) - - -version 2017.05.26 - -Core -+ [utils] strip_jsonp() can recognize more patterns -* [postprocessor/ffmpeg] Fix metadata filename handling on Python 2 (#13182) - -Extractors -+ [youtube] DASH MPDs with cipher signatures are recognized now (#11381) -+ [bbc] Add support for 
authentication -* [tudou] Merge into youku extractor (#12214) -* [youku:show] Fix extraction -* [youku] Fix extraction (#13191) -* [udemy] Fix extraction for outputs' format entries without URL (#13192) -* [vimeo] Fix formats' sorting (#13189) -* [cbsnews] Fix extraction for 60 Minutes videos (#12861) - - -version 2017.05.23 - -Core -+ [downloader/external] Pass -loglevel to ffmpeg downloader (#13183) -+ [adobepass] Add support for Bright House Networks (#13149) - -Extractors -+ [streamcz] Add support for subtitles (#13174) -* [youtube] Fix DASH manifest signature decryption (#8944, #13156) -* [toggle] Relax URL regular expression (#13172) -* [toypics] Fix extraction (#13077) -* [njpwworld] Fix extraction (#13162, #13169) -+ [hitbox] Add support for smashcast.tv (#13154) -* [mitele] Update app key regular expression (#13158) - - -version 2017.05.18.1 - -Core -* [jsinterp] Fix typo and cleanup regular expressions (#13134) - - -version 2017.05.18 - -Core -+ [jsinterp] Add support for quoted names and indexers (#13123, #13124, #13125, - #13126, #13128, #13129, #13130, #13131, #13132) -+ [extractor/common] Add support for schemeless URLs in _extract_wowza_formats - (#13088, #13092) -+ [utils] Recognize more audio codecs (#13081) - -Extractors -+ [vier] Extract more metadata (#12539) -* [vier] Improve extraction (#12801) - + Add support for authentication - * Bypass authentication when no credentials provided - * Improve extraction robustness -* [dailymail] Fix sources extraction (#13057) -* [dailymotion] Extend URL regular expression (#13079) - - -version 2017.05.14 - -Core -+ [extractor/common] Respect Width and Height attributes in ISM manifests -+ [postprocessor/metadatafromtitle] Add support regular expression syntax for - --metadata-from-title (#13065) - -Extractors -+ [mediaset] Add support for video.mediaset.it (#12708, #12964) -* [orf:radio] Fix extraction (#11643, #12926) -* [aljazeera] Extend URL regular expression (#13053) -* [imdb] Relax URL regular 
expression (#13056) -+ [francetv] Add support for mobile.france.tv (#13068) -+ [upskill] Add support for upskillcourses.com (#13043) -* [thescene] Fix extraction (#13061) -* [condenast] Improve embed support -* [liveleak] Fix extraction (#12053) -+ [douyu] Support Douyu shows (#12228) -* [myspace] Improve URL regular expression (#13040) -* [adultswim] Use desktop platform in assets URL (#13041) - - -version 2017.05.09 - -Core -* [YoutubeDL] Force --restrict-filenames when no locale is set on all python - versions (#13027) - -Extractors -* [francetv] Adapt to site redesign (#13034) -+ [packtpub] Add support for authentication (#12622) -* [drtv] Lower preference for SignLanguage formats (#13013, #13016) -+ [cspan] Add support for brightcove live embeds (#13028) -* [vrv] Extract DASH formats and subtitles -* [funimation] Fix authentication (#13021) -* [adultswim] Fix extraction (#8640, #10950, #11042, #12121) - + Add support for Adobe Pass authentication - + Add support for live streams - + Add support for show pages -* [turner] Extract thumbnail, is_live and strip description -+ [nonktube] Add support for nonktube.com (#8647, #13024) -+ [nuevo] Pass headers to _extract_nuevo -* [nbc] Improve extraction (#12364) - - -version 2017.05.07 - -Common -* [extractor/common] Fix typo in _extract_akamai_formats -+ [postprocessor/ffmpeg] Embed chapters into media file with --add-metadata -+ [extractor/common] Introduce chapters meta field - -Extractors -* [youtube] Fix authentication (#12820, #12927, #12973, #12992, #12993, #12995, - #13003) -* [bilibili] Fix video downloading (#13001) -* [rmcdecouverte] Fix extraction (#12937) -* [theplatform] Extract chapters -* [bandcamp] Fix thumbnail extraction (#12980) -* [pornhub] Extend URL regular expression (#12996) -+ [youtube] Extract chapters -+ [nrk] Extract chapters -+ [vice] Add support for ooyala embeds in article pages -+ [vice] Support vice articles (#12968) -* [vice] Fix extraction for non en_us videos (#12967) -* [gdcvault] 
Fix extraction for some videos (#12733) -* [pbs] Improve multipart video support (#12981) -* [laola1tv] Fix extraction (#12880) -+ [cda] Support birthday verification (#12789) -* [leeco] Fix extraction (#12974) -+ [pbs] Extract chapters -* [amp] Improve thumbnail and subtitles extraction -* [foxsports] Fix extraction (#12945) -- [coub] Remove comment count extraction (#12941) - - -version 2017.05.01 - -Core -+ [extractor/common] Extract view count from JSON-LD -* [utils] Improve unified_timestamp -+ [utils] Add video/mp2t to mimetype2ext -* [downloader/external] Properly handle live stream downloading cancellation - (#8932) -+ [utils] Add support for unicode whitespace in clean_html on python 2 (#12906) - -Extractors -* [infoq] Make audio format extraction non fatal (#12938) -* [brightcove] Allow whitespace around attribute names in embedded code -+ [zaq1] Add support for zaq1.pl (#12693) -+ [xvideos] Extract duration (#12828) -* [vevo] Fix extraction (#12879) -+ [noovo] Add support for noovo.ca (#12792) -+ [washingtonpost] Add support for embeds (#12699) -* [yandexmusic:playlist] Fix extraction for python 3 (#12888) -* [anvato] Improve extraction (#12913) - * Promote to regular shortcut based extractor - * Add mcp to access key mapping table - * Add support for embeds extraction - * Add support for anvato embeds in generic extractor -* [xtube] Fix extraction for older FLV videos (#12734) -* [tvplayer] Fix extraction (#12908) - - -version 2017.04.28 - -Core -+ [adobepass] Use geo verification headers for all requests -- [downloader/fragment] Remove assert for resume_len when no fragments - downloaded -+ [extractor/common] Add manifest_url for explicit group rendition formats -* [extractor/common] Fix manifest_url for m3u8 formats -- [extractor/common] Don't list master m3u8 playlists in format list (#12832) - -Extractor -* [aenetworks] Fix extraction for shows with single season -+ [go] Add support for Disney, DisneyJunior and DisneyXD show pages -* [youtube] 
Recognize new locale-based player URLs (#12885) -+ [streamable] Add support for new embedded URL schema (#12844) -* [arte:+7] Relax URL regular expression (#12837) - - -version 2017.04.26 - -Core -* Introduce --keep-fragments for keeping fragments of fragmented download - on disk after download is finished -* [YoutubeDL] Fix output template for missing timestamp (#12796) -* [socks] Handle cases where credentials are required but missing -* [extractor/common] Improve HLS extraction (#12211) - * Extract m3u8 parsing to separate method - * Improve rendition groups extraction - * Build stream name according stream GROUP-ID - * Ignore reference to AUDIO group without URI when stream has no CODECS - * Use float for scaled tbr in _parse_m3u8_formats -* [utils] Add support for TTML styles in dfxp2srt -* [downloader/hls] No need to download keys for fragments that have been - already downloaded -* [downloader/fragment] Improve fragment downloading - * Resume immediately - * Don't concatenate fragments and decrypt them on every resume - * Optimize disk storage usage, don't store intermediate fragments on disk - * Store bookkeeping download state file -+ [extractor/common] Add support for multiple getters in try_get -+ [extractor/common] Add support for video of WebPage context in _json_ld - (#12778) -+ [extractor/common] Relax JWPlayer regular expression and remove - duplicate URLs (#12768) - -Extractors -* [iqiyi] Fix extraction of Yule videos -* [vidio] Improve extraction and sort formats -+ [brightcove] Match only video elements with data-video-id attribute -* [iqiyi] Fix playlist detection (#12504) -- [azubu] Remove extractor (#12813) -* [porn91] Fix extraction (#12814) -* [vidzi] Fix extraction (#12793) -+ [amp] Extract error message (#12795) -+ [xfileshare] Add support for gorillavid.com and daclips.com (#12776) -* [instagram] Fix extraction (#12777) -+ [generic] Support Brightcove videos in ', - webpage, 'embed url')) - if VKIE.suitable(embed_url): - return 
self.url_result(embed_url, VKIE.ie_key(), video_id) - - embed_page = self._download_webpage( - embed_url, video_id, headers={'Referer': url}) - video_ext = self._get_cookies(embed_url).get('video_ext') - if video_ext: - video_ext = compat_urllib_parse_unquote(video_ext.value) - if not video_ext: - video_ext = compat_b64decode(self._search_regex( - r'video_ext\s*:\s*[\'"]([A-Za-z0-9+/=]+)', - embed_page, 'video_ext')).decode() - video_id, sig, _, access_token = video_ext.split(':') - item = self._download_json( - 'https://api.vk.com/method/video.get', video_id, - headers={'User-Agent': 'okhttp/3.4.1'}, query={ - 'access_token': access_token, - 'sig': sig, - 'v': 5.44, - 'videos': video_id, - })['response']['items'][0] - title = item['title'] - - formats = [] - for f_id, f_url in item.get('files', {}).items(): - if f_id == 'external': - return self.url_result(f_url) - ext, height = f_id.split('_') - formats.append({ - 'format_id': height + 'p', - 'url': f_url, - 'height': int_or_none(height), - 'ext': ext, - }) - self._sort_formats(formats) - - thumbnails = [] - for k, v in item.items(): - if k.startswith('photo_') and v: - width = k.replace('photo_', '') - thumbnails.append({ - 'id': width, - 'url': v, - 'width': int_or_none(width), - }) - - return { - 'id': video_id, - 'title': title, - 'formats': formats, - 'comment_count': int_or_none(item.get('comments')), - 'description': item.get('description'), - 'duration': int_or_none(item.get('duration')), - 'thumbnails': thumbnails, - 'timestamp': int_or_none(item.get('date')), - 'uploader': item.get('owner_id'), - 'view_count': int_or_none(item.get('views')), - } diff --git a/youtube_dl/extractor/bitchute.py b/youtube_dl/extractor/bitchute.py deleted file mode 100644 index 0c773e66e..000000000 --- a/youtube_dl/extractor/bitchute.py +++ /dev/null @@ -1,142 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import itertools -import re - -from .common import InfoExtractor -from ..utils import ( - 
orderedSet, - unified_strdate, - urlencode_postdata, -) - - -class BitChuteIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?bitchute\.com/(?:video|embed|torrent/[^/]+)/(?P[^/?#&]+)' - _TESTS = [{ - 'url': 'https://www.bitchute.com/video/szoMrox2JEI/', - 'md5': '66c4a70e6bfc40dcb6be3eb1d74939eb', - 'info_dict': { - 'id': 'szoMrox2JEI', - 'ext': 'mp4', - 'title': 'Fuck bitches get money', - 'description': 'md5:3f21f6fb5b1d17c3dee9cf6b5fe60b3a', - 'thumbnail': r're:^https?://.*\.jpg$', - 'uploader': 'Victoria X Rave', - 'upload_date': '20170813', - }, - }, { - 'url': 'https://www.bitchute.com/embed/lbb5G1hjPhw/', - 'only_matching': True, - }, { - 'url': 'https://www.bitchute.com/torrent/Zee5BE49045h/szoMrox2JEI.webtorrent', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage( - 'https://www.bitchute.com/video/%s' % video_id, video_id, headers={ - 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.57 Safari/537.36', - }) - - title = self._html_search_regex( - (r'<[^>]+\bid=["\']video-title[^>]+>([^<]+)', r'([^<]+)'), - webpage, 'title', default=None) or self._html_search_meta( - 'description', webpage, 'title', - default=None) or self._og_search_description(webpage) - - format_urls = [] - for mobj in re.finditer( - r'addWebSeed\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage): - format_urls.append(mobj.group('url')) - format_urls.extend(re.findall(r'as=(https?://[^&"\']+)', webpage)) - - formats = [ - {'url': format_url} - for format_url in orderedSet(format_urls)] - - if not formats: - formats = self._parse_html5_media_entries( - url, webpage, video_id)[0]['formats'] - - self._check_formats(formats, video_id) - self._sort_formats(formats) - - description = self._html_search_regex( - r'(?s)<div\b[^>]+\bclass=["\']full hidden[^>]+>(.+?)</div>', - webpage, 'description', fatal=False) - thumbnail = 
self._og_search_thumbnail( - webpage, default=None) or self._html_search_meta( - 'twitter:image:src', webpage, 'thumbnail') - uploader = self._html_search_regex( - (r'(?s)<div class=["\']channel-banner.*?<p\b[^>]+\bclass=["\']name[^>]+>(.+?)</p>', - r'(?s)<p\b[^>]+\bclass=["\']video-author[^>]+>(.+?)</p>'), - webpage, 'uploader', fatal=False) - - upload_date = unified_strdate(self._search_regex( - r'class=["\']video-publish-date[^>]+>[^<]+ at \d+:\d+ UTC on (.+?)\.', - webpage, 'upload date', fatal=False)) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'uploader': uploader, - 'upload_date': upload_date, - 'formats': formats, - } - - -class BitChuteChannelIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?bitchute\.com/channel/(?P<id>[^/?#&]+)' - _TEST = { - 'url': 'https://www.bitchute.com/channel/victoriaxrave/', - 'playlist_mincount': 185, - 'info_dict': { - 'id': 'victoriaxrave', - }, - } - - _TOKEN = 'zyG6tQcGPE5swyAEFLqKUwMuMMuF6IO2DZ6ZDQjGfsL0e4dcTLwqkTTul05Jdve7' - - def _entries(self, channel_id): - channel_url = 'https://www.bitchute.com/channel/%s/' % channel_id - offset = 0 - for page_num in itertools.count(1): - data = self._download_json( - '%sextend/' % channel_url, channel_id, - 'Downloading channel page %d' % page_num, - data=urlencode_postdata({ - 'csrfmiddlewaretoken': self._TOKEN, - 'name': '', - 'offset': offset, - }), headers={ - 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', - 'Referer': channel_url, - 'X-Requested-With': 'XMLHttpRequest', - 'Cookie': 'csrftoken=%s' % self._TOKEN, - }) - if data.get('success') is False: - break - html = data.get('html') - if not html: - break - video_ids = re.findall( - r'class=["\']channel-videos-image-container[^>]+>\s*<a\b[^>]+\bhref=["\']/video/([^"\'/]+)', - html) - if not video_ids: - break - offset += len(video_ids) - for video_id in video_ids: - yield self.url_result( - 'https://www.bitchute.com/video/%s' % video_id, - 
ie=BitChuteIE.ie_key(), video_id=video_id) - - def _real_extract(self, url): - channel_id = self._match_id(url) - return self.playlist_result( - self._entries(channel_id), playlist_id=channel_id) diff --git a/youtube_dl/extractor/bleacherreport.py b/youtube_dl/extractor/bleacherreport.py deleted file mode 100644 index d1bf8e829..000000000 --- a/youtube_dl/extractor/bleacherreport.py +++ /dev/null @@ -1,112 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from .amp import AMPIE -from ..utils import ( - ExtractorError, - int_or_none, - parse_iso8601, -) - - -class BleacherReportIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/articles/(?P<id>\d+)' - _TESTS = [{ - 'url': 'http://bleacherreport.com/articles/2496438-fsu-stat-projections-is-jalen-ramsey-best-defensive-player-in-college-football', - 'md5': 'a3ffc3dc73afdbc2010f02d98f990f20', - 'info_dict': { - 'id': '2496438', - 'ext': 'mp4', - 'title': 'FSU Stat Projections: Is Jalen Ramsey Best Defensive Player in College Football?', - 'uploader_id': 3992341, - 'description': 'CFB, ACC, Florida State', - 'timestamp': 1434380212, - 'upload_date': '20150615', - 'uploader': 'Team Stream Now ', - }, - 'add_ie': ['Ooyala'], - }, { - 'url': 'http://bleacherreport.com/articles/2586817-aussie-golfers-get-fright-of-their-lives-after-being-chased-by-angry-kangaroo', - 'md5': '6a5cd403418c7b01719248ca97fb0692', - 'info_dict': { - 'id': '2586817', - 'ext': 'webm', - 'title': 'Aussie Golfers Get Fright of Their Lives After Being Chased by Angry Kangaroo', - 'timestamp': 1446839961, - 'uploader': 'Sean Fay', - 'description': 'md5:b1601e2314c4d8eec23b6eafe086a757', - 'uploader_id': 6466954, - 'upload_date': '20151011', - }, - 'add_ie': ['Youtube'], - }] - - def _real_extract(self, url): - article_id = self._match_id(url) - - article_data = self._download_json('http://api.bleacherreport.com/api/v1/articles/%s' % article_id, article_id)['article'] - - 
thumbnails = [] - primary_photo = article_data.get('primaryPhoto') - if primary_photo: - thumbnails = [{ - 'url': primary_photo['url'], - 'width': primary_photo.get('width'), - 'height': primary_photo.get('height'), - }] - - info = { - '_type': 'url_transparent', - 'id': article_id, - 'title': article_data['title'], - 'uploader': article_data.get('author', {}).get('name'), - 'uploader_id': article_data.get('authorId'), - 'timestamp': parse_iso8601(article_data.get('createdAt')), - 'thumbnails': thumbnails, - 'comment_count': int_or_none(article_data.get('commentsCount')), - 'view_count': int_or_none(article_data.get('hitCount')), - } - - video = article_data.get('video') - if video: - video_type = video['type'] - if video_type in ('cms.bleacherreport.com', 'vid.bleacherreport.com'): - info['url'] = 'http://bleacherreport.com/video_embed?id=%s' % video['id'] - elif video_type == 'ooyala.com': - info['url'] = 'ooyala:%s' % video['id'] - elif video_type == 'youtube.com': - info['url'] = video['id'] - elif video_type == 'vine.co': - info['url'] = 'https://vine.co/v/%s' % video['id'] - else: - info['url'] = video_type + video['id'] - return info - else: - raise ExtractorError('no video in the article', expected=True) - - -class BleacherReportCMSIE(AMPIE): - _VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36}|\d{5})' - _TESTS = [{ - 'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1&library=video-cms', - 'md5': '670b2d73f48549da032861130488c681', - 'info_dict': { - 'id': '8fd44c2f-3dc5-4821-9118-2c825a98c0e1', - 'ext': 'mp4', - 'title': 'Cena vs. 
Rollins Would Expose the Heavyweight Division', - 'description': 'md5:984afb4ade2f9c0db35f3267ed88b36e', - 'upload_date': '20150723', - 'timestamp': 1437679032, - - }, - 'expected_warnings': [ - 'Unable to download f4m manifest' - ] - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - info = self._extract_feed_info('http://vid.bleacherreport.com/videos/%s.akamai' % video_id) - info['id'] = video_id - return info diff --git a/youtube_dl/extractor/blerp.py b/youtube_dl/extractor/blerp.py deleted file mode 100644 index 355daef6e..000000000 --- a/youtube_dl/extractor/blerp.py +++ /dev/null @@ -1,173 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import json - -from ..utils import ( - strip_or_none, - traverse_obj, -) -from .common import InfoExtractor - - -class BlerpIE(InfoExtractor): - IE_NAME = 'blerp' - _VALID_URL = r'https?://(?:www\.)?blerp\.com/soundbites/(?P<id>[0-9a-zA-Z]+)' - _TESTS = [{ - 'url': 'https://blerp.com/soundbites/6320fe8745636cb4dd677a5a', - 'info_dict': { - 'id': '6320fe8745636cb4dd677a5a', - 'title': 'Samsung Galaxy S8 Over the Horizon Ringtone 2016', - 'uploader': 'luminousaj', - 'uploader_id': '5fb81e51aa66ae000c395478', - 'ext': 'mp3', - 'tags': ['samsung', 'galaxy', 's8', 'over the horizon', '2016', 'ringtone'], - } - }, { - 'url': 'https://blerp.com/soundbites/5bc94ef4796001000498429f', - 'info_dict': { - 'id': '5bc94ef4796001000498429f', - 'title': 'Yee', - 'uploader': '179617322678353920', - 'uploader_id': '5ba99cf71386730004552c42', - 'ext': 'mp3', - 'tags': ['YEE', 'YEET', 'wo ha haah catchy tune yee', 'yee'] - } - }] - - _GRAPHQL_OPERATIONNAME = "webBitePageGetBite" - _GRAPHQL_QUERY = ( - '''query webBitePageGetBite($_id: MongoID!) 
{ - web { - biteById(_id: $_id) { - ...bitePageFrag - __typename - } - __typename - } - } - - fragment bitePageFrag on Bite { - _id - title - userKeywords - keywords - color - visibility - isPremium - owned - price - extraReview - isAudioExists - image { - filename - original { - url - __typename - } - __typename - } - userReactions { - _id - reactions - createdAt - __typename - } - topReactions - totalSaveCount - saved - blerpLibraryType - license - licenseMetaData - playCount - totalShareCount - totalFavoriteCount - totalAddedToBoardCount - userCategory - userAudioQuality - audioCreationState - transcription - userTranscription - description - createdAt - updatedAt - author - listingType - ownerObject { - _id - username - profileImage { - filename - original { - url - __typename - } - __typename - } - __typename - } - transcription - favorited - visibility - isCurated - sourceUrl - audienceRating - strictAudienceRating - ownerId - reportObject { - reportedContentStatus - __typename - } - giphy { - mp4 - gif - __typename - } - audio { - filename - original { - url - __typename - } - mp3 { - url - __typename - } - __typename - } - __typename - } - - ''') - - def _real_extract(self, url): - audio_id = self._match_id(url) - - data = { - 'operationName': self._GRAPHQL_OPERATIONNAME, - 'query': self._GRAPHQL_QUERY, - 'variables': { - '_id': audio_id - } - } - - headers = { - 'Content-Type': 'application/json' - } - - json_result = self._download_json('https://api.blerp.com/graphql', - audio_id, data=json.dumps(data).encode('utf-8'), headers=headers) - - bite_json = json_result['data']['web']['biteById'] - - info_dict = { - 'id': bite_json['_id'], - 'url': bite_json['audio']['mp3']['url'], - 'title': bite_json['title'], - 'uploader': traverse_obj(bite_json, ('ownerObject', 'username'), expected_type=strip_or_none), - 'uploader_id': traverse_obj(bite_json, ('ownerObject', '_id'), expected_type=strip_or_none), - 'ext': 'mp3', - 'tags': list(filter(None, map(strip_or_none, 
(traverse_obj(bite_json, 'userKeywords', expected_type=list) or []))) or None) - } - - return info_dict diff --git a/youtube_dl/extractor/bloomberg.py b/youtube_dl/extractor/bloomberg.py deleted file mode 100644 index 2fbfad1ba..000000000 --- a/youtube_dl/extractor/bloomberg.py +++ /dev/null @@ -1,83 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor - - -class BloombergIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?bloomberg\.com/(?:[^/]+/)*(?P<id>[^/?#]+)' - - _TESTS = [{ - 'url': 'http://www.bloomberg.com/news/videos/b/aaeae121-5949-481e-a1ce-4562db6f5df2', - # The md5 checksum changes - 'info_dict': { - 'id': 'qurhIVlJSB6hzkVi229d8g', - 'ext': 'flv', - 'title': 'Shah\'s Presentation on Foreign-Exchange Strategies', - 'description': 'md5:a8ba0302912d03d246979735c17d2761', - }, - 'params': { - 'format': 'best[format_id^=hds]', - }, - }, { - # video ID in BPlayer(...) - 'url': 'http://www.bloomberg.com/features/2016-hello-world-new-zealand/', - 'info_dict': { - 'id': '938c7e72-3f25-4ddb-8b85-a9be731baa74', - 'ext': 'flv', - 'title': 'Meet the Real-Life Tech Wizards of Middle Earth', - 'description': 'Hello World, Episode 1: New Zealand’s freaky AI babies, robot exoskeletons, and a virtual you.', - }, - 'params': { - 'format': 'best[format_id^=hds]', - }, - }, { - # data-bmmrid= - 'url': 'https://www.bloomberg.com/politics/articles/2017-02-08/le-pen-aide-briefed-french-central-banker-on-plan-to-print-money', - 'only_matching': True, - }, { - 'url': 'http://www.bloomberg.com/news/articles/2015-11-12/five-strange-things-that-have-been-happening-in-financial-markets', - 'only_matching': True, - }, { - 'url': 'http://www.bloomberg.com/politics/videos/2015-11-25/karl-rove-on-jeb-bush-s-struggles-stopping-trump', - 'only_matching': True, - }] - - def _real_extract(self, url): - name = self._match_id(url) - webpage = self._download_webpage(url, name) - video_id = self._search_regex( - 
(r'["\']bmmrId["\']\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1', - r'videoId\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1', - r'data-bmmrid=(["\'])(?P<id>(?:(?!\1).)+)\1'), - webpage, 'id', group='id', default=None) - if not video_id: - bplayer_data = self._parse_json(self._search_regex( - r'BPlayer\(null,\s*({[^;]+})\);', webpage, 'id'), name) - video_id = bplayer_data['id'] - title = re.sub(': Video$', '', self._og_search_title(webpage)) - - embed_info = self._download_json( - 'http://www.bloomberg.com/api/embed?id=%s' % video_id, video_id) - formats = [] - for stream in embed_info['streams']: - stream_url = stream.get('url') - if not stream_url: - continue - if stream['muxing_format'] == 'TS': - formats.extend(self._extract_m3u8_formats( - stream_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) - else: - formats.extend(self._extract_f4m_formats( - stream_url, video_id, f4m_id='hds', fatal=False)) - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': title, - 'formats': formats, - 'description': self._og_search_description(webpage), - 'thumbnail': self._og_search_thumbnail(webpage), - } diff --git a/youtube_dl/extractor/bokecc.py b/youtube_dl/extractor/bokecc.py deleted file mode 100644 index 4b8bef391..000000000 --- a/youtube_dl/extractor/bokecc.py +++ /dev/null @@ -1,60 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..compat import compat_parse_qs -from ..utils import ExtractorError - - -class BokeCCBaseIE(InfoExtractor): - def _extract_bokecc_formats(self, webpage, video_id, format_id=None): - player_params_str = self._html_search_regex( - r'<(?:script|embed)[^>]+src=(?P<q>["\'])(?:https?:)?//p\.bokecc\.com/(?:player|flash/player\.swf)\?(?P<query>.+?)(?P=q)', - webpage, 'player params', group='query') - - player_params = compat_parse_qs(player_params_str) - - info_xml = self._download_xml( - 'http://p.bokecc.com/servlet/playinfo?uid=%s&vid=%s&m=1' % ( - player_params['siteid'][0], 
player_params['vid'][0]), video_id) - - formats = [{ - 'format_id': format_id, - 'url': quality.find('./copy').attrib['playurl'], - 'preference': int(quality.attrib['value']), - } for quality in info_xml.findall('./video/quality')] - - self._sort_formats(formats) - - return formats - - -class BokeCCIE(BokeCCBaseIE): - IE_DESC = 'CC视频' - _VALID_URL = r'https?://union\.bokecc\.com/playvideo\.bo\?(?P<query>.*)' - - _TESTS = [{ - 'url': 'http://union.bokecc.com/playvideo.bo?vid=E0ABAE9D4F509B189C33DC5901307461&uid=FE644790DE9D154A', - 'info_dict': { - 'id': 'FE644790DE9D154A_E0ABAE9D4F509B189C33DC5901307461', - 'ext': 'flv', - 'title': 'BokeCC Video', - }, - }] - - def _real_extract(self, url): - qs = compat_parse_qs(re.match(self._VALID_URL, url).group('query')) - if not qs.get('vid') or not qs.get('uid'): - raise ExtractorError('Invalid URL', expected=True) - - video_id = '%s_%s' % (qs['uid'][0], qs['vid'][0]) - - webpage = self._download_webpage(url, video_id) - - return { - 'id': video_id, - 'title': 'BokeCC Video', # no title provided in the webpage - 'formats': self._extract_bokecc_formats(webpage, video_id), - } diff --git a/youtube_dl/extractor/bongacams.py b/youtube_dl/extractor/bongacams.py deleted file mode 100644 index 016999d55..000000000 --- a/youtube_dl/extractor/bongacams.py +++ /dev/null @@ -1,76 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..compat import compat_str -from ..utils import ( - int_or_none, - try_get, - urlencode_postdata, -) - - -class BongaCamsIE(InfoExtractor): - _VALID_URL = r'https?://(?P<host>(?:[^/]+\.)?bongacams\d*\.(?:com|net))/(?P<id>[^/?&#]+)' - _TESTS = [{ - 'url': 'https://de.bongacams.com/azumi-8', - 'only_matching': True, - }, { - 'url': 'https://cn.bongacams.com/azumi-8', - 'only_matching': True, - }, { - 'url': 'https://de.bongacams.net/claireashton', - 'info_dict': { - 'id': 'claireashton', - 'ext': 'mp4', - 'title': r're:ClaireAshton 
\d{4}-\d{2}-\d{2} \d{2}:\d{2}', - 'age_limit': 18, - 'uploader_id': 'ClaireAshton', - 'uploader': 'ClaireAshton', - 'like_count': int, - 'is_live': True, - }, - 'params': { - 'skip_download': True, - }, - }] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - host = mobj.group('host') - channel_id = mobj.group('id') - - amf = self._download_json( - 'https://%s/tools/amf.php' % host, channel_id, - data=urlencode_postdata(( - ('method', 'getRoomData'), - ('args[]', channel_id), - ('args[]', 'false'), - )), headers={'X-Requested-With': 'XMLHttpRequest'}) - - server_url = amf['localData']['videoServerUrl'] - - uploader_id = try_get( - amf, lambda x: x['performerData']['username'], compat_str) or channel_id - uploader = try_get( - amf, lambda x: x['performerData']['displayName'], compat_str) - like_count = int_or_none(try_get( - amf, lambda x: x['performerData']['loversCount'])) - - formats = self._extract_m3u8_formats( - '%s/hls/stream_%s/playlist.m3u8' % (server_url, uploader_id), - channel_id, 'mp4', m3u8_id='hls', live=True) - self._sort_formats(formats) - - return { - 'id': channel_id, - 'title': self._live_title(uploader or uploader_id), - 'uploader': uploader, - 'uploader_id': uploader_id, - 'like_count': like_count, - 'age_limit': 18, - 'is_live': True, - 'formats': formats, - } diff --git a/youtube_dl/extractor/bostonglobe.py b/youtube_dl/extractor/bostonglobe.py deleted file mode 100644 index 57882fbee..000000000 --- a/youtube_dl/extractor/bostonglobe.py +++ /dev/null @@ -1,72 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor - -from ..utils import ( - extract_attributes, -) - - -class BostonGlobeIE(InfoExtractor): - _VALID_URL = r'(?i)https?://(?:www\.)?bostonglobe\.com/.*/(?P<id>[^/]+)/\w+(?:\.html)?' 
- _TESTS = [ - { - 'url': 'http://www.bostonglobe.com/metro/2017/02/11/tree-finally-succumbs-disease-leaving-hole-neighborhood/h1b4lviqzMTIn9sVy8F3gP/story.html', - 'md5': '0a62181079c85c2d2b618c9a738aedaf', - 'info_dict': { - 'title': 'A tree finally succumbs to disease, leaving a hole in a neighborhood', - 'id': '5320421710001', - 'ext': 'mp4', - 'description': 'It arrived as a sapling when the Back Bay was in its infancy, a spindly American elm tamped down into a square of dirt cut into the brick sidewalk of 1880s Marlborough Street, no higher than the first bay window of the new brownstone behind it.', - 'timestamp': 1486877593, - 'upload_date': '20170212', - 'uploader_id': '245991542', - }, - }, - { - # Embedded youtube video; we hand it off to the Generic extractor. - 'url': 'https://www.bostonglobe.com/lifestyle/names/2017/02/17/does-ben-affleck-play-matt-damon-favorite-version-batman/ruqkc9VxKBYmh5txn1XhSI/story.html', - 'md5': '582b40327089d5c0c949b3c54b13c24b', - 'info_dict': { - 'title': "Who Is Matt Damon's Favorite Batman?", - 'id': 'ZW1QCnlA6Qc', - 'ext': 'mp4', - 'upload_date': '20170217', - 'description': 'md5:3b3dccb9375867e0b4d527ed87d307cb', - 'uploader': 'The Late Late Show with James Corden', - 'uploader_id': 'TheLateLateShow', - }, - 'expected_warnings': ['404'], - }, - ] - - def _real_extract(self, url): - page_id = self._match_id(url) - webpage = self._download_webpage(url, page_id) - - page_title = self._og_search_title(webpage, default=None) - - # <video data-brightcove-video-id="5320421710001" data-account="245991542" data-player="SJWAiyYWg" data-embed="default" class="video-js" controls itemscope itemtype="http://schema.org/VideoObject"> - entries = [] - for video in re.findall(r'(?i)(<video[^>]+>)', webpage): - attrs = extract_attributes(video) - - video_id = attrs.get('data-brightcove-video-id') - account_id = attrs.get('data-account') - player_id = attrs.get('data-player') - embed = attrs.get('data-embed') - - if video_id and 
account_id and player_id and embed: - entries.append( - 'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s' - % (account_id, player_id, embed, video_id)) - - if len(entries) == 0: - return self.url_result(url, 'Generic') - elif len(entries) == 1: - return self.url_result(entries[0], 'BrightcoveNew') - else: - return self.playlist_from_matches(entries, page_id, page_title, ie='BrightcoveNew') diff --git a/youtube_dl/extractor/box.py b/youtube_dl/extractor/box.py deleted file mode 100644 index aae82d1af..000000000 --- a/youtube_dl/extractor/box.py +++ /dev/null @@ -1,98 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import json -import re - -from .common import InfoExtractor -from ..utils import ( - determine_ext, - parse_iso8601, - # try_get, - update_url_query, -) - - -class BoxIE(InfoExtractor): - _VALID_URL = r'https?://(?:[^.]+\.)?app\.box\.com/s/(?P<shared_name>[^/]+)/file/(?P<id>\d+)' - _TEST = { - 'url': 'https://mlssoccer.app.box.com/s/0evd2o3e08l60lr4ygukepvnkord1o1x/file/510727257538', - 'md5': '1f81b2fd3960f38a40a3b8823e5fcd43', - 'info_dict': { - 'id': '510727257538', - 'ext': 'mp4', - 'title': 'Garber St. 
Louis will be 28th MLS team +scarving.mp4', - 'uploader': 'MLS Video', - 'timestamp': 1566320259, - 'upload_date': '20190820', - 'uploader_id': '235196876', - } - } - - def _real_extract(self, url): - shared_name, file_id = re.match(self._VALID_URL, url).groups() - webpage = self._download_webpage(url, file_id) - request_token = self._parse_json(self._search_regex( - r'Box\.config\s*=\s*({.+?});', webpage, - 'Box config'), file_id)['requestToken'] - access_token = self._download_json( - 'https://app.box.com/app-api/enduserapp/elements/tokens', file_id, - 'Downloading token JSON metadata', - data=json.dumps({'fileIDs': [file_id]}).encode(), headers={ - 'Content-Type': 'application/json', - 'X-Request-Token': request_token, - 'X-Box-EndUser-API': 'sharedName=' + shared_name, - })[file_id]['read'] - shared_link = 'https://app.box.com/s/' + shared_name - f = self._download_json( - 'https://api.box.com/2.0/files/' + file_id, file_id, - 'Downloading file JSON metadata', headers={ - 'Authorization': 'Bearer ' + access_token, - 'BoxApi': 'shared_link=' + shared_link, - 'X-Rep-Hints': '[dash]', # TODO: extract `hls` formats - }, query={ - 'fields': 'authenticated_download_url,created_at,created_by,description,extension,is_download_available,name,representations,size' - }) - title = f['name'] - - query = { - 'access_token': access_token, - 'shared_link': shared_link - } - - formats = [] - - # for entry in (try_get(f, lambda x: x['representations']['entries'], list) or []): - # entry_url_template = try_get( - # entry, lambda x: x['content']['url_template']) - # if not entry_url_template: - # continue - # representation = entry.get('representation') - # if representation == 'dash': - # TODO: append query to every fragment URL - # formats.extend(self._extract_mpd_formats( - # entry_url_template.replace('{+asset_path}', 'manifest.mpd'), - # file_id, query=query)) - - authenticated_download_url = f.get('authenticated_download_url') - if authenticated_download_url and 
f.get('is_download_available'): - formats.append({ - 'ext': f.get('extension') or determine_ext(title), - 'filesize': f.get('size'), - 'format_id': 'download', - 'url': update_url_query(authenticated_download_url, query), - }) - - self._sort_formats(formats) - - creator = f.get('created_by') or {} - - return { - 'id': file_id, - 'title': title, - 'formats': formats, - 'description': f.get('description') or None, - 'uploader': creator.get('name'), - 'timestamp': parse_iso8601(f.get('created_at')), - 'uploader_id': creator.get('id'), - } diff --git a/youtube_dl/extractor/bpb.py b/youtube_dl/extractor/bpb.py deleted file mode 100644 index 07833532e..000000000 --- a/youtube_dl/extractor/bpb.py +++ /dev/null @@ -1,62 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - js_to_json, - determine_ext, -) - - -class BpbIE(InfoExtractor): - IE_DESC = 'Bundeszentrale für politische Bildung' - _VALID_URL = r'https?://(?:www\.)?bpb\.de/mediathek/(?P<id>[0-9]+)/' - - _TEST = { - 'url': 'http://www.bpb.de/mediathek/297/joachim-gauck-zu-1989-und-die-erinnerung-an-die-ddr', - # md5 fails in Python 2.6 due to buggy server response and wrong handling of urllib2 - 'md5': 'c4f84c8a8044ca9ff68bb8441d300b3f', - 'info_dict': { - 'id': '297', - 'ext': 'mp4', - 'title': 'Joachim Gauck zu 1989 und die Erinnerung an die DDR', - 'description': 'Joachim Gauck, erster Beauftragter für die Stasi-Unterlagen, spricht auf dem Geschichtsforum über die friedliche Revolution 1989 und eine "gewisse Traurigkeit" im Umgang mit der DDR-Vergangenheit.' 
- } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - title = self._html_search_regex( - r'<h2 class="white">(.*?)</h2>', webpage, 'title') - video_info_dicts = re.findall( - r"({\s*src\s*:\s*'https?://film\.bpb\.de/[^}]+})", webpage) - - formats = [] - for video_info in video_info_dicts: - video_info = self._parse_json( - video_info, video_id, transform_source=js_to_json, fatal=False) - if not video_info: - continue - video_url = video_info.get('src') - if not video_url: - continue - quality = 'high' if '_high' in video_url else 'low' - formats.append({ - 'url': video_url, - 'preference': 10 if quality == 'high' else 0, - 'format_note': quality, - 'format_id': '%s-%s' % (quality, determine_ext(video_url)), - }) - - self._sort_formats(formats) - - return { - 'id': video_id, - 'formats': formats, - 'title': title, - 'description': self._og_search_description(webpage), - } diff --git a/youtube_dl/extractor/br.py b/youtube_dl/extractor/br.py deleted file mode 100644 index 9bde7f2d8..000000000 --- a/youtube_dl/extractor/br.py +++ /dev/null @@ -1,311 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import json -import re - -from .common import InfoExtractor -from ..utils import ( - determine_ext, - ExtractorError, - int_or_none, - parse_duration, - parse_iso8601, - xpath_element, - xpath_text, -) - - -class BRIE(InfoExtractor): - IE_DESC = 'Bayerischer Rundfunk' - _VALID_URL = r'(?P<base_url>https?://(?:www\.)?br(?:-klassik)?\.de)/(?:[a-z0-9\-_]+/)+(?P<id>[a-z0-9\-_]+)\.html' - - _TESTS = [ - { - 'url': 'http://www.br.de/mediathek/video/sendungen/abendschau/betriebliche-altersvorsorge-104.html', - 'md5': '83a0477cf0b8451027eb566d88b51106', - 'info_dict': { - 'id': '48f656ef-287e-486f-be86-459122db22cc', - 'ext': 'mp4', - 'title': 'Die böse Überraschung', - 'description': 'md5:ce9ac81b466ce775b8018f6801b48ac9', - 'duration': 180, - 'uploader': 'Reinhard Weber', - 'upload_date': 
'20150422', - }, - 'skip': '404 not found', - }, - { - 'url': 'http://www.br.de/nachrichten/oberbayern/inhalt/muenchner-polizeipraesident-schreiber-gestorben-100.html', - 'md5': 'af3a3a4aa43ff0ce6a89504c67f427ef', - 'info_dict': { - 'id': 'a4b83e34-123d-4b81-9f4e-c0d3121a4e05', - 'ext': 'flv', - 'title': 'Manfred Schreiber ist tot', - 'description': 'md5:b454d867f2a9fc524ebe88c3f5092d97', - 'duration': 26, - }, - 'skip': '404 not found', - }, - { - 'url': 'https://www.br-klassik.de/audio/peeping-tom-premierenkritik-dance-festival-muenchen-100.html', - 'md5': '8b5b27c0b090f3b35eac4ab3f7a73d3d', - 'info_dict': { - 'id': '74c603c9-26d3-48bb-b85b-079aeed66e0b', - 'ext': 'aac', - 'title': 'Kurzweilig und sehr bewegend', - 'description': 'md5:0351996e3283d64adeb38ede91fac54e', - 'duration': 296, - }, - 'skip': '404 not found', - }, - { - 'url': 'http://www.br.de/radio/bayern1/service/team/videos/team-video-erdelt100.html', - 'md5': 'dbab0aef2e047060ea7a21fc1ce1078a', - 'info_dict': { - 'id': '6ba73750-d405-45d3-861d-1ce8c524e059', - 'ext': 'mp4', - 'title': 'Umweltbewusster Häuslebauer', - 'description': 'md5:d52dae9792d00226348c1dbb13c9bae2', - 'duration': 116, - } - }, - { - 'url': 'http://www.br.de/fernsehen/br-alpha/sendungen/kant-fuer-anfaenger/kritik-der-reinen-vernunft/kant-kritik-01-metaphysik100.html', - 'md5': '23bca295f1650d698f94fc570977dae3', - 'info_dict': { - 'id': 'd982c9ce-8648-4753-b358-98abb8aec43d', - 'ext': 'mp4', - 'title': 'Folge 1 - Metaphysik', - 'description': 'md5:bb659990e9e59905c3d41e369db1fbe3', - 'duration': 893, - 'uploader': 'Eva Maria Steimle', - 'upload_date': '20170208', - } - }, - ] - - def _real_extract(self, url): - base_url, display_id = re.search(self._VALID_URL, url).groups() - page = self._download_webpage(url, display_id) - xml_url = self._search_regex( - r"return BRavFramework\.register\(BRavFramework\('avPlayer_(?:[a-f0-9-]{36})'\)\.setup\({dataURL:'(/(?:[a-z0-9\-]+/)+[a-z0-9/~_.-]+)'}\)\);", page, 'XMLURL') - xml = 
self._download_xml(base_url + xml_url, display_id) - - medias = [] - - for xml_media in xml.findall('video') + xml.findall('audio'): - media_id = xml_media.get('externalId') - media = { - 'id': media_id, - 'title': xpath_text(xml_media, 'title', 'title', True), - 'duration': parse_duration(xpath_text(xml_media, 'duration')), - 'formats': self._extract_formats(xpath_element( - xml_media, 'assets'), media_id), - 'thumbnails': self._extract_thumbnails(xpath_element( - xml_media, 'teaserImage/variants'), base_url), - 'description': xpath_text(xml_media, 'desc'), - 'webpage_url': xpath_text(xml_media, 'permalink'), - 'uploader': xpath_text(xml_media, 'author'), - } - broadcast_date = xpath_text(xml_media, 'broadcastDate') - if broadcast_date: - media['upload_date'] = ''.join(reversed(broadcast_date.split('.'))) - medias.append(media) - - if len(medias) > 1: - self._downloader.report_warning( - 'found multiple medias; please ' - 'report this with the video URL to http://yt-dl.org/bug') - if not medias: - raise ExtractorError('No media entries found') - return medias[0] - - def _extract_formats(self, assets, media_id): - formats = [] - for asset in assets.findall('asset'): - format_url = xpath_text(asset, ['downloadUrl', 'url']) - asset_type = asset.get('type') - if asset_type.startswith('HDS'): - formats.extend(self._extract_f4m_formats( - format_url + '?hdcore=3.2.0', media_id, f4m_id='hds', fatal=False)) - elif asset_type.startswith('HLS'): - formats.extend(self._extract_m3u8_formats( - format_url, media_id, 'mp4', 'm3u8_native', m3u8_id='hds', fatal=False)) - else: - format_info = { - 'ext': xpath_text(asset, 'mediaType'), - 'width': int_or_none(xpath_text(asset, 'frameWidth')), - 'height': int_or_none(xpath_text(asset, 'frameHeight')), - 'tbr': int_or_none(xpath_text(asset, 'bitrateVideo')), - 'abr': int_or_none(xpath_text(asset, 'bitrateAudio')), - 'vcodec': xpath_text(asset, 'codecVideo'), - 'acodec': xpath_text(asset, 'codecAudio'), - 'container': 
xpath_text(asset, 'mediaType'), - 'filesize': int_or_none(xpath_text(asset, 'size')), - } - format_url = self._proto_relative_url(format_url) - if format_url: - http_format_info = format_info.copy() - http_format_info.update({ - 'url': format_url, - 'format_id': 'http-%s' % asset_type, - }) - formats.append(http_format_info) - server_prefix = xpath_text(asset, 'serverPrefix') - if server_prefix: - rtmp_format_info = format_info.copy() - rtmp_format_info.update({ - 'url': server_prefix, - 'play_path': xpath_text(asset, 'fileName'), - 'format_id': 'rtmp-%s' % asset_type, - }) - formats.append(rtmp_format_info) - self._sort_formats(formats) - return formats - - def _extract_thumbnails(self, variants, base_url): - thumbnails = [{ - 'url': base_url + xpath_text(variant, 'url'), - 'width': int_or_none(xpath_text(variant, 'width')), - 'height': int_or_none(xpath_text(variant, 'height')), - } for variant in variants.findall('variant') if xpath_text(variant, 'url')] - thumbnails.sort(key=lambda x: x['width'] * x['height'], reverse=True) - return thumbnails - - -class BRMediathekIE(InfoExtractor): - IE_DESC = 'Bayerischer Rundfunk Mediathek' - _VALID_URL = r'https?://(?:www\.)?br\.de/mediathek/video/[^/?&#]*?-(?P<id>av:[0-9a-f]{24})' - - _TESTS = [{ - 'url': 'https://www.br.de/mediathek/video/gesundheit-die-sendung-vom-28112017-av:5a1e6a6e8fce6d001871cc8e', - 'md5': 'fdc3d485835966d1622587d08ba632ec', - 'info_dict': { - 'id': 'av:5a1e6a6e8fce6d001871cc8e', - 'ext': 'mp4', - 'title': 'Die Sendung vom 28.11.2017', - 'description': 'md5:6000cdca5912ab2277e5b7339f201ccc', - 'timestamp': 1511942766, - 'upload_date': '20171129', - } - }] - - def _real_extract(self, url): - clip_id = self._match_id(url) - - clip = self._download_json( - 'https://proxy-base.master.mango.express/graphql', - clip_id, data=json.dumps({ - "query": """{ - viewer { - clip(id: "%s") { - title - description - duration - createdAt - ageRestriction - videoFiles { - edges { - node { - publicLocation - fileSize 
- videoProfile { - width - height - bitrate - encoding - } - } - } - } - captionFiles { - edges { - node { - publicLocation - } - } - } - teaserImages { - edges { - node { - imageFiles { - edges { - node { - publicLocation - width - height - } - } - } - } - } - } - } - } -}""" % clip_id}).encode(), headers={ - 'Content-Type': 'application/json', - })['data']['viewer']['clip'] - title = clip['title'] - - formats = [] - for edge in clip.get('videoFiles', {}).get('edges', []): - node = edge.get('node', {}) - n_url = node.get('publicLocation') - if not n_url: - continue - ext = determine_ext(n_url) - if ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( - n_url, clip_id, 'mp4', 'm3u8_native', - m3u8_id='hls', fatal=False)) - else: - video_profile = node.get('videoProfile', {}) - tbr = int_or_none(video_profile.get('bitrate')) - format_id = 'http' - if tbr: - format_id += '-%d' % tbr - formats.append({ - 'format_id': format_id, - 'url': n_url, - 'width': int_or_none(video_profile.get('width')), - 'height': int_or_none(video_profile.get('height')), - 'tbr': tbr, - 'filesize': int_or_none(node.get('fileSize')), - }) - self._sort_formats(formats) - - subtitles = {} - for edge in clip.get('captionFiles', {}).get('edges', []): - node = edge.get('node', {}) - n_url = node.get('publicLocation') - if not n_url: - continue - subtitles.setdefault('de', []).append({ - 'url': n_url, - }) - - thumbnails = [] - for edge in clip.get('teaserImages', {}).get('edges', []): - for image_edge in edge.get('node', {}).get('imageFiles', {}).get('edges', []): - node = image_edge.get('node', {}) - n_url = node.get('publicLocation') - if not n_url: - continue - thumbnails.append({ - 'url': n_url, - 'width': int_or_none(node.get('width')), - 'height': int_or_none(node.get('height')), - }) - - return { - 'id': clip_id, - 'title': title, - 'description': clip.get('description'), - 'duration': int_or_none(clip.get('duration')), - 'timestamp': parse_iso8601(clip.get('createdAt')), - 
'age_limit': int_or_none(clip.get('ageRestriction')), - 'formats': formats, - 'subtitles': subtitles, - 'thumbnails': thumbnails, - } diff --git a/youtube_dl/extractor/bravotv.py b/youtube_dl/extractor/bravotv.py deleted file mode 100644 index bae2aedce..000000000 --- a/youtube_dl/extractor/bravotv.py +++ /dev/null @@ -1,90 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .adobepass import AdobePassIE -from ..utils import ( - smuggle_url, - update_url_query, - int_or_none, -) - - -class BravoTVIE(AdobePassIE): - _VALID_URL = r'https?://(?:www\.)?(?P<req_id>bravotv|oxygen)\.com/(?:[^/]+/)+(?P<id>[^/?#]+)' - _TESTS = [{ - 'url': 'https://www.bravotv.com/top-chef/season-16/episode-15/videos/the-top-chef-season-16-winner-is', - 'md5': 'e34684cfea2a96cd2ee1ef3a60909de9', - 'info_dict': { - 'id': 'epL0pmK1kQlT', - 'ext': 'mp4', - 'title': 'The Top Chef Season 16 Winner Is...', - 'description': 'Find out who takes the title of Top Chef!', - 'uploader': 'NBCU-BRAV', - 'upload_date': '20190314', - 'timestamp': 1552591860, - } - }, { - 'url': 'http://www.bravotv.com/below-deck/season-3/ep-14-reunion-part-1', - 'only_matching': True, - }, { - 'url': 'https://www.oxygen.com/in-ice-cold-blood/season-2/episode-16/videos/handling-the-horwitz-house-after-the-murder-season-2', - 'only_matching': True, - }] - - def _real_extract(self, url): - site, display_id = re.match(self._VALID_URL, url).groups() - webpage = self._download_webpage(url, display_id) - settings = self._parse_json(self._search_regex( - r'<script[^>]+data-drupal-selector="drupal-settings-json"[^>]*>({.+?})</script>', webpage, 'drupal settings'), - display_id) - info = {} - query = { - 'mbr': 'true', - } - account_pid, release_pid = [None] * 2 - tve = settings.get('ls_tve') - if tve: - query['manifest'] = 'm3u' - mobj = re.search(r'<[^>]+id="pdk-player"[^>]+data-url=["\']?(?:https?:)?//player\.theplatform\.com/p/([^/]+)/(?:[^/]+/)*select/([^?#&"\']+)', webpage) - if mobj: - 
account_pid, tp_path = mobj.groups() - release_pid = tp_path.strip('/').split('/')[-1] - else: - account_pid = 'HNK2IC' - tp_path = release_pid = tve['release_pid'] - if tve.get('entitlement') == 'auth': - adobe_pass = settings.get('tve_adobe_auth', {}) - if site == 'bravotv': - site = 'bravo' - resource = self._get_mvpd_resource( - adobe_pass.get('adobePassResourceId') or site, - tve['title'], release_pid, tve.get('rating')) - query['auth'] = self._extract_mvpd_auth( - url, release_pid, - adobe_pass.get('adobePassRequestorId') or site, resource) - else: - shared_playlist = settings['ls_playlist'] - account_pid = shared_playlist['account_pid'] - metadata = shared_playlist['video_metadata'][shared_playlist['default_clip']] - tp_path = release_pid = metadata.get('release_pid') - if not release_pid: - release_pid = metadata['guid'] - tp_path = 'media/guid/2140479951/' + release_pid - info.update({ - 'title': metadata['title'], - 'description': metadata.get('description'), - 'season_number': int_or_none(metadata.get('season_num')), - 'episode_number': int_or_none(metadata.get('episode_num')), - }) - query['switch'] = 'progressive' - info.update({ - '_type': 'url_transparent', - 'id': release_pid, - 'url': smuggle_url(update_url_query( - 'http://link.theplatform.com/s/%s/%s' % (account_pid, tp_path), - query), {'force_smil_url': True}), - 'ie_key': 'ThePlatform', - }) - return info diff --git a/youtube_dl/extractor/breakcom.py b/youtube_dl/extractor/breakcom.py deleted file mode 100644 index 68c7cf2bb..000000000 --- a/youtube_dl/extractor/breakcom.py +++ /dev/null @@ -1,91 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from .youtube import YoutubeIE -from ..utils import ( - int_or_none, - url_or_none, -) - - -class BreakIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?break\.com/video/(?P<display_id>[^/]+?)(?:-(?P<id>\d+))?(?:[/?#&]|$)' - _TESTS = [{ - 'url': 
'http://www.break.com/video/when-girls-act-like-guys-2468056', - 'info_dict': { - 'id': '2468056', - 'ext': 'mp4', - 'title': 'When Girls Act Like D-Bags', - 'age_limit': 13, - }, - }, { - # youtube embed - 'url': 'http://www.break.com/video/someone-forgot-boat-brakes-work', - 'info_dict': { - 'id': 'RrrDLdeL2HQ', - 'ext': 'mp4', - 'title': 'Whale Watching Boat Crashing Into San Diego Dock', - 'description': 'md5:afc1b2772f0a8468be51dd80eb021069', - 'upload_date': '20160331', - 'uploader': 'Steve Holden', - 'uploader_id': 'sdholden07', - }, - 'params': { - 'skip_download': True, - } - }, { - 'url': 'http://www.break.com/video/ugc/baby-flex-2773063', - 'only_matching': True, - }] - - def _real_extract(self, url): - display_id, video_id = re.match(self._VALID_URL, url).groups() - - webpage = self._download_webpage(url, display_id) - - youtube_url = YoutubeIE._extract_url(webpage) - if youtube_url: - return self.url_result(youtube_url, ie=YoutubeIE.ie_key()) - - content = self._parse_json( - self._search_regex( - r'(?s)content["\']\s*:\s*(\[.+?\])\s*[,\n]', webpage, - 'content'), - display_id) - - formats = [] - for video in content: - video_url = url_or_none(video.get('url')) - if not video_url: - continue - bitrate = int_or_none(self._search_regex( - r'(\d+)_kbps', video_url, 'tbr', default=None)) - formats.append({ - 'url': video_url, - 'format_id': 'http-%d' % bitrate if bitrate else 'http', - 'tbr': bitrate, - }) - self._sort_formats(formats) - - title = self._search_regex( - (r'title["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', - r'<h1[^>]*>(?P<value>[^<]+)'), webpage, 'title', group='value') - - def get(key, name): - return int_or_none(self._search_regex( - r'%s["\']\s*:\s*["\'](\d+)' % key, webpage, name, - default=None)) - - age_limit = get('ratings', 'age limit') - video_id = video_id or get('pid', 'video id') or display_id - - return { - 'id': video_id, - 'display_id': display_id, - 'title': title, - 'thumbnail': self._og_search_thumbnail(webpage), - 
'age_limit': age_limit, - 'formats': formats, - } diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py deleted file mode 100644 index 6022076ac..000000000 --- a/youtube_dl/extractor/brightcove.py +++ /dev/null @@ -1,681 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import base64 -import re -import struct - -from .adobepass import AdobePassIE -from .common import InfoExtractor -from ..compat import ( - compat_etree_fromstring, - compat_HTTPError, - compat_parse_qs, - compat_urllib_parse_urlparse, - compat_urlparse, - compat_xml_parse_error, -) -from ..utils import ( - clean_html, - extract_attributes, - ExtractorError, - find_xpath_attr, - fix_xml_ampersands, - float_or_none, - int_or_none, - js_to_json, - mimetype2ext, - parse_iso8601, - smuggle_url, - str_or_none, - try_get, - unescapeHTML, - unsmuggle_url, - UnsupportedError, - update_url_query, - url_or_none, -) - - -class BrightcoveLegacyIE(InfoExtractor): - IE_NAME = 'brightcove:legacy' - _VALID_URL = r'(?:https?://.*brightcove\.com/(services|viewer).*?\?|brightcove:)(?P<query>.*)' - - _TESTS = [ - { - # From http://www.8tv.cat/8aldia/videos/xavier-sala-i-martin-aquesta-tarda-a-8-al-dia/ - 'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1654948606001&flashID=myExperience&%40videoPlayer=2371591881001', - 'md5': '5423e113865d26e40624dce2e4b45d95', - 'note': 'Test Brightcove downloads and detection in GenericIE', - 'info_dict': { - 'id': '2371591881001', - 'ext': 'mp4', - 'title': 'Xavier Sala i Martín: “Un banc que no presta és un banc zombi que no serveix per a res”', - 'uploader': '8TV', - 'description': 'md5:a950cc4285c43e44d763d036710cd9cd', - 'timestamp': 1368213670, - 'upload_date': '20130510', - 'uploader_id': '1589608506001', - }, - 'skip': 'The player has been deactivated by the content owner', - }, - { - # From http://medianetwork.oracle.com/video/player/1785452137001 - 'url': 
'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1217746023001&flashID=myPlayer&%40videoPlayer=1785452137001', - 'info_dict': { - 'id': '1785452137001', - 'ext': 'flv', - 'title': 'JVMLS 2012: Arrays 2.0 - Opportunities and Challenges', - 'description': 'John Rose speaks at the JVM Language Summit, August 1, 2012.', - 'uploader': 'Oracle', - 'timestamp': 1344975024, - 'upload_date': '20120814', - 'uploader_id': '1460825906', - }, - 'skip': 'video not playable', - }, - { - # From http://mashable.com/2013/10/26/thermoelectric-bracelet-lets-you-control-your-body-temperature/ - 'url': 'http://c.brightcove.com/services/viewer/federated_f9?&playerID=1265504713001&publisherID=AQ%7E%7E%2CAAABBzUwv1E%7E%2CxP-xFHVUstiMFlNYfvF4G9yFnNaqCw_9&videoID=2750934548001', - 'info_dict': { - 'id': '2750934548001', - 'ext': 'mp4', - 'title': 'This Bracelet Acts as a Personal Thermostat', - 'description': 'md5:547b78c64f4112766ccf4e151c20b6a0', - # 'uploader': 'Mashable', - 'timestamp': 1382041798, - 'upload_date': '20131017', - 'uploader_id': '1130468786001', - }, - }, - { - # test that the default referer works - # from http://national.ballet.ca/interact/video/Lost_in_Motion_II/ - 'url': 'http://link.brightcove.com/services/player/bcpid756015033001?bckey=AQ~~,AAAApYJi_Ck~,GxhXCegT1Dp39ilhXuxMJxasUhVNZiil&bctid=2878862109001', - 'info_dict': { - 'id': '2878862109001', - 'ext': 'mp4', - 'title': 'Lost in Motion II', - 'description': 'md5:363109c02998fee92ec02211bd8000df', - 'uploader': 'National Ballet of Canada', - }, - 'skip': 'Video gone', - }, - { - # test flv videos served by akamaihd.net - # From http://www.redbull.com/en/bike/stories/1331655643987/replay-uci-dh-world-cup-2014-from-fort-william - 'url': 
'http://c.brightcove.com/services/viewer/htmlFederated?%40videoPlayer=ref%3Aevent-stream-356&linkBaseURL=http%3A%2F%2Fwww.redbull.com%2Fen%2Fbike%2Fvideos%2F1331655630249%2Freplay-uci-fort-william-2014-dh&playerKey=AQ%7E%7E%2CAAAApYJ7UqE%7E%2Cxqr_zXk0I-zzNndy8NlHogrCb5QdyZRf&playerID=1398061561001#__youtubedl_smuggle=%7B%22Referer%22%3A+%22http%3A%2F%2Fwww.redbull.com%2Fen%2Fbike%2Fstories%2F1331655643987%2Freplay-uci-dh-world-cup-2014-from-fort-william%22%7D', - # The md5 checksum changes on each download - 'info_dict': { - 'id': '3750436379001', - 'ext': 'flv', - 'title': 'UCI MTB World Cup 2014: Fort William, UK - Downhill Finals', - 'uploader': 'RBTV Old (do not use)', - 'description': 'UCI MTB World Cup 2014: Fort William, UK - Downhill Finals', - 'timestamp': 1409122195, - 'upload_date': '20140827', - 'uploader_id': '710858724001', - }, - 'skip': 'Video gone', - }, - { - # playlist with 'videoList' - # from http://support.brightcove.com/en/video-cloud/docs/playlist-support-single-video-players - 'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=3550052898001&playerKey=AQ%7E%7E%2CAAABmA9XpXk%7E%2C-Kp7jNgisre1fG5OdqpAFUTcs0lP_ZoL', - 'info_dict': { - 'title': 'Sealife', - 'id': '3550319591001', - }, - 'playlist_mincount': 7, - 'skip': 'Unsupported URL', - }, - { - # playlist with 'playlistTab' (https://github.com/ytdl-org/youtube-dl/issues/9965) - 'url': 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=AQ%7E%7E,AAABXlLMdok%7E,NJ4EoMlZ4rZdx9eU1rkMVd8EaYPBBUlg', - 'info_dict': { - 'id': '1522758701001', - 'title': 'Lesson 08', - }, - 'playlist_mincount': 10, - 'skip': 'Unsupported URL', - }, - { - # playerID inferred from bcpid - # from http://www.un.org/chinese/News/story.asp?NewsID=27724 - 'url': 'https://link.brightcove.com/services/player/bcpid1722935254001/?bctid=5360463607001&autoStart=false&secureConnections=true&width=650&height=350', - 'only_matching': True, # Tested in 
GenericIE - } - ] - - @classmethod - def _build_brightcove_url(cls, object_str): - """ - Build a Brightcove url from a xml string containing - <object class="BrightcoveExperience">{params}</object> - """ - - # Fix up some stupid HTML, see https://github.com/ytdl-org/youtube-dl/issues/1553 - object_str = re.sub(r'(<param(?:\s+[a-zA-Z0-9_]+="[^"]*")*)>', - lambda m: m.group(1) + '/>', object_str) - # Fix up some stupid XML, see https://github.com/ytdl-org/youtube-dl/issues/1608 - object_str = object_str.replace('<--', '<!--') - # remove namespace to simplify extraction - object_str = re.sub(r'(<object[^>]*)(xmlns=".*?")', r'\1', object_str) - object_str = fix_xml_ampersands(object_str) - - try: - object_doc = compat_etree_fromstring(object_str.encode('utf-8')) - except compat_xml_parse_error: - return - - fv_el = find_xpath_attr(object_doc, './param', 'name', 'flashVars') - if fv_el is not None: - flashvars = dict( - (k, v[0]) - for k, v in compat_parse_qs(fv_el.attrib['value']).items()) - else: - flashvars = {} - - data_url = object_doc.attrib.get('data', '') - data_url_params = compat_parse_qs(compat_urllib_parse_urlparse(data_url).query) - - def find_param(name): - if name in flashvars: - return flashvars[name] - node = find_xpath_attr(object_doc, './param', 'name', name) - if node is not None: - return node.attrib['value'] - return data_url_params.get(name) - - params = {} - - playerID = find_param('playerID') or find_param('playerId') - if playerID is None: - raise ExtractorError('Cannot find player ID') - params['playerID'] = playerID - - playerKey = find_param('playerKey') - # Not all pages define this value - if playerKey is not None: - params['playerKey'] = playerKey - # These fields hold the id of the video - videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID') or find_param('@videoList') - if videoPlayer is not None: - if isinstance(videoPlayer, list): - videoPlayer = videoPlayer[0] - videoPlayer = videoPlayer.strip() 
- # UUID is also possible for videoPlayer (e.g. - # http://www.popcornflix.com/hoodies-vs-hooligans/7f2d2b87-bbf2-4623-acfb-ea942b4f01dd - # or http://www8.hp.com/cn/zh/home.html) - if not (re.match( - r'^(?:\d+|[\da-fA-F]{8}-?[\da-fA-F]{4}-?[\da-fA-F]{4}-?[\da-fA-F]{4}-?[\da-fA-F]{12})$', - videoPlayer) or videoPlayer.startswith('ref:')): - return None - params['@videoPlayer'] = videoPlayer - linkBase = find_param('linkBaseURL') - if linkBase is not None: - params['linkBaseURL'] = linkBase - return cls._make_brightcove_url(params) - - @classmethod - def _build_brightcove_url_from_js(cls, object_js): - # The layout of JS is as follows: - # customBC.createVideo = function (width, height, playerID, playerKey, videoPlayer, VideoRandomID) { - # // build Brightcove <object /> XML - # } - m = re.search( - r'''(?x)customBC\.createVideo\( - .*? # skipping width and height - ["\'](?P<playerID>\d+)["\']\s*,\s* # playerID - ["\'](?P<playerKey>AQ[^"\']{48})[^"\']*["\']\s*,\s* # playerKey begins with AQ and is 50 characters - # in length, however it's appended to itself - # in places, so truncate - ["\'](?P<videoID>\d+)["\'] # @videoPlayer - ''', object_js) - if m: - return cls._make_brightcove_url(m.groupdict()) - - @classmethod - def _make_brightcove_url(cls, params): - return update_url_query( - 'http://c.brightcove.com/services/viewer/htmlFederated', params) - - @classmethod - def _extract_brightcove_url(cls, webpage): - """Try to extract the brightcove url from the webpage, returns None - if it can't be found - """ - urls = cls._extract_brightcove_urls(webpage) - return urls[0] if urls else None - - @classmethod - def _extract_brightcove_urls(cls, webpage): - """Return a list of all Brightcove URLs from the webpage """ - - url_m = re.search( - r'''(?x) - <meta\s+ - (?:property|itemprop)=([\'"])(?:og:video|embedURL)\1[^>]+ - content=([\'"])(?P<url>https?://(?:secure|c)\.brightcove.com/(?:(?!\2).)+)\2 - ''', webpage) - if url_m: - url = unescapeHTML(url_m.group('url')) - # 
Some sites don't add it, we can't download with this url, for example: - # http://www.ktvu.com/videos/news/raw-video-caltrain-releases-video-of-man-almost/vCTZdY/ - if 'playerKey' in url or 'videoId' in url or 'idVideo' in url: - return [url] - - matches = re.findall( - r'''(?sx)<object - (?: - [^>]+?class=[\'"][^>]*?BrightcoveExperience.*?[\'"] | - [^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/ - ).+?>\s*</object>''', - webpage) - if matches: - return list(filter(None, [cls._build_brightcove_url(m) for m in matches])) - - matches = re.findall(r'(customBC\.createVideo\(.+?\);)', webpage) - if matches: - return list(filter(None, [ - cls._build_brightcove_url_from_js(custom_bc) - for custom_bc in matches])) - return [src for _, src in re.findall( - r'<iframe[^>]+src=([\'"])((?:https?:)?//link\.brightcove\.com/services/player/(?!\1).+)\1', webpage)] - - def _real_extract(self, url): - url, smuggled_data = unsmuggle_url(url, {}) - - # Change the 'videoId' and others field to '@videoPlayer' - url = re.sub(r'(?<=[?&])(videoI(d|D)|idVideo|bctid)', '%40videoPlayer', url) - # Change bckey (used by bcove.me urls) to playerKey - url = re.sub(r'(?<=[?&])bckey', 'playerKey', url) - mobj = re.match(self._VALID_URL, url) - query_str = mobj.group('query') - query = compat_urlparse.parse_qs(query_str) - - videoPlayer = query.get('@videoPlayer') - if videoPlayer: - # We set the original url as the default 'Referer' header - referer = query.get('linkBaseURL', [None])[0] or smuggled_data.get('Referer', url) - video_id = videoPlayer[0] - if 'playerID' not in query: - mobj = re.search(r'/bcpid(\d+)', url) - if mobj is not None: - query['playerID'] = [mobj.group(1)] - publisher_id = query.get('publisherId') - if publisher_id and publisher_id[0].isdigit(): - publisher_id = publisher_id[0] - if not publisher_id: - player_key = query.get('playerKey') - if player_key and ',' in player_key[0]: - player_key = player_key[0] - else: - player_id = query.get('playerID') - 
if player_id and player_id[0].isdigit(): - headers = {} - if referer: - headers['Referer'] = referer - player_page = self._download_webpage( - 'http://link.brightcove.com/services/player/bcpid' + player_id[0], - video_id, headers=headers, fatal=False) - if player_page: - player_key = self._search_regex( - r'<param\s+name="playerKey"\s+value="([\w~,-]+)"', - player_page, 'player key', fatal=False) - if player_key: - enc_pub_id = player_key.split(',')[1].replace('~', '=') - publisher_id = struct.unpack('>Q', base64.urlsafe_b64decode(enc_pub_id))[0] - if publisher_id: - brightcove_new_url = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (publisher_id, video_id) - if referer: - brightcove_new_url = smuggle_url(brightcove_new_url, {'referrer': referer}) - return self.url_result(brightcove_new_url, BrightcoveNewIE.ie_key(), video_id) - # TODO: figure out if it's possible to extract playlistId from playerKey - # elif 'playerKey' in query: - # player_key = query['playerKey'] - # return self._get_playlist_info(player_key[0]) - raise UnsupportedError(url) - - -class BrightcoveNewIE(AdobePassIE): - IE_NAME = 'brightcove:new' - _VALID_URL = r'https?://players\.brightcove\.net/(?P<account_id>\d+)/(?P<player_id>[^/]+)_(?P<embed>[^/]+)/index\.html\?.*(?P<content_type>video|playlist)Id=(?P<video_id>\d+|ref:[^&]+)' - _TESTS = [{ - 'url': 'http://players.brightcove.net/929656772001/e41d32dc-ec74-459e-a845-6c69f7b724ea_default/index.html?videoId=4463358922001', - 'md5': 'c8100925723840d4b0d243f7025703be', - 'info_dict': { - 'id': '4463358922001', - 'ext': 'mp4', - 'title': 'Meet the man behind Popcorn Time', - 'description': 'md5:eac376a4fe366edc70279bfb681aea16', - 'duration': 165.768, - 'timestamp': 1441391203, - 'upload_date': '20150904', - 'uploader_id': '929656772001', - 'formats': 'mincount:20', - }, - }, { - # with rtmp streams - 'url': 
'http://players.brightcove.net/4036320279001/5d112ed9-283f-485f-a7f9-33f42e8bc042_default/index.html?videoId=4279049078001', - 'info_dict': { - 'id': '4279049078001', - 'ext': 'mp4', - 'title': 'Titansgrave: Chapter 0', - 'description': 'Titansgrave: Chapter 0', - 'duration': 1242.058, - 'timestamp': 1433556729, - 'upload_date': '20150606', - 'uploader_id': '4036320279001', - 'formats': 'mincount:39', - }, - 'params': { - # m3u8 download - 'skip_download': True, - } - }, { - # playlist stream - 'url': 'https://players.brightcove.net/1752604059001/S13cJdUBz_default/index.html?playlistId=5718313430001', - 'info_dict': { - 'id': '5718313430001', - 'title': 'No Audio Playlist', - }, - 'playlist_count': 7, - 'params': { - # m3u8 download - 'skip_download': True, - } - }, { - 'url': 'http://players.brightcove.net/5690807595001/HyZNerRl7_default/index.html?playlistId=5743160747001', - 'only_matching': True, - }, { - # ref: prefixed video id - 'url': 'http://players.brightcove.net/3910869709001/21519b5c-4b3b-4363-accb-bdc8f358f823_default/index.html?videoId=ref:7069442', - 'only_matching': True, - }, { - # non numeric ref: prefixed video id - 'url': 'http://players.brightcove.net/710858724001/default_default/index.html?videoId=ref:event-stream-356', - 'only_matching': True, - }, { - # unavailable video without message but with error_code - 'url': 'http://players.brightcove.net/1305187701/c832abfb-641b-44eb-9da0-2fe76786505f_default/index.html?videoId=4377407326001', - 'only_matching': True, - }] - - @staticmethod - def _extract_url(ie, webpage): - urls = BrightcoveNewIE._extract_urls(ie, webpage) - return urls[0] if urls else None - - @staticmethod - def _extract_urls(ie, webpage): - # Reference: - # 1. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideoiniframe - # 2. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#tag - # 3. 
http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideousingjavascript - # 4. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/in-page-embed-player-implementation.html - # 5. https://support.brightcove.com/en/video-cloud/docs/dynamically-assigning-videos-player - - entries = [] - - # Look for iframe embeds [1] - for _, url in re.findall( - r'<iframe[^>]+src=(["\'])((?:https?:)?//players\.brightcove\.net/\d+/[^/]+/index\.html.+?)\1', webpage): - entries.append(url if url.startswith('http') else 'http:' + url) - - # Look for <video> tags [2] and embed_in_page embeds [3] - # [2] looks like: - for video, script_tag, account_id, player_id, embed in re.findall( - r'''(?isx) - (<video(?:-js)?\s+[^>]*\bdata-video-id\s*=\s*['"]?[^>]+>) - (?:.*? - (<script[^>]+ - src=["\'](?:https?:)?//players\.brightcove\.net/ - (\d+)/([^/]+)_([^/]+)/index(?:\.min)?\.js - ) - )? - ''', webpage): - attrs = extract_attributes(video) - - # According to examples from [4] it's unclear whether video id - # may be optional and what to do when it is - video_id = attrs.get('data-video-id') - if not video_id: - continue - - account_id = account_id or attrs.get('data-account') - if not account_id: - continue - - player_id = player_id or attrs.get('data-player') or 'default' - embed = embed or attrs.get('data-embed') or 'default' - - bc_url = 'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s' % ( - account_id, player_id, embed, video_id) - - # Some brightcove videos may be embedded with video tag only and - # without script tag or any mentioning of brightcove at all. Such - # embeds are considered ambiguous since they are matched based only - # on data-video-id and data-account attributes and in the wild may - # not be brightcove embeds at all. Let's check reconstructed - # brightcove URLs in case of such embeds and only process valid - # ones. By this we ensure there is indeed a brightcove embed. 
- if not script_tag and not ie._is_valid_url( - bc_url, video_id, 'possible brightcove video'): - continue - - entries.append(bc_url) - - return entries - - def _parse_brightcove_metadata(self, json_data, video_id, headers={}): - title = json_data['name'].strip() - - num_drm_sources = 0 - formats = [] - sources = json_data.get('sources') or [] - for source in sources: - container = source.get('container') - ext = mimetype2ext(source.get('type')) - src = source.get('src') - # https://support.brightcove.com/playback-api-video-fields-reference#key_systems_object - if container == 'WVM' or source.get('key_systems'): - num_drm_sources += 1 - continue - elif ext == 'ism': - continue - elif ext == 'm3u8' or container == 'M2TS': - if not src: - continue - formats.extend(self._extract_m3u8_formats( - src, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) - elif ext == 'mpd': - if not src: - continue - formats.extend(self._extract_mpd_formats(src, video_id, 'dash', fatal=False)) - else: - streaming_src = source.get('streaming_src') - stream_name, app_name = source.get('stream_name'), source.get('app_name') - if not src and not streaming_src and (not stream_name or not app_name): - continue - tbr = float_or_none(source.get('avg_bitrate'), 1000) - height = int_or_none(source.get('height')) - width = int_or_none(source.get('width')) - f = { - 'tbr': tbr, - 'filesize': int_or_none(source.get('size')), - 'container': container, - 'ext': ext or container.lower(), - } - if width == 0 and height == 0: - f.update({ - 'vcodec': 'none', - }) - else: - f.update({ - 'width': width, - 'height': height, - 'vcodec': source.get('codec'), - }) - - def build_format_id(kind): - format_id = kind - if tbr: - format_id += '-%dk' % int(tbr) - if height: - format_id += '-%dp' % height - return format_id - - if src or streaming_src: - f.update({ - 'url': src or streaming_src, - 'format_id': build_format_id('http' if src else 'http-streaming'), - 'source_preference': 0 if src else -1, - }) 
- else: - f.update({ - 'url': app_name, - 'play_path': stream_name, - 'format_id': build_format_id('rtmp'), - }) - formats.append(f) - - if not formats: - errors = json_data.get('errors') - if errors: - error = errors[0] - raise ExtractorError( - error.get('message') or error.get('error_subcode') or error['error_code'], expected=True) - if sources and num_drm_sources == len(sources): - raise ExtractorError('This video is DRM protected.', expected=True) - - self._sort_formats(formats) - - for f in formats: - f.setdefault('http_headers', {}).update(headers) - - subtitles = {} - for text_track in json_data.get('text_tracks', []): - if text_track.get('kind') != 'captions': - continue - text_track_url = url_or_none(text_track.get('src')) - if not text_track_url: - continue - lang = (str_or_none(text_track.get('srclang')) - or str_or_none(text_track.get('label')) or 'en').lower() - subtitles.setdefault(lang, []).append({ - 'url': text_track_url, - }) - - is_live = False - duration = float_or_none(json_data.get('duration'), 1000) - if duration is not None and duration <= 0: - is_live = True - - return { - 'id': video_id, - 'title': self._live_title(title) if is_live else title, - 'description': clean_html(json_data.get('description')), - 'thumbnail': json_data.get('thumbnail') or json_data.get('poster'), - 'duration': duration, - 'timestamp': parse_iso8601(json_data.get('published_at')), - 'uploader_id': json_data.get('account_id'), - 'formats': formats, - 'subtitles': subtitles, - 'tags': json_data.get('tags', []), - 'is_live': is_live, - } - - def _real_extract(self, url): - url, smuggled_data = unsmuggle_url(url, {}) - self._initialize_geo_bypass({ - 'countries': smuggled_data.get('geo_countries'), - 'ip_blocks': smuggled_data.get('geo_ip_blocks'), - }) - - account_id, player_id, embed, content_type, video_id = re.match(self._VALID_URL, url).groups() - - policy_key_id = '%s_%s' % (account_id, player_id) - policy_key = self._downloader.cache.load('brightcove', 
policy_key_id) - policy_key_extracted = False - store_pk = lambda x: self._downloader.cache.store('brightcove', policy_key_id, x) - - def extract_policy_key(): - base_url = 'http://players.brightcove.net/%s/%s_%s/' % (account_id, player_id, embed) - config = self._download_json( - base_url + 'config.json', video_id, fatal=False) or {} - policy_key = try_get( - config, lambda x: x['video_cloud']['policy_key']) - if not policy_key: - webpage = self._download_webpage( - base_url + 'index.min.js', video_id) - - catalog = self._search_regex( - r'catalog\(({.+?})\);', webpage, 'catalog', default=None) - if catalog: - catalog = self._parse_json( - js_to_json(catalog), video_id, fatal=False) - if catalog: - policy_key = catalog.get('policyKey') - - if not policy_key: - policy_key = self._search_regex( - r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1', - webpage, 'policy key', group='pk') - - store_pk(policy_key) - return policy_key - - api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/%ss/%s' % (account_id, content_type, video_id) - headers = {} - referrer = smuggled_data.get('referrer') - if referrer: - headers.update({ - 'Referer': referrer, - 'Origin': re.search(r'https?://[^/]+', referrer).group(0), - }) - - for _ in range(2): - if not policy_key: - policy_key = extract_policy_key() - policy_key_extracted = True - headers['Accept'] = 'application/json;pk=%s' % policy_key - try: - json_data = self._download_json(api_url, video_id, headers=headers) - break - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403): - json_data = self._parse_json(e.cause.read().decode(), video_id)[0] - message = json_data.get('message') or json_data['error_code'] - if json_data.get('error_subcode') == 'CLIENT_GEO': - self.raise_geo_restricted(msg=message) - elif json_data.get('error_code') == 'INVALID_POLICY_KEY' and not policy_key_extracted: - policy_key = None - store_pk(None) - continue - raise ExtractorError(message, expected=True) 
- raise - - errors = json_data.get('errors') - if errors and errors[0].get('error_subcode') == 'TVE_AUTH': - custom_fields = json_data['custom_fields'] - tve_token = self._extract_mvpd_auth( - smuggled_data['source_url'], video_id, - custom_fields['bcadobepassrequestorid'], - custom_fields['bcadobepassresourceid']) - json_data = self._download_json( - api_url, video_id, headers={ - 'Accept': 'application/json;pk=%s' % policy_key - }, query={ - 'tveToken': tve_token, - }) - - if content_type == 'playlist': - return self.playlist_result( - [self._parse_brightcove_metadata(vid, vid.get('id'), headers) - for vid in json_data.get('videos', []) if vid.get('id')], - json_data.get('id'), json_data.get('name'), - json_data.get('description')) - - return self._parse_brightcove_metadata( - json_data, video_id, headers=headers) diff --git a/youtube_dl/extractor/businessinsider.py b/youtube_dl/extractor/businessinsider.py deleted file mode 100644 index 73a57b1e4..000000000 --- a/youtube_dl/extractor/businessinsider.py +++ /dev/null @@ -1,48 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from .jwplatform import JWPlatformIE - - -class BusinessInsiderIE(InfoExtractor): - _VALID_URL = r'https?://(?:[^/]+\.)?businessinsider\.(?:com|nl)/(?:[^/]+/)*(?P<id>[^/?#&]+)' - _TESTS = [{ - 'url': 'http://uk.businessinsider.com/how-much-radiation-youre-exposed-to-in-everyday-life-2016-6', - 'md5': 'ffed3e1e12a6f950aa2f7d83851b497a', - 'info_dict': { - 'id': 'cjGDb0X9', - 'ext': 'mp4', - 'title': "Bananas give you more radiation exposure than living next to a nuclear power plant", - 'description': 'md5:0175a3baf200dd8fa658f94cade841b3', - 'upload_date': '20160611', - 'timestamp': 1465675620, - }, - }, { - 'url': 'https://www.businessinsider.nl/5-scientifically-proven-things-make-you-less-attractive-2017-7/', - 'md5': '43f438dbc6da0b89f5ac42f68529d84a', - 'info_dict': { - 'id': '5zJwd4FK', - 'ext': 'mp4', - 'title': 'Deze dingen zorgen 
ervoor dat je minder snel een date scoort', - 'description': 'md5:2af8975825d38a4fed24717bbe51db49', - 'upload_date': '20170705', - 'timestamp': 1499270528, - }, - }, { - 'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - jwplatform_id = self._search_regex( - (r'data-media-id=["\']([a-zA-Z0-9]{8})', - r'id=["\']jwplayer_([a-zA-Z0-9]{8})', - r'id["\']?\s*:\s*["\']?([a-zA-Z0-9]{8})', - r'(?:jwplatform\.com/players/|jwplayer_)([a-zA-Z0-9]{8})'), - webpage, 'jwplatform id') - return self.url_result( - 'jwplatform:%s' % jwplatform_id, ie=JWPlatformIE.ie_key(), - video_id=video_id) diff --git a/youtube_dl/extractor/buzzfeed.py b/youtube_dl/extractor/buzzfeed.py deleted file mode 100644 index ec411091e..000000000 --- a/youtube_dl/extractor/buzzfeed.py +++ /dev/null @@ -1,98 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import json -import re - -from .common import InfoExtractor -from .facebook import FacebookIE - - -class BuzzFeedIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?buzzfeed\.com/[^?#]*?/(?P<id>[^?#]+)' - _TESTS = [{ - 'url': 'http://www.buzzfeed.com/abagg/this-angry-ram-destroys-a-punching-bag-like-a-boss?utm_term=4ldqpia', - 'info_dict': { - 'id': 'this-angry-ram-destroys-a-punching-bag-like-a-boss', - 'title': 'This Angry Ram Destroys A Punching Bag Like A Boss', - 'description': 'Rambro!', - }, - 'playlist': [{ - 'info_dict': { - 'id': 'aVCR29aE_OQ', - 'ext': 'mp4', - 'title': 'Angry Ram destroys a punching bag..', - 'description': 'md5:c59533190ef23fd4458a5e8c8c872345', - 'upload_date': '20141024', - 'uploader_id': 'Buddhanz1', - 'uploader': 'Angry Ram', - } - }] - }, { - 'url': 'http://www.buzzfeed.com/sheridanwatson/look-at-this-cute-dog-omg?utm_term=4ldqpia', - 'params': { - 'skip_download': True, # Got enough YouTube download tests - }, 
- 'info_dict': { - 'id': 'look-at-this-cute-dog-omg', - 'description': 're:Munchkin the Teddy Bear is back ?!', - 'title': 'You Need To Stop What You\'re Doing And Watching This Dog Walk On A Treadmill', - }, - 'playlist': [{ - 'info_dict': { - 'id': 'mVmBL8B-In0', - 'ext': 'mp4', - 'title': 're:Munchkin the Teddy Bear gets her exercise', - 'description': 'md5:28faab95cda6e361bcff06ec12fc21d8', - 'upload_date': '20141124', - 'uploader_id': 'CindysMunchkin', - 'uploader': 're:^Munchkin the', - }, - }] - }, { - 'url': 'http://www.buzzfeed.com/craigsilverman/the-most-adorable-crash-landing-ever#.eq7pX0BAmK', - 'info_dict': { - 'id': 'the-most-adorable-crash-landing-ever', - 'title': 'Watch This Baby Goose Make The Most Adorable Crash Landing', - 'description': 'This gosling knows how to stick a landing.', - }, - 'playlist': [{ - 'md5': '763ca415512f91ca62e4621086900a23', - 'info_dict': { - 'id': '971793786185728', - 'ext': 'mp4', - 'title': 'We set up crash pads so that the goslings on our roof would have a safe landi...', - 'uploader': 'Calgary Outdoor Centre-University of Calgary', - }, - }], - 'add_ie': ['Facebook'], - }] - - def _real_extract(self, url): - playlist_id = self._match_id(url) - webpage = self._download_webpage(url, playlist_id) - - all_buckets = re.findall( - r'(?s)<div class="video-embed[^"]*"..*?rel:bf_bucket_data=\'([^\']+)\'', - webpage) - - entries = [] - for bd_json in all_buckets: - bd = json.loads(bd_json) - video = bd.get('video') or bd.get('progload_video') - if not video: - continue - entries.append(self.url_result(video['url'])) - - facebook_urls = FacebookIE._extract_urls(webpage) - entries.extend([ - self.url_result(facebook_url) - for facebook_url in facebook_urls]) - - return { - '_type': 'playlist', - 'id': playlist_id, - 'title': self._og_search_title(webpage), - 'description': self._og_search_description(webpage), - 'entries': entries, - } diff --git a/youtube_dl/extractor/byutv.py b/youtube_dl/extractor/byutv.py deleted file mode 
100644 index 0b11bf11f..000000000 --- a/youtube_dl/extractor/byutv.py +++ /dev/null @@ -1,117 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - determine_ext, - merge_dicts, - parse_duration, - url_or_none, -) - - -class BYUtvIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?byutv\.org/(?:watch|player)/(?!event/)(?P<id>[0-9a-f-]+)(?:/(?P<display_id>[^/?#&]+))?' - _TESTS = [{ - # ooyalaVOD - 'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5', - 'info_dict': { - 'id': 'ZvanRocTpW-G5_yZFeltTAMv6jxOU9KH', - 'display_id': 'studio-c-season-5-episode-5', - 'ext': 'mp4', - 'title': 'Season 5 Episode 5', - 'description': 'md5:1d31dc18ef4f075b28f6a65937d22c65', - 'thumbnail': r're:^https?://.*', - 'duration': 1486.486, - }, - 'params': { - 'skip_download': True, - }, - 'add_ie': ['Ooyala'], - }, { - # dvr - 'url': 'https://www.byutv.org/player/8f1dab9b-b243-47c8-b525-3e2d021a3451/byu-softball-pacific-vs-byu-41219---game-2', - 'info_dict': { - 'id': '8f1dab9b-b243-47c8-b525-3e2d021a3451', - 'display_id': 'byu-softball-pacific-vs-byu-41219---game-2', - 'ext': 'mp4', - 'title': 'Pacific vs. 
BYU (4/12/19)', - 'description': 'md5:1ac7b57cb9a78015910a4834790ce1f3', - 'duration': 11645, - }, - 'params': { - 'skip_download': True - }, - }, { - 'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d', - 'only_matching': True, - }, { - 'url': 'https://www.byutv.org/player/27741493-dc83-40b0-8420-e7ae38a2ae98/byu-football-toledo-vs-byu-93016?listid=4fe0fee5-0d3c-4a29-b725-e4948627f472&listindex=0&q=toledo', - 'only_matching': True, - }] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - display_id = mobj.group('display_id') or video_id - - video = self._download_json( - 'https://api.byutv.org/api3/catalog/getvideosforcontent', - display_id, query={ - 'contentid': video_id, - 'channel': 'byutv', - 'x-byutv-context': 'web$US', - }, headers={ - 'x-byutv-context': 'web$US', - 'x-byutv-platformkey': 'xsaaw9c7y5', - }) - - ep = video.get('ooyalaVOD') - if ep: - return { - '_type': 'url_transparent', - 'ie_key': 'Ooyala', - 'url': 'ooyala:%s' % ep['providerId'], - 'id': video_id, - 'display_id': display_id, - 'title': ep.get('title'), - 'description': ep.get('description'), - 'thumbnail': ep.get('imageThumbnail'), - } - - info = {} - formats = [] - for format_id, ep in video.items(): - if not isinstance(ep, dict): - continue - video_url = url_or_none(ep.get('videoUrl')) - if not video_url: - continue - ext = determine_ext(video_url) - if ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( - video_url, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls', fatal=False)) - elif ext == 'mpd': - formats.extend(self._extract_mpd_formats( - video_url, video_id, mpd_id='dash', fatal=False)) - else: - formats.append({ - 'url': video_url, - 'format_id': format_id, - }) - merge_dicts(info, { - 'title': ep.get('title'), - 'description': ep.get('description'), - 'thumbnail': ep.get('imageThumbnail'), - 'duration': parse_duration(ep.get('length')), - }) - self._sort_formats(formats) - - 
return merge_dicts(info, { - 'id': video_id, - 'display_id': display_id, - 'title': display_id, - 'formats': formats, - }) diff --git a/youtube_dl/extractor/c56.py b/youtube_dl/extractor/c56.py deleted file mode 100644 index cac8fdcba..000000000 --- a/youtube_dl/extractor/c56.py +++ /dev/null @@ -1,65 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import js_to_json - - -class C56IE(InfoExtractor): - _VALID_URL = r'https?://(?:(?:www|player)\.)?56\.com/(?:.+?/)?(?:v_|(?:play_album.+-))(?P<textid>.+?)\.(?:html|swf)' - IE_NAME = '56.com' - _TESTS = [{ - 'url': 'http://www.56.com/u39/v_OTM0NDA3MTY.html', - 'md5': 'e59995ac63d0457783ea05f93f12a866', - 'info_dict': { - 'id': '93440716', - 'ext': 'flv', - 'title': '网事知多少 第32期:车怒', - 'duration': 283.813, - }, - }, { - 'url': 'http://www.56.com/u47/v_MTM5NjQ5ODc2.html', - 'md5': '', - 'info_dict': { - 'id': '82247482', - 'title': '爱的诅咒之杜鹃花开', - }, - 'playlist_count': 7, - 'add_ie': ['Sohu'], - }] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE) - text_id = mobj.group('textid') - - webpage = self._download_webpage(url, text_id) - sohu_video_info_str = self._search_regex( - r'var\s+sohuVideoInfo\s*=\s*({[^}]+});', webpage, 'Sohu video info', default=None) - if sohu_video_info_str: - sohu_video_info = self._parse_json( - sohu_video_info_str, text_id, transform_source=js_to_json) - return self.url_result(sohu_video_info['url'], 'Sohu') - - page = self._download_json( - 'http://vxml.56.com/json/%s/' % text_id, text_id, 'Downloading video info') - - info = page['info'] - - formats = [ - { - 'format_id': f['type'], - 'filesize': int(f['filesize']), - 'url': f['url'] - } for f in info['rfiles'] - ] - self._sort_formats(formats) - - return { - 'id': info['vid'], - 'title': info['Subject'], - 'duration': int(info['duration']) / 1000.0, - 'formats': formats, - 'thumbnail': info.get('bimg') or info.get('img'), - 
} diff --git a/youtube_dl/extractor/caffeine.py b/youtube_dl/extractor/caffeine.py deleted file mode 100644 index bffedb9a7..000000000 --- a/youtube_dl/extractor/caffeine.py +++ /dev/null @@ -1,79 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - determine_ext, - int_or_none, - merge_dicts, - parse_iso8601, - T, - traverse_obj, - txt_or_none, - urljoin, -) - - -class CaffeineTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?caffeine\.tv/[^/]+/video/(?P<id>[0-9a-f-]+)' - _TESTS = [{ - 'url': 'https://www.caffeine.tv/TsuSurf/video/cffc0a00-e73f-11ec-8080-80017d29f26e', - 'info_dict': { - 'id': 'cffc0a00-e73f-11ec-8080-80017d29f26e', - 'ext': 'mp4', - 'title': 'GOOOOD MORNINNNNN #highlights', - 'timestamp': 1654702180, - 'upload_date': '20220608', - 'uploader': 'TsuSurf', - 'duration': 3145, - 'age_limit': 17, - }, - 'params': { - 'format': 'bestvideo', - }, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - json_data = self._download_json( - 'https://api.caffeine.tv/social/public/activity/' + video_id, - video_id) - broadcast_info = traverse_obj(json_data, ('broadcast_info', T(dict))) or {} - title = broadcast_info['broadcast_title'] - video_url = broadcast_info['video_url'] - - ext = determine_ext(video_url) - if ext == 'm3u8': - formats = self._extract_m3u8_formats( - video_url, video_id, 'mp4', entry_protocol='m3u8', - fatal=False) - else: - formats = [{'url': video_url}] - self._sort_formats(formats) - - return merge_dicts({ - 'id': video_id, - 'title': title, - 'formats': formats, - }, traverse_obj(json_data, { - 'uploader': ((None, 'user'), 'username'), - }, get_all=False), traverse_obj(json_data, { - 'like_count': ('like_count', T(int_or_none)), - 'view_count': ('view_count', T(int_or_none)), - 'comment_count': ('comment_count', T(int_or_none)), - 'tags': ('tags', Ellipsis, T(txt_or_none)), - 'is_live': 'is_live', - 'uploader': ('user', 'name'), - }), 
traverse_obj(broadcast_info, { - 'duration': ('content_duration', T(int_or_none)), - 'timestamp': ('broadcast_start_time', T(parse_iso8601)), - 'thumbnail': ('preview_image_path', T(lambda u: urljoin(url, u))), - 'age_limit': ('content_rating', T(lambda r: r and { - # assume Apple Store ratings [1] - # 1. https://en.wikipedia.org/wiki/Mobile_software_content_rating_system - 'FOUR_PLUS': 0, - 'NINE_PLUS': 9, - 'TWELVE_PLUS': 12, - 'SEVENTEEN_PLUS': 17, - }.get(r, 17))), - })) diff --git a/youtube_dl/extractor/callin.py b/youtube_dl/extractor/callin.py deleted file mode 100644 index 341be479f..000000000 --- a/youtube_dl/extractor/callin.py +++ /dev/null @@ -1,74 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..compat import compat_str -from ..utils import ( - ExtractorError, - traverse_obj, - try_get, -) - - -class CallinIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?callin\.com/episode/(?:[^/#?-]+-)*(?P<id>[^/#?-]+)' - _TESTS = [{ - 'url': 'https://www.callin.com/episode/fcc-commissioner-brendan-carr-on-elons-PrumRdSQJW', - 'md5': '14ede27ee2c957b7e4db93140fc0745c', - 'info_dict': { - 'id': 'PrumRdSQJW', - 'ext': 'mp4', - 'title': 'FCC Commissioner Brendan Carr on Elon’s Starlink', - 'description': 'Or, why the government doesn’t like SpaceX', - 'channel': 'The Pull Request', - 'channel_url': 'https://callin.com/show/the-pull-request-ucnDJmEKAa', - } - }, { - 'url': 'https://www.callin.com/episode/episode-81-elites-melt-down-over-student-debt-lzxMidUnjA', - 'md5': '16f704ddbf82a27e3930533b12062f07', - 'info_dict': { - 'id': 'lzxMidUnjA', - 'ext': 'mp4', - 'title': 'Episode 81- Elites MELT DOWN over Student Debt Victory? 
Rumble in NYC?', - 'description': 'Let’s talk todays episode about the primary election shake up in NYC and the elites melting down over student debt cancelation.', - 'channel': 'The DEBRIEF With Briahna Joy Gray', - 'channel_url': 'https://callin.com/show/the-debrief-with-briahna-joy-gray-siiFDzGegm', - } - }] - - def _search_nextjs_data(self, webpage, video_id, transform_source=None, fatal=True, **kw): - return self._parse_json( - self._search_regex( - r'(?s)<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>([^<]+)</script>', - webpage, 'next.js data', fatal=fatal, **kw), - video_id, transform_source=transform_source, fatal=fatal) - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - next_data = self._search_nextjs_data(webpage, video_id) - episode = traverse_obj(next_data, ('props', 'pageProps', 'episode'), expected_type=dict) - if not episode: - raise ExtractorError('Failed to find episode data') - - title = episode.get('title') or self._og_search_title(webpage) - description = episode.get('description') or self._og_search_description(webpage) - - formats = [] - formats.extend(self._extract_m3u8_formats( - episode.get('m3u8'), video_id, 'mp4', - entry_protocol='m3u8_native', fatal=False)) - self._sort_formats(formats) - - channel = try_get(episode, lambda x: x['show']['title'], compat_str) - channel_url = try_get(episode, lambda x: x['show']['linkObj']['resourceUrl'], compat_str) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'formats': formats, - 'channel': channel, - 'channel_url': channel_url, - } diff --git a/youtube_dl/extractor/camdemy.py b/youtube_dl/extractor/camdemy.py deleted file mode 100644 index 8f0c6c545..000000000 --- a/youtube_dl/extractor/camdemy.py +++ /dev/null @@ -1,161 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..compat import ( - compat_urllib_parse_urlencode, - 
compat_urlparse, -) -from ..utils import ( - clean_html, - parse_duration, - str_to_int, - unified_strdate, -) - - -class CamdemyIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?camdemy\.com/media/(?P<id>\d+)' - _TESTS = [{ - # single file - 'url': 'http://www.camdemy.com/media/5181/', - 'md5': '5a5562b6a98b37873119102e052e311b', - 'info_dict': { - 'id': '5181', - 'ext': 'mp4', - 'title': 'Ch1-1 Introduction, Signals (02-23-2012)', - 'thumbnail': r're:^https?://.*\.jpg$', - 'creator': 'ss11spring', - 'duration': 1591, - 'upload_date': '20130114', - 'view_count': int, - } - }, { - # With non-empty description - # webpage returns "No permission or not login" - 'url': 'http://www.camdemy.com/media/13885', - 'md5': '4576a3bb2581f86c61044822adbd1249', - 'info_dict': { - 'id': '13885', - 'ext': 'mp4', - 'title': 'EverCam + Camdemy QuickStart', - 'thumbnail': r're:^https?://.*\.jpg$', - 'description': 'md5:2a9f989c2b153a2342acee579c6e7db6', - 'creator': 'evercam', - 'duration': 318, - } - }, { - # External source (YouTube) - 'url': 'http://www.camdemy.com/media/14842', - 'info_dict': { - 'id': '2vsYQzNIsJo', - 'ext': 'mp4', - 'title': 'Excel 2013 Tutorial - How to add Password Protection', - 'description': 'Excel 2013 Tutorial for Beginners - How to add Password Protection', - 'upload_date': '20130211', - 'uploader': 'Hun Kim', - 'uploader_id': 'hunkimtutorials', - }, - 'params': { - 'skip_download': True, - }, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - src_from = self._html_search_regex( - r"class=['\"]srcFrom['\"][^>]*>Sources?(?:\s+from)?\s*:\s*<a[^>]+(?:href|title)=(['\"])(?P<url>(?:(?!\1).)+)\1", - webpage, 'external source', default=None, group='url') - if src_from: - return self.url_result(src_from) - - oembed_obj = self._download_json( - 'http://www.camdemy.com/oembed/?format=json&url=' + url, video_id) - - title = oembed_obj['title'] - thumb_url = 
oembed_obj['thumbnail_url'] - video_folder = compat_urlparse.urljoin(thumb_url, 'video/') - file_list_doc = self._download_xml( - compat_urlparse.urljoin(video_folder, 'fileList.xml'), - video_id, 'Downloading filelist XML') - file_name = file_list_doc.find('./video/item/fileName').text - video_url = compat_urlparse.urljoin(video_folder, file_name) - - # Some URLs return "No permission or not login" in a webpage despite being - # freely available via oembed JSON URL (e.g. http://www.camdemy.com/media/13885) - upload_date = unified_strdate(self._search_regex( - r'>published on ([^<]+)<', webpage, - 'upload date', default=None)) - view_count = str_to_int(self._search_regex( - r'role=["\']viewCnt["\'][^>]*>([\d,.]+) views', - webpage, 'view count', default=None)) - description = self._html_search_meta( - 'description', webpage, default=None) or clean_html( - oembed_obj.get('description')) - - return { - 'id': video_id, - 'url': video_url, - 'title': title, - 'thumbnail': thumb_url, - 'description': description, - 'creator': oembed_obj.get('author_name'), - 'duration': parse_duration(oembed_obj.get('duration')), - 'upload_date': upload_date, - 'view_count': view_count, - } - - -class CamdemyFolderIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?camdemy\.com/folder/(?P<id>\d+)' - _TESTS = [{ - # links with trailing slash - 'url': 'http://www.camdemy.com/folder/450', - 'info_dict': { - 'id': '450', - 'title': '信號與系統 2012 & 2011 (Signals and Systems)', - }, - 'playlist_mincount': 145 - }, { - # links without trailing slash - # and multi-page - 'url': 'http://www.camdemy.com/folder/853', - 'info_dict': { - 'id': '853', - 'title': '科學計算 - 使用 Matlab' - }, - 'playlist_mincount': 20 - }, { - # with displayMode parameter. 
For testing the codes to add parameters - 'url': 'http://www.camdemy.com/folder/853/?displayMode=defaultOrderByOrg', - 'info_dict': { - 'id': '853', - 'title': '科學計算 - 使用 Matlab' - }, - 'playlist_mincount': 20 - }] - - def _real_extract(self, url): - folder_id = self._match_id(url) - - # Add displayMode=list so that all links are displayed in a single page - parsed_url = list(compat_urlparse.urlparse(url)) - query = dict(compat_urlparse.parse_qsl(parsed_url[4])) - query.update({'displayMode': 'list'}) - parsed_url[4] = compat_urllib_parse_urlencode(query) - final_url = compat_urlparse.urlunparse(parsed_url) - - page = self._download_webpage(final_url, folder_id) - matches = re.findall(r"href='(/media/\d+/?)'", page) - - entries = [self.url_result('http://www.camdemy.com' + media_path) - for media_path in matches] - - folder_title = self._html_search_meta('keywords', page) - - return self.playlist_result(entries, folder_id, folder_title) diff --git a/youtube_dl/extractor/cammodels.py b/youtube_dl/extractor/cammodels.py deleted file mode 100644 index d2e860b24..000000000 --- a/youtube_dl/extractor/cammodels.py +++ /dev/null @@ -1,84 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - int_or_none, - url_or_none, -) - - -class CamModelsIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?cammodels\.com/cam/(?P<id>[^/?#&]+)' - _TESTS = [{ - 'url': 'https://www.cammodels.com/cam/AutumnKnight/', - 'only_matching': True, - 'age_limit': 18 - }] - - def _real_extract(self, url): - user_id = self._match_id(url) - - manifest = self._download_json( - 'https://manifest-server.naiadsystems.com/live/s:%s.json' % user_id, user_id) - - formats = [] - thumbnails = [] - for format_id, format_dict in manifest['formats'].items(): - if not isinstance(format_dict, dict): - continue - encodings = format_dict.get('encodings') - if not isinstance(encodings, list): - continue - vcodec = format_dict.get('videoCodec') 
- acodec = format_dict.get('audioCodec') - for media in encodings: - if not isinstance(media, dict): - continue - media_url = url_or_none(media.get('location')) - if not media_url: - continue - - format_id_list = [format_id] - height = int_or_none(media.get('videoHeight')) - if height is not None: - format_id_list.append('%dp' % height) - f = { - 'url': media_url, - 'format_id': '-'.join(format_id_list), - 'width': int_or_none(media.get('videoWidth')), - 'height': height, - 'vbr': int_or_none(media.get('videoKbps')), - 'abr': int_or_none(media.get('audioKbps')), - 'fps': int_or_none(media.get('fps')), - 'vcodec': vcodec, - 'acodec': acodec, - } - if 'rtmp' in format_id: - f['ext'] = 'flv' - elif 'hls' in format_id: - f.update({ - 'ext': 'mp4', - # hls skips fragments, preferring rtmp - 'preference': -1, - }) - else: - if format_id == 'jpeg': - thumbnails.append({ - 'url': f['url'], - 'width': f['width'], - 'height': f['height'], - 'format_id': f['format_id'], - }) - continue - formats.append(f) - self._sort_formats(formats) - - return { - 'id': user_id, - 'title': self._live_title(user_id), - 'thumbnails': thumbnails, - 'is_live': True, - 'formats': formats, - 'age_limit': 18 - } diff --git a/youtube_dl/extractor/camtube.py b/youtube_dl/extractor/camtube.py deleted file mode 100644 index b3be3bdcf..000000000 --- a/youtube_dl/extractor/camtube.py +++ /dev/null @@ -1,71 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - int_or_none, - unified_timestamp, -) - - -class CamTubeIE(InfoExtractor): - _VALID_URL = r'https?://(?:(?:www|api)\.)?camtube\.co/recordings?/(?P<id>[^/?#&]+)' - _TESTS = [{ - 'url': 'https://camtube.co/recording/minafay-030618-1136-chaturbate-female', - 'info_dict': { - 'id': '42ad3956-dd5b-445a-8313-803ea6079fac', - 'display_id': 'minafay-030618-1136-chaturbate-female', - 'ext': 'mp4', - 'title': 'minafay-030618-1136-chaturbate-female', - 'duration': 1274, - 'timestamp': 
1528018608, - 'upload_date': '20180603', - 'age_limit': 18 - }, - 'params': { - 'skip_download': True, - }, - }] - - _API_BASE = 'https://api.camtube.co' - - def _real_extract(self, url): - display_id = self._match_id(url) - - token = self._download_json( - '%s/rpc/session/new' % self._API_BASE, display_id, - 'Downloading session token')['token'] - - self._set_cookie('api.camtube.co', 'session', token) - - video = self._download_json( - '%s/recordings/%s' % (self._API_BASE, display_id), display_id, - headers={'Referer': url}) - - video_id = video['uuid'] - timestamp = unified_timestamp(video.get('createdAt')) - duration = int_or_none(video.get('duration')) - view_count = int_or_none(video.get('viewCount')) - like_count = int_or_none(video.get('likeCount')) - creator = video.get('stageName') - - formats = [{ - 'url': '%s/recordings/%s/manifest.m3u8' - % (self._API_BASE, video_id), - 'format_id': 'hls', - 'ext': 'mp4', - 'protocol': 'm3u8_native', - }] - - return { - 'id': video_id, - 'display_id': display_id, - 'title': display_id, - 'timestamp': timestamp, - 'duration': duration, - 'view_count': view_count, - 'like_count': like_count, - 'creator': creator, - 'formats': formats, - 'age_limit': 18 - } diff --git a/youtube_dl/extractor/camwithher.py b/youtube_dl/extractor/camwithher.py deleted file mode 100644 index bbc5205fd..000000000 --- a/youtube_dl/extractor/camwithher.py +++ /dev/null @@ -1,89 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - int_or_none, - parse_duration, - unified_strdate, -) - - -class CamWithHerIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?camwithher\.tv/view_video\.php\?.*\bviewkey=(?P<id>\w+)' - - _TESTS = [{ - 'url': 'http://camwithher.tv/view_video.php?viewkey=6e9a24e2c0e842e1f177&page=&viewtype=&category=', - 'info_dict': { - 'id': '5644', - 'ext': 'flv', - 'title': 'Periscope Tease', - 'description': 'In the clouds teasing on periscope to my favorite 
song', - 'duration': 240, - 'view_count': int, - 'comment_count': int, - 'uploader': 'MileenaK', - 'upload_date': '20160322', - 'age_limit': 18, - }, - 'params': { - 'skip_download': True, - } - }, { - 'url': 'http://camwithher.tv/view_video.php?viewkey=6dfd8b7c97531a459937', - 'only_matching': True, - }, { - 'url': 'http://camwithher.tv/view_video.php?page=&viewkey=6e9a24e2c0e842e1f177&viewtype=&category=', - 'only_matching': True, - }, { - 'url': 'http://camwithher.tv/view_video.php?viewkey=b6c3b5bea9515d1a1fc4&page=&viewtype=&category=mv', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - flv_id = self._html_search_regex( - r'<a[^>]+href=["\']/download/\?v=(\d+)', webpage, 'video id') - - # Video URL construction algorithm is reverse-engineered from cwhplayer.swf - rtmp_url = 'rtmp://camwithher.tv/clipshare/%s' % ( - ('mp4:%s.mp4' % flv_id) if int(flv_id) > 2010 else flv_id) - - title = self._html_search_regex( - r'<div[^>]+style="float:left"[^>]*>\s*<h2>(.+?)</h2>', webpage, 'title') - description = self._html_search_regex( - r'>Description:</span>(.+?)</div>', webpage, 'description', default=None) - - runtime = self._search_regex( - r'Runtime\s*:\s*(.+?) 
\|', webpage, 'duration', default=None) - if runtime: - runtime = re.sub(r'[\s-]', '', runtime) - duration = parse_duration(runtime) - view_count = int_or_none(self._search_regex( - r'Views\s*:\s*(\d+)', webpage, 'view count', default=None)) - comment_count = int_or_none(self._search_regex( - r'Comments\s*:\s*(\d+)', webpage, 'comment count', default=None)) - - uploader = self._search_regex( - r'Added by\s*:\s*<a[^>]+>([^<]+)</a>', webpage, 'uploader', default=None) - upload_date = unified_strdate(self._search_regex( - r'Added on\s*:\s*([\d-]+)', webpage, 'upload date', default=None)) - - return { - 'id': flv_id, - 'url': rtmp_url, - 'ext': 'flv', - 'no_resume': True, - 'title': title, - 'description': description, - 'duration': duration, - 'view_count': view_count, - 'comment_count': comment_count, - 'uploader': uploader, - 'upload_date': upload_date, - 'age_limit': 18 - } diff --git a/youtube_dl/extractor/canalc2.py b/youtube_dl/extractor/canalc2.py deleted file mode 100644 index 407cc8084..000000000 --- a/youtube_dl/extractor/canalc2.py +++ /dev/null @@ -1,73 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import parse_duration - - -class Canalc2IE(InfoExtractor): - IE_NAME = 'canalc2.tv' - _VALID_URL = r'https?://(?:(?:www\.)?canalc2\.tv/video/|archives-canalc2\.u-strasbg\.fr/video\.asp\?.*\bidVideo=)(?P<id>\d+)' - - _TESTS = [{ - 'url': 'http://www.canalc2.tv/video/12163', - 'md5': '060158428b650f896c542dfbb3d6487f', - 'info_dict': { - 'id': '12163', - 'ext': 'mp4', - 'title': 'Terrasses du Numérique', - 'duration': 122, - }, - }, { - 'url': 'http://archives-canalc2.u-strasbg.fr/video.asp?idVideo=11427&voir=oui', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage( - 'http://www.canalc2.tv/video/%s' % video_id, video_id) - - title = self._html_search_regex( - 
r'(?s)class="[^"]*col_description[^"]*">.*?<h3>(.+?)</h3>', - webpage, 'title') - - formats = [] - for _, video_url in re.findall(r'file\s*=\s*(["\'])(.+?)\1', webpage): - if video_url.startswith('rtmp://'): - rtmp = re.search( - r'^(?P<url>rtmp://[^/]+/(?P<app>.+/))(?P<play_path>mp4:.+)$', video_url) - formats.append({ - 'url': rtmp.group('url'), - 'format_id': 'rtmp', - 'ext': 'flv', - 'app': rtmp.group('app'), - 'play_path': rtmp.group('play_path'), - 'page_url': url, - }) - else: - formats.append({ - 'url': video_url, - 'format_id': 'http', - }) - - if formats: - info = { - 'formats': formats, - } - else: - info = self._parse_html5_media_entries(url, webpage, url)[0] - - self._sort_formats(info['formats']) - - info.update({ - 'id': video_id, - 'title': title, - 'duration': parse_duration(self._search_regex( - r'id=["\']video_duree["\'][^>]*>([^<]+)', - webpage, 'duration', fatal=False)), - }) - return info diff --git a/youtube_dl/extractor/canalplus.py b/youtube_dl/extractor/canalplus.py deleted file mode 100644 index 51c11cb7e..000000000 --- a/youtube_dl/extractor/canalplus.py +++ /dev/null @@ -1,116 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - # ExtractorError, - # HEADRequest, - int_or_none, - qualities, - unified_strdate, -) - - -class CanalplusIE(InfoExtractor): - IE_DESC = 'mycanal.fr and piwiplus.fr' - _VALID_URL = r'https?://(?:www\.)?(?P<site>mycanal|piwiplus)\.fr/(?:[^/]+/)*(?P<display_id>[^?/]+)(?:\.html\?.*\bvid=|/p/)(?P<id>\d+)' - _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/%s/%s?format=json' - _SITE_ID_MAP = { - 'mycanal': 'cplus', - 'piwiplus': 'teletoon', - } - - # Only works for direct mp4 URLs - _GEO_COUNTRIES = ['FR'] - - _TESTS = [{ - 'url': 'https://www.mycanal.fr/d17-emissions/lolywood/p/1397061', - 'info_dict': { - 'id': '1397061', - 'display_id': 'lolywood', - 'ext': 'mp4', - 'title': 'Euro 2016 : Je préfère 
te prévenir - Lolywood - Episode 34', - 'description': 'md5:7d97039d455cb29cdba0d652a0efaa5e', - 'upload_date': '20160602', - }, - }, { - # geo restricted, bypassed - 'url': 'http://www.piwiplus.fr/videos-piwi/pid1405-le-labyrinthe-boing-super-ranger.html?vid=1108190', - 'info_dict': { - 'id': '1108190', - 'display_id': 'pid1405-le-labyrinthe-boing-super-ranger', - 'ext': 'mp4', - 'title': 'BOING SUPER RANGER - Ep : Le labyrinthe', - 'description': 'md5:4cea7a37153be42c1ba2c1d3064376ff', - 'upload_date': '20140724', - }, - 'expected_warnings': ['HTTP Error 403: Forbidden'], - }] - - def _real_extract(self, url): - site, display_id, video_id = re.match(self._VALID_URL, url).groups() - - site_id = self._SITE_ID_MAP[site] - - info_url = self._VIDEO_INFO_TEMPLATE % (site_id, video_id) - video_data = self._download_json(info_url, video_id, 'Downloading video JSON') - - if isinstance(video_data, list): - video_data = [video for video in video_data if video.get('ID') == video_id][0] - media = video_data['MEDIA'] - infos = video_data['INFOS'] - - preference = qualities(['MOBILE', 'BAS_DEBIT', 'HAUT_DEBIT', 'HD']) - - # _, fmt_url = next(iter(media['VIDEOS'].items())) - # if '/geo' in fmt_url.lower(): - # response = self._request_webpage( - # HEADRequest(fmt_url), video_id, - # 'Checking if the video is georestricted') - # if '/blocage' in response.geturl(): - # raise ExtractorError( - # 'The video is not available in your country', - # expected=True) - - formats = [] - for format_id, format_url in media['VIDEOS'].items(): - if not format_url: - continue - if format_id == 'HLS': - formats.extend(self._extract_m3u8_formats( - format_url, video_id, 'mp4', 'm3u8_native', m3u8_id=format_id, fatal=False)) - elif format_id == 'HDS': - formats.extend(self._extract_f4m_formats( - format_url + '?hdcore=2.11.3', video_id, f4m_id=format_id, fatal=False)) - else: - formats.append({ - # the secret extracted from ya function in http://player.canalplus.fr/common/js/canalPlayer.js - 'url': 
format_url + '?secret=pqzerjlsmdkjfoiuerhsdlfknaes', - 'format_id': format_id, - 'preference': preference(format_id), - }) - self._sort_formats(formats) - - thumbnails = [{ - 'id': image_id, - 'url': image_url, - } for image_id, image_url in media.get('images', {}).items()] - - titrage = infos['TITRAGE'] - - return { - 'id': video_id, - 'display_id': display_id, - 'title': '%s - %s' % (titrage['TITRE'], - titrage['SOUS_TITRE']), - 'upload_date': unified_strdate(infos.get('PUBLICATION', {}).get('DATE')), - 'thumbnails': thumbnails, - 'description': infos.get('DESCRIPTION'), - 'duration': int_or_none(infos.get('DURATION')), - 'view_count': int_or_none(infos.get('NB_VUES')), - 'like_count': int_or_none(infos.get('NB_LIKES')), - 'comment_count': int_or_none(infos.get('NB_COMMENTS')), - 'formats': formats, - } diff --git a/youtube_dl/extractor/canvas.py b/youtube_dl/extractor/canvas.py deleted file mode 100644 index eefbab241..000000000 --- a/youtube_dl/extractor/canvas.py +++ /dev/null @@ -1,384 +0,0 @@ -from __future__ import unicode_literals - -import re -import json - -from .common import InfoExtractor -from .gigya import GigyaBaseIE -from ..compat import compat_HTTPError -from ..utils import ( - ExtractorError, - clean_html, - extract_attributes, - float_or_none, - get_element_by_class, - int_or_none, - merge_dicts, - str_or_none, - strip_or_none, - url_or_none, -) - - -class CanvasIE(InfoExtractor): - _VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet|vrt(?:video|nieuws)|sporza|dako)/assets/(?P<id>[^/?#&]+)' - _TESTS = [{ - 'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475', - 'md5': '68993eda72ef62386a15ea2cf3c93107', - 'info_dict': { - 'id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475', - 'display_id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475', - 'ext': 'mp4', - 'title': 'Nachtwacht: De Greystook', - 'description': 'Nachtwacht: De Greystook', - 'thumbnail': 
r're:^https?://.*\.jpg$', - 'duration': 1468.04, - }, - 'expected_warnings': ['is not a supported codec', 'Unknown MIME type'], - }, { - 'url': 'https://mediazone.vrt.be/api/v1/canvas/assets/mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e', - 'only_matching': True, - }] - _GEO_BYPASS = False - _HLS_ENTRY_PROTOCOLS_MAP = { - 'HLS': 'm3u8_native', - 'HLS_AES': 'm3u8', - } - _REST_API_BASE = 'https://media-services-public.vrt.be/vualto-video-aggregator-web/rest/external/v1' - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - site_id, video_id = mobj.group('site_id'), mobj.group('id') - - data = None - if site_id != 'vrtvideo': - # Old API endpoint, serves more formats but may fail for some videos - data = self._download_json( - 'https://mediazone.vrt.be/api/v1/%s/assets/%s' - % (site_id, video_id), video_id, 'Downloading asset JSON', - 'Unable to download asset JSON', fatal=False) - - # New API endpoint - if not data: - headers = self.geo_verification_headers() - headers.update({'Content-Type': 'application/json'}) - token = self._download_json( - '%s/tokens' % self._REST_API_BASE, video_id, - 'Downloading token', data=b'', headers=headers)['vrtPlayerToken'] - data = self._download_json( - '%s/videos/%s' % (self._REST_API_BASE, video_id), - video_id, 'Downloading video JSON', query={ - 'vrtPlayerToken': token, - 'client': '%s@PROD' % site_id, - }, expected_status=400) - if not data.get('title'): - code = data.get('code') - if code == 'AUTHENTICATION_REQUIRED': - self.raise_login_required() - elif code == 'INVALID_LOCATION': - self.raise_geo_restricted(countries=['BE']) - raise ExtractorError(data.get('message') or code, expected=True) - - title = data['title'] - description = data.get('description') - - formats = [] - for target in data['targetUrls']: - format_url, format_type = url_or_none(target.get('url')), str_or_none(target.get('type')) - if not format_url or not format_type: - continue - format_type = format_type.upper() - if format_type in 
self._HLS_ENTRY_PROTOCOLS_MAP: - formats.extend(self._extract_m3u8_formats( - format_url, video_id, 'mp4', self._HLS_ENTRY_PROTOCOLS_MAP[format_type], - m3u8_id=format_type, fatal=False)) - elif format_type == 'HDS': - formats.extend(self._extract_f4m_formats( - format_url, video_id, f4m_id=format_type, fatal=False)) - elif format_type == 'MPEG_DASH': - formats.extend(self._extract_mpd_formats( - format_url, video_id, mpd_id=format_type, fatal=False)) - elif format_type == 'HSS': - formats.extend(self._extract_ism_formats( - format_url, video_id, ism_id='mss', fatal=False)) - else: - formats.append({ - 'format_id': format_type, - 'url': format_url, - }) - self._sort_formats(formats) - - subtitles = {} - subtitle_urls = data.get('subtitleUrls') - if isinstance(subtitle_urls, list): - for subtitle in subtitle_urls: - subtitle_url = subtitle.get('url') - if subtitle_url and subtitle.get('type') == 'CLOSED': - subtitles.setdefault('nl', []).append({'url': subtitle_url}) - - return { - 'id': video_id, - 'display_id': video_id, - 'title': title, - 'description': description, - 'formats': formats, - 'duration': float_or_none(data.get('duration'), 1000), - 'thumbnail': data.get('posterImageUrl'), - 'subtitles': subtitles, - } - - -class CanvasEenIE(InfoExtractor): - IE_DESC = 'canvas.be and een.be' - _VALID_URL = r'https?://(?:www\.)?(?P<site_id>canvas|een)\.be/(?:[^/]+/)*(?P<id>[^/?#&]+)' - _TESTS = [{ - 'url': 'http://www.canvas.be/video/de-afspraak/najaar-2015/de-afspraak-veilt-voor-de-warmste-week', - 'md5': 'ed66976748d12350b118455979cca293', - 'info_dict': { - 'id': 'mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e', - 'display_id': 'de-afspraak-veilt-voor-de-warmste-week', - 'ext': 'flv', - 'title': 'De afspraak veilt voor de Warmste Week', - 'description': 'md5:24cb860c320dc2be7358e0e5aa317ba6', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 49.02, - }, - 'expected_warnings': ['is not a supported codec'], - }, { - # with subtitles - 'url': 
'http://www.canvas.be/video/panorama/2016/pieter-0167', - 'info_dict': { - 'id': 'mz-ast-5240ff21-2d30-4101-bba6-92b5ec67c625', - 'display_id': 'pieter-0167', - 'ext': 'mp4', - 'title': 'Pieter 0167', - 'description': 'md5:943cd30f48a5d29ba02c3a104dc4ec4e', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 2553.08, - 'subtitles': { - 'nl': [{ - 'ext': 'vtt', - }], - }, - }, - 'params': { - 'skip_download': True, - }, - 'skip': 'Pagina niet gevonden', - }, { - 'url': 'https://www.een.be/thuis/emma-pakt-thilly-aan', - 'info_dict': { - 'id': 'md-ast-3a24ced2-64d7-44fb-b4ed-ed1aafbf90b8', - 'display_id': 'emma-pakt-thilly-aan', - 'ext': 'mp4', - 'title': 'Emma pakt Thilly aan', - 'description': 'md5:c5c9b572388a99b2690030afa3f3bad7', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 118.24, - }, - 'params': { - 'skip_download': True, - }, - 'expected_warnings': ['is not a supported codec'], - }, { - 'url': 'https://www.canvas.be/check-point/najaar-2016/de-politie-uw-vriend', - 'only_matching': True, - }] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - site_id, display_id = mobj.group('site_id'), mobj.group('id') - - webpage = self._download_webpage(url, display_id) - - title = strip_or_none(self._search_regex( - r'<h1[^>]+class="video__body__header__title"[^>]*>(.+?)</h1>', - webpage, 'title', default=None) or self._og_search_title( - webpage, default=None)) - - video_id = self._html_search_regex( - r'data-video=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id', - group='id') - - return { - '_type': 'url_transparent', - 'url': 'https://mediazone.vrt.be/api/v1/%s/assets/%s' % (site_id, video_id), - 'ie_key': CanvasIE.ie_key(), - 'id': video_id, - 'display_id': display_id, - 'title': title, - 'description': self._og_search_description(webpage), - } - - -class VrtNUIE(GigyaBaseIE): - IE_DESC = 'VrtNU.be' - _VALID_URL = r'https?://(?:www\.)?vrt\.be/vrtnu/a-z/(?:[^/]+/){2}(?P<id>[^/?#&]+)' - _TESTS = [{ - # Available via old API 
endpoint - 'url': 'https://www.vrt.be/vrtnu/a-z/postbus-x/1989/postbus-x-s1989a1/', - 'info_dict': { - 'id': 'pbs-pub-e8713dac-899e-41de-9313-81269f4c04ac$vid-90c932b1-e21d-4fb8-99b1-db7b49cf74de', - 'ext': 'mp4', - 'title': 'Postbus X - Aflevering 1 (Seizoen 1989)', - 'description': 'md5:b704f669eb9262da4c55b33d7c6ed4b7', - 'duration': 1457.04, - 'thumbnail': r're:^https?://.*\.jpg$', - 'series': 'Postbus X', - 'season': 'Seizoen 1989', - 'season_number': 1989, - 'episode': 'De zwarte weduwe', - 'episode_number': 1, - 'timestamp': 1595822400, - 'upload_date': '20200727', - }, - 'skip': 'This video is only available for registered users', - 'params': { - 'username': '<snip>', - 'password': '<snip>', - }, - 'expected_warnings': ['is not a supported codec'], - }, { - # Only available via new API endpoint - 'url': 'https://www.vrt.be/vrtnu/a-z/kamp-waes/1/kamp-waes-s1a5/', - 'info_dict': { - 'id': 'pbs-pub-0763b56c-64fb-4d38-b95b-af60bf433c71$vid-ad36a73c-4735-4f1f-b2c0-a38e6e6aa7e1', - 'ext': 'mp4', - 'title': 'Aflevering 5', - 'description': 'Wie valt door de mand tijdens een missie?', - 'duration': 2967.06, - 'season': 'Season 1', - 'season_number': 1, - 'episode_number': 5, - }, - 'skip': 'This video is only available for registered users', - 'params': { - 'username': '<snip>', - 'password': '<snip>', - }, - 'expected_warnings': ['Unable to download asset JSON', 'is not a supported codec', 'Unknown MIME type'], - }] - _NETRC_MACHINE = 'vrtnu' - _APIKEY = '3_0Z2HujMtiWq_pkAjgnS2Md2E11a1AwZjYiBETtwNE-EoEHDINgtnvcAOpNgmrVGy' - _CONTEXT_ID = 'R3595707040' - - def _real_initialize(self): - self._login() - - def _login(self): - username, password = self._get_login_info() - if username is None: - return - - auth_data = { - 'APIKey': self._APIKEY, - 'targetEnv': 'jssdk', - 'loginID': username, - 'password': password, - 'authMode': 'cookie', - } - - auth_info = self._gigya_login(auth_data) - - # Sometimes authentication fails for no good reason, retry - login_attempt = 1 - 
while login_attempt <= 3: - try: - # When requesting a token, no actual token is returned, but the - # necessary cookies are set. - self._request_webpage( - 'https://token.vrt.be', - None, note='Requesting a token', errnote='Could not get a token', - headers={ - 'Content-Type': 'application/json', - 'Referer': 'https://www.vrt.be/vrtnu/', - }, - data=json.dumps({ - 'uid': auth_info['UID'], - 'uidsig': auth_info['UIDSignature'], - 'ts': auth_info['signatureTimestamp'], - 'email': auth_info['profile']['email'], - }).encode('utf-8')) - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: - login_attempt += 1 - self.report_warning('Authentication failed') - self._sleep(1, None, msg_template='Waiting for %(timeout)s seconds before trying again') - else: - raise e - else: - break - - def _real_extract(self, url): - display_id = self._match_id(url) - - webpage = self._download_webpage(url, display_id) - - attrs = extract_attributes(self._search_regex( - r'(<nui-media[^>]+>)', webpage, 'media element')) - video_id = attrs['videoid'] - publication_id = attrs.get('publicationid') - if publication_id: - video_id = publication_id + '$' + video_id - - page = (self._parse_json(self._search_regex( - r'digitalData\s*=\s*({.+?});', webpage, 'digial data', - default='{}'), video_id, fatal=False) or {}).get('page') or {} - - info = self._search_json_ld(webpage, display_id, default={}) - return merge_dicts(info, { - '_type': 'url_transparent', - 'url': 'https://mediazone.vrt.be/api/v1/vrtvideo/assets/%s' % video_id, - 'ie_key': CanvasIE.ie_key(), - 'id': video_id, - 'display_id': display_id, - 'season_number': int_or_none(page.get('episode_season')), - }) - - -class DagelijkseKostIE(InfoExtractor): - IE_DESC = 'dagelijksekost.een.be' - _VALID_URL = r'https?://dagelijksekost\.een\.be/gerechten/(?P<id>[^/?#&]+)' - _TEST = { - 'url': 'https://dagelijksekost.een.be/gerechten/hachis-parmentier-met-witloof', - 'md5': 
'30bfffc323009a3e5f689bef6efa2365', - 'info_dict': { - 'id': 'md-ast-27a4d1ff-7d7b-425e-b84f-a4d227f592fa', - 'display_id': 'hachis-parmentier-met-witloof', - 'ext': 'mp4', - 'title': 'Hachis parmentier met witloof', - 'description': 'md5:9960478392d87f63567b5b117688cdc5', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 283.02, - }, - 'expected_warnings': ['is not a supported codec'], - } - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - - title = strip_or_none(get_element_by_class( - 'dish-metadata__title', webpage - ) or self._html_search_meta( - 'twitter:title', webpage)) - - description = clean_html(get_element_by_class( - 'dish-description', webpage) - ) or self._html_search_meta( - ('description', 'twitter:description', 'og:description'), - webpage) - - video_id = self._html_search_regex( - r'data-url=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id', - group='id') - - return { - '_type': 'url_transparent', - 'url': 'https://mediazone.vrt.be/api/v1/dako/assets/%s' % video_id, - 'ie_key': CanvasIE.ie_key(), - 'id': video_id, - 'display_id': display_id, - 'title': title, - 'description': description, - } diff --git a/youtube_dl/extractor/carambatv.py b/youtube_dl/extractor/carambatv.py deleted file mode 100644 index b57b86af7..000000000 --- a/youtube_dl/extractor/carambatv.py +++ /dev/null @@ -1,108 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..compat import compat_str -from ..utils import ( - float_or_none, - int_or_none, - try_get, -) - -from .videomore import VideomoreIE - - -class CarambaTVIE(InfoExtractor): - _VALID_URL = r'(?:carambatv:|https?://video1\.carambatv\.ru/v/)(?P<id>\d+)' - _TESTS = [{ - 'url': 'http://video1.carambatv.ru/v/191910501', - 'md5': '2f4a81b7cfd5ab866ee2d7270cb34a2a', - 'info_dict': { - 'id': '191910501', - 'ext': 'mp4', - 'title': '[BadComedian] - Разборка в Маниле (Абсолютный обзор)', - 
'thumbnail': r're:^https?://.*\.jpg', - 'duration': 2678.31, - }, - }, { - 'url': 'carambatv:191910501', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - video = self._download_json( - 'http://video1.carambatv.ru/v/%s/videoinfo.js' % video_id, - video_id) - - title = video['title'] - - base_url = video.get('video') or 'http://video1.carambatv.ru/v/%s/' % video_id - - formats = [{ - 'url': base_url + f['fn'], - 'height': int_or_none(f.get('height')), - 'format_id': '%sp' % f['height'] if f.get('height') else None, - } for f in video['qualities'] if f.get('fn')] - self._sort_formats(formats) - - thumbnail = video.get('splash') - duration = float_or_none(try_get( - video, lambda x: x['annotations'][0]['end_time'], compat_str)) - - return { - 'id': video_id, - 'title': title, - 'thumbnail': thumbnail, - 'duration': duration, - 'formats': formats, - } - - -class CarambaTVPageIE(InfoExtractor): - _VALID_URL = r'https?://carambatv\.ru/(?:[^/]+/)+(?P<id>[^/?#&]+)' - _TEST = { - 'url': 'http://carambatv.ru/movie/bad-comedian/razborka-v-manile/', - 'md5': 'a49fb0ec2ad66503eeb46aac237d3c86', - 'info_dict': { - 'id': '475222', - 'ext': 'flv', - 'title': '[BadComedian] - Разборка в Маниле (Абсолютный обзор)', - 'thumbnail': r're:^https?://.*\.jpg', - # duration reported by videomore is incorrect - 'duration': int, - }, - 'add_ie': [VideomoreIE.ie_key()], - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - videomore_url = VideomoreIE._extract_url(webpage) - if not videomore_url: - videomore_id = self._search_regex( - r'getVMCode\s*\(\s*["\']?(\d+)', webpage, 'videomore id', - default=None) - if videomore_id: - videomore_url = 'videomore:%s' % videomore_id - if videomore_url: - title = self._og_search_title(webpage) - return { - '_type': 'url_transparent', - 'url': videomore_url, - 'ie_key': VideomoreIE.ie_key(), - 'title': title, - } - - video_url = 
self._og_search_property('video:iframe', webpage, default=None) - - if not video_url: - video_id = self._search_regex( - r'(?:video_id|crmb_vuid)\s*[:=]\s*["\']?(\d+)', - webpage, 'video id') - video_url = 'carambatv:%s' % video_id - - return self.url_result(video_url, CarambaTVIE.ie_key()) diff --git a/youtube_dl/extractor/cartoonnetwork.py b/youtube_dl/extractor/cartoonnetwork.py deleted file mode 100644 index 48b33617f..000000000 --- a/youtube_dl/extractor/cartoonnetwork.py +++ /dev/null @@ -1,62 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .turner import TurnerBaseIE -from ..utils import int_or_none - - -class CartoonNetworkIE(TurnerBaseIE): - _VALID_URL = r'https?://(?:www\.)?cartoonnetwork\.com/video/(?:[^/]+/)+(?P<id>[^/?#]+)-(?:clip|episode)\.html' - _TEST = { - 'url': 'https://www.cartoonnetwork.com/video/ben-10/how-to-draw-upgrade-episode.html', - 'info_dict': { - 'id': '6e3375097f63874ebccec7ef677c1c3845fa850e', - 'ext': 'mp4', - 'title': 'How to Draw Upgrade', - 'description': 'md5:2061d83776db7e8be4879684eefe8c0f', - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - } - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - - def find_field(global_re, name, content_re=None, value_re='[^"]+', fatal=False): - metadata_re = '' - if content_re: - metadata_re = r'|video_metadata\.content_' + content_re - return self._search_regex( - r'(?:_cnglobal\.currentVideo\.%s%s)\s*=\s*"(%s)";' % (global_re, metadata_re, value_re), - webpage, name, fatal=fatal) - - media_id = find_field('mediaId', 'media id', 'id', '[0-9a-f]{40}', True) - title = find_field('episodeTitle', 'title', '(?:episodeName|name)', fatal=True) - - info = self._extract_ngtv_info( - media_id, {'networkId': 'cartoonnetwork'}, { - 'url': url, - 'site_name': 'CartoonNetwork', - 'auth_required': find_field('authType', 'auth type') != 'unauth', - }) - - series = find_field( - 'propertyName', 
'series', 'showName') or self._html_search_meta('partOfSeries', webpage) - info.update({ - 'id': media_id, - 'display_id': display_id, - 'title': title, - 'description': self._html_search_meta('description', webpage), - 'series': series, - 'episode': title, - }) - - for field in ('season', 'episode'): - field_name = field + 'Number' - info[field + '_number'] = int_or_none(find_field( - field_name, field + ' number', value_re=r'\d+') or self._html_search_meta(field_name, webpage)) - - return info diff --git a/youtube_dl/extractor/cbc.py b/youtube_dl/extractor/cbc.py deleted file mode 100644 index fd5ec6033..000000000 --- a/youtube_dl/extractor/cbc.py +++ /dev/null @@ -1,497 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import hashlib -import json -import re -from xml.sax.saxutils import escape - -from .common import InfoExtractor -from ..compat import ( - compat_str, - compat_HTTPError, -) -from ..utils import ( - js_to_json, - smuggle_url, - try_get, - xpath_text, - xpath_element, - xpath_with_ns, - find_xpath_attr, - orderedSet, - parse_duration, - parse_iso8601, - parse_age_limit, - strip_or_none, - int_or_none, - ExtractorError, -) - - -class CBCIE(InfoExtractor): - IE_NAME = 'cbc.ca' - _VALID_URL = r'https?://(?:www\.)?cbc\.ca/(?!player/)(?:[^/]+/)+(?P<id>[^/?#]+)' - _TESTS = [{ - # with mediaId - 'url': 'http://www.cbc.ca/22minutes/videos/clips-season-23/don-cherry-play-offs', - 'md5': '97e24d09672fc4cf56256d6faa6c25bc', - 'info_dict': { - 'id': '2682904050', - 'ext': 'mp4', - 'title': 'Don Cherry – All-Stars', - 'description': 'Don Cherry has a bee in his bonnet about AHL player John Scott because that guy’s got heart.', - 'timestamp': 1454463000, - 'upload_date': '20160203', - 'uploader': 'CBCC-NEW', - }, - 'skip': 'Geo-restricted to Canada', - }, { - # with clipId, feed available via tpfeed.cbc.ca and feed.theplatform.com - 'url': 'http://www.cbc.ca/22minutes/videos/22-minutes-update/22-minutes-update-episode-4', - 'md5': 
'162adfa070274b144f4fdc3c3b8207db', - 'info_dict': { - 'id': '2414435309', - 'ext': 'mp4', - 'title': '22 Minutes Update: What Not To Wear Quebec', - 'description': "This week's latest Canadian top political story is What Not To Wear Quebec.", - 'upload_date': '20131025', - 'uploader': 'CBCC-NEW', - 'timestamp': 1382717907, - }, - }, { - # with clipId, feed only available via tpfeed.cbc.ca - 'url': 'http://www.cbc.ca/archives/entry/1978-robin-williams-freestyles-on-90-minutes-live', - 'md5': '0274a90b51a9b4971fe005c63f592f12', - 'info_dict': { - 'id': '2487345465', - 'ext': 'mp4', - 'title': 'Robin Williams freestyles on 90 Minutes Live', - 'description': 'Wacky American comedian Robin Williams shows off his infamous "freestyle" comedic talents while being interviewed on CBC\'s 90 Minutes Live.', - 'upload_date': '19780210', - 'uploader': 'CBCC-NEW', - 'timestamp': 255977160, - }, - }, { - # multiple iframes - 'url': 'http://www.cbc.ca/natureofthings/blog/birds-eye-view-from-vancouvers-burrard-street-bridge-how-we-got-the-shot', - 'playlist': [{ - 'md5': '377572d0b49c4ce0c9ad77470e0b96b4', - 'info_dict': { - 'id': '2680832926', - 'ext': 'mp4', - 'title': 'An Eagle\'s-Eye View Off Burrard Bridge', - 'description': 'Hercules the eagle flies from Vancouver\'s Burrard Bridge down to a nearby park with a mini-camera strapped to his back.', - 'upload_date': '20160201', - 'timestamp': 1454342820, - 'uploader': 'CBCC-NEW', - }, - }, { - 'md5': '415a0e3f586113894174dfb31aa5bb1a', - 'info_dict': { - 'id': '2658915080', - 'ext': 'mp4', - 'title': 'Fly like an eagle!', - 'description': 'Eagle equipped with a mini camera flies from the world\'s tallest tower', - 'upload_date': '20150315', - 'timestamp': 1426443984, - 'uploader': 'CBCC-NEW', - }, - }], - 'skip': 'Geo-restricted to Canada', - }, { - # multiple CBC.APP.Caffeine.initInstance(...) 
- 'url': 'http://www.cbc.ca/news/canada/calgary/dog-indoor-exercise-winter-1.3928238', - 'info_dict': { - 'title': 'Keep Rover active during the deep freeze with doggie pushups and other fun indoor tasks', - 'id': 'dog-indoor-exercise-winter-1.3928238', - 'description': 'md5:c18552e41726ee95bd75210d1ca9194c', - }, - 'playlist_mincount': 6, - }] - - @classmethod - def suitable(cls, url): - return False if CBCPlayerIE.suitable(url) else super(CBCIE, cls).suitable(url) - - def _extract_player_init(self, player_init, display_id): - player_info = self._parse_json(player_init, display_id, js_to_json) - media_id = player_info.get('mediaId') - if not media_id: - clip_id = player_info['clipId'] - feed = self._download_json( - 'http://tpfeed.cbc.ca/f/ExhSPC/vms_5akSXx4Ng_Zn?byCustomValue={:mpsReleases}{%s}' % clip_id, - clip_id, fatal=False) - if feed: - media_id = try_get(feed, lambda x: x['entries'][0]['guid'], compat_str) - if not media_id: - media_id = self._download_json( - 'http://feed.theplatform.com/f/h9dtGB/punlNGjMlc1F?fields=id&byContent=byReleases%3DbyId%253D' + clip_id, - clip_id)['entries'][0]['id'].split('/')[-1] - return self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id) - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - title = self._og_search_title(webpage, default=None) or self._html_search_meta( - 'twitter:title', webpage, 'title', default=None) or self._html_search_regex( - r'<title>([^<]+)', webpage, 'title', fatal=False) - entries = [ - self._extract_player_init(player_init, display_id) - for player_init in re.findall(r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage)] - media_ids = [] - for media_id_re in ( - r']+src="[^"]+?mediaId=(\d+)"', - r']+\bid=["\']player-(\d+)', - r'guid["\']\s*:\s*["\'](\d+)'): - media_ids.extend(re.findall(media_id_re, webpage)) - entries.extend([ - self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id) - for media_id in 
orderedSet(media_ids)]) - return self.playlist_result( - entries, display_id, strip_or_none(title), - self._og_search_description(webpage)) - - -class CBCPlayerIE(InfoExtractor): - IE_NAME = 'cbc.ca:player' - _VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/|i/caffeine/syndicate/\?mediaId=))(?P\d+)' - _TESTS = [{ - 'url': 'http://www.cbc.ca/player/play/2683190193', - 'md5': '64d25f841ddf4ddb28a235338af32e2c', - 'info_dict': { - 'id': '2683190193', - 'ext': 'mp4', - 'title': 'Gerry Runs a Sweat Shop', - 'description': 'md5:b457e1c01e8ff408d9d801c1c2cd29b0', - 'timestamp': 1455071400, - 'upload_date': '20160210', - 'uploader': 'CBCC-NEW', - }, - 'skip': 'Geo-restricted to Canada', - }, { - # Redirected from http://www.cbc.ca/player/AudioMobile/All%20in%20a%20Weekend%20Montreal/ID/2657632011/ - 'url': 'http://www.cbc.ca/player/play/2657631896', - 'md5': 'e5e708c34ae6fca156aafe17c43e8b75', - 'info_dict': { - 'id': '2657631896', - 'ext': 'mp3', - 'title': 'CBC Montreal is organizing its first ever community hackathon!', - 'description': 'The modern technology we tend to depend on so heavily, is never without it\'s share of hiccups and headaches. 
Next weekend - CBC Montreal will be getting members of the public for its first Hackathon.', - 'timestamp': 1425704400, - 'upload_date': '20150307', - 'uploader': 'CBCC-NEW', - }, - }, { - 'url': 'http://www.cbc.ca/player/play/2164402062', - 'md5': '33fcd8f6719b9dd60a5e73adcb83b9f6', - 'info_dict': { - 'id': '2164402062', - 'ext': 'mp4', - 'title': 'Cancer survivor four times over', - 'description': 'Tim Mayer has beaten three different forms of cancer four times in five years.', - 'timestamp': 1320410746, - 'upload_date': '20111104', - 'uploader': 'CBCC-NEW', - }, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - return { - '_type': 'url_transparent', - 'ie_key': 'ThePlatform', - 'url': smuggle_url( - 'http://link.theplatform.com/s/ExhSPC/media/guid/2655402169/%s?mbr=true&formats=MPEG4,FLV,MP3' % video_id, { - 'force_smil_url': True - }), - 'id': video_id, - } - - -class CBCWatchBaseIE(InfoExtractor): - _device_id = None - _device_token = None - _API_BASE_URL = 'https://api-cbc.cloud.clearleap.com/cloffice/client/' - _NS_MAP = { - 'media': 'http://search.yahoo.com/mrss/', - 'clearleap': 'http://www.clearleap.com/namespace/clearleap/1.0/', - } - _GEO_COUNTRIES = ['CA'] - _LOGIN_URL = 'https://api.loginradius.com/identity/v2/auth/login' - _TOKEN_URL = 'https://cloud-api.loginradius.com/sso/jwt/api/token' - _API_KEY = '3f4beddd-2061-49b0-ae80-6f1f2ed65b37' - _NETRC_MACHINE = 'cbcwatch' - - def _signature(self, email, password): - data = json.dumps({ - 'email': email, - 'password': password, - }).encode() - headers = {'content-type': 'application/json'} - query = {'apikey': self._API_KEY} - resp = self._download_json(self._LOGIN_URL, None, data=data, headers=headers, query=query) - access_token = resp['access_token'] - - # token - query = { - 'access_token': access_token, - 'apikey': self._API_KEY, - 'jwtapp': 'jwt', - } - resp = self._download_json(self._TOKEN_URL, None, headers=headers, query=query) - return resp['signature'] - - def 
_call_api(self, path, video_id): - url = path if path.startswith('http') else self._API_BASE_URL + path - for _ in range(2): - try: - result = self._download_xml(url, video_id, headers={ - 'X-Clearleap-DeviceId': self._device_id, - 'X-Clearleap-DeviceToken': self._device_token, - }) - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: - # Device token has expired, re-acquiring device token - self._register_device() - continue - raise - error_message = xpath_text(result, 'userMessage') or xpath_text(result, 'systemMessage') - if error_message: - raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message)) - return result - - def _real_initialize(self): - if self._valid_device_token(): - return - device = self._downloader.cache.load( - 'cbcwatch', self._cache_device_key()) or {} - self._device_id, self._device_token = device.get('id'), device.get('token') - if self._valid_device_token(): - return - self._register_device() - - def _valid_device_token(self): - return self._device_id and self._device_token - - def _cache_device_key(self): - email, _ = self._get_login_info() - return '%s_device' % hashlib.sha256(email.encode()).hexdigest() if email else 'device' - - def _register_device(self): - result = self._download_xml( - self._API_BASE_URL + 'device/register', - None, 'Acquiring device token', - data=b'web') - self._device_id = xpath_text(result, 'deviceId', fatal=True) - email, password = self._get_login_info() - if email and password: - signature = self._signature(email, password) - data = '{0}{1}web'.format( - escape(signature), escape(self._device_id)).encode() - url = self._API_BASE_URL + 'device/login' - result = self._download_xml( - url, None, data=data, - headers={'content-type': 'application/xml'}) - self._device_token = xpath_text(result, 'token', fatal=True) - else: - self._device_token = xpath_text(result, 'deviceToken', fatal=True) - self._downloader.cache.store( - 'cbcwatch', 
self._cache_device_key(), { - 'id': self._device_id, - 'token': self._device_token, - }) - - def _parse_rss_feed(self, rss): - channel = xpath_element(rss, 'channel', fatal=True) - - def _add_ns(path): - return xpath_with_ns(path, self._NS_MAP) - - entries = [] - for item in channel.findall('item'): - guid = xpath_text(item, 'guid', fatal=True) - title = xpath_text(item, 'title', fatal=True) - - media_group = xpath_element(item, _add_ns('media:group'), fatal=True) - content = xpath_element(media_group, _add_ns('media:content'), fatal=True) - content_url = content.attrib['url'] - - thumbnails = [] - for thumbnail in media_group.findall(_add_ns('media:thumbnail')): - thumbnail_url = thumbnail.get('url') - if not thumbnail_url: - continue - thumbnails.append({ - 'id': thumbnail.get('profile'), - 'url': thumbnail_url, - 'width': int_or_none(thumbnail.get('width')), - 'height': int_or_none(thumbnail.get('height')), - }) - - timestamp = None - release_date = find_xpath_attr( - item, _add_ns('media:credit'), 'role', 'releaseDate') - if release_date is not None: - timestamp = parse_iso8601(release_date.text) - - entries.append({ - '_type': 'url_transparent', - 'url': content_url, - 'id': guid, - 'title': title, - 'description': xpath_text(item, 'description'), - 'timestamp': timestamp, - 'duration': int_or_none(content.get('duration')), - 'age_limit': parse_age_limit(xpath_text(item, _add_ns('media:rating'))), - 'episode': xpath_text(item, _add_ns('clearleap:episode')), - 'episode_number': int_or_none(xpath_text(item, _add_ns('clearleap:episodeInSeason'))), - 'series': xpath_text(item, _add_ns('clearleap:series')), - 'season_number': int_or_none(xpath_text(item, _add_ns('clearleap:season'))), - 'thumbnails': thumbnails, - 'ie_key': 'CBCWatchVideo', - }) - - return self.playlist_result( - entries, xpath_text(channel, 'guid'), - xpath_text(channel, 'title'), - xpath_text(channel, 'description')) - - -class CBCWatchVideoIE(CBCWatchBaseIE): - IE_NAME = 'cbc.ca:watch:video' - 
_VALID_URL = r'https?://api-cbc\.cloud\.clearleap\.com/cloffice/client/web/play/?\?.*?\bcontentId=(?P[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})' - _TEST = { - # geo-restricted to Canada, bypassable - 'url': 'https://api-cbc.cloud.clearleap.com/cloffice/client/web/play/?contentId=3c84472a-1eea-4dee-9267-2655d5055dcf&categoryId=ebc258f5-ee40-4cca-b66b-ba6bd55b7235', - 'only_matching': True, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - result = self._call_api(url, video_id) - - m3u8_url = xpath_text(result, 'url', fatal=True) - formats = self._extract_m3u8_formats(re.sub(r'/([^/]+)/[^/?]+\.m3u8', r'/\1/\1.m3u8', m3u8_url), video_id, 'mp4', fatal=False) - if len(formats) < 2: - formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4') - for f in formats: - format_id = f.get('format_id') - if format_id.startswith('AAC'): - f['acodec'] = 'aac' - elif format_id.startswith('AC3'): - f['acodec'] = 'ac-3' - self._sort_formats(formats) - - info = { - 'id': video_id, - 'title': video_id, - 'formats': formats, - } - - rss = xpath_element(result, 'rss') - if rss: - info.update(self._parse_rss_feed(rss)['entries'][0]) - del info['url'] - del info['_type'] - del info['ie_key'] - return info - - -class CBCWatchIE(CBCWatchBaseIE): - IE_NAME = 'cbc.ca:watch' - _VALID_URL = r'https?://(?:gem|watch)\.cbc\.ca/(?:[^/]+/)+(?P[0-9a-f-]+)' - _TESTS = [{ - # geo-restricted to Canada, bypassable - 'url': 'http://watch.cbc.ca/doc-zone/season-6/customer-disservice/38e815a-009e3ab12e4', - 'info_dict': { - 'id': '9673749a-5e77-484c-8b62-a1092a6b5168', - 'ext': 'mp4', - 'title': 'Customer (Dis)Service', - 'description': 'md5:8bdd6913a0fe03d4b2a17ebe169c7c87', - 'upload_date': '20160219', - 'timestamp': 1455840000, - }, - 'params': { - # m3u8 download - 'skip_download': True, - 'format': 'bestvideo', - }, - }, { - # geo-restricted to Canada, bypassable - 'url': 'http://watch.cbc.ca/arthur/all/1ed4b385-cd84-49cf-95f0-80f004680057', - 'info_dict': { - 
'id': '1ed4b385-cd84-49cf-95f0-80f004680057', - 'title': 'Arthur', - 'description': 'Arthur, the sweetest 8-year-old aardvark, and his pals solve all kinds of problems with humour, kindness and teamwork.', - }, - 'playlist_mincount': 30, - }, { - 'url': 'https://gem.cbc.ca/media/this-hour-has-22-minutes/season-26/episode-20/38e815a-0108c6c6a42', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - rss = self._call_api('web/browse/' + video_id, video_id) - return self._parse_rss_feed(rss) - - -class CBCOlympicsIE(InfoExtractor): - IE_NAME = 'cbc.ca:olympics' - _VALID_URL = r'https?://olympics\.cbc\.ca/video/[^/]+/(?P[^/?#]+)' - _TESTS = [{ - 'url': 'https://olympics.cbc.ca/video/whats-on-tv/olympic-morning-featuring-the-opening-ceremony/', - 'only_matching': True, - }] - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - video_id = self._hidden_inputs(webpage)['videoId'] - video_doc = self._download_xml( - 'https://olympics.cbc.ca/videodata/%s.xml' % video_id, video_id) - title = xpath_text(video_doc, 'title', fatal=True) - is_live = xpath_text(video_doc, 'kind') == 'Live' - if is_live: - title = self._live_title(title) - - formats = [] - for video_source in video_doc.findall('videoSources/videoSource'): - uri = xpath_text(video_source, 'uri') - if not uri: - continue - tokenize = self._download_json( - 'https://olympics.cbc.ca/api/api-akamai/tokenize', - video_id, data=json.dumps({ - 'VideoSource': uri, - }).encode(), headers={ - 'Content-Type': 'application/json', - 'Referer': url, - # d3.VideoPlayer._init in https://olympics.cbc.ca/components/script/base.js - 'Cookie': '_dvp=TK:C0ObxjerU', # AKAMAI CDN cookie - }, fatal=False) - if not tokenize: - continue - content_url = tokenize['ContentUrl'] - video_source_format = video_source.get('format') - if video_source_format == 'IIS': - formats.extend(self._extract_ism_formats( - content_url, video_id, 
ism_id=video_source_format, fatal=False)) - else: - formats.extend(self._extract_m3u8_formats( - content_url, video_id, 'mp4', - 'm3u8' if is_live else 'm3u8_native', - m3u8_id=video_source_format, fatal=False)) - self._sort_formats(formats) - - return { - 'id': video_id, - 'display_id': display_id, - 'title': title, - 'description': xpath_text(video_doc, 'description'), - 'thumbnail': xpath_text(video_doc, 'thumbnailUrl'), - 'duration': parse_duration(xpath_text(video_doc, 'duration')), - 'formats': formats, - 'is_live': is_live, - } diff --git a/youtube_dl/extractor/cbs.py b/youtube_dl/extractor/cbs.py deleted file mode 100644 index c79e55a75..000000000 --- a/youtube_dl/extractor/cbs.py +++ /dev/null @@ -1,115 +0,0 @@ -from __future__ import unicode_literals - -from .theplatform import ThePlatformFeedIE -from ..utils import ( - ExtractorError, - int_or_none, - find_xpath_attr, - xpath_element, - xpath_text, - update_url_query, -) - - -class CBSBaseIE(ThePlatformFeedIE): - def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'): - subtitles = {} - for k, ext in [('sMPTE-TTCCURL', 'tt'), ('ClosedCaptionURL', 'ttml'), ('webVTTCaptionURL', 'vtt')]: - cc_e = find_xpath_attr(smil, self._xpath_ns('.//param', namespace), 'name', k) - if cc_e is not None: - cc_url = cc_e.get('value') - if cc_url: - subtitles.setdefault(subtitles_lang, []).append({ - 'ext': ext, - 'url': cc_url, - }) - return subtitles - - -class CBSIE(CBSBaseIE): - _VALID_URL = r'(?:cbs:|https?://(?:www\.)?(?:(?:cbs|paramountplus)\.com/shows/[^/]+/video|colbertlateshow\.com/(?:video|podcasts))/)(?P[\w-]+)' - - _TESTS = [{ - 'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/', - 'info_dict': { - 'id': '_u7W953k6la293J7EPTd9oHkSPs6Xn6_', - 'ext': 'mp4', - 'title': 'Connect Chat feat. Garth Brooks', - 'description': 'Connect with country music singer Garth Brooks, as he chats with fans on Wednesday November 27, 2013. 
Be sure to tune in to Garth Brooks: Live from Las Vegas, Friday November 29, at 9/8c on CBS!', - 'duration': 1495, - 'timestamp': 1385585425, - 'upload_date': '20131127', - 'uploader': 'CBSI-NEW', - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - '_skip': 'Blocked outside the US', - }, { - 'url': 'http://colbertlateshow.com/video/8GmB0oY0McANFvp2aEffk9jZZZ2YyXxy/the-colbeard/', - 'only_matching': True, - }, { - 'url': 'http://www.colbertlateshow.com/podcasts/dYSwjqPs_X1tvbV_P2FcPWRa_qT6akTC/in-the-bad-room-with-stephen/', - 'only_matching': True, - }, { - 'url': 'https://www.paramountplus.com/shows/all-rise/video/QmR1WhNkh1a_IrdHZrbcRklm176X_rVc/all-rise-space/', - 'only_matching': True, - }] - - def _extract_video_info(self, content_id, site='cbs', mpx_acc=2198311517): - items_data = self._download_xml( - 'http://can.cbs.com/thunder/player/videoPlayerService.php', - content_id, query={'partner': site, 'contentId': content_id}) - video_data = xpath_element(items_data, './/item') - title = xpath_text(video_data, 'videoTitle', 'title', True) - tp_path = 'dJ5BDC/media/guid/%d/%s' % (mpx_acc, content_id) - tp_release_url = 'http://link.theplatform.com/s/' + tp_path - - asset_types = [] - subtitles = {} - formats = [] - last_e = None - for item in items_data.findall('.//item'): - asset_type = xpath_text(item, 'assetType') - if not asset_type or asset_type in asset_types or 'HLS_FPS' in asset_type or 'DASH_CENC' in asset_type: - continue - asset_types.append(asset_type) - query = { - 'mbr': 'true', - 'assetTypes': asset_type, - } - if asset_type.startswith('HLS') or asset_type in ('OnceURL', 'StreamPack'): - query['formats'] = 'MPEG4,M3U' - elif asset_type in ('RTMP', 'WIFI', '3G'): - query['formats'] = 'MPEG4,FLV' - try: - tp_formats, tp_subtitles = self._extract_theplatform_smil( - update_url_query(tp_release_url, query), content_id, - 'Downloading %s SMIL data' % asset_type) - except ExtractorError as e: - last_e = e - continue - 
formats.extend(tp_formats) - subtitles = self._merge_subtitles(subtitles, tp_subtitles) - if last_e and not formats: - raise last_e - self._sort_formats(formats) - - info = self._extract_theplatform_metadata(tp_path, content_id) - info.update({ - 'id': content_id, - 'title': title, - 'series': xpath_text(video_data, 'seriesTitle'), - 'season_number': int_or_none(xpath_text(video_data, 'seasonNumber')), - 'episode_number': int_or_none(xpath_text(video_data, 'episodeNumber')), - 'duration': int_or_none(xpath_text(video_data, 'videoLength'), 1000), - 'thumbnail': xpath_text(video_data, 'previewImageURL'), - 'formats': formats, - 'subtitles': subtitles, - }) - return info - - def _real_extract(self, url): - content_id = self._match_id(url) - return self._extract_video_info(content_id) diff --git a/youtube_dl/extractor/cbsinteractive.py b/youtube_dl/extractor/cbsinteractive.py deleted file mode 100644 index 6596e98a6..000000000 --- a/youtube_dl/extractor/cbsinteractive.py +++ /dev/null @@ -1,103 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .cbs import CBSIE -from ..utils import int_or_none - - -class CBSInteractiveIE(CBSIE): - _VALID_URL = r'https?://(?:www\.)?(?Pcnet|zdnet)\.com/(?:videos|video(?:/share)?)/(?P[^/?]+)' - _TESTS = [{ - 'url': 'http://www.cnet.com/videos/hands-on-with-microsofts-windows-8-1-update/', - 'info_dict': { - 'id': 'R49SYt__yAfmlXR85z4f7gNmCBDcN_00', - 'display_id': 'hands-on-with-microsofts-windows-8-1-update', - 'ext': 'mp4', - 'title': 'Hands-on with Microsoft Windows 8.1 Update', - 'description': 'The new update to the Windows 8 OS brings improved performance for mouse and keyboard users.', - 'uploader_id': '6085384d-619e-11e3-b231-14feb5ca9861', - 'uploader': 'Sarah Mitroff', - 'duration': 70, - 'timestamp': 1396479627, - 'upload_date': '20140402', - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, { - 'url': 
'http://www.cnet.com/videos/whiny-pothole-tweets-at-local-government-when-hit-by-cars-tomorrow-daily-187/', - 'md5': 'f11d27b2fa18597fbf92444d2a9ed386', - 'info_dict': { - 'id': 'kjOJd_OoVJqbg_ZD8MZCOk8Wekb9QccK', - 'display_id': 'whiny-pothole-tweets-at-local-government-when-hit-by-cars-tomorrow-daily-187', - 'ext': 'mp4', - 'title': 'Whiny potholes tweet at local government when hit by cars (Tomorrow Daily 187)', - 'description': 'md5:d2b9a95a5ffe978ae6fbd4cf944d618f', - 'uploader_id': 'b163284d-6b73-44fc-b3e6-3da66c392d40', - 'uploader': 'Ashley Esqueda', - 'duration': 1482, - 'timestamp': 1433289889, - 'upload_date': '20150603', - }, - }, { - 'url': 'http://www.zdnet.com/video/share/video-keeping-android-smartphones-and-tablets-secure/', - 'info_dict': { - 'id': 'k0r4T_ehht4xW_hAOqiVQPuBDPZ8SRjt', - 'display_id': 'video-keeping-android-smartphones-and-tablets-secure', - 'ext': 'mp4', - 'title': 'Video: Keeping Android smartphones and tablets secure', - 'description': 'Here\'s the best way to keep Android devices secure, and what you do when they\'ve come to the end of their lives.', - 'uploader_id': 'f2d97ea2-8175-11e2-9d12-0018fe8a00b0', - 'uploader': 'Adrian Kingsley-Hughes', - 'duration': 731, - 'timestamp': 1449129925, - 'upload_date': '20151203', - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, { - 'url': 'http://www.zdnet.com/video/huawei-matebook-x-video/', - 'only_matching': True, - }] - - MPX_ACCOUNTS = { - 'cnet': 2198311517, - 'zdnet': 2387448114, - } - - def _real_extract(self, url): - site, display_id = re.match(self._VALID_URL, url).groups() - webpage = self._download_webpage(url, display_id) - - data_json = self._html_search_regex( - r"data(?:-(?:cnet|zdnet))?-video(?:-(?:uvp(?:js)?|player))?-options='([^']+)'", - webpage, 'data json') - data = self._parse_json(data_json, display_id) - vdata = data.get('video') or (data.get('videos') or data.get('playlist'))[0] - - video_id = vdata['mpxRefId'] - - title = vdata['title'] - 
author = vdata.get('author') - if author: - uploader = '%s %s' % (author['firstName'], author['lastName']) - uploader_id = author.get('id') - else: - uploader = None - uploader_id = None - - info = self._extract_video_info(video_id, site, self.MPX_ACCOUNTS[site]) - info.update({ - 'id': video_id, - 'display_id': display_id, - 'title': title, - 'duration': int_or_none(vdata.get('duration')), - 'uploader': uploader, - 'uploader_id': uploader_id, - }) - return info diff --git a/youtube_dl/extractor/cbslocal.py b/youtube_dl/extractor/cbslocal.py deleted file mode 100644 index 3b7e1a8b9..000000000 --- a/youtube_dl/extractor/cbslocal.py +++ /dev/null @@ -1,119 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .anvato import AnvatoIE -from .sendtonews import SendtoNewsIE -from ..compat import compat_urlparse -from ..utils import ( - parse_iso8601, - unified_timestamp, -) - - -class CBSLocalIE(AnvatoIE): - _VALID_URL_BASE = r'https?://[a-z]+\.cbslocal\.com/' - _VALID_URL = _VALID_URL_BASE + r'video/(?P\d+)' - - _TESTS = [{ - 'url': 'http://newyork.cbslocal.com/video/3580809-a-very-blue-anniversary/', - 'info_dict': { - 'id': '3580809', - 'ext': 'mp4', - 'title': 'A Very Blue Anniversary', - 'description': 'CBS2’s Cindy Hsu has more.', - 'thumbnail': 're:^https?://.*', - 'timestamp': int, - 'upload_date': r're:^\d{8}$', - 'uploader': 'CBS', - 'subtitles': { - 'en': 'mincount:5', - }, - 'categories': [ - 'Stations\\Spoken Word\\WCBSTV', - 'Syndication\\AOL', - 'Syndication\\MSN', - 'Syndication\\NDN', - 'Syndication\\Yahoo', - 'Content\\News', - 'Content\\News\\Local News', - ], - 'tags': ['CBS 2 News Weekends', 'Cindy Hsu', 'Blue Man Group'], - }, - 'params': { - 'skip_download': True, - }, - }] - - def _real_extract(self, url): - mcp_id = self._match_id(url) - return self.url_result( - 'anvato:anvato_cbslocal_app_web_prod_547f3e49241ef0e5d30c79b2efbca5d92c698f67:' + mcp_id, 'Anvato', mcp_id) - - -class CBSLocalArticleIE(AnvatoIE): - _VALID_URL = 
CBSLocalIE._VALID_URL_BASE + r'\d+/\d+/\d+/(?P[0-9a-z-]+)' - - _TESTS = [{ - # Anvato backend - 'url': 'http://losangeles.cbslocal.com/2016/05/16/safety-advocates-say-fatal-car-seat-failures-are-public-health-crisis', - 'md5': 'f0ee3081e3843f575fccef901199b212', - 'info_dict': { - 'id': '3401037', - 'ext': 'mp4', - 'title': 'Safety Advocates Say Fatal Car Seat Failures Are \'Public Health Crisis\'', - 'description': 'Collapsing seats have been the focus of scrutiny for decades, though experts say remarkably little has been done to address the issue. Randy Paige reports.', - 'thumbnail': 're:^https?://.*', - 'timestamp': 1463440500, - 'upload_date': '20160516', - 'uploader': 'CBS', - 'subtitles': { - 'en': 'mincount:5', - }, - 'categories': [ - 'Stations\\Spoken Word\\KCBSTV', - 'Syndication\\MSN', - 'Syndication\\NDN', - 'Syndication\\AOL', - 'Syndication\\Yahoo', - 'Syndication\\Tribune', - 'Syndication\\Curb.tv', - 'Content\\News' - ], - 'tags': ['CBS 2 News Evening'], - }, - }, { - # SendtoNews embed - 'url': 'http://cleveland.cbslocal.com/2016/05/16/indians-score-season-high-15-runs-in-blowout-win-over-reds-rapid-reaction/', - 'info_dict': { - 'id': 'GxfCe0Zo7D-175909-5588', - }, - 'playlist_count': 9, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }] - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - - sendtonews_url = SendtoNewsIE._extract_url(webpage) - if sendtonews_url: - return self.url_result( - compat_urlparse.urljoin(url, sendtonews_url), - ie=SendtoNewsIE.ie_key()) - - info_dict = self._extract_anvato_videos(webpage, display_id) - - timestamp = unified_timestamp(self._html_search_regex( - r'class="(?:entry|post)-date"[^>]*>([^<]+)', webpage, - 'released date', default=None)) or parse_iso8601( - self._html_search_meta('uploadDate', webpage)) - - info_dict.update({ - 'display_id': display_id, - 'timestamp': timestamp, - }) - - return info_dict diff --git 
a/youtube_dl/extractor/cbsnews.py b/youtube_dl/extractor/cbsnews.py deleted file mode 100644 index 1285ed65e..000000000 --- a/youtube_dl/extractor/cbsnews.py +++ /dev/null @@ -1,147 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re -import zlib - -from .common import InfoExtractor -from .cbs import CBSIE -from ..compat import ( - compat_b64decode, - compat_urllib_parse_unquote, -) -from ..utils import ( - parse_duration, -) - - -class CBSNewsEmbedIE(CBSIE): - IE_NAME = 'cbsnews:embed' - _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/embed/video[^#]*#(?P.+)' - _TESTS = [{ - 'url': 'https://www.cbsnews.com/embed/video/?v=1.c9b5b61492913d6660db0b2f03579ef25e86307a#1Vb7b9s2EP5XBAHbT6Gt98PAMKTJ0se6LVjWYWtdGBR1stlIpEBSTtwi%2F%2FvuJNkNhmHdGxgM2NL57vjd6zt%2B8PngdN%2Fyg79qeGvhzN%2FLGrS%2F%2BuBLB531V28%2B%2BO7Qg7%2Fy97r2z3xZ42NW8yLhDbA0S0KWlHnIijwKWJBHZZnHBa8Cgbpdf%2F89NM9Hi9fXifhpr8sr%2FlP848tn%2BTdXycX25zh4cdX%2FvHl6PmmPqnWQv9w8Ed%2B9GjYRim07bFEqdG%2BZVHuwTm65A7bVRrYtR5lAyMox7pigF6W4k%2By91mjspGsJ%2BwVae4%2BsvdnaO1p73HkXs%2FVisUDTGm7R8IcdnOROeq%2B19qT1amhA1VJtPenoTUgrtfKc9m7Rq8dP7nnjwOB7wg7ADdNt7VX64DWAWlKhPtmDEq22g4GF99x6Dk9E8OSsankHXqPNKDxC%2FdK7MLKTircTDgsI3mmj4OBdSq64dy7fd1x577RU1rt4cvMtOaulFYOd%2FLewRWvDO9lIgXFpZSnkZmjbv5SxKTPoQXClFbpsf%2Fhbbpzs0IB3vb8KkyzJQ%2BywOAgCrMpgRrz%2BKk4fvb7kFbR4XJCu0gAdtNO7woCwZTu%2BBUs9bam%2Fds71drVerpeisgrubLjAB4nnOSkWQnfr5W6o1ku5Xpr1MgrCbL0M0vUyDtfLLK15WiYp47xKWSLyjFVpwVmVJSLIoCjSOFkv3W7oKsVliwZJcB9nwXpZ5GEQQwY8jNKqKCBrgjTLeFxgdCIpazojDgnRtn43J6kG7nZ6cAbxh0EeFFk4%2B1u867cY5u4344n%2FxXjCqAjucdTHgLKojNKmSfO8KRsOFY%2FzKEYCKEJBzv90QA9nfm9gL%2BHulaFqUkz9ULUYxl62B3U%2FRVNLA8IhggaPycOoBuwOCESciDQVSSUgiOMsROB%2FhKfwCKOzEk%2B4k6rWd4uuT%2FwTDz7K7t3d3WLO8ISD95jSPQbayBacthbz86XVgxHwhex5zawzgDOmtp%2F3GPcXn0VXHdSS029%2Fj99UC%2FwJUvyKQ%2FzKyixIEVlYJOn4RxxuaH43Ty9fbJ5OObykHH435XAzJTHeOF4hhEUXD8URe%2FQ%2FBT%2BMpf8d5GN02Ox%2FfiGsl7TA7POu1xZ5%2BbTzcAVKMe48mqcC21hkacVEVScM26liVVBnrKkC4CLKyzAvHu0lhEaTKMFwI3a4SN9MsrfYzdBLq2vkwRD1gVv
iLT8kY9h2CHH6Y%2Bix6609weFtey4ESp60WtyeWMy%2BsmBuhsoKIyuoT%2Bq2R%2FrW5qi3g%2FvzS2j40DoixDP8%2BKP0yUdpXJ4l6Vla%2Bg9vce%2BC4yM5YlUcbA%2F0jLKdpmTwvsdN5z88nAIe08%2F0HgxeG1iv%2B6Hlhjh7uiW0SDzYNI92L401uha3JKYk268UVRzdOzNQvAaJqoXzAc80dAV440NZ1WVVAAMRYQ2KrGJFmDUsq8saWSnjvIj8t78y%2FRa3JRnbHVfyFpfwoDiGpPgjzekyUiKNlU3OMlwuLMmzgvEojllYVE2Z1HhImvsnk%2BuhusTEoB21PAtSFodeFK3iYhXEH9WOG2%2FkOE833sfeG%2Ff5cfHtEFNXgYes0%2FXj7aGivUgJ9XpusCtoNcNYVVnJVrrDo0OmJAutHCpuZul4W9lLcfy7BnuLPT02%2ByXsCTk%2B9zhzswIN04YueNSK%2BPtM0jS88QdLqSLJDTLsuGZJNolm2yO0PXh3UPnz9Ix5bfIAqxPjvETQsDCEiPG4QbqNyhBZISxybLnZYCrW5H3Axp690%2F0BJdXtDZ5ITuM4xj3f4oUHGzc5JeJmZKpp%2FjwKh4wMV%2FV1yx3emLoR0MwbG4K%2F%2BZgVep3PnzXGDHZ6a3i%2Fk%2BJrONDN13%2Bnq6tBTYk4o7cLGhBtqCC4KwacGHpEVuoH5JNro%2FE6JfE6d5RydbiR76k%2BW5wioDHBIjw1euhHjUGRB0y5A97KoaPx6MlL%2BwgboUVtUFRI%2FLemgTpdtF59ii7pab08kuPcfWzs0l%2FRI5takWnFpka0zOgWRtYcuf9aIxZMxlwr6IiGpsb6j2DQUXPl%2FimXI599Ev7fWjoPD78A', - 'only_matching': True, - }] - - def _real_extract(self, url): - item = self._parse_json(zlib.decompress(compat_b64decode( - compat_urllib_parse_unquote(self._match_id(url))), - -zlib.MAX_WBITS).decode('utf-8'), None)['video']['items'][0] - return self._extract_video_info(item['mpxRefId'], 'cbsnews') - - -class CBSNewsIE(CBSIE): - IE_NAME = 'cbsnews' - IE_DESC = 'CBS News' - _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/(?:news|video)/(?P[\da-z_-]+)' - - _TESTS = [ - { - # 60 minutes - 'url': 'http://www.cbsnews.com/news/artificial-intelligence-positioned-to-be-a-game-changer/', - 'info_dict': { - 'id': 'Y_nf_aEg6WwO9OLAq0MpKaPgfnBUxfW4', - 'ext': 'flv', - 'title': 'Artificial Intelligence, real-life applications', - 'description': 'md5:a7aaf27f1b4777244de8b0b442289304', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 317, - 'uploader': 'CBSI-NEW', - 'timestamp': 1476046464, - 'upload_date': '20161009', - }, - 'params': { - # rtmp download - 'skip_download': True, - }, - }, - { - 'url': 
'https://www.cbsnews.com/video/fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack/', - 'info_dict': { - 'id': 'SNJBOYzXiWBOvaLsdzwH8fmtP1SCd91Y', - 'ext': 'mp4', - 'title': 'Fort Hood shooting: Army downplays mental illness as cause of attack', - 'description': 'md5:4a6983e480542d8b333a947bfc64ddc7', - 'upload_date': '20140404', - 'timestamp': 1396650660, - 'uploader': 'CBSI-NEW', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 205, - 'subtitles': { - 'en': [{ - 'ext': 'ttml', - }], - }, - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, - { - # 48 hours - 'url': 'http://www.cbsnews.com/news/maria-ridulph-murder-will-the-nations-oldest-cold-case-to-go-to-trial-ever-get-solved/', - 'info_dict': { - 'title': 'Cold as Ice', - 'description': 'Can a childhood memory solve the 1957 murder of 7-year-old Maria Ridulph?', - }, - 'playlist_mincount': 7, - }, - ] - - def _real_extract(self, url): - display_id = self._match_id(url) - - webpage = self._download_webpage(url, display_id) - - entries = [] - for embed_url in re.findall(r']+data-src="(https?://(?:www\.)?cbsnews\.com/embed/video/[^#]*#[^"]+)"', webpage): - entries.append(self.url_result(embed_url, CBSNewsEmbedIE.ie_key())) - if entries: - return self.playlist_result( - entries, playlist_title=self._html_search_meta(['og:title', 'twitter:title'], webpage), - playlist_description=self._html_search_meta(['og:description', 'twitter:description', 'description'], webpage)) - - item = self._parse_json(self._html_search_regex( - r'CBSNEWS\.defaultPayload\s*=\s*({.+})', - webpage, 'video JSON info'), display_id)['items'][0] - return self._extract_video_info(item['mpxRefId'], 'cbsnews') - - -class CBSNewsLiveVideoIE(InfoExtractor): - IE_NAME = 'cbsnews:livevideo' - IE_DESC = 'CBS News Live Videos' - _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/live/video/(?P[^/?#]+)' - - # Live videos get deleted soon. 
See http://www.cbsnews.com/live/ for the latest examples - _TEST = { - 'url': 'http://www.cbsnews.com/live/video/clinton-sanders-prepare-to-face-off-in-nh/', - 'info_dict': { - 'id': 'clinton-sanders-prepare-to-face-off-in-nh', - 'ext': 'mp4', - 'title': 'Clinton, Sanders Prepare To Face Off In NH', - 'duration': 334, - }, - 'skip': 'Video gone', - } - - def _real_extract(self, url): - display_id = self._match_id(url) - - video_info = self._download_json( - 'http://feeds.cbsn.cbsnews.com/rundown/story', display_id, query={ - 'device': 'desktop', - 'dvr_slug': display_id, - }) - - formats = self._extract_akamai_formats(video_info['url'], display_id) - self._sort_formats(formats) - - return { - 'id': display_id, - 'display_id': display_id, - 'title': video_info['headline'], - 'thumbnail': video_info.get('thumbnail_url_hd') or video_info.get('thumbnail_url_sd'), - 'duration': parse_duration(video_info.get('segmentDur')), - 'formats': formats, - } diff --git a/youtube_dl/extractor/cbssports.py b/youtube_dl/extractor/cbssports.py deleted file mode 100644 index a891c9a55..000000000 --- a/youtube_dl/extractor/cbssports.py +++ /dev/null @@ -1,113 +0,0 @@ -from __future__ import unicode_literals - -import re - -# from .cbs import CBSBaseIE -from .common import InfoExtractor -from ..utils import ( - int_or_none, - try_get, -) - - -# class CBSSportsEmbedIE(CBSBaseIE): -class CBSSportsEmbedIE(InfoExtractor): - IE_NAME = 'cbssports:embed' - _VALID_URL = r'''(?ix)https?://(?:(?:www\.)?cbs|embed\.247)sports\.com/player/embed.+? 
- (?: - ids%3D(?P[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})| - pcid%3D(?P\d+) - )''' - _TESTS = [{ - 'url': 'https://www.cbssports.com/player/embed/?args=player_id%3Db56c03a6-231a-4bbe-9c55-af3c8a8e9636%26ids%3Db56c03a6-231a-4bbe-9c55-af3c8a8e9636%26resizable%3D1%26autoplay%3Dtrue%26domain%3Dcbssports.com%26comp_ads_enabled%3Dfalse%26watchAndRead%3D0%26startTime%3D0%26env%3Dprod', - 'only_matching': True, - }, { - 'url': 'https://embed.247sports.com/player/embed/?args=%3fplayer_id%3d1827823171591%26channel%3dcollege-football-recruiting%26pcid%3d1827823171591%26width%3d640%26height%3d360%26autoplay%3dTrue%26comp_ads_enabled%3dFalse%26uvpc%3dhttps%253a%252f%252fwww.cbssports.com%252fapi%252fcontent%252fvideo%252fconfig%252f%253fcfg%253duvp_247sports_v4%2526partner%253d247%26uvpc_m%3dhttps%253a%252f%252fwww.cbssports.com%252fapi%252fcontent%252fvideo%252fconfig%252f%253fcfg%253duvp_247sports_m_v4%2526partner_m%253d247_mobile%26utag%3d247sportssite%26resizable%3dTrue', - 'only_matching': True, - }] - - # def _extract_video_info(self, filter_query, video_id): - # return self._extract_feed_info('dJ5BDC', 'VxxJg8Ymh8sE', filter_query, video_id) - - def _real_extract(self, url): - uuid, pcid = re.match(self._VALID_URL, url).groups() - query = {'id': uuid} if uuid else {'pcid': pcid} - video = self._download_json( - 'https://www.cbssports.com/api/content/video/', - uuid or pcid, query=query)[0] - video_id = video['id'] - title = video['title'] - metadata = video.get('metaData') or {} - # return self._extract_video_info('byId=%d' % metadata['mpxOutletId'], video_id) - # return self._extract_video_info('byGuid=' + metadata['mpxRefId'], video_id) - - formats = self._extract_m3u8_formats( - metadata['files'][0]['url'], video_id, 'mp4', - 'm3u8_native', m3u8_id='hls', fatal=False) - self._sort_formats(formats) - - image = video.get('image') - thumbnails = None - if image: - image_path = image.get('path') - if image_path: - thumbnails = [{ - 'url': image_path, - 'width': 
int_or_none(image.get('width')), - 'height': int_or_none(image.get('height')), - 'filesize': int_or_none(image.get('size')), - }] - - return { - 'id': video_id, - 'title': title, - 'formats': formats, - 'thumbnails': thumbnails, - 'description': video.get('description'), - 'timestamp': int_or_none(try_get(video, lambda x: x['dateCreated']['epoch'])), - 'duration': int_or_none(metadata.get('duration')), - } - - -class CBSSportsBaseIE(InfoExtractor): - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - iframe_url = self._search_regex( - r']+(?:data-)?src="(https?://[^/]+/player/embed[^"]+)"', - webpage, 'embed url') - return self.url_result(iframe_url, CBSSportsEmbedIE.ie_key()) - - -class CBSSportsIE(CBSSportsBaseIE): - IE_NAME = 'cbssports' - _VALID_URL = r'https?://(?:www\.)?cbssports\.com/[^/]+/video/(?P[^/?#&]+)' - _TESTS = [{ - 'url': 'https://www.cbssports.com/college-football/video/cover-3-stanford-spring-gleaning/', - 'info_dict': { - 'id': 'b56c03a6-231a-4bbe-9c55-af3c8a8e9636', - 'ext': 'mp4', - 'title': 'Cover 3: Stanford Spring Gleaning', - 'description': 'The Cover 3 crew break down everything you need to know about the Stanford Cardinal this spring.', - 'timestamp': 1617218398, - 'upload_date': '20210331', - 'duration': 502, - }, - }] - - -class TwentyFourSevenSportsIE(CBSSportsBaseIE): - IE_NAME = '247sports' - _VALID_URL = r'https?://(?:www\.)?247sports\.com/Video/(?:[^/?#&]+-)?(?P\d+)' - _TESTS = [{ - 'url': 'https://247sports.com/Video/2021-QB-Jake-Garcia-senior-highlights-through-five-games-10084854/', - 'info_dict': { - 'id': '4f1265cb-c3b5-44a8-bb1d-1914119a0ccc', - 'ext': 'mp4', - 'title': '2021 QB Jake Garcia senior highlights through five games', - 'description': 'md5:8cb67ebed48e2e6adac1701e0ff6e45b', - 'timestamp': 1607114223, - 'upload_date': '20201204', - 'duration': 208, - }, - }] diff --git a/youtube_dl/extractor/ccc.py b/youtube_dl/extractor/ccc.py deleted file mode 
100644 index 36e6dff72..000000000 --- a/youtube_dl/extractor/ccc.py +++ /dev/null @@ -1,111 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - int_or_none, - parse_iso8601, - try_get, - url_or_none, -) - - -class CCCIE(InfoExtractor): - IE_NAME = 'media.ccc.de' - _VALID_URL = r'https?://(?:www\.)?media\.ccc\.de/v/(?P[^/?#&]+)' - - _TESTS = [{ - 'url': 'https://media.ccc.de/v/30C3_-_5443_-_en_-_saal_g_-_201312281830_-_introduction_to_processor_design_-_byterazor#video', - 'md5': '3a1eda8f3a29515d27f5adb967d7e740', - 'info_dict': { - 'id': '1839', - 'ext': 'mp4', - 'title': 'Introduction to Processor Design', - 'creator': 'byterazor', - 'description': 'md5:df55f6d073d4ceae55aae6f2fd98a0ac', - 'thumbnail': r're:^https?://.*\.jpg$', - 'upload_date': '20131228', - 'timestamp': 1388188800, - 'duration': 3710, - 'tags': list, - } - }, { - 'url': 'https://media.ccc.de/v/32c3-7368-shopshifting#download', - 'only_matching': True, - }] - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - event_id = self._search_regex(r"data-id='(\d+)'", webpage, 'event id') - event_data = self._download_json('https://media.ccc.de/public/events/%s' % event_id, event_id) - - formats = [] - for recording in event_data.get('recordings', []): - recording_url = recording.get('recording_url') - if not recording_url: - continue - language = recording.get('language') - folder = recording.get('folder') - format_id = None - if language: - format_id = language - if folder: - if language: - format_id += '-' + folder - else: - format_id = folder - vcodec = 'h264' if 'h264' in folder else ( - 'none' if folder in ('mp3', 'opus') else None - ) - formats.append({ - 'format_id': format_id, - 'url': recording_url, - 'width': int_or_none(recording.get('width')), - 'height': int_or_none(recording.get('height')), - 'filesize': int_or_none(recording.get('size'), 
invscale=1024 * 1024), - 'language': language, - 'vcodec': vcodec, - }) - self._sort_formats(formats) - - return { - 'id': event_id, - 'display_id': display_id, - 'title': event_data['title'], - 'creator': try_get(event_data, lambda x: ', '.join(x['persons'])), - 'description': event_data.get('description'), - 'thumbnail': event_data.get('thumb_url'), - 'timestamp': parse_iso8601(event_data.get('date')), - 'duration': int_or_none(event_data.get('length')), - 'tags': event_data.get('tags'), - 'formats': formats, - } - - -class CCCPlaylistIE(InfoExtractor): - IE_NAME = 'media.ccc.de:lists' - _VALID_URL = r'https?://(?:www\.)?media\.ccc\.de/c/(?P[^/?#&]+)' - _TESTS = [{ - 'url': 'https://media.ccc.de/c/30c3', - 'info_dict': { - 'title': '30C3', - 'id': '30c3', - }, - 'playlist_count': 135, - }] - - def _real_extract(self, url): - playlist_id = self._match_id(url).lower() - - conf = self._download_json( - 'https://media.ccc.de/public/conferences/' + playlist_id, - playlist_id) - - entries = [] - for e in conf['events']: - event_url = url_or_none(e.get('frontend_link')) - if event_url: - entries.append(self.url_result(event_url, ie=CCCIE.ie_key())) - - return self.playlist_result(entries, playlist_id, conf.get('title')) diff --git a/youtube_dl/extractor/ccma.py b/youtube_dl/extractor/ccma.py deleted file mode 100644 index e6ae49352..000000000 --- a/youtube_dl/extractor/ccma.py +++ /dev/null @@ -1,155 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import calendar -import datetime -import re - -from .common import InfoExtractor -from ..utils import ( - clean_html, - extract_timezone, - int_or_none, - parse_duration, - parse_resolution, - try_get, - url_or_none, -) - - -class CCMAIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?ccma\.cat/(?:[^/]+/)*?(?Pvideo|audio)/(?P\d+)' - _TESTS = [{ - 'url': 'http://www.ccma.cat/tv3/alacarta/lespot-de-la-marato-de-tv3/lespot-de-la-marato-de-tv3/video/5630208/', - 'md5': '7296ca43977c8ea4469e719c609b0871', - 
'info_dict': { - 'id': '5630208', - 'ext': 'mp4', - 'title': 'L\'espot de La Marató de TV3', - 'description': 'md5:f12987f320e2f6e988e9908e4fe97765', - 'timestamp': 1478608140, - 'upload_date': '20161108', - 'age_limit': 0, - } - }, { - 'url': 'http://www.ccma.cat/catradio/alacarta/programa/el-consell-de-savis-analitza-el-derbi/audio/943685/', - 'md5': 'fa3e38f269329a278271276330261425', - 'info_dict': { - 'id': '943685', - 'ext': 'mp3', - 'title': 'El Consell de Savis analitza el derbi', - 'description': 'md5:e2a3648145f3241cb9c6b4b624033e53', - 'upload_date': '20170512', - 'timestamp': 1494622500, - 'vcodec': 'none', - 'categories': ['Esports'], - } - }, { - 'url': 'http://www.ccma.cat/tv3/alacarta/crims/crims-josep-tallada-lespereu-me-capitol-1/video/6031387/', - 'md5': 'b43c3d3486f430f3032b5b160d80cbc3', - 'info_dict': { - 'id': '6031387', - 'ext': 'mp4', - 'title': 'Crims - Josep Talleda, l\'"Espereu-me" (capítol 1)', - 'description': 'md5:7cbdafb640da9d0d2c0f62bad1e74e60', - 'timestamp': 1582577700, - 'upload_date': '20200224', - 'subtitles': 'mincount:4', - 'age_limit': 16, - 'series': 'Crims', - } - }] - - def _real_extract(self, url): - media_type, media_id = re.match(self._VALID_URL, url).groups() - - media = self._download_json( - 'http://dinamics.ccma.cat/pvideo/media.jsp', media_id, query={ - 'media': media_type, - 'idint': media_id, - }) - - formats = [] - media_url = media['media']['url'] - if isinstance(media_url, list): - for format_ in media_url: - format_url = url_or_none(format_.get('file')) - if not format_url: - continue - label = format_.get('label') - f = parse_resolution(label) - f.update({ - 'url': format_url, - 'format_id': label, - }) - formats.append(f) - else: - formats.append({ - 'url': media_url, - 'vcodec': 'none' if media_type == 'audio' else None, - }) - self._sort_formats(formats) - - informacio = media['informacio'] - title = informacio['titol'] - durada = informacio.get('durada') or {} - duration = 
int_or_none(durada.get('milisegons'), 1000) or parse_duration(durada.get('text')) - tematica = try_get(informacio, lambda x: x['tematica']['text']) - - timestamp = None - data_utc = try_get(informacio, lambda x: x['data_emissio']['utc']) - try: - timezone, data_utc = extract_timezone(data_utc) - timestamp = calendar.timegm((datetime.datetime.strptime( - data_utc, '%Y-%d-%mT%H:%M:%S') - timezone).timetuple()) - except TypeError: - pass - - subtitles = {} - subtitols = media.get('subtitols') or [] - if isinstance(subtitols, dict): - subtitols = [subtitols] - for st in subtitols: - sub_url = st.get('url') - if sub_url: - subtitles.setdefault( - st.get('iso') or st.get('text') or 'ca', []).append({ - 'url': sub_url, - }) - - thumbnails = [] - imatges = media.get('imatges', {}) - if imatges: - thumbnail_url = imatges.get('url') - if thumbnail_url: - thumbnails = [{ - 'url': thumbnail_url, - 'width': int_or_none(imatges.get('amplada')), - 'height': int_or_none(imatges.get('alcada')), - }] - - age_limit = None - codi_etic = try_get(informacio, lambda x: x['codi_etic']['id']) - if codi_etic: - codi_etic_s = codi_etic.split('_') - if len(codi_etic_s) == 2: - if codi_etic_s[1] == 'TP': - age_limit = 0 - else: - age_limit = int_or_none(codi_etic_s[1]) - - return { - 'id': media_id, - 'title': title, - 'description': clean_html(informacio.get('descripcio')), - 'duration': duration, - 'timestamp': timestamp, - 'thumbnails': thumbnails, - 'subtitles': subtitles, - 'formats': formats, - 'age_limit': age_limit, - 'alt_title': informacio.get('titol_complet'), - 'episode_number': int_or_none(informacio.get('capitol')), - 'categories': [tematica] if tematica else None, - 'series': informacio.get('programa'), - } diff --git a/youtube_dl/extractor/cctv.py b/youtube_dl/extractor/cctv.py deleted file mode 100644 index c76f361c6..000000000 --- a/youtube_dl/extractor/cctv.py +++ /dev/null @@ -1,191 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from 
.common import InfoExtractor -from ..compat import compat_str -from ..utils import ( - float_or_none, - try_get, - unified_timestamp, -) - - -class CCTVIE(InfoExtractor): - IE_DESC = '央视网' - _VALID_URL = r'https?://(?:(?:[^/]+)\.(?:cntv|cctv)\.(?:com|cn)|(?:www\.)?ncpa-classic\.com)/(?:[^/]+/)*?(?P[^/?#&]+?)(?:/index)?(?:\.s?html|[?#&]|$)' - _TESTS = [{ - # fo.addVariable("videoCenterId","id") - 'url': 'http://sports.cntv.cn/2016/02/12/ARTIaBRxv4rTT1yWf1frW2wi160212.shtml', - 'md5': 'd61ec00a493e09da810bf406a078f691', - 'info_dict': { - 'id': '5ecdbeab623f4973b40ff25f18b174e8', - 'ext': 'mp4', - 'title': '[NBA]二少联手砍下46分 雷霆主场击败鹈鹕(快讯)', - 'description': 'md5:7e14a5328dc5eb3d1cd6afbbe0574e95', - 'duration': 98, - 'uploader': 'songjunjie', - 'timestamp': 1455279956, - 'upload_date': '20160212', - }, - }, { - # var guid = "id" - 'url': 'http://tv.cctv.com/2016/02/05/VIDEUS7apq3lKrHG9Dncm03B160205.shtml', - 'info_dict': { - 'id': 'efc5d49e5b3b4ab2b34f3a502b73d3ae', - 'ext': 'mp4', - 'title': '[赛车]“车王”舒马赫恢复情况成谜(快讯)', - 'description': '2月4日,蒙特泽莫罗透露了关于“车王”舒马赫恢复情况,但情况是否属实遭到了质疑。', - 'duration': 37, - 'uploader': 'shujun', - 'timestamp': 1454677291, - 'upload_date': '20160205', - }, - 'params': { - 'skip_download': True, - }, - }, { - # changePlayer('id') - 'url': 'http://english.cntv.cn/special/four_comprehensives/index.shtml', - 'info_dict': { - 'id': '4bb9bb4db7a6471ba85fdeda5af0381e', - 'ext': 'mp4', - 'title': 'NHnews008 ANNUAL POLITICAL SEASON', - 'description': 'Four Comprehensives', - 'duration': 60, - 'uploader': 'zhangyunlei', - 'timestamp': 1425385521, - 'upload_date': '20150303', - }, - 'params': { - 'skip_download': True, - }, - }, { - # loadvideo('id') - 'url': 'http://cctv.cntv.cn/lm/tvseries_russian/yilugesanghua/index.shtml', - 'info_dict': { - 'id': 'b15f009ff45c43968b9af583fc2e04b2', - 'ext': 'mp4', - 'title': 'Путь,усыпанный космеями Серия 1', - 'description': 'Путь, усыпанный космеями', - 'duration': 2645, - 'uploader': 'renxue', - 'timestamp': 1477479241, 
- 'upload_date': '20161026', - }, - 'params': { - 'skip_download': True, - }, - }, { - # var initMyAray = 'id' - 'url': 'http://www.ncpa-classic.com/2013/05/22/VIDE1369219508996867.shtml', - 'info_dict': { - 'id': 'a194cfa7f18c426b823d876668325946', - 'ext': 'mp4', - 'title': '小泽征尔音乐塾 音乐梦想无国界', - 'duration': 2173, - 'timestamp': 1369248264, - 'upload_date': '20130522', - }, - 'params': { - 'skip_download': True, - }, - }, { - # var ids = ["id"] - 'url': 'http://www.ncpa-classic.com/clt/more/416/index.shtml', - 'info_dict': { - 'id': 'a8606119a4884588a79d81c02abecc16', - 'ext': 'mp3', - 'title': '来自维也纳的新年贺礼', - 'description': 'md5:f13764ae8dd484e84dd4b39d5bcba2a7', - 'duration': 1578, - 'uploader': 'djy', - 'timestamp': 1482942419, - 'upload_date': '20161228', - }, - 'params': { - 'skip_download': True, - }, - 'expected_warnings': ['Failed to download m3u8 information'], - }, { - 'url': 'http://ent.cntv.cn/2016/01/18/ARTIjprSSJH8DryTVr5Bx8Wb160118.shtml', - 'only_matching': True, - }, { - 'url': 'http://tv.cntv.cn/video/C39296/e0210d949f113ddfb38d31f00a4e5c44', - 'only_matching': True, - }, { - 'url': 'http://english.cntv.cn/2016/09/03/VIDEhnkB5y9AgHyIEVphCEz1160903.shtml', - 'only_matching': True, - }, { - 'url': 'http://tv.cctv.com/2016/09/07/VIDE5C1FnlX5bUywlrjhxXOV160907.shtml', - 'only_matching': True, - }, { - 'url': 'http://tv.cntv.cn/video/C39296/95cfac44cabd3ddc4a9438780a4e5c44', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - video_id = self._search_regex( - [r'var\s+guid\s*=\s*["\']([\da-fA-F]+)', - r'videoCenterId["\']\s*,\s*["\']([\da-fA-F]+)', - r'changePlayer\s*\(\s*["\']([\da-fA-F]+)', - r'load[Vv]ideo\s*\(\s*["\']([\da-fA-F]+)', - r'var\s+initMyAray\s*=\s*["\']([\da-fA-F]+)', - r'var\s+ids\s*=\s*\[["\']([\da-fA-F]+)'], - webpage, 'video id') - - data = self._download_json( - 'http://vdn.apps.cntv.cn/api/getHttpVideoInfo.do', video_id, - query={ - 
'pid': video_id, - 'url': url, - 'idl': 32, - 'idlr': 32, - 'modifyed': 'false', - }) - - title = data['title'] - - formats = [] - - video = data.get('video') - if isinstance(video, dict): - for quality, chapters_key in enumerate(('lowChapters', 'chapters')): - video_url = try_get( - video, lambda x: x[chapters_key][0]['url'], compat_str) - if video_url: - formats.append({ - 'url': video_url, - 'format_id': 'http', - 'quality': quality, - 'preference': -1, - }) - - hls_url = try_get(data, lambda x: x['hls_url'], compat_str) - if hls_url: - hls_url = re.sub(r'maxbr=\d+&?', '', hls_url) - formats.extend(self._extract_m3u8_formats( - hls_url, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls', fatal=False)) - - self._sort_formats(formats) - - uploader = data.get('editer_name') - description = self._html_search_meta( - 'description', webpage, default=None) - timestamp = unified_timestamp(data.get('f_pgmtime')) - duration = float_or_none(try_get(video, lambda x: x['totalLength'])) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'uploader': uploader, - 'timestamp': timestamp, - 'duration': duration, - 'formats': formats, - } diff --git a/youtube_dl/extractor/cda.py b/youtube_dl/extractor/cda.py deleted file mode 100644 index e1b391937..000000000 --- a/youtube_dl/extractor/cda.py +++ /dev/null @@ -1,214 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import codecs -import re - -from .common import InfoExtractor -from ..compat import ( - compat_chr, - compat_ord, - compat_urllib_parse_unquote, -) -from ..utils import ( - ExtractorError, - float_or_none, - int_or_none, - merge_dicts, - multipart_encode, - parse_duration, - random_birthday, - urljoin, -) - - -class CDAIE(InfoExtractor): - _VALID_URL = r'https?://(?:(?:www\.)?cda\.pl/video|ebd\.cda\.pl/[0-9]+x[0-9]+)/(?P[0-9a-z]+)' - _BASE_URL = 'http://www.cda.pl/' - _TESTS = [{ - 'url': 'http://www.cda.pl/video/5749950c', - 'md5': 
'6f844bf51b15f31fae165365707ae970', - 'info_dict': { - 'id': '5749950c', - 'ext': 'mp4', - 'height': 720, - 'title': 'Oto dlaczego przed zakrętem należy zwolnić.', - 'description': 'md5:269ccd135d550da90d1662651fcb9772', - 'thumbnail': r're:^https?://.*\.jpg$', - 'average_rating': float, - 'duration': 39, - 'age_limit': 0, - } - }, { - 'url': 'http://www.cda.pl/video/57413289', - 'md5': 'a88828770a8310fc00be6c95faf7f4d5', - 'info_dict': { - 'id': '57413289', - 'ext': 'mp4', - 'title': 'Lądowanie na lotnisku na Maderze', - 'description': 'md5:60d76b71186dcce4e0ba6d4bbdb13e1a', - 'thumbnail': r're:^https?://.*\.jpg$', - 'uploader': 'crash404', - 'view_count': int, - 'average_rating': float, - 'duration': 137, - 'age_limit': 0, - } - }, { - # Age-restricted - 'url': 'http://www.cda.pl/video/1273454c4', - 'info_dict': { - 'id': '1273454c4', - 'ext': 'mp4', - 'title': 'Bronson (2008) napisy HD 1080p', - 'description': 'md5:1b6cb18508daf2dc4e0fa4db77fec24c', - 'height': 1080, - 'uploader': 'boniek61', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 5554, - 'age_limit': 18, - 'view_count': int, - 'average_rating': float, - }, - }, { - 'url': 'http://ebd.cda.pl/0x0/5749950c', - 'only_matching': True, - }] - - def _download_age_confirm_page(self, url, video_id, *args, **kwargs): - form_data = random_birthday('rok', 'miesiac', 'dzien') - form_data.update({'return': url, 'module': 'video', 'module_id': video_id}) - data, content_type = multipart_encode(form_data) - return self._download_webpage( - urljoin(url, '/a/validatebirth'), video_id, *args, - data=data, headers={ - 'Referer': url, - 'Content-Type': content_type, - }, **kwargs) - - def _real_extract(self, url): - video_id = self._match_id(url) - self._set_cookie('cda.pl', 'cda.player', 'html5') - webpage = self._download_webpage( - self._BASE_URL + '/video/' + video_id, video_id) - - if 'Ten film jest dostępny dla użytkowników premium' in webpage: - raise ExtractorError('This video is only available for premium 
users.', expected=True) - - if re.search(r'niedostępn[ey] w(?: |\s+)Twoim kraju\s*<', webpage): - self.raise_geo_restricted() - - need_confirm_age = False - if self._html_search_regex(r'(]+action="[^"]*/a/validatebirth[^"]*")', - webpage, 'birthday validate form', default=None): - webpage = self._download_age_confirm_page( - url, video_id, note='Confirming age') - need_confirm_age = True - - formats = [] - - uploader = self._search_regex(r'''(?x) - <(span|meta)[^>]+itemprop=(["\'])author\2[^>]*> - (?:<\1[^>]*>[^<]*|(?!)(?:.|\n))*? - <(span|meta)[^>]+itemprop=(["\'])name\4[^>]*>(?P[^<]+) - ''', webpage, 'uploader', default=None, group='uploader') - view_count = self._search_regex( - r'Odsłony:(?:\s| )*([0-9]+)', webpage, - 'view_count', default=None) - average_rating = self._search_regex( - (r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\1[^>]*>(?P[0-9.]+)', - r']+\bclass=["\']rating["\'][^>]*>(?P[0-9.]+)'), webpage, 'rating', fatal=False, - group='rating_value') - - info_dict = { - 'id': video_id, - 'title': self._og_search_title(webpage), - 'description': self._og_search_description(webpage), - 'uploader': uploader, - 'view_count': int_or_none(view_count), - 'average_rating': float_or_none(average_rating), - 'thumbnail': self._og_search_thumbnail(webpage), - 'formats': formats, - 'duration': None, - 'age_limit': 18 if need_confirm_age else 0, - } - - info = self._search_json_ld(webpage, video_id, default={}) - - # Source: https://www.cda.pl/js/player.js?t=1606154898 - def decrypt_file(a): - for p in ('_XDDD', '_CDA', '_ADC', '_CXD', '_QWE', '_Q5', '_IKSDE'): - a = a.replace(p, '') - a = compat_urllib_parse_unquote(a) - b = [] - for c in a: - f = compat_ord(c) - b.append(compat_chr(33 + (f + 14) % 94) if 33 <= f and 126 >= f else compat_chr(f)) - a = ''.join(b) - a = a.replace('.cda.mp4', '') - for p in ('.2cda.pl', '.3cda.pl'): - a = a.replace(p, '.cda.pl') - if '/upstream' in a: - a = a.replace('/upstream', '.mp4/upstream') - return 'https://' + a - return 
'https://' + a + '.mp4' - - def extract_format(page, version): - json_str = self._html_search_regex( - r'player_data=(\\?["\'])(?P.+?)\1', page, - '%s player_json' % version, fatal=False, group='player_data') - if not json_str: - return - player_data = self._parse_json( - json_str, '%s player_data' % version, fatal=False) - if not player_data: - return - video = player_data.get('video') - if not video or 'file' not in video: - self.report_warning('Unable to extract %s version information' % version) - return - if video['file'].startswith('uggc'): - video['file'] = codecs.decode(video['file'], 'rot_13') - if video['file'].endswith('adc.mp4'): - video['file'] = video['file'].replace('adc.mp4', '.mp4') - elif not video['file'].startswith('http'): - video['file'] = decrypt_file(video['file']) - f = { - 'url': video['file'], - } - m = re.search( - r']+data-quality="(?P[^"]+)"[^>]+href="[^"]+"[^>]+class="[^"]*quality-btn-active[^"]*">(?P[0-9]+)p', - page) - if m: - f.update({ - 'format_id': m.group('format_id'), - 'height': int(m.group('height')), - }) - info_dict['formats'].append(f) - if not info_dict['duration']: - info_dict['duration'] = parse_duration(video.get('duration')) - - extract_format(webpage, 'default') - - for href, resolution in re.findall( - r']+data-quality="[^"]+"[^>]+href="([^"]+)"[^>]+class="quality-btn"[^>]*>([0-9]+p)', - webpage): - if need_confirm_age: - handler = self._download_age_confirm_page - else: - handler = self._download_webpage - - webpage = handler( - urljoin(self._BASE_URL, href), video_id, - 'Downloading %s version information' % resolution, fatal=False) - if not webpage: - # Manually report warning because empty page is returned when - # invalid version is requested. 
- self.report_warning('Unable to download %s version information' % resolution) - continue - - extract_format(webpage, resolution) - - self._sort_formats(formats) - - return merge_dicts(info_dict, info) diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py deleted file mode 100644 index fe677d8e8..000000000 --- a/youtube_dl/extractor/ceskatelevize.py +++ /dev/null @@ -1,301 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..compat import ( - compat_urllib_parse_unquote, - compat_urllib_parse_urlparse, -) -from ..utils import ( - ExtractorError, - float_or_none, - sanitized_Request, - str_or_none, - traverse_obj, - urlencode_postdata, - USER_AGENTS, -) - - -class CeskaTelevizeIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(?:ivysilani|porady|zive)/(?:[^/?#&]+/)*(?P[^/#?]+)' - _TESTS = [{ - 'url': 'http://www.ceskatelevize.cz/ivysilani/10441294653-hyde-park-civilizace/215411058090502/bonus/20641-bonus-01-en', - 'info_dict': { - 'id': '61924494877028507', - 'ext': 'mp4', - 'title': 'Bonus 01 - En - Hyde Park Civilizace', - 'description': 'English Subtittles', - 'thumbnail': r're:^https?://.*\.jpg', - 'duration': 81.3, - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, { - # live stream - 'url': 'http://www.ceskatelevize.cz/zive/ct1/', - 'info_dict': { - 'id': '102', - 'ext': 'mp4', - 'title': r'ČT1 - živé vysílání online', - 'description': 'Sledujte živé vysílání kanálu ČT1 online. 
Vybírat si můžete i z dalších kanálů České televize na kterémkoli z vašich zařízení.', - 'is_live': True, - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, { - # another - 'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/', - 'only_matching': True, - 'info_dict': { - 'id': 402, - 'ext': 'mp4', - 'title': r're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', - 'is_live': True, - }, - # 'skip': 'Georestricted to Czech Republic', - }, { - 'url': 'http://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php?hash=d6a3e1370d2e4fa76296b90bad4dfc19673b641e&IDEC=217 562 22150/0004&channelID=1&width=100%25', - 'only_matching': True, - }, { - # video with 18+ caution trailer - 'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/', - 'info_dict': { - 'id': '215562210900007-bogotart', - 'title': 'Bogotart - Queer', - 'description': 'Hlavní město Kolumbie v doprovodu queer umělců. Vroucí svět plný vášně, sebevědomí, ale i násilí a bolesti', - }, - 'playlist': [{ - 'info_dict': { - 'id': '61924494877311053', - 'ext': 'mp4', - 'title': 'Bogotart - Queer (Varování 18+)', - 'duration': 11.9, - }, - }, { - 'info_dict': { - 'id': '61924494877068022', - 'ext': 'mp4', - 'title': 'Bogotart - Queer (Queer)', - 'thumbnail': r're:^https?://.*\.jpg', - 'duration': 1558.3, - }, - }], - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, { - # iframe embed - 'url': 'http://www.ceskatelevize.cz/porady/10614999031-neviditelni/21251212048/', - 'only_matching': True, - }] - - def _search_nextjs_data(self, webpage, video_id, **kw): - return self._parse_json( - self._search_regex( - r'(?s)]+id=[\'"]__NEXT_DATA__[\'"][^>]*>([^<]+)', - webpage, 'next.js data', **kw), - video_id, **kw) - - def _real_extract(self, url): - playlist_id = self._match_id(url) - webpage, urlh = self._download_webpage_handle(url, playlist_id) - parsed_url = compat_urllib_parse_urlparse(urlh.geturl()) - site_name = self._og_search_property('site_name', 
webpage, fatal=False, default='Česká televize') - playlist_title = self._og_search_title(webpage, default=None) - if site_name and playlist_title: - playlist_title = re.split(r'\s*[—|]\s*%s' % (site_name, ), playlist_title, 1)[0] - playlist_description = self._og_search_description(webpage, default=None) - if playlist_description: - playlist_description = playlist_description.replace('\xa0', ' ') - - type_ = 'IDEC' - if re.search(r'(^/porady|/zive)/', parsed_url.path): - next_data = self._search_nextjs_data(webpage, playlist_id) - if '/zive/' in parsed_url.path: - idec = traverse_obj(next_data, ('props', 'pageProps', 'data', 'liveBroadcast', 'current', 'idec'), get_all=False) - else: - idec = traverse_obj(next_data, ('props', 'pageProps', 'data', ('show', 'mediaMeta'), 'idec'), get_all=False) - if not idec: - idec = traverse_obj(next_data, ('props', 'pageProps', 'data', 'videobonusDetail', 'bonusId'), get_all=False) - if idec: - type_ = 'bonus' - if not idec: - raise ExtractorError('Failed to find IDEC id') - iframe_hash = self._download_webpage( - 'https://www.ceskatelevize.cz/v-api/iframe-hash/', - playlist_id, note='Getting IFRAME hash') - query = {'hash': iframe_hash, 'origin': 'iVysilani', 'autoStart': 'true', type_: idec, } - webpage = self._download_webpage( - 'https://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php', - playlist_id, note='Downloading player', query=query) - - NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.' - if '%s

' % NOT_AVAILABLE_STRING in webpage: - self.raise_geo_restricted(NOT_AVAILABLE_STRING) - if any(not_found in webpage for not_found in ('Neplatný parametr pro videopřehrávač', 'IDEC nebyl nalezen', )): - raise ExtractorError('no video with IDEC available', video_id=idec, expected=True) - - type_ = None - episode_id = None - - playlist = self._parse_json( - self._search_regex( - r'getPlaylistUrl\(\[({.+?})\]', webpage, 'playlist', - default='{}'), playlist_id) - if playlist: - type_ = playlist.get('type') - episode_id = playlist.get('id') - - if not type_: - type_ = self._html_search_regex( - r'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],', - webpage, 'type') - if not episode_id: - episode_id = self._html_search_regex( - r'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],', - webpage, 'episode_id') - - data = { - 'playlist[0][type]': type_, - 'playlist[0][id]': episode_id, - 'requestUrl': parsed_url.path, - 'requestSource': 'iVysilani', - } - - entries = [] - - for user_agent in (None, USER_AGENTS['Safari']): - req = sanitized_Request( - 'https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist/', - data=urlencode_postdata(data)) - - req.add_header('Content-type', 'application/x-www-form-urlencoded') - req.add_header('x-addr', '127.0.0.1') - req.add_header('X-Requested-With', 'XMLHttpRequest') - if user_agent: - req.add_header('User-Agent', user_agent) - req.add_header('Referer', url) - - playlistpage = self._download_json(req, playlist_id, fatal=False) - - if not playlistpage: - continue - - playlist_url = playlistpage['url'] - if playlist_url == 'error_region': - raise ExtractorError(NOT_AVAILABLE_STRING, expected=True) - - req = sanitized_Request(compat_urllib_parse_unquote(playlist_url)) - req.add_header('Referer', url) - - playlist = self._download_json(req, playlist_id, fatal=False) - if not playlist: - continue - - playlist = playlist.get('playlist') - if not isinstance(playlist, list): - continue - - playlist_len = len(playlist) - - for num, 
item in enumerate(playlist): - is_live = item.get('type') == 'LIVE' - formats = [] - for format_id, stream_url in item.get('streamUrls', {}).items(): - if 'drmOnly=true' in stream_url: - continue - if 'playerType=flash' in stream_url: - stream_formats = self._extract_m3u8_formats( - stream_url, playlist_id, 'mp4', 'm3u8_native', - m3u8_id='hls-%s' % format_id, fatal=False) - else: - stream_formats = self._extract_mpd_formats( - stream_url, playlist_id, - mpd_id='dash-%s' % format_id, fatal=False) - # See https://github.com/ytdl-org/youtube-dl/issues/12119#issuecomment-280037031 - if format_id == 'audioDescription': - for f in stream_formats: - f['source_preference'] = -10 - formats.extend(stream_formats) - - if user_agent and len(entries) == playlist_len: - entries[num]['formats'].extend(formats) - continue - - item_id = str_or_none(item.get('id') or item['assetId']) - title = item['title'] - - duration = float_or_none(item.get('duration')) - thumbnail = item.get('previewImageUrl') - - subtitles = {} - if item.get('type') == 'VOD': - subs = item.get('subtitles') - if subs: - subtitles = self.extract_subtitles(episode_id, subs) - - if playlist_len == 1: - final_title = playlist_title or title - else: - final_title = '%s (%s)' % (playlist_title, title) - - entries.append({ - 'id': item_id, - 'title': final_title, - 'description': playlist_description if playlist_len == 1 else None, - 'thumbnail': thumbnail, - 'duration': duration, - 'formats': formats, - 'subtitles': subtitles, - 'is_live': is_live, - }) - - for e in entries: - self._sort_formats(e['formats']) - - if len(entries) == 1: - return entries[0] - return self.playlist_result(entries, playlist_id, playlist_title, playlist_description) - - def _get_subtitles(self, episode_id, subs): - original_subtitles = self._download_webpage( - subs[0]['url'], episode_id, 'Downloading subtitles') - srt_subs = self._fix_subtitles(original_subtitles) - return { - 'cs': [{ - 'ext': 'srt', - 'data': srt_subs, - }] - } - - 
@staticmethod - def _fix_subtitles(subtitles): - """ Convert millisecond-based subtitles to SRT """ - - def _msectotimecode(msec): - """ Helper utility to convert milliseconds to timecode """ - components = [] - for divider in [1000, 60, 60, 100]: - components.append(msec % divider) - msec //= divider - return '{3:02}:{2:02}:{1:02},{0:03}'.format(*components) - - def _fix_subtitle(subtitle): - for line in subtitle.splitlines(): - m = re.match(r'^\s*([0-9]+);\s*([0-9]+)\s+([0-9]+)\s*$', line) - if m: - yield m.group(1) - start, stop = (_msectotimecode(int(t)) for t in m.groups()[1:]) - yield '{0} --> {1}'.format(start, stop) - else: - yield line - - return '\r\n'.join(_fix_subtitle(subtitles)) diff --git a/youtube_dl/extractor/channel9.py b/youtube_dl/extractor/channel9.py deleted file mode 100644 index 09cacf6d3..000000000 --- a/youtube_dl/extractor/channel9.py +++ /dev/null @@ -1,262 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - clean_html, - ExtractorError, - int_or_none, - parse_iso8601, - qualities, - unescapeHTML, -) - - -class Channel9IE(InfoExtractor): - IE_DESC = 'Channel 9' - IE_NAME = 'channel9' - _VALID_URL = r'https?://(?:www\.)?(?:channel9\.msdn\.com|s\.ch9\.ms)/(?P.+?)(?P/RSS)?/?(?:[?#&]|$)' - - _TESTS = [{ - 'url': 'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002', - 'md5': '32083d4eaf1946db6d454313f44510ca', - 'info_dict': { - 'id': '6c413323-383a-49dc-88f9-a22800cab024', - 'ext': 'wmv', - 'title': 'Developer Kick-Off Session: Stuff We Love', - 'description': 'md5:b80bf9355a503c193aff7ec6cd5a7731', - 'duration': 4576, - 'thumbnail': r're:https?://.*\.jpg', - 'timestamp': 1377717420, - 'upload_date': '20130828', - 'session_code': 'KOS002', - 'session_room': 'Arena 1A', - 'session_speakers': 'count:5', - }, - }, { - 'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing', - 'md5': 'dcf983ee6acd2088e7188c3cf79b46bc', - 'info_dict': { 
- 'id': 'fe8e435f-bb93-4e01-8e97-a28c01887024', - 'ext': 'wmv', - 'title': 'Self-service BI with Power BI - nuclear testing', - 'description': 'md5:2d17fec927fc91e9e17783b3ecc88f54', - 'duration': 1540, - 'thumbnail': r're:https?://.*\.jpg', - 'timestamp': 1386381991, - 'upload_date': '20131207', - 'authors': ['Mike Wilmot'], - }, - }, { - # low quality mp4 is best - 'url': 'https://channel9.msdn.com/Events/CPP/CppCon-2015/Ranges-for-the-Standard-Library', - 'info_dict': { - 'id': '33ad69d2-6a4e-4172-83a1-a523013dec76', - 'ext': 'mp4', - 'title': 'Ranges for the Standard Library', - 'description': 'md5:9895e0a9fd80822d2f01c454b8f4a372', - 'duration': 5646, - 'thumbnail': r're:https?://.*\.jpg', - 'upload_date': '20150930', - 'timestamp': 1443640735, - }, - 'params': { - 'skip_download': True, - }, - }, { - 'url': 'https://channel9.msdn.com/Events/DEVintersection/DEVintersection-2016/RSS', - 'info_dict': { - 'id': 'Events/DEVintersection/DEVintersection-2016', - 'title': 'DEVintersection 2016 Orlando Sessions', - }, - 'playlist_mincount': 14, - }, { - 'url': 'https://channel9.msdn.com/Niners/Splendid22/Queue/76acff796e8f411184b008028e0d492b/RSS', - 'only_matching': True, - }, { - 'url': 'https://channel9.msdn.com/Events/Speakers/scott-hanselman/RSS?UrlSafeName=scott-hanselman', - 'only_matching': True, - }] - - _RSS_URL = 'http://channel9.msdn.com/%s/RSS' - - @staticmethod - def _extract_urls(webpage): - return re.findall( - r']+src=["\'](https?://channel9\.msdn\.com/(?:[^/]+/)+)player\b', - webpage) - - def _extract_list(self, video_id, rss_url=None): - if not rss_url: - rss_url = self._RSS_URL % video_id - rss = self._download_xml(rss_url, video_id, 'Downloading RSS') - entries = [self.url_result(session_url.text, 'Channel9') - for session_url in rss.findall('./channel/item/link')] - title_text = rss.find('./channel/title').text - return self.playlist_result(entries, video_id, title_text) - - def _real_extract(self, url): - content_path, rss = 
re.match(self._VALID_URL, url).groups() - - if rss: - return self._extract_list(content_path, url) - - webpage = self._download_webpage( - url, content_path, 'Downloading web page') - - episode_data = self._search_regex( - r"data-episode='([^']+)'", webpage, 'episode data', default=None) - if episode_data: - episode_data = self._parse_json(unescapeHTML( - episode_data), content_path) - content_id = episode_data['contentId'] - is_session = '/Sessions(' in episode_data['api'] - content_url = 'https://channel9.msdn.com/odata' + episode_data['api'] + '?$select=Captions,CommentCount,MediaLengthInSeconds,PublishedDate,Rating,RatingCount,Title,VideoMP4High,VideoMP4Low,VideoMP4Medium,VideoPlayerPreviewImage,VideoWMV,VideoWMVHQ,Views,' - if is_session: - content_url += 'Code,Description,Room,Slides,Speakers,ZipFile&$expand=Speakers' - else: - content_url += 'Authors,Body&$expand=Authors' - content_data = self._download_json(content_url, content_id) - title = content_data['Title'] - - QUALITIES = ( - 'mp3', - 'wmv', 'mp4', - 'wmv-low', 'mp4-low', - 'wmv-mid', 'mp4-mid', - 'wmv-high', 'mp4-high', - ) - - quality_key = qualities(QUALITIES) - - def quality(quality_id, format_url): - return (len(QUALITIES) if '_Source.' 
in format_url - else quality_key(quality_id)) - - formats = [] - urls = set() - - SITE_QUALITIES = { - 'MP3': 'mp3', - 'MP4': 'mp4', - 'Low Quality WMV': 'wmv-low', - 'Low Quality MP4': 'mp4-low', - 'Mid Quality WMV': 'wmv-mid', - 'Mid Quality MP4': 'mp4-mid', - 'High Quality WMV': 'wmv-high', - 'High Quality MP4': 'mp4-high', - } - - formats_select = self._search_regex( - r'(?s)]+name=["\']format[^>]+>(.+?)]+\bvalue=(["\'])(?P(?:(?!\1).)+)\1[^>]*>\s*(?P[^<]+?)\s*<', - formats_select): - format_url = mobj.group('url') - if format_url in urls: - continue - urls.add(format_url) - format_id = mobj.group('format') - quality_id = SITE_QUALITIES.get(format_id, format_id) - formats.append({ - 'url': format_url, - 'format_id': quality_id, - 'quality': quality(quality_id, format_url), - 'vcodec': 'none' if quality_id == 'mp3' else None, - }) - - API_QUALITIES = { - 'VideoMP4Low': 'mp4-low', - 'VideoWMV': 'wmv-mid', - 'VideoMP4Medium': 'mp4-mid', - 'VideoMP4High': 'mp4-high', - 'VideoWMVHQ': 'wmv-hq', - } - - for format_id, q in API_QUALITIES.items(): - q_url = content_data.get(format_id) - if not q_url or q_url in urls: - continue - urls.add(q_url) - formats.append({ - 'url': q_url, - 'format_id': q, - 'quality': quality(q, q_url), - }) - - self._sort_formats(formats) - - slides = content_data.get('Slides') - zip_file = content_data.get('ZipFile') - - if not formats and not slides and not zip_file: - raise ExtractorError( - 'None of recording, slides or zip are available for %s' % content_path) - - subtitles = {} - for caption in content_data.get('Captions', []): - caption_url = caption.get('Url') - if not caption_url: - continue - subtitles.setdefault(caption.get('Language', 'en'), []).append({ - 'url': caption_url, - 'ext': 'vtt', - }) - - common = { - 'id': content_id, - 'title': title, - 'description': clean_html(content_data.get('Description') or content_data.get('Body')), - 'thumbnail': content_data.get('VideoPlayerPreviewImage'), - 'duration': 
int_or_none(content_data.get('MediaLengthInSeconds')), - 'timestamp': parse_iso8601(content_data.get('PublishedDate')), - 'avg_rating': int_or_none(content_data.get('Rating')), - 'rating_count': int_or_none(content_data.get('RatingCount')), - 'view_count': int_or_none(content_data.get('Views')), - 'comment_count': int_or_none(content_data.get('CommentCount')), - 'subtitles': subtitles, - } - if is_session: - speakers = [] - for s in content_data.get('Speakers', []): - speaker_name = s.get('FullName') - if not speaker_name: - continue - speakers.append(speaker_name) - - common.update({ - 'session_code': content_data.get('Code'), - 'session_room': content_data.get('Room'), - 'session_speakers': speakers, - }) - else: - authors = [] - for a in content_data.get('Authors', []): - author_name = a.get('DisplayName') - if not author_name: - continue - authors.append(author_name) - common['authors'] = authors - - contents = [] - - if slides: - d = common.copy() - d.update({'title': title + '-Slides', 'url': slides}) - contents.append(d) - - if zip_file: - d = common.copy() - d.update({'title': title + '-Zip', 'url': zip_file}) - contents.append(d) - - if formats: - d = common.copy() - d.update({'title': title, 'formats': formats}) - contents.append(d) - return self.playlist_result(contents) - else: - return self._extract_list(content_path) diff --git a/youtube_dl/extractor/charlierose.py b/youtube_dl/extractor/charlierose.py deleted file mode 100644 index 42c9af263..000000000 --- a/youtube_dl/extractor/charlierose.py +++ /dev/null @@ -1,54 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import remove_end - - -class CharlieRoseIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?charlierose\.com/(?:video|episode)(?:s|/player)/(?P\d+)' - _TESTS = [{ - 'url': 'https://charlierose.com/videos/27996', - 'md5': 'fda41d49e67d4ce7c2411fd2c4702e09', - 'info_dict': { - 'id': '27996', - 'ext': 'mp4', - 'title': 'Remembering Zaha 
Hadid', - 'thumbnail': r're:^https?://.*\.jpg\?\d+', - 'description': 'We revisit past conversations with Zaha Hadid, in memory of the world renowned Iraqi architect.', - 'subtitles': { - 'en': [{ - 'ext': 'vtt', - }], - }, - }, - }, { - 'url': 'https://charlierose.com/videos/27996', - 'only_matching': True, - }, { - 'url': 'https://charlierose.com/episodes/30887?autoplay=true', - 'only_matching': True, - }] - - _PLAYER_BASE = 'https://charlierose.com/video/player/%s' - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(self._PLAYER_BASE % video_id, video_id) - - title = remove_end(self._og_search_title(webpage), ' - Charlie Rose') - - info_dict = self._parse_html5_media_entries( - self._PLAYER_BASE % video_id, webpage, video_id, - m3u8_entry_protocol='m3u8_native')[0] - - self._sort_formats(info_dict['formats']) - self._remove_duplicate_formats(info_dict['formats']) - - info_dict.update({ - 'id': video_id, - 'title': title, - 'thumbnail': self._og_search_thumbnail(webpage), - 'description': self._og_search_description(webpage), - }) - - return info_dict diff --git a/youtube_dl/extractor/chaturbate.py b/youtube_dl/extractor/chaturbate.py deleted file mode 100644 index a459dcb8d..000000000 --- a/youtube_dl/extractor/chaturbate.py +++ /dev/null @@ -1,109 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - lowercase_escape, - url_or_none, -) - - -class ChaturbateIE(InfoExtractor): - _VALID_URL = r'https?://(?:[^/]+\.)?chaturbate\.com/(?:fullvideo/?\?.*?\bb=)?(?P[^/?&#]+)' - _TESTS = [{ - 'url': 'https://www.chaturbate.com/siswet19/', - 'info_dict': { - 'id': 'siswet19', - 'ext': 'mp4', - 'title': 're:^siswet19 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', - 'age_limit': 18, - 'is_live': True, - }, - 'params': { - 'skip_download': True, - }, - 'skip': 'Room is offline', - }, { - 'url': 'https://chaturbate.com/fullvideo/?b=caylin', 
- 'only_matching': True, - }, { - 'url': 'https://en.chaturbate.com/siswet19/', - 'only_matching': True, - }] - - _ROOM_OFFLINE = 'Room is currently offline' - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage( - 'https://chaturbate.com/%s/' % video_id, video_id, - headers=self.geo_verification_headers()) - - found_m3u8_urls = [] - - data = self._parse_json( - self._search_regex( - r'initialRoomDossier\s*=\s*(["\'])(?P(?:(?!\1).)+)\1', - webpage, 'data', default='{}', group='value'), - video_id, transform_source=lowercase_escape, fatal=False) - if data: - m3u8_url = url_or_none(data.get('hls_source')) - if m3u8_url: - found_m3u8_urls.append(m3u8_url) - - if not found_m3u8_urls: - for m in re.finditer( - r'(\\u002[27])(?Phttp.+?\.m3u8.*?)\1', webpage): - found_m3u8_urls.append(lowercase_escape(m.group('url'))) - - if not found_m3u8_urls: - for m in re.finditer( - r'(["\'])(?Phttp.+?\.m3u8.*?)\1', webpage): - found_m3u8_urls.append(m.group('url')) - - m3u8_urls = [] - for found_m3u8_url in found_m3u8_urls: - m3u8_fast_url, m3u8_no_fast_url = found_m3u8_url, found_m3u8_url.replace('_fast', '') - for m3u8_url in (m3u8_fast_url, m3u8_no_fast_url): - if m3u8_url not in m3u8_urls: - m3u8_urls.append(m3u8_url) - - if not m3u8_urls: - error = self._search_regex( - [r']+class=(["\'])desc_span\1[^>]*>(?P[^<]+)', - r']+id=(["\'])defchat\1[^>]*>\s*

(?P[^<]+)<'], - webpage, 'error', group='error', default=None) - if not error: - if any(p in webpage for p in ( - self._ROOM_OFFLINE, 'offline_tipping', 'tip_offline')): - error = self._ROOM_OFFLINE - if error: - raise ExtractorError(error, expected=True) - raise ExtractorError('Unable to find stream URL') - - formats = [] - for m3u8_url in m3u8_urls: - for known_id in ('fast', 'slow'): - if '_%s' % known_id in m3u8_url: - m3u8_id = known_id - break - else: - m3u8_id = None - formats.extend(self._extract_m3u8_formats( - m3u8_url, video_id, ext='mp4', - # ffmpeg skips segments for fast m3u8 - preference=-10 if m3u8_id == 'fast' else None, - m3u8_id=m3u8_id, fatal=False, live=True)) - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': self._live_title(video_id), - 'thumbnail': 'https://roomimg.stream.highwebmedia.com/ri/%s.jpg' % video_id, - 'age_limit': self._rta_search(webpage), - 'is_live': True, - 'formats': formats, - } diff --git a/youtube_dl/extractor/chilloutzone.py b/youtube_dl/extractor/chilloutzone.py deleted file mode 100644 index 5aac21299..000000000 --- a/youtube_dl/extractor/chilloutzone.py +++ /dev/null @@ -1,96 +0,0 @@ -from __future__ import unicode_literals - -import re -import json - -from .common import InfoExtractor -from .youtube import YoutubeIE -from ..compat import compat_b64decode -from ..utils import ( - clean_html, - ExtractorError -) - - -class ChilloutzoneIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?chilloutzone\.net/video/(?P[\w|-]+)\.html' - _TESTS = [{ - 'url': 'http://www.chilloutzone.net/video/enemene-meck-alle-katzen-weg.html', - 'md5': 'a76f3457e813ea0037e5244f509e66d1', - 'info_dict': { - 'id': 'enemene-meck-alle-katzen-weg', - 'ext': 'mp4', - 'title': 'Enemene Meck - Alle Katzen weg', - 'description': 'Ist das der Umkehrschluss des Niesenden Panda-Babys?', - }, - }, { - 'note': 'Video hosted at YouTube', - 'url': 'http://www.chilloutzone.net/video/eine-sekunde-bevor.html', - 'info_dict': { - 'id': 
'1YVQaAgHyRU', - 'ext': 'mp4', - 'title': '16 Photos Taken 1 Second Before Disaster', - 'description': 'md5:58a8fcf6a459fe0a08f54140f0ad1814', - 'uploader': 'BuzzFeedVideo', - 'uploader_id': 'BuzzFeedVideo', - 'upload_date': '20131105', - }, - }, { - 'note': 'Video hosted at Vimeo', - 'url': 'http://www.chilloutzone.net/video/icon-blending.html', - 'md5': '2645c678b8dc4fefcc0e1b60db18dac1', - 'info_dict': { - 'id': '85523671', - 'ext': 'mp4', - 'title': 'The Sunday Times - Icons', - 'description': 're:(?s)^Watch the making of - makingoficons.com.{300,}', - 'uploader': 'Us', - 'uploader_id': 'usfilms', - 'upload_date': '20140131' - }, - }] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - - webpage = self._download_webpage(url, video_id) - - base64_video_info = self._html_search_regex( - r'var cozVidData = "(.+?)";', webpage, 'video data') - decoded_video_info = compat_b64decode(base64_video_info).decode('utf-8') - video_info_dict = json.loads(decoded_video_info) - - # get video information from dict - video_url = video_info_dict['mediaUrl'] - description = clean_html(video_info_dict.get('description')) - title = video_info_dict['title'] - native_platform = video_info_dict['nativePlatform'] - native_video_id = video_info_dict['nativeVideoId'] - source_priority = video_info_dict['sourcePriority'] - - # If nativePlatform is None a fallback mechanism is used (i.e. youtube embed) - if native_platform is None: - youtube_url = YoutubeIE._extract_url(webpage) - if youtube_url: - return self.url_result(youtube_url, ie=YoutubeIE.ie_key()) - - # Non Fallback: Decide to use native source (e.g. 
youtube or vimeo) or - # the own CDN - if source_priority == 'native': - if native_platform == 'youtube': - return self.url_result(native_video_id, ie='Youtube') - if native_platform == 'vimeo': - return self.url_result( - 'http://vimeo.com/' + native_video_id, ie='Vimeo') - - if not video_url: - raise ExtractorError('No video found') - - return { - 'id': video_id, - 'url': video_url, - 'ext': 'mp4', - 'title': title, - 'description': description, - } diff --git a/youtube_dl/extractor/chirbit.py b/youtube_dl/extractor/chirbit.py deleted file mode 100644 index 8d75cdf19..000000000 --- a/youtube_dl/extractor/chirbit.py +++ /dev/null @@ -1,91 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..compat import compat_b64decode -from ..utils import parse_duration - - -class ChirbitIE(InfoExtractor): - IE_NAME = 'chirbit' - _VALID_URL = r'https?://(?:www\.)?chirb\.it/(?:(?:wp|pl)/|fb_chirbit_player\.swf\?key=)?(?P[\da-zA-Z]+)' - _TESTS = [{ - 'url': 'http://chirb.it/be2abG', - 'info_dict': { - 'id': 'be2abG', - 'ext': 'mp3', - 'title': 'md5:f542ea253f5255240be4da375c6a5d7e', - 'description': 'md5:f24a4e22a71763e32da5fed59e47c770', - 'duration': 306, - 'uploader': 'Gerryaudio', - }, - 'params': { - 'skip_download': True, - } - }, { - 'url': 'https://chirb.it/fb_chirbit_player.swf?key=PrIPv5', - 'only_matching': True, - }, { - 'url': 'https://chirb.it/wp/MN58c2', - 'only_matching': True, - }] - - def _real_extract(self, url): - audio_id = self._match_id(url) - - webpage = self._download_webpage( - 'http://chirb.it/%s' % audio_id, audio_id) - - data_fd = self._search_regex( - r'data-fd=(["\'])(?P(?:(?!\1).)+)\1', - webpage, 'data fd', group='url') - - # Reverse engineered from https://chirb.it/js/chirbit.player.js (look - # for soundURL) - audio_url = compat_b64decode(data_fd[::-1]).decode('utf-8') - - title = self._search_regex( - r'class=["\']chirbit-title["\'][^>]*>([^<]+)', webpage, 'title') - 
description = self._search_regex( - r'

Description

\s*]*>([^<]+)', - webpage, 'description', default=None) - duration = parse_duration(self._search_regex( - r'class=["\']c-length["\'][^>]*>([^<]+)', - webpage, 'duration', fatal=False)) - uploader = self._search_regex( - r'id=["\']chirbit-username["\'][^>]*>([^<]+)', - webpage, 'uploader', fatal=False) - - return { - 'id': audio_id, - 'url': audio_url, - 'title': title, - 'description': description, - 'duration': duration, - 'uploader': uploader, - } - - -class ChirbitProfileIE(InfoExtractor): - IE_NAME = 'chirbit:profile' - _VALID_URL = r'https?://(?:www\.)?chirbit\.com/(?:rss/)?(?P[^/]+)' - _TEST = { - 'url': 'http://chirbit.com/ScarletBeauty', - 'info_dict': { - 'id': 'ScarletBeauty', - }, - 'playlist_mincount': 3, - } - - def _real_extract(self, url): - profile_id = self._match_id(url) - - webpage = self._download_webpage(url, profile_id) - - entries = [ - self.url_result(self._proto_relative_url('//chirb.it/' + video_id)) - for _, video_id in re.findall(r']+id=([\'"])copy-btn-(?P[0-9a-zA-Z]+)\1', webpage)] - - return self.playlist_result(entries, profile_id) diff --git a/youtube_dl/extractor/cinchcast.py b/youtube_dl/extractor/cinchcast.py deleted file mode 100644 index b861d54b0..000000000 --- a/youtube_dl/extractor/cinchcast.py +++ /dev/null @@ -1,58 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - unified_strdate, - xpath_text, -) - - -class CinchcastIE(InfoExtractor): - _VALID_URL = r'https?://player\.cinchcast\.com/.*?(?:assetId|show_id)=(?P[0-9]+)' - _TESTS = [{ - 'url': 'http://player.cinchcast.com/?show_id=5258197&platformId=1&assetType=single', - 'info_dict': { - 'id': '5258197', - 'ext': 'mp3', - 'title': 'Train Your Brain to Up Your Game with Coach Mandy', - 'upload_date': '20130816', - }, - }, { - # Actual test is run in generic, look for undergroundwellness - 'url': 'http://player.cinchcast.com/?platformId=1&assetType=single&assetId=7141703', - 'only_matching': True, - 
}] - - def _real_extract(self, url): - video_id = self._match_id(url) - doc = self._download_xml( - 'http://www.blogtalkradio.com/playerasset/mrss?assetType=single&assetId=%s' % video_id, - video_id) - - item = doc.find('.//item') - title = xpath_text(item, './title', fatal=True) - date_str = xpath_text( - item, './{http://developer.longtailvideo.com/trac/}date') - upload_date = unified_strdate(date_str, day_first=False) - # duration is present but wrong - formats = [{ - 'format_id': 'main', - 'url': item.find('./{http://search.yahoo.com/mrss/}content').attrib['url'], - }] - backup_url = xpath_text( - item, './{http://developer.longtailvideo.com/trac/}backupContent') - if backup_url: - formats.append({ - 'preference': 2, # seems to be more reliable - 'format_id': 'backup', - 'url': backup_url, - }) - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': title, - 'upload_date': upload_date, - 'formats': formats, - } diff --git a/youtube_dl/extractor/cinemax.py b/youtube_dl/extractor/cinemax.py deleted file mode 100644 index 7f89d33de..000000000 --- a/youtube_dl/extractor/cinemax.py +++ /dev/null @@ -1,29 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .hbo import HBOBaseIE - - -class CinemaxIE(HBOBaseIE): - _VALID_URL = r'https?://(?:www\.)?cinemax\.com/(?P[^/]+/video/[0-9a-z-]+-(?P\d+))' - _TESTS = [{ - 'url': 'https://www.cinemax.com/warrior/video/s1-ep-1-recap-20126903', - 'md5': '82e0734bba8aa7ef526c9dd00cf35a05', - 'info_dict': { - 'id': '20126903', - 'ext': 'mp4', - 'title': 'S1 Ep 1: Recap', - }, - 'expected_warnings': ['Unknown MIME type application/mp4 in DASH manifest'], - }, { - 'url': 'https://www.cinemax.com/warrior/video/s1-ep-1-recap-20126903.embed', - 'only_matching': True, - }] - - def _real_extract(self, url): - path, video_id = re.match(self._VALID_URL, url).groups() - info = self._extract_info('https://www.cinemax.com/%s.xml' % path, video_id) - info['id'] = video_id - return info diff --git 
a/youtube_dl/extractor/ciscolive.py b/youtube_dl/extractor/ciscolive.py deleted file mode 100644 index da404e4dc..000000000 --- a/youtube_dl/extractor/ciscolive.py +++ /dev/null @@ -1,151 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import itertools - -from .common import InfoExtractor -from ..compat import ( - compat_parse_qs, - compat_urllib_parse_urlparse, -) -from ..utils import ( - clean_html, - float_or_none, - int_or_none, - try_get, - urlencode_postdata, -) - - -class CiscoLiveBaseIE(InfoExtractor): - # These appear to be constant across all Cisco Live presentations - # and are not tied to any user session or event - RAINFOCUS_API_URL = 'https://events.rainfocus.com/api/%s' - RAINFOCUS_API_PROFILE_ID = 'Na3vqYdAlJFSxhYTYQGuMbpafMqftalz' - RAINFOCUS_WIDGET_ID = 'n6l4Lo05R8fiy3RpUBm447dZN8uNWoye' - BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5647924234001/SyK2FdqjM_default/index.html?videoId=%s' - - HEADERS = { - 'Origin': 'https://ciscolive.cisco.com', - 'rfApiProfileId': RAINFOCUS_API_PROFILE_ID, - 'rfWidgetId': RAINFOCUS_WIDGET_ID, - } - - def _call_api(self, ep, rf_id, query, referrer, note=None): - headers = self.HEADERS.copy() - headers['Referer'] = referrer - return self._download_json( - self.RAINFOCUS_API_URL % ep, rf_id, note=note, - data=urlencode_postdata(query), headers=headers) - - def _parse_rf_item(self, rf_item): - event_name = rf_item.get('eventName') - title = rf_item['title'] - description = clean_html(rf_item.get('abstract')) - presenter_name = try_get(rf_item, lambda x: x['participants'][0]['fullName']) - bc_id = rf_item['videos'][0]['url'] - bc_url = self.BRIGHTCOVE_URL_TEMPLATE % bc_id - duration = float_or_none(try_get(rf_item, lambda x: x['times'][0]['length'])) - location = try_get(rf_item, lambda x: x['times'][0]['room']) - - if duration: - duration = duration * 60 - - return { - '_type': 'url_transparent', - 'url': bc_url, - 'ie_key': 'BrightcoveNew', - 'title': title, - 'description': 
description, - 'duration': duration, - 'creator': presenter_name, - 'location': location, - 'series': event_name, - } - - -class CiscoLiveSessionIE(CiscoLiveBaseIE): - _VALID_URL = r'https?://(?:www\.)?ciscolive(?:\.cisco)?\.com/[^#]*#/session/(?P[^/?&]+)' - _TESTS = [{ - 'url': 'https://ciscolive.cisco.com/on-demand-library/?#/session/1423353499155001FoSs', - 'md5': 'c98acf395ed9c9f766941c70f5352e22', - 'info_dict': { - 'id': '5803694304001', - 'ext': 'mp4', - 'title': '13 Smart Automations to Monitor Your Cisco IOS Network', - 'description': 'md5:ec4a436019e09a918dec17714803f7cc', - 'timestamp': 1530305395, - 'upload_date': '20180629', - 'uploader_id': '5647924234001', - 'location': '16B Mezz.', - }, - }, { - 'url': 'https://www.ciscolive.com/global/on-demand-library.html?search.event=ciscoliveemea2019#/session/15361595531500013WOU', - 'only_matching': True, - }, { - 'url': 'https://www.ciscolive.com/global/on-demand-library.html?#/session/1490051371645001kNaS', - 'only_matching': True, - }] - - def _real_extract(self, url): - rf_id = self._match_id(url) - rf_result = self._call_api('session', rf_id, {'id': rf_id}, url) - return self._parse_rf_item(rf_result['items'][0]) - - -class CiscoLiveSearchIE(CiscoLiveBaseIE): - _VALID_URL = r'https?://(?:www\.)?ciscolive(?:\.cisco)?\.com/(?:global/)?on-demand-library(?:\.html|/)' - _TESTS = [{ - 'url': 'https://ciscolive.cisco.com/on-demand-library/?search.event=ciscoliveus2018&search.technicallevel=scpsSkillLevel_aintroductory&search.focus=scpsSessionFocus_designAndDeployment#/', - 'info_dict': { - 'title': 'Search query', - }, - 'playlist_count': 5, - }, { - 'url': 'https://ciscolive.cisco.com/on-demand-library/?search.technology=scpsTechnology_applicationDevelopment&search.technology=scpsTechnology_ipv6&search.focus=scpsSessionFocus_troubleshootingTroubleshooting#/', - 'only_matching': True, - }, { - 'url': 
'https://www.ciscolive.com/global/on-demand-library.html?search.technicallevel=scpsSkillLevel_aintroductory&search.event=ciscoliveemea2019&search.technology=scpsTechnology_dataCenter&search.focus=scpsSessionFocus_bestPractices#/', - 'only_matching': True, - }] - - @classmethod - def suitable(cls, url): - return False if CiscoLiveSessionIE.suitable(url) else super(CiscoLiveSearchIE, cls).suitable(url) - - @staticmethod - def _check_bc_id_exists(rf_item): - return int_or_none(try_get(rf_item, lambda x: x['videos'][0]['url'])) is not None - - def _entries(self, query, url): - query['size'] = 50 - query['from'] = 0 - for page_num in itertools.count(1): - results = self._call_api( - 'search', None, query, url, - 'Downloading search JSON page %d' % page_num) - sl = try_get(results, lambda x: x['sectionList'][0], dict) - if sl: - results = sl - items = results.get('items') - if not items or not isinstance(items, list): - break - for item in items: - if not isinstance(item, dict): - continue - if not self._check_bc_id_exists(item): - continue - yield self._parse_rf_item(item) - size = int_or_none(results.get('size')) - if size is not None: - query['size'] = size - total = int_or_none(results.get('total')) - if total is not None and query['from'] + query['size'] > total: - break - query['from'] += query['size'] - - def _real_extract(self, url): - query = compat_parse_qs(compat_urllib_parse_urlparse(url).query) - query['type'] = 'session' - return self.playlist_result( - self._entries(query, url), playlist_title='Search query') diff --git a/youtube_dl/extractor/cjsw.py b/youtube_dl/extractor/cjsw.py deleted file mode 100644 index 505bdbe16..000000000 --- a/youtube_dl/extractor/cjsw.py +++ /dev/null @@ -1,72 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - determine_ext, - unescapeHTML, -) - - -class CJSWIE(InfoExtractor): - _VALID_URL = 
r'https?://(?:www\.)?cjsw\.com/program/(?P[^/]+)/episode/(?P\d+)' - _TESTS = [{ - 'url': 'http://cjsw.com/program/freshly-squeezed/episode/20170620', - 'md5': 'cee14d40f1e9433632c56e3d14977120', - 'info_dict': { - 'id': '91d9f016-a2e7-46c5-8dcb-7cbcd7437c41', - 'ext': 'mp3', - 'title': 'Freshly Squeezed – Episode June 20, 2017', - 'description': 'md5:c967d63366c3898a80d0c7b0ff337202', - 'series': 'Freshly Squeezed', - 'episode_id': '20170620', - }, - }, { - # no description - 'url': 'http://cjsw.com/program/road-pops/episode/20170707/', - 'only_matching': True, - }] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - program, episode_id = mobj.group('program', 'id') - audio_id = '%s/%s' % (program, episode_id) - - webpage = self._download_webpage(url, episode_id) - - title = unescapeHTML(self._search_regex( - (r']+class=["\']episode-header__title["\'][^>]*>(?P[^<]+)', - r'data-audio-title=(["\'])(?P<title>(?:(?!\1).)+)\1'), - webpage, 'title', group='title')) - - audio_url = self._search_regex( - r'<button[^>]+data-audio-src=(["\'])(?P<url>(?:(?!\1).)+)\1', - webpage, 'audio url', group='url') - - audio_id = self._search_regex( - r'/([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})\.mp3', - audio_url, 'audio id', default=audio_id) - - formats = [{ - 'url': audio_url, - 'ext': determine_ext(audio_url, 'mp3'), - 'vcodec': 'none', - }] - - description = self._html_search_regex( - r'<p>(?P<description>.+?)</p>', webpage, 'description', - default=None) - series = self._search_regex( - r'data-showname=(["\'])(?P<name>(?:(?!\1).)+)\1', webpage, - 'series', default=program, group='name') - - return { - 'id': audio_id, - 'title': title, - 'description': description, - 'formats': formats, - 'series': series, - 'episode_id': episode_id, - } diff --git a/youtube_dl/extractor/clipchamp.py b/youtube_dl/extractor/clipchamp.py deleted file mode 100644 index 3b485eaab..000000000 --- a/youtube_dl/extractor/clipchamp.py +++ /dev/null @@ -1,69 +0,0 @@ 
-# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..compat import compat_str -from ..utils import ( - ExtractorError, - merge_dicts, - T, - traverse_obj, - unified_timestamp, - url_or_none, -) - - -class ClipchampIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?clipchamp\.com/watch/(?P<id>[\w-]+)' - _TESTS = [{ - 'url': 'https://clipchamp.com/watch/gRXZ4ZhdDaU', - 'info_dict': { - 'id': 'gRXZ4ZhdDaU', - 'ext': 'mp4', - 'title': 'Untitled video', - 'uploader': 'Alexander Schwartz', - 'timestamp': 1680805580, - 'upload_date': '20230406', - 'thumbnail': r're:^https?://.+\.jpg', - }, - 'params': { - 'skip_download': 'm3u8', - 'format': 'bestvideo', - }, - }] - - _STREAM_URL_TMPL = 'https://%s.cloudflarestream.com/%s/manifest/video.%s' - _STREAM_URL_QUERY = {'parentOrigin': 'https://clipchamp.com'} - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - data = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['video'] - - storage_location = data.get('storage_location') - if storage_location != 'cf_stream': - raise ExtractorError('Unsupported clip storage location "%s"' % (storage_location,)) - - path = data['download_url'] - iframe = self._download_webpage( - 'https://iframe.cloudflarestream.com/' + path, video_id, 'Downloading player iframe') - subdomain = self._search_regex( - r'''\bcustomer-domain-prefix\s*=\s*("|')(?P<sd>[\w-]+)\1''', iframe, - 'subdomain', group='sd', fatal=False) or 'customer-2ut9yn3y6fta1yxe' - - formats = self._extract_mpd_formats( - self._STREAM_URL_TMPL % (subdomain, path, 'mpd'), video_id, - query=self._STREAM_URL_QUERY, fatal=False, mpd_id='dash') - formats.extend(self._extract_m3u8_formats( - self._STREAM_URL_TMPL % (subdomain, path, 'm3u8'), video_id, 'mp4', - query=self._STREAM_URL_QUERY, fatal=False, m3u8_id='hls')) - - return merge_dicts({ - 'id': video_id, - 'formats': formats, - 'uploader': ' 
'.join(traverse_obj(data, ('creator', ('first_name', 'last_name'), T(compat_str)))) or None, - }, traverse_obj(data, { - 'title': ('project', 'project_name', T(compat_str)), - 'timestamp': ('created_at', T(unified_timestamp)), - 'thumbnail': ('thumbnail_url', T(url_or_none)), - }), rev=True) diff --git a/youtube_dl/extractor/cliphunter.py b/youtube_dl/extractor/cliphunter.py deleted file mode 100644 index f2ca7a337..000000000 --- a/youtube_dl/extractor/cliphunter.py +++ /dev/null @@ -1,79 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - int_or_none, - url_or_none, -) - - -class CliphunterIE(InfoExtractor): - IE_NAME = 'cliphunter' - - _VALID_URL = r'''(?x)https?://(?:www\.)?cliphunter\.com/w/ - (?P<id>[0-9]+)/ - (?P<seo>.+?)(?:$|[#\?]) - ''' - _TESTS = [{ - 'url': 'http://www.cliphunter.com/w/1012420/Fun_Jynx_Maze_solo', - 'md5': 'b7c9bbd4eb3a226ab91093714dcaa480', - 'info_dict': { - 'id': '1012420', - 'ext': 'flv', - 'title': 'Fun Jynx Maze solo', - 'thumbnail': r're:^https?://.*\.jpg$', - 'age_limit': 18, - }, - 'skip': 'Video gone', - }, { - 'url': 'http://www.cliphunter.com/w/2019449/ShesNew__My_booty_girlfriend_Victoria_Paradices_pussy_filled_with_jizz', - 'md5': '55a723c67bfc6da6b0cfa00d55da8a27', - 'info_dict': { - 'id': '2019449', - 'ext': 'mp4', - 'title': 'ShesNew - My booty girlfriend, Victoria Paradice\'s pussy filled with jizz', - 'thumbnail': r're:^https?://.*\.jpg$', - 'age_limit': 18, - }, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - video_title = self._search_regex( - r'mediaTitle = "([^"]+)"', webpage, 'title') - - gexo_files = self._parse_json( - self._search_regex( - r'var\s+gexoFiles\s*=\s*({.+?});', webpage, 'gexo files'), - video_id) - - formats = [] - for format_id, f in gexo_files.items(): - video_url = url_or_none(f.get('url')) - if not video_url: - continue - fmt = f.get('fmt') - height = 
f.get('h') - format_id = '%s_%sp' % (fmt, height) if fmt and height else format_id - formats.append({ - 'url': video_url, - 'format_id': format_id, - 'width': int_or_none(f.get('w')), - 'height': int_or_none(height), - 'tbr': int_or_none(f.get('br')), - }) - self._sort_formats(formats) - - thumbnail = self._search_regex( - r"var\s+mov_thumb\s*=\s*'([^']+)';", - webpage, 'thumbnail', fatal=False) - - return { - 'id': video_id, - 'title': video_title, - 'formats': formats, - 'age_limit': self._rta_search(webpage), - 'thumbnail': thumbnail, - } diff --git a/youtube_dl/extractor/clippit.py b/youtube_dl/extractor/clippit.py deleted file mode 100644 index a1a7a774c..000000000 --- a/youtube_dl/extractor/clippit.py +++ /dev/null @@ -1,74 +0,0 @@ -# coding: utf-8 - -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - parse_iso8601, - qualities, -) - -import re - - -class ClippitIE(InfoExtractor): - - _VALID_URL = r'https?://(?:www\.)?clippituser\.tv/c/(?P<id>[a-z]+)' - _TEST = { - 'url': 'https://www.clippituser.tv/c/evmgm', - 'md5': '963ae7a59a2ec4572ab8bf2f2d2c5f09', - 'info_dict': { - 'id': 'evmgm', - 'ext': 'mp4', - 'title': 'Bye bye Brutus. 
#BattleBots - Clippit', - 'uploader': 'lizllove', - 'uploader_url': 'https://www.clippituser.tv/p/lizllove', - 'timestamp': 1472183818, - 'upload_date': '20160826', - 'description': 'BattleBots | ABC', - 'thumbnail': r're:^https?://.*\.jpg$', - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - title = self._html_search_regex(r'<title.*>(.+?)', webpage, 'title') - - FORMATS = ('sd', 'hd') - quality = qualities(FORMATS) - formats = [] - for format_id in FORMATS: - url = self._html_search_regex(r'data-%s-file="(.+?)"' % format_id, - webpage, 'url', fatal=False) - if not url: - continue - match = re.search(r'/(?P\d+)\.mp4', url) - formats.append({ - 'url': url, - 'format_id': format_id, - 'quality': quality(format_id), - 'height': int(match.group('height')) if match else None, - }) - - uploader = self._html_search_regex(r'class="username".*>\s+(.+?)\n', - webpage, 'uploader', fatal=False) - uploader_url = ('https://www.clippituser.tv/p/' + uploader - if uploader else None) - - timestamp = self._html_search_regex(r'datetime="(.+?)"', - webpage, 'date', fatal=False) - thumbnail = self._html_search_regex(r'data-image="(.+?)"', - webpage, 'thumbnail', fatal=False) - - return { - 'id': video_id, - 'title': title, - 'formats': formats, - 'uploader': uploader, - 'uploader_url': uploader_url, - 'timestamp': parse_iso8601(timestamp), - 'description': self._og_search_description(webpage), - 'thumbnail': thumbnail, - } diff --git a/youtube_dl/extractor/cliprs.py b/youtube_dl/extractor/cliprs.py deleted file mode 100644 index d55b26d59..000000000 --- a/youtube_dl/extractor/cliprs.py +++ /dev/null @@ -1,33 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .onet import OnetBaseIE - - -class ClipRsIE(OnetBaseIE): - _VALID_URL = r'https?://(?:www\.)?clip\.rs/(?P[^/]+)/\d+' - _TEST = { - 'url': 'http://www.clip.rs/premijera-frajle-predstavljaju-novi-spot-za-pesmu-moli-me-moli/3732', - 
'md5': 'c412d57815ba07b56f9edc7b5d6a14e5', - 'info_dict': { - 'id': '1488842.1399140381', - 'ext': 'mp4', - 'title': 'PREMIJERA Frajle predstavljaju novi spot za pesmu Moli me, moli', - 'description': 'md5:56ce2c3b4ab31c5a2e0b17cb9a453026', - 'duration': 229, - 'timestamp': 1459850243, - 'upload_date': '20160405', - } - } - - def _real_extract(self, url): - display_id = self._match_id(url) - - webpage = self._download_webpage(url, display_id) - - mvp_id = self._search_mvp_id(webpage) - - info_dict = self._extract_from_id(mvp_id, webpage) - info_dict['display_id'] = display_id - - return info_dict diff --git a/youtube_dl/extractor/clipsyndicate.py b/youtube_dl/extractor/clipsyndicate.py deleted file mode 100644 index 6cdb42f5a..000000000 --- a/youtube_dl/extractor/clipsyndicate.py +++ /dev/null @@ -1,54 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - find_xpath_attr, - fix_xml_ampersands -) - - -class ClipsyndicateIE(InfoExtractor): - _VALID_URL = r'https?://(?:chic|www)\.clipsyndicate\.com/video/play(list/\d+)?/(?P\d+)' - - _TESTS = [{ - 'url': 'http://www.clipsyndicate.com/video/play/4629301/brick_briscoe', - 'md5': '4d7d549451bad625e0ff3d7bd56d776c', - 'info_dict': { - 'id': '4629301', - 'ext': 'mp4', - 'title': 'Brick Briscoe', - 'duration': 612, - 'thumbnail': r're:^https?://.+\.jpg', - }, - }, { - 'url': 'http://chic.clipsyndicate.com/video/play/5844117/shark_attack', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - js_player = self._download_webpage( - 'http://eplayer.clipsyndicate.com/embed/player.js?va_id=%s' % video_id, - video_id, 'Downlaoding player') - # it includes a required token - flvars = self._search_regex(r'flvars: "(.*?)"', js_player, 'flvars') - - pdoc = self._download_xml( - 'http://eplayer.clipsyndicate.com/osmf/playlist?%s' % flvars, - video_id, 'Downloading video info', - transform_source=fix_xml_ampersands) - - track_doc = 
pdoc.find('trackList/track') - - def find_param(name): - node = find_xpath_attr(track_doc, './/param', 'name', name) - if node is not None: - return node.attrib['value'] - - return { - 'id': video_id, - 'title': find_param('title'), - 'url': track_doc.find('location').text, - 'thumbnail': find_param('thumbnail'), - 'duration': int(find_param('duration')), - } diff --git a/youtube_dl/extractor/closertotruth.py b/youtube_dl/extractor/closertotruth.py deleted file mode 100644 index 26243d52d..000000000 --- a/youtube_dl/extractor/closertotruth.py +++ /dev/null @@ -1,92 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor - - -class CloserToTruthIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?closertotruth\.com/(?:[^/]+/)*(?P[^/?#&]+)' - _TESTS = [{ - 'url': 'http://closertotruth.com/series/solutions-the-mind-body-problem#video-3688', - 'info_dict': { - 'id': '0_zof1ktre', - 'display_id': 'solutions-the-mind-body-problem', - 'ext': 'mov', - 'title': 'Solutions to the Mind-Body Problem?', - 'upload_date': '20140221', - 'timestamp': 1392956007, - 'uploader_id': 'CTTXML' - }, - 'params': { - 'skip_download': True, - }, - }, { - 'url': 'http://closertotruth.com/episodes/how-do-brains-work', - 'info_dict': { - 'id': '0_iuxai6g6', - 'display_id': 'how-do-brains-work', - 'ext': 'mov', - 'title': 'How do Brains Work?', - 'upload_date': '20140221', - 'timestamp': 1392956024, - 'uploader_id': 'CTTXML' - }, - 'params': { - 'skip_download': True, - }, - }, { - 'url': 'http://closertotruth.com/interviews/1725', - 'info_dict': { - 'id': '1725', - 'title': 'AyaFr-002', - }, - 'playlist_mincount': 2, - }] - - def _real_extract(self, url): - display_id = self._match_id(url) - - webpage = self._download_webpage(url, display_id) - - partner_id = self._search_regex( - r']+src=["\'].*?\b(?:partner_id|p)/(\d+)', - webpage, 'kaltura partner_id') - - title = self._search_regex( - r'(.+?)\s*\|\s*.+?', webpage, 'video title') 
- - select = self._search_regex( - r'(?s)]+id="select-version"[^>]*>(.+?)', - webpage, 'select version', default=None) - if select: - entry_ids = set() - entries = [] - for mobj in re.finditer( - r']+value=(["\'])(?P[0-9a-z_]+)(?:#.+?)?\1[^>]*>(?P[^<]+)', - webpage): - entry_id = mobj.group('id') - if entry_id in entry_ids: - continue - entry_ids.add(entry_id) - entries.append({ - '_type': 'url_transparent', - 'url': 'kaltura:%s:%s' % (partner_id, entry_id), - 'ie_key': 'Kaltura', - 'title': mobj.group('title'), - }) - if entries: - return self.playlist_result(entries, display_id, title) - - entry_id = self._search_regex( - r'<a[^>]+id=(["\'])embed-kaltura\1[^>]+data-kaltura=(["\'])(?P<id>[0-9a-z_]+)\2', - webpage, 'kaltura entry_id', group='id') - - return { - '_type': 'url_transparent', - 'display_id': display_id, - 'url': 'kaltura:%s:%s' % (partner_id, entry_id), - 'ie_key': 'Kaltura', - 'title': title - } diff --git a/youtube_dl/extractor/cloudflarestream.py b/youtube_dl/extractor/cloudflarestream.py deleted file mode 100644 index 2fdcfbb3a..000000000 --- a/youtube_dl/extractor/cloudflarestream.py +++ /dev/null @@ -1,72 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import base64 -import re - -from .common import InfoExtractor - - -class CloudflareStreamIE(InfoExtractor): - _DOMAIN_RE = r'(?:cloudflarestream\.com|(?:videodelivery|bytehighway)\.net)' - _EMBED_RE = r'embed\.%s/embed/[^/]+\.js\?.*?\bvideo=' % _DOMAIN_RE - _ID_RE = r'[\da-f]{32}|[\w-]+\.[\w-]+\.[\w-]+' - _VALID_URL = r'''(?x) - https?:// - (?: - (?:watch\.)?%s/| - %s - ) - (?P<id>%s) - ''' % (_DOMAIN_RE, _EMBED_RE, _ID_RE) - _TESTS = [{ - 'url': 'https://embed.cloudflarestream.com/embed/we4g.fla9.latest.js?video=31c9291ab41fac05471db4e73aa11717', - 'info_dict': { - 'id': '31c9291ab41fac05471db4e73aa11717', - 'ext': 'mp4', - 'title': '31c9291ab41fac05471db4e73aa11717', - }, - 'params': { - 'skip_download': True, - }, - }, { - 'url': 
'https://watch.cloudflarestream.com/9df17203414fd1db3e3ed74abbe936c1', - 'only_matching': True, - }, { - 'url': 'https://cloudflarestream.com/31c9291ab41fac05471db4e73aa11717/manifest/video.mpd', - 'only_matching': True, - }, { - 'url': 'https://embed.videodelivery.net/embed/r4xu.fla9.latest.js?video=81d80727f3022488598f68d323c1ad5e', - 'only_matching': True, - }] - - @staticmethod - def _extract_urls(webpage): - return [ - mobj.group('url') - for mobj in re.finditer( - r'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//%s(?:%s).*?)\1' % (CloudflareStreamIE._EMBED_RE, CloudflareStreamIE._ID_RE), - webpage)] - - def _real_extract(self, url): - video_id = self._match_id(url) - domain = 'bytehighway.net' if 'bytehighway.net/' in url else 'videodelivery.net' - base_url = 'https://%s/%s/' % (domain, video_id) - if '.' in video_id: - video_id = self._parse_json(base64.urlsafe_b64decode( - video_id.split('.')[1]), video_id)['sub'] - manifest_base_url = base_url + 'manifest/video.' - - formats = self._extract_m3u8_formats( - manifest_base_url + 'm3u8', video_id, 'mp4', - 'm3u8_native', m3u8_id='hls', fatal=False) - formats.extend(self._extract_mpd_formats( - manifest_base_url + 'mpd', video_id, mpd_id='dash', fatal=False)) - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': video_id, - 'thumbnail': base_url + 'thumbnails/thumbnail.jpg', - 'formats': formats, - } diff --git a/youtube_dl/extractor/cloudy.py b/youtube_dl/extractor/cloudy.py deleted file mode 100644 index d39a9a5c2..000000000 --- a/youtube_dl/extractor/cloudy.py +++ /dev/null @@ -1,60 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - str_to_int, - unified_strdate, -) - - -class CloudyIE(InfoExtractor): - IE_DESC = 'cloudy.ec' - _VALID_URL = r'https?://(?:www\.)?cloudy\.ec/(?:v/|embed\.php\?.*?\bid=)(?P<id>[A-Za-z0-9]+)' - _TESTS = [{ - 'url': 'https://www.cloudy.ec/v/af511e2527aac', - 'md5': 
'29832b05028ead1b58be86bf319397ca', - 'info_dict': { - 'id': 'af511e2527aac', - 'ext': 'mp4', - 'title': 'Funny Cats and Animals Compilation june 2013', - 'upload_date': '20130913', - 'view_count': int, - } - }, { - 'url': 'http://www.cloudy.ec/embed.php?autoplay=1&id=af511e2527aac', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage( - 'https://www.cloudy.ec/embed.php', video_id, query={ - 'id': video_id, - 'playerPage': 1, - 'autoplay': 1, - }) - - info = self._parse_html5_media_entries(url, webpage, video_id)[0] - - webpage = self._download_webpage( - 'https://www.cloudy.ec/v/%s' % video_id, video_id, fatal=False) - - if webpage: - info.update({ - 'title': self._search_regex( - r'<h\d[^>]*>([^<]+)<', webpage, 'title'), - 'upload_date': unified_strdate(self._search_regex( - r'>Published at (\d{4}-\d{1,2}-\d{1,2})', webpage, - 'upload date', fatal=False)), - 'view_count': str_to_int(self._search_regex( - r'([\d,.]+) views<', webpage, 'view count', fatal=False)), - }) - - if not info.get('title'): - info['title'] = video_id - - info['id'] = video_id - - return info diff --git a/youtube_dl/extractor/clubic.py b/youtube_dl/extractor/clubic.py deleted file mode 100644 index 98f9cb596..000000000 --- a/youtube_dl/extractor/clubic.py +++ /dev/null @@ -1,56 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - clean_html, - qualities, -) - - -class ClubicIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?clubic\.com/video/(?:[^/]+/)*video.*-(?P<id>[0-9]+)\.html' - - _TESTS = [{ - 'url': 'http://www.clubic.com/video/clubic-week/video-clubic-week-2-0-le-fbi-se-lance-dans-la-photo-d-identite-448474.html', - 'md5': '1592b694ba586036efac1776b0b43cd3', - 'info_dict': { - 'id': '448474', - 'ext': 'mp4', - 'title': 'Clubic Week 2.0 : le FBI se lance dans la photo d\u0092identité', - 'description': 're:Gueule de bois chez 
Nokia. Le constructeur a indiqué cette.*', - 'thumbnail': r're:^http://img\.clubic\.com/.*\.jpg$', - } - }, { - 'url': 'http://www.clubic.com/video/video-clubic-week-2-0-apple-iphone-6s-et-plus-mais-surtout-le-pencil-469792.html', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - player_url = 'http://player.m6web.fr/v1/player/clubic/%s.html' % video_id - player_page = self._download_webpage(player_url, video_id) - - config = self._parse_json(self._search_regex( - r'(?m)M6\.Player\.config\s*=\s*(\{.+?\});$', player_page, - 'configuration'), video_id) - - video_info = config['videoInfo'] - sources = config['sources'] - quality_order = qualities(['sd', 'hq']) - - formats = [{ - 'format_id': src['streamQuality'], - 'url': src['src'], - 'quality': quality_order(src['streamQuality']), - } for src in sources] - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': video_info['title'], - 'formats': formats, - 'description': clean_html(video_info.get('description')), - 'thumbnail': config.get('poster'), - } diff --git a/youtube_dl/extractor/clyp.py b/youtube_dl/extractor/clyp.py deleted file mode 100644 index 06d04de13..000000000 --- a/youtube_dl/extractor/clyp.py +++ /dev/null @@ -1,82 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..compat import ( - compat_parse_qs, - compat_urllib_parse_urlparse, -) -from ..utils import ( - float_or_none, - unified_timestamp, -) - - -class ClypIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?clyp\.it/(?P<id>[a-z0-9]+)' - _TESTS = [{ - 'url': 'https://clyp.it/ojz2wfah', - 'md5': '1d4961036c41247ecfdcc439c0cddcbb', - 'info_dict': { - 'id': 'ojz2wfah', - 'ext': 'mp3', - 'title': 'Krisson80 - bits wip wip', - 'description': '#Krisson80BitsWipWip #chiptune\n#wip', - 'duration': 263.21, - 'timestamp': 1443515251, - 'upload_date': '20150929', - }, - }, { - 'url': 
'https://clyp.it/b04p1odi?token=b0078e077e15835845c528a44417719d', - 'info_dict': { - 'id': 'b04p1odi', - 'ext': 'mp3', - 'title': 'GJ! (Reward Edit)', - 'description': 'Metal Resistance (THE ONE edition)', - 'duration': 177.789, - 'timestamp': 1528241278, - 'upload_date': '20180605', - }, - 'params': { - 'skip_download': True, - }, - }] - - def _real_extract(self, url): - audio_id = self._match_id(url) - - qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query) - token = qs.get('token', [None])[0] - - query = {} - if token: - query['token'] = token - - metadata = self._download_json( - 'https://api.clyp.it/%s' % audio_id, audio_id, query=query) - - formats = [] - for secure in ('', 'Secure'): - for ext in ('Ogg', 'Mp3'): - format_id = '%s%s' % (secure, ext) - format_url = metadata.get('%sUrl' % format_id) - if format_url: - formats.append({ - 'url': format_url, - 'format_id': format_id, - 'vcodec': 'none', - }) - self._sort_formats(formats) - - title = metadata['Title'] - description = metadata.get('Description') - duration = float_or_none(metadata.get('Duration')) - timestamp = unified_timestamp(metadata.get('DateCreated')) - - return { - 'id': audio_id, - 'title': title, - 'description': description, - 'duration': duration, - 'timestamp': timestamp, - 'formats': formats, - } diff --git a/youtube_dl/extractor/cmt.py b/youtube_dl/extractor/cmt.py deleted file mode 100644 index e701fbeab..000000000 --- a/youtube_dl/extractor/cmt.py +++ /dev/null @@ -1,54 +0,0 @@ -from __future__ import unicode_literals - -from .mtv import MTVIE - - -class CMTIE(MTVIE): - IE_NAME = 'cmt.com' - _VALID_URL = r'https?://(?:www\.)?cmt\.com/(?:videos|shows|(?:full-)?episodes|video-clips)/(?P<id>[^/]+)' - - _TESTS = [{ - 'url': 'http://www.cmt.com/videos/garth-brooks/989124/the-call-featuring-trisha-yearwood.jhtml#artist=30061', - 'md5': 'e6b7ef3c4c45bbfae88061799bbba6c2', - 'info_dict': { - 'id': '989124', - 'ext': 'mp4', - 'title': 'Garth Brooks - "The Call (featuring Trisha 
Yearwood)"', - 'description': 'Blame It All On My Roots', - }, - 'skip': 'Video not available', - }, { - 'url': 'http://www.cmt.com/videos/misc/1504699/still-the-king-ep-109-in-3-minutes.jhtml#id=1739908', - 'md5': 'e61a801ca4a183a466c08bd98dccbb1c', - 'info_dict': { - 'id': '1504699', - 'ext': 'mp4', - 'title': 'Still The King Ep. 109 in 3 Minutes', - 'description': 'Relive or catch up with Still The King by watching this recap of season 1, episode 9.', - 'timestamp': 1469421000.0, - 'upload_date': '20160725', - }, - }, { - 'url': 'http://www.cmt.com/shows/party-down-south/party-down-south-ep-407-gone-girl/1738172/playlist/#id=1738172', - 'only_matching': True, - }, { - 'url': 'http://www.cmt.com/full-episodes/537qb3/nashville-the-wayfaring-stranger-season-5-ep-501', - 'only_matching': True, - }, { - 'url': 'http://www.cmt.com/video-clips/t9e4ci/nashville-juliette-in-2-minutes', - 'only_matching': True, - }] - - def _extract_mgid(self, webpage): - mgid = self._search_regex( - r'MTVN\.VIDEO\.contentUri\s*=\s*([\'"])(?P<mgid>.+?)\1', - webpage, 'mgid', group='mgid', default=None) - if not mgid: - mgid = self._extract_triforce_mgid(webpage) - return mgid - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - mgid = self._extract_mgid(webpage) - return self.url_result('http://media.mtvnservices.com/embed/%s' % mgid) diff --git a/youtube_dl/extractor/cnbc.py b/youtube_dl/extractor/cnbc.py deleted file mode 100644 index 7b9f4536a..000000000 --- a/youtube_dl/extractor/cnbc.py +++ /dev/null @@ -1,71 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import smuggle_url - - -class CNBCIE(InfoExtractor): - _VALID_URL = r'https?://video\.cnbc\.com/gallery/\?video=(?P<id>[0-9]+)' - _TEST = { - 'url': 'http://video.cnbc.com/gallery/?video=3000503714', - 'info_dict': { - 'id': '3000503714', - 'ext': 'mp4', - 'title': 'Fighting zombies is 
big business', - 'description': 'md5:0c100d8e1a7947bd2feec9a5550e519e', - 'timestamp': 1459332000, - 'upload_date': '20160330', - 'uploader': 'NBCU-CNBC', - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - return { - '_type': 'url_transparent', - 'ie_key': 'ThePlatform', - 'url': smuggle_url( - 'http://link.theplatform.com/s/gZWlPC/media/guid/2408950221/%s?mbr=true&manifest=m3u' % video_id, - {'force_smil_url': True}), - 'id': video_id, - } - - -class CNBCVideoIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?cnbc\.com(?P<path>/video/(?:[^/]+/)+(?P<id>[^./?#&]+)\.html)' - _TEST = { - 'url': 'https://www.cnbc.com/video/2018/07/19/trump-i-dont-necessarily-agree-with-raising-rates.html', - 'info_dict': { - 'id': '7000031301', - 'ext': 'mp4', - 'title': "Trump: I don't necessarily agree with raising rates", - 'description': 'md5:878d8f0b4ebb5bb1dda3514b91b49de3', - 'timestamp': 1531958400, - 'upload_date': '20180719', - 'uploader': 'NBCU-CNBC', - }, - 'params': { - 'skip_download': True, - }, - } - - def _real_extract(self, url): - path, display_id = re.match(self._VALID_URL, url).groups() - video_id = self._download_json( - 'https://webql-redesign.cnbcfm.com/graphql', display_id, query={ - 'query': '''{ - page(path: "%s") { - vcpsId - } -}''' % path, - })['data']['page']['vcpsId'] - return self.url_result( - 'http://video.cnbc.com/gallery/?video=%d' % video_id, - CNBCIE.ie_key()) diff --git a/youtube_dl/extractor/cnn.py b/youtube_dl/extractor/cnn.py deleted file mode 100644 index 2d950fa05..000000000 --- a/youtube_dl/extractor/cnn.py +++ /dev/null @@ -1,147 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from .turner import TurnerBaseIE -from ..utils import url_basename - - -class CNNIE(TurnerBaseIE): - _VALID_URL = r'''(?x)https?://(?:(?P<sub_domain>edition|www|money)\.)?cnn\.com/(?:video/(?:data/.+?|\?)/)?videos?/ - 
(?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:[a-z\-]+)|(?=&)))''' - - _TESTS = [{ - 'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn', - 'md5': '3e6121ea48df7e2259fe73a0628605c4', - 'info_dict': { - 'id': 'sports/2013/06/09/nadal-1-on-1.cnn', - 'ext': 'mp4', - 'title': 'Nadal wins 8th French Open title', - 'description': 'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.', - 'duration': 135, - 'upload_date': '20130609', - }, - 'expected_warnings': ['Failed to download m3u8 information'], - }, { - 'url': 'http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29', - 'md5': 'b5cc60c60a3477d185af8f19a2a26f4e', - 'info_dict': { - 'id': 'us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology', - 'ext': 'mp4', - 'title': "Student's epic speech stuns new freshmen", - 'description': "A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from \"2001: A Space Odyssey.\"", - 'upload_date': '20130821', - }, - 'expected_warnings': ['Failed to download m3u8 information'], - }, { - 'url': 'http://www.cnn.com/video/data/2.0/video/living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln.html', - 'md5': 'f14d02ebd264df951feb2400e2c25a1b', - 'info_dict': { - 'id': 'living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln', - 'ext': 'mp4', - 'title': 'Nashville Ep. 
1: Hand crafted skateboards', - 'description': 'md5:e7223a503315c9f150acac52e76de086', - 'upload_date': '20141222', - }, - 'expected_warnings': ['Failed to download m3u8 information'], - }, { - 'url': 'http://money.cnn.com/video/news/2016/08/19/netflix-stunning-stats.cnnmoney/index.html', - 'md5': '52a515dc1b0f001cd82e4ceda32be9d1', - 'info_dict': { - 'id': '/video/news/2016/08/19/netflix-stunning-stats.cnnmoney', - 'ext': 'mp4', - 'title': '5 stunning stats about Netflix', - 'description': 'Did you know that Netflix has more than 80 million members? Here are five facts about the online video distributor that you probably didn\'t know.', - 'upload_date': '20160819', - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, { - 'url': 'http://cnn.com/video/?/video/politics/2015/03/27/pkg-arizona-senator-church-attendance-mandatory.ktvk', - 'only_matching': True, - }, { - 'url': 'http://cnn.com/video/?/video/us/2015/04/06/dnt-baker-refuses-anti-gay-order.wkmg', - 'only_matching': True, - }, { - 'url': 'http://edition.cnn.com/videos/arts/2016/04/21/olympic-games-cultural-a-z-brazil.cnn', - 'only_matching': True, - }] - - _CONFIG = { - # http://edition.cnn.com/.element/apps/cvp/3.0/cfg/spider/cnn/expansion/config.xml - 'edition': { - 'data_src': 'http://edition.cnn.com/video/data/3.0/video/%s/index.xml', - 'media_src': 'http://pmd.cdn.turner.com/cnn/big', - }, - # http://money.cnn.com/.element/apps/cvp2/cfg/config.xml - 'money': { - 'data_src': 'http://money.cnn.com/video/data/4.0/video/%s.xml', - 'media_src': 'http://ht3.cdn.turner.com/money/big', - }, - } - - def _extract_timestamp(self, video_data): - # TODO: fix timestamp extraction - return None - - def _real_extract(self, url): - sub_domain, path, page_title = re.match(self._VALID_URL, url).groups() - if sub_domain not in ('money', 'edition'): - sub_domain = 'edition' - config = self._CONFIG[sub_domain] - return self._extract_cvp_info( - config['data_src'] % path, page_title, { - 'default': { - 
'media_src': config['media_src'], - }, - 'f4m': { - 'host': 'cnn-vh.akamaihd.net', - }, - }) - - -class CNNBlogsIE(InfoExtractor): - _VALID_URL = r'https?://[^\.]+\.blogs\.cnn\.com/.+' - _TEST = { - 'url': 'http://reliablesources.blogs.cnn.com/2014/02/09/criminalizing-journalism/', - 'md5': '3e56f97b0b6ffb4b79f4ea0749551084', - 'info_dict': { - 'id': 'bestoftv/2014/02/09/criminalizing-journalism.cnn', - 'ext': 'mp4', - 'title': 'Criminalizing journalism?', - 'description': 'Glenn Greenwald responds to comments made this week on Capitol Hill that journalists could be criminal accessories.', - 'upload_date': '20140209', - }, - 'expected_warnings': ['Failed to download m3u8 information'], - 'add_ie': ['CNN'], - } - - def _real_extract(self, url): - webpage = self._download_webpage(url, url_basename(url)) - cnn_url = self._html_search_regex(r'data-url="(.+?)"', webpage, 'cnn url') - return self.url_result(cnn_url, CNNIE.ie_key()) - - -class CNNArticleIE(InfoExtractor): - _VALID_URL = r'https?://(?:(?:edition|www)\.)?cnn\.com/(?!videos?/)' - _TEST = { - 'url': 'http://www.cnn.com/2014/12/21/politics/obama-north-koreas-hack-not-war-but-cyber-vandalism/', - 'md5': '689034c2a3d9c6dc4aa72d65a81efd01', - 'info_dict': { - 'id': 'bestoftv/2014/12/21/ip-north-korea-obama.cnn', - 'ext': 'mp4', - 'title': 'Obama: Cyberattack not an act of war', - 'description': 'md5:0a802a40d2376f60e6b04c8d5bcebc4b', - 'upload_date': '20141221', - }, - 'expected_warnings': ['Failed to download m3u8 information'], - 'add_ie': ['CNN'], - } - - def _real_extract(self, url): - webpage = self._download_webpage(url, url_basename(url)) - cnn_url = self._html_search_regex(r"video:\s*'([^']+)'", webpage, 'cnn url') - return self.url_result('http://cnn.com/video/?/video/' + cnn_url, CNNIE.ie_key()) diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py deleted file mode 100644 index 1bfa912be..000000000 --- a/youtube_dl/extractor/comedycentral.py +++ /dev/null @@ -1,51 
+0,0 @@ -from __future__ import unicode_literals - -from .mtv import MTVServicesInfoExtractor - - -class ComedyCentralIE(MTVServicesInfoExtractor): - _VALID_URL = r'https?://(?:www\.)?cc\.com/(?:episodes|video(?:-clips)?)/(?P<id>[0-9a-z]{6})' - _FEED_URL = 'http://comedycentral.com/feeds/mrss/' - - _TESTS = [{ - 'url': 'http://www.cc.com/video-clips/5ke9v2/the-daily-show-with-trevor-noah-doc-rivers-and-steve-ballmer---the-nba-player-strike', - 'md5': 'b8acb347177c680ff18a292aa2166f80', - 'info_dict': { - 'id': '89ccc86e-1b02-4f83-b0c9-1d9592ecd025', - 'ext': 'mp4', - 'title': 'The Daily Show with Trevor Noah|August 28, 2020|25|25149|Doc Rivers and Steve Ballmer - The NBA Player Strike', - 'description': 'md5:5334307c433892b85f4f5e5ac9ef7498', - 'timestamp': 1598670000, - 'upload_date': '20200829', - }, - }, { - 'url': 'http://www.cc.com/episodes/pnzzci/drawn-together--american-idol--parody-clip-show-season-3-ep-314', - 'only_matching': True, - }, { - 'url': 'https://www.cc.com/video/k3sdvm/the-daily-show-with-jon-stewart-exclusive-the-fourth-estate', - 'only_matching': True, - }] - - -class ComedyCentralTVIE(MTVServicesInfoExtractor): - _VALID_URL = r'https?://(?:www\.)?comedycentral\.tv/folgen/(?P<id>[0-9a-z]{6})' - _TESTS = [{ - 'url': 'https://www.comedycentral.tv/folgen/pxdpec/josh-investigates-klimawandel-staffel-1-ep-1', - 'info_dict': { - 'id': '15907dc3-ec3c-11e8-a442-0e40cf2fc285', - 'ext': 'mp4', - 'title': 'Josh Investigates', - 'description': 'Steht uns das Ende der Welt bevor?', - }, - }] - _FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed' - _GEO_COUNTRIES = ['DE'] - - def _get_feed_query(self, uri): - return { - 'accountOverride': 'intl.mtvi.com', - 'arcEp': 'web.cc.tv', - 'ep': 'b9032c3a', - 'imageEp': 'web.cc.tv', - 'mgid': uri, - } diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index a0901dab5..673ddb3fd 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -38,10 
+38,6 @@ from ..compat import ( compat_xml_parse_error, compat_zip as zip, ) -from ..downloader.f4m import ( - get_base_url, - remove_encrypted_media, -) from ..utils import ( NO_DEFAULT, age_restricted, @@ -95,6 +91,25 @@ from ..utils import ( ) +def _f4m_ns(prop, ver=1): + return '{http://ns.adobe.com/f4m/%d.0}%s' % (ver, prop) + + +def remove_encrypted_media(media): + return list(filter(lambda e: 'drmAdditionalHeaderId' not in e.attrib + and 'drmAdditionalHeaderSetId' not in e.attrib, + media)) + + +def get_base_url(manifest): + base_url = xpath_text( + manifest, [_f4m_ns('baseURL'), _f4m_ns('baseURL', 2)], + 'base URL', default=None) + if base_url: + base_url = base_url.strip() + return base_url + + class InfoExtractor(object): """Information Extractor class. diff --git a/youtube_dl/extractor/commonmistakes.py b/youtube_dl/extractor/commonmistakes.py deleted file mode 100644 index 7e12499b1..000000000 --- a/youtube_dl/extractor/commonmistakes.py +++ /dev/null @@ -1,50 +0,0 @@ -from __future__ import unicode_literals - -import sys - -from .common import InfoExtractor -from ..utils import ExtractorError - - -class CommonMistakesIE(InfoExtractor): - IE_DESC = False # Do not list - _VALID_URL = r'''(?x) - (?:url|URL)$ - ''' - - _TESTS = [{ - 'url': 'url', - 'only_matching': True, - }, { - 'url': 'URL', - 'only_matching': True, - }] - - def _real_extract(self, url): - msg = ( - 'You\'ve asked youtube-dl to download the URL "%s". ' - 'That doesn\'t make any sense. ' - 'Simply remove the parameter in your command or configuration.' - ) % url - if not self._downloader.params.get('verbose'): - msg += ' Add -v to the command line to see what arguments and configuration youtube-dl got.' 
- raise ExtractorError(msg, expected=True) - - -class UnicodeBOMIE(InfoExtractor): - IE_DESC = False - _VALID_URL = r'(?P<bom>\ufeff)(?P<id>.*)$' - - # Disable test for python 3.2 since BOM is broken in re in this version - # (see https://github.com/ytdl-org/youtube-dl/issues/9751) - _TESTS = [] if (3, 0) < sys.version_info <= (3, 3) else [{ - 'url': '\ufeffhttp://www.youtube.com/watch?v=BaW_jenozKc', - 'only_matching': True, - }] - - def _real_extract(self, url): - real_url = self._match_id(url) - self.report_warning( - 'Your URL starts with a Byte Order Mark (BOM). ' - 'Removing the BOM and looking for "%s" ...' % real_url) - return self.url_result(real_url) diff --git a/youtube_dl/extractor/commonprotocols.py b/youtube_dl/extractor/commonprotocols.py deleted file mode 100644 index d98331a4e..000000000 --- a/youtube_dl/extractor/commonprotocols.py +++ /dev/null @@ -1,60 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..compat import ( - compat_urlparse, -) - - -class RtmpIE(InfoExtractor): - IE_DESC = False # Do not list - _VALID_URL = r'(?i)rtmp[est]?://.+' - - _TESTS = [{ - 'url': 'rtmp://cp44293.edgefcs.net/ondemand?auth=daEcTdydfdqcsb8cZcDbAaCbhamacbbawaS-bw7dBb-bWG-GqpGFqCpNCnGoyL&aifp=v001&slist=public/unsecure/audio/2c97899446428e4301471a8cb72b4b97--audio--pmg-20110908-0900a_flv_aac_med_int.mp4', - 'only_matching': True, - }, { - 'url': 'rtmp://edge.live.hitbox.tv/live/dimak', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._generic_id(url) - title = self._generic_title(url) - return { - 'id': video_id, - 'title': title, - 'formats': [{ - 'url': url, - 'ext': 'flv', - 'format_id': compat_urlparse.urlparse(url).scheme, - }], - } - - -class MmsIE(InfoExtractor): - IE_DESC = False # Do not list - _VALID_URL = r'(?i)mms://.+' - - _TEST = { - # Direct MMS link - 'url': 'mms://kentro.kaist.ac.kr/200907/MilesReid(0709).wmv', - 'info_dict': { - 'id': 'MilesReid(0709)', - 'ext': 'wmv', - 
'title': 'MilesReid(0709)', - }, - 'params': { - 'skip_download': True, # rtsp downloads, requiring mplayer or mpv - }, - } - - def _real_extract(self, url): - video_id = self._generic_id(url) - title = self._generic_title(url) - - return { - 'id': video_id, - 'title': title, - 'url': url, - } diff --git a/youtube_dl/extractor/condenast.py b/youtube_dl/extractor/condenast.py deleted file mode 100644 index d5e77af32..000000000 --- a/youtube_dl/extractor/condenast.py +++ /dev/null @@ -1,251 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..compat import ( - compat_urllib_parse_urlparse, - compat_urlparse, -) -from ..utils import ( - determine_ext, - extract_attributes, - int_or_none, - js_to_json, - mimetype2ext, - orderedSet, - parse_iso8601, - strip_or_none, - try_get, -) - - -class CondeNastIE(InfoExtractor): - """ - Condé Nast is a media group, some of its sites use a custom HTML5 player - that works the same in all of them. - """ - - # The keys are the supported sites and the values are the name to be shown - # to the user and in the extractor description. 
- _SITES = { - 'allure': 'Allure', - 'architecturaldigest': 'Architectural Digest', - 'arstechnica': 'Ars Technica', - 'bonappetit': 'Bon Appétit', - 'brides': 'Brides', - 'cnevids': 'Condé Nast', - 'cntraveler': 'Condé Nast Traveler', - 'details': 'Details', - 'epicurious': 'Epicurious', - 'glamour': 'Glamour', - 'golfdigest': 'Golf Digest', - 'gq': 'GQ', - 'newyorker': 'The New Yorker', - 'self': 'SELF', - 'teenvogue': 'Teen Vogue', - 'vanityfair': 'Vanity Fair', - 'vogue': 'Vogue', - 'wired': 'WIRED', - 'wmagazine': 'W Magazine', - } - - _VALID_URL = r'''(?x)https?://(?:video|www|player(?:-backend)?)\.(?:%s)\.com/ - (?: - (?: - embed(?:js)?| - (?:script|inline)/video - )/(?P<id>[0-9a-f]{24})(?:/(?P<player_id>[0-9a-f]{24}))?(?:.+?\btarget=(?P<target>[^&]+))?| - (?P<type>watch|series|video)/(?P<display_id>[^/?#]+) - )''' % '|'.join(_SITES.keys()) - IE_DESC = 'Condé Nast media group: %s' % ', '.join(sorted(_SITES.values())) - - EMBED_URL = r'(?:https?:)?//player(?:-backend)?\.(?:%s)\.com/(?:embed(?:js)?|(?:script|inline)/video)/.+?' % '|'.join(_SITES.keys()) - - _TESTS = [{ - 'url': 'http://video.wired.com/watch/3d-printed-speakers-lit-with-led', - 'md5': '1921f713ed48aabd715691f774c451f7', - 'info_dict': { - 'id': '5171b343c2b4c00dd0c1ccb3', - 'ext': 'mp4', - 'title': '3D Printed Speakers Lit With LED', - 'description': 'Check out these beautiful 3D printed LED speakers. You can\'t actually buy them, but LumiGeek is working on a board that will let you make you\'re own.', - 'uploader': 'wired', - 'upload_date': '20130314', - 'timestamp': 1363219200, - } - }, { - 'url': 'http://video.gq.com/watch/the-closer-with-keith-olbermann-the-only-true-surprise-trump-s-an-idiot?c=series', - 'info_dict': { - 'id': '58d1865bfd2e6126e2000015', - 'ext': 'mp4', - 'title': 'The Only True Surprise? 
Trump’s an Idiot', - 'uploader': 'gq', - 'upload_date': '20170321', - 'timestamp': 1490126427, - 'description': 'How much grimmer would things be if these people were competent?', - }, - }, { - # JS embed - 'url': 'http://player.cnevids.com/embedjs/55f9cf8b61646d1acf00000c/5511d76261646d5566020000.js', - 'md5': 'f1a6f9cafb7083bab74a710f65d08999', - 'info_dict': { - 'id': '55f9cf8b61646d1acf00000c', - 'ext': 'mp4', - 'title': '3D printed TSA Travel Sentry keys really do open TSA locks', - 'uploader': 'arstechnica', - 'upload_date': '20150916', - 'timestamp': 1442434920, - } - }, { - 'url': 'https://player.cnevids.com/inline/video/59138decb57ac36b83000005.js?target=js-cne-player', - 'only_matching': True, - }, { - 'url': 'http://player-backend.cnevids.com/script/video/59138decb57ac36b83000005.js', - 'only_matching': True, - }] - - def _extract_series(self, url, webpage): - title = self._html_search_regex( - r'(?s)<div class="cne-series-info">.*?<h1>(.+?)</h1>', - webpage, 'series title') - url_object = compat_urllib_parse_urlparse(url) - base_url = '%s://%s' % (url_object.scheme, url_object.netloc) - m_paths = re.finditer( - r'(?s)<p class="cne-thumb-title">.*?<a href="(/watch/.+?)["\?]', webpage) - paths = orderedSet(m.group(1) for m in m_paths) - build_url = lambda path: compat_urlparse.urljoin(base_url, path) - entries = [self.url_result(build_url(path), 'CondeNast') for path in paths] - return self.playlist_result(entries, playlist_title=title) - - def _extract_video_params(self, webpage, display_id): - query = self._parse_json( - self._search_regex( - r'(?s)var\s+params\s*=\s*({.+?})[;,]', webpage, 'player params', - default='{}'), - display_id, transform_source=js_to_json, fatal=False) - if query: - query['videoId'] = self._search_regex( - r'(?:data-video-id=|currentVideoId\s*=\s*)["\']([\da-f]+)', - webpage, 'video id', default=None) - else: - params = extract_attributes(self._search_regex( - r'(<[^>]+data-js="video-player"[^>]+>)', - webpage, 'player params 
element')) - query.update({ - 'videoId': params['data-video'], - 'playerId': params['data-player'], - 'target': params['id'], - }) - return query - - def _extract_video(self, params): - video_id = params['videoId'] - - video_info = None - - # New API path - query = params.copy() - query['embedType'] = 'inline' - info_page = self._download_json( - 'http://player.cnevids.com/embed-api.json', video_id, - 'Downloading embed info', fatal=False, query=query) - - # Old fallbacks - if not info_page: - if params.get('playerId'): - info_page = self._download_json( - 'http://player.cnevids.com/player/video.js', video_id, - 'Downloading video info', fatal=False, query=params) - if info_page: - video_info = info_page.get('video') - if not video_info: - info_page = self._download_webpage( - 'http://player.cnevids.com/player/loader.js', - video_id, 'Downloading loader info', query=params) - if not video_info: - info_page = self._download_webpage( - 'https://player.cnevids.com/inline/video/%s.js' % video_id, - video_id, 'Downloading inline info', query={ - 'target': params.get('target', 'embedplayer') - }) - - if not video_info: - video_info = self._parse_json( - self._search_regex( - r'(?s)var\s+config\s*=\s*({.+?});', info_page, 'config'), - video_id, transform_source=js_to_json)['video'] - - title = video_info['title'] - - formats = [] - for fdata in video_info['sources']: - src = fdata.get('src') - if not src: - continue - ext = mimetype2ext(fdata.get('type')) or determine_ext(src) - if ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( - src, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls', fatal=False)) - continue - quality = fdata.get('quality') - formats.append({ - 'format_id': ext + ('-%s' % quality if quality else ''), - 'url': src, - 'ext': ext, - 'quality': 1 if quality == 'high' else 0, - }) - self._sort_formats(formats) - - subtitles = {} - for t, caption in video_info.get('captions', {}).items(): - caption_url = caption.get('src') - if not (t 
in ('vtt', 'srt', 'tml') and caption_url): - continue - subtitles.setdefault('en', []).append({'url': caption_url}) - - return { - 'id': video_id, - 'formats': formats, - 'title': title, - 'thumbnail': video_info.get('poster_frame'), - 'uploader': video_info.get('brand'), - 'duration': int_or_none(video_info.get('duration')), - 'tags': video_info.get('tags'), - 'series': video_info.get('series_title'), - 'season': video_info.get('season_title'), - 'timestamp': parse_iso8601(video_info.get('premiere_date')), - 'categories': video_info.get('categories'), - 'subtitles': subtitles, - } - - def _real_extract(self, url): - video_id, player_id, target, url_type, display_id = re.match(self._VALID_URL, url).groups() - - if video_id: - return self._extract_video({ - 'videoId': video_id, - 'playerId': player_id, - 'target': target, - }) - - webpage = self._download_webpage(url, display_id) - - if url_type == 'series': - return self._extract_series(url, webpage) - else: - video = try_get(self._parse_json(self._search_regex( - r'__PRELOADED_STATE__\s*=\s*({.+?});', webpage, - 'preload state', '{}'), display_id), - lambda x: x['transformed']['video']) - if video: - params = {'videoId': video['id']} - info = {'description': strip_or_none(video.get('description'))} - else: - params = self._extract_video_params(webpage, display_id) - info = self._search_json_ld( - webpage, display_id, fatal=False) - info.update(self._extract_video(params)) - return info diff --git a/youtube_dl/extractor/contv.py b/youtube_dl/extractor/contv.py deleted file mode 100644 index 84b462d40..000000000 --- a/youtube_dl/extractor/contv.py +++ /dev/null @@ -1,118 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - float_or_none, - int_or_none, -) - - -class CONtvIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?contv\.com/details-movie/(?P<id>[^/]+)' - _TESTS = [{ - 'url': 
'https://www.contv.com/details-movie/CEG10022949/days-of-thrills-&-laughter', - 'info_dict': { - 'id': 'CEG10022949', - 'ext': 'mp4', - 'title': 'Days Of Thrills & Laughter', - 'description': 'md5:5d6b3d0b1829bb93eb72898c734802eb', - 'upload_date': '20180703', - 'timestamp': 1530634789.61, - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, { - 'url': 'https://www.contv.com/details-movie/CLIP-show_fotld_bts/fight-of-the-living-dead:-behind-the-scenes-bites', - 'info_dict': { - 'id': 'CLIP-show_fotld_bts', - 'title': 'Fight of the Living Dead: Behind the Scenes Bites', - }, - 'playlist_mincount': 7, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - details = self._download_json( - 'http://metax.contv.live.junctiontv.net/metax/2.5/details/' + video_id, - video_id, query={'device': 'web'}) - - if details.get('type') == 'episodic': - seasons = self._download_json( - 'http://metax.contv.live.junctiontv.net/metax/2.5/seriesfeed/json/' + video_id, - video_id) - entries = [] - for season in seasons: - for episode in season.get('episodes', []): - episode_id = episode.get('id') - if not episode_id: - continue - entries.append(self.url_result( - 'https://www.contv.com/details-movie/' + episode_id, - CONtvIE.ie_key(), episode_id)) - return self.playlist_result(entries, video_id, details.get('title')) - - m_details = details['details'] - title = details['title'] - - formats = [] - - media_hls_url = m_details.get('media_hls_url') - if media_hls_url: - formats.extend(self._extract_m3u8_formats( - media_hls_url, video_id, 'mp4', - m3u8_id='hls', fatal=False)) - - media_mp4_url = m_details.get('media_mp4_url') - if media_mp4_url: - formats.append({ - 'format_id': 'http', - 'url': media_mp4_url, - }) - - self._sort_formats(formats) - - subtitles = {} - captions = m_details.get('captions') or {} - for caption_url in captions.values(): - subtitles.setdefault('en', []).append({ - 'url': caption_url - }) - - thumbnails = [] - for image in 
m_details.get('images', []): - image_url = image.get('url') - if not image_url: - continue - thumbnails.append({ - 'url': image_url, - 'width': int_or_none(image.get('width')), - 'height': int_or_none(image.get('height')), - }) - - description = None - for p in ('large_', 'medium_', 'small_', ''): - d = m_details.get(p + 'description') - if d: - description = d - break - - return { - 'id': video_id, - 'title': title, - 'formats': formats, - 'thumbnails': thumbnails, - 'description': description, - 'timestamp': float_or_none(details.get('metax_added_on'), 1000), - 'subtitles': subtitles, - 'duration': float_or_none(m_details.get('duration'), 1000), - 'view_count': int_or_none(details.get('num_watched')), - 'like_count': int_or_none(details.get('num_fav')), - 'categories': details.get('category'), - 'tags': details.get('tags'), - 'season_number': int_or_none(details.get('season')), - 'episode_number': int_or_none(details.get('episode')), - 'release_year': int_or_none(details.get('pub_year')), - } diff --git a/youtube_dl/extractor/corus.py b/youtube_dl/extractor/corus.py deleted file mode 100644 index e11aadf14..000000000 --- a/youtube_dl/extractor/corus.py +++ /dev/null @@ -1,160 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .theplatform import ThePlatformFeedIE -from ..utils import ( - dict_get, - ExtractorError, - float_or_none, - int_or_none, -) - - -class CorusIE(ThePlatformFeedIE): - _VALID_URL = r'''(?x) - https?:// - (?:www\.)? - (?P<domain> - (?: - globaltv| - etcanada| - seriesplus| - wnetwork| - ytv - )\.com| - (?: - hgtv| - foodnetwork| - slice| - history| - showcase| - bigbrothercanada| - abcspark| - disney(?:channel|lachaine) - )\.ca - ) - /(?:[^/]+/)* - (?: - video\.html\?.*?\bv=| - videos?/(?:[^/]+/)*(?:[a-z0-9-]+-)? 
- ) - (?P<id> - [\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}| - (?:[A-Z]{4})?\d{12,20} - ) - ''' - _TESTS = [{ - 'url': 'http://www.hgtv.ca/shows/bryan-inc/videos/movie-night-popcorn-with-bryan-870923331648/', - 'info_dict': { - 'id': '870923331648', - 'ext': 'mp4', - 'title': 'Movie Night Popcorn with Bryan', - 'description': 'Bryan whips up homemade popcorn, the old fashion way for Jojo and Lincoln.', - 'upload_date': '20170206', - 'timestamp': 1486392197, - }, - 'params': { - 'format': 'bestvideo', - 'skip_download': True, - }, - 'expected_warnings': ['Failed to parse JSON'], - }, { - 'url': 'http://www.foodnetwork.ca/shows/chopped/video/episode/chocolate-obsession/video.html?v=872683587753', - 'only_matching': True, - }, { - 'url': 'http://etcanada.com/video/873675331955/meet-the-survivor-game-changers-castaways-part-2/', - 'only_matching': True, - }, { - 'url': 'http://www.history.ca/the-world-without-canada/video/full-episodes/natural-resources/video.html?v=955054659646#video', - 'only_matching': True, - }, { - 'url': 'http://www.showcase.ca/eyewitness/video/eyewitness++106/video.html?v=955070531919&p=1&s=da#video', - 'only_matching': True, - }, { - 'url': 'http://www.bigbrothercanada.ca/video/1457812035894/', - 'only_matching': True - }, { - 'url': 'https://www.bigbrothercanada.ca/video/big-brother-canada-704/1457812035894/', - 'only_matching': True - }, { - 'url': 'https://www.seriesplus.com/emissions/dre-mary-mort-sur-ordonnance/videos/deux-coeurs-battant/SERP0055626330000200/', - 'only_matching': True - }, { - 'url': 'https://www.disneychannel.ca/shows/gabby-duran-the-unsittables/video/crybaby-duran-clip/2f557eec-0588-11ea-ae2b-e2c6776b770e/', - 'only_matching': True - }] - _GEO_BYPASS = False - _SITE_MAP = { - 'globaltv': 'series', - 'etcanada': 'series', - 'foodnetwork': 'food', - 'bigbrothercanada': 'series', - 'disneychannel': 'disneyen', - 'disneylachaine': 'disneyfr', - } - - def _real_extract(self, url): - domain, video_id = 
re.match(self._VALID_URL, url).groups() - site = domain.split('.')[0] - path = self._SITE_MAP.get(site, site) - if path != 'series': - path = 'migration/' + path - video = self._download_json( - 'https://globalcontent.corusappservices.com/templates/%s/playlist/' % path, - video_id, query={'byId': video_id}, - headers={'Accept': 'application/json'})[0] - title = video['title'] - - formats = [] - for source in video.get('sources', []): - smil_url = source.get('file') - if not smil_url: - continue - source_type = source.get('type') - note = 'Downloading%s smil file' % (' ' + source_type if source_type else '') - resp = self._download_webpage( - smil_url, video_id, note, fatal=False, - headers=self.geo_verification_headers()) - if not resp: - continue - error = self._parse_json(resp, video_id, fatal=False) - if error: - if error.get('exception') == 'GeoLocationBlocked': - self.raise_geo_restricted(countries=['CA']) - raise ExtractorError(error['description']) - smil = self._parse_xml(resp, video_id, fatal=False) - if smil is None: - continue - namespace = self._parse_smil_namespace(smil) - formats.extend(self._parse_smil_formats( - smil, smil_url, video_id, namespace)) - if not formats and video.get('drm'): - raise ExtractorError('This video is DRM protected.', expected=True) - self._sort_formats(formats) - - subtitles = {} - for track in video.get('tracks', []): - track_url = track.get('file') - if not track_url: - continue - lang = 'fr' if site in ('disneylachaine', 'seriesplus') else 'en' - subtitles.setdefault(lang, []).append({'url': track_url}) - - metadata = video.get('metadata') or {} - get_number = lambda x: int_or_none(video.get('pl1$' + x) or metadata.get(x + 'Number')) - - return { - 'id': video_id, - 'title': title, - 'formats': formats, - 'thumbnail': dict_get(video, ('defaultThumbnailUrl', 'thumbnail', 'image')), - 'description': video.get('description'), - 'timestamp': int_or_none(video.get('availableDate'), 1000), - 'subtitles': subtitles, - 
'duration': float_or_none(metadata.get('duration')), - 'series': dict_get(video, ('show', 'pl1$show')), - 'season_number': get_number('season'), - 'episode_number': get_number('episode'), - } diff --git a/youtube_dl/extractor/coub.py b/youtube_dl/extractor/coub.py deleted file mode 100644 index 6ea03e65c..000000000 --- a/youtube_dl/extractor/coub.py +++ /dev/null @@ -1,140 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - float_or_none, - int_or_none, - parse_iso8601, - qualities, -) - - -class CoubIE(InfoExtractor): - _VALID_URL = r'(?:coub:|https?://(?:coub\.com/(?:view|embed|coubs)/|c-cdn\.coub\.com/fb-player\.swf\?.*\bcoub(?:ID|id)=))(?P<id>[\da-z]+)' - - _TESTS = [{ - 'url': 'http://coub.com/view/5u5n1', - 'info_dict': { - 'id': '5u5n1', - 'ext': 'mp4', - 'title': 'The Matrix Moonwalk', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 4.6, - 'timestamp': 1428527772, - 'upload_date': '20150408', - 'uploader': 'Artyom Loskutnikov', - 'uploader_id': 'artyom.loskutnikov', - 'view_count': int, - 'like_count': int, - 'repost_count': int, - 'age_limit': 0, - }, - }, { - 'url': 'http://c-cdn.coub.com/fb-player.swf?bot_type=vk&coubID=7w5a4', - 'only_matching': True, - }, { - 'url': 'coub:5u5n1', - 'only_matching': True, - }, { - # longer video id - 'url': 'http://coub.com/view/237d5l5h', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - coub = self._download_json( - 'http://coub.com/api/v2/coubs/%s.json' % video_id, video_id) - - if coub.get('error'): - raise ExtractorError( - '%s said: %s' % (self.IE_NAME, coub['error']), expected=True) - - title = coub['title'] - - file_versions = coub['file_versions'] - - QUALITIES = ('low', 'med', 'high') - - MOBILE = 'mobile' - IPHONE = 'iphone' - HTML5 = 'html5' - - SOURCE_PREFERENCE = (MOBILE, IPHONE, HTML5) - - quality_key = qualities(QUALITIES) - preference_key = 
qualities(SOURCE_PREFERENCE) - - formats = [] - - for kind, items in file_versions.get(HTML5, {}).items(): - if kind not in ('video', 'audio'): - continue - if not isinstance(items, dict): - continue - for quality, item in items.items(): - if not isinstance(item, dict): - continue - item_url = item.get('url') - if not item_url: - continue - formats.append({ - 'url': item_url, - 'format_id': '%s-%s-%s' % (HTML5, kind, quality), - 'filesize': int_or_none(item.get('size')), - 'vcodec': 'none' if kind == 'audio' else None, - 'quality': quality_key(quality), - 'preference': preference_key(HTML5), - }) - - iphone_url = file_versions.get(IPHONE, {}).get('url') - if iphone_url: - formats.append({ - 'url': iphone_url, - 'format_id': IPHONE, - 'preference': preference_key(IPHONE), - }) - - mobile_url = file_versions.get(MOBILE, {}).get('audio_url') - if mobile_url: - formats.append({ - 'url': mobile_url, - 'format_id': '%s-audio' % MOBILE, - 'preference': preference_key(MOBILE), - }) - - self._sort_formats(formats) - - thumbnail = coub.get('picture') - duration = float_or_none(coub.get('duration')) - timestamp = parse_iso8601(coub.get('published_at') or coub.get('created_at')) - uploader = coub.get('channel', {}).get('title') - uploader_id = coub.get('channel', {}).get('permalink') - - view_count = int_or_none(coub.get('views_count') or coub.get('views_increase_count')) - like_count = int_or_none(coub.get('likes_count')) - repost_count = int_or_none(coub.get('recoubs_count')) - - age_restricted = coub.get('age_restricted', coub.get('age_restricted_by_admin')) - if age_restricted is not None: - age_limit = 18 if age_restricted is True else 0 - else: - age_limit = None - - return { - 'id': video_id, - 'title': title, - 'thumbnail': thumbnail, - 'duration': duration, - 'timestamp': timestamp, - 'uploader': uploader, - 'uploader_id': uploader_id, - 'view_count': view_count, - 'like_count': like_count, - 'repost_count': repost_count, - 'age_limit': age_limit, - 'formats': 
formats, - } diff --git a/youtube_dl/extractor/cpac.py b/youtube_dl/extractor/cpac.py deleted file mode 100644 index 22741152c..000000000 --- a/youtube_dl/extractor/cpac.py +++ /dev/null @@ -1,148 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..compat import compat_str -from ..utils import ( - int_or_none, - str_or_none, - try_get, - unified_timestamp, - update_url_query, - urljoin, -) - -# compat_range -try: - if callable(xrange): - range = xrange -except (NameError, TypeError): - pass - - -class CPACIE(InfoExtractor): - IE_NAME = 'cpac' - _VALID_URL = r'https?://(?:www\.)?cpac\.ca/(?P<fr>l-)?episode\?id=(?P<id>[\da-f]{8}(?:-[\da-f]{4}){3}-[\da-f]{12})' - _TEST = { - # 'url': 'http://www.cpac.ca/en/programs/primetime-politics/episodes/65490909', - 'url': 'https://www.cpac.ca/episode?id=fc7edcae-4660-47e1-ba61-5b7f29a9db0f', - 'md5': 'e46ad699caafd7aa6024279f2614e8fa', - 'info_dict': { - 'id': 'fc7edcae-4660-47e1-ba61-5b7f29a9db0f', - 'ext': 'mp4', - 'upload_date': '20220215', - 'title': 'News Conference to Celebrate National Kindness Week – February 15, 2022', - 'description': 'md5:466a206abd21f3a6f776cdef290c23fb', - 'timestamp': 1644901200, - }, - 'params': { - 'format': 'bestvideo', - 'hls_prefer_native': True, - }, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - url_lang = 'fr' if '/l-episode?' 
in url else 'en' - - content = self._download_json( - 'https://www.cpac.ca/api/1/services/contentModel.json?url=/site/website/episode/index.xml&crafterSite=cpacca&id=' + video_id, - video_id) - video_url = try_get(content, lambda x: x['page']['details']['videoUrl'], compat_str) - formats = [] - if video_url: - content = content['page'] - title = str_or_none(content['details']['title_%s_t' % (url_lang, )]) - formats = self._extract_m3u8_formats(video_url, video_id, m3u8_id='hls', ext='mp4') - for fmt in formats: - # prefer language to match URL - fmt_lang = fmt.get('language') - if fmt_lang == url_lang: - fmt['language_preference'] = 10 - elif not fmt_lang: - fmt['language_preference'] = -1 - else: - fmt['language_preference'] = -10 - - self._sort_formats(formats) - - category = str_or_none(content['details']['category_%s_t' % (url_lang, )]) - - def is_live(v_type): - return (v_type == 'live') if v_type is not None else None - - return { - 'id': video_id, - 'formats': formats, - 'title': title, - 'description': str_or_none(content['details'].get('description_%s_t' % (url_lang, ))), - 'timestamp': unified_timestamp(content['details'].get('liveDateTime')), - 'category': [category] if category else None, - 'thumbnail': urljoin(url, str_or_none(content['details'].get('image_%s_s' % (url_lang, )))), - 'is_live': is_live(content['details'].get('type')), - } - - -class CPACPlaylistIE(InfoExtractor): - IE_NAME = 'cpac:playlist' - _VALID_URL = r'(?i)https?://(?:www\.)?cpac\.ca/(?:program|search|(?P<fr>emission|rechercher))\?(?:[^&]+&)*?(?P<id>(?:id=\d+|programId=\d+|key=[^&]+))' - - _TESTS = [{ - 'url': 'https://www.cpac.ca/program?id=6', - 'info_dict': { - 'id': 'id=6', - 'title': 'Headline Politics', - 'description': 'Watch CPAC’s signature long-form coverage of the day’s pressing political events as they unfold.', - }, - 'playlist_count': 10, - }, { - 'url': 'https://www.cpac.ca/search?key=hudson&type=all&order=desc', - 'info_dict': { - 'id': 'key=hudson', - 'title': 
'hudson', - }, - 'playlist_count': 22, - }, { - 'url': 'https://www.cpac.ca/search?programId=50', - 'info_dict': { - 'id': 'programId=50', - 'title': '50', - }, - 'playlist_count': 9, - }, { - 'url': 'https://www.cpac.ca/emission?id=6', - 'only_matching': True, - }, { - 'url': 'https://www.cpac.ca/rechercher?key=hudson&type=all&order=desc', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - url_lang = 'fr' if any(x in url for x in ('/emission?', '/rechercher?')) else 'en' - pl_type, list_type = ('program', 'itemList') if any(x in url for x in ('/program?', '/emission?')) else ('search', 'searchResult') - api_url = ( - 'https://www.cpac.ca/api/1/services/contentModel.json?url=/site/website/%s/index.xml&crafterSite=cpacca&%s' - % (pl_type, video_id, )) - content = self._download_json(api_url, video_id) - entries = [] - total_pages = int_or_none(try_get(content, lambda x: x['page'][list_type]['totalPages']), default=1) - for page in range(1, total_pages + 1): - if page > 1: - api_url = update_url_query(api_url, {'page': '%d' % (page, ), }) - content = self._download_json( - api_url, video_id, - note='Downloading continuation - %d' % (page, ), - fatal=False) - - for item in try_get(content, lambda x: x['page'][list_type]['item'], list) or []: - episode_url = urljoin(url, try_get(item, lambda x: x['url_%s_s' % (url_lang, )])) - if episode_url: - entries.append(episode_url) - - return self.playlist_result( - (self.url_result(entry) for entry in entries), - playlist_id=video_id, - playlist_title=try_get(content, lambda x: x['page']['program']['title_%s_t' % (url_lang, )]) or video_id.split('=')[-1], - playlist_description=try_get(content, lambda x: x['page']['program']['description_%s_t' % (url_lang, )]), - ) diff --git a/youtube_dl/extractor/cracked.py b/youtube_dl/extractor/cracked.py deleted file mode 100644 index f77a68ece..000000000 --- a/youtube_dl/extractor/cracked.py +++ /dev/null @@ -1,90 +0,0 @@ -from __future__ 
import unicode_literals - -import re - -from .common import InfoExtractor -from .youtube import YoutubeIE -from ..utils import ( - parse_iso8601, - str_to_int, -) - - -class CrackedIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?cracked\.com/video_(?P<id>\d+)_[\da-z-]+\.html' - _TESTS = [{ - 'url': 'http://www.cracked.com/video_19070_if-animal-actors-got-e21-true-hollywood-stories.html', - 'md5': '89b90b9824e3806ca95072c4d78f13f7', - 'info_dict': { - 'id': '19070', - 'ext': 'mp4', - 'title': 'If Animal Actors Got E! True Hollywood Stories', - 'timestamp': 1404954000, - 'upload_date': '20140710', - } - }, { - # youtube embed - 'url': 'http://www.cracked.com/video_19006_4-plot-holes-you-didnt-notice-in-your-favorite-movies.html', - 'md5': 'ccd52866b50bde63a6ef3b35016ba8c7', - 'info_dict': { - 'id': 'EjI00A3rZD0', - 'ext': 'mp4', - 'title': "4 Plot Holes You Didn't Notice in Your Favorite Movies - The Spit Take", - 'description': 'md5:c603708c718b796fe6079e2b3351ffc7', - 'upload_date': '20140725', - 'uploader_id': 'Cracked', - 'uploader': 'Cracked', - } - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - youtube_url = YoutubeIE._extract_url(webpage) - if youtube_url: - return self.url_result(youtube_url, ie=YoutubeIE.ie_key()) - - video_url = self._html_search_regex( - [r'var\s+CK_vidSrc\s*=\s*"([^"]+)"', r'<video\s+src="([^"]+)"'], - webpage, 'video URL') - - title = self._search_regex( - [r'property="?og:title"?\s+content="([^"]+)"', r'class="?title"?>([^<]+)'], - webpage, 'title') - - description = self._search_regex( - r'name="?(?:og:)?description"?\s+content="([^"]+)"', - webpage, 'description', default=None) - - timestamp = self._html_search_regex( - r'"date"\s*:\s*"([^"]+)"', webpage, 'upload date', fatal=False) - if timestamp: - timestamp = parse_iso8601(timestamp[:-6]) - - view_count = str_to_int(self._html_search_regex( - r'<span\s+class="?views"? 
id="?viewCounts"?>([\d,\.]+) Views</span>', - webpage, 'view count', fatal=False)) - comment_count = str_to_int(self._html_search_regex( - r'<span\s+id="?commentCounts"?>([\d,\.]+)</span>', - webpage, 'comment count', fatal=False)) - - m = re.search(r'_(?P<width>\d+)X(?P<height>\d+)\.mp4$', video_url) - if m: - width = int(m.group('width')) - height = int(m.group('height')) - else: - width = height = None - - return { - 'id': video_id, - 'url': video_url, - 'title': title, - 'description': description, - 'timestamp': timestamp, - 'view_count': view_count, - 'comment_count': comment_count, - 'height': height, - 'width': width, - } diff --git a/youtube_dl/extractor/crackle.py b/youtube_dl/extractor/crackle.py deleted file mode 100644 index 49bf3a4f9..000000000 --- a/youtube_dl/extractor/crackle.py +++ /dev/null @@ -1,200 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals, division - -import hashlib -import hmac -import re -import time - -from .common import InfoExtractor -from ..compat import compat_HTTPError -from ..utils import ( - determine_ext, - float_or_none, - int_or_none, - parse_age_limit, - parse_duration, - url_or_none, - ExtractorError -) - - -class CrackleIE(InfoExtractor): - _VALID_URL = r'(?:crackle:|https?://(?:(?:www|m)\.)?(?:sony)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)' - _TESTS = [{ - # geo restricted to CA - 'url': 'https://www.crackle.com/andromeda/2502343', - 'info_dict': { - 'id': '2502343', - 'ext': 'mp4', - 'title': 'Under The Night', - 'description': 'md5:d2b8ca816579ae8a7bf28bfff8cefc8a', - 'duration': 2583, - 'view_count': int, - 'average_rating': 0, - 'age_limit': 14, - 'genre': 'Action, Sci-Fi', - 'creator': 'Allan Kroeker', - 'artist': 'Keith Hamilton Cobb, Kevin Sorbo, Lisa Ryder, Lexa Doig, Robert Hewitt Wolfe', - 'release_year': 2000, - 'series': 'Andromeda', - 'episode': 'Under The Night', - 'season_number': 1, - 'episode_number': 1, - }, - 'params': { - # m3u8 download - 'skip_download': True, - } - 
}, { - 'url': 'https://www.sonycrackle.com/andromeda/2502343', - 'only_matching': True, - }] - - _MEDIA_FILE_SLOTS = { - '360p.mp4': { - 'width': 640, - 'height': 360, - }, - '480p.mp4': { - 'width': 768, - 'height': 432, - }, - '480p_1mbps.mp4': { - 'width': 852, - 'height': 480, - }, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - country_code = self._downloader.params.get('geo_bypass_country', None) - countries = [country_code] if country_code else ( - 'US', 'AU', 'CA', 'AS', 'FM', 'GU', 'MP', 'PR', 'PW', 'MH', 'VI') - - last_e = None - - for country in countries: - try: - # Authorization generation algorithm is reverse engineered from: - # https://www.sonycrackle.com/static/js/main.ea93451f.chunk.js - media_detail_url = 'https://web-api-us.crackle.com/Service.svc/details/media/%s/%s?disableProtocols=true' % (video_id, country) - timestamp = time.strftime('%Y%m%d%H%M', time.gmtime()) - h = hmac.new(b'IGSLUQCBDFHEOIFM', '|'.join([media_detail_url, timestamp]).encode(), hashlib.sha1).hexdigest().upper() - media = self._download_json( - media_detail_url, video_id, 'Downloading media JSON as %s' % country, - 'Unable to download media JSON', headers={ - 'Accept': 'application/json', - 'Authorization': '|'.join([h, timestamp, '117', '1']), - }) - except ExtractorError as e: - # 401 means geo restriction, trying next country - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: - last_e = e - continue - raise - - media_urls = media.get('MediaURLs') - if not media_urls or not isinstance(media_urls, list): - continue - - title = media['Title'] - - formats = [] - for e in media['MediaURLs']: - if e.get('UseDRM') is True: - continue - format_url = url_or_none(e.get('Path')) - if not format_url: - continue - ext = determine_ext(format_url) - if ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( - format_url, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls', fatal=False)) - elif ext == 'mpd': - 
formats.extend(self._extract_mpd_formats( - format_url, video_id, mpd_id='dash', fatal=False)) - elif format_url.endswith('.ism/Manifest'): - formats.extend(self._extract_ism_formats( - format_url, video_id, ism_id='mss', fatal=False)) - else: - mfs_path = e.get('Type') - mfs_info = self._MEDIA_FILE_SLOTS.get(mfs_path) - if not mfs_info: - continue - formats.append({ - 'url': format_url, - 'format_id': 'http-' + mfs_path.split('.')[0], - 'width': mfs_info['width'], - 'height': mfs_info['height'], - }) - self._sort_formats(formats) - - description = media.get('Description') - duration = int_or_none(media.get( - 'DurationInSeconds')) or parse_duration(media.get('Duration')) - view_count = int_or_none(media.get('CountViews')) - average_rating = float_or_none(media.get('UserRating')) - age_limit = parse_age_limit(media.get('Rating')) - genre = media.get('Genre') - release_year = int_or_none(media.get('ReleaseYear')) - creator = media.get('Directors') - artist = media.get('Cast') - - if media.get('MediaTypeDisplayValue') == 'Full Episode': - series = media.get('ShowName') - episode = title - season_number = int_or_none(media.get('Season')) - episode_number = int_or_none(media.get('Episode')) - else: - series = episode = season_number = episode_number = None - - subtitles = {} - cc_files = media.get('ClosedCaptionFiles') - if isinstance(cc_files, list): - for cc_file in cc_files: - if not isinstance(cc_file, dict): - continue - cc_url = url_or_none(cc_file.get('Path')) - if not cc_url: - continue - lang = cc_file.get('Locale') or 'en' - subtitles.setdefault(lang, []).append({'url': cc_url}) - - thumbnails = [] - images = media.get('Images') - if isinstance(images, list): - for image_key, image_url in images.items(): - mobj = re.search(r'Img_(\d+)[xX](\d+)', image_key) - if not mobj: - continue - thumbnails.append({ - 'url': image_url, - 'width': int(mobj.group(1)), - 'height': int(mobj.group(2)), - }) - - return { - 'id': video_id, - 'title': title, - 'description': 
description, - 'duration': duration, - 'view_count': view_count, - 'average_rating': average_rating, - 'age_limit': age_limit, - 'genre': genre, - 'creator': creator, - 'artist': artist, - 'release_year': release_year, - 'series': series, - 'episode': episode, - 'season_number': season_number, - 'episode_number': episode_number, - 'thumbnails': thumbnails, - 'subtitles': subtitles, - 'formats': formats, - } - - raise last_e diff --git a/youtube_dl/extractor/crooksandliars.py b/youtube_dl/extractor/crooksandliars.py deleted file mode 100644 index 7fb782db7..000000000 --- a/youtube_dl/extractor/crooksandliars.py +++ /dev/null @@ -1,60 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - int_or_none, - qualities, -) - - -class CrooksAndLiarsIE(InfoExtractor): - _VALID_URL = r'https?://embed\.crooksandliars\.com/(?:embed|v)/(?P<id>[A-Za-z0-9]+)' - _TESTS = [{ - 'url': 'https://embed.crooksandliars.com/embed/8RUoRhRi', - 'info_dict': { - 'id': '8RUoRhRi', - 'ext': 'mp4', - 'title': 'Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!', - 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f', - 'thumbnail': r're:^https?://.*\.jpg', - 'timestamp': 1428207000, - 'upload_date': '20150405', - 'uploader': 'Heather', - 'duration': 236, - } - }, { - 'url': 'http://embed.crooksandliars.com/v/MTE3MjUtMzQ2MzA', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage( - 'http://embed.crooksandliars.com/embed/%s' % video_id, video_id) - - manifest = self._parse_json( - self._search_regex( - r'var\s+manifest\s*=\s*({.+?})\n', webpage, 'manifest JSON'), - video_id) - - quality = qualities(('webm_low', 'mp4_low', 'webm_high', 'mp4_high')) - - formats = [{ - 'url': item['url'], - 'format_id': item['type'], - 'quality': quality(item['type']), - } for item in manifest['flavors'] if item['mime'].startswith('video/')] - 
self._sort_formats(formats) - - return { - 'url': url, - 'id': video_id, - 'title': manifest['title'], - 'description': manifest.get('description'), - 'thumbnail': self._proto_relative_url(manifest.get('poster')), - 'timestamp': int_or_none(manifest.get('created')), - 'uploader': manifest.get('author'), - 'duration': int_or_none(manifest.get('duration')), - 'formats': formats, - } diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py deleted file mode 100644 index bc2d1fa8b..000000000 --- a/youtube_dl/extractor/crunchyroll.py +++ /dev/null @@ -1,686 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re -import json -import zlib - -from hashlib import sha1 -from math import pow, sqrt, floor -from .common import InfoExtractor -from .vrv import VRVIE -from ..compat import ( - compat_b64decode, - compat_etree_Element, - compat_etree_fromstring, - compat_str, - compat_urllib_parse_urlencode, - compat_urllib_request, - compat_urlparse, -) -from ..utils import ( - ExtractorError, - bytes_to_intlist, - extract_attributes, - float_or_none, - intlist_to_bytes, - int_or_none, - lowercase_escape, - merge_dicts, - remove_end, - sanitized_Request, - urlencode_postdata, - xpath_text, -) -from ..aes import ( - aes_cbc_decrypt, -) - - -class CrunchyrollBaseIE(InfoExtractor): - _LOGIN_URL = 'https://www.crunchyroll.com/login' - _LOGIN_FORM = 'login_form' - _NETRC_MACHINE = 'crunchyroll' - - def _call_rpc_api(self, method, video_id, note=None, data=None): - data = data or {} - data['req'] = 'RpcApi' + method - data = compat_urllib_parse_urlencode(data).encode('utf-8') - return self._download_xml( - 'https://www.crunchyroll.com/xml/', - video_id, note, fatal=False, data=data, headers={ - 'Content-Type': 'application/x-www-form-urlencoded', - }) - - def _login(self): - username, password = self._get_login_info() - if username is None: - return - - login_page = self._download_webpage( - self._LOGIN_URL, None, 'Downloading login 
page') - - def is_logged(webpage): - return 'href="/logout"' in webpage - - # Already logged in - if is_logged(login_page): - return - - login_form_str = self._search_regex( - r'(?P<form><form[^>]+?id=(["\'])%s\2[^>]*>)' % self._LOGIN_FORM, - login_page, 'login form', group='form') - - post_url = extract_attributes(login_form_str).get('action') - if not post_url: - post_url = self._LOGIN_URL - elif not post_url.startswith('http'): - post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url) - - login_form = self._form_hidden_inputs(self._LOGIN_FORM, login_page) - - login_form.update({ - 'login_form[name]': username, - 'login_form[password]': password, - }) - - response = self._download_webpage( - post_url, None, 'Logging in', 'Wrong login info', - data=urlencode_postdata(login_form), - headers={'Content-Type': 'application/x-www-form-urlencoded'}) - - # Successful login - if is_logged(response): - return - - error = self._html_search_regex( - '(?s)<ul[^>]+class=["\']messages["\'][^>]*>(.+?)</ul>', - response, 'error message', default=None) - if error: - raise ExtractorError('Unable to login: %s' % error, expected=True) - - raise ExtractorError('Unable to log in') - - def _real_initialize(self): - self._login() - - @staticmethod - def _add_skip_wall(url): - parsed_url = compat_urlparse.urlparse(url) - qs = compat_urlparse.parse_qs(parsed_url.query) - # Always force skip_wall to bypass maturity wall, namely 18+ confirmation message: - # > This content may be inappropriate for some people. - # > Are you sure you want to continue? - # since it's not disabled by default in crunchyroll account's settings. - # See https://github.com/ytdl-org/youtube-dl/issues/7202. 
- qs['skip_wall'] = ['1'] - return compat_urlparse.urlunparse( - parsed_url._replace(query=compat_urllib_parse_urlencode(qs, True))) - - -class CrunchyrollIE(CrunchyrollBaseIE, VRVIE): - IE_NAME = 'crunchyroll' - _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|(?:[^/]*/){1,2}[^/?&]*?)(?P<video_id>[0-9]+))(?:[/?&]|$)' - _TESTS = [{ - 'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513', - 'info_dict': { - 'id': '645513', - 'ext': 'mp4', - 'title': 'Wanna be the Strongest in the World Episode 1 – An Idol-Wrestler is Born!', - 'description': 'md5:2d17137920c64f2f49981a7797d275ef', - 'thumbnail': r're:^https?://.*\.jpg$', - 'uploader': 'Yomiuri Telecasting Corporation (YTV)', - 'upload_date': '20131013', - 'url': 're:(?!.*&)', - }, - 'params': { - # rtmp - 'skip_download': True, - }, - 'skip': 'Video gone', - }, { - 'url': 'http://www.crunchyroll.com/media-589804/culture-japan-1', - 'info_dict': { - 'id': '589804', - 'ext': 'flv', - 'title': 'Culture Japan Episode 1 – Rebuilding Japan after the 3.11', - 'description': 'md5:2fbc01f90b87e8e9137296f37b461c12', - 'thumbnail': r're:^https?://.*\.jpg$', - 'uploader': 'Danny Choo Network', - 'upload_date': '20120213', - }, - 'params': { - # rtmp - 'skip_download': True, - }, - 'skip': 'Video gone', - }, { - 'url': 'http://www.crunchyroll.com/rezero-starting-life-in-another-world-/episode-5-the-morning-of-our-promise-is-still-distant-702409', - 'info_dict': { - 'id': '702409', - 'ext': 'mp4', - 'title': compat_str, - 'description': compat_str, - 'thumbnail': r're:^https?://.*\.jpg$', - 'uploader': 'Re:Zero Partners', - 'timestamp': 1462098900, - 'upload_date': '20160501', - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, { - 'url': 'http://www.crunchyroll.com/konosuba-gods-blessing-on-this-wonderful-world/episode-1-give-me-deliverance-from-this-judicial-injustice-727589', - 'info_dict': { - 
'id': '727589', - 'ext': 'mp4', - 'title': compat_str, - 'description': compat_str, - 'thumbnail': r're:^https?://.*\.jpg$', - 'uploader': 'Kadokawa Pictures Inc.', - 'timestamp': 1484130900, - 'upload_date': '20170111', - 'series': compat_str, - 'season': "KONOSUBA -God's blessing on this wonderful world! 2", - 'season_number': 2, - 'episode': 'Give Me Deliverance From This Judicial Injustice!', - 'episode_number': 1, - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, { - 'url': 'http://www.crunchyroll.fr/girl-friend-beta/episode-11-goodbye-la-mode-661697', - 'only_matching': True, - }, { - # geo-restricted (US), 18+ maturity wall, non-premium available - 'url': 'http://www.crunchyroll.com/cosplay-complex-ova/episode-1-the-birth-of-the-cosplay-club-565617', - 'only_matching': True, - }, { - # A description with double quotes - 'url': 'http://www.crunchyroll.com/11eyes/episode-1-piros-jszaka-red-night-535080', - 'info_dict': { - 'id': '535080', - 'ext': 'mp4', - 'title': compat_str, - 'description': compat_str, - 'uploader': 'Marvelous AQL Inc.', - 'timestamp': 1255512600, - 'upload_date': '20091014', - }, - 'params': { - # Just test metadata extraction - 'skip_download': True, - }, - }, { - # make sure we can extract an uploader name that's not a link - 'url': 'http://www.crunchyroll.com/hakuoki-reimeiroku/episode-1-dawn-of-the-divine-warriors-606899', - 'info_dict': { - 'id': '606899', - 'ext': 'mp4', - 'title': 'Hakuoki Reimeiroku Episode 1 – Dawn of the Divine Warriors', - 'description': 'Ryunosuke was left to die, but Serizawa-san asked him a simple question "Do you want to live?"', - 'uploader': 'Geneon Entertainment', - 'upload_date': '20120717', - }, - 'params': { - # just test metadata extraction - 'skip_download': True, - }, - 'skip': 'Video gone', - }, { - # A video with a vastly different season name compared to the series name - 'url': 'http://www.crunchyroll.com/nyarko-san-another-crawling-chaos/episode-1-test-590532', - 
'info_dict': { - 'id': '590532', - 'ext': 'mp4', - 'title': compat_str, - 'description': compat_str, - 'uploader': 'TV TOKYO', - 'timestamp': 1330956000, - 'upload_date': '20120305', - 'series': 'Nyarko-san: Another Crawling Chaos', - 'season': 'Haiyoru! Nyaruani (ONA)', - }, - 'params': { - # Just test metadata extraction - 'skip_download': True, - }, - }, { - 'url': 'http://www.crunchyroll.com/media-723735', - 'only_matching': True, - }, { - 'url': 'https://www.crunchyroll.com/en-gb/mob-psycho-100/episode-2-urban-legends-encountering-rumors-780921', - 'only_matching': True, - }] - - _FORMAT_IDS = { - '360': ('60', '106'), - '480': ('61', '106'), - '720': ('62', '106'), - '1080': ('80', '108'), - } - - def _download_webpage(self, url_or_request, *args, **kwargs): - request = (url_or_request if isinstance(url_or_request, compat_urllib_request.Request) - else sanitized_Request(url_or_request)) - # Accept-Language must be set explicitly to accept any language to avoid issues - # similar to https://github.com/ytdl-org/youtube-dl/issues/6797. - # Along with IP address Crunchyroll uses Accept-Language to guess whether georestriction - # should be imposed or not (from what I can see it just takes the first language - # ignoring the priority and requires it to correspond the IP). By the way this causes - # Crunchyroll to not work in georestriction cases in some browsers that don't place - # the locale lang first in header. However allowing any language seems to workaround the issue. 
- request.add_header('Accept-Language', '*') - return super(CrunchyrollBaseIE, self)._download_webpage(request, *args, **kwargs) - - def _decrypt_subtitles(self, data, iv, id): - data = bytes_to_intlist(compat_b64decode(data)) - iv = bytes_to_intlist(compat_b64decode(iv)) - id = int(id) - - def obfuscate_key_aux(count, modulo, start): - output = list(start) - for _ in range(count): - output.append(output[-1] + output[-2]) - # cut off start values - output = output[2:] - output = list(map(lambda x: x % modulo + 33, output)) - return output - - def obfuscate_key(key): - num1 = int(floor(pow(2, 25) * sqrt(6.9))) - num2 = (num1 ^ key) << 5 - num3 = key ^ num1 - num4 = num3 ^ (num3 >> 3) ^ num2 - prefix = intlist_to_bytes(obfuscate_key_aux(20, 97, (1, 2))) - shaHash = bytes_to_intlist(sha1(prefix + str(num4).encode('ascii')).digest()) - # Extend 160 Bit hash to 256 Bit - return shaHash + [0] * 12 - - key = obfuscate_key(id) - - decrypted_data = intlist_to_bytes(aes_cbc_decrypt(data, key, iv)) - return zlib.decompress(decrypted_data) - - def _convert_subtitles_to_srt(self, sub_root): - output = '' - - for i, event in enumerate(sub_root.findall('./events/event'), 1): - start = event.attrib['start'].replace('.', ',') - end = event.attrib['end'].replace('.', ',') - text = event.attrib['text'].replace('\\N', '\n') - output += '%d\n%s --> %s\n%s\n\n' % (i, start, end, text) - return output - - def _convert_subtitles_to_ass(self, sub_root): - output = '' - - def ass_bool(strvalue): - assvalue = '0' - if strvalue == '1': - assvalue = '-1' - return assvalue - - output = '[Script Info]\n' - output += 'Title: %s\n' % sub_root.attrib['title'] - output += 'ScriptType: v4.00+\n' - output += 'WrapStyle: %s\n' % sub_root.attrib['wrap_style'] - output += 'PlayResX: %s\n' % sub_root.attrib['play_res_x'] - output += 'PlayResY: %s\n' % sub_root.attrib['play_res_y'] - output += """ -[V4+ Styles] -Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, 
Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding -""" - for style in sub_root.findall('./styles/style'): - output += 'Style: ' + style.attrib['name'] - output += ',' + style.attrib['font_name'] - output += ',' + style.attrib['font_size'] - output += ',' + style.attrib['primary_colour'] - output += ',' + style.attrib['secondary_colour'] - output += ',' + style.attrib['outline_colour'] - output += ',' + style.attrib['back_colour'] - output += ',' + ass_bool(style.attrib['bold']) - output += ',' + ass_bool(style.attrib['italic']) - output += ',' + ass_bool(style.attrib['underline']) - output += ',' + ass_bool(style.attrib['strikeout']) - output += ',' + style.attrib['scale_x'] - output += ',' + style.attrib['scale_y'] - output += ',' + style.attrib['spacing'] - output += ',' + style.attrib['angle'] - output += ',' + style.attrib['border_style'] - output += ',' + style.attrib['outline'] - output += ',' + style.attrib['shadow'] - output += ',' + style.attrib['alignment'] - output += ',' + style.attrib['margin_l'] - output += ',' + style.attrib['margin_r'] - output += ',' + style.attrib['margin_v'] - output += ',' + style.attrib['encoding'] - output += '\n' - - output += """ -[Events] -Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text -""" - for event in sub_root.findall('./events/event'): - output += 'Dialogue: 0' - output += ',' + event.attrib['start'] - output += ',' + event.attrib['end'] - output += ',' + event.attrib['style'] - output += ',' + event.attrib['name'] - output += ',' + event.attrib['margin_l'] - output += ',' + event.attrib['margin_r'] - output += ',' + event.attrib['margin_v'] - output += ',' + event.attrib['effect'] - output += ',' + event.attrib['text'] - output += '\n' - - return output - - def _extract_subtitles(self, subtitle): - sub_root = compat_etree_fromstring(subtitle) - return [{ - 'ext': 'srt', - 'data': 
self._convert_subtitles_to_srt(sub_root), - }, { - 'ext': 'ass', - 'data': self._convert_subtitles_to_ass(sub_root), - }] - - def _get_subtitles(self, video_id, webpage): - subtitles = {} - for sub_id, sub_name in re.findall(r'\bssid=([0-9]+)"[^>]+?\btitle="([^"]+)', webpage): - sub_doc = self._call_rpc_api( - 'Subtitle_GetXml', video_id, - 'Downloading subtitles for ' + sub_name, data={ - 'subtitle_script_id': sub_id, - }) - if not isinstance(sub_doc, compat_etree_Element): - continue - sid = sub_doc.get('id') - iv = xpath_text(sub_doc, 'iv', 'subtitle iv') - data = xpath_text(sub_doc, 'data', 'subtitle data') - if not sid or not iv or not data: - continue - subtitle = self._decrypt_subtitles(data, iv, sid).decode('utf-8') - lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False) - if not lang_code: - continue - subtitles[lang_code] = self._extract_subtitles(subtitle) - return subtitles - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('video_id') - - if mobj.group('prefix') == 'm': - mobile_webpage = self._download_webpage(url, video_id, 'Downloading mobile webpage') - webpage_url = self._search_regex(r'<link rel="canonical" href="([^"]+)" />', mobile_webpage, 'webpage_url') - else: - webpage_url = 'http://www.' + mobj.group('url') - - webpage = self._download_webpage( - self._add_skip_wall(webpage_url), video_id, - headers=self.geo_verification_headers()) - note_m = self._html_search_regex( - r'<div class="showmedia-trailer-notice">(.+?)</div>', - webpage, 'trailer-notice', default='') - if note_m: - raise ExtractorError(note_m) - - mobj = re.search(r'Page\.messaging_box_controller\.addItems\(\[(?P<msg>{.+?})\]\)', webpage) - if mobj: - msg = json.loads(mobj.group('msg')) - if msg.get('type') == 'error': - raise ExtractorError('crunchyroll returned error: %s' % msg['message_body'], expected=True) - - if 'To view this, please log in to verify you are 18 or older.' 
in webpage: - self.raise_login_required() - - media = self._parse_json(self._search_regex( - r'vilos\.config\.media\s*=\s*({.+?});', - webpage, 'vilos media', default='{}'), video_id) - media_metadata = media.get('metadata') or {} - - language = self._search_regex( - r'(?:vilos\.config\.player\.language|LOCALE)\s*=\s*(["\'])(?P<lang>(?:(?!\1).)+)\1', - webpage, 'language', default=None, group='lang') - - video_title = self._html_search_regex( - (r'(?s)<h1[^>]*>((?:(?!<h1).)*?<(?:span[^>]+itemprop=["\']title["\']|meta[^>]+itemprop=["\']position["\'])[^>]*>(?:(?!<h1).)+?)</h1>', - r'<title>(.+?),\s+-\s+.+? Crunchyroll'), - webpage, 'video_title', default=None) - if not video_title: - video_title = re.sub(r'^Watch\s+', '', self._og_search_description(webpage)) - video_title = re.sub(r' {2,}', ' ', video_title) - video_description = (self._parse_json(self._html_search_regex( - r'<script[^>]*>\s*.+?\[media_id=%s\].+?({.+?"description"\s*:.+?})\);' % video_id, - webpage, 'description', default='{}'), video_id) or media_metadata).get('description') - if video_description: - video_description = lowercase_escape(video_description.replace(r'\r\n', '\n')) - video_uploader = self._html_search_regex( - # try looking for both an uploader that's a link and one that's not - [r'<a[^>]+href="/publisher/[^"]+"[^>]*>([^<]+)</a>', r'<div>\s*Publisher:\s*<span>\s*(.+?)\s*</span>\s*</div>'], - webpage, 'video_uploader', default=False) - - formats = [] - for stream in media.get('streams', []): - audio_lang = stream.get('audio_lang') - hardsub_lang = stream.get('hardsub_lang') - vrv_formats = self._extract_vrv_formats( - stream.get('url'), video_id, stream.get('format'), - audio_lang, hardsub_lang) - for f in vrv_formats: - if not hardsub_lang: - f['preference'] = 1 - language_preference = 0 - if audio_lang == language: - language_preference += 1 - if hardsub_lang == language: - language_preference += 1 - if language_preference: - f['language_preference'] = language_preference - 
formats.extend(vrv_formats) - if not formats: - available_fmts = [] - for a, fmt in re.findall(r'(<a[^>]+token=["\']showmedia\.([0-9]{3,4})p["\'][^>]+>)', webpage): - attrs = extract_attributes(a) - href = attrs.get('href') - if href and '/freetrial' in href: - continue - available_fmts.append(fmt) - if not available_fmts: - for p in (r'token=["\']showmedia\.([0-9]{3,4})p"', r'showmedia\.([0-9]{3,4})p'): - available_fmts = re.findall(p, webpage) - if available_fmts: - break - if not available_fmts: - available_fmts = self._FORMAT_IDS.keys() - video_encode_ids = [] - - for fmt in available_fmts: - stream_quality, stream_format = self._FORMAT_IDS[fmt] - video_format = fmt + 'p' - stream_infos = [] - streamdata = self._call_rpc_api( - 'VideoPlayer_GetStandardConfig', video_id, - 'Downloading media info for %s' % video_format, data={ - 'media_id': video_id, - 'video_format': stream_format, - 'video_quality': stream_quality, - 'current_page': url, - }) - if isinstance(streamdata, compat_etree_Element): - stream_info = streamdata.find('./{default}preload/stream_info') - if stream_info is not None: - stream_infos.append(stream_info) - stream_info = self._call_rpc_api( - 'VideoEncode_GetStreamInfo', video_id, - 'Downloading stream info for %s' % video_format, data={ - 'media_id': video_id, - 'video_format': stream_format, - 'video_encode_quality': stream_quality, - }) - if isinstance(stream_info, compat_etree_Element): - stream_infos.append(stream_info) - for stream_info in stream_infos: - video_encode_id = xpath_text(stream_info, './video_encode_id') - if video_encode_id in video_encode_ids: - continue - video_encode_ids.append(video_encode_id) - - video_file = xpath_text(stream_info, './file') - if not video_file: - continue - if video_file.startswith('http'): - formats.extend(self._extract_m3u8_formats( - video_file, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls', fatal=False)) - continue - - video_url = xpath_text(stream_info, './host') - if not 
video_url: - continue - metadata = stream_info.find('./metadata') - format_info = { - 'format': video_format, - 'height': int_or_none(xpath_text(metadata, './height')), - 'width': int_or_none(xpath_text(metadata, './width')), - } - - if '.fplive.net/' in video_url: - video_url = re.sub(r'^rtmpe?://', 'http://', video_url.strip()) - parsed_video_url = compat_urlparse.urlparse(video_url) - direct_video_url = compat_urlparse.urlunparse(parsed_video_url._replace( - netloc='v.lvlt.crcdn.net', - path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_file.split(':')[-1]))) - if self._is_valid_url(direct_video_url, video_id, video_format): - format_info.update({ - 'format_id': 'http-' + video_format, - 'url': direct_video_url, - }) - formats.append(format_info) - continue - - format_info.update({ - 'format_id': 'rtmp-' + video_format, - 'url': video_url, - 'play_path': video_file, - 'ext': 'flv', - }) - formats.append(format_info) - self._sort_formats(formats, ('preference', 'language_preference', 'height', 'width', 'tbr', 'fps')) - - metadata = self._call_rpc_api( - 'VideoPlayer_GetMediaMetadata', video_id, - note='Downloading media info', data={ - 'media_id': video_id, - }) - - subtitles = {} - for subtitle in media.get('subtitles', []): - subtitle_url = subtitle.get('url') - if not subtitle_url: - continue - subtitles.setdefault(subtitle.get('language', 'enUS'), []).append({ - 'url': subtitle_url, - 'ext': subtitle.get('format', 'ass'), - }) - if not subtitles: - subtitles = self.extract_subtitles(video_id, webpage) - - # webpage provide more accurate data than series_title from XML - series = self._html_search_regex( - r'(?s)<h\d[^>]+\bid=["\']showmedia_about_episode_num[^>]+>(.+?)</h\d', - webpage, 'series', fatal=False) - - season = episode = episode_number = duration = thumbnail = None - - if isinstance(metadata, compat_etree_Element): - season = xpath_text(metadata, 'series_title') - episode = xpath_text(metadata, 'episode_title') - episode_number = 
int_or_none(xpath_text(metadata, 'episode_number')) - duration = float_or_none(media_metadata.get('duration'), 1000) - thumbnail = xpath_text(metadata, 'episode_image_url') - - if not episode: - episode = media_metadata.get('title') - if not episode_number: - episode_number = int_or_none(media_metadata.get('episode_number')) - if not thumbnail: - thumbnail = media_metadata.get('thumbnail', {}).get('url') - - season_number = int_or_none(self._search_regex( - r'(?s)<h\d[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h\d>\s*<h4>\s*Season (\d+)', - webpage, 'season number', default=None)) - - info = self._search_json_ld(webpage, video_id, default={}) - - return merge_dicts({ - 'id': video_id, - 'title': video_title, - 'description': video_description, - 'duration': duration, - 'thumbnail': thumbnail, - 'uploader': video_uploader, - 'series': series, - 'season': season, - 'season_number': season_number, - 'episode': episode, - 'episode_number': episode_number, - 'subtitles': subtitles, - 'formats': formats, - }, info) - - -class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE): - IE_NAME = 'crunchyroll:playlist' - _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login|media-\d+))(?P<id>[\w\-]+))/?(?:\?|$)' - - _TESTS = [{ - 'url': 'http://www.crunchyroll.com/a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi', - 'info_dict': { - 'id': 'a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi', - 'title': 'A Bridge to the Starry Skies - Hoshizora e Kakaru Hashi' - }, - 'playlist_count': 13, - }, { - # geo-restricted (US), 18+ maturity wall, non-premium available - 'url': 'http://www.crunchyroll.com/cosplay-complex-ova', - 'info_dict': { - 'id': 'cosplay-complex-ova', - 'title': 'Cosplay Complex OVA' - }, - 'playlist_count': 3, - 'skip': 'Georestricted', - }, { - # geo-restricted (US), 18+ maturity wall, non-premium will be available since 2015.11.14 - 'url': 
'http://www.crunchyroll.com/ladies-versus-butlers?skip_wall=1', - 'only_matching': True, - }] - - def _real_extract(self, url): - show_id = self._match_id(url) - - webpage = self._download_webpage( - self._add_skip_wall(url), show_id, - headers=self.geo_verification_headers()) - title = self._html_search_meta('name', webpage, default=None) - - episode_paths = re.findall( - r'(?s)<li id="showview_videos_media_(\d+)"[^>]+>.*?<a href="([^"]+)"', - webpage) - entries = [ - self.url_result('http://www.crunchyroll.com' + ep, 'Crunchyroll', ep_id) - for ep_id, ep in episode_paths - ] - entries.reverse() - - return { - '_type': 'playlist', - 'id': show_id, - 'title': title, - 'entries': entries, - } diff --git a/youtube_dl/extractor/cspan.py b/youtube_dl/extractor/cspan.py deleted file mode 100644 index 2e01aff48..000000000 --- a/youtube_dl/extractor/cspan.py +++ /dev/null @@ -1,244 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - determine_ext, - ExtractorError, - extract_attributes, - find_xpath_attr, - get_element_by_attribute, - get_element_by_class, - int_or_none, - js_to_json, - merge_dicts, - parse_iso8601, - smuggle_url, - str_to_int, - unescapeHTML, -) -from .senateisvp import SenateISVPIE -from .ustream import UstreamIE - - -class CSpanIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?c-span\.org/video/\?(?P<id>[0-9a-f]+)' - IE_DESC = 'C-SPAN' - _TESTS = [{ - 'url': 'http://www.c-span.org/video/?313572-1/HolderonV', - 'md5': '94b29a4f131ff03d23471dd6f60b6a1d', - 'info_dict': { - 'id': '315139', - 'title': 'Attorney General Eric Holder on Voting Rights Act Decision', - }, - 'playlist_mincount': 2, - 'skip': 'Regularly fails on travis, for unknown reasons', - }, { - 'url': 'http://www.c-span.org/video/?c4486943/cspan-international-health-care-models', - # md5 is unstable - 'info_dict': { - 'id': 'c4486943', - 'ext': 'mp4', - 'title': 'CSPAN - International Health Care Models', - 
'description': 'md5:7a985a2d595dba00af3d9c9f0783c967', - } - }, { - 'url': 'http://www.c-span.org/video/?318608-1/gm-ignition-switch-recall', - 'info_dict': { - 'id': '342759', - 'title': 'General Motors Ignition Switch Recall', - }, - 'playlist_mincount': 6, - }, { - # Video from senate.gov - 'url': 'http://www.c-span.org/video/?104517-1/immigration-reforms-needed-protect-skilled-american-workers', - 'info_dict': { - 'id': 'judiciary031715', - 'ext': 'mp4', - 'title': 'Immigration Reforms Needed to Protect Skilled American Workers', - }, - 'params': { - 'skip_download': True, # m3u8 downloads - } - }, { - # Ustream embedded video - 'url': 'https://www.c-span.org/video/?114917-1/armed-services', - 'info_dict': { - 'id': '58428542', - 'ext': 'flv', - 'title': 'USHR07 Armed Services Committee', - 'description': 'hsas00-2118-20150204-1000et-07\n\n\nUSHR07 Armed Services Committee', - 'timestamp': 1423060374, - 'upload_date': '20150204', - 'uploader': 'HouseCommittee', - 'uploader_id': '12987475', - }, - }, { - # Audio Only - 'url': 'https://www.c-span.org/video/?437336-1/judiciary-antitrust-competition-policy-consumer-rights', - 'only_matching': True, - }] - BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s' - - def _real_extract(self, url): - video_id = self._match_id(url) - video_type = None - webpage = self._download_webpage(url, video_id) - - ustream_url = UstreamIE._extract_url(webpage) - if ustream_url: - return self.url_result(ustream_url, UstreamIE.ie_key()) - - if '&vod' not in url: - bc = self._search_regex( - r"(<[^>]+id='brightcove-player-embed'[^>]+>)", - webpage, 'brightcove embed', default=None) - if bc: - bc_attr = extract_attributes(bc) - bc_url = self.BRIGHTCOVE_URL_TEMPLATE % ( - bc_attr.get('data-bcaccountid', '3162030207001'), - bc_attr.get('data-noprebcplayerid', 'SyGGpuJy3g'), - bc_attr.get('data-newbcplayerid', 'default'), - bc_attr['data-bcid']) - return self.url_result(smuggle_url(bc_url, {'source_url': 
url})) - - def add_referer(formats): - for f in formats: - f.setdefault('http_headers', {})['Referer'] = url - - # As of 01.12.2020 this path looks to cover all cases making the rest - # of the code unnecessary - jwsetup = self._parse_json( - self._search_regex( - r'(?s)jwsetup\s*=\s*({.+?})\s*;', webpage, 'jwsetup', - default='{}'), - video_id, transform_source=js_to_json, fatal=False) - if jwsetup: - info = self._parse_jwplayer_data( - jwsetup, video_id, require_title=False, m3u8_id='hls', - base_url=url) - add_referer(info['formats']) - for subtitles in info['subtitles'].values(): - for subtitle in subtitles: - ext = determine_ext(subtitle['url']) - if ext == 'php': - ext = 'vtt' - subtitle['ext'] = ext - ld_info = self._search_json_ld(webpage, video_id, default={}) - title = get_element_by_class('video-page-title', webpage) or \ - self._og_search_title(webpage) - description = get_element_by_attribute('itemprop', 'description', webpage) or \ - self._html_search_meta(['og:description', 'description'], webpage) - return merge_dicts(info, ld_info, { - 'title': title, - 'thumbnail': get_element_by_attribute('itemprop', 'thumbnailUrl', webpage), - 'description': description, - 'timestamp': parse_iso8601(get_element_by_attribute('itemprop', 'uploadDate', webpage)), - 'location': get_element_by_attribute('itemprop', 'contentLocation', webpage), - 'duration': int_or_none(self._search_regex( - r'jwsetup\.seclength\s*=\s*(\d+);', - webpage, 'duration', fatal=False)), - 'view_count': str_to_int(self._search_regex( - r"<span[^>]+class='views'[^>]*>([\d,]+)\s+Views</span>", - webpage, 'views', fatal=False)), - }) - - # Obsolete - # We first look for clipid, because clipprog always appears before - patterns = [r'id=\'clip(%s)\'\s*value=\'([0-9]+)\'' % t for t in ('id', 'prog')] - results = list(filter(None, (re.search(p, webpage) for p in patterns))) - if results: - matches = results[0] - video_type, video_id = matches.groups() - video_type = 'clip' if video_type == 'id' 
else 'program' - else: - m = re.search(r'data-(?P<type>clip|prog)id=["\'](?P<id>\d+)', webpage) - if m: - video_id = m.group('id') - video_type = 'program' if m.group('type') == 'prog' else 'clip' - else: - senate_isvp_url = SenateISVPIE._search_iframe_url(webpage) - if senate_isvp_url: - title = self._og_search_title(webpage) - surl = smuggle_url(senate_isvp_url, {'force_title': title}) - return self.url_result(surl, 'SenateISVP', video_id, title) - video_id = self._search_regex( - r'jwsetup\.clipprog\s*=\s*(\d+);', - webpage, 'jwsetup program id', default=None) - if video_id: - video_type = 'program' - if video_type is None or video_id is None: - error_message = get_element_by_class('VLplayer-error-message', webpage) - if error_message: - raise ExtractorError(error_message) - raise ExtractorError('unable to find video id and type') - - def get_text_attr(d, attr): - return d.get(attr, {}).get('#text') - - data = self._download_json( - 'http://www.c-span.org/assets/player/ajax-player.php?os=android&html5=%s&id=%s' % (video_type, video_id), - video_id)['video'] - if data['@status'] != 'Success': - raise ExtractorError('%s said: %s' % (self.IE_NAME, get_text_attr(data, 'error')), expected=True) - - doc = self._download_xml( - 'http://www.c-span.org/common/services/flashXml.php?%sid=%s' % (video_type, video_id), - video_id) - - description = self._html_search_meta('description', webpage) - - title = find_xpath_attr(doc, './/string', 'name', 'title').text - thumbnail = find_xpath_attr(doc, './/string', 'name', 'poster').text - - files = data['files'] - capfile = get_text_attr(data, 'capfile') - - entries = [] - for partnum, f in enumerate(files): - formats = [] - for quality in f.get('qualities', []): - formats.append({ - 'format_id': '%s-%sp' % (get_text_attr(quality, 'bitrate'), get_text_attr(quality, 'height')), - 'url': unescapeHTML(get_text_attr(quality, 'file')), - 'height': int_or_none(get_text_attr(quality, 'height')), - 'tbr': 
int_or_none(get_text_attr(quality, 'bitrate')), - }) - if not formats: - path = unescapeHTML(get_text_attr(f, 'path')) - if not path: - continue - formats = self._extract_m3u8_formats( - path, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls') if determine_ext(path) == 'm3u8' else [{'url': path, }] - add_referer(formats) - self._sort_formats(formats) - entries.append({ - 'id': '%s_%d' % (video_id, partnum + 1), - 'title': ( - title if len(files) == 1 else - '%s part %d' % (title, partnum + 1)), - 'formats': formats, - 'description': description, - 'thumbnail': thumbnail, - 'duration': int_or_none(get_text_attr(f, 'length')), - 'subtitles': { - 'en': [{ - 'url': capfile, - 'ext': determine_ext(capfile, 'dfxp') - }], - } if capfile else None, - }) - - if len(entries) == 1: - entry = dict(entries[0]) - entry['id'] = 'c' + video_id if video_type == 'clip' else video_id - return entry - else: - return { - '_type': 'playlist', - 'entries': entries, - 'title': title, - 'id': 'c' + video_id if video_type == 'clip' else video_id, - } diff --git a/youtube_dl/extractor/ctsnews.py b/youtube_dl/extractor/ctsnews.py deleted file mode 100644 index 679f1d92e..000000000 --- a/youtube_dl/extractor/ctsnews.py +++ /dev/null @@ -1,87 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import unified_timestamp -from .youtube import YoutubeIE - - -class CtsNewsIE(InfoExtractor): - IE_DESC = '華視新聞' - _VALID_URL = r'https?://news\.cts\.com\.tw/[a-z]+/[a-z]+/\d+/(?P<id>\d+)\.html' - _TESTS = [{ - 'url': 'http://news.cts.com.tw/cts/international/201501/201501291578109.html', - 'md5': 'a9875cb790252b08431186d741beaabe', - 'info_dict': { - 'id': '201501291578109', - 'ext': 'mp4', - 'title': '以色列.真主黨交火 3人死亡 - 華視新聞網', - 'description': '以色列和黎巴嫩真主黨,爆發五年最嚴重衝突,雙方砲轟交火,兩名以軍死亡,還有一名西班牙籍的聯合國維和人員也不幸罹難。大陸陝西、河南、安徽、江蘇和湖北五個省份出現大暴雪,嚴重影響陸空交通,不過九華山卻出現...', - 'timestamp': 1422528540, - 'upload_date': '20150129', - } - }, { - # News 
count not appear on page but still available in database - 'url': 'http://news.cts.com.tw/cts/international/201309/201309031304098.html', - 'md5': '3aee7e0df7cdff94e43581f54c22619e', - 'info_dict': { - 'id': '201309031304098', - 'ext': 'mp4', - 'title': '韓國31歲童顏男 貌如十多歲小孩 - 華視新聞網', - 'description': '越有年紀的人,越希望看起來年輕一點,而南韓卻有一位31歲的男子,看起來像是11、12歲的小孩,身...', - 'thumbnail': r're:^https?://.*\.jpg$', - 'timestamp': 1378205880, - 'upload_date': '20130903', - } - }, { - # With Youtube embedded video - 'url': 'http://news.cts.com.tw/cts/money/201501/201501291578003.html', - 'md5': 'e4726b2ccd70ba2c319865e28f0a91d1', - 'info_dict': { - 'id': 'OVbfO7d0_hQ', - 'ext': 'mp4', - 'title': 'iPhone6熱銷 蘋果財報亮眼', - 'description': 'md5:f395d4f485487bb0f992ed2c4b07aa7d', - 'thumbnail': r're:^https?://.*\.jpg$', - 'upload_date': '20150128', - 'uploader_id': 'TBSCTS', - 'uploader': '中華電視公司', - }, - 'add_ie': ['Youtube'], - }] - - def _real_extract(self, url): - news_id = self._match_id(url) - page = self._download_webpage(url, news_id) - - news_id = self._hidden_inputs(page).get('get_id') - - if news_id: - mp4_feed = self._download_json( - 'http://news.cts.com.tw/action/test_mp4feed.php', - news_id, note='Fetching feed', query={'news_id': news_id}) - video_url = mp4_feed['source_url'] - else: - self.to_screen('Not CTSPlayer video, trying Youtube...') - youtube_url = YoutubeIE._extract_url(page) - - return self.url_result(youtube_url, ie='Youtube') - - description = self._html_search_meta('description', page) - title = self._html_search_meta('title', page, fatal=True) - thumbnail = self._html_search_meta('image', page) - - datetime_str = self._html_search_regex( - r'(\d{4}/\d{2}/\d{2} \d{2}:\d{2})', page, 'date and time', fatal=False) - timestamp = None - if datetime_str: - timestamp = unified_timestamp(datetime_str) - 8 * 3600 - - return { - 'id': news_id, - 'url': video_url, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'timestamp': timestamp, - } diff --git 
a/youtube_dl/extractor/ctv.py b/youtube_dl/extractor/ctv.py deleted file mode 100644 index 756bcc2be..000000000 --- a/youtube_dl/extractor/ctv.py +++ /dev/null @@ -1,52 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor - - -class CTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?ctv\.ca/(?P<id>(?:show|movie)s/[^/]+/[^/?#&]+)' - _TESTS = [{ - 'url': 'https://www.ctv.ca/shows/your-morning/wednesday-december-23-2020-s5e88', - 'info_dict': { - 'id': '2102249', - 'ext': 'flv', - 'title': 'Wednesday, December 23, 2020', - 'thumbnail': r're:^https?://.*\.jpg$', - 'description': 'Your Morning delivers original perspectives and unique insights into the headlines of the day.', - 'timestamp': 1608732000, - 'upload_date': '20201223', - 'series': 'Your Morning', - 'season': '2020-2021', - 'season_number': 5, - 'episode_number': 88, - 'tags': ['Your Morning'], - 'categories': ['Talk Show'], - 'duration': 7467.126, - }, - }, { - 'url': 'https://www.ctv.ca/movies/adam-sandlers-eight-crazy-nights/adam-sandlers-eight-crazy-nights', - 'only_matching': True, - }] - - def _real_extract(self, url): - display_id = self._match_id(url) - content = self._download_json( - 'https://www.ctv.ca/space-graphql/graphql', display_id, query={ - 'query': '''{ - resolvedPath(path: "/%s") { - lastSegment { - content { - ... 
on AxisContent { - axisId - videoPlayerDestCode - } - } - } - } -}''' % display_id, - })['data']['resolvedPath']['lastSegment']['content'] - video_id = content['axisId'] - return self.url_result( - '9c9media:%s:%s' % (content['videoPlayerDestCode'], video_id), - 'NineCNineMedia', video_id) diff --git a/youtube_dl/extractor/ctvnews.py b/youtube_dl/extractor/ctvnews.py deleted file mode 100644 index 03f8cefb7..000000000 --- a/youtube_dl/extractor/ctvnews.py +++ /dev/null @@ -1,68 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import orderedSet - - -class CTVNewsIE(InfoExtractor): - _VALID_URL = r'https?://(?:.+?\.)?ctvnews\.ca/(?:video\?(?:clip|playlist|bin)Id=|.*?)(?P<id>[0-9.]+)' - _TESTS = [{ - 'url': 'http://www.ctvnews.ca/video?clipId=901995', - 'md5': '9b8624ba66351a23e0b6e1391971f9af', - 'info_dict': { - 'id': '901995', - 'ext': 'flv', - 'title': 'Extended: \'That person cannot be me\' Johnson says', - 'description': 'md5:958dd3b4f5bbbf0ed4d045c790d89285', - 'timestamp': 1467286284, - 'upload_date': '20160630', - } - }, { - 'url': 'http://www.ctvnews.ca/video?playlistId=1.2966224', - 'info_dict': - { - 'id': '1.2966224', - }, - 'playlist_mincount': 19, - }, { - 'url': 'http://www.ctvnews.ca/video?binId=1.2876780', - 'info_dict': - { - 'id': '1.2876780', - }, - 'playlist_mincount': 100, - }, { - 'url': 'http://www.ctvnews.ca/1.810401', - 'only_matching': True, - }, { - 'url': 'http://www.ctvnews.ca/canadiens-send-p-k-subban-to-nashville-in-blockbuster-trade-1.2967231', - 'only_matching': True, - }, { - 'url': 'http://vancouverisland.ctvnews.ca/video?clipId=761241', - 'only_matching': True, - }] - - def _real_extract(self, url): - page_id = self._match_id(url) - - def ninecninemedia_url_result(clip_id): - return { - '_type': 'url_transparent', - 'id': clip_id, - 'url': '9c9media:ctvnews_web:%s' % clip_id, - 'ie_key': 'NineCNineMedia', - } - - if page_id.isdigit(): - return 
ninecninemedia_url_result(page_id) - else: - webpage = self._download_webpage('http://www.ctvnews.ca/%s' % page_id, page_id, query={ - 'ot': 'example.AjaxPageLayout.ot', - 'maxItemsPerPage': 1000000, - }) - entries = [ninecninemedia_url_result(clip_id) for clip_id in orderedSet( - re.findall(r'clip\.id\s*=\s*(\d+);', webpage))] - return self.playlist_result(entries, page_id) diff --git a/youtube_dl/extractor/cultureunplugged.py b/youtube_dl/extractor/cultureunplugged.py deleted file mode 100644 index bcdf27323..000000000 --- a/youtube_dl/extractor/cultureunplugged.py +++ /dev/null @@ -1,70 +0,0 @@ -from __future__ import unicode_literals - -import re -import time - -from .common import InfoExtractor -from ..utils import ( - int_or_none, - HEADRequest, -) - - -class CultureUnpluggedIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?cultureunplugged\.com/documentary/watch-online/play/(?P<id>\d+)(?:/(?P<display_id>[^/]+))?' - _TESTS = [{ - 'url': 'http://www.cultureunplugged.com/documentary/watch-online/play/53662/The-Next--Best-West', - 'md5': 'ac6c093b089f7d05e79934dcb3d228fc', - 'info_dict': { - 'id': '53662', - 'display_id': 'The-Next--Best-West', - 'ext': 'mp4', - 'title': 'The Next, Best West', - 'description': 'md5:0423cd00833dea1519cf014e9d0903b1', - 'thumbnail': r're:^https?://.*\.jpg$', - 'creator': 'Coldstream Creative', - 'duration': 2203, - 'view_count': int, - } - }, { - 'url': 'http://www.cultureunplugged.com/documentary/watch-online/play/53662', - 'only_matching': True, - }] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - display_id = mobj.group('display_id') or video_id - - # request setClientTimezone.php to get PHPSESSID cookie which is need to get valid json data in the next request - self._request_webpage(HEADRequest( - 'http://www.cultureunplugged.com/setClientTimezone.php?timeOffset=%d' % -(time.timezone / 3600)), display_id) - movie_data = self._download_json( - 
'http://www.cultureunplugged.com/movie-data/cu-%s.json' % video_id, display_id) - - video_url = movie_data['url'] - title = movie_data['title'] - - description = movie_data.get('synopsis') - creator = movie_data.get('producer') - duration = int_or_none(movie_data.get('duration')) - view_count = int_or_none(movie_data.get('views')) - - thumbnails = [{ - 'url': movie_data['%s_thumb' % size], - 'id': size, - 'preference': preference, - } for preference, size in enumerate(( - 'small', 'large')) if movie_data.get('%s_thumb' % size)] - - return { - 'id': video_id, - 'display_id': display_id, - 'url': video_url, - 'title': title, - 'description': description, - 'creator': creator, - 'duration': duration, - 'view_count': view_count, - 'thumbnails': thumbnails, - } diff --git a/youtube_dl/extractor/curiositystream.py b/youtube_dl/extractor/curiositystream.py deleted file mode 100644 index 48ff30432..000000000 --- a/youtube_dl/extractor/curiositystream.py +++ /dev/null @@ -1,177 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - int_or_none, - urlencode_postdata, - compat_str, - ExtractorError, -) - - -class CuriosityStreamBaseIE(InfoExtractor): - _NETRC_MACHINE = 'curiositystream' - _auth_token = None - _API_BASE_URL = 'https://api.curiositystream.com/v1/' - - def _handle_errors(self, result): - error = result.get('error', {}).get('message') - if error: - if isinstance(error, dict): - error = ', '.join(error.values()) - raise ExtractorError( - '%s said: %s' % (self.IE_NAME, error), expected=True) - - def _call_api(self, path, video_id, query=None): - headers = {} - if self._auth_token: - headers['X-Auth-Token'] = self._auth_token - result = self._download_json( - self._API_BASE_URL + path, video_id, headers=headers, query=query) - self._handle_errors(result) - return result['data'] - - def _real_initialize(self): - email, password = self._get_login_info() - if email is None: - return - 
result = self._download_json( - self._API_BASE_URL + 'login', None, data=urlencode_postdata({ - 'email': email, - 'password': password, - })) - self._handle_errors(result) - self._auth_token = result['message']['auth_token'] - - -class CuriosityStreamIE(CuriosityStreamBaseIE): - IE_NAME = 'curiositystream' - _VALID_URL = r'https?://(?:app\.)?curiositystream\.com/video/(?P<id>\d+)' - _TEST = { - 'url': 'https://app.curiositystream.com/video/2', - 'info_dict': { - 'id': '2', - 'ext': 'mp4', - 'title': 'How Did You Develop The Internet?', - 'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.', - }, - 'params': { - 'format': 'bestvideo', - # m3u8 download - 'skip_download': True, - }, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - formats = [] - for encoding_format in ('m3u8', 'mpd'): - media = self._call_api('media/' + video_id, video_id, query={ - 'encodingsNew': 'true', - 'encodingsFormat': encoding_format, - }) - for encoding in media.get('encodings', []): - playlist_url = encoding.get('master_playlist_url') - if encoding_format == 'm3u8': - # use `m3u8` entry_protocol until EXT-X-MAP is properly supported by `m3u8_native` entry_protocol - formats.extend(self._extract_m3u8_formats( - playlist_url, video_id, 'mp4', - m3u8_id='hls', fatal=False)) - elif encoding_format == 'mpd': - formats.extend(self._extract_mpd_formats( - playlist_url, video_id, mpd_id='dash', fatal=False)) - encoding_url = encoding.get('url') - file_url = encoding.get('file_url') - if not encoding_url and not file_url: - continue - f = { - 'width': int_or_none(encoding.get('width')), - 'height': int_or_none(encoding.get('height')), - 'vbr': int_or_none(encoding.get('video_bitrate')), - 'abr': int_or_none(encoding.get('audio_bitrate')), - 'filesize': int_or_none(encoding.get('size_in_bytes')), - 'vcodec': encoding.get('video_codec'), - 'acodec': encoding.get('audio_codec'), - 'container': 
encoding.get('container_type'), - } - for f_url in (encoding_url, file_url): - if not f_url: - continue - fmt = f.copy() - rtmp = re.search(r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+))/(?P<playpath>mp[34]:.+)$', f_url) - if rtmp: - fmt.update({ - 'url': rtmp.group('url'), - 'play_path': rtmp.group('playpath'), - 'app': rtmp.group('app'), - 'ext': 'flv', - 'format_id': 'rtmp', - }) - else: - fmt.update({ - 'url': f_url, - 'format_id': 'http', - }) - formats.append(fmt) - self._sort_formats(formats) - - title = media['title'] - - subtitles = {} - for closed_caption in media.get('closed_captions', []): - sub_url = closed_caption.get('file') - if not sub_url: - continue - lang = closed_caption.get('code') or closed_caption.get('language') or 'en' - subtitles.setdefault(lang, []).append({ - 'url': sub_url, - }) - - return { - 'id': video_id, - 'formats': formats, - 'title': title, - 'description': media.get('description'), - 'thumbnail': media.get('image_large') or media.get('image_medium') or media.get('image_small'), - 'duration': int_or_none(media.get('duration')), - 'tags': media.get('tags'), - 'subtitles': subtitles, - } - - -class CuriosityStreamCollectionIE(CuriosityStreamBaseIE): - IE_NAME = 'curiositystream:collection' - _VALID_URL = r'https?://(?:app\.)?curiositystream\.com/(?:collections?|series)/(?P<id>\d+)' - _TESTS = [{ - 'url': 'https://app.curiositystream.com/collection/2', - 'info_dict': { - 'id': '2', - 'title': 'Curious Minds: The Internet', - 'description': 'How is the internet shaping our lives in the 21st Century?', - }, - 'playlist_mincount': 16, - }, { - 'url': 'https://curiositystream.com/series/2', - 'only_matching': True, - }, { - 'url': 'https://curiositystream.com/collections/36', - 'only_matching': True, - }] - - def _real_extract(self, url): - collection_id = self._match_id(url) - collection = self._call_api( - 'collections/' + collection_id, collection_id) - entries = [] - for media in collection.get('media', []): - media_id = 
compat_str(media.get('id')) - entries.append(self.url_result( - 'https://curiositystream.com/video/' + media_id, - CuriosityStreamIE.ie_key(), media_id)) - return self.playlist_result( - entries, collection_id, - collection.get('title'), collection.get('description')) diff --git a/youtube_dl/extractor/cwtv.py b/youtube_dl/extractor/cwtv.py deleted file mode 100644 index 73382431b..000000000 --- a/youtube_dl/extractor/cwtv.py +++ /dev/null @@ -1,97 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - int_or_none, - parse_age_limit, - parse_iso8601, - smuggle_url, - str_or_none, -) - - -class CWTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?cw(?:tv(?:pr)?|seed)\.com/(?:shows/)?(?:[^/]+/)+[^?]*\?.*\b(?:play|watch)=(?P<id>[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12})' - _TESTS = [{ - 'url': 'http://cwtv.com/shows/arrow/legends-of-yesterday/?play=6b15e985-9345-4f60-baf8-56e96be57c63', - 'info_dict': { - 'id': '6b15e985-9345-4f60-baf8-56e96be57c63', - 'ext': 'mp4', - 'title': 'Legends of Yesterday', - 'description': 'Oliver and Barry Allen take Kendra Saunders and Carter Hall to a remote location to keep them hidden from Vandal Savage while they figure out how to defeat him.', - 'duration': 2665, - 'series': 'Arrow', - 'season_number': 4, - 'season': '4', - 'episode_number': 8, - 'upload_date': '20151203', - 'timestamp': 1449122100, - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - 'skip': 'redirect to http://cwtv.com/shows/arrow/', - }, { - 'url': 'http://www.cwseed.com/shows/whose-line-is-it-anyway/jeff-davis-4/?play=24282b12-ead2-42f2-95ad-26770c2c6088', - 'info_dict': { - 'id': '24282b12-ead2-42f2-95ad-26770c2c6088', - 'ext': 'mp4', - 'title': 'Jeff Davis 4', - 'description': 'Jeff Davis is back to make you laugh.', - 'duration': 1263, - 'series': 'Whose Line Is It Anyway?', - 'season_number': 11, - 'episode_number': 20, - 
'upload_date': '20151006', - 'timestamp': 1444107300, - 'age_limit': 14, - 'uploader': 'CWTV', - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, { - 'url': 'http://cwtv.com/thecw/chroniclesofcisco/?play=8adebe35-f447-465f-ab52-e863506ff6d6', - 'only_matching': True, - }, { - 'url': 'http://cwtvpr.com/the-cw/video?watch=9eee3f60-ef4e-440b-b3b2-49428ac9c54e', - 'only_matching': True, - }, { - 'url': 'http://cwtv.com/shows/arrow/legends-of-yesterday/?watch=6b15e985-9345-4f60-baf8-56e96be57c63', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - data = self._download_json( - 'http://images.cwtv.com/feed/mobileapp/video-meta/apiversion_8/guid_' + video_id, - video_id) - if data.get('result') != 'ok': - raise ExtractorError(data['msg'], expected=True) - video_data = data['video'] - title = video_data['title'] - mpx_url = video_data.get('mpx_url') or 'http://link.theplatform.com/s/cwtv/media/guid/2703454149/%s?formats=M3U' % video_id - - season = str_or_none(video_data.get('season')) - episode = str_or_none(video_data.get('episode')) - if episode and season: - episode = episode[len(season):] - - return { - '_type': 'url_transparent', - 'id': video_id, - 'title': title, - 'url': smuggle_url(mpx_url, {'force_smil_url': True}), - 'description': video_data.get('description_long'), - 'duration': int_or_none(video_data.get('duration_secs')), - 'series': video_data.get('series_name'), - 'season_number': int_or_none(season), - 'episode_number': int_or_none(episode), - 'timestamp': parse_iso8601(video_data.get('start_time')), - 'age_limit': parse_age_limit(video_data.get('rating')), - 'ie_key': 'ThePlatform', - } diff --git a/youtube_dl/extractor/dailymail.py b/youtube_dl/extractor/dailymail.py deleted file mode 100644 index 67b88fd56..000000000 --- a/youtube_dl/extractor/dailymail.py +++ /dev/null @@ -1,84 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import 
InfoExtractor -from ..compat import compat_str -from ..utils import ( - int_or_none, - determine_protocol, - try_get, - unescapeHTML, -) - - -class DailyMailIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?dailymail\.co\.uk/(?:video/[^/]+/video-|embed/video/)(?P<id>[0-9]+)' - _TESTS = [{ - 'url': 'http://www.dailymail.co.uk/video/tvshowbiz/video-1295863/The-Mountain-appears-sparkling-water-ad-Heavy-Bubbles.html', - 'md5': 'f6129624562251f628296c3a9ffde124', - 'info_dict': { - 'id': '1295863', - 'ext': 'mp4', - 'title': 'The Mountain appears in sparkling water ad for \'Heavy Bubbles\'', - 'description': 'md5:a93d74b6da172dd5dc4d973e0b766a84', - } - }, { - 'url': 'http://www.dailymail.co.uk/embed/video/1295863.html', - 'only_matching': True, - }] - - @staticmethod - def _extract_urls(webpage): - return re.findall( - r'<iframe\b[^>]+\bsrc=["\'](?P<url>(?:https?:)?//(?:www\.)?dailymail\.co\.uk/embed/video/\d+\.html)', - webpage) - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - video_data = self._parse_json(self._search_regex( - r"data-opts='({.+?})'", webpage, 'video data'), video_id) - title = unescapeHTML(video_data['title']) - - sources_url = (try_get( - video_data, - (lambda x: x['plugins']['sources']['url'], - lambda x: x['sources']['url']), compat_str) - or 'http://www.dailymail.co.uk/api/player/%s/video-sources.json' % video_id) - - video_sources = self._download_json(sources_url, video_id) - body = video_sources.get('body') - if body: - video_sources = body - - formats = [] - for rendition in video_sources['renditions']: - rendition_url = rendition.get('url') - if not rendition_url: - continue - tbr = int_or_none(rendition.get('encodingRate'), 1000) - container = rendition.get('videoContainer') - is_hls = container == 'M2TS' - protocol = 'm3u8_native' if is_hls else determine_protocol({'url': rendition_url}) - formats.append({ - 'format_id': ('hls' if is_hls else protocol) + ('-%d' % 
tbr if tbr else ''), - 'url': rendition_url, - 'width': int_or_none(rendition.get('frameWidth')), - 'height': int_or_none(rendition.get('frameHeight')), - 'tbr': tbr, - 'vcodec': rendition.get('videoCodec'), - 'container': container, - 'protocol': protocol, - 'ext': 'mp4' if is_hls else None, - }) - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': title, - 'description': unescapeHTML(video_data.get('descr')), - 'thumbnail': video_data.get('poster') or video_data.get('thumbnail'), - 'formats': formats, - } diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py deleted file mode 100644 index b8529050c..000000000 --- a/youtube_dl/extractor/dailymotion.py +++ /dev/null @@ -1,393 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import functools -import json -import re - -from .common import InfoExtractor -from ..compat import compat_HTTPError -from ..utils import ( - age_restricted, - clean_html, - ExtractorError, - int_or_none, - OnDemandPagedList, - try_get, - unescapeHTML, - urlencode_postdata, -) - - -class DailymotionBaseInfoExtractor(InfoExtractor): - _FAMILY_FILTER = None - _HEADERS = { - 'Content-Type': 'application/json', - 'Origin': 'https://www.dailymotion.com', - } - _NETRC_MACHINE = 'dailymotion' - - def _get_dailymotion_cookies(self): - return self._get_cookies('https://www.dailymotion.com/') - - @staticmethod - def _get_cookie_value(cookies, name): - cookie = cookies.get(name) - if cookie: - return cookie.value - - def _set_dailymotion_cookie(self, name, value): - self._set_cookie('www.dailymotion.com', name, value) - - def _real_initialize(self): - cookies = self._get_dailymotion_cookies() - ff = self._get_cookie_value(cookies, 'ff') - self._FAMILY_FILTER = ff == 'on' if ff else age_restricted(18, self._downloader.params.get('age_limit')) - self._set_dailymotion_cookie('ff', 'on' if self._FAMILY_FILTER else 'off') - - def _call_api(self, object_type, xid, object_fields, note, 
filter_extra=None): - if not self._HEADERS.get('Authorization'): - cookies = self._get_dailymotion_cookies() - token = self._get_cookie_value(cookies, 'access_token') or self._get_cookie_value(cookies, 'client_token') - if not token: - data = { - 'client_id': 'f1a362d288c1b98099c7', - 'client_secret': 'eea605b96e01c796ff369935357eca920c5da4c5', - } - username, password = self._get_login_info() - if username: - data.update({ - 'grant_type': 'password', - 'password': password, - 'username': username, - }) - else: - data['grant_type'] = 'client_credentials' - try: - token = self._download_json( - 'https://graphql.api.dailymotion.com/oauth/token', - None, 'Downloading Access Token', - data=urlencode_postdata(data))['access_token'] - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400: - raise ExtractorError(self._parse_json( - e.cause.read().decode(), xid)['error_description'], expected=True) - raise - self._set_dailymotion_cookie('access_token' if username else 'client_token', token) - self._HEADERS['Authorization'] = 'Bearer ' + token - - resp = self._download_json( - 'https://graphql.api.dailymotion.com/', xid, note, data=json.dumps({ - 'query': '''{ - %s(xid: "%s"%s) { - %s - } -}''' % (object_type, xid, ', ' + filter_extra if filter_extra else '', object_fields), - }).encode(), headers=self._HEADERS) - obj = resp['data'][object_type] - if not obj: - raise ExtractorError(resp['errors'][0]['message'], expected=True) - return obj - - -class DailymotionIE(DailymotionBaseInfoExtractor): - _VALID_URL = r'''(?ix) - https?:// - (?: - (?:(?:www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(?:(?:embed|swf|\#)/)?video|swf)| - (?:www\.)?lequipe\.fr/video - ) - /(?P<id>[^/?_]+)(?:.+?\bplaylist=(?P<playlist_id>x[0-9a-z]+))? 
- ''' - IE_NAME = 'dailymotion' - _TESTS = [{ - 'url': 'http://www.dailymotion.com/video/x5kesuj_office-christmas-party-review-jason-bateman-olivia-munn-t-j-miller_news', - 'md5': '074b95bdee76b9e3654137aee9c79dfe', - 'info_dict': { - 'id': 'x5kesuj', - 'ext': 'mp4', - 'title': 'Office Christmas Party Review – Jason Bateman, Olivia Munn, T.J. Miller', - 'description': 'Office Christmas Party Review - Jason Bateman, Olivia Munn, T.J. Miller', - 'duration': 187, - 'timestamp': 1493651285, - 'upload_date': '20170501', - 'uploader': 'Deadline', - 'uploader_id': 'x1xm8ri', - 'age_limit': 0, - }, - }, { - 'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames', - 'md5': '2137c41a8e78554bb09225b8eb322406', - 'info_dict': { - 'id': 'x2iuewm', - 'ext': 'mp4', - 'title': 'Steam Machine Models, Pricing Listed on Steam Store - IGN News', - 'description': 'Several come bundled with the Steam Controller.', - 'thumbnail': r're:^https?:.*\.(?:jpg|png)$', - 'duration': 74, - 'timestamp': 1425657362, - 'upload_date': '20150306', - 'uploader': 'IGN', - 'uploader_id': 'xijv66', - 'age_limit': 0, - 'view_count': int, - }, - 'skip': 'video gone', - }, { - # Vevo video - 'url': 'http://www.dailymotion.com/video/x149uew_katy-perry-roar-official_musi', - 'info_dict': { - 'title': 'Roar (Official)', - 'id': 'USUV71301934', - 'ext': 'mp4', - 'uploader': 'Katy Perry', - 'upload_date': '20130905', - }, - 'params': { - 'skip_download': True, - }, - 'skip': 'VEVO is only available in some countries', - }, { - # age-restricted video - 'url': 'http://www.dailymotion.com/video/xyh2zz_leanna-decker-cyber-girl-of-the-year-desires-nude-playboy-plus_redband', - 'md5': '0d667a7b9cebecc3c89ee93099c4159d', - 'info_dict': { - 'id': 'xyh2zz', - 'ext': 'mp4', - 'title': 'Leanna Decker - Cyber Girl Of The Year Desires Nude [Playboy Plus]', - 'uploader': 'HotWaves1012', - 'age_limit': 18, - }, - 'skip': 'video gone', - }, { - # geo-restricted, 
player v5 - 'url': 'http://www.dailymotion.com/video/xhza0o', - 'only_matching': True, - }, { - # with subtitles - 'url': 'http://www.dailymotion.com/video/x20su5f_the-power-of-nightmares-1-the-rise-of-the-politics-of-fear-bbc-2004_news', - 'only_matching': True, - }, { - 'url': 'http://www.dailymotion.com/swf/video/x3n92nf', - 'only_matching': True, - }, { - 'url': 'http://www.dailymotion.com/swf/x3ss1m_funny-magic-trick-barry-and-stuart_fun', - 'only_matching': True, - }, { - 'url': 'https://www.lequipe.fr/video/x791mem', - 'only_matching': True, - }, { - 'url': 'https://www.lequipe.fr/video/k7MtHciueyTcrFtFKA2', - 'only_matching': True, - }, { - 'url': 'https://www.dailymotion.com/video/x3z49k?playlist=xv4bw', - 'only_matching': True, - }] - _GEO_BYPASS = False - _COMMON_MEDIA_FIELDS = '''description - geoblockedCountries { - allowed - } - xid''' - - @staticmethod - def _extract_urls(webpage): - urls = [] - # Look for embedded Dailymotion player - # https://developer.dailymotion.com/player#player-parameters - for mobj in re.finditer( - r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage): - urls.append(unescapeHTML(mobj.group('url'))) - for mobj in re.finditer( - r'(?s)DM\.player\([^,]+,\s*{.*?video[\'"]?\s*:\s*["\']?(?P<id>[0-9a-zA-Z]+).+?}\s*\);', webpage): - urls.append('https://www.dailymotion.com/embed/video/' + mobj.group('id')) - return urls - - def _real_extract(self, url): - video_id, playlist_id = re.match(self._VALID_URL, url).groups() - - if playlist_id: - if not self._downloader.params.get('noplaylist'): - self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % playlist_id) - return self.url_result( - 'http://www.dailymotion.com/playlist/' + playlist_id, - 'DailymotionPlaylist', playlist_id) - self.to_screen('Downloading just video %s because of --no-playlist' % video_id) - - password = 
self._downloader.params.get('videopassword') - media = self._call_api( - 'media', video_id, '''... on Video { - %s - stats { - likes { - total - } - views { - total - } - } - } - ... on Live { - %s - audienceCount - isOnAir - }''' % (self._COMMON_MEDIA_FIELDS, self._COMMON_MEDIA_FIELDS), 'Downloading media JSON metadata', - 'password: "%s"' % self._downloader.params.get('videopassword') if password else None) - xid = media['xid'] - - metadata = self._download_json( - 'https://www.dailymotion.com/player/metadata/video/' + xid, - xid, 'Downloading metadata JSON', - query={'app': 'com.dailymotion.neon'}) - - error = metadata.get('error') - if error: - title = error.get('title') or error['raw_message'] - # See https://developer.dailymotion.com/api#access-error - if error.get('code') == 'DM007': - allowed_countries = try_get(media, lambda x: x['geoblockedCountries']['allowed'], list) - self.raise_geo_restricted(msg=title, countries=allowed_countries) - raise ExtractorError( - '%s said: %s' % (self.IE_NAME, title), expected=True) - - title = metadata['title'] - is_live = media.get('isOnAir') - formats = [] - for quality, media_list in metadata['qualities'].items(): - for m in media_list: - media_url = m.get('url') - media_type = m.get('type') - if not media_url or media_type == 'application/vnd.lumberjack.manifest': - continue - if media_type == 'application/x-mpegURL': - formats.extend(self._extract_m3u8_formats( - media_url, video_id, 'mp4', - 'm3u8' if is_live else 'm3u8_native', - m3u8_id='hls', fatal=False)) - else: - f = { - 'url': media_url, - 'format_id': 'http-' + quality, - } - m = re.search(r'/H264-(\d+)x(\d+)(?:-(60)/)?', media_url) - if m: - width, height, fps = map(int_or_none, m.groups()) - f.update({ - 'fps': fps, - 'height': height, - 'width': width, - }) - formats.append(f) - for f in formats: - f['url'] = f['url'].split('#')[0] - if not f.get('fps') and f['format_id'].endswith('@60'): - f['fps'] = 60 - self._sort_formats(formats) - - subtitles = {} - 
subtitles_data = try_get(metadata, lambda x: x['subtitles']['data'], dict) or {} - for subtitle_lang, subtitle in subtitles_data.items(): - subtitles[subtitle_lang] = [{ - 'url': subtitle_url, - } for subtitle_url in subtitle.get('urls', [])] - - thumbnails = [] - for height, poster_url in metadata.get('posters', {}).items(): - thumbnails.append({ - 'height': int_or_none(height), - 'id': height, - 'url': poster_url, - }) - - owner = metadata.get('owner') or {} - stats = media.get('stats') or {} - get_count = lambda x: int_or_none(try_get(stats, lambda y: y[x + 's']['total'])) - - return { - 'id': video_id, - 'title': self._live_title(title) if is_live else title, - 'description': clean_html(media.get('description')), - 'thumbnails': thumbnails, - 'duration': int_or_none(metadata.get('duration')) or None, - 'timestamp': int_or_none(metadata.get('created_time')), - 'uploader': owner.get('screenname'), - 'uploader_id': owner.get('id') or metadata.get('screenname'), - 'age_limit': 18 if metadata.get('explicit') else 0, - 'tags': metadata.get('tags'), - 'view_count': get_count('view') or int_or_none(media.get('audienceCount')), - 'like_count': get_count('like'), - 'formats': formats, - 'subtitles': subtitles, - 'is_live': is_live, - } - - -class DailymotionPlaylistBaseIE(DailymotionBaseInfoExtractor): - _PAGE_SIZE = 100 - - def _fetch_page(self, playlist_id, page): - page += 1 - videos = self._call_api( - self._OBJECT_TYPE, playlist_id, - '''videos(allowExplicit: %s, first: %d, page: %d) { - edges { - node { - xid - url - } - } - }''' % ('false' if self._FAMILY_FILTER else 'true', self._PAGE_SIZE, page), - 'Downloading page %d' % page)['videos'] - for edge in videos['edges']: - node = edge['node'] - yield self.url_result( - node['url'], DailymotionIE.ie_key(), node['xid']) - - def _real_extract(self, url): - playlist_id = self._match_id(url) - entries = OnDemandPagedList(functools.partial( - self._fetch_page, playlist_id), self._PAGE_SIZE) - return self.playlist_result( 
- entries, playlist_id) - - -class DailymotionPlaylistIE(DailymotionPlaylistBaseIE): - IE_NAME = 'dailymotion:playlist' - _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>x[0-9a-z]+)' - _TESTS = [{ - 'url': 'http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q', - 'info_dict': { - 'id': 'xv4bw', - }, - 'playlist_mincount': 20, - }] - _OBJECT_TYPE = 'collection' - - -class DailymotionUserIE(DailymotionPlaylistBaseIE): - IE_NAME = 'dailymotion:user' - _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist)/)(?:(?:old/)?user/)?(?P<id>[^/]+)' - _TESTS = [{ - 'url': 'https://www.dailymotion.com/user/nqtv', - 'info_dict': { - 'id': 'nqtv', - }, - 'playlist_mincount': 152, - }, { - 'url': 'http://www.dailymotion.com/user/UnderProject', - 'info_dict': { - 'id': 'UnderProject', - }, - 'playlist_mincount': 1000, - 'skip': 'Takes too long time', - }, { - 'url': 'https://www.dailymotion.com/user/nqtv', - 'info_dict': { - 'id': 'nqtv', - }, - 'playlist_mincount': 148, - 'params': { - 'age_limit': 0, - }, - }] - _OBJECT_TYPE = 'channel' diff --git a/youtube_dl/extractor/daum.py b/youtube_dl/extractor/daum.py deleted file mode 100644 index 137095577..000000000 --- a/youtube_dl/extractor/daum.py +++ /dev/null @@ -1,266 +0,0 @@ -# coding: utf-8 - -from __future__ import unicode_literals - -import itertools - -from .common import InfoExtractor -from ..compat import ( - compat_parse_qs, - compat_urllib_parse_unquote, - compat_urlparse, -) - - -class DaumBaseIE(InfoExtractor): - _KAKAO_EMBED_BASE = 'http://tv.kakao.com/embed/player/cliplink/' - - -class DaumIE(DaumBaseIE): - _VALID_URL = r'https?://(?:(?:m\.)?tvpot\.daum\.net/v/|videofarm\.daum\.net/controller/player/VodPlayer\.swf\?vid=)(?P<id>[^?#&]+)' - IE_NAME = 'daum.net' - - _TESTS = [{ - 'url': 'http://tvpot.daum.net/v/vab4dyeDBysyBssyukBUjBz', - 'info_dict': { - 'id': 'vab4dyeDBysyBssyukBUjBz', - 'ext': 'mp4', - 'title': '마크 헌트 vs 안토니오 실바', - 
'description': 'Mark Hunt vs Antonio Silva', - 'upload_date': '20131217', - 'thumbnail': r're:^https?://.*\.(?:jpg|png)', - 'duration': 2117, - 'view_count': int, - 'comment_count': int, - 'uploader_id': 186139, - 'uploader': '콘간지', - 'timestamp': 1387310323, - }, - }, { - 'url': 'http://m.tvpot.daum.net/v/65139429', - 'info_dict': { - 'id': '65139429', - 'ext': 'mp4', - 'title': '1297회, \'아빠 아들로 태어나길 잘 했어\' 민수, 감동의 눈물[아빠 어디가] 20150118', - 'description': 'md5:79794514261164ff27e36a21ad229fc5', - 'upload_date': '20150118', - 'thumbnail': r're:^https?://.*\.(?:jpg|png)', - 'duration': 154, - 'view_count': int, - 'comment_count': int, - 'uploader': 'MBC 예능', - 'uploader_id': 132251, - 'timestamp': 1421604228, - }, - }, { - 'url': 'http://tvpot.daum.net/v/07dXWRka62Y%24', - 'only_matching': True, - }, { - 'url': 'http://videofarm.daum.net/controller/player/VodPlayer.swf?vid=vwIpVpCQsT8%24&ref=', - 'info_dict': { - 'id': 'vwIpVpCQsT8$', - 'ext': 'flv', - 'title': '01-Korean War ( Trouble on the horizon )', - 'description': 'Korean War 01\r\nTrouble on the horizon\r\n전쟁의 먹구름', - 'upload_date': '20080223', - 'thumbnail': r're:^https?://.*\.(?:jpg|png)', - 'duration': 249, - 'view_count': int, - 'comment_count': int, - 'uploader': '까칠한 墮落始祖 황비홍님의', - 'uploader_id': 560824, - 'timestamp': 1203770745, - }, - }, { - # Requires dte_type=WEB (#9972) - 'url': 'http://tvpot.daum.net/v/s3794Uf1NZeZ1qMpGpeqeRU', - 'md5': 'a8917742069a4dd442516b86e7d66529', - 'info_dict': { - 'id': 's3794Uf1NZeZ1qMpGpeqeRU', - 'ext': 'mp4', - 'title': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)', - 'description': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)\r\n\r\n[쇼! 음악중심] 20160611, 507회', - 'upload_date': '20170129', - 'uploader': '쇼! 
음악중심', - 'uploader_id': 2653210, - 'timestamp': 1485684628, - }, - }] - - def _real_extract(self, url): - video_id = compat_urllib_parse_unquote(self._match_id(url)) - if not video_id.isdigit(): - video_id += '@my' - return self.url_result( - self._KAKAO_EMBED_BASE + video_id, 'Kakao', video_id) - - -class DaumClipIE(DaumBaseIE): - _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/(?:clip/ClipView.(?:do|tv)|mypot/View.do)\?.*?clipid=(?P<id>\d+)' - IE_NAME = 'daum.net:clip' - _URL_TEMPLATE = 'http://tvpot.daum.net/clip/ClipView.do?clipid=%s' - - _TESTS = [{ - 'url': 'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690', - 'info_dict': { - 'id': '52554690', - 'ext': 'mp4', - 'title': 'DOTA 2GETHER 시즌2 6회 - 2부', - 'description': 'DOTA 2GETHER 시즌2 6회 - 2부', - 'upload_date': '20130831', - 'thumbnail': r're:^https?://.*\.(?:jpg|png)', - 'duration': 3868, - 'view_count': int, - 'uploader': 'GOMeXP', - 'uploader_id': 6667, - 'timestamp': 1377911092, - }, - }, { - 'url': 'http://m.tvpot.daum.net/clip/ClipView.tv?clipid=54999425', - 'only_matching': True, - }] - - @classmethod - def suitable(cls, url): - return False if DaumPlaylistIE.suitable(url) or DaumUserIE.suitable(url) else super(DaumClipIE, cls).suitable(url) - - def _real_extract(self, url): - video_id = self._match_id(url) - return self.url_result( - self._KAKAO_EMBED_BASE + video_id, 'Kakao', video_id) - - -class DaumListIE(InfoExtractor): - def _get_entries(self, list_id, list_id_type): - name = None - entries = [] - for pagenum in itertools.count(1): - list_info = self._download_json( - 'http://tvpot.daum.net/mypot/json/GetClipInfo.do?size=48&init=true&order=date&page=%d&%s=%s' % ( - pagenum, list_id_type, list_id), list_id, 'Downloading list info - %s' % pagenum) - - entries.extend([ - self.url_result( - 'http://tvpot.daum.net/v/%s' % clip['vid']) - for clip in list_info['clip_list'] - ]) - - if not name: - name = list_info.get('playlist_bean', {}).get('name') or \ - list_info.get('potInfo', {}).get('name') - 
- if not list_info.get('has_more'): - break - - return name, entries - - def _check_clip(self, url, list_id): - query_dict = compat_parse_qs(compat_urlparse.urlparse(url).query) - if 'clipid' in query_dict: - clip_id = query_dict['clipid'][0] - if self._downloader.params.get('noplaylist'): - self.to_screen('Downloading just video %s because of --no-playlist' % clip_id) - return self.url_result(DaumClipIE._URL_TEMPLATE % clip_id, 'DaumClip') - else: - self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % list_id) - - -class DaumPlaylistIE(DaumListIE): - _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/mypot/(?:View\.do|Top\.tv)\?.*?playlistid=(?P<id>[0-9]+)' - IE_NAME = 'daum.net:playlist' - _URL_TEMPLATE = 'http://tvpot.daum.net/mypot/View.do?playlistid=%s' - - _TESTS = [{ - 'note': 'Playlist url with clipid', - 'url': 'http://tvpot.daum.net/mypot/View.do?playlistid=6213966&clipid=73806844', - 'info_dict': { - 'id': '6213966', - 'title': 'Woorissica Official', - }, - 'playlist_mincount': 181 - }, { - 'note': 'Playlist url with clipid - noplaylist', - 'url': 'http://tvpot.daum.net/mypot/View.do?playlistid=6213966&clipid=73806844', - 'info_dict': { - 'id': '73806844', - 'ext': 'mp4', - 'title': '151017 Airport', - 'upload_date': '20160117', - }, - 'params': { - 'noplaylist': True, - 'skip_download': True, - } - }] - - @classmethod - def suitable(cls, url): - return False if DaumUserIE.suitable(url) else super(DaumPlaylistIE, cls).suitable(url) - - def _real_extract(self, url): - list_id = self._match_id(url) - - clip_result = self._check_clip(url, list_id) - if clip_result: - return clip_result - - name, entries = self._get_entries(list_id, 'playlistid') - - return self.playlist_result(entries, list_id, name) - - -class DaumUserIE(DaumListIE): - _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/mypot/(?:View|Top)\.(?:do|tv)\?.*?ownerid=(?P<id>[0-9a-zA-Z]+)' - IE_NAME = 'daum.net:user' - - _TESTS = [{ - 'url': 
'http://tvpot.daum.net/mypot/View.do?ownerid=o2scDLIVbHc0', - 'info_dict': { - 'id': 'o2scDLIVbHc0', - 'title': '마이 리틀 텔레비전', - }, - 'playlist_mincount': 213 - }, { - 'url': 'http://tvpot.daum.net/mypot/View.do?ownerid=o2scDLIVbHc0&clipid=73801156', - 'info_dict': { - 'id': '73801156', - 'ext': 'mp4', - 'title': '[미공개] 김구라, 오만석이 부릅니다 \'오케피\' - 마이 리틀 텔레비전 20160116', - 'upload_date': '20160117', - 'description': 'md5:5e91d2d6747f53575badd24bd62b9f36' - }, - 'params': { - 'noplaylist': True, - 'skip_download': True, - } - }, { - 'note': 'Playlist url has ownerid and playlistid, playlistid takes precedence', - 'url': 'http://tvpot.daum.net/mypot/View.do?ownerid=o2scDLIVbHc0&playlistid=6196631', - 'info_dict': { - 'id': '6196631', - 'title': '마이 리틀 텔레비전 - 20160109', - }, - 'playlist_count': 11 - }, { - 'url': 'http://tvpot.daum.net/mypot/Top.do?ownerid=o2scDLIVbHc0', - 'only_matching': True, - }, { - 'url': 'http://m.tvpot.daum.net/mypot/Top.tv?ownerid=45x1okb1If50&playlistid=3569733', - 'only_matching': True, - }] - - def _real_extract(self, url): - list_id = self._match_id(url) - - clip_result = self._check_clip(url, list_id) - if clip_result: - return clip_result - - query_dict = compat_parse_qs(compat_urlparse.urlparse(url).query) - if 'playlistid' in query_dict: - playlist_id = query_dict['playlistid'][0] - return self.url_result(DaumPlaylistIE._URL_TEMPLATE % playlist_id, 'DaumPlaylist') - - name, entries = self._get_entries(list_id, 'ownerid') - - return self.playlist_result(entries, list_id, name) diff --git a/youtube_dl/extractor/dbtv.py b/youtube_dl/extractor/dbtv.py deleted file mode 100644 index aaedf2e3d..000000000 --- a/youtube_dl/extractor/dbtv.py +++ /dev/null @@ -1,57 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor - - -class DBTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?dagbladet\.no/video/(?:(?:embed|(?P<display_id>[^/]+))/)?(?P<id>[0-9A-Za-z_-]{11}|[a-zA-Z0-9]{8})' - 
_TESTS = [{ - 'url': 'https://www.dagbladet.no/video/PynxJnNWChE/', - 'md5': 'b8f850ba1860adbda668d367f9b77699', - 'info_dict': { - 'id': 'PynxJnNWChE', - 'ext': 'mp4', - 'title': 'Skulle teste ut fornøyelsespark, men kollegaen var bare opptatt av bikinikroppen', - 'description': 'md5:49cc8370e7d66e8a2ef15c3b4631fd3f', - 'thumbnail': r're:https?://.*\.jpg', - 'upload_date': '20160916', - 'duration': 69, - 'uploader_id': 'UCk5pvsyZJoYJBd7_oFPTlRQ', - 'uploader': 'Dagbladet', - }, - 'add_ie': ['Youtube'] - }, { - 'url': 'https://www.dagbladet.no/video/embed/xlGmyIeN9Jo/?autoplay=false', - 'only_matching': True, - }, { - 'url': 'https://www.dagbladet.no/video/truer-iran-bor-passe-dere/PalfB2Cw', - 'only_matching': True, - }] - - @staticmethod - def _extract_urls(webpage): - return [url for _, url in re.findall( - r'<iframe[^>]+src=(["\'])((?:https?:)?//(?:www\.)?dagbladet\.no/video/embed/(?:[0-9A-Za-z_-]{11}|[a-zA-Z0-9]{8}).*?)\1', - webpage)] - - def _real_extract(self, url): - display_id, video_id = re.match(self._VALID_URL, url).groups() - info = { - '_type': 'url_transparent', - 'id': video_id, - 'display_id': display_id, - } - if len(video_id) == 11: - info.update({ - 'url': video_id, - 'ie_key': 'Youtube', - }) - else: - info.update({ - 'url': 'jwplatform:' + video_id, - 'ie_key': 'JWPlatform', - }) - return info diff --git a/youtube_dl/extractor/dctp.py b/youtube_dl/extractor/dctp.py deleted file mode 100644 index e700f8d86..000000000 --- a/youtube_dl/extractor/dctp.py +++ /dev/null @@ -1,105 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..compat import compat_str -from ..utils import ( - float_or_none, - int_or_none, - unified_timestamp, - url_or_none, -) - - -class DctpTvIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?dctp\.tv/(?:#/)?filme/(?P<id>[^/?#&]+)' - _TESTS = [{ - # 4x3 - 'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/', - 'md5': 
'3ffbd1556c3fe210724d7088fad723e3', - 'info_dict': { - 'id': '95eaa4f33dad413aa17b4ee613cccc6c', - 'display_id': 'videoinstallation-fuer-eine-kaufhausfassade', - 'ext': 'm4v', - 'title': 'Videoinstallation für eine Kaufhausfassade', - 'description': 'Kurzfilm', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 71.24, - 'timestamp': 1302172322, - 'upload_date': '20110407', - }, - }, { - # 16x9 - 'url': 'http://www.dctp.tv/filme/sind-youtuber-die-besseren-lehrer/', - 'only_matching': True, - }] - - _BASE_URL = 'http://dctp-ivms2-restapi.s3.amazonaws.com' - - def _real_extract(self, url): - display_id = self._match_id(url) - - version = self._download_json( - '%s/version.json' % self._BASE_URL, display_id, - 'Downloading version JSON') - - restapi_base = '%s/%s/restapi' % ( - self._BASE_URL, version['version_name']) - - info = self._download_json( - '%s/slugs/%s.json' % (restapi_base, display_id), display_id, - 'Downloading video info JSON') - - media = self._download_json( - '%s/media/%s.json' % (restapi_base, compat_str(info['object_id'])), - display_id, 'Downloading media JSON') - - uuid = media['uuid'] - title = media['title'] - is_wide = media.get('is_wide') - formats = [] - - def add_formats(suffix): - templ = 'https://%%s/%s_dctp_%s.m4v' % (uuid, suffix) - formats.extend([{ - 'format_id': 'hls-' + suffix, - 'url': templ % 'cdn-segments.dctp.tv' + '/playlist.m3u8', - 'protocol': 'm3u8_native', - }, { - 'format_id': 's3-' + suffix, - 'url': templ % 'completed-media.s3.amazonaws.com', - }, { - 'format_id': 'http-' + suffix, - 'url': templ % 'cdn-media.dctp.tv', - }]) - - add_formats('0500_' + ('16x9' if is_wide else '4x3')) - if is_wide: - add_formats('720p') - - thumbnails = [] - images = media.get('images') - if isinstance(images, list): - for image in images: - if not isinstance(image, dict): - continue - image_url = url_or_none(image.get('url')) - if not image_url: - continue - thumbnails.append({ - 'url': image_url, - 'width': 
int_or_none(image.get('width')), - 'height': int_or_none(image.get('height')), - }) - - return { - 'id': uuid, - 'display_id': display_id, - 'title': title, - 'alt_title': media.get('subtitle'), - 'description': media.get('description') or media.get('teaser'), - 'timestamp': unified_timestamp(media.get('created')), - 'duration': float_or_none(media.get('duration_in_ms'), scale=1000), - 'thumbnails': thumbnails, - 'formats': formats, - } diff --git a/youtube_dl/extractor/deezer.py b/youtube_dl/extractor/deezer.py deleted file mode 100644 index a38b2683d..000000000 --- a/youtube_dl/extractor/deezer.py +++ /dev/null @@ -1,91 +0,0 @@ -from __future__ import unicode_literals - -import json -import re - -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - int_or_none, - orderedSet, -) - - -class DeezerPlaylistIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?deezer\.com/playlist/(?P<id>[0-9]+)' - _TEST = { - 'url': 'http://www.deezer.com/playlist/176747451', - 'info_dict': { - 'id': '176747451', - 'title': 'Best!', - 'uploader': 'Anonymous', - 'thumbnail': r're:^https?://cdn-images\.deezer\.com/images/cover/.*\.jpg$', - }, - 'playlist_count': 30, - 'skip': 'Only available in .de', - } - - def _real_extract(self, url): - if 'test' not in self._downloader.params: - self._downloader.report_warning('For now, this extractor only supports the 30 second previews. 
Patches welcome!') - - mobj = re.match(self._VALID_URL, url) - playlist_id = mobj.group('id') - - webpage = self._download_webpage(url, playlist_id) - geoblocking_msg = self._html_search_regex( - r'<p class="soon-txt">(.*?)</p>', webpage, 'geoblocking message', - default=None) - if geoblocking_msg is not None: - raise ExtractorError( - 'Deezer said: %s' % geoblocking_msg, expected=True) - - data_json = self._search_regex( - (r'__DZR_APP_STATE__\s*=\s*({.+?})\s*</script>', - r'naboo\.display\(\'[^\']+\',\s*(.*?)\);\n'), - webpage, 'data JSON') - data = json.loads(data_json) - - playlist_title = data.get('DATA', {}).get('TITLE') - playlist_uploader = data.get('DATA', {}).get('PARENT_USERNAME') - playlist_thumbnail = self._search_regex( - r'<img id="naboo_playlist_image".*?src="([^"]+)"', webpage, - 'playlist thumbnail') - - preview_pattern = self._search_regex( - r"var SOUND_PREVIEW_GATEWAY\s*=\s*'([^']+)';", webpage, - 'preview URL pattern', fatal=False) - entries = [] - for s in data['SONGS']['data']: - puid = s['MD5_ORIGIN'] - preview_video_url = preview_pattern.\ - replace('{0}', puid[0]).\ - replace('{1}', puid).\ - replace('{2}', s['MEDIA_VERSION']) - formats = [{ - 'format_id': 'preview', - 'url': preview_video_url, - 'preference': -100, # Only the first 30 seconds - 'ext': 'mp3', - }] - self._sort_formats(formats) - artists = ', '.join( - orderedSet(a['ART_NAME'] for a in s['ARTISTS'])) - entries.append({ - 'id': s['SNG_ID'], - 'duration': int_or_none(s.get('DURATION')), - 'title': '%s - %s' % (artists, s['SNG_TITLE']), - 'uploader': s['ART_NAME'], - 'uploader_id': s['ART_ID'], - 'age_limit': 16 if s.get('EXPLICIT_LYRICS') == '1' else 0, - 'formats': formats, - }) - - return { - '_type': 'playlist', - 'id': playlist_id, - 'title': playlist_title, - 'uploader': playlist_uploader, - 'thumbnail': playlist_thumbnail, - 'entries': entries, - } diff --git a/youtube_dl/extractor/defense.py b/youtube_dl/extractor/defense.py deleted file mode 100644 index 
9fe144e14..000000000 --- a/youtube_dl/extractor/defense.py +++ /dev/null @@ -1,39 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor - - -class DefenseGouvFrIE(InfoExtractor): - IE_NAME = 'defense.gouv.fr' - _VALID_URL = r'https?://.*?\.defense\.gouv\.fr/layout/set/ligthboxvideo/base-de-medias/webtv/(?P<id>[^/?#]*)' - - _TEST = { - 'url': 'http://www.defense.gouv.fr/layout/set/ligthboxvideo/base-de-medias/webtv/attaque-chimique-syrienne-du-21-aout-2013-1', - 'md5': '75bba6124da7e63d2d60b5244ec9430c', - 'info_dict': { - 'id': '11213', - 'ext': 'mp4', - 'title': 'attaque-chimique-syrienne-du-21-aout-2013-1' - } - } - - def _real_extract(self, url): - title = self._match_id(url) - webpage = self._download_webpage(url, title) - - video_id = self._search_regex( - r"flashvars.pvg_id=\"(\d+)\";", - webpage, 'ID') - - json_url = ( - 'http://static.videos.gouv.fr/brightcovehub/export/json/%s' % - video_id) - info = self._download_json(json_url, title, 'Downloading JSON config') - video_url = info['renditions'][0]['url'] - - return { - 'id': video_id, - 'ext': 'mp4', - 'url': video_url, - 'title': title, - } diff --git a/youtube_dl/extractor/democracynow.py b/youtube_dl/extractor/democracynow.py deleted file mode 100644 index 5c9c0ecdc..000000000 --- a/youtube_dl/extractor/democracynow.py +++ /dev/null @@ -1,96 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re -import os.path - -from .common import InfoExtractor -from ..compat import compat_urlparse -from ..utils import ( - url_basename, - remove_start, -) - - -class DemocracynowIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?democracynow\.org/(?P<id>[^\?]*)' - IE_NAME = 'democracynow' - _TESTS = [{ - 'url': 'http://www.democracynow.org/shows/2015/7/3', - 'md5': '3757c182d3d84da68f5c8f506c18c196', - 'info_dict': { - 'id': '2015-0703-001', - 'ext': 'mp4', - 'title': 'Daily Show for July 03, 2015', - 'description': 'md5:80eb927244d6749900de6072c7cc2c86', - 
}, - }, { - 'url': 'http://www.democracynow.org/2015/7/3/this_flag_comes_down_today_bree', - 'info_dict': { - 'id': '2015-0703-001', - 'ext': 'mp4', - 'title': '"This Flag Comes Down Today": Bree Newsome Scales SC Capitol Flagpole, Takes Down Confederate Flag', - 'description': 'md5:4d2bc4f0d29f5553c2210a4bc7761a21', - }, - 'params': { - 'skip_download': True, - }, - }] - - def _real_extract(self, url): - display_id = self._match_id(url) - - webpage = self._download_webpage(url, display_id) - - json_data = self._parse_json(self._search_regex( - r'<script[^>]+type="text/json"[^>]*>\s*({[^>]+})', webpage, 'json'), - display_id) - - title = json_data['title'] - formats = [] - - video_id = None - - for key in ('file', 'audio', 'video', 'high_res_video'): - media_url = json_data.get(key, '') - if not media_url: - continue - media_url = re.sub(r'\?.*', '', compat_urlparse.urljoin(url, media_url)) - video_id = video_id or remove_start(os.path.splitext(url_basename(media_url))[0], 'dn') - formats.append({ - 'url': media_url, - 'vcodec': 'none' if key == 'audio' else None, - }) - - self._sort_formats(formats) - - default_lang = 'en' - subtitles = {} - - def add_subtitle_item(lang, info_dict): - if lang not in subtitles: - subtitles[lang] = [] - subtitles[lang].append(info_dict) - - # chapter_file are not subtitles - if 'caption_file' in json_data: - add_subtitle_item(default_lang, { - 'url': compat_urlparse.urljoin(url, json_data['caption_file']), - }) - - for subtitle_item in json_data.get('captions', []): - lang = subtitle_item.get('language', '').lower() or default_lang - add_subtitle_item(lang, { - 'url': compat_urlparse.urljoin(url, subtitle_item['url']), - }) - - description = self._og_search_description(webpage, default=None) - - return { - 'id': video_id or display_id, - 'title': title, - 'description': description, - 'thumbnail': json_data.get('image'), - 'subtitles': subtitles, - 'formats': formats, - } diff --git a/youtube_dl/extractor/dfb.py 
b/youtube_dl/extractor/dfb.py deleted file mode 100644 index a4d0448c2..000000000 --- a/youtube_dl/extractor/dfb.py +++ /dev/null @@ -1,57 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import unified_strdate - - -class DFBIE(InfoExtractor): - IE_NAME = 'tv.dfb.de' - _VALID_URL = r'https?://tv\.dfb\.de/video/(?P<display_id>[^/]+)/(?P<id>\d+)' - - _TEST = { - 'url': 'http://tv.dfb.de/video/u-19-em-stimmen-zum-spiel-gegen-russland/11633/', - 'md5': 'ac0f98a52a330f700b4b3034ad240649', - 'info_dict': { - 'id': '11633', - 'display_id': 'u-19-em-stimmen-zum-spiel-gegen-russland', - 'ext': 'mp4', - 'title': 'U 19-EM: Stimmen zum Spiel gegen Russland', - 'upload_date': '20150714', - }, - } - - def _real_extract(self, url): - display_id, video_id = re.match(self._VALID_URL, url).groups() - - player_info = self._download_xml( - 'http://tv.dfb.de/server/hd_video.php?play=%s' % video_id, - display_id) - video_info = player_info.find('video') - stream_access_url = self._proto_relative_url(video_info.find('url').text.strip()) - - formats = [] - # see http://tv.dfb.de/player/js/ajax.js for the method to extract m3u8 formats - for sa_url in (stream_access_url, stream_access_url + '&area=&format=iphone'): - stream_access_info = self._download_xml(sa_url, display_id) - token_el = stream_access_info.find('token') - manifest_url = token_el.attrib['url'] + '?' 
+ 'hdnea=' + token_el.attrib['auth'] - if '.f4m' in manifest_url: - formats.extend(self._extract_f4m_formats( - manifest_url + '&hdcore=3.2.0', - display_id, f4m_id='hds', fatal=False)) - else: - formats.extend(self._extract_m3u8_formats( - manifest_url, display_id, 'mp4', - 'm3u8_native', m3u8_id='hls', fatal=False)) - self._sort_formats(formats) - - return { - 'id': video_id, - 'display_id': display_id, - 'title': video_info.find('title').text, - 'thumbnail': 'http://tv.dfb.de/images/%s_640x360.jpg' % video_id, - 'upload_date': unified_strdate(video_info.find('time_date').text), - 'formats': formats, - } diff --git a/youtube_dl/extractor/dhm.py b/youtube_dl/extractor/dhm.py deleted file mode 100644 index aee72a6ed..000000000 --- a/youtube_dl/extractor/dhm.py +++ /dev/null @@ -1,59 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import parse_duration - - -class DHMIE(InfoExtractor): - IE_DESC = 'Filmarchiv - Deutsches Historisches Museum' - _VALID_URL = r'https?://(?:www\.)?dhm\.de/filmarchiv/(?:[^/]+/)+(?P<id>[^/]+)' - - _TESTS = [{ - 'url': 'http://www.dhm.de/filmarchiv/die-filme/the-marshallplan-at-work-in-west-germany/', - 'md5': '11c475f670209bf6acca0b2b7ef51827', - 'info_dict': { - 'id': 'the-marshallplan-at-work-in-west-germany', - 'ext': 'flv', - 'title': 'MARSHALL PLAN AT WORK IN WESTERN GERMANY, THE', - 'description': 'md5:1fabd480c153f97b07add61c44407c82', - 'duration': 660, - 'thumbnail': r're:^https?://.*\.jpg$', - }, - }, { - 'url': 'http://www.dhm.de/filmarchiv/02-mapping-the-wall/peter-g/rolle-1/', - 'md5': '09890226332476a3e3f6f2cb74734aa5', - 'info_dict': { - 'id': 'rolle-1', - 'ext': 'flv', - 'title': 'ROLLE 1', - 'thumbnail': r're:^https?://.*\.jpg$', - }, - }] - - def _real_extract(self, url): - playlist_id = self._match_id(url) - - webpage = self._download_webpage(url, playlist_id) - - playlist_url = self._search_regex( - r"file\s*:\s*'([^']+)'", webpage, 'playlist url') - - entries = 
self._extract_xspf_playlist(playlist_url, playlist_id) - - title = self._search_regex( - [r'dc:title="([^"]+)"', r'<title> »([^<]+)'], - webpage, 'title').strip() - description = self._html_search_regex( - r'

Description:(.+?)

', - webpage, 'description', default=None) - duration = parse_duration(self._search_regex( - r'Length\s*\s*:\s*
([^<]+)', - webpage, 'duration', default=None)) - - entries[0].update({ - 'title': title, - 'description': description, - 'duration': duration, - }) - - return self.playlist_result(entries, playlist_id) diff --git a/youtube_dl/extractor/digg.py b/youtube_dl/extractor/digg.py deleted file mode 100644 index 913c1750f..000000000 --- a/youtube_dl/extractor/digg.py +++ /dev/null @@ -1,56 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import js_to_json - - -class DiggIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?digg\.com/video/(?P[^/?#&]+)' - _TESTS = [{ - # JWPlatform via provider - 'url': 'http://digg.com/video/sci-fi-short-jonah-daniel-kaluuya-get-out', - 'info_dict': { - 'id': 'LcqvmS0b', - 'ext': 'mp4', - 'title': "'Get Out' Star Daniel Kaluuya Goes On 'Moby Dick'-Like Journey In Sci-Fi Short 'Jonah'", - 'description': 'md5:541bb847648b6ee3d6514bc84b82efda', - 'upload_date': '20180109', - 'timestamp': 1515530551, - }, - 'params': { - 'skip_download': True, - }, - }, { - # Youtube via provider - 'url': 'http://digg.com/video/dog-boat-seal-play', - 'only_matching': True, - }, { - # vimeo as regular embed - 'url': 'http://digg.com/video/dream-girl-short-film', - 'only_matching': True, - }] - - def _real_extract(self, url): - display_id = self._match_id(url) - - webpage = self._download_webpage(url, display_id) - - info = self._parse_json( - self._search_regex( - r'(?s)video_info\s*=\s*({.+?});\n', webpage, 'video info', - default='{}'), display_id, transform_source=js_to_json, - fatal=False) - - video_id = info.get('video_id') - - if video_id: - provider = info.get('provider_name') - if provider == 'youtube': - return self.url_result( - video_id, ie='Youtube', video_id=video_id) - elif provider == 'jwplayer': - return self.url_result( - 'jwplatform:%s' % video_id, ie='JWPlatform', - video_id=video_id) - - return self.url_result(url, 'Generic') diff --git a/youtube_dl/extractor/digiteka.py 
b/youtube_dl/extractor/digiteka.py deleted file mode 100644 index 3dfde0d8c..000000000 --- a/youtube_dl/extractor/digiteka.py +++ /dev/null @@ -1,112 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import int_or_none - - -class DigitekaIE(InfoExtractor): - _VALID_URL = r'''(?x) - https?://(?:www\.)?(?:digiteka\.net|ultimedia\.com)/ - (?: - deliver/ - (?P - generic| - musique - ) - (?:/[^/]+)*/ - (?: - src| - article - )| - default/index/video - (?P - generic| - music - ) - /id - )/(?P[\d+a-z]+)''' - _TESTS = [{ - # news - 'url': 'https://www.ultimedia.com/default/index/videogeneric/id/s8uk0r', - 'md5': '276a0e49de58c7e85d32b057837952a2', - 'info_dict': { - 'id': 's8uk0r', - 'ext': 'mp4', - 'title': 'Loi sur la fin de vie: le texte prévoit un renforcement des directives anticipées', - 'thumbnail': r're:^https?://.*\.jpg', - 'duration': 74, - 'upload_date': '20150317', - 'timestamp': 1426604939, - 'uploader_id': '3fszv', - }, - }, { - # music - 'url': 'https://www.ultimedia.com/default/index/videomusic/id/xvpfp8', - 'md5': '2ea3513813cf230605c7e2ffe7eca61c', - 'info_dict': { - 'id': 'xvpfp8', - 'ext': 'mp4', - 'title': 'Two - C\'est La Vie (clip)', - 'thumbnail': r're:^https?://.*\.jpg', - 'duration': 233, - 'upload_date': '20150224', - 'timestamp': 1424760500, - 'uploader_id': '3rfzk', - }, - }, { - 'url': 'https://www.digiteka.net/deliver/generic/iframe/mdtk/01637594/src/lqm3kl/zone/1/showtitle/1/autoplay/yes', - 'only_matching': True, - }] - - @staticmethod - def _extract_url(webpage): - mobj = re.search( - r'<(?:iframe|script)[^>]+src=["\'](?P(?:https?:)?//(?:www\.)?ultimedia\.com/deliver/(?:generic|musique)(?:/[^/]+)*/(?:src|article)/[\d+a-z]+)', - webpage) - if mobj: - return mobj.group('url') - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - video_type = mobj.group('embed_type') or mobj.group('site_type') - if video_type == 
'music': - video_type = 'musique' - - deliver_info = self._download_json( - 'http://www.ultimedia.com/deliver/video?video=%s&topic=%s' % (video_id, video_type), - video_id) - - yt_id = deliver_info.get('yt_id') - if yt_id: - return self.url_result(yt_id, 'Youtube') - - jwconf = deliver_info['jwconf'] - - formats = [] - for source in jwconf['playlist'][0]['sources']: - formats.append({ - 'url': source['file'], - 'format_id': source.get('label'), - }) - - self._sort_formats(formats) - - title = deliver_info['title'] - thumbnail = jwconf.get('image') - duration = int_or_none(deliver_info.get('duration')) - timestamp = int_or_none(deliver_info.get('release_time')) - uploader_id = deliver_info.get('owner_id') - - return { - 'id': video_id, - 'title': title, - 'thumbnail': thumbnail, - 'duration': duration, - 'timestamp': timestamp, - 'uploader_id': uploader_id, - 'formats': formats, - } diff --git a/youtube_dl/extractor/discovery.py b/youtube_dl/extractor/discovery.py deleted file mode 100644 index e0139cc86..000000000 --- a/youtube_dl/extractor/discovery.py +++ /dev/null @@ -1,118 +0,0 @@ -from __future__ import unicode_literals - -import random -import re -import string - -from .discoverygo import DiscoveryGoBaseIE -from ..compat import compat_urllib_parse_unquote -from ..utils import ExtractorError -from ..compat import compat_HTTPError - - -class DiscoveryIE(DiscoveryGoBaseIE): - _VALID_URL = r'''(?x)https?:// - (?P - go\.discovery| - www\. - (?: - investigationdiscovery| - discoverylife| - animalplanet| - ahctv| - destinationamerica| - sciencechannel| - tlc - )| - watch\. 
- (?: - hgtv| - foodnetwork| - travelchannel| - diynetwork| - cookingchanneltv| - motortrend - ) - )\.com/tv-shows/(?P[^/]+)/(?:video|full-episode)s/(?P[^./?#]+)''' - _TESTS = [{ - 'url': 'https://go.discovery.com/tv-shows/cash-cab/videos/riding-with-matthew-perry', - 'info_dict': { - 'id': '5a2f35ce6b66d17a5026e29e', - 'ext': 'mp4', - 'title': 'Riding with Matthew Perry', - 'description': 'md5:a34333153e79bc4526019a5129e7f878', - 'duration': 84, - }, - 'params': { - 'skip_download': True, # requires ffmpeg - } - }, { - 'url': 'https://www.investigationdiscovery.com/tv-shows/final-vision/full-episodes/final-vision', - 'only_matching': True, - }, { - 'url': 'https://go.discovery.com/tv-shows/alaskan-bush-people/videos/follow-your-own-road', - 'only_matching': True, - }, { - # using `show_slug` is important to get the correct video data - 'url': 'https://www.sciencechannel.com/tv-shows/mythbusters-on-science/full-episodes/christmas-special', - 'only_matching': True, - }] - _GEO_COUNTRIES = ['US'] - _GEO_BYPASS = False - _API_BASE_URL = 'https://api.discovery.com/v1/' - - def _real_extract(self, url): - site, show_slug, display_id = re.match(self._VALID_URL, url).groups() - - access_token = None - cookies = self._get_cookies(url) - - # prefer Affiliate Auth Token over Anonymous Auth Token - auth_storage_cookie = cookies.get('eosAf') or cookies.get('eosAn') - if auth_storage_cookie and auth_storage_cookie.value: - auth_storage = self._parse_json(compat_urllib_parse_unquote( - compat_urllib_parse_unquote(auth_storage_cookie.value)), - display_id, fatal=False) or {} - access_token = auth_storage.get('a') or auth_storage.get('access_token') - - if not access_token: - access_token = self._download_json( - 'https://%s.com/anonymous' % site, display_id, - 'Downloading token JSON metadata', query={ - 'authRel': 'authorization', - 'client_id': '3020a40c2356a645b4b4', - 'nonce': ''.join([random.choice(string.ascii_letters) for _ in range(32)]), - 'redirectUri': 
'https://www.discovery.com/', - })['access_token'] - - headers = self.geo_verification_headers() - headers['Authorization'] = 'Bearer ' + access_token - - try: - video = self._download_json( - self._API_BASE_URL + 'content/videos', - display_id, 'Downloading content JSON metadata', - headers=headers, query={ - 'embed': 'show.name', - 'fields': 'authenticated,description.detailed,duration,episodeNumber,id,name,parental.rating,season.number,show,tags', - 'slug': display_id, - 'show_slug': show_slug, - })[0] - video_id = video['id'] - stream = self._download_json( - self._API_BASE_URL + 'streaming/video/' + video_id, - display_id, 'Downloading streaming JSON metadata', headers=headers) - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403): - e_description = self._parse_json( - e.cause.read().decode(), display_id)['description'] - if 'resource not available for country' in e_description: - self.raise_geo_restricted(countries=self._GEO_COUNTRIES) - if 'Authorized Networks' in e_description: - raise ExtractorError( - 'This video is only available via cable service provider subscription that' - ' is not currently supported. 
You may want to use --cookies.', expected=True) - raise ExtractorError(e_description) - raise - - return self._extract_video_info(video, stream, display_id) diff --git a/youtube_dl/extractor/discoverygo.py b/youtube_dl/extractor/discoverygo.py deleted file mode 100644 index 9e7b14a7d..000000000 --- a/youtube_dl/extractor/discoverygo.py +++ /dev/null @@ -1,175 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - determine_ext, - extract_attributes, - ExtractorError, - int_or_none, - parse_age_limit, - remove_end, - unescapeHTML, - url_or_none, -) - - -class DiscoveryGoBaseIE(InfoExtractor): - _VALID_URL_TEMPLATE = r'''(?x)https?://(?:www\.)?(?: - discovery| - investigationdiscovery| - discoverylife| - animalplanet| - ahctv| - destinationamerica| - sciencechannel| - tlc| - velocitychannel - )go\.com/%s(?P[^/?#&]+)''' - - def _extract_video_info(self, video, stream, display_id): - title = video['name'] - - if not stream: - if video.get('authenticated') is True: - raise ExtractorError( - 'This video is only available via cable service provider subscription that' - ' is not currently supported. 
You may want to use --cookies.', expected=True) - else: - raise ExtractorError('Unable to find stream') - STREAM_URL_SUFFIX = 'streamUrl' - formats = [] - for stream_kind in ('', 'hds'): - suffix = STREAM_URL_SUFFIX.capitalize() if stream_kind else STREAM_URL_SUFFIX - stream_url = stream.get('%s%s' % (stream_kind, suffix)) - if not stream_url: - continue - if stream_kind == '': - formats.extend(self._extract_m3u8_formats( - stream_url, display_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls', fatal=False)) - elif stream_kind == 'hds': - formats.extend(self._extract_f4m_formats( - stream_url, display_id, f4m_id=stream_kind, fatal=False)) - self._sort_formats(formats) - - video_id = video.get('id') or display_id - description = video.get('description', {}).get('detailed') - duration = int_or_none(video.get('duration')) - - series = video.get('show', {}).get('name') - season_number = int_or_none(video.get('season', {}).get('number')) - episode_number = int_or_none(video.get('episodeNumber')) - - tags = video.get('tags') - age_limit = parse_age_limit(video.get('parental', {}).get('rating')) - - subtitles = {} - captions = stream.get('captions') - if isinstance(captions, list): - for caption in captions: - subtitle_url = url_or_none(caption.get('fileUrl')) - if not subtitle_url or not subtitle_url.startswith('http'): - continue - lang = caption.get('fileLang', 'en') - ext = determine_ext(subtitle_url) - subtitles.setdefault(lang, []).append({ - 'url': subtitle_url, - 'ext': 'ttml' if ext == 'xml' else ext, - }) - - return { - 'id': video_id, - 'display_id': display_id, - 'title': title, - 'description': description, - 'duration': duration, - 'series': series, - 'season_number': season_number, - 'episode_number': episode_number, - 'tags': tags, - 'age_limit': age_limit, - 'formats': formats, - 'subtitles': subtitles, - } - - -class DiscoveryGoIE(DiscoveryGoBaseIE): - _VALID_URL = DiscoveryGoBaseIE._VALID_URL_TEMPLATE % r'(?:[^/]+/)+' - _GEO_COUNTRIES = ['US'] - 
_TEST = { - 'url': 'https://www.discoverygo.com/bering-sea-gold/reaper-madness/', - 'info_dict': { - 'id': '58c167d86b66d12f2addeb01', - 'ext': 'mp4', - 'title': 'Reaper Madness', - 'description': 'md5:09f2c625c99afb8946ed4fb7865f6e78', - 'duration': 2519, - 'series': 'Bering Sea Gold', - 'season_number': 8, - 'episode_number': 6, - 'age_limit': 14, - }, - } - - def _real_extract(self, url): - display_id = self._match_id(url) - - webpage = self._download_webpage(url, display_id) - - container = extract_attributes( - self._search_regex( - r'(]+class=["\']video-player-container[^>]+>)', - webpage, 'video container')) - - video = self._parse_json( - container.get('data-video') or container.get('data-json'), - display_id) - - stream = video.get('stream') - - return self._extract_video_info(video, stream, display_id) - - -class DiscoveryGoPlaylistIE(DiscoveryGoBaseIE): - _VALID_URL = DiscoveryGoBaseIE._VALID_URL_TEMPLATE % '' - _TEST = { - 'url': 'https://www.discoverygo.com/bering-sea-gold/', - 'info_dict': { - 'id': 'bering-sea-gold', - 'title': 'Bering Sea Gold', - 'description': 'md5:cc5c6489835949043c0cc3ad66c2fa0e', - }, - 'playlist_mincount': 6, - } - - @classmethod - def suitable(cls, url): - return False if DiscoveryGoIE.suitable(url) else super( - DiscoveryGoPlaylistIE, cls).suitable(url) - - def _real_extract(self, url): - display_id = self._match_id(url) - - webpage = self._download_webpage(url, display_id) - - entries = [] - for mobj in re.finditer(r'data-json=(["\'])(?P{.+?})\1', webpage): - data = self._parse_json( - mobj.group('json'), display_id, - transform_source=unescapeHTML, fatal=False) - if not isinstance(data, dict) or data.get('type') != 'episode': - continue - episode_url = data.get('socialUrl') - if not episode_url: - continue - entries.append(self.url_result( - episode_url, ie=DiscoveryGoIE.ie_key(), - video_id=data.get('id'))) - - return self.playlist_result( - entries, display_id, - remove_end(self._og_search_title( - webpage, fatal=False), 
' | Discovery GO'), - self._og_search_description(webpage)) diff --git a/youtube_dl/extractor/discoverynetworks.py b/youtube_dl/extractor/discoverynetworks.py deleted file mode 100644 index c512b95d0..000000000 --- a/youtube_dl/extractor/discoverynetworks.py +++ /dev/null @@ -1,43 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .dplay import DPlayIE - - -class DiscoveryNetworksDeIE(DPlayIE): - _VALID_URL = r'https?://(?:www\.)?(?P(?:tlc|dmax)\.de|dplay\.co\.uk)/(?:programme|show|sendungen)/(?P[^/]+)/(?:video/)?(?P[^/]+)' - - _TESTS = [{ - 'url': 'https://www.tlc.de/programme/breaking-amish/video/die-welt-da-drauen/DCB331270001100', - 'info_dict': { - 'id': '78867', - 'ext': 'mp4', - 'title': 'Die Welt da draußen', - 'description': 'md5:61033c12b73286e409d99a41742ef608', - 'timestamp': 1554069600, - 'upload_date': '20190331', - }, - 'params': { - 'format': 'bestvideo', - 'skip_download': True, - }, - }, { - 'url': 'https://www.dmax.de/programme/dmax-highlights/video/tuning-star-sidney-hoffmann-exklusiv-bei-dmax/191023082312316', - 'only_matching': True, - }, { - 'url': 'https://www.dplay.co.uk/show/ghost-adventures/video/hotel-leger-103620/EHD_280313B', - 'only_matching': True, - }, { - 'url': 'https://tlc.de/sendungen/breaking-amish/die-welt-da-drauen/', - 'only_matching': True, - }] - - def _real_extract(self, url): - domain, programme, alternate_id = re.match(self._VALID_URL, url).groups() - country = 'GB' if domain == 'dplay.co.uk' else 'DE' - realm = 'questuk' if country == 'GB' else domain.replace('.', '') - return self._get_disco_api_info( - url, '%s/%s' % (programme, alternate_id), - 'sonic-eu1-prod.disco-api.com', realm, country) diff --git a/youtube_dl/extractor/discoveryvr.py b/youtube_dl/extractor/discoveryvr.py deleted file mode 100644 index cb63c2649..000000000 --- a/youtube_dl/extractor/discoveryvr.py +++ /dev/null @@ -1,59 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import 
InfoExtractor -from ..utils import parse_duration - - -class DiscoveryVRIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?discoveryvr\.com/watch/(?P[^/?#]+)' - _TEST = { - 'url': 'http://www.discoveryvr.com/watch/discovery-vr-an-introduction', - 'md5': '32b1929798c464a54356378b7912eca4', - 'info_dict': { - 'id': 'discovery-vr-an-introduction', - 'ext': 'mp4', - 'title': 'Discovery VR - An Introduction', - 'description': 'md5:80d418a10efb8899d9403e61d8790f06', - } - } - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - - bootstrap_data = self._search_regex( - r'root\.DVR\.bootstrapData\s+=\s+"({.+?})";', - webpage, 'bootstrap data') - bootstrap_data = self._parse_json( - bootstrap_data.encode('utf-8').decode('unicode_escape'), - display_id) - videos = self._parse_json(bootstrap_data['videos'], display_id)['allVideos'] - video_data = next(video for video in videos if video.get('slug') == display_id) - - series = video_data.get('showTitle') - title = episode = video_data.get('title') or series - if series and series != title: - title = '%s - %s' % (series, title) - - formats = [] - for f, format_id in (('cdnUriM3U8', 'mobi'), ('webVideoUrlSd', 'sd'), ('webVideoUrlHd', 'hd')): - f_url = video_data.get(f) - if not f_url: - continue - formats.append({ - 'format_id': format_id, - 'url': f_url, - }) - - return { - 'id': display_id, - 'display_id': display_id, - 'title': title, - 'description': video_data.get('description'), - 'thumbnail': video_data.get('thumbnail'), - 'duration': parse_duration(video_data.get('runTime')), - 'formats': formats, - 'episode': episode, - 'series': series, - } diff --git a/youtube_dl/extractor/disney.py b/youtube_dl/extractor/disney.py deleted file mode 100644 index 0eee82fd6..000000000 --- a/youtube_dl/extractor/disney.py +++ /dev/null @@ -1,170 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from 
..utils import ( - int_or_none, - unified_strdate, - compat_str, - determine_ext, - ExtractorError, - update_url_query, -) - - -class DisneyIE(InfoExtractor): - _VALID_URL = r'''(?x) - https?://(?P(?:[^/]+\.)?(?:disney\.[a-z]{2,3}(?:\.[a-z]{2})?|disney(?:(?:me|latino)\.com|turkiye\.com\.tr|channel\.de)|(?:starwars|marvelkids)\.com))/(?:(?:embed/|(?:[^/]+/)+[\w-]+-)(?P[a-z0-9]{24})|(?:[^/]+/)?(?P[^/?#]+))''' - _TESTS = [{ - # Disney.EmbedVideo - 'url': 'http://video.disney.com/watch/moana-trailer-545ed1857afee5a0ec239977', - 'info_dict': { - 'id': '545ed1857afee5a0ec239977', - 'ext': 'mp4', - 'title': 'Moana - Trailer', - 'description': 'A fun adventure for the entire Family! Bring home Moana on Digital HD Feb 21 & Blu-ray March 7', - 'upload_date': '20170112', - }, - 'params': { - # m3u8 download - 'skip_download': True, - } - }, { - # Grill.burger - 'url': 'http://www.starwars.com/video/rogue-one-a-star-wars-story-intro-featurette', - 'info_dict': { - 'id': '5454e9f4e9804a552e3524c8', - 'ext': 'mp4', - 'title': '"Intro" Featurette: Rogue One: A Star Wars Story', - 'upload_date': '20170104', - 'description': 'Go behind-the-scenes of Rogue One: A Star Wars Story in this featurette with Director Gareth Edwards and the cast of the film.', - }, - 'params': { - # m3u8 download - 'skip_download': True, - } - }, { - 'url': 'http://videos.disneylatino.com/ver/spider-man-de-regreso-a-casa-primer-adelanto-543a33a1850bdcfcca13bae2', - 'only_matching': True, - }, { - 'url': 'http://video.en.disneyme.com/watch/future-worm/robo-carp-2001-544b66002aa7353cdd3f5114', - 'only_matching': True, - }, { - 'url': 'http://video.disneyturkiye.com.tr/izle/7c-7-cuceler/kimin-sesi-zaten-5456f3d015f6b36c8afdd0e2', - 'only_matching': True, - }, { - 'url': 'http://disneyjunior.disney.com/embed/546a4798ddba3d1612e4005d', - 'only_matching': True, - }, { - 'url': 'http://www.starwars.com/embed/54690d1e6c42e5f09a0fb097', - 'only_matching': True, - }, { - 'url': 
'http://spiderman.marvelkids.com/embed/522900d2ced3c565e4cc0677', - 'only_matching': True, - }, { - 'url': 'http://spiderman.marvelkids.com/videos/contest-of-champions-part-four-clip-1', - 'only_matching': True, - }, { - 'url': 'http://disneyjunior.en.disneyme.com/dj/watch-my-friends-tigger-and-pooh-promo', - 'only_matching': True, - }, { - 'url': 'http://disneychannel.de/sehen/soy-luna-folge-118-5518518987ba27f3cc729268', - 'only_matching': True, - }, { - 'url': 'http://disneyjunior.disney.com/galactech-the-galactech-grab-galactech-an-admiral-rescue', - 'only_matching': True, - }] - - def _real_extract(self, url): - domain, video_id, display_id = re.match(self._VALID_URL, url).groups() - if not video_id: - webpage = self._download_webpage(url, display_id) - grill = re.sub(r'"\s*\+\s*"', '', self._search_regex( - r'Grill\.burger\s*=\s*({.+})\s*:', - webpage, 'grill data')) - page_data = next(s for s in self._parse_json(grill, display_id)['stack'] if s.get('type') == 'video') - video_data = page_data['data'][0] - else: - webpage = self._download_webpage( - 'http://%s/embed/%s' % (domain, video_id), video_id) - page_data = self._parse_json(self._search_regex( - r'Disney\.EmbedVideo\s*=\s*({.+});', - webpage, 'embed data'), video_id) - video_data = page_data['video'] - - for external in video_data.get('externals', []): - if external.get('source') == 'vevo': - return self.url_result('vevo:' + external['data_id'], 'Vevo') - - video_id = video_data['id'] - title = video_data['title'] - - formats = [] - for flavor in video_data.get('flavors', []): - flavor_format = flavor.get('format') - flavor_url = flavor.get('url') - if not flavor_url or not re.match(r'https?://', flavor_url) or flavor_format == 'mp4_access': - continue - tbr = int_or_none(flavor.get('bitrate')) - if tbr == 99999: - # wrong ks(Kaltura Signature) causes 404 Error - flavor_url = update_url_query(flavor_url, {'ks': ''}) - m3u8_formats = self._extract_m3u8_formats( - flavor_url, video_id, 'mp4', - 
m3u8_id=flavor_format, fatal=False) - for f in m3u8_formats: - # Apple FairPlay - if '/fpshls/' in f['url']: - continue - formats.append(f) - continue - format_id = [] - if flavor_format: - format_id.append(flavor_format) - if tbr: - format_id.append(compat_str(tbr)) - ext = determine_ext(flavor_url) - if flavor_format == 'applehttp' or ext == 'm3u8': - ext = 'mp4' - width = int_or_none(flavor.get('width')) - height = int_or_none(flavor.get('height')) - formats.append({ - 'format_id': '-'.join(format_id), - 'url': flavor_url, - 'width': width, - 'height': height, - 'tbr': tbr, - 'ext': ext, - 'vcodec': 'none' if (width == 0 and height == 0) else None, - }) - if not formats and video_data.get('expired'): - raise ExtractorError( - '%s said: %s' % (self.IE_NAME, page_data['translations']['video_expired']), - expected=True) - self._sort_formats(formats) - - subtitles = {} - for caption in video_data.get('captions', []): - caption_url = caption.get('url') - caption_format = caption.get('format') - if not caption_url or caption_format.startswith('unknown'): - continue - subtitles.setdefault(caption.get('language', 'en'), []).append({ - 'url': caption_url, - 'ext': { - 'webvtt': 'vtt', - }.get(caption_format, caption_format), - }) - - return { - 'id': video_id, - 'title': title, - 'description': video_data.get('description') or video_data.get('short_desc'), - 'thumbnail': video_data.get('thumb') or video_data.get('thumb_secure'), - 'duration': int_or_none(video_data.get('duration_sec')), - 'upload_date': unified_strdate(video_data.get('publish_date')), - 'formats': formats, - 'subtitles': subtitles, - } diff --git a/youtube_dl/extractor/dispeak.py b/youtube_dl/extractor/dispeak.py deleted file mode 100644 index 276fd4b09..000000000 --- a/youtube_dl/extractor/dispeak.py +++ /dev/null @@ -1,131 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - int_or_none, - parse_duration, - remove_end, - 
xpath_element, - xpath_text, -) - - -class DigitallySpeakingIE(InfoExtractor): - _VALID_URL = r'https?://(?:s?evt\.dispeak|events\.digitallyspeaking)\.com/(?:[^/]+/)+xml/(?P[^.]+)\.xml' - - _TESTS = [{ - # From http://gdcvault.com/play/1023460/Tenacious-Design-and-The-Interface - 'url': 'http://evt.dispeak.com/ubm/gdc/sf16/xml/840376_BQRC.xml', - 'md5': 'a8efb6c31ed06ca8739294960b2dbabd', - 'info_dict': { - 'id': '840376_BQRC', - 'ext': 'mp4', - 'title': 'Tenacious Design and The Interface of \'Destiny\'', - }, - }, { - # From http://www.gdcvault.com/play/1014631/Classic-Game-Postmortem-PAC - 'url': 'http://events.digitallyspeaking.com/gdc/sf11/xml/12396_1299111843500GMPX.xml', - 'only_matching': True, - }, { - # From http://www.gdcvault.com/play/1013700/Advanced-Material - 'url': 'http://sevt.dispeak.com/ubm/gdc/eur10/xml/11256_1282118587281VNIT.xml', - 'only_matching': True, - }, { - # From https://gdcvault.com/play/1016624, empty speakerVideo - 'url': 'https://sevt.dispeak.com/ubm/gdc/online12/xml/201210-822101_1349794556671DDDD.xml', - 'info_dict': { - 'id': '201210-822101_1349794556671DDDD', - 'ext': 'flv', - 'title': 'Pre-launch - Preparing to Take the Plunge', - }, - }, { - # From http://www.gdcvault.com/play/1014846/Conference-Keynote-Shigeru, empty slideVideo - 'url': 'http://events.digitallyspeaking.com/gdc/project25/xml/p25-miyamoto1999_1282467389849HSVB.xml', - 'only_matching': True, - }] - - def _parse_mp4(self, metadata): - video_formats = [] - video_root = None - - mp4_video = xpath_text(metadata, './mp4video', default=None) - if mp4_video is not None: - mobj = re.match(r'(?Phttps?://.*?/).*', mp4_video) - video_root = mobj.group('root') - if video_root is None: - http_host = xpath_text(metadata, 'httpHost', default=None) - if http_host: - video_root = 'http://%s/' % http_host - if video_root is None: - # Hard-coded in http://evt.dispeak.com/ubm/gdc/sf16/custom/player2.js - # Works for GPUTechConf, too - video_root = 
'http://s3-2u.digitallyspeaking.com/' - - formats = metadata.findall('./MBRVideos/MBRVideo') - if not formats: - return None - for a_format in formats: - stream_name = xpath_text(a_format, 'streamName', fatal=True) - video_path = re.match(r'mp4\:(?P.*)', stream_name).group('path') - url = video_root + video_path - bitrate = xpath_text(a_format, 'bitrate') - tbr = int_or_none(bitrate) - vbr = int_or_none(self._search_regex( - r'-(\d+)\.mp4', video_path, 'vbr', default=None)) - abr = tbr - vbr if tbr and vbr else None - video_formats.append({ - 'format_id': bitrate, - 'url': url, - 'tbr': tbr, - 'vbr': vbr, - 'abr': abr, - }) - return video_formats - - def _parse_flv(self, metadata): - formats = [] - akamai_url = xpath_text(metadata, './akamaiHost', fatal=True) - audios = metadata.findall('./audios/audio') - for audio in audios: - formats.append({ - 'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url, - 'play_path': remove_end(audio.get('url'), '.flv'), - 'ext': 'flv', - 'vcodec': 'none', - 'format_id': audio.get('code'), - }) - for video_key, format_id, preference in ( - ('slide', 'slides', -2), ('speaker', 'speaker', -1)): - video_path = xpath_text(metadata, './%sVideo' % video_key) - if not video_path: - continue - formats.append({ - 'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url, - 'play_path': remove_end(video_path, '.flv'), - 'ext': 'flv', - 'format_note': '%s video' % video_key, - 'quality': preference, - 'preference': preference, - 'format_id': format_id, - }) - return formats - - def _real_extract(self, url): - video_id = self._match_id(url) - - xml_description = self._download_xml(url, video_id) - metadata = xpath_element(xml_description, 'metadata') - - video_formats = self._parse_mp4(metadata) - if video_formats is None: - video_formats = self._parse_flv(metadata) - - return { - 'id': video_id, - 'formats': video_formats, - 'title': xpath_text(metadata, 'title', fatal=True), - 'duration': parse_duration(xpath_text(metadata, 'endTime')), - 'creator': 
xpath_text(metadata, 'speaker'), - } diff --git a/youtube_dl/extractor/dlf.py b/youtube_dl/extractor/dlf.py deleted file mode 100644 index cc3de4582..000000000 --- a/youtube_dl/extractor/dlf.py +++ /dev/null @@ -1,204 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..compat import ( - compat_str, -) -from ..utils import ( - determine_ext, - extract_attributes, - int_or_none, - merge_dicts, - traverse_obj, - url_or_none, - variadic, -) - - -class DLFBaseIE(InfoExtractor): - _VALID_URL_BASE = r'https?://(?:www\.)?deutschlandfunk\.de/' - _BUTTON_REGEX = r'(]+alt="Anhören"[^>]+data-audio-diraid[^>]*>)' - - def _parse_button_attrs(self, button, audio_id=None): - attrs = extract_attributes(button) - audio_id = audio_id or attrs['data-audio-diraid'] - - url = traverse_obj( - attrs, 'data-audio-download-src', 'data-audio', 'data-audioreference', - 'data-audio-src', expected_type=url_or_none) - ext = determine_ext(url) - formats = (self._extract_m3u8_formats(url, audio_id, fatal=False) - if ext == 'm3u8' else [{'url': url, 'ext': ext, 'vcodec': 'none'}]) - self._sort_formats(formats) - - def traverse_attrs(path): - path = list(variadic(path)) - t = path.pop() if callable(path[-1]) else None - return traverse_obj(attrs, path, expected_type=t, get_all=False) - - def txt_or_none(v, default=None): - return default if v is None else (compat_str(v).strip() or default) - - return merge_dicts(*reversed([{ - 'id': audio_id, - # 'extractor_key': DLFIE.ie_key(), - # 'extractor': DLFIE.IE_NAME, - 'formats': formats, - }, dict((k, traverse_attrs(v)) for k, v in { - 'title': (('data-audiotitle', 'data-audio-title', 'data-audio-download-tracking-title'), txt_or_none), - 'duration': (('data-audioduration', 'data-audio-duration'), int_or_none), - 'thumbnail': ('data-audioimage', url_or_none), - 'uploader': 'data-audio-producer', - 'series': 'data-audio-series', - 'channel': 'data-audio-origin-site-name', - 
'webpage_url': ('data-audio-download-tracking-path', url_or_none), - }.items())])) - - -class DLFIE(DLFBaseIE): - IE_NAME = 'dlf' - _VALID_URL = DLFBaseIE._VALID_URL_BASE + r'[\w-]+-dlf-(?P[\da-f]{8})-100\.html' - _TESTS = [ - # Audio as an HLS stream - { - 'url': 'https://www.deutschlandfunk.de/tanz-der-saiteninstrumente-das-wild-strings-trio-aus-slowenien-dlf-03a3eb19-100.html', - 'info_dict': { - 'id': '03a3eb19', - 'title': r're:Tanz der Saiteninstrumente [-/] Das Wild Strings Trio aus Slowenien', - 'ext': 'm4a', - 'duration': 3298, - 'thumbnail': 'https://assets.deutschlandfunk.de/FALLBACK-IMAGE-AUDIO/512x512.png?t=1603714364673', - 'uploader': 'Deutschlandfunk', - 'series': 'On Stage', - 'channel': 'deutschlandfunk' - }, - 'params': { - 'skip_download': 'm3u8' - }, - 'skip': 'This webpage no longer exists' - }, { - 'url': 'https://www.deutschlandfunk.de/russische-athleten-kehren-zurueck-auf-die-sportbuehne-ein-gefaehrlicher-tueroeffner-dlf-d9cc1856-100.html', - 'info_dict': { - 'id': 'd9cc1856', - 'title': 'Russische Athleten kehren zurück auf die Sportbühne: Ein gefährlicher Türöffner', - 'ext': 'mp3', - 'duration': 291, - 'thumbnail': 'https://assets.deutschlandfunk.de/FALLBACK-IMAGE-AUDIO/512x512.png?t=1603714364673', - 'uploader': 'Deutschlandfunk', - 'series': 'Kommentare und Themen der Woche', - 'channel': 'deutschlandfunk' - } - }, - ] - - def _real_extract(self, url): - audio_id = self._match_id(url) - webpage = self._download_webpage(url, audio_id) - - return self._parse_button_attrs( - self._search_regex(self._BUTTON_REGEX, webpage, 'button'), audio_id) - - -class DLFCorpusIE(DLFBaseIE): - IE_NAME = 'dlf:corpus' - IE_DESC = 'DLF Multi-feed Archives' - _VALID_URL = DLFBaseIE._VALID_URL_BASE + r'(?P(?![\w-]+-dlf-[\da-f]{8})[\w-]+-\d+)\.html' - _TESTS = [ - # Recorded news broadcast with referrals to related broadcasts - { - 'url': 'https://www.deutschlandfunk.de/fechten-russland-belarus-ukraine-protest-100.html', - 'info_dict': { - 'id': 
'fechten-russland-belarus-ukraine-protest-100', - 'title': r're:Wiederzulassung als neutrale Athleten [-/] Was die Rückkehr russischer und belarussischer Sportler beim Fechten bedeutet', - 'description': 'md5:91340aab29c71aa7518ad5be13d1e8ad' - }, - 'playlist_mincount': 5, - 'playlist': [{ - 'info_dict': { - 'id': '1fc5d64a', - 'title': r're:Wiederzulassung als neutrale Athleten [-/] Was die Rückkehr russischer und belarussischer Sportler beim Fechten bedeutet', - 'ext': 'mp3', - 'duration': 252, - 'thumbnail': 'https://assets.deutschlandfunk.de/aad16241-6b76-4a09-958b-96d0ee1d6f57/512x512.jpg?t=1679480020313', - 'uploader': 'Deutschlandfunk', - 'series': 'Sport', - 'channel': 'deutschlandfunk' - } - }, { - 'info_dict': { - 'id': '2ada145f', - 'title': r're:(?:Sportpolitik / )?Fechtverband votiert für Rückkehr russischer Athleten', - 'ext': 'mp3', - 'duration': 336, - 'thumbnail': 'https://assets.deutschlandfunk.de/FILE_93982766f7317df30409b8a184ac044a/512x512.jpg?t=1678547581005', - 'uploader': 'Deutschlandfunk', - 'series': 'Deutschlandfunk Nova', - 'channel': 'deutschlandfunk-nova' - } - }, { - 'info_dict': { - 'id': '5e55e8c9', - 'title': r're:Wiederzulassung von Russland und Belarus [-/] "Herumlavieren" des Fechter-Bundes sorgt für Unverständnis', - 'ext': 'mp3', - 'duration': 187, - 'thumbnail': 'https://assets.deutschlandfunk.de/a595989d-1ed1-4a2e-8370-b64d7f11d757/512x512.jpg?t=1679173825412', - 'uploader': 'Deutschlandfunk', - 'series': 'Sport am Samstag', - 'channel': 'deutschlandfunk' - } - }, { - 'info_dict': { - 'id': '47e1a096', - 'title': r're:Rückkehr Russlands im Fechten [-/] "Fassungslos, dass es einfach so passiert ist"', - 'ext': 'mp3', - 'duration': 602, - 'thumbnail': 'https://assets.deutschlandfunk.de/da4c494a-21cc-48b4-9cc7-40e09fd442c2/512x512.jpg?t=1678562155770', - 'uploader': 'Deutschlandfunk', - 'series': 'Sport am Samstag', - 'channel': 'deutschlandfunk' - } - }, { - 'info_dict': { - 'id': '5e55e8c9', - 'title': r're:Wiederzulassung 
von Russland und Belarus [-/] "Herumlavieren" des Fechter-Bundes sorgt für Unverständnis', - 'ext': 'mp3', - 'duration': 187, - 'thumbnail': 'https://assets.deutschlandfunk.de/a595989d-1ed1-4a2e-8370-b64d7f11d757/512x512.jpg?t=1679173825412', - 'uploader': 'Deutschlandfunk', - 'series': 'Sport am Samstag', - 'channel': 'deutschlandfunk' - } - }] - }, - # Podcast feed with tag buttons, playlist count fluctuates - { - 'url': 'https://www.deutschlandfunk.de/kommentare-und-themen-der-woche-100.html', - 'info_dict': { - 'id': 'kommentare-und-themen-der-woche-100', - 'title': 'Meinung - Kommentare und Themen der Woche', - 'description': 'md5:2901bbd65cd2d45e116d399a099ce5d5', - }, - 'playlist_mincount': 10, - }, - # Podcast feed with no description - { - 'url': 'https://www.deutschlandfunk.de/podcast-tolle-idee-100.html', - 'info_dict': { - 'id': 'podcast-tolle-idee-100', - 'title': 'Wissenschaftspodcast - Tolle Idee! - Was wurde daraus?', - }, - 'playlist_mincount': 11, - }, - ] - - def _real_extract(self, url): - playlist_id = self._match_id(url) - webpage = self._download_webpage(url, playlist_id) - - return self.playlist_result( - map(self._parse_button_attrs, re.findall(self._BUTTON_REGEX, webpage)), - playlist_id, self._html_search_meta(['og:title', 'twitter:title'], webpage, default=None), - self._html_search_meta(['description', 'og:description', 'twitter:description'], webpage, default=None)) diff --git a/youtube_dl/extractor/dlive.py b/youtube_dl/extractor/dlive.py deleted file mode 100644 index d95c67a5b..000000000 --- a/youtube_dl/extractor/dlive.py +++ /dev/null @@ -1,97 +0,0 @@ -from __future__ import unicode_literals - -import json -import re - -from .common import InfoExtractor -from ..utils import int_or_none - - -class DLiveVODIE(InfoExtractor): - IE_NAME = 'dlive:vod' - _VALID_URL = r'https?://(?:www\.)?dlive\.tv/p/(?P.+?)\+(?P[^/?#&]+)' - _TESTS = [{ - 'url': 'https://dlive.tv/p/pdp+3mTzOl4WR', - 'info_dict': { - 'id': '3mTzOl4WR', - 'ext': 'mp4', - 
'title': 'Minecraft with james charles epic', - 'upload_date': '20190701', - 'timestamp': 1562011015, - 'uploader_id': 'pdp', - } - }, { - 'url': 'https://dlive.tv/p/pdpreplay+D-RD-xSZg', - 'only_matching': True, - }] - - def _real_extract(self, url): - uploader_id, vod_id = re.match(self._VALID_URL, url).groups() - broadcast = self._download_json( - 'https://graphigo.prd.dlive.tv/', vod_id, - data=json.dumps({'query': '''query { - pastBroadcast(permlink:"%s+%s") { - content - createdAt - length - playbackUrl - title - thumbnailUrl - viewCount - } -}''' % (uploader_id, vod_id)}).encode())['data']['pastBroadcast'] - title = broadcast['title'] - formats = self._extract_m3u8_formats( - broadcast['playbackUrl'], vod_id, 'mp4', 'm3u8_native') - self._sort_formats(formats) - return { - 'id': vod_id, - 'title': title, - 'uploader_id': uploader_id, - 'formats': formats, - 'description': broadcast.get('content'), - 'thumbnail': broadcast.get('thumbnailUrl'), - 'timestamp': int_or_none(broadcast.get('createdAt'), 1000), - 'view_count': int_or_none(broadcast.get('viewCount')), - } - - -class DLiveStreamIE(InfoExtractor): - IE_NAME = 'dlive:stream' - _VALID_URL = r'https?://(?:www\.)?dlive\.tv/(?!p/)(?P[\w.-]+)' - - def _real_extract(self, url): - display_name = self._match_id(url) - user = self._download_json( - 'https://graphigo.prd.dlive.tv/', display_name, - data=json.dumps({'query': '''query { - userByDisplayName(displayname:"%s") { - livestream { - content - createdAt - title - thumbnailUrl - watchingCount - } - username - } -}''' % display_name}).encode())['data']['userByDisplayName'] - livestream = user['livestream'] - title = livestream['title'] - username = user['username'] - formats = self._extract_m3u8_formats( - 'https://live.prd.dlive.tv/hls/live/%s.m3u8' % username, - display_name, 'mp4') - self._sort_formats(formats) - return { - 'id': display_name, - 'title': self._live_title(title), - 'uploader': display_name, - 'uploader_id': username, - 'formats': formats, 
- 'description': livestream.get('content'), - 'thumbnail': livestream.get('thumbnailUrl'), - 'is_live': True, - 'timestamp': int_or_none(livestream.get('createdAt'), 1000), - 'view_count': int_or_none(livestream.get('watchingCount')), - } diff --git a/youtube_dl/extractor/dotsub.py b/youtube_dl/extractor/dotsub.py deleted file mode 100644 index 148605c0b..000000000 --- a/youtube_dl/extractor/dotsub.py +++ /dev/null @@ -1,83 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - float_or_none, - int_or_none, -) - - -class DotsubIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?dotsub\.com/view/(?P[^/]+)' - _TESTS = [{ - 'url': 'https://dotsub.com/view/9c63db2a-fa95-4838-8e6e-13deafe47f09', - 'md5': '21c7ff600f545358134fea762a6d42b6', - 'info_dict': { - 'id': '9c63db2a-fa95-4838-8e6e-13deafe47f09', - 'ext': 'flv', - 'title': 'MOTIVATION - "It\'s Possible" Best Inspirational Video Ever', - 'description': 'md5:41af1e273edbbdfe4e216a78b9d34ac6', - 'thumbnail': 're:^https?://dotsub.com/media/9c63db2a-fa95-4838-8e6e-13deafe47f09/p', - 'duration': 198, - 'uploader': 'liuxt', - 'timestamp': 1385778501.104, - 'upload_date': '20131130', - 'view_count': int, - } - }, { - 'url': 'https://dotsub.com/view/747bcf58-bd59-45b7-8c8c-ac312d084ee6', - 'md5': '2bb4a83896434d5c26be868c609429a3', - 'info_dict': { - 'id': '168006778', - 'ext': 'mp4', - 'title': 'Apartments and flats in Raipur the white symphony', - 'description': 'md5:784d0639e6b7d1bc29530878508e38fe', - 'thumbnail': 're:^https?://dotsub.com/media/747bcf58-bd59-45b7-8c8c-ac312d084ee6/p', - 'duration': 290, - 'timestamp': 1476767794.2809999, - 'upload_date': '20161018', - 'uploader': 'parthivi001', - 'uploader_id': 'user52596202', - 'view_count': int, - }, - 'add_ie': ['Vimeo'], - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - info = self._download_json( - 'https://dotsub.com/api/media/%s/metadata' % video_id, video_id) - video_url = 
info.get('mediaURI') - - if not video_url: - webpage = self._download_webpage(url, video_id) - video_url = self._search_regex( - [r']+src="([^"]+)"', r'"file"\s*:\s*\'([^\']+)'], - webpage, 'video url', default=None) - info_dict = { - 'id': video_id, - 'url': video_url, - 'ext': 'flv', - } - - if not video_url: - setup_data = self._parse_json(self._html_search_regex( - r'(?s)data-setup=([\'"])(?P(?!\1).+?)\1', - webpage, 'setup data', group='content'), video_id) - info_dict = { - '_type': 'url_transparent', - 'url': setup_data['src'], - } - - info_dict.update({ - 'title': info['title'], - 'description': info.get('description'), - 'thumbnail': info.get('screenshotURI'), - 'duration': int_or_none(info.get('duration'), 1000), - 'uploader': info.get('user'), - 'timestamp': float_or_none(info.get('dateCreated'), 1000), - 'view_count': int_or_none(info.get('numberOfViews')), - }) - - return info_dict diff --git a/youtube_dl/extractor/douyutv.py b/youtube_dl/extractor/douyutv.py deleted file mode 100644 index 9757f4422..000000000 --- a/youtube_dl/extractor/douyutv.py +++ /dev/null @@ -1,201 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import time -import hashlib -import re - -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - unescapeHTML, - unified_strdate, - urljoin, -) - - -class DouyuTVIE(InfoExtractor): - IE_DESC = '斗鱼' - _VALID_URL = r'https?://(?:www\.)?douyu(?:tv)?\.com/(?:[^/]+/)*(?P[A-Za-z0-9]+)' - _TESTS = [{ - 'url': 'http://www.douyutv.com/iseven', - 'info_dict': { - 'id': '17732', - 'display_id': 'iseven', - 'ext': 'flv', - 'title': 're:^清晨醒脑!根本停不下来! 
[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', - 'description': r're:.*m7show@163\.com.*', - 'thumbnail': r're:^https?://.*\.jpg$', - 'uploader': '7师傅', - 'is_live': True, - }, - 'params': { - 'skip_download': True, - }, - }, { - 'url': 'http://www.douyutv.com/85982', - 'info_dict': { - 'id': '85982', - 'display_id': '85982', - 'ext': 'flv', - 'title': 're:^小漠从零单排记!——CSOL2躲猫猫 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', - 'description': 'md5:746a2f7a253966a06755a912f0acc0d2', - 'thumbnail': r're:^https?://.*\.jpg$', - 'uploader': 'douyu小漠', - 'is_live': True, - }, - 'params': { - 'skip_download': True, - }, - 'skip': 'Room not found', - }, { - 'url': 'http://www.douyutv.com/17732', - 'info_dict': { - 'id': '17732', - 'display_id': '17732', - 'ext': 'flv', - 'title': 're:^清晨醒脑!根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', - 'description': r're:.*m7show@163\.com.*', - 'thumbnail': r're:^https?://.*\.jpg$', - 'uploader': '7师傅', - 'is_live': True, - }, - 'params': { - 'skip_download': True, - }, - }, { - 'url': 'http://www.douyu.com/xiaocang', - 'only_matching': True, - }, { - # \"room_id\" - 'url': 'http://www.douyu.com/t/lpl', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - if video_id.isdigit(): - room_id = video_id - else: - page = self._download_webpage(url, video_id) - room_id = self._html_search_regex( - r'"room_id\\?"\s*:\s*(\d+),', page, 'room id') - - # Grab metadata from mobile API - room = self._download_json( - 'http://m.douyu.com/html5/live?roomId=%s' % room_id, video_id, - note='Downloading room info')['data'] - - # 1 = live, 2 = offline - if room.get('show_status') == '2': - raise ExtractorError('Live stream is offline', expected=True) - - # Grab the URL from PC client API - # The m3u8 url from mobile API requires re-authentication every 5 minutes - tt = int(time.time()) - signContent = 'lapi/live/thirdPart/getPlay/%s?aid=pcclient&rate=0&time=%d9TUk5fjjUjg9qIMH3sdnh' % (room_id, tt) - 
sign = hashlib.md5(signContent.encode('ascii')).hexdigest() - video_url = self._download_json( - 'http://coapi.douyucdn.cn/lapi/live/thirdPart/getPlay/' + room_id, - video_id, note='Downloading video URL info', - query={'rate': 0}, headers={ - 'auth': sign, - 'time': str(tt), - 'aid': 'pcclient' - })['data']['live_url'] - - title = self._live_title(unescapeHTML(room['room_name'])) - description = room.get('show_details') - thumbnail = room.get('room_src') - uploader = room.get('nickname') - - return { - 'id': room_id, - 'display_id': video_id, - 'url': video_url, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'uploader': uploader, - 'is_live': True, - } - - -class DouyuShowIE(InfoExtractor): - _VALID_URL = r'https?://v(?:mobile)?\.douyu\.com/show/(?P[0-9a-zA-Z]+)' - - _TESTS = [{ - 'url': 'https://v.douyu.com/show/rjNBdvnVXNzvE2yw', - 'md5': '0c2cfd068ee2afe657801269b2d86214', - 'info_dict': { - 'id': 'rjNBdvnVXNzvE2yw', - 'ext': 'mp4', - 'title': '陈一发儿:砒霜 我有个室友系列!04-01 22点场', - 'duration': 7150.08, - 'thumbnail': r're:^https?://.*\.jpg$', - 'uploader': '陈一发儿', - 'uploader_id': 'XrZwYelr5wbK', - 'uploader_url': 'https://v.douyu.com/author/XrZwYelr5wbK', - 'upload_date': '20170402', - }, - }, { - 'url': 'https://vmobile.douyu.com/show/rjNBdvnVXNzvE2yw', - 'only_matching': True, - }] - - def _real_extract(self, url): - url = url.replace('vmobile.', 'v.') - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - room_info = self._parse_json(self._search_regex( - r'var\s+\$ROOM\s*=\s*({.+});', webpage, 'room info'), video_id) - - video_info = None - - for trial in range(5): - # Sometimes Douyu rejects our request. 
Let's try it more times - try: - video_info = self._download_json( - 'https://vmobile.douyu.com/video/getInfo', video_id, - query={'vid': video_id}, - headers={ - 'Referer': url, - 'x-requested-with': 'XMLHttpRequest', - }) - break - except ExtractorError: - self._sleep(1, video_id) - - if not video_info: - raise ExtractorError('Can\'t fetch video info') - - formats = self._extract_m3u8_formats( - video_info['data']['video_url'], video_id, - entry_protocol='m3u8_native', ext='mp4') - - upload_date = unified_strdate(self._html_search_regex( - r'上传时间:([^<]+)', webpage, - 'upload date', fatal=False)) - - uploader = uploader_id = uploader_url = None - mobj = re.search( - r'(?m)]+href="/author/([0-9a-zA-Z]+)".+?]+title="([^"]+)"', - webpage) - if mobj: - uploader_id, uploader = mobj.groups() - uploader_url = urljoin(url, '/author/' + uploader_id) - - return { - 'id': video_id, - 'title': room_info['name'], - 'formats': formats, - 'duration': room_info.get('duration'), - 'thumbnail': room_info.get('pic'), - 'upload_date': upload_date, - 'uploader': uploader, - 'uploader_id': uploader_id, - 'uploader_url': uploader_url, - } diff --git a/youtube_dl/extractor/dplay.py b/youtube_dl/extractor/dplay.py deleted file mode 100644 index bbb199094..000000000 --- a/youtube_dl/extractor/dplay.py +++ /dev/null @@ -1,369 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import json -import re - -from .common import InfoExtractor -from ..compat import compat_HTTPError -from ..utils import ( - determine_ext, - ExtractorError, - float_or_none, - int_or_none, - strip_or_none, - unified_timestamp, -) - - -class DPlayIE(InfoExtractor): - _PATH_REGEX = r'/(?P[^/]+/[^/?#]+)' - _VALID_URL = r'''(?x)https?:// - (?P - (?:www\.)?(?Pd - (?: - play\.(?Pdk|fi|jp|se|no)| - iscoveryplus\.(?Pdk|es|fi|it|se|no) - ) - )| - (?Pes|it)\.dplay\.com - )/[^/]+''' + _PATH_REGEX - - _TESTS = [{ - # non geo restricted, via secure api, unsigned download hls URL - 'url': 
'https://www.dplay.se/videos/nugammalt-77-handelser-som-format-sverige/nugammalt-77-handelser-som-format-sverige-101', - 'info_dict': { - 'id': '13628', - 'display_id': 'nugammalt-77-handelser-som-format-sverige/nugammalt-77-handelser-som-format-sverige-101', - 'ext': 'mp4', - 'title': 'Svensken lär sig njuta av livet', - 'description': 'md5:d3819c9bccffd0fe458ca42451dd50d8', - 'duration': 2649.856, - 'timestamp': 1365453720, - 'upload_date': '20130408', - 'creator': 'Kanal 5', - 'series': 'Nugammalt - 77 händelser som format Sverige', - 'season_number': 1, - 'episode_number': 1, - }, - 'params': { - 'format': 'bestvideo', - 'skip_download': True, - }, - }, { - # geo restricted, via secure api, unsigned download hls URL - 'url': 'http://www.dplay.dk/videoer/ted-bundy-mind-of-a-monster/ted-bundy-mind-of-a-monster', - 'info_dict': { - 'id': '104465', - 'display_id': 'ted-bundy-mind-of-a-monster/ted-bundy-mind-of-a-monster', - 'ext': 'mp4', - 'title': 'Ted Bundy: Mind Of A Monster', - 'description': 'md5:8b780f6f18de4dae631668b8a9637995', - 'duration': 5290.027, - 'timestamp': 1570694400, - 'upload_date': '20191010', - 'creator': 'ID - Investigation Discovery', - 'series': 'Ted Bundy: Mind Of A Monster', - 'season_number': 1, - 'episode_number': 1, - }, - 'params': { - 'format': 'bestvideo', - 'skip_download': True, - }, - }, { - # disco-api - 'url': 'https://www.dplay.no/videoer/i-kongens-klr/sesong-1-episode-7', - 'info_dict': { - 'id': '40206', - 'display_id': 'i-kongens-klr/sesong-1-episode-7', - 'ext': 'mp4', - 'title': 'Episode 7', - 'description': 'md5:e3e1411b2b9aebeea36a6ec5d50c60cf', - 'duration': 2611.16, - 'timestamp': 1516726800, - 'upload_date': '20180123', - 'series': 'I kongens klær', - 'season_number': 1, - 'episode_number': 7, - }, - 'params': { - 'format': 'bestvideo', - 'skip_download': True, - }, - 'skip': 'Available for Premium users', - }, { - 'url': 'http://it.dplay.com/nove/biografie-imbarazzanti/luigi-di-maio-la-psicosi-di-stanislawskij/', - 
'md5': '2b808ffb00fc47b884a172ca5d13053c', - 'info_dict': { - 'id': '6918', - 'display_id': 'biografie-imbarazzanti/luigi-di-maio-la-psicosi-di-stanislawskij', - 'ext': 'mp4', - 'title': 'Luigi Di Maio: la psicosi di Stanislawskij', - 'description': 'md5:3c7a4303aef85868f867a26f5cc14813', - 'thumbnail': r're:^https?://.*\.jpe?g', - 'upload_date': '20160524', - 'timestamp': 1464076800, - 'series': 'Biografie imbarazzanti', - 'season_number': 1, - 'episode': 'Episode 1', - 'episode_number': 1, - }, - }, { - 'url': 'https://es.dplay.com/dmax/la-fiebre-del-oro/temporada-8-episodio-1/', - 'info_dict': { - 'id': '21652', - 'display_id': 'la-fiebre-del-oro/temporada-8-episodio-1', - 'ext': 'mp4', - 'title': 'Episodio 1', - 'description': 'md5:b9dcff2071086e003737485210675f69', - 'thumbnail': r're:^https?://.*\.png', - 'upload_date': '20180709', - 'timestamp': 1531173540, - 'series': 'La fiebre del oro', - 'season_number': 8, - 'episode': 'Episode 1', - 'episode_number': 1, - }, - 'params': { - 'skip_download': True, - }, - }, { - 'url': 'https://www.dplay.fi/videot/shifting-gears-with-aaron-kaufman/episode-16', - 'only_matching': True, - }, { - 'url': 'https://www.dplay.jp/video/gold-rush/24086', - 'only_matching': True, - }, { - 'url': 'https://www.discoveryplus.se/videos/nugammalt-77-handelser-som-format-sverige/nugammalt-77-handelser-som-format-sverige-101', - 'only_matching': True, - }, { - 'url': 'https://www.discoveryplus.dk/videoer/ted-bundy-mind-of-a-monster/ted-bundy-mind-of-a-monster', - 'only_matching': True, - }, { - 'url': 'https://www.discoveryplus.no/videoer/i-kongens-klr/sesong-1-episode-7', - 'only_matching': True, - }, { - 'url': 'https://www.discoveryplus.it/videos/biografie-imbarazzanti/luigi-di-maio-la-psicosi-di-stanislawskij', - 'only_matching': True, - }, { - 'url': 'https://www.discoveryplus.es/videos/la-fiebre-del-oro/temporada-8-episodio-1', - 'only_matching': True, - }, { - 'url': 
'https://www.discoveryplus.fi/videot/shifting-gears-with-aaron-kaufman/episode-16', - 'only_matching': True, - }] - - def _process_errors(self, e, geo_countries): - info = self._parse_json(e.cause.read().decode('utf-8'), None) - error = info['errors'][0] - error_code = error.get('code') - if error_code == 'access.denied.geoblocked': - self.raise_geo_restricted(countries=geo_countries) - elif error_code in ('access.denied.missingpackage', 'invalid.token'): - raise ExtractorError( - 'This video is only available for registered users. You may want to use --cookies.', expected=True) - raise ExtractorError(info['errors'][0]['detail'], expected=True) - - def _update_disco_api_headers(self, headers, disco_base, display_id, realm): - headers['Authorization'] = 'Bearer ' + self._download_json( - disco_base + 'token', display_id, 'Downloading token', - query={ - 'realm': realm, - })['data']['attributes']['token'] - - def _download_video_playback_info(self, disco_base, video_id, headers): - streaming = self._download_json( - disco_base + 'playback/videoPlaybackInfo/' + video_id, - video_id, headers=headers)['data']['attributes']['streaming'] - streaming_list = [] - for format_id, format_dict in streaming.items(): - streaming_list.append({ - 'type': format_id, - 'url': format_dict.get('url'), - }) - return streaming_list - - def _get_disco_api_info(self, url, display_id, disco_host, realm, country): - geo_countries = [country.upper()] - self._initialize_geo_bypass({ - 'countries': geo_countries, - }) - disco_base = 'https://%s/' % disco_host - headers = { - 'Referer': url, - } - self._update_disco_api_headers(headers, disco_base, display_id, realm) - try: - video = self._download_json( - disco_base + 'content/videos/' + display_id, display_id, - headers=headers, query={ - 'fields[channel]': 'name', - 'fields[image]': 'height,src,width', - 'fields[show]': 'name', - 'fields[tag]': 'name', - 'fields[video]': 
'description,episodeNumber,name,publishStart,seasonNumber,videoDuration', - 'include': 'images,primaryChannel,show,tags' - }) - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400: - self._process_errors(e, geo_countries) - raise - video_id = video['data']['id'] - info = video['data']['attributes'] - title = info['name'].strip() - formats = [] - try: - streaming = self._download_video_playback_info( - disco_base, video_id, headers) - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: - self._process_errors(e, geo_countries) - raise - for format_dict in streaming: - if not isinstance(format_dict, dict): - continue - format_url = format_dict.get('url') - if not format_url: - continue - format_id = format_dict.get('type') - ext = determine_ext(format_url) - if format_id == 'dash' or ext == 'mpd': - formats.extend(self._extract_mpd_formats( - format_url, display_id, mpd_id='dash', fatal=False)) - elif format_id == 'hls' or ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( - format_url, display_id, 'mp4', - entry_protocol='m3u8_native', m3u8_id='hls', - fatal=False)) - else: - formats.append({ - 'url': format_url, - 'format_id': format_id, - }) - self._sort_formats(formats) - - creator = series = None - tags = [] - thumbnails = [] - included = video.get('included') or [] - if isinstance(included, list): - for e in included: - attributes = e.get('attributes') - if not attributes: - continue - e_type = e.get('type') - if e_type == 'channel': - creator = attributes.get('name') - elif e_type == 'image': - src = attributes.get('src') - if src: - thumbnails.append({ - 'url': src, - 'width': int_or_none(attributes.get('width')), - 'height': int_or_none(attributes.get('height')), - }) - if e_type == 'show': - series = attributes.get('name') - elif e_type == 'tag': - name = attributes.get('name') - if name: - tags.append(name) - - return { - 'id': video_id, - 'display_id': 
display_id, - 'title': title, - 'description': strip_or_none(info.get('description')), - 'duration': float_or_none(info.get('videoDuration'), 1000), - 'timestamp': unified_timestamp(info.get('publishStart')), - 'series': series, - 'season_number': int_or_none(info.get('seasonNumber')), - 'episode_number': int_or_none(info.get('episodeNumber')), - 'creator': creator, - 'tags': tags, - 'thumbnails': thumbnails, - 'formats': formats, - } - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - display_id = mobj.group('id') - domain = mobj.group('domain').lstrip('www.') - country = mobj.group('country') or mobj.group('subdomain_country') or mobj.group('plus_country') - host = 'disco-api.' + domain if domain[0] == 'd' else 'eu2-prod.disco-api.com' - return self._get_disco_api_info( - url, display_id, host, 'dplay' + country, country) - - -class DiscoveryPlusIE(DPlayIE): - _VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/video' + DPlayIE._PATH_REGEX - _TESTS = [{ - 'url': 'https://www.discoveryplus.com/video/property-brothers-forever-home/food-and-family', - 'info_dict': { - 'id': '1140794', - 'display_id': 'property-brothers-forever-home/food-and-family', - 'ext': 'mp4', - 'title': 'Food and Family', - 'description': 'The brothers help a Richmond family expand their single-level home.', - 'duration': 2583.113, - 'timestamp': 1609304400, - 'upload_date': '20201230', - 'creator': 'HGTV', - 'series': 'Property Brothers: Forever Home', - 'season_number': 1, - 'episode_number': 1, - }, - 'skip': 'Available for Premium users', - }] - - def _update_disco_api_headers(self, headers, disco_base, display_id, realm): - headers['x-disco-client'] = 'WEB:UNKNOWN:dplus_us:15.0.0' - - def _download_video_playback_info(self, disco_base, video_id, headers): - return self._download_json( - disco_base + 'playback/v3/videoPlaybackInfo', - video_id, headers=headers, data=json.dumps({ - 'deviceInfo': { - 'adBlocker': False, - }, - 'videoId': video_id, - 
'wisteriaProperties': { - 'platform': 'desktop', - 'product': 'dplus_us', - }, - }).encode('utf-8'))['data']['attributes']['streaming'] - - def _real_extract(self, url): - display_id = self._match_id(url) - return self._get_disco_api_info( - url, display_id, 'us1-prod-direct.discoveryplus.com', 'go', 'us') - - -class HGTVDeIE(DPlayIE): - _VALID_URL = r'https?://de\.hgtv\.com/sendungen' + DPlayIE._PATH_REGEX - _TESTS = [{ - 'url': 'https://de.hgtv.com/sendungen/tiny-house-klein-aber-oho/wer-braucht-schon-eine-toilette/', - 'info_dict': { - 'id': '151205', - 'display_id': 'tiny-house-klein-aber-oho/wer-braucht-schon-eine-toilette', - 'ext': 'mp4', - 'title': 'Wer braucht schon eine Toilette', - 'description': 'md5:05b40a27e7aed2c9172de34d459134e2', - 'duration': 1177.024, - 'timestamp': 1595705400, - 'upload_date': '20200725', - 'creator': 'HGTV', - 'series': 'Tiny House - klein, aber oho', - 'season_number': 3, - 'episode_number': 3, - }, - 'params': { - 'format': 'bestvideo', - }, - }] - - def _real_extract(self, url): - display_id = self._match_id(url) - return self._get_disco_api_info( - url, display_id, 'eu1-prod.disco-api.com', 'hgtv', 'de') diff --git a/youtube_dl/extractor/drbonanza.py b/youtube_dl/extractor/drbonanza.py deleted file mode 100644 index 164e97c36..000000000 --- a/youtube_dl/extractor/drbonanza.py +++ /dev/null @@ -1,59 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - js_to_json, - parse_duration, - unescapeHTML, -) - - -class DRBonanzaIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?dr\.dk/bonanza/[^/]+/\d+/[^/]+/(?P\d+)/(?P[^/?#&]+)' - _TEST = { - 'url': 'http://www.dr.dk/bonanza/serie/154/matador/40312/matador---0824-komme-fremmede-', - 'info_dict': { - 'id': '40312', - 'display_id': 'matador---0824-komme-fremmede-', - 'ext': 'mp4', - 'title': 'MATADOR - 08:24. 
"Komme fremmede".', - 'description': 'md5:77b4c1ac4d4c1b9d610ab4395212ff84', - 'thumbnail': r're:^https?://.*\.(?:gif|jpg)$', - 'duration': 4613, - }, - } - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id, display_id = mobj.group('id', 'display_id') - - webpage = self._download_webpage(url, display_id) - - info = self._parse_html5_media_entries( - url, webpage, display_id, m3u8_id='hls', - m3u8_entry_protocol='m3u8_native')[0] - self._sort_formats(info['formats']) - - asset = self._parse_json( - self._search_regex( - r'(?s)currentAsset\s*=\s*({.+?})\s*]+>\s*

%s:

\s*\s*]+>\s*

([^<]+)

' % field, - webpage, field, default=None) - - info.update({ - 'id': asset.get('AssetId') or video_id, - 'display_id': display_id, - 'title': title, - 'description': extract('Programinfo'), - 'duration': parse_duration(extract('Tid')), - 'thumbnail': asset.get('AssetImageUrl'), - }) - return info diff --git a/youtube_dl/extractor/dreisat.py b/youtube_dl/extractor/dreisat.py deleted file mode 100644 index 5a07c18f4..000000000 --- a/youtube_dl/extractor/dreisat.py +++ /dev/null @@ -1,43 +0,0 @@ -from __future__ import unicode_literals - -from .zdf import ZDFIE - - -class DreiSatIE(ZDFIE): - IE_NAME = '3sat' - _VALID_URL = r'https?://(?:www\.)?3sat\.de/(?:[^/]+/)*(?P[^/?#&]+)\.html' - _TESTS = [{ - # Same as https://www.zdf.de/dokumentation/ab-18/10-wochen-sommer-102.html - 'url': 'https://www.3sat.de/film/ab-18/10-wochen-sommer-108.html', - 'md5': '0aff3e7bc72c8813f5e0fae333316a1d', - 'info_dict': { - 'id': '141007_ab18_10wochensommer_film', - 'ext': 'mp4', - 'title': 'Ab 18! - 10 Wochen Sommer', - 'description': 'md5:8253f41dc99ce2c3ff892dac2d65fe26', - 'duration': 2660, - 'timestamp': 1608604200, - 'upload_date': '20201222', - }, - }, { - 'url': 'https://www.3sat.de/gesellschaft/schweizweit/waidmannsheil-100.html', - 'info_dict': { - 'id': '140913_sendung_schweizweit', - 'ext': 'mp4', - 'title': 'Waidmannsheil', - 'description': 'md5:cce00ca1d70e21425e72c86a98a56817', - 'timestamp': 1410623100, - 'upload_date': '20140913' - }, - 'params': { - 'skip_download': True, - } - }, { - # Same as https://www.zdf.de/filme/filme-sonstige/der-hauptmann-112.html - 'url': 'https://www.3sat.de/film/spielfilm/der-hauptmann-100.html', - 'only_matching': True, - }, { - # Same as https://www.zdf.de/wissen/nano/nano-21-mai-2019-102.html, equal media ids - 'url': 'https://www.3sat.de/wissen/nano/nano-21-mai-2019-102.html', - 'only_matching': True, - }] diff --git a/youtube_dl/extractor/dropbox.py b/youtube_dl/extractor/dropbox.py deleted file mode 100644 index 14b6c00b0..000000000 --- 
a/youtube_dl/extractor/dropbox.py +++ /dev/null @@ -1,40 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import os.path -import re - -from .common import InfoExtractor -from ..compat import compat_urllib_parse_unquote -from ..utils import url_basename - - -class DropboxIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?dropbox[.]com/sh?/(?P[a-zA-Z0-9]{15})/.*' - _TESTS = [ - { - 'url': 'https://www.dropbox.com/s/nelirfsxnmcfbfh/youtube-dl%20test%20video%20%27%C3%A4%22BaW_jenozKc.mp4?dl=0', - 'info_dict': { - 'id': 'nelirfsxnmcfbfh', - 'ext': 'mp4', - 'title': 'youtube-dl test video \'ä"BaW_jenozKc' - } - }, { - 'url': 'https://www.dropbox.com/sh/662glsejgzoj9sr/AAByil3FGH9KFNZ13e08eSa1a/Pregame%20Ceremony%20Program%20PA%2020140518.m4v', - 'only_matching': True, - }, - ] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - fn = compat_urllib_parse_unquote(url_basename(url)) - title = os.path.splitext(fn)[0] - video_url = re.sub(r'[?&]dl=0', '', url) - video_url += ('?' if '?' not in video_url else '&') + 'dl=1' - - return { - 'id': video_id, - 'title': title, - 'url': video_url, - } diff --git a/youtube_dl/extractor/drtuber.py b/youtube_dl/extractor/drtuber.py deleted file mode 100644 index 2baea585b..000000000 --- a/youtube_dl/extractor/drtuber.py +++ /dev/null @@ -1,112 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - int_or_none, - NO_DEFAULT, - parse_duration, - str_to_int, -) - - -class DrTuberIE(InfoExtractor): - _VALID_URL = r'https?://(?:(?:www|m)\.)?drtuber\.com/(?:video|embed)/(?P\d+)(?:/(?P[\w-]+))?' 
- _TESTS = [{ - 'url': 'http://www.drtuber.com/video/1740434/hot-perky-blonde-naked-golf', - 'md5': '93e680cf2536ad0dfb7e74d94a89facd', - 'info_dict': { - 'id': '1740434', - 'display_id': 'hot-perky-blonde-naked-golf', - 'ext': 'mp4', - 'title': 'hot perky blonde naked golf', - 'like_count': int, - 'comment_count': int, - 'categories': ['Babe', 'Blonde', 'Erotic', 'Outdoor', 'Softcore', 'Solo'], - 'thumbnail': r're:https?://.*\.jpg$', - 'age_limit': 18, - } - }, { - 'url': 'http://www.drtuber.com/embed/489939', - 'only_matching': True, - }, { - 'url': 'http://m.drtuber.com/video/3893529/lingerie-blowjob-from-beautiful-teen', - 'only_matching': True, - }] - - @staticmethod - def _extract_urls(webpage): - return re.findall( - r']+?src=["\'](?P(?:https?:)?//(?:www\.)?drtuber\.com/embed/\d+)', - webpage) - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - display_id = mobj.group('display_id') or video_id - - webpage = self._download_webpage( - 'http://www.drtuber.com/video/%s' % video_id, display_id) - - video_data = self._download_json( - 'http://www.drtuber.com/player_config_json/', video_id, query={ - 'vid': video_id, - 'embed': 0, - 'aid': 0, - 'domain_id': 0, - }) - - formats = [] - for format_id, video_url in video_data['files'].items(): - if video_url: - formats.append({ - 'format_id': format_id, - 'quality': 2 if format_id == 'hq' else 1, - 'url': video_url - }) - self._sort_formats(formats) - - duration = int_or_none(video_data.get('duration')) or parse_duration( - video_data.get('duration_format')) - - title = self._html_search_regex( - (r']+class=["\']title[^>]+>([^<]+)', - r'([^<]+)\s*@\s+DrTuber', - r'class="title_watch"[^>]*><(?:p|h\d+)[^>]*>([^<]+)<', - r'<p[^>]+class="title_substrate">([^<]+)</p>', - r'<title>([^<]+) - \d+'), - webpage, 'title') - - thumbnail = self._html_search_regex( - r'poster="([^"]+)"', - webpage, 'thumbnail', fatal=False) - - def extract_count(id_, name, default=NO_DEFAULT): - 
return str_to_int(self._html_search_regex( - r'<span[^>]+(?:class|id)="%s"[^>]*>([\d,\.]+)</span>' % id_, - webpage, '%s count' % name, default=default, fatal=False)) - - like_count = extract_count('rate_likes', 'like') - dislike_count = extract_count('rate_dislikes', 'dislike', default=None) - comment_count = extract_count('comments_count', 'comment') - - cats_str = self._search_regex( - r'<div[^>]+class="categories_list">(.+?)</div>', - webpage, 'categories', fatal=False) - categories = [] if not cats_str else re.findall( - r'<a title="([^"]+)"', cats_str) - - return { - 'id': video_id, - 'display_id': display_id, - 'formats': formats, - 'title': title, - 'thumbnail': thumbnail, - 'like_count': like_count, - 'dislike_count': dislike_count, - 'comment_count': comment_count, - 'categories': categories, - 'age_limit': self._rta_search(webpage), - 'duration': duration, - } diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py deleted file mode 100644 index c0036adb6..000000000 --- a/youtube_dl/extractor/drtv.py +++ /dev/null @@ -1,355 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import binascii -import hashlib -import re - - -from .common import InfoExtractor -from ..aes import aes_cbc_decrypt -from ..compat import compat_urllib_parse_unquote -from ..utils import ( - bytes_to_intlist, - ExtractorError, - int_or_none, - intlist_to_bytes, - float_or_none, - mimetype2ext, - str_or_none, - try_get, - unified_timestamp, - update_url_query, - url_or_none, -) - - -class DRTVIE(InfoExtractor): - _VALID_URL = r'''(?x) - https?:// - (?: - (?:www\.)?dr\.dk/(?:tv/se|nyheder|radio(?:/ondemand)?)/(?:[^/]+/)*| - (?:www\.)?(?:dr\.dk|dr-massive\.com)/drtv/(?:se|episode|program)/ - ) - (?P<id>[\da-z_-]+) - ''' - _GEO_BYPASS = False - _GEO_COUNTRIES = ['DK'] - IE_NAME = 'drtv' - _TESTS = [{ - 'url': 'https://www.dr.dk/tv/se/boern/ultra/klassen-ultra/klassen-darlig-taber-10', - 'md5': '25e659cccc9a2ed956110a299fdf5983', - 'info_dict': { - 'id': 
'klassen-darlig-taber-10', - 'ext': 'mp4', - 'title': 'Klassen - Dårlig taber (10)', - 'description': 'md5:815fe1b7fa656ed80580f31e8b3c79aa', - 'timestamp': 1539085800, - 'upload_date': '20181009', - 'duration': 606.84, - 'series': 'Klassen', - 'season': 'Klassen I', - 'season_number': 1, - 'season_id': 'urn:dr:mu:bundle:57d7e8216187a4031cfd6f6b', - 'episode': 'Episode 10', - 'episode_number': 10, - 'release_year': 2016, - }, - 'expected_warnings': ['Unable to download f4m manifest'], - }, { - # embed - 'url': 'https://www.dr.dk/nyheder/indland/live-christianias-rydning-af-pusher-street-er-i-gang', - 'info_dict': { - 'id': 'urn:dr:mu:programcard:57c926176187a50a9c6e83c6', - 'ext': 'mp4', - 'title': 'christiania pusher street ryddes drdkrjpo', - 'description': 'md5:2a71898b15057e9b97334f61d04e6eb5', - 'timestamp': 1472800279, - 'upload_date': '20160902', - 'duration': 131.4, - }, - 'params': { - 'skip_download': True, - }, - 'expected_warnings': ['Unable to download f4m manifest'], - }, { - # with SignLanguage formats - 'url': 'https://www.dr.dk/tv/se/historien-om-danmark/-/historien-om-danmark-stenalder', - 'info_dict': { - 'id': 'historien-om-danmark-stenalder', - 'ext': 'mp4', - 'title': 'Historien om Danmark: Stenalder', - 'description': 'md5:8c66dcbc1669bbc6f873879880f37f2a', - 'timestamp': 1546628400, - 'upload_date': '20190104', - 'duration': 3502.56, - 'formats': 'mincount:20', - }, - 'params': { - 'skip_download': True, - }, - }, { - 'url': 'https://www.dr.dk/radio/p4kbh/regionale-nyheder-kh4/p4-nyheder-2019-06-26-17-30-9', - 'only_matching': True, - }, { - 'url': 'https://www.dr.dk/drtv/se/bonderoeven_71769', - 'info_dict': { - 'id': '00951930010', - 'ext': 'mp4', - 'title': 'Bonderøven (1:8)', - 'description': 'md5:3cf18fc0d3b205745d4505f896af8121', - 'timestamp': 1546542000, - 'upload_date': '20190103', - 'duration': 2576.6, - }, - 'params': { - 'skip_download': True, - }, - }, { - 'url': 'https://www.dr.dk/drtv/episode/bonderoeven_71769', - 
'only_matching': True, - }, { - 'url': 'https://dr-massive.com/drtv/se/bonderoeven_71769', - 'only_matching': True, - }, { - 'url': 'https://www.dr.dk/drtv/program/jagten_220924', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - if '>Programmet er ikke længere tilgængeligt' in webpage: - raise ExtractorError( - 'Video %s is not available' % video_id, expected=True) - - video_id = self._search_regex( - (r'data-(?:material-identifier|episode-slug)="([^"]+)"', - r'data-resource="[^>"]+mu/programcard/expanded/([^"]+)"'), - webpage, 'video id', default=None) - - if not video_id: - video_id = self._search_regex( - r'(urn(?:%3A|:)dr(?:%3A|:)mu(?:%3A|:)programcard(?:%3A|:)[\da-f]+)', - webpage, 'urn', default=None) - if video_id: - video_id = compat_urllib_parse_unquote(video_id) - - _PROGRAMCARD_BASE = 'https://www.dr.dk/mu-online/api/1.4/programcard' - query = {'expanded': 'true'} - - if video_id: - programcard_url = '%s/%s' % (_PROGRAMCARD_BASE, video_id) - else: - programcard_url = _PROGRAMCARD_BASE - page = self._parse_json( - self._search_regex( - r'data\s*=\s*({.+?})\s*(?:;|</script)', webpage, - 'data'), '1')['cache']['page'] - page = page[list(page.keys())[0]] - item = try_get( - page, (lambda x: x['item'], lambda x: x['entries'][0]['item']), - dict) - video_id = item['customId'].split(':')[-1] - query['productionnumber'] = video_id - - data = self._download_json( - programcard_url, video_id, 'Downloading video JSON', query=query) - - title = str_or_none(data.get('Title')) or re.sub( - r'\s*\|\s*(?:TV\s*\|\s*DR|DRTV)$', '', - self._og_search_title(webpage)) - description = self._og_search_description( - webpage, default=None) or data.get('Description') - - timestamp = unified_timestamp( - data.get('PrimaryBroadcastStartTime') or data.get('SortDateTime')) - - thumbnail = None - duration = None - - restricted_to_denmark = False - - formats = [] - subtitles = {} - 
- assets = [] - primary_asset = data.get('PrimaryAsset') - if isinstance(primary_asset, dict): - assets.append(primary_asset) - secondary_assets = data.get('SecondaryAssets') - if isinstance(secondary_assets, list): - for secondary_asset in secondary_assets: - if isinstance(secondary_asset, dict): - assets.append(secondary_asset) - - def hex_to_bytes(hex): - return binascii.a2b_hex(hex.encode('ascii')) - - def decrypt_uri(e): - n = int(e[2:10], 16) - a = e[10 + n:] - data = bytes_to_intlist(hex_to_bytes(e[10:10 + n])) - key = bytes_to_intlist(hashlib.sha256( - ('%s:sRBzYNXBzkKgnjj8pGtkACch' % a).encode('utf-8')).digest()) - iv = bytes_to_intlist(hex_to_bytes(a)) - decrypted = aes_cbc_decrypt(data, key, iv) - return intlist_to_bytes( - decrypted[:-decrypted[-1]]).decode('utf-8').split('?')[0] - - for asset in assets: - kind = asset.get('Kind') - if kind == 'Image': - thumbnail = url_or_none(asset.get('Uri')) - elif kind in ('VideoResource', 'AudioResource'): - duration = float_or_none(asset.get('DurationInMilliseconds'), 1000) - restricted_to_denmark = asset.get('RestrictedToDenmark') - asset_target = asset.get('Target') - for link in asset.get('Links', []): - uri = link.get('Uri') - if not uri: - encrypted_uri = link.get('EncryptedUri') - if not encrypted_uri: - continue - try: - uri = decrypt_uri(encrypted_uri) - except Exception: - self.report_warning( - 'Unable to decrypt EncryptedUri', video_id) - continue - uri = url_or_none(uri) - if not uri: - continue - target = link.get('Target') - format_id = target or '' - if asset_target in ('SpokenSubtitles', 'SignLanguage', 'VisuallyInterpreted'): - preference = -1 - format_id += '-%s' % asset_target - elif asset_target == 'Default': - preference = 1 - else: - preference = None - if target == 'HDS': - f4m_formats = self._extract_f4m_formats( - uri + '?hdcore=3.3.0&plugin=aasp-3.3.0.99.43', - video_id, preference, f4m_id=format_id, fatal=False) - if kind == 'AudioResource': - for f in f4m_formats: - f['vcodec'] = 
'none' - formats.extend(f4m_formats) - elif target == 'HLS': - formats.extend(self._extract_m3u8_formats( - uri, video_id, 'mp4', entry_protocol='m3u8_native', - preference=preference, m3u8_id=format_id, - fatal=False)) - else: - bitrate = link.get('Bitrate') - if bitrate: - format_id += '-%s' % bitrate - formats.append({ - 'url': uri, - 'format_id': format_id, - 'tbr': int_or_none(bitrate), - 'ext': link.get('FileFormat'), - 'vcodec': 'none' if kind == 'AudioResource' else None, - 'preference': preference, - }) - subtitles_list = asset.get('SubtitlesList') or asset.get('Subtitleslist') - if isinstance(subtitles_list, list): - LANGS = { - 'Danish': 'da', - } - for subs in subtitles_list: - if not isinstance(subs, dict): - continue - sub_uri = url_or_none(subs.get('Uri')) - if not sub_uri: - continue - lang = subs.get('Language') or 'da' - subtitles.setdefault(LANGS.get(lang, lang), []).append({ - 'url': sub_uri, - 'ext': mimetype2ext(subs.get('MimeType')) or 'vtt' - }) - - if not formats and restricted_to_denmark: - self.raise_geo_restricted( - 'Unfortunately, DR is not allowed to show this program outside Denmark.', - countries=self._GEO_COUNTRIES) - - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'timestamp': timestamp, - 'duration': duration, - 'formats': formats, - 'subtitles': subtitles, - 'series': str_or_none(data.get('SeriesTitle')), - 'season': str_or_none(data.get('SeasonTitle')), - 'season_number': int_or_none(data.get('SeasonNumber')), - 'season_id': str_or_none(data.get('SeasonUrn')), - 'episode': str_or_none(data.get('EpisodeTitle')), - 'episode_number': int_or_none(data.get('EpisodeNumber')), - 'release_year': int_or_none(data.get('ProductionYear')), - } - - -class DRTVLiveIE(InfoExtractor): - IE_NAME = 'drtv:live' - _VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv|TV)/live/(?P<id>[\da-z-]+)' - _GEO_COUNTRIES = ['DK'] - _TEST = { - 'url': 
'https://www.dr.dk/tv/live/dr1', - 'info_dict': { - 'id': 'dr1', - 'ext': 'mp4', - 'title': 're:^DR1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - } - - def _real_extract(self, url): - channel_id = self._match_id(url) - channel_data = self._download_json( - 'https://www.dr.dk/mu-online/api/1.0/channel/' + channel_id, - channel_id) - title = self._live_title(channel_data['Title']) - - formats = [] - for streaming_server in channel_data.get('StreamingServers', []): - server = streaming_server.get('Server') - if not server: - continue - link_type = streaming_server.get('LinkType') - for quality in streaming_server.get('Qualities', []): - for stream in quality.get('Streams', []): - stream_path = stream.get('Stream') - if not stream_path: - continue - stream_url = update_url_query( - '%s/%s' % (server, stream_path), {'b': ''}) - if link_type == 'HLS': - formats.extend(self._extract_m3u8_formats( - stream_url, channel_id, 'mp4', - m3u8_id=link_type, fatal=False, live=True)) - elif link_type == 'HDS': - formats.extend(self._extract_f4m_formats(update_url_query( - '%s/%s' % (server, stream_path), {'hdcore': '3.7.0'}), - channel_id, f4m_id=link_type, fatal=False)) - self._sort_formats(formats) - - return { - 'id': channel_id, - 'title': title, - 'thumbnail': channel_data.get('PrimaryImageUri'), - 'formats': formats, - 'is_live': True, - } diff --git a/youtube_dl/extractor/dtube.py b/youtube_dl/extractor/dtube.py deleted file mode 100644 index 114d2dbe3..000000000 --- a/youtube_dl/extractor/dtube.py +++ /dev/null @@ -1,83 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import json -import re -from socket import timeout - -from .common import InfoExtractor -from ..utils import ( - int_or_none, - parse_iso8601, -) - - -class DTubeIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?d\.tube/(?:#!/)?v/(?P<uploader_id>[0-9a-z.-]+)/(?P<id>[0-9a-z]{8})' - _TEST = { - 'url': 
'https://d.tube/#!/v/broncnutz/x380jtr1', - 'md5': '9f29088fa08d699a7565ee983f56a06e', - 'info_dict': { - 'id': 'x380jtr1', - 'ext': 'mp4', - 'title': 'Lefty 3-Rings is Back Baby!! NCAA Picks', - 'description': 'md5:60be222088183be3a42f196f34235776', - 'uploader_id': 'broncnutz', - 'upload_date': '20190107', - 'timestamp': 1546854054, - }, - 'params': { - 'format': '480p', - }, - } - - def _real_extract(self, url): - uploader_id, video_id = re.match(self._VALID_URL, url).groups() - result = self._download_json('https://api.steemit.com/', video_id, data=json.dumps({ - 'jsonrpc': '2.0', - 'method': 'get_content', - 'params': [uploader_id, video_id], - }).encode())['result'] - - metadata = json.loads(result['json_metadata']) - video = metadata['video'] - content = video['content'] - info = video.get('info', {}) - title = info.get('title') or result['title'] - - def canonical_url(h): - if not h: - return None - return 'https://video.dtube.top/ipfs/' + h - - formats = [] - for q in ('240', '480', '720', '1080', ''): - video_url = canonical_url(content.get('video%shash' % q)) - if not video_url: - continue - format_id = (q + 'p') if q else 'Source' - try: - self.to_screen('%s: Checking %s video format URL' % (video_id, format_id)) - self._downloader._opener.open(video_url, timeout=5).close() - except timeout: - self.to_screen( - '%s: %s URL is invalid, skipping' % (video_id, format_id)) - continue - formats.append({ - 'format_id': format_id, - 'url': video_url, - 'height': int_or_none(q), - 'ext': 'mp4', - }) - - return { - 'id': video_id, - 'title': title, - 'description': content.get('description'), - 'thumbnail': canonical_url(info.get('snaphash')), - 'tags': content.get('tags') or metadata.get('tags'), - 'duration': info.get('duration'), - 'formats': formats, - 'timestamp': parse_iso8601(result.get('created')), - 'uploader_id': uploader_id, - } diff --git a/youtube_dl/extractor/dumpert.py b/youtube_dl/extractor/dumpert.py deleted file mode 100644 index 
d9d9afdec..000000000 --- a/youtube_dl/extractor/dumpert.py +++ /dev/null @@ -1,80 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - int_or_none, - qualities, -) - - -class DumpertIE(InfoExtractor): - _VALID_URL = r'(?P<protocol>https?)://(?:(?:www|legacy)\.)?dumpert\.nl/(?:mediabase|embed|item)/(?P<id>[0-9]+[/_][0-9a-zA-Z]+)' - _TESTS = [{ - 'url': 'https://www.dumpert.nl/item/6646981_951bc60f', - 'md5': '1b9318d7d5054e7dcb9dc7654f21d643', - 'info_dict': { - 'id': '6646981/951bc60f', - 'ext': 'mp4', - 'title': 'Ik heb nieuws voor je', - 'description': 'Niet schrikken hoor', - 'thumbnail': r're:^https?://.*\.jpg$', - } - }, { - 'url': 'https://www.dumpert.nl/embed/6675421_dc440fe7', - 'only_matching': True, - }, { - 'url': 'http://legacy.dumpert.nl/mediabase/6646981/951bc60f', - 'only_matching': True, - }, { - 'url': 'http://legacy.dumpert.nl/embed/6675421/dc440fe7', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url).replace('_', '/') - item = self._download_json( - 'http://api-live.dumpert.nl/mobile_api/json/info/' + video_id.replace('/', '_'), - video_id)['items'][0] - title = item['title'] - media = next(m for m in item['media'] if m.get('mediatype') == 'VIDEO') - - quality = qualities(['flv', 'mobile', 'tablet', '720p']) - formats = [] - for variant in media.get('variants', []): - uri = variant.get('uri') - if not uri: - continue - version = variant.get('version') - formats.append({ - 'url': uri, - 'format_id': version, - 'quality': quality(version), - }) - self._sort_formats(formats) - - thumbnails = [] - stills = item.get('stills') or {} - for t in ('thumb', 'still'): - for s in ('', '-medium', '-large'): - still_id = t + s - still_url = stills.get(still_id) - if not still_url: - continue - thumbnails.append({ - 'id': still_id, - 'url': still_url, - }) - - stats = item.get('stats') or {} - - return { - 'id': video_id, - 'title': title, - 
'description': item.get('description'), - 'thumbnails': thumbnails, - 'formats': formats, - 'duration': int_or_none(media.get('duration')), - 'like_count': int_or_none(stats.get('kudos_total')), - 'view_count': int_or_none(stats.get('views_total')), - } diff --git a/youtube_dl/extractor/dvtv.py b/youtube_dl/extractor/dvtv.py deleted file mode 100644 index de7f6d670..000000000 --- a/youtube_dl/extractor/dvtv.py +++ /dev/null @@ -1,184 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - determine_ext, - ExtractorError, - int_or_none, - js_to_json, - mimetype2ext, - try_get, - unescapeHTML, - parse_iso8601, -) - - -class DVTVIE(InfoExtractor): - IE_NAME = 'dvtv' - IE_DESC = 'http://video.aktualne.cz/' - _VALID_URL = r'https?://video\.aktualne\.cz/(?:[^/]+/)+r~(?P<id>[0-9a-f]{32})' - _TESTS = [{ - 'url': 'http://video.aktualne.cz/dvtv/vondra-o-ceskem-stoleti-pri-pohledu-na-havla-mi-bylo-trapne/r~e5efe9ca855511e4833a0025900fea04/', - 'md5': '67cb83e4a955d36e1b5d31993134a0c2', - 'info_dict': { - 'id': 'dc0768de855511e49e4b0025900fea04', - 'ext': 'mp4', - 'title': 'Vondra o Českém století: Při pohledu na Havla mi bylo trapně', - 'duration': 1484, - 'upload_date': '20141217', - 'timestamp': 1418792400, - } - }, { - 'url': 'http://video.aktualne.cz/dvtv/dvtv-16-12-2014-utok-talibanu-boj-o-kliniku-uprchlici/r~973eb3bc854e11e498be002590604f2e/', - 'info_dict': { - 'title': r'DVTV 16. 12. 2014: útok Talibanu, boj o kliniku, uprchlíci', - 'id': '973eb3bc854e11e498be002590604f2e', - }, - 'playlist': [{ - 'md5': 'da7ca6be4935532241fa9520b3ad91e4', - 'info_dict': { - 'id': 'b0b40906854d11e4bdad0025900fea04', - 'ext': 'mp4', - 'title': 'Drtinová Veselovský TV 16. 12. 
2014: Témata dne', - 'description': 'md5:0916925dea8e30fe84222582280b47a0', - 'timestamp': 1418760010, - 'upload_date': '20141216', - } - }, { - 'md5': '5f7652a08b05009c1292317b449ffea2', - 'info_dict': { - 'id': '420ad9ec854a11e4bdad0025900fea04', - 'ext': 'mp4', - 'title': 'Školní masakr možná změní boj s Talibanem, říká novinářka', - 'description': 'md5:ff2f9f6de73c73d7cef4f756c1c1af42', - 'timestamp': 1418760010, - 'upload_date': '20141216', - } - }, { - 'md5': '498eb9dfa97169f409126c617e2a3d64', - 'info_dict': { - 'id': '95d35580846a11e4b6d20025900fea04', - 'ext': 'mp4', - 'title': 'Boj o kliniku: Veřejný zájem, nebo právo na majetek?', - 'description': 'md5:889fe610a70fee5511dc3326a089188e', - 'timestamp': 1418760010, - 'upload_date': '20141216', - } - }, { - 'md5': 'b8dc6b744844032dab6ba3781a7274b9', - 'info_dict': { - 'id': '6fe14d66853511e4833a0025900fea04', - 'ext': 'mp4', - 'title': 'Pánek: Odmítání syrských uprchlíků je ostudou české vlády', - 'description': 'md5:544f86de6d20c4815bea11bf2ac3004f', - 'timestamp': 1418760010, - 'upload_date': '20141216', - } - }], - }, { - 'url': 'https://video.aktualne.cz/dvtv/zeman-si-jen-leci-mindraky-sobotku-nenavidi-a-babis-se-mu-te/r~960cdb3a365a11e7a83b0025900fea04/', - 'md5': 'f8efe9656017da948369aa099788c8ea', - 'info_dict': { - 'id': '3c496fec365911e7a6500025900fea04', - 'ext': 'mp4', - 'title': 'Zeman si jen léčí mindráky, Sobotku nenávidí a Babiš se mu teď hodí, tvrdí Kmenta', - 'duration': 1103, - 'upload_date': '20170511', - 'timestamp': 1494514200, - }, - 'params': { - 'skip_download': True, - }, - }, { - 'url': 'http://video.aktualne.cz/v-cechach-poprve-zazni-zelenkova-zrestaurovana-mse/r~45b4b00483ec11e4883b002590604f2e/', - 'only_matching': True, - }, { - # Test live stream video (liveStarter) parsing - 'url': 'https://video.aktualne.cz/dvtv/zive-mistryne-sveta-eva-samkova-po-navratu-ze-sampionatu/r~182654c2288811e990fd0cc47ab5f122/', - 'md5': '2e552e483f2414851ca50467054f9d5d', - 'info_dict': { - 'id': 
'8d116360288011e98c840cc47ab5f122', - 'ext': 'mp4', - 'title': 'Živě: Mistryně světa Eva Samková po návratu ze šampionátu', - 'upload_date': '20190204', - 'timestamp': 1549289591, - }, - 'params': { - # Video content is no longer available - 'skip_download': True, - }, - }] - - def _parse_video_metadata(self, js, video_id, timestamp): - data = self._parse_json(js, video_id, transform_source=js_to_json) - title = unescapeHTML(data['title']) - - live_starter = try_get(data, lambda x: x['plugins']['liveStarter'], dict) - if live_starter: - data.update(live_starter) - - formats = [] - for tracks in data.get('tracks', {}).values(): - for video in tracks: - video_url = video.get('src') - if not video_url: - continue - video_type = video.get('type') - ext = determine_ext(video_url, mimetype2ext(video_type)) - if video_type == 'application/vnd.apple.mpegurl' or ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( - video_url, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls', fatal=False)) - elif video_type == 'application/dash+xml' or ext == 'mpd': - formats.extend(self._extract_mpd_formats( - video_url, video_id, mpd_id='dash', fatal=False)) - else: - label = video.get('label') - height = self._search_regex( - r'^(\d+)[pP]', label or '', 'height', default=None) - format_id = ['http'] - for f in (ext, label): - if f: - format_id.append(f) - formats.append({ - 'url': video_url, - 'format_id': '-'.join(format_id), - 'height': int_or_none(height), - }) - self._sort_formats(formats) - - return { - 'id': data.get('mediaid') or video_id, - 'title': title, - 'description': data.get('description'), - 'thumbnail': data.get('image'), - 'duration': int_or_none(data.get('duration')), - 'timestamp': int_or_none(timestamp), - 'formats': formats - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - timestamp = parse_iso8601(self._html_search_meta( - 'article:published_time', webpage, 'published 
time', default=None)) - - items = re.findall(r'(?s)playlist\.push\(({.+?})\);', webpage) - if items: - return self.playlist_result( - [self._parse_video_metadata(i, video_id, timestamp) for i in items], - video_id, self._html_search_meta('twitter:title', webpage)) - - item = self._search_regex( - r'(?s)BBXPlayer\.setup\((.+?)\);', - webpage, 'video', default=None) - if item: - # remove function calls (ex. htmldeentitize) - # TODO this should be fixed in a general way in the js_to_json - item = re.sub(r'\w+?\((.+)\)', r'\1', item) - return self._parse_video_metadata(item, video_id, timestamp) - - raise ExtractorError('Could not find neither video nor playlist') diff --git a/youtube_dl/extractor/dw.py b/youtube_dl/extractor/dw.py deleted file mode 100644 index d740652f1..000000000 --- a/youtube_dl/extractor/dw.py +++ /dev/null @@ -1,108 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - int_or_none, - unified_strdate, -) -from ..compat import compat_urlparse - - -class DWIE(InfoExtractor): - IE_NAME = 'dw' - _VALID_URL = r'https?://(?:www\.)?dw\.com/(?:[^/]+/)+(?:av|e)-(?P<id>\d+)' - _TESTS = [{ - # video - 'url': 'http://www.dw.com/en/intelligent-light/av-19112290', - 'md5': '7372046e1815c5a534b43f3c3c36e6e9', - 'info_dict': { - 'id': '19112290', - 'ext': 'mp4', - 'title': 'Intelligent light', - 'description': 'md5:90e00d5881719f2a6a5827cb74985af1', - 'upload_date': '20160311', - } - }, { - # audio - 'url': 'http://www.dw.com/en/worldlink-my-business/av-19111941', - 'md5': '2814c9a1321c3a51f8a7aeb067a360dd', - 'info_dict': { - 'id': '19111941', - 'ext': 'mp3', - 'title': 'WorldLink: My business', - 'description': 'md5:bc9ca6e4e063361e21c920c53af12405', - 'upload_date': '20160311', - } - }, { - # DW documentaries, only last for one or two weeks - 'url': 'http://www.dw.com/en/documentaries-welcome-to-the-90s-2016-05-21/e-19220158-9798', - 'md5': '56b6214ef463bfb9a3b71aeb886f3cf1', - 
'info_dict': { - 'id': '19274438', - 'ext': 'mp4', - 'title': 'Welcome to the 90s – Hip Hop', - 'description': 'Welcome to the 90s - The Golden Decade of Hip Hop', - 'upload_date': '20160521', - }, - 'skip': 'Video removed', - }] - - def _real_extract(self, url): - media_id = self._match_id(url) - webpage = self._download_webpage(url, media_id) - hidden_inputs = self._hidden_inputs(webpage) - title = hidden_inputs['media_title'] - media_id = hidden_inputs.get('media_id') or media_id - - if hidden_inputs.get('player_type') == 'video' and hidden_inputs.get('stream_file') == '1': - formats = self._extract_smil_formats( - 'http://www.dw.com/smil/v-%s' % media_id, media_id, - transform_source=lambda s: s.replace( - 'rtmp://tv-od.dw.de/flash/', - 'http://tv-download.dw.de/dwtv_video/flv/')) - self._sort_formats(formats) - else: - formats = [{'url': hidden_inputs['file_name']}] - - upload_date = hidden_inputs.get('display_date') - if not upload_date: - upload_date = self._html_search_regex( - r'<span[^>]+class="date">([0-9.]+)\s*\|', webpage, - 'upload date', default=None) - upload_date = unified_strdate(upload_date) - - return { - 'id': media_id, - 'title': title, - 'description': self._og_search_description(webpage), - 'thumbnail': hidden_inputs.get('preview_image'), - 'duration': int_or_none(hidden_inputs.get('file_duration')), - 'upload_date': upload_date, - 'formats': formats, - } - - -class DWArticleIE(InfoExtractor): - IE_NAME = 'dw:article' - _VALID_URL = r'https?://(?:www\.)?dw\.com/(?:[^/]+/)+a-(?P<id>\d+)' - _TEST = { - 'url': 'http://www.dw.com/en/no-hope-limited-options-for-refugees-in-idomeni/a-19111009', - 'md5': '8ca657f9d068bbef74d6fc38b97fc869', - 'info_dict': { - 'id': '19105868', - 'ext': 'mp4', - 'title': 'The harsh life of refugees in Idomeni', - 'description': 'md5:196015cc7e48ebf474db9399420043c7', - 'upload_date': '20160310', - } - } - - def _real_extract(self, url): - article_id = self._match_id(url) - webpage = self._download_webpage(url, 
article_id) - hidden_inputs = self._hidden_inputs(webpage) - media_id = hidden_inputs['media_id'] - media_path = self._search_regex(r'href="([^"]+av-%s)"\s+class="overlayLink"' % media_id, webpage, 'media url') - media_url = compat_urlparse.urljoin(url, media_path) - return self.url_result(media_url, 'DW', media_id) diff --git a/youtube_dl/extractor/eagleplatform.py b/youtube_dl/extractor/eagleplatform.py deleted file mode 100644 index 36fef07b7..000000000 --- a/youtube_dl/extractor/eagleplatform.py +++ /dev/null @@ -1,206 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..compat import compat_HTTPError -from ..utils import ( - ExtractorError, - int_or_none, - unsmuggle_url, - url_or_none, -) - - -class EaglePlatformIE(InfoExtractor): - _VALID_URL = r'''(?x) - (?: - eagleplatform:(?P<custom_host>[^/]+):| - https?://(?P<host>.+?\.media\.eagleplatform\.com)/index/player\?.*\brecord_id= - ) - (?P<id>\d+) - ''' - _TESTS = [{ - # http://lenta.ru/news/2015/03/06/navalny/ - 'url': 'http://lentaru.media.eagleplatform.com/index/player?player=new&record_id=227304&player_template_id=5201', - # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used - 'info_dict': { - 'id': '227304', - 'ext': 'mp4', - 'title': 'Навальный вышел на свободу', - 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 87, - 'view_count': int, - 'age_limit': 0, - }, - }, { - # http://muz-tv.ru/play/7129/ - # http://media.clipyou.ru/index/player?record_id=12820&width=730&height=415&autoplay=true - 'url': 'eagleplatform:media.clipyou.ru:12820', - 'md5': '358597369cf8ba56675c1df15e7af624', - 'info_dict': { - 'id': '12820', - 'ext': 'mp4', - 'title': "'O Sole Mio", - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 216, - 'view_count': int, - }, - 'skip': 'Georestricted', - }, { - # referrer protected video 
(https://tvrain.ru/lite/teleshow/kak_vse_nachinalos/namin-418921/) - 'url': 'eagleplatform:tvrainru.media.eagleplatform.com:582306', - 'only_matching': True, - }] - - @staticmethod - def _extract_url(webpage): - # Regular iframe embedding - mobj = re.search( - r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//.+?\.media\.eagleplatform\.com/index/player\?.+?)\1', - webpage) - if mobj is not None: - return mobj.group('url') - PLAYER_JS_RE = r''' - <script[^>]+ - src=(?P<qjs>["\'])(?:https?:)?//(?P<host>(?:(?!(?P=qjs)).)+\.media\.eagleplatform\.com)/player/player\.js(?P=qjs) - .+? - ''' - # "Basic usage" embedding (see http://dultonmedia.github.io/eplayer/) - mobj = re.search( - r'''(?xs) - %s - <div[^>]+ - class=(?P<qclass>["\'])eagleplayer(?P=qclass)[^>]+ - data-id=["\'](?P<id>\d+) - ''' % PLAYER_JS_RE, webpage) - if mobj is not None: - return 'eagleplatform:%(host)s:%(id)s' % mobj.groupdict() - # Generalization of "Javascript code usage", "Combined usage" and - # "Usage without attaching to DOM" embeddings (see - # http://dultonmedia.github.io/eplayer/) - mobj = re.search( - r'''(?xs) - %s - <script> - .+? - new\s+EaglePlayer\( - (?:[^,]+\s*,\s*)? - { - .+? - \bid\s*:\s*["\']?(?P<id>\d+) - .+? - } - \s*\) - .+? 
- </script> - ''' % PLAYER_JS_RE, webpage) - if mobj is not None: - return 'eagleplatform:%(host)s:%(id)s' % mobj.groupdict() - - @staticmethod - def _handle_error(response): - status = int_or_none(response.get('status', 200)) - if status != 200: - raise ExtractorError(' '.join(response['errors']), expected=True) - - def _download_json(self, url_or_request, video_id, *args, **kwargs): - try: - response = super(EaglePlatformIE, self)._download_json( - url_or_request, video_id, *args, **kwargs) - except ExtractorError as ee: - if isinstance(ee.cause, compat_HTTPError): - response = self._parse_json(ee.cause.read().decode('utf-8'), video_id) - self._handle_error(response) - raise - return response - - def _get_video_url(self, url_or_request, video_id, note='Downloading JSON metadata'): - return self._download_json(url_or_request, video_id, note)['data'][0] - - def _real_extract(self, url): - url, smuggled_data = unsmuggle_url(url, {}) - - mobj = re.match(self._VALID_URL, url) - host, video_id = mobj.group('custom_host') or mobj.group('host'), mobj.group('id') - - headers = {} - query = { - 'id': video_id, - } - - referrer = smuggled_data.get('referrer') - if referrer: - headers['Referer'] = referrer - query['referrer'] = referrer - - player_data = self._download_json( - 'http://%s/api/player_data' % host, video_id, - headers=headers, query=query) - - media = player_data['data']['playlist']['viewports'][0]['medialist'][0] - - title = media['title'] - description = media.get('description') - thumbnail = self._proto_relative_url(media.get('snapshot'), 'http:') - duration = int_or_none(media.get('duration')) - view_count = int_or_none(media.get('views')) - - age_restriction = media.get('age_restriction') - age_limit = None - if age_restriction: - age_limit = 0 if age_restriction == 'allow_all' else 18 - - secure_m3u8 = self._proto_relative_url(media['sources']['secure_m3u8']['auto'], 'http:') - - formats = [] - - m3u8_url = self._get_video_url(secure_m3u8, video_id, 
'Downloading m3u8 JSON') - m3u8_formats = self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls', fatal=False) - formats.extend(m3u8_formats) - - m3u8_formats_dict = {} - for f in m3u8_formats: - if f.get('height') is not None: - m3u8_formats_dict[f['height']] = f - - mp4_data = self._download_json( - # Secure mp4 URL is constructed according to Player.prototype.mp4 from - # http://lentaru.media.eagleplatform.com/player/player.js - re.sub(r'm3u8|hlsvod|hls|f4m', 'mp4s', secure_m3u8), - video_id, 'Downloading mp4 JSON', fatal=False) - if mp4_data: - for format_id, format_url in mp4_data.get('data', {}).items(): - if not url_or_none(format_url): - continue - height = int_or_none(format_id) - if height is not None and m3u8_formats_dict.get(height): - f = m3u8_formats_dict[height].copy() - f.update({ - 'format_id': f['format_id'].replace('hls', 'http'), - 'protocol': 'http', - }) - else: - f = { - 'format_id': 'http-%s' % format_id, - 'height': int_or_none(format_id), - } - f['url'] = format_url - formats.append(f) - - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'duration': duration, - 'view_count': view_count, - 'age_limit': age_limit, - 'formats': formats, - } diff --git a/youtube_dl/extractor/ebaumsworld.py b/youtube_dl/extractor/ebaumsworld.py deleted file mode 100644 index c97682cd3..000000000 --- a/youtube_dl/extractor/ebaumsworld.py +++ /dev/null @@ -1,33 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor - - -class EbaumsWorldIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?ebaumsworld\.com/videos/[^/]+/(?P<id>\d+)' - - _TEST = { - 'url': 'http://www.ebaumsworld.com/videos/a-giant-python-opens-the-door/83367677/', - 'info_dict': { - 'id': '83367677', - 'ext': 'mp4', - 'title': 'A Giant Python Opens The Door', - 'description': 'This is how nightmares start...', - 'uploader': 
'jihadpizza', - }, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - config = self._download_xml( - 'http://www.ebaumsworld.com/video/player/%s' % video_id, video_id) - video_url = config.find('file').text - - return { - 'id': video_id, - 'title': config.find('title').text, - 'url': video_url, - 'description': config.find('description').text, - 'thumbnail': config.find('image').text, - 'uploader': config.find('username').text, - } diff --git a/youtube_dl/extractor/echomsk.py b/youtube_dl/extractor/echomsk.py deleted file mode 100644 index 6b7cc652f..000000000 --- a/youtube_dl/extractor/echomsk.py +++ /dev/null @@ -1,46 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor - - -class EchoMskIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?echo\.msk\.ru/sounds/(?P<id>\d+)' - _TEST = { - 'url': 'http://www.echo.msk.ru/sounds/1464134.html', - 'md5': '2e44b3b78daff5b458e4dbc37f191f7c', - 'info_dict': { - 'id': '1464134', - 'ext': 'mp3', - 'title': 'Особое мнение - 29 декабря 2014, 19:08', - }, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - audio_url = self._search_regex( - r'<a rel="mp3" href="([^"]+)">', webpage, 'audio URL') - - title = self._html_search_regex( - r'<a href="/programs/[^"]+" target="_blank">([^<]+)</a>', - webpage, 'title') - - air_date = self._html_search_regex( - r'(?s)<div class="date">(.+?)</div>', - webpage, 'date', fatal=False, default=None) - - if air_date: - air_date = re.sub(r'(\s)\1+', r'\1', air_date) - if air_date: - title = '%s - %s' % (title, air_date) - - return { - 'id': video_id, - 'url': audio_url, - 'title': title, - } diff --git a/youtube_dl/extractor/egghead.py b/youtube_dl/extractor/egghead.py deleted file mode 100644 index 9bbd703e0..000000000 --- a/youtube_dl/extractor/egghead.py +++ /dev/null @@ -1,140 +0,0 @@ -# coding: utf-8 -from __future__ import 
unicode_literals - -from .common import InfoExtractor -from ..compat import compat_str -from ..utils import ( - determine_ext, - int_or_none, - try_get, - unified_timestamp, - url_or_none, -) - - -class EggheadBaseIE(InfoExtractor): - def _call_api(self, path, video_id, resource, fatal=True): - return self._download_json( - 'https://app.egghead.io/api/v1/' + path, - video_id, 'Downloading %s JSON' % resource, fatal=fatal) - - -class EggheadCourseIE(EggheadBaseIE): - IE_DESC = 'egghead.io course' - IE_NAME = 'egghead:course' - _VALID_URL = r'https://(?:app\.)?egghead\.io/(?:course|playlist)s/(?P<id>[^/?#&]+)' - _TESTS = [{ - 'url': 'https://egghead.io/courses/professor-frisby-introduces-composable-functional-javascript', - 'playlist_count': 29, - 'info_dict': { - 'id': '432655', - 'title': 'Professor Frisby Introduces Composable Functional JavaScript', - 'description': 're:(?s)^This course teaches the ubiquitous.*You\'ll start composing functionality before you know it.$', - }, - }, { - 'url': 'https://app.egghead.io/playlists/professor-frisby-introduces-composable-functional-javascript', - 'only_matching': True, - }] - - def _real_extract(self, url): - playlist_id = self._match_id(url) - series_path = 'series/' + playlist_id - lessons = self._call_api( - series_path + '/lessons', playlist_id, 'course lessons') - - entries = [] - for lesson in lessons: - lesson_url = url_or_none(lesson.get('http_url')) - if not lesson_url: - continue - lesson_id = lesson.get('id') - if lesson_id: - lesson_id = compat_str(lesson_id) - entries.append(self.url_result( - lesson_url, ie=EggheadLessonIE.ie_key(), video_id=lesson_id)) - - course = self._call_api( - series_path, playlist_id, 'course', False) or {} - - playlist_id = course.get('id') - if playlist_id: - playlist_id = compat_str(playlist_id) - - return self.playlist_result( - entries, playlist_id, course.get('title'), - course.get('description')) - - -class EggheadLessonIE(EggheadBaseIE): - IE_DESC = 'egghead.io lesson' - 
IE_NAME = 'egghead:lesson' - _VALID_URL = r'https://(?:app\.)?egghead\.io/(?:api/v1/)?lessons/(?P<id>[^/?#&]+)' - _TESTS = [{ - 'url': 'https://egghead.io/lessons/javascript-linear-data-flow-with-container-style-types-box', - 'info_dict': { - 'id': '1196', - 'display_id': 'javascript-linear-data-flow-with-container-style-types-box', - 'ext': 'mp4', - 'title': 'Create linear data flow with container style types (Box)', - 'description': 'md5:9aa2cdb6f9878ed4c39ec09e85a8150e', - 'thumbnail': r're:^https?:.*\.jpg$', - 'timestamp': 1481296768, - 'upload_date': '20161209', - 'duration': 304, - 'view_count': 0, - 'tags': 'count:2', - }, - 'params': { - 'skip_download': True, - 'format': 'bestvideo', - }, - }, { - 'url': 'https://egghead.io/api/v1/lessons/react-add-redux-to-a-react-application', - 'only_matching': True, - }, { - 'url': 'https://app.egghead.io/lessons/javascript-linear-data-flow-with-container-style-types-box', - 'only_matching': True, - }] - - def _real_extract(self, url): - display_id = self._match_id(url) - - lesson = self._call_api( - 'lessons/' + display_id, display_id, 'lesson') - - lesson_id = compat_str(lesson['id']) - title = lesson['title'] - - formats = [] - for _, format_url in lesson['media_urls'].items(): - format_url = url_or_none(format_url) - if not format_url: - continue - ext = determine_ext(format_url) - if ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( - format_url, lesson_id, 'mp4', entry_protocol='m3u8', - m3u8_id='hls', fatal=False)) - elif ext == 'mpd': - formats.extend(self._extract_mpd_formats( - format_url, lesson_id, mpd_id='dash', fatal=False)) - else: - formats.append({ - 'url': format_url, - }) - self._sort_formats(formats) - - return { - 'id': lesson_id, - 'display_id': display_id, - 'title': title, - 'description': lesson.get('summary'), - 'thumbnail': lesson.get('thumb_nail'), - 'timestamp': unified_timestamp(lesson.get('published_at')), - 'duration': int_or_none(lesson.get('duration')), - 'view_count': 
int_or_none(lesson.get('plays_count')), - 'tags': try_get(lesson, lambda x: x['tag_list'], list), - 'series': try_get( - lesson, lambda x: x['series']['title'], compat_str), - 'formats': formats, - } diff --git a/youtube_dl/extractor/ehow.py b/youtube_dl/extractor/ehow.py deleted file mode 100644 index b1cd4f5d4..000000000 --- a/youtube_dl/extractor/ehow.py +++ /dev/null @@ -1,38 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..compat import compat_urllib_parse_unquote - - -class EHowIE(InfoExtractor): - IE_NAME = 'eHow' - _VALID_URL = r'https?://(?:www\.)?ehow\.com/[^/_?]*_(?P<id>[0-9]+)' - _TEST = { - 'url': 'http://www.ehow.com/video_12245069_hardwood-flooring-basics.html', - 'md5': '9809b4e3f115ae2088440bcb4efbf371', - 'info_dict': { - 'id': '12245069', - 'ext': 'flv', - 'title': 'Hardwood Flooring Basics', - 'description': 'Hardwood flooring may be time consuming, but its ultimately a pretty straightforward concept. Learn about hardwood flooring basics with help from a hardware flooring business owner in this free video...', - 'uploader': 'Erick Nathan', - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - video_url = self._search_regex( - r'(?:file|source)=(http[^\'"&]*)', webpage, 'video URL') - final_url = compat_urllib_parse_unquote(video_url) - uploader = self._html_search_meta('uploader', webpage) - title = self._og_search_title(webpage).replace(' | eHow', '') - - return { - 'id': video_id, - 'url': final_url, - 'title': title, - 'thumbnail': self._og_search_thumbnail(webpage), - 'description': self._og_search_description(webpage), - 'uploader': uploader, - } diff --git a/youtube_dl/extractor/eighttracks.py b/youtube_dl/extractor/eighttracks.py deleted file mode 100644 index 9a44f89f3..000000000 --- a/youtube_dl/extractor/eighttracks.py +++ /dev/null @@ -1,164 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import 
json -import random - -from .common import InfoExtractor -from ..compat import ( - compat_str, -) -from ..utils import ( - ExtractorError, -) - - -class EightTracksIE(InfoExtractor): - IE_NAME = '8tracks' - _VALID_URL = r'https?://8tracks\.com/(?P<user>[^/]+)/(?P<id>[^/#]+)(?:#.*)?$' - _TEST = { - 'name': 'EightTracks', - 'url': 'http://8tracks.com/ytdl/youtube-dl-test-tracks-a', - 'info_dict': { - 'id': '1336550', - 'display_id': 'youtube-dl-test-tracks-a', - 'description': "test chars: \"'/\\ä↭", - 'title': "youtube-dl test tracks \"'/\\ä↭<>", - }, - 'playlist': [ - { - 'md5': '96ce57f24389fc8734ce47f4c1abcc55', - 'info_dict': { - 'id': '11885610', - 'ext': 'm4a', - 'title': "youtue-dl project<>\"' - youtube-dl test track 1 \"'/\\\u00e4\u21ad", - 'uploader_id': 'ytdl' - } - }, - { - 'md5': '4ab26f05c1f7291ea460a3920be8021f', - 'info_dict': { - 'id': '11885608', - 'ext': 'm4a', - 'title': "youtube-dl project - youtube-dl test track 2 \"'/\\\u00e4\u21ad", - 'uploader_id': 'ytdl' - } - }, - { - 'md5': 'd30b5b5f74217410f4689605c35d1fd7', - 'info_dict': { - 'id': '11885679', - 'ext': 'm4a', - 'title': "youtube-dl project as well - youtube-dl test track 3 \"'/\\\u00e4\u21ad", - 'uploader_id': 'ytdl' - } - }, - { - 'md5': '4eb0a669317cd725f6bbd336a29f923a', - 'info_dict': { - 'id': '11885680', - 'ext': 'm4a', - 'title': "youtube-dl project as well - youtube-dl test track 4 \"'/\\\u00e4\u21ad", - 'uploader_id': 'ytdl' - } - }, - { - 'md5': '1893e872e263a2705558d1d319ad19e8', - 'info_dict': { - 'id': '11885682', - 'ext': 'm4a', - 'title': "PH - youtube-dl test track 5 \"'/\\\u00e4\u21ad", - 'uploader_id': 'ytdl' - } - }, - { - 'md5': 'b673c46f47a216ab1741ae8836af5899', - 'info_dict': { - 'id': '11885683', - 'ext': 'm4a', - 'title': "PH - youtube-dl test track 6 \"'/\\\u00e4\u21ad", - 'uploader_id': 'ytdl' - } - }, - { - 'md5': '1d74534e95df54986da7f5abf7d842b7', - 'info_dict': { - 'id': '11885684', - 'ext': 'm4a', - 'title': "phihag - youtube-dl test track 7 
\"'/\\\u00e4\u21ad", - 'uploader_id': 'ytdl' - } - }, - { - 'md5': 'f081f47af8f6ae782ed131d38b9cd1c0', - 'info_dict': { - 'id': '11885685', - 'ext': 'm4a', - 'title': "phihag - youtube-dl test track 8 \"'/\\\u00e4\u21ad", - 'uploader_id': 'ytdl' - } - } - ] - } - - def _real_extract(self, url): - playlist_id = self._match_id(url) - - webpage = self._download_webpage(url, playlist_id) - - data = self._parse_json( - self._search_regex( - r"(?s)PAGE\.mix\s*=\s*({.+?});\n", webpage, 'trax information'), - playlist_id) - - session = str(random.randint(0, 1000000000)) - mix_id = data['id'] - track_count = data['tracks_count'] - duration = data['duration'] - avg_song_duration = float(duration) / track_count - # duration is sometimes negative, use predefined avg duration - if avg_song_duration <= 0: - avg_song_duration = 300 - first_url = 'http://8tracks.com/sets/%s/play?player=sm&mix_id=%s&format=jsonh' % (session, mix_id) - next_url = first_url - entries = [] - - for i in range(track_count): - api_json = None - download_tries = 0 - - while api_json is None: - try: - api_json = self._download_webpage( - next_url, playlist_id, - note='Downloading song information %d/%d' % (i + 1, track_count), - errnote='Failed to download song information') - except ExtractorError: - if download_tries > 3: - raise - else: - download_tries += 1 - self._sleep(avg_song_duration, playlist_id) - - api_data = json.loads(api_json) - track_data = api_data['set']['track'] - info = { - 'id': compat_str(track_data['id']), - 'url': track_data['track_file_stream_url'], - 'title': track_data['performer'] + ' - ' + track_data['name'], - 'raw_title': track_data['name'], - 'uploader_id': data['user']['login'], - 'ext': 'm4a', - } - entries.append(info) - - next_url = 'http://8tracks.com/sets/%s/next?player=sm&mix_id=%s&format=jsonh&track_id=%s' % ( - session, mix_id, track_data['id']) - return { - '_type': 'playlist', - 'entries': entries, - 'id': compat_str(mix_id), - 'display_id': playlist_id, - 
'title': data.get('name'), - 'description': data.get('description'), - } diff --git a/youtube_dl/extractor/einthusan.py b/youtube_dl/extractor/einthusan.py deleted file mode 100644 index 4e0f8bc81..000000000 --- a/youtube_dl/extractor/einthusan.py +++ /dev/null @@ -1,111 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import json -import re - -from .common import InfoExtractor -from ..compat import ( - compat_b64decode, - compat_str, - compat_urlparse, -) -from ..utils import ( - extract_attributes, - ExtractorError, - get_elements_by_class, - urlencode_postdata, -) - - -class EinthusanIE(InfoExtractor): - _VALID_URL = r'https?://(?P<host>einthusan\.(?:tv|com|ca))/movie/watch/(?P<id>[^/?#&]+)' - _TESTS = [{ - 'url': 'https://einthusan.tv/movie/watch/9097/', - 'md5': 'ff0f7f2065031b8a2cf13a933731c035', - 'info_dict': { - 'id': '9097', - 'ext': 'mp4', - 'title': 'Ae Dil Hai Mushkil', - 'description': 'md5:33ef934c82a671a94652a9b4e54d931b', - 'thumbnail': r're:^https?://.*\.jpg$', - } - }, { - 'url': 'https://einthusan.tv/movie/watch/51MZ/?lang=hindi', - 'only_matching': True, - }, { - 'url': 'https://einthusan.com/movie/watch/9097/', - 'only_matching': True, - }, { - 'url': 'https://einthusan.ca/movie/watch/4E9n/?lang=hindi', - 'only_matching': True, - }] - - # reversed from jsoncrypto.prototype.decrypt() in einthusan-PGMovieWatcher.js - def _decrypt(self, encrypted_data, video_id): - return self._parse_json(compat_b64decode(( - encrypted_data[:10] + encrypted_data[-1] + encrypted_data[12:-1] - )).decode('utf-8'), video_id) - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - host = mobj.group('host') - video_id = mobj.group('id') - - webpage = self._download_webpage(url, video_id) - - title = self._html_search_regex(r'<h3>([^<]+)</h3>', webpage, 'title') - - player_params = extract_attributes(self._search_regex( - r'(<section[^>]+id="UIVideoPlayer"[^>]+>)', webpage, 'player parameters')) - - page_id = 
self._html_search_regex( - '<html[^>]+data-pageid="([^"]+)"', webpage, 'page ID') - video_data = self._download_json( - 'https://%s/ajax/movie/watch/%s/' % (host, video_id), video_id, - data=urlencode_postdata({ - 'xEvent': 'UIVideoPlayer.PingOutcome', - 'xJson': json.dumps({ - 'EJOutcomes': player_params['data-ejpingables'], - 'NativeHLS': False - }), - 'arcVersion': 3, - 'appVersion': 59, - 'gorilla.csrf.Token': page_id, - }))['Data'] - - if isinstance(video_data, compat_str) and video_data.startswith('/ratelimited/'): - raise ExtractorError( - 'Download rate reached. Please try again later.', expected=True) - - ej_links = self._decrypt(video_data['EJLinks'], video_id) - - formats = [] - - m3u8_url = ej_links.get('HLSLink') - if m3u8_url: - formats.extend(self._extract_m3u8_formats( - m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native')) - - mp4_url = ej_links.get('MP4Link') - if mp4_url: - formats.append({ - 'url': mp4_url, - }) - - self._sort_formats(formats) - - description = get_elements_by_class('synopsis', webpage)[0] - thumbnail = self._html_search_regex( - r'''<img[^>]+src=(["'])(?P<url>(?!\1).+?/moviecovers/(?!\1).+?)\1''', - webpage, 'thumbnail url', fatal=False, group='url') - if thumbnail is not None: - thumbnail = compat_urlparse.urljoin(url, thumbnail) - - return { - 'id': video_id, - 'title': title, - 'formats': formats, - 'thumbnail': thumbnail, - 'description': description, - } diff --git a/youtube_dl/extractor/eitb.py b/youtube_dl/extractor/eitb.py deleted file mode 100644 index ee5ead18b..000000000 --- a/youtube_dl/extractor/eitb.py +++ /dev/null @@ -1,88 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - float_or_none, - int_or_none, - parse_iso8601, - sanitized_Request, -) - - -class EitbIE(InfoExtractor): - IE_NAME = 'eitb.tv' - _VALID_URL = r'https?://(?:www\.)?eitb\.tv/(?:eu/bideoa|es/video)/[^/]+/\d+/(?P<id>\d+)' - - _TEST = { - 'url': 
'http://www.eitb.tv/es/video/60-minutos-60-minutos-2013-2014/4104995148001/4090227752001/lasa-y-zabala-30-anos/', - 'md5': 'edf4436247185adee3ea18ce64c47998', - 'info_dict': { - 'id': '4090227752001', - 'ext': 'mp4', - 'title': '60 minutos (Lasa y Zabala, 30 años)', - 'description': 'Programa de reportajes de actualidad.', - 'duration': 3996.76, - 'timestamp': 1381789200, - 'upload_date': '20131014', - 'tags': list, - }, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - video = self._download_json( - 'http://mam.eitb.eus/mam/REST/ServiceMultiweb/Video/MULTIWEBTV/%s/' % video_id, - video_id, 'Downloading video JSON') - - media = video['web_media'][0] - - formats = [] - for rendition in media['RENDITIONS']: - video_url = rendition.get('PMD_URL') - if not video_url: - continue - tbr = float_or_none(rendition.get('ENCODING_RATE'), 1000) - format_id = 'http' - if tbr: - format_id += '-%d' % int(tbr) - formats.append({ - 'url': rendition['PMD_URL'], - 'format_id': format_id, - 'width': int_or_none(rendition.get('FRAME_WIDTH')), - 'height': int_or_none(rendition.get('FRAME_HEIGHT')), - 'tbr': tbr, - }) - - hls_url = media.get('HLS_SURL') - if hls_url: - request = sanitized_Request( - 'http://mam.eitb.eus/mam/REST/ServiceMultiweb/DomainRestrictedSecurity/TokenAuth/', - headers={'Referer': url}) - token_data = self._download_json( - request, video_id, 'Downloading auth token', fatal=False) - if token_data: - token = token_data.get('token') - if token: - formats.extend(self._extract_m3u8_formats( - '%s?hdnts=%s' % (hls_url, token), video_id, m3u8_id='hls', fatal=False)) - - hds_url = media.get('HDS_SURL') - if hds_url: - formats.extend(self._extract_f4m_formats( - '%s?hdcore=3.7.0' % hds_url.replace('euskalsvod', 'euskalvod'), - video_id, f4m_id='hds', fatal=False)) - - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': media.get('NAME_ES') or media.get('name') or media['NAME_EU'], - 'description': media.get('SHORT_DESC_ES') or 
video.get('desc_group') or media.get('SHORT_DESC_EU'), - 'thumbnail': media.get('STILL_URL') or media.get('THUMBNAIL_URL'), - 'duration': float_or_none(media.get('LENGTH'), 1000), - 'timestamp': parse_iso8601(media.get('BROADCST_DATE'), ' '), - 'tags': media.get('TAGS'), - 'formats': formats, - } diff --git a/youtube_dl/extractor/ellentube.py b/youtube_dl/extractor/ellentube.py deleted file mode 100644 index 544473274..000000000 --- a/youtube_dl/extractor/ellentube.py +++ /dev/null @@ -1,133 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - clean_html, - extract_attributes, - float_or_none, - int_or_none, - try_get, -) - - -class EllenTubeBaseIE(InfoExtractor): - def _extract_data_config(self, webpage, video_id): - details = self._search_regex( - r'(<[^>]+\bdata-component=(["\'])[Dd]etails.+?></div>)', webpage, - 'details') - return self._parse_json( - extract_attributes(details)['data-config'], video_id) - - def _extract_video(self, data, video_id): - title = data['title'] - - formats = [] - duration = None - for entry in data.get('media'): - if entry.get('id') == 'm3u8': - formats = self._extract_m3u8_formats( - entry['url'], video_id, 'mp4', - entry_protocol='m3u8_native', m3u8_id='hls') - duration = int_or_none(entry.get('duration')) - break - self._sort_formats(formats) - - def get_insight(kind): - return int_or_none(try_get( - data, lambda x: x['insight']['%ss' % kind])) - - return { - 'extractor_key': EllenTubeIE.ie_key(), - 'id': video_id, - 'title': title, - 'description': data.get('description'), - 'duration': duration, - 'thumbnail': data.get('thumbnail'), - 'timestamp': float_or_none(data.get('publishTime'), scale=1000), - 'view_count': get_insight('view'), - 'like_count': get_insight('like'), - 'formats': formats, - } - - -class EllenTubeIE(EllenTubeBaseIE): - _VALID_URL = r'''(?x) - (?: - ellentube:| - https://api-prod\.ellentube\.com/ellenapi/api/item/ - ) - 
(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}) - ''' - _TESTS = [{ - 'url': 'https://api-prod.ellentube.com/ellenapi/api/item/0822171c-3829-43bf-b99f-d77358ae75e3', - 'md5': '2fabc277131bddafdd120e0fc0f974c9', - 'info_dict': { - 'id': '0822171c-3829-43bf-b99f-d77358ae75e3', - 'ext': 'mp4', - 'title': 'Ellen Meets Las Vegas Survivors Jesus Campos and Stephen Schuck', - 'description': 'md5:76e3355e2242a78ad9e3858e5616923f', - 'thumbnail': r're:^https?://.+?', - 'duration': 514, - 'timestamp': 1508505120, - 'upload_date': '20171020', - 'view_count': int, - 'like_count': int, - } - }, { - 'url': 'ellentube:734a3353-f697-4e79-9ca9-bfc3002dc1e0', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - data = self._download_json( - 'https://api-prod.ellentube.com/ellenapi/api/item/%s' % video_id, - video_id) - return self._extract_video(data, video_id) - - -class EllenTubeVideoIE(EllenTubeBaseIE): - _VALID_URL = r'https?://(?:www\.)?ellentube\.com/video/(?P<id>.+?)\.html' - _TEST = { - 'url': 'https://www.ellentube.com/video/ellen-meets-las-vegas-survivors-jesus-campos-and-stephen-schuck.html', - 'only_matching': True, - } - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - video_id = self._extract_data_config(webpage, display_id)['id'] - return self.url_result( - 'ellentube:%s' % video_id, ie=EllenTubeIE.ie_key(), - video_id=video_id) - - -class EllenTubePlaylistIE(EllenTubeBaseIE): - _VALID_URL = r'https?://(?:www\.)?ellentube\.com/(?:episode|studios)/(?P<id>.+?)\.html' - _TESTS = [{ - 'url': 'https://www.ellentube.com/episode/dax-shepard-jordan-fisher-haim.html', - 'info_dict': { - 'id': 'dax-shepard-jordan-fisher-haim', - 'title': "Dax Shepard, 'DWTS' Team Jordan Fisher & Lindsay Arnold, HAIM", - 'description': 'md5:bfc982194dabb3f4e325e43aa6b2e21c', - }, - 'playlist_count': 6, - }, { - 'url': 
'https://www.ellentube.com/studios/macey-goes-rving0.html', - 'only_matching': True, - }] - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - data = self._extract_data_config(webpage, display_id)['data'] - feed = self._download_json( - 'https://api-prod.ellentube.com/ellenapi/api/feed/?%s' - % data['filter'], display_id) - entries = [ - self._extract_video(elem, elem['id']) - for elem in feed if elem.get('type') == 'VIDEO' and elem.get('id')] - return self.playlist_result( - entries, display_id, data.get('title'), - clean_html(data.get('description'))) diff --git a/youtube_dl/extractor/elpais.py b/youtube_dl/extractor/elpais.py deleted file mode 100644 index b89f6db62..000000000 --- a/youtube_dl/extractor/elpais.py +++ /dev/null @@ -1,95 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import strip_jsonp, unified_strdate - - -class ElPaisIE(InfoExtractor): - _VALID_URL = r'https?://(?:[^.]+\.)?elpais\.com/.*/(?P<id>[^/#?]+)\.html(?:$|[?#])' - IE_DESC = 'El País' - - _TESTS = [{ - 'url': 'http://blogs.elpais.com/la-voz-de-inaki/2014/02/tiempo-nuevo-recetas-viejas.html', - 'md5': '98406f301f19562170ec071b83433d55', - 'info_dict': { - 'id': 'tiempo-nuevo-recetas-viejas', - 'ext': 'mp4', - 'title': 'Tiempo nuevo, recetas viejas', - 'description': 'De lunes a viernes, a partir de las ocho de la mañana, Iñaki Gabilondo nos cuenta su visión de la actualidad nacional e internacional.', - 'upload_date': '20140206', - } - }, { - 'url': 'http://elcomidista.elpais.com/elcomidista/2016/02/24/articulo/1456340311_668921.html#?id_externo_nwl=newsletter_diaria20160303t', - 'md5': '3bd5b09509f3519d7d9e763179b013de', - 'info_dict': { - 'id': '1456340311_668921', - 'ext': 'mp4', - 'title': 'Cómo hacer el mejor café con cafetera italiana', - 'description': 'Que sí, que las cápsulas son cómodas. 
Pero si le pides algo más a la vida, quizá deberías aprender a usar bien la cafetera italiana. No tienes más que ver este vídeo y seguir sus siete normas básicas.', - 'upload_date': '20160303', - } - }, { - 'url': 'http://elpais.com/elpais/2017/01/26/ciencia/1485456786_417876.html', - 'md5': '9c79923a118a067e1a45789e1e0b0f9c', - 'info_dict': { - 'id': '1485456786_417876', - 'ext': 'mp4', - 'title': 'Hallado un barco de la antigua Roma que naufragó en Baleares hace 1.800 años', - 'description': 'La nave portaba cientos de ánforas y se hundió cerca de la isla de Cabrera por razones desconocidas', - 'upload_date': '20170127', - }, - }, { - 'url': 'http://epv.elpais.com/epv/2017/02/14/programa_la_voz_de_inaki/1487062137_075943.html', - 'info_dict': { - 'id': '1487062137_075943', - 'ext': 'mp4', - 'title': 'Disyuntivas', - 'description': 'md5:a0fb1485c4a6a8a917e6f93878e66218', - 'upload_date': '20170214', - }, - 'params': { - 'skip_download': True, - }, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - prefix = self._html_search_regex( - r'var\s+url_cache\s*=\s*"([^"]+)";', webpage, 'URL prefix') - id_multimedia = self._search_regex( - r"id_multimedia\s*=\s*'([^']+)'", webpage, 'ID multimedia', default=None) - if id_multimedia: - url_info = self._download_json( - 'http://elpais.com/vdpep/1/?pepid=' + id_multimedia, video_id, transform_source=strip_jsonp) - video_suffix = url_info['mp4'] - else: - video_suffix = self._search_regex( - r"(?:URLMediaFile|urlVideo_\d+)\s*=\s*url_cache\s*\+\s*'([^']+)'", webpage, 'video URL') - video_url = prefix + video_suffix - thumbnail_suffix = self._search_regex( - r"(?:URLMediaStill|urlFotogramaFijo_\d+)\s*=\s*url_cache\s*\+\s*'([^']+)'", - webpage, 'thumbnail URL', default=None) - thumbnail = ( - None if thumbnail_suffix is None - else prefix + thumbnail_suffix) or self._og_search_thumbnail(webpage) - title = self._html_search_regex( - 
(r"tituloVideo\s*=\s*'([^']+)'", - r'<h2 class="entry-header entry-title.*?>(.*?)</h2>', - r'<h1[^>]+class="titulo"[^>]*>([^<]+)'), - webpage, 'title', default=None) or self._og_search_title(webpage) - upload_date = unified_strdate(self._search_regex( - r'<p class="date-header date-int updated"\s+title="([^"]+)">', - webpage, 'upload date', default=None) or self._html_search_meta( - 'datePublished', webpage, 'timestamp')) - - return { - 'id': video_id, - 'url': video_url, - 'title': title, - 'description': self._og_search_description(webpage), - 'thumbnail': thumbnail, - 'upload_date': upload_date, - } diff --git a/youtube_dl/extractor/embedly.py b/youtube_dl/extractor/embedly.py deleted file mode 100644 index a5820b21e..000000000 --- a/youtube_dl/extractor/embedly.py +++ /dev/null @@ -1,16 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..compat import compat_urllib_parse_unquote - - -class EmbedlyIE(InfoExtractor): - _VALID_URL = r'https?://(?:www|cdn\.)?embedly\.com/widgets/media\.html\?(?:[^#]*?&)?url=(?P<id>[^#&]+)' - _TESTS = [{ - 'url': 'https://cdn.embedly.com/widgets/media.html?src=http%3A%2F%2Fwww.youtube.com%2Fembed%2Fvideoseries%3Flist%3DUUGLim4T2loE5rwCMdpCIPVg&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DSU4fj_aEMVw%26list%3DUUGLim4T2loE5rwCMdpCIPVg&image=http%3A%2F%2Fi.ytimg.com%2Fvi%2FSU4fj_aEMVw%2Fhqdefault.jpg&key=8ee8a2e6a8cc47aab1a5ee67f9a178e0&type=text%2Fhtml&schema=youtube&autoplay=1', - 'only_matching': True, - }] - - def _real_extract(self, url): - return self.url_result(compat_urllib_parse_unquote(self._match_id(url))) diff --git a/youtube_dl/extractor/engadget.py b/youtube_dl/extractor/engadget.py deleted file mode 100644 index 65635c18b..000000000 --- a/youtube_dl/extractor/engadget.py +++ /dev/null @@ -1,27 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor - - -class EngadgetIE(InfoExtractor): - _VALID_URL = 
r'https?://(?:www\.)?engadget\.com/video/(?P<id>[^/?#]+)' - - _TESTS = [{ - # video with 5min ID - 'url': 'http://www.engadget.com/video/518153925/', - 'md5': 'c6820d4828a5064447a4d9fc73f312c9', - 'info_dict': { - 'id': '518153925', - 'ext': 'mp4', - 'title': 'Samsung Galaxy Tab Pro 8.4 Review', - }, - 'add_ie': ['FiveMin'], - }, { - # video with vidible ID - 'url': 'https://www.engadget.com/video/57a28462134aa15a39f0421a/', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - return self.url_result('aol-video:%s' % video_id) diff --git a/youtube_dl/extractor/epidemicsound.py b/youtube_dl/extractor/epidemicsound.py deleted file mode 100644 index 1a52738aa..000000000 --- a/youtube_dl/extractor/epidemicsound.py +++ /dev/null @@ -1,101 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - float_or_none, - T, - traverse_obj, - txt_or_none, - unified_timestamp, - url_or_none, -) - - -class EpidemicSoundIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?epidemicsound\.com/track/(?P<id>[0-9a-zA-Z]+)' - _TESTS = [{ - 'url': 'https://www.epidemicsound.com/track/yFfQVRpSPz/', - 'md5': 'd98ff2ddb49e8acab9716541cbc9dfac', - 'info_dict': { - 'id': '45014', - 'display_id': 'yFfQVRpSPz', - 'ext': 'mp3', - 'tags': ['foley', 'door', 'knock', 'glass', 'window', 'glass door knock'], - 'title': 'Door Knock Door 1', - 'duration': 1, - 'thumbnail': 'https://cdn.epidemicsound.com/curation-assets/commercial-release-cover-images/default-sfx/3000x3000.jpg', - 'timestamp': 1415320353, - 'upload_date': '20141107', - 'age_limit': None, - # check that the "best" format was found, since test file MD5 doesn't - # distinguish the formats - 'format': 'full', - }, - }, { - 'url': 'https://www.epidemicsound.com/track/mj8GTTwsZd/', - 'md5': 'c82b745890f9baf18dc2f8d568ee3830', - 'info_dict': { - 'id': '148700', - 'display_id': 'mj8GTTwsZd', - 'ext': 'mp3', - 'tags': ['liquid drum 
n bass', 'energetic'], - 'title': 'Noplace', - 'duration': 237, - 'thumbnail': 'https://cdn.epidemicsound.com/curation-assets/commercial-release-cover-images/11138/3000x3000.jpg', - 'timestamp': 1694426482, - 'release_timestamp': 1700535606, - 'upload_date': '20230911', - 'age_limit': None, - 'format': 'full', - }, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - json_data = self._download_json('https://www.epidemicsound.com/json/track/' + video_id, video_id) - - def fmt_or_none(f): - if not f.get('format'): - f['format'] = f.get('format_id') - elif not f.get('format_id'): - f['format_id'] = f['format'] - if not (f['url'] and f['format']): - return - if f.get('format_note'): - f['format_note'] = 'track ID ' + f['format_note'] - f['preference'] = -1 if f['format'] == 'full' else -2 - return f - - formats = traverse_obj(json_data, ( - 'stems', T(dict.items), Ellipsis, { - 'format': (0, T(txt_or_none)), - 'format_note': (1, 's3TrackId', T(txt_or_none)), - 'format_id': (1, 'stemType', T(txt_or_none)), - 'url': (1, 'lqMp3Url', T(url_or_none)), - }, T(fmt_or_none))) - - self._sort_formats(formats) - - info = traverse_obj(json_data, { - 'id': ('id', T(txt_or_none)), - 'tags': ('metadataTags', Ellipsis, T(txt_or_none)), - 'title': ('title', T(txt_or_none)), - 'duration': ('length', T(float_or_none)), - 'timestamp': ('added', T(unified_timestamp)), - 'thumbnail': (('imageUrl', 'cover'), T(url_or_none)), - 'age_limit': ('isExplicit', T(lambda b: 18 if b else None)), - 'release_timestamp': ('releaseDate', T(unified_timestamp)), - }, get_all=False) - - info.update(traverse_obj(json_data, { - 'categories': ('genres', Ellipsis, 'tag', T(txt_or_none)), - 'tags': ('metadataTags', Ellipsis, T(txt_or_none)), - })) - - info.update({ - 'display_id': video_id, - 'formats': formats, - }) - - return info diff --git a/youtube_dl/extractor/eporner.py b/youtube_dl/extractor/eporner.py deleted file mode 100644 index bfecd3a41..000000000 --- 
a/youtube_dl/extractor/eporner.py +++ /dev/null @@ -1,132 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - encode_base_n, - ExtractorError, - int_or_none, - merge_dicts, - parse_duration, - str_to_int, - url_or_none, -) - - -class EpornerIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?eporner\.com/(?:(?:hd-porn|embed)/|video-)(?P<id>\w+)(?:/(?P<display_id>[\w-]+))?' - _TESTS = [{ - 'url': 'http://www.eporner.com/hd-porn/95008/Infamous-Tiffany-Teen-Strip-Tease-Video/', - 'md5': '39d486f046212d8e1b911c52ab4691f8', - 'info_dict': { - 'id': 'qlDUmNsj6VS', - 'display_id': 'Infamous-Tiffany-Teen-Strip-Tease-Video', - 'ext': 'mp4', - 'title': 'Infamous Tiffany Teen Strip Tease Video', - 'description': 'md5:764f39abf932daafa37485eb46efa152', - 'timestamp': 1232520922, - 'upload_date': '20090121', - 'duration': 1838, - 'view_count': int, - 'age_limit': 18, - }, - 'params': { - 'proxy': '127.0.0.1:8118' - } - }, { - # New (May 2016) URL layout - 'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0/Star-Wars-XXX-Parody/', - 'only_matching': True, - }, { - 'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0', - 'only_matching': True, - }, { - 'url': 'http://www.eporner.com/embed/3YRUtzMcWn0', - 'only_matching': True, - }, { - 'url': 'https://www.eporner.com/video-FJsA19J3Y3H/one-of-the-greats/', - 'only_matching': True, - }] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - display_id = mobj.group('display_id') or video_id - - webpage, urlh = self._download_webpage_handle(url, display_id) - - video_id = self._match_id(urlh.geturl()) - - hash = self._search_regex( - r'hash\s*[:=]\s*["\']([\da-f]{32})', webpage, 'hash') - - title = self._og_search_title(webpage, default=None) or self._html_search_regex( - r'<title>(.+?) 
- EPORNER', webpage, 'title') - - # Reverse engineered from vjs.js - def calc_hash(s): - return ''.join((encode_base_n(int(s[lb:lb + 8], 16), 36) for lb in range(0, 32, 8))) - - video = self._download_json( - 'http://www.eporner.com/xhr/video/%s' % video_id, - display_id, note='Downloading video JSON', - query={ - 'hash': calc_hash(hash), - 'device': 'generic', - 'domain': 'www.eporner.com', - 'fallback': 'false', - }) - - if video.get('available') is False: - raise ExtractorError( - '%s said: %s' % (self.IE_NAME, video['message']), expected=True) - - sources = video['sources'] - - formats = [] - for kind, formats_dict in sources.items(): - if not isinstance(formats_dict, dict): - continue - for format_id, format_dict in formats_dict.items(): - if not isinstance(format_dict, dict): - continue - src = url_or_none(format_dict.get('src')) - if not src or not src.startswith('http'): - continue - if kind == 'hls': - formats.extend(self._extract_m3u8_formats( - src, display_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id=kind, fatal=False)) - else: - height = int_or_none(self._search_regex( - r'(\d+)[pP]', format_id, 'height', default=None)) - fps = int_or_none(self._search_regex( - r'(\d+)fps', format_id, 'fps', default=None)) - - formats.append({ - 'url': src, - 'format_id': format_id, - 'height': height, - 'fps': fps, - }) - self._sort_formats(formats) - - json_ld = self._search_json_ld(webpage, display_id, default={}) - - duration = parse_duration(self._html_search_meta( - 'duration', webpage, default=None)) - view_count = str_to_int(self._search_regex( - r'id=["\']cinemaviews1["\'][^>]*>\s*([0-9,]+)', - webpage, 'view count', default=None)) - - return merge_dicts(json_ld, { - 'id': video_id, - 'display_id': display_id, - 'title': title, - 'duration': duration, - 'view_count': view_count, - 'formats': formats, - 'age_limit': 18, - }) diff --git a/youtube_dl/extractor/eroprofile.py b/youtube_dl/extractor/eroprofile.py deleted file mode 100644 index 
c460dc7f9..000000000 --- a/youtube_dl/extractor/eroprofile.py +++ /dev/null @@ -1,92 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlencode -from ..utils import ( - ExtractorError, - merge_dicts, -) - - -class EroProfileIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?eroprofile\.com/m/videos/view/(?P<id>[^/]+)' - _LOGIN_URL = 'http://www.eroprofile.com/auth/auth.php?' - _NETRC_MACHINE = 'eroprofile' - _TESTS = [{ - 'url': 'http://www.eroprofile.com/m/videos/view/sexy-babe-softcore', - 'md5': 'c26f351332edf23e1ea28ce9ec9de32f', - 'info_dict': { - 'id': '3733775', - 'display_id': 'sexy-babe-softcore', - 'ext': 'm4v', - 'title': 'sexy babe softcore', - 'thumbnail': r're:https?://.*\.jpg', - 'age_limit': 18, - }, - 'skip': 'Video not found', - }, { - 'url': 'http://www.eroprofile.com/m/videos/view/Try-It-On-Pee_cut_2-wmv-4shared-com-file-sharing-download-movie-file', - 'md5': '1baa9602ede46ce904c431f5418d8916', - 'info_dict': { - 'id': '1133519', - 'ext': 'm4v', - 'title': 'Try It On Pee_cut_2.wmv - 4shared.com - file sharing - download movie file', - 'thumbnail': r're:https?://.*\.jpg', - 'age_limit': 18, - }, - 'skip': 'Requires login', - }] - - def _login(self): - (username, password) = self._get_login_info() - if username is None: - return - - query = compat_urllib_parse_urlencode({ - 'username': username, - 'password': password, - 'url': 'http://www.eroprofile.com/', - }) - login_url = self._LOGIN_URL + query - login_page = self._download_webpage(login_url, None, False) - - m = re.search(r'Your username or password was incorrect\.', login_page) - if m: - raise ExtractorError( - 'Wrong username and/or password.', expected=True) - - self.report_login() - redirect_url = self._search_regex( - r'<script[^>]+?src="([^"]+)"', login_page, 'login redirect url') - self._download_webpage(redirect_url, None, False) - - def _real_initialize(self): - self._login() - - def 
_real_extract(self, url): - display_id = self._match_id(url) - - webpage = self._download_webpage(url, display_id) - - m = re.search(r'You must be logged in to view this video\.', webpage) - if m: - self.raise_login_required('This video requires login') - - video_id = self._search_regex( - [r"glbUpdViews\s*\('\d*','(\d+)'", r'p/report/video/(\d+)'], - webpage, 'video id', default=None) - - title = self._html_search_regex( - (r'Title:</th><td>([^<]+)</td>', r'<h1[^>]*>(.+?)</h1>'), - webpage, 'title') - - info = self._parse_html5_media_entries(url, webpage, video_id)[0] - - return merge_dicts(info, { - 'id': video_id, - 'display_id': display_id, - 'title': title, - 'age_limit': 18, - }) diff --git a/youtube_dl/extractor/escapist.py b/youtube_dl/extractor/escapist.py deleted file mode 100644 index 4cd815ebc..000000000 --- a/youtube_dl/extractor/escapist.py +++ /dev/null @@ -1,111 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - determine_ext, - clean_html, - int_or_none, - float_or_none, -) - - -def _decrypt_config(key, string): - a = '' - i = '' - r = '' - - while len(a) < (len(string) / 2): - a += key - - a = a[0:int(len(string) / 2)] - - t = 0 - while t < len(string): - i += chr(int(string[t] + string[t + 1], 16)) - t += 2 - - icko = [s for s in i] - - for t, c in enumerate(a): - r += chr(ord(c) ^ ord(icko[t])) - - return r - - -class EscapistIE(InfoExtractor): - _VALID_URL = r'https?://?(?:(?:www|v1)\.)?escapistmagazine\.com/videos/view/[^/]+/(?P<id>[0-9]+)' - _TESTS = [{ - 'url': 'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate', - 'md5': 'ab3a706c681efca53f0a35f1415cf0d1', - 'info_dict': { - 'id': '6618', - 'ext': 'mp4', - 'description': "Baldur's Gate: Original, Modded or Enhanced Edition? 
I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.", - 'title': "Breaking Down Baldur's Gate", - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 264, - 'uploader': 'The Escapist', - } - }, { - 'url': 'http://www.escapistmagazine.com/videos/view/zero-punctuation/10044-Evolve-One-vs-Multiplayer', - 'md5': '9e8c437b0dbb0387d3bd3255ca77f6bf', - 'info_dict': { - 'id': '10044', - 'ext': 'mp4', - 'description': 'This week, Zero Punctuation reviews Evolve.', - 'title': 'Evolve - One vs Multiplayer', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 304, - 'uploader': 'The Escapist', - } - }, { - 'url': 'http://escapistmagazine.com/videos/view/the-escapist-presents/6618', - 'only_matching': True, - }, { - 'url': 'https://v1.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - ims_video = self._parse_json( - self._search_regex( - r'imsVideo\.play\(({.+?})\);', webpage, 'imsVideo'), - video_id) - video_id = ims_video['videoID'] - key = ims_video['hash'] - - config = self._download_webpage( - 'http://www.escapistmagazine.com/videos/vidconfig.php', - video_id, 'Downloading video config', headers={ - 'Referer': url, - }, query={ - 'videoID': video_id, - 'hash': key, - }) - - data = self._parse_json(_decrypt_config(key, config), video_id) - - video_data = data['videoData'] - - title = clean_html(video_data['title']) - - formats = [{ - 'url': video['src'], - 'format_id': '%s-%sp' % (determine_ext(video['src']), video['res']), - 'height': int_or_none(video.get('res')), - } for video in data['files']['videos']] - self._sort_formats(formats) - - return { - 'id': video_id, - 'formats': formats, - 'title': title, - 'thumbnail': self._og_search_thumbnail(webpage) or data.get('poster'), - 'description': self._og_search_description(webpage), - 'duration': 
float_or_none(video_data.get('duration'), 1000), - 'uploader': video_data.get('publisher'), - 'series': video_data.get('show'), - } diff --git a/youtube_dl/extractor/espn.py b/youtube_dl/extractor/espn.py deleted file mode 100644 index 6cf05e6da..000000000 --- a/youtube_dl/extractor/espn.py +++ /dev/null @@ -1,238 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from .once import OnceIE -from ..compat import compat_str -from ..utils import ( - determine_ext, - int_or_none, - unified_timestamp, -) - - -class ESPNIE(OnceIE): - _VALID_URL = r'''(?x) - https?:// - (?: - (?: - (?: - (?:(?:\w+\.)+)?espn\.go| - (?:www\.)?espn - )\.com/ - (?: - (?: - video/(?:clip|iframe/twitter)| - watch/player - ) - (?: - .*?\?.*?\bid=| - /_/id/ - )| - [^/]+/video/ - ) - )| - (?:www\.)espnfc\.(?:com|us)/(?:video/)?[^/]+/\d+/video/ - ) - (?P<id>\d+) - ''' - - _TESTS = [{ - 'url': 'http://espn.go.com/video/clip?id=10365079', - 'info_dict': { - 'id': '10365079', - 'ext': 'mp4', - 'title': '30 for 30 Shorts: Judging Jewell', - 'description': 'md5:39370c2e016cb4ecf498ffe75bef7f0f', - 'timestamp': 1390936111, - 'upload_date': '20140128', - }, - 'params': { - 'skip_download': True, - }, - }, { - 'url': 'https://broadband.espn.go.com/video/clip?id=18910086', - 'info_dict': { - 'id': '18910086', - 'ext': 'mp4', - 'title': 'Kyrie spins around defender for two', - 'description': 'md5:2b0f5bae9616d26fba8808350f0d2b9b', - 'timestamp': 1489539155, - 'upload_date': '20170315', - }, - 'params': { - 'skip_download': True, - }, - 'expected_warnings': ['Unable to download f4m manifest'], - }, { - 'url': 'http://nonredline.sports.espn.go.com/video/clip?id=19744672', - 'only_matching': True, - }, { - 'url': 'https://cdn.espn.go.com/video/clip/_/id/19771774', - 'only_matching': True, - }, { - 'url': 'http://www.espn.com/watch/player?id=19141491', - 'only_matching': True, - }, { - 'url': 'http://www.espn.com/watch/player?bucketId=257&id=19505875', - 
'only_matching': True, - }, { - 'url': 'http://www.espn.com/watch/player/_/id/19141491', - 'only_matching': True, - }, { - 'url': 'http://www.espn.com/video/clip?id=10365079', - 'only_matching': True, - }, { - 'url': 'http://www.espn.com/video/clip/_/id/17989860', - 'only_matching': True, - }, { - 'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079', - 'only_matching': True, - }, { - 'url': 'http://www.espnfc.us/video/espn-fc-tv/86/video/3319154/nashville-unveiled-as-the-newest-club-in-mls', - 'only_matching': True, - }, { - 'url': 'http://www.espnfc.com/english-premier-league/23/video/3324163/premier-league-in-90-seconds-golden-tweets', - 'only_matching': True, - }, { - 'url': 'http://www.espn.com/espnw/video/26066627/arkansas-gibson-completes-hr-cycle-four-innings', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - clip = self._download_json( - 'http://api-app.espn.com/v1/video/clips/%s' % video_id, - video_id)['videos'][0] - - title = clip['headline'] - - format_urls = set() - formats = [] - - def traverse_source(source, base_source_id=None): - for source_id, source in source.items(): - if source_id == 'alert': - continue - elif isinstance(source, compat_str): - extract_source(source, base_source_id) - elif isinstance(source, dict): - traverse_source( - source, - '%s-%s' % (base_source_id, source_id) - if base_source_id else source_id) - - def extract_source(source_url, source_id=None): - if source_url in format_urls: - return - format_urls.add(source_url) - ext = determine_ext(source_url) - if OnceIE.suitable(source_url): - formats.extend(self._extract_once_formats(source_url)) - elif ext == 'smil': - formats.extend(self._extract_smil_formats( - source_url, video_id, fatal=False)) - elif ext == 'f4m': - formats.extend(self._extract_f4m_formats( - source_url, video_id, f4m_id=source_id, fatal=False)) - elif ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( - source_url, video_id, 
'mp4', entry_protocol='m3u8_native', - m3u8_id=source_id, fatal=False)) - else: - f = { - 'url': source_url, - 'format_id': source_id, - } - mobj = re.search(r'(\d+)p(\d+)_(\d+)k\.', source_url) - if mobj: - f.update({ - 'height': int(mobj.group(1)), - 'fps': int(mobj.group(2)), - 'tbr': int(mobj.group(3)), - }) - if source_id == 'mezzanine': - f['preference'] = 1 - formats.append(f) - - links = clip.get('links', {}) - traverse_source(links.get('source', {})) - traverse_source(links.get('mobile', {})) - self._sort_formats(formats) - - description = clip.get('caption') or clip.get('description') - thumbnail = clip.get('thumbnail') - duration = int_or_none(clip.get('duration')) - timestamp = unified_timestamp(clip.get('originalPublishDate')) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'timestamp': timestamp, - 'duration': duration, - 'formats': formats, - } - - -class ESPNArticleIE(InfoExtractor): - _VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/(?:[^/]+/)*(?P<id>[^/]+)' - _TESTS = [{ - 'url': 'http://espn.go.com/nba/recap?gameId=400793786', - 'only_matching': True, - }, { - 'url': 'http://espn.go.com/blog/golden-state-warriors/post/_/id/593/how-warriors-rapidly-regained-a-winning-edge', - 'only_matching': True, - }, { - 'url': 'http://espn.go.com/sports/endurance/story/_/id/12893522/dzhokhar-tsarnaev-sentenced-role-boston-marathon-bombings', - 'only_matching': True, - }, { - 'url': 'http://espn.go.com/nba/playoffs/2015/story/_/id/12887571/john-wall-washington-wizards-no-swelling-left-hand-wrist-game-5-return', - 'only_matching': True, - }] - - @classmethod - def suitable(cls, url): - return False if ESPNIE.suitable(url) else super(ESPNArticleIE, cls).suitable(url) - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - video_id = self._search_regex( - r'class=(["\']).*?video-play-button.*?\1[^>]+data-id=["\'](?P<id>\d+)', - 
webpage, 'video id', group='id') - - return self.url_result( - 'http://espn.go.com/video/clip?id=%s' % video_id, ESPNIE.ie_key()) - - -class FiveThirtyEightIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?fivethirtyeight\.com/features/(?P<id>[^/?#]+)' - _TEST = { - 'url': 'http://fivethirtyeight.com/features/how-the-6-8-raiders-can-still-make-the-playoffs/', - 'info_dict': { - 'id': '56032156', - 'ext': 'flv', - 'title': 'FiveThirtyEight: The Raiders can still make the playoffs', - 'description': 'Neil Paine breaks down the simplest scenario that will put the Raiders into the playoffs at 8-8.', - }, - 'params': { - 'skip_download': True, - }, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - embed_url = self._search_regex( - r'<iframe[^>]+src=["\'](https?://fivethirtyeight\.abcnews\.go\.com/video/embed/\d+/\d+)', - webpage, 'embed url') - - return self.url_result(embed_url, 'AbcNewsVideo') diff --git a/youtube_dl/extractor/esri.py b/youtube_dl/extractor/esri.py deleted file mode 100644 index e9dcaeb1d..000000000 --- a/youtube_dl/extractor/esri.py +++ /dev/null @@ -1,74 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..compat import compat_urlparse -from ..utils import ( - int_or_none, - parse_filesize, - unified_strdate, -) - - -class EsriVideoIE(InfoExtractor): - _VALID_URL = r'https?://video\.esri\.com/watch/(?P<id>[0-9]+)' - _TEST = { - 'url': 'https://video.esri.com/watch/1124/arcgis-online-_dash_-developing-applications', - 'md5': 'd4aaf1408b221f1b38227a9bbaeb95bc', - 'info_dict': { - 'id': '1124', - 'ext': 'mp4', - 'title': 'ArcGIS Online - Developing Applications', - 'description': 'Jeremy Bartley demonstrates how to develop applications with ArcGIS Online.', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 185, - 'upload_date': '20120419', - } - } - - def _real_extract(self, url): - video_id = 
self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - formats = [] - for width, height, content in re.findall( - r'(?s)<li><strong>(\d+)x(\d+):</strong>(.+?)</li>', webpage): - for video_url, ext, filesize in re.findall( - r'<a[^>]+href="([^"]+)">([^<]+) \(([^<]+)\)</a>', content): - formats.append({ - 'url': compat_urlparse.urljoin(url, video_url), - 'ext': ext.lower(), - 'format_id': '%s-%s' % (ext.lower(), height), - 'width': int(width), - 'height': int(height), - 'filesize_approx': parse_filesize(filesize), - }) - self._sort_formats(formats) - - title = self._html_search_meta('title', webpage, 'title') - description = self._html_search_meta( - 'description', webpage, 'description', fatal=False) - - thumbnail = self._html_search_meta('thumbnail', webpage, 'thumbnail', fatal=False) - if thumbnail: - thumbnail = re.sub(r'_[st]\.jpg$', '_x.jpg', thumbnail) - - duration = int_or_none(self._search_regex( - [r'var\s+videoSeconds\s*=\s*(\d+)', r"'duration'\s*:\s*(\d+)"], - webpage, 'duration', fatal=False)) - - upload_date = unified_strdate(self._html_search_meta( - 'last-modified', webpage, 'upload date', fatal=False)) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'duration': duration, - 'upload_date': upload_date, - 'formats': formats - } diff --git a/youtube_dl/extractor/europa.py b/youtube_dl/extractor/europa.py deleted file mode 100644 index 2c1c747a1..000000000 --- a/youtube_dl/extractor/europa.py +++ /dev/null @@ -1,93 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..compat import compat_urlparse -from ..utils import ( - int_or_none, - orderedSet, - parse_duration, - qualities, - unified_strdate, - xpath_text -) - - -class EuropaIE(InfoExtractor): - _VALID_URL = r'https?://ec\.europa\.eu/avservices/(?:video/player|audio/audioDetails)\.cfm\?.*?\bref=(?P<id>[A-Za-z0-9-]+)' - _TESTS = [{ - 'url': 
'http://ec.europa.eu/avservices/video/player.cfm?ref=I107758', - 'md5': '574f080699ddd1e19a675b0ddf010371', - 'info_dict': { - 'id': 'I107758', - 'ext': 'mp4', - 'title': 'TRADE - Wikileaks on TTIP', - 'description': 'NEW LIVE EC Midday press briefing of 11/08/2015', - 'thumbnail': r're:^https?://.*\.jpg$', - 'upload_date': '20150811', - 'duration': 34, - 'view_count': int, - 'formats': 'mincount:3', - } - }, { - 'url': 'http://ec.europa.eu/avservices/video/player.cfm?sitelang=en&ref=I107786', - 'only_matching': True, - }, { - 'url': 'http://ec.europa.eu/avservices/audio/audioDetails.cfm?ref=I-109295&sitelang=en', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - playlist = self._download_xml( - 'http://ec.europa.eu/avservices/video/player/playlist.cfm?ID=%s' % video_id, video_id) - - def get_item(type_, preference): - items = {} - for item in playlist.findall('./info/%s/item' % type_): - lang, label = xpath_text(item, 'lg', default=None), xpath_text(item, 'label', default=None) - if lang and label: - items[lang] = label.strip() - for p in preference: - if items.get(p): - return items[p] - - query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) - preferred_lang = query.get('sitelang', ('en', ))[0] - - preferred_langs = orderedSet((preferred_lang, 'en', 'int')) - - title = get_item('title', preferred_langs) or video_id - description = get_item('description', preferred_langs) - thumbnail = xpath_text(playlist, './info/thumburl', 'thumbnail') - upload_date = unified_strdate(xpath_text(playlist, './info/date', 'upload date')) - duration = parse_duration(xpath_text(playlist, './info/duration', 'duration')) - view_count = int_or_none(xpath_text(playlist, './info/views', 'views')) - - language_preference = qualities(preferred_langs[::-1]) - - formats = [] - for file_ in playlist.findall('./files/file'): - video_url = xpath_text(file_, './url') - if not video_url: - continue - lang = xpath_text(file_, 
'./lg') - formats.append({ - 'url': video_url, - 'format_id': lang, - 'format_note': xpath_text(file_, './lglabel'), - 'language_preference': language_preference(lang) - }) - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'upload_date': upload_date, - 'duration': duration, - 'view_count': view_count, - 'formats': formats - } diff --git a/youtube_dl/extractor/expotv.py b/youtube_dl/extractor/expotv.py deleted file mode 100644 index 95a897782..000000000 --- a/youtube_dl/extractor/expotv.py +++ /dev/null @@ -1,77 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - int_or_none, - unified_strdate, -) - - -class ExpoTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?expotv\.com/videos/[^?#]*/(?P<id>[0-9]+)($|[?#])' - _TEST = { - 'url': 'http://www.expotv.com/videos/reviews/3/40/NYX-Butter-lipstick/667916', - 'md5': 'fe1d728c3a813ff78f595bc8b7a707a8', - 'info_dict': { - 'id': '667916', - 'ext': 'mp4', - 'title': 'NYX Butter Lipstick Little Susie', - 'description': 'Goes on like butter, but looks better!', - 'thumbnail': r're:^https?://.*\.jpg$', - 'uploader': 'Stephanie S.', - 'upload_date': '20150520', - 'view_count': int, - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - player_key = self._search_regex( - r'<param name="playerKey" value="([^"]+)"', webpage, 'player key') - config = self._download_json( - 'http://client.expotv.com/video/config/%s/%s' % (video_id, player_key), - video_id, 'Downloading video configuration') - - formats = [] - for fcfg in config['sources']: - media_url = fcfg.get('file') - if not media_url: - continue - if fcfg.get('type') == 'm3u8': - formats.extend(self._extract_m3u8_formats( - media_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls')) - else: - formats.append({ - 'url': media_url, - 'height': 
int_or_none(fcfg.get('height')), - 'format_id': fcfg.get('label'), - 'ext': self._search_regex( - r'filename=.*\.([a-z0-9_A-Z]+)&', media_url, - 'file extension', default=None) or fcfg.get('type'), - }) - self._sort_formats(formats) - - title = self._og_search_title(webpage) - description = self._og_search_description(webpage) - thumbnail = config.get('image') - view_count = int_or_none(self._search_regex( - r'<h5>Plays: ([0-9]+)</h5>', webpage, 'view counts')) - uploader = self._search_regex( - r'<div class="reviewer">\s*<img alt="([^"]+)"', webpage, 'uploader', - fatal=False) - upload_date = unified_strdate(self._search_regex( - r'<h5>Reviewed on ([0-9/.]+)</h5>', webpage, 'upload date', - fatal=False), day_first=False) - - return { - 'id': video_id, - 'formats': formats, - 'title': title, - 'description': description, - 'view_count': view_count, - 'thumbnail': thumbnail, - 'uploader': uploader, - 'upload_date': upload_date, - } diff --git a/youtube_dl/extractor/expressen.py b/youtube_dl/extractor/expressen.py deleted file mode 100644 index dc8b855d2..000000000 --- a/youtube_dl/extractor/expressen.py +++ /dev/null @@ -1,101 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - determine_ext, - int_or_none, - unescapeHTML, - unified_timestamp, -) - - -class ExpressenIE(InfoExtractor): - _VALID_URL = r'''(?x) - https?:// - (?:www\.)?(?:expressen|di)\.se/ - (?:(?:tvspelare/video|videoplayer/embed)/)? 
- tv/(?:[^/]+/)* - (?P<id>[^/?#&]+) - ''' - _TESTS = [{ - 'url': 'https://www.expressen.se/tv/ledare/ledarsnack/ledarsnack-om-arbetslosheten-bland-kvinnor-i-speciellt-utsatta-omraden/', - 'md5': '2fbbe3ca14392a6b1b36941858d33a45', - 'info_dict': { - 'id': '8690962', - 'ext': 'mp4', - 'title': 'Ledarsnack: Om arbetslösheten bland kvinnor i speciellt utsatta områden', - 'description': 'md5:f38c81ff69f3de4d269bbda012fcbbba', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 788, - 'timestamp': 1526639109, - 'upload_date': '20180518', - }, - }, { - 'url': 'https://www.expressen.se/tv/kultur/kulturdebatt-med-expressens-karin-olsson/', - 'only_matching': True, - }, { - 'url': 'https://www.expressen.se/tvspelare/video/tv/ditv/ekonomistudion/experterna-har-ar-fragorna-som-avgor-valet/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di', - 'only_matching': True, - }, { - 'url': 'https://www.expressen.se/videoplayer/embed/tv/ditv/ekonomistudion/experterna-har-ar-fragorna-som-avgor-valet/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di', - 'only_matching': True, - }, { - 'url': 'https://www.di.se/videoplayer/embed/tv/ditv/borsmorgon/implantica-rusar-70--under-borspremiaren-hor-styrelsemedlemmen/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di', - 'only_matching': True, - }] - - @staticmethod - def _extract_urls(webpage): - return [ - mobj.group('url') for mobj in re.finditer( - r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?(?:expressen|di)\.se/(?:tvspelare/video|videoplayer/embed)/tv/.+?)\1', - webpage)] - - def _real_extract(self, url): - display_id = self._match_id(url) - - webpage = self._download_webpage(url, display_id) - - def extract_data(name): - return self._parse_json( - self._search_regex( - r'data-%s=(["\'])(?P<value>(?:(?!\1).)+)\1' % name, - webpage, 'info', group='value'), - display_id, transform_source=unescapeHTML) - - info = extract_data('video-tracking-info') - video_id = info['videoId'] - 
- data = extract_data('article-data') - stream = data['stream'] - - if determine_ext(stream) == 'm3u8': - formats = self._extract_m3u8_formats( - stream, display_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls') - else: - formats = [{ - 'url': stream, - }] - self._sort_formats(formats) - - title = info.get('titleRaw') or data['title'] - description = info.get('descriptionRaw') - thumbnail = info.get('socialMediaImage') or data.get('image') - duration = int_or_none(info.get('videoTotalSecondsDuration') - or data.get('totalSecondsDuration')) - timestamp = unified_timestamp(info.get('publishDate')) - - return { - 'id': video_id, - 'display_id': display_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'duration': duration, - 'timestamp': timestamp, - 'formats': formats, - } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 3da5f8020..0c4558dde 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1,1661 +1,6 @@ # flake8: noqa from __future__ import unicode_literals -from .abc import ( - ABCIE, - ABCIViewIE, -) -from .abcnews import ( - AbcNewsIE, - AbcNewsVideoIE, -) -from .abcotvs import ( - ABCOTVSIE, - ABCOTVSClipsIE, -) -from .academicearth import AcademicEarthCourseIE -from .acast import ( - ACastIE, - ACastChannelIE, -) -from .adn import ADNIE -from .adobeconnect import AdobeConnectIE -from .adobetv import ( - AdobeTVEmbedIE, - AdobeTVIE, - AdobeTVShowIE, - AdobeTVChannelIE, - AdobeTVVideoIE, -) -from .adultswim import AdultSwimIE -from .aenetworks import ( - AENetworksIE, - AENetworksCollectionIE, - AENetworksShowIE, - HistoryTopicIE, - HistoryPlayerIE, - BiographyIE, -) -from .afreecatv import AfreecaTVIE -from .airmozilla import AirMozillaIE -from .aljazeera import AlJazeeraIE -from .alphaporno import AlphaPornoIE -from .amara import AmaraIE -from .amcnetworks import AMCNetworksIE -from .americastestkitchen import ( - AmericasTestKitchenIE, - 
AmericasTestKitchenSeasonIE, -) -from .animeondemand import AnimeOnDemandIE -from .anvato import AnvatoIE -from .aol import AolIE -from .allocine import AllocineIE -from .aliexpress import AliExpressLiveIE -from .alsace20tv import ( - Alsace20TVIE, - Alsace20TVEmbedIE, -) -from .apa import APAIE -from .aparat import AparatIE -from .appleconnect import AppleConnectIE -from .appletrailers import ( - AppleTrailersIE, - AppleTrailersSectionIE, -) -from .applepodcasts import ApplePodcastsIE -from .archiveorg import ArchiveOrgIE -from .arcpublishing import ArcPublishingIE -from .arkena import ArkenaIE -from .ard import ( - ARDBetaMediathekIE, - ARDIE, - ARDMediathekIE, -) -from .arte import ( - ArteTVIE, - ArteTVEmbedIE, - ArteTVPlaylistIE, - ArteTVCategoryIE, -) -from .arnes import ArnesIE -from .asiancrush import ( - AsianCrushIE, - AsianCrushPlaylistIE, -) -from .atresplayer import AtresPlayerIE -from .atttechchannel import ATTTechChannelIE -from .atvat import ATVAtIE -from .audimedia import AudiMediaIE -from .audioboom import AudioBoomIE -from .audiomack import AudiomackIE, AudiomackAlbumIE -from .awaan import ( - AWAANIE, - AWAANVideoIE, - AWAANLiveIE, - AWAANSeasonIE, -) -from .azmedien import AZMedienIE -from .baidu import BaiduVideoIE -from .bandaichannel import BandaiChannelIE -from .bandcamp import BandcampIE, BandcampAlbumIE, BandcampWeeklyIE -from .bbc import ( - BBCCoUkIE, - BBCCoUkArticleIE, - BBCCoUkIPlayerEpisodesIE, - BBCCoUkIPlayerGroupIE, - BBCCoUkPlaylistIE, - BBCIE, -) -from .beeg import BeegIE -from .behindkink import BehindKinkIE -from .bellmedia import BellMediaIE -from .beatport import BeatportIE -from .bet import BetIE -from .bfi import BFIPlayerIE -from .bfmtv import ( - BFMTVIE, - BFMTVLiveIE, - BFMTVArticleIE, -) -from .bibeltv import BibelTVIE -from .bigflix import BigflixIE -from .bigo import BigoIE -from .bild import BildIE -from .bilibili import ( - BiliBiliIE, - BiliBiliBangumiIE, - BilibiliAudioIE, - BilibiliAudioAlbumIE, - 
BiliBiliPlayerIE, -) -from .biobiochiletv import BioBioChileTVIE -from .bitchute import ( - BitChuteIE, - BitChuteChannelIE, -) -from .biqle import BIQLEIE -from .bleacherreport import ( - BleacherReportIE, - BleacherReportCMSIE, -) -from .blerp import BlerpIE -from .bloomberg import BloombergIE -from .bokecc import BokeCCIE -from .bongacams import BongaCamsIE -from .bostonglobe import BostonGlobeIE -from .box import BoxIE -from .bpb import BpbIE -from .br import ( - BRIE, - BRMediathekIE, -) -from .bravotv import BravoTVIE -from .breakcom import BreakIE -from .brightcove import ( - BrightcoveLegacyIE, - BrightcoveNewIE, -) -from .businessinsider import BusinessInsiderIE -from .buzzfeed import BuzzFeedIE -from .byutv import BYUtvIE -from .c56 import C56IE -from .caffeine import CaffeineTVIE -from .callin import CallinIE -from .camdemy import ( - CamdemyIE, - CamdemyFolderIE -) -from .cammodels import CamModelsIE -from .camtube import CamTubeIE -from .camwithher import CamWithHerIE -from .canalplus import CanalplusIE -from .canalc2 import Canalc2IE -from .canvas import ( - CanvasIE, - CanvasEenIE, - VrtNUIE, - DagelijkseKostIE, -) -from .carambatv import ( - CarambaTVIE, - CarambaTVPageIE, -) -from .cartoonnetwork import CartoonNetworkIE -from .cbc import ( - CBCIE, - CBCPlayerIE, - CBCWatchVideoIE, - CBCWatchIE, - CBCOlympicsIE, -) -from .cbs import CBSIE -from .cbslocal import ( - CBSLocalIE, - CBSLocalArticleIE, -) -from .cbsinteractive import CBSInteractiveIE -from .cbsnews import ( - CBSNewsEmbedIE, - CBSNewsIE, - CBSNewsLiveVideoIE, -) -from .cbssports import ( - CBSSportsEmbedIE, - CBSSportsIE, - TwentyFourSevenSportsIE, -) -from .ccc import ( - CCCIE, - CCCPlaylistIE, -) -from .ccma import CCMAIE -from .cctv import CCTVIE -from .cda import CDAIE -from .ceskatelevize import CeskaTelevizeIE -from .channel9 import Channel9IE -from .charlierose import CharlieRoseIE -from .chaturbate import ChaturbateIE -from .chilloutzone import ChilloutzoneIE -from .chirbit 
import ( - ChirbitIE, - ChirbitProfileIE, -) -from .cinchcast import CinchcastIE -from .cinemax import CinemaxIE -from .ciscolive import ( - CiscoLiveSessionIE, - CiscoLiveSearchIE, -) -from .cjsw import CJSWIE -from .clipchamp import ClipchampIE -from .cliphunter import CliphunterIE -from .clippit import ClippitIE -from .cliprs import ClipRsIE -from .clipsyndicate import ClipsyndicateIE -from .closertotruth import CloserToTruthIE -from .cloudflarestream import CloudflareStreamIE -from .cloudy import CloudyIE -from .clubic import ClubicIE -from .clyp import ClypIE -from .cmt import CMTIE -from .cnbc import ( - CNBCIE, - CNBCVideoIE, -) -from .cnn import ( - CNNIE, - CNNBlogsIE, - CNNArticleIE, -) -from .coub import CoubIE -from .comedycentral import ( - ComedyCentralIE, - ComedyCentralTVIE, -) -from .commonmistakes import CommonMistakesIE, UnicodeBOMIE -from .commonprotocols import ( - MmsIE, - RtmpIE, -) -from .condenast import CondeNastIE -from .contv import CONtvIE -from .corus import CorusIE -from .cpac import ( - CPACIE, - CPACPlaylistIE, -) -from .cracked import CrackedIE -from .crackle import CrackleIE -from .crooksandliars import CrooksAndLiarsIE -from .crunchyroll import ( - CrunchyrollIE, - CrunchyrollShowPlaylistIE -) -from .cspan import CSpanIE -from .ctsnews import CtsNewsIE -from .ctv import CTVIE -from .ctvnews import CTVNewsIE -from .cultureunplugged import CultureUnpluggedIE -from .curiositystream import ( - CuriosityStreamIE, - CuriosityStreamCollectionIE, -) -from .cwtv import CWTVIE -from .dailymail import DailyMailIE -from .dailymotion import ( - DailymotionIE, - DailymotionPlaylistIE, - DailymotionUserIE, -) -from .daum import ( - DaumIE, - DaumClipIE, - DaumPlaylistIE, - DaumUserIE, -) -from .dbtv import DBTVIE -from .dctp import DctpTvIE -from .deezer import DeezerPlaylistIE -from .democracynow import DemocracynowIE -from .dlf import ( - DLFCorpusIE, - DLFIE, -) -from .dfb import DFBIE -from .dhm import DHMIE -from .digg import DiggIE -from 
.dotsub import DotsubIE -from .douyutv import ( - DouyuShowIE, - DouyuTVIE, -) -from .dplay import ( - DPlayIE, - DiscoveryPlusIE, - HGTVDeIE, -) -from .dreisat import DreiSatIE -from .drbonanza import DRBonanzaIE -from .drtuber import DrTuberIE -from .drtv import ( - DRTVIE, - DRTVLiveIE, -) -from .dtube import DTubeIE -from .dvtv import DVTVIE -from .dumpert import DumpertIE -from .defense import DefenseGouvFrIE -from .discovery import DiscoveryIE -from .discoverygo import ( - DiscoveryGoIE, - DiscoveryGoPlaylistIE, -) -from .discoverynetworks import DiscoveryNetworksDeIE -from .discoveryvr import DiscoveryVRIE -from .disney import DisneyIE -from .dispeak import DigitallySpeakingIE -from .dropbox import DropboxIE -from .dw import ( - DWIE, - DWArticleIE, -) -from .eagleplatform import EaglePlatformIE -from .ebaumsworld import EbaumsWorldIE -from .echomsk import EchoMskIE -from .egghead import ( - EggheadCourseIE, - EggheadLessonIE, -) -from .ehow import EHowIE -from .eighttracks import EightTracksIE -from .einthusan import EinthusanIE -from .eitb import EitbIE -from .ellentube import ( - EllenTubeIE, - EllenTubeVideoIE, - EllenTubePlaylistIE, -) -from .elpais import ElPaisIE -from .embedly import EmbedlyIE -from .engadget import EngadgetIE -from .epidemicsound import EpidemicSoundIE -from .eporner import EpornerIE -from .eroprofile import EroProfileIE -from .escapist import EscapistIE -from .espn import ( - ESPNIE, - ESPNArticleIE, - FiveThirtyEightIE, -) -from .esri import EsriVideoIE -from .europa import EuropaIE -from .expotv import ExpoTVIE -from .expressen import ExpressenIE -from .extremetube import ExtremeTubeIE -from .eyedotv import EyedoTVIE -from .facebook import ( - FacebookIE, - FacebookPluginsVideoIE, -) -from .faz import FazIE -from .fc2 import ( - FC2IE, - FC2EmbedIE, -) -from .fczenit import FczenitIE -from .fifa import FifaIE -from .filmon import ( - FilmOnIE, - FilmOnChannelIE, -) -from .filmweb import FilmwebIE -from .firsttv import FirstTVIE 
-from .fivemin import FiveMinIE -from .fivetv import FiveTVIE -from .flickr import FlickrIE -from .folketinget import FolketingetIE -from .footyroom import FootyRoomIE -from .formula1 import Formula1IE -from .fourtube import ( - FourTubeIE, - PornTubeIE, - PornerBrosIE, - FuxIE, -) -from .fox import FOXIE -from .fox9 import ( - FOX9IE, - FOX9NewsIE, -) -from .foxgay import FoxgayIE -from .foxnews import ( - FoxNewsIE, - FoxNewsArticleIE, -) -from .foxsports import FoxSportsIE -from .franceculture import FranceCultureIE -from .franceinter import FranceInterIE -from .francetv import ( - FranceTVIE, - FranceTVSiteIE, - FranceTVEmbedIE, - FranceTVInfoIE, - FranceTVInfoSportIE, - FranceTVJeunesseIE, - GenerationWhatIE, - CultureboxIE, -) -from .freesound import FreesoundIE -from .freespeech import FreespeechIE -from .freshlive import FreshLiveIE -from .frontendmasters import ( - FrontendMastersIE, - FrontendMastersLessonIE, - FrontendMastersCourseIE -) -from .fujitv import FujiTVFODPlus7IE -from .funimation import FunimationIE -from .funk import FunkIE -from .fusion import FusionIE -from .gaia import GaiaIE -from .gameinformer import GameInformerIE -from .gamespot import GameSpotIE -from .gamestar import GameStarIE -from .gaskrank import GaskrankIE -from .gazeta import GazetaIE -from .gbnews import GBNewsIE -from .gdcvault import GDCVaultIE -from .gedidigital import GediDigitalIE -from .generic import GenericIE -from .gfycat import GfycatIE -from .giantbomb import GiantBombIE -from .giga import GigaIE -from .glide import GlideIE -from .globalplayer import ( - GlobalPlayerLiveIE, - GlobalPlayerLivePlaylistIE, - GlobalPlayerAudioIE, - GlobalPlayerAudioEpisodeIE, - GlobalPlayerVideoIE -) -from .globo import ( - GloboIE, - GloboArticleIE, -) -from .go import GoIE -from .godtube import GodTubeIE -from .golem import GolemIE -from .googledrive import GoogleDriveIE -from .googlepodcasts import ( - GooglePodcastsIE, - GooglePodcastsFeedIE, -) -from .googlesearch import 
GoogleSearchIE -from .goshgay import GoshgayIE -from .gputechconf import GPUTechConfIE -from .groupon import GrouponIE -from .hbo import HBOIE -from .hearthisat import HearThisAtIE -from .heise import HeiseIE -from .hellporno import HellPornoIE -from .helsinki import HelsinkiIE -from .hentaistigma import HentaiStigmaIE -from .hgtv import HGTVComShowIE -from .hketv import HKETVIE -from .hidive import HiDiveIE -from .historicfilms import HistoricFilmsIE -from .hitbox import HitboxIE, HitboxLiveIE -from .hitrecord import HitRecordIE -from .hornbunny import HornBunnyIE -from .hotnewhiphop import HotNewHipHopIE -from .hotstar import ( - HotStarIE, - HotStarPlaylistIE, -) -from .howcast import HowcastIE -from .howstuffworks import HowStuffWorksIE -from .hrfernsehen import HRFernsehenIE -from .hrti import ( - HRTiIE, - HRTiPlaylistIE, -) -from .huajiao import HuajiaoIE -from .huffpost import HuffPostIE -from .hungama import ( - HungamaIE, - HungamaSongIE, -) -from .hypem import HypemIE -from .ign import ( - IGNIE, - IGNVideoIE, - IGNArticleIE, -) -from .iheart import ( - IHeartRadioIE, - IHeartRadioPodcastIE, -) -from .imdb import ( - ImdbIE, - ImdbListIE -) -from .imgur import ( - ImgurIE, - ImgurAlbumIE, - ImgurGalleryIE, -) -from .ina import InaIE -from .inc import IncIE -from .indavideo import IndavideoEmbedIE -from .infoq import InfoQIE -from .instagram import ( - InstagramIE, - InstagramUserIE, - InstagramTagIE, -) -from .internazionale import InternazionaleIE -from .internetvideoarchive import InternetVideoArchiveIE -from .iprima import IPrimaIE -from .iqiyi import IqiyiIE -from .ir90tv import Ir90TvIE -from .itv import ( - ITVIE, - ITVBTCCIE, -) -from .ivi import ( - IviIE, - IviCompilationIE -) -from .ivideon import IvideonIE -from .iwara import IwaraIE -from .izlesene import IzleseneIE -from .jamendo import ( - JamendoIE, - JamendoAlbumIE, -) -from .jeuxvideo import JeuxVideoIE -from .jove import JoveIE -from .joj import JojIE -from .jwplatform import 
JWPlatformIE -from .kakao import KakaoIE -from .kaltura import KalturaIE -from .kankan import KankanIE -from .karaoketv import KaraoketvIE -from .karrierevideos import KarriereVideosIE -from .keezmovies import KeezMoviesIE -from .ketnet import KetnetIE -from .khanacademy import ( - KhanAcademyIE, - KhanAcademyUnitIE, -) -from .kickstarter import KickStarterIE -from .kinja import KinjaEmbedIE -from .kinopoisk import KinoPoiskIE -from .kommunetv import KommunetvIE -from .konserthusetplay import KonserthusetPlayIE -from .krasview import KrasViewIE -from .kth import KTHIE -from .ku6 import Ku6IE -from .kusi import KUSIIE -from .kuwo import ( - KuwoIE, - KuwoAlbumIE, - KuwoChartIE, - KuwoSingerIE, - KuwoCategoryIE, - KuwoMvIE, -) -from .la7 import LA7IE -from .laola1tv import ( - Laola1TvEmbedIE, - Laola1TvIE, - EHFTVIE, - ITTFIE, -) -from .lbry import ( - LBRYIE, - LBRYChannelIE, -) -from .lci import LCIIE -from .lcp import ( - LcpPlayIE, - LcpIE, -) -from .lecture2go import Lecture2GoIE -from .lecturio import ( - LecturioIE, - LecturioCourseIE, - LecturioDeCourseIE, -) -from .leeco import ( - LeIE, - LePlaylistIE, - LetvCloudIE, -) -from .lego import LEGOIE -from .lemonde import LemondeIE -from .lenta import LentaIE -from .libraryofcongress import LibraryOfCongressIE -from .libsyn import LibsynIE -from .lifenews import ( - LifeNewsIE, - LifeEmbedIE, -) -from .limelight import ( - LimelightMediaIE, - LimelightChannelIE, - LimelightChannelListIE, -) -from .line import ( - LineTVIE, - LineLiveIE, - LineLiveChannelIE, -) -from .linkedin import ( - LinkedInLearningIE, - LinkedInLearningCourseIE, -) -from .linuxacademy import LinuxAcademyIE -from .litv import LiTVIE -from .livejournal import LiveJournalIE -from .livestream import ( - LivestreamIE, - LivestreamOriginalIE, - LivestreamShortenerIE, -) -from .lnkgo import LnkGoIE -from .localnews8 import LocalNews8IE -from .lovehomeporn import LoveHomePornIE -from .lrt import LRTIE -from .lynda import ( - LyndaIE, - 
LyndaCourseIE -) -from .m6 import M6IE -from .mailru import ( - MailRuIE, - MailRuMusicIE, - MailRuMusicSearchIE, -) -from .malltv import MallTVIE -from .mangomolo import ( - MangomoloVideoIE, - MangomoloLiveIE, -) -from .manyvids import ManyVidsIE -from .maoritv import MaoriTVIE -from .markiza import ( - MarkizaIE, - MarkizaPageIE, -) -from .massengeschmacktv import MassengeschmackTVIE -from .matchtv import MatchTVIE -from .mdr import MDRIE -from .medaltv import MedalTVIE -from .mediaset import MediasetIE -from .mediasite import ( - MediasiteIE, - MediasiteCatalogIE, - MediasiteNamedCatalogIE, -) -from .medici import MediciIE -from .megaphone import MegaphoneIE -from .meipai import MeipaiIE -from .melonvod import MelonVODIE -from .meta import METAIE -from .metacafe import MetacafeIE -from .metacritic import MetacriticIE -from .mgoon import MgoonIE -from .mgtv import MGTVIE -from .miaopai import MiaoPaiIE -from .microsoftvirtualacademy import ( - MicrosoftVirtualAcademyIE, - MicrosoftVirtualAcademyCourseIE, -) -from .minds import ( - MindsIE, - MindsChannelIE, - MindsGroupIE, -) -from .ministrygrid import MinistryGridIE -from .minoto import MinotoIE -from .miomio import MioMioIE -from .mit import TechTVMITIE, OCWMITIE -from .mitele import MiTeleIE -from .mixcloud import ( - MixcloudIE, - MixcloudUserIE, - MixcloudPlaylistIE, -) -from .mlb import ( - MLBIE, - MLBVideoIE, -) -from .mnet import MnetIE -from .moevideo import MoeVideoIE -from .mofosex import ( - MofosexIE, - MofosexEmbedIE, -) -from .mojvideo import MojvideoIE -from .morningstar import MorningstarIE -from .motherless import ( - MotherlessIE, - MotherlessGroupIE -) -from .motorsport import MotorsportIE -from .movieclips import MovieClipsIE -from .moviezine import MoviezineIE -from .movingimage import MovingImageIE -from .msn import MSNIE -from .mtv import ( - MTVIE, - MTVVideoIE, - MTVServicesEmbeddedIE, - MTVDEIE, - MTVJapanIE, -) -from .muenchentv import MuenchenTVIE -from .mwave import MwaveIE, 
MwaveMeetGreetIE -from .mychannels import MyChannelsIE -from .myspace import MySpaceIE, MySpaceAlbumIE -from .myspass import MySpassIE -from .myvi import ( - MyviIE, - MyviEmbedIE, -) -from .myvideoge import MyVideoGeIE -from .myvidster import MyVidsterIE -from .nationalgeographic import ( - NationalGeographicVideoIE, - NationalGeographicTVIE, -) -from .naver import NaverIE -from .nba import ( - NBAWatchEmbedIE, - NBAWatchIE, - NBAWatchCollectionIE, - NBAEmbedIE, - NBAIE, - NBAChannelIE, -) -from .nbc import ( - NBCIE, - NBCNewsIE, - NBCOlympicsIE, - NBCOlympicsStreamIE, - NBCSportsIE, - NBCSportsStreamIE, - NBCSportsVPlayerIE, -) -from .ndr import ( - NDRIE, - NJoyIE, - NDREmbedBaseIE, - NDREmbedIE, - NJoyEmbedIE, -) -from .ndtv import NDTVIE -from .netzkino import NetzkinoIE -from .nerdcubed import NerdCubedFeedIE -from .neteasemusic import ( - NetEaseMusicIE, - NetEaseMusicAlbumIE, - NetEaseMusicSingerIE, - NetEaseMusicListIE, - NetEaseMusicMvIE, - NetEaseMusicProgramIE, - NetEaseMusicDjRadioIE, -) -from .newgrounds import ( - NewgroundsIE, - NewgroundsPlaylistIE, -) -from .newstube import NewstubeIE -from .nextmedia import ( - NextMediaIE, - NextMediaActionNewsIE, - AppleDailyIE, - NextTVIE, -) -from .nexx import ( - NexxIE, - NexxEmbedIE, -) -from .nfl import ( - NFLIE, - NFLArticleIE, -) -from .nhk import ( - NhkVodIE, - NhkVodProgramIE, -) -from .nhl import NHLIE -from .nick import ( - NickIE, - NickBrIE, - NickDeIE, - NickNightIE, - NickRuIE, -) -from .niconico import ( - NiconicoIE, - NiconicoPlaylistIE, - NiconicoUserIE, - NicovideoSearchIE, - NicovideoSearchDateIE, - NicovideoSearchURLIE, -) -from .ninecninemedia import NineCNineMediaIE -from .ninegag import NineGagIE -from .ninenow import NineNowIE -from .nintendo import NintendoIE -from .njpwworld import NJPWWorldIE -from .nobelprize import NobelPrizeIE -from .nonktube import NonkTubeIE -from .noovo import NoovoIE -from .normalboots import NormalbootsIE -from .nosvideo import NosVideoIE -from .nova 
import ( - NovaEmbedIE, - NovaIE, -) -from .nowness import ( - NownessIE, - NownessPlaylistIE, - NownessSeriesIE, -) -from .noz import NozIE -from .npo import ( - AndereTijdenIE, - NPOIE, - NPOLiveIE, - NPORadioIE, - NPORadioFragmentIE, - SchoolTVIE, - HetKlokhuisIE, - VPROIE, - WNLIE, -) -from .npr import NprIE -from .nrk import ( - NRKIE, - NRKPlaylistIE, - NRKSkoleIE, - NRKTVIE, - NRKTVDirekteIE, - NRKRadioPodkastIE, - NRKTVEpisodeIE, - NRKTVEpisodesIE, - NRKTVSeasonIE, - NRKTVSeriesIE, -) -from .nrl import NRLTVIE -from .ntvcojp import NTVCoJpCUIE -from .ntvde import NTVDeIE -from .ntvru import NTVRuIE -from .nytimes import ( - NYTimesIE, - NYTimesArticleIE, - NYTimesCookingIE, -) -from .nuvid import NuvidIE -from .nzz import NZZIE -from .odatv import OdaTVIE -from .odnoklassniki import OdnoklassnikiIE -from .oktoberfesttv import OktoberfestTVIE -from .ondemandkorea import OnDemandKoreaIE -from .onet import ( - OnetIE, - OnetChannelIE, - OnetMVPIE, - OnetPlIE, -) -from .onionstudios import OnionStudiosIE -from .ooyala import ( - OoyalaIE, - OoyalaExternalIE, -) -from .ora import OraTVIE -from .orf import ( - ORFONIE, - ORFONLiveIE, - ORFFM4StoryIE, - ORFIPTVIE, - ORFPodcastIE, - ORFRadioIE, - ORFRadioCollectionIE, -) -from .outsidetv import OutsideTVIE -from .packtpub import ( - PacktPubIE, - PacktPubCourseIE, -) -from .palcomp3 import ( - PalcoMP3IE, - PalcoMP3ArtistIE, - PalcoMP3VideoIE, -) -from .pandoratv import PandoraTVIE -from .parliamentliveuk import ParliamentLiveUKIE -from .patreon import PatreonIE -from .pbs import PBSIE -from .pearvideo import PearVideoIE -from .peekvids import ( - PeekVidsIE, - PlayVidsIE, -) -from .peertube import PeerTubeIE -from .people import PeopleIE -from .performgroup import PerformGroupIE -from .periscope import ( - PeriscopeIE, - PeriscopeUserIE, -) -from .philharmoniedeparis import PhilharmonieDeParisIE -from .phoenix import PhoenixIE -from .photobucket import PhotobucketIE -from .picarto import ( - PicartoIE, - 
PicartoVodIE, -) -from .piksel import PikselIE -from .pinkbike import PinkbikeIE -from .pinterest import ( - PinterestIE, - PinterestCollectionIE, -) -from .pladform import PladformIE -from .platzi import ( - PlatziIE, - PlatziCourseIE, -) -from .playfm import PlayFMIE -from .playplustv import PlayPlusTVIE -from .plays import PlaysTVIE -from .playstuff import PlayStuffIE -from .playtvak import PlaytvakIE -from .playvid import PlayvidIE -from .playwire import PlaywireIE -from .pluralsight import ( - PluralsightIE, - PluralsightCourseIE, -) -from .podomatic import PodomaticIE -from .pokemon import PokemonIE -from .polskieradio import ( - PolskieRadioIE, - PolskieRadioCategoryIE, -) -from .popcorntimes import PopcorntimesIE -from .popcorntv import PopcornTVIE -from .porn91 import Porn91IE -from .porncom import PornComIE -from .pornhd import PornHdIE -from .pornhub import ( - PornHubIE, - PornHubUserIE, - PornHubPagedVideoListIE, - PornHubUserVideosUploadIE, -) -from .pornotube import PornotubeIE -from .pornovoisines import PornoVoisinesIE -from .pornoxo import PornoXOIE -from .pr0gramm import ( - Pr0grammIE, - Pr0grammStaticIE, -) -from .puhutv import ( - PuhuTVIE, - PuhuTVSerieIE, -) -from .presstv import PressTVIE -from .prosiebensat1 import ProSiebenSat1IE -from .puls4 import Puls4IE -from .pyvideo import PyvideoIE -from .qqmusic import ( - QQMusicIE, - QQMusicSingerIE, - QQMusicAlbumIE, - QQMusicToplistIE, - QQMusicPlaylistIE, -) -from .r7 import ( - R7IE, - R7ArticleIE, -) -from .radiocanada import ( - RadioCanadaIE, - RadioCanadaAudioVideoIE, -) -from .radiode import RadioDeIE -from .radiojavan import RadioJavanIE -from .radiobremen import RadioBremenIE -from .radiofrance import RadioFranceIE -from .rai import ( - RaiPlayIE, - RaiPlayLiveIE, - RaiPlayPlaylistIE, - RaiIE, -) -from .raywenderlich import ( - RayWenderlichIE, - RayWenderlichCourseIE, -) -from .rbgtum import ( - RbgTumIE, - RbgTumCourseIE, -) -from .rbmaradio import RBMARadioIE -from .rds import 
RDSIE -from .redbulltv import ( - RedBullTVIE, - RedBullEmbedIE, - RedBullTVRrnContentIE, - RedBullIE, -) -from .reddit import ( - RedditIE, - RedditRIE, -) -from .redtube import RedTubeIE -from .regiotv import RegioTVIE -from .rentv import ( - RENTVIE, - RENTVArticleIE, -) -from .restudy import RestudyIE -from .reuters import ReutersIE -from .reverbnation import ReverbNationIE -from .rice import RICEIE -from .rmcdecouverte import RMCDecouverteIE -from .ro220 import Ro220IE -from .rockstargames import RockstarGamesIE -from .roosterteeth import RoosterTeethIE -from .rottentomatoes import RottenTomatoesIE -from .roxwel import RoxwelIE -from .rozhlas import RozhlasIE -from .rtbf import RTBFIE -from .rte import RteIE, RteRadioIE -from .rtlnl import RtlNlIE -from .rtl2 import ( - RTL2IE, - RTL2YouIE, - RTL2YouSeriesIE, -) -from .rtp import RTPIE -from .rts import RTSIE -from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE, RTVELiveIE, RTVETelevisionIE -from .rtvnh import RTVNHIE -from .rtvs import RTVSIE -from .ruhd import RUHDIE -from .rumble import RumbleEmbedIE -from .rutube import ( - RutubeIE, - RutubeChannelIE, - RutubeEmbedIE, - RutubeMovieIE, - RutubePersonIE, - RutubePlaylistIE, -) -from .rutv import RUTVIE -from .ruutu import RuutuIE -from .ruv import RuvIE -from .s4c import ( - S4CIE, - S4CSeriesIE, -) -from .safari import ( - SafariIE, - SafariApiIE, - SafariCourseIE, -) -from .samplefocus import SampleFocusIE -from .sapo import SapoIE -from .savefrom import SaveFromIE -from .sbs import SBSIE -from .screencast import ScreencastIE -from .screencastomatic import ScreencastOMaticIE -from .scrippsnetworks import ( - ScrippsNetworksWatchIE, - ScrippsNetworksIE, -) -from .scte import ( - SCTEIE, - SCTECourseIE, -) -from .seeker import SeekerIE -from .senateisvp import SenateISVPIE -from .sendtonews import SendtoNewsIE -from .servus import ServusIE -from .sevenplus import SevenPlusIE -from .sexu import SexuIE -from .seznamzpravy import ( - SeznamZpravyIE, - 
SeznamZpravyArticleIE, -) -from .shahid import ( - ShahidIE, - ShahidShowIE, -) -from .shared import ( - SharedIE, - VivoIE, -) -from .showroomlive import ShowRoomLiveIE -from .simplecast import ( - SimplecastIE, - SimplecastEpisodeIE, - SimplecastPodcastIE, -) -from .sina import SinaIE -from .sixplay import SixPlayIE -from .skyit import ( - SkyItPlayerIE, - SkyItVideoIE, - SkyItVideoLiveIE, - SkyItIE, - SkyItAcademyIE, - SkyItArteIE, - CieloTVItIE, - TV8ItIE, -) -from .skylinewebcams import SkylineWebcamsIE -from .skynewsarabia import ( - SkyNewsArabiaIE, - SkyNewsArabiaArticleIE, -) -from .sky import ( - SkyNewsIE, - SkySportsIE, - SkySportsNewsIE, -) -from .slideshare import SlideshareIE -from .slideslive import SlidesLiveIE -from .slutload import SlutloadIE -from .snotr import SnotrIE -from .sohu import SohuIE -from .sonyliv import SonyLIVIE -from .soundcloud import ( - SoundcloudEmbedIE, - SoundcloudIE, - SoundcloudSetIE, - SoundcloudUserIE, - SoundcloudTrackStationIE, - SoundcloudPlaylistIE, - SoundcloudSearchIE, -) -from .soundgasm import ( - SoundgasmIE, - SoundgasmProfileIE -) -from .southpark import ( - SouthParkIE, - SouthParkDeIE, - SouthParkDkIE, - SouthParkEsIE, - SouthParkNlIE -) -from .spankbang import ( - SpankBangIE, - SpankBangPlaylistIE, -) -from .spankwire import SpankwireIE -from .spiegel import SpiegelIE -from .spike import ( - BellatorIE, - ParamountNetworkIE, -) -from .stitcher import ( - StitcherIE, - StitcherShowIE, -) -from .sport5 import Sport5IE -from .sportbox import SportBoxIE -from .sportdeutschland import SportDeutschlandIE -from .spotify import ( - SpotifyIE, - SpotifyShowIE, -) -from .spreaker import ( - SpreakerIE, - SpreakerPageIE, - SpreakerShowIE, - SpreakerShowPageIE, -) -from .springboardplatform import SpringboardPlatformIE -from .sprout import SproutIE -from .srgssr import ( - SRGSSRIE, - SRGSSRPlayIE, -) -from .srmediathek import SRMediathekIE -from .stanfordoc import StanfordOpenClassroomIE -from .steam import SteamIE 
-from .storyfire import ( - StoryFireIE, - StoryFireUserIE, - StoryFireSeriesIE, -) -from .streamable import StreamableIE -from .streamcloud import StreamcloudIE -from .streamcz import StreamCZIE -from .streamsb import StreamsbIE -from .streetvoice import StreetVoiceIE -from .stretchinternet import StretchInternetIE -from .stv import STVPlayerIE -from .sunporno import SunPornoIE -from .sverigesradio import ( - SverigesRadioEpisodeIE, - SverigesRadioPublicationIE, -) -from .svt import ( - SVTIE, - SVTPageIE, - SVTPlayIE, - SVTSeriesIE, -) -from .swrmediathek import SWRMediathekIE -from .syfy import SyfyIE -from .sztvhu import SztvHuIE -from .tagesschau import ( - TagesschauPlayerIE, - TagesschauIE, -) -from .tass import TassIE -from .tbs import TBSIE -from .tdslifeway import TDSLifewayIE -from .teachable import ( - TeachableIE, - TeachableCourseIE, -) -from .teachertube import ( - TeacherTubeIE, - TeacherTubeUserIE, -) -from .teachingchannel import TeachingChannelIE -from .teamcoco import TeamcocoIE -from .teamtreehouse import TeamTreeHouseIE -from .techtalks import TechTalksIE -from .ted import TEDIE -from .tele5 import Tele5IE -from .tele13 import Tele13IE -from .telebruxelles import TeleBruxellesIE -from .telecinco import TelecincoIE -from .telegraaf import TelegraafIE -from .telemb import TeleMBIE -from .telequebec import ( - TeleQuebecIE, - TeleQuebecSquatIE, - TeleQuebecEmissionIE, - TeleQuebecLiveIE, - TeleQuebecVideoIE, -) -from .teletask import TeleTaskIE -from .telewebion import TelewebionIE -from .tennistv import TennisTVIE -from .tenplay import TenPlayIE -from .testurl import TestURLIE -from .tf1 import TF1IE -from .tfo import TFOIE -from .theintercept import TheInterceptIE -from .theplatform import ( - ThePlatformIE, - ThePlatformFeedIE, -) -from .thescene import TheSceneIE -from .thestar import TheStarIE -from .thesun import TheSunIE -from .theweatherchannel import TheWeatherChannelIE -from .thisamericanlife import ThisAmericanLifeIE -from .thisav 
import ThisAVIE -from .thisoldhouse import ThisOldHouseIE -from .thisvid import ( - ThisVidIE, - ThisVidMemberIE, - ThisVidPlaylistIE, -) -from .threeqsdn import ThreeQSDNIE -from .tiktok import ( - TikTokIE, - TikTokUserIE, -) -from .tinypic import TinyPicIE -from .tmz import ( - TMZIE, - TMZArticleIE, -) -from .tnaflix import ( - TNAFlixNetworkEmbedIE, - TNAFlixIE, - EMPFlixIE, - MovieFapIE, -) -from .toggle import ( - ToggleIE, - MeWatchIE, -) -from .tonline import TOnlineIE -from .toongoggles import ToonGogglesIE -from .toutv import TouTvIE -from .toypics import ToypicsUserIE, ToypicsIE -from .traileraddict import TrailerAddictIE -from .trilulilu import TriluliluIE -from .trovo import ( - TrovoIE, - TrovoVodIE, -) -from .trunews import TruNewsIE -from .trutv import TruTVIE -from .tube8 import Tube8IE -from .tubitv import TubiTvIE -from .tumblr import TumblrIE -from .tunein import ( - TuneInClipIE, - TuneInStationIE, - TuneInProgramIE, - TuneInTopicIE, - TuneInShortenerIE, -) -from .tunepk import TunePkIE -from .turbo import TurboIE -from .tv2 import ( - TV2IE, - TV2ArticleIE, - KatsomoIE, - MTVUutisetArticleIE, -) -from .tv2dk import ( - TV2DKIE, - TV2DKBornholmPlayIE, -) -from .tv2hu import TV2HuIE -from .tv4 import TV4IE -from .tv5mondeplus import TV5MondePlusIE -from .tv5unis import ( - TV5UnisVideoIE, - TV5UnisIE, -) -from .tva import ( - TVAIE, - QubIE, -) -from .tvanouvelles import ( - TVANouvellesIE, - TVANouvellesArticleIE, -) -from .tvc import ( - TVCIE, - TVCArticleIE, -) -from .tver import TVerIE -from .tvigle import TvigleIE -from .tvland import TVLandIE -from .tvn24 import TVN24IE -from .tvnet import TVNetIE -from .tvnoe import TVNoeIE -from .tvnow import ( - TVNowIE, - TVNowNewIE, - TVNowSeasonIE, - TVNowAnnualIE, - TVNowShowIE, -) -from .tvp import ( - TVPEmbedIE, - TVPIE, - TVPWebsiteIE, -) -from .tvplay import ( - TVPlayIE, - ViafreeIE, - TVPlayHomeIE, -) -from .tvplayer import TVPlayerIE -from .tweakers import TweakersIE -from .twentyfourvideo 
import TwentyFourVideoIE -from .twentymin import TwentyMinutenIE -from .twentythreevideo import TwentyThreeVideoIE -from .twitcasting import TwitCastingIE -from .twitch import ( - TwitchVodIE, - TwitchCollectionIE, - TwitchVideosIE, - TwitchVideosClipsIE, - TwitchVideosCollectionsIE, - TwitchStreamIE, - TwitchClipsIE, -) -from .twitter import ( - TwitterCardIE, - TwitterIE, - TwitterAmplifyIE, - TwitterBroadcastIE, -) -from .udemy import ( - UdemyIE, - UdemyCourseIE -) -from .udn import UDNEmbedIE -from .ufctv import ( - UFCTVIE, - UFCArabiaIE, -) -from .uktvplay import UKTVPlayIE -from .digiteka import DigitekaIE -from .dlive import ( - DLiveVODIE, - DLiveStreamIE, -) -from .umg import UMGDeIE -from .unistra import UnistraIE -from .unity import UnityIE -from .uol import UOLIE -from .uplynk import ( - UplynkIE, - UplynkPreplayIE, -) -from .urort import UrortIE -from .urplay import URPlayIE -from .usanetwork import USANetworkIE -from .usatoday import USATodayIE -from .ustream import UstreamIE, UstreamChannelIE -from .ustudio import ( - UstudioIE, - UstudioEmbedIE, -) -from .varzesh3 import Varzesh3IE -from .vbox7 import Vbox7IE -from .veehd import VeeHDIE -from .veoh import VeohIE -from .vesti import VestiIE -from .vevo import ( - VevoIE, - VevoPlaylistIE, -) -from .vgtv import ( - BTArticleIE, - BTVestlendingenIE, - VGTVIE, -) -from .vh1 import VH1IE -from .vice import ( - ViceIE, - ViceArticleIE, - ViceShowIE, -) -from .vidbit import VidbitIE -from .viddler import ViddlerIE -from .videa import VideaIE -from .videodetective import VideoDetectiveIE -from .videofyme import VideofyMeIE -from .videomore import ( - VideomoreIE, - VideomoreVideoIE, - VideomoreSeasonIE, -) -from .videopress import VideoPressIE -from .vidio import VidioIE -from .vidlii import VidLiiIE -from .vidme import ( - VidmeIE, - VidmeUserIE, - VidmeUserLikesIE, -) -from .vier import VierIE, VierVideosIE -from .viewlift import ( - ViewLiftIE, - ViewLiftEmbedIE, -) -from .viidea import ViideaIE -from 
.vimeo import ( - VimeoIE, - VimeoAlbumIE, - VimeoChannelIE, - VimeoGroupsIE, - VimeoLikesIE, - VimeoOndemandIE, - VimeoReviewIE, - VimeoUserIE, - VimeoWatchLaterIE, - VHXEmbedIE, -) -from .vimple import VimpleIE -from .vine import ( - VineIE, - VineUserIE, -) -from .viki import ( - VikiIE, - VikiChannelIE, -) -from .viqeo import ViqeoIE -from .viu import ( - ViuIE, - ViuPlaylistIE, - ViuOTTIE, -) -from .vk import ( - VKIE, - VKUserVideosIE, - VKWallPostIE, -) -from .vlive import ( - VLiveIE, - VLivePostIE, - VLiveChannelIE, -) -from .vodlocker import VodlockerIE -from .vodpl import VODPlIE -from .vodplatform import VODPlatformIE -from .voicerepublic import VoiceRepublicIE -from .voot import VootIE -from .voxmedia import ( - VoxMediaVolumeIE, - VoxMediaIE, -) -from .vrt import VRTIE -from .vrak import VrakIE -from .vrv import ( - VRVIE, - VRVSeriesIE, -) -from .vshare import VShareIE -from .vtm import VTMIE -from .medialaan import MedialaanIE -from .vube import VubeIE -from .vuclip import VuClipIE -from .vvvvid import ( - VVVVIDIE, - VVVVIDShowIE, -) -from .vyborymos import VyboryMosIE -from .vzaar import VzaarIE -from .wakanim import WakanimIE -from .walla import WallaIE -from .washingtonpost import ( - WashingtonPostIE, - WashingtonPostArticleIE, -) -from .wat import WatIE -from .watchbox import WatchBoxIE -from .watchindianporn import WatchIndianPornIE -from .wdr import ( - WDRIE, - WDRPageIE, - WDRElefantIE, - WDRMobileIE, -) -from .webcaster import ( - WebcasterIE, - WebcasterFeedIE, -) -from .webofstories import ( - WebOfStoriesIE, - WebOfStoriesPlaylistIE, -) -from .weibo import ( - WeiboIE, - WeiboMobileIE -) -from .weiqitv import WeiqiTVIE -from .whyp import WhypIE -from .wistia import ( - WistiaIE, - WistiaPlaylistIE, -) -from .worldstarhiphop import WorldStarHipHopIE -from .wsj import ( - WSJIE, - WSJArticleIE, -) -from .wwe import WWEIE -from .xbef import XBefIE -from .xboxclips import XboxClipsIE -from .xfileshare import XFileShareIE -from .xhamster 
import ( - XHamsterIE, - XHamsterEmbedIE, - XHamsterUserIE, -) -from .xiami import ( - XiamiSongIE, - XiamiAlbumIE, - XiamiArtistIE, - XiamiCollectionIE -) -from .ximalaya import ( - XimalayaIE, - XimalayaAlbumIE -) -from .xminus import XMinusIE -from .xnxx import XNXXIE -from .xstream import XstreamIE -from .xtube import XTubeUserIE, XTubeIE -from .xuite import XuiteIE -from .xvideos import XVideosIE -from .xxxymovies import XXXYMoviesIE -from .yahoo import ( - YahooIE, - YahooSearchIE, - YahooGyaOPlayerIE, - YahooGyaOIE, - YahooJapanNewsIE, -) -from .yandexdisk import YandexDiskIE -from .yandexmusic import ( - YandexMusicTrackIE, - YandexMusicAlbumIE, - YandexMusicPlaylistIE, - YandexMusicArtistTracksIE, - YandexMusicArtistAlbumsIE, -) -from .yandexvideo import YandexVideoIE -from .yapfiles import YapFilesIE -from .yesjapan import YesJapanIE -from .yinyuetai import YinYueTaiIE -from .ynet import YnetIE -from .youjizz import YouJizzIE -from .youku import ( - YoukuIE, - YoukuShowIE, -) -from .younow import ( - YouNowLiveIE, - YouNowChannelIE, - YouNowMomentIE, -) -from .youporn import ( - YouPornIE, - YouPornCategoryIE, - YouPornChannelIE, - YouPornCollectionIE, - YouPornStarIE, - YouPornTagIE, - YouPornVideosIE, -) -from .yourporn import YourPornIE -from .yourupload import YourUploadIE from .youtube import ( YoutubeIE, YoutubeFavouritesIE, @@ -1673,31 +18,3 @@ from .youtube import ( YoutubeYtUserIE, YoutubeWatchLaterIE, ) -from .zapiks import ZapiksIE -from .zattoo import ( - BBVTVIE, - EinsUndEinsTVIE, - EWETVIE, - GlattvisionTVIE, - MNetTVIE, - MyVisionTVIE, - NetPlusIE, - OsnatelTVIE, - QuantumTVIE, - QuicklineIE, - QuicklineLiveIE, - SaltTVIE, - SAKTVIE, - VTXTVIE, - WalyTVIE, - ZattooIE, - ZattooLiveIE, -) -from .zdf import ZDFIE, ZDFChannelIE -from .zhihu import ZhihuIE -from .zingmp3 import ( - ZingMp3IE, - ZingMp3AlbumIE, -) -from .zoom import ZoomIE -from .zype import ZypeIE diff --git a/youtube_dl/extractor/extremetube.py 
b/youtube_dl/extractor/extremetube.py deleted file mode 100644 index acd4090fa..000000000 --- a/youtube_dl/extractor/extremetube.py +++ /dev/null @@ -1,50 +0,0 @@ -from __future__ import unicode_literals - -from ..utils import str_to_int -from .keezmovies import KeezMoviesIE - - -class ExtremeTubeIE(KeezMoviesIE): - _VALID_URL = r'https?://(?:www\.)?extremetube\.com/(?:[^/]+/)?video/(?P<id>[^/#?&]+)' - _TESTS = [{ - 'url': 'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431', - 'md5': '92feaafa4b58e82f261e5419f39c60cb', - 'info_dict': { - 'id': 'music-video-14-british-euro-brit-european-cumshots-swallow-652431', - 'ext': 'mp4', - 'title': 'Music Video 14 british euro brit european cumshots swallow', - 'uploader': 'anonim', - 'view_count': int, - 'age_limit': 18, - } - }, { - 'url': 'http://www.extremetube.com/gay/video/abcde-1234', - 'only_matching': True, - }, { - 'url': 'http://www.extremetube.com/video/latina-slut-fucked-by-fat-black-dick', - 'only_matching': True, - }, { - 'url': 'http://www.extremetube.com/video/652431', - 'only_matching': True, - }] - - def _real_extract(self, url): - webpage, info = self._extract_info(url) - - if not info['title']: - info['title'] = self._search_regex( - r'<h1[^>]+title="([^"]+)"[^>]*>', webpage, 'title') - - uploader = self._html_search_regex( - r'Uploaded by:\s*</[^>]+>\s*<a[^>]+>(.+?)</a>', - webpage, 'uploader', fatal=False) - view_count = str_to_int(self._search_regex( - r'Views:\s*</[^>]+>\s*<[^>]+>([\d,\.]+)</', - webpage, 'view count', fatal=False)) - - info.update({ - 'uploader': uploader, - 'view_count': view_count, - }) - - return info diff --git a/youtube_dl/extractor/eyedotv.py b/youtube_dl/extractor/eyedotv.py deleted file mode 100644 index f62ddebae..000000000 --- a/youtube_dl/extractor/eyedotv.py +++ /dev/null @@ -1,64 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - xpath_text, - 
parse_duration, - ExtractorError, -) - - -class EyedoTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?eyedo\.tv/[^/]+/(?:#!/)?Live/Detail/(?P<id>[0-9]+)' - _TEST = { - 'url': 'https://www.eyedo.tv/en-US/#!/Live/Detail/16301', - 'md5': 'ba14f17995cdfc20c36ba40e21bf73f7', - 'info_dict': { - 'id': '16301', - 'ext': 'mp4', - 'title': 'Journée du conseil scientifique de l\'Afnic 2015', - 'description': 'md5:4abe07293b2f73efc6e1c37028d58c98', - 'uploader': 'Afnic Live', - 'uploader_id': '8023', - } - } - _ROOT_URL = 'http://live.eyedo.net:1935/' - - def _real_extract(self, url): - video_id = self._match_id(url) - video_data = self._download_xml('http://eyedo.tv/api/live/GetLive/%s' % video_id, video_id) - - def _add_ns(path): - return self._xpath_ns(path, 'http://schemas.datacontract.org/2004/07/EyeDo.Core.Implementation.Web.ViewModels.Api') - - title = xpath_text(video_data, _add_ns('Titre'), 'title', True) - state_live_code = xpath_text(video_data, _add_ns('StateLiveCode'), 'title', True) - if state_live_code == 'avenir': - raise ExtractorError( - '%s said: We\'re sorry, but this video is not yet available.' 
% self.IE_NAME, - expected=True) - - is_live = state_live_code == 'live' - m3u8_url = None - # http://eyedo.tv/Content/Html5/Scripts/html5view.js - if is_live: - if xpath_text(video_data, 'Cdn') == 'true': - m3u8_url = 'http://rrr.sz.xlcdn.com/?account=eyedo&file=A%s&type=live&service=wowza&protocol=http&output=playlist.m3u8' % video_id - else: - m3u8_url = self._ROOT_URL + 'w/%s/eyedo_720p/playlist.m3u8' % video_id - else: - m3u8_url = self._ROOT_URL + 'replay-w/%s/mp4:%s.mp4/playlist.m3u8' % (video_id, video_id) - - return { - 'id': video_id, - 'title': title, - 'formats': self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', 'm3u8_native'), - 'description': xpath_text(video_data, _add_ns('Description')), - 'duration': parse_duration(xpath_text(video_data, _add_ns('Duration'))), - 'uploader': xpath_text(video_data, _add_ns('Createur')), - 'uploader_id': xpath_text(video_data, _add_ns('CreateurId')), - 'chapter': xpath_text(video_data, _add_ns('ChapitreTitre')), - 'chapter_id': xpath_text(video_data, _add_ns('ChapitreId')), - } diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py deleted file mode 100644 index 04650af39..000000000 --- a/youtube_dl/extractor/facebook.py +++ /dev/null @@ -1,709 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import json -import re -import socket - -from .common import InfoExtractor -from ..compat import ( - compat_etree_fromstring, - compat_http_client, - compat_str, - compat_urllib_error, - compat_urllib_parse_unquote, - compat_urllib_parse_unquote_plus, -) -from ..utils import ( - clean_html, - error_to_compat_str, - ExtractorError, - float_or_none, - get_element_by_id, - int_or_none, - js_to_json, - limit_length, - parse_count, - qualities, - sanitized_Request, - try_get, - urlencode_postdata, - urljoin, -) - - -class FacebookIE(InfoExtractor): - _VALID_URL = r'''(?x) - (?: - https?:// - (?:[\w-]+\.)?(?:facebook\.com|facebookcorewwwi\.onion)/ - (?:[^#]*?\#!/)? 
- (?: - (?: - video/video\.php| - photo\.php| - video\.php| - video/embed| - story\.php| - watch(?:/live)?/? - )\?(?:.*?)(?:v|video_id|story_fbid)=| - [^/]+/videos/(?:[^/]+/)?| - [^/]+/posts/| - groups/[^/]+/permalink/| - watchparty/ - )| - facebook: - ) - (?P<id>[0-9]+) - ''' - _LOGIN_URL = 'https://www.facebook.com/login.php?next=http%3A%2F%2Ffacebook.com%2Fhome.php&login_attempt=1' - _CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1' - _NETRC_MACHINE = 'facebook' - IE_NAME = 'facebook' - - _VIDEO_PAGE_TEMPLATE = 'https://www.facebook.com/video/video.php?v=%s' - _VIDEO_PAGE_TAHOE_TEMPLATE = 'https://www.facebook.com/video/tahoe/async/%s/?chain=true&isvideo=true&payloadtype=primary' - - _TESTS = [{ - 'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf', - 'md5': '6a40d33c0eccbb1af76cf0485a052659', - 'info_dict': { - 'id': '637842556329505', - 'ext': 'mp4', - 'title': 're:Did you know Kei Nishikori is the first Asian man to ever reach a Grand Slam', - 'uploader': 'Tennis on Facebook', - 'upload_date': '20140908', - 'timestamp': 1410199200, - }, - 'skip': 'Requires logging in', - }, { - # data.video - 'url': 'https://www.facebook.com/video.php?v=274175099429670', - 'info_dict': { - 'id': '274175099429670', - 'ext': 'mp4', - 'title': 're:^Asif Nawab Butt posted a video', - 'uploader': 'Asif Nawab Butt', - 'upload_date': '20140506', - 'timestamp': 1399398998, - 'thumbnail': r're:^https?://.*', - }, - 'expected_warnings': [ - 'title' - ] - }, { - 'note': 'Video with DASH manifest', - 'url': 'https://www.facebook.com/video.php?v=957955867617029', - 'md5': 'b2c28d528273b323abe5c6ab59f0f030', - 'info_dict': { - 'id': '957955867617029', - 'ext': 'mp4', - 'title': 'When you post epic content on instagram.com/433 8 million followers, this is ...', - 'uploader': 'Demy de Zeeuw', - 'upload_date': '20160110', - 'timestamp': 1452431627, - }, - 'skip': 'Requires logging in', - }, { - 'url': 
'https://www.facebook.com/maxlayn/posts/10153807558977570', - 'md5': '037b1fa7f3c2d02b7a0d7bc16031ecc6', - 'info_dict': { - 'id': '544765982287235', - 'ext': 'mp4', - 'title': '"What are you doing running in the snow?"', - 'uploader': 'FailArmy', - }, - 'skip': 'Video gone', - }, { - 'url': 'https://m.facebook.com/story.php?story_fbid=1035862816472149&id=116132035111903', - 'md5': '1deb90b6ac27f7efcf6d747c8a27f5e3', - 'info_dict': { - 'id': '1035862816472149', - 'ext': 'mp4', - 'title': 'What the Flock Is Going On In New Zealand Credit: ViralHog', - 'uploader': 'S. Saint', - }, - 'skip': 'Video gone', - }, { - 'note': 'swf params escaped', - 'url': 'https://www.facebook.com/barackobama/posts/10153664894881749', - 'md5': '97ba073838964d12c70566e0085c2b91', - 'info_dict': { - 'id': '10153664894881749', - 'ext': 'mp4', - 'title': 'Average time to confirm recent Supreme Court nominees: 67 days Longest it\'s t...', - 'thumbnail': r're:^https?://.*', - 'timestamp': 1456259628, - 'upload_date': '20160223', - 'uploader': 'Barack Obama', - }, - }, { - # have 1080P, but only up to 720p in swf params - # data.video.story.attachments[].media - 'url': 'https://www.facebook.com/cnn/videos/10155529876156509/', - 'md5': '9571fae53d4165bbbadb17a94651dcdc', - 'info_dict': { - 'id': '10155529876156509', - 'ext': 'mp4', - 'title': 'She survived the holocaust — and years later, she’s getting her citizenship s...', - 'timestamp': 1477818095, - 'upload_date': '20161030', - 'uploader': 'CNN', - 'thumbnail': r're:^https?://.*', - 'view_count': int, - }, - }, { - # bigPipe.onPageletArrive ... 
onPageletArrive pagelet_group_mall - # data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.media - 'url': 'https://www.facebook.com/yaroslav.korpan/videos/1417995061575415/', - 'info_dict': { - 'id': '1417995061575415', - 'ext': 'mp4', - 'title': 'md5:1db063d6a8c13faa8da727817339c857', - 'timestamp': 1486648217, - 'upload_date': '20170209', - 'uploader': 'Yaroslav Korpan', - }, - 'params': { - 'skip_download': True, - }, - }, { - 'url': 'https://www.facebook.com/LaGuiaDelVaron/posts/1072691702860471', - 'info_dict': { - 'id': '1072691702860471', - 'ext': 'mp4', - 'title': 'md5:ae2d22a93fbb12dad20dc393a869739d', - 'timestamp': 1477305000, - 'upload_date': '20161024', - 'uploader': 'La Guía Del Varón', - 'thumbnail': r're:^https?://.*', - }, - 'params': { - 'skip_download': True, - }, - }, { - # data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.media - 'url': 'https://www.facebook.com/groups/1024490957622648/permalink/1396382447100162/', - 'info_dict': { - 'id': '1396382447100162', - 'ext': 'mp4', - 'title': 'md5:19a428bbde91364e3de815383b54a235', - 'timestamp': 1486035494, - 'upload_date': '20170202', - 'uploader': 'Elisabeth Ahtn', - }, - 'params': { - 'skip_download': True, - }, - }, { - 'url': 'https://www.facebook.com/video.php?v=10204634152394104', - 'only_matching': True, - }, { - 'url': 'https://www.facebook.com/amogood/videos/1618742068337349/?fref=nf', - 'only_matching': True, - }, { - # data.mediaset.currMedia.edges - 'url': 'https://www.facebook.com/ChristyClarkForBC/videos/vb.22819070941/10153870694020942/?type=2&theater', - 'only_matching': True, - }, { - # data.video.story.attachments[].media - 'url': 'facebook:544765982287235', - 'only_matching': True, - }, { - # data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.media - 'url': 'https://www.facebook.com/groups/164828000315060/permalink/764967300301124/', - 'only_matching': True, - }, { - # 
data.video.creation_story.attachments[].media - 'url': 'https://zh-hk.facebook.com/peoplespower/videos/1135894589806027/', - 'only_matching': True, - }, { - # data.video - 'url': 'https://www.facebookcorewwwi.onion/video.php?v=274175099429670', - 'only_matching': True, - }, { - # no title - 'url': 'https://www.facebook.com/onlycleverentertainment/videos/1947995502095005/', - 'only_matching': True, - }, { - # data.video - 'url': 'https://www.facebook.com/WatchESLOne/videos/359649331226507/', - 'info_dict': { - 'id': '359649331226507', - 'ext': 'mp4', - 'title': '#ESLOne VoD - Birmingham Finals Day#1 Fnatic vs. @Evil Geniuses', - 'uploader': 'ESL One Dota 2', - }, - 'params': { - 'skip_download': True, - }, - }, { - # data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.all_subattachments.nodes[].media - 'url': 'https://www.facebook.com/100033620354545/videos/106560053808006/', - 'info_dict': { - 'id': '106560053808006', - }, - 'playlist_count': 2, - }, { - # data.video.story.attachments[].media - 'url': 'https://www.facebook.com/watch/?v=647537299265662', - 'only_matching': True, - }, { - # data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.all_subattachments.nodes[].media - 'url': 'https://www.facebook.com/PankajShahLondon/posts/10157667649866271', - 'info_dict': { - 'id': '10157667649866271', - }, - 'playlist_count': 3, - }, { - # data.nodes[].comet_sections.content.story.attachments[].style_type_renderer.attachment.media - 'url': 'https://m.facebook.com/Alliance.Police.Department/posts/4048563708499330', - 'info_dict': { - 'id': '117576630041613', - 'ext': 'mp4', - # TODO: title can be extracted from video page - 'title': 'Facebook video #117576630041613', - 'uploader_id': '189393014416438', - 'upload_date': '20201123', - 'timestamp': 1606162592, - }, - 'skip': 'Requires logging in', - }, { - # node.comet_sections.content.story.attached_story.attachments.style_type_renderer.attachment.media - 'url': 
'https://www.facebook.com/groups/ateistiskselskab/permalink/10154930137678856/', - 'info_dict': { - 'id': '211567722618337', - 'ext': 'mp4', - 'title': 'Facebook video #211567722618337', - 'uploader_id': '127875227654254', - 'upload_date': '20161122', - 'timestamp': 1479793574, - }, - }, { - # data.video.creation_story.attachments[].media - 'url': 'https://www.facebook.com/watch/live/?v=1823658634322275', - 'only_matching': True, - }, { - 'url': 'https://www.facebook.com/watchparty/211641140192478', - 'info_dict': { - 'id': '211641140192478', - }, - 'playlist_count': 1, - 'skip': 'Requires logging in', - }] - _SUPPORTED_PAGLETS_REGEX = r'(?:pagelet_group_mall|permalink_video_pagelet|hyperfeed_story_id_[0-9a-f]+)' - _api_config = { - 'graphURI': '/api/graphql/' - } - - @staticmethod - def _extract_urls(webpage): - urls = [] - for mobj in re.finditer( - r'<iframe[^>]+?src=(["\'])(?P<url>https?://www\.facebook\.com/(?:video/embed|plugins/video\.php).+?)\1', - webpage): - urls.append(mobj.group('url')) - # Facebook API embed - # see https://developers.facebook.com/docs/plugins/embedded-video-player - for mobj in re.finditer(r'''(?x)<div[^>]+ - class=(?P<q1>[\'"])[^\'"]*\bfb-(?:video|post)\b[^\'"]*(?P=q1)[^>]+ - data-href=(?P<q2>[\'"])(?P<url>(?:https?:)?//(?:www\.)?facebook.com/.+?)(?P=q2)''', webpage): - urls.append(mobj.group('url')) - return urls - - def _login(self): - useremail, password = self._get_login_info() - if useremail is None: - return - - login_page_req = sanitized_Request(self._LOGIN_URL) - self._set_cookie('facebook.com', 'locale', 'en_US') - login_page = self._download_webpage(login_page_req, None, - note='Downloading login page', - errnote='Unable to download login page') - lsd = self._search_regex( - r'<input type="hidden" name="lsd" value="([^"]*)"', - login_page, 'lsd') - lgnrnd = self._search_regex(r'name="lgnrnd" value="([^"]*?)"', login_page, 'lgnrnd') - - login_form = { - 'email': useremail, - 'pass': password, - 'lsd': lsd, - 'lgnrnd': 
lgnrnd, - 'next': 'http://facebook.com/home.php', - 'default_persistent': '0', - 'legacy_return': '1', - 'timezone': '-60', - 'trynum': '1', - } - request = sanitized_Request(self._LOGIN_URL, urlencode_postdata(login_form)) - request.add_header('Content-Type', 'application/x-www-form-urlencoded') - try: - login_results = self._download_webpage(request, None, - note='Logging in', errnote='unable to fetch login page') - if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None: - error = self._html_search_regex( - r'(?s)<div[^>]+class=(["\']).*?login_error_box.*?\1[^>]*><div[^>]*>.*?</div><div[^>]*>(?P<error>.+?)</div>', - login_results, 'login error', default=None, group='error') - if error: - raise ExtractorError('Unable to login: %s' % error, expected=True) - self._downloader.report_warning('unable to log in: bad username/password, or exceeded login rate limit (~3/min). Check credentials or wait.') - return - - fb_dtsg = self._search_regex( - r'name="fb_dtsg" value="(.+?)"', login_results, 'fb_dtsg', default=None) - h = self._search_regex( - r'name="h"\s+(?:\w+="[^"]+"\s+)*?value="([^"]+)"', login_results, 'h', default=None) - - if not fb_dtsg or not h: - return - - check_form = { - 'fb_dtsg': fb_dtsg, - 'h': h, - 'name_action_selected': 'dont_save', - } - check_req = sanitized_Request(self._CHECKPOINT_URL, urlencode_postdata(check_form)) - check_req.add_header('Content-Type', 'application/x-www-form-urlencoded') - check_response = self._download_webpage(check_req, None, - note='Confirming login') - if re.search(r'id="checkpointSubmitButton"', check_response) is not None: - self._downloader.report_warning('Unable to confirm login, you have to login in your browser and authorize the login.') - except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - self._downloader.report_warning('unable to log in: %s' % error_to_compat_str(err)) - return - - def _real_initialize(self): - self._login() - - def 
_extract_from_url(self, url, video_id): - webpage = self._download_webpage( - url.replace('://m.facebook.com/', '://www.facebook.com/'), video_id) - - video_data = None - - def extract_video_data(instances): - video_data = [] - for item in instances: - if try_get(item, lambda x: x[1][0]) == 'VideoConfig': - video_item = item[2][0] - if video_item.get('video_id'): - video_data.append(video_item['videoData']) - return video_data - - server_js_data = self._parse_json(self._search_regex( - [r'handleServerJS\(({.+})(?:\);|,")', r'\bs\.handle\(({.+?})\);'], - webpage, 'server js data', default='{}'), video_id, fatal=False) - - if server_js_data: - video_data = extract_video_data(server_js_data.get('instances', [])) - - def extract_from_jsmods_instances(js_data): - if js_data: - return extract_video_data(try_get( - js_data, lambda x: x['jsmods']['instances'], list) or []) - - def extract_dash_manifest(video, formats): - dash_manifest = video.get('dash_manifest') - if dash_manifest: - formats.extend(self._parse_mpd_formats( - compat_etree_fromstring(compat_urllib_parse_unquote_plus(dash_manifest)))) - - def process_formats(formats): - # Downloads with browser's User-Agent are rate limited. Working around - # with non-browser User-Agent. 
- for f in formats: - f.setdefault('http_headers', {})['User-Agent'] = 'facebookexternalhit/1.1' - - self._sort_formats(formats) - - def extract_relay_data(_filter): - return self._parse_json(self._search_regex( - r'handleWithCustomApplyEach\([^,]+,\s*({.*?%s.*?})\);' % _filter, - webpage, 'replay data', default='{}'), video_id, fatal=False) or {} - - def extract_relay_prefetched_data(_filter): - replay_data = extract_relay_data(_filter) - for require in (replay_data.get('require') or []): - if require[0] == 'RelayPrefetchedStreamCache': - return try_get(require, lambda x: x[3][1]['__bbox']['result']['data'], dict) or {} - - if not video_data: - server_js_data = self._parse_json(self._search_regex([ - r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+' + self._SUPPORTED_PAGLETS_REGEX, - r'bigPipe\.onPageletArrive\(({.*?id\s*:\s*"%s".*?})\);' % self._SUPPORTED_PAGLETS_REGEX - ], webpage, 'js data', default='{}'), video_id, js_to_json, False) - video_data = extract_from_jsmods_instances(server_js_data) - - if not video_data: - data = extract_relay_prefetched_data( - r'"(?:dash_manifest|playable_url(?:_quality_hd)?)"\s*:\s*"[^"]+"') - if data: - entries = [] - - def parse_graphql_video(video): - formats = [] - q = qualities(['sd', 'hd']) - for (suffix, format_id) in [('', 'sd'), ('_quality_hd', 'hd')]: - playable_url = video.get('playable_url' + suffix) - if not playable_url: - continue - formats.append({ - 'format_id': format_id, - 'quality': q(format_id), - 'url': playable_url, - }) - extract_dash_manifest(video, formats) - process_formats(formats) - v_id = video.get('videoId') or video.get('id') or video_id - info = { - 'id': v_id, - 'formats': formats, - 'thumbnail': try_get(video, lambda x: x['thumbnailImage']['uri']), - 'uploader_id': try_get(video, lambda x: x['owner']['id']), - 'timestamp': int_or_none(video.get('publish_time')), - 'duration': float_or_none(video.get('playable_duration_in_ms'), 1000), - } - description = 
try_get(video, lambda x: x['savable_description']['text']) - title = video.get('name') - if title: - info.update({ - 'title': title, - 'description': description, - }) - else: - info['title'] = description or 'Facebook video #%s' % v_id - entries.append(info) - - def parse_attachment(attachment, key='media'): - media = attachment.get(key) or {} - if media.get('__typename') == 'Video': - return parse_graphql_video(media) - - nodes = data.get('nodes') or [] - node = data.get('node') or {} - if not nodes and node: - nodes.append(node) - for node in nodes: - story = try_get(node, lambda x: x['comet_sections']['content']['story'], dict) or {} - attachments = try_get(story, [ - lambda x: x['attached_story']['attachments'], - lambda x: x['attachments'] - ], list) or [] - for attachment in attachments: - attachment = try_get(attachment, lambda x: x['style_type_renderer']['attachment'], dict) - ns = try_get(attachment, lambda x: x['all_subattachments']['nodes'], list) or [] - for n in ns: - parse_attachment(n) - parse_attachment(attachment) - - edges = try_get(data, lambda x: x['mediaset']['currMedia']['edges'], list) or [] - for edge in edges: - parse_attachment(edge, key='node') - - video = data.get('video') or {} - if video: - attachments = try_get(video, [ - lambda x: x['story']['attachments'], - lambda x: x['creation_story']['attachments'] - ], list) or [] - for attachment in attachments: - parse_attachment(attachment) - if not entries: - parse_graphql_video(video) - - return self.playlist_result(entries, video_id) - - if not video_data: - m_msg = re.search(r'class="[^"]*uiInterstitialContent[^"]*"><div>(.*?)</div>', webpage) - if m_msg is not None: - raise ExtractorError( - 'The video is not available, Facebook said: "%s"' % m_msg.group(1), - expected=True) - elif any(p in webpage for p in ( - '>You must log in to continue', - 'id="login_form"', - 'id="loginbutton"')): - self.raise_login_required() - - if not video_data and '/watchparty/' in url: - post_data = { - 
'doc_id': 3731964053542869, - 'variables': json.dumps({ - 'livingRoomID': video_id, - }), - } - - prefetched_data = extract_relay_prefetched_data(r'"login_data"\s*:\s*{') - if prefetched_data: - lsd = try_get(prefetched_data, lambda x: x['login_data']['lsd'], dict) - if lsd: - post_data[lsd['name']] = lsd['value'] - - relay_data = extract_relay_data(r'\[\s*"RelayAPIConfigDefaults"\s*,') - for define in (relay_data.get('define') or []): - if define[0] == 'RelayAPIConfigDefaults': - self._api_config = define[2] - - living_room = self._download_json( - urljoin(url, self._api_config['graphURI']), video_id, - data=urlencode_postdata(post_data))['data']['living_room'] - - entries = [] - for edge in (try_get(living_room, lambda x: x['recap']['watched_content']['edges']) or []): - video = try_get(edge, lambda x: x['node']['video']) or {} - v_id = video.get('id') - if not v_id: - continue - v_id = compat_str(v_id) - entries.append(self.url_result( - self._VIDEO_PAGE_TEMPLATE % v_id, - self.ie_key(), v_id, video.get('name'))) - - return self.playlist_result(entries, video_id) - - if not video_data: - # Video info not in first request, do a secondary request using - # tahoe player specific URL - tahoe_data = self._download_webpage( - self._VIDEO_PAGE_TAHOE_TEMPLATE % video_id, video_id, - data=urlencode_postdata({ - '__a': 1, - '__pc': self._search_regex( - r'pkg_cohort["\']\s*:\s*["\'](.+?)["\']', webpage, - 'pkg cohort', default='PHASED:DEFAULT'), - '__rev': self._search_regex( - r'client_revision["\']\s*:\s*(\d+),', webpage, - 'client revision', default='3944515'), - 'fb_dtsg': self._search_regex( - r'"DTSGInitialData"\s*,\s*\[\]\s*,\s*{\s*"token"\s*:\s*"([^"]+)"', - webpage, 'dtsg token', default=''), - }), - headers={ - 'Content-Type': 'application/x-www-form-urlencoded', - }) - tahoe_js_data = self._parse_json( - self._search_regex( - r'for\s+\(\s*;\s*;\s*\)\s*;(.+)', tahoe_data, - 'tahoe js data', default='{}'), - video_id, fatal=False) - video_data = 
extract_from_jsmods_instances(tahoe_js_data) - - if not video_data: - raise ExtractorError('Cannot parse data') - - if len(video_data) > 1: - entries = [] - for v in video_data: - video_url = v[0].get('video_url') - if not video_url: - continue - entries.append(self.url_result(urljoin( - url, video_url), self.ie_key(), v[0].get('video_id'))) - return self.playlist_result(entries, video_id) - video_data = video_data[0] - - formats = [] - subtitles = {} - for f in video_data: - format_id = f['stream_type'] - if f and isinstance(f, dict): - f = [f] - if not f or not isinstance(f, list): - continue - for quality in ('sd', 'hd'): - for src_type in ('src', 'src_no_ratelimit'): - src = f[0].get('%s_%s' % (quality, src_type)) - if src: - preference = -10 if format_id == 'progressive' else 0 - if quality == 'hd': - preference += 5 - formats.append({ - 'format_id': '%s_%s_%s' % (format_id, quality, src_type), - 'url': src, - 'preference': preference, - }) - extract_dash_manifest(f[0], formats) - subtitles_src = f[0].get('subtitles_src') - if subtitles_src: - subtitles.setdefault('en', []).append({'url': subtitles_src}) - if not formats: - raise ExtractorError('Cannot find video formats') - - process_formats(formats) - - video_title = self._html_search_regex( - r'<h2\s+[^>]*class="uiHeaderTitle"[^>]*>([^<]*)</h2>', webpage, - 'title', default=None) - if not video_title: - video_title = self._html_search_regex( - r'(?s)<span class="fbPhotosPhotoCaption".*?id="fbPhotoPageCaption"><span class="hasCaption">(.*?)</span>', - webpage, 'alternative title', default=None) - if not video_title: - video_title = self._html_search_meta( - 'description', webpage, 'title', default=None) - if video_title: - video_title = limit_length(video_title, 80) - else: - video_title = 'Facebook video #%s' % video_id - uploader = clean_html(get_element_by_id( - 'fbPhotoPageAuthorName', webpage)) or self._search_regex( - r'ownerName\s*:\s*"([^"]+)"', webpage, 'uploader', - default=None) or 
self._og_search_title(webpage, fatal=False) - timestamp = int_or_none(self._search_regex( - r'<abbr[^>]+data-utime=["\'](\d+)', webpage, - 'timestamp', default=None)) - thumbnail = self._html_search_meta(['og:image', 'twitter:image'], webpage) - - view_count = parse_count(self._search_regex( - r'\bviewCount\s*:\s*["\']([\d,.]+)', webpage, 'view count', - default=None)) - - info_dict = { - 'id': video_id, - 'title': video_title, - 'formats': formats, - 'uploader': uploader, - 'timestamp': timestamp, - 'thumbnail': thumbnail, - 'view_count': view_count, - 'subtitles': subtitles, - } - - return info_dict - - def _real_extract(self, url): - video_id = self._match_id(url) - - real_url = self._VIDEO_PAGE_TEMPLATE % video_id if url.startswith('facebook:') else url - return self._extract_from_url(real_url, video_id) - - -class FacebookPluginsVideoIE(InfoExtractor): - _VALID_URL = r'https?://(?:[\w-]+\.)?facebook\.com/plugins/video\.php\?.*?\bhref=(?P<id>https.+)' - - _TESTS = [{ - 'url': 'https://www.facebook.com/plugins/video.php?href=https%3A%2F%2Fwww.facebook.com%2Fgov.sg%2Fvideos%2F10154383743583686%2F&show_text=0&width=560', - 'md5': '5954e92cdfe51fe5782ae9bda7058a07', - 'info_dict': { - 'id': '10154383743583686', - 'ext': 'mp4', - 'title': 'What to do during the haze?', - 'uploader': 'Gov.sg', - 'upload_date': '20160826', - 'timestamp': 1472184808, - }, - 'add_ie': [FacebookIE.ie_key()], - }, { - 'url': 'https://www.facebook.com/plugins/video.php?href=https%3A%2F%2Fwww.facebook.com%2Fvideo.php%3Fv%3D10204634152394104', - 'only_matching': True, - }, { - 'url': 'https://www.facebook.com/plugins/video.php?href=https://www.facebook.com/gov.sg/videos/10154383743583686/&show_text=0&width=560', - 'only_matching': True, - }] - - def _real_extract(self, url): - return self.url_result( - compat_urllib_parse_unquote(self._match_id(url)), - FacebookIE.ie_key()) diff --git a/youtube_dl/extractor/faz.py b/youtube_dl/extractor/faz.py deleted file mode 100644 index 
312ee2aee..000000000 --- a/youtube_dl/extractor/faz.py +++ /dev/null @@ -1,93 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..compat import compat_etree_fromstring -from ..utils import ( - xpath_element, - xpath_text, - int_or_none, -) - - -class FazIE(InfoExtractor): - IE_NAME = 'faz.net' - _VALID_URL = r'https?://(?:www\.)?faz\.net/(?:[^/]+/)*.*?-(?P<id>\d+)\.html' - - _TESTS = [{ - 'url': 'http://www.faz.net/multimedia/videos/stockholm-chemie-nobelpreis-fuer-drei-amerikanische-forscher-12610585.html', - 'info_dict': { - 'id': '12610585', - 'ext': 'mp4', - 'title': 'Stockholm: Chemie-Nobelpreis für drei amerikanische Forscher', - 'description': 'md5:1453fbf9a0d041d985a47306192ea253', - }, - }, { - 'url': 'http://www.faz.net/aktuell/politik/berlin-gabriel-besteht-zerreissprobe-ueber-datenspeicherung-13659345.html', - 'only_matching': True, - }, { - 'url': 'http://www.faz.net/berlin-gabriel-besteht-zerreissprobe-ueber-datenspeicherung-13659345.html', - 'only_matching': True, - }, { - 'url': 'http://www.faz.net/-13659345.html', - 'only_matching': True, - }, { - 'url': 'http://www.faz.net/aktuell/politik/-13659345.html', - 'only_matching': True, - }, { - 'url': 'http://www.faz.net/foobarblafasel-13659345.html', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - description = self._og_search_description(webpage) - media = self._html_search_regex( - r"data-videojs-media='([^']+)", - webpage, 'media') - if media == 'extern': - perform_url = self._search_regex( - r"<iframe[^>]+?src='((?:http:)?//player\.performgroup\.com/eplayer/eplayer\.html#/?[0-9a-f]{26}\.[0-9a-z]{26})", - webpage, 'perform url') - return self.url_result(perform_url) - config = compat_etree_fromstring(media) - - encodings = xpath_element(config, 'ENCODINGS', 'encodings', True) - formats = [] - for pref, code in enumerate(['LOW', 
'HIGH', 'HQ']): - encoding = xpath_element(encodings, code) - if encoding is not None: - encoding_url = xpath_text(encoding, 'FILENAME') - if encoding_url: - tbr = xpath_text(encoding, 'AVERAGEBITRATE', 1000) - if tbr: - tbr = int_or_none(tbr.replace(',', '.')) - f = { - 'url': encoding_url, - 'format_id': code.lower(), - 'quality': pref, - 'tbr': tbr, - 'vcodec': xpath_text(encoding, 'CODEC'), - } - mobj = re.search(r'(\d+)x(\d+)_(\d+)\.mp4', encoding_url) - if mobj: - f.update({ - 'width': int(mobj.group(1)), - 'height': int(mobj.group(2)), - 'tbr': tbr or int(mobj.group(3)), - }) - formats.append(f) - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': self._og_search_title(webpage), - 'formats': formats, - 'description': description.strip() if description else None, - 'thumbnail': xpath_text(config, 'STILL/STILL_BIG'), - 'duration': int_or_none(xpath_text(config, 'DURATION')), - } diff --git a/youtube_dl/extractor/fc2.py b/youtube_dl/extractor/fc2.py deleted file mode 100644 index 435561147..000000000 --- a/youtube_dl/extractor/fc2.py +++ /dev/null @@ -1,160 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import hashlib -import re - -from .common import InfoExtractor -from ..compat import ( - compat_parse_qs, - compat_urllib_request, - compat_urlparse, -) -from ..utils import ( - ExtractorError, - sanitized_Request, - urlencode_postdata, -) - - -class FC2IE(InfoExtractor): - _VALID_URL = r'^(?:https?://video\.fc2\.com/(?:[^/]+/)*content/|fc2:)(?P<id>[^/]+)' - IE_NAME = 'fc2' - _NETRC_MACHINE = 'fc2' - _TESTS = [{ - 'url': 'http://video.fc2.com/en/content/20121103kUan1KHs', - 'md5': 'a6ebe8ebe0396518689d963774a54eb7', - 'info_dict': { - 'id': '20121103kUan1KHs', - 'ext': 'flv', - 'title': 'Boxing again with Puff', - }, - }, { - 'url': 'http://video.fc2.com/en/content/20150125cEva0hDn/', - 'info_dict': { - 'id': '20150125cEva0hDn', - 'ext': 'mp4', - }, - 'params': { - 'username': 'ytdl@yt-dl.org', - 'password': '(snip)', - 
}, - 'skip': 'requires actual password', - }, { - 'url': 'http://video.fc2.com/en/a/content/20130926eZpARwsF', - 'only_matching': True, - }] - - def _login(self): - username, password = self._get_login_info() - if username is None or password is None: - return False - - # Log in - login_form_strs = { - 'email': username, - 'password': password, - 'done': 'video', - 'Submit': ' Login ', - } - - login_data = urlencode_postdata(login_form_strs) - request = sanitized_Request( - 'https://secure.id.fc2.com/index.php?mode=login&switch_language=en', login_data) - - login_results = self._download_webpage(request, None, note='Logging in', errnote='Unable to log in') - if 'mode=redirect&login=done' not in login_results: - self.report_warning('unable to log in: bad username or password') - return False - - # this is also needed - login_redir = sanitized_Request('http://id.fc2.com/?mode=redirect&login=done') - self._download_webpage( - login_redir, None, note='Login redirect', errnote='Login redirect failed') - - return True - - def _real_extract(self, url): - video_id = self._match_id(url) - self._login() - webpage = None - if not url.startswith('fc2:'): - webpage = self._download_webpage(url, video_id) - self._downloader.cookiejar.clear_session_cookies() # must clear - self._login() - - title = 'FC2 video %s' % video_id - thumbnail = None - if webpage is not None: - title = self._og_search_title(webpage) - thumbnail = self._og_search_thumbnail(webpage) - refer = url.replace('/content/', '/a/content/') if '/a/content/' not in url else url - - mimi = hashlib.md5((video_id + '_gGddgPfeaf_gzyr').encode('utf-8')).hexdigest() - - info_url = ( - 'http://video.fc2.com/ginfo.php?mimi={1:s}&href={2:s}&v={0:s}&fversion=WIN%2011%2C6%2C602%2C180&from=2&otag=0&upid={0:s}&tk=null&'. 
- format(video_id, mimi, compat_urllib_request.quote(refer, safe=b'').replace('.', '%2E'))) - - info_webpage = self._download_webpage( - info_url, video_id, note='Downloading info page') - info = compat_urlparse.parse_qs(info_webpage) - - if 'err_code' in info: - # most of the time we can still download wideo even if err_code is 403 or 602 - self.report_warning( - 'Error code was: %s... but still trying' % info['err_code'][0]) - - if 'filepath' not in info: - raise ExtractorError('Cannot download file. Are you logged in?') - - video_url = info['filepath'][0] + '?mid=' + info['mid'][0] - title_info = info.get('title') - if title_info: - title = title_info[0] - - return { - 'id': video_id, - 'title': title, - 'url': video_url, - 'ext': 'flv', - 'thumbnail': thumbnail, - } - - -class FC2EmbedIE(InfoExtractor): - _VALID_URL = r'https?://video\.fc2\.com/flv2\.swf\?(?P<query>.+)' - IE_NAME = 'fc2:embed' - - _TEST = { - 'url': 'http://video.fc2.com/flv2.swf?t=201404182936758512407645&i=20130316kwishtfitaknmcgd76kjd864hso93htfjcnaogz629mcgfs6rbfk0hsycma7shkf85937cbchfygd74&i=201403223kCqB3Ez&d=2625&sj=11&lang=ja&rel=1&from=11&cmt=1&tk=TlRBM09EQTNNekU9&tl=プリズン・ブレイク%20S1-01%20マイケル%20【吹替】', - 'md5': 'b8aae5334cb691bdb1193a88a6ab5d5a', - 'info_dict': { - 'id': '201403223kCqB3Ez', - 'ext': 'flv', - 'title': 'プリズン・ブレイク S1-01 マイケル 【吹替】', - 'thumbnail': r're:^https?://.*\.jpg$', - }, - } - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - query = compat_parse_qs(mobj.group('query')) - - video_id = query['i'][-1] - title = query.get('tl', ['FC2 video %s' % video_id])[0] - - sj = query.get('sj', [None])[0] - thumbnail = None - if sj: - # See thumbnailImagePath() in ServerConst.as of flv2.swf - thumbnail = 'http://video%s-thumbnail.fc2.com/up/pic/%s.jpg' % ( - sj, '/'.join((video_id[:6], video_id[6:8], video_id[-2], video_id[-1], video_id))) - - return { - '_type': 'url_transparent', - 'ie_key': FC2IE.ie_key(), - 'url': 'fc2:%s' % video_id, - 'title': title, - 
'thumbnail': thumbnail, - } diff --git a/youtube_dl/extractor/fczenit.py b/youtube_dl/extractor/fczenit.py deleted file mode 100644 index 8db7c5963..000000000 --- a/youtube_dl/extractor/fczenit.py +++ /dev/null @@ -1,56 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - int_or_none, - float_or_none, -) - - -class FczenitIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?fc-zenit\.ru/video/(?P<id>[0-9]+)' - _TEST = { - 'url': 'http://fc-zenit.ru/video/41044/', - 'md5': '0e3fab421b455e970fa1aa3891e57df0', - 'info_dict': { - 'id': '41044', - 'ext': 'mp4', - 'title': 'Так пишется история: казанский разгром ЦСКА на «Зенит-ТВ»', - 'timestamp': 1462283735, - 'upload_date': '20160503', - }, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - msi_id = self._search_regex( - r"(?s)config\s*=\s*{.+?video_id\s*:\s*'([^']+)'", webpage, 'msi id') - - msi_data = self._download_json( - 'http://player.fc-zenit.ru/msi/video', msi_id, query={ - 'video': msi_id, - })['data'] - title = msi_data['name'] - - formats = [{ - 'format_id': q.get('label'), - 'url': q['url'], - 'height': int_or_none(q.get('label')), - } for q in msi_data['qualities'] if q.get('url')] - - self._sort_formats(formats) - - tags = [tag['label'] for tag in msi_data.get('tags', []) if tag.get('label')] - - return { - 'id': video_id, - 'title': title, - 'thumbnail': msi_data.get('preview'), - 'formats': formats, - 'duration': float_or_none(msi_data.get('duration')), - 'timestamp': int_or_none(msi_data.get('date')), - 'tags': tags, - } diff --git a/youtube_dl/extractor/fifa.py b/youtube_dl/extractor/fifa.py deleted file mode 100644 index 15157774e..000000000 --- a/youtube_dl/extractor/fifa.py +++ /dev/null @@ -1,101 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor - -from ..utils import ( - int_or_none, - 
traverse_obj, - unified_timestamp, -) - -if not callable(getattr(InfoExtractor, '_match_valid_url', None)): - - BaseInfoExtractor = InfoExtractor - - import re - - class InfoExtractor(BaseInfoExtractor): - - @classmethod - def _match_valid_url(cls, url): - return re.match(cls._VALID_URL, url) - - -class FifaIE(InfoExtractor): - _VALID_URL = r'https?://www.fifa.com/fifaplus/(?P<locale>\w{2})/watch/([^#?]+/)?(?P<id>\w+)' - _TESTS = [{ - 'url': 'https://www.fifa.com/fifaplus/en/watch/7on10qPcnyLajDDU3ntg6y', - 'info_dict': { - 'id': '7on10qPcnyLajDDU3ntg6y', - 'title': 'Italy v France | Final | 2006 FIFA World Cup Germany™ | Full Match Replay', - 'description': 'md5:f4520d0ee80529c8ba4134a7d692ff8b', - 'ext': 'mp4', - 'categories': ['FIFA Tournaments'], - 'thumbnail': 'https://digitalhub.fifa.com/transform/135e2656-3a51-407b-8810-6c34bec5b59b/FMR_2006_Italy_France_Final_Hero', - 'duration': 8165, - }, - 'params': {'skip_download': 'm3u8'}, - }, { - 'url': 'https://www.fifa.com/fifaplus/pt/watch/1cg5r5Qt6Qt12ilkDgb1sV', - 'info_dict': { - 'id': '1cg5r5Qt6Qt12ilkDgb1sV', - 'title': 'Brazil v Germany | Semi-finals | 2014 FIFA World Cup Brazil™ | Extended Highlights', - 'description': 'md5:d908c74ee66322b804ae2e521b02a855', - 'ext': 'mp4', - 'categories': ['FIFA Tournaments', 'Highlights'], - 'thumbnail': 'https://digitalhub.fifa.com/transform/d8fe6f61-276d-4a73-a7fe-6878a35fd082/FIFAPLS_100EXTHL_2014BRAvGER_TMB', - 'duration': 902, - 'release_timestamp': 1404777600, - 'release_date': '20140708', - }, - 'params': {'skip_download': 'm3u8'}, - }, { - 'url': 'https://www.fifa.com/fifaplus/fr/watch/3C6gQH9C2DLwzNx7BMRQdp', - 'info_dict': { - 'id': '3C6gQH9C2DLwzNx7BMRQdp', - 'title': 'Josimar goal against Northern Ireland | Classic Goals', - 'description': 'md5:cbe7e7bb52f603c9f1fe9a4780fe983b', - 'ext': 'mp4', - 'categories': ['FIFA Tournaments', 'Goal'], - 'duration': 28, - 'thumbnail': 
'https://digitalhub.fifa.com/transform/f9301391-f8d9-48b5-823e-c093ac5e3e11/CG_MEN_1986_JOSIMAR', - }, - 'params': {'skip_download': 'm3u8'}, - }] - - def _real_extract(self, url): - video_id, locale = self._match_valid_url(url).group('id', 'locale') - webpage = self._download_webpage(url, video_id) - - preconnect_link = self._search_regex( - r'<link\b[^>]+\brel\s*=\s*"preconnect"[^>]+href\s*=\s*"([^"]+)"', webpage, 'Preconnect Link') - - video_details = self._download_json( - '{preconnect_link}/sections/videoDetails/{video_id}'.format(**locals()), video_id, 'Downloading Video Details', fatal=False) - - preplay_parameters = self._download_json( - '{preconnect_link}/videoPlayerData/{video_id}'.format(**locals()), video_id, 'Downloading Preplay Parameters')['preplayParameters'] - - content_data = self._download_json( - # 1. query string is expected to be sent as-is - # 2. `sig` must be appended - # 3. if absent, the call appears to work but the manifest is bad (404) - 'https://content.uplynk.com/preplay/{contentId}/multiple.json?{queryStr}&sig={signature}'.format(**preplay_parameters), - video_id, 'Downloading Content Data') - - # formats, subtitles = self._extract_m3u8_formats_and_subtitles(content_data['playURL'], video_id) - formats, subtitles = self._extract_m3u8_formats(content_data['playURL'], video_id, ext='mp4', entry_protocol='m3u8_native'), None - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': video_details['title'], - 'description': video_details.get('description'), - 'duration': int_or_none(video_details.get('duration')), - 'release_timestamp': unified_timestamp(video_details.get('dateOfRelease')), - 'categories': traverse_obj(video_details, (('videoCategory', 'videoSubcategory'),)), - 'thumbnail': traverse_obj(video_details, ('backgroundImage', 'src')), - 'formats': formats, - 'subtitles': subtitles, - } diff --git a/youtube_dl/extractor/filmon.py b/youtube_dl/extractor/filmon.py deleted file mode 100644 index f775fe0ba..000000000 
--- a/youtube_dl/extractor/filmon.py +++ /dev/null @@ -1,178 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..compat import ( - compat_str, - compat_HTTPError, -) -from ..utils import ( - qualities, - strip_or_none, - int_or_none, - ExtractorError, -) - - -class FilmOnIE(InfoExtractor): - IE_NAME = 'filmon' - _VALID_URL = r'(?:https?://(?:www\.)?filmon\.com/vod/view/|filmon:)(?P<id>\d+)' - _TESTS = [{ - 'url': 'https://www.filmon.com/vod/view/24869-0-plan-9-from-outer-space', - 'info_dict': { - 'id': '24869', - 'ext': 'mp4', - 'title': 'Plan 9 From Outer Space', - 'description': 'Dead human, zombies and vampires', - }, - }, { - 'url': 'https://www.filmon.com/vod/view/2825-1-popeye-series-1', - 'info_dict': { - 'id': '2825', - 'title': 'Popeye Series 1', - 'description': 'The original series of Popeye.', - }, - 'playlist_mincount': 8, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - try: - response = self._download_json( - 'https://www.filmon.com/api/vod/movie?id=%s' % video_id, - video_id)['response'] - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError): - errmsg = self._parse_json(e.cause.read().decode(), video_id)['reason'] - raise ExtractorError('%s said: %s' % (self.IE_NAME, errmsg), expected=True) - raise - - title = response['title'] - description = strip_or_none(response.get('description')) - - if response.get('type_id') == 1: - entries = [self.url_result('filmon:' + episode_id) for episode_id in response.get('episodes', [])] - return self.playlist_result(entries, video_id, title, description) - - QUALITY = qualities(('low', 'high')) - formats = [] - for format_id, stream in response.get('streams', {}).items(): - stream_url = stream.get('url') - if not stream_url: - continue - formats.append({ - 'format_id': format_id, - 'url': stream_url, - 'ext': 'mp4', - 'quality': QUALITY(stream.get('quality')), - 'protocol': 'm3u8_native', - }) - 
self._sort_formats(formats) - - thumbnails = [] - poster = response.get('poster', {}) - thumbs = poster.get('thumbs', {}) - thumbs['poster'] = poster - for thumb_id, thumb in thumbs.items(): - thumb_url = thumb.get('url') - if not thumb_url: - continue - thumbnails.append({ - 'id': thumb_id, - 'url': thumb_url, - 'width': int_or_none(thumb.get('width')), - 'height': int_or_none(thumb.get('height')), - }) - - return { - 'id': video_id, - 'title': title, - 'formats': formats, - 'description': description, - 'thumbnails': thumbnails, - } - - -class FilmOnChannelIE(InfoExtractor): - IE_NAME = 'filmon:channel' - _VALID_URL = r'https?://(?:www\.)?filmon\.com/(?:tv|channel)/(?P<id>[a-z0-9-]+)' - _TESTS = [{ - # VOD - 'url': 'http://www.filmon.com/tv/sports-haters', - 'info_dict': { - 'id': '4190', - 'ext': 'mp4', - 'title': 'Sports Haters', - 'description': 'md5:dabcb4c1d9cfc77085612f1a85f8275d', - }, - }, { - # LIVE - 'url': 'https://www.filmon.com/channel/filmon-sports', - 'only_matching': True, - }, { - 'url': 'https://www.filmon.com/tv/2894', - 'only_matching': True, - }] - - _THUMBNAIL_RES = [ - ('logo', 56, 28), - ('big_logo', 106, 106), - ('extra_big_logo', 300, 300), - ] - - def _real_extract(self, url): - channel_id = self._match_id(url) - - try: - channel_data = self._download_json( - 'http://www.filmon.com/api-v2/channel/' + channel_id, channel_id)['data'] - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError): - errmsg = self._parse_json(e.cause.read().decode(), channel_id)['message'] - raise ExtractorError('%s said: %s' % (self.IE_NAME, errmsg), expected=True) - raise - - channel_id = compat_str(channel_data['id']) - is_live = not channel_data.get('is_vod') and not channel_data.get('is_vox') - title = channel_data['title'] - - QUALITY = qualities(('low', 'high')) - formats = [] - for stream in channel_data.get('streams', []): - stream_url = stream.get('url') - if not stream_url: - continue - if not is_live: - 
formats.extend(self._extract_wowza_formats( - stream_url, channel_id, skip_protocols=['dash', 'rtmp', 'rtsp'])) - continue - quality = stream.get('quality') - formats.append({ - 'format_id': quality, - # this is an m3u8 stream, but we are deliberately not using _extract_m3u8_formats - # because it doesn't have bitrate variants anyway - 'url': stream_url, - 'ext': 'mp4', - 'quality': QUALITY(quality), - }) - self._sort_formats(formats) - - thumbnails = [] - for name, width, height in self._THUMBNAIL_RES: - thumbnails.append({ - 'id': name, - 'url': 'http://static.filmon.com/assets/channels/%s/%s.png' % (channel_id, name), - 'width': width, - 'height': height, - }) - - return { - 'id': channel_id, - 'display_id': channel_data.get('alias'), - 'title': self._live_title(title) if is_live else title, - 'description': channel_data.get('description'), - 'thumbnails': thumbnails, - 'formats': formats, - 'is_live': is_live, - } diff --git a/youtube_dl/extractor/filmweb.py b/youtube_dl/extractor/filmweb.py deleted file mode 100644 index 56000bc5b..000000000 --- a/youtube_dl/extractor/filmweb.py +++ /dev/null @@ -1,42 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor - - -class FilmwebIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?filmweb\.no/(?P<type>trailere|filmnytt)/article(?P<id>\d+)\.ece' - _TEST = { - 'url': 'http://www.filmweb.no/trailere/article1264921.ece', - 'md5': 'e353f47df98e557d67edaceda9dece89', - 'info_dict': { - 'id': '13033574', - 'ext': 'mp4', - 'title': 'Det som en gang var', - 'upload_date': '20160316', - 'timestamp': 1458140101, - 'uploader_id': '12639966', - 'uploader': 'Live Roaldset', - } - } - - def _real_extract(self, url): - article_type, article_id = re.match(self._VALID_URL, url).groups() - if article_type == 'filmnytt': - webpage = self._download_webpage(url, article_id) - article_id = self._search_regex(r'data-videoid="(\d+)"', webpage, 'article id') - embed_code = self._download_json( - 
'https://www.filmweb.no/template_v2/ajax/json_trailerEmbed.jsp', - article_id, query={ - 'articleId': article_id, - })['embedCode'] - iframe_url = self._proto_relative_url(self._search_regex( - r'<iframe[^>]+src="([^"]+)', embed_code, 'iframe url')) - - return { - '_type': 'url_transparent', - 'id': article_id, - 'url': iframe_url, - 'ie_key': 'TwentyThreeVideo', - } diff --git a/youtube_dl/extractor/firsttv.py b/youtube_dl/extractor/firsttv.py deleted file mode 100644 index 28617d83c..000000000 --- a/youtube_dl/extractor/firsttv.py +++ /dev/null @@ -1,156 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..compat import ( - compat_str, - compat_urlparse, -) -from ..utils import ( - int_or_none, - qualities, - unified_strdate, - url_or_none, -) - - -class FirstTVIE(InfoExtractor): - IE_NAME = '1tv' - IE_DESC = 'Первый канал' - _VALID_URL = r'https?://(?:www\.)?1tv\.ru/(?:[^/]+/)+(?P<id>[^/?#]+)' - - _TESTS = [{ - # single format - 'url': 'http://www.1tv.ru/shows/naedine-so-vsemi/vypuski/gost-lyudmila-senchina-naedine-so-vsemi-vypusk-ot-12-02-2015', - 'md5': 'a1b6b60d530ebcf8daacf4565762bbaf', - 'info_dict': { - 'id': '40049', - 'ext': 'mp4', - 'title': 'Гость Людмила Сенчина. Наедине со всеми. Выпуск от 12.02.2015', - 'thumbnail': r're:^https?://.*\.(?:jpg|JPG)$', - 'upload_date': '20150212', - 'duration': 2694, - }, - }, { - # multiple formats - 'url': 'http://www.1tv.ru/shows/dobroe-utro/pro-zdorove/vesennyaya-allergiya-dobroe-utro-fragment-vypuska-ot-07042016', - 'info_dict': { - 'id': '364746', - 'ext': 'mp4', - 'title': 'Весенняя аллергия. Доброе утро. 
Фрагмент выпуска от 07.04.2016', - 'thumbnail': r're:^https?://.*\.(?:jpg|JPG)$', - 'upload_date': '20160407', - 'duration': 179, - 'formats': 'mincount:3', - }, - 'params': { - 'skip_download': True, - }, - }, { - 'url': 'http://www.1tv.ru/news/issue/2016-12-01/14:00', - 'info_dict': { - 'id': '14:00', - 'title': 'Выпуск новостей в 14:00 1 декабря 2016 года. Новости. Первый канал', - 'description': 'md5:2e921b948f8c1ff93901da78ebdb1dfd', - }, - 'playlist_count': 13, - }, { - 'url': 'http://www.1tv.ru/shows/tochvtoch-supersezon/vystupleniya/evgeniy-dyatlov-vladimir-vysockiy-koni-priveredlivye-toch-v-toch-supersezon-fragment-vypuska-ot-06-11-2016', - 'only_matching': True, - }] - - def _real_extract(self, url): - display_id = self._match_id(url) - - webpage = self._download_webpage(url, display_id) - playlist_url = compat_urlparse.urljoin(url, self._search_regex( - r'data-playlist-url=(["\'])(?P<url>(?:(?!\1).)+)\1', - webpage, 'playlist url', group='url')) - - parsed_url = compat_urlparse.urlparse(playlist_url) - qs = compat_urlparse.parse_qs(parsed_url.query) - item_ids = qs.get('videos_ids[]') or qs.get('news_ids[]') - - items = self._download_json(playlist_url, display_id) - - if item_ids: - items = [ - item for item in items - if item.get('uid') and compat_str(item['uid']) in item_ids] - else: - items = [items[0]] - - entries = [] - QUALITIES = ('ld', 'sd', 'hd', ) - - for item in items: - title = item['title'] - quality = qualities(QUALITIES) - formats = [] - path = None - for f in item.get('mbr', []): - src = url_or_none(f.get('src')) - if not src: - continue - tbr = int_or_none(self._search_regex( - r'_(\d{3,})\.mp4', src, 'tbr', default=None)) - if not path: - path = self._search_regex( - r'//[^/]+/(.+?)_\d+\.mp4', src, - 'm3u8 path', default=None) - formats.append({ - 'url': src, - 'format_id': f.get('name'), - 'tbr': tbr, - 'source_preference': quality(f.get('name')), - # quality metadata of http formats may be incorrect - 'preference': -1, - }) - # m3u8 
URL format is reverse engineered from [1] (search for - # master.m3u8). dashEdges (that is currently balancer-vod.1tv.ru) - # is taken from [2]. - # 1. http://static.1tv.ru/player/eump1tv-current/eump-1tv.all.min.js?rnd=9097422834:formatted - # 2. http://static.1tv.ru/player/eump1tv-config/config-main.js?rnd=9097422834 - if not path and len(formats) == 1: - path = self._search_regex( - r'//[^/]+/(.+?$)', formats[0]['url'], - 'm3u8 path', default=None) - if path: - if len(formats) == 1: - m3u8_path = ',' - else: - tbrs = [compat_str(t) for t in sorted(f['tbr'] for f in formats)] - m3u8_path = '_,%s,%s' % (','.join(tbrs), '.mp4') - formats.extend(self._extract_m3u8_formats( - 'http://balancer-vod.1tv.ru/%s%s.urlset/master.m3u8' - % (path, m3u8_path), - display_id, 'mp4', - entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) - self._sort_formats(formats) - - thumbnail = item.get('poster') or self._og_search_thumbnail(webpage) - duration = int_or_none(item.get('duration') or self._html_search_meta( - 'video:duration', webpage, 'video duration', fatal=False)) - upload_date = unified_strdate(self._html_search_meta( - 'ya:ovs:upload_date', webpage, 'upload date', default=None)) - - entries.append({ - 'id': compat_str(item.get('id') or item['uid']), - 'thumbnail': thumbnail, - 'title': title, - 'upload_date': upload_date, - 'duration': int_or_none(duration), - 'formats': formats - }) - - title = self._html_search_regex( - (r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>', - r"'title'\s*:\s*'([^']+)'"), - webpage, 'title', default=None) or self._og_search_title( - webpage, default=None) - description = self._html_search_regex( - r'<div class="descr">\s*<div> </div>\s*<p>([^<]*)</p></div>', - webpage, 'description', default=None) or self._html_search_meta( - 'description', webpage, 'description', default=None) - - return self.playlist_result(entries, display_id, title, description) diff --git a/youtube_dl/extractor/fivemin.py 
b/youtube_dl/extractor/fivemin.py deleted file mode 100644 index f3f876ecd..000000000 --- a/youtube_dl/extractor/fivemin.py +++ /dev/null @@ -1,54 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor - - -class FiveMinIE(InfoExtractor): - IE_NAME = '5min' - _VALID_URL = r'(?:5min:|https?://(?:[^/]*?5min\.com/|delivery\.vidible\.tv/aol)(?:(?:Scripts/PlayerSeed\.js|playerseed/?)?\?.*?playList=)?)(?P<id>\d+)' - - _TESTS = [ - { - # From http://www.engadget.com/2013/11/15/ipad-mini-retina-display-review/ - 'url': 'http://pshared.5min.com/Scripts/PlayerSeed.js?sid=281&width=560&height=345&playList=518013791', - 'md5': '4f7b0b79bf1a470e5004f7112385941d', - 'info_dict': { - 'id': '518013791', - 'ext': 'mp4', - 'title': 'iPad Mini with Retina Display Review', - 'description': 'iPad mini with Retina Display review', - 'duration': 177, - 'uploader': 'engadget', - 'upload_date': '20131115', - 'timestamp': 1384515288, - }, - 'params': { - # m3u8 download - 'skip_download': True, - } - }, - { - # From http://on.aol.com/video/how-to-make-a-next-level-fruit-salad-518086247 - 'url': '5min:518086247', - 'md5': 'e539a9dd682c288ef5a498898009f69e', - 'info_dict': { - 'id': '518086247', - 'ext': 'mp4', - 'title': 'How to Make a Next-Level Fruit Salad', - 'duration': 184, - }, - 'skip': 'no longer available', - }, - { - 'url': 'http://embed.5min.com/518726732/', - 'only_matching': True, - }, - { - 'url': 'http://delivery.vidible.tv/aol?playList=518013791', - 'only_matching': True, - } - ] - - def _real_extract(self, url): - video_id = self._match_id(url) - return self.url_result('aol-video:%s' % video_id) diff --git a/youtube_dl/extractor/fivetv.py b/youtube_dl/extractor/fivetv.py deleted file mode 100644 index c4c0f1b3d..000000000 --- a/youtube_dl/extractor/fivetv.py +++ /dev/null @@ -1,91 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import int_or_none - - -class 
FiveTVIE(InfoExtractor): - _VALID_URL = r'''(?x) - https?:// - (?:www\.)?5-tv\.ru/ - (?: - (?:[^/]+/)+(?P<id>\d+)| - (?P<path>[^/?#]+)(?:[/?#])? - ) - ''' - - _TESTS = [{ - 'url': 'http://5-tv.ru/news/96814/', - 'md5': 'bbff554ad415ecf5416a2f48c22d9283', - 'info_dict': { - 'id': '96814', - 'ext': 'mp4', - 'title': 'Россияне выбрали имя для общенациональной платежной системы', - 'description': 'md5:a8aa13e2b7ad36789e9f77a74b6de660', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 180, - }, - }, { - 'url': 'http://5-tv.ru/video/1021729/', - 'info_dict': { - 'id': '1021729', - 'ext': 'mp4', - 'title': '3D принтер', - 'description': 'md5:d76c736d29ef7ec5c0cf7d7c65ffcb41', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 180, - }, - }, { - # redirect to https://www.5-tv.ru/projects/1000095/izvestia-glavnoe/ - 'url': 'http://www.5-tv.ru/glavnoe/#itemDetails', - 'info_dict': { - 'id': 'glavnoe', - 'ext': 'mp4', - 'title': r're:^Итоги недели с \d+ по \d+ \w+ \d{4} года$', - 'thumbnail': r're:^https?://.*\.jpg$', - }, - 'skip': 'redirect to «Известия. 
Главное» project page', - }, { - 'url': 'http://www.5-tv.ru/glavnoe/broadcasts/508645/', - 'only_matching': True, - }, { - 'url': 'http://5-tv.ru/films/1507502/', - 'only_matching': True, - }, { - 'url': 'http://5-tv.ru/programs/broadcast/508713/', - 'only_matching': True, - }, { - 'url': 'http://5-tv.ru/angel/', - 'only_matching': True, - }, { - 'url': 'http://www.5-tv.ru/schedule/?iframe=true&width=900&height=450', - 'only_matching': True, - }] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') or mobj.group('path') - - webpage = self._download_webpage(url, video_id) - - video_url = self._search_regex( - [r'<div[^>]+?class="(?:flow)?player[^>]+?data-href="([^"]+)"', - r'<a[^>]+?href="([^"]+)"[^>]+?class="videoplayer"'], - webpage, 'video url') - - title = self._og_search_title(webpage, default=None) or self._search_regex( - r'<title>([^<]+)', webpage, 'title') - duration = int_or_none(self._og_search_property( - 'video:duration', webpage, 'duration', default=None)) - - return { - 'id': video_id, - 'url': video_url, - 'title': title, - 'description': self._og_search_description(webpage, default=None), - 'thumbnail': self._og_search_thumbnail(webpage, default=None), - 'duration': duration, - } diff --git a/youtube_dl/extractor/flickr.py b/youtube_dl/extractor/flickr.py deleted file mode 100644 index 9f166efd4..000000000 --- a/youtube_dl/extractor/flickr.py +++ /dev/null @@ -1,116 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..compat import ( - compat_str, - compat_urllib_parse_urlencode, -) -from ..utils import ( - ExtractorError, - int_or_none, - qualities, -) - - -class FlickrIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.|secure\.)?flickr\.com/photos/[\w\-_@]+/(?P\d+)' - _TEST = { - 'url': 'http://www.flickr.com/photos/forestwander-nature-pictures/5645318632/in/photostream/', - 'md5': '164fe3fa6c22e18d448d4d5af2330f31', - 'info_dict': { - 'id': '5645318632', 
- 'ext': 'mpg', - 'description': 'Waterfalls in the Springtime at Dark Hollow Waterfalls. These are located just off of Skyline Drive in Virginia. They are only about 6/10 of a mile hike but it is a pretty steep hill and a good climb back up.', - 'title': 'Dark Hollow Waterfalls', - 'duration': 19, - 'timestamp': 1303528740, - 'upload_date': '20110423', - 'uploader_id': '10922353@N03', - 'uploader': 'Forest Wander', - 'uploader_url': 'https://www.flickr.com/photos/forestwander-nature-pictures/', - 'comment_count': int, - 'view_count': int, - 'tags': list, - 'license': 'Attribution-ShareAlike', - } - } - _API_BASE_URL = 'https://api.flickr.com/services/rest?' - # https://help.yahoo.com/kb/flickr/SLN25525.html - _LICENSES = { - '0': 'All Rights Reserved', - '1': 'Attribution-NonCommercial-ShareAlike', - '2': 'Attribution-NonCommercial', - '3': 'Attribution-NonCommercial-NoDerivs', - '4': 'Attribution', - '5': 'Attribution-ShareAlike', - '6': 'Attribution-NoDerivs', - '7': 'No known copyright restrictions', - '8': 'United States government work', - '9': 'Public Domain Dedication (CC0)', - '10': 'Public Domain Work', - } - - def _call_api(self, method, video_id, api_key, note, secret=None): - query = { - 'photo_id': video_id, - 'method': 'flickr.%s' % method, - 'api_key': api_key, - 'format': 'json', - 'nojsoncallback': 1, - } - if secret: - query['secret'] = secret - data = self._download_json(self._API_BASE_URL + compat_urllib_parse_urlencode(query), video_id, note) - if data['stat'] != 'ok': - raise ExtractorError(data['message']) - return data - - def _real_extract(self, url): - video_id = self._match_id(url) - - api_key = self._download_json( - 'https://www.flickr.com/hermes_error_beacon.gne', video_id, - 'Downloading api key')['site_key'] - - video_info = self._call_api( - 'photos.getInfo', video_id, api_key, 'Downloading video info')['photo'] - if video_info['media'] == 'video': - streams = self._call_api( - 'video.getStreamInfo', video_id, api_key, - 
'Downloading streams info', video_info['secret'])['streams'] - - preference = qualities( - ['288p', 'iphone_wifi', '100', '300', '700', '360p', 'appletv', '720p', '1080p', 'orig']) - - formats = [] - for stream in streams['stream']: - stream_type = compat_str(stream.get('type')) - formats.append({ - 'format_id': stream_type, - 'url': stream['_content'], - 'preference': preference(stream_type), - }) - self._sort_formats(formats) - - owner = video_info.get('owner', {}) - uploader_id = owner.get('nsid') - uploader_path = owner.get('path_alias') or uploader_id - uploader_url = 'https://www.flickr.com/photos/%s/' % uploader_path if uploader_path else None - - return { - 'id': video_id, - 'title': video_info['title']['_content'], - 'description': video_info.get('description', {}).get('_content'), - 'formats': formats, - 'timestamp': int_or_none(video_info.get('dateuploaded')), - 'duration': int_or_none(video_info.get('video', {}).get('duration')), - 'uploader_id': uploader_id, - 'uploader': owner.get('realname'), - 'uploader_url': uploader_url, - 'comment_count': int_or_none(video_info.get('comments', {}).get('_content')), - 'view_count': int_or_none(video_info.get('views')), - 'tags': [tag.get('_content') for tag in video_info.get('tags', {}).get('tag', [])], - 'license': self._LICENSES.get(video_info.get('license')), - } - else: - raise ExtractorError('not a video', expected=True) diff --git a/youtube_dl/extractor/folketinget.py b/youtube_dl/extractor/folketinget.py deleted file mode 100644 index b3df93f28..000000000 --- a/youtube_dl/extractor/folketinget.py +++ /dev/null @@ -1,77 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..compat import compat_parse_qs -from ..utils import ( - int_or_none, - parse_duration, - parse_iso8601, - xpath_text, -) - - -class FolketingetIE(InfoExtractor): - IE_DESC = 'Folketinget (ft.dk; Danish parliament)' - _VALID_URL = 
r'https?://(?:www\.)?ft\.dk/webtv/video/[^?#]*?\.(?P[0-9]+)\.aspx' - _TEST = { - 'url': 'http://www.ft.dk/webtv/video/20141/eru/td.1165642.aspx?as=1#player', - 'md5': '6269e8626fa1a891bf5369b386ae996a', - 'info_dict': { - 'id': '1165642', - 'ext': 'mp4', - 'title': 'Åbent samråd i Erhvervsudvalget', - 'description': 'Åbent samråd med erhvervs- og vækstministeren om regeringens politik på teleområdet', - 'view_count': int, - 'width': 768, - 'height': 432, - 'tbr': 928000, - 'timestamp': 1416493800, - 'upload_date': '20141120', - 'duration': 3960, - }, - 'params': { - # rtmp download - 'skip_download': True, - }, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - title = self._og_search_title(webpage) - description = self._html_search_regex( - r'(?s)
]*>(.*?)<', - webpage, 'description', fatal=False) - - player_params = compat_parse_qs(self._search_regex( - r'\d+)' - _TESTS = [{ - 'url': 'http://footyroom.com/matches/79922154/hull-city-vs-chelsea/review', - 'info_dict': { - 'id': '79922154', - 'title': 'VIDEO Hull City 0 - 2 Chelsea', - }, - 'playlist_count': 2, - 'add_ie': [StreamableIE.ie_key()], - }, { - 'url': 'http://footyroom.com/matches/75817984/georgia-vs-germany/review', - 'info_dict': { - 'id': '75817984', - 'title': 'VIDEO Georgia 0 - 2 Germany', - }, - 'playlist_count': 1, - 'add_ie': ['Playwire'] - }] - - def _real_extract(self, url): - playlist_id = self._match_id(url) - - webpage = self._download_webpage(url, playlist_id) - - playlist = self._parse_json(self._search_regex( - r'DataStore\.media\s*=\s*([^;]+)', webpage, 'media data'), - playlist_id) - - playlist_title = self._og_search_title(webpage) - - entries = [] - for video in playlist: - payload = video.get('payload') - if not payload: - continue - playwire_url = self._html_search_regex( - r'data-config="([^"]+)"', payload, - 'playwire url', default=None) - if playwire_url: - entries.append(self.url_result(self._proto_relative_url( - playwire_url, 'http:'), 'Playwire')) - - streamable_url = StreamableIE._extract_url(payload) - if streamable_url: - entries.append(self.url_result( - streamable_url, StreamableIE.ie_key())) - - return self.playlist_result(entries, playlist_id, playlist_title) diff --git a/youtube_dl/extractor/formula1.py b/youtube_dl/extractor/formula1.py deleted file mode 100644 index 67662e6de..000000000 --- a/youtube_dl/extractor/formula1.py +++ /dev/null @@ -1,27 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor - - -class Formula1IE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?formula1\.com/en/latest/video\.[^.]+\.(?P\d+)\.html' - _TEST = { - 'url': 'https://www.formula1.com/en/latest/video.race-highlights-spain-2016.6060988138001.html', - 'md5': 
'be7d3a8c2f804eb2ab2aa5d941c359f8', - 'info_dict': { - 'id': '6060988138001', - 'ext': 'mp4', - 'title': 'Race highlights - Spain 2016', - 'timestamp': 1463332814, - 'upload_date': '20160515', - 'uploader_id': '6057949432001', - }, - 'add_ie': ['BrightcoveNew'], - } - BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/6057949432001/S1WMrhjlh_default/index.html?videoId=%s' - - def _real_extract(self, url): - bc_id = self._match_id(url) - return self.url_result( - self.BRIGHTCOVE_URL_TEMPLATE % bc_id, 'BrightcoveNew', bc_id) diff --git a/youtube_dl/extractor/fourtube.py b/youtube_dl/extractor/fourtube.py deleted file mode 100644 index be4e81342..000000000 --- a/youtube_dl/extractor/fourtube.py +++ /dev/null @@ -1,309 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..compat import ( - compat_b64decode, - compat_str, - compat_urllib_parse_unquote, - compat_urlparse, -) -from ..utils import ( - int_or_none, - parse_duration, - parse_iso8601, - str_or_none, - str_to_int, - try_get, - unified_timestamp, - url_or_none, -) - - -class FourTubeBaseIE(InfoExtractor): - def _extract_formats(self, url, video_id, media_id, sources): - token_url = 'https://%s/%s/desktop/%s' % ( - self._TKN_HOST, media_id, '+'.join(sources)) - - parsed_url = compat_urlparse.urlparse(url) - tokens = self._download_json(token_url, video_id, data=b'', headers={ - 'Origin': '%s://%s' % (parsed_url.scheme, parsed_url.hostname), - 'Referer': url, - }) - formats = [{ - 'url': tokens[format]['token'], - 'format_id': format + 'p', - 'resolution': format + 'p', - 'quality': int(format), - } for format in sources] - self._sort_formats(formats) - return formats - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - kind, video_id, display_id = mobj.group('kind', 'id', 'display_id') - - if kind == 'm' or not display_id: - url = self._URL_TEMPLATE % video_id - - webpage = self._download_webpage(url, video_id) - - title = 
self._html_search_meta('name', webpage) - timestamp = parse_iso8601(self._html_search_meta( - 'uploadDate', webpage)) - thumbnail = self._html_search_meta('thumbnailUrl', webpage) - uploader_id = self._html_search_regex( - r'', - webpage, 'uploader id', fatal=False) - uploader = self._html_search_regex( - r'', - webpage, 'uploader', fatal=False) - - categories_html = self._search_regex( - r'(?s)>\s*Categories / Tags\s*.*?
    (.*?)
', - webpage, 'categories', fatal=False) - categories = None - if categories_html: - categories = [ - c.strip() for c in re.findall( - r'(?s)
  • (.*?)', categories_html)] - - view_count = str_to_int(self._search_regex( - r']+itemprop="interactionCount"[^>]+content="UserPlays:([0-9,]+)">', - webpage, 'view count', default=None)) - like_count = str_to_int(self._search_regex( - r']+itemprop="interactionCount"[^>]+content="UserLikes:([0-9,]+)">', - webpage, 'like count', default=None)) - duration = parse_duration(self._html_search_meta('duration', webpage)) - - media_id = self._search_regex( - r']+data-id=(["\'])(?P\d+)\1[^>]+data-quality=', webpage, - 'media id', default=None, group='id') - sources = [ - quality - for _, quality in re.findall(r']+data-quality=(["\'])(.+?)\1', webpage)] - if not (media_id and sources): - player_js = self._download_webpage( - self._search_regex( - r']id=(["\'])playerembed\1[^>]+src=(["\'])(?P.+?)\2', - webpage, 'player JS', group='url'), - video_id, 'Downloading player JS') - params_js = self._search_regex( - r'\$\.ajax\(url,\ opts\);\s*\}\s*\}\)\(([0-9,\[\] ]+)\)', - player_js, 'initialization parameters') - params = self._parse_json('[%s]' % params_js, video_id) - media_id = params[0] - sources = ['%s' % p for p in params[2]] - - formats = self._extract_formats(url, video_id, media_id, sources) - - return { - 'id': video_id, - 'title': title, - 'formats': formats, - 'categories': categories, - 'thumbnail': thumbnail, - 'uploader': uploader, - 'uploader_id': uploader_id, - 'timestamp': timestamp, - 'like_count': like_count, - 'view_count': view_count, - 'duration': duration, - 'age_limit': 18, - } - - -class FourTubeIE(FourTubeBaseIE): - IE_NAME = '4tube' - _VALID_URL = r'https?://(?:(?Pwww|m)\.)?4tube\.com/(?:videos|embed)/(?P\d+)(?:/(?P[^/?#&]+))?' 
- _URL_TEMPLATE = 'https://www.4tube.com/videos/%s/video' - _TKN_HOST = 'token.4tube.com' - _TESTS = [{ - 'url': 'http://www.4tube.com/videos/209733/hot-babe-holly-michaels-gets-her-ass-stuffed-by-black', - 'md5': '6516c8ac63b03de06bc8eac14362db4f', - 'info_dict': { - 'id': '209733', - 'ext': 'mp4', - 'title': 'Hot Babe Holly Michaels gets her ass stuffed by black', - 'uploader': 'WCP Club', - 'uploader_id': 'wcp-club', - 'upload_date': '20131031', - 'timestamp': 1383263892, - 'duration': 583, - 'view_count': int, - 'like_count': int, - 'categories': list, - 'age_limit': 18, - }, - }, { - 'url': 'http://www.4tube.com/embed/209733', - 'only_matching': True, - }, { - 'url': 'http://m.4tube.com/videos/209733/hot-babe-holly-michaels-gets-her-ass-stuffed-by-black', - 'only_matching': True, - }] - - -class FuxIE(FourTubeBaseIE): - _VALID_URL = r'https?://(?:(?Pwww|m)\.)?fux\.com/(?:video|embed)/(?P\d+)(?:/(?P[^/?#&]+))?' - _URL_TEMPLATE = 'https://www.fux.com/video/%s/video' - _TKN_HOST = 'token.fux.com' - _TESTS = [{ - 'url': 'https://www.fux.com/video/195359/awesome-fucking-kitchen-ends-cum-swallow', - 'info_dict': { - 'id': '195359', - 'ext': 'mp4', - 'title': 'Awesome fucking in the kitchen ends with cum swallow', - 'uploader': 'alenci2342', - 'uploader_id': 'alenci2342', - 'upload_date': '20131230', - 'timestamp': 1388361660, - 'duration': 289, - 'view_count': int, - 'like_count': int, - 'categories': list, - 'age_limit': 18, - }, - 'params': { - 'skip_download': True, - }, - }, { - 'url': 'https://www.fux.com/embed/195359', - 'only_matching': True, - }, { - 'url': 'https://www.fux.com/video/195359/awesome-fucking-kitchen-ends-cum-swallow', - 'only_matching': True, - }] - - -class PornTubeIE(FourTubeBaseIE): - _VALID_URL = r'https?://(?:(?Pwww|m)\.)?porntube\.com/(?:videos/(?P[^/]+)_|embed/)(?P\d+)' - _URL_TEMPLATE = 'https://www.porntube.com/videos/video_%s' - _TKN_HOST = 'tkn.porntube.com' - _TESTS = [{ - 'url': 
'https://www.porntube.com/videos/teen-couple-doing-anal_7089759', - 'info_dict': { - 'id': '7089759', - 'ext': 'mp4', - 'title': 'Teen couple doing anal', - 'uploader': 'Alexy', - 'uploader_id': '91488', - 'upload_date': '20150606', - 'timestamp': 1433595647, - 'duration': 5052, - 'view_count': int, - 'like_count': int, - 'age_limit': 18, - }, - 'params': { - 'skip_download': True, - }, - }, { - 'url': 'https://www.porntube.com/videos/squirting-teen-ballerina-ecg_1331406', - 'info_dict': { - 'id': '1331406', - 'ext': 'mp4', - 'title': 'Squirting Teen Ballerina on ECG', - 'uploader': 'Exploited College Girls', - 'uploader_id': '665', - 'channel': 'Exploited College Girls', - 'channel_id': '665', - 'upload_date': '20130920', - 'timestamp': 1379685485, - 'duration': 851, - 'view_count': int, - 'like_count': int, - 'age_limit': 18, - }, - 'params': { - 'skip_download': True, - }, - }, { - 'url': 'https://www.porntube.com/embed/7089759', - 'only_matching': True, - }, { - 'url': 'https://m.porntube.com/videos/teen-couple-doing-anal_7089759', - 'only_matching': True, - }] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id, display_id = mobj.group('id', 'display_id') - - webpage = self._download_webpage(url, display_id) - - video = self._parse_json( - self._search_regex( - r'INITIALSTATE\s*=\s*(["\'])(?P(?:(?!\1).)+)\1', - webpage, 'data', group='value'), video_id, - transform_source=lambda x: compat_urllib_parse_unquote( - compat_b64decode(x).decode('utf-8')))['page']['video'] - - title = video['title'] - media_id = video['mediaId'] - sources = [compat_str(e['height']) - for e in video['encodings'] if e.get('height')] - formats = self._extract_formats(url, video_id, media_id, sources) - - thumbnail = url_or_none(video.get('masterThumb')) - uploader = try_get(video, lambda x: x['user']['username'], compat_str) - uploader_id = str_or_none(try_get( - video, lambda x: x['user']['id'], int)) - channel = try_get(video, lambda x: 
x['channel']['name'], compat_str) - channel_id = str_or_none(try_get( - video, lambda x: x['channel']['id'], int)) - like_count = int_or_none(video.get('likes')) - dislike_count = int_or_none(video.get('dislikes')) - view_count = int_or_none(video.get('playsQty')) - duration = int_or_none(video.get('durationInSeconds')) - timestamp = unified_timestamp(video.get('publishedAt')) - - return { - 'id': video_id, - 'title': title, - 'formats': formats, - 'thumbnail': thumbnail, - 'uploader': uploader or channel, - 'uploader_id': uploader_id or channel_id, - 'channel': channel, - 'channel_id': channel_id, - 'timestamp': timestamp, - 'like_count': like_count, - 'dislike_count': dislike_count, - 'view_count': view_count, - 'duration': duration, - 'age_limit': 18, - } - - -class PornerBrosIE(FourTubeBaseIE): - _VALID_URL = r'https?://(?:(?Pwww|m)\.)?pornerbros\.com/(?:videos/(?P[^/]+)_|embed/)(?P\d+)' - _URL_TEMPLATE = 'https://www.pornerbros.com/videos/video_%s' - _TKN_HOST = 'token.pornerbros.com' - _TESTS = [{ - 'url': 'https://www.pornerbros.com/videos/skinny-brunette-takes-big-cock-down-her-anal-hole_181369', - 'md5': '6516c8ac63b03de06bc8eac14362db4f', - 'info_dict': { - 'id': '181369', - 'ext': 'mp4', - 'title': 'Skinny brunette takes big cock down her anal hole', - 'uploader': 'PornerBros HD', - 'uploader_id': 'pornerbros-hd', - 'upload_date': '20130130', - 'timestamp': 1359527401, - 'duration': 1224, - 'view_count': int, - 'categories': list, - 'age_limit': 18, - }, - 'params': { - 'skip_download': True, - }, - }, { - 'url': 'https://www.pornerbros.com/embed/181369', - 'only_matching': True, - }, { - 'url': 'https://m.pornerbros.com/videos/skinny-brunette-takes-big-cock-down-her-anal-hole_181369', - 'only_matching': True, - }] diff --git a/youtube_dl/extractor/fox.py b/youtube_dl/extractor/fox.py deleted file mode 100644 index 04f4bdba6..000000000 --- a/youtube_dl/extractor/fox.py +++ /dev/null @@ -1,150 +0,0 @@ -# coding: utf-8 -from __future__ import 
unicode_literals - -import json -import uuid - -from .adobepass import AdobePassIE -from ..compat import ( - compat_HTTPError, - compat_str, - compat_urllib_parse_unquote, -) -from ..utils import ( - ExtractorError, - int_or_none, - parse_age_limit, - parse_duration, - try_get, - unified_timestamp, -) - - -class FOXIE(AdobePassIE): - _VALID_URL = r'https?://(?:www\.)?fox\.com/watch/(?P[\da-fA-F]+)' - _TESTS = [{ - # clip - 'url': 'https://www.fox.com/watch/4b765a60490325103ea69888fb2bd4e8/', - 'md5': 'ebd296fcc41dd4b19f8115d8461a3165', - 'info_dict': { - 'id': '4b765a60490325103ea69888fb2bd4e8', - 'ext': 'mp4', - 'title': 'Aftermath: Bruce Wayne Develops Into The Dark Knight', - 'description': 'md5:549cd9c70d413adb32ce2a779b53b486', - 'duration': 102, - 'timestamp': 1504291893, - 'upload_date': '20170901', - 'creator': 'FOX', - 'series': 'Gotham', - 'age_limit': 14, - }, - 'params': { - 'skip_download': True, - }, - }, { - # episode, geo-restricted - 'url': 'https://www.fox.com/watch/087036ca7f33c8eb79b08152b4dd75c1/', - 'only_matching': True, - }, { - # episode, geo-restricted, tv provided required - 'url': 'https://www.fox.com/watch/30056b295fb57f7452aeeb4920bc3024/', - 'only_matching': True, - }] - _GEO_BYPASS = False - _HOME_PAGE_URL = 'https://www.fox.com/' - _API_KEY = 'abdcbed02c124d393b39e818a4312055' - _access_token = None - - def _call_api(self, path, video_id, data=None): - headers = { - 'X-Api-Key': self._API_KEY, - } - if self._access_token: - headers['Authorization'] = 'Bearer ' + self._access_token - try: - return self._download_json( - 'https://api2.fox.com/v2.0/' + path, - video_id, data=data, headers=headers) - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: - entitlement_issues = self._parse_json( - e.cause.read().decode(), video_id)['entitlementIssues'] - for e in entitlement_issues: - if e.get('errorCode') == 1005: - raise ExtractorError( - 'This video is only available via cable service provider 
' - 'subscription. You may want to use --cookies.', expected=True) - messages = ', '.join([e['message'] for e in entitlement_issues]) - raise ExtractorError(messages, expected=True) - raise - - def _real_initialize(self): - if not self._access_token: - mvpd_auth = self._get_cookies(self._HOME_PAGE_URL).get('mvpd-auth') - if mvpd_auth: - self._access_token = (self._parse_json(compat_urllib_parse_unquote( - mvpd_auth.value), None, fatal=False) or {}).get('accessToken') - if not self._access_token: - self._access_token = self._call_api( - 'login', None, json.dumps({ - 'deviceId': compat_str(uuid.uuid4()), - }).encode())['accessToken'] - - def _real_extract(self, url): - video_id = self._match_id(url) - - video = self._call_api('vodplayer/' + video_id, video_id) - - title = video['name'] - release_url = video['url'] - try: - m3u8_url = self._download_json(release_url, video_id)['playURL'] - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: - error = self._parse_json(e.cause.read().decode(), video_id) - if error.get('exception') == 'GeoLocationBlocked': - self.raise_geo_restricted(countries=['US']) - raise ExtractorError(error['description'], expected=True) - raise - formats = self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', - entry_protocol='m3u8_native', m3u8_id='hls') - self._sort_formats(formats) - - data = try_get( - video, lambda x: x['trackingData']['properties'], dict) or {} - - duration = int_or_none(video.get('durationInSeconds')) or int_or_none( - video.get('duration')) or parse_duration(video.get('duration')) - timestamp = unified_timestamp(video.get('datePublished')) - creator = data.get('brand') or data.get('network') or video.get('network') - series = video.get('seriesName') or data.get( - 'seriesName') or data.get('show') - - subtitles = {} - for doc_rel in video.get('documentReleases', []): - rel_url = doc_rel.get('url') - if not url or doc_rel.get('format') != 'SCC': - continue - subtitles['en'] = 
[{ - 'url': rel_url, - 'ext': 'scc', - }] - break - - return { - 'id': video_id, - 'title': title, - 'formats': formats, - 'description': video.get('description'), - 'duration': duration, - 'timestamp': timestamp, - 'age_limit': parse_age_limit(video.get('contentRating')), - 'creator': creator, - 'series': series, - 'season_number': int_or_none(video.get('seasonNumber')), - 'episode': video.get('name'), - 'episode_number': int_or_none(video.get('episodeNumber')), - 'release_year': int_or_none(video.get('releaseYear')), - 'subtitles': subtitles, - } diff --git a/youtube_dl/extractor/fox9.py b/youtube_dl/extractor/fox9.py deleted file mode 100644 index 91f8f7b8a..000000000 --- a/youtube_dl/extractor/fox9.py +++ /dev/null @@ -1,41 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor - - -class FOX9IE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?fox9\.com/video/(?P\d+)' - - def _real_extract(self, url): - video_id = self._match_id(url) - return self.url_result( - 'anvato:anvato_epfox_app_web_prod_b3373168e12f423f41504f207000188daf88251b:' + video_id, - 'Anvato', video_id) - - -class FOX9NewsIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?fox9\.com/news/(?P[^/?&#]+)' - _TEST = { - 'url': 'https://www.fox9.com/news/black-bear-in-tree-draws-crowd-in-downtown-duluth-minnesota', - 'md5': 'd6e1b2572c3bab8a849c9103615dd243', - 'info_dict': { - 'id': '314473', - 'ext': 'mp4', - 'title': 'Bear climbs tree in downtown Duluth', - 'description': 'md5:6a36bfb5073a411758a752455408ac90', - 'duration': 51, - 'timestamp': 1478123580, - 'upload_date': '20161102', - 'uploader': 'EPFOX', - 'categories': ['News', 'Sports'], - 'tags': ['news', 'video'], - }, - } - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - anvato_id = self._search_regex( - r'anvatoId\s*:\s*[\'"](\d+)', webpage, 'anvato id') - return self.url_result('https://www.fox9.com/video/' + 
anvato_id, 'FOX9') diff --git a/youtube_dl/extractor/foxgay.py b/youtube_dl/extractor/foxgay.py deleted file mode 100644 index 512a10645..000000000 --- a/youtube_dl/extractor/foxgay.py +++ /dev/null @@ -1,63 +0,0 @@ -from __future__ import unicode_literals - -import itertools - -from .common import InfoExtractor -from ..utils import ( - get_element_by_id, - int_or_none, - remove_end, -) - - -class FoxgayIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?foxgay\.com/videos/(?:\S+-)?(?P\d+)\.shtml' - _TEST = { - 'url': 'http://foxgay.com/videos/fuck-turkish-style-2582.shtml', - 'md5': '344558ccfea74d33b7adbce22e577f54', - 'info_dict': { - 'id': '2582', - 'ext': 'mp4', - 'title': 'Fuck Turkish-style', - 'description': 'md5:6ae2d9486921891efe89231ace13ffdf', - 'age_limit': 18, - 'thumbnail': r're:https?://.*\.jpg$', - }, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - title = remove_end(self._html_search_regex( - r'([^<]+)', webpage, 'title'), ' - Foxgay.com') - description = get_element_by_id('inf_tit', webpage) - - # The default user-agent with foxgay cookies leads to pages without videos - self._downloader.cookiejar.clear('.foxgay.com') - # Find the URL for the iFrame which contains the actual video. 
- iframe_url = self._html_search_regex( - r']+src=([\'"])(?P[^\'"]+)\1', webpage, - 'video frame', group='url') - iframe = self._download_webpage( - iframe_url, video_id, headers={'User-Agent': 'curl/7.50.1'}, - note='Downloading video frame') - video_data = self._parse_json(self._search_regex( - r'video_data\s*=\s*([^;]+);', iframe, 'video data'), video_id) - - formats = [{ - 'url': source, - 'height': int_or_none(resolution), - } for source, resolution in zip( - video_data['sources'], video_data.get('resolutions', itertools.repeat(None)))] - - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': title, - 'formats': formats, - 'description': description, - 'thumbnail': video_data.get('act_vid', {}).get('thumb'), - 'age_limit': 18, - } diff --git a/youtube_dl/extractor/foxnews.py b/youtube_dl/extractor/foxnews.py deleted file mode 100644 index 63613cb85..000000000 --- a/youtube_dl/extractor/foxnews.py +++ /dev/null @@ -1,127 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .amp import AMPIE -from .common import InfoExtractor - - -class FoxNewsIE(AMPIE): - IE_NAME = 'foxnews' - IE_DESC = 'Fox News and Fox Business Video' - _VALID_URL = r'https?://(?Pvideo\.(?:insider\.)?fox(?:news|business)\.com)/v/(?:video-embed\.html\?video_id=)?(?P\d+)' - _TESTS = [ - { - 'url': 'http://video.foxnews.com/v/3937480/frozen-in-time/#sp=show-clips', - 'md5': '32aaded6ba3ef0d1c04e238d01031e5e', - 'info_dict': { - 'id': '3937480', - 'ext': 'flv', - 'title': 'Frozen in Time', - 'description': '16-year-old girl is size of toddler', - 'duration': 265, - 'timestamp': 1304411491, - 'upload_date': '20110503', - 'thumbnail': r're:^https?://.*\.jpg$', - }, - }, - { - 'url': 'http://video.foxnews.com/v/3922535568001/rep-luis-gutierrez-on-if-obamas-immigration-plan-is-legal/#sp=show-clips', - 'md5': '5846c64a1ea05ec78175421b8323e2df', - 'info_dict': { - 'id': '3922535568001', - 'ext': 'mp4', - 'title': "Rep. 
Luis Gutierrez on if Obama's immigration plan is legal", - 'description': "Congressman discusses president's plan", - 'duration': 292, - 'timestamp': 1417662047, - 'upload_date': '20141204', - 'thumbnail': r're:^https?://.*\.jpg$', - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, - { - 'url': 'http://video.foxnews.com/v/video-embed.html?video_id=3937480&d=video.foxnews.com', - 'only_matching': True, - }, - { - 'url': 'http://video.foxbusiness.com/v/4442309889001', - 'only_matching': True, - }, - { - # From http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words - 'url': 'http://video.insider.foxnews.com/v/video-embed.html?video_id=5099377331001&autoplay=true&share_url=http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words&share_title=Student%20Group:%20Saying%20%27Politically%20Correct,%27%20%27Trash%27%20and%20%27Lame%27%20Is%20Offensive&share=true', - 'only_matching': True, - }, - ] - - @staticmethod - def _extract_urls(webpage): - return [ - mobj.group('url') - for mobj in re.finditer( - r'<(?:amp-)?iframe[^>]+\bsrc=(["\'])(?P(?:https?:)?//video\.foxnews\.com/v/video-embed\.html?.*?\bvideo_id=\d+.*?)\1', - webpage)] - - def _real_extract(self, url): - host, video_id = re.match(self._VALID_URL, url).groups() - - info = self._extract_feed_info( - 'http://%s/v/feed/video/%s.js?template=fox' % (host, video_id)) - info['id'] = video_id - return info - - -class FoxNewsArticleIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?:insider\.)?foxnews\.com/(?!v)([^/]+/)+(?P[a-z-]+)' - IE_NAME = 'foxnews:article' - - _TESTS = [{ - # data-video-id - 'url': 'http://www.foxnews.com/politics/2016/09/08/buzz-about-bud-clinton-camp-denies-claims-wore-earpiece-at-forum.html', - 'md5': '83d44e1aff1433e7a29a7b537d1700b5', - 'info_dict': { - 'id': '5116295019001', - 'ext': 'mp4', - 'title': 'Trump and Clinton asked to defend positions on Iraq War', - 'description': 'Veterans 
react on \'The Kelly File\'', - 'timestamp': 1473301045, - 'upload_date': '20160908', - }, - }, { - # iframe embed - 'url': 'http://www.foxnews.com/us/2018/03/09/parkland-survivor-kyle-kashuv-on-meeting-trump-his-app-to-prevent-another-school-shooting.amp.html?__twitter_impression=true', - 'info_dict': { - 'id': '5748266721001', - 'ext': 'flv', - 'title': 'Kyle Kashuv has a positive message for the Trump White House', - 'description': 'Marjory Stoneman Douglas student disagrees with classmates.', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 229, - 'timestamp': 1520594670, - 'upload_date': '20180309', - }, - 'params': { - 'skip_download': True, - }, - }, { - 'url': 'http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words', - 'only_matching': True, - }] - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - - video_id = self._html_search_regex( - r'data-video-id=([\'"])(?P[^\'"]+)\1', - webpage, 'video ID', group='id', default=None) - if video_id: - return self.url_result( - 'http://video.foxnews.com/v/' + video_id, FoxNewsIE.ie_key()) - - return self.url_result( - FoxNewsIE._extract_urls(webpage)[0], FoxNewsIE.ie_key()) diff --git a/youtube_dl/extractor/foxsports.py b/youtube_dl/extractor/foxsports.py deleted file mode 100644 index 2b2cb6c6f..000000000 --- a/youtube_dl/extractor/foxsports.py +++ /dev/null @@ -1,33 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor - - -class FoxSportsIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?foxsports\.com/(?:[^/]+/)*video/(?P\d+)' - - _TEST = { - 'url': 'http://www.foxsports.com/tennessee/video/432609859715', - 'md5': 'b49050e955bebe32c301972e4012ac17', - 'info_dict': { - 'id': '432609859715', - 'ext': 'mp4', - 'title': 'Courtney Lee on going up 2-0 in series vs. 
Blazers', - 'description': 'Courtney Lee talks about Memphis being focused.', - # TODO: fix timestamp - 'upload_date': '19700101', # '20150423', - # 'timestamp': 1429761109, - 'uploader': 'NEWA-FNG-FOXSPORTS', - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - 'add_ie': ['ThePlatform'], - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - return self.url_result( - 'https://feed.theplatform.com/f/BKQ29B/foxsports-all?byId=' + video_id, 'ThePlatformFeed') diff --git a/youtube_dl/extractor/franceculture.py b/youtube_dl/extractor/franceculture.py deleted file mode 100644 index 14f4cb489..000000000 --- a/youtube_dl/extractor/franceculture.py +++ /dev/null @@ -1,73 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - determine_ext, - extract_attributes, - int_or_none, -) - - -class FranceCultureIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?franceculture\.fr/emissions/(?:[^/]+/)*(?P[^/?#&]+)' - _TESTS = [{ - 'url': 'http://www.franceculture.fr/emissions/carnet-nomade/rendez-vous-au-pays-des-geeks', - 'info_dict': { - 'id': 'rendez-vous-au-pays-des-geeks', - 'display_id': 'rendez-vous-au-pays-des-geeks', - 'ext': 'mp3', - 'title': 'Rendez-vous au pays des geeks', - 'thumbnail': r're:^https?://.*\.jpg$', - 'upload_date': '20140301', - 'timestamp': 1393700400, - 'vcodec': 'none', - } - }, { - # no thumbnail - 'url': 'https://www.franceculture.fr/emissions/la-recherche-montre-en-main/la-recherche-montre-en-main-du-mercredi-10-octobre-2018', - 'only_matching': True, - }] - - def _real_extract(self, url): - display_id = self._match_id(url) - - webpage = self._download_webpage(url, display_id) - - video_data = extract_attributes(self._search_regex( - r'''(?sx) - (?: - | - ]+class="[^"]*?(?:title-zone-diffusion|heading-zone-(?:wrapper|player-button))[^"]*?"[^>]*> - ).*? 
- (]+data-(?:url|asset-source)="[^"]+"[^>]+>) - ''', - webpage, 'video data')) - - video_url = video_data.get('data-url') or video_data['data-asset-source'] - title = video_data.get('data-asset-title') or video_data.get('data-diffusion-title') or self._og_search_title(webpage) - - description = self._html_search_regex( - r'(?s)]+class="intro"[^>]*>.*?

    (.+?)

    ', - webpage, 'description', default=None) - thumbnail = self._search_regex( - r'(?s)]+itemtype="https://schema.org/ImageObject"[^>]*>.*?]+(?:data-dejavu-)?src="([^"]+)"', - webpage, 'thumbnail', default=None) - uploader = self._html_search_regex( - r'(?s)(.*?)', - webpage, 'uploader', default=None) - ext = determine_ext(video_url.lower()) - - return { - 'id': display_id, - 'display_id': display_id, - 'url': video_url, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'ext': ext, - 'vcodec': 'none' if ext == 'mp3' else None, - 'uploader': uploader, - 'timestamp': int_or_none(video_data.get('data-start-time')) or int_or_none(video_data.get('data-asset-created-date')), - 'duration': int_or_none(video_data.get('data-duration')), - } diff --git a/youtube_dl/extractor/franceinter.py b/youtube_dl/extractor/franceinter.py deleted file mode 100644 index ae822a50e..000000000 --- a/youtube_dl/extractor/franceinter.py +++ /dev/null @@ -1,59 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import month_by_name - - -class FranceInterIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?franceinter\.fr/emissions/(?P[^?#]+)' - - _TEST = { - 'url': 'https://www.franceinter.fr/emissions/affaires-sensibles/affaires-sensibles-07-septembre-2016', - 'md5': '9e54d7bdb6fdc02a841007f8a975c094', - 'info_dict': { - 'id': 'affaires-sensibles/affaires-sensibles-07-septembre-2016', - 'ext': 'mp3', - 'title': 'Affaire Cahuzac : le contentieux du compte en Suisse', - 'description': 'md5:401969c5d318c061f86bda1fa359292b', - 'thumbnail': r're:^https?://.*\.jpg', - 'upload_date': '20160907', - }, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - video_url = self._search_regex( - r'(?s)]+class=["\']page-diffusion["\'][^>]*>.*?]+data-url=(["\'])(?P(?:(?!\1).)+)\1', - webpage, 'video url', group='url') - - title = 
self._og_search_title(webpage) - description = self._og_search_description(webpage) - thumbnail = self._html_search_meta(['og:image', 'twitter:image'], webpage) - - upload_date_str = self._search_regex( - r'class=["\']\s*cover-emission-period\s*["\'][^>]*>[^<]+\s+(\d{1,2}\s+[^\s]+\s+\d{4})<', - webpage, 'upload date', fatal=False) - if upload_date_str: - upload_date_list = upload_date_str.split() - upload_date_list.reverse() - upload_date_list[1] = '%02d' % (month_by_name(upload_date_list[1], lang='fr') or 0) - upload_date_list[2] = '%02d' % int(upload_date_list[2]) - upload_date = ''.join(upload_date_list) - else: - upload_date = None - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'upload_date': upload_date, - 'formats': [{ - 'url': video_url, - 'vcodec': 'none', - }], - } diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py deleted file mode 100644 index e4ec2e200..000000000 --- a/youtube_dl/extractor/francetv.py +++ /dev/null @@ -1,546 +0,0 @@ -# coding: utf-8 - -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..compat import ( - compat_str, - compat_urlparse, -) -from ..utils import ( - clean_html, - determine_ext, - ExtractorError, - int_or_none, - parse_duration, - try_get, - url_or_none, - urljoin, -) -from .dailymotion import DailymotionIE - - -class FranceTVBaseInfoExtractor(InfoExtractor): - def _make_url_result(self, video_or_full_id, catalog=None): - full_id = 'francetv:%s' % video_or_full_id - if '@' not in video_or_full_id and catalog: - full_id += '@%s' % catalog - return self.url_result( - full_id, ie=FranceTVIE.ie_key(), - video_id=video_or_full_id.split('@')[0]) - - -class FranceTVIE(InfoExtractor): - _VALID_URL = r'''(?x) - (?: - https?:// - sivideo\.webservices\.francetelevisions\.fr/tools/getInfosOeuvre/v2/\? 
- .*?\bidDiffusion=[^&]+| - (?: - https?://videos\.francetv\.fr/video/| - francetv: - ) - (?P[^@]+)(?:@(?P.+))? - ) - ''' - - _TESTS = [{ - # without catalog - 'url': 'https://sivideo.webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/?idDiffusion=162311093&callback=_jsonp_loader_callback_request_0', - 'md5': 'c2248a8de38c4e65ea8fae7b5df2d84f', - 'info_dict': { - 'id': '162311093', - 'ext': 'mp4', - 'title': '13h15, le dimanche... - Les mystères de Jésus', - 'description': 'md5:75efe8d4c0a8205e5904498ffe1e1a42', - 'timestamp': 1502623500, - 'upload_date': '20170813', - }, - }, { - # with catalog - 'url': 'https://sivideo.webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/?idDiffusion=NI_1004933&catalogue=Zouzous&callback=_jsonp_loader_callback_request_4', - 'only_matching': True, - }, { - 'url': 'http://videos.francetv.fr/video/NI_657393@Regions', - 'only_matching': True, - }, { - 'url': 'francetv:162311093', - 'only_matching': True, - }, { - 'url': 'francetv:NI_1004933@Zouzous', - 'only_matching': True, - }, { - 'url': 'francetv:NI_983319@Info-web', - 'only_matching': True, - }, { - 'url': 'francetv:NI_983319', - 'only_matching': True, - }, { - 'url': 'francetv:NI_657393@Regions', - 'only_matching': True, - }, { - # france-3 live - 'url': 'francetv:SIM_France3', - 'only_matching': True, - }] - - def _extract_video(self, video_id, catalogue=None): - # Videos are identified by idDiffusion so catalogue part is optional. - # However when provided, some extra formats may be returned so we pass - # it if available. 
- info = self._download_json( - 'https://sivideo.webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/', - video_id, 'Downloading video JSON', query={ - 'idDiffusion': video_id, - 'catalogue': catalogue or '', - }) - - if info.get('status') == 'NOK': - raise ExtractorError( - '%s returned error: %s' % (self.IE_NAME, info['message']), - expected=True) - allowed_countries = info['videos'][0].get('geoblocage') - if allowed_countries: - georestricted = True - geo_info = self._download_json( - 'http://geo.francetv.fr/ws/edgescape.json', video_id, - 'Downloading geo restriction info') - country = geo_info['reponse']['geo_info']['country_code'] - if country not in allowed_countries: - raise ExtractorError( - 'The video is not available from your location', - expected=True) - else: - georestricted = False - - def sign(manifest_url, manifest_id): - for host in ('hdfauthftv-a.akamaihd.net', 'hdfauth.francetv.fr'): - signed_url = url_or_none(self._download_webpage( - 'https://%s/esi/TA' % host, video_id, - 'Downloading signed %s manifest URL' % manifest_id, - fatal=False, query={ - 'url': manifest_url, - })) - if signed_url: - return signed_url - return manifest_url - - is_live = None - - videos = [] - - for video in (info.get('videos') or []): - if video.get('statut') != 'ONLINE': - continue - if not video.get('url'): - continue - videos.append(video) - - if not videos: - for device_type in ['desktop', 'mobile']: - fallback_info = self._download_json( - 'https://player.webservices.francetelevisions.fr/v1/videos/%s' % video_id, - video_id, 'Downloading fallback %s video JSON' % device_type, query={ - 'device_type': device_type, - 'browser': 'chrome', - }, fatal=False) - - if fallback_info and fallback_info.get('video'): - videos.append(fallback_info['video']) - - formats = [] - for video in videos: - video_url = video.get('url') - if not video_url: - continue - if is_live is None: - is_live = (try_get( - video, lambda x: x['plages_ouverture'][0]['direct'], bool) is True - 
or video.get('is_live') is True - or '/live.francetv.fr/' in video_url) - format_id = video.get('format') - ext = determine_ext(video_url) - if ext == 'f4m': - if georestricted: - # See https://github.com/ytdl-org/youtube-dl/issues/3963 - # m3u8 urls work fine - continue - formats.extend(self._extract_f4m_formats( - sign(video_url, format_id) + '&hdcore=3.7.0&plugin=aasp-3.7.0.39.44', - video_id, f4m_id=format_id, fatal=False)) - elif ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( - sign(video_url, format_id), video_id, 'mp4', - entry_protocol='m3u8_native', m3u8_id=format_id, - fatal=False)) - elif ext == 'mpd': - formats.extend(self._extract_mpd_formats( - sign(video_url, format_id), video_id, mpd_id=format_id, fatal=False)) - elif video_url.startswith('rtmp'): - formats.append({ - 'url': video_url, - 'format_id': 'rtmp-%s' % format_id, - 'ext': 'flv', - }) - else: - if self._is_valid_url(video_url, video_id, format_id): - formats.append({ - 'url': video_url, - 'format_id': format_id, - }) - - self._sort_formats(formats) - - title = info['titre'] - subtitle = info.get('sous_titre') - if subtitle: - title += ' - %s' % subtitle - title = title.strip() - - subtitles = {} - subtitles_list = [{ - 'url': subformat['url'], - 'ext': subformat.get('format'), - } for subformat in info.get('subtitles', []) if subformat.get('url')] - if subtitles_list: - subtitles['fr'] = subtitles_list - - return { - 'id': video_id, - 'title': self._live_title(title) if is_live else title, - 'description': clean_html(info.get('synopsis')), - 'thumbnail': urljoin('https://sivideo.webservices.francetelevisions.fr', info.get('image')), - 'duration': int_or_none(info.get('real_duration')) or parse_duration(info.get('duree')), - 'timestamp': int_or_none(try_get(info, lambda x: x['diffusion']['timestamp'])), - 'is_live': is_live, - 'formats': formats, - 'subtitles': subtitles, - } - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') 
- catalog = mobj.group('catalog') - - if not video_id: - qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) - video_id = qs.get('idDiffusion', [None])[0] - catalog = qs.get('catalogue', [None])[0] - if not video_id: - raise ExtractorError('Invalid URL', expected=True) - - return self._extract_video(video_id, catalog) - - -class FranceTVSiteIE(FranceTVBaseInfoExtractor): - _VALID_URL = r'https?://(?:(?:www\.)?france\.tv|mobile\.france\.tv)/(?:[^/]+/)*(?P[^/]+)\.html' - - _TESTS = [{ - 'url': 'https://www.france.tv/france-2/13h15-le-dimanche/140921-les-mysteres-de-jesus.html', - 'info_dict': { - 'id': 'ec217ecc-0733-48cf-ac06-af1347b849d1', - 'ext': 'mp4', - 'title': '13h15, le dimanche... - Les mystères de Jésus', - 'description': 'md5:75efe8d4c0a8205e5904498ffe1e1a42', - 'timestamp': 1502623500, - 'upload_date': '20170813', - }, - 'params': { - 'skip_download': True, - }, - 'add_ie': [FranceTVIE.ie_key()], - }, { - # france3 - 'url': 'https://www.france.tv/france-3/des-chiffres-et-des-lettres/139063-emission-du-mardi-9-mai-2017.html', - 'only_matching': True, - }, { - # france4 - 'url': 'https://www.france.tv/france-4/hero-corp/saison-1/134151-apres-le-calme.html', - 'only_matching': True, - }, { - # france5 - 'url': 'https://www.france.tv/france-5/c-a-dire/saison-10/137013-c-a-dire.html', - 'only_matching': True, - }, { - # franceo - 'url': 'https://www.france.tv/france-o/archipels/132249-mon-ancetre-l-esclave.html', - 'only_matching': True, - }, { - # france2 live - 'url': 'https://www.france.tv/france-2/direct.html', - 'only_matching': True, - }, { - 'url': 'https://www.france.tv/documentaires/histoire/136517-argentine-les-500-bebes-voles-de-la-dictature.html', - 'only_matching': True, - }, { - 'url': 'https://www.france.tv/jeux-et-divertissements/divertissements/133965-le-web-contre-attaque.html', - 'only_matching': True, - }, { - 'url': 'https://mobile.france.tv/france-5/c-dans-l-air/137347-emission-du-vendredi-12-mai-2017.html', - 
'only_matching': True, - }, { - 'url': 'https://www.france.tv/142749-rouge-sang.html', - 'only_matching': True, - }, { - # france-3 live - 'url': 'https://www.france.tv/france-3/direct.html', - 'only_matching': True, - }] - - def _real_extract(self, url): - display_id = self._match_id(url) - - webpage = self._download_webpage(url, display_id) - - catalogue = None - video_id = self._search_regex( - r'(?:data-main-video\s*=|videoId["\']?\s*[:=])\s*(["\'])(?P(?:(?!\1).)+)\1', - webpage, 'video id', default=None, group='id') - - if not video_id: - video_id, catalogue = self._html_search_regex( - r'(?:href=|player\.setVideo\(\s*)"http://videos?\.francetv\.fr/video/([^@]+@[^"]+)"', - webpage, 'video ID').split('@') - - return self._make_url_result(video_id, catalogue) - - -class FranceTVEmbedIE(FranceTVBaseInfoExtractor): - _VALID_URL = r'https?://embed\.francetv\.fr/*\?.*?\bue=(?P[^&]+)' - - _TESTS = [{ - 'url': 'http://embed.francetv.fr/?ue=7fd581a2ccf59d2fc5719c5c13cf6961', - 'info_dict': { - 'id': 'NI_983319', - 'ext': 'mp4', - 'title': 'Le Pen Reims', - 'upload_date': '20170505', - 'timestamp': 1493981780, - 'duration': 16, - }, - 'params': { - 'skip_download': True, - }, - 'add_ie': [FranceTVIE.ie_key()], - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - video = self._download_json( - 'http://api-embed.webservices.francetelevisions.fr/key/%s' % video_id, - video_id) - - return self._make_url_result(video['video_id'], video.get('catalog')) - - -class FranceTVInfoIE(FranceTVBaseInfoExtractor): - IE_NAME = 'francetvinfo.fr' - _VALID_URL = r'https?://(?:www|mobile|france3-regions)\.francetvinfo\.fr/(?:[^/]+/)*(?P[^/?#&.]+)' - - _TESTS = [{ - 'url': 'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html', - 'info_dict': { - 'id': '84981923', - 'ext': 'mp4', - 'title': 'Soir 3', - 'upload_date': '20130826', - 'timestamp': 1377548400, - 'subtitles': { - 'fr': 'mincount:2', - }, - }, - 'params': { - 
'skip_download': True, - }, - 'add_ie': [FranceTVIE.ie_key()], - }, { - 'url': 'http://www.francetvinfo.fr/elections/europeennes/direct-europeennes-regardez-le-debat-entre-les-candidats-a-la-presidence-de-la-commission_600639.html', - 'only_matching': True, - }, { - 'url': 'http://www.francetvinfo.fr/economie/entreprises/les-entreprises-familiales-le-secret-de-la-reussite_933271.html', - 'only_matching': True, - }, { - 'url': 'http://france3-regions.francetvinfo.fr/bretagne/cotes-d-armor/thalassa-echappee-breizh-ce-venredi-dans-les-cotes-d-armor-954961.html', - 'only_matching': True, - }, { - # Dailymotion embed - 'url': 'http://www.francetvinfo.fr/politique/notre-dame-des-landes/video-sur-france-inter-cecile-duflot-denonce-le-regard-meprisant-de-patrick-cohen_1520091.html', - 'md5': 'ee7f1828f25a648addc90cb2687b1f12', - 'info_dict': { - 'id': 'x4iiko0', - 'ext': 'mp4', - 'title': 'NDDL, référendum, Brexit : Cécile Duflot répond à Patrick Cohen', - 'description': 'Au lendemain de la victoire du "oui" au référendum sur l\'aéroport de Notre-Dame-des-Landes, l\'ancienne ministre écologiste est l\'invitée de Patrick Cohen. Plus d\'info : https://www.franceinter.fr/emissions/le-7-9/le-7-9-27-juin-2016', - 'timestamp': 1467011958, - 'upload_date': '20160627', - 'uploader': 'France Inter', - 'uploader_id': 'x2q2ez', - }, - 'add_ie': ['Dailymotion'], - }, { - 'url': 'http://france3-regions.francetvinfo.fr/limousin/emissions/jt-1213-limousin', - 'only_matching': True, - }, { - # "
    ]+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"', - r'(?:data-id|[^/?#&]+)' - _TESTS = [{ - 'url': 'https://sport.francetvinfo.fr/les-jeux-olympiques/retour-sur-les-meilleurs-moments-de-pyeongchang-2018', - 'info_dict': { - 'id': '6e49080e-3f45-11e8-b459-000d3a2439ea', - 'ext': 'mp4', - 'title': 'Retour sur les meilleurs moments de Pyeongchang 2018', - 'timestamp': 1523639962, - 'upload_date': '20180413', - }, - 'params': { - 'skip_download': True, - }, - 'add_ie': [FranceTVIE.ie_key()], - }] - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - video_id = self._search_regex(r'data-video="([^"]+)"', webpage, 'video_id') - return self._make_url_result(video_id, 'Sport-web') - - -class GenerationWhatIE(InfoExtractor): - IE_NAME = 'france2.fr:generation-what' - _VALID_URL = r'https?://generation-what\.francetv\.fr/[^/]+/video/(?P[^/?#&]+)' - - _TESTS = [{ - 'url': 'http://generation-what.francetv.fr/portrait/video/present-arms', - 'info_dict': { - 'id': 'wtvKYUG45iw', - 'ext': 'mp4', - 'title': 'Generation What - Garde à vous - FRA', - 'uploader': 'Generation What', - 'uploader_id': 'UCHH9p1eetWCgt4kXBYCb3_w', - 'upload_date': '20160411', - }, - 'params': { - 'skip_download': True, - }, - 'add_ie': ['Youtube'], - }, { - 'url': 'http://generation-what.francetv.fr/europe/video/present-arms', - 'only_matching': True, - }] - - def _real_extract(self, url): - display_id = self._match_id(url) - - webpage = self._download_webpage(url, display_id) - - youtube_id = self._search_regex( - r"window\.videoURL\s*=\s*'([0-9A-Za-z_-]{11})';", - webpage, 'youtube id') - - return self.url_result(youtube_id, ie='Youtube', video_id=youtube_id) - - -class CultureboxIE(FranceTVBaseInfoExtractor): - _VALID_URL = r'https?://(?:m\.)?culturebox\.francetvinfo\.fr/(?:[^/]+/)*(?P[^/?#&]+)' - - _TESTS = [{ - 'url': 
'https://culturebox.francetvinfo.fr/opera-classique/musique-classique/c-est-baroque/concerts/cantates-bwv-4-106-et-131-de-bach-par-raphael-pichon-57-268689', - 'info_dict': { - 'id': 'EV_134885', - 'ext': 'mp4', - 'title': 'Cantates BWV 4, 106 et 131 de Bach par Raphaël Pichon 5/7', - 'description': 'md5:19c44af004b88219f4daa50fa9a351d4', - 'upload_date': '20180206', - 'timestamp': 1517945220, - 'duration': 5981, - }, - 'params': { - 'skip_download': True, - }, - 'add_ie': [FranceTVIE.ie_key()], - }] - - def _real_extract(self, url): - display_id = self._match_id(url) - - webpage = self._download_webpage(url, display_id) - - if ">Ce live n'est plus disponible en replay<" in webpage: - raise ExtractorError( - 'Video %s is not available' % display_id, expected=True) - - video_id, catalogue = self._search_regex( - r'["\'>]https?://videos\.francetv\.fr/video/([^@]+@.+?)["\'<]', - webpage, 'video id').split('@') - - return self._make_url_result(video_id, catalogue) - - -class FranceTVJeunesseIE(FranceTVBaseInfoExtractor): - _VALID_URL = r'(?Phttps?://(?:www\.)?(?:zouzous|ludo)\.fr/heros/(?P[^/?#&]+))' - - _TESTS = [{ - 'url': 'https://www.zouzous.fr/heros/simon', - 'info_dict': { - 'id': 'simon', - }, - 'playlist_count': 9, - }, { - 'url': 'https://www.ludo.fr/heros/ninjago', - 'info_dict': { - 'id': 'ninjago', - }, - 'playlist_count': 10, - }, { - 'url': 'https://www.zouzous.fr/heros/simon?abc', - 'only_matching': True, - }] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - playlist_id = mobj.group('id') - - playlist = self._download_json( - '%s/%s' % (mobj.group('url'), 'playlist'), playlist_id) - - if not playlist.get('count'): - raise ExtractorError( - '%s is not available' % playlist_id, expected=True) - - entries = [] - for item in playlist['items']: - identity = item.get('identity') - if identity and isinstance(identity, compat_str): - entries.append(self._make_url_result(identity)) - - return self.playlist_result(entries, playlist_id) 
diff --git a/youtube_dl/extractor/freesound.py b/youtube_dl/extractor/freesound.py deleted file mode 100644 index 138b6bc58..000000000 --- a/youtube_dl/extractor/freesound.py +++ /dev/null @@ -1,79 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - float_or_none, - get_element_by_class, - get_element_by_id, - unified_strdate, -) - - -class FreesoundIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?freesound\.org/people/[^/]+/sounds/(?P[^/]+)' - _TEST = { - 'url': 'http://www.freesound.org/people/miklovan/sounds/194503/', - 'md5': '12280ceb42c81f19a515c745eae07650', - 'info_dict': { - 'id': '194503', - 'ext': 'mp3', - 'title': 'gulls in the city.wav', - 'description': 'the sounds of seagulls in the city', - 'duration': 130.233, - 'uploader': 'miklovan', - 'upload_date': '20130715', - 'tags': list, - } - } - - def _real_extract(self, url): - audio_id = self._match_id(url) - - webpage = self._download_webpage(url, audio_id) - - audio_url = self._og_search_property('audio', webpage, 'song url') - title = self._og_search_property('audio:title', webpage, 'song title') - - description = self._html_search_regex( - r'(?s)id=["\']sound_description["\'][^>]*>(.+?)
  • ', - webpage, 'description', fatal=False) - - duration = float_or_none( - get_element_by_class('duration', webpage), scale=1000) - - upload_date = unified_strdate(get_element_by_id('sound_date', webpage)) - uploader = self._og_search_property( - 'audio:artist', webpage, 'uploader', fatal=False) - - channels = self._html_search_regex( - r'Channels
    (.+?)
    ', webpage, - 'channels info', fatal=False) - - tags_str = get_element_by_class('tags', webpage) - tags = re.findall(r']+>([^<]+)', tags_str) if tags_str else None - - audio_urls = [audio_url] - - LQ_FORMAT = '-lq.mp3' - if LQ_FORMAT in audio_url: - audio_urls.append(audio_url.replace(LQ_FORMAT, '-hq.mp3')) - - formats = [{ - 'url': format_url, - 'format_note': channels, - 'quality': quality, - } for quality, format_url in enumerate(audio_urls)] - self._sort_formats(formats) - - return { - 'id': audio_id, - 'title': title, - 'description': description, - 'duration': duration, - 'uploader': uploader, - 'upload_date': upload_date, - 'tags': tags, - 'formats': formats, - } diff --git a/youtube_dl/extractor/freespeech.py b/youtube_dl/extractor/freespeech.py deleted file mode 100644 index ea9c3e317..000000000 --- a/youtube_dl/extractor/freespeech.py +++ /dev/null @@ -1,31 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor -from .youtube import YoutubeIE - - -class FreespeechIE(InfoExtractor): - IE_NAME = 'freespeech.org' - _VALID_URL = r'https?://(?:www\.)?freespeech\.org/stories/(?P.+)' - _TEST = { - 'add_ie': ['Youtube'], - 'url': 'http://www.freespeech.org/stories/fcc-announces-net-neutrality-rollback-whats-stake/', - 'info_dict': { - 'id': 'waRk6IPqyWM', - 'ext': 'mp4', - 'title': 'What\'s At Stake - Net Neutrality Special', - 'description': 'Presented by MNN and FSTV', - 'upload_date': '20170728', - 'uploader_id': 'freespeechtv', - 'uploader': 'freespeechtv', - }, - } - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - youtube_url = self._search_regex( - r'data-video-url="([^"]+)"', - webpage, 'youtube url') - - return self.url_result(youtube_url, YoutubeIE.ie_key()) diff --git a/youtube_dl/extractor/freshlive.py b/youtube_dl/extractor/freshlive.py deleted file mode 100644 index 72a845945..000000000 --- a/youtube_dl/extractor/freshlive.py +++ /dev/null @@ 
-1,83 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..compat import compat_str -from ..utils import ( - ExtractorError, - int_or_none, - try_get, - unified_timestamp, -) - - -class FreshLiveIE(InfoExtractor): - _VALID_URL = r'https?://freshlive\.tv/[^/]+/(?P\d+)' - _TEST = { - 'url': 'https://freshlive.tv/satotv/74712', - 'md5': '9f0cf5516979c4454ce982df3d97f352', - 'info_dict': { - 'id': '74712', - 'ext': 'mp4', - 'title': 'テスト', - 'description': 'テスト', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 1511, - 'timestamp': 1483619655, - 'upload_date': '20170105', - 'uploader': 'サトTV', - 'uploader_id': 'satotv', - 'view_count': int, - 'comment_count': int, - 'is_live': False, - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - options = self._parse_json( - self._search_regex( - r'window\.__CONTEXT__\s*=\s*({.+?});\s*', - webpage, 'initial context'), - video_id) - - info = options['context']['dispatcher']['stores']['ProgramStore']['programs'][video_id] - - title = info['title'] - - if info.get('status') == 'upcoming': - raise ExtractorError('Stream %s is upcoming' % video_id, expected=True) - - stream_url = info.get('liveStreamUrl') or info['archiveStreamUrl'] - - is_live = info.get('liveStreamUrl') is not None - - formats = self._extract_m3u8_formats( - stream_url, video_id, 'mp4', - 'm3u8_native', m3u8_id='hls') - - if is_live: - title = self._live_title(title) - - return { - 'id': video_id, - 'formats': formats, - 'title': title, - 'description': info.get('description'), - 'thumbnail': info.get('thumbnailUrl'), - 'duration': int_or_none(info.get('airTime')), - 'timestamp': unified_timestamp(info.get('createdAt')), - 'uploader': try_get( - info, lambda x: x['channel']['title'], compat_str), - 'uploader_id': try_get( - info, lambda x: x['channel']['code'], compat_str), - 'uploader_url': try_get( - info, lambda x: 
x['channel']['permalink'], compat_str), - 'view_count': int_or_none(info.get('viewCount')), - 'comment_count': int_or_none(info.get('commentCount')), - 'tags': info.get('tags', []), - 'is_live': is_live, - } diff --git a/youtube_dl/extractor/frontendmasters.py b/youtube_dl/extractor/frontendmasters.py deleted file mode 100644 index f1db33fb1..000000000 --- a/youtube_dl/extractor/frontendmasters.py +++ /dev/null @@ -1,263 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..compat import ( - compat_str, - compat_urlparse, -) -from ..utils import ( - ExtractorError, - parse_duration, - url_or_none, - urlencode_postdata, -) - - -class FrontendMastersBaseIE(InfoExtractor): - _API_BASE = 'https://api.frontendmasters.com/v1/kabuki' - _LOGIN_URL = 'https://frontendmasters.com/login/' - - _NETRC_MACHINE = 'frontendmasters' - - _QUALITIES = { - 'low': {'width': 480, 'height': 360}, - 'mid': {'width': 1280, 'height': 720}, - 'high': {'width': 1920, 'height': 1080} - } - - def _real_initialize(self): - self._login() - - def _login(self): - (username, password) = self._get_login_info() - if username is None: - return - - login_page = self._download_webpage( - self._LOGIN_URL, None, 'Downloading login page') - - login_form = self._hidden_inputs(login_page) - - login_form.update({ - 'username': username, - 'password': password - }) - - post_url = self._search_regex( - r']+action=(["\'])(?P.+?)\1', login_page, - 'post_url', default=self._LOGIN_URL, group='url') - - if not post_url.startswith('http'): - post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url) - - response = self._download_webpage( - post_url, None, 'Logging in', data=urlencode_postdata(login_form), - headers={'Content-Type': 'application/x-www-form-urlencoded'}) - - # Successful login - if any(p in response for p in ( - 'wp-login.php?action=logout', '>Logout')): - return - - error = self._html_search_regex( - 
r'class=(["\'])(?:(?!\1).)*\bMessageAlert\b(?:(?!\1).)*\1[^>]*>(?P[^<]+)<', - response, 'error message', default=None, group='error') - if error: - raise ExtractorError('Unable to login: %s' % error, expected=True) - raise ExtractorError('Unable to log in') - - -class FrontendMastersPageBaseIE(FrontendMastersBaseIE): - def _download_course(self, course_name, url): - return self._download_json( - '%s/courses/%s' % (self._API_BASE, course_name), course_name, - 'Downloading course JSON', headers={'Referer': url}) - - @staticmethod - def _extract_chapters(course): - chapters = [] - lesson_elements = course.get('lessonElements') - if isinstance(lesson_elements, list): - chapters = [url_or_none(e) for e in lesson_elements if url_or_none(e)] - return chapters - - @staticmethod - def _extract_lesson(chapters, lesson_id, lesson): - title = lesson.get('title') or lesson_id - display_id = lesson.get('slug') - description = lesson.get('description') - thumbnail = lesson.get('thumbnail') - - chapter_number = None - index = lesson.get('index') - element_index = lesson.get('elementIndex') - if (isinstance(index, int) and isinstance(element_index, int) - and index < element_index): - chapter_number = element_index - index - chapter = (chapters[chapter_number - 1] - if chapter_number - 1 < len(chapters) else None) - - duration = None - timestamp = lesson.get('timestamp') - if isinstance(timestamp, compat_str): - mobj = re.search( - r'(?P\d{1,2}:\d{1,2}:\d{1,2})\s*-(?P\s*\d{1,2}:\d{1,2}:\d{1,2})', - timestamp) - if mobj: - duration = parse_duration(mobj.group('end')) - parse_duration( - mobj.group('start')) - - return { - '_type': 'url_transparent', - 'url': 'frontendmasters:%s' % lesson_id, - 'ie_key': FrontendMastersIE.ie_key(), - 'id': lesson_id, - 'display_id': display_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'duration': duration, - 'chapter': chapter, - 'chapter_number': chapter_number, - } - - -class 
FrontendMastersIE(FrontendMastersBaseIE): - _VALID_URL = r'(?:frontendmasters:|https?://api\.frontendmasters\.com/v\d+/kabuki/video/)(?P[^/]+)' - _TESTS = [{ - 'url': 'https://api.frontendmasters.com/v1/kabuki/video/a2qogef6ba', - 'md5': '7f161159710d6b7016a4f4af6fcb05e2', - 'info_dict': { - 'id': 'a2qogef6ba', - 'ext': 'mp4', - 'title': 'a2qogef6ba', - }, - 'skip': 'Requires FrontendMasters account credentials', - }, { - 'url': 'frontendmasters:a2qogef6ba', - 'only_matching': True, - }] - - def _real_extract(self, url): - lesson_id = self._match_id(url) - - source_url = '%s/video/%s/source' % (self._API_BASE, lesson_id) - - formats = [] - for ext in ('webm', 'mp4'): - for quality in ('low', 'mid', 'high'): - resolution = self._QUALITIES[quality].copy() - format_id = '%s-%s' % (ext, quality) - format_url = self._download_json( - source_url, lesson_id, - 'Downloading %s source JSON' % format_id, query={ - 'f': ext, - 'r': resolution['height'], - }, headers={ - 'Referer': url, - }, fatal=False)['url'] - - if not format_url: - continue - - f = resolution.copy() - f.update({ - 'url': format_url, - 'ext': ext, - 'format_id': format_id, - }) - formats.append(f) - self._sort_formats(formats) - - subtitles = { - 'en': [{ - 'url': '%s/transcripts/%s.vtt' % (self._API_BASE, lesson_id), - }] - } - - return { - 'id': lesson_id, - 'title': lesson_id, - 'formats': formats, - 'subtitles': subtitles - } - - -class FrontendMastersLessonIE(FrontendMastersPageBaseIE): - _VALID_URL = r'https?://(?:www\.)?frontendmasters\.com/courses/(?P[^/]+)/(?P[^/]+)' - _TEST = { - 'url': 'https://frontendmasters.com/courses/web-development/tools', - 'info_dict': { - 'id': 'a2qogef6ba', - 'display_id': 'tools', - 'ext': 'mp4', - 'title': 'Tools', - 'description': 'md5:82c1ea6472e88ed5acd1829fe992e4f7', - 'thumbnail': r're:^https?://.*\.jpg$', - 'chapter': 'Introduction', - 'chapter_number': 1, - }, - 'params': { - 'skip_download': True, - }, - 'skip': 'Requires FrontendMasters account credentials', 
- } - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - course_name, lesson_name = mobj.group('course_name', 'lesson_name') - - course = self._download_course(course_name, url) - - lesson_id, lesson = next( - (video_id, data) - for video_id, data in course['lessonData'].items() - if data.get('slug') == lesson_name) - - chapters = self._extract_chapters(course) - return self._extract_lesson(chapters, lesson_id, lesson) - - -class FrontendMastersCourseIE(FrontendMastersPageBaseIE): - _VALID_URL = r'https?://(?:www\.)?frontendmasters\.com/courses/(?P[^/]+)' - _TEST = { - 'url': 'https://frontendmasters.com/courses/web-development/', - 'info_dict': { - 'id': 'web-development', - 'title': 'Introduction to Web Development', - 'description': 'md5:9317e6e842098bf725d62360e52d49a6', - }, - 'playlist_count': 81, - 'skip': 'Requires FrontendMasters account credentials', - } - - @classmethod - def suitable(cls, url): - return False if FrontendMastersLessonIE.suitable(url) else super( - FrontendMastersBaseIE, cls).suitable(url) - - def _real_extract(self, url): - course_name = self._match_id(url) - - course = self._download_course(course_name, url) - - chapters = self._extract_chapters(course) - - lessons = sorted( - course['lessonData'].values(), key=lambda data: data['index']) - - entries = [] - for lesson in lessons: - lesson_name = lesson.get('slug') - if not lesson_name: - continue - lesson_id = lesson.get('hash') or lesson.get('statsId') - entries.append(self._extract_lesson(chapters, lesson_id, lesson)) - - title = course.get('title') - description = course.get('description') - - return self.playlist_result(entries, course_name, title, description) diff --git a/youtube_dl/extractor/fujitv.py b/youtube_dl/extractor/fujitv.py deleted file mode 100644 index a02a94374..000000000 --- a/youtube_dl/extractor/fujitv.py +++ /dev/null @@ -1,35 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor - - -class 
FujiTVFODPlus7IE(InfoExtractor): - _VALID_URL = r'https?://i\.fod\.fujitv\.co\.jp/plus7/web/[0-9a-z]{4}/(?P[0-9a-z]+)' - _BASE_URL = 'http://i.fod.fujitv.co.jp/' - _BITRATE_MAP = { - 300: (320, 180), - 800: (640, 360), - 1200: (1280, 720), - 2000: (1280, 720), - } - - def _real_extract(self, url): - video_id = self._match_id(url) - formats = self._extract_m3u8_formats( - self._BASE_URL + 'abr/pc_html5/%s.m3u8' % video_id, video_id, 'mp4') - for f in formats: - wh = self._BITRATE_MAP.get(f.get('tbr')) - if wh: - f.update({ - 'width': wh[0], - 'height': wh[1], - }) - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': video_id, - 'formats': formats, - 'thumbnail': self._BASE_URL + 'pc/image/wbtn/wbtn_%s.jpg' % video_id, - } diff --git a/youtube_dl/extractor/funimation.py b/youtube_dl/extractor/funimation.py deleted file mode 100644 index d8f1e169a..000000000 --- a/youtube_dl/extractor/funimation.py +++ /dev/null @@ -1,158 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import random -import string - -from .common import InfoExtractor -from ..compat import compat_HTTPError -from ..utils import ( - determine_ext, - int_or_none, - js_to_json, - ExtractorError, - urlencode_postdata -) - - -class FunimationIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?funimation(?:\.com|now\.uk)/(?:[^/]+/)?shows/[^/]+/(?P[^/?#&]+)' - - _NETRC_MACHINE = 'funimation' - _TOKEN = None - - _TESTS = [{ - 'url': 'https://www.funimation.com/shows/hacksign/role-play/', - 'info_dict': { - 'id': '91144', - 'display_id': 'role-play', - 'ext': 'mp4', - 'title': '.hack//SIGN - Role Play', - 'description': 'md5:b602bdc15eef4c9bbb201bb6e6a4a2dd', - 'thumbnail': r're:https?://.*\.jpg', - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, { - 'url': 'https://www.funimation.com/shows/attack-on-titan-junior-high/broadcast-dub-preview/', - 'info_dict': { - 'id': '210051', - 'display_id': 'broadcast-dub-preview', - 'ext': 'mp4', - 'title': 
'Attack on Titan: Junior High - Broadcast Dub Preview', - 'thumbnail': r're:https?://.*\.(?:jpg|png)', - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, { - 'url': 'https://www.funimationnow.uk/shows/puzzle-dragons-x/drop-impact/simulcast/', - 'only_matching': True, - }, { - # with lang code - 'url': 'https://www.funimation.com/en/shows/hacksign/role-play/', - 'only_matching': True, - }] - - def _login(self): - username, password = self._get_login_info() - if username is None: - return - try: - data = self._download_json( - 'https://prod-api-funimationnow.dadcdigital.com/api/auth/login/', - None, 'Logging in', data=urlencode_postdata({ - 'username': username, - 'password': password, - })) - self._TOKEN = data['token'] - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: - error = self._parse_json(e.cause.read().decode(), None)['error'] - raise ExtractorError(error, expected=True) - raise - - def _real_initialize(self): - self._login() - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - - def _search_kane(name): - return self._search_regex( - r"KANE_customdimensions\.%s\s*=\s*'([^']+)';" % name, - webpage, name, default=None) - - title_data = self._parse_json(self._search_regex( - r'TITLE_DATA\s*=\s*({[^}]+})', - webpage, 'title data', default=''), - display_id, js_to_json, fatal=False) or {} - - video_id = title_data.get('id') or self._search_regex([ - r"KANE_customdimensions.videoID\s*=\s*'(\d+)';", - r']+src="/player/(\d+)', - ], webpage, 'video_id', default=None) - if not video_id: - player_url = self._html_search_meta([ - 'al:web:url', - 'og:video:url', - 'og:video:secure_url', - ], webpage, fatal=True) - video_id = self._search_regex(r'/player/(\d+)', player_url, 'video id') - - title = episode = title_data.get('title') or _search_kane('videoTitle') or self._og_search_title(webpage) - series = _search_kane('showName') - if 
series: - title = '%s - %s' % (series, title) - description = self._html_search_meta(['description', 'og:description'], webpage, fatal=True) - - try: - headers = {} - if self._TOKEN: - headers['Authorization'] = 'Token %s' % self._TOKEN - sources = self._download_json( - 'https://www.funimation.com/api/showexperience/%s/' % video_id, - video_id, headers=headers, query={ - 'pinst_id': ''.join([random.choice(string.digits + string.ascii_letters) for _ in range(8)]), - })['items'] - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: - error = self._parse_json(e.cause.read(), video_id)['errors'][0] - raise ExtractorError('%s said: %s' % ( - self.IE_NAME, error.get('detail') or error.get('title')), expected=True) - raise - - formats = [] - for source in sources: - source_url = source.get('src') - if not source_url: - continue - source_type = source.get('videoType') or determine_ext(source_url) - if source_type == 'm3u8': - formats.extend(self._extract_m3u8_formats( - source_url, video_id, 'mp4', - m3u8_id='hls', fatal=False)) - else: - formats.append({ - 'format_id': source_type, - 'url': source_url, - }) - self._sort_formats(formats) - - return { - 'id': video_id, - 'display_id': display_id, - 'title': title, - 'description': description, - 'thumbnail': self._og_search_thumbnail(webpage), - 'series': series, - 'season_number': int_or_none(title_data.get('seasonNum') or _search_kane('season')), - 'episode_number': int_or_none(title_data.get('episodeNum')), - 'episode': episode, - 'season_id': title_data.get('seriesId'), - 'formats': formats, - } diff --git a/youtube_dl/extractor/funk.py b/youtube_dl/extractor/funk.py deleted file mode 100644 index 81d1949fd..000000000 --- a/youtube_dl/extractor/funk.py +++ /dev/null @@ -1,49 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from .nexx import NexxIE -from ..utils import ( - int_or_none, - str_or_none, -) - - 
-class FunkIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?funk\.net/(?:channel|playlist)/[^/]+/(?P[0-9a-z-]+)-(?P\d+)' - _TESTS = [{ - 'url': 'https://www.funk.net/channel/ba-793/die-lustigsten-instrumente-aus-dem-internet-teil-2-1155821', - 'md5': '8dd9d9ab59b4aa4173b3197f2ea48e81', - 'info_dict': { - 'id': '1155821', - 'ext': 'mp4', - 'title': 'Die LUSTIGSTEN INSTRUMENTE aus dem Internet - Teil 2', - 'description': 'md5:a691d0413ef4835588c5b03ded670c1f', - 'timestamp': 1514507395, - 'upload_date': '20171229', - }, - - }, { - 'url': 'https://www.funk.net/playlist/neuesteVideos/kameras-auf-dem-fusion-festival-1618699', - 'only_matching': True, - }] - - def _real_extract(self, url): - display_id, nexx_id = re.match(self._VALID_URL, url).groups() - video = self._download_json( - 'https://www.funk.net/api/v4.0/videos/' + nexx_id, nexx_id) - return { - '_type': 'url_transparent', - 'url': 'nexx:741:' + nexx_id, - 'ie_key': NexxIE.ie_key(), - 'id': nexx_id, - 'title': video.get('title'), - 'description': video.get('description'), - 'duration': int_or_none(video.get('duration')), - 'channel_id': str_or_none(video.get('channelId')), - 'display_id': display_id, - 'tags': video.get('tags'), - 'thumbnail': video.get('imageUrlLandscape'), - } diff --git a/youtube_dl/extractor/fusion.py b/youtube_dl/extractor/fusion.py deleted file mode 100644 index a3f44b812..000000000 --- a/youtube_dl/extractor/fusion.py +++ /dev/null @@ -1,84 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - determine_ext, - int_or_none, - mimetype2ext, - parse_iso8601, -) - - -class FusionIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?fusion\.(?:net|tv)/(?:video/|show/.+?\bvideo=)(?P\d+)' - _TESTS = [{ - 'url': 'http://fusion.tv/video/201781/u-s-and-panamanian-forces-work-together-to-stop-a-vessel-smuggling-drugs/', - 'info_dict': { - 'id': '3145868', - 'ext': 'mp4', - 'title': 'U.S. 
and Panamanian forces work together to stop a vessel smuggling drugs', - 'description': 'md5:0cc84a9943c064c0f46b128b41b1b0d7', - 'duration': 140.0, - 'timestamp': 1442589635, - 'uploader': 'UNIVISON', - 'upload_date': '20150918', - }, - 'params': { - 'skip_download': True, - }, - 'add_ie': ['Anvato'], - }, { - 'url': 'http://fusion.tv/video/201781', - 'only_matching': True, - }, { - 'url': 'https://fusion.tv/show/food-exposed-with-nelufar-hedayat/?ancla=full-episodes&video=588644', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - video = self._download_json( - 'https://platform.fusion.net/wp-json/fusiondotnet/v1/video/' + video_id, video_id) - - info = { - 'id': video_id, - 'title': video['title'], - 'description': video.get('excerpt'), - 'timestamp': parse_iso8601(video.get('published')), - 'series': video.get('show'), - } - - formats = [] - src = video.get('src') or {} - for f_id, f in src.items(): - for q_id, q in f.items(): - q_url = q.get('url') - if not q_url: - continue - ext = determine_ext(q_url, mimetype2ext(q.get('type'))) - if ext == 'smil': - formats.extend(self._extract_smil_formats(q_url, video_id, fatal=False)) - elif f_id == 'm3u8-variant' or (ext == 'm3u8' and q_id == 'Variant'): - formats.extend(self._extract_m3u8_formats( - q_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) - else: - formats.append({ - 'format_id': '-'.join([f_id, q_id]), - 'url': q_url, - 'width': int_or_none(q.get('width')), - 'height': int_or_none(q.get('height')), - 'tbr': int_or_none(self._search_regex(r'_(\d+)\.m(?:p4|3u8)', q_url, 'bitrate')), - 'ext': 'mp4' if ext == 'm3u8' else ext, - 'protocol': 'm3u8_native' if ext == 'm3u8' else 'https', - }) - if formats: - self._sort_formats(formats) - info['formats'] = formats - else: - info.update({ - '_type': 'url', - 'url': 'anvato:uni:' + video['video_ids']['anvato'], - 'ie_key': 'Anvato', - }) - - return info diff --git a/youtube_dl/extractor/gaia.py 
b/youtube_dl/extractor/gaia.py deleted file mode 100644 index e9527758f..000000000 --- a/youtube_dl/extractor/gaia.py +++ /dev/null @@ -1,130 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..compat import ( - compat_str, - compat_urllib_parse_unquote, -) -from ..utils import ( - ExtractorError, - int_or_none, - str_or_none, - strip_or_none, - try_get, - urlencode_postdata, -) - - -class GaiaIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?gaia\.com/video/(?P[^/?]+).*?\bfullplayer=(?Pfeature|preview)' - _TESTS = [{ - 'url': 'https://www.gaia.com/video/connecting-universal-consciousness?fullplayer=feature', - 'info_dict': { - 'id': '89356', - 'ext': 'mp4', - 'title': 'Connecting with Universal Consciousness', - 'description': 'md5:844e209ad31b7d31345f5ed689e3df6f', - 'upload_date': '20151116', - 'timestamp': 1447707266, - 'duration': 936, - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, { - 'url': 'https://www.gaia.com/video/connecting-universal-consciousness?fullplayer=preview', - 'info_dict': { - 'id': '89351', - 'ext': 'mp4', - 'title': 'Connecting with Universal Consciousness', - 'description': 'md5:844e209ad31b7d31345f5ed689e3df6f', - 'upload_date': '20151116', - 'timestamp': 1447707266, - 'duration': 53, - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }] - _NETRC_MACHINE = 'gaia' - _jwt = None - - def _real_initialize(self): - auth = self._get_cookies('https://www.gaia.com/').get('auth') - if auth: - auth = self._parse_json( - compat_urllib_parse_unquote(auth.value), - None, fatal=False) - if not auth: - username, password = self._get_login_info() - if username is None: - return - auth = self._download_json( - 'https://auth.gaia.com/v1/login', - None, data=urlencode_postdata({ - 'username': username, - 'password': password - })) - if auth.get('success') is False: - raise ExtractorError(', '.join(auth['messages']), expected=True) - if 
auth: - self._jwt = auth.get('jwt') - - def _real_extract(self, url): - display_id, vtype = re.search(self._VALID_URL, url).groups() - node_id = self._download_json( - 'https://brooklyn.gaia.com/pathinfo', display_id, query={ - 'path': 'video/' + display_id, - })['id'] - node = self._download_json( - 'https://brooklyn.gaia.com/node/%d' % node_id, node_id) - vdata = node[vtype] - media_id = compat_str(vdata['nid']) - title = node['title'] - - headers = None - if self._jwt: - headers = {'Authorization': 'Bearer ' + self._jwt} - media = self._download_json( - 'https://brooklyn.gaia.com/media/' + media_id, - media_id, headers=headers) - formats = self._extract_m3u8_formats( - media['mediaUrls']['bcHLS'], media_id, 'mp4') - self._sort_formats(formats) - - subtitles = {} - text_tracks = media.get('textTracks', {}) - for key in ('captions', 'subtitles'): - for lang, sub_url in text_tracks.get(key, {}).items(): - subtitles.setdefault(lang, []).append({ - 'url': sub_url, - }) - - fivestar = node.get('fivestar', {}) - fields = node.get('fields', {}) - - def get_field_value(key, value_key='value'): - return try_get(fields, lambda x: x[key][0][value_key]) - - return { - 'id': media_id, - 'display_id': display_id, - 'title': title, - 'formats': formats, - 'description': strip_or_none(get_field_value('body') or get_field_value('teaser')), - 'timestamp': int_or_none(node.get('created')), - 'subtitles': subtitles, - 'duration': int_or_none(vdata.get('duration')), - 'like_count': int_or_none(try_get(fivestar, lambda x: x['up_count']['value'])), - 'dislike_count': int_or_none(try_get(fivestar, lambda x: x['down_count']['value'])), - 'comment_count': int_or_none(node.get('comment_count')), - 'series': try_get(node, lambda x: x['series']['title'], compat_str), - 'season_number': int_or_none(get_field_value('season')), - 'season_id': str_or_none(get_field_value('series_nid', 'nid')), - 'episode_number': int_or_none(get_field_value('episode')), - } diff --git 
a/youtube_dl/extractor/gameinformer.py b/youtube_dl/extractor/gameinformer.py deleted file mode 100644 index f1b96c172..000000000 --- a/youtube_dl/extractor/gameinformer.py +++ /dev/null @@ -1,49 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .brightcove import BrightcoveNewIE -from .common import InfoExtractor -from ..utils import ( - clean_html, - get_element_by_class, - get_element_by_id, -) - - -class GameInformerIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?gameinformer\.com/(?:[^/]+/)*(?P[^.?&#]+)' - _TESTS = [{ - # normal Brightcove embed code extracted with BrightcoveNewIE._extract_url - 'url': 'http://www.gameinformer.com/b/features/archive/2015/09/26/replay-animal-crossing.aspx', - 'md5': '292f26da1ab4beb4c9099f1304d2b071', - 'info_dict': { - 'id': '4515472681001', - 'ext': 'mp4', - 'title': 'Replay - Animal Crossing', - 'description': 'md5:2e211891b215c85d061adc7a4dd2d930', - 'timestamp': 1443457610, - 'upload_date': '20150928', - 'uploader_id': '694940074001', - }, - }, { - # Brightcove id inside unique element with field--name-field-brightcove-video-id class - 'url': 'https://www.gameinformer.com/video-feature/new-gameplay-today/2019/07/09/new-gameplay-today-streets-of-rogue', - 'info_dict': { - 'id': '6057111913001', - 'ext': 'mp4', - 'title': 'New Gameplay Today – Streets Of Rogue', - 'timestamp': 1562699001, - 'upload_date': '20190709', - 'uploader_id': '694940074001', - - }, - }] - BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/694940074001/default_default/index.html?videoId=%s' - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage( - url, display_id, headers=self.geo_verification_headers()) - brightcove_id = clean_html(get_element_by_class('field--name-field-brightcove-video-id', webpage) or get_element_by_id('video-source-content', webpage)) - brightcove_url = self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id if brightcove_id else 
BrightcoveNewIE._extract_url(self, webpage) - return self.url_result(brightcove_url, 'BrightcoveNew', brightcove_id) diff --git a/youtube_dl/extractor/gamespot.py b/youtube_dl/extractor/gamespot.py deleted file mode 100644 index 7a1beae3c..000000000 --- a/youtube_dl/extractor/gamespot.py +++ /dev/null @@ -1,79 +0,0 @@ -from __future__ import unicode_literals - -from .once import OnceIE -from ..compat import compat_urllib_parse_unquote - - -class GameSpotIE(OnceIE): - _VALID_URL = r'https?://(?:www\.)?gamespot\.com/(?:video|article|review)s/(?:[^/]+/\d+-|embed/)(?P\d+)' - _TESTS = [{ - 'url': 'http://www.gamespot.com/videos/arma-3-community-guide-sitrep-i/2300-6410818/', - 'md5': 'b2a30deaa8654fcccd43713a6b6a4825', - 'info_dict': { - 'id': 'gs-2300-6410818', - 'ext': 'mp4', - 'title': 'Arma 3 - Community Guide: SITREP I', - 'description': 'Check out this video where some of the basics of Arma 3 is explained.', - }, - 'skip': 'manifest URL give HTTP Error 404: Not Found', - }, { - 'url': 'http://www.gamespot.com/videos/the-witcher-3-wild-hunt-xbox-one-now-playing/2300-6424837/', - 'md5': '173ea87ad762cf5d3bf6163dceb255a6', - 'info_dict': { - 'id': 'gs-2300-6424837', - 'ext': 'mp4', - 'title': 'Now Playing - The Witcher 3: Wild Hunt', - 'description': 'Join us as we take a look at the early hours of The Witcher 3: Wild Hunt and more.', - }, - }, { - 'url': 'https://www.gamespot.com/videos/embed/6439218/', - 'only_matching': True, - }, { - 'url': 'https://www.gamespot.com/articles/the-last-of-us-2-receives-new-ps4-trailer/1100-6454469/', - 'only_matching': True, - }, { - 'url': 'https://www.gamespot.com/reviews/gears-of-war-review/1900-6161188/', - 'only_matching': True, - }] - - def _real_extract(self, url): - page_id = self._match_id(url) - webpage = self._download_webpage(url, page_id) - data_video = self._parse_json(self._html_search_regex( - r'data-video=(["\'])({.*?})\1', webpage, - 'video data', group=2), page_id) - title = 
compat_urllib_parse_unquote(data_video['title']) - streams = data_video['videoStreams'] - formats = [] - - m3u8_url = streams.get('adaptive_stream') - if m3u8_url: - m3u8_formats = self._extract_m3u8_formats( - m3u8_url, page_id, 'mp4', 'm3u8_native', - m3u8_id='hls', fatal=False) - for f in m3u8_formats: - formats.append(f) - http_f = f.copy() - del http_f['manifest_url'] - http_f.update({ - 'format_id': f['format_id'].replace('hls-', 'http-'), - 'protocol': 'http', - 'url': f['url'].replace('.m3u8', '.mp4'), - }) - formats.append(http_f) - - mpd_url = streams.get('adaptive_dash') - if mpd_url: - formats.extend(self._extract_mpd_formats( - mpd_url, page_id, mpd_id='dash', fatal=False)) - - self._sort_formats(formats) - - return { - 'id': data_video.get('guid') or page_id, - 'display_id': page_id, - 'title': title, - 'formats': formats, - 'description': self._html_search_meta('description', webpage), - 'thumbnail': self._og_search_thumbnail(webpage), - } diff --git a/youtube_dl/extractor/gamestar.py b/youtube_dl/extractor/gamestar.py deleted file mode 100644 index f00dab2f3..000000000 --- a/youtube_dl/extractor/gamestar.py +++ /dev/null @@ -1,65 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - int_or_none, - remove_end, -) - - -class GameStarIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?game(?Ppro|star)\.de/videos/.*,(?P[0-9]+)\.html' - _TESTS = [{ - 'url': 'http://www.gamestar.de/videos/trailer,3/hobbit-3-die-schlacht-der-fuenf-heere,76110.html', - 'md5': 'ee782f1f8050448c95c5cacd63bc851c', - 'info_dict': { - 'id': '76110', - 'ext': 'mp4', - 'title': 'Hobbit 3: Die Schlacht der Fünf Heere - Teaser-Trailer zum dritten Teil', - 'description': 'Der Teaser-Trailer zu Hobbit 3: Die Schlacht der Fünf Heere zeigt einige Szenen aus dem dritten Teil der Saga und kündigt den...', - 'thumbnail': r're:^https?://.*\.jpg$', - 'timestamp': 1406542380, - 'upload_date': '20140728', 
- 'duration': 17, - } - }, { - 'url': 'http://www.gamepro.de/videos/top-10-indie-spiele-fuer-nintendo-switch-video-tolle-nindies-games-zum-download,95316.html', - 'only_matching': True, - }, { - 'url': 'http://www.gamestar.de/videos/top-10-indie-spiele-fuer-nintendo-switch-video-tolle-nindies-games-zum-download,95316.html', - 'only_matching': True, - }] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - site = mobj.group('site') - video_id = mobj.group('id') - - webpage = self._download_webpage(url, video_id) - - # TODO: there are multiple ld+json objects in the webpage, - # while _search_json_ld finds only the first one - json_ld = self._parse_json(self._search_regex( - r'(?s)]+type=(["\'])application/ld\+json\1[^>]*>(?P[^<]+VideoObject[^<]+)', - webpage, 'JSON-LD', group='json_ld'), video_id) - info_dict = self._json_ld(json_ld, video_id) - info_dict['title'] = remove_end( - info_dict['title'], ' - Game%s' % site.title()) - - view_count = int_or_none(json_ld.get('interactionCount')) - comment_count = int_or_none(self._html_search_regex( - r'Kommentare\s*]+class=["\']count[^>]+>\s*\(\s*([0-9]+)', - webpage, 'comment count', fatal=False)) - - info_dict.update({ - 'id': video_id, - 'url': 'http://gamestar.de/_misc/videos/portal/getVideoUrl.cfm?premium=0&videoId=' + video_id, - 'ext': 'mp4', - 'view_count': view_count, - 'comment_count': comment_count - }) - - return info_dict diff --git a/youtube_dl/extractor/gaskrank.py b/youtube_dl/extractor/gaskrank.py deleted file mode 100644 index 1726a6704..000000000 --- a/youtube_dl/extractor/gaskrank.py +++ /dev/null @@ -1,101 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re -from .common import InfoExtractor -from ..utils import ( - float_or_none, - int_or_none, - unified_strdate, -) - - -class GaskrankIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?gaskrank\.tv/tv/(?P[^/]+)/(?P[^/]+)\.htm' - _TESTS = [{ - 'url': 
'http://www.gaskrank.tv/tv/motorrad-fun/strike-einparken-durch-anfaenger-crash-mit-groesserem-flurschaden.htm', - 'md5': '1ae88dbac97887d85ebd1157a95fc4f9', - 'info_dict': { - 'id': '201601/26955', - 'ext': 'mp4', - 'title': 'Strike! Einparken können nur Männer - Flurschaden hält sich in Grenzen *lol*', - 'thumbnail': r're:^https?://.*\.jpg$', - 'categories': ['motorrad-fun'], - 'display_id': 'strike-einparken-durch-anfaenger-crash-mit-groesserem-flurschaden', - 'uploader_id': 'Bikefun', - 'upload_date': '20170110', - 'uploader_url': None, - } - }, { - 'url': 'http://www.gaskrank.tv/tv/racing/isle-of-man-tt-2011-michael-du-15920.htm', - 'md5': 'c33ee32c711bc6c8224bfcbe62b23095', - 'info_dict': { - 'id': '201106/15920', - 'ext': 'mp4', - 'title': 'Isle of Man - Michael Dunlop vs Guy Martin - schwindelig kucken', - 'thumbnail': r're:^https?://.*\.jpg$', - 'categories': ['racing'], - 'display_id': 'isle-of-man-tt-2011-michael-du-15920', - 'uploader_id': 'IOM', - 'upload_date': '20170523', - 'uploader_url': 'www.iomtt.com', - } - }] - - def _real_extract(self, url): - display_id = self._match_id(url) - - webpage = self._download_webpage(url, display_id) - - title = self._og_search_title( - webpage, default=None) or self._html_search_meta( - 'title', webpage, fatal=True) - - categories = [re.match(self._VALID_URL, url).group('categories')] - - mobj = re.search( - r'Video von:\s*(?P[^|]*?)\s*\|\s*vom:\s*(?P[0-9][0-9]\.[0-9][0-9]\.[0-9][0-9][0-9][0-9])', - webpage) - if mobj is not None: - uploader_id = mobj.groupdict().get('uploader_id') - upload_date = unified_strdate(mobj.groupdict().get('upload_date')) - - uploader_url = self._search_regex( - r'Homepage:\s*<[^>]*>(?P[^<]*)', - webpage, 'uploader_url', default=None) - tags = re.findall( - r'/tv/tags/[^/]+/"\s*>(?P[^<]*?)<', - webpage) - - view_count = self._search_regex( - r'class\s*=\s*"gkRight"(?:[^>]*>\s*<[^>]*)*icon-eye-open(?:[^>]*>\s*<[^>]*)*>\s*(?P[0-9\.]*)', - webpage, 'view_count', default=None) - if 
view_count: - view_count = int_or_none(view_count.replace('.', '')) - - average_rating = self._search_regex( - r'itemprop\s*=\s*"ratingValue"[^>]*>\s*(?P[0-9,]+)', - webpage, 'average_rating') - if average_rating: - average_rating = float_or_none(average_rating.replace(',', '.')) - - video_id = self._search_regex( - r'https?://movies\.gaskrank\.tv/([^-]*?)(-[^\.]*)?\.mp4', - webpage, 'video id', default=display_id) - - entry = self._parse_html5_media_entries(url, webpage, video_id)[0] - entry.update({ - 'id': video_id, - 'title': title, - 'categories': categories, - 'display_id': display_id, - 'uploader_id': uploader_id, - 'upload_date': upload_date, - 'uploader_url': uploader_url, - 'tags': tags, - 'view_count': view_count, - 'average_rating': average_rating, - }) - self._sort_formats(entry['formats']) - - return entry diff --git a/youtube_dl/extractor/gazeta.py b/youtube_dl/extractor/gazeta.py deleted file mode 100644 index 57c67a451..000000000 --- a/youtube_dl/extractor/gazeta.py +++ /dev/null @@ -1,48 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor - - -class GazetaIE(InfoExtractor): - _VALID_URL = r'(?Phttps?://(?:www\.)?gazeta\.ru/(?:[^/]+/)?video/(?:main/)*(?:\d{4}/\d{2}/\d{2}/)?(?P[A-Za-z0-9-_.]+)\.s?html)' - _TESTS = [{ - 'url': 'http://www.gazeta.ru/video/main/zadaite_vopros_vladislavu_yurevichu.shtml', - 'md5': 'd49c9bdc6e5a7888f27475dc215ee789', - 'info_dict': { - 'id': '205566', - 'ext': 'mp4', - 'title': '«70–80 процентов гражданских в Донецке на грани голода»', - 'description': 'md5:38617526050bd17b234728e7f9620a71', - 'thumbnail': r're:^https?://.*\.jpg', - }, - 'skip': 'video not found', - }, { - 'url': 'http://www.gazeta.ru/lifestyle/video/2015/03/08/master-klass_krasivoi_byt._delaem_vesennii_makiyazh.shtml', - 'only_matching': True, - }, { - 'url': 'http://www.gazeta.ru/video/main/main/2015/06/22/platit_ili_ne_platit_po_isku_yukosa.shtml', - 'md5': 
'37f19f78355eb2f4256ee1688359f24c', - 'info_dict': { - 'id': '252048', - 'ext': 'mp4', - 'title': '"Если по иску ЮКОСа придется платить, это будет большой удар по бюджету"', - }, - 'add_ie': ['EaglePlatform'], - }] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - - display_id = mobj.group('id') - embed_url = '%s?p=embed' % mobj.group('url') - embed_page = self._download_webpage( - embed_url, display_id, 'Downloading embed page') - - video_id = self._search_regex( - r']*?class="eagleplayer"[^>]*?data-id="([^"]+)"', embed_page, 'video id') - - return self.url_result( - 'eagleplatform:gazeta.media.eagleplatform.com:%s' % video_id, 'EaglePlatform') diff --git a/youtube_dl/extractor/gbnews.py b/youtube_dl/extractor/gbnews.py deleted file mode 100644 index f04f30e5a..000000000 --- a/youtube_dl/extractor/gbnews.py +++ /dev/null @@ -1,139 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - extract_attributes, - ExtractorError, - T, - traverse_obj, - txt_or_none, - url_or_none, -) - - -class GBNewsIE(InfoExtractor): - IE_DESC = 'GB News clips, features and live stream' - - # \w+ is normally shows or news, but apparently any word redirects to the correct URL - _VALID_URL = r'https?://(?:www\.)?gbnews\.(?:uk|com)/(?:\w+/)?(?P[^#?]+)' - - _PLATFORM = 'safari' - _SSMP_URL = 'https://mm-v2.simplestream.com/ssmp/api.php' - _TESTS = [{ - 'url': 'https://www.gbnews.uk/shows/andrew-neils-message-to-companies-choosing-to-boycott-gb-news/106889', - 'info_dict': { - 'id': '106889', - 'ext': 'mp4', - 'title': "Andrew Neil's message to companies choosing to boycott GB News", - 'description': 'md5:b281f5d22fd6d5eda64a4e3ba771b351', - }, - 'skip': '404 not found', - }, { - 'url': 'https://www.gbnews.com/news/bbc-claudine-gay-harvard-university-antisemitism-row', - 'info_dict': { - 'id': '52264136', - 'display_id': 'bbc-claudine-gay-harvard-university-antisemitism-row', - 'ext': 'mp4', - 
'title': 'BBC deletes post after furious backlash over headline downplaying antisemitism', - 'description': 'The post was criticised by former employers of the broadcaster', - }, - }, { - 'url': 'https://www.gbnews.uk/watchlive', - 'info_dict': { - 'id': '1069', - 'display_id': 'watchlive', - 'ext': 'mp4', - 'title': 'GB News Live', - 'is_live': True, - }, - 'params': { - 'skip_download': 'm3u8', - }, - }] - - def _real_extract(self, url): - display_id = self._match_id(url).split('/')[-1] - - webpage = self._download_webpage(url, display_id) - # extraction based on https://github.com/ytdl-org/youtube-dl/issues/29341 - ''' -
    - ''' - # exception if no match - video_data = self._search_regex( - r'(]*\bclass\s*=\s*(\'|")(?!.*sidebar\b)simplestream(?:\s[\s\w$-]*)?\2[^>]*>)', - webpage, 'video data') - - video_data = extract_attributes(video_data) - ss_id = video_data.get('data-id') - if not ss_id: - raise ExtractorError('Simplestream ID not found') - - json_data = self._download_json( - self._SSMP_URL, display_id, - note='Downloading Simplestream JSON metadata', - errnote='Unable to download Simplestream JSON metadata', - query={ - 'id': ss_id, - 'env': video_data.get('data-env', 'production'), - }, fatal=False) - - meta_url = traverse_obj(json_data, ('response', 'api_hostname')) - if not meta_url: - raise ExtractorError('No API host found') - - uvid = video_data['data-uvid'] - dtype = video_data.get('data-type') - stream_data = self._download_json( - '%s/api/%s/stream/%s' % (meta_url, 'show' if dtype == 'vod' else dtype, uvid), - uvid, - query={ - 'key': video_data.get('data-key'), - 'platform': self._PLATFORM, - }, - headers={ - 'Token': video_data.get('data-token'), - 'Token-Expiry': video_data.get('data-expiry'), - 'Uvid': uvid, - }, fatal=False) - - stream_url = traverse_obj(stream_data, ( - 'response', 'stream', T(url_or_none))) - if not stream_url: - raise ExtractorError('No stream data/URL') - - # now known to be a dict - stream_data = stream_data['response'] - drm = stream_data.get('drm') - if drm: - self.report_drm(uvid) - - formats = self._extract_m3u8_formats( - stream_url, uvid, ext='mp4', entry_protocol='m3u8_native', - fatal=False) - # exception if no formats - self._sort_formats(formats) - - return { - 'id': uvid, - 'display_id': display_id, - 'title': (traverse_obj(stream_data, ('title', T(txt_or_none))) - or self._og_search_title(webpage, default=None) - or display_id.replace('-', ' ').capitalize()), - 'description': self._og_search_description(webpage, default=None), - 'thumbnail': (traverse_obj(video_data, ('data-poster', T(url_or_none))) - or 
self._og_search_thumbnail(webpage)), - 'formats': formats, - 'is_live': (dtype == 'live') or None, - } diff --git a/youtube_dl/extractor/gdcvault.py b/youtube_dl/extractor/gdcvault.py deleted file mode 100644 index acc6478b8..000000000 --- a/youtube_dl/extractor/gdcvault.py +++ /dev/null @@ -1,220 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from .kaltura import KalturaIE -from ..utils import ( - HEADRequest, - remove_start, - sanitized_Request, - smuggle_url, - urlencode_postdata, -) - - -class GDCVaultIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?gdcvault\.com/play/(?P\d+)(?:/(?P[\w-]+))?' - _NETRC_MACHINE = 'gdcvault' - _TESTS = [ - { - 'url': 'http://www.gdcvault.com/play/1019721/Doki-Doki-Universe-Sweet-Simple', - 'md5': '7ce8388f544c88b7ac11c7ab1b593704', - 'info_dict': { - 'id': '201311826596_AWNY', - 'display_id': 'Doki-Doki-Universe-Sweet-Simple', - 'ext': 'mp4', - 'title': 'Doki-Doki Universe: Sweet, Simple and Genuine (GDC Next 10)' - } - }, - { - 'url': 'http://www.gdcvault.com/play/1015683/Embracing-the-Dark-Art-of', - 'info_dict': { - 'id': '201203272_1330951438328RSXR', - 'display_id': 'Embracing-the-Dark-Art-of', - 'ext': 'flv', - 'title': 'Embracing the Dark Art of Mathematical Modeling in AI' - }, - 'params': { - 'skip_download': True, # Requires rtmpdump - } - }, - { - 'url': 'http://www.gdcvault.com/play/1015301/Thexder-Meets-Windows-95-or', - 'md5': 'a5eb77996ef82118afbbe8e48731b98e', - 'info_dict': { - 'id': '1015301', - 'display_id': 'Thexder-Meets-Windows-95-or', - 'ext': 'flv', - 'title': 'Thexder Meets Windows 95, or Writing Great Games in the Windows 95 Environment', - }, - 'skip': 'Requires login', - }, - { - 'url': 'http://gdcvault.com/play/1020791/', - 'only_matching': True, - }, - { - # Hard-coded hostname - 'url': 'http://gdcvault.com/play/1023460/Tenacious-Design-and-The-Interface', - 'md5': 'a8efb6c31ed06ca8739294960b2dbabd', - 'info_dict': { - 'id': '840376_BQRC', - 
'ext': 'mp4', - 'display_id': 'Tenacious-Design-and-The-Interface', - 'title': 'Tenacious Design and The Interface of \'Destiny\'', - }, - }, - { - # Multiple audios - 'url': 'http://www.gdcvault.com/play/1014631/Classic-Game-Postmortem-PAC', - 'info_dict': { - 'id': '12396_1299111843500GMPX', - 'ext': 'mp4', - 'title': 'How to Create a Good Game - From My Experience of Designing Pac-Man', - }, - # 'params': { - # 'skip_download': True, # Requires rtmpdump - # 'format': 'jp', # The japanese audio - # } - }, - { - # gdc-player.html - 'url': 'http://www.gdcvault.com/play/1435/An-American-engine-in-Tokyo', - 'info_dict': { - 'id': '9350_1238021887562UHXB', - 'display_id': 'An-American-engine-in-Tokyo', - 'ext': 'mp4', - 'title': 'An American Engine in Tokyo:/nThe collaboration of Epic Games and Square Enix/nFor THE LAST REMINANT', - }, - }, - { - # Kaltura Embed - 'url': 'https://www.gdcvault.com/play/1026180/Mastering-the-Apex-of-Scaling', - 'info_dict': { - 'id': '0_h1fg8j3p', - 'ext': 'mp4', - 'title': 'Mastering the Apex of Scaling Game Servers (Presented by Multiplay)', - 'timestamp': 1554401811, - 'upload_date': '20190404', - 'uploader_id': 'joe@blazestreaming.com', - }, - 'params': { - 'format': 'mp4-408', - }, - }, - { - # Kaltura embed, whitespace between quote and embedded URL in iframe's src - 'url': 'https://www.gdcvault.com/play/1025699', - 'info_dict': { - 'id': '0_zagynv0a', - 'ext': 'mp4', - 'title': 'Tech Toolbox', - 'upload_date': '20190408', - 'uploader_id': 'joe@blazestreaming.com', - 'timestamp': 1554764629, - }, - 'params': { - 'skip_download': True, - }, - }, - { - # HTML5 video - 'url': 'http://www.gdcvault.com/play/1014846/Conference-Keynote-Shigeru', - 'only_matching': True, - }, - ] - - def _login(self, webpage_url, display_id): - username, password = self._get_login_info() - if username is None or password is None: - self.report_warning('It looks like ' + webpage_url + ' requires a login. 
Try specifying a username and password and try again.') - return None - - mobj = re.match(r'(?Phttps?://.*?/).*', webpage_url) - login_url = mobj.group('root_url') + 'api/login.php' - logout_url = mobj.group('root_url') + 'logout' - - login_form = { - 'email': username, - 'password': password, - } - - request = sanitized_Request(login_url, urlencode_postdata(login_form)) - request.add_header('Content-Type', 'application/x-www-form-urlencoded') - self._download_webpage(request, display_id, 'Logging in') - start_page = self._download_webpage(webpage_url, display_id, 'Getting authenticated video page') - self._download_webpage(logout_url, display_id, 'Logging out') - - return start_page - - def _real_extract(self, url): - video_id, name = re.match(self._VALID_URL, url).groups() - display_id = name or video_id - - webpage_url = 'http://www.gdcvault.com/play/' + video_id - start_page = self._download_webpage(webpage_url, display_id) - - direct_url = self._search_regex( - r's1\.addVariable\("file",\s*encodeURIComponent\("(/[^"]+)"\)\);', - start_page, 'url', default=None) - if direct_url: - title = self._html_search_regex( - r'Session Name:?\s*(.*?)', - start_page, 'title') - video_url = 'http://www.gdcvault.com' + direct_url - # resolve the url so that we can detect the correct extension - video_url = self._request_webpage( - HEADRequest(video_url), video_id).geturl() - - return { - 'id': video_id, - 'display_id': display_id, - 'url': video_url, - 'title': title, - } - - embed_url = KalturaIE._extract_url(start_page) - if embed_url: - embed_url = smuggle_url(embed_url, {'source_url': url}) - ie_key = 'Kaltura' - else: - PLAYER_REGEX = r'', - start_page, 'xml filename', default=None) - if not xml_name: - info = self._parse_html5_media_entries(url, start_page, video_id)[0] - info.update({ - 'title': remove_start(self._search_regex( - r'>Session Name:\s*<.*?>\s*(.+?)', start_page, - 'title', default=None) or self._og_search_title( - start_page, default=None), 'GDC Vault - 
'), - 'id': video_id, - 'display_id': display_id, - }) - return info - embed_url = '%s/xml/%s' % (xml_root, xml_name) - ie_key = 'DigitallySpeaking' - - return { - '_type': 'url_transparent', - 'id': video_id, - 'display_id': display_id, - 'url': embed_url, - 'ie_key': ie_key, - } diff --git a/youtube_dl/extractor/gedidigital.py b/youtube_dl/extractor/gedidigital.py deleted file mode 100644 index 6c4153b40..000000000 --- a/youtube_dl/extractor/gedidigital.py +++ /dev/null @@ -1,161 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - determine_ext, - int_or_none, -) - - -class GediDigitalIE(InfoExtractor): - _VALID_URL = r'''(?x)https?://video\. - (?: - (?: - (?:espresso\.)?repubblica - |lastampa - |ilsecoloxix - )| - (?: - iltirreno - |messaggeroveneto - |ilpiccolo - |gazzettadimantova - |mattinopadova - |laprovinciapavese - |tribunatreviso - |nuovavenezia - |gazzettadimodena - |lanuovaferrara - |corrierealpi - |lasentinella - )\.gelocal - )\.it(?:/[^/]+){2,3}?/(?P\d+)(?:[/?&#]|$)''' - _TESTS = [{ - 'url': 'https://video.lastampa.it/politica/il-paradosso-delle-regionali-la-lega-vince-ma-sembra-aver-perso/121559/121683', - 'md5': '84658d7fb9e55a6e57ecc77b73137494', - 'info_dict': { - 'id': '121559', - 'ext': 'mp4', - 'title': 'Il paradosso delle Regionali: ecco perché la Lega vince ma sembra aver perso', - 'description': 'md5:de7f4d6eaaaf36c153b599b10f8ce7ca', - 'thumbnail': r're:^https://www\.repstatic\.it/video/photo/.+?-thumb-full-.+?\.jpg$', - 'duration': 125, - }, - }, { - 'url': 'https://video.espresso.repubblica.it/embed/tutti-i-video/01-ted-villa/14772/14870&width=640&height=360', - 'only_matching': True, - }, { - 'url': 'https://video.repubblica.it/motori/record-della-pista-a-spa-francorchamps-la-pagani-huayra-roadster-bc-stupisce/367415/367963', - 'only_matching': True, - }, { - 'url': 
'https://video.ilsecoloxix.it/sport/cassani-e-i-brividi-azzurri-ai-mondiali-di-imola-qui-mi-sono-innamorato-del-ciclismo-da-ragazzino-incredibile-tornarci-da-ct/66184/66267', - 'only_matching': True, - }, { - 'url': 'https://video.iltirreno.gelocal.it/sport/dentro-la-notizia-ferrari-cosa-succede-a-maranello/141059/142723', - 'only_matching': True, - }, { - 'url': 'https://video.messaggeroveneto.gelocal.it/locale/maria-giovanna-elmi-covid-vaccino/138155/139268', - 'only_matching': True, - }, { - 'url': 'https://video.ilpiccolo.gelocal.it/dossier/big-john/dinosauro-big-john-al-via-le-visite-guidate-a-trieste/135226/135751', - 'only_matching': True, - }, { - 'url': 'https://video.gazzettadimantova.gelocal.it/locale/dal-ponte-visconteo-di-valeggio-l-and-8217sos-dei-ristoratori-aprire-anche-a-cena/137310/137818', - 'only_matching': True, - }, { - 'url': 'https://video.mattinopadova.gelocal.it/dossier/coronavirus-in-veneto/covid-a-vo-un-anno-dopo-un-cuore-tricolore-per-non-dimenticare/138402/138964', - 'only_matching': True, - }, { - 'url': 'https://video.laprovinciapavese.gelocal.it/locale/mede-zona-rossa-via-alle-vaccinazioni-per-gli-over-80/137545/138120', - 'only_matching': True, - }, { - 'url': 'https://video.tribunatreviso.gelocal.it/dossier/coronavirus-in-veneto/ecco-le-prima-vaccinazioni-di-massa-nella-marca/134485/135024', - 'only_matching': True, - }, { - 'url': 'https://video.nuovavenezia.gelocal.it/locale/camion-troppo-alto-per-il-ponte-ferroviario-perde-il-carico/135734/136266', - 'only_matching': True, - }, { - 'url': 'https://video.gazzettadimodena.gelocal.it/locale/modena-scoperta-la-proteina-che-predice-il-livello-di-gravita-del-covid/139109/139796', - 'only_matching': True, - }, { - 'url': 'https://video.lanuovaferrara.gelocal.it/locale/due-bombole-di-gpl-aperte-e-abbandonate-i-vigili-bruciano-il-gas/134391/134957', - 'only_matching': True, - }, { - 'url': 
'https://video.corrierealpi.gelocal.it/dossier/cortina-2021-i-mondiali-di-sci-alpino/mondiali-di-sci-il-timelapse-sulla-splendida-olympia/133760/134331', - 'only_matching': True, - }, { - 'url': 'https://video.lasentinella.gelocal.it/locale/vestigne-centra-un-auto-e-si-ribalta/138931/139466', - 'only_matching': True, - }, { - 'url': 'https://video.espresso.repubblica.it/tutti-i-video/01-ted-villa/14772', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - title = self._html_search_meta( - ['twitter:title', 'og:title'], webpage, fatal=True) - player_data = re.findall( - r"PlayerFactory\.setParam\('(?Pformat|param)',\s*'(?P[^']+)',\s*'(?P[^']+)'\);", - webpage) - - formats = [] - duration = thumb = None - for t, n, v in player_data: - if t == 'format': - if n in ('video-hds-vod-ec', 'video-hls-vod-ec', 'video-viralize', 'video-youtube-pfp'): - continue - elif n.endswith('-vod-ak'): - formats.extend(self._extract_akamai_formats( - v, video_id, {'http': 'media.gedidigital.it'})) - else: - ext = determine_ext(v) - if ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( - v, video_id, 'mp4', 'm3u8_native', m3u8_id=n, fatal=False)) - continue - f = { - 'format_id': n, - 'url': v, - } - if ext == 'mp3': - abr = int_or_none(self._search_regex( - r'-mp3-audio-(\d+)', v, 'abr', default=None)) - f.update({ - 'abr': abr, - 'tbr': abr, - 'vcodec': 'none' - }) - else: - mobj = re.match(r'^video-rrtv-(\d+)(?:-(\d+))?$', n) - if mobj: - f.update({ - 'height': int(mobj.group(1)), - 'vbr': int_or_none(mobj.group(2)), - }) - if not f.get('vbr'): - f['vbr'] = int_or_none(self._search_regex( - r'-video-rrtv-(\d+)', v, 'abr', default=None)) - formats.append(f) - elif t == 'param': - if n in ['image_full', 'image']: - thumb = v - elif n == 'videoDuration': - duration = int_or_none(v) - - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': title, - 'description': 
self._html_search_meta( - ['twitter:description', 'og:description', 'description'], webpage), - 'thumbnail': thumb or self._og_search_thumbnail(webpage), - 'formats': formats, - 'duration': duration, - } diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py deleted file mode 100644 index b01900afa..000000000 --- a/youtube_dl/extractor/generic.py +++ /dev/null @@ -1,3781 +0,0 @@ -# coding: utf-8 - -from __future__ import unicode_literals - -import os -import re -import sys - -from .common import InfoExtractor -from .youtube import YoutubeIE -from ..compat import ( - compat_etree_fromstring, - compat_str, - compat_urllib_parse_unquote, - compat_urlparse, - compat_xml_parse_error, -) -from ..utils import ( - determine_ext, - ExtractorError, - float_or_none, - HEADRequest, - int_or_none, - is_html, - js_to_json, - KNOWN_EXTENSIONS, - merge_dicts, - mimetype2ext, - orderedSet, - parse_duration, - parse_resolution, - sanitized_Request, - smuggle_url, - unescapeHTML, - unified_timestamp, - unsmuggle_url, - UnsupportedError, - url_or_none, - urljoin, - xpath_attr, - xpath_text, - xpath_with_ns, -) -from .commonprotocols import RtmpIE -from .brightcove import ( - BrightcoveLegacyIE, - BrightcoveNewIE, -) -from .nexx import ( - NexxIE, - NexxEmbedIE, -) -from .nbc import NBCSportsVPlayerIE -from .ooyala import OoyalaIE -from .rutv import RUTVIE -from .tvc import TVCIE -from .sportbox import SportBoxIE -from .myvi import MyviIE -from .condenast import CondeNastIE -from .udn import UDNEmbedIE -from .senateisvp import SenateISVPIE -from .svt import SVTIE -from .pornhub import PornHubIE -from .xhamster import XHamsterEmbedIE -from .tnaflix import TNAFlixNetworkEmbedIE -from .drtuber import DrTuberIE -from .redtube import RedTubeIE -from .tube8 import Tube8IE -from .mofosex import MofosexEmbedIE -from .spankwire import SpankwireIE -from .youporn import YouPornIE -from .vimeo import ( - VimeoIE, - VHXEmbedIE, -) -from .dailymotion import DailymotionIE 
-from .dailymail import DailyMailIE -from .onionstudios import OnionStudiosIE -from .viewlift import ViewLiftEmbedIE -from .mtv import MTVServicesEmbeddedIE -from .pladform import PladformIE -from .videomore import VideomoreIE -from .webcaster import WebcasterFeedIE -from .googledrive import GoogleDriveIE -from .jwplatform import JWPlatformIE -from .digiteka import DigitekaIE -from .arkena import ArkenaIE -from .instagram import InstagramIE -from .threeqsdn import ThreeQSDNIE -from .theplatform import ThePlatformIE -from .kaltura import KalturaIE -from .eagleplatform import EaglePlatformIE -from .facebook import FacebookIE -from .soundcloud import SoundcloudEmbedIE -from .tunein import TuneInBaseIE -from .vbox7 import Vbox7IE -from .dbtv import DBTVIE -from .piksel import PikselIE -from .videa import VideaIE -from .twentymin import TwentyMinutenIE -from .ustream import UstreamIE -from .arte import ArteTVEmbedIE -from .videopress import VideoPressIE -from .rutube import RutubeIE -from .limelight import LimelightBaseIE -from .anvato import AnvatoIE -from .washingtonpost import WashingtonPostIE -from .wistia import WistiaIE -from .mediaset import MediasetIE -from .joj import JojIE -from .megaphone import MegaphoneIE -from .vzaar import VzaarIE -from .channel9 import Channel9IE -from .vshare import VShareIE -from .mediasite import MediasiteIE -from .springboardplatform import SpringboardPlatformIE -from .yapfiles import YapFilesIE -from .vice import ViceIE -from .xfileshare import XFileShareIE -from .cloudflarestream import CloudflareStreamIE -from .peertube import PeerTubeIE -from .teachable import TeachableIE -from .indavideo import IndavideoEmbedIE -from .apa import APAIE -from .foxnews import FoxNewsIE -from .viqeo import ViqeoIE -from .expressen import ExpressenIE -from .zype import ZypeIE -from .odnoklassniki import OdnoklassnikiIE -from .vk import VKIE -from .kinja import KinjaEmbedIE -from .arcpublishing import ArcPublishingIE -from .medialaan import 
MedialaanIE -from .simplecast import SimplecastIE - - -class GenericIE(InfoExtractor): - IE_DESC = 'Generic downloader that works on some sites' - _VALID_URL = r'.*' - IE_NAME = 'generic' - _TESTS = [ - # Direct link to a video - { - 'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4', - 'md5': '67d406c2bcb6af27fa886f31aa934bbe', - 'info_dict': { - 'id': 'trailer', - 'ext': 'mp4', - 'title': 'trailer', - 'upload_date': '20100513', - } - }, - # Direct link to media delivered compressed (until Accept-Encoding is *) - { - 'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac', - 'md5': '128c42e68b13950268b648275386fc74', - 'info_dict': { - 'id': 'FictionJunction-Parallel_Hearts', - 'ext': 'flac', - 'title': 'FictionJunction-Parallel_Hearts', - 'upload_date': '20140522', - }, - 'expected_warnings': [ - 'URL could be a direct video link, returning it as such.' - ], - 'skip': 'URL invalid', - }, - # Direct download with broken HEAD - { - 'url': 'http://ai-radio.org:8000/radio.opus', - 'info_dict': { - 'id': 'radio', - 'ext': 'opus', - 'title': 'radio', - }, - 'params': { - 'skip_download': True, # infinite live stream - }, - 'expected_warnings': [ - r'501.*Not Implemented', - r'400.*Bad Request', - ], - }, - # Direct link with incorrect MIME type - { - 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm', - 'md5': '4ccbebe5f36706d85221f204d7eb5913', - 'info_dict': { - 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm', - 'id': '5_Lennart_Poettering_-_Systemd', - 'ext': 'webm', - 'title': '5_Lennart_Poettering_-_Systemd', - 'upload_date': '20141120', - }, - 'expected_warnings': [ - 'URL could be a direct video link, returning it as such.' 
- ] - }, - # RSS feed - { - 'url': 'http://phihag.de/2014/youtube-dl/rss2.xml', - 'info_dict': { - 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml', - 'title': 'Zero Punctuation', - 'description': 're:.*groundbreaking video review series.*' - }, - 'playlist_mincount': 11, - }, - # RSS feed with enclosure - { - 'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml', - 'info_dict': { - 'id': 'http://podcastfeeds.nbcnews.com/nbcnews/video/podcast/MSNBC-MADDOW-NETCAST-M4V.xml', - 'title': 'MSNBC Rachel Maddow (video)', - 'description': 're:.*her unique approach to storytelling.*', - }, - 'playlist': [{ - 'info_dict': { - 'ext': 'mov', - 'id': 'pdv_maddow_netcast_mov-12-04-2020-224335', - 'title': 're:MSNBC Rachel Maddow', - 'description': 're:.*her unique approach to storytelling.*', - 'timestamp': int, - 'upload_date': compat_str, - 'duration': float, - }, - }], - }, - # RSS feed with item with description and thumbnails - { - 'url': 'https://anchor.fm/s/dd00e14/podcast/rss', - 'info_dict': { - 'id': 'https://anchor.fm/s/dd00e14/podcast/rss', - 'title': 're:.*100% Hydrogen.*', - 'description': 're:.*In this episode.*', - }, - 'playlist': [{ - 'info_dict': { - 'ext': 'm4a', - 'id': 'c1c879525ce2cb640b344507e682c36d', - 'title': 're:Hydrogen!', - 'description': 're:.*In this episode we are going.*', - 'timestamp': 1567977776, - 'upload_date': '20190908', - 'duration': 459, - 'thumbnail': r're:^https?://.*\.jpg$', - 'episode_number': 1, - 'season_number': 1, - 'age_limit': 0, - }, - }], - 'params': { - 'skip_download': True, - }, - }, - # RSS feed with enclosures and unsupported link URLs - { - 'url': 'http://www.hellointernet.fm/podcast?format=rss', - 'info_dict': { - 'id': 'http://www.hellointernet.fm/podcast?format=rss', - 'description': 'CGP Grey and Brady Haran talk about YouTube, life, work, whatever.', - 'title': 'Hello Internet', - }, - 'playlist_mincount': 100, - }, - # SMIL from http://videolectures.net/promogram_igor_mekjavic_eng 
- { - 'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/video/1/smil.xml', - 'info_dict': { - 'id': 'smil', - 'ext': 'mp4', - 'title': 'Automatics, robotics and biocybernetics', - 'description': 'md5:815fc1deb6b3a2bff99de2d5325be482', - 'upload_date': '20130627', - 'formats': 'mincount:16', - 'subtitles': 'mincount:1', - }, - 'params': { - 'force_generic_extractor': True, - 'skip_download': True, - }, - }, - # SMIL from http://www1.wdr.de/mediathek/video/livestream/index.html - { - 'url': 'http://metafilegenerator.de/WDR/WDR_FS/hds/hds.smil', - 'info_dict': { - 'id': 'hds', - 'ext': 'flv', - 'title': 'hds', - 'formats': 'mincount:1', - }, - 'params': { - 'skip_download': True, - }, - }, - # SMIL from https://www.restudy.dk/video/play/id/1637 - { - 'url': 'https://www.restudy.dk/awsmedia/SmilDirectory/video_1637.xml', - 'info_dict': { - 'id': 'video_1637', - 'ext': 'flv', - 'title': 'video_1637', - 'formats': 'mincount:3', - }, - 'params': { - 'skip_download': True, - }, - }, - # SMIL from http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm - { - 'url': 'http://services.media.howstuffworks.com/videos/450221/smil-service.smil', - 'info_dict': { - 'id': 'smil-service', - 'ext': 'flv', - 'title': 'smil-service', - 'formats': 'mincount:1', - }, - 'params': { - 'skip_download': True, - }, - }, - # SMIL from http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370 - { - 'url': 'http://api.new.livestream.com/accounts/1570303/events/1585861/videos/4719370.smil', - 'info_dict': { - 'id': '4719370', - 'ext': 'mp4', - 'title': '571de1fd-47bc-48db-abf9-238872a58d1f', - 'formats': 'mincount:3', - }, - 'params': { - 'skip_download': True, - }, - }, - # XSPF playlist from http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html - { - 'url': 'http://www.telegraaf.nl/xml/playlist/2015/8/7/mZlp2ctYIUEB.xspf', - 'info_dict': { - 'id': 'mZlp2ctYIUEB', - 'ext': 'mp4', - 'title': 'Tikibad ontruimd wegens 
brand', - 'description': 'md5:05ca046ff47b931f9b04855015e163a4', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 33, - }, - 'params': { - 'skip_download': True, - }, - }, - # MPD from http://dash-mse-test.appspot.com/media.html - { - 'url': 'http://yt-dash-mse-test.commondatastorage.googleapis.com/media/car-20120827-manifest.mpd', - 'md5': '4b57baab2e30d6eb3a6a09f0ba57ef53', - 'info_dict': { - 'id': 'car-20120827-manifest', - 'ext': 'mp4', - 'title': 'car-20120827-manifest', - 'formats': 'mincount:9', - 'upload_date': '20130904', - }, - 'params': { - 'format': 'bestvideo', - }, - }, - # m3u8 served with Content-Type: audio/x-mpegURL; charset=utf-8 - { - 'url': 'http://once.unicornmedia.com/now/master/playlist/bb0b18ba-64f5-4b1b-a29f-0ac252f06b68/77a785f3-5188-4806-b788-0893a61634ed/93677179-2d99-4ef4-9e17-fe70d49abfbf/content.m3u8', - 'info_dict': { - 'id': 'content', - 'ext': 'mp4', - 'title': 'content', - 'formats': 'mincount:8', - }, - 'params': { - # m3u8 downloads - 'skip_download': True, - }, - 'skip': 'video gone', - }, - # m3u8 served with Content-Type: text/plain - { - 'url': 'http://www.nacentapps.com/m3u8/index.m3u8', - 'info_dict': { - 'id': 'index', - 'ext': 'mp4', - 'title': 'index', - 'upload_date': '20140720', - 'formats': 'mincount:11', - }, - 'params': { - # m3u8 downloads - 'skip_download': True, - }, - 'skip': 'video gone', - }, - # google redirect - { - 'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE', - 'info_dict': { - 'id': 'cmQHVoWB5FY', - 'ext': 'mp4', - 'upload_date': '20130224', - 'uploader_id': 'TheVerge', - 'description': r're:^Chris Ziegler takes a look at the\.*', - 'uploader': 'The Verge', - 'title': 'First Firefox OS phones side-by-side', - }, - 'params': { - 'skip_download': False, - } - }, - { - # redirect in Refresh HTTP header - 
'url': 'https://www.facebook.com/l.php?u=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DpO8h3EaFRdo&h=TAQHsoToz&enc=AZN16h-b6o4Zq9pZkCCdOLNKMN96BbGMNtcFwHSaazus4JHT_MFYkAA-WARTX2kvsCIdlAIyHZjl6d33ILIJU7Jzwk_K3mcenAXoAzBNoZDI_Q7EXGDJnIhrGkLXo_LJ_pAa2Jzbx17UHMd3jAs--6j2zaeto5w9RTn8T_1kKg3fdC5WPX9Dbb18vzH7YFX0eSJmoa6SP114rvlkw6pkS1-T&s=1', - 'info_dict': { - 'id': 'pO8h3EaFRdo', - 'ext': 'mp4', - 'title': 'Tripeo Boiler Room x Dekmantel Festival DJ Set', - 'description': 'md5:6294cc1af09c4049e0652b51a2df10d5', - 'upload_date': '20150917', - 'uploader_id': 'brtvofficial', - 'uploader': 'Boiler Room', - }, - 'params': { - 'skip_download': False, - }, - }, - { - 'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html', - 'md5': '85b90ccc9d73b4acd9138d3af4c27f89', - 'info_dict': { - 'id': '13601338388002', - 'ext': 'mp4', - 'uploader': 'www.hodiho.fr', - 'title': 'R\u00e9gis plante sa Jeep', - } - }, - # bandcamp page with custom domain - { - 'add_ie': ['Bandcamp'], - 'url': 'http://bronyrock.com/track/the-pony-mash', - 'info_dict': { - 'id': '3235767654', - 'ext': 'mp3', - 'title': 'The Pony Mash', - 'uploader': 'M_Pallante', - }, - 'skip': 'There is a limit of 200 free downloads / month for the test song', - }, - { - # embedded brightcove video - # it also tests brightcove videos that need to set the 'Referer' - # in the http requests - 'add_ie': ['BrightcoveLegacy'], - 'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/', - 'info_dict': { - 'id': '2765128793001', - 'ext': 'mp4', - 'title': 'Le cours de bourse : l’analyse technique', - 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9', - 'uploader': 'BFM BUSINESS', - }, - 'params': { - 'skip_download': True, - }, - }, - { - # embedded with itemprop embedURL and video id spelled as `idVideo` - 'add_id': ['BrightcoveLegacy'], - 'url': 'http://bfmbusiness.bfmtv.com/mediaplayer/chroniques/olivier-delamarche/', - 'info_dict': { - 'id': '5255628253001', - 'ext': 'mp4', 
- 'title': 'md5:37c519b1128915607601e75a87995fc0', - 'description': 'md5:37f7f888b434bb8f8cc8dbd4f7a4cf26', - 'uploader': 'BFM BUSINESS', - 'uploader_id': '876450612001', - 'timestamp': 1482255315, - 'upload_date': '20161220', - }, - 'params': { - 'skip_download': True, - }, - }, - { - # https://github.com/ytdl-org/youtube-dl/issues/2253 - 'url': 'http://bcove.me/i6nfkrc3', - 'md5': '0ba9446db037002366bab3b3eb30c88c', - 'info_dict': { - 'id': '3101154703001', - 'ext': 'mp4', - 'title': 'Still no power', - 'uploader': 'thestar.com', - 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.', - }, - 'add_ie': ['BrightcoveLegacy'], - 'skip': 'video gone', - }, - { - 'url': 'http://www.championat.com/video/football/v/87/87499.html', - 'md5': 'fb973ecf6e4a78a67453647444222983', - 'info_dict': { - 'id': '3414141473001', - 'ext': 'mp4', - 'title': 'Видео. 
Удаление Дзагоева (ЦСКА)', - 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"', - 'uploader': 'Championat', - }, - }, - { - # https://github.com/ytdl-org/youtube-dl/issues/3541 - 'add_ie': ['BrightcoveLegacy'], - 'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1', - 'info_dict': { - 'id': '3866516442001', - 'ext': 'mp4', - 'title': 'Leer mij vrouwen kennen: Aflevering 1', - 'description': 'Leer mij vrouwen kennen: Aflevering 1', - 'uploader': 'SBS Broadcasting', - }, - 'skip': 'Restricted to Netherlands', - 'params': { - 'skip_download': True, # m3u8 download - }, - }, - { - # Brightcove video in ', webpage): - url = self._search_regex( - r'src=(["\'])(?P.+?partnerplayer.+?)\1', iframe, - 'player URL', default=None, group='url') - if url: - break - - if not url: - url = self._og_search_url(webpage) - - mobj = re.match( - self._VALID_URL, self._proto_relative_url(url.strip())) - - player_id = mobj.group('player_id') - if not display_id: - display_id = player_id - if player_id: - player_page = self._download_webpage( - url, display_id, note='Downloading player page', - errnote='Could not download player page') - video_id = self._search_regex( - r'\d+)' - _TEST = { - 'url': 'http://www.pearvideo.com/video_1076290', - 'info_dict': { - 'id': '1076290', - 'ext': 'mp4', - 'title': '小浣熊在主人家玻璃上滚石头:没砸', - 'description': 'md5:01d576b747de71be0ee85eb7cac25f9d', - 'timestamp': 1494275280, - 'upload_date': '20170508', - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - quality = qualities( - ('ldflv', 'ld', 'sdflv', 'sd', 'hdflv', 'hd', 'src')) - - formats = [{ - 'url': mobj.group('url'), - 'format_id': mobj.group('id'), - 'quality': quality(mobj.group('id')), - } for mobj in re.finditer( - r'(?P[a-zA-Z]+)Url\s*=\s*(["\'])(?P(?:https?:)?//.+?)\2', - webpage)] - self._sort_formats(formats) - - title = self._search_regex( - 
(r']+\bclass=(["\'])video-tt\1[^>]*>(?P[^<]+)', - r'<[^>]+\bdata-title=(["\'])(?P(?:(?!\1).)+)\1'), - webpage, 'title', group='value') - description = self._search_regex( - (r']+\bclass=(["\'])summary\1[^>]*>(?P[^<]+)', - r'<[^>]+\bdata-summary=(["\'])(?P(?:(?!\1).)+)\1'), - webpage, 'description', default=None, - group='value') or self._html_search_meta('Description', webpage) - timestamp = unified_timestamp(self._search_regex( - r']+\bclass=["\']date["\'][^>]*>([^<]+)', - webpage, 'timestamp', fatal=False)) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'timestamp': timestamp, - 'formats': formats, - } diff --git a/youtube_dl/extractor/peekvids.py b/youtube_dl/extractor/peekvids.py deleted file mode 100644 index c8aad564b..000000000 --- a/youtube_dl/extractor/peekvids.py +++ /dev/null @@ -1,193 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - get_element_by_class, - int_or_none, - merge_dicts, - url_or_none, -) - - -class PeekVidsIE(InfoExtractor): - _VALID_URL = r'''(?x) - https?://(?:www\.)?peekvids\.com/ - (?:(?:[^/?#]+/){2}|embed/?\?(?:[^#]*&)?v=) - (?P[^/?&#]*) - ''' - _TESTS = [{ - 'url': 'https://peekvids.com/pc/dane-jones-cute-redhead-with-perfect-tits-with-mini-vamp/BSyLMbN0YCd', - 'md5': '2ff6a357a9717dc9dc9894b51307e9a2', - 'info_dict': { - 'id': '1262717', - 'display_id': 'BSyLMbN0YCd', - 'title': ' Dane Jones - Cute redhead with perfect tits with Mini Vamp', - 'ext': 'mp4', - 'thumbnail': r're:^https?://.*\.jpg$', - 'description': 'md5:0a61df3620de26c0af8963b1a730cd69', - 'timestamp': 1642579329, - 'upload_date': '20220119', - 'duration': 416, - 'view_count': int, - 'age_limit': 18, - 'uploader': 'SEXYhub.com', - 'categories': list, - 'tags': list, - }, - }] - _DOMAIN = 'www.peekvids.com' - - def _get_detail(self, html): - return get_element_by_class('detail-video-block', html) - - def _real_extract(self, 
url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id, expected_status=429) - if '>Rate Limit Exceeded' in webpage: - raise ExtractorError( - '[%s] %s: %s' % (self.IE_NAME, video_id, 'You are suspected as a bot. Wait, or pass the captcha test on the site and provide --cookies.'), - expected=True) - - title = self._html_search_regex(r'(?s)]*>(.+?)', webpage, 'title') - - display_id = video_id - video_id = self._search_regex(r'(?s)]+\bdata-id\s*=\s*["\']?([\w-]+)', webpage, 'short video ID') - srcs = self._download_json( - 'https://%s/v-alt/%s' % (self._DOMAIN, video_id), video_id, - note='Downloading list of source files') - formats = [{ - 'url': f_url, - 'format_id': f_id, - 'height': int_or_none(f_id), - } for f_url, f_id in ( - (url_or_none(f_v), f_match.group(1)) - for f_v, f_match in ( - (v, re.match(r'^data-src(\d{3,})$', k)) - for k, v in srcs.items() if v) if f_match) - if f_url - ] - if not formats: - formats = [{'url': url} for url in srcs.values()] - self._sort_formats(formats) - - info = self._search_json_ld(webpage, video_id, expected_type='VideoObject', default={}) - info.pop('url', None) - # may not have found the thumbnail if it was in a list in the ld+json - info.setdefault('thumbnail', self._og_search_thumbnail(webpage)) - detail = self._get_detail(webpage) or '' - info['description'] = self._html_search_regex( - r'(?s)(.+?)(?:%s\s*<|]*>\s*%s\s*:\s*(.+?)' % (re.escape(name), ), - html, name, default='') - return [x for x in re.split(r'\s+', l) if x] - - return merge_dicts({ - 'id': video_id, - 'display_id': display_id, - 'age_limit': 18, - 'formats': formats, - 'categories': cat_tags('Categories', detail), - 'tags': cat_tags('Tags', detail), - 'uploader': self._html_search_regex(r'[Uu]ploaded\s+by\s(.+?)"', webpage, 'uploader', default=None), - }, info) - - -class PlayVidsIE(PeekVidsIE): - _VALID_URL = r'https?://(?:www\.)?playvids\.com/(?:embed/|\w\w?/)?(?P[^/?#]*)' - _TESTS = [{ - 'url': 
'https://www.playvids.com/U3pBrYhsjXM/pc/dane-jones-cute-redhead-with-perfect-tits-with-mini-vamp', - 'md5': '2f12e50213dd65f142175da633c4564c', - 'info_dict': { - 'id': '1978030', - 'display_id': 'U3pBrYhsjXM', - 'title': ' Dane Jones - Cute redhead with perfect tits with Mini Vamp', - 'ext': 'mp4', - 'thumbnail': r're:^https?://.*\.jpg$', - 'description': 'md5:0a61df3620de26c0af8963b1a730cd69', - 'timestamp': 1640435839, - 'upload_date': '20211225', - 'duration': 416, - 'view_count': int, - 'age_limit': 18, - 'uploader': 'SEXYhub.com', - 'categories': list, - 'tags': list, - }, - }, { - 'url': 'https://www.playvids.com/es/U3pBrYhsjXM/pc/dane-jones-cute-redhead-with-perfect-tits-with-mini-vamp', - 'only_matching': True, - }, { - 'url': 'https://www.playvids.com/embed/U3pBrYhsjXM', - 'only_matching': True, - }, { - 'url': 'https://www.playvids.com/bKmGLe3IwjZ/sv/brazzers-800-phone-sex-madison-ivy-always-on-the-line', - 'md5': 'e783986e596cafbf46411a174ab42ba6', - 'info_dict': { - 'id': '762385', - 'display_id': 'bKmGLe3IwjZ', - 'ext': 'mp4', - 'title': 'Brazzers - 1 800 Phone Sex: Madison Ivy Always On The Line 6', - 'description': 'md5:bdcd2db2b8ad85831a491d7c8605dcef', - 'timestamp': 1516958544, - 'upload_date': '20180126', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 480, - 'uploader': 'Brazzers', - 'age_limit': 18, - 'view_count': int, - 'age_limit': 18, - 'categories': list, - 'tags': list, - }, - }, { - 'url': 'https://www.playvids.com/v/47iUho33toY', - 'md5': 'b056b5049d34b648c1e86497cf4febce', - 'info_dict': { - 'id': '700621', - 'display_id': '47iUho33toY', - 'ext': 'mp4', - 'title': 'KATEE OWEN STRIPTIASE IN SEXY RED LINGERIE', - 'description': None, - 'timestamp': 1507052209, - 'upload_date': '20171003', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 332, - 'uploader': 'Cacerenele', - 'age_limit': 18, - 'view_count': int, - 'categories': list, - 'tags': list, - } - }, { - 'url': 
'https://www.playvids.com/z3_7iwWCmqt/sexy-teen-filipina-striptease-beautiful-pinay-bargirl-strips-and-dances', - 'md5': 'efa09be9f031314b7b7e3bc6510cd0df', - 'info_dict': { - 'id': '1523518', - 'display_id': 'z3_7iwWCmqt', - 'ext': 'mp4', - 'title': 'SEXY TEEN FILIPINA STRIPTEASE - Beautiful Pinay Bargirl Strips and Dances', - 'description': None, - 'timestamp': 1607470323, - 'upload_date': '20201208', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 593, - 'uploader': 'yorours', - 'age_limit': 18, - 'view_count': int, - 'categories': list, - 'tags': list, - }, - }] - _DOMAIN = 'www.playvids.com' - - def _get_detail(self, html): - return get_element_by_class('detail-block', html) diff --git a/youtube_dl/extractor/peertube.py b/youtube_dl/extractor/peertube.py deleted file mode 100644 index 3af533925..000000000 --- a/youtube_dl/extractor/peertube.py +++ /dev/null @@ -1,628 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..compat import compat_str -from ..utils import ( - int_or_none, - parse_resolution, - str_or_none, - try_get, - unified_timestamp, - url_or_none, - urljoin, -) - - -class PeerTubeIE(InfoExtractor): - _INSTANCES_RE = r'''(?: - # Taken from https://instances.joinpeertube.org/instances - peertube\.rainbowswingers\.net| - tube\.stanisic\.nl| - peer\.suiri\.us| - medias\.libox\.fr| - videomensoif\.ynh\.fr| - peertube\.travelpandas\.eu| - peertube\.rachetjay\.fr| - peertube\.montecsys\.fr| - tube\.eskuero\.me| - peer\.tube| - peertube\.umeahackerspace\.se| - tube\.nx-pod\.de| - video\.monsieurbidouille\.fr| - tube\.openalgeria\.org| - vid\.lelux\.fi| - video\.anormallostpod\.ovh| - tube\.crapaud-fou\.org| - peertube\.stemy\.me| - lostpod\.space| - exode\.me| - peertube\.snargol\.com| - vis\.ion\.ovh| - videosdulib\.re| - v\.mbius\.io| - videos\.judrey\.eu| - peertube\.osureplayviewer\.xyz| - peertube\.mathieufamily\.ovh| - www\.videos-libr\.es| - fightforinfo\.com| - 
peertube\.fediverse\.ru| - peertube\.oiseauroch\.fr| - video\.nesven\.eu| - v\.bearvideo\.win| - video\.qoto\.org| - justporn\.cc| - video\.vny\.fr| - peervideo\.club| - tube\.taker\.fr| - peertube\.chantierlibre\.org| - tube\.ipfixe\.info| - tube\.kicou\.info| - tube\.dodsorf\.as| - videobit\.cc| - video\.yukari\.moe| - videos\.elbinario\.net| - hkvideo\.live| - pt\.tux\.tf| - www\.hkvideo\.live| - FIGHTFORINFO\.com| - pt\.765racing\.com| - peertube\.gnumeria\.eu\.org| - nordenmedia\.com| - peertube\.co\.uk| - tube\.darfweb\.eu| - tube\.kalah-france\.org| - 0ch\.in| - vod\.mochi\.academy| - film\.node9\.org| - peertube\.hatthieves\.es| - video\.fitchfamily\.org| - peertube\.ddns\.net| - video\.ifuncle\.kr| - video\.fdlibre\.eu| - tube\.22decembre\.eu| - peertube\.harmoniescreatives\.com| - tube\.fabrigli\.fr| - video\.thedwyers\.co| - video\.bruitbruit\.com| - peertube\.foxfam\.club| - peer\.philoxweb\.be| - videos\.bugs\.social| - peertube\.malbert\.xyz| - peertube\.bilange\.ca| - libretube\.net| - diytelevision\.com| - peertube\.fedilab\.app| - libre\.video| - video\.mstddntfdn\.online| - us\.tv| - peertube\.sl-network\.fr| - peertube\.dynlinux\.io| - peertube\.david\.durieux\.family| - peertube\.linuxrocks\.online| - peerwatch\.xyz| - v\.kretschmann\.social| - tube\.otter\.sh| - yt\.is\.nota\.live| - tube\.dragonpsi\.xyz| - peertube\.boneheadmedia\.com| - videos\.funkwhale\.audio| - watch\.44con\.com| - peertube\.gcaillaut\.fr| - peertube\.icu| - pony\.tube| - spacepub\.space| - tube\.stbr\.io| - v\.mom-gay\.faith| - tube\.port0\.xyz| - peertube\.simounet\.net| - play\.jergefelt\.se| - peertube\.zeteo\.me| - tube\.danq\.me| - peertube\.kerenon\.com| - tube\.fab-l3\.org| - tube\.calculate\.social| - peertube\.mckillop\.org| - tube\.netzspielplatz\.de| - vod\.ksite\.de| - peertube\.laas\.fr| - tube\.govital\.net| - peertube\.stephenson\.cc| - bistule\.nohost\.me| - peertube\.kajalinifi\.de| - video\.ploud\.jp| - video\.omniatv\.com| - peertube\.ffs2play\.fr| - 
peertube\.leboulaire\.ovh| - peertube\.tronic-studio\.com| - peertube\.public\.cat| - peertube\.metalbanana\.net| - video\.1000i100\.fr| - peertube\.alter-nativ-voll\.de| - tube\.pasa\.tf| - tube\.worldofhauru\.xyz| - pt\.kamp\.site| - peertube\.teleassist\.fr| - videos\.mleduc\.xyz| - conf\.tube| - media\.privacyinternational\.org| - pt\.forty-two\.nl| - video\.halle-leaks\.de| - video\.grosskopfgames\.de| - peertube\.schaeferit\.de| - peertube\.jackbot\.fr| - tube\.extinctionrebellion\.fr| - peertube\.f-si\.org| - video\.subak\.ovh| - videos\.koweb\.fr| - peertube\.zergy\.net| - peertube\.roflcopter\.fr| - peertube\.floss-marketing-school\.com| - vloggers\.social| - peertube\.iriseden\.eu| - videos\.ubuntu-paris\.org| - peertube\.mastodon\.host| - armstube\.com| - peertube\.s2s\.video| - peertube\.lol| - tube\.open-plug\.eu| - open\.tube| - peertube\.ch| - peertube\.normandie-libre\.fr| - peertube\.slat\.org| - video\.lacaveatonton\.ovh| - peertube\.uno| - peertube\.servebeer\.com| - peertube\.fedi\.quebec| - tube\.h3z\.jp| - tube\.plus200\.com| - peertube\.eric\.ovh| - tube\.metadocs\.cc| - tube\.unmondemeilleur\.eu| - gouttedeau\.space| - video\.antirep\.net| - nrop\.cant\.at| - tube\.ksl-bmx\.de| - tube\.plaf\.fr| - tube\.tchncs\.de| - video\.devinberg\.com| - hitchtube\.fr| - peertube\.kosebamse\.com| - yunopeertube\.myddns\.me| - peertube\.varney\.fr| - peertube\.anon-kenkai\.com| - tube\.maiti\.info| - tubee\.fr| - videos\.dinofly\.com| - toobnix\.org| - videotape\.me| - voca\.tube| - video\.heromuster\.com| - video\.lemediatv\.fr| - video\.up\.edu\.ph| - balafon\.video| - video\.ivel\.fr| - thickrips\.cloud| - pt\.laurentkruger\.fr| - video\.monarch-pass\.net| - peertube\.artica\.center| - video\.alternanet\.fr| - indymotion\.fr| - fanvid\.stopthatimp\.net| - video\.farci\.org| - v\.lesterpig\.com| - video\.okaris\.de| - tube\.pawelko\.net| - peertube\.mablr\.org| - tube\.fede\.re| - pytu\.be| - evertron\.tv| - devtube\.dev-wiki\.de| - 
raptube\.antipub\.org| - video\.selea\.se| - peertube\.mygaia\.org| - video\.oh14\.de| - peertube\.livingutopia\.org| - peertube\.the-penguin\.de| - tube\.thechangebook\.org| - tube\.anjara\.eu| - pt\.pube\.tk| - video\.samedi\.pm| - mplayer\.demouliere\.eu| - widemus\.de| - peertube\.me| - peertube\.zapashcanon\.fr| - video\.latavernedejohnjohn\.fr| - peertube\.pcservice46\.fr| - peertube\.mazzonetto\.eu| - video\.irem\.univ-paris-diderot\.fr| - video\.livecchi\.cloud| - alttube\.fr| - video\.coop\.tools| - video\.cabane-libre\.org| - peertube\.openstreetmap\.fr| - videos\.alolise\.org| - irrsinn\.video| - video\.antopie\.org| - scitech\.video| - tube2\.nemsia\.org| - video\.amic37\.fr| - peertube\.freeforge\.eu| - video\.arbitrarion\.com| - video\.datsemultimedia\.com| - stoptrackingus\.tv| - peertube\.ricostrongxxx\.com| - docker\.videos\.lecygnenoir\.info| - peertube\.togart\.de| - tube\.postblue\.info| - videos\.domainepublic\.net| - peertube\.cyber-tribal\.com| - video\.gresille\.org| - peertube\.dsmouse\.net| - cinema\.yunohost\.support| - tube\.theocevaer\.fr| - repro\.video| - tube\.4aem\.com| - quaziinc\.com| - peertube\.metawurst\.space| - videos\.wakapo\.com| - video\.ploud\.fr| - video\.freeradical\.zone| - tube\.valinor\.fr| - refuznik\.video| - pt\.kircheneuenburg\.de| - peertube\.asrun\.eu| - peertube\.lagob\.fr| - videos\.side-ways\.net| - 91video\.online| - video\.valme\.io| - video\.taboulisme\.com| - videos-libr\.es| - tv\.mooh\.fr| - nuage\.acostey\.fr| - video\.monsieur-a\.fr| - peertube\.librelois\.fr| - videos\.pair2jeux\.tube| - videos\.pueseso\.club| - peer\.mathdacloud\.ovh| - media\.assassinate-you\.net| - vidcommons\.org| - ptube\.rousset\.nom\.fr| - tube\.cyano\.at| - videos\.squat\.net| - video\.iphodase\.fr| - peertube\.makotoworkshop\.org| - peertube\.serveur\.slv-valbonne\.fr| - vault\.mle\.party| - hostyour\.tv| - videos\.hack2g2\.fr| - libre\.tube| - pire\.artisanlogiciel\.net| - videos\.numerique-en-commun\.fr| - 
video\.netsyms\.com| - video\.die-partei\.social| - video\.writeas\.org| - peertube\.swarm\.solvingmaz\.es| - tube\.pericoloso\.ovh| - watching\.cypherpunk\.observer| - videos\.adhocmusic\.com| - tube\.rfc1149\.net| - peertube\.librelabucm\.org| - videos\.numericoop\.fr| - peertube\.koehn\.com| - peertube\.anarchmusicall\.net| - tube\.kampftoast\.de| - vid\.y-y\.li| - peertube\.xtenz\.xyz| - diode\.zone| - tube\.egf\.mn| - peertube\.nomagic\.uk| - visionon\.tv| - videos\.koumoul\.com| - video\.rastapuls\.com| - video\.mantlepro\.com| - video\.deadsuperhero\.com| - peertube\.musicstudio\.pro| - peertube\.we-keys\.fr| - artitube\.artifaille\.fr| - peertube\.ethernia\.net| - tube\.midov\.pl| - peertube\.fr| - watch\.snoot\.tube| - peertube\.donnadieu\.fr| - argos\.aquilenet\.fr| - tube\.nemsia\.org| - tube\.bruniau\.net| - videos\.darckoune\.moe| - tube\.traydent\.info| - dev\.videos\.lecygnenoir\.info| - peertube\.nayya\.org| - peertube\.live| - peertube\.mofgao\.space| - video\.lequerrec\.eu| - peertube\.amicale\.net| - aperi\.tube| - tube\.ac-lyon\.fr| - video\.lw1\.at| - www\.yiny\.org| - videos\.pofilo\.fr| - tube\.lou\.lt| - choob\.h\.etbus\.ch| - tube\.hoga\.fr| - peertube\.heberge\.fr| - video\.obermui\.de| - videos\.cloudfrancois\.fr| - betamax\.video| - video\.typica\.us| - tube\.piweb\.be| - video\.blender\.org| - peertube\.cat| - tube\.kdy\.ch| - pe\.ertu\.be| - peertube\.social| - videos\.lescommuns\.org| - tv\.datamol\.org| - videonaute\.fr| - dialup\.express| - peertube\.nogafa\.org| - megatube\.lilomoino\.fr| - peertube\.tamanoir\.foucry\.net| - peertube\.devosi\.org| - peertube\.1312\.media| - tube\.bootlicker\.party| - skeptikon\.fr| - video\.blueline\.mg| - tube\.homecomputing\.fr| - tube\.ouahpiti\.info| - video\.tedomum\.net| - video\.g3l\.org| - fontube\.fr| - peertube\.gaialabs\.ch| - tube\.kher\.nl| - peertube\.qtg\.fr| - video\.migennes\.net| - tube\.p2p\.legal| - troll\.tv| - videos\.iut-orsay\.fr| - peertube\.solidev\.net| - 
videos\.cemea\.org| - video\.passageenseine\.fr| - videos\.festivalparminous\.org| - peertube\.touhoppai\.moe| - sikke\.fi| - peer\.hostux\.social| - share\.tube| - peertube\.walkingmountains\.fr| - videos\.benpro\.fr| - peertube\.parleur\.net| - peertube\.heraut\.eu| - tube\.aquilenet\.fr| - peertube\.gegeweb\.eu| - framatube\.org| - thinkerview\.video| - tube\.conferences-gesticulees\.net| - peertube\.datagueule\.tv| - video\.lqdn\.fr| - tube\.mochi\.academy| - media\.zat\.im| - video\.colibris-outilslibres\.org| - tube\.svnet\.fr| - peertube\.video| - peertube3\.cpy\.re| - peertube2\.cpy\.re| - videos\.tcit\.fr| - peertube\.cpy\.re| - canard\.tube - )''' - _UUID_RE = r'[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}' - _API_BASE = 'https://%s/api/v1/videos/%s/%s' - _VALID_URL = r'''(?x) - (?: - peertube:(?P[^:]+):| - https?://(?P%s)/(?:videos/(?:watch|embed)|api/v\d/videos)/ - ) - (?P%s) - ''' % (_INSTANCES_RE, _UUID_RE) - _TESTS = [{ - 'url': 'https://framatube.org/videos/watch/9c9de5e8-0a1e-484a-b099-e80766180a6d', - 'md5': '9bed8c0137913e17b86334e5885aacff', - 'info_dict': { - 'id': '9c9de5e8-0a1e-484a-b099-e80766180a6d', - 'ext': 'mp4', - 'title': 'What is PeerTube?', - 'description': 'md5:3fefb8dde2b189186ce0719fda6f7b10', - 'thumbnail': r're:https?://.*\.(?:jpg|png)', - 'timestamp': 1538391166, - 'upload_date': '20181001', - 'uploader': 'Framasoft', - 'uploader_id': '3', - 'uploader_url': 'https://framatube.org/accounts/framasoft', - 'channel': 'Les vidéos de Framasoft', - 'channel_id': '2', - 'channel_url': 'https://framatube.org/video-channels/bf54d359-cfad-4935-9d45-9d6be93f63e8', - 'language': 'en', - 'license': 'Attribution - Share Alike', - 'duration': 113, - 'view_count': int, - 'like_count': int, - 'dislike_count': int, - 'tags': ['framasoft', 'peertube'], - 'categories': ['Science & Technology'], - } - }, { - # Issue #26002 - 'url': 'peertube:spacepub.space:d8943b2d-8280-497b-85ec-bc282ec2afdc', - 'info_dict': { - 'id': 
'd8943b2d-8280-497b-85ec-bc282ec2afdc', - 'ext': 'mp4', - 'title': 'Dot matrix printer shell demo', - 'uploader_id': '3', - 'timestamp': 1587401293, - 'upload_date': '20200420', - 'uploader': 'Drew DeVault', - } - }, { - 'url': 'https://peertube.tamanoir.foucry.net/videos/watch/0b04f13d-1e18-4f1d-814e-4979aa7c9c44', - 'only_matching': True, - }, { - # nsfw - 'url': 'https://tube.22decembre.eu/videos/watch/9bb88cd3-9959-46d9-9ab9-33d2bb704c39', - 'only_matching': True, - }, { - 'url': 'https://tube.22decembre.eu/videos/embed/fed67262-6edb-4d1c-833b-daa9085c71d7', - 'only_matching': True, - }, { - 'url': 'https://tube.openalgeria.org/api/v1/videos/c1875674-97d0-4c94-a058-3f7e64c962e8', - 'only_matching': True, - }, { - 'url': 'peertube:video.blender.org:b37a5b9f-e6b5-415c-b700-04a5cd6ec205', - 'only_matching': True, - }] - - @staticmethod - def _extract_peertube_url(webpage, source_url): - mobj = re.match( - r'https?://(?P[^/]+)/videos/(?:watch|embed)/(?P%s)' - % PeerTubeIE._UUID_RE, source_url) - if mobj and any(p in webpage for p in ( - 'PeerTube<', - 'There will be other non JS-based clients to access PeerTube', - '>We are sorry but it seems that PeerTube is not compatible with your web browser.<')): - return 'peertube:%s:%s' % mobj.group('host', 'id') - - @staticmethod - def _extract_urls(webpage, source_url): - entries = re.findall( - r'''(?x)<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//%s/videos/embed/%s)''' - % (PeerTubeIE._INSTANCES_RE, PeerTubeIE._UUID_RE), webpage) - if not entries: - peertube_url = PeerTubeIE._extract_peertube_url(webpage, source_url) - if peertube_url: - entries = [peertube_url] - return entries - - def _call_api(self, host, video_id, path, note=None, errnote=None, fatal=True): - return self._download_json( - self._API_BASE % (host, video_id, path), video_id, - note=note, errnote=errnote, fatal=fatal) - - def _get_subtitles(self, host, video_id): - captions = self._call_api( - host, video_id, 'captions', note='Downloading captions JSON', 
- fatal=False) - if not isinstance(captions, dict): - return - data = captions.get('data') - if not isinstance(data, list): - return - subtitles = {} - for e in data: - language_id = try_get(e, lambda x: x['language']['id'], compat_str) - caption_url = urljoin('https://%s' % host, e.get('captionPath')) - if not caption_url: - continue - subtitles.setdefault(language_id or 'en', []).append({ - 'url': caption_url, - }) - return subtitles - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - host = mobj.group('host') or mobj.group('host_2') - video_id = mobj.group('id') - - video = self._call_api( - host, video_id, '', note='Downloading video JSON') - - title = video['name'] - - formats = [] - files = video.get('files') or [] - for playlist in (video.get('streamingPlaylists') or []): - if not isinstance(playlist, dict): - continue - playlist_files = playlist.get('files') - if not (playlist_files and isinstance(playlist_files, list)): - continue - files.extend(playlist_files) - for file_ in files: - if not isinstance(file_, dict): - continue - file_url = url_or_none(file_.get('fileUrl')) - if not file_url: - continue - file_size = int_or_none(file_.get('size')) - format_id = try_get( - file_, lambda x: x['resolution']['label'], compat_str) - f = parse_resolution(format_id) - f.update({ - 'url': file_url, - 'format_id': format_id, - 'filesize': file_size, - }) - if format_id == '0p': - f['vcodec'] = 'none' - else: - f['fps'] = int_or_none(file_.get('fps')) - formats.append(f) - self._sort_formats(formats) - - description = video.get('description') - if len(description) >= 250: - # description is shortened - full_description = self._call_api( - host, video_id, 'description', note='Downloading description JSON', - fatal=False) - - if isinstance(full_description, dict): - description = str_or_none(full_description.get('description')) or description - - subtitles = self.extract_subtitles(host, video_id) - - def data(section, field, type_): - return 
try_get(video, lambda x: x[section][field], type_) - - def account_data(field, type_): - return data('account', field, type_) - - def channel_data(field, type_): - return data('channel', field, type_) - - category = data('category', 'label', compat_str) - categories = [category] if category else None - - nsfw = video.get('nsfw') - if nsfw is bool: - age_limit = 18 if nsfw else 0 - else: - age_limit = None - - webpage_url = 'https://%s/videos/watch/%s' % (host, video_id) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnail': urljoin(webpage_url, video.get('thumbnailPath')), - 'timestamp': unified_timestamp(video.get('publishedAt')), - 'uploader': account_data('displayName', compat_str), - 'uploader_id': str_or_none(account_data('id', int)), - 'uploader_url': url_or_none(account_data('url', compat_str)), - 'channel': channel_data('displayName', compat_str), - 'channel_id': str_or_none(channel_data('id', int)), - 'channel_url': url_or_none(channel_data('url', compat_str)), - 'language': data('language', 'id', compat_str), - 'license': data('licence', 'label', compat_str), - 'duration': int_or_none(video.get('duration')), - 'view_count': int_or_none(video.get('views')), - 'like_count': int_or_none(video.get('likes')), - 'dislike_count': int_or_none(video.get('dislikes')), - 'age_limit': age_limit, - 'tags': try_get(video, lambda x: x['tags'], list), - 'categories': categories, - 'formats': formats, - 'subtitles': subtitles, - 'webpage_url': webpage_url, - } diff --git a/youtube_dl/extractor/people.py b/youtube_dl/extractor/people.py deleted file mode 100644 index 6ca95715e..000000000 --- a/youtube_dl/extractor/people.py +++ /dev/null @@ -1,32 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor - - -class PeopleIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?people\.com/people/videos/0,,(?P<id>\d+),00\.html' - - _TEST = { - 'url': 
class PerformGroupIE(InfoExtractor):
    """Extractor for videos served through the Perform Group eplayer."""
    _VALID_URL = r'https?://player\.performgroup\.com/eplayer(?:/eplayer\.html|\.js)#/?(?P<id>[0-9a-f]{26})\.(?P<auth_token>[0-9a-z]{26})'
    _TESTS = [{
        # http://www.faz.net/aktuell/sport/fussball/wm-2018-playoffs-schweiz-besiegt-nordirland-1-0-15286104.html
        'url': 'http://player.performgroup.com/eplayer/eplayer.html#d478c41c5d192f56b9aa859de8.1w4crrej5w14e1ed4s1ce4ykab',
        'md5': '259cb03d142e2e52471e8837ecacb29f',
        'info_dict': {
            'id': 'xgrwobuzumes1lwjxtcdpwgxd',
            'ext': 'mp4',
            'title': 'Liga MX: Keine Einsicht nach Horrorfoul',
            'description': 'md5:7cd3b459c82725b021e046ab10bf1c5b',
            'timestamp': 1511533477,
            'upload_date': '20171124',
        }
    }]

    def _call_api(self, service, auth_token, content_id, referer_url):
        """Query one performfeeds service endpoint and return its JSON."""
        return self._download_json(
            'http://ep3.performfeeds.com/ep%s/%s/%s/' % (service, auth_token, content_id),
            content_id, headers={
                'Referer': referer_url,
                'Origin': 'http://player.performgroup.com',
            }, query={
                '_fmt': 'json',
            })

    def _real_extract(self, url):
        """Resolve the bootstrap config, then the VOD description, and build
        formats from the HLS/HDS/progressive media entries."""
        player_id, auth_token = re.search(self._VALID_URL, url).groups()

        bootstrap = self._call_api('bootstrap', auth_token, player_id, url)
        video = bootstrap['config']['dataSource']['sourceItems'][0]['videos'][0]
        video_id = video['uuid']

        vod = self._call_api('vod', auth_token, video_id, url)
        media = vod['videos']['video'][0]['media']

        formats = []

        hls_url = media.get('hls', {}).get('url')
        if hls_url:
            formats.extend(self._extract_m3u8_formats(
                hls_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls',
                fatal=False))

        hds_url = media.get('hds', {}).get('url')
        if hds_url:
            formats.extend(self._extract_f4m_formats(
                hds_url + '?hdcore', video_id, f4m_id='hds', fatal=False))

        for content in media.get('content', []):
            content_url = content.get('url')
            if not content_url:
                continue
            tbr = int_or_none(content.get('bitrate'), 1000)
            format_id = 'http-%d' % tbr if tbr else 'http'
            formats.append({
                'format_id': format_id,
                'url': content_url,
                'tbr': tbr,
                'width': int_or_none(content.get('width')),
                'height': int_or_none(content.get('height')),
                'filesize': int_or_none(content.get('fileSize')),
                'vcodec': content.get('type'),
                'fps': int_or_none(content.get('videoFrameRate')),
                'vbr': int_or_none(content.get('videoRate'), 1000),
                'abr': int_or_none(content.get('audioRate'), 1000),
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': video['title'],
            'description': video.get('description'),
            'thumbnail': video.get('poster'),
            'duration': int_or_none(video.get('duration')),
            'timestamp': int_or_none(video.get('publishedTime'), 1000),
            'formats': formats,
        }
class PeriscopeBaseIE(InfoExtractor):
    """Shared helpers for the Periscope extractors."""
    # Periscope's CDN rejects m3u8 requests without this referer.
    _M3U8_HEADERS = {
        'Referer': 'https://www.periscope.tv/'
    }

    def _call_api(self, method, query, item_id):
        """Call a public /api/v2/ endpoint and return its JSON."""
        return self._download_json(
            'https://api.periscope.tv/api/v2/%s' % method,
            item_id, query=query)

    def _parse_broadcast_data(self, broadcast, video_id):
        """Build the common info dict from a broadcast JSON object."""
        title = broadcast.get('status') or 'Periscope Broadcast'
        uploader = broadcast.get('user_display_name') or broadcast.get('username')
        title = '%s - %s' % (uploader, title) if uploader else title
        # Fix: state may be missing; do not call .lower() on None.
        is_live = (broadcast.get('state') or '').lower() == 'running'

        thumbnails = [{
            'url': broadcast[image],
        } for image in ('image_url', 'image_url_small') if broadcast.get(image)]

        return {
            'id': broadcast.get('id') or video_id,
            'title': self._live_title(title) if is_live else title,
            'timestamp': parse_iso8601(broadcast.get('created_at')),
            'uploader': uploader,
            'uploader_id': broadcast.get('user_id') or broadcast.get('username'),
            'thumbnails': thumbnails,
            'view_count': int_or_none(broadcast.get('total_watched')),
            'tags': broadcast.get('tags'),
            'is_live': is_live,
        }

    @staticmethod
    def _extract_common_format_info(broadcast):
        # Fix: guard against a missing state (None has no .lower()).
        return (broadcast.get('state') or '').lower(), int_or_none(broadcast.get('width')), int_or_none(broadcast.get('height'))

    @staticmethod
    def _add_width_and_height(f, width, height):
        # Fill in width/height only when the format dict lacks them.
        for key, val in (('width', width), ('height', height)):
            if not f.get(key):
                f[key] = val

    def _extract_pscp_m3u8_formats(self, m3u8_url, video_id, format_id, state, width, height, fatal=True):
        """Extract m3u8 formats, attaching the referer header and falling
        back to the broadcast's own dimensions for single-variant streams."""
        m3u8_formats = self._extract_m3u8_formats(
            m3u8_url, video_id, 'mp4',
            entry_protocol='m3u8_native'
            if state in ('ended', 'timed_out') else 'm3u8',
            m3u8_id=format_id, fatal=fatal, headers=self._M3U8_HEADERS)
        if len(m3u8_formats) == 1:
            self._add_width_and_height(m3u8_formats[0], width, height)
        for f in m3u8_formats:
            f.setdefault('http_headers', {}).update(self._M3U8_HEADERS)
        return m3u8_formats


class PeriscopeIE(PeriscopeBaseIE):
    IE_DESC = 'Periscope'
    IE_NAME = 'periscope'
    _VALID_URL = r'https?://(?:www\.)?(?:periscope|pscp)\.tv/[^/]+/(?P<id>[^/?#]+)'
    # Alive example URLs can be found here https://www.periscope.tv/
    _TESTS = [{
        'url': 'https://www.periscope.tv/w/aJUQnjY3MjA3ODF8NTYxMDIyMDl2zCg2pECBgwTqRpQuQD352EMPTKQjT4uqlM3cgWFA-g==',
        'md5': '65b57957972e503fcbbaeed8f4fa04ca',
        'info_dict': {
            'id': '56102209',
            'ext': 'mp4',
            'title': 'Bec Boop - 🚠✈️🇬🇧 Fly above #London in Emirates Air Line cable car at night 🇬🇧✈️🚠 #BoopScope 🎀💗',
            'timestamp': 1438978559,
            'upload_date': '20150807',
            'uploader': 'Bec Boop',
            'uploader_id': '1465763',
        },
        'skip': 'Expires in 24 hours',
    }, {
        'url': 'https://www.periscope.tv/w/1ZkKzPbMVggJv',
        'only_matching': True,
    }, {
        'url': 'https://www.periscope.tv/bastaakanoggano/1OdKrlkZZjOJX',
        'only_matching': True,
    }, {
        'url': 'https://www.periscope.tv/w/1ZkKzPbMVggJv',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_url(webpage):
        """Return the first embedded Periscope iframe URL, if any."""
        mobj = re.search(
            r'<iframe[^>]+src=([\'"])(?P<url>(?:https?:)?//(?:www\.)?(?:periscope|pscp)\.tv/(?:(?!\1).)+)\1', webpage)
        if mobj:
            return mobj.group('url')

    def _real_extract(self, url):
        token = self._match_id(url)

        stream = self._call_api(
            'accessVideoPublic', {'broadcast_id': token}, token)

        broadcast = stream['broadcast']
        info = self._parse_broadcast_data(broadcast, token)

        # Reuse the base-class helper instead of duplicating the
        # state/width/height extraction inline.
        state, width, height = self._extract_common_format_info(broadcast)

        video_urls = set()
        formats = []
        for format_id in ('replay', 'rtmp', 'hls', 'https_hls', 'lhls', 'lhlsweb'):
            video_url = stream.get(format_id + '_url')
            if not video_url or video_url in video_urls:
                continue
            video_urls.add(video_url)
            if format_id != 'rtmp':
                m3u8_formats = self._extract_pscp_m3u8_formats(
                    video_url, token, format_id, state, width, height, False)
                formats.extend(m3u8_formats)
                continue
            rtmp_format = {
                'url': video_url,
                'ext': 'flv' if format_id == 'rtmp' else 'mp4',
            }
            # Fix: _add_width_and_height takes (f, width, height); the
            # previous one-argument call raised TypeError for rtmp formats.
            self._add_width_and_height(rtmp_format, width, height)
            formats.append(rtmp_format)
        self._sort_formats(formats)

        info['formats'] = formats
        return info


class PeriscopeUserIE(PeriscopeBaseIE):
    _VALID_URL = r'https?://(?:www\.)?(?:periscope|pscp)\.tv/(?P<id>[^/]+)/?$'
    IE_DESC = 'Periscope user videos'
    IE_NAME = 'periscope:user'

    _TEST = {
        'url': 'https://www.periscope.tv/LularoeHusbandMike/',
        'info_dict': {
            'id': 'LularoeHusbandMike',
            'title': 'LULAROE HUSBAND MIKE',
            'description': 'md5:6cf4ec8047768098da58e446e82c82f0',
        },
        # Periscope only shows videos in the last 24 hours, so it's possible to
        # get 0 videos
        'playlist_mincount': 0,
    }

    def _real_extract(self, url):
        """Return a playlist of a user's recent broadcasts (last 24h)."""
        user_name = self._match_id(url)

        webpage = self._download_webpage(url, user_name)

        # The user page ships its state in an HTML-escaped data-store blob.
        data_store = self._parse_json(
            unescapeHTML(self._search_regex(
                r'data-store=(["\'])(?P<data>.+?)\1',
                webpage, 'data store', default='{}', group='data')),
            user_name)

        user = list(data_store['UserCache']['users'].values())[0]['user']
        user_id = user['id']
        session_id = data_store['SessionToken']['public']['broadcastHistory']['token']['session_id']

        broadcasts = self._call_api(
            'getUserBroadcastsPublic',
            {'user_id': user_id, 'session_id': session_id},
            user_name)['broadcasts']

        broadcast_ids = [
            broadcast['id'] for broadcast in broadcasts if broadcast.get('id')]

        title = user.get('display_name') or user.get('username') or user_name
        description = user.get('description')

        entries = [
            self.url_result(
                'https://www.periscope.tv/%s/%s' % (user_name, broadcast_id))
            for broadcast_id in broadcast_ids]

        return self.playlist_result(entries, user_id, title, description)
class PhoenixIE(ZDFBaseIE):
    """Extractor for phoenix.de articles.

    An article either embeds a Youtube video (handed off to YoutubeIE) or a
    ZDF-hosted stream that is resolved through the phoenix PTMD endpoint.
    """
    IE_NAME = 'phoenix.de'
    _VALID_URL = r'https?://(?:www\.)?phoenix\.de/(?:[^/]+/)*[^/?#&]*-a-(?P<id>\d+)\.html'
    _TESTS = [{
        # Same as https://www.zdf.de/politik/phoenix-sendungen/wohin-fuehrt-der-protest-in-der-pandemie-100.html
        'url': 'https://www.phoenix.de/sendungen/ereignisse/corona-nachgehakt/wohin-fuehrt-der-protest-in-der-pandemie-a-2050630.html',
        'md5': '34ec321e7eb34231fd88616c65c92db0',
        'info_dict': {
            'id': '210222_phx_nachgehakt_corona_protest',
            'ext': 'mp4',
            'title': 'Wohin führt der Protest in der Pandemie?',
            'description': 'md5:7d643fe7f565e53a24aac036b2122fbd',
            'duration': 1691,
            'timestamp': 1613902500,
            'upload_date': '20210221',
            'uploader': 'Phoenix',
            'series': 'corona nachgehakt',
            'episode': 'Wohin führt der Protest in der Pandemie?',
        },
    }, {
        # Youtube embed
        'url': 'https://www.phoenix.de/sendungen/gespraeche/phoenix-streitgut-brennglas-corona-a-1965505.html',
        'info_dict': {
            'id': 'hMQtqFYjomk',
            'ext': 'mp4',
            'title': 'phoenix streitgut: Brennglas Corona - Wie gerecht ist unsere Gesellschaft?',
            'description': 'md5:ac7a02e2eb3cb17600bc372e4ab28fdd',
            'duration': 3509,
            'upload_date': '20201219',
            'uploader': 'phoenix',
            'uploader_id': 'phoenix',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.phoenix.de/entwicklungen-in-russland-a-2044720.html',
        'only_matching': True,
    }, {
        # no media
        'url': 'https://www.phoenix.de/sendungen/dokumentationen/mit-dem-jumbo-durch-die-nacht-a-89625.html',
        'only_matching': True,
    }, {
        # Same as https://www.zdf.de/politik/phoenix-sendungen/die-gesten-der-maechtigen-100.html
        'url': 'https://www.phoenix.de/sendungen/dokumentationen/gesten-der-maechtigen-i-a-89468.html?ref=suche',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        article_id = self._match_id(url)

        article = self._download_json(
            'https://www.phoenix.de/response/id/%s' % article_id, article_id,
            'Downloading article JSON')

        video = article['absaetze'][0]
        title = video.get('titel') or article.get('subtitel')

        # Youtube embeds carry their own video id and are handed off wholesale.
        if video.get('typ') == 'video-youtube':
            video_id = video['id']
            return self.url_result(
                video_id, ie=YoutubeIE.ie_key(), video_id=video_id,
                video_title=title)

        video_id = compat_str(video.get('basename') or video.get('content'))

        details = self._download_json(
            'https://www.phoenix.de/php/mediaplayer/data/beitrags_details.php',
            video_id, 'Downloading details JSON', query={
                'ak': 'web',
                'ptmd': 'true',
                'id': video_id,
                'profile': 'player2',
            })

        title = title or details['title']
        content_id = details['tracking']['nielsen']['content']['assetid']

        info = self._extract_ptmd(
            'https://tmd.phoenix.de/tmd/2/ngplayer_2_3/vod/ptmd/phoenix/%s' % content_id,
            content_id, None, url)

        def nielsen_content(field, type_=None):
            # Most article metadata lives inside the nielsen tracking blob.
            return try_get(
                details,
                lambda x: x['tracking']['nielsen']['content'][field], type_)

        thumbnails = []
        layouts = try_get(
            details, lambda x: x['teaserImageRef']['layouts'], dict) or {}
        for layout, image_url in layouts.items():
            image_url = urljoin(url, image_url)
            if not image_url:
                continue
            thumbnail = {
                'url': image_url,
            }
            # Layout keys shaped like "640x360" encode the dimensions.
            dimensions = re.match('^([0-9]+)x([0-9]+)$', layout)
            if dimensions:
                thumbnail['width'] = int(dimensions.group(1))
                thumbnail['height'] = int(dimensions.group(2))
            thumbnails.append(thumbnail)

        return merge_dicts(info, {
            'id': content_id,
            'title': title,
            'description': details.get('leadParagraph'),
            'duration': int_or_none(nielsen_content('length')),
            'thumbnails': thumbnails,
            'timestamp': unified_timestamp(details.get('editorialDate')),
            'uploader': details.get('tvService'),
            'series': nielsen_content('program', compat_str),
            'episode': title if details.get('contentType') == 'episode' else None,
        })
class PhotobucketIE(InfoExtractor):
    """Extractor for Photobucket-hosted flv/mp4 media pages."""
    _VALID_URL = r'https?://(?:[a-z0-9]+\.)?photobucket\.com/.*(([\?\&]current=)|_)(?P<id>.*)\.(?P<ext>(flv)|(mp4))'
    _TEST = {
        'url': 'http://media.photobucket.com/user/rachaneronas/media/TiredofLinkBuildingTryBacklinkMyDomaincom_zpsc0c3b9fa.mp4.html?filters[term]=search&filters[primary]=videos&filters[secondary]=images&sort=1&o=0',
        'md5': '7dabfb92b0a31f6c16cebc0f8e60ff99',
        'info_dict': {
            'id': 'zpsc0c3b9fa',
            'ext': 'mp4',
            'timestamp': 1367669341,
            'upload_date': '20130504',
            'uploader': 'rachaneronas',
            'title': 'Tired of Link Building? Try BacklinkMyDomain.com!',
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        video_extension = mobj.group('ext')

        webpage = self._download_webpage(url, video_id)

        # The page embeds a JSON blob describing the media.
        self.report_extraction(video_id)
        info = json.loads(self._search_regex(
            r'Pb\.Data\.Shared\.put\(Pb\.Data\.Shared\.MEDIA, (.*?)\);',
            webpage, 'info json'))
        media_url = compat_urllib_parse_unquote(self._html_search_regex(
            r'file=(.+\.mp4)', info['linkcodes']['html'], 'url'))
        return {
            'id': video_id,
            'url': media_url,
            'uploader': info['username'],
            'timestamp': info['creationDate'],
            'title': info['title'],
            'ext': video_extension,
            'thumbnail': info['thumbUrl'],
        }


class PicartoIE(InfoExtractor):
    """Extractor for live Picarto channels."""
    _VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)'
    _TEST = {
        'url': 'https://picarto.tv/Setz',
        'info_dict': {
            'id': 'Setz',
            'ext': 'mp4',
            'title': 're:^Setz [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'timestamp': int,
            'is_live': True
        },
        'skip': 'Stream is offline',
    }

    @classmethod
    def suitable(cls, url):
        # VOD URLs are handled by PicartoVodIE instead.
        if PicartoVodIE.suitable(url):
            return False
        return super(PicartoIE, cls).suitable(url)

    def _real_extract(self, url):
        channel_id = self._match_id(url)

        # Channel metadata and the load balancer endpoint come from one
        # GraphQL request.
        data = self._download_json(
            'https://ptvintern.picarto.tv/ptvapi', channel_id, query={
                'query': '''{
  channel(name: "%s") {
    adult
    id
    online
    stream_name
    title
  }
  getLoadBalancerUrl(channel_name: "%s") {
    url
  }
}''' % (channel_id, channel_id),
            })['data']
        metadata = data['channel']

        if metadata.get('online') == 0:
            raise ExtractorError('Stream is offline', expected=True)
        title = metadata['title']

        cdn_data = self._download_json(
            data['getLoadBalancerUrl']['url'] + '/stream/json_' + metadata['stream_name'] + '.js',
            channel_id, 'Downloading load balancing info')

        formats = []
        for source in (cdn_data.get('source') or []):
            source_url = source.get('url')
            if not source_url:
                continue
            source_type = source.get('type')
            if source_type == 'html5/application/vnd.apple.mpegurl':
                formats.extend(self._extract_m3u8_formats(
                    source_url, channel_id, 'mp4', m3u8_id='hls', fatal=False))
            elif source_type == 'html5/video/mp4':
                formats.append({
                    'url': source_url,
                })
        self._sort_formats(formats)

        # adult may be absent; only a definite boolean yields an age limit.
        adult = metadata.get('adult')
        age_limit = None if adult is None else (18 if adult is True else 0)

        return {
            'id': channel_id,
            'title': self._live_title(title.strip()),
            'is_live': True,
            'channel': channel_id,
            'channel_id': metadata.get('id'),
            'channel_url': 'https://picarto.tv/%s' % channel_id,
            'age_limit': age_limit,
            'formats': formats,
        }


class PicartoVodIE(InfoExtractor):
    """Extractor for Picarto video-popout VODs."""
    _VALID_URL = r'https?://(?:www.)?picarto\.tv/videopopout/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://picarto.tv/videopopout/ArtofZod_2017.12.12.00.13.23.flv',
        'md5': '3ab45ba4352c52ee841a28fb73f2d9ca',
        'info_dict': {
            'id': 'ArtofZod_2017.12.12.00.13.23.flv',
            'ext': 'mp4',
            'title': 'ArtofZod_2017.12.12.00.13.23.flv',
            'thumbnail': r're:^https?://.*\.jpg'
        },
    }, {
        'url': 'https://picarto.tv/videopopout/Plague',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        # The player is initialised with a JS object carrying the vod m3u8.
        player_config = self._parse_json(
            self._search_regex(
                r'(?s)#vod-player["\']\s*,\s*(\{.+?\})\s*\)', webpage,
                video_id),
            video_id, transform_source=js_to_json)

        formats = self._extract_m3u8_formats(
            player_config['vod'], video_id, 'mp4',
            entry_protocol='m3u8_native', m3u8_id='hls')
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': video_id,
            'thumbnail': player_config.get('vodThumb'),
            'formats': formats,
        }
- (?: - olympusattelecom| - vibebyvista - )| - (?:api|player)\.multicastmedia| - (?:api-ovp|player)\.piksel - )\.com| - (?: - mz-edge\.stream\.co| - movie-s\.nhk\.or - )\.jp| - vidego\.baltimorecity\.gov - )/v/(?:refid/(?P<refid>[^/]+)/prefid/)?(?P<id>[\w-]+)''' - _TESTS = [ - { - 'url': 'http://player.piksel.com/v/ums2867l', - 'md5': '34e34c8d89dc2559976a6079db531e85', - 'info_dict': { - 'id': 'ums2867l', - 'ext': 'mp4', - 'title': 'GX-005 with Caption', - 'timestamp': 1481335659, - 'upload_date': '20161210' - } - }, - { - # Original source: http://www.uscourts.gov/cameras-courts/state-washington-vs-donald-j-trump-et-al - 'url': 'https://player.piksel.com/v/v80kqp41', - 'md5': '753ddcd8cc8e4fa2dda4b7be0e77744d', - 'info_dict': { - 'id': 'v80kqp41', - 'ext': 'mp4', - 'title': 'WAW- State of Washington vs. Donald J. Trump, et al', - 'description': 'State of Washington vs. Donald J. Trump, et al, Case Number 17-CV-00141-JLR, TRO Hearing, Civil Rights Case, 02/3/2017, 1:00 PM (PST), Seattle Federal Courthouse, Seattle, WA, Judge James L. 
Robart presiding.', - 'timestamp': 1486171129, - 'upload_date': '20170204' - } - }, - { - # https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2019240/ - 'url': 'http://player.piksel.com/v/refid/nhkworld/prefid/nw_vod_v_en_2019_240_20190823233000_02_1566873477', - 'only_matching': True, - } - ] - - @staticmethod - def _extract_url(webpage): - mobj = re.search( - r'<iframe[^>]+src=["\'](?P<url>(?:https?:)?//player\.piksel\.com/v/[a-z0-9]+)', - webpage) - if mobj: - return mobj.group('url') - - def _call_api(self, app_token, resource, display_id, query, fatal=True): - response = (self._download_json( - 'http://player.piksel.com/ws/ws_%s/api/%s/mode/json/apiv/5' % (resource, app_token), - display_id, query=query, fatal=fatal) or {}).get('response') - failure = try_get(response, lambda x: x['failure']['reason']) - if failure: - if fatal: - raise ExtractorError(failure, expected=True) - self.report_warning(failure) - return response - - def _real_extract(self, url): - ref_id, display_id = re.match(self._VALID_URL, url).groups() - webpage = self._download_webpage(url, display_id) - app_token = self._search_regex([ - r'clientAPI\s*:\s*"([^"]+)"', - r'data-de-api-key\s*=\s*"([^"]+)"' - ], webpage, 'app token') - query = {'refid': ref_id, 'prefid': display_id} if ref_id else {'v': display_id} - program = self._call_api( - app_token, 'program', display_id, query)['WsProgramResponse']['program'] - video_id = program['uuid'] - video_data = program['asset'] - title = video_data['title'] - asset_type = dict_get(video_data, ['assetType', 'asset_type']) - - formats = [] - - def process_asset_file(asset_file): - if not asset_file: - return - # TODO: extract rtmp formats - http_url = asset_file.get('http_url') - if not http_url: - return - tbr = None - vbr = int_or_none(asset_file.get('videoBitrate'), 1024) - abr = int_or_none(asset_file.get('audioBitrate'), 1024) - if asset_type == 'video': - tbr = vbr + abr - elif asset_type == 'audio': - tbr = abr - - format_id = ['http'] - if 
tbr: - format_id.append(compat_str(tbr)) - - formats.append({ - 'format_id': '-'.join(format_id), - 'url': unescapeHTML(http_url), - 'vbr': vbr, - 'abr': abr, - 'width': int_or_none(asset_file.get('videoWidth')), - 'height': int_or_none(asset_file.get('videoHeight')), - 'filesize': int_or_none(asset_file.get('filesize')), - 'tbr': tbr, - }) - - def process_asset_files(asset_files): - for asset_file in (asset_files or []): - process_asset_file(asset_file) - - process_asset_files(video_data.get('assetFiles')) - process_asset_file(video_data.get('referenceFile')) - if not formats: - asset_id = video_data.get('assetid') or program.get('assetid') - if asset_id: - process_asset_files(try_get(self._call_api( - app_token, 'asset_file', display_id, { - 'assetid': asset_id, - }, False), lambda x: x['WsAssetFileResponse']['AssetFiles'])) - - m3u8_url = dict_get(video_data, [ - 'm3u8iPadURL', - 'ipadM3u8Url', - 'm3u8AndroidURL', - 'm3u8iPhoneURL', - 'iphoneM3u8Url']) - if m3u8_url: - formats.extend(self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', 'm3u8_native', - m3u8_id='hls', fatal=False)) - - smil_url = dict_get(video_data, ['httpSmil', 'hdSmil', 'rtmpSmil']) - if smil_url: - transform_source = None - if ref_id == 'nhkworld': - # TODO: figure out if this is something to be fixed in urljoin, - # _parse_smil_formats or keep it here - transform_source = lambda x: x.replace('src="/', 'src="').replace('/media"', '/media/"') - formats.extend(self._extract_smil_formats( - re.sub(r'/od/[^/]+/', '/od/http/', smil_url), video_id, - transform_source=transform_source, fatal=False)) - - self._sort_formats(formats) - - subtitles = {} - for caption in video_data.get('captions', []): - caption_url = caption.get('url') - if caption_url: - subtitles.setdefault(caption.get('locale', 'en'), []).append({ - 'url': caption_url}) - - return { - 'id': video_id, - 'title': title, - 'description': video_data.get('description'), - 'thumbnail': video_data.get('thumbnailUrl'), - 'timestamp': 
parse_iso8601(video_data.get('dateadd')), - 'formats': formats, - 'subtitles': subtitles, - } diff --git a/youtube_dl/extractor/pinkbike.py b/youtube_dl/extractor/pinkbike.py deleted file mode 100644 index 9f3501f77..000000000 --- a/youtube_dl/extractor/pinkbike.py +++ /dev/null @@ -1,97 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - int_or_none, - remove_end, - remove_start, - str_to_int, - unified_strdate, -) - - -class PinkbikeIE(InfoExtractor): - _VALID_URL = r'https?://(?:(?:www\.)?pinkbike\.com/video/|es\.pinkbike\.org/i/kvid/kvid-y5\.swf\?id=)(?P<id>[0-9]+)' - _TESTS = [{ - 'url': 'http://www.pinkbike.com/video/402811/', - 'md5': '4814b8ca7651034cd87e3361d5c2155a', - 'info_dict': { - 'id': '402811', - 'ext': 'mp4', - 'title': 'Brandon Semenuk - RAW 100', - 'description': 'Official release: www.redbull.ca/rupertwalker', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 100, - 'upload_date': '20150406', - 'uploader': 'revelco', - 'location': 'Victoria, British Columbia, Canada', - 'view_count': int, - 'comment_count': int, - } - }, { - 'url': 'http://es.pinkbike.org/i/kvid/kvid-y5.swf?id=406629', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage( - 'http://www.pinkbike.com/video/%s' % video_id, video_id) - - formats = [] - for _, format_id, src in re.findall( - r'data-quality=((?:\\)?["\'])(.+?)\1[^>]+src=\1(.+?)\1', webpage): - height = int_or_none(self._search_regex( - r'^(\d+)[pP]$', format_id, 'height', default=None)) - formats.append({ - 'url': src, - 'format_id': format_id, - 'height': height, - }) - self._sort_formats(formats) - - title = remove_end(self._og_search_title(webpage), ' Video - Pinkbike') - description = self._html_search_regex( - r'(?s)id="media-description"[^>]*>(.+?)<', - webpage, 'description', default=None) or remove_start( - 
self._og_search_description(webpage), title + '. ') - thumbnail = self._og_search_thumbnail(webpage) - duration = int_or_none(self._html_search_meta( - 'video:duration', webpage, 'duration')) - - uploader = self._search_regex( - r'<a[^>]+\brel=["\']author[^>]+>([^<]+)', webpage, - 'uploader', fatal=False) - upload_date = unified_strdate(self._search_regex( - r'class="fullTime"[^>]+title="([^"]+)"', - webpage, 'upload date', fatal=False)) - - location = self._html_search_regex( - r'(?s)<dt>Location</dt>\s*<dd>(.+?)<', - webpage, 'location', fatal=False) - - def extract_count(webpage, label): - return str_to_int(self._search_regex( - r'<span[^>]+class="stat-num"[^>]*>([\d,.]+)</span>\s*<span[^>]+class="stat-label"[^>]*>%s' % label, - webpage, label, fatal=False)) - - view_count = extract_count(webpage, 'Views') - comment_count = extract_count(webpage, 'Comments') - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'duration': duration, - 'upload_date': upload_date, - 'uploader': uploader, - 'location': location, - 'view_count': view_count, - 'comment_count': comment_count, - 'formats': formats - } diff --git a/youtube_dl/extractor/pinterest.py b/youtube_dl/extractor/pinterest.py deleted file mode 100644 index 42528d746..000000000 --- a/youtube_dl/extractor/pinterest.py +++ /dev/null @@ -1,203 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import json -import re - -from .common import InfoExtractor -from ..compat import compat_str -from ..utils import ( - determine_ext, - float_or_none, - int_or_none, - try_get, - unified_timestamp, - url_or_none, -) - - -class PinterestBaseIE(InfoExtractor): - _VALID_URL_BASE = r'https?://(?:[^/]+\.)?pinterest\.(?:com|fr|de|ch|jp|cl|ca|it|co\.uk|nz|ru|com\.au|at|pt|co\.kr|es|com\.mx|dk|ph|th|com\.uy|co|nl|info|kr|ie|vn|com\.vn|ec|mx|in|pe|co\.at|hu|co\.in|co\.nz|id|com\.ec|com\.py|tw|be|uk|com\.bo|com\.pe)' - - def _call_api(self, resource, video_id, 
options): - return self._download_json( - 'https://www.pinterest.com/resource/%sResource/get/' % resource, - video_id, 'Download %s JSON metadata' % resource, query={ - 'data': json.dumps({'options': options}) - })['resource_response'] - - def _extract_video(self, data, extract_formats=True): - video_id = data['id'] - - title = (data.get('title') or data.get('grid_title') or video_id).strip() - - urls = [] - formats = [] - duration = None - if extract_formats: - for format_id, format_dict in data['videos']['video_list'].items(): - if not isinstance(format_dict, dict): - continue - format_url = url_or_none(format_dict.get('url')) - if not format_url or format_url in urls: - continue - urls.append(format_url) - duration = float_or_none(format_dict.get('duration'), scale=1000) - ext = determine_ext(format_url) - if 'hls' in format_id.lower() or ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( - format_url, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id=format_id, fatal=False)) - else: - formats.append({ - 'url': format_url, - 'format_id': format_id, - 'width': int_or_none(format_dict.get('width')), - 'height': int_or_none(format_dict.get('height')), - 'duration': duration, - }) - self._sort_formats( - formats, field_preference=('height', 'width', 'tbr', 'format_id')) - - description = data.get('description') or data.get('description_html') or data.get('seo_description') - timestamp = unified_timestamp(data.get('created_at')) - - def _u(field): - return try_get(data, lambda x: x['closeup_attribution'][field], compat_str) - - uploader = _u('full_name') - uploader_id = _u('id') - - repost_count = int_or_none(data.get('repin_count')) - comment_count = int_or_none(data.get('comment_count')) - categories = try_get(data, lambda x: x['pin_join']['visual_annotation'], list) - tags = data.get('hashtags') - - thumbnails = [] - images = data.get('images') - if isinstance(images, dict): - for thumbnail_id, thumbnail in images.items(): - if not 
isinstance(thumbnail, dict): - continue - thumbnail_url = url_or_none(thumbnail.get('url')) - if not thumbnail_url: - continue - thumbnails.append({ - 'url': thumbnail_url, - 'width': int_or_none(thumbnail.get('width')), - 'height': int_or_none(thumbnail.get('height')), - }) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'duration': duration, - 'timestamp': timestamp, - 'thumbnails': thumbnails, - 'uploader': uploader, - 'uploader_id': uploader_id, - 'repost_count': repost_count, - 'comment_count': comment_count, - 'categories': categories, - 'tags': tags, - 'formats': formats, - 'extractor_key': PinterestIE.ie_key(), - } - - -class PinterestIE(PinterestBaseIE): - _VALID_URL = r'%s/pin/(?P<id>\d+)' % PinterestBaseIE._VALID_URL_BASE - _TESTS = [{ - 'url': 'https://www.pinterest.com/pin/664281013778109217/', - 'md5': '6550c2af85d6d9f3fe3b88954d1577fc', - 'info_dict': { - 'id': '664281013778109217', - 'ext': 'mp4', - 'title': 'Origami', - 'description': 'md5:b9d90ddf7848e897882de9e73344f7dd', - 'duration': 57.7, - 'timestamp': 1593073622, - 'upload_date': '20200625', - 'uploader': 'Love origami -I am Dafei', - 'uploader_id': '586523688879454212', - 'repost_count': 50, - 'comment_count': 0, - 'categories': list, - 'tags': list, - }, - }, { - 'url': 'https://co.pinterest.com/pin/824721750502199491/', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - data = self._call_api( - 'Pin', video_id, { - 'field_set_key': 'unauth_react_main_pin', - 'id': video_id, - })['data'] - return self._extract_video(data) - - -class PinterestCollectionIE(PinterestBaseIE): - _VALID_URL = r'%s/(?P<username>[^/]+)/(?P<id>[^/?#&]+)' % PinterestBaseIE._VALID_URL_BASE - _TESTS = [{ - 'url': 'https://www.pinterest.ca/mashal0407/cool-diys/', - 'info_dict': { - 'id': '585890301462791043', - 'title': 'cool diys', - }, - 'playlist_count': 8, - }, { - 'url': 'https://www.pinterest.ca/fudohub/videos/', - 'info_dict': { - 
'id': '682858430939307450', - 'title': 'VIDEOS', - }, - 'playlist_mincount': 365, - 'skip': 'Test with extract_formats=False', - }] - - @classmethod - def suitable(cls, url): - return False if PinterestIE.suitable(url) else super( - PinterestCollectionIE, cls).suitable(url) - - def _real_extract(self, url): - username, slug = re.match(self._VALID_URL, url).groups() - board = self._call_api( - 'Board', slug, { - 'slug': slug, - 'username': username - })['data'] - board_id = board['id'] - options = { - 'board_id': board_id, - 'page_size': 250, - } - bookmark = None - entries = [] - while True: - if bookmark: - options['bookmarks'] = [bookmark] - board_feed = self._call_api('BoardFeed', board_id, options) - for item in (board_feed.get('data') or []): - if not isinstance(item, dict) or item.get('type') != 'pin': - continue - video_id = item.get('id') - if video_id: - # Some pins may not be available anonymously via pin URL - # video = self._extract_video(item, extract_formats=False) - # video.update({ - # '_type': 'url_transparent', - # 'url': 'https://www.pinterest.com/pin/%s/' % video_id, - # }) - # entries.append(video) - entries.append(self._extract_video(item)) - bookmark = board_feed.get('bookmark') - if not bookmark: - break - return self.playlist_result( - entries, playlist_id=board_id, playlist_title=board.get('name')) diff --git a/youtube_dl/extractor/pladform.py b/youtube_dl/extractor/pladform.py deleted file mode 100644 index e86c65396..000000000 --- a/youtube_dl/extractor/pladform.py +++ /dev/null @@ -1,125 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..compat import compat_urlparse -from ..utils import ( - determine_ext, - ExtractorError, - int_or_none, - xpath_text, - qualities, -) - - -class PladformIE(InfoExtractor): - _VALID_URL = r'''(?x) - https?:// - (?: - (?: - out\.pladform\.ru/player| - static\.pladform\.ru/player\.swf - ) - \?.*\bvideoid=| - 
video\.pladform\.ru/catalog/video/videoid/ - ) - (?P<id>\d+) - ''' - _TESTS = [{ - 'url': 'https://out.pladform.ru/player?pl=64471&videoid=3777899&vk_puid15=0&vk_puid34=0', - 'md5': '53362fac3a27352da20fa2803cc5cd6f', - 'info_dict': { - 'id': '3777899', - 'ext': 'mp4', - 'title': 'СТУДИЯ СОЮЗ • Шоу Студия Союз, 24 выпуск (01.02.2018) Нурлан Сабуров и Слава Комиссаренко', - 'description': 'md5:05140e8bf1b7e2d46e7ba140be57fd95', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 3190, - }, - }, { - 'url': 'http://static.pladform.ru/player.swf?pl=21469&videoid=100183293&vkcid=0', - 'only_matching': True, - }, { - 'url': 'http://video.pladform.ru/catalog/video/videoid/100183293/vkcid/0', - 'only_matching': True, - }] - - @staticmethod - def _extract_url(webpage): - mobj = re.search( - r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//out\.pladform\.ru/player\?.+?)\1', webpage) - if mobj: - return mobj.group('url') - - def _real_extract(self, url): - video_id = self._match_id(url) - - qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) - pl = qs.get('pl', ['1'])[0] - - video = self._download_xml( - 'http://out.pladform.ru/getVideo', video_id, query={ - 'pl': pl, - 'videoid': video_id, - }) - - def fail(text): - raise ExtractorError( - '%s returned error: %s' % (self.IE_NAME, text), - expected=True) - - if video.tag == 'error': - fail(video.text) - - quality = qualities(('ld', 'sd', 'hd')) - - formats = [] - for src in video.findall('./src'): - if src is None: - continue - format_url = src.text - if not format_url: - continue - if src.get('type') == 'hls' or determine_ext(format_url) == 'm3u8': - formats.extend(self._extract_m3u8_formats( - format_url, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls', fatal=False)) - else: - formats.append({ - 'url': src.text, - 'format_id': src.get('quality'), - 'quality': quality(src.get('quality')), - }) - - if not formats: - error = xpath_text(video, './cap', 'error', default=None) - if error: - 
fail(error) - - self._sort_formats(formats) - - webpage = self._download_webpage( - 'http://video.pladform.ru/catalog/video/videoid/%s' % video_id, - video_id) - - title = self._og_search_title(webpage, fatal=False) or xpath_text( - video, './/title', 'title', fatal=True) - description = self._search_regex( - r'</h3>\s*<p>([^<]+)</p>', webpage, 'description', fatal=False) - thumbnail = self._og_search_thumbnail(webpage) or xpath_text( - video, './/cover', 'cover') - - duration = int_or_none(xpath_text(video, './/time', 'duration')) - age_limit = int_or_none(xpath_text(video, './/age18', 'age limit')) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'duration': duration, - 'age_limit': age_limit, - 'formats': formats, - } diff --git a/youtube_dl/extractor/platzi.py b/youtube_dl/extractor/platzi.py deleted file mode 100644 index 23c8256b5..000000000 --- a/youtube_dl/extractor/platzi.py +++ /dev/null @@ -1,224 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..compat import ( - compat_b64decode, - compat_str, -) -from ..utils import ( - clean_html, - ExtractorError, - int_or_none, - str_or_none, - try_get, - url_or_none, - urlencode_postdata, - urljoin, -) - - -class PlatziBaseIE(InfoExtractor): - _LOGIN_URL = 'https://platzi.com/login/' - _NETRC_MACHINE = 'platzi' - - def _real_initialize(self): - self._login() - - def _login(self): - username, password = self._get_login_info() - if username is None: - return - - login_page = self._download_webpage( - self._LOGIN_URL, None, 'Downloading login page') - - login_form = self._hidden_inputs(login_page) - - login_form.update({ - 'email': username, - 'password': password, - }) - - urlh = self._request_webpage( - self._LOGIN_URL, None, 'Logging in', - data=urlencode_postdata(login_form), - headers={'Referer': self._LOGIN_URL}) - - # login succeeded - if 'platzi.com/login' not in urlh.geturl(): - return - - 
login_error = self._webpage_read_content( - urlh, self._LOGIN_URL, None, 'Downloading login error page') - - login = self._parse_json( - self._search_regex( - r'login\s*=\s*({.+?})(?:\s*;|\s*</script)', login_error, 'login'), - None) - - for kind in ('error', 'password', 'nonFields'): - error = str_or_none(login.get('%sError' % kind)) - if error: - raise ExtractorError( - 'Unable to login: %s' % error, expected=True) - raise ExtractorError('Unable to log in') - - -class PlatziIE(PlatziBaseIE): - _VALID_URL = r'''(?x) - https?:// - (?: - platzi\.com/clases| # es version - courses\.platzi\.com/classes # en version - )/[^/]+/(?P<id>\d+)-[^/?\#&]+ - ''' - - _TESTS = [{ - 'url': 'https://platzi.com/clases/1311-next-js/12074-creando-nuestra-primera-pagina/', - 'md5': '8f56448241005b561c10f11a595b37e3', - 'info_dict': { - 'id': '12074', - 'ext': 'mp4', - 'title': 'Creando nuestra primera página', - 'description': 'md5:4c866e45034fc76412fbf6e60ae008bc', - 'duration': 420, - }, - 'skip': 'Requires platzi account credentials', - }, { - 'url': 'https://courses.platzi.com/classes/1367-communication-codestream/13430-background/', - 'info_dict': { - 'id': '13430', - 'ext': 'mp4', - 'title': 'Background', - 'description': 'md5:49c83c09404b15e6e71defaf87f6b305', - 'duration': 360, - }, - 'skip': 'Requires platzi account credentials', - 'params': { - 'skip_download': True, - }, - }] - - def _real_extract(self, url): - lecture_id = self._match_id(url) - - webpage = self._download_webpage(url, lecture_id) - - data = self._parse_json( - self._search_regex( - # client_data may contain "};" so that we have to try more - # strict regex first - (r'client_data\s*=\s*({.+?})\s*;\s*\n', - r'client_data\s*=\s*({.+?})\s*;'), - webpage, 'client data'), - lecture_id) - - material = data['initialState']['material'] - desc = material['description'] - title = desc['title'] - - formats = [] - for server_id, server in material['videos'].items(): - if not isinstance(server, dict): - continue - for 
format_id in ('hls', 'dash'): - format_url = url_or_none(server.get(format_id)) - if not format_url: - continue - if format_id == 'hls': - formats.extend(self._extract_m3u8_formats( - format_url, lecture_id, 'mp4', - entry_protocol='m3u8_native', m3u8_id=format_id, - note='Downloading %s m3u8 information' % server_id, - fatal=False)) - elif format_id == 'dash': - formats.extend(self._extract_mpd_formats( - format_url, lecture_id, mpd_id=format_id, - note='Downloading %s MPD manifest' % server_id, - fatal=False)) - self._sort_formats(formats) - - content = str_or_none(desc.get('content')) - description = (clean_html(compat_b64decode(content).decode('utf-8')) - if content else None) - duration = int_or_none(material.get('duration'), invscale=60) - - return { - 'id': lecture_id, - 'title': title, - 'description': description, - 'duration': duration, - 'formats': formats, - } - - -class PlatziCourseIE(PlatziBaseIE): - _VALID_URL = r'''(?x) - https?:// - (?: - platzi\.com/clases| # es version - courses\.platzi\.com/classes # en version - )/(?P<id>[^/?\#&]+) - ''' - _TESTS = [{ - 'url': 'https://platzi.com/clases/next-js/', - 'info_dict': { - 'id': '1311', - 'title': 'Curso de Next.js', - }, - 'playlist_count': 22, - }, { - 'url': 'https://courses.platzi.com/classes/communication-codestream/', - 'info_dict': { - 'id': '1367', - 'title': 'Codestream Course', - }, - 'playlist_count': 14, - }] - - @classmethod - def suitable(cls, url): - return False if PlatziIE.suitable(url) else super(PlatziCourseIE, cls).suitable(url) - - def _real_extract(self, url): - course_name = self._match_id(url) - - webpage = self._download_webpage(url, course_name) - - props = self._parse_json( - self._search_regex(r'data\s*=\s*({.+?})\s*;', webpage, 'data'), - course_name)['initialProps'] - - entries = [] - for chapter_num, chapter in enumerate(props['concepts'], 1): - if not isinstance(chapter, dict): - continue - materials = chapter.get('materials') - if not materials or not 
isinstance(materials, list): - continue - chapter_title = chapter.get('title') - chapter_id = str_or_none(chapter.get('id')) - for material in materials: - if not isinstance(material, dict): - continue - if material.get('material_type') != 'video': - continue - video_url = urljoin(url, material.get('url')) - if not video_url: - continue - entries.append({ - '_type': 'url_transparent', - 'url': video_url, - 'title': str_or_none(material.get('name')), - 'id': str_or_none(material.get('id')), - 'ie_key': PlatziIE.ie_key(), - 'chapter': chapter_title, - 'chapter_number': chapter_num, - 'chapter_id': chapter_id, - }) - - course_id = compat_str(try_get(props, lambda x: x['course']['id'])) - course_title = try_get(props, lambda x: x['course']['name'], compat_str) - - return self.playlist_result(entries, course_id, course_title) diff --git a/youtube_dl/extractor/playfm.py b/youtube_dl/extractor/playfm.py deleted file mode 100644 index e766ccca3..000000000 --- a/youtube_dl/extractor/playfm.py +++ /dev/null @@ -1,75 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..compat import compat_str -from ..utils import ( - ExtractorError, - int_or_none, - parse_iso8601, -) - - -class PlayFMIE(InfoExtractor): - IE_NAME = 'play.fm' - _VALID_URL = r'https?://(?:www\.)?play\.fm/(?P<slug>(?:[^/]+/)+(?P<id>[^/]+))/?(?:$|[?#])' - - _TEST = { - 'url': 'https://www.play.fm/dan-drastic/sven-tasnadi-leipzig-electronic-music-batofar-paris-fr-2014-07-12', - 'md5': 'c505f8307825a245d0c7ad1850001f22', - 'info_dict': { - 'id': '71276', - 'ext': 'mp3', - 'title': 'Sven Tasnadi - LEIPZIG ELECTRONIC MUSIC @ Batofar (Paris,FR) - 2014-07-12', - 'description': '', - 'duration': 5627, - 'timestamp': 1406033781, - 'upload_date': '20140722', - 'uploader': 'Dan Drastic', - 'uploader_id': '71170', - 'view_count': int, - 'comment_count': int, - }, - } - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = 
mobj.group('id') - slug = mobj.group('slug') - - recordings = self._download_json( - 'http://v2api.play.fm/recordings/slug/%s' % slug, video_id) - - error = recordings.get('error') - if isinstance(error, dict): - raise ExtractorError( - '%s returned error: %s' % (self.IE_NAME, error.get('message')), - expected=True) - - audio_url = recordings['audio'] - video_id = compat_str(recordings.get('id') or video_id) - title = recordings['title'] - description = recordings.get('description') - duration = int_or_none(recordings.get('recordingDuration')) - timestamp = parse_iso8601(recordings.get('created_at')) - uploader = recordings.get('page', {}).get('title') - uploader_id = compat_str(recordings.get('page', {}).get('id')) - view_count = int_or_none(recordings.get('playCount')) - comment_count = int_or_none(recordings.get('commentCount')) - categories = [tag['name'] for tag in recordings.get('tags', []) if tag.get('name')] - - return { - 'id': video_id, - 'url': audio_url, - 'title': title, - 'description': description, - 'duration': duration, - 'timestamp': timestamp, - 'uploader': uploader, - 'uploader_id': uploader_id, - 'view_count': view_count, - 'comment_count': comment_count, - 'categories': categories, - } diff --git a/youtube_dl/extractor/playplustv.py b/youtube_dl/extractor/playplustv.py deleted file mode 100644 index 1e30ab23a..000000000 --- a/youtube_dl/extractor/playplustv.py +++ /dev/null @@ -1,109 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import json -import re - -from .common import InfoExtractor -from ..compat import compat_HTTPError -from ..utils import ( - clean_html, - ExtractorError, - int_or_none, - PUTRequest, -) - - -class PlayPlusTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?playplus\.(?:com|tv)/VOD/(?P<project_id>[0-9]+)/(?P<id>[0-9a-f]{32})' - _TEST = { - 'url': 'https://www.playplus.tv/VOD/7572/db8d274a5163424e967f35a30ddafb8e', - 'md5': 'd078cb89d7ab6b9df37ce23c647aef72', - 'info_dict': { - 'id': 
'db8d274a5163424e967f35a30ddafb8e', - 'ext': 'mp4', - 'title': 'Capítulo 179 - Final', - 'description': 'md5:01085d62d8033a1e34121d3c3cabc838', - 'timestamp': 1529992740, - 'upload_date': '20180626', - }, - 'skip': 'Requires account credential', - } - _NETRC_MACHINE = 'playplustv' - _GEO_COUNTRIES = ['BR'] - _token = None - _profile_id = None - - def _call_api(self, resource, video_id=None, query=None): - return self._download_json('https://api.playplus.tv/api/media/v2/get' + resource, video_id, headers={ - 'Authorization': 'Bearer ' + self._token, - }, query=query) - - def _real_initialize(self): - email, password = self._get_login_info() - if email is None: - self.raise_login_required() - - req = PUTRequest( - 'https://api.playplus.tv/api/web/login', json.dumps({ - 'email': email, - 'password': password, - }).encode(), { - 'Content-Type': 'application/json; charset=utf-8', - }) - - try: - self._token = self._download_json(req, None)['token'] - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: - raise ExtractorError(self._parse_json( - e.cause.read(), None)['errorMessage'], expected=True) - raise - - self._profile = self._call_api('Profiles')['list'][0]['_id'] - - def _real_extract(self, url): - project_id, media_id = re.match(self._VALID_URL, url).groups() - media = self._call_api( - 'Media', media_id, { - 'profileId': self._profile, - 'projectId': project_id, - 'mediaId': media_id, - })['obj'] - title = media['title'] - - formats = [] - for f in media.get('files', []): - f_url = f.get('url') - if not f_url: - continue - file_info = f.get('fileInfo') or {} - formats.append({ - 'url': f_url, - 'width': int_or_none(file_info.get('width')), - 'height': int_or_none(file_info.get('height')), - }) - self._sort_formats(formats) - - thumbnails = [] - for thumb in media.get('thumbs', []): - thumb_url = thumb.get('url') - if not thumb_url: - continue - thumbnails.append({ - 'url': thumb_url, - 'width': 
int_or_none(thumb.get('width')), - 'height': int_or_none(thumb.get('height')), - }) - - return { - 'id': media_id, - 'title': title, - 'formats': formats, - 'thumbnails': thumbnails, - 'description': clean_html(media.get('description')) or media.get('shortDescription'), - 'timestamp': int_or_none(media.get('publishDate'), 1000), - 'view_count': int_or_none(media.get('numberOfViews')), - 'comment_count': int_or_none(media.get('numberOfComments')), - 'tags': media.get('tags'), - } diff --git a/youtube_dl/extractor/plays.py b/youtube_dl/extractor/plays.py deleted file mode 100644 index ddfc6f148..000000000 --- a/youtube_dl/extractor/plays.py +++ /dev/null @@ -1,53 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import int_or_none - - -class PlaysTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?plays\.tv/(?:video|embeds)/(?P<id>[0-9a-f]{18})' - _TESTS = [{ - 'url': 'https://plays.tv/video/56af17f56c95335490/when-you-outplay-the-azir-wall', - 'md5': 'dfeac1198506652b5257a62762cec7bc', - 'info_dict': { - 'id': '56af17f56c95335490', - 'ext': 'mp4', - 'title': 'Bjergsen - When you outplay the Azir wall', - 'description': 'Posted by Bjergsen', - } - }, { - 'url': 'https://plays.tv/embeds/56af17f56c95335490', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage( - 'https://plays.tv/video/%s' % video_id, video_id) - - info = self._search_json_ld(webpage, video_id,) - - mpd_url, sources = re.search( - r'(?s)<video[^>]+data-mpd="([^"]+)"[^>]*>(.+?)</video>', - webpage).groups() - formats = self._extract_mpd_formats( - self._proto_relative_url(mpd_url), video_id, mpd_id='DASH') - for format_id, height, format_url in re.findall(r'<source\s+res="((\d+)h?)"\s+src="([^"]+)"', sources): - formats.append({ - 'url': self._proto_relative_url(format_url), - 'format_id': 'http-' + format_id, - 'height': 
int_or_none(height), - }) - self._sort_formats(formats) - - info.update({ - 'id': video_id, - 'description': self._og_search_description(webpage), - 'thumbnail': info.get('thumbnail') or self._og_search_thumbnail(webpage), - 'formats': formats, - }) - - return info diff --git a/youtube_dl/extractor/playstuff.py b/youtube_dl/extractor/playstuff.py deleted file mode 100644 index 5a329957f..000000000 --- a/youtube_dl/extractor/playstuff.py +++ /dev/null @@ -1,65 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..compat import compat_str -from ..utils import ( - smuggle_url, - try_get, -) - - -class PlayStuffIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?play\.stuff\.co\.nz/details/(?P<id>[^/?#&]+)' - _TESTS = [{ - 'url': 'https://play.stuff.co.nz/details/608778ac1de1c4001a3fa09a', - 'md5': 'c82d3669e5247c64bc382577843e5bd0', - 'info_dict': { - 'id': '6250584958001', - 'ext': 'mp4', - 'title': 'Episode 1: Rotorua/Mt Maunganui/Tauranga', - 'description': 'md5:c154bafb9f0dd02d01fd4100fb1c1913', - 'uploader_id': '6005208634001', - 'timestamp': 1619491027, - 'upload_date': '20210427', - }, - 'add_ie': ['BrightcoveNew'], - }, { - # geo restricted, bypassable - 'url': 'https://play.stuff.co.nz/details/_6155660351001', - 'only_matching': True, - }] - BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s' - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - state = self._parse_json( - self._search_regex( - r'__INITIAL_STATE__\s*=\s*({.+?})\s*;', webpage, 'state'), - video_id) - - account_id = try_get( - state, lambda x: x['configurations']['accountId'], - compat_str) or '6005208634001' - player_id = try_get( - state, lambda x: x['configurations']['playerId'], - compat_str) or 'default' - - entries = [] - for item_id, video in state['items'].items(): - if not isinstance(video, dict): - continue - asset_id = try_get( - 
video, lambda x: x['content']['attributes']['assetId'], - compat_str) - if not asset_id: - continue - entries.append(self.url_result( - smuggle_url( - self.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, asset_id), - {'geo_countries': ['NZ']}), - 'BrightcoveNew', video_id)) - - return self.playlist_result(entries, video_id) diff --git a/youtube_dl/extractor/playtvak.py b/youtube_dl/extractor/playtvak.py deleted file mode 100644 index 4c5f57919..000000000 --- a/youtube_dl/extractor/playtvak.py +++ /dev/null @@ -1,191 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..compat import ( - compat_urlparse, - compat_urllib_parse_urlencode, -) -from ..utils import ( - ExtractorError, - int_or_none, - parse_iso8601, - qualities, -) - - -class PlaytvakIE(InfoExtractor): - IE_DESC = 'Playtvak.cz, iDNES.cz and Lidovky.cz' - _VALID_URL = r'https?://(?:.+?\.)?(?:playtvak|idnes|lidovky|metro)\.cz/.*\?(?:c|idvideo)=(?P<id>[^&]+)' - _TESTS = [{ - 'url': 'http://www.playtvak.cz/vyzente-vosy-a-srsne-ze-zahrady-dn5-/hodinovy-manzel.aspx?c=A150730_150323_hodinovy-manzel_kuko', - 'md5': '4525ae312c324b4be2f4603cc78ceb4a', - 'info_dict': { - 'id': 'A150730_150323_hodinovy-manzel_kuko', - 'ext': 'mp4', - 'title': 'Vyžeňte vosy a sršně ze zahrady', - 'description': 'md5:4436e61b7df227a093778efb7e373571', - 'thumbnail': r're:(?i)^https?://.*\.(?:jpg|png)$', - 'duration': 279, - 'timestamp': 1438732860, - 'upload_date': '20150805', - 'is_live': False, - } - }, { # live video test - 'url': 'http://slowtv.playtvak.cz/planespotting-0pr-/planespotting.aspx?c=A150624_164934_planespotting_cat', - 'info_dict': { - 'id': 'A150624_164934_planespotting_cat', - 'ext': 'flv', - 'title': 're:^Planespotting [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', - 'description': 'Sledujte provoz na ranveji Letiště Václava Havla v Praze', - 'is_live': True, - }, - 'params': { - 'skip_download': True, # requires rtmpdump - }, - }, { # another live 
stream, this one without Misc.videoFLV - 'url': 'https://slowtv.playtvak.cz/zive-sledujte-vlaky-v-primem-prenosu-dwi-/hlavni-nadrazi.aspx?c=A151218_145728_hlavni-nadrazi_plap', - 'info_dict': { - 'id': 'A151218_145728_hlavni-nadrazi_plap', - 'ext': 'flv', - 'title': 're:^Hlavní nádraží [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', - 'is_live': True, - }, - 'params': { - 'skip_download': True, # requires rtmpdump - }, - }, { # idnes.cz - 'url': 'http://zpravy.idnes.cz/pes-zavreny-v-aute-rozbijeni-okynek-v-aute-fj5-/domaci.aspx?c=A150809_104116_domaci_pku', - 'md5': '819832ba33cd7016e58a6658577fe289', - 'info_dict': { - 'id': 'A150809_104116_domaci_pku', - 'ext': 'mp4', - 'title': 'Zavřeli jsme mraženou pizzu do auta. Upekla se', - 'description': 'md5:01e73f02329e2e5760bd5eed4d42e3c2', - 'thumbnail': r're:(?i)^https?://.*\.(?:jpg|png)$', - 'duration': 39, - 'timestamp': 1438969140, - 'upload_date': '20150807', - 'is_live': False, - } - }, { # lidovky.cz - 'url': 'http://www.lidovky.cz/dalsi-demonstrace-v-praze-o-migraci-duq-/video.aspx?c=A150808_214044_ln-video_ELE', - 'md5': 'c7209ac4ba9d234d4ad5bab7485bcee8', - 'info_dict': { - 'id': 'A150808_214044_ln-video_ELE', - 'ext': 'mp4', - 'title': 'Táhni! 
Demonstrace proti imigrantům budila emoce', - 'description': 'md5:97c81d589a9491fbfa323c9fa3cca72c', - 'thumbnail': r're:(?i)^https?://.*\.(?:jpg|png)$', - 'timestamp': 1439052180, - 'upload_date': '20150808', - 'is_live': False, - } - }, { # metro.cz - 'url': 'http://www.metro.cz/video-pod-billboardem-se-na-vltavske-roztocil-kolotoc-deti-vozil-jen-par-hodin-1hx-/metro-extra.aspx?c=A141111_173251_metro-extra_row', - 'md5': '84fc1deedcac37b7d4a6ccae7c716668', - 'info_dict': { - 'id': 'A141111_173251_metro-extra_row', - 'ext': 'mp4', - 'title': 'Recesisté udělali z billboardu kolotoč', - 'description': 'md5:7369926049588c3989a66c9c1a043c4c', - 'thumbnail': r're:(?i)^https?://.*\.(?:jpg|png)$', - 'timestamp': 1415725500, - 'upload_date': '20141111', - 'is_live': False, - } - }, { - 'url': 'http://www.playtvak.cz/embed.aspx?idvideo=V150729_141549_play-porad_kuko', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - info_url = self._html_search_regex( - r'Misc\.video(?:FLV)?\(\s*{\s*data\s*:\s*"([^"]+)"', webpage, 'info url') - - parsed_url = compat_urlparse.urlparse(info_url) - - qs = compat_urlparse.parse_qs(parsed_url.query) - qs.update({ - 'reklama': ['0'], - 'type': ['js'], - }) - - info_url = compat_urlparse.urlunparse( - parsed_url._replace(query=compat_urllib_parse_urlencode(qs, True))) - - json_info = self._download_json( - info_url, video_id, - transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1]) - - item = None - for i in json_info['items']: - if i.get('type') == 'video' or i.get('type') == 'stream': - item = i - break - if not item: - raise ExtractorError('No suitable stream found') - - quality = qualities(('low', 'middle', 'high')) - - formats = [] - for fmt in item['video']: - video_url = fmt.get('file') - if not video_url: - continue - - format_ = fmt['format'] - format_id = '%s_%s' % (format_, fmt['quality']) - preference = None - - if format_ in 
('mp4', 'webm'): - ext = format_ - elif format_ == 'rtmp': - ext = 'flv' - elif format_ == 'apple': - ext = 'mp4' - # Some streams have mp3 audio which does not play - # well with ffmpeg filter aac_adtstoasc - preference = -1 - elif format_ == 'adobe': # f4m manifest fails with 404 in 80% of requests - continue - else: # Other formats not supported yet - continue - - formats.append({ - 'url': video_url, - 'ext': ext, - 'format_id': format_id, - 'quality': quality(fmt.get('quality')), - 'preference': preference, - }) - self._sort_formats(formats) - - title = item['title'] - is_live = item['type'] == 'stream' - if is_live: - title = self._live_title(title) - description = self._og_search_description(webpage, default=None) or self._html_search_meta( - 'description', webpage, 'description', default=None) - timestamp = None - duration = None - if not is_live: - duration = int_or_none(item.get('length')) - timestamp = item.get('published') - if timestamp: - timestamp = parse_iso8601(timestamp[:-5]) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnail': item.get('image'), - 'duration': duration, - 'timestamp': timestamp, - 'is_live': is_live, - 'formats': formats, - } diff --git a/youtube_dl/extractor/playvid.py b/youtube_dl/extractor/playvid.py deleted file mode 100644 index 4aef186ea..000000000 --- a/youtube_dl/extractor/playvid.py +++ /dev/null @@ -1,99 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..compat import ( - compat_urllib_parse_unquote, - compat_urllib_parse_unquote_plus, -) -from ..utils import ( - clean_html, - ExtractorError, -) - - -class PlayvidIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?playvid\.com/watch(\?v=|/)(?P<id>.+?)(?:#|$)' - _TESTS = [{ - 'url': 'http://www.playvid.com/watch/RnmBNgtrrJu', - 'md5': 'ffa2f6b2119af359f544388d8c01eb6c', - 'info_dict': { - 'id': 'RnmBNgtrrJu', - 'ext': 'mp4', - 'title': 'md5:9256d01c6317e3f703848b5906880dc8', 
- 'duration': 82, - 'age_limit': 18, - }, - 'skip': 'Video removed due to ToS', - }, { - 'url': 'http://www.playvid.com/watch/hwb0GpNkzgH', - 'md5': '39d49df503ad7b8f23a4432cbf046477', - 'info_dict': { - 'id': 'hwb0GpNkzgH', - 'ext': 'mp4', - 'title': 'Ellen Euro Cutie Blond Takes a Sexy Survey Get Facial in The Park', - 'age_limit': 18, - 'thumbnail': r're:^https?://.*\.jpg$', - }, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - m_error = re.search( - r'<div class="block-error">\s*<div class="heading">\s*<div>(?P<msg>.+?)</div>\s*</div>', webpage) - if m_error: - raise ExtractorError(clean_html(m_error.group('msg')), expected=True) - - video_title = None - duration = None - video_thumbnail = None - formats = [] - - # most of the information is stored in the flashvars - flashvars = self._html_search_regex( - r'flashvars="(.+?)"', webpage, 'flashvars') - - infos = compat_urllib_parse_unquote(flashvars).split(r'&') - for info in infos: - videovars_match = re.match(r'^video_vars\[(.+?)\]=(.+?)$', info) - if videovars_match: - key = videovars_match.group(1) - val = videovars_match.group(2) - - if key == 'title': - video_title = compat_urllib_parse_unquote_plus(val) - if key == 'duration': - try: - duration = int(val) - except ValueError: - pass - if key == 'big_thumb': - video_thumbnail = val - - videourl_match = re.match( - r'^video_urls\]\[(?P<resolution>[0-9]+)p', key) - if videourl_match: - height = int(videourl_match.group('resolution')) - formats.append({ - 'height': height, - 'url': val, - }) - self._sort_formats(formats) - - # Extract title - should be in the flashvars; if not, look elsewhere - if video_title is None: - video_title = self._html_search_regex( - r'<title>(.*?)</title', webpage, 'title') - - return { - 'id': video_id, - 'formats': formats, - 'title': video_title, - 'thumbnail': video_thumbnail, - 'duration': duration, - 'description': None, - 'age_limit': 18 - } diff 
--git a/youtube_dl/extractor/playwire.py b/youtube_dl/extractor/playwire.py deleted file mode 100644 index 4d96a10a7..000000000 --- a/youtube_dl/extractor/playwire.py +++ /dev/null @@ -1,75 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - dict_get, - float_or_none, -) - - -class PlaywireIE(InfoExtractor): - _VALID_URL = r'https?://(?:config|cdn)\.playwire\.com(?:/v2)?/(?P<publisher_id>\d+)/(?:videos/v2|embed|config)/(?P<id>\d+)' - _TESTS = [{ - 'url': 'http://config.playwire.com/14907/videos/v2/3353705/player.json', - 'md5': 'e6398701e3595888125729eaa2329ed9', - 'info_dict': { - 'id': '3353705', - 'ext': 'mp4', - 'title': 'S04_RM_UCL_Rus', - 'thumbnail': r're:^https?://.*\.png$', - 'duration': 145.94, - }, - }, { - # m3u8 in f4m - 'url': 'http://config.playwire.com/21772/videos/v2/4840492/zeus.json', - 'info_dict': { - 'id': '4840492', - 'ext': 'mp4', - 'title': 'ITV EL SHOW FULL', - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, { - # Multiple resolutions while bitrates missing - 'url': 'http://cdn.playwire.com/11625/embed/85228.html', - 'only_matching': True, - }, { - 'url': 'http://config.playwire.com/12421/videos/v2/3389892/zeus.json', - 'only_matching': True, - }, { - 'url': 'http://cdn.playwire.com/v2/12342/config/1532636.json', - 'only_matching': True, - }] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - publisher_id, video_id = mobj.group('publisher_id'), mobj.group('id') - - player = self._download_json( - 'http://config.playwire.com/%s/videos/v2/%s/zeus.json' % (publisher_id, video_id), - video_id) - - title = player['settings']['title'] - duration = float_or_none(player.get('duration'), 1000) - - content = player['content'] - thumbnail = content.get('poster') - src = content['media']['f4m'] - - formats = self._extract_f4m_formats(src, video_id, m3u8_id='hls') - for a_format in formats: - if not dict_get(a_format, ['tbr', 'width', 
'height']): - a_format['quality'] = 1 if '-hd.' in a_format['url'] else 0 - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': title, - 'thumbnail': thumbnail, - 'duration': duration, - 'formats': formats, - } diff --git a/youtube_dl/extractor/pluralsight.py b/youtube_dl/extractor/pluralsight.py deleted file mode 100644 index 2d63855df..000000000 --- a/youtube_dl/extractor/pluralsight.py +++ /dev/null @@ -1,501 +0,0 @@ -from __future__ import unicode_literals - -import collections -import json -import os -import random -import re - -from .common import InfoExtractor -from ..compat import ( - compat_str, - compat_urlparse, -) -from ..utils import ( - dict_get, - ExtractorError, - float_or_none, - int_or_none, - parse_duration, - qualities, - srt_subtitles_timecode, - try_get, - update_url_query, - urlencode_postdata, -) - - -class PluralsightBaseIE(InfoExtractor): - _API_BASE = 'https://app.pluralsight.com' - - _GRAPHQL_EP = '%s/player/api/graphql' % _API_BASE - _GRAPHQL_HEADERS = { - 'Content-Type': 'application/json;charset=UTF-8', - } - _GRAPHQL_COURSE_TMPL = ''' -query BootstrapPlayer { - rpc { - bootstrapPlayer { - profile { - firstName - lastName - email - username - userHandle - authed - isAuthed - plan - } - course(courseId: "%s") { - name - title - courseHasCaptions - translationLanguages { - code - name - } - supportsWideScreenVideoFormats - timestamp - modules { - name - title - duration - formattedDuration - author - authorized - clips { - authorized - clipId - duration - formattedDuration - id - index - moduleIndex - moduleTitle - name - title - watched - } - } - } - } - } -}''' - - def _download_course(self, course_id, url, display_id): - try: - return self._download_course_rpc(course_id, url, display_id) - except ExtractorError: - # Old API fallback - return self._download_json( - 'https://app.pluralsight.com/player/user/api/v1/player/payload', - display_id, data=urlencode_postdata({'courseId': course_id}), - headers={'Referer': 
url}) - - def _download_course_rpc(self, course_id, url, display_id): - response = self._download_json( - self._GRAPHQL_EP, display_id, data=json.dumps({ - 'query': self._GRAPHQL_COURSE_TMPL % course_id, - 'variables': {} - }).encode('utf-8'), headers=self._GRAPHQL_HEADERS) - - course = try_get( - response, lambda x: x['data']['rpc']['bootstrapPlayer']['course'], - dict) - if course: - return course - - raise ExtractorError( - '%s said: %s' % (self.IE_NAME, response['error']['message']), - expected=True) - - -class PluralsightIE(PluralsightBaseIE): - IE_NAME = 'pluralsight' - _VALID_URL = r'https?://(?:(?:www|app)\.)?pluralsight\.com/(?:training/)?player\?' - _LOGIN_URL = 'https://app.pluralsight.com/id/' - - _NETRC_MACHINE = 'pluralsight' - - _TESTS = [{ - 'url': 'http://www.pluralsight.com/training/player?author=mike-mckeown&name=hosting-sql-server-windows-azure-iaas-m7-mgmt&mode=live&clip=3&course=hosting-sql-server-windows-azure-iaas', - 'md5': '4d458cf5cf4c593788672419a8dd4cf8', - 'info_dict': { - 'id': 'hosting-sql-server-windows-azure-iaas-m7-mgmt-04', - 'ext': 'mp4', - 'title': 'Demo Monitoring', - 'duration': 338, - }, - 'skip': 'Requires pluralsight account credentials', - }, { - 'url': 'https://app.pluralsight.com/training/player?course=angularjs-get-started&author=scott-allen&name=angularjs-get-started-m1-introduction&clip=0&mode=live', - 'only_matching': True, - }, { - # available without pluralsight account - 'url': 'http://app.pluralsight.com/training/player?author=scott-allen&name=angularjs-get-started-m1-introduction&mode=live&clip=0&course=angularjs-get-started', - 'only_matching': True, - }, { - 'url': 'https://app.pluralsight.com/player?course=ccna-intro-networking&author=ross-bagurdes&name=ccna-intro-networking-m06&clip=0', - 'only_matching': True, - }] - - GRAPHQL_VIEWCLIP_TMPL = ''' -query viewClip { - viewClip(input: { - author: "%(author)s", - clipIndex: %(clipIndex)d, - courseName: "%(courseName)s", - includeCaptions: %(includeCaptions)s, 
- locale: "%(locale)s", - mediaType: "%(mediaType)s", - moduleName: "%(moduleName)s", - quality: "%(quality)s" - }) { - urls { - url - cdn - rank - source - }, - status - } -}''' - - def _real_initialize(self): - self._login() - - def _login(self): - username, password = self._get_login_info() - if username is None: - return - - login_page = self._download_webpage( - self._LOGIN_URL, None, 'Downloading login page') - - login_form = self._hidden_inputs(login_page) - - login_form.update({ - 'Username': username, - 'Password': password, - }) - - post_url = self._search_regex( - r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page, - 'post url', default=self._LOGIN_URL, group='url') - - if not post_url.startswith('http'): - post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url) - - response = self._download_webpage( - post_url, None, 'Logging in', - data=urlencode_postdata(login_form), - headers={'Content-Type': 'application/x-www-form-urlencoded'}) - - error = self._search_regex( - r'<span[^>]+class="field-validation-error"[^>]*>([^<]+)</span>', - response, 'error message', default=None) - if error: - raise ExtractorError('Unable to login: %s' % error, expected=True) - - if all(not re.search(p, response) for p in ( - r'__INITIAL_STATE__', r'["\']currentUser["\']', - # new layout? - r'>\s*Sign out\s*<')): - BLOCKED = 'Your account has been blocked due to suspicious activity' - if BLOCKED in response: - raise ExtractorError( - 'Unable to login: %s' % BLOCKED, expected=True) - MUST_AGREE = 'To continue using Pluralsight, you must agree to' - if any(p in response for p in (MUST_AGREE, '>Disagree<', '>Agree<')): - raise ExtractorError( - 'Unable to login: %s some documents. Go to pluralsight.com, ' - 'log in and agree with what Pluralsight requires.' 
- % MUST_AGREE, expected=True) - - raise ExtractorError('Unable to log in') - - def _get_subtitles(self, author, clip_idx, clip_id, lang, name, duration, video_id): - captions = None - if clip_id: - captions = self._download_json( - '%s/transcript/api/v1/caption/json/%s/%s' - % (self._API_BASE, clip_id, lang), video_id, - 'Downloading captions JSON', 'Unable to download captions JSON', - fatal=False) - if not captions: - captions_post = { - 'a': author, - 'cn': int(clip_idx), - 'lc': lang, - 'm': name, - } - captions = self._download_json( - '%s/player/retrieve-captions' % self._API_BASE, video_id, - 'Downloading captions JSON', 'Unable to download captions JSON', - fatal=False, data=json.dumps(captions_post).encode('utf-8'), - headers={'Content-Type': 'application/json;charset=utf-8'}) - if captions: - return { - lang: [{ - 'ext': 'json', - 'data': json.dumps(captions), - }, { - 'ext': 'srt', - 'data': self._convert_subtitles(duration, captions), - }] - } - - @staticmethod - def _convert_subtitles(duration, subs): - srt = '' - TIME_OFFSET_KEYS = ('displayTimeOffset', 'DisplayTimeOffset') - TEXT_KEYS = ('text', 'Text') - for num, current in enumerate(subs): - current = subs[num] - start, text = ( - float_or_none(dict_get(current, TIME_OFFSET_KEYS, skip_false_values=False)), - dict_get(current, TEXT_KEYS)) - if start is None or text is None: - continue - end = duration if num == len(subs) - 1 else float_or_none( - dict_get(subs[num + 1], TIME_OFFSET_KEYS, skip_false_values=False)) - if end is None: - continue - srt += os.linesep.join( - ( - '%d' % num, - '%s --> %s' % ( - srt_subtitles_timecode(start), - srt_subtitles_timecode(end)), - text, - os.linesep, - )) - return srt - - def _real_extract(self, url): - qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) - - author = qs.get('author', [None])[0] - name = qs.get('name', [None])[0] - clip_idx = qs.get('clip', [None])[0] - course_name = qs.get('course', [None])[0] - - if any(not f for f in (author, 
name, clip_idx, course_name,)): - raise ExtractorError('Invalid URL', expected=True) - - display_id = '%s-%s' % (name, clip_idx) - - course = self._download_course(course_name, url, display_id) - - collection = course['modules'] - - clip = None - - for module_ in collection: - if name in (module_.get('moduleName'), module_.get('name')): - for clip_ in module_.get('clips', []): - clip_index = clip_.get('clipIndex') - if clip_index is None: - clip_index = clip_.get('index') - if clip_index is None: - continue - if compat_str(clip_index) == clip_idx: - clip = clip_ - break - - if not clip: - raise ExtractorError('Unable to resolve clip') - - title = clip['title'] - clip_id = clip.get('clipName') or clip.get('name') or clip['clipId'] - - QUALITIES = { - 'low': {'width': 640, 'height': 480}, - 'medium': {'width': 848, 'height': 640}, - 'high': {'width': 1024, 'height': 768}, - 'high-widescreen': {'width': 1280, 'height': 720}, - } - - QUALITIES_PREFERENCE = ('low', 'medium', 'high', 'high-widescreen',) - quality_key = qualities(QUALITIES_PREFERENCE) - - AllowedQuality = collections.namedtuple('AllowedQuality', ['ext', 'qualities']) - - ALLOWED_QUALITIES = ( - AllowedQuality('webm', ['high', ]), - AllowedQuality('mp4', ['low', 'medium', 'high', ]), - ) - - # Some courses also offer widescreen resolution for high quality (see - # https://github.com/ytdl-org/youtube-dl/issues/7766) - widescreen = course.get('supportsWideScreenVideoFormats') is True - best_quality = 'high-widescreen' if widescreen else 'high' - if widescreen: - for allowed_quality in ALLOWED_QUALITIES: - allowed_quality.qualities.append(best_quality) - - # In order to minimize the number of calls to ViewClip API and reduce - # the probability of being throttled or banned by Pluralsight we will request - # only single format until formats listing was explicitly requested. 
- if self._downloader.params.get('listformats', False): - allowed_qualities = ALLOWED_QUALITIES - else: - def guess_allowed_qualities(): - req_format = self._downloader.params.get('format') or 'best' - req_format_split = req_format.split('-', 1) - if len(req_format_split) > 1: - req_ext, req_quality = req_format_split - req_quality = '-'.join(req_quality.split('-')[:2]) - for allowed_quality in ALLOWED_QUALITIES: - if req_ext == allowed_quality.ext and req_quality in allowed_quality.qualities: - return (AllowedQuality(req_ext, (req_quality, )), ) - req_ext = 'webm' if self._downloader.params.get('prefer_free_formats') else 'mp4' - return (AllowedQuality(req_ext, (best_quality, )), ) - allowed_qualities = guess_allowed_qualities() - - formats = [] - for ext, qualities_ in allowed_qualities: - for quality in qualities_: - f = QUALITIES[quality].copy() - clip_post = { - 'author': author, - 'includeCaptions': 'false', - 'clipIndex': int(clip_idx), - 'courseName': course_name, - 'locale': 'en', - 'moduleName': name, - 'mediaType': ext, - 'quality': '%dx%d' % (f['width'], f['height']), - } - format_id = '%s-%s' % (ext, quality) - - try: - viewclip = self._download_json( - self._GRAPHQL_EP, display_id, - 'Downloading %s viewclip graphql' % format_id, - data=json.dumps({ - 'query': self.GRAPHQL_VIEWCLIP_TMPL % clip_post, - 'variables': {} - }).encode('utf-8'), - headers=self._GRAPHQL_HEADERS)['data']['viewClip'] - except ExtractorError: - # Still works but most likely will go soon - viewclip = self._download_json( - '%s/video/clips/viewclip' % self._API_BASE, display_id, - 'Downloading %s viewclip JSON' % format_id, fatal=False, - data=json.dumps(clip_post).encode('utf-8'), - headers={'Content-Type': 'application/json;charset=utf-8'}) - - # Pluralsight tracks multiple sequential calls to ViewClip API and start - # to return 429 HTTP errors after some time (see - # https://github.com/ytdl-org/youtube-dl/pull/6989). 
Moreover it may even lead - # to account ban (see https://github.com/ytdl-org/youtube-dl/issues/6842). - # To somewhat reduce the probability of these consequences - # we will sleep random amount of time before each call to ViewClip. - self._sleep( - random.randint(5, 10), display_id, - '%(video_id)s: Waiting for %(timeout)s seconds to avoid throttling') - - if not viewclip: - continue - - clip_urls = viewclip.get('urls') - if not isinstance(clip_urls, list): - continue - - for clip_url_data in clip_urls: - clip_url = clip_url_data.get('url') - if not clip_url: - continue - cdn = clip_url_data.get('cdn') - clip_f = f.copy() - clip_f.update({ - 'url': clip_url, - 'ext': ext, - 'format_id': '%s-%s' % (format_id, cdn) if cdn else format_id, - 'quality': quality_key(quality), - 'source_preference': int_or_none(clip_url_data.get('rank')), - }) - formats.append(clip_f) - - self._sort_formats(formats) - - duration = int_or_none( - clip.get('duration')) or parse_duration(clip.get('formattedDuration')) - - # TODO: other languages? 
- subtitles = self.extract_subtitles( - author, clip_idx, clip.get('clipId'), 'en', name, duration, display_id) - - return { - 'id': clip_id, - 'title': title, - 'duration': duration, - 'creator': author, - 'formats': formats, - 'subtitles': subtitles, - } - - -class PluralsightCourseIE(PluralsightBaseIE): - IE_NAME = 'pluralsight:course' - _VALID_URL = r'https?://(?:(?:www|app)\.)?pluralsight\.com/(?:library/)?courses/(?P<id>[^/]+)' - _TESTS = [{ - # Free course from Pluralsight Starter Subscription for Microsoft TechNet - # https://offers.pluralsight.com/technet?loc=zTS3z&prod=zOTprodz&tech=zOttechz&prog=zOTprogz&type=zSOz&media=zOTmediaz&country=zUSz - 'url': 'http://www.pluralsight.com/courses/hosting-sql-server-windows-azure-iaas', - 'info_dict': { - 'id': 'hosting-sql-server-windows-azure-iaas', - 'title': 'Hosting SQL Server in Microsoft Azure IaaS Fundamentals', - 'description': 'md5:61b37e60f21c4b2f91dc621a977d0986', - }, - 'playlist_count': 31, - }, { - # available without pluralsight account - 'url': 'https://www.pluralsight.com/courses/angularjs-get-started', - 'only_matching': True, - }, { - 'url': 'https://app.pluralsight.com/library/courses/understanding-microsoft-azure-amazon-aws/table-of-contents', - 'only_matching': True, - }] - - def _real_extract(self, url): - course_id = self._match_id(url) - - # TODO: PSM cookie - - course = self._download_course(course_id, url, course_id) - - title = course['title'] - course_name = course['name'] - course_data = course['modules'] - description = course.get('description') or course.get('shortDescription') - - entries = [] - for num, module in enumerate(course_data, 1): - author = module.get('author') - module_name = module.get('name') - if not author or not module_name: - continue - for clip in module.get('clips', []): - clip_index = int_or_none(clip.get('index')) - if clip_index is None: - continue - clip_url = update_url_query( - '%s/player' % self._API_BASE, query={ - 'mode': 'live', - 'course': 
course_name, - 'author': author, - 'name': module_name, - 'clip': clip_index, - }) - entries.append({ - '_type': 'url_transparent', - 'url': clip_url, - 'ie_key': PluralsightIE.ie_key(), - 'chapter': module.get('title'), - 'chapter_number': num, - 'chapter_id': module.get('moduleRef'), - }) - - return self.playlist_result(entries, course_id, title, description) diff --git a/youtube_dl/extractor/podomatic.py b/youtube_dl/extractor/podomatic.py deleted file mode 100644 index e782e3f1f..000000000 --- a/youtube_dl/extractor/podomatic.py +++ /dev/null @@ -1,76 +0,0 @@ -from __future__ import unicode_literals - -import json -import re - -from .common import InfoExtractor -from ..utils import int_or_none - - -class PodomaticIE(InfoExtractor): - IE_NAME = 'podomatic' - _VALID_URL = r'''(?x) - (?P<proto>https?):// - (?: - (?P<channel>[^.]+)\.podomatic\.com/entry| - (?:www\.)?podomatic\.com/podcasts/(?P<channel_2>[^/]+)/episodes - )/ - (?P<id>[^/?#&]+) - ''' - - _TESTS = [{ - 'url': 'http://scienceteachingtips.podomatic.com/entry/2009-01-02T16_03_35-08_00', - 'md5': '84bb855fcf3429e6bf72460e1eed782d', - 'info_dict': { - 'id': '2009-01-02T16_03_35-08_00', - 'ext': 'mp3', - 'uploader': 'Science Teaching Tips', - 'uploader_id': 'scienceteachingtips', - 'title': '64. 
When the Moon Hits Your Eye', - 'duration': 446, - } - }, { - 'url': 'http://ostbahnhof.podomatic.com/entry/2013-11-15T16_31_21-08_00', - 'md5': 'd2cf443931b6148e27638650e2638297', - 'info_dict': { - 'id': '2013-11-15T16_31_21-08_00', - 'ext': 'mp3', - 'uploader': 'Ostbahnhof / Techno Mix', - 'uploader_id': 'ostbahnhof', - 'title': 'Einunddreizig', - 'duration': 3799, - } - }, { - 'url': 'https://www.podomatic.com/podcasts/scienceteachingtips/episodes/2009-01-02T16_03_35-08_00', - 'only_matching': True, - }] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - channel = mobj.group('channel') or mobj.group('channel_2') - - json_url = (('%s://%s.podomatic.com/entry/embed_params/%s' - + '?permalink=true&rtmp=0') % - (mobj.group('proto'), channel, video_id)) - data_json = self._download_webpage( - json_url, video_id, 'Downloading video info') - data = json.loads(data_json) - - video_url = data['downloadLink'] - if not video_url: - video_url = '%s/%s' % (data['streamer'].replace('rtmp', 'http'), data['mediaLocation']) - uploader = data['podcast'] - title = data['title'] - thumbnail = data['imageLocation'] - duration = int_or_none(data.get('length'), 1000) - - return { - 'id': video_id, - 'url': video_url, - 'title': title, - 'uploader': uploader, - 'uploader_id': channel, - 'thumbnail': thumbnail, - 'duration': duration, - } diff --git a/youtube_dl/extractor/pokemon.py b/youtube_dl/extractor/pokemon.py deleted file mode 100644 index 80222d428..000000000 --- a/youtube_dl/extractor/pokemon.py +++ /dev/null @@ -1,71 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - extract_attributes, - int_or_none, -) - - -class PokemonIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?pokemon\.com/[a-z]{2}(?:.*?play=(?P<id>[a-z0-9]{32})|/(?:[^/]+/)+(?P<display_id>[^/?#&]+))' - _TESTS = [{ - 'url': 
'https://www.pokemon.com/us/pokemon-episodes/20_30-the-ol-raise-and-switch/', - 'md5': '2fe8eaec69768b25ef898cda9c43062e', - 'info_dict': { - 'id': 'afe22e30f01c41f49d4f1d9eab5cd9a4', - 'ext': 'mp4', - 'title': 'The Ol’ Raise and Switch!', - 'description': 'md5:7db77f7107f98ba88401d3adc80ff7af', - }, - 'add_id': ['LimelightMedia'], - }, { - # no data-video-title - 'url': 'https://www.pokemon.com/fr/episodes-pokemon/films-pokemon/pokemon-lascension-de-darkrai-2008', - 'info_dict': { - 'id': 'dfbaf830d7e54e179837c50c0c6cc0e1', - 'ext': 'mp4', - 'title': "Pokémon : L'ascension de Darkrai", - 'description': 'md5:d1dbc9e206070c3e14a06ff557659fb5', - }, - 'add_id': ['LimelightMedia'], - 'params': { - 'skip_download': True, - }, - }, { - 'url': 'http://www.pokemon.com/uk/pokemon-episodes/?play=2e8b5c761f1d4a9286165d7748c1ece2', - 'only_matching': True, - }, { - 'url': 'http://www.pokemon.com/fr/episodes-pokemon/18_09-un-hiver-inattendu/', - 'only_matching': True, - }, { - 'url': 'http://www.pokemon.com/de/pokemon-folgen/01_20-bye-bye-smettbo/', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id, display_id = re.match(self._VALID_URL, url).groups() - webpage = self._download_webpage(url, video_id or display_id) - video_data = extract_attributes(self._search_regex( - r'(<[^>]+data-video-id="%s"[^>]*>)' % (video_id if video_id else '[a-z0-9]{32}'), - webpage, 'video data element')) - video_id = video_data['data-video-id'] - title = video_data.get('data-video-title') or self._html_search_meta( - 'pkm-title', webpage, ' title', default=None) or self._search_regex( - r'<h1[^>]+\bclass=["\']us-title[^>]+>([^<]+)', webpage, 'title') - return { - '_type': 'url_transparent', - 'id': video_id, - 'url': 'limelight:media:%s' % video_id, - 'title': title, - 'description': video_data.get('data-video-summary'), - 'thumbnail': video_data.get('data-video-poster'), - 'series': 'Pokémon', - 'season_number': int_or_none(video_data.get('data-video-season')), - 
'episode': title, - 'episode_number': int_or_none(video_data.get('data-video-episode')), - 'ie_key': 'LimelightMedia', - } diff --git a/youtube_dl/extractor/polskieradio.py b/youtube_dl/extractor/polskieradio.py deleted file mode 100644 index 978d6f813..000000000 --- a/youtube_dl/extractor/polskieradio.py +++ /dev/null @@ -1,180 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import itertools -import re - -from .common import InfoExtractor -from ..compat import ( - compat_str, - compat_urllib_parse_unquote, - compat_urlparse -) -from ..utils import ( - extract_attributes, - int_or_none, - strip_or_none, - unified_timestamp, -) - - -class PolskieRadioIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?polskieradio\.pl/\d+/\d+/Artykul/(?P<id>[0-9]+)' - _TESTS = [{ - 'url': 'http://www.polskieradio.pl/7/5102/Artykul/1587943,Prof-Andrzej-Nowak-o-historii-nie-da-sie-myslec-beznamietnie', - 'info_dict': { - 'id': '1587943', - 'title': 'Prof. Andrzej Nowak: o historii nie da się myśleć beznamiętnie', - 'description': 'md5:12f954edbf3120c5e7075e17bf9fc5c5', - }, - 'playlist': [{ - 'md5': '2984ee6ce9046d91fc233bc1a864a09a', - 'info_dict': { - 'id': '1540576', - 'ext': 'mp3', - 'title': 'md5:d4623290d4ac983bf924061c75c23a0d', - 'timestamp': 1456594200, - 'upload_date': '20160227', - 'duration': 2364, - 'thumbnail': r're:^https?://static\.prsa\.pl/images/.*\.jpg$' - }, - }], - }, { - 'url': 'http://www.polskieradio.pl/265/5217/Artykul/1635803,Euro-2016-nie-ma-miejsca-na-blad-Polacy-graja-ze-Szwajcaria-o-cwiercfinal', - 'info_dict': { - 'id': '1635803', - 'title': 'Euro 2016: nie ma miejsca na błąd. 
Polacy grają ze Szwajcarią o ćwierćfinał', - 'description': 'md5:01cb7d0cad58664095d72b51a1ebada2', - }, - 'playlist_mincount': 12, - }, { - 'url': 'http://polskieradio.pl/9/305/Artykul/1632955,Bardzo-popularne-slowo-remis', - 'only_matching': True, - }, { - 'url': 'http://www.polskieradio.pl/7/5102/Artykul/1587943', - 'only_matching': True, - }, { - # with mp4 video - 'url': 'http://www.polskieradio.pl/9/299/Artykul/1634903,Brexit-Leszek-Miller-swiat-sie-nie-zawali-Europa-bedzie-trwac-dalej', - 'only_matching': True, - }] - - def _real_extract(self, url): - playlist_id = self._match_id(url) - - webpage = self._download_webpage(url, playlist_id) - - content = self._search_regex( - r'(?s)<div[^>]+class="\s*this-article\s*"[^>]*>(.+?)<div[^>]+class="tags"[^>]*>', - webpage, 'content') - - timestamp = unified_timestamp(self._html_search_regex( - r'(?s)<span[^>]+id="datetime2"[^>]*>(.+?)</span>', - webpage, 'timestamp', fatal=False)) - - thumbnail_url = self._og_search_thumbnail(webpage) - - entries = [] - - media_urls = set() - - for data_media in re.findall(r'<[^>]+data-media=({[^>]+})', content): - media = self._parse_json(data_media, playlist_id, fatal=False) - if not media.get('file') or not media.get('desc'): - continue - media_url = self._proto_relative_url(media['file'], 'http:') - if media_url in media_urls: - continue - media_urls.add(media_url) - entries.append({ - 'id': compat_str(media['id']), - 'url': media_url, - 'title': compat_urllib_parse_unquote(media['desc']), - 'duration': int_or_none(media.get('length')), - 'vcodec': 'none' if media.get('provider') == 'audio' else None, - 'timestamp': timestamp, - 'thumbnail': thumbnail_url - }) - - title = self._og_search_title(webpage).strip() - description = strip_or_none(self._og_search_description(webpage)) - - return self.playlist_result(entries, playlist_id, title, description) - - -class PolskieRadioCategoryIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?polskieradio\.pl/\d+(?:,[^/]+)?/(?P<id>\d+)' 
- _TESTS = [{ - 'url': 'http://www.polskieradio.pl/7/5102,HISTORIA-ZYWA', - 'info_dict': { - 'id': '5102', - 'title': 'HISTORIA ŻYWA', - }, - 'playlist_mincount': 38, - }, { - 'url': 'http://www.polskieradio.pl/7/4807', - 'info_dict': { - 'id': '4807', - 'title': 'Vademecum 1050. rocznicy Chrztu Polski' - }, - 'playlist_mincount': 5 - }, { - 'url': 'http://www.polskieradio.pl/7/129,Sygnaly-dnia?ref=source', - 'only_matching': True - }, { - 'url': 'http://www.polskieradio.pl/37,RedakcjaKatolicka/4143,Kierunek-Krakow', - 'info_dict': { - 'id': '4143', - 'title': 'Kierunek Kraków', - }, - 'playlist_mincount': 61 - }, { - 'url': 'http://www.polskieradio.pl/10,czworka/214,muzyka', - 'info_dict': { - 'id': '214', - 'title': 'Muzyka', - }, - 'playlist_mincount': 61 - }, { - 'url': 'http://www.polskieradio.pl/7,Jedynka/5102,HISTORIA-ZYWA', - 'only_matching': True, - }, { - 'url': 'http://www.polskieradio.pl/8,Dwojka/196,Publicystyka', - 'only_matching': True, - }] - - @classmethod - def suitable(cls, url): - return False if PolskieRadioIE.suitable(url) else super(PolskieRadioCategoryIE, cls).suitable(url) - - def _entries(self, url, page, category_id): - content = page - for page_num in itertools.count(2): - for a_entry, entry_id in re.findall( - r'(?s)<article[^>]+>.*?(<a[^>]+href=["\']/\d+/\d+/Artykul/(\d+)[^>]+>).*?</article>', - content): - entry = extract_attributes(a_entry) - href = entry.get('href') - if not href: - continue - yield self.url_result( - compat_urlparse.urljoin(url, href), PolskieRadioIE.ie_key(), - entry_id, entry.get('title')) - mobj = re.search( - r'<div[^>]+class=["\']next["\'][^>]*>\s*<a[^>]+href=(["\'])(?P<url>(?:(?!\1).)+)\1', - content) - if not mobj: - break - next_url = compat_urlparse.urljoin(url, mobj.group('url')) - content = self._download_webpage( - next_url, category_id, 'Downloading page %s' % page_num) - - def _real_extract(self, url): - category_id = self._match_id(url) - webpage = self._download_webpage(url, category_id) - title = 
self._html_search_regex( - r'<title>([^<]+) - [^<]+ - [^<]+', - webpage, 'title', fatal=False) - return self.playlist_result( - self._entries(url, webpage, category_id), - category_id, title) diff --git a/youtube_dl/extractor/popcorntimes.py b/youtube_dl/extractor/popcorntimes.py deleted file mode 100644 index 7bf7f9858..000000000 --- a/youtube_dl/extractor/popcorntimes.py +++ /dev/null @@ -1,99 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..compat import ( - compat_b64decode, - compat_chr, -) -from ..utils import int_or_none - - -class PopcorntimesIE(InfoExtractor): - _VALID_URL = r'https?://popcorntimes\.tv/[^/]+/m/(?P[^/]+)/(?P[^/?#&]+)' - _TEST = { - 'url': 'https://popcorntimes.tv/de/m/A1XCFvz/haensel-und-gretel-opera-fantasy', - 'md5': '93f210991ad94ba8c3485950a2453257', - 'info_dict': { - 'id': 'A1XCFvz', - 'display_id': 'haensel-und-gretel-opera-fantasy', - 'ext': 'mp4', - 'title': 'Hänsel und Gretel', - 'description': 'md5:1b8146791726342e7b22ce8125cf6945', - 'thumbnail': r're:^https?://.*\.jpg$', - 'creator': 'John Paul', - 'release_date': '19541009', - 'duration': 4260, - 'tbr': 5380, - 'width': 720, - 'height': 540, - }, - } - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id, display_id = mobj.group('id', 'display_id') - - webpage = self._download_webpage(url, display_id) - - title = self._search_regex( - r'

    ([^<]+)', webpage, 'title', - default=None) or self._html_search_meta( - 'ya:ovs:original_name', webpage, 'title', fatal=True) - - loc = self._search_regex( - r'PCTMLOC\s*=\s*(["\'])(?P(?:(?!\1).)+)\1', webpage, 'loc', - group='value') - - loc_b64 = '' - for c in loc: - c_ord = ord(c) - if ord('a') <= c_ord <= ord('z') or ord('A') <= c_ord <= ord('Z'): - upper = ord('Z') if c_ord <= ord('Z') else ord('z') - c_ord += 13 - if upper < c_ord: - c_ord -= 26 - loc_b64 += compat_chr(c_ord) - - video_url = compat_b64decode(loc_b64).decode('utf-8') - - description = self._html_search_regex( - r'(?s)]+class=["\']pt-movie-desc[^>]+>(.+?)

    ', webpage, - 'description', fatal=False) - - thumbnail = self._search_regex( - r']+class=["\']video-preview[^>]+\bsrc=(["\'])(?P(?:(?!\1).)+)\1', - webpage, 'thumbnail', default=None, - group='value') or self._og_search_thumbnail(webpage) - - creator = self._html_search_meta( - 'video:director', webpage, 'creator', default=None) - - release_date = self._html_search_meta( - 'video:release_date', webpage, default=None) - if release_date: - release_date = release_date.replace('-', '') - - def int_meta(name): - return int_or_none(self._html_search_meta( - name, webpage, default=None)) - - return { - 'id': video_id, - 'display_id': display_id, - 'url': video_url, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'creator': creator, - 'release_date': release_date, - 'duration': int_meta('video:duration'), - 'tbr': int_meta('ya:ovs:bitrate'), - 'width': int_meta('og:video:width'), - 'height': int_meta('og:video:height'), - 'http_headers': { - 'Referer': url, - }, - } diff --git a/youtube_dl/extractor/popcorntv.py b/youtube_dl/extractor/popcorntv.py deleted file mode 100644 index 9f834fb6c..000000000 --- a/youtube_dl/extractor/popcorntv.py +++ /dev/null @@ -1,76 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - extract_attributes, - int_or_none, - unified_timestamp, -) - - -class PopcornTVIE(InfoExtractor): - _VALID_URL = r'https?://[^/]+\.popcorntv\.it/guarda/(?P[^/]+)/(?P\d+)' - _TESTS = [{ - 'url': 'https://animemanga.popcorntv.it/guarda/food-wars-battaglie-culinarie-episodio-01/9183', - 'md5': '47d65a48d147caf692ab8562fe630b45', - 'info_dict': { - 'id': '9183', - 'display_id': 'food-wars-battaglie-culinarie-episodio-01', - 'ext': 'mp4', - 'title': 'Food Wars, Battaglie Culinarie | Episodio 01', - 'description': 'md5:b8bea378faae4651d3b34c6e112463d0', - 'thumbnail': r're:^https?://.*\.jpg$', - 'timestamp': 1497610857, - 'upload_date': '20170616', - 'duration': 1440, 
- 'view_count': int, - }, - }, { - 'url': 'https://cinema.popcorntv.it/guarda/smash-cut/10433', - 'only_matching': True, - }] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - display_id, video_id = mobj.group('display_id', 'id') - - webpage = self._download_webpage(url, display_id) - - m3u8_url = extract_attributes( - self._search_regex( - r'(]+itemprop=["\'](?:content|embed)Url[^>]*>)', - webpage, 'content' - ))['href'] - - formats = self._extract_m3u8_formats( - m3u8_url, display_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls') - - title = self._search_regex( - r']+itemprop=["\']name[^>]*>([^<]+)', webpage, - 'title', default=None) or self._og_search_title(webpage) - - description = self._html_search_regex( - r'(?s)]+itemprop=["\']description[^>]*>(.+?)', - webpage, 'description', fatal=False) - thumbnail = self._og_search_thumbnail(webpage) - timestamp = unified_timestamp(self._html_search_meta( - 'uploadDate', webpage, 'timestamp')) - duration = int_or_none(self._html_search_meta( - 'duration', webpage), invscale=60) - view_count = int_or_none(self._html_search_meta( - 'interactionCount', webpage, 'view count')) - - return { - 'id': video_id, - 'display_id': display_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'timestamp': timestamp, - 'duration': duration, - 'view_count': view_count, - 'formats': formats, - } diff --git a/youtube_dl/extractor/porn91.py b/youtube_dl/extractor/porn91.py deleted file mode 100644 index 20eac647a..000000000 --- a/youtube_dl/extractor/porn91.py +++ /dev/null @@ -1,63 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - parse_duration, - int_or_none, - ExtractorError, -) - - -class Porn91IE(InfoExtractor): - IE_NAME = '91porn' - _VALID_URL = r'(?:https?://)(?:www\.|)91porn\.com/.+?\?viewkey=(?P[\w\d]+)' - - _TEST = { - 'url': 'http://91porn.com/view_video.php?viewkey=7e42283b4f5ab36da134', 
- 'md5': '7fcdb5349354f40d41689bd0fa8db05a', - 'info_dict': { - 'id': '7e42283b4f5ab36da134', - 'title': '18岁大一漂亮学妹,水嫩性感,再爽一次!', - 'ext': 'mp4', - 'duration': 431, - 'age_limit': 18, - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - self._set_cookie('91porn.com', 'language', 'cn_CN') - - webpage = self._download_webpage( - 'http://91porn.com/view_video.php?viewkey=%s' % video_id, video_id) - - if '作为游客,你每天只可观看10个视频' in webpage: - raise ExtractorError('91 Porn says: Daily limit 10 videos exceeded', expected=True) - - title = self._search_regex( - r'
    ([^<]+)
    ', webpage, 'title') - title = title.replace('\n', '') - - video_link_url = self._search_regex( - r']+id=["\']fm-video_link[^>]+>([^<]+)', - webpage, 'video link') - videopage = self._download_webpage(video_link_url, video_id) - - info_dict = self._parse_html5_media_entries(url, videopage, video_id)[0] - - duration = parse_duration(self._search_regex( - r'时长:\s*\s*(\d+:\d+)', webpage, 'duration', fatal=False)) - - comment_count = int_or_none(self._search_regex( - r'留言:\s*\s*(\d+)', webpage, 'comment count', fatal=False)) - - info_dict.update({ - 'id': video_id, - 'title': title, - 'duration': duration, - 'comment_count': comment_count, - 'age_limit': self._rta_search(webpage), - }) - - return info_dict diff --git a/youtube_dl/extractor/porncom.py b/youtube_dl/extractor/porncom.py deleted file mode 100644 index 5726cab3a..000000000 --- a/youtube_dl/extractor/porncom.py +++ /dev/null @@ -1,103 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..compat import compat_urlparse -from ..utils import ( - int_or_none, - js_to_json, - parse_filesize, - str_to_int, -) - - -class PornComIE(InfoExtractor): - _VALID_URL = r'https?://(?:[a-zA-Z]+\.)?porn\.com/videos/(?:(?P[^/]+)-)?(?P\d+)' - _TESTS = [{ - 'url': 'http://www.porn.com/videos/teen-grabs-a-dildo-and-fucks-her-pussy-live-on-1hottie-i-rec-2603339', - 'md5': '3f30ce76267533cd12ba999263156de7', - 'info_dict': { - 'id': '2603339', - 'display_id': 'teen-grabs-a-dildo-and-fucks-her-pussy-live-on-1hottie-i-rec', - 'ext': 'mp4', - 'title': 'Teen grabs a dildo and fucks her pussy live on 1hottie, I rec', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 551, - 'view_count': int, - 'age_limit': 18, - 'categories': list, - 'tags': list, - }, - }, { - 'url': 'http://se.porn.com/videos/marsha-may-rides-seth-on-top-of-his-thick-cock-2658067', - 'only_matching': True, - }] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = 
mobj.group('id') - display_id = mobj.group('display_id') or video_id - - webpage = self._download_webpage(url, display_id) - - config = self._parse_json( - self._search_regex( - (r'=\s*({.+?})\s*;\s*v1ar\b', - r'=\s*({.+?})\s*,\s*[\da-zA-Z_]+\s*='), - webpage, 'config', default='{}'), - display_id, transform_source=js_to_json, fatal=False) - - if config: - title = config['title'] - formats = [{ - 'url': stream['url'], - 'format_id': stream.get('id'), - 'height': int_or_none(self._search_regex( - r'^(\d+)[pP]', stream.get('id') or '', 'height', default=None)) - } for stream in config['streams'] if stream.get('url')] - thumbnail = (compat_urlparse.urljoin( - config['thumbCDN'], config['poster']) - if config.get('thumbCDN') and config.get('poster') else None) - duration = int_or_none(config.get('length')) - else: - title = self._search_regex( - (r'([^<]+)', r']*>([^<]+)'), - webpage, 'title') - formats = [{ - 'url': compat_urlparse.urljoin(url, format_url), - 'format_id': '%sp' % height, - 'height': int(height), - 'filesize_approx': parse_filesize(filesize), - } for format_url, height, filesize in re.findall( - r']+href="(/download/[^"]+)">[^<]*?(\d+)p]*>(\d+\s*[a-zA-Z]+)<', - webpage)] - thumbnail = None - duration = None - - self._sort_formats(formats) - - view_count = str_to_int(self._search_regex( - (r'Views:\s*\s*\s*([\d,.]+)', - r'class=["\']views["\'][^>]*>

    ([\d,.]+)'), webpage, - 'view count', fatal=False)) - - def extract_list(kind): - s = self._search_regex( - (r'(?s)%s:\s*\s*(.+?)' % kind.capitalize(), - r'(?s)]*>%s:(.+?)

    ' % kind.capitalize()), - webpage, kind, fatal=False) - return re.findall(r']+>([^<]+)', s or '') - - return { - 'id': video_id, - 'display_id': display_id, - 'title': title, - 'thumbnail': thumbnail, - 'duration': duration, - 'view_count': view_count, - 'formats': formats, - 'age_limit': 18, - 'categories': extract_list('categories'), - 'tags': extract_list('tags'), - } diff --git a/youtube_dl/extractor/pornhd.py b/youtube_dl/extractor/pornhd.py deleted file mode 100644 index c6052ac9f..000000000 --- a/youtube_dl/extractor/pornhd.py +++ /dev/null @@ -1,121 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - determine_ext, - ExtractorError, - int_or_none, - js_to_json, - merge_dicts, - urljoin, -) - - -class PornHdIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P\d+)(?:/(?P.+))?' - _TESTS = [{ - 'url': 'http://www.pornhd.com/videos/9864/selfie-restroom-masturbation-fun-with-chubby-cutie-hd-porn-video', - 'md5': '87f1540746c1d32ec7a2305c12b96b25', - 'info_dict': { - 'id': '9864', - 'display_id': 'selfie-restroom-masturbation-fun-with-chubby-cutie-hd-porn-video', - 'ext': 'mp4', - 'title': 'Restroom selfie masturbation', - 'description': 'md5:3748420395e03e31ac96857a8f125b2b', - 'thumbnail': r're:^https?://.*\.jpg', - 'view_count': int, - 'like_count': int, - 'age_limit': 18, - }, - 'skip': 'HTTP Error 404: Not Found', - }, { - 'url': 'http://www.pornhd.com/videos/1962/sierra-day-gets-his-cum-all-over-herself-hd-porn-video', - 'md5': '1b7b3a40b9d65a8e5b25f7ab9ee6d6de', - 'info_dict': { - 'id': '1962', - 'display_id': 'sierra-day-gets-his-cum-all-over-herself-hd-porn-video', - 'ext': 'mp4', - 'title': 'md5:98c6f8b2d9c229d0f0fde47f61a1a759', - 'description': 'md5:8ff0523848ac2b8f9b065ba781ccf294', - 'thumbnail': r're:^https?://.*\.jpg', - 'view_count': int, - 'like_count': int, - 'age_limit': 18, - }, - }] - - def _real_extract(self, url): - mobj = 
re.match(self._VALID_URL, url) - video_id = mobj.group('id') - display_id = mobj.group('display_id') - - webpage = self._download_webpage(url, display_id or video_id) - - title = self._html_search_regex( - [r']+class=["\']video-name["\'][^>]*>([^<]+)', - r'(.+?) - .*?[Pp]ornHD.*?'], webpage, 'title') - - sources = self._parse_json(js_to_json(self._search_regex( - r"(?s)sources'?\s*[:=]\s*(\{.+?\})", - webpage, 'sources', default='{}')), video_id) - - info = {} - if not sources: - entries = self._parse_html5_media_entries(url, webpage, video_id) - if entries: - info = entries[0] - - if not sources and not info: - message = self._html_search_regex( - r'(?s)<(div|p)[^>]+class="no-video"[^>]*>(?P.+?)]+class=["\']video-description[^>]+>(?P.+?)', - r'<(div|p)[^>]+class="description"[^>]*>(?P[^<]+)(?:(?!\1).)+)\1", webpage, - 'thumbnail', default=None, group='url') - - like_count = int_or_none(self._search_regex( - (r'(\d+)
    \s*likes', - r'(\d+)\s*]+>(?: |\s)*\blikes', - r'class=["\']save-count["\'][^>]*>\s*(\d+)'), - webpage, 'like count', fatal=False)) - - return merge_dicts(info, { - 'id': video_id, - 'display_id': display_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'view_count': view_count, - 'like_count': like_count, - 'formats': formats, - 'age_limit': 18, - }) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py deleted file mode 100644 index e2e1500ff..000000000 --- a/youtube_dl/extractor/pornhub.py +++ /dev/null @@ -1,767 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import functools -import itertools -import operator -import re - -from .common import InfoExtractor -from ..compat import ( - compat_HTTPError, - compat_str, - compat_urllib_request, -) -from .openload import PhantomJSwrapper -from ..utils import ( - determine_ext, - ExtractorError, - int_or_none, - merge_dicts, - NO_DEFAULT, - orderedSet, - remove_quotes, - str_to_int, - update_url_query, - urlencode_postdata, - url_or_none, -) - - -class PornHubBaseIE(InfoExtractor): - _NETRC_MACHINE = 'pornhub' - _PORNHUB_HOST_RE = r'(?:(?Ppornhub(?:premium)?\.(?:com|net|org))|pornhubthbh7ap3u\.onion)' - - def _download_webpage_handle(self, *args, **kwargs): - def dl(*args, **kwargs): - return super(PornHubBaseIE, self)._download_webpage_handle(*args, **kwargs) - - ret = dl(*args, **kwargs) - - if not ret: - return ret - - webpage, urlh = ret - - if any(re.search(p, webpage) for p in ( - r']+\bonload=["\']go\(\)', - r'document\.cookie\s*=\s*["\']RNKEY=', - r'document\.location\.reload\(true\)')): - url_or_request = args[0] - url = (url_or_request.get_full_url() - if isinstance(url_or_request, compat_urllib_request.Request) - else url_or_request) - phantom = PhantomJSwrapper(self, required_version='2.0') - phantom.get(url, html=webpage) - webpage, urlh = dl(*args, **kwargs) - - return webpage, urlh - - def _real_initialize(self): - 
self._logged_in = False - - def _login(self, host): - if self._logged_in: - return - - site = host.split('.')[0] - - # Both sites pornhub and pornhubpremium have separate accounts - # so there should be an option to provide credentials for both. - # At the same time some videos are available under the same video id - # on both sites so that we have to identify them as the same video. - # For that purpose we have to keep both in the same extractor - # but under different netrc machines. - username, password = self._get_login_info(netrc_machine=site) - if username is None: - return - - login_url = 'https://www.%s/%slogin' % (host, 'premium/' if 'premium' in host else '') - login_page = self._download_webpage( - login_url, None, 'Downloading %s login page' % site) - - def is_logged(webpage): - return any(re.search(p, webpage) for p in ( - r'class=["\']signOut', - r'>Sign\s+[Oo]ut\s*<')) - - if is_logged(login_page): - self._logged_in = True - return - - login_form = self._hidden_inputs(login_page) - - login_form.update({ - 'username': username, - 'password': password, - }) - - response = self._download_json( - 'https://www.%s/front/authenticate' % host, None, - 'Logging in to %s' % site, - data=urlencode_postdata(login_form), - headers={ - 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', - 'Referer': login_url, - 'X-Requested-With': 'XMLHttpRequest', - }) - - if response.get('success') == '1': - self._logged_in = True - return - - message = response.get('message') - if message is not None: - raise ExtractorError( - 'Unable to login: %s' % message, expected=True) - - raise ExtractorError('Unable to log in') - - -class PornHubIE(PornHubBaseIE): - IE_DESC = 'PornHub and Thumbzilla' - _VALID_URL = r'''(?x) - https?:// - (?: - (?:[^/]+\.)? 
- %s - /(?:(?:view_video\.php|video/show)\?viewkey=|embed/)| - (?:www\.)?thumbzilla\.com/video/ - ) - (?P[\da-z]+) - ''' % PornHubBaseIE._PORNHUB_HOST_RE - _TESTS = [{ - 'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015', - 'md5': 'a6391306d050e4547f62b3f485dd9ba9', - 'info_dict': { - 'id': '648719015', - 'ext': 'mp4', - 'title': 'Seductive Indian beauty strips down and fingers her pink pussy', - 'uploader': 'Babes', - 'upload_date': '20130628', - 'timestamp': 1372447216, - 'duration': 361, - 'view_count': int, - 'like_count': int, - 'dislike_count': int, - 'comment_count': int, - 'age_limit': 18, - 'tags': list, - 'categories': list, - }, - }, { - # non-ASCII title - 'url': 'http://www.pornhub.com/view_video.php?viewkey=1331683002', - 'info_dict': { - 'id': '1331683002', - 'ext': 'mp4', - 'title': '重庆婷婷女王足交', - 'upload_date': '20150213', - 'timestamp': 1423804862, - 'duration': 1753, - 'view_count': int, - 'like_count': int, - 'dislike_count': int, - 'comment_count': int, - 'age_limit': 18, - 'tags': list, - 'categories': list, - }, - 'params': { - 'skip_download': True, - }, - 'skip': 'Video has been flagged for verification in accordance with our trust and safety policy', - }, { - # subtitles - 'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5af5fef7c2aa7', - 'info_dict': { - 'id': 'ph5af5fef7c2aa7', - 'ext': 'mp4', - 'title': 'BFFS - Cute Teen Girls Share Cock On the Floor', - 'uploader': 'BFFs', - 'duration': 622, - 'view_count': int, - 'like_count': int, - 'dislike_count': int, - 'comment_count': int, - 'age_limit': 18, - 'tags': list, - 'categories': list, - 'subtitles': { - 'en': [{ - "ext": 'srt' - }] - }, - }, - 'params': { - 'skip_download': True, - }, - 'skip': 'This video has been disabled', - }, { - 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d', - 'only_matching': True, - }, { - # removed at the request of cam4.com - 'url': 'http://fr.pornhub.com/view_video.php?viewkey=ph55ca2f9760862', - 
'only_matching': True, - }, { - # removed at the request of the copyright owner - 'url': 'http://www.pornhub.com/view_video.php?viewkey=788152859', - 'only_matching': True, - }, { - # removed by uploader - 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph572716d15a111', - 'only_matching': True, - }, { - # private video - 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph56fd731fce6b7', - 'only_matching': True, - }, { - 'url': 'https://www.thumbzilla.com/video/ph56c6114abd99a/horny-girlfriend-sex', - 'only_matching': True, - }, { - 'url': 'http://www.pornhub.com/video/show?viewkey=648719015', - 'only_matching': True, - }, { - 'url': 'https://www.pornhub.net/view_video.php?viewkey=203640933', - 'only_matching': True, - }, { - 'url': 'https://www.pornhub.org/view_video.php?viewkey=203640933', - 'only_matching': True, - }, { - 'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5e4acdae54a82', - 'only_matching': True, - }, { - # Some videos are available with the same id on both premium - # and non-premium sites (e.g. 
this and the following test) - 'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5f75b0f4b18e3', - 'only_matching': True, - }, { - 'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5f75b0f4b18e3', - 'only_matching': True, - }, { - # geo restricted - 'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5a9813bfa7156', - 'only_matching': True, - }, { - 'url': 'http://pornhubthbh7ap3u.onion/view_video.php?viewkey=ph5a9813bfa7156', - 'only_matching': True, - }] - - @staticmethod - def _extract_urls(webpage): - return re.findall( - r']+?src=["\'](?P(?:https?:)?//(?:www\.)?pornhub(?:premium)?\.(?:com|net|org)/embed/[\da-z]+)', - webpage) - - def _extract_count(self, pattern, webpage, name): - return str_to_int(self._search_regex( - pattern, webpage, '%s count' % name, fatal=False)) - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - host = mobj.group('host') or 'pornhub.com' - video_id = mobj.group('id') - - self._login(host) - - self._set_cookie(host, 'age_verified', '1') - - def dl_webpage(platform): - self._set_cookie(host, 'platform', platform) - return self._download_webpage( - 'https://www.%s/view_video.php?viewkey=%s' % (host, video_id), - video_id, 'Downloading %s webpage' % platform) - - webpage = dl_webpage('pc') - - error_msg = self._html_search_regex( - (r'(?s)]+class=(["\'])(?:(?!\1).)*\b(?:removed|userMessageSection)\b(?:(?!\1).)*\1[^>]*>(?P.+?)', - r'(?s)]+class=["\']noVideo["\'][^>]*>(?P.+?)'), - webpage, 'error message', default=None, group='error') - if error_msg: - error_msg = re.sub(r'\s+', ' ', error_msg) - raise ExtractorError( - 'PornHub said: %s' % error_msg, - expected=True, video_id=video_id) - - if any(re.search(p, webpage) for p in ( - r'class=["\']geoBlocked["\']', - r'>\s*This content is unavailable in your country')): - self.raise_geo_restricted() - - # video_title from flashvars contains whitespace instead of non-ASCII (see - # http://www.pornhub.com/view_video.php?viewkey=1331683002), not 
relying - # on that anymore. - title = self._html_search_meta( - 'twitter:title', webpage, default=None) or self._html_search_regex( - (r'(?s)]+class=["\']title["\'][^>]*>(?P.+?)</h1>', - r'<div[^>]+data-video-title=(["\'])(?P<title>(?:(?!\1).)+)\1', - r'shareTitle["\']\s*[=:]\s*(["\'])(?P<title>(?:(?!\1).)+)\1'), - webpage, 'title', group='title') - - video_urls = [] - video_urls_set = set() - subtitles = {} - - flashvars = self._parse_json( - self._search_regex( - r'var\s+flashvars_\d+\s*=\s*({.+?});', webpage, 'flashvars', default='{}'), - video_id) - if flashvars: - subtitle_url = url_or_none(flashvars.get('closedCaptionsFile')) - if subtitle_url: - subtitles.setdefault('en', []).append({ - 'url': subtitle_url, - 'ext': 'srt', - }) - thumbnail = flashvars.get('image_url') - duration = int_or_none(flashvars.get('video_duration')) - media_definitions = flashvars.get('mediaDefinitions') - if isinstance(media_definitions, list): - for definition in media_definitions: - if not isinstance(definition, dict): - continue - video_url = definition.get('videoUrl') - if not video_url or not isinstance(video_url, compat_str): - continue - if video_url in video_urls_set: - continue - video_urls_set.add(video_url) - video_urls.append( - (video_url, int_or_none(definition.get('quality')))) - else: - thumbnail, duration = [None] * 2 - - def extract_js_vars(webpage, pattern, default=NO_DEFAULT): - assignments = self._search_regex( - pattern, webpage, 'encoded url', default=default) - if not assignments: - return {} - - assignments = assignments.split(';') - - js_vars = {} - - def parse_js_value(inp): - inp = re.sub(r'/\*(?:(?!\*/).)*?\*/', '', inp) - if '+' in inp: - inps = inp.split('+') - return functools.reduce( - operator.concat, map(parse_js_value, inps)) - inp = inp.strip() - if inp in js_vars: - return js_vars[inp] - return remove_quotes(inp) - - for assn in assignments: - assn = assn.strip() - if not assn: - continue - assn = re.sub(r'var\s+', '', assn) - vname, value = 
assn.split('=', 1) - js_vars[vname] = parse_js_value(value) - return js_vars - - def add_video_url(video_url): - v_url = url_or_none(video_url) - if not v_url: - return - if v_url in video_urls_set: - return - video_urls.append((v_url, None)) - video_urls_set.add(v_url) - - def parse_quality_items(quality_items): - q_items = self._parse_json(quality_items, video_id, fatal=False) - if not isinstance(q_items, list): - return - for item in q_items: - if isinstance(item, dict): - add_video_url(item.get('url')) - - if not video_urls: - FORMAT_PREFIXES = ('media', 'quality', 'qualityItems') - js_vars = extract_js_vars( - webpage, r'(var\s+(?:%s)_.+)' % '|'.join(FORMAT_PREFIXES), - default=None) - if js_vars: - for key, format_url in js_vars.items(): - if key.startswith(FORMAT_PREFIXES[-1]): - parse_quality_items(format_url) - elif any(key.startswith(p) for p in FORMAT_PREFIXES[:2]): - add_video_url(format_url) - if not video_urls and re.search( - r'<[^>]+\bid=["\']lockedPlayer', webpage): - raise ExtractorError( - 'Video %s is locked' % video_id, expected=True) - - if not video_urls: - js_vars = extract_js_vars( - dl_webpage('tv'), r'(var.+?mediastring.+?)</script>') - add_video_url(js_vars['mediastring']) - - for mobj in re.finditer( - r'<a[^>]+\bclass=["\']downloadBtn\b[^>]+\bhref=(["\'])(?P<url>(?:(?!\1).)+)\1', - webpage): - video_url = mobj.group('url') - if video_url not in video_urls_set: - video_urls.append((video_url, None)) - video_urls_set.add(video_url) - - upload_date = None - formats = [] - - def add_format(format_url, height=None): - ext = determine_ext(format_url) - if ext == 'mpd': - formats.extend(self._extract_mpd_formats( - format_url, video_id, mpd_id='dash', fatal=False)) - return - if ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( - format_url, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls', fatal=False)) - return - if not height: - height = int_or_none(self._search_regex( - r'(?P<height>\d+)[pP]?_\d+[kK]', format_url, 
'height', - default=None)) - formats.append({ - 'url': format_url, - 'format_id': '%dp' % height if height else None, - 'height': height, - }) - - for video_url, height in video_urls: - if not upload_date: - upload_date = self._search_regex( - r'/(\d{6}/\d{2})/', video_url, 'upload data', default=None) - if upload_date: - upload_date = upload_date.replace('/', '') - if '/video/get_media' in video_url: - medias = self._download_json(video_url, video_id, fatal=False) - if isinstance(medias, list): - for media in medias: - if not isinstance(media, dict): - continue - video_url = url_or_none(media.get('videoUrl')) - if not video_url: - continue - height = int_or_none(media.get('quality')) - add_format(video_url, height) - continue - add_format(video_url) - self._sort_formats( - formats, field_preference=('height', 'width', 'fps', 'format_id')) - - video_uploader = self._html_search_regex( - r'(?s)From: .+?<(?:a\b[^>]+\bhref=["\']/(?:(?:user|channel)s|model|pornstar)/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<', - webpage, 'uploader', default=None) - - def extract_vote_count(kind, name): - return self._extract_count( - (r'<span[^>]+\bclass="votes%s"[^>]*>([\d,\.]+)</span>' % kind, - r'<span[^>]+\bclass=["\']votes%s["\'][^>]*\bdata-rating=["\'](\d+)' % kind), - webpage, name) - - view_count = self._extract_count( - r'<span class="count">([\d,\.]+)</span> [Vv]iews', webpage, 'view') - like_count = extract_vote_count('Up', 'like') - dislike_count = extract_vote_count('Down', 'dislike') - comment_count = self._extract_count( - r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment') - - def extract_list(meta_key): - div = self._search_regex( - r'(?s)<div[^>]+\bclass=["\'].*?\b%sWrapper[^>]*>(.+?)</div>' - % meta_key, webpage, meta_key, default=None) - if div: - return re.findall(r'<a[^>]+\bhref=[^>]+>([^<]+)', div) - - info = self._search_json_ld(webpage, video_id, default={}) - # description provided in JSON-LD is irrelevant - info['description'] = None - - return 
merge_dicts({ - 'id': video_id, - 'uploader': video_uploader, - 'upload_date': upload_date, - 'title': title, - 'thumbnail': thumbnail, - 'duration': duration, - 'view_count': view_count, - 'like_count': like_count, - 'dislike_count': dislike_count, - 'comment_count': comment_count, - 'formats': formats, - 'age_limit': 18, - 'tags': extract_list('tags'), - 'categories': extract_list('categories'), - 'subtitles': subtitles, - }, info) - - -class PornHubPlaylistBaseIE(PornHubBaseIE): - def _extract_page(self, url): - return int_or_none(self._search_regex( - r'\bpage=(\d+)', url, 'page', default=None)) - - def _extract_entries(self, webpage, host): - # Only process container div with main playlist content skipping - # drop-down menu that uses similar pattern for videos (see - # https://github.com/ytdl-org/youtube-dl/issues/11594). - container = self._search_regex( - r'(?s)(<div[^>]+class=["\']container.+)', webpage, - 'container', default=webpage) - - return [ - self.url_result( - 'http://www.%s/%s' % (host, video_url), - PornHubIE.ie_key(), video_title=title) - for video_url, title in orderedSet(re.findall( - r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"[^>]*\s+title="([^"]+)"', - container)) - ] - - -class PornHubUserIE(PornHubPlaylistBaseIE): - _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?%s/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)' % PornHubBaseIE._PORNHUB_HOST_RE - _TESTS = [{ - 'url': 'https://www.pornhub.com/model/zoe_ph', - 'playlist_mincount': 118, - }, { - 'url': 'https://www.pornhub.com/pornstar/liz-vicious', - 'info_dict': { - 'id': 'liz-vicious', - }, - 'playlist_mincount': 118, - }, { - 'url': 'https://www.pornhub.com/users/russianveet69', - 'only_matching': True, - }, { - 'url': 'https://www.pornhub.com/channels/povd', - 'only_matching': True, - }, { - 'url': 'https://www.pornhub.com/model/zoe_ph?abc=1', - 'only_matching': True, - }, { - # Unavailable via /videos page, but available with direct pagination - 
# on pornstar page (see [1]), requires premium - # 1. https://github.com/ytdl-org/youtube-dl/issues/27853 - 'url': 'https://www.pornhubpremium.com/pornstar/sienna-west', - 'only_matching': True, - }, { - # Same as before, multi page - 'url': 'https://www.pornhubpremium.com/pornstar/lily-labeau', - 'only_matching': True, - }, { - 'url': 'https://pornhubthbh7ap3u.onion/model/zoe_ph', - 'only_matching': True, - }] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - user_id = mobj.group('id') - videos_url = '%s/videos' % mobj.group('url') - page = self._extract_page(url) - if page: - videos_url = update_url_query(videos_url, {'page': page}) - return self.url_result( - videos_url, ie=PornHubPagedVideoListIE.ie_key(), video_id=user_id) - - -class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE): - @staticmethod - def _has_more(webpage): - return re.search( - r'''(?x) - <li[^>]+\bclass=["\']page_next| - <link[^>]+\brel=["\']next| - <button[^>]+\bid=["\']moreDataBtn - ''', webpage) is not None - - def _entries(self, url, host, item_id): - page = self._extract_page(url) - - VIDEOS = '/videos' - - def download_page(base_url, num, fallback=False): - note = 'Downloading page %d%s' % (num, ' (switch to fallback)' if fallback else '') - return self._download_webpage( - base_url, item_id, note, query={'page': num}) - - def is_404(e): - return isinstance(e.cause, compat_HTTPError) and e.cause.code == 404 - - base_url = url - has_page = page is not None - first_page = page if has_page else 1 - for page_num in (first_page, ) if has_page else itertools.count(first_page): - try: - try: - webpage = download_page(base_url, page_num) - except ExtractorError as e: - # Some sources may not be available via /videos page, - # trying to fallback to main page pagination (see [1]) - # 1. 
https://github.com/ytdl-org/youtube-dl/issues/27853 - if is_404(e) and page_num == first_page and VIDEOS in base_url: - base_url = base_url.replace(VIDEOS, '') - webpage = download_page(base_url, page_num, fallback=True) - else: - raise - except ExtractorError as e: - if is_404(e) and page_num != first_page: - break - raise - page_entries = self._extract_entries(webpage, host) - if not page_entries: - break - for e in page_entries: - yield e - if not self._has_more(webpage): - break - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - host = mobj.group('host') - item_id = mobj.group('id') - - self._login(host) - - return self.playlist_result(self._entries(url, host, item_id), item_id) - - -class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE): - _VALID_URL = r'https?://(?:[^/]+\.)?%s/(?P<id>(?:[^/]+/)*[^/?#&]+)' % PornHubBaseIE._PORNHUB_HOST_RE - _TESTS = [{ - 'url': 'https://www.pornhub.com/model/zoe_ph/videos', - 'only_matching': True, - }, { - 'url': 'http://www.pornhub.com/users/rushandlia/videos', - 'only_matching': True, - }, { - 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos', - 'info_dict': { - 'id': 'pornstar/jenny-blighe/videos', - }, - 'playlist_mincount': 149, - }, { - 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos?page=3', - 'info_dict': { - 'id': 'pornstar/jenny-blighe/videos', - }, - 'playlist_mincount': 40, - }, { - # default sorting as Top Rated Videos - 'url': 'https://www.pornhub.com/channels/povd/videos', - 'info_dict': { - 'id': 'channels/povd/videos', - }, - 'playlist_mincount': 293, - }, { - # Top Rated Videos - 'url': 'https://www.pornhub.com/channels/povd/videos?o=ra', - 'only_matching': True, - }, { - # Most Recent Videos - 'url': 'https://www.pornhub.com/channels/povd/videos?o=da', - 'only_matching': True, - }, { - # Most Viewed Videos - 'url': 'https://www.pornhub.com/channels/povd/videos?o=vi', - 'only_matching': True, - }, { - 'url': 
'http://www.pornhub.com/users/zoe_ph/videos/public', - 'only_matching': True, - }, { - # Most Viewed Videos - 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=mv', - 'only_matching': True, - }, { - # Top Rated Videos - 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=tr', - 'only_matching': True, - }, { - # Longest Videos - 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=lg', - 'only_matching': True, - }, { - # Newest Videos - 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=cm', - 'only_matching': True, - }, { - 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos/paid', - 'only_matching': True, - }, { - 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos/fanonly', - 'only_matching': True, - }, { - 'url': 'https://www.pornhub.com/video', - 'only_matching': True, - }, { - 'url': 'https://www.pornhub.com/video?page=3', - 'only_matching': True, - }, { - 'url': 'https://www.pornhub.com/video/search?search=123', - 'only_matching': True, - }, { - 'url': 'https://www.pornhub.com/categories/teen', - 'only_matching': True, - }, { - 'url': 'https://www.pornhub.com/categories/teen?page=3', - 'only_matching': True, - }, { - 'url': 'https://www.pornhub.com/hd', - 'only_matching': True, - }, { - 'url': 'https://www.pornhub.com/hd?page=3', - 'only_matching': True, - }, { - 'url': 'https://www.pornhub.com/described-video', - 'only_matching': True, - }, { - 'url': 'https://www.pornhub.com/described-video?page=2', - 'only_matching': True, - }, { - 'url': 'https://www.pornhub.com/video/incategories/60fps-1/hd-porn', - 'only_matching': True, - }, { - 'url': 'https://www.pornhub.com/playlist/44121572', - 'info_dict': { - 'id': 'playlist/44121572', - }, - 'playlist_mincount': 132, - }, { - 'url': 'https://www.pornhub.com/playlist/4667351', - 'only_matching': True, - }, { - 'url': 'https://de.pornhub.com/playlist/4667351', - 'only_matching': True, - }, { - 'url': 'https://pornhubthbh7ap3u.onion/model/zoe_ph/videos', - 
'only_matching': True, - }] - - @classmethod - def suitable(cls, url): - return (False - if PornHubIE.suitable(url) or PornHubUserIE.suitable(url) or PornHubUserVideosUploadIE.suitable(url) - else super(PornHubPagedVideoListIE, cls).suitable(url)) - - -class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE): - _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?%s/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)' % PornHubBaseIE._PORNHUB_HOST_RE - _TESTS = [{ - 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload', - 'info_dict': { - 'id': 'jenny-blighe', - }, - 'playlist_mincount': 129, - }, { - 'url': 'https://www.pornhub.com/model/zoe_ph/videos/upload', - 'only_matching': True, - }, { - 'url': 'http://pornhubthbh7ap3u.onion/pornstar/jenny-blighe/videos/upload', - 'only_matching': True, - }] diff --git a/youtube_dl/extractor/pornotube.py b/youtube_dl/extractor/pornotube.py deleted file mode 100644 index 1b5b9a320..000000000 --- a/youtube_dl/extractor/pornotube.py +++ /dev/null @@ -1,85 +0,0 @@ -from __future__ import unicode_literals - -import json - -from .common import InfoExtractor -from ..utils import int_or_none - - -class PornotubeIE(InfoExtractor): - _VALID_URL = r'https?://(?:\w+\.)?pornotube\.com/(?:[^?#]*?)/video/(?P<id>[0-9]+)' - _TEST = { - 'url': 'http://www.pornotube.com/orientation/straight/video/4964/title/weird-hot-and-wet-science', - 'md5': '60fc5a4f0d93a97968fc7999d98260c9', - 'info_dict': { - 'id': '4964', - 'ext': 'mp4', - 'upload_date': '20141203', - 'title': 'Weird Hot and Wet Science', - 'description': 'md5:a8304bef7ef06cb4ab476ca6029b01b0', - 'categories': ['Adult Humor', 'Blondes'], - 'uploader': 'Alpha Blue Archives', - 'thumbnail': r're:^https?://.*\.jpg$', - 'timestamp': 1417582800, - 'age_limit': 18, - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - token = self._download_json( - 'https://api.aebn.net/auth/v2/origins/authenticate', - video_id, note='Downloading token', - 
data=json.dumps({'credentials': 'Clip Application'}).encode('utf-8'), - headers={ - 'Content-Type': 'application/json', - 'Origin': 'http://www.pornotube.com', - })['tokenKey'] - - video_url = self._download_json( - 'https://api.aebn.net/delivery/v1/clips/%s/MP4' % video_id, - video_id, note='Downloading delivery information', - headers={'Authorization': token})['mediaUrl'] - - FIELDS = ( - 'title', 'description', 'startSecond', 'endSecond', 'publishDate', - 'studios{name}', 'categories{name}', 'movieId', 'primaryImageNumber' - ) - - info = self._download_json( - 'https://api.aebn.net/content/v2/clips/%s?fields=%s' - % (video_id, ','.join(FIELDS)), video_id, - note='Downloading metadata', - headers={'Authorization': token}) - - if isinstance(info, list): - info = info[0] - - title = info['title'] - - timestamp = int_or_none(info.get('publishDate'), scale=1000) - uploader = info.get('studios', [{}])[0].get('name') - movie_id = info.get('movieId') - primary_image_number = info.get('primaryImageNumber') - thumbnail = None - if movie_id and primary_image_number: - thumbnail = 'http://pic.aebn.net/dis/t/%s/%s_%08d.jpg' % ( - movie_id, movie_id, primary_image_number) - start = int_or_none(info.get('startSecond')) - end = int_or_none(info.get('endSecond')) - duration = end - start if start and end else None - categories = [c['name'] for c in info.get('categories', []) if c.get('name')] - - return { - 'id': video_id, - 'url': video_url, - 'title': title, - 'description': info.get('description'), - 'duration': duration, - 'timestamp': timestamp, - 'uploader': uploader, - 'thumbnail': thumbnail, - 'categories': categories, - 'age_limit': 18, - } diff --git a/youtube_dl/extractor/pornovoisines.py b/youtube_dl/extractor/pornovoisines.py deleted file mode 100644 index b6b71069d..000000000 --- a/youtube_dl/extractor/pornovoisines.py +++ /dev/null @@ -1,108 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from 
..utils import ( - int_or_none, - float_or_none, - unified_strdate, -) - - -class PornoVoisinesIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?pornovoisines\.com/videos/show/(?P<id>\d+)/(?P<display_id>[^/.]+)' - - _TEST = { - 'url': 'http://www.pornovoisines.com/videos/show/919/recherche-appartement.html', - 'md5': '6f8aca6a058592ab49fe701c8ba8317b', - 'info_dict': { - 'id': '919', - 'display_id': 'recherche-appartement', - 'ext': 'mp4', - 'title': 'Recherche appartement', - 'description': 'md5:fe10cb92ae2dd3ed94bb4080d11ff493', - 'thumbnail': r're:^https?://.*\.jpg$', - 'upload_date': '20140925', - 'duration': 120, - 'view_count': int, - 'average_rating': float, - 'categories': ['Débutante', 'Débutantes', 'Scénario', 'Sodomie'], - 'age_limit': 18, - 'subtitles': { - 'fr': [{ - 'ext': 'vtt', - }] - }, - } - } - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - display_id = mobj.group('display_id') - - settings_url = self._download_json( - 'http://www.pornovoisines.com/api/video/%s/getsettingsurl/' % video_id, - video_id, note='Getting settings URL')['video_settings_url'] - settings = self._download_json(settings_url, video_id)['data'] - - formats = [] - for kind, data in settings['variants'].items(): - if kind == 'HLS': - formats.extend(self._extract_m3u8_formats( - data, video_id, ext='mp4', entry_protocol='m3u8_native', m3u8_id='hls')) - elif kind == 'MP4': - for item in data: - formats.append({ - 'url': item['url'], - 'height': item.get('height'), - 'bitrate': item.get('bitrate'), - }) - self._sort_formats(formats) - - webpage = self._download_webpage(url, video_id) - - title = self._og_search_title(webpage) - description = self._og_search_description(webpage) - - # The webpage has a bug - there's no space between "thumb" and src= - thumbnail = self._html_search_regex( - r'<img[^>]+class=([\'"])thumb\1[^>]*src=([\'"])(?P<url>[^"]+)\2', - webpage, 'thumbnail', fatal=False, group='url') - - upload_date = 
unified_strdate(self._search_regex( - r'Le\s*<b>([\d/]+)', webpage, 'upload date', fatal=False)) - duration = settings.get('main', {}).get('duration') - view_count = int_or_none(self._search_regex( - r'(\d+) vues', webpage, 'view count', fatal=False)) - average_rating = self._search_regex( - r'Note\s*:\s*(\d+(?:,\d+)?)', webpage, 'average rating', fatal=False) - if average_rating: - average_rating = float_or_none(average_rating.replace(',', '.')) - - categories = self._html_search_regex( - r'(?s)Catégories\s*:\s*<b>(.+?)</b>', webpage, 'categories', fatal=False) - if categories: - categories = [category.strip() for category in categories.split(',')] - - subtitles = {'fr': [{ - 'url': subtitle, - } for subtitle in settings.get('main', {}).get('vtt_tracks', {}).values()]} - - return { - 'id': video_id, - 'display_id': display_id, - 'formats': formats, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'upload_date': upload_date, - 'duration': duration, - 'view_count': view_count, - 'average_rating': average_rating, - 'categories': categories, - 'age_limit': 18, - 'subtitles': subtitles, - } diff --git a/youtube_dl/extractor/pornoxo.py b/youtube_dl/extractor/pornoxo.py deleted file mode 100644 index 2831368b6..000000000 --- a/youtube_dl/extractor/pornoxo.py +++ /dev/null @@ -1,58 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - str_to_int, -) - - -class PornoXOIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?pornoxo\.com/videos/(?P<id>\d+)/(?P<display_id>[^/]+)\.html' - _TEST = { - 'url': 'http://www.pornoxo.com/videos/7564/striptease-from-sexy-secretary.html', - 'md5': '582f28ecbaa9e6e24cb90f50f524ce87', - 'info_dict': { - 'id': '7564', - 'ext': 'flv', - 'title': 'Striptease From Sexy Secretary!', - 'display_id': 'striptease-from-sexy-secretary', - 'description': 'md5:0ee35252b685b3883f4a1d38332f9980', - 'categories': list, # NSFW - 'thumbnail': 
r're:https?://.*\.jpg$', - 'age_limit': 18, - } - } - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id, display_id = mobj.groups() - - webpage = self._download_webpage(url, video_id) - video_data = self._extract_jwplayer_data(webpage, video_id, require_title=False) - - title = self._html_search_regex( - r'<title>([^<]+)\s*-\s*PornoXO', webpage, 'title') - - view_count = str_to_int(self._html_search_regex( - r'[vV]iews:\s*([0-9,]+)', webpage, 'view count', fatal=False)) - - categories_str = self._html_search_regex( - r'<meta name="description" content=".*featuring\s*([^"]+)"', - webpage, 'categories', fatal=False) - categories = ( - None if categories_str is None - else categories_str.split(',')) - - video_data.update({ - 'id': video_id, - 'title': title, - 'display_id': display_id, - 'description': self._html_search_meta('description', webpage), - 'categories': categories, - 'view_count': view_count, - 'age_limit': 18, - }) - - return video_data diff --git a/youtube_dl/extractor/pr0gramm.py b/youtube_dl/extractor/pr0gramm.py deleted file mode 100644 index b68224fd5..000000000 --- a/youtube_dl/extractor/pr0gramm.py +++ /dev/null @@ -1,105 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor - -import re -from ..utils import ( - merge_dicts, -) - - -class Pr0grammStaticIE(InfoExtractor): - # Possible urls: - # https://pr0gramm.com/static/5466437 - _VALID_URL = r'https?://pr0gramm\.com/static/(?P<id>[0-9]+)' - _TEST = { - 'url': 'https://pr0gramm.com/static/5466437', - 'md5': '52fa540d70d3edc286846f8ca85938aa', - 'info_dict': { - 'id': '5466437', - 'ext': 'mp4', - 'title': 'pr0gramm-5466437 by g11st', - 'uploader': 'g11st', - 'upload_date': '20221221', - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - # Fetch media sources - entries = self._parse_html5_media_entries(url, webpage, video_id) - media_info = 
entries[0] - - # this raises if there are no formats - self._sort_formats(media_info.get('formats') or []) - - # Fetch author - uploader = self._html_search_regex(r'by\W+([\w-]+)\W+', webpage, 'uploader') - - # Fetch approx upload timestamp from filename - # Have None-defaults in case the extraction fails - uploadDay = None - uploadMon = None - uploadYear = None - uploadTimestr = None - # (//img.pr0gramm.com/2022/12/21/62ae8aa5e2da0ebf.mp4) - m = re.search(r'//img\.pr0gramm\.com/(?P<year>[\d]+)/(?P<mon>[\d]+)/(?P<day>[\d]+)/\w+\.\w{,4}', webpage) - - if (m): - # Up to a day of accuracy should suffice... - uploadDay = m.groupdict().get('day') - uploadMon = m.groupdict().get('mon') - uploadYear = m.groupdict().get('year') - uploadTimestr = uploadYear + uploadMon + uploadDay - - return merge_dicts({ - 'id': video_id, - 'title': 'pr0gramm-%s%s' % (video_id, (' by ' + uploader) if uploader else ''), - 'uploader': uploader, - 'upload_date': uploadTimestr - }, media_info) - - -# This extractor is for the primary url (used for sharing, and appears in the -# location bar) Since this page loads the DOM via JS, yt-dl can't find any -# video information here. So let's redirect to a compatibility version of -# the site, which does contain the <video>-element by itself, without requiring -# js to be ran. -class Pr0grammIE(InfoExtractor): - # Possible urls: - # https://pr0gramm.com/new/546637 - # https://pr0gramm.com/new/video/546637 - # https://pr0gramm.com/top/546637 - # https://pr0gramm.com/top/video/546637 - # https://pr0gramm.com/user/g11st/uploads/5466437 - # https://pr0gramm.com/user/froschler/dafur-ist-man-hier/5091290 - # https://pr0gramm.com/user/froschler/reinziehen-1elf/5232030 - # https://pr0gramm.com/user/froschler/1elf/5232030 - # https://pr0gramm.com/new/5495710:comment62621020 <- this is not the id! 
- # https://pr0gramm.com/top/fruher war alles damals/5498175 - - _VALID_URL = r'https?:\/\/pr0gramm\.com\/(?!static/\d+).+?\/(?P<id>[\d]+)(:|$)' - _TEST = { - 'url': 'https://pr0gramm.com/new/video/5466437', - 'info_dict': { - 'id': '5466437', - 'ext': 'mp4', - 'title': 'pr0gramm-5466437 by g11st', - 'uploader': 'g11st', - 'upload_date': '20221221', - } - } - - def _generic_title(): - return "oof" - - def _real_extract(self, url): - video_id = self._match_id(url) - - return self.url_result( - 'https://pr0gramm.com/static/' + video_id, - video_id=video_id, - ie=Pr0grammStaticIE.ie_key()) diff --git a/youtube_dl/extractor/presstv.py b/youtube_dl/extractor/presstv.py deleted file mode 100644 index b5c279203..000000000 --- a/youtube_dl/extractor/presstv.py +++ /dev/null @@ -1,74 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import remove_start - - -class PressTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?presstv\.ir/[^/]+/(?P<y>\d+)/(?P<m>\d+)/(?P<d>\d+)/(?P<id>\d+)/(?P<display_id>[^/]+)?' 
- - _TEST = { - 'url': 'http://www.presstv.ir/Detail/2016/04/09/459911/Australian-sewerage-treatment-facility-/', - 'md5': '5d7e3195a447cb13e9267e931d8dd5a5', - 'info_dict': { - 'id': '459911', - 'display_id': 'Australian-sewerage-treatment-facility-', - 'ext': 'mp4', - 'title': 'Organic mattresses used to clean waste water', - 'upload_date': '20160409', - 'thumbnail': r're:^https?://.*\.jpg', - 'description': 'md5:20002e654bbafb6908395a5c0cfcd125' - } - } - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - display_id = mobj.group('display_id') or video_id - - webpage = self._download_webpage(url, display_id) - - # extract video URL from webpage - video_url = self._hidden_inputs(webpage)['inpPlayback'] - - # build list of available formats - # specified in http://www.presstv.ir/Scripts/playback.js - base_url = 'http://192.99.219.222:82/presstv' - _formats = [ - (180, '_low200.mp4'), - (360, '_low400.mp4'), - (720, '_low800.mp4'), - (1080, '.mp4') - ] - - formats = [{ - 'url': base_url + video_url[:-4] + extension, - 'format_id': '%dp' % height, - 'height': height, - } for height, extension in _formats] - - # extract video metadata - title = remove_start( - self._html_search_meta('title', webpage, fatal=True), 'PressTV-') - - thumbnail = self._og_search_thumbnail(webpage) - description = self._og_search_description(webpage) - - upload_date = '%04d%02d%02d' % ( - int(mobj.group('y')), - int(mobj.group('m')), - int(mobj.group('d')), - ) - - return { - 'id': video_id, - 'display_id': display_id, - 'title': title, - 'formats': formats, - 'thumbnail': thumbnail, - 'upload_date': upload_date, - 'description': description - } diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py deleted file mode 100644 index e47088292..000000000 --- a/youtube_dl/extractor/prosiebensat1.py +++ /dev/null @@ -1,500 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from 
hashlib import sha1 -from .common import InfoExtractor -from ..compat import compat_str -from ..utils import ( - ExtractorError, - determine_ext, - float_or_none, - int_or_none, - merge_dicts, - unified_strdate, -) - - -class ProSiebenSat1BaseIE(InfoExtractor): - _GEO_BYPASS = False - _ACCESS_ID = None - _SUPPORTED_PROTOCOLS = 'dash:clear,hls:clear,progressive:clear' - _V4_BASE_URL = 'https://vas-v4.p7s1video.net/4.0/get' - - def _extract_video_info(self, url, clip_id): - client_location = url - - video = self._download_json( - 'http://vas.sim-technik.de/vas/live/v2/videos', - clip_id, 'Downloading videos JSON', query={ - 'access_token': self._TOKEN, - 'client_location': client_location, - 'client_name': self._CLIENT_NAME, - 'ids': clip_id, - })[0] - - if video.get('is_protected') is True: - raise ExtractorError('This video is DRM protected.', expected=True) - - formats = [] - if self._ACCESS_ID: - raw_ct = self._ENCRYPTION_KEY + clip_id + self._IV + self._ACCESS_ID - protocols = self._download_json( - self._V4_BASE_URL + 'protocols', clip_id, - 'Downloading protocols JSON', - headers=self.geo_verification_headers(), query={ - 'access_id': self._ACCESS_ID, - 'client_token': sha1((raw_ct).encode()).hexdigest(), - 'video_id': clip_id, - }, fatal=False, expected_status=(403,)) or {} - error = protocols.get('error') or {} - if error.get('title') == 'Geo check failed': - self.raise_geo_restricted(countries=['AT', 'CH', 'DE']) - server_token = protocols.get('server_token') - if server_token: - urls = (self._download_json( - self._V4_BASE_URL + 'urls', clip_id, 'Downloading urls JSON', query={ - 'access_id': self._ACCESS_ID, - 'client_token': sha1((raw_ct + server_token + self._SUPPORTED_PROTOCOLS).encode()).hexdigest(), - 'protocols': self._SUPPORTED_PROTOCOLS, - 'server_token': server_token, - 'video_id': clip_id, - }, fatal=False) or {}).get('urls') or {} - for protocol, variant in urls.items(): - source_url = variant.get('clear', {}).get('url') - if not source_url: - 
continue - if protocol == 'dash': - formats.extend(self._extract_mpd_formats( - source_url, clip_id, mpd_id=protocol, fatal=False)) - elif protocol == 'hls': - formats.extend(self._extract_m3u8_formats( - source_url, clip_id, 'mp4', 'm3u8_native', - m3u8_id=protocol, fatal=False)) - else: - formats.append({ - 'url': source_url, - 'format_id': protocol, - }) - if not formats: - source_ids = [compat_str(source['id']) for source in video['sources']] - - client_id = self._SALT[:2] + sha1(''.join([clip_id, self._SALT, self._TOKEN, client_location, self._SALT, self._CLIENT_NAME]).encode('utf-8')).hexdigest() - - sources = self._download_json( - 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources' % clip_id, - clip_id, 'Downloading sources JSON', query={ - 'access_token': self._TOKEN, - 'client_id': client_id, - 'client_location': client_location, - 'client_name': self._CLIENT_NAME, - }) - server_id = sources['server_id'] - - def fix_bitrate(bitrate): - bitrate = int_or_none(bitrate) - if not bitrate: - return None - return (bitrate // 1000) if bitrate % 1000 == 0 else bitrate - - for source_id in source_ids: - client_id = self._SALT[:2] + sha1(''.join([self._SALT, clip_id, self._TOKEN, server_id, client_location, source_id, self._SALT, self._CLIENT_NAME]).encode('utf-8')).hexdigest() - urls = self._download_json( - 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources/url' % clip_id, - clip_id, 'Downloading urls JSON', fatal=False, query={ - 'access_token': self._TOKEN, - 'client_id': client_id, - 'client_location': client_location, - 'client_name': self._CLIENT_NAME, - 'server_id': server_id, - 'source_ids': source_id, - }) - if not urls: - continue - if urls.get('status_code') != 0: - raise ExtractorError('This video is unavailable', expected=True) - urls_sources = urls['sources'] - if isinstance(urls_sources, dict): - urls_sources = urls_sources.values() - for source in urls_sources: - source_url = source.get('url') - if not source_url: - continue - protocol = 
source.get('protocol') - mimetype = source.get('mimetype') - if mimetype == 'application/f4m+xml' or 'f4mgenerator' in source_url or determine_ext(source_url) == 'f4m': - formats.extend(self._extract_f4m_formats( - source_url, clip_id, f4m_id='hds', fatal=False)) - elif mimetype == 'application/x-mpegURL': - formats.extend(self._extract_m3u8_formats( - source_url, clip_id, 'mp4', 'm3u8_native', - m3u8_id='hls', fatal=False)) - elif mimetype == 'application/dash+xml': - formats.extend(self._extract_mpd_formats( - source_url, clip_id, mpd_id='dash', fatal=False)) - else: - tbr = fix_bitrate(source['bitrate']) - if protocol in ('rtmp', 'rtmpe'): - mobj = re.search(r'^(?P<url>rtmpe?://[^/]+)/(?P<path>.+)$', source_url) - if not mobj: - continue - path = mobj.group('path') - mp4colon_index = path.rfind('mp4:') - app = path[:mp4colon_index] - play_path = path[mp4colon_index:] - formats.append({ - 'url': '%s/%s' % (mobj.group('url'), app), - 'app': app, - 'play_path': play_path, - 'player_url': 'http://livepassdl.conviva.com/hf/ver/2.79.0.17083/LivePassModuleMain.swf', - 'page_url': 'http://www.prosieben.de', - 'tbr': tbr, - 'ext': 'flv', - 'format_id': 'rtmp%s' % ('-%d' % tbr if tbr else ''), - }) - else: - formats.append({ - 'url': source_url, - 'tbr': tbr, - 'format_id': 'http%s' % ('-%d' % tbr if tbr else ''), - }) - self._sort_formats(formats) - - return { - 'duration': float_or_none(video.get('duration')), - 'formats': formats, - } - - -class ProSiebenSat1IE(ProSiebenSat1BaseIE): - IE_NAME = 'prosiebensat1' - IE_DESC = 'ProSiebenSat.1 Digital' - _VALID_URL = r'''(?x) - https?:// - (?:www\.)? - (?: - (?:beta\.)? 
- (?: - prosieben(?:maxx)?|sixx|sat1(?:gold)?|kabeleins(?:doku)?|the-voice-of-germany|advopedia - )\.(?:de|at|ch)| - ran\.de|fem\.com|advopedia\.de|galileo\.tv/video - ) - /(?P<id>.+) - ''' - - _TESTS = [ - { - # Tests changes introduced in https://github.com/ytdl-org/youtube-dl/pull/6242 - # in response to fixing https://github.com/ytdl-org/youtube-dl/issues/6215: - # - malformed f4m manifest support - # - proper handling of URLs starting with `https?://` in 2.0 manifests - # - recursive child f4m manifests extraction - 'url': 'http://www.prosieben.de/tv/circus-halligalli/videos/218-staffel-2-episode-18-jahresrueckblick-ganze-folge', - 'info_dict': { - 'id': '2104602', - 'ext': 'mp4', - 'title': 'CIRCUS HALLIGALLI - Episode 18 - Staffel 2', - 'description': 'md5:8733c81b702ea472e069bc48bb658fc1', - 'upload_date': '20131231', - 'duration': 5845.04, - 'series': 'CIRCUS HALLIGALLI', - 'season_number': 2, - 'episode': 'Episode 18 - Staffel 2', - 'episode_number': 18, - }, - }, - { - 'url': 'http://www.prosieben.de/videokatalog/Gesellschaft/Leben/Trends/video-Lady-Umstyling-f%C3%BCr-Audrina-Rebekka-Audrina-Fergen-billig-aussehen-Battal-Modica-700544.html', - 'info_dict': { - 'id': '2570327', - 'ext': 'mp4', - 'title': 'Lady-Umstyling für Audrina', - 'description': 'md5:4c16d0c17a3461a0d43ea4084e96319d', - 'upload_date': '20131014', - 'duration': 606.76, - }, - 'params': { - # rtmp download - 'skip_download': True, - }, - 'skip': 'Seems to be broken', - }, - { - 'url': 'http://www.prosiebenmaxx.de/tv/experience/video/144-countdown-fuer-die-autowerkstatt-ganze-folge', - 'info_dict': { - 'id': '2429369', - 'ext': 'mp4', - 'title': 'Countdown für die Autowerkstatt', - 'description': 'md5:809fc051a457b5d8666013bc40698817', - 'upload_date': '20140223', - 'duration': 2595.04, - }, - 'params': { - # rtmp download - 'skip_download': True, - }, - 'skip': 'This video is unavailable', - }, - { - 'url': 'http://www.sixx.de/stars-style/video/sexy-laufen-in-ugg-boots-clip', - 
'info_dict': { - 'id': '2904997', - 'ext': 'mp4', - 'title': 'Sexy laufen in Ugg Boots', - 'description': 'md5:edf42b8bd5bc4e5da4db4222c5acb7d6', - 'upload_date': '20140122', - 'duration': 245.32, - }, - 'params': { - # rtmp download - 'skip_download': True, - }, - 'skip': 'This video is unavailable', - }, - { - 'url': 'http://www.sat1.de/film/der-ruecktritt/video/im-interview-kai-wiesinger-clip', - 'info_dict': { - 'id': '2906572', - 'ext': 'mp4', - 'title': 'Im Interview: Kai Wiesinger', - 'description': 'md5:e4e5370652ec63b95023e914190b4eb9', - 'upload_date': '20140203', - 'duration': 522.56, - }, - 'params': { - # rtmp download - 'skip_download': True, - }, - 'skip': 'This video is unavailable', - }, - { - 'url': 'http://www.kabeleins.de/tv/rosins-restaurants/videos/jagd-auf-fertigkost-im-elsthal-teil-2-ganze-folge', - 'info_dict': { - 'id': '2992323', - 'ext': 'mp4', - 'title': 'Jagd auf Fertigkost im Elsthal - Teil 2', - 'description': 'md5:2669cde3febe9bce13904f701e774eb6', - 'upload_date': '20141014', - 'duration': 2410.44, - }, - 'params': { - # rtmp download - 'skip_download': True, - }, - 'skip': 'This video is unavailable', - }, - { - 'url': 'http://www.ran.de/fussball/bundesliga/video/schalke-toennies-moechte-raul-zurueck-ganze-folge', - 'info_dict': { - 'id': '3004256', - 'ext': 'mp4', - 'title': 'Schalke: Tönnies möchte Raul zurück', - 'description': 'md5:4b5b271d9bcde223b54390754c8ece3f', - 'upload_date': '20140226', - 'duration': 228.96, - }, - 'params': { - # rtmp download - 'skip_download': True, - }, - 'skip': 'This video is unavailable', - }, - { - 'url': 'http://www.the-voice-of-germany.de/video/31-andreas-kuemmert-rocket-man-clip', - 'info_dict': { - 'id': '2572814', - 'ext': 'mp4', - 'title': 'The Voice of Germany - Andreas Kümmert: Rocket Man', - 'description': 'md5:6ddb02b0781c6adf778afea606652e38', - 'timestamp': 1382041620, - 'upload_date': '20131017', - 'duration': 469.88, - }, - 'params': { - 'skip_download': True, - }, - }, - { - 
'url': 'http://www.fem.com/videos/beauty-lifestyle/kurztrips-zum-valentinstag', - 'info_dict': { - 'id': '2156342', - 'ext': 'mp4', - 'title': 'Kurztrips zum Valentinstag', - 'description': 'Romantischer Kurztrip zum Valentinstag? Nina Heinemann verrät, was sich hier wirklich lohnt.', - 'duration': 307.24, - }, - 'params': { - 'skip_download': True, - }, - }, - { - 'url': 'http://www.prosieben.de/tv/joko-gegen-klaas/videos/playlists/episode-8-ganze-folge-playlist', - 'info_dict': { - 'id': '439664', - 'title': 'Episode 8 - Ganze Folge - Playlist', - 'description': 'md5:63b8963e71f481782aeea877658dec84', - }, - 'playlist_count': 2, - 'skip': 'This video is unavailable', - }, - { - # title in <h2 class="subtitle"> - 'url': 'http://www.prosieben.de/stars/oscar-award/videos/jetzt-erst-enthuellt-das-geheimnis-von-emma-stones-oscar-robe-clip', - 'info_dict': { - 'id': '4895826', - 'ext': 'mp4', - 'title': 'Jetzt erst enthüllt: Das Geheimnis von Emma Stones Oscar-Robe', - 'description': 'md5:e5ace2bc43fadf7b63adc6187e9450b9', - 'upload_date': '20170302', - }, - 'params': { - 'skip_download': True, - }, - 'skip': 'geo restricted to Germany', - }, - { - # geo restricted to Germany - 'url': 'http://www.kabeleinsdoku.de/tv/mayday-alarm-im-cockpit/video/102-notlandung-im-hudson-river-ganze-folge', - 'only_matching': True, - }, - { - # geo restricted to Germany - 'url': 'http://www.sat1gold.de/tv/edel-starck/video/11-staffel-1-episode-1-partner-wider-willen-ganze-folge', - 'only_matching': True, - }, - { - # geo restricted to Germany - 'url': 'https://www.galileo.tv/video/diese-emojis-werden-oft-missverstanden', - 'only_matching': True, - }, - { - 'url': 'http://www.sat1gold.de/tv/edel-starck/playlist/die-gesamte-1-staffel', - 'only_matching': True, - }, - { - 'url': 'http://www.advopedia.de/videos/lenssen-klaert-auf/lenssen-klaert-auf-folge-8-staffel-3-feiertage-und-freie-tage', - 'only_matching': True, - }, - ] - - _TOKEN = 'prosieben' - _SALT = '01!8d8F_)r9]4s[qeuXfP%' - 
_CLIENT_NAME = 'kolibri-2.0.19-splec4' - - _ACCESS_ID = 'x_prosiebenmaxx-de' - _ENCRYPTION_KEY = 'Eeyeey9oquahthainoofashoyoikosag' - _IV = 'Aeluchoc6aevechuipiexeeboowedaok' - - _CLIPID_REGEXES = [ - r'"clip_id"\s*:\s+"(\d+)"', - r'clipid: "(\d+)"', - r'clip[iI]d=(\d+)', - r'clip[iI][dD]\s*=\s*["\'](\d+)', - r"'itemImageUrl'\s*:\s*'/dynamic/thumbnails/full/\d+/(\d+)", - r'proMamsId"\s*:\s*"(\d+)', - r'proMamsId"\s*:\s*"(\d+)', - ] - _TITLE_REGEXES = [ - r'<h2 class="subtitle" itemprop="name">\s*(.+?)</h2>', - r'<header class="clearfix">\s*<h3>(.+?)</h3>', - r'<!-- start video -->\s*<h1>(.+?)</h1>', - r'<h1 class="att-name">\s*(.+?)</h1>', - r'<header class="module_header">\s*<h2>([^<]+)</h2>\s*</header>', - r'<h2 class="video-title" itemprop="name">\s*(.+?)</h2>', - r'<div[^>]+id="veeseoTitle"[^>]*>(.+?)</div>', - r'<h2[^>]+class="subtitle"[^>]*>([^<]+)</h2>', - ] - _DESCRIPTION_REGEXES = [ - r'<p itemprop="description">\s*(.+?)</p>', - r'<div class="videoDecription">\s*<p><strong>Beschreibung</strong>: (.+?)</p>', - r'<div class="g-plusone" data-size="medium"></div>\s*</div>\s*</header>\s*(.+?)\s*<footer>', - r'<p class="att-description">\s*(.+?)\s*</p>', - r'<p class="video-description" itemprop="description">\s*(.+?)</p>', - r'<div[^>]+id="veeseoDescription"[^>]*>(.+?)</div>', - ] - _UPLOAD_DATE_REGEXES = [ - r'<span>\s*(\d{2}\.\d{2}\.\d{4} \d{2}:\d{2}) \|\s*<span itemprop="duration"', - r'<footer>\s*(\d{2}\.\d{2}\.\d{4}) \d{2}:\d{2} Uhr', - r'<span style="padding-left: 4px;line-height:20px; color:#404040">(\d{2}\.\d{2}\.\d{4})</span>', - r'(\d{2}\.\d{2}\.\d{4}) \| \d{2}:\d{2} Min<br/>', - ] - _PAGE_TYPE_REGEXES = [ - r'<meta name="page_type" content="([^"]+)">', - r"'itemType'\s*:\s*'([^']*)'", - ] - _PLAYLIST_ID_REGEXES = [ - r'content[iI]d=(\d+)', - r"'itemId'\s*:\s*'([^']*)'", - ] - _PLAYLIST_CLIP_REGEXES = [ - r'(?s)data-qvt=.+?<a href="([^"]+)"', - ] - - def _extract_clip(self, url, webpage): - clip_id = self._html_search_regex( - self._CLIPID_REGEXES, 
def _extract_playlist(self, url, webpage):
    """Extract every clip referenced by a ProSiebenSat.1 playlist page.

    Parses the ``contentResources`` JSON array embedded in the page and
    resolves each entry through the regular clip-info pipeline.
    """
    playlist_id = self._html_search_regex(
        self._PLAYLIST_ID_REGEXES, webpage, 'playlist id')
    playlist = self._parse_json(
        self._search_regex(
            r'var\s+contentResources\s*=\s*(\[.+?\]);\s*</script',
            webpage, 'playlist'),
        playlist_id)

    entries = []
    for item in playlist:
        # Entries are keyed either by numeric id or by UPC code.
        clip_id = item.get('id') or item.get('upc')
        if not clip_id:
            continue
        teaser = item.get('teaser', {})
        info = self._extract_video_info(url, clip_id)
        info.update({
            'id': clip_id,
            'title': item.get('title') or teaser.get('headline'),
            'description': teaser.get('description'),
            'thumbnail': item.get('poster'),
            'duration': float_or_none(item.get('duration')),
            'series': item.get('tvShowTitle'),
            'uploader': item.get('broadcastPublisher'),
        })
        entries.append(info)

    return self.playlist_result(entries, playlist_id)
def _real_extract(self, url):
    """Extract a single puhutv video (film or episode) from its -izle page.

    Flow: resolve the slug via the JSON API, fetch the per-asset video
    list (geo-gated), build HLS/progressive formats, then collect
    thumbnails, tags and subtitles from the slug metadata.
    """
    display_id = self._match_id(url)

    info = self._download_json(
        urljoin(url, '/api/slug/%s-izle' % display_id),
        display_id)['data']

    video_id = compat_str(info['id'])
    # 'title' here is the parent show object, not a string.
    show = info.get('title') or {}
    title = info.get('name') or show['name']
    if info.get('display_name'):
        title = '%s %s' % (title, info['display_name'])

    try:
        videos = self._download_json(
            'https://puhutv.com/api/assets/%s/videos' % video_id,
            display_id, 'Downloading video JSON',
            headers=self.geo_verification_headers())
    except ExtractorError as e:
        # The asset endpoint answers 403 for geo-blocked content.
        if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
            self.raise_geo_restricted()
        raise

    urls = []  # media URLs already handled, to skip duplicates
    formats = []

    for video in videos['data']['videos']:
        media_url = url_or_none(video.get('url'))
        if not media_url or media_url in urls:
            continue
        urls.append(media_url)

        playlist = video.get('is_playlist')
        # Master playlists expand into several formats at once.
        if (video.get('stream_type') == 'hls' and playlist is True) or 'playlist.m3u8' in media_url:
            formats.extend(self._extract_m3u8_formats(
                media_url, video_id, 'mp4', entry_protocol='m3u8_native',
                m3u8_id='hls', fatal=False))
            continue

        quality = int_or_none(video.get('quality'))
        f = {
            'url': media_url,
            'ext': 'mp4',
            'height': quality
        }
        video_format = video.get('video_format')
        # Single-rendition HLS (chunklist) vs. plain progressive mp4.
        is_hls = (video_format == 'hls' or '/hls/' in media_url or '/chunklist.m3u8' in media_url) and playlist is False
        if is_hls:
            format_id = 'hls'
            f['protocol'] = 'm3u8_native'
        elif video_format == 'mp4':
            format_id = 'http'
        else:
            # Unknown container/stream type: skip rather than guess.
            continue
        if quality:
            format_id += '-%sp' % quality
        f['format_id'] = format_id
        formats.append(f)
    self._sort_formats(formats)

    creator = try_get(
        show, lambda x: x['producer']['name'], compat_str)

    content = info.get('content') or {}

    # Thumbnails are keyed by a resolution-like id (e.g. '1920x1080').
    images = try_get(
        content, lambda x: x['images']['wide'], dict) or {}
    thumbnails = []
    for image_id, image_url in images.items():
        if not isinstance(image_url, compat_str):
            continue
        if not image_url.startswith(('http', '//')):
            # API returns protocol-less host paths; normalize to https.
            image_url = 'https://%s' % image_url
        t = parse_resolution(image_id)
        t.update({
            'id': image_id,
            'url': image_url
        })
        thumbnails.append(t)

    tags = []
    for genre in show.get('genres') or []:
        if not isinstance(genre, dict):
            continue
        genre_name = genre.get('name')
        if genre_name and isinstance(genre_name, compat_str):
            tags.append(genre_name)

    subtitles = {}
    for subtitle in content.get('subtitles') or []:
        if not isinstance(subtitle, dict):
            continue
        lang = subtitle.get('language')
        sub_url = url_or_none(subtitle.get('url') or subtitle.get('file'))
        if not lang or not isinstance(lang, compat_str) or not sub_url:
            continue
        # Map display names (e.g. 'English') to ISO codes where known.
        subtitles[self._SUBTITLE_LANGS.get(lang, lang)] = [{
            'url': sub_url
        }]

    return {
        'id': video_id,
        'display_id': display_id,
        'title': title,
        'description': info.get('description') or show.get('description'),
        'season_id': str_or_none(info.get('season_id')),
        'season_number': int_or_none(info.get('season_number')),
        'episode_number': int_or_none(info.get('episode_number')),
        'release_year': int_or_none(show.get('released_at')),
        'timestamp': unified_timestamp(info.get('created_at')),
        'creator': creator,
        'view_count': int_or_none(content.get('watch_count')),
        'duration': float_or_none(content.get('duration_in_ms'), 1000),
        'tags': tags,
        'subtitles': subtitles,
        'thumbnails': thumbnails,
        'formats': formats
    }
def _real_extract(self, url):
    """Resolve a puhutv -detay page to a season playlist or a single film."""
    playlist_id = self._match_id(url)

    data = self._download_json(
        urljoin(url, '/api/slug/%s-detay' % playlist_id),
        playlist_id)['data']

    seasons = data.get('seasons')
    if not seasons:
        # Films share the -detay URL scheme with series; hand the slug
        # over to the single-video extractor instead.
        slug = data.get('slug') or data['assets'][0]['slug']
        return self.url_result(
            'https://puhutv.com/%s-izle' % slug,
            PuhuTVIE.ie_key(), slug)

    return self.playlist_result(
        self._extract_entries(seasons), playlist_id, data.get('name'))
def _real_extract(self, url):
    """Look up the puls4 page JSON, then delegate to the ProSiebenSat.1 base.

    The page API yields a content path whose mediaCurrent object carries
    both display metadata and the player clip id used for stream lookup.
    """
    path = self._match_id(url)

    page = self._download_json(
        'http://www.puls4.com/api/json-fe/page/' + path, path)
    content_path = page['content'][0]['url']

    media = self._download_json(
        'http://www.puls4.com' + content_path,
        content_path)['mediaCurrent']
    player_content = media['playerContent']

    info = self._extract_video_info(url, player_content['id'])
    info.update({
        'id': compat_str(media['objectId']),
        'title': player_content['title'],
        'description': media.get('description'),
        'thumbnail': media.get('previewLink'),
        'upload_date': unified_strdate(media.get('date')),
        'duration': parse_duration(player_content.get('duration')),
        'episode': player_content.get('episodePartName'),
        'show': media.get('channel'),
        'season_id': player_content.get('seasonId'),
        'uploader': player_content.get('sourceCompany'),
    })
    return info
def _real_extract(self, url):
    """Extract pyvideo entries, preferring the JSON data repository.

    Falls back to scraping the 'Media URL' list from the HTML page when
    the GitHub-hosted JSON for this talk is unavailable.
    """
    mobj = re.match(self._VALID_URL, url)
    category = mobj.group('category')
    video_id = mobj.group('id')

    entries = []

    data = self._download_json(
        'https://raw.githubusercontent.com/pyvideo/data/master/%s/videos/%s.json'
        % (category, video_id), video_id, fatal=False)

    if data:
        for video in data['videos']:
            video_url = video.get('url')
            if not video_url:
                continue
            if video.get('type') == 'youtube':
                # YouTube-hosted talks are delegated to that extractor.
                entries.append(self.url_result(video_url, 'Youtube'))
            else:
                entries.append({
                    'id': compat_str(data.get('id') or video_id),
                    'url': video_url,
                    'title': data['title'],
                    'description': data.get('description') or data.get('summary'),
                    'thumbnail': data.get('thumbnail_url'),
                    'duration': int_or_none(data.get('duration')),
                })
    else:
        webpage = self._download_webpage(url, video_id)
        title = self._og_search_title(webpage)
        media_urls = self._search_regex(
            r'(?s)Media URL:(.+?)</li>', webpage, 'media urls')
        for m in re.finditer(
                r'<a[^>]+href=(["\'])(?P<url>http.+?)\1', media_urls):
            media_url = m.group('url')
            if re.match(r'https?://www\.youtube\.com/watch\?v=.*', media_url):
                entries.append(self.url_result(media_url, 'Youtube'))
            else:
                entries.append({
                    'id': video_id,
                    'url': media_url,
                    'title': title,
                })

    return self.playlist_result(entries, video_id)
def _real_extract(self, url):
    """Extract a single QQ Music song.

    Scrapes the song detail page (GBK-encoded) for name/singer/lyrics,
    fetches a vkey token, then synthesizes one format per entry in
    ``_FORMATS``.

    Fix: ``lrc_content`` defaults to None when the page carries no
    embedded lyrics, but was previously passed straight to
    ``re.findall``, raising TypeError. The LRC extraction is now guarded.
    """
    mid = self._match_id(url)

    detail_info_page = self._download_webpage(
        'http://s.plcloud.music.qq.com/fcgi-bin/fcg_yqq_song_detail_info.fcg?songmid=%s&play=0' % mid,
        mid, note='Download song detail info',
        errnote='Unable to get song detail info', encoding='gbk')

    song_name = self._html_search_regex(
        r"songname:\s*'([^']+)'", detail_info_page, 'song name')

    publish_time = self._html_search_regex(
        r'发行时间:(\d{4}-\d{2}-\d{2})', detail_info_page,
        'publish time', default=None)
    if publish_time:
        # Normalize YYYY-MM-DD to the YYYYMMDD form used by release_date.
        publish_time = publish_time.replace('-', '')

    singer = self._html_search_regex(
        r"singer:\s*'([^']+)", detail_info_page, 'singer', default=None)

    lrc_content = self._html_search_regex(
        r'<div class="content" id="lrc_content"[^<>]*>([^<>]+)</div>',
        detail_info_page, 'LRC lyrics', default=None)
    if lrc_content:
        lrc_content = lrc_content.replace('\\n', '\n')

    thumbnail_url = None
    albummid = self._search_regex(
        [r'albummid:\'([0-9a-zA-Z]+)\'', r'"albummid":"([0-9a-zA-Z]+)"'],
        detail_info_page, 'album mid', default=None)
    if albummid:
        # Album art URL is sharded by the last two characters of the mid.
        thumbnail_url = 'http://i.gtimg.cn/music/photo/mid_album_500/%s/%s/%s.jpg' \
            % (albummid[-2:-1], albummid[-1], albummid)

    guid = self.m_r_get_ruin()

    vkey = self._download_json(
        'http://base.music.qq.com/fcgi-bin/fcg_musicexpress.fcg?json=3&guid=%s' % guid,
        mid, note='Retrieve vkey', errnote='Unable to get vkey',
        transform_source=strip_jsonp)['key']

    formats = []
    for format_id, details in self._FORMATS.items():
        formats.append({
            'url': 'http://cc.stream.qqmusic.qq.com/%s%s.%s?vkey=%s&guid=%s&fromtag=0'
                   % (details['prefix'], mid, details['ext'], vkey, guid),
            'format': format_id,
            'format_id': format_id,
            'preference': details['preference'],
            'abr': details.get('abr'),
        })
    # Not every quality exists for every song; drop dead URLs first.
    self._check_formats(formats, mid)
    self._sort_formats(formats)

    # Keep only properly timestamped/tagged LRC lines; guard against
    # songs without lyrics (lrc_content is None in that case).
    actual_lrc_lyrics = ''
    if lrc_content:
        actual_lrc_lyrics = ''.join(
            line + '\n' for line in re.findall(
                r'(?m)^(\[[0-9]{2}:[0-9]{2}(?:\.[0-9]{2,})?\][^\n]*|\[[^\]]*\])', lrc_content))

    info_dict = {
        'id': mid,
        'formats': formats,
        'title': song_name,
        'release_date': publish_time,
        'creator': singer,
        'description': lrc_content,
        'thumbnail': thumbnail_url
    }
    if actual_lrc_lyrics:
        info_dict['subtitles'] = {
            'origin': [{
                'ext': 'lrc',
                'data': actual_lrc_lyrics,
            }]
        }
    return info_dict
class QQMusicAlbumIE(QQPlaylistBaseIE):
    """Extractor for QQ Music album pages (one playlist entry per track)."""
    IE_NAME = 'qqmusic:album'
    IE_DESC = 'QQ音乐 - 专辑'
    _VALID_URL = r'https?://y\.qq\.com/n/yqq/album/(?P<id>[0-9A-Za-z]+)\.html'

    _TESTS = [{
        'url': 'https://y.qq.com/n/yqq/album/000gXCTb2AhRR1.html',
        'info_dict': {
            'id': '000gXCTb2AhRR1',
            'title': '我们都是这样长大的',
            'description': 'md5:179c5dce203a5931970d306aa9607ea6',
        },
        'playlist_count': 4,
    }, {
        'url': 'https://y.qq.com/n/yqq/album/002Y5a3b3AlCu3.html',
        'info_dict': {
            'id': '002Y5a3b3AlCu3',
            'title': '그리고...',
            'description': 'md5:a48823755615508a95080e81b51ba729',
        },
        'playlist_count': 8,
    }]

    def _real_extract(self, url):
        mid = self._match_id(url)

        album = self._download_json(
            'http://i.y.qq.com/v8/fcg-bin/fcg_v8_album_info_cp.fcg?albummid=%s&format=json' % mid,
            mid, 'Download album page')['data']

        # One url_result per track, delegated to the single-song extractor.
        entries = []
        for song in album['list']:
            song_mid = song['songmid']
            entries.append(self.url_result(
                'https://y.qq.com/n/yqq/song/' + song_mid + '.html',
                'QQMusic', song_mid))

        album_name = album.get('name')
        album_detail = album.get('desc')
        if album_detail is not None:
            album_detail = album_detail.strip()

        return self.playlist_result(entries, mid, album_name, album_detail)
def _real_extract(self, url):
    """Build a playlist from the QQ Music toplist JSON API."""
    list_id = self._match_id(url)

    toplist = self._download_json(
        'http://i.y.qq.com/v8/fcg-bin/fcg_v8_toplist_cp.fcg', list_id,
        note='Download toplist page',
        query={'type': 'toplist', 'topid': list_id, 'format': 'json'})

    # Each chart row wraps its song metadata in a 'data' object.
    entries = []
    for song in toplist['songlist']:
        song_mid = song['data']['songmid']
        entries.append(self.url_result(
            'https://y.qq.com/n/yqq/song/' + song_mid + '.html',
            'QQMusic', song_mid))

    topinfo = toplist.get('topinfo', {})
    return self.playlist_result(
        entries, list_id, topinfo.get('ListName'), topinfo.get('info'))
'http://i.y.qq.com/qzone-music/fcg-bin/fcg_ucc_getcdinfo_byids_cp.fcg', - list_id, 'Download list page', - query={'type': 1, 'json': 1, 'utf8': 1, 'onlysong': 0, 'disstid': list_id}, - transform_source=strip_jsonp) - if not len(list_json.get('cdlist', [])): - if list_json.get('code'): - raise ExtractorError( - 'QQ Music said: error %d in fetching playlist info' % list_json['code'], - expected=True) - raise ExtractorError('Unable to get playlist info') - - cdlist = list_json['cdlist'][0] - entries = [self.url_result( - 'https://y.qq.com/n/yqq/song/' + song['songmid'] + '.html', 'QQMusic', song['songmid']) - for song in cdlist['songlist']] - - list_name = cdlist.get('dissname') - list_description = clean_html(unescapeHTML(cdlist.get('desc'))) - return self.playlist_result(entries, list_id, list_name, list_description) diff --git a/youtube_dl/extractor/r7.py b/youtube_dl/extractor/r7.py deleted file mode 100644 index e2202d603..000000000 --- a/youtube_dl/extractor/r7.py +++ /dev/null @@ -1,112 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import int_or_none - - -class R7IE(InfoExtractor): - _VALID_URL = r'''(?x) - https?:// - (?: - (?:[a-zA-Z]+)\.r7\.com(?:/[^/]+)+/idmedia/| - noticias\.r7\.com(?:/[^/]+)+/[^/]+-| - player\.r7\.com/video/i/ - ) - (?P<id>[\da-f]{24}) - ''' - _TESTS = [{ - 'url': 'http://videos.r7.com/policiais-humilham-suspeito-a-beira-da-morte-morre-com-dignidade-/idmedia/54e7050b0cf2ff57e0279389.html', - 'md5': '403c4e393617e8e8ddc748978ee8efde', - 'info_dict': { - 'id': '54e7050b0cf2ff57e0279389', - 'ext': 'mp4', - 'title': 'Policiais humilham suspeito à beira da morte: "Morre com dignidade"', - 'description': 'md5:01812008664be76a6479aa58ec865b72', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 98, - 'like_count': int, - 'view_count': int, - }, - }, { - 'url': 'http://esportes.r7.com/videos/cigano-manda-recado-aos-fas/idmedia/4e176727b51a048ee6646a1b.html', - 
def _real_extract(self, url):
    """Extract a single R7 video via the player API."""
    video_id = self._match_id(url)

    video = self._download_json(
        'http://player-api.r7.com/video/i/%s' % video_id, video_id)

    title = video['title']

    formats = []
    hls_url = video.get('media_url_hls')
    if hls_url:
        formats.extend(self._extract_m3u8_formats(
            hls_url, video_id, 'mp4', entry_protocol='m3u8_native',
            m3u8_id='hls', fatal=False))
    http_url = video.get('media_url')
    if http_url:
        http_f = {
            'url': http_url,
            'format_id': 'http',
        }
        # m3u8 format always matches the http format: when there is exactly
        # one video rendition, borrow its metadata for the progressive URL.
        video_formats = [f for f in formats if f.get('vcodec') != 'none']
        if len(video_formats) == 1:
            merged = video_formats[0].copy()
            merged.update(http_f)
            merged['protocol'] = 'http'
            http_f = merged
        formats.append(http_f)
    self._sort_formats(formats)

    return {
        'id': video_id,
        'title': title,
        'description': video.get('description'),
        'thumbnail': video.get('thumb'),
        'duration': int_or_none(video.get('media_duration')),
        'like_count': int_or_none(video.get('likes')),
        'view_count': int_or_none(video.get('views')),
        'formats': formats,
    }
def _real_extract(self, url):
    """Extract a Radio Bremen mediathek video.

    Metadata comes from a dedicated PHP endpoint; the direct mp4 URL is
    reconstructed from the player-call arguments embedded in the page.
    """
    video_id = self._match_id(url)

    meta_doc = self._download_webpage(
        'http://www.radiobremen.de/apps/php/mediathek/metadaten.php?id=%s' % video_id,
        video_id, 'Downloading metadata')
    title = self._html_search_regex(
        r'<h1.*>(?P<title>.+)</h1>', meta_doc, 'title')
    description = self._html_search_regex(
        r'<p>(?P<description>.*)</p>', meta_doc, 'description', fatal=False)
    duration = parse_duration(self._html_search_regex(
        r'Länge:</td>\s+<td>(?P<duration>[0-9]+:[0-9]+)</td>',
        meta_doc, 'duration', fatal=False))

    page_doc = self._download_webpage(
        url, video_id, 'Downloading video information')
    # The classic ARD player is invoked with width/id/secret/thumbnail
    # arguments; secret + width are needed to build the download URL.
    mobj = re.search(
        r"ardformatplayerclassic\(\'playerbereich\',\'(?P<width>[0-9]+)\',\'.*\',\'(?P<video_id>[0-9]+)\',\'(?P<secret>[0-9]+)\',\'(?P<thumbnail>.+)\',\'\'\)",
        page_doc)
    video_url = (
        "http://dl-ondemand.radiobremen.de/mediabase/%s/%s_%s_%s.mp4" %
        (video_id, video_id, mobj.group("secret"), mobj.group('width')))

    formats = [{
        'url': video_url,
        'ext': 'mp4',
        'width': int(mobj.group('width')),
    }]
    return {
        'id': video_id,
        'title': title,
        'description': description,
        'duration': duration,
        'formats': formats,
        'thumbnail': mobj.group('thumbnail'),
    }
class RadioCanadaIE(InfoExtractor):
    """Extractor for Radio-Canada media-console videos (ici.radio-canada.ca).

    Fix: 'season_number' and 'episode_number' previously called int_or_none()
    on the literal meta key names ('SrcSaison'/'SrcEpisode') instead of their
    values, so both fields were always None; they now go through get_meta().
    """
    IE_NAME = 'radiocanada'
    _VALID_URL = r'(?:radiocanada:|https?://ici\.radio-canada\.ca/widgets/mediaconsole/)(?P<app_code>[^:/]+)[:/](?P<id>[0-9]+)'
    _TESTS = [
        {
            'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7184272',
            'info_dict': {
                'id': '7184272',
                'ext': 'mp4',
                'title': 'Le parcours du tireur capté sur vidéo',
                'description': 'Images des caméras de surveillance fournies par la GRC montrant le parcours du tireur d\'Ottawa',
                'upload_date': '20141023',
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            }
        },
        {
            # empty Title
            'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7754998/',
            'info_dict': {
                'id': '7754998',
                'ext': 'mp4',
                'title': 'letelejournal22h',
                'description': 'INTEGRALE WEB 22H-TJ',
                'upload_date': '20170720',
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            },
        },
        {
            # with protectionType but not actually DRM protected
            'url': 'radiocanada:toutv:140872',
            'info_dict': {
                'id': '140872',
                'title': 'Épisode 1',
                'series': 'District 31',
            },
            'only_matching': True,
        }
    ]
    _GEO_COUNTRIES = ['CA']
    _access_token = None
    _claims = None

    def _call_api(self, path, video_id=None, app_code=None, query=None):
        """Query the Radio-Canada media API; 401/422 carry structured errors."""
        if not query:
            query = {}
        query.update({
            'client_key': '773aea60-0e80-41bb-9c7f-e6d7c3ad17fb',
            'output': 'json',
        })
        if video_id:
            query.update({
                'appCode': app_code,
                'idMedia': video_id,
            })
        if self._access_token:
            query['access_token'] = self._access_token
        try:
            return self._download_json(
                'https://services.radio-canada.ca/media/' + path, video_id, query=query)
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 422):
                data = self._parse_json(e.cause.read().decode(), None)
                error = data.get('error_description') or data['errorMessage']['text']
                raise ExtractorError(error, expected=True)
            raise

    def _extract_info(self, app_code, video_id):
        metas = self._call_api('meta/v1/index.ashx', video_id, app_code)['Metas']

        def get_meta(name):
            # Return the first non-empty 'text' among metas with this name.
            for meta in metas:
                if meta.get('name') == name:
                    text = meta.get('text')
                    if text:
                        return text

        # protectionType does not necessarily mean the video is DRM protected (see
        # https://github.com/ytdl-org/youtube-dl/pull/18609).
        if get_meta('protectionType'):
            self.report_warning('This video is probably DRM protected.')

        query = {
            'connectionType': 'hd',
            'deviceType': 'ipad',
            'multibitrate': 'true',
        }
        if self._claims:
            query['claims'] = self._claims
        v_data = self._call_api('validation/v2/', video_id, app_code, query)
        v_url = v_data.get('url')
        if not v_url:
            # The API reports geo/premium restrictions as localized messages.
            error = v_data['message']
            if error == "Le contenu sélectionné n'est pas disponible dans votre pays":
                raise self.raise_geo_restricted(error, self._GEO_COUNTRIES)
            if error == 'Le contenu sélectionné est disponible seulement en premium':
                self.raise_login_required(error)
            raise ExtractorError(
                '%s said: %s' % (self.IE_NAME, error), expected=True)
        formats = self._extract_m3u8_formats(v_url, video_id, 'mp4')
        self._sort_formats(formats)

        subtitles = {}
        closed_caption_url = get_meta('closedCaption') or get_meta('closedCaptionHTML5')
        if closed_caption_url:
            subtitles['fr'] = [{
                'url': closed_caption_url,
                'ext': determine_ext(closed_caption_url, 'vtt'),
            }]

        return {
            'id': video_id,
            'title': get_meta('Title') or get_meta('AV-nomEmission'),
            'description': get_meta('Description') or get_meta('ShortDescription'),
            'thumbnail': get_meta('imageHR') or get_meta('imageMR') or get_meta('imageBR'),
            'duration': int_or_none(get_meta('length')),
            'series': get_meta('Emission'),
            # FIX: read the meta values, not the literal key names.
            'season_number': int_or_none(get_meta('SrcSaison')),
            'episode_number': int_or_none(get_meta('SrcEpisode')),
            'upload_date': unified_strdate(get_meta('Date')),
            'subtitles': subtitles,
            'formats': formats,
        }

    def _real_extract(self, url):
        return self._extract_info(*re.match(self._VALID_URL, url).groups())
class RadioCanadaAudioVideoIE(InfoExtractor):
    """Map ici.radio-canada.ca audio-video pages onto the radiocanada: scheme."""
    IE_NAME = 'radiocanada:audiovideo'
    _VALID_URL = r'https?://ici\.radio-canada\.ca/([^/]+/)*media-(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://ici.radio-canada.ca/audio-video/media-7527184/barack-obama-au-vietnam',
        'info_dict': {
            'id': '7527184',
            'ext': 'mp4',
            'title': 'Barack Obama au Vietnam',
            'description': 'Les États-Unis lèvent l\'embargo sur la vente d\'armes qui datait de la guerre du Vietnam',
            'upload_date': '20160523',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'https://ici.radio-canada.ca/info/videos/media-7527184/barack-obama-au-vietnam',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Delegate to RadioCanadaIE with the 'medianet' app code.
        return self.url_result('radiocanada:medianet:%s' % self._match_id(url))


class RadioDeIE(InfoExtractor):
    """Extractor for radio.de (and sibling TLD) live station streams."""
    IE_NAME = 'radio.de'
    _VALID_URL = r'https?://(?P<id>.+?)\.(?:radio\.(?:de|at|fr|pt|es|pl|it)|rad\.io)'
    _TEST = {
        'url': 'http://ndr2.radio.de/',
        'info_dict': {
            'id': 'ndr2',
            'ext': 'mp3',
            'title': 're:^NDR 2 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'description': 'md5:591c49c702db1a33751625ebfb67f273',
            'thumbnail': r're:^https?://.*\.png',
            'is_live': True,
        },
        'params': {
            'skip_download': True,
        }
    }

    def _real_extract(self, url):
        radio_id = self._match_id(url)
        webpage = self._download_webpage(url, radio_id)

        # Station metadata is registered inline as a JS service object.
        broadcast = self._parse_json(
            self._search_regex(
                r"'components/station/stationService':\s*\{\s*'?station'?:\s*(\{.*?\s*\}),\n",
                webpage, 'broadcast'),
            radio_id)

        title = self._live_title(broadcast['name'])
        description = broadcast.get('description') or broadcast.get('shortDescription')
        thumbnail = (
            broadcast.get('picture4Url')
            or broadcast.get('picture4TransUrl')
            or broadcast.get('logo100x100'))

        formats = []
        for stream in broadcast['streamUrls']:
            formats.append({
                'url': stream['streamUrl'],
                'ext': stream['streamContentFormat'].lower(),
                'acodec': stream['streamContentFormat'],
                'abr': stream['bitRate'],
                'asr': stream['sampleRate'],
            })
        self._sort_formats(formats)

        return {
            'id': radio_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'is_live': True,
            'formats': formats,
        }
class RadioFranceIE(InfoExtractor):
    """Extractor for maison.radiofrance.fr 'radiovisions' audio pages."""
    _VALID_URL = r'^https?://maison\.radiofrance\.fr/radiovisions/(?P<id>[^?#]+)'
    IE_NAME = 'radiofrance'

    _TEST = {
        'url': 'http://maison.radiofrance.fr/radiovisions/one-one',
        'md5': 'bdbb28ace95ed0e04faab32ba3160daf',
        'info_dict': {
            'id': 'one-one',
            'ext': 'ogg',
            'title': 'One to one',
            'description': "Plutôt que d'imaginer la radio de demain comme technologie ou comme création de contenu, je veux montrer que quelles que soient ses évolutions, j'ai l'intime conviction que la radio continuera d'être un grand média de proximité pour les auditeurs.",
            'uploader': 'Thomas Hercouët',
        },
    }

    def _real_extract(self, url):
        video_id = re.match(self._VALID_URL, url).group('id')

        webpage = self._download_webpage(url, video_id)
        title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, 'title')
        description = self._html_search_regex(
            r'<div class="bloc_page_wrapper"><div class="text">(.*?)</div>',
            webpage, 'description', fatal=False)
        uploader = self._html_search_regex(
            r'<div class="credit">&nbsp;&nbsp;&copy;&nbsp;(.*?)</div>',
            webpage, 'uploader', fatal=False)

        # The jPlayer data-source attribute holds a JS map of quality -> URL.
        audio_sources = self._html_search_regex(
            r'class="jp-jplayer[^"]*" data-source="([^"]+)">',
            webpage, 'audio URLs')
        formats = []
        for i, (fmt_id, fmt_url) in enumerate(
                re.findall(r"([a-z0-9]+)\s*:\s*'([^']+)'", audio_sources)):
            formats.append({
                'format_id': fmt_id,
                'url': fmt_url,
                'vcodec': 'none',
                # Preserve page order as preference.
                'preference': i,
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': description,
            'uploader': uploader,
        }


class RadioJavanIE(InfoExtractor):
    """Extractor for radiojavan.com music videos."""
    _VALID_URL = r'https?://(?:www\.)?radiojavan\.com/videos/video/(?P<id>[^/]+)/?'
    _TEST = {
        'url': 'http://www.radiojavan.com/videos/video/chaartaar-ashoobam',
        'md5': 'e85208ffa3ca8b83534fca9fe19af95b',
        'info_dict': {
            'id': 'chaartaar-ashoobam',
            'ext': 'mp4',
            'title': 'Chaartaar - Ashoobam',
            'thumbnail': r're:^https?://.*\.jpe?g$',
            'upload_date': '20150215',
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
        }
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)

        # Ask the site which media host serves this video; fall back to the
        # primary host if the response lacks one.
        download_host = self._download_json(
            'https://www.radiojavan.com/videos/video_host', video_id,
            data=urlencode_postdata({'id': video_id}),
            headers={
                'Content-Type': 'application/x-www-form-urlencoded',
                'Referer': url,
            }).get('host', 'https://host1.rjmusicmedia.com')

        webpage = self._download_webpage(url, video_id)

        formats = []
        # Inline JS assigns one relative path per quality: RJ.video720p = '...'
        for format_id, _, video_path in re.findall(
                r'RJ\.video(?P<format_id>\d+[pPkK])\s*=\s*(["\'])(?P<url>(?:(?!\2).)+)\2',
                webpage):
            fmt = parse_resolution(format_id)
            fmt.update({
                'url': urljoin(download_host, video_path),
                'format_id': format_id,
            })
            formats.append(fmt)
        self._sort_formats(formats)

        title = self._og_search_title(webpage)
        thumbnail = self._og_search_thumbnail(webpage)

        upload_date = unified_strdate(self._search_regex(
            r'class="date_added">Date added: ([^<]+)<',
            webpage, 'upload date', fatal=False))

        view_count = str_to_int(self._search_regex(
            r'class="views">Plays: ([\d,]+)',
            webpage, 'view count', fatal=False))
        like_count = str_to_int(self._search_regex(
            r'class="rating">([\d,]+) likes',
            webpage, 'like count', fatal=False))
        dislike_count = str_to_int(self._search_regex(
            r'class="rating">([\d,]+) dislikes',
            webpage, 'dislike count', fatal=False))

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
            'view_count': view_count,
            'like_count': like_count,
            'dislike_count': dislike_count,
            'formats': formats,
        }
class RaiBaseIE(InfoExtractor):
    """Shared relinker/subtitle logic for RAI extractors.

    Fixes: int_or_none() can return None, which the original compared with
    '>' ('bitrate > 0', 'br > 300') and divided ('math.floor(br / 100)'),
    raising TypeError on streams without a parseable bitrate.  Both spots are
    now guarded; behavior with a valid bitrate is unchanged.
    """
    _UUID_RE = r'[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}'
    _GEO_COUNTRIES = ['IT']
    _GEO_BYPASS = False

    def _extract_relinker_info(self, relinker_url, video_id):
        """Resolve a relinker URL into formats/duration/is_live info."""
        if not re.match(r'https?://', relinker_url):
            return {'formats': [{'url': relinker_url}]}

        formats = []
        geoprotection = None
        is_live = None
        duration = None

        # Query each platform flavour; they expose different stream types.
        for platform in ('mon', 'flash', 'native'):
            relinker = self._download_xml(
                relinker_url, video_id,
                note='Downloading XML metadata for platform %s' % platform,
                transform_source=fix_xml_ampersands,
                query={'output': 45, 'pl': platform},
                headers=self.geo_verification_headers())

            if not geoprotection:
                geoprotection = xpath_text(
                    relinker, './geoprotection', default=None) == 'Y'

            if not is_live:
                is_live = xpath_text(
                    relinker, './is_live', default=None) == 'Y'
            if not duration:
                duration = parse_duration(xpath_text(
                    relinker, './duration', default=None))

            url_elem = find_xpath_attr(relinker, './url', 'type', 'content')
            if url_elem is None:
                continue

            media_url = url_elem.text

            # This does not imply geo restriction (e.g.
            # http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html)
            if '/video_no_available.mp4' in media_url:
                continue

            ext = determine_ext(media_url)
            if (ext == 'm3u8' and platform != 'mon') or (ext == 'f4m' and platform != 'flash'):
                continue

            if ext == 'm3u8' or 'format=m3u8' in media_url or platform == 'mon':
                formats.extend(self._extract_m3u8_formats(
                    media_url, video_id, 'mp4', 'm3u8_native',
                    m3u8_id='hls', fatal=False))
            elif ext == 'f4m' or platform == 'flash':
                manifest_url = update_url_query(
                    media_url.replace('manifest#live_hds.f4m', 'manifest.f4m'),
                    {'hdcore': '3.7.0', 'plugin': 'aasp-3.7.0.39.44'})
                formats.extend(self._extract_f4m_formats(
                    manifest_url, video_id, f4m_id='hds', fatal=False))
            else:
                # FIX: int_or_none may yield None; coerce to 0 so the
                # comparisons below cannot raise TypeError.
                bitrate = int_or_none(xpath_text(relinker, 'bitrate')) or 0
                formats.append({
                    'url': media_url,
                    'tbr': bitrate if bitrate > 0 else None,
                    'format_id': 'http-%d' % bitrate if bitrate > 0 else 'http',
                })

        if not formats and geoprotection is True:
            self.raise_geo_restricted(countries=self._GEO_COUNTRIES)

        formats.extend(self._create_http_urls(relinker_url, formats))

        return dict((k, v) for k, v in {
            'is_live': is_live,
            'duration': duration,
            'formats': formats,
        }.items() if v is not None)

    def _create_http_urls(self, relinker_url, fmts):
        """Probe direct MP4 URLs derived from the relinker and build formats."""
        _RELINKER_REG = r'https?://(?P<host>[^/]+?)/(?:i/)?(?P<extra>[^/]+?)/(?P<path>.+?)/(?P<id>\w+)(?:_(?P<quality>[\d\,]+))?(?:\.mp4|/playlist\.m3u8).+?'
        _MP4_TMPL = '%s&overrideUserAgentRule=mp4-%s'
        _QUALITY = {
            # tbr: w, h
            '250': [352, 198],
            '400': [512, 288],
            '700': [512, 288],
            '800': [700, 394],
            '1200': [736, 414],
            '1800': [1024, 576],
            '2400': [1280, 720],
            '3200': [1440, 810],
            '3600': [1440, 810],
            '5000': [1920, 1080],
            '10000': [1920, 1080],
        }

        def test_url(url):
            # HEAD-probe: False on hard failure, None when the URL resolves to
            # itself (no redirect), otherwise the redirect target.
            resp = self._request_webpage(
                HEADRequest(url), None, headers={'User-Agent': 'Rai'},
                fatal=False, errnote=False, note=False)

            if resp is False:
                return False

            if resp.code == 200:
                return False if resp.url == url else resp.url
            return None

        def get_format_info(tbr):
            import math
            br = int_or_none(tbr)
            if len(fmts) == 1 and not br:
                br = fmts[0].get('tbr')
            # FIX: br may be None (quality '*' / tbr-less formats); without
            # the guard both the comparison and the division below raised
            # TypeError.  Fall back to the lowest quality bucket.
            if br and br > 300:
                tbr = compat_str(math.floor(br / 100) * 100)
            else:
                tbr = '250'

            # try extracting info from available m3u8 formats
            format_copy = None
            if br:
                for f in fmts:
                    if f.get('tbr'):
                        br_limit = math.floor(br / 100)
                        if br_limit - 1 <= math.floor(f['tbr'] / 100) <= br_limit + 1:
                            format_copy = f.copy()
            return {
                'width': format_copy.get('width'),
                'height': format_copy.get('height'),
                'tbr': format_copy.get('tbr'),
                'vcodec': format_copy.get('vcodec'),
                'acodec': format_copy.get('acodec'),
                'fps': format_copy.get('fps'),
                'format_id': 'https-%s' % tbr,
            } if format_copy else {
                'width': _QUALITY[tbr][0],
                'height': _QUALITY[tbr][1],
                'format_id': 'https-%s' % tbr,
                'tbr': int(tbr),
            }

        loc = test_url(_MP4_TMPL % (relinker_url, '*'))
        if not isinstance(loc, compat_str):
            return []

        mobj = re.match(
            _RELINKER_REG,
            test_url(relinker_url) or '')
        if not mobj:
            return []

        available_qualities = mobj.group('quality').split(',') if mobj.group('quality') else ['*']
        available_qualities = [i for i in available_qualities if i]

        formats = []
        for q in available_qualities:
            fmt = {
                'url': _MP4_TMPL % (relinker_url, q),
                'protocol': 'https',
                'ext': 'mp4',
            }
            fmt.update(get_format_info(q))
            formats.append(fmt)
        return formats

    @staticmethod
    def _extract_subtitles(url, video_data):
        """Collect subtitle tracks; STL tracks also get a derived SRT URL."""
        STL_EXT = 'stl'
        SRT_EXT = 'srt'
        subtitles = {}
        subtitles_array = video_data.get('subtitlesArray') or []
        for k in ('subtitles', 'subtitlesUrl'):
            subtitles_array.append({'url': video_data.get(k)})
        for subtitle in subtitles_array:
            sub_url = subtitle.get('url')
            if sub_url and isinstance(sub_url, compat_str):
                sub_lang = subtitle.get('language') or 'it'
                sub_url = urljoin(url, sub_url)
                sub_ext = determine_ext(sub_url, SRT_EXT)
                subtitles.setdefault(sub_lang, []).append({
                    'ext': sub_ext,
                    'url': sub_url,
                })
                if STL_EXT == sub_ext:
                    subtitles[sub_lang].append({
                        'ext': SRT_EXT,
                        'url': sub_url[:-len(STL_EXT)] + SRT_EXT,
                    })
        return subtitles
class RaiPlayIE(RaiBaseIE):
    """Extractor for raiplay.it on-demand videos."""
    _VALID_URL = r'(?P<base>https?://(?:www\.)?raiplay\.it/.+?-(?P<id>%s))\.(?:html|json)' % RaiBaseIE._UUID_RE
    _TESTS = [{
        'url': 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html',
        'md5': '8970abf8caf8aef4696e7b1f2adfc696',
        'info_dict': {
            'id': 'cb27157f-9dd0-4aee-b788-b1f67643a391',
            'ext': 'mp4',
            'title': 'Report del 07/04/2014',
            'alt_title': 'St 2013/14 - Espresso nel caffè - 07/04/2014',
            'description': 'md5:d730c168a58f4bb35600fc2f881ec04e',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'Rai Gulp',
            'duration': 6160,
            'series': 'Report',
            'season': '2013/14',
            'subtitles': {
                'it': 'count:2',
            },
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # 1080p direct mp4 url
        'url': 'https://www.raiplay.it/video/2021/03/Leonardo-S1E1-b5703b02-82ee-475a-85b6-c9e4a8adf642.html',
        'md5': '2e501e8651d72f05ffe8f5d286ad560b',
        'info_dict': {
            'id': 'b5703b02-82ee-475a-85b6-c9e4a8adf642',
            'ext': 'mp4',
            'title': 'Leonardo - S1E1',
            'alt_title': 'St 1 Ep 1 - Episodio 1',
            'description': 'md5:f5360cd267d2de146e4e3879a5a47d31',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'Rai 1',
            'duration': 3229,
            'series': 'Leonardo',
            'season': 'Season 1',
        },
    }, {
        'url': 'http://www.raiplay.it/video/2016/11/gazebotraindesi-efebe701-969c-4593-92f3-285f0d1ce750.html?',
        'only_matching': True,
    }, {
        # subtitles at 'subtitlesArray' key (see #27698)
        'url': 'https://www.raiplay.it/video/2020/12/Report---04-01-2021-2e90f1de-8eee-4de4-ac0e-78d21db5b600.html',
        'only_matching': True,
    }, {
        # DRM protected
        'url': 'https://www.raiplay.it/video/2020/09/Lo-straordinario-mondo-di-Zoey-S1E1-Lo-straordinario-potere-di-Zoey-ed493918-1d32-44b7-8454-862e473d00ff.html',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        base, video_id = re.match(self._VALID_URL, url).groups()

        media = self._download_json(
            base + '.json', video_id, 'Downloading video JSON')

        # Bail out early on DRM; the drm flag lives in one of two places.
        if try_get(
                media,
                (lambda x: x['rights_management']['rights']['drm'],
                 lambda x: x['program_info']['rights_management']['rights']['drm']),
                dict):
            raise ExtractorError('This video is DRM protected.', expected=True)

        title = media['name']

        video = media['video']

        relinker_info = self._extract_relinker_info(video['content_url'], video_id)
        self._sort_formats(relinker_info['formats'])

        thumbnails = []
        for _, value in media.get('images', {}).items():
            if value:
                thumbnails.append({
                    'url': urljoin(url, value),
                })

        # Combine separate date/time fields into one timestamp source.
        date_published = media.get('date_published')
        time_published = media.get('time_published')
        if date_published and time_published:
            date_published += ' ' + time_published

        subtitles = self._extract_subtitles(url, video)

        program_info = media.get('program_info') or {}
        season = media.get('season')

        info = {
            'id': remove_start(media.get('id'), 'ContentItem-') or video_id,
            'display_id': video_id,
            'title': self._live_title(title) if relinker_info.get(
                'is_live') else title,
            'alt_title': strip_or_none(media.get('subtitle')),
            'description': media.get('description'),
            'uploader': strip_or_none(media.get('channel')),
            'creator': strip_or_none(media.get('editor') or None),
            'duration': parse_duration(video.get('duration')),
            'timestamp': unified_timestamp(date_published),
            'thumbnails': thumbnails,
            'series': program_info.get('name'),
            'season_number': int_or_none(season),
            # Non-numeric seasons (e.g. '2013/14') go in 'season' instead.
            'season': season if (season and not season.isdigit()) else None,
            'episode': media.get('episode_title'),
            'episode_number': int_or_none(media.get('episode')),
            'subtitles': subtitles,
        }

        info.update(relinker_info)
        return info


class RaiPlayLiveIE(RaiPlayIE):
    """Live raiplay.it channels; reuses RaiPlayIE extraction wholesale."""
    _VALID_URL = r'(?P<base>https?://(?:www\.)?raiplay\.it/dirette/(?P<id>[^/?#&]+))'
    _TESTS = [{
        'url': 'http://www.raiplay.it/dirette/rainews24',
        'info_dict': {
            'id': 'd784ad40-e0ae-4a69-aa76-37519d238a9c',
            'display_id': 'rainews24',
            'ext': 'mp4',
            'title': 're:^Diretta di Rai News 24 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'description': 'md5:4d00bcf6dc98b27c6ec480de329d1497',
            'uploader': 'Rai News 24',
            'creator': 'Rai News 24',
            'is_live': True,
        },
        'params': {
            'skip_download': True,
        },
    }]


class RaiPlayPlaylistIE(InfoExtractor):
    """Playlist extractor for raiplay.it programme pages."""
    _VALID_URL = r'(?P<base>https?://(?:www\.)?raiplay\.it/programmi/(?P<id>[^/?#&]+))'
    _TESTS = [{
        'url': 'http://www.raiplay.it/programmi/nondirloalmiocapo/',
        'info_dict': {
            'id': 'nondirloalmiocapo',
            'title': 'Non dirlo al mio capo',
            'description': 'md5:98ab6b98f7f44c2843fd7d6f045f153b',
        },
        'playlist_mincount': 12,
    }]

    def _real_extract(self, url):
        base, playlist_id = re.match(self._VALID_URL, url).groups()

        program = self._download_json(
            base + '.json', playlist_id, 'Downloading program JSON')

        entries = []
        # Programme JSON nests items as blocks -> sets -> items.
        for block in (program.get('blocks') or []):
            for content_set in (block.get('sets') or []):
                set_id = content_set.get('id')
                if not set_id:
                    continue
                medias = self._download_json(
                    '%s/%s.json' % (base, set_id), set_id,
                    'Downloading content set JSON', fatal=False)
                if not medias:
                    continue
                for item in (medias.get('items') or []):
                    path_id = item.get('path_id')
                    if not path_id:
                        continue
                    video_url = urljoin(url, path_id)
                    entries.append(self.url_result(
                        video_url, ie=RaiPlayIE.ie_key(),
                        video_id=RaiPlayIE._match_id(video_url)))

        return self.playlist_result(
            entries, playlist_id, program.get('name'),
            try_get(program, lambda x: x['program_info']['description']))
class RaiIE(RaiBaseIE):
    """Extractor for generic rai.it / rai.tv / rainews.it pages.

    Pages reference media either through a ContentItem UUID (found in meta
    tags, inline JS, or iframes) or through a direct relinker URL.
    """
    _VALID_URL = r'https?://[^/]+\.(?:rai\.(?:it|tv)|rainews\.it)/.+?-(?P<id>%s)(?:-.+?)?\.html' % RaiBaseIE._UUID_RE
    _TESTS = [{
        # var uniquename = "ContentItem-..."
        # data-id="ContentItem-..."
        'url': 'http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html',
        'info_dict': {
            'id': '04a9f4bd-b563-40cf-82a6-aad3529cb4a9',
            'ext': 'mp4',
            'title': 'TG PRIMO TEMPO',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 1758,
            'upload_date': '20140612',
        },
        'skip': 'This content is available only in Italy',
    }, {
        # with ContentItem in many metas
        'url': 'http://www.rainews.it/dl/rainews/media/Weekend-al-cinema-da-Hollywood-arriva-il-thriller-di-Tate-Taylor-La-ragazza-del-treno-1632c009-c843-4836-bb65-80c33084a64b.html',
        'info_dict': {
            'id': '1632c009-c843-4836-bb65-80c33084a64b',
            'ext': 'mp4',
            'title': 'Weekend al cinema, da Hollywood arriva il thriller di Tate Taylor "La ragazza del treno"',
            'description': 'I film in uscita questa settimana.',
            'thumbnail': r're:^https?://.*\.png$',
            'duration': 833,
            'upload_date': '20161103',
        }
    }, {
        # with ContentItem in og:url
        'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-efb17665-691c-45d5-a60c-5301333cbb0c.html',
        'md5': '06345bd97c932f19ffb129973d07a020',
        'info_dict': {
            'id': 'efb17665-691c-45d5-a60c-5301333cbb0c',
            'ext': 'mp4',
            'title': 'TG1 ore 20:00 del 03/11/2016',
            'description': 'TG1 edizione integrale ore 20:00 del giorno 03/11/2016',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 2214,
            'upload_date': '20161103',
        }
    }, {
        # initEdizione('ContentItem-...'
        'url': 'http://www.tg1.rai.it/dl/tg1/2010/edizioni/ContentSet-9b6e0cba-4bef-4aef-8cf0-9f7f665b7dfb-tg1.html?item=undefined',
        'info_dict': {
            'id': 'c2187016-8484-4e3a-8ac8-35e475b07303',
            'ext': 'mp4',
            'title': r're:TG1 ore \d{2}:\d{2} del \d{2}/\d{2}/\d{4}',
            'duration': 2274,
            'upload_date': '20170401',
        },
        'skip': 'Changes daily',
    }, {
        # HLS live stream with ContentItem in og:url
        'url': 'http://www.rainews.it/dl/rainews/live/ContentItem-3156f2f2-dc70-4953-8e2f-70d7489d4ce9.html',
        'info_dict': {
            'id': '3156f2f2-dc70-4953-8e2f-70d7489d4ce9',
            'ext': 'mp4',
            'title': 'La diretta di Rainews24',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # ContentItem in iframe (see #12652) and subtitle at 'subtitlesUrl' key
        'url': 'http://www.presadiretta.rai.it/dl/portali/site/puntata/ContentItem-3ed19d13-26c2-46ff-a551-b10828262f1b.html',
        'info_dict': {
            'id': '1ad6dc64-444a-42a4-9bea-e5419ad2f5fd',
            'ext': 'mp4',
            'title': 'Partiti acchiappavoti - Presa diretta del 13/09/2015',
            'description': 'md5:d291b03407ec505f95f27970c0b025f4',
            'upload_date': '20150913',
            'subtitles': {
                'it': 'count:2',
            },
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # Direct MMS URL
        'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-b63a4089-ac28-48cf-bca5-9f5b5bc46df5.html',
        'only_matching': True,
    }, {
        'url': 'https://www.rainews.it/tgr/marche/notiziari/video/2019/02/ContentItem-6ba945a2-889c-4a80-bdeb-8489c70a8db9.html',
        'only_matching': True,
    }]

    def _extract_from_content_id(self, content_id, url):
        """Fetch the ContentItem JSON and build the info dict for it."""
        media = self._download_json(
            'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-%s.html?json' % content_id,
            content_id, 'Downloading video JSON')

        title = media['name'].strip()

        media_type = media['type']
        if 'Audio' in media_type:
            relinker_info = {
                'formats': [{
                    'format_id': media.get('formatoAudio'),
                    'url': media['audioUrl'],
                    'ext': media.get('formatoAudio'),
                }]
            }
        elif 'Video' in media_type:
            relinker_info = self._extract_relinker_info(media['mediaUri'], content_id)
        else:
            raise ExtractorError('not a media file')

        self._sort_formats(relinker_info['formats'])

        thumbnails = []
        for image_type in ('image', 'image_medium', 'image_300'):
            thumbnail_url = media.get(image_type)
            if thumbnail_url:
                thumbnails.append({
                    'url': compat_urlparse.urljoin(url, thumbnail_url),
                })

        subtitles = self._extract_subtitles(url, media)

        info = {
            'id': content_id,
            'title': title,
            'description': strip_or_none(media.get('desc')),
            'thumbnails': thumbnails,
            'uploader': media.get('author'),
            'upload_date': unified_strdate(media.get('date')),
            'duration': parse_duration(media.get('length')),
            'subtitles': subtitles,
        }

        info.update(relinker_info)

        return info

    def _real_extract(self, url):
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        content_item_id = None

        # First try ContentItem ids exposed via meta tags.
        content_item_url = self._html_search_meta(
            ('og:url', 'og:video', 'og:video:secure_url', 'twitter:url',
             'twitter:player', 'jsonlink'), webpage, default=None)
        if content_item_url:
            content_item_id = self._search_regex(
                r'ContentItem-(%s)' % self._UUID_RE, content_item_url,
                'content item id', default=None)

        # Then inline JS calls, data attributes and iframes.
        if not content_item_id:
            content_item_id = self._search_regex(
                r'''(?x)
                    (?:
                        (?:initEdizione|drawMediaRaiTV)\(|
                        <(?:[^>]+\bdata-id|var\s+uniquename)=|
                        <iframe[^>]+\bsrc=
                    )
                    (["\'])
                    (?:(?!\1).)*\bContentItem-(?P<id>%s)
                ''' % self._UUID_RE,
                webpage, 'content item id', default=None, group='id')

        # Try the discovered id first, then the URL's own id as fallback.
        content_item_ids = set()
        if content_item_id:
            content_item_ids.add(content_item_id)
        if video_id not in content_item_ids:
            content_item_ids.add(video_id)

        for content_item_id in content_item_ids:
            try:
                return self._extract_from_content_id(content_item_id, url)
            except GeoRestrictedError:
                raise
            except ExtractorError:
                pass

        # No ContentItem worked: fall back to a raw relinker URL in the page.
        relinker_url = self._proto_relative_url(self._search_regex(
            r'''(?x)
                (?:
                    var\s+videoURL|
                    mediaInfo\.mediaUri
                )\s*=\s*
                ([\'"])
                (?P<url>
                    (?:https?:)?
                    //mediapolis(?:vod)?\.rai\.it/relinker/relinkerServlet\.htm\?
                    (?:(?!\1).)*\bcont=(?:(?!\1).)+)\1
            ''',
            webpage, 'relinker URL', group='url'))

        relinker_info = self._extract_relinker_info(
            urljoin(url, relinker_url), video_id)
        self._sort_formats(relinker_info['formats'])

        title = self._search_regex(
            r'var\s+videoTitolo\s*=\s*([\'"])(?P<title>[^\'"]+)\1',
            webpage, 'title', group='title',
            default=None) or self._og_search_title(webpage)

        info = {
            'id': video_id,
            'title': title,
        }

        info.update(relinker_info)

        return info
class RayWenderlichIE(InfoExtractor):
    """Extractor for individual raywenderlich.com lessons (Vimeo-hosted)."""
    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            videos\.raywenderlich\.com/courses|
                            (?:www\.)?raywenderlich\.com
                        )/
                        (?P<course_id>[^/]+)/lessons/(?P<id>\d+)
                    '''

    _TESTS = [{
        'url': 'https://www.raywenderlich.com/3530-testing-in-ios/lessons/1',
        'info_dict': {
            'id': '248377018',
            'ext': 'mp4',
            'title': 'Introduction',
            'description': 'md5:804d031b3efa9fcb49777d512d74f722',
            'timestamp': 1513906277,
            'upload_date': '20171222',
            'duration': 133,
            'uploader': 'Ray Wenderlich',
            'uploader_id': 'user3304672',
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
        'add_ie': [VimeoIE.ie_key()],
        'expected_warnings': ['HTTP Error 403: Forbidden'],
    }, {
        'url': 'https://videos.raywenderlich.com/courses/105-testing-in-ios/lessons/1',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_video_id(data, lesson_id):
        """Find the content identifier matching lesson_id in collection data."""
        if not data:
            return
        groups = try_get(data, lambda x: x['groups'], list) or []
        if not groups:
            return
        for group in groups:
            if not isinstance(group, dict):
                continue
            # NOTE(review): this reads data['contents'] inside the per-group
            # loop; possibly group['contents'] was intended — confirm upstream.
            contents = try_get(data, lambda x: x['contents'], list) or []
            for content in contents:
                if not isinstance(content, dict):
                    continue
                ordinal = int_or_none(content.get('ordinal'))
                if ordinal != lesson_id:
                    continue
                video_id = content.get('identifier')
                if video_id:
                    return compat_str(video_id)

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        course_id, lesson_id = mobj.group('course_id', 'id')
        display_id = '%s/%s' % (course_id, lesson_id)

        webpage = self._download_webpage(url, display_id)

        thumbnail = self._og_search_thumbnail(
            webpage, default=None) or self._html_search_meta(
            'twitter:image', webpage, 'thumbnail')

        if '>Subscribe to unlock' in webpage:
            raise ExtractorError(
                'This content is only available for subscribers',
                expected=True)

        info = {
            'thumbnail': thumbnail,
        }

        vimeo_id = self._search_regex(
            r'data-vimeo-id=["\'](\d+)', webpage, 'vimeo id', default=None)

        if not vimeo_id:
            # No direct Vimeo id on the page: resolve it through the site's
            # JSON API using the lesson's collection data.
            data = self._parse_json(
                self._search_regex(
                    r'data-collection=(["\'])(?P<data>{.+?})\1', webpage,
                    'data collection', default='{}', group='data'),
                display_id, transform_source=unescapeHTML, fatal=False)
            video_id = self._extract_video_id(
                data, lesson_id) or self._search_regex(
                r'/videos/(\d+)/', thumbnail, 'video id')
            headers = {
                'Referer': url,
                'X-Requested-With': 'XMLHttpRequest',
            }
            csrf_token = self._html_search_meta(
                'csrf-token', webpage, 'csrf token', default=None)
            if csrf_token:
                headers['X-CSRF-Token'] = csrf_token
            video = self._download_json(
                'https://videos.raywenderlich.com/api/v1/videos/%s.json'
                % video_id, display_id, headers=headers)['video']
            vimeo_id = video['clips'][0]['provider_id']
            info.update({
                '_type': 'url_transparent',
                'title': video.get('name'),
                'description': video.get('description') or video.get(
                    'meta_description'),
                'duration': int_or_none(video.get('duration')),
                'timestamp': unified_timestamp(video.get('created_at')),
            })

        return merge_dicts(info, self.url_result(
            VimeoIE._smuggle_referrer(
                'https://player.vimeo.com/video/%s' % vimeo_id, url),
            ie=VimeoIE.ie_key(), video_id=vimeo_id))


class RayWenderlichCourseIE(InfoExtractor):
    """Playlist extractor for whole raywenderlich.com courses."""
    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            videos\.raywenderlich\.com/courses|
                            (?:www\.)?raywenderlich\.com
                        )/
                        (?P<id>[^/]+)
                    '''

    _TEST = {
        'url': 'https://www.raywenderlich.com/3530-testing-in-ios',
        'info_dict': {
            'title': 'Testing in iOS',
            'id': '3530-testing-in-ios',
        },
        'params': {
            'noplaylist': False,
        },
        'playlist_count': 29,
    }

    @classmethod
    def suitable(cls, url):
        # Lesson URLs are handled by RayWenderlichIE.
        return False if RayWenderlichIE.suitable(url) else super(
            RayWenderlichCourseIE, cls).suitable(url)

    def _real_extract(self, url):
        course_id = self._match_id(url)

        webpage = self._download_webpage(url, course_id)

        entries = []
        seen = set()
        for lesson_url in re.findall(
                r'<a[^>]+\bhref=["\'](/%s/lessons/\d+)' % course_id, webpage):
            if lesson_url in seen:
                continue
            seen.add(lesson_url)
            entries.append(self.url_result(
                urljoin(url, lesson_url), ie=RayWenderlichIE.ie_key()))

        title = self._og_search_title(
            webpage, default=None) or self._html_search_meta(
            'twitter:title', webpage, 'title', default=None)

        return self.playlist_result(entries, course_id, title)
index da48ebbc4..000000000 --- a/youtube_dl/extractor/rbgtum.py +++ /dev/null @@ -1,97 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor - - -class RbgTumIE(InfoExtractor): - _VALID_URL = r'https://live\.rbg\.tum\.de/w/(?P<id>.+)' - _TESTS = [{ - # Combined view - 'url': 'https://live.rbg.tum.de/w/cpp/22128', - 'md5': '53a5e7b3e07128e33bbf36687fe1c08f', - 'info_dict': { - 'id': 'cpp/22128', - 'ext': 'mp4', - 'title': 'Lecture: October 18. 2022', - 'series': 'Concepts of C++ programming (IN2377)', - } - }, { - # Presentation only - 'url': 'https://live.rbg.tum.de/w/I2DL/12349/PRES', - 'md5': '36c584272179f3e56b0db5d880639cba', - 'info_dict': { - 'id': 'I2DL/12349/PRES', - 'ext': 'mp4', - 'title': 'Lecture 3: Introduction to Neural Networks', - 'series': 'Introduction to Deep Learning (IN2346)', - } - }, { - # Camera only - 'url': 'https://live.rbg.tum.de/w/fvv-info/16130/CAM', - 'md5': 'e04189d92ff2f56aedf5cede65d37aad', - 'info_dict': { - 'id': 'fvv-info/16130/CAM', - 'ext': 'mp4', - 'title': 'Fachschaftsvollversammlung', - 'series': 'Fachschaftsvollversammlung Informatik', - } - }, ] - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - m3u8 = self._html_search_regex(r'(https://.+?\.m3u8)', webpage, 'm3u8') - lecture_title = self._html_search_regex(r'(?si)<h1.*?>(.*)</h1>', webpage, 'title') - lecture_series_title = self._html_search_regex( - r'(?s)<title\b[^>]*>\s*(?:TUM-Live\s\|\s?)?([^:]+):?.*?', webpage, 'series') - - formats = self._extract_m3u8_formats(m3u8, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls') - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': lecture_title, - 'series': lecture_series_title, - 'formats': formats, - } - - -class RbgTumCourseIE(InfoExtractor): - _VALID_URL = r'https://live\.rbg\.tum\.de/course/(?P.+)' - _TESTS = [{ - 'url': 
'https://live.rbg.tum.de/course/2022/S/fpv', - 'info_dict': { - 'title': 'Funktionale Programmierung und Verifikation (IN0003)', - 'id': '2022/S/fpv', - }, - 'params': { - 'noplaylist': False, - }, - 'playlist_count': 13, - }, { - 'url': 'https://live.rbg.tum.de/course/2022/W/set', - 'info_dict': { - 'title': 'SET FSMPIC', - 'id': '2022/W/set', - }, - 'params': { - 'noplaylist': False, - }, - 'playlist_count': 6, - }, ] - - def _real_extract(self, url): - course_id = self._match_id(url) - webpage = self._download_webpage(url, course_id) - - lecture_series_title = self._html_search_regex(r'(?si)(.*)', webpage, 'title') - - lecture_urls = [] - for lecture_url in re.findall(r'(?i)href="/w/(.+)(?[^/]+)/episodes/(?P[^/?#&]+)' - _TEST = { - 'url': 'https://www.rbmaradio.com/shows/main-stage/episodes/ford-lopatin-live-at-primavera-sound-2011', - 'md5': '6bc6f9bcb18994b4c983bc3bf4384d95', - 'info_dict': { - 'id': 'ford-lopatin-live-at-primavera-sound-2011', - 'ext': 'mp3', - 'title': 'Main Stage - Ford & Lopatin at Primavera Sound', - 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', - 'thumbnail': r're:^https?://.*\.jpg', - 'duration': 2452, - 'timestamp': 1307103164, - 'upload_date': '20110603', - }, - } - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - show_id = mobj.group('show_id') - episode_id = mobj.group('id') - - webpage = self._download_webpage(url, episode_id) - - episode = self._parse_json( - self._search_regex( - r'__INITIAL_STATE__\s*=\s*({.+?})\s*', - webpage, 'json data'), - episode_id)['episodes'][show_id][episode_id] - - title = episode['title'] - - show_title = episode.get('showTitle') - if show_title: - title = '%s - %s' % (show_title, title) - - formats = [{ - 'url': update_url_query(episode['audioURL'], query={'cbr': abr}), - 'format_id': compat_str(abr), - 'abr': abr, - 'vcodec': 'none', - } for abr in (96, 128, 192, 256)] - self._check_formats(formats, episode_id) - - description = clean_html(episode.get('longTeaser')) 
- thumbnail = self._proto_relative_url(episode.get('imageURL', {}).get('landscape')) - duration = int_or_none(episode.get('duration')) - timestamp = unified_timestamp(episode.get('publishedAt')) - - return { - 'id': episode_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'duration': duration, - 'timestamp': timestamp, - 'formats': formats, - } diff --git a/youtube_dl/extractor/rds.py b/youtube_dl/extractor/rds.py deleted file mode 100644 index 0c497856e..000000000 --- a/youtube_dl/extractor/rds.py +++ /dev/null @@ -1,70 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - parse_duration, - parse_iso8601, - js_to_json, -) -from ..compat import compat_str - - -class RDSIE(InfoExtractor): - IE_DESC = 'RDS.ca' - _VALID_URL = r'https?://(?:www\.)?rds\.ca/vid(?:[eé]|%C3%A9)os/(?:[^/]+/)*(?P[^/]+)-\d+\.\d+' - - _TESTS = [{ - # has two 9c9media ContentPackages, the web player selects the first ContentPackage - 'url': 'https://www.rds.ca/videos/Hockey/NationalHockeyLeague/teams/9/forum-du-5-a-7-jesperi-kotkaniemi-de-retour-de-finlande-3.1377606', - 'info_dict': { - 'id': '2083309', - 'display_id': 'forum-du-5-a-7-jesperi-kotkaniemi-de-retour-de-finlande', - 'ext': 'flv', - 'title': 'Forum du 5 à 7 : Kotkaniemi de retour de Finlande', - 'description': 'md5:83fa38ecc4a79b19e433433254077f25', - 'timestamp': 1606129030, - 'upload_date': '20201123', - 'duration': 773.039, - } - }, { - 'url': 'http://www.rds.ca/vid%C3%A9os/un-voyage-positif-3.877934', - 'only_matching': True, - }] - - def _real_extract(self, url): - display_id = self._match_id(url) - - webpage = self._download_webpage(url, display_id) - - item = self._parse_json(self._search_regex(r'(?s)itemToPush\s*=\s*({.+?});', webpage, 'item'), display_id, js_to_json) - video_id = compat_str(item['id']) - title = item.get('title') or self._og_search_title(webpage) or self._html_search_meta( - 'title', webpage, 'title', 
fatal=True) - description = self._og_search_description(webpage) or self._html_search_meta( - 'description', webpage, 'description') - thumbnail = item.get('urlImageBig') or self._og_search_thumbnail(webpage) or self._search_regex( - [r']+itemprop="thumbnailUrl"[^>]+href="([^"]+)"', - r']+itemprop="thumbnailUrl"[^>]+content="([^"]+)"'], - webpage, 'thumbnail', fatal=False) - timestamp = parse_iso8601(self._search_regex( - r']+itemprop="uploadDate"[^>]+content="([^"]+)"', - webpage, 'upload date', fatal=False)) - duration = parse_duration(self._search_regex( - r']+itemprop="duration"[^>]+content="([^"]+)"', - webpage, 'duration', fatal=False)) - age_limit = self._family_friendly_search(webpage) - - return { - '_type': 'url_transparent', - 'id': video_id, - 'display_id': display_id, - 'url': '9c9media:rds_web:%s' % video_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'timestamp': timestamp, - 'duration': duration, - 'age_limit': age_limit, - 'ie_key': 'NineCNineMedia', - } diff --git a/youtube_dl/extractor/redbulltv.py b/youtube_dl/extractor/redbulltv.py deleted file mode 100644 index 6d000b372..000000000 --- a/youtube_dl/extractor/redbulltv.py +++ /dev/null @@ -1,231 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..compat import compat_HTTPError -from ..utils import ( - float_or_none, - ExtractorError, -) - - -class RedBullTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?redbull(?:\.tv|\.com(?:/[^/]+)?(?:/tv)?)(?:/events/[^/]+)?/(?:videos?|live|(?:film|episode)s)/(?PAP-\w+)' - _TESTS = [{ - # film - 'url': 'https://www.redbull.tv/video/AP-1Q6XCDTAN1W11', - 'md5': 'fb0445b98aa4394e504b413d98031d1f', - 'info_dict': { - 'id': 'AP-1Q6XCDTAN1W11', - 'ext': 'mp4', - 'title': 'ABC of... WRC - ABC of... 
S1E6', - 'description': 'md5:5c7ed8f4015c8492ecf64b6ab31e7d31', - 'duration': 1582.04, - }, - }, { - # episode - 'url': 'https://www.redbull.tv/video/AP-1PMHKJFCW1W11', - 'info_dict': { - 'id': 'AP-1PMHKJFCW1W11', - 'ext': 'mp4', - 'title': 'Grime - Hashtags S2E4', - 'description': 'md5:5546aa612958c08a98faaad4abce484d', - 'duration': 904, - }, - 'params': { - 'skip_download': True, - }, - }, { - 'url': 'https://www.redbull.com/int-en/tv/video/AP-1UWHCAR9S1W11/rob-meets-sam-gaze?playlist=playlists::3f81040a-2f31-4832-8e2e-545b1d39d173', - 'only_matching': True, - }, { - 'url': 'https://www.redbull.com/us-en/videos/AP-1YM9QCYE52111', - 'only_matching': True, - }, { - 'url': 'https://www.redbull.com/us-en/events/AP-1XV2K61Q51W11/live/AP-1XUJ86FDH1W11', - 'only_matching': True, - }, { - 'url': 'https://www.redbull.com/int-en/films/AP-1ZSMAW8FH2111', - 'only_matching': True, - }, { - 'url': 'https://www.redbull.com/int-en/episodes/AP-1TQWK7XE11W11', - 'only_matching': True, - }] - - def extract_info(self, video_id): - session = self._download_json( - 'https://api.redbull.tv/v3/session', video_id, - note='Downloading access token', query={ - 'category': 'personal_computer', - 'os_family': 'http', - }) - if session.get('code') == 'error': - raise ExtractorError('%s said: %s' % ( - self.IE_NAME, session['message'])) - token = session['token'] - - try: - video = self._download_json( - 'https://api.redbull.tv/v3/products/' + video_id, - video_id, note='Downloading video information', - headers={'Authorization': token} - ) - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404: - error_message = self._parse_json( - e.cause.read().decode(), video_id)['error'] - raise ExtractorError('%s said: %s' % ( - self.IE_NAME, error_message), expected=True) - raise - - title = video['title'].strip() - - formats = self._extract_m3u8_formats( - 'https://dms.redbull.tv/v3/%s/%s/playlist.m3u8' % (video_id, token), - video_id, 'mp4', 
entry_protocol='m3u8_native', m3u8_id='hls') - self._sort_formats(formats) - - subtitles = {} - for resource in video.get('resources', []): - if resource.startswith('closed_caption_'): - splitted_resource = resource.split('_') - if splitted_resource[2]: - subtitles.setdefault('en', []).append({ - 'url': 'https://resources.redbull.tv/%s/%s' % (video_id, resource), - 'ext': splitted_resource[2], - }) - - subheading = video.get('subheading') - if subheading: - title += ' - %s' % subheading - - return { - 'id': video_id, - 'title': title, - 'description': video.get('long_description') or video.get( - 'short_description'), - 'duration': float_or_none(video.get('duration'), scale=1000), - 'formats': formats, - 'subtitles': subtitles, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - return self.extract_info(video_id) - - -class RedBullEmbedIE(RedBullTVIE): - _VALID_URL = r'https?://(?:www\.)?redbull\.com/embed/(?Prrn:content:[^:]+:[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}:[a-z]{2}-[A-Z]{2,3})' - _TESTS = [{ - # HLS manifest accessible only using assetId - 'url': 'https://www.redbull.com/embed/rrn:content:episode-videos:f3021f4f-3ed4-51ac-915a-11987126e405:en-INT', - 'only_matching': True, - }] - _VIDEO_ESSENSE_TMPL = '''... 
on %s { - videoEssence { - attributes - } - }''' - - def _real_extract(self, url): - rrn_id = self._match_id(url) - asset_id = self._download_json( - 'https://edge-graphql.crepo-production.redbullaws.com/v1/graphql', - rrn_id, headers={ - 'Accept': 'application/json', - 'API-KEY': 'e90a1ff11335423998b100c929ecc866', - }, query={ - 'query': '''{ - resource(id: "%s", enforceGeoBlocking: false) { - %s - %s - } -}''' % (rrn_id, self._VIDEO_ESSENSE_TMPL % 'LiveVideo', self._VIDEO_ESSENSE_TMPL % 'VideoResource'), - })['data']['resource']['videoEssence']['attributes']['assetId'] - return self.extract_info(asset_id) - - -class RedBullTVRrnContentIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?redbull\.com/(?P[a-z]{2,3})-(?P[a-z]{2})/tv/(?:video|live|film)/(?Prrn:content:[^:]+:[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})' - _TESTS = [{ - 'url': 'https://www.redbull.com/int-en/tv/video/rrn:content:live-videos:e3e6feb4-e95f-50b7-962a-c70f8fd13c73/mens-dh-finals-fort-william', - 'only_matching': True, - }, { - 'url': 'https://www.redbull.com/int-en/tv/video/rrn:content:videos:a36a0f36-ff1b-5db8-a69d-ee11a14bf48b/tn-ts-style?playlist=rrn:content:event-profiles:83f05926-5de8-5389-b5e4-9bb312d715e8:extras', - 'only_matching': True, - }, { - 'url': 'https://www.redbull.com/int-en/tv/film/rrn:content:films:d1f4d00e-4c04-5d19-b510-a805ffa2ab83/follow-me', - 'only_matching': True, - }] - - def _real_extract(self, url): - region, lang, rrn_id = re.search(self._VALID_URL, url).groups() - rrn_id += ':%s-%s' % (lang, region.upper()) - return self.url_result( - 'https://www.redbull.com/embed/' + rrn_id, - RedBullEmbedIE.ie_key(), rrn_id) - - -class RedBullIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?redbull\.com/(?P[a-z]{2,3})-(?P[a-z]{2})/(?P(?:episode|film|(?:(?:recap|trailer)-)?video)s|live)/(?!AP-|rrn:content:)(?P[^/?#&]+)' - _TESTS = [{ - 'url': 'https://www.redbull.com/int-en/episodes/grime-hashtags-s02-e04', - 'md5': 'db8271a7200d40053a1809ed0dd574ff', 
- 'info_dict': { - 'id': 'AA-1MT8DQWA91W14', - 'ext': 'mp4', - 'title': 'Grime - Hashtags S2E4', - 'description': 'md5:5546aa612958c08a98faaad4abce484d', - }, - }, { - 'url': 'https://www.redbull.com/int-en/films/kilimanjaro-mountain-of-greatness', - 'only_matching': True, - }, { - 'url': 'https://www.redbull.com/int-en/recap-videos/uci-mountain-bike-world-cup-2017-mens-xco-finals-from-vallnord', - 'only_matching': True, - }, { - 'url': 'https://www.redbull.com/int-en/trailer-videos/kings-of-content', - 'only_matching': True, - }, { - 'url': 'https://www.redbull.com/int-en/videos/tnts-style-red-bull-dance-your-style-s1-e12', - 'only_matching': True, - }, { - 'url': 'https://www.redbull.com/int-en/live/mens-dh-finals-fort-william', - 'only_matching': True, - }, { - # only available on the int-en website so a fallback is need for the API - # https://www.redbull.com/v3/api/graphql/v1/v3/query/en-GB>en-INT?filter[uriSlug]=fia-wrc-saturday-recap-estonia&rb3Schema=v1:hero - 'url': 'https://www.redbull.com/gb-en/live/fia-wrc-saturday-recap-estonia', - 'only_matching': True, - }] - _INT_FALLBACK_LIST = ['de', 'en', 'es', 'fr'] - _LAT_FALLBACK_MAP = ['ar', 'bo', 'car', 'cl', 'co', 'mx', 'pe'] - - def _real_extract(self, url): - region, lang, filter_type, display_id = re.search(self._VALID_URL, url).groups() - if filter_type == 'episodes': - filter_type = 'episode-videos' - elif filter_type == 'live': - filter_type = 'live-videos' - - regions = [region.upper()] - if region != 'int': - if region in self._LAT_FALLBACK_MAP: - regions.append('LAT') - if lang in self._INT_FALLBACK_LIST: - regions.append('INT') - locale = '>'.join(['%s-%s' % (lang, reg) for reg in regions]) - - rrn_id = self._download_json( - 'https://www.redbull.com/v3/api/graphql/v1/v3/query/' + locale, - display_id, query={ - 'filter[type]': filter_type, - 'filter[uriSlug]': display_id, - 'rb3Schema': 'v1:hero', - })['data']['id'] - - return self.url_result( - 'https://www.redbull.com/embed/' + rrn_id, - 
RedBullEmbedIE.ie_key(), rrn_id) diff --git a/youtube_dl/extractor/reddit.py b/youtube_dl/extractor/reddit.py deleted file mode 100644 index 222fa0172..000000000 --- a/youtube_dl/extractor/reddit.py +++ /dev/null @@ -1,161 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - int_or_none, - float_or_none, - try_get, - unescapeHTML, - url_or_none, -) - - -class RedditIE(InfoExtractor): - _VALID_URL = r'https?://v\.redd\.it/(?P[^/?#&]+)' - _TEST = { - # from https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/ - 'url': 'https://v.redd.it/zv89llsvexdz', - 'md5': '0a070c53eba7ec4534d95a5a1259e253', - 'info_dict': { - 'id': 'zv89llsvexdz', - 'ext': 'mp4', - 'title': 'zv89llsvexdz', - }, - 'params': { - 'format': 'bestvideo', - }, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - formats = self._extract_m3u8_formats( - 'https://v.redd.it/%s/HLSPlaylist.m3u8' % video_id, video_id, - 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False) - - formats.extend(self._extract_mpd_formats( - 'https://v.redd.it/%s/DASHPlaylist.mpd' % video_id, video_id, - mpd_id='dash', fatal=False)) - - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': video_id, - 'formats': formats, - } - - -class RedditRIE(InfoExtractor): - _VALID_URL = r'(?Phttps?://(?:[^/]+\.)?reddit\.com/r/[^/]+/comments/(?P[^/?#&]+))' - _TESTS = [{ - 'url': 'https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/', - 'info_dict': { - 'id': 'zv89llsvexdz', - 'ext': 'mp4', - 'title': 'That small heart attack.', - 'thumbnail': r're:^https?://.*\.(?:jpg|png)', - 'thumbnails': 'count:4', - 'timestamp': 1501941939, - 'upload_date': '20170805', - 'uploader': 'Antw87', - 'duration': 12, - 'like_count': int, - 'dislike_count': int, - 'comment_count': int, - 'age_limit': 0, - }, - 'params': { - 'format': 'bestvideo', - 'skip_download': True, - }, - }, { - 
'url': 'https://www.reddit.com/r/videos/comments/6rrwyj', - 'only_matching': True, - }, { - # imgur - 'url': 'https://www.reddit.com/r/MadeMeSmile/comments/6t7wi5/wait_for_it/', - 'only_matching': True, - }, { - # imgur @ old reddit - 'url': 'https://old.reddit.com/r/MadeMeSmile/comments/6t7wi5/wait_for_it/', - 'only_matching': True, - }, { - # streamable - 'url': 'https://www.reddit.com/r/videos/comments/6t7sg9/comedians_hilarious_joke_about_the_guam_flag/', - 'only_matching': True, - }, { - # youtube - 'url': 'https://www.reddit.com/r/videos/comments/6t75wq/southern_man_tries_to_speak_without_an_accent/', - 'only_matching': True, - }, { - # reddit video @ nm reddit - 'url': 'https://nm.reddit.com/r/Cricket/comments/8idvby/lousy_cameraman_finds_himself_in_cairns_line_of/', - 'only_matching': True, - }] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - url, video_id = mobj.group('url', 'id') - - video_id = self._match_id(url) - - data = self._download_json( - url + '/.json', video_id)[0]['data']['children'][0]['data'] - - video_url = data['url'] - - # Avoid recursing into the same reddit URL - if 'reddit.com/' in video_url and '/%s/' % video_id in video_url: - raise ExtractorError('No media found', expected=True) - - over_18 = data.get('over_18') - if over_18 is True: - age_limit = 18 - elif over_18 is False: - age_limit = 0 - else: - age_limit = None - - thumbnails = [] - - def add_thumbnail(src): - if not isinstance(src, dict): - return - thumbnail_url = url_or_none(src.get('url')) - if not thumbnail_url: - return - thumbnails.append({ - 'url': unescapeHTML(thumbnail_url), - 'width': int_or_none(src.get('width')), - 'height': int_or_none(src.get('height')), - }) - - for image in try_get(data, lambda x: x['preview']['images']) or []: - if not isinstance(image, dict): - continue - add_thumbnail(image.get('source')) - resolutions = image.get('resolutions') - if isinstance(resolutions, list): - for resolution in resolutions: - 
add_thumbnail(resolution) - - return { - '_type': 'url_transparent', - 'url': video_url, - 'title': data.get('title'), - 'thumbnails': thumbnails, - 'timestamp': float_or_none(data.get('created_utc')), - 'uploader': data.get('author'), - 'duration': int_or_none(try_get( - data, - (lambda x: x['media']['reddit_video']['duration'], - lambda x: x['secure_media']['reddit_video']['duration']))), - 'like_count': int_or_none(data.get('ups')), - 'dislike_count': int_or_none(data.get('downs')), - 'comment_count': int_or_none(data.get('num_comments')), - 'age_limit': age_limit, - } diff --git a/youtube_dl/extractor/redtube.py b/youtube_dl/extractor/redtube.py deleted file mode 100644 index a1ca791ca..000000000 --- a/youtube_dl/extractor/redtube.py +++ /dev/null @@ -1,136 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - determine_ext, - ExtractorError, - int_or_none, - merge_dicts, - str_to_int, - unified_strdate, - url_or_none, -) - - -class RedTubeIE(InfoExtractor): - _VALID_URL = r'https?://(?:(?:\w+\.)?redtube\.com/|embed\.redtube\.com/\?.*?\bid=)(?P[0-9]+)' - _TESTS = [{ - 'url': 'http://www.redtube.com/66418', - 'md5': 'fc08071233725f26b8f014dba9590005', - 'info_dict': { - 'id': '66418', - 'ext': 'mp4', - 'title': 'Sucked on a toilet', - 'upload_date': '20110811', - 'duration': 596, - 'view_count': int, - 'age_limit': 18, - } - }, { - 'url': 'http://embed.redtube.com/?bgcolor=000000&id=1443286', - 'only_matching': True, - }, { - 'url': 'http://it.redtube.com/66418', - 'only_matching': True, - }] - - @staticmethod - def _extract_urls(webpage): - return re.findall( - r']+?src=["\'](?P(?:https?:)?//embed\.redtube\.com/\?.*?\bid=\d+)', - webpage) - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage( - 'http://www.redtube.com/%s' % video_id, video_id) - - ERRORS = ( - (('video-deleted-info', '>This video has been removed'), 'has been removed'), - 
(('private_video_text', '>This video is private', '>Send a friend request to its owner to be able to view it'), 'is private'), - ) - - for patterns, message in ERRORS: - if any(p in webpage for p in patterns): - raise ExtractorError( - 'Video %s %s' % (video_id, message), expected=True) - - info = self._search_json_ld(webpage, video_id, default={}) - - if not info.get('title'): - info['title'] = self._html_search_regex( - (r']+class="(?:video_title_text|videoTitle|video_title)[^"]*">(?P(?:(?!\1).)+)</h\1>', - r'(?:videoTitle|title)\s*:\s*(["\'])(?P<title>(?:(?!\1).)+)\1',), - webpage, 'title', group='title', - default=None) or self._og_search_title(webpage) - - formats = [] - sources = self._parse_json( - self._search_regex( - r'sources\s*:\s*({.+?})', webpage, 'source', default='{}'), - video_id, fatal=False) - if sources and isinstance(sources, dict): - for format_id, format_url in sources.items(): - if format_url: - formats.append({ - 'url': format_url, - 'format_id': format_id, - 'height': int_or_none(format_id), - }) - medias = self._parse_json( - self._search_regex( - r'mediaDefinition["\']?\s*:\s*(\[.+?}\s*\])', webpage, - 'media definitions', default='{}'), - video_id, fatal=False) - if medias and isinstance(medias, list): - for media in medias: - format_url = url_or_none(media.get('videoUrl')) - if not format_url: - continue - if media.get('format') == 'hls' or determine_ext(format_url) == 'm3u8': - formats.extend(self._extract_m3u8_formats( - format_url, video_id, 'mp4', - entry_protocol='m3u8_native', m3u8_id='hls', - fatal=False)) - continue - format_id = media.get('quality') - formats.append({ - 'url': format_url, - 'format_id': format_id, - 'height': int_or_none(format_id), - }) - if not formats: - video_url = self._html_search_regex( - r'<source src="(.+?)" type="video/mp4">', webpage, 'video URL') - formats.append({'url': video_url}) - self._sort_formats(formats) - - thumbnail = self._og_search_thumbnail(webpage) - upload_date = 
unified_strdate(self._search_regex( - r'<span[^>]+>(?:ADDED|Published on) ([^<]+)<', - webpage, 'upload date', default=None)) - duration = int_or_none(self._og_search_property( - 'video:duration', webpage, default=None) or self._search_regex( - r'videoDuration\s*:\s*(\d+)', webpage, 'duration', default=None)) - view_count = str_to_int(self._search_regex( - (r'<div[^>]*>Views</div>\s*<div[^>]*>\s*([\d,.]+)', - r'<span[^>]*>VIEWS</span>\s*</td>\s*<td>\s*([\d,.]+)', - r'<span[^>]+\bclass=["\']video_view_count[^>]*>\s*([\d,.]+)'), - webpage, 'view count', default=None)) - - # No self-labeling, but they describe themselves as - # "Home of Videos Porno" - age_limit = 18 - - return merge_dicts(info, { - 'id': video_id, - 'ext': 'mp4', - 'thumbnail': thumbnail, - 'upload_date': upload_date, - 'duration': duration, - 'view_count': view_count, - 'age_limit': age_limit, - 'formats': formats, - }) diff --git a/youtube_dl/extractor/regiotv.py b/youtube_dl/extractor/regiotv.py deleted file mode 100644 index e250a52f0..000000000 --- a/youtube_dl/extractor/regiotv.py +++ /dev/null @@ -1,62 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor - -from ..utils import ( - sanitized_Request, - xpath_text, - xpath_with_ns, -) - - -class RegioTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?regio-tv\.de/video/(?P<id>[0-9]+)' - _TESTS = [{ - 'url': 'http://www.regio-tv.de/video/395808.html', - 'info_dict': { - 'id': '395808', - 'ext': 'mp4', - 'title': 'Wir in Ludwigsburg', - 'description': 'Mit unseren zuckersüßen Adventskindern, außerdem besuchen wir die Abendsterne!', - } - }, { - 'url': 'http://www.regio-tv.de/video/395808', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - key = self._search_regex( - r'key\s*:\s*(["\'])(?P<key>.+?)\1', webpage, 'key', group='key') - title = self._og_search_title(webpage) - - SOAP_TEMPLATE = 
'<?xml version="1.0" encoding="utf-8"?><soap:Envelope xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/"><soap:Body><{0} xmlns="http://v.telvi.de/"><key xsi:type="xsd:string">{1}</key></{0}></soap:Body></soap:Envelope>' - - request = sanitized_Request( - 'http://v.telvi.de/', - SOAP_TEMPLATE.format('GetHTML5VideoData', key).encode('utf-8')) - video_data = self._download_xml(request, video_id, 'Downloading video XML') - - NS_MAP = { - 'xsi': 'http://www.w3.org/2001/XMLSchema-instance', - 'soap': 'http://schemas.xmlsoap.org/soap/envelope/', - } - - video_url = xpath_text( - video_data, xpath_with_ns('.//video', NS_MAP), 'video url', fatal=True) - thumbnail = xpath_text( - video_data, xpath_with_ns('.//image', NS_MAP), 'thumbnail') - description = self._og_search_description( - webpage) or self._html_search_meta('description', webpage) - - return { - 'id': video_id, - 'url': video_url, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - } diff --git a/youtube_dl/extractor/rentv.py b/youtube_dl/extractor/rentv.py deleted file mode 100644 index 7c8909d95..000000000 --- a/youtube_dl/extractor/rentv.py +++ /dev/null @@ -1,106 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..compat import compat_str -from ..utils import ( - determine_ext, - int_or_none, - url_or_none, -) - - -class RENTVIE(InfoExtractor): - _VALID_URL = r'(?:rentv:|https?://(?:www\.)?ren\.tv/(?:player|video/epizod)/)(?P<id>\d+)' - _TESTS = [{ - 'url': 'http://ren.tv/video/epizod/118577', - 'md5': 'd91851bf9af73c0ad9b2cdf76c127fbb', - 'info_dict': { - 'id': '118577', - 'ext': 'mp4', - 'title': 'Документальный спецпроект: "Промывка мозгов. 
Технологии XXI века"', - 'timestamp': 1472230800, - 'upload_date': '20160826', - } - }, { - 'url': 'http://ren.tv/player/118577', - 'only_matching': True, - }, { - 'url': 'rentv:118577', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage('http://ren.tv/player/' + video_id, video_id) - config = self._parse_json(self._search_regex( - r'config\s*=\s*({.+})\s*;', webpage, 'config'), video_id) - title = config['title'] - formats = [] - for video in config['src']: - src = url_or_none(video.get('src')) - if not src: - continue - ext = determine_ext(src) - if ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( - src, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls', fatal=False)) - else: - formats.append({ - 'url': src, - }) - self._sort_formats(formats) - return { - 'id': video_id, - 'title': title, - 'description': config.get('description'), - 'thumbnail': config.get('image'), - 'duration': int_or_none(config.get('duration')), - 'timestamp': int_or_none(config.get('date')), - 'formats': formats, - } - - -class RENTVArticleIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?ren\.tv/novosti/\d{4}-\d{2}-\d{2}/(?P<id>[^/?#]+)' - _TESTS = [{ - 'url': 'http://ren.tv/novosti/2016-10-26/video-mikroavtobus-popavshiy-v-dtp-s-gruzovikami-v-podmoskove-prevratilsya-v', - 'md5': 'ebd63c4680b167693745ab91343df1d6', - 'info_dict': { - 'id': '136472', - 'ext': 'mp4', - 'title': 'Видео: микроавтобус, попавший в ДТП с грузовиками в Подмосковье, превратился в груду металла', - 'description': 'Жертвами столкновения двух фур и микроавтобуса, по последним данным, стали семь человек.', - } - }, { - # TODO: invalid m3u8 - 'url': 'http://ren.tv/novosti/2015-09-25/sluchaynyy-prohozhiy-poymal-avtougonshchika-v-murmanske-video', - 'info_dict': { - 'id': 'playlist', - 'ext': 'mp4', - 'title': 'Случайный прохожий поймал автоугонщика в Мурманске. 
ВИДЕО | РЕН ТВ', - 'uploader': 'ren.tv', - }, - 'params': { - # m3u8 downloads - 'skip_download': True, - }, - 'skip': True, - }] - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - drupal_settings = self._parse_json(self._search_regex( - r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', - webpage, 'drupal settings'), display_id) - - entries = [] - for config_profile in drupal_settings.get('ren_jwplayer', {}).values(): - media_id = config_profile.get('mediaid') - if not media_id: - continue - media_id = compat_str(media_id) - entries.append(self.url_result('rentv:' + media_id, 'RENTV', media_id)) - return self.playlist_result(entries, display_id) diff --git a/youtube_dl/extractor/restudy.py b/youtube_dl/extractor/restudy.py deleted file mode 100644 index d47fb45ca..000000000 --- a/youtube_dl/extractor/restudy.py +++ /dev/null @@ -1,44 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor - - -class RestudyIE(InfoExtractor): - _VALID_URL = r'https?://(?:(?:www|portal)\.)?restudy\.dk/video/[^/]+/id/(?P<id>[0-9]+)' - _TESTS = [{ - 'url': 'https://www.restudy.dk/video/play/id/1637', - 'info_dict': { - 'id': '1637', - 'ext': 'flv', - 'title': 'Leiden-frosteffekt', - 'description': 'Denne video er et eksperiment med flydende kvælstof.', - }, - 'params': { - # rtmp download - 'skip_download': True, - } - }, { - 'url': 'https://portal.restudy.dk/video/leiden-frosteffekt/id/1637', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - title = self._og_search_title(webpage).strip() - description = self._og_search_description(webpage).strip() - - formats = self._extract_smil_formats( - 'https://cdn.portal.restudy.dk/dynamic/themes/front/awsmedia/SmilDirectory/video_%s.xml' % video_id, - video_id) - self._sort_formats(formats) - - return { - 'id': video_id, - 
'title': title, - 'description': description, - 'formats': formats, - } diff --git a/youtube_dl/extractor/reuters.py b/youtube_dl/extractor/reuters.py deleted file mode 100644 index 9dc482d21..000000000 --- a/youtube_dl/extractor/reuters.py +++ /dev/null @@ -1,69 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - js_to_json, - int_or_none, - unescapeHTML, -) - - -class ReutersIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?reuters\.com/.*?\?.*?videoId=(?P<id>[0-9]+)' - _TEST = { - 'url': 'http://www.reuters.com/video/2016/05/20/san-francisco-police-chief-resigns?videoId=368575562', - 'md5': '8015113643a0b12838f160b0b81cc2ee', - 'info_dict': { - 'id': '368575562', - 'ext': 'mp4', - 'title': 'San Francisco police chief resigns', - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage( - 'http://www.reuters.com/assets/iframe/yovideo?videoId=%s' % video_id, video_id) - video_data = js_to_json(self._search_regex( - r'(?s)Reuters\.yovideo\.drawPlayer\(({.*?})\);', - webpage, 'video data')) - - def get_json_value(key, fatal=False): - return self._search_regex(r'"%s"\s*:\s*"([^"]+)"' % key, video_data, key, fatal=fatal) - - title = unescapeHTML(get_json_value('title', fatal=True)) - mmid, fid = re.search(r',/(\d+)\?f=(\d+)', get_json_value('flv', fatal=True)).groups() - - mas_data = self._download_json( - 'http://mas-e.cds1.yospace.com/mas/%s/%s?trans=json' % (mmid, fid), - video_id, transform_source=js_to_json) - formats = [] - for f in mas_data: - f_url = f.get('url') - if not f_url: - continue - method = f.get('method') - if method == 'hls': - formats.extend(self._extract_m3u8_formats( - f_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) - else: - container = f.get('container') - ext = '3gp' if method == 'mobile' else container - formats.append({ - 'format_id': ext, - 'url': f_url, - 'ext': ext, - 
'container': container if method != 'mobile' else None, - }) - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': title, - 'thumbnail': get_json_value('thumb'), - 'duration': int_or_none(get_json_value('seconds')), - 'formats': formats, - } diff --git a/youtube_dl/extractor/reverbnation.py b/youtube_dl/extractor/reverbnation.py deleted file mode 100644 index 4cb99c244..000000000 --- a/youtube_dl/extractor/reverbnation.py +++ /dev/null @@ -1,53 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - qualities, - str_or_none, -) - - -class ReverbNationIE(InfoExtractor): - _VALID_URL = r'^https?://(?:www\.)?reverbnation\.com/.*?/song/(?P<id>\d+).*?$' - _TESTS = [{ - 'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa', - 'md5': 'c0aaf339bcee189495fdf5a8c8ba8645', - 'info_dict': { - 'id': '16965047', - 'ext': 'mp3', - 'title': 'MONA LISA', - 'uploader': 'ALKILADOS', - 'uploader_id': '216429', - 'thumbnail': r're:^https?://.*\.jpg', - }, - }] - - def _real_extract(self, url): - song_id = self._match_id(url) - - api_res = self._download_json( - 'https://api.reverbnation.com/song/%s' % song_id, - song_id, - note='Downloading information of song %s' % song_id - ) - - THUMBNAILS = ('thumbnail', 'image') - quality = qualities(THUMBNAILS) - thumbnails = [] - for thumb_key in THUMBNAILS: - if api_res.get(thumb_key): - thumbnails.append({ - 'url': api_res[thumb_key], - 'preference': quality(thumb_key) - }) - - return { - 'id': song_id, - 'title': api_res['name'], - 'url': api_res['url'], - 'uploader': api_res.get('artist', {}).get('name'), - 'uploader_id': str_or_none(api_res.get('artist', {}).get('id')), - 'thumbnails': thumbnails, - 'ext': 'mp3', - 'vcodec': 'none', - } diff --git a/youtube_dl/extractor/rice.py b/youtube_dl/extractor/rice.py deleted file mode 100644 index f855719ac..000000000 --- a/youtube_dl/extractor/rice.py +++ /dev/null @@ -1,116 +0,0 @@ -# coding: utf-8 -from 
__future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..compat import compat_parse_qs -from ..utils import ( - xpath_text, - xpath_element, - int_or_none, - parse_iso8601, - ExtractorError, -) - - -class RICEIE(InfoExtractor): - _VALID_URL = r'https?://mediahub\.rice\.edu/app/[Pp]ortal/video\.aspx\?(?P<query>.+)' - _TEST = { - 'url': 'https://mediahub.rice.edu/app/Portal/video.aspx?PortalID=25ffd62c-3d01-4b29-8c70-7c94270efb3e&DestinationID=66bc9434-03bd-4725-b47e-c659d8d809db&ContentID=YEWIvbhb40aqdjMD1ALSqw', - 'md5': '9b83b4a2eead4912dc3b7fac7c449b6a', - 'info_dict': { - 'id': 'YEWIvbhb40aqdjMD1ALSqw', - 'ext': 'mp4', - 'title': 'Active Learning in Archeology', - 'upload_date': '20140616', - 'timestamp': 1402926346, - } - } - _NS = 'http://schemas.datacontract.org/2004/07/ensembleVideo.Data.Service.Contracts.Models.Player.Config' - - def _real_extract(self, url): - qs = compat_parse_qs(re.match(self._VALID_URL, url).group('query')) - if not qs.get('PortalID') or not qs.get('DestinationID') or not qs.get('ContentID'): - raise ExtractorError('Invalid URL', expected=True) - - portal_id = qs['PortalID'][0] - playlist_id = qs['DestinationID'][0] - content_id = qs['ContentID'][0] - - content_data = self._download_xml('https://mediahub.rice.edu/api/portal/GetContentTitle', content_id, query={ - 'portalId': portal_id, - 'playlistId': playlist_id, - 'contentId': content_id - }) - metadata = xpath_element(content_data, './/metaData', fatal=True) - title = xpath_text(metadata, 'primaryTitle', fatal=True) - encodings = xpath_element(content_data, './/encodings', fatal=True) - player_data = self._download_xml('https://mediahub.rice.edu/api/player/GetPlayerConfig', content_id, query={ - 'temporaryLinkId': xpath_text(encodings, 'temporaryLinkId', fatal=True), - 'contentId': content_id, - }) - - common_fmt = {} - dimensions = xpath_text(encodings, 'dimensions') - if dimensions: - wh = dimensions.split('x') - if len(wh) == 2: - 
common_fmt.update({ - 'width': int_or_none(wh[0]), - 'height': int_or_none(wh[1]), - }) - - formats = [] - rtsp_path = xpath_text(player_data, self._xpath_ns('RtspPath', self._NS)) - if rtsp_path: - fmt = { - 'url': rtsp_path, - 'format_id': 'rtsp', - } - fmt.update(common_fmt) - formats.append(fmt) - for source in player_data.findall(self._xpath_ns('.//Source', self._NS)): - video_url = xpath_text(source, self._xpath_ns('File', self._NS)) - if not video_url: - continue - if '.m3u8' in video_url: - formats.extend(self._extract_m3u8_formats(video_url, content_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) - else: - fmt = { - 'url': video_url, - 'format_id': video_url.split(':')[0], - } - fmt.update(common_fmt) - rtmp = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>.+))/(?P<playpath>mp4:.+)$', video_url) - if rtmp: - fmt.update({ - 'url': rtmp.group('url'), - 'play_path': rtmp.group('playpath'), - 'app': rtmp.group('app'), - 'ext': 'flv', - }) - formats.append(fmt) - self._sort_formats(formats) - - thumbnails = [] - for content_asset in content_data.findall('.//contentAssets'): - asset_type = xpath_text(content_asset, 'type') - if asset_type == 'image': - image_url = xpath_text(content_asset, 'httpPath') - if not image_url: - continue - thumbnails.append({ - 'id': xpath_text(content_asset, 'ID'), - 'url': image_url, - }) - - return { - 'id': content_id, - 'title': title, - 'description': xpath_text(metadata, 'abstract'), - 'duration': int_or_none(xpath_text(metadata, 'duration')), - 'timestamp': parse_iso8601(xpath_text(metadata, 'dateUpdated')), - 'thumbnails': thumbnails, - 'formats': formats, - } diff --git a/youtube_dl/extractor/rmcdecouverte.py b/youtube_dl/extractor/rmcdecouverte.py deleted file mode 100644 index c3623edcc..000000000 --- a/youtube_dl/extractor/rmcdecouverte.py +++ /dev/null @@ -1,55 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from .brightcove import BrightcoveLegacyIE 
-from ..compat import ( - compat_parse_qs, - compat_urlparse, -) -from ..utils import smuggle_url - - -class RMCDecouverteIE(InfoExtractor): - _VALID_URL = r'https?://rmcdecouverte\.bfmtv\.com/(?:(?:[^/]+/)*program_(?P<id>\d+)|(?P<live_id>mediaplayer-direct))' - - _TESTS = [{ - 'url': 'https://rmcdecouverte.bfmtv.com/wheeler-dealers-occasions-a-saisir/program_2566/', - 'info_dict': { - 'id': '5983675500001', - 'ext': 'mp4', - 'title': 'CORVETTE', - 'description': 'md5:c1e8295521e45ffebf635d6a7658f506', - 'uploader_id': '1969646226001', - 'upload_date': '20181226', - 'timestamp': 1545861635, - }, - 'params': { - 'skip_download': True, - }, - 'skip': 'only available for a week', - }, { - # live, geo restricted, bypassable - 'url': 'https://rmcdecouverte.bfmtv.com/mediaplayer-direct/', - 'only_matching': True, - }] - BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1969646226001/default_default/index.html?videoId=%s' - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - display_id = mobj.group('id') or mobj.group('live_id') - webpage = self._download_webpage(url, display_id) - brightcove_legacy_url = BrightcoveLegacyIE._extract_brightcove_url(webpage) - if brightcove_legacy_url: - brightcove_id = compat_parse_qs(compat_urlparse.urlparse( - brightcove_legacy_url).query)['@videoPlayer'][0] - else: - brightcove_id = self._search_regex( - r'data-video-id=["\'](\d+)', webpage, 'brightcove id') - return self.url_result( - smuggle_url( - self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, - {'geo_countries': ['FR']}), - 'BrightcoveNew', brightcove_id) diff --git a/youtube_dl/extractor/ro220.py b/youtube_dl/extractor/ro220.py deleted file mode 100644 index 69934ef2b..000000000 --- a/youtube_dl/extractor/ro220.py +++ /dev/null @@ -1,43 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..compat import compat_urllib_parse_unquote - - -class Ro220IE(InfoExtractor): - IE_NAME = '220.ro' - _VALID_URL = 
r'(?x)(?:https?://)?(?:www\.)?220\.ro/(?P<category>[^/]+)/(?P<shorttitle>[^/]+)/(?P<id>[^/]+)' - _TEST = { - 'url': 'http://www.220.ro/sport/Luati-Le-Banii-Sez-4-Ep-1/LYV6doKo7f/', - 'md5': '03af18b73a07b4088753930db7a34add', - 'info_dict': { - 'id': 'LYV6doKo7f', - 'ext': 'mp4', - 'title': 'Luati-le Banii sez 4 ep 1', - 'description': r're:^Iata-ne reveniti dupa o binemeritata vacanta\. +Va astept si pe Facebook cu pareri si comentarii.$', - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - url = compat_urllib_parse_unquote(self._search_regex( - r'(?s)clip\s*:\s*{.*?url\s*:\s*\'([^\']+)\'', webpage, 'url')) - title = self._og_search_title(webpage) - description = self._og_search_description(webpage) - thumbnail = self._og_search_thumbnail(webpage) - - formats = [{ - 'format_id': 'sd', - 'url': url, - 'ext': 'mp4', - }] - - return { - 'id': video_id, - 'formats': formats, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - } diff --git a/youtube_dl/extractor/rockstargames.py b/youtube_dl/extractor/rockstargames.py deleted file mode 100644 index cd6904bc9..000000000 --- a/youtube_dl/extractor/rockstargames.py +++ /dev/null @@ -1,69 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - int_or_none, - parse_iso8601, -) - - -class RockstarGamesIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?rockstargames\.com/videos(?:/video/|#?/?\?.*\bvideo=)(?P<id>\d+)' - _TESTS = [{ - 'url': 'https://www.rockstargames.com/videos/video/11544/', - 'md5': '03b5caa6e357a4bd50e3143fc03e5733', - 'info_dict': { - 'id': '11544', - 'ext': 'mp4', - 'title': 'Further Adventures in Finance and Felony Trailer', - 'description': 'md5:6d31f55f30cb101b5476c4a379e324a3', - 'thumbnail': r're:^https?://.*\.jpg$', - 'timestamp': 1464876000, - 'upload_date': '20160602', - } - }, { - 'url': 
'http://www.rockstargames.com/videos#/?video=48', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - video = self._download_json( - 'https://www.rockstargames.com/videoplayer/videos/get-video.json', - video_id, query={ - 'id': video_id, - 'locale': 'en_us', - })['video'] - - title = video['title'] - - formats = [] - for video in video['files_processed']['video/mp4']: - if not video.get('src'): - continue - resolution = video.get('resolution') - height = int_or_none(self._search_regex( - r'^(\d+)[pP]$', resolution or '', 'height', default=None)) - formats.append({ - 'url': self._proto_relative_url(video['src']), - 'format_id': resolution, - 'height': height, - }) - - if not formats: - youtube_id = video.get('youtube_id') - if youtube_id: - return self.url_result(youtube_id, 'Youtube') - - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': title, - 'description': video.get('description'), - 'thumbnail': self._proto_relative_url(video.get('screencap')), - 'timestamp': parse_iso8601(video.get('created')), - 'formats': formats, - } diff --git a/youtube_dl/extractor/roosterteeth.py b/youtube_dl/extractor/roosterteeth.py deleted file mode 100644 index 8883639b2..000000000 --- a/youtube_dl/extractor/roosterteeth.py +++ /dev/null @@ -1,137 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..compat import ( - compat_HTTPError, - compat_str, -) -from ..utils import ( - ExtractorError, - int_or_none, - str_or_none, - urlencode_postdata, -) - - -class RoosterTeethIE(InfoExtractor): - _VALID_URL = r'https?://(?:.+?\.)?roosterteeth\.com/(?:episode|watch)/(?P<id>[^/?#&]+)' - _NETRC_MACHINE = 'roosterteeth' - _TESTS = [{ - 'url': 'http://roosterteeth.com/episode/million-dollars-but-season-2-million-dollars-but-the-game-announcement', - 'md5': 'e2bd7764732d785ef797700a2489f212', - 'info_dict': { - 'id': '9156', - 'display_id': 
'million-dollars-but-season-2-million-dollars-but-the-game-announcement', - 'ext': 'mp4', - 'title': 'Million Dollars, But... The Game Announcement', - 'description': 'md5:168a54b40e228e79f4ddb141e89fe4f5', - 'thumbnail': r're:^https?://.*\.png$', - 'series': 'Million Dollars, But...', - 'episode': 'Million Dollars, But... The Game Announcement', - }, - }, { - 'url': 'http://achievementhunter.roosterteeth.com/episode/off-topic-the-achievement-hunter-podcast-2016-i-didn-t-think-it-would-pass-31', - 'only_matching': True, - }, { - 'url': 'http://funhaus.roosterteeth.com/episode/funhaus-shorts-2016-austin-sucks-funhaus-shorts', - 'only_matching': True, - }, { - 'url': 'http://screwattack.roosterteeth.com/episode/death-battle-season-3-mewtwo-vs-shadow', - 'only_matching': True, - }, { - 'url': 'http://theknow.roosterteeth.com/episode/the-know-game-news-season-1-boring-steam-sales-are-better', - 'only_matching': True, - }, { - # only available for FIRST members - 'url': 'http://roosterteeth.com/episode/rt-docs-the-world-s-greatest-head-massage-the-world-s-greatest-head-massage-an-asmr-journey-part-one', - 'only_matching': True, - }, { - 'url': 'https://roosterteeth.com/watch/million-dollars-but-season-2-million-dollars-but-the-game-announcement', - 'only_matching': True, - }] - _EPISODE_BASE_URL = 'https://svod-be.roosterteeth.com/api/v1/episodes/' - - def _login(self): - username, password = self._get_login_info() - if username is None: - return - - try: - self._download_json( - 'https://auth.roosterteeth.com/oauth/token', - None, 'Logging in', data=urlencode_postdata({ - 'client_id': '4338d2b4bdc8db1239360f28e72f0d9ddb1fd01e7a38fbb07b4b1f4ba4564cc5', - 'grant_type': 'password', - 'username': username, - 'password': password, - })) - except ExtractorError as e: - msg = 'Unable to login' - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: - resp = self._parse_json(e.cause.read().decode(), None, fatal=False) - if resp: - error = resp.get('extra_info') or 
resp.get('error_description') or resp.get('error') - if error: - msg += ': ' + error - self.report_warning(msg) - - def _real_initialize(self): - if self._get_cookies(self._EPISODE_BASE_URL).get('rt_access_token'): - return - self._login() - - def _real_extract(self, url): - display_id = self._match_id(url) - api_episode_url = self._EPISODE_BASE_URL + display_id - - try: - m3u8_url = self._download_json( - api_episode_url + '/videos', display_id, - 'Downloading video JSON metadata')['data'][0]['attributes']['url'] - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: - if self._parse_json(e.cause.read().decode(), display_id).get('access') is False: - self.raise_login_required( - '%s is only available for FIRST members' % display_id) - raise - - formats = self._extract_m3u8_formats( - m3u8_url, display_id, 'mp4', 'm3u8_native', m3u8_id='hls') - self._sort_formats(formats) - - episode = self._download_json( - api_episode_url, display_id, - 'Downloading episode JSON metadata')['data'][0] - attributes = episode['attributes'] - title = attributes.get('title') or attributes['display_title'] - video_id = compat_str(episode['id']) - - thumbnails = [] - for image in episode.get('included', {}).get('images', []): - if image.get('type') == 'episode_image': - img_attributes = image.get('attributes') or {} - for k in ('thumb', 'small', 'medium', 'large'): - img_url = img_attributes.get(k) - if img_url: - thumbnails.append({ - 'id': k, - 'url': img_url, - }) - - return { - 'id': video_id, - 'display_id': display_id, - 'title': title, - 'description': attributes.get('description') or attributes.get('caption'), - 'thumbnails': thumbnails, - 'series': attributes.get('show_title'), - 'season_number': int_or_none(attributes.get('season_number')), - 'season_id': attributes.get('season_id'), - 'episode': title, - 'episode_number': int_or_none(attributes.get('number')), - 'episode_id': str_or_none(episode.get('uuid')), - 'formats': formats, - 
'channel_id': attributes.get('channel_id'), - 'duration': int_or_none(attributes.get('length')), - } diff --git a/youtube_dl/extractor/rottentomatoes.py b/youtube_dl/extractor/rottentomatoes.py deleted file mode 100644 index 14c8e8236..000000000 --- a/youtube_dl/extractor/rottentomatoes.py +++ /dev/null @@ -1,32 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor -from .internetvideoarchive import InternetVideoArchiveIE - - -class RottenTomatoesIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?rottentomatoes\.com/m/[^/]+/trailers/(?P<id>\d+)' - - _TEST = { - 'url': 'http://www.rottentomatoes.com/m/toy_story_3/trailers/11028566/', - 'info_dict': { - 'id': '11028566', - 'ext': 'mp4', - 'title': 'Toy Story 3', - 'description': 'From the creators of the beloved TOY STORY films, comes a story that will reunite the gang in a whole new way.', - 'thumbnail': r're:^https?://.*\.jpg$', - }, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - iva_id = self._search_regex(r'publishedid=(\d+)', webpage, 'internet video archive id') - - return { - '_type': 'url_transparent', - 'url': 'http://video.internetvideoarchive.net/player/6/configuration.ashx?domain=www.videodetective.com&customerid=69249&playerid=641&publishedid=' + iva_id, - 'ie_key': InternetVideoArchiveIE.ie_key(), - 'id': video_id, - 'title': self._og_search_title(webpage), - } diff --git a/youtube_dl/extractor/roxwel.py b/youtube_dl/extractor/roxwel.py deleted file mode 100644 index 65284643b..000000000 --- a/youtube_dl/extractor/roxwel.py +++ /dev/null @@ -1,53 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import unified_strdate, determine_ext - - -class RoxwelIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?roxwel\.com/player/(?P<filename>.+?)(\.|\?|$)' - - _TEST = { - 'url': 'http://www.roxwel.com/player/passionpittakeawalklive.html', 
- 'info_dict': { - 'id': 'passionpittakeawalklive', - 'ext': 'flv', - 'title': 'Take A Walk (live)', - 'uploader': 'Passion Pit', - 'uploader_id': 'passionpit', - 'upload_date': '20120928', - 'description': 'Passion Pit performs "Take A Walk\" live at The Backyard in Austin, Texas. ', - }, - 'params': { - # rtmp download - 'skip_download': True, - } - } - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - filename = mobj.group('filename') - info_url = 'http://www.roxwel.com/api/videos/%s' % filename - info = self._download_json(info_url, filename) - - rtmp_rates = sorted([int(r.replace('flv_', '')) for r in info['media_rates'] if r.startswith('flv_')]) - best_rate = rtmp_rates[-1] - url_page_url = 'http://roxwel.com/pl_one_time.php?filename=%s&quality=%s' % (filename, best_rate) - rtmp_url = self._download_webpage(url_page_url, filename, 'Downloading video url') - ext = determine_ext(rtmp_url) - if ext == 'f4v': - rtmp_url = rtmp_url.replace(filename, 'mp4:%s' % filename) - - return { - 'id': filename, - 'title': info['title'], - 'url': rtmp_url, - 'ext': 'flv', - 'description': info['description'], - 'thumbnail': info.get('player_image_url') or info.get('image_url_large'), - 'uploader': info['artist'], - 'uploader_id': info['artistname'], - 'upload_date': unified_strdate(info['dbdate']), - } diff --git a/youtube_dl/extractor/rozhlas.py b/youtube_dl/extractor/rozhlas.py deleted file mode 100644 index fccf69401..000000000 --- a/youtube_dl/extractor/rozhlas.py +++ /dev/null @@ -1,50 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - int_or_none, - remove_start, -) - - -class RozhlasIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?prehravac\.rozhlas\.cz/audio/(?P<id>[0-9]+)' - _TESTS = [{ - 'url': 'http://prehravac.rozhlas.cz/audio/3421320', - 'md5': '504c902dbc9e9a1fd50326eccf02a7e2', - 'info_dict': { - 'id': '3421320', - 'ext': 'mp3', - 'title': 'Echo Pavla 
Klusáka (30.06.2015 21:00)', - 'description': 'Osmdesátiny Terryho Rileyho jsou skvělou příležitostí proletět se elektronickými i akustickými díly zakladatatele minimalismu, který je aktivní už přes padesát let' - } - }, { - 'url': 'http://prehravac.rozhlas.cz/audio/3421320/embed', - 'only_matching': True, - }] - - def _real_extract(self, url): - audio_id = self._match_id(url) - - webpage = self._download_webpage( - 'http://prehravac.rozhlas.cz/audio/%s' % audio_id, audio_id) - - title = self._html_search_regex( - r'<h3>(.+?)</h3>\s*<p[^>]*>.*?</p>\s*<div[^>]+id=["\']player-track', - webpage, 'title', default=None) or remove_start( - self._og_search_title(webpage), 'Radio Wave - ') - description = self._html_search_regex( - r'<p[^>]+title=(["\'])(?P<url>(?:(?!\1).)+)\1[^>]*>.*?</p>\s*<div[^>]+id=["\']player-track', - webpage, 'description', fatal=False, group='url') - duration = int_or_none(self._search_regex( - r'data-duration=["\'](\d+)', webpage, 'duration', default=None)) - - return { - 'id': audio_id, - 'url': 'http://media.rozhlas.cz/_audio/%s.mp3' % audio_id, - 'title': title, - 'description': description, - 'duration': duration, - 'vcodec': 'none', - } diff --git a/youtube_dl/extractor/rtbf.py b/youtube_dl/extractor/rtbf.py deleted file mode 100644 index 3b0f3080b..000000000 --- a/youtube_dl/extractor/rtbf.py +++ /dev/null @@ -1,161 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - float_or_none, - int_or_none, - strip_or_none, -) - - -class RTBFIE(InfoExtractor): - _VALID_URL = r'''(?x) - https?://(?:www\.)?rtbf\.be/ - (?: - video/[^?]+\?.*\bid=| - ouftivi/(?:[^/]+/)*[^?]+\?.*\bvideoId=| - auvio/[^/]+\?.*\b(?P<live>l)?id= - )(?P<id>\d+)''' - _TESTS = [{ - 'url': 'https://www.rtbf.be/video/detail_les-diables-au-coeur-episode-2?id=1921274', - 'md5': '8c876a1cceeb6cf31b476461ade72384', - 'info_dict': { - 'id': '1921274', - 'ext': 'mp4', - 'title': 'Les 
Diables au coeur (épisode 2)', - 'description': '(du 25/04/2014)', - 'duration': 3099.54, - 'upload_date': '20140425', - 'timestamp': 1398456300, - } - }, { - # geo restricted - 'url': 'http://www.rtbf.be/ouftivi/heros/detail_scooby-doo-mysteres-associes?id=1097&videoId=2057442', - 'only_matching': True, - }, { - 'url': 'http://www.rtbf.be/ouftivi/niouzz?videoId=2055858', - 'only_matching': True, - }, { - 'url': 'http://www.rtbf.be/auvio/detail_jeudi-en-prime-siegfried-bracke?id=2102996', - 'only_matching': True, - }, { - # Live - 'url': 'https://www.rtbf.be/auvio/direct_pure-fm?lid=134775', - 'only_matching': True, - }, { - # Audio - 'url': 'https://www.rtbf.be/auvio/detail_cinq-heures-cinema?id=2360811', - 'only_matching': True, - }, { - # With Subtitle - 'url': 'https://www.rtbf.be/auvio/detail_les-carnets-du-bourlingueur?id=2361588', - 'only_matching': True, - }] - _IMAGE_HOST = 'http://ds1.ds.static.rtbf.be' - _PROVIDERS = { - 'YOUTUBE': 'Youtube', - 'DAILYMOTION': 'Dailymotion', - 'VIMEO': 'Vimeo', - } - _QUALITIES = [ - ('mobile', 'SD'), - ('web', 'MD'), - ('high', 'HD'), - ] - - def _real_extract(self, url): - live, media_id = re.match(self._VALID_URL, url).groups() - embed_page = self._download_webpage( - 'https://www.rtbf.be/auvio/embed/' + ('direct' if live else 'media'), - media_id, query={'id': media_id}) - data = self._parse_json(self._html_search_regex( - r'data-media="([^"]+)"', embed_page, 'media data'), media_id) - - error = data.get('error') - if error: - raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True) - - provider = data.get('provider') - if provider in self._PROVIDERS: - return self.url_result(data['url'], self._PROVIDERS[provider]) - - title = data['title'] - is_live = data.get('isLive') - if is_live: - title = self._live_title(title) - height_re = r'-(\d+)p\.' 
- formats = [] - - m3u8_url = data.get('urlHlsAes128') or data.get('urlHls') - if m3u8_url: - formats.extend(self._extract_m3u8_formats( - m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False)) - - fix_url = lambda x: x.replace('//rtbf-vod.', '//rtbf.') if '/geo/drm/' in x else x - http_url = data.get('url') - if formats and http_url and re.search(height_re, http_url): - http_url = fix_url(http_url) - for m3u8_f in formats[:]: - height = m3u8_f.get('height') - if not height: - continue - f = m3u8_f.copy() - del f['protocol'] - f.update({ - 'format_id': m3u8_f['format_id'].replace('hls-', 'http-'), - 'url': re.sub(height_re, '-%dp.' % height, http_url), - }) - formats.append(f) - else: - sources = data.get('sources') or {} - for key, format_id in self._QUALITIES: - format_url = sources.get(key) - if not format_url: - continue - height = int_or_none(self._search_regex( - height_re, format_url, 'height', default=None)) - formats.append({ - 'format_id': format_id, - 'url': fix_url(format_url), - 'height': height, - }) - - mpd_url = data.get('urlDash') - if not data.get('drm') and mpd_url: - formats.extend(self._extract_mpd_formats( - mpd_url, media_id, mpd_id='dash', fatal=False)) - - audio_url = data.get('urlAudio') - if audio_url: - formats.append({ - 'format_id': 'audio', - 'url': audio_url, - 'vcodec': 'none', - }) - self._sort_formats(formats) - - subtitles = {} - for track in (data.get('tracks') or {}).values(): - sub_url = track.get('url') - if not sub_url: - continue - subtitles.setdefault(track.get('lang') or 'fr', []).append({ - 'url': sub_url, - }) - - return { - 'id': media_id, - 'formats': formats, - 'title': title, - 'description': strip_or_none(data.get('description')), - 'thumbnail': data.get('thumbnail'), - 'duration': float_or_none(data.get('realDuration')), - 'timestamp': int_or_none(data.get('liveFrom')), - 'series': data.get('programLabel'), - 'subtitles': subtitles, - 'is_live': is_live, - } diff --git a/youtube_dl/extractor/rte.py 
b/youtube_dl/extractor/rte.py deleted file mode 100644 index 1fbc72915..000000000 --- a/youtube_dl/extractor/rte.py +++ /dev/null @@ -1,167 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..compat import compat_HTTPError -from ..utils import ( - float_or_none, - parse_iso8601, - str_or_none, - try_get, - unescapeHTML, - url_or_none, - ExtractorError, -) - - -class RteBaseIE(InfoExtractor): - def _real_extract(self, url): - item_id = self._match_id(url) - - info_dict = {} - formats = [] - - ENDPOINTS = ( - 'https://feeds.rasset.ie/rteavgen/player/playlist?type=iptv&format=json&showId=', - 'http://www.rte.ie/rteavgen/getplaylist/?type=web&format=json&id=', - ) - - for num, ep_url in enumerate(ENDPOINTS, start=1): - try: - data = self._download_json(ep_url + item_id, item_id) - except ExtractorError as ee: - if num < len(ENDPOINTS) or formats: - continue - if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404: - error_info = self._parse_json(ee.cause.read().decode(), item_id, fatal=False) - if error_info: - raise ExtractorError( - '%s said: %s' % (self.IE_NAME, error_info['message']), - expected=True) - raise - - # NB the string values in the JSON are stored using XML escaping(!) 
- show = try_get(data, lambda x: x['shows'][0], dict) - if not show: - continue - - if not info_dict: - title = unescapeHTML(show['title']) - description = unescapeHTML(show.get('description')) - thumbnail = show.get('thumbnail') - duration = float_or_none(show.get('duration'), 1000) - timestamp = parse_iso8601(show.get('published')) - info_dict = { - 'id': item_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'timestamp': timestamp, - 'duration': duration, - } - - mg = try_get(show, lambda x: x['media:group'][0], dict) - if not mg: - continue - - if mg.get('url'): - m = re.match(r'(?P<url>rtmpe?://[^/]+)/(?P<app>.+)/(?P<playpath>mp4:.*)', mg['url']) - if m: - m = m.groupdict() - formats.append({ - 'url': m['url'] + '/' + m['app'], - 'app': m['app'], - 'play_path': m['playpath'], - 'player_url': url, - 'ext': 'flv', - 'format_id': 'rtmp', - }) - - if mg.get('hls_server') and mg.get('hls_url'): - formats.extend(self._extract_m3u8_formats( - mg['hls_server'] + mg['hls_url'], item_id, 'mp4', - entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) - - if mg.get('hds_server') and mg.get('hds_url'): - formats.extend(self._extract_f4m_formats( - mg['hds_server'] + mg['hds_url'], item_id, - f4m_id='hds', fatal=False)) - - mg_rte_server = str_or_none(mg.get('rte:server')) - mg_url = str_or_none(mg.get('url')) - if mg_rte_server and mg_url: - hds_url = url_or_none(mg_rte_server + mg_url) - if hds_url: - formats.extend(self._extract_f4m_formats( - hds_url, item_id, f4m_id='hds', fatal=False)) - - self._sort_formats(formats) - - info_dict['formats'] = formats - return info_dict - - -class RteIE(RteBaseIE): - IE_NAME = 'rte' - IE_DESC = 'Raidió Teilifís Éireann TV' - _VALID_URL = r'https?://(?:www\.)?rte\.ie/player/[^/]{2,3}/show/[^/]+/(?P<id>[0-9]+)' - _TEST = { - 'url': 'http://www.rte.ie/player/ie/show/iwitness-862/10478715/', - 'md5': '4a76eb3396d98f697e6e8110563d2604', - 'info_dict': { - 'id': '10478715', - 'ext': 'mp4', - 'title': 
'iWitness', - 'thumbnail': r're:^https?://.*\.jpg$', - 'description': 'The spirit of Ireland, one voice and one minute at a time.', - 'duration': 60.046, - 'upload_date': '20151012', - 'timestamp': 1444694160, - }, - } - - -class RteRadioIE(RteBaseIE): - IE_NAME = 'rte:radio' - IE_DESC = 'Raidió Teilifís Éireann radio' - # Radioplayer URLs have two distinct specifier formats, - # the old format #!rii=<channel_id>:<id>:<playable_item_id>:<date>: - # the new format #!rii=b<channel_id>_<id>_<playable_item_id>_<date>_ - # where the IDs are int/empty, the date is DD-MM-YYYY, and the specifier may be truncated. - # An <id> uniquely defines an individual recording, and is the only part we require. - _VALID_URL = r'https?://(?:www\.)?rte\.ie/radio/utils/radioplayer/rteradioweb\.html#!rii=(?:b?[0-9]*)(?:%3A|:|%5F|_)(?P<id>[0-9]+)' - - _TESTS = [{ - # Old-style player URL; HLS and RTMPE formats - 'url': 'http://www.rte.ie/radio/utils/radioplayer/rteradioweb.html#!rii=16:10507902:2414:27-12-2015:', - 'md5': 'c79ccb2c195998440065456b69760411', - 'info_dict': { - 'id': '10507902', - 'ext': 'mp4', - 'title': 'Gloria', - 'thumbnail': r're:^https?://.*\.jpg$', - 'description': 'md5:9ce124a7fb41559ec68f06387cabddf0', - 'timestamp': 1451203200, - 'upload_date': '20151227', - 'duration': 7230.0, - }, - }, { - # New-style player URL; RTMPE formats only - 'url': 'http://rte.ie/radio/utils/radioplayer/rteradioweb.html#!rii=b16_3250678_8861_06-04-2012_', - 'info_dict': { - 'id': '3250678', - 'ext': 'flv', - 'title': 'The Lyric Concert with Paul Herriott', - 'thumbnail': r're:^https?://.*\.jpg$', - 'description': '', - 'timestamp': 1333742400, - 'upload_date': '20120406', - 'duration': 7199.016, - }, - 'params': { - # rtmp download - 'skip_download': True, - }, - }] diff --git a/youtube_dl/extractor/rtl2.py b/youtube_dl/extractor/rtl2.py deleted file mode 100644 index 70f000ca8..000000000 --- a/youtube_dl/extractor/rtl2.py +++ /dev/null @@ -1,207 +0,0 @@ -# coding: utf-8 -from __future__ 
class RTL2IE(InfoExtractor):
    """Extractor for rtl2.de show pages.

    Two URL layouts exist: /video/<vico_id>.../<vivi_id>-... where both ids
    are in the URL itself, and /folge/<slug> where the ids must be scraped
    from the page markup before querying the player API.
    """
    IE_NAME = 'rtl2'
    _VALID_URL = r'https?://(?:www\.)?rtl2\.de/sendung/[^/]+/(?:video/(?P<vico_id>\d+)[^/]+/(?P<vivi_id>\d+)-|folge/)(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'http://www.rtl2.de/sendung/grip-das-motormagazin/folge/folge-203-0',
        'info_dict': {
            'id': 'folge-203-0',
            'ext': 'f4v',
            'title': 'GRIP sucht den Sommerkönig',
            'description': 'md5:e3adbb940fd3c6e76fa341b8748b562f'
        },
        'params': {
            # rtmp download
            'skip_download': True,
        },
        'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'],
    }, {
        'url': 'http://www.rtl2.de/sendung/koeln-50667/video/5512-anna/21040-anna-erwischt-alex/',
        'info_dict': {
            'id': 'anna-erwischt-alex',
            'ext': 'mp4',
            'title': 'Anna erwischt Alex!',
            'description': 'Anna nimmt ihrem Vater nicht ab, dass er nicht spielt. Und tatsächlich erwischt sie ihn auf frischer Tat.'
        },
        'params': {
            # rtmp download
            'skip_download': True,
        },
        'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'],
    }]

    def _real_extract(self, url):
        vico_id, vivi_id, display_id = re.match(self._VALID_URL, url).groups()
        if not vico_id:
            # /folge/ layout: ids are not in the URL, scrape them from the page
            webpage = self._download_webpage(url, display_id)

            mobj = re.search(
                r'data-collection="(?P<vico_id>\d+)"[^>]+data-video="(?P<vivi_id>\d+)"',
                webpage)
            if mobj:
                vico_id = mobj.group('vico_id')
                vivi_id = mobj.group('vivi_id')
            else:
                # older page markup embeds the ids as JS object fields
                vico_id = self._html_search_regex(
                    r'vico_id\s*:\s*([0-9]+)', webpage, 'vico_id')
                vivi_id = self._html_search_regex(
                    r'vivi_id\s*:\s*([0-9]+)', webpage, 'vivi_id')

        info = self._download_json(
            'https://service.rtl2.de/api-player-vipo/video.php',
            display_id, query={
                'vico_id': vico_id,
                'vivi_id': vivi_id,
            })
        video_info = info['video']
        title = video_info['titel']

        formats = []

        rtmp_url = video_info.get('streamurl')
        if rtmp_url:
            # API escapes slashes in the rtmp URL
            rtmp_url = rtmp_url.replace('\\', '')
            stream_url = 'mp4:' + self._html_search_regex(r'/ondemand/(.+)', rtmp_url, 'stream URL')
            rtmp_conn = ['S:connect', 'O:1', 'NS:pageUrl:' + url, 'NB:fpad:0', 'NN:videoFunction:1', 'O:0']

            formats.append({
                'format_id': 'rtmp',
                'url': rtmp_url,
                'play_path': stream_url,
                'player_url': 'https://www.rtl2.de/sites/default/modules/rtl2/jwplayer/jwplayer-7.6.0/jwplayer.flash.swf',
                'page_url': url,
                'flash_version': 'LNX 11,2,202,429',
                'rtmp_conn': rtmp_conn,
                'no_resume': True,
                'preference': 1,
            })

        m3u8_url = video_info.get('streamurl_hls')
        if m3u8_url:
            formats.extend(self._extract_akamai_formats(m3u8_url, display_id))

        self._sort_formats(formats)

        return {
            'id': display_id,
            'title': title,
            'thumbnail': video_info.get('image'),
            'description': video_info.get('beschreibung'),
            'duration': int_or_none(video_info.get('duration')),
            'formats': formats,
        }
class RTL2YouIE(RTL2YouBaseIE):
    """Extractor for single videos on the RTL2 YOU platform.

    The stream URL is delivered AES-CBC encrypted; the payload is
    'base64(ciphertext):base64(iv)' and the plaintext carries trailing
    padding whose length is given by its last byte (PKCS#7-style).
    """
    IE_NAME = 'rtl2:you'
    # Fixed: the previous pattern used 'http?://', which matches only
    # 'htt://' or 'http://' and therefore rejected https:// links.
    _VALID_URL = r'https?://you\.rtl2\.de/(?:video/\d+/|youplayer/index\.html\?.*?\bvid=)(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://you.rtl2.de/video/3002/15740/MJUNIK%20%E2%80%93%20Home%20of%20YOU/307-hirn-wo-bist-du',
        'info_dict': {
            'id': '15740',
            'ext': 'mp4',
            'title': 'MJUNIK – Home of YOU - #307 Hirn, wo bist du?!',
            'description': 'md5:ddaa95c61b372b12b66e115b2772fe01',
            'age_limit': 12,
        },
    }, {
        'url': 'http://you.rtl2.de/youplayer/index.html?vid=15712',
        'only_matching': True,
    }]
    _AES_KEY = b'\xe9W\xe4.<*\xb8\x1a\xd2\xb6\x92\xf3C\xd3\xefL\x1b\x03*\xbbbH\xc0\x03\xffo\xc2\xf2(\xaa\xaa!'
    _GEO_COUNTRIES = ['DE']

    def _real_extract(self, url):
        video_id = self._match_id(url)

        stream_data = self._download_json(
            self._BACKWERK_BASE_URL + 'stream/video/' + video_id, video_id)

        # 'streamUrl' is 'base64(ciphertext):base64(iv)'
        data, iv = compat_b64decode(stream_data['streamUrl']).decode().split(':')
        stream_url = intlist_to_bytes(aes_cbc_decrypt(
            bytes_to_intlist(compat_b64decode(data)),
            bytes_to_intlist(self._AES_KEY),
            bytes_to_intlist(compat_b64decode(iv))
        ))
        if b'rtl2_you_video_not_found' in stream_url:
            raise ExtractorError('video not found', expected=True)

        # strip trailing padding; its length is encoded in the last byte
        formats = self._extract_m3u8_formats(
            stream_url[:-compat_ord(stream_url[-1])].decode(),
            video_id, 'mp4', 'm3u8_native')
        self._sort_formats(formats)

        video_data = self._download_json(
            self._BACKWERK_BASE_URL + 'video/' + video_id, video_id)

        series = video_data.get('formatTitle')
        title = episode = video_data.get('title') or series
        if series and series != title:
            title = '%s - %s' % (series, title)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': strip_or_none(video_data.get('description')),
            'thumbnail': video_data.get('image'),
            # durations are reported in milliseconds
            'duration': int_or_none(stream_data.get('duration') or video_data.get('duration'), 1000),
            'series': series,
            'episode': episode,
            'age_limit': int_or_none(video_data.get('minimumAge')),
        }
class RTL2YouSeriesIE(RTL2YouBaseIE):
    """Playlist extractor for a whole series on the RTL2 YOU platform."""
    IE_NAME = 'rtl2:you:series'
    # Fixed: 'http?://' -> 'https?://' (the old pattern never matched
    # https:// URLs; 'http?' is 'htt' plus an optional 'p').
    _VALID_URL = r'https?://you\.rtl2\.de/videos/(?P<id>\d+)'
    _TEST = {
        'url': 'http://you.rtl2.de/videos/115/dragon-ball',
        'info_dict': {
            'id': '115',
        },
        'playlist_mincount': 5,
    }

    def _real_extract(self, url):
        series_id = self._match_id(url)
        stream_data = self._download_json(
            self._BACKWERK_BASE_URL + 'videos',
            series_id, query={
                'formatId': series_id,
                # effectively "no limit": fetch every episode in one request
                'limit': 1000000000,
            })

        entries = []
        for video in stream_data.get('videos', []):
            video_id = compat_str(video['videoId'])
            if not video_id:
                continue
            entries.append(self.url_result(
                'http://you.rtl2.de/video/%s/%s' % (series_id, video_id),
                'RTL2You', video_id))
        return self.playlist_result(entries, series_id)
class RtlNlIE(InfoExtractor):
    """Extractor for rtl.nl / rtlxl.nl videos identified by a UUID."""
    IE_NAME = 'rtl.nl'
    IE_DESC = 'rtl.nl and rtlxl.nl'
    _VALID_URL = r'''(?x)
        https?://(?:(?:www|static)\.)?
        (?:
            rtlxl\.nl/(?:[^\#]*\#!|programma)/[^/]+/|
            rtl\.nl/(?:(?:system/videoplayer/(?:[^/]+/)+(?:video_)?embed\.html|embed)\b.+?\buuid=|video/)|
            embed\.rtl\.nl/\#uuid=
        )
        (?P<id>[0-9a-f-]+)'''

    _TESTS = [{
        # new URL schema
        'url': 'https://www.rtlxl.nl/programma/rtl-nieuws/0bd1384d-d970-3086-98bb-5c104e10c26f',
        'md5': '490428f1187b60d714f34e1f2e3af0b6',
        'info_dict': {
            'id': '0bd1384d-d970-3086-98bb-5c104e10c26f',
            'ext': 'mp4',
            'title': 'RTL Nieuws',
            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
            'timestamp': 1593293400,
            'upload_date': '20200627',
            'duration': 661.08,
        },
    }, {
        # old URL schema
        'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/82b1aad1-4a14-3d7b-b554-b0aed1b2c416',
        'md5': '473d1946c1fdd050b2c0161a4b13c373',
        'info_dict': {
            'id': '82b1aad1-4a14-3d7b-b554-b0aed1b2c416',
            'ext': 'mp4',
            'title': 'RTL Nieuws',
            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
            'timestamp': 1461951000,
            'upload_date': '20160429',
            'duration': 1167.96,
        },
        'skip': '404',
    }, {
        # best format available a3t
        'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed/autoplay=false',
        'md5': 'dea7474214af1271d91ef332fb8be7ea',
        'info_dict': {
            'id': '84ae5571-ac25-4225-ae0c-ef8d9efb2aed',
            'ext': 'mp4',
            'timestamp': 1424039400,
            'title': 'RTL Nieuws - Nieuwe beelden Kopenhagen: chaos direct na aanslag',
            'thumbnail': r're:^https?://screenshots\.rtl\.nl/(?:[^/]+/)*sz=[0-9]+x[0-9]+/uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed$',
            'upload_date': '20150215',
            'description': 'Er zijn nieuwe beelden vrijgegeven die vlak na de aanslag in Kopenhagen zijn gemaakt. Op de video is goed te zien hoe omstanders zich bekommeren om één van de slachtoffers, terwijl de eerste agenten ter plaatse komen.',
        }
    }, {
        # empty synopsis and missing episodes (see https://github.com/ytdl-org/youtube-dl/issues/6275)
        # best format available nettv
        'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a/autoplay=false',
        'info_dict': {
            'id': 'f536aac0-1dc3-4314-920e-3bd1c5b3811a',
            'ext': 'mp4',
            'title': 'RTL Nieuws - Meer beelden van overval juwelier',
            'thumbnail': r're:^https?://screenshots\.rtl\.nl/(?:[^/]+/)*sz=[0-9]+x[0-9]+/uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a$',
            'timestamp': 1437233400,
            'upload_date': '20150718',
            'duration': 30.474,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # encrypted m3u8 streams, georestricted
        'url': 'http://www.rtlxl.nl/#!/afl-2-257632/52a74543-c504-4cde-8aa8-ec66fe8d68a7',
        'only_matching': True,
    }, {
        'url': 'http://www.rtl.nl/system/videoplayer/derden/embed.html#!/uuid=bb0353b0-d6a4-1dad-90e9-18fe75b8d1f0',
        'only_matching': True,
    }, {
        'url': 'http://rtlxl.nl/?_ga=1.204735956.572365465.1466978370#!/rtl-nieuws-132237/3c487912-023b-49ac-903e-2c5d79f8410f',
        'only_matching': True,
    }, {
        'url': 'https://www.rtl.nl/video/c603c9c2-601d-4b5e-8175-64f1e942dc7d/',
        'only_matching': True,
    }, {
        'url': 'https://static.rtl.nl/embed/?uuid=1a2970fc-5c0b-43ff-9fdc-927e39e6d1bc&autoplay=false&publicatiepunt=rtlnieuwsnl',
        'only_matching': True,
    }, {
        # new embed URL schema
        'url': 'https://embed.rtl.nl/#uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed/autoplay=false',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        uuid = self._match_id(url)
        info = self._download_json(
            'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/fmt=adaptive/' % uuid,
            uuid)

        material = info['material'][0]
        title = info['abstracts'][0]['name']
        subtitle = material.get('title')
        if subtitle:
            title += ' - %s' % subtitle
        description = material.get('synopsis')

        meta = info.get('meta', {})

        videopath = material['videopath']
        m3u8_url = meta.get('videohost', 'http://manifest.us.rtl.nl') + videopath

        formats = self._extract_m3u8_formats(
            m3u8_url, uuid, 'mp4', m3u8_id='hls', fatal=False)
        self._sort_formats(formats)

        thumbnails = []

        # BUG FIX: the second key was written as '"thumb_base_url"' (with
        # literal quote characters inside the string), so meta lookups for it
        # could never succeed and thumb-based thumbnails were silently lost.
        for p in ('poster_base_url', 'thumb_base_url'):
            if not meta.get(p):
                continue

            thumbnails.append({
                'url': self._proto_relative_url(meta[p] + uuid),
                'width': int_or_none(self._search_regex(
                    r'/sz=([0-9]+)', meta[p], 'thumbnail width', fatal=False)),
                'height': int_or_none(self._search_regex(
                    r'/sz=[0-9]+x([0-9]+)',
                    meta[p], 'thumbnail height', fatal=False))
            })

        return {
            'id': uuid,
            'title': title,
            'formats': formats,
            'timestamp': material['original_date'],
            'description': description,
            'duration': parse_duration(material.get('duration')),
            'thumbnails': thumbnails,
        }
class RTPIE(InfoExtractor):
    """Extractor for RTP Play (rtp.pt) program pages."""
    _VALID_URL = r'https?://(?:www\.)?rtp\.pt/play/p(?P<program_id>[0-9]+)/(?P<id>[^/?#]+)/?'
    _TESTS = [{
        'url': 'http://www.rtp.pt/play/p405/e174042/paixoes-cruzadas',
        'md5': 'e736ce0c665e459ddb818546220b4ef8',
        'info_dict': {
            'id': 'e174042',
            'ext': 'mp3',
            'title': 'Paixões Cruzadas',
            'description': 'As paixões musicais de António Cartaxo e António Macedo',
            'thumbnail': r're:^https?://.*\.jpg',
        },
    }, {
        'url': 'http://www.rtp.pt/play/p831/a-quimica-das-coisas',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)
        title = self._html_search_meta(
            'twitter:title', webpage, display_name='title', fatal=True)

        # the page sets up its player via an inline RTPPlayer({...}) call;
        # its argument is JS object literal, hence js_to_json
        config = self._parse_json(self._search_regex(
            r'(?s)RTPPlayer\(({.+?})\);', webpage,
            'player config'), video_id, js_to_json)
        file_url = config['file']
        ext = determine_ext(file_url)
        if ext == 'm3u8':
            file_key = config.get('fileKey')
            # when a direct CDN key exists the m3u8 download is non-fatal,
            # since the fallback URL below can still be used
            formats = self._extract_m3u8_formats(
                file_url, video_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=file_key)
            if file_key:
                formats.append({
                    'url': 'https://cdn-ondemand.rtp.pt' + file_key,
                    'preference': 1,
                })
            self._sort_formats(formats)
        else:
            formats = [{
                'url': file_url,
                'ext': ext,
            }]
        if config.get('mediaType') == 'audio':
            # audio-only items still report a video ext; mark no video codec
            for f in formats:
                f['vcodec'] = 'none'

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': self._html_search_meta(['description', 'twitter:description'], webpage),
            'thumbnail': config.get('poster') or self._og_search_thumbnail(webpage),
        }
class RTSIE(SRGSSRIE):
    """Extractor for RTS.ch (Radio Télévision Suisse).

    Handles direct media pages, internal 'rts:<id>' references, and article
    pages that only embed videos (which are returned as playlists).
    """
    IE_DESC = 'RTS.ch'
    _VALID_URL = r'rts:(?P<rts_id>\d+)|https?://(?:.+?\.)?rts\.ch/(?:[^/]+/){2,}(?P<id>[0-9]+)-(?P<display_id>.+?)\.html'

    _TESTS = [
        {
            'url': 'http://www.rts.ch/archives/tv/divers/3449373-les-enfants-terribles.html',
            'md5': '753b877968ad8afaeddccc374d4256a5',
            'info_dict': {
                'id': '3449373',
                'display_id': 'les-enfants-terribles',
                'ext': 'mp4',
                'duration': 1488,
                'title': 'Les Enfants Terribles',
                'description': 'France Pommier et sa soeur Luce Feral, les deux filles de ce groupe de 5.',
                'uploader': 'Divers',
                'upload_date': '19680921',
                'timestamp': -40280400,
                'thumbnail': r're:^https?://.*\.image',
                'view_count': int,
            },
            'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'],
        },
        {
            'url': 'http://www.rts.ch/emissions/passe-moi-les-jumelles/5624067-entre-ciel-et-mer.html',
            'info_dict': {
                'id': '5624065',
                'title': 'Passe-moi les jumelles',
            },
            'playlist_mincount': 4,
        },
        {
            'url': 'http://www.rts.ch/video/sport/hockey/5745975-1-2-kloten-fribourg-5-2-second-but-pour-gotteron-par-kwiatowski.html',
            'info_dict': {
                'id': '5745975',
                'display_id': '1-2-kloten-fribourg-5-2-second-but-pour-gotteron-par-kwiatowski',
                'ext': 'mp4',
                'duration': 48,
                'title': '1/2, Kloten - Fribourg (5-2): second but pour Gottéron par Kwiatowski',
                'description': 'Hockey - Playoff',
                'uploader': 'Hockey',
                'upload_date': '20140403',
                'timestamp': 1396556882,
                'thumbnail': r're:^https?://.*\.image',
                'view_count': int,
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            },
            'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'],
            'skip': 'Blocked outside Switzerland',
        },
        {
            'url': 'http://www.rts.ch/video/info/journal-continu/5745356-londres-cachee-par-un-epais-smog.html',
            'md5': '9bb06503773c07ce83d3cbd793cebb91',
            'info_dict': {
                'id': '5745356',
                'display_id': 'londres-cachee-par-un-epais-smog',
                'ext': 'mp4',
                'duration': 33,
                'title': 'Londres cachée par un épais smog',
                'description': 'Un important voile de smog recouvre Londres depuis mercredi, provoqué par la pollution et du sable du Sahara.',
                'uploader': 'L\'actu en vidéo',
                'upload_date': '20140403',
                'timestamp': 1396537322,
                'thumbnail': r're:^https?://.*\.image',
                'view_count': int,
            },
            'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'],
        },
        {
            'url': 'http://www.rts.ch/audio/couleur3/programmes/la-belle-video-de-stephane-laurenceau/5706148-urban-hippie-de-damien-krisl-03-04-2014.html',
            'md5': 'dd8ef6a22dff163d063e2a52bc8adcae',
            'info_dict': {
                'id': '5706148',
                'display_id': 'urban-hippie-de-damien-krisl-03-04-2014',
                'ext': 'mp3',
                'duration': 123,
                'title': '"Urban Hippie", de Damien Krisl',
                'description': 'Des Hippies super glam.',
                'upload_date': '20140403',
                'timestamp': 1396551600,
            },
        },
        {
            # article with videos on rhs
            'url': 'http://www.rts.ch/sport/hockey/6693917-hockey-davos-decroche-son-31e-titre-de-champion-de-suisse.html',
            'info_dict': {
                'id': '6693917',
                'title': 'Hockey: Davos décroche son 31e titre de champion de Suisse',
            },
            'playlist_mincount': 5,
        },
        {
            'url': 'http://pages.rts.ch/emissions/passe-moi-les-jumelles/5624065-entre-ciel-et-mer.html',
            'only_matching': True,
        }
    ]

    def _real_extract(self, url):
        m = re.match(self._VALID_URL, url)
        media_id = m.group('rts_id') or m.group('id')
        display_id = m.group('display_id') or media_id

        def download_json(internal_id):
            # article JSON endpoint; works for both media and article ids
            return self._download_json(
                'http://www.rts.ch/a/%s.html?f=json/article' % internal_id,
                display_id)

        all_info = download_json(media_id)

        # media_id extracted out of URL is not always a real id
        if 'video' not in all_info and 'audio' not in all_info:
            entries = []

            # article payloads may directly list their media items
            for item in all_info.get('items', []):
                item_url = item.get('url')
                if not item_url:
                    continue
                entries.append(self.url_result(item_url, 'RTS'))

            if not entries:
                page, urlh = self._download_webpage_handle(url, display_id)
                # a redirect to a different media id means the URL's id was
                # stale; retry extraction on the final URL instead
                if re.match(self._VALID_URL, urlh.geturl()).group('id') != media_id:
                    return self.url_result(urlh.geturl(), 'RTS')

                # article with videos on rhs
                videos = re.findall(
                    r'<article[^>]+class="content-item"[^>]*>\s*<a[^>]+data-video-urn="urn:([^"]+)"',
                    page)
                if not videos:
                    videos = re.findall(
                        r'(?s)<iframe[^>]+class="srg-player"[^>]+src="[^"]+urn:([^"]+)"',
                        page)
                if videos:
                    entries = [self.url_result('srgssr:%s' % video_urn, 'SRGSSR') for video_urn in videos]

            if entries:
                return self.playlist_result(entries, media_id, all_info.get('title'))

            # last resort: the page embeds the real internal media id
            internal_id = self._html_search_regex(
                r'<(?:video|audio) data-id="([0-9]+)"', page,
                'internal video id')
            all_info = download_json(internal_id)

        media_type = 'video' if 'video' in all_info else 'audio'

        # check for errors
        self._get_media_data('rts', media_type, media_id)

        info = all_info['video']['JSONinfo'] if 'video' in all_info else all_info['audio']

        title = info['title']

        def extract_bitrate(url):
            # bitrate is conventionally embedded in URLs as '-<kbps>k.'
            return int_or_none(self._search_regex(
                r'-([0-9]+)k\.', url, 'bitrate', default=None))

        formats = []
        streams = info.get('streams', {})
        for format_id, format_url in streams.items():
            # skip SD variants when an unrestricted one is also offered
            if format_id == 'hds_sd' and 'hds' in streams:
                continue
            if format_id == 'hls_sd' and 'hls' in streams:
                continue
            ext = determine_ext(format_url)
            if ext in ('m3u8', 'f4m'):
                format_url = self._get_tokenized_src(format_url, media_id, format_id)
                if ext == 'f4m':
                    formats.extend(self._extract_f4m_formats(
                        format_url + ('?' if '?' not in format_url else '&') + 'hdcore=3.4.0',
                        media_id, f4m_id=format_id, fatal=False))
                else:
                    formats.extend(self._extract_m3u8_formats(
                        format_url, media_id, 'mp4', 'm3u8_native', m3u8_id=format_id, fatal=False))
            else:
                formats.append({
                    'format_id': format_id,
                    'url': format_url,
                    'tbr': extract_bitrate(format_url),
                })

        # relative 'media' entries are progressive downloads on this host
        download_base = 'http://rtsww%s-d.rts.ch/' % ('-a' if media_type == 'audio' else '')
        for media in info.get('media', []):
            media_url = media.get('url')
            if not media_url or re.match(r'https?://', media_url):
                continue
            rate = media.get('rate')
            ext = media.get('ext') or determine_ext(media_url, 'mp4')
            format_id = ext
            if rate:
                format_id += '-%dk' % rate
            formats.append({
                'format_id': format_id,
                'url': urljoin(download_base, media_url),
                'tbr': rate or extract_bitrate(media_url),
            })

        self._check_formats(formats, media_id)
        self._sort_formats(formats)

        duration = info.get('duration') or info.get('cutout') or info.get('cutduration')
        if isinstance(duration, compat_str):
            duration = parse_duration(duration)

        return {
            'id': media_id,
            'display_id': display_id,
            'formats': formats,
            'title': title,
            'description': info.get('intro'),
            'duration': duration,
            'view_count': int_or_none(info.get('plays')),
            'uploader': info.get('programName'),
            'timestamp': parse_iso8601(info.get('broadcast_date')),
            'thumbnail': unescapeHTML(info.get('preview_image_url')),
        }
# Iterate a bytes object as 1-char strings on both py2 (already chars) and py3
_bytes_to_chr = (lambda x: x) if sys.version_info[0] == 2 else (lambda x: map(chr, x))


class RTVEALaCartaIE(InfoExtractor):
    """Extractor for RTVE 'a la carta' (rtve.es on-demand) videos.

    Stream URLs are hidden inside a PNG image served by the ztnr endpoint;
    _decrypt_url parses the PNG chunks and decodes the obfuscated payload.
    """
    IE_NAME = 'rtve.es:alacarta'
    IE_DESC = 'RTVE a la carta'
    _VALID_URL = r'https?://(?:www\.)?rtve\.es/(m/)?(alacarta/videos|filmoteca)/[^/]+/[^/]+/(?P<id>\d+)'

    _TESTS = [{
        'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/',
        'md5': '1d49b7e1ca7a7502c56a4bf1b60f1b43',
        'info_dict': {
            'id': '2491869',
            'ext': 'mp4',
            'title': 'Balonmano - Swiss Cup masculina. Final: España-Suecia',
            'duration': 5024.566,
            'series': 'Balonmano',
        },
        'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
    }, {
        'note': 'Live stream',
        'url': 'http://www.rtve.es/alacarta/videos/television/24h-live/1694255/',
        'info_dict': {
            'id': '1694255',
            'ext': 'mp4',
            'title': 're:^24H LIVE [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'is_live': True,
        },
        'params': {
            'skip_download': 'live stream',
        },
    }, {
        'url': 'http://www.rtve.es/alacarta/videos/servir-y-proteger/servir-proteger-capitulo-104/4236788/',
        'md5': 'd850f3c8731ea53952ebab489cf81cbf',
        'info_dict': {
            'id': '4236788',
            'ext': 'mp4',
            'title': 'Servir y proteger - Capítulo 104',
            'duration': 3222.0,
        },
        'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
    }, {
        'url': 'http://www.rtve.es/m/alacarta/videos/cuentame-como-paso/cuentame-como-paso-t16-ultimo-minuto-nuestra-vida-capitulo-276/2969138/?media=tve',
        'only_matching': True,
    }, {
        'url': 'http://www.rtve.es/filmoteca/no-do/not-1-introduccion-primer-noticiario-espanol/1465256/',
        'only_matching': True,
    }]

    def _real_initialize(self):
        # the 'manager' token (derived from the UA) selects the ztnr endpoint
        user_agent_b64 = base64.b64encode(std_headers['User-Agent'].encode('utf-8')).decode('utf-8')
        self._manager = self._download_json(
            'http://www.rtve.es/odin/loki/' + user_agent_b64,
            None, 'Fetching manager info')['manager']

    @staticmethod
    def _decrypt_url(png):
        # Walk the PNG chunk structure (after the 8-byte signature): each
        # chunk is <length><type><data><crc>; the payload hides in 'tEXt'.
        encrypted_data = io.BytesIO(compat_b64decode(png)[8:])
        while True:
            length = compat_struct_unpack('!I', encrypted_data.read(4))[0]
            chunk_type = encrypted_data.read(4)
            if chunk_type == b'IEND':
                break
            data = encrypted_data.read(length)
            if chunk_type == b'tEXt':
                # payload layout: <alphabet>\0<quality>%%<encoded url>
                alphabet_data, text = data.split(b'\0')
                quality, url_data = text.split(b'%%')
                # rebuild the substitution alphabet: characters are kept or
                # skipped according to a rotating counter
                alphabet = []
                e = 0
                d = 0
                for l in _bytes_to_chr(alphabet_data):
                    if d == 0:
                        alphabet.append(l)
                        d = e = (e + 1) % 4
                    else:
                        d -= 1
                # decode the URL: digit pairs form indices into the alphabet,
                # interleaved with filler characters that are skipped
                url = ''
                f = 0
                e = 3
                b = 1
                for letter in _bytes_to_chr(url_data):
                    if f == 0:
                        l = int(letter) * 10
                        f = 1
                    else:
                        if e == 0:
                            l += int(letter)
                            url += alphabet[l]
                            e = (b + 3) % 4
                            f = 0
                            b += 1
                        else:
                            e -= 1

                yield quality.decode(), url
            encrypted_data.read(4)  # CRC

    def _extract_png_formats(self, video_id):
        # the ztnr 'thumbnail' PNG actually carries the stream URLs
        png = self._download_webpage(
            'http://www.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (self._manager, video_id),
            video_id, 'Downloading url information', query={'q': 'v2'})
        q = qualities(['Media', 'Alta', 'HQ', 'HD_READY', 'HD_FULL'])
        formats = []
        for quality, video_url in self._decrypt_url(png):
            ext = determine_ext(video_url)
            if ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    video_url, video_id, 'mp4', 'm3u8_native',
                    m3u8_id='hls', fatal=False))
            elif ext == 'mpd':
                formats.extend(self._extract_mpd_formats(
                    video_url, video_id, 'dash', fatal=False))
            else:
                formats.append({
                    'format_id': quality,
                    'quality': q(quality),
                    'url': video_url,
                })
        self._sort_formats(formats)
        return formats

    def _real_extract(self, url):
        video_id = self._match_id(url)
        info = self._download_json(
            'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id,
            video_id)['page']['items'][0]
        # 'DESPU' state marks an unpublished/withdrawn video
        if info['state'] == 'DESPU':
            raise ExtractorError('The video is no longer available', expected=True)
        title = info['title'].strip()
        formats = self._extract_png_formats(video_id)

        subtitles = None
        sbt_file = info.get('sbtFile')
        if sbt_file:
            subtitles = self.extract_subtitles(video_id, sbt_file)

        is_live = info.get('live') is True

        return {
            'id': video_id,
            'title': self._live_title(title) if is_live else title,
            'formats': formats,
            'thumbnail': info.get('image'),
            'subtitles': subtitles,
            # duration is reported in milliseconds
            'duration': float_or_none(info.get('duration'), 1000),
            'is_live': is_live,
            'series': info.get('programTitle'),
        }

    def _get_subtitles(self, video_id, sub_file):
        subs = self._download_json(
            sub_file + '.json', video_id,
            'Downloading subtitles info')['page']['items']
        return dict(
            (s['lang'], [{'ext': 'vtt', 'url': s['src']}])
            for s in subs)
class RTVELiveIE(RTVEALaCartaIE):
    """Extractor for RTVE.es live channel streams.

    Resolves the channel page to the internal asset id and reuses the
    PNG-based format resolver inherited from RTVEALaCartaIE.
    """
    IE_NAME = 'rtve.es:live'
    IE_DESC = 'RTVE.es live streams'
    _VALID_URL = r'https?://(?:www\.)?rtve\.es/directo/(?P<id>[a-zA-Z0-9-]+)'

    _TESTS = [{
        'url': 'http://www.rtve.es/directo/la-1/',
        'info_dict': {
            'id': 'la-1',
            'ext': 'mp4',
            'title': 're:^La 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
        },
        'params': {
            'skip_download': 'live stream',
        }
    }]

    def _real_extract(self, url):
        channel_id = self._match_id(url)
        webpage = self._download_webpage(url, channel_id)

        # strip the boilerplate the page wraps around the channel name
        title = remove_start(
            remove_end(self._og_search_title(webpage), ' en directo en RTVE.es'),
            'Estoy viendo ')

        # internal asset id consumed by _extract_png_formats
        asset_id = self._search_regex(
            (r'playerId=player([0-9]+)',
             r'class=["\'].*?\blive_mod\b.*?["\'][^>]+data-assetid=["\'](\d+)',
             r'data-id=["\'](\d+)'),
            webpage, 'internal video ID')

        return {
            'id': channel_id,
            'title': self._live_title(title),
            'formats': self._extract_png_formats(asset_id),
            'is_live': True,
        }
class RTVETelevisionIE(InfoExtractor):
    """Extractor for rtve.es/television pages.

    These pages only embed an 'a la carta' video; the page is resolved to
    that URL and handed off to RTVEALaCartaIE.
    """
    IE_NAME = 'rtve.es:television'
    _VALID_URL = r'https?://(?:www\.)?rtve\.es/television/[^/]+/[^/]+/(?P<id>\d+).shtml'

    _TEST = {
        'url': 'http://www.rtve.es/television/20160628/revolucion-del-movil/1364141.shtml',
        'info_dict': {
            'id': '3069778',
            'ext': 'mp4',
            'title': 'Documentos TV - La revolución del móvil',
            'duration': 3496.948,
        },
        'params': {
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        page_id = self._match_id(url)
        webpage = self._download_webpage(url, page_id)

        embedded_url = self._search_regex(
            r'data-location="alacarta_videos"[^<]+url":"(http://www\.rtve\.es/alacarta.+?)&',
            webpage, 'alacarta url', default=None)
        if embedded_url is None:
            raise ExtractorError(
                'The webpage doesn\'t contain any video', expected=True)

        # delegate the actual extraction to the a-la-carta extractor
        return self.url_result(embedded_url, ie=RTVEALaCartaIE.ie_key())
class RTVNHIE(InfoExtractor):
    """Extractor for rtvnh.nl (RTV Noord-Holland) videos."""
    _VALID_URL = r'https?://(?:www\.)?rtvnh\.nl/video/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.rtvnh.nl/video/131946',
        'md5': 'cdbec9f44550763c8afc96050fa747dc',
        'info_dict': {
            'id': '131946',
            'ext': 'mp4',
            'title': 'Grote zoektocht in zee bij Zandvoort naar vermiste vrouw',
            'thumbnail': r're:^https?:.*\.jpg$'
        }
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)

        meta = self._parse_json(self._download_webpage(
            'http://www.rtvnh.nl/video/json?m=' + video_id, video_id), video_id)

        # the JSON endpoint embeds an HTTP-like status code in its payload
        status = meta.get('status')
        if status != 200:
            raise ExtractorError(
                '%s returned error code %d' % (self.IE_NAME, status), expected=True)

        formats = []
        rtmp_formats = self._extract_smil_formats(
            'http://www.rtvnh.nl/video/smil?m=' + video_id, video_id)
        formats.extend(rtmp_formats)

        for rtmp_format in rtmp_formats:
            rtmp_url = '%s/%s' % (rtmp_format['url'], rtmp_format['play_path'])
            # derive an RTSP variant from each RTMP format: same stream,
            # different protocol/URL, without the rtmp-only fields
            rtsp_format = rtmp_format.copy()
            del rtsp_format['play_path']
            del rtsp_format['ext']
            rtsp_format.update({
                'format_id': rtmp_format['format_id'].replace('rtmp', 'rtsp'),
                'url': rtmp_url.replace('rtmp://', 'rtsp://'),
                'protocol': 'rtsp',
            })
            formats.append(rtsp_format)
            # the same path is also served over HTTP as HLS and HDS
            http_base_url = rtmp_url.replace('rtmp://', 'http://')
            formats.extend(self._extract_m3u8_formats(
                http_base_url + '/playlist.m3u8', video_id, 'mp4',
                'm3u8_native', m3u8_id='hls', fatal=False))
            formats.extend(self._extract_f4m_formats(
                http_base_url + '/manifest.f4m',
                video_id, f4m_id='hds', fatal=False))
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': meta['title'].strip(),
            'thumbnail': meta.get('image'),
            'formats': formats
        }
class RTVSIE(InfoExtractor):
    """Extractor for RTVS (Slovak public broadcaster) radio and TV archives."""
    _VALID_URL = r'https?://(?:www\.)?rtvs\.sk/(?:radio|televizia)/archiv/\d+/(?P<id>\d+)'
    _TESTS = [{
        # radio archive
        'url': 'http://www.rtvs.sk/radio/archiv/11224/414872',
        'md5': '134d5d6debdeddf8a5d761cbc9edacb8',
        'info_dict': {
            'id': '414872',
            'ext': 'mp3',
            'title': 'Ostrov pokladov 1 časť.mp3'
        },
        'params': {
            'skip_download': True,
        }
    }, {
        # tv archive
        'url': 'http://www.rtvs.sk/televizia/archiv/8249/63118',
        'md5': '85e2c55cf988403b70cac24f5c086dc6',
        'info_dict': {
            'id': '63118',
            'ext': 'mp4',
            'title': 'Amaro Džives - Náš deň',
            'description': 'Galavečer pri príležitosti Medzinárodného dňa Rómov.'
        },
        'params': {
            'skip_download': True,
        }
    }]

    def _real_extract(self, url):
        media_id = self._match_id(url)

        page = self._download_webpage(url, media_id)

        # the page configures a JW Player; grab its playlist URL
        jw_playlist_url = self._search_regex(
            r'playlist["\']?\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', page,
            'playlist url', group='url')

        playlist = self._download_json(
            jw_playlist_url, media_id, 'Downloading playlist')
        # single-item playlist: parse its first entry as JW Player data
        return self._parse_jwplayer_data(playlist[0], video_id=media_id)
    (.+?)', - webpage, 'description', fatal=False) - thumbnail = self._html_search_regex( - r'[0-9a-z]+)' - _TESTS = [{ - 'url': 'https://rumble.com/embed/v5pv5f', - 'md5': '36a18a049856720189f30977ccbb2c34', - 'info_dict': { - 'id': 'v5pv5f', - 'ext': 'mp4', - 'title': 'WMAR 2 News Latest Headlines | October 20, 6pm', - 'timestamp': 1571611968, - 'upload_date': '20191020', - } - }, { - 'url': 'https://rumble.com/embed/ufe9n.v5pv5f', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - video = self._download_json( - 'https://rumble.com/embedJS/', video_id, - query={'request': 'video', 'v': video_id}) - title = video['title'] - - formats = [] - for height, ua in (video.get('ua') or {}).items(): - for i in range(2): - f_url = try_get(ua, lambda x: x[i], compat_str) - if f_url: - ext = determine_ext(f_url) - f = { - 'ext': ext, - 'format_id': '%s-%sp' % (ext, height), - 'height': int_or_none(height), - 'url': f_url, - } - bitrate = try_get(ua, lambda x: x[i + 2]['bitrate']) - if bitrate: - f['tbr'] = int_or_none(bitrate) - formats.append(f) - self._sort_formats(formats) - - author = video.get('author') or {} - - return { - 'id': video_id, - 'title': title, - 'formats': formats, - 'thumbnail': video.get('i'), - 'timestamp': parse_iso8601(video.get('pubDate')), - 'channel': author.get('name'), - 'channel_url': author.get('url'), - 'duration': int_or_none(video.get('duration')), - } diff --git a/youtube_dl/extractor/rutube.py b/youtube_dl/extractor/rutube.py deleted file mode 100644 index 8f54d5675..000000000 --- a/youtube_dl/extractor/rutube.py +++ /dev/null @@ -1,313 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re -import itertools - -from .common import InfoExtractor -from ..compat import ( - compat_str, - compat_parse_qs, - compat_urllib_parse_urlparse, -) -from ..utils import ( - determine_ext, - bool_or_none, - int_or_none, - try_get, - unified_timestamp, - url_or_none, -) - - -class 
RutubeBaseIE(InfoExtractor): - def _download_api_info(self, video_id, query=None): - if not query: - query = {} - query['format'] = 'json' - return self._download_json( - 'http://rutube.ru/api/video/%s/' % video_id, - video_id, 'Downloading video JSON', - 'Unable to download video JSON', query=query) - - @staticmethod - def _extract_info(video, video_id=None, require_title=True): - title = video['title'] if require_title else video.get('title') - - age_limit = video.get('is_adult') - if age_limit is not None: - age_limit = 18 if age_limit is True else 0 - - uploader_id = try_get(video, lambda x: x['author']['id']) - category = try_get(video, lambda x: x['category']['name']) - - return { - 'id': video.get('id') or video_id if video_id else video['id'], - 'title': title, - 'description': video.get('description'), - 'thumbnail': video.get('thumbnail_url'), - 'duration': int_or_none(video.get('duration')), - 'uploader': try_get(video, lambda x: x['author']['name']), - 'uploader_id': compat_str(uploader_id) if uploader_id else None, - 'timestamp': unified_timestamp(video.get('created_ts')), - 'category': [category] if category else None, - 'age_limit': age_limit, - 'view_count': int_or_none(video.get('hits')), - 'comment_count': int_or_none(video.get('comments_count')), - 'is_live': bool_or_none(video.get('is_livestream')), - } - - def _download_and_extract_info(self, video_id, query=None): - return self._extract_info( - self._download_api_info(video_id, query=query), video_id) - - def _download_api_options(self, video_id, query=None): - if not query: - query = {} - query['format'] = 'json' - return self._download_json( - 'http://rutube.ru/api/play/options/%s/' % video_id, - video_id, 'Downloading options JSON', - 'Unable to download options JSON', - headers=self.geo_verification_headers(), query=query) - - def _extract_formats(self, options, video_id): - formats = [] - for format_id, format_url in options['video_balancer'].items(): - ext = determine_ext(format_url) - 
if ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( - format_url, video_id, 'mp4', m3u8_id=format_id, fatal=False)) - elif ext == 'f4m': - formats.extend(self._extract_f4m_formats( - format_url, video_id, f4m_id=format_id, fatal=False)) - else: - formats.append({ - 'url': format_url, - 'format_id': format_id, - }) - self._sort_formats(formats) - return formats - - def _download_and_extract_formats(self, video_id, query=None): - return self._extract_formats( - self._download_api_options(video_id, query=query), video_id) - - -class RutubeIE(RutubeBaseIE): - IE_NAME = 'rutube' - IE_DESC = 'Rutube videos' - _VALID_URL = r'https?://rutube\.ru/(?:video|(?:play/)?embed)/(?P[\da-z]{32})' - - _TESTS = [{ - 'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/', - 'md5': '1d24f180fac7a02f3900712e5a5764d6', - 'info_dict': { - 'id': '3eac3b4561676c17df9132a9a1e62e3e', - 'ext': 'mp4', - 'title': 'Раненный кенгуру забежал в аптеку', - 'description': 'http://www.ntdtv.ru ', - 'duration': 81, - 'uploader': 'NTDRussian', - 'uploader_id': '29790', - 'timestamp': 1381943602, - 'upload_date': '20131016', - 'age_limit': 0, - }, - }, { - 'url': 'http://rutube.ru/play/embed/a10e53b86e8f349080f718582ce4c661', - 'only_matching': True, - }, { - 'url': 'http://rutube.ru/embed/a10e53b86e8f349080f718582ce4c661', - 'only_matching': True, - }, { - 'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/?pl_id=4252', - 'only_matching': True, - }, { - 'url': 'https://rutube.ru/video/10b3a03fc01d5bbcc632a2f3514e8aab/?pl_type=source', - 'only_matching': True, - }] - - @classmethod - def suitable(cls, url): - return False if RutubePlaylistIE.suitable(url) else super(RutubeIE, cls).suitable(url) - - @staticmethod - def _extract_urls(webpage): - return [mobj.group('url') for mobj in re.finditer( - r']+?src=(["\'])(?P(?:https?:)?//rutube\.ru/embed/[\da-z]{32}.*?)\1', - webpage)] - - def _real_extract(self, url): - video_id = self._match_id(url) - info = 
self._download_and_extract_info(video_id) - info['formats'] = self._download_and_extract_formats(video_id) - return info - - -class RutubeEmbedIE(RutubeBaseIE): - IE_NAME = 'rutube:embed' - IE_DESC = 'Rutube embedded videos' - _VALID_URL = r'https?://rutube\.ru/(?:video|play)/embed/(?P[0-9]+)' - - _TESTS = [{ - 'url': 'http://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=', - 'info_dict': { - 'id': 'a10e53b86e8f349080f718582ce4c661', - 'ext': 'mp4', - 'timestamp': 1387830582, - 'upload_date': '20131223', - 'uploader_id': '297833', - 'description': 'Видео группы ★http://vk.com/foxkidsreset★ музей Fox Kids и Jetix

    восстановлено и сделано в шикоформате subziro89 http://vk.com/subziro89', - 'uploader': 'subziro89 ILya', - 'title': 'Мистический городок Эйри в Индиан 5 серия озвучка subziro89', - }, - 'params': { - 'skip_download': True, - }, - }, { - 'url': 'http://rutube.ru/play/embed/8083783', - 'only_matching': True, - }, { - # private video - 'url': 'https://rutube.ru/play/embed/10631925?p=IbAigKqWd1do4mjaM5XLIQ', - 'only_matching': True, - }] - - def _real_extract(self, url): - embed_id = self._match_id(url) - # Query may contain private videos token and should be passed to API - # requests (see #19163) - query = compat_parse_qs(compat_urllib_parse_urlparse(url).query) - options = self._download_api_options(embed_id, query) - video_id = options['effective_video'] - formats = self._extract_formats(options, video_id) - info = self._download_and_extract_info(video_id, query) - info.update({ - 'extractor_key': 'Rutube', - 'formats': formats, - }) - return info - - -class RutubePlaylistBaseIE(RutubeBaseIE): - def _next_page_url(self, page_num, playlist_id, *args, **kwargs): - return self._PAGE_TEMPLATE % (playlist_id, page_num) - - def _entries(self, playlist_id, *args, **kwargs): - next_page_url = None - for pagenum in itertools.count(1): - page = self._download_json( - next_page_url or self._next_page_url( - pagenum, playlist_id, *args, **kwargs), - playlist_id, 'Downloading page %s' % pagenum) - - results = page.get('results') - if not results or not isinstance(results, list): - break - - for result in results: - video_url = url_or_none(result.get('video_url')) - if not video_url: - continue - entry = self._extract_info(result, require_title=False) - entry.update({ - '_type': 'url', - 'url': video_url, - 'ie_key': RutubeIE.ie_key(), - }) - yield entry - - next_page_url = page.get('next') - if not next_page_url or not page.get('has_next'): - break - - def _extract_playlist(self, playlist_id, *args, **kwargs): - return self.playlist_result( - self._entries(playlist_id, 
*args, **kwargs), - playlist_id, kwargs.get('playlist_name')) - - def _real_extract(self, url): - return self._extract_playlist(self._match_id(url)) - - -class RutubeChannelIE(RutubePlaylistBaseIE): - IE_NAME = 'rutube:channel' - IE_DESC = 'Rutube channels' - _VALID_URL = r'https?://rutube\.ru/tags/video/(?P\d+)' - _TESTS = [{ - 'url': 'http://rutube.ru/tags/video/1800/', - 'info_dict': { - 'id': '1800', - }, - 'playlist_mincount': 68, - }] - - _PAGE_TEMPLATE = 'http://rutube.ru/api/tags/video/%s/?page=%s&format=json' - - -class RutubeMovieIE(RutubePlaylistBaseIE): - IE_NAME = 'rutube:movie' - IE_DESC = 'Rutube movies' - _VALID_URL = r'https?://rutube\.ru/metainfo/tv/(?P\d+)' - _TESTS = [] - - _MOVIE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/?format=json' - _PAGE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/video?page=%s&format=json' - - def _real_extract(self, url): - movie_id = self._match_id(url) - movie = self._download_json( - self._MOVIE_TEMPLATE % movie_id, movie_id, - 'Downloading movie JSON') - return self._extract_playlist( - movie_id, playlist_name=movie.get('name')) - - -class RutubePersonIE(RutubePlaylistBaseIE): - IE_NAME = 'rutube:person' - IE_DESC = 'Rutube person videos' - _VALID_URL = r'https?://rutube\.ru/video/person/(?P\d+)' - _TESTS = [{ - 'url': 'http://rutube.ru/video/person/313878/', - 'info_dict': { - 'id': '313878', - }, - 'playlist_mincount': 37, - }] - - _PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json' - - -class RutubePlaylistIE(RutubePlaylistBaseIE): - IE_NAME = 'rutube:playlist' - IE_DESC = 'Rutube playlists' - _VALID_URL = r'https?://rutube\.ru/(?:video|(?:play/)?embed)/[\da-z]{32}/\?.*?\bpl_id=(?P\d+)' - _TESTS = [{ - 'url': 'https://rutube.ru/video/cecd58ed7d531fc0f3d795d51cee9026/?pl_id=3097&pl_type=tag', - 'info_dict': { - 'id': '3097', - }, - 'playlist_count': 27, - }, { - 'url': 'https://rutube.ru/video/10b3a03fc01d5bbcc632a2f3514e8aab/?pl_id=4252&pl_type=source', - 'only_matching': True, - 
}] - - _PAGE_TEMPLATE = 'http://rutube.ru/api/playlist/%s/%s/?page=%s&format=json' - - @classmethod - def suitable(cls, url): - if not super(RutubePlaylistIE, cls).suitable(url): - return False - params = compat_parse_qs(compat_urllib_parse_urlparse(url).query) - return params.get('pl_type', [None])[0] and int_or_none(params.get('pl_id', [None])[0]) - - def _next_page_url(self, page_num, playlist_id, item_kind): - return self._PAGE_TEMPLATE % (item_kind, playlist_id, page_num) - - def _real_extract(self, url): - qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query) - playlist_kind = qs['pl_type'][0] - playlist_id = qs['pl_id'][0] - return self._extract_playlist(playlist_id, item_kind=playlist_kind) diff --git a/youtube_dl/extractor/rutv.py b/youtube_dl/extractor/rutv.py deleted file mode 100644 index 05f319396..000000000 --- a/youtube_dl/extractor/rutv.py +++ /dev/null @@ -1,212 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - int_or_none, - str_to_int -) - - -class RUTVIE(InfoExtractor): - IE_DESC = 'RUTV.RU' - _VALID_URL = r'''(?x) - https?:// - (?:test)?player\.(?:rutv\.ru|vgtrk\.com)/ - (?P - flash\d+v/container\.swf\?id=| - iframe/(?Pswf|video|live)/id/| - index/iframe/cast_id/ - ) - (?P\d+) - ''' - - _TESTS = [ - { - 'url': 'http://player.rutv.ru/flash2v/container.swf?id=774471&sid=kultura&fbv=true&isPlay=true&ssl=false&i=560&acc_video_id=episode_id/972347/video_id/978186/brand_id/31724', - 'info_dict': { - 'id': '774471', - 'ext': 'mp4', - 'title': 'Монологи на все времена', - 'description': 'md5:18d8b5e6a41fb1faa53819471852d5d5', - 'duration': 2906, - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, - { - 'url': 'https://player.vgtrk.com/flash2v/container.swf?id=774016&sid=russiatv&fbv=true&isPlay=true&ssl=false&i=560&acc_video_id=episode_id/972098/video_id/977760/brand_id/57638', - 'info_dict': { - 'id': '774016', - 
'ext': 'mp4', - 'title': 'Чужой в семье Сталина', - 'description': '', - 'duration': 2539, - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, - { - 'url': 'http://player.rutv.ru/iframe/swf/id/766888/sid/hitech/?acc_video_id=4000', - 'info_dict': { - 'id': '766888', - 'ext': 'mp4', - 'title': 'Вести.net: интернет-гиганты начали перетягивание программных "одеял"', - 'description': 'md5:65ddd47f9830c4f42ed6475f8730c995', - 'duration': 279, - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, - { - 'url': 'http://player.rutv.ru/iframe/video/id/771852/start_zoom/true/showZoomBtn/false/sid/russiatv/?acc_video_id=episode_id/970443/video_id/975648/brand_id/5169', - 'info_dict': { - 'id': '771852', - 'ext': 'mp4', - 'title': 'Прямой эфир. Жертвы загадочной болезни: смерть от старости в 17 лет', - 'description': 'md5:b81c8c55247a4bd996b43ce17395b2d8', - 'duration': 3096, - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, - { - 'url': 'http://player.rutv.ru/iframe/live/id/51499/showZoomBtn/false/isPlay/true/sid/sochi2014', - 'info_dict': { - 'id': '51499', - 'ext': 'flv', - 'title': 'Сочи-2014. Биатлон. Индивидуальная гонка. Мужчины ', - 'description': 'md5:9e0ed5c9d2fa1efbfdfed90c9a6d179c', - }, - 'skip': 'Translation has finished', - }, - { - 'url': 'http://player.rutv.ru/iframe/live/id/21/showZoomBtn/false/isPlay/true/', - 'info_dict': { - 'id': '21', - 'ext': 'mp4', - 'title': 're:^Россия 24. 
Прямой эфир [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', - 'is_live': True, - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, - { - 'url': 'https://testplayer.vgtrk.com/iframe/live/id/19201/showZoomBtn/false/isPlay/true/', - 'only_matching': True, - }, - ] - - @classmethod - def _extract_url(cls, webpage): - mobj = re.search( - r']+?src=(["\'])(?Phttps?://(?:test)?player\.(?:rutv\.ru|vgtrk\.com)/(?:iframe/(?:swf|video|live)/id|index/iframe/cast_id)/.+?)\1', webpage) - if mobj: - return mobj.group('url') - - mobj = re.search( - r']+?property=(["\'])og:video\1[^>]+?content=(["\'])(?Phttps?://(?:test)?player\.(?:rutv\.ru|vgtrk\.com)/flash\d+v/container\.swf\?id=.+?\2)', - webpage) - if mobj: - return mobj.group('url') - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - video_path = mobj.group('path') - - if re.match(r'flash\d+v', video_path): - video_type = 'video' - elif video_path.startswith('iframe'): - video_type = mobj.group('type') - if video_type == 'swf': - video_type = 'video' - elif video_path.startswith('index/iframe/cast_id'): - video_type = 'live' - - is_live = video_type == 'live' - - json_data = self._download_json( - 'http://player.rutv.ru/iframe/data%s/id/%s' % ('live' if is_live else 'video', video_id), - video_id, 'Downloading JSON') - - if json_data['errors']: - raise ExtractorError('%s said: %s' % (self.IE_NAME, json_data['errors']), expected=True) - - playlist = json_data['data']['playlist'] - medialist = playlist['medialist'] - media = medialist[0] - - if media['errors']: - raise ExtractorError('%s said: %s' % (self.IE_NAME, media['errors']), expected=True) - - view_count = playlist.get('count_views') - priority_transport = playlist['priority_transport'] - - thumbnail = media['picture'] - width = int_or_none(media['width']) - height = int_or_none(media['height']) - description = media['anons'] - title = media['title'] - duration = int_or_none(media.get('duration')) - 
- formats = [] - - for transport, links in media['sources'].items(): - for quality, url in links.items(): - preference = -1 if priority_transport == transport else -2 - if transport == 'rtmp': - mobj = re.search(r'^(?Prtmp://[^/]+/(?P.+))/(?P.+)$', url) - if not mobj: - continue - fmt = { - 'url': mobj.group('url'), - 'play_path': mobj.group('playpath'), - 'app': mobj.group('app'), - 'page_url': 'http://player.rutv.ru', - 'player_url': 'http://player.rutv.ru/flash3v/osmf.swf?i=22', - 'rtmp_live': True, - 'ext': 'flv', - 'vbr': str_to_int(quality), - 'preference': preference, - } - elif transport == 'm3u8': - formats.extend(self._extract_m3u8_formats( - url, video_id, 'mp4', preference=preference, m3u8_id='hls')) - continue - else: - fmt = { - 'url': url - } - fmt.update({ - 'width': width, - 'height': height, - 'format_id': '%s-%s' % (transport, quality), - }) - formats.append(fmt) - - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': self._live_title(title) if is_live else title, - 'description': description, - 'thumbnail': thumbnail, - 'view_count': view_count, - 'duration': duration, - 'formats': formats, - 'is_live': is_live, - } diff --git a/youtube_dl/extractor/ruutu.py b/youtube_dl/extractor/ruutu.py deleted file mode 100644 index c50cd3ecd..000000000 --- a/youtube_dl/extractor/ruutu.py +++ /dev/null @@ -1,227 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlparse -from ..utils import ( - determine_ext, - ExtractorError, - find_xpath_attr, - int_or_none, - unified_strdate, - url_or_none, - xpath_attr, - xpath_text, -) - - -class RuutuIE(InfoExtractor): - _VALID_URL = r'''(?x) - https?:// - (?: - (?:www\.)?(?:ruutu|supla)\.fi/(?:video|supla|audio)/| - static\.nelonenmedia\.fi/player/misc/embed_player\.html\?.*?\bnid= - ) - (?P\d+) - ''' - _TESTS = [ - { - 'url': 'http://www.ruutu.fi/video/2058907', - 'md5': 'ab2093f39be1ca8581963451b3c0234f', 
- 'info_dict': { - 'id': '2058907', - 'ext': 'mp4', - 'title': 'Oletko aina halunnut tietää mitä tapahtuu vain hetki ennen lähetystä? - Nyt se selvisi!', - 'description': 'md5:cfc6ccf0e57a814360df464a91ff67d6', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 114, - 'age_limit': 0, - }, - }, - { - 'url': 'http://www.ruutu.fi/video/2057306', - 'md5': '065a10ae4d5b8cfd9d0c3d332465e3d9', - 'info_dict': { - 'id': '2057306', - 'ext': 'mp4', - 'title': 'Superpesis: katso koko kausi Ruudussa', - 'description': 'md5:bfb7336df2a12dc21d18fa696c9f8f23', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 40, - 'age_limit': 0, - }, - }, - { - 'url': 'http://www.supla.fi/supla/2231370', - 'md5': 'df14e782d49a2c0df03d3be2a54ef949', - 'info_dict': { - 'id': '2231370', - 'ext': 'mp4', - 'title': 'Osa 1: Mikael Jungner', - 'description': 'md5:7d90f358c47542e3072ff65d7b1bcffe', - 'thumbnail': r're:^https?://.*\.jpg$', - 'age_limit': 0, - }, - }, - # Episode where is "NOT-USED", but has other - # downloadable sources available. 
- { - 'url': 'http://www.ruutu.fi/video/3193728', - 'only_matching': True, - }, - { - # audio podcast - 'url': 'https://www.supla.fi/supla/3382410', - 'md5': 'b9d7155fed37b2ebf6021d74c4b8e908', - 'info_dict': { - 'id': '3382410', - 'ext': 'mp3', - 'title': 'Mikä ihmeen poltergeist?', - 'description': 'md5:bbb6963df17dfd0ecd9eb9a61bf14b52', - 'thumbnail': r're:^https?://.*\.jpg$', - 'age_limit': 0, - }, - 'expected_warnings': [ - 'HTTP Error 502: Bad Gateway', - 'Failed to download m3u8 information', - ], - }, - { - 'url': 'http://www.supla.fi/audio/2231370', - 'only_matching': True, - }, - { - 'url': 'https://static.nelonenmedia.fi/player/misc/embed_player.html?nid=3618790', - 'only_matching': True, - }, - { - # episode - 'url': 'https://www.ruutu.fi/video/3401964', - 'info_dict': { - 'id': '3401964', - 'ext': 'mp4', - 'title': 'Temptation Island Suomi - Kausi 5 - Jakso 17', - 'description': 'md5:87cf01d5e1e88adf0c8a2937d2bd42ba', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 2582, - 'age_limit': 12, - 'upload_date': '20190508', - 'series': 'Temptation Island Suomi', - 'season_number': 5, - 'episode_number': 17, - 'categories': ['Reality ja tositapahtumat', 'Kotimaiset suosikit', 'Romantiikka ja parisuhde'], - }, - 'params': { - 'skip_download': True, - }, - }, - { - # premium - 'url': 'https://www.ruutu.fi/video/3618715', - 'only_matching': True, - }, - ] - _API_BASE = 'https://gatling.nelonenmedia.fi' - - def _real_extract(self, url): - video_id = self._match_id(url) - - video_xml = self._download_xml( - '%s/media-xml-cache' % self._API_BASE, video_id, - query={'id': video_id}) - - formats = [] - processed_urls = [] - - def extract_formats(node): - for child in node: - if child.tag.endswith('Files'): - extract_formats(child) - elif child.tag.endswith('File'): - video_url = child.text - if (not video_url or video_url in processed_urls - or any(p in video_url for p in ('NOT_USED', 'NOT-USED'))): - continue - processed_urls.append(video_url) - ext = 
determine_ext(video_url) - auth_video_url = url_or_none(self._download_webpage( - '%s/auth/access/v2' % self._API_BASE, video_id, - note='Downloading authenticated %s stream URL' % ext, - fatal=False, query={'stream': video_url})) - if auth_video_url: - processed_urls.append(auth_video_url) - video_url = auth_video_url - if ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( - video_url, video_id, 'mp4', - entry_protocol='m3u8_native', m3u8_id='hls', - fatal=False)) - elif ext == 'f4m': - formats.extend(self._extract_f4m_formats( - video_url, video_id, f4m_id='hds', fatal=False)) - elif ext == 'mpd': - # video-only and audio-only streams are of different - # duration resulting in out of sync issue - continue - formats.extend(self._extract_mpd_formats( - video_url, video_id, mpd_id='dash', fatal=False)) - elif ext == 'mp3' or child.tag == 'AudioMediaFile': - formats.append({ - 'format_id': 'audio', - 'url': video_url, - 'vcodec': 'none', - }) - else: - proto = compat_urllib_parse_urlparse(video_url).scheme - if not child.tag.startswith('HTTP') and proto != 'rtmp': - continue - preference = -1 if proto == 'rtmp' else 1 - label = child.get('label') - tbr = int_or_none(child.get('bitrate')) - format_id = '%s-%s' % (proto, label if label else tbr) if label or tbr else proto - if not self._is_valid_url(video_url, video_id, format_id): - continue - width, height = [int_or_none(x) for x in child.get('resolution', 'x').split('x')[:2]] - formats.append({ - 'format_id': format_id, - 'url': video_url, - 'width': width, - 'height': height, - 'tbr': tbr, - 'preference': preference, - }) - - extract_formats(video_xml.find('./Clip')) - - def pv(name): - node = find_xpath_attr( - video_xml, './Clip/PassthroughVariables/variable', 'name', name) - if node is not None: - return node.get('value') - - if not formats: - drm = xpath_text(video_xml, './Clip/DRM', default=None) - if drm: - raise ExtractorError('This video is DRM protected.', expected=True) - ns_st_cds = 
pv('ns_st_cds') - if ns_st_cds != 'free': - raise ExtractorError('This video is %s.' % ns_st_cds, expected=True) - - self._sort_formats(formats) - - themes = pv('themes') - - return { - 'id': video_id, - 'title': xpath_attr(video_xml, './/Behavior/Program', 'program_name', 'title', fatal=True), - 'description': xpath_attr(video_xml, './/Behavior/Program', 'description', 'description'), - 'thumbnail': xpath_attr(video_xml, './/Behavior/Startpicture', 'href', 'thumbnail'), - 'duration': int_or_none(xpath_text(video_xml, './/Runtime', 'duration')) or int_or_none(pv('runtime')), - 'age_limit': int_or_none(xpath_text(video_xml, './/AgeLimit', 'age limit')), - 'upload_date': unified_strdate(pv('date_start')), - 'series': pv('series_name'), - 'season_number': int_or_none(pv('season_number')), - 'episode_number': int_or_none(pv('episode_number')), - 'categories': themes.split(',') if themes else [], - 'formats': formats, - } diff --git a/youtube_dl/extractor/ruv.py b/youtube_dl/extractor/ruv.py deleted file mode 100644 index 8f3cc4095..000000000 --- a/youtube_dl/extractor/ruv.py +++ /dev/null @@ -1,101 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - determine_ext, - unified_timestamp, -) - - -class RuvIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?ruv\.is/(?:sarpurinn/[^/]+|node)/(?P[^/]+(?:/\d+)?)' - _TESTS = [{ - # m3u8 - 'url': 'http://ruv.is/sarpurinn/ruv-aukaras/fh-valur/20170516', - 'md5': '66347652f4e13e71936817102acc1724', - 'info_dict': { - 'id': '1144499', - 'display_id': 'fh-valur/20170516', - 'ext': 'mp4', - 'title': 'FH - Valur', - 'description': 'Bein útsending frá 3. 
leik FH og Vals í úrslitum Olísdeildar karla í handbolta.', - 'timestamp': 1494963600, - 'upload_date': '20170516', - }, - }, { - # mp3 - 'url': 'http://ruv.is/sarpurinn/ras-2/morgunutvarpid/20170619', - 'md5': '395ea250c8a13e5fdb39d4670ef85378', - 'info_dict': { - 'id': '1153630', - 'display_id': 'morgunutvarpid/20170619', - 'ext': 'mp3', - 'title': 'Morgunútvarpið', - 'description': 'md5:a4cf1202c0a1645ca096b06525915418', - 'timestamp': 1497855000, - 'upload_date': '20170619', - }, - }, { - 'url': 'http://ruv.is/sarpurinn/ruv/frettir/20170614', - 'only_matching': True, - }, { - 'url': 'http://www.ruv.is/node/1151854', - 'only_matching': True, - }, { - 'url': 'http://ruv.is/sarpurinn/klippa/secret-soltice-hefst-a-morgun', - 'only_matching': True, - }, { - 'url': 'http://ruv.is/sarpurinn/ras-1/morgunvaktin/20170619', - 'only_matching': True, - }] - - def _real_extract(self, url): - display_id = self._match_id(url) - - webpage = self._download_webpage(url, display_id) - - title = self._og_search_title(webpage) - - FIELD_RE = r'video\.%s\s*=\s*(["\'])(?P(?:(?!\1).)+)\1' - - media_url = self._html_search_regex( - FIELD_RE % 'src', webpage, 'video URL', group='url') - - video_id = self._search_regex( - r']+\bhref=["\']https?://www\.ruv\.is/node/(\d+)', - webpage, 'video id', default=display_id) - - ext = determine_ext(media_url) - - if ext == 'm3u8': - formats = self._extract_m3u8_formats( - media_url, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls') - elif ext == 'mp3': - formats = [{ - 'format_id': 'mp3', - 'url': media_url, - 'vcodec': 'none', - }] - else: - formats = [{ - 'url': media_url, - }] - - description = self._og_search_description(webpage, default=None) - thumbnail = self._og_search_thumbnail( - webpage, default=None) or self._search_regex( - FIELD_RE % 'poster', webpage, 'thumbnail', fatal=False) - timestamp = unified_timestamp(self._html_search_meta( - 'article:published_time', webpage, 'timestamp', fatal=False)) - - return { - 'id': 
video_id, - 'display_id': display_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'timestamp': timestamp, - 'formats': formats, - } diff --git a/youtube_dl/extractor/s4c.py b/youtube_dl/extractor/s4c.py deleted file mode 100644 index b152e6680..000000000 --- a/youtube_dl/extractor/s4c.py +++ /dev/null @@ -1,124 +0,0 @@ -# coding: utf-8 - -from __future__ import unicode_literals - -from functools import partial as partial_f - -from .common import InfoExtractor -from ..utils import ( - float_or_none, - merge_dicts, - T, - traverse_obj, - txt_or_none, - url_or_none, -) - - -class S4CIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?s4c\.cymru/clic/programme/(?P\d+)' - _TESTS = [{ - 'url': 'https://www.s4c.cymru/clic/programme/861362209', - 'info_dict': { - 'id': '861362209', - 'ext': 'mp4', - 'title': 'Y Swn', - 'description': 'md5:f7681a30e4955b250b3224aa9fe70cf0', - 'duration': 5340, - 'thumbnail': 'https://www.s4c.cymru/amg/1920x1080/Y_Swn_2023S4C_099_ii.jpg', - }, - }, { - 'url': 'https://www.s4c.cymru/clic/programme/856636948', - 'info_dict': { - 'id': '856636948', - 'ext': 'mp4', - 'title': 'Am Dro', - 'duration': 2880, - 'description': 'md5:100d8686fc9a632a0cb2db52a3433ffe', - 'thumbnail': 'https://www.s4c.cymru/amg/1920x1080/Am_Dro_2022-23S4C_P6_4005.jpg', - }, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - details = self._download_json( - 'https://www.s4c.cymru/df/full_prog_details', - video_id, query={ - 'lang': 'e', - 'programme_id': video_id, - }, fatal=False) - - player_config = self._download_json( - 'https://player-api.s4c-cdn.co.uk/player-configuration/prod', video_id, query={ - 'programme_id': video_id, - 'signed': '0', - 'lang': 'en', - 'mode': 'od', - 'appId': 'clic', - 'streamName': '', - }, note='Downloading player config JSON') - - m3u8_url = self._download_json( - 'https://player-api.s4c-cdn.co.uk/streaming-urls/prod', video_id, query={ - 'mode': 'od', - 'application': 'clic', - 
'region': 'WW', - 'extra': 'false', - 'thirdParty': 'false', - 'filename': player_config['filename'], - }, note='Downloading streaming urls JSON')['hls'] - formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls', entry_protocol='m3u8_native') - self._sort_formats(formats) - - subtitles = {} - for sub in traverse_obj(player_config, ('subtitles', lambda _, v: url_or_none(v['0']))): - subtitles.setdefault(sub.get('3', 'en'), []).append({ - 'url': sub['0'], - 'name': sub.get('1'), - }) - - return merge_dicts({ - 'id': video_id, - 'formats': formats, - 'subtitles': subtitles, - 'thumbnail': url_or_none(player_config.get('poster')), - }, traverse_obj(details, ('full_prog_details', 0, { - 'title': (('programme_title', 'series_title'), T(txt_or_none)), - 'description': ('full_billing', T(txt_or_none)), - 'duration': ('duration', T(partial_f(float_or_none, invscale=60))), - }), get_all=False), - rev=True) - - -class S4CSeriesIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?s4c\.cymru/clic/series/(?P\d+)' - _TESTS = [{ - 'url': 'https://www.s4c.cymru/clic/series/864982911', - 'playlist_mincount': 6, - 'info_dict': { - 'id': '864982911', - 'title': 'Iaith ar Daith', - }, - }, { - 'url': 'https://www.s4c.cymru/clic/series/866852587', - 'playlist_mincount': 8, - 'info_dict': { - 'id': '866852587', - 'title': 'FFIT Cymru', - }, - }] - - def _real_extract(self, url): - series_id = self._match_id(url) - series_details = self._download_json( - 'https://www.s4c.cymru/df/series_details', series_id, query={ - 'lang': 'e', - 'series_id': series_id, - 'show_prog_in_series': 'Y' - }, note='Downloading series details JSON') - - return self.playlist_result( - (self.url_result('https://www.s4c.cymru/clic/programme/' + episode_id, S4CIE, episode_id) - for episode_id in traverse_obj(series_details, ('other_progs_in_series', Ellipsis, 'id'))), - playlist_id=series_id, playlist_title=traverse_obj( - series_details, ('full_prog_details', 0, 'series_title', 
T(txt_or_none)))) diff --git a/youtube_dl/extractor/safari.py b/youtube_dl/extractor/safari.py deleted file mode 100644 index 2cc665122..000000000 --- a/youtube_dl/extractor/safari.py +++ /dev/null @@ -1,264 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import json -import re - -from .common import InfoExtractor - -from ..compat import ( - compat_parse_qs, - compat_urlparse, -) -from ..utils import ( - ExtractorError, - update_url_query, -) - - -class SafariBaseIE(InfoExtractor): - _LOGIN_URL = 'https://learning.oreilly.com/accounts/login/' - _NETRC_MACHINE = 'safari' - - _API_BASE = 'https://learning.oreilly.com/api/v1' - _API_FORMAT = 'json' - - LOGGED_IN = False - - def _real_initialize(self): - self._login() - - def _login(self): - username, password = self._get_login_info() - if username is None: - return - - _, urlh = self._download_webpage_handle( - 'https://learning.oreilly.com/accounts/login-check/', None, - 'Downloading login page') - - def is_logged(urlh): - return 'learning.oreilly.com/home/' in urlh.geturl() - - if is_logged(urlh): - self.LOGGED_IN = True - return - - redirect_url = urlh.geturl() - parsed_url = compat_urlparse.urlparse(redirect_url) - qs = compat_parse_qs(parsed_url.query) - next_uri = compat_urlparse.urljoin( - 'https://api.oreilly.com', qs['next'][0]) - - auth, urlh = self._download_json_handle( - 'https://www.oreilly.com/member/auth/login/', None, 'Logging in', - data=json.dumps({ - 'email': username, - 'password': password, - 'redirect_uri': next_uri, - }).encode(), headers={ - 'Content-Type': 'application/json', - 'Referer': redirect_url, - }, expected_status=400) - - credentials = auth.get('credentials') - if (not auth.get('logged_in') and not auth.get('redirect_uri') - and credentials): - raise ExtractorError( - 'Unable to login: %s' % credentials, expected=True) - - # oreilly serves two same instances of the following cookies - # in Set-Cookie header and expects first one to be actually set - for cookie 
in ('groot_sessionid', 'orm-jwt', 'orm-rt'): - self._apply_first_set_cookie_header(urlh, cookie) - - _, urlh = self._download_webpage_handle( - auth.get('redirect_uri') or next_uri, None, 'Completing login',) - - if is_logged(urlh): - self.LOGGED_IN = True - return - - raise ExtractorError('Unable to log in') - - -class SafariIE(SafariBaseIE): - IE_NAME = 'safari' - IE_DESC = 'safaribooksonline.com online video' - _VALID_URL = r'''(?x) - https?:// - (?:www\.)?(?:safaribooksonline|(?:learning\.)?oreilly)\.com/ - (?: - library/view/[^/]+/(?P[^/]+)/(?P[^/?\#&]+)\.html| - videos/[^/]+/[^/]+/(?P[^-]+-[^/?\#&]+) - ) - ''' - - _TESTS = [{ - 'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html', - 'md5': 'dcc5a425e79f2564148652616af1f2a3', - 'info_dict': { - 'id': '0_qbqx90ic', - 'ext': 'mp4', - 'title': 'Introduction to Hadoop Fundamentals LiveLessons', - 'timestamp': 1437758058, - 'upload_date': '20150724', - 'uploader_id': 'stork', - }, - }, { - # non-digits in course id - 'url': 'https://www.safaribooksonline.com/library/view/create-a-nodejs/100000006A0210/part00.html', - 'only_matching': True, - }, { - 'url': 'https://www.safaribooksonline.com/library/view/learning-path-red/9780134664057/RHCE_Introduction.html', - 'only_matching': True, - }, { - 'url': 'https://www.safaribooksonline.com/videos/python-programming-language/9780134217314/9780134217314-PYMC_13_00', - 'only_matching': True, - }, { - 'url': 'https://learning.oreilly.com/videos/hadoop-fundamentals-livelessons/9780133392838/9780133392838-00_SeriesIntro', - 'only_matching': True, - }, { - 'url': 'https://www.oreilly.com/library/view/hadoop-fundamentals-livelessons/9780133392838/00_SeriesIntro.html', - 'only_matching': True, - }] - - _PARTNER_ID = '1926081' - _UICONF_ID = '29375172' - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - - reference_id = mobj.group('reference_id') - if reference_id: - video_id = reference_id - 
partner_id = self._PARTNER_ID - ui_id = self._UICONF_ID - else: - video_id = '%s-%s' % (mobj.group('course_id'), mobj.group('part')) - - webpage, urlh = self._download_webpage_handle(url, video_id) - - mobj = re.match(self._VALID_URL, urlh.geturl()) - reference_id = mobj.group('reference_id') - if not reference_id: - reference_id = self._search_regex( - r'data-reference-id=(["\'])(?P(?:(?!\1).)+)\1', - webpage, 'kaltura reference id', group='id') - partner_id = self._search_regex( - r'data-partner-id=(["\'])(?P(?:(?!\1).)+)\1', - webpage, 'kaltura widget id', default=self._PARTNER_ID, - group='id') - ui_id = self._search_regex( - r'data-ui-id=(["\'])(?P(?:(?!\1).)+)\1', - webpage, 'kaltura uiconf id', default=self._UICONF_ID, - group='id') - - query = { - 'wid': '_%s' % partner_id, - 'uiconf_id': ui_id, - 'flashvars[referenceId]': reference_id, - } - - if self.LOGGED_IN: - kaltura_session = self._download_json( - '%s/player/kaltura_session/?reference_id=%s' % (self._API_BASE, reference_id), - video_id, 'Downloading kaltura session JSON', - 'Unable to download kaltura session JSON', fatal=False, - headers={'Accept': 'application/json'}) - if kaltura_session: - session = kaltura_session.get('session') - if session: - query['flashvars[ks]'] = session - - return self.url_result(update_url_query( - 'https://cdnapisec.kaltura.com/html5/html5lib/v2.37.1/mwEmbedFrame.php', query), - 'Kaltura') - - -class SafariApiIE(SafariBaseIE): - IE_NAME = 'safari:api' - _VALID_URL = r'https?://(?:www\.)?(?:safaribooksonline|(?:learning\.)?oreilly)\.com/api/v1/book/(?P[^/]+)/chapter(?:-content)?/(?P[^/?#&]+)\.html' - - _TESTS = [{ - 'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html', - 'only_matching': True, - }, { - 'url': 'https://www.safaribooksonline.com/api/v1/book/9780134664057/chapter/RHCE_Introduction.html', - 'only_matching': True, - }] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - part = self._download_json( - 
url, '%s/%s' % (mobj.group('course_id'), mobj.group('part')), - 'Downloading part JSON') - return self.url_result(part['web_url'], SafariIE.ie_key()) - - -class SafariCourseIE(SafariBaseIE): - IE_NAME = 'safari:course' - IE_DESC = 'safaribooksonline.com online courses' - - _VALID_URL = r'''(?x) - https?:// - (?: - (?:www\.)?(?:safaribooksonline|(?:learning\.)?oreilly)\.com/ - (?: - library/view/[^/]+| - api/v1/book| - videos/[^/]+ - )| - techbus\.safaribooksonline\.com - ) - /(?P[^/]+) - ''' - - _TESTS = [{ - 'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/', - 'info_dict': { - 'id': '9780133392838', - 'title': 'Hadoop Fundamentals LiveLessons', - }, - 'playlist_count': 22, - 'skip': 'Requires safaribooksonline account credentials', - }, { - 'url': 'https://www.safaribooksonline.com/api/v1/book/9781449396459/?override_format=json', - 'only_matching': True, - }, { - 'url': 'http://techbus.safaribooksonline.com/9780134426365', - 'only_matching': True, - }, { - 'url': 'https://www.safaribooksonline.com/videos/python-programming-language/9780134217314', - 'only_matching': True, - }, { - 'url': 'https://learning.oreilly.com/videos/hadoop-fundamentals-livelessons/9780133392838', - 'only_matching': True, - }, { - 'url': 'https://www.oreilly.com/library/view/hadoop-fundamentals-livelessons/9780133392838/', - 'only_matching': True, - }] - - @classmethod - def suitable(cls, url): - return (False if SafariIE.suitable(url) or SafariApiIE.suitable(url) - else super(SafariCourseIE, cls).suitable(url)) - - def _real_extract(self, url): - course_id = self._match_id(url) - - course_json = self._download_json( - '%s/book/%s/?override_format=%s' % (self._API_BASE, course_id, self._API_FORMAT), - course_id, 'Downloading course JSON') - - if 'chapters' not in course_json: - raise ExtractorError( - 'No chapters found for course %s' % course_id, expected=True) - - entries = [ - self.url_result(chapter, SafariApiIE.ie_key()) - for 
chapter in course_json['chapters']] - - course_title = course_json['title'] - - return self.playlist_result(entries, course_id, course_title) diff --git a/youtube_dl/extractor/samplefocus.py b/youtube_dl/extractor/samplefocus.py deleted file mode 100644 index 806c3c354..000000000 --- a/youtube_dl/extractor/samplefocus.py +++ /dev/null @@ -1,100 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - extract_attributes, - get_element_by_attribute, - int_or_none, -) - - -class SampleFocusIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?samplefocus\.com/samples/(?P[^/?&#]+)' - _TESTS = [{ - 'url': 'https://samplefocus.com/samples/lil-peep-sad-emo-guitar', - 'md5': '48c8d62d60be467293912e0e619a5120', - 'info_dict': { - 'id': '40316', - 'display_id': 'lil-peep-sad-emo-guitar', - 'ext': 'mp3', - 'title': 'Lil Peep Sad Emo Guitar', - 'thumbnail': r're:^https?://.+\.png', - 'license': 'Standard License', - 'uploader': 'CapsCtrl', - 'uploader_id': 'capsctrl', - 'like_count': int, - 'comment_count': int, - 'categories': ['Samples', 'Guitar', 'Electric guitar'], - }, - }, { - 'url': 'https://samplefocus.com/samples/dababy-style-bass-808', - 'only_matching': True - }, { - 'url': 'https://samplefocus.com/samples/young-chop-kick', - 'only_matching': True - }] - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - - sample_id = self._search_regex( - r']+id=(["\'])sample_id\1[^>]+value=(?:["\'])(?P\d+)', - webpage, 'sample id', group='id') - - title = self._og_search_title(webpage, fatal=False) or self._html_search_regex( - r'

    (.+?)

    ', webpage, 'title') - - mp3_url = self._search_regex( - r']+id=(["\'])sample_mp3\1[^>]+value=(["\'])(?P(?:(?!\2).)+)', - webpage, 'mp3', fatal=False, group='url') or extract_attributes(self._search_regex( - r']+itemprop=(["\'])contentUrl\1[^>]*>', - webpage, 'mp3 url', group=0))['content'] - - thumbnail = self._og_search_thumbnail(webpage) or self._html_search_regex( - r']+class=(?:["\'])waveform responsive-img[^>]+src=(["\'])(?P(?:(?!\1).)+)', - webpage, 'mp3', fatal=False, group='url') - - comments = [] - for author_id, author, body in re.findall(r'(?s)]+class="comment-author">]+href="/users/([^"]+)">([^"]+).+?]+class="comment-body">([^>]+)

    ', webpage): - comments.append({ - 'author': author, - 'author_id': author_id, - 'text': body, - }) - - uploader_id = uploader = None - mobj = re.search(r'>By ]+href="/users/([^"]+)"[^>]*>([^<]+)', webpage) - if mobj: - uploader_id, uploader = mobj.groups() - - breadcrumb = get_element_by_attribute('typeof', 'BreadcrumbList', webpage) - categories = [] - if breadcrumb: - for _, name in re.findall(r']+property=(["\'])name\1[^>]*>([^<]+)', breadcrumb): - categories.append(name) - - def extract_count(klass): - return int_or_none(self._html_search_regex( - r']+class=(?:["\'])?%s-count[^>]*>(\d+)' % klass, - webpage, klass, fatal=False)) - - return { - 'id': sample_id, - 'title': title, - 'url': mp3_url, - 'display_id': display_id, - 'thumbnail': thumbnail, - 'uploader': uploader, - 'license': self._html_search_regex( - r']+href=(["\'])/license\1[^>]*>(?P[^<]+)<', - webpage, 'license', fatal=False, group='license'), - 'uploader_id': uploader_id, - 'like_count': extract_count('sample-%s-favorites' % sample_id), - 'comment_count': extract_count('comments'), - 'comments': comments, - 'categories': categories, - } diff --git a/youtube_dl/extractor/sapo.py b/youtube_dl/extractor/sapo.py deleted file mode 100644 index 49a9b313a..000000000 --- a/youtube_dl/extractor/sapo.py +++ /dev/null @@ -1,119 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - parse_duration, - unified_strdate, -) - - -class SapoIE(InfoExtractor): - IE_DESC = 'SAPO Vídeos' - _VALID_URL = r'https?://(?:(?:v2|www)\.)?videos\.sapo\.(?:pt|cv|ao|mz|tl)/(?P[\da-zA-Z]{20})' - - _TESTS = [ - { - 'url': 'http://videos.sapo.pt/UBz95kOtiWYUMTA5Ghfi', - 'md5': '79ee523f6ecb9233ac25075dee0eda83', - 'note': 'SD video', - 'info_dict': { - 'id': 'UBz95kOtiWYUMTA5Ghfi', - 'ext': 'mp4', - 'title': 'Benfica - Marcas na Hitória', - 'description': 'md5:c9082000a128c3fd57bf0299e1367f22', - 'duration': 264, - 'uploader': 'tiago_1988', - 
'upload_date': '20080229', - 'categories': ['benfica', 'cabral', 'desporto', 'futebol', 'geovanni', 'hooijdonk', 'joao', 'karel', 'lisboa', 'miccoli'], - }, - }, - { - 'url': 'http://videos.sapo.pt/IyusNAZ791ZdoCY5H5IF', - 'md5': '90a2f283cfb49193fe06e861613a72aa', - 'note': 'HD video', - 'info_dict': { - 'id': 'IyusNAZ791ZdoCY5H5IF', - 'ext': 'mp4', - 'title': 'Codebits VII - Report', - 'description': 'md5:6448d6fd81ce86feac05321f354dbdc8', - 'duration': 144, - 'uploader': 'codebits', - 'upload_date': '20140427', - 'categories': ['codebits', 'codebits2014'], - }, - }, - { - 'url': 'http://v2.videos.sapo.pt/yLqjzPtbTimsn2wWBKHz', - 'md5': 'e5aa7cc0bdc6db9b33df1a48e49a15ac', - 'note': 'v2 video', - 'info_dict': { - 'id': 'yLqjzPtbTimsn2wWBKHz', - 'ext': 'mp4', - 'title': 'Hipnose Condicionativa 4', - 'description': 'md5:ef0481abf8fb4ae6f525088a6dadbc40', - 'duration': 692, - 'uploader': 'sapozen', - 'upload_date': '20090609', - 'categories': ['condicionativa', 'heloisa', 'hipnose', 'miranda', 'sapo', 'zen'], - }, - }, - ] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - - item = self._download_xml( - 'http://rd3.videos.sapo.pt/%s/rss2' % video_id, video_id).find('./channel/item') - - title = item.find('./title').text - description = item.find('./{http://videos.sapo.pt/mrss/}synopse').text - thumbnail = item.find('./{http://search.yahoo.com/mrss/}content').get('url') - duration = parse_duration(item.find('./{http://videos.sapo.pt/mrss/}time').text) - uploader = item.find('./{http://videos.sapo.pt/mrss/}author').text - upload_date = unified_strdate(item.find('./pubDate').text) - view_count = int(item.find('./{http://videos.sapo.pt/mrss/}views').text) - comment_count = int(item.find('./{http://videos.sapo.pt/mrss/}comment_count').text) - tags = item.find('./{http://videos.sapo.pt/mrss/}tags').text - categories = tags.split() if tags else [] - age_limit = 18 if item.find('./{http://videos.sapo.pt/mrss/}m18').text 
== 'true' else 0 - - video_url = item.find('./{http://videos.sapo.pt/mrss/}videoFile').text - video_size = item.find('./{http://videos.sapo.pt/mrss/}videoSize').text.split('x') - - formats = [{ - 'url': video_url, - 'ext': 'mp4', - 'format_id': 'sd', - 'width': int(video_size[0]), - 'height': int(video_size[1]), - }] - - if item.find('./{http://videos.sapo.pt/mrss/}HD').text == 'true': - formats.append({ - 'url': re.sub(r'/mov/1$', '/mov/39', video_url), - 'ext': 'mp4', - 'format_id': 'hd', - 'width': 1280, - 'height': 720, - }) - - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'duration': duration, - 'uploader': uploader, - 'upload_date': upload_date, - 'view_count': view_count, - 'comment_count': comment_count, - 'categories': categories, - 'age_limit': age_limit, - 'formats': formats, - } diff --git a/youtube_dl/extractor/savefrom.py b/youtube_dl/extractor/savefrom.py deleted file mode 100644 index 21e44b69a..000000000 --- a/youtube_dl/extractor/savefrom.py +++ /dev/null @@ -1,34 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import os.path -import re - -from .common import InfoExtractor - - -class SaveFromIE(InfoExtractor): - IE_NAME = 'savefrom.net' - _VALID_URL = r'https?://[^.]+\.savefrom\.net/\#url=(?P.*)$' - - _TEST = { - 'url': 'http://en.savefrom.net/#url=http://youtube.com/watch?v=UlVRAPW2WJY&utm_source=youtube.com&utm_medium=short_domains&utm_campaign=ssyoutube.com', - 'info_dict': { - 'id': 'UlVRAPW2WJY', - 'ext': 'mp4', - 'title': 'About Team Radical MMA | MMA Fighting', - 'upload_date': '20120816', - 'uploader': 'Howcast', - 'uploader_id': 'Howcast', - 'description': r're:(?s).* Hi, my name is Rene Dreifuss\. 
And I\'m here to show you some MMA.*', - }, - 'params': { - 'skip_download': True - } - } - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = os.path.splitext(url.split('/')[-1])[0] - - return self.url_result(mobj.group('url'), video_id=video_id) diff --git a/youtube_dl/extractor/sbs.py b/youtube_dl/extractor/sbs.py deleted file mode 100644 index 0a806ee4e..000000000 --- a/youtube_dl/extractor/sbs.py +++ /dev/null @@ -1,78 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - smuggle_url, - ExtractorError, -) - - -class SBSIE(InfoExtractor): - IE_DESC = 'sbs.com.au' - _VALID_URL = r'https?://(?:www\.)?sbs\.com\.au/(?:ondemand(?:/video/(?:single/)?|.*?\bplay=|/watch/)|news/(?:embeds/)?video/)(?P[0-9]+)' - - _TESTS = [{ - # Original URL is handled by the generic IE which finds the iframe: - # http://www.sbs.com.au/thefeed/blog/2014/08/21/dingo-conservation - 'url': 'http://www.sbs.com.au/ondemand/video/single/320403011771/?source=drupal&vertical=thefeed', - 'md5': '3150cf278965eeabb5b4cea1c963fe0a', - 'info_dict': { - 'id': '_rFBPRPO4pMR', - 'ext': 'mp4', - 'title': 'Dingo Conservation (The Feed)', - 'description': 'md5:f250a9856fca50d22dec0b5b8015f8a5', - 'thumbnail': r're:http://.*\.jpg', - 'duration': 308, - 'timestamp': 1408613220, - 'upload_date': '20140821', - 'uploader': 'SBSC', - }, - }, { - 'url': 'http://www.sbs.com.au/ondemand/video/320403011771/Dingo-Conservation-The-Feed', - 'only_matching': True, - }, { - 'url': 'http://www.sbs.com.au/news/video/471395907773/The-Feed-July-9', - 'only_matching': True, - }, { - 'url': 'https://www.sbs.com.au/ondemand/?play=1836638787723', - 'only_matching': True, - }, { - 'url': 'https://www.sbs.com.au/ondemand/program/inside-windsor-castle?play=1283505731842', - 'only_matching': True, - }, { - 'url': 'https://www.sbs.com.au/news/embeds/video/1840778819866', - 'only_matching': True, - }, { - 'url': 
'https://www.sbs.com.au/ondemand/watch/1698704451971', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - player_params = self._download_json( - 'http://www.sbs.com.au/api/video_pdkvars/id/%s?form=json' % video_id, video_id) - - error = player_params.get('error') - if error: - error_message = 'Sorry, The video you are looking for does not exist.' - video_data = error.get('results') or {} - error_code = error.get('errorCode') - if error_code == 'ComingSoon': - error_message = '%s is not yet available.' % video_data.get('title', '') - elif error_code in ('Forbidden', 'intranetAccessOnly'): - error_message = 'Sorry, This video cannot be accessed via this website' - elif error_code == 'Expired': - error_message = 'Sorry, %s is no longer available.' % video_data.get('title', '') - raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True) - - urls = player_params['releaseUrls'] - theplatform_url = (urls.get('progressive') or urls.get('html') - or urls.get('standard') or player_params['relatedItemsURL']) - - return { - '_type': 'url_transparent', - 'ie_key': 'ThePlatform', - 'id': video_id, - 'url': smuggle_url(self._proto_relative_url(theplatform_url), {'force_smil_url': True}), - } diff --git a/youtube_dl/extractor/screencast.py b/youtube_dl/extractor/screencast.py deleted file mode 100644 index 69a0d01f3..000000000 --- a/youtube_dl/extractor/screencast.py +++ /dev/null @@ -1,123 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..compat import ( - compat_parse_qs, - compat_urllib_request, -) -from ..utils import ( - ExtractorError, -) - - -class ScreencastIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?screencast\.com/t/(?P[a-zA-Z0-9]+)' - _TESTS = [{ - 'url': 'http://www.screencast.com/t/3ZEjQXlT', - 'md5': '917df1c13798a3e96211dd1561fded83', - 'info_dict': { - 'id': '3ZEjQXlT', - 'ext': 'm4v', - 'title': 'Color Measurement with 
Ocean Optics Spectrometers', - 'description': 'md5:240369cde69d8bed61349a199c5fb153', - 'thumbnail': r're:^https?://.*\.(?:gif|jpg)$', - } - }, { - 'url': 'http://www.screencast.com/t/V2uXehPJa1ZI', - 'md5': 'e8e4b375a7660a9e7e35c33973410d34', - 'info_dict': { - 'id': 'V2uXehPJa1ZI', - 'ext': 'mov', - 'title': 'The Amadeus Spectrometer', - 'description': 're:^In this video, our friends at.*To learn more about Amadeus, visit', - 'thumbnail': r're:^https?://.*\.(?:gif|jpg)$', - } - }, { - 'url': 'http://www.screencast.com/t/aAB3iowa', - 'md5': 'dedb2734ed00c9755761ccaee88527cd', - 'info_dict': { - 'id': 'aAB3iowa', - 'ext': 'mp4', - 'title': 'Google Earth Export', - 'description': 'Provides a demo of a CommunityViz export to Google Earth, one of the 3D viewing options.', - 'thumbnail': r're:^https?://.*\.(?:gif|jpg)$', - } - }, { - 'url': 'http://www.screencast.com/t/X3ddTrYh', - 'md5': '669ee55ff9c51988b4ebc0877cc8b159', - 'info_dict': { - 'id': 'X3ddTrYh', - 'ext': 'wmv', - 'title': 'Toolkit 6 User Group Webinar (2014-03-04) - Default Judgment and First Impression', - 'description': 'md5:7b9f393bc92af02326a5c5889639eab0', - 'thumbnail': r're:^https?://.*\.(?:gif|jpg)$', - } - }, { - 'url': 'http://screencast.com/t/aAB3iowa', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - video_url = self._html_search_regex( - r'(?:(?!\1).)+)\1', - webpage, 'video url', default=None, group='url') - - if video_url is None: - video_url = self._html_search_meta( - 'og:video', webpage, default=None) - - if video_url is None: - raise ExtractorError('Cannot find video') - - title = self._og_search_title(webpage, default=None) - if title is None: - title = self._html_search_regex( - [r'Title: ([^<]+)
    ', - r'class="tabSeperator">>(.+?)<', - r'([^<]+)'], - webpage, 'title') - thumbnail = self._og_search_thumbnail(webpage) - description = self._og_search_description(webpage, default=None) - if description is None: - description = self._html_search_meta('description', webpage) - - return { - 'id': video_id, - 'url': video_url, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - } diff --git a/youtube_dl/extractor/screencastomatic.py b/youtube_dl/extractor/screencastomatic.py deleted file mode 100644 index 0afdc1715..000000000 --- a/youtube_dl/extractor/screencastomatic.py +++ /dev/null @@ -1,51 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - get_element_by_class, - int_or_none, - remove_start, - strip_or_none, - unified_strdate, -) - - -class ScreencastOMaticIE(InfoExtractor): - _VALID_URL = r'https?://screencast-o-matic\.com/(?:(?:watch|player)/|embed\?.*?\bsc=)(?P[0-9a-zA-Z]+)' - _TESTS = [{ - 'url': 'http://screencast-o-matic.com/watch/c2lD3BeOPl', - 'md5': '483583cb80d92588f15ccbedd90f0c18', - 'info_dict': { - 'id': 'c2lD3BeOPl', - 'ext': 'mp4', - 'title': 'Welcome to 3-4 Philosophy @ DECV!', - 'thumbnail': r're:^https?://.*\.jpg$', - 'description': 'as the title says! 
also: some general info re 1) VCE philosophy and 2) distance learning.', - 'duration': 369, - 'upload_date': '20141216', - } - }, { - 'url': 'http://screencast-o-matic.com/player/c2lD3BeOPl', - 'only_matching': True, - }, { - 'url': 'http://screencast-o-matic.com/embed?ff=true&sc=cbV2r4Q5TL&fromPH=true&a=1', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage( - 'https://screencast-o-matic.com/player/' + video_id, video_id) - info = self._parse_html5_media_entries(url, webpage, video_id)[0] - info.update({ - 'id': video_id, - 'title': get_element_by_class('overlayTitle', webpage), - 'description': strip_or_none(get_element_by_class('overlayDescription', webpage)) or None, - 'duration': int_or_none(self._search_regex( - r'player\.duration\s*=\s*function\(\)\s*{\s*return\s+(\d+);\s*};', - webpage, 'duration', default=None)), - 'upload_date': unified_strdate(remove_start( - get_element_by_class('overlayPublished', webpage), 'Published: ')), - }) - return info diff --git a/youtube_dl/extractor/scrippsnetworks.py b/youtube_dl/extractor/scrippsnetworks.py deleted file mode 100644 index b40b4c4af..000000000 --- a/youtube_dl/extractor/scrippsnetworks.py +++ /dev/null @@ -1,152 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import json -import hashlib -import re - -from .aws import AWSIE -from .anvato import AnvatoIE -from .common import InfoExtractor -from ..utils import ( - smuggle_url, - urlencode_postdata, - xpath_text, -) - - -class ScrippsNetworksWatchIE(AWSIE): - IE_NAME = 'scrippsnetworks:watch' - _VALID_URL = r'''(?x) - https?:// - watch\. 
- (?Pgeniuskitchen)\.com/ - (?: - player\.[A-Z0-9]+\.html\#| - show/(?:[^/]+/){2}| - player/ - ) - (?P\d+) - ''' - _TESTS = [{ - 'url': 'http://watch.geniuskitchen.com/player/3787617/Ample-Hills-Ice-Cream-Bike/', - 'info_dict': { - 'id': '4194875', - 'ext': 'mp4', - 'title': 'Ample Hills Ice Cream Bike', - 'description': 'Courtney Rada churns up a signature GK Now ice cream with The Scoopmaster.', - 'uploader': 'ANV', - 'upload_date': '20171011', - 'timestamp': 1507698000, - }, - 'params': { - 'skip_download': True, - }, - 'add_ie': [AnvatoIE.ie_key()], - }] - - _SNI_TABLE = { - 'geniuskitchen': 'genius', - } - - _AWS_API_KEY = 'E7wSQmq0qK6xPrF13WmzKiHo4BQ7tip4pQcSXVl1' - _AWS_PROXY_HOST = 'web.api.video.snidigital.com' - - _AWS_USER_AGENT = 'aws-sdk-js/2.80.0 callback' - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - site_id, video_id = mobj.group('site', 'id') - - aws_identity_id_json = json.dumps({ - 'IdentityId': '%s:7655847c-0ae7-4d9b-80d6-56c062927eb3' % self._AWS_REGION - }).encode('utf-8') - token = self._download_json( - 'https://cognito-identity.%s.amazonaws.com/' % self._AWS_REGION, video_id, - data=aws_identity_id_json, - headers={ - 'Accept': '*/*', - 'Content-Type': 'application/x-amz-json-1.1', - 'Referer': url, - 'X-Amz-Content-Sha256': hashlib.sha256(aws_identity_id_json).hexdigest(), - 'X-Amz-Target': 'AWSCognitoIdentityService.GetOpenIdToken', - 'X-Amz-User-Agent': self._AWS_USER_AGENT, - })['Token'] - - sts = self._download_xml( - 'https://sts.amazonaws.com/', video_id, data=urlencode_postdata({ - 'Action': 'AssumeRoleWithWebIdentity', - 'RoleArn': 'arn:aws:iam::710330595350:role/Cognito_WebAPIUnauth_Role', - 'RoleSessionName': 'web-identity', - 'Version': '2011-06-15', - 'WebIdentityToken': token, - }), headers={ - 'Referer': url, - 'X-Amz-User-Agent': self._AWS_USER_AGENT, - 'Content-Type': 'application/x-www-form-urlencoded; charset=utf-8', - }) - - def get(key): - return xpath_text( - sts, 
'.//{https://sts.amazonaws.com/doc/2011-06-15/}%s' % key, - fatal=True) - - mcp_id = self._aws_execute_api({ - 'uri': '/1/web/brands/%s/episodes/scrid/%s' % (self._SNI_TABLE[site_id], video_id), - 'access_key': get('AccessKeyId'), - 'secret_key': get('SecretAccessKey'), - 'session_token': get('SessionToken'), - }, video_id)['results'][0]['mcpId'] - - return self.url_result( - smuggle_url( - 'anvato:anvato_scripps_app_web_prod_0837996dbe373629133857ae9eb72e740424d80a:%s' % mcp_id, - {'geo_countries': ['US']}), - AnvatoIE.ie_key(), video_id=mcp_id) - - -class ScrippsNetworksIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?Pcookingchanneltv|discovery|(?:diy|food)network|hgtv|travelchannel)\.com/videos/[0-9a-z-]+-(?P\d+)' - _TESTS = [{ - 'url': 'https://www.cookingchanneltv.com/videos/the-best-of-the-best-0260338', - 'info_dict': { - 'id': '0260338', - 'ext': 'mp4', - 'title': 'The Best of the Best', - 'description': 'Catch a new episode of MasterChef Canada Tuedsay at 9/8c.', - 'timestamp': 1475678834, - 'upload_date': '20161005', - 'uploader': 'SCNI-SCND', - }, - 'add_ie': ['ThePlatform'], - }, { - 'url': 'https://www.diynetwork.com/videos/diy-barnwood-tablet-stand-0265790', - 'only_matching': True, - }, { - 'url': 'https://www.foodnetwork.com/videos/chocolate-strawberry-cake-roll-7524591', - 'only_matching': True, - }, { - 'url': 'https://www.hgtv.com/videos/cookie-decorating-101-0301929', - 'only_matching': True, - }, { - 'url': 'https://www.travelchannel.com/videos/two-climates-one-bag-5302184', - 'only_matching': True, - }, { - 'url': 'https://www.discovery.com/videos/guardians-of-the-glades-cooking-with-tom-cobb-5578368', - 'only_matching': True, - }] - _ACCOUNT_MAP = { - 'cookingchanneltv': 2433005105, - 'discovery': 2706091867, - 'diynetwork': 2433004575, - 'foodnetwork': 2433005105, - 'hgtv': 2433004575, - 'travelchannel': 2433005739, - } - _TP_TEMPL = 'https://link.theplatform.com/s/ip77QC/media/guid/%d/%s?mbr=true' - - def _real_extract(self, url): 
- site, guid = re.match(self._VALID_URL, url).groups() - return self.url_result(smuggle_url( - self._TP_TEMPL % (self._ACCOUNT_MAP[site], guid), - {'force_smil_url': True}), 'ThePlatform', guid) diff --git a/youtube_dl/extractor/scte.py b/youtube_dl/extractor/scte.py deleted file mode 100644 index ca1de63b6..000000000 --- a/youtube_dl/extractor/scte.py +++ /dev/null @@ -1,144 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - decode_packed_codes, - ExtractorError, - urlencode_postdata, -) - - -class SCTEBaseIE(InfoExtractor): - _LOGIN_URL = 'https://www.scte.org/SCTE/Sign_In.aspx' - _NETRC_MACHINE = 'scte' - - def _real_initialize(self): - self._login() - - def _login(self): - username, password = self._get_login_info() - if username is None: - return - - login_popup = self._download_webpage( - self._LOGIN_URL, None, 'Downloading login popup') - - def is_logged(webpage): - return any(re.search(p, webpage) for p in ( - r'class=["\']welcome\b', r'>Sign Out<')) - - # already logged in - if is_logged(login_popup): - return - - login_form = self._hidden_inputs(login_popup) - - login_form.update({ - 'ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$signInUserName': username, - 'ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$signInPassword': password, - 'ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$RememberMe': 'on', - }) - - response = self._download_webpage( - self._LOGIN_URL, None, 'Logging in', - data=urlencode_postdata(login_form)) - - if '|pageRedirect|' not in response and not is_logged(response): - error = self._html_search_regex( - r'(?s)<[^>]+class=["\']AsiError["\'][^>]*>(.+?)\d+)' - _TESTS = [{ - 'url': 'https://learning.scte.org/mod/scorm/view.php?id=31484', - 'info_dict': { - 'title': 'Introduction to DOCSIS Engineering Professional', - 'id': '31484', - }, - 
'playlist_count': 5, - 'skip': 'Requires account credentials', - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - title = self._search_regex(r'

    (.+?)

    ', webpage, 'title') - - context_id = self._search_regex(r'context-(\d+)', webpage, video_id) - content_base = 'https://learning.scte.org/pluginfile.php/%s/mod_scorm/content/8/' % context_id - context = decode_packed_codes(self._download_webpage( - '%smobile/data.js' % content_base, video_id)) - - data = self._parse_xml( - self._search_regex( - r'CreateData\(\s*"(.+?)"', context, 'data').replace(r"\'", "'"), - video_id) - - entries = [] - for asset in data.findall('.//asset'): - asset_url = asset.get('url') - if not asset_url or not asset_url.endswith('.mp4'): - continue - asset_id = self._search_regex( - r'video_([^_]+)_', asset_url, 'asset id', default=None) - if not asset_id: - continue - entries.append({ - 'id': asset_id, - 'title': title, - 'url': content_base + asset_url, - }) - - return self.playlist_result(entries, video_id, title) - - -class SCTECourseIE(SCTEBaseIE): - _VALID_URL = r'https?://learning\.scte\.org/(?:mod/sub)?course/view\.php?.*?\bid=(?P\d+)' - _TESTS = [{ - 'url': 'https://learning.scte.org/mod/subcourse/view.php?id=31491', - 'only_matching': True, - }, { - 'url': 'https://learning.scte.org/course/view.php?id=3639', - 'only_matching': True, - }, { - 'url': 'https://learning.scte.org/course/view.php?id=3073', - 'only_matching': True, - }] - - def _real_extract(self, url): - course_id = self._match_id(url) - - webpage = self._download_webpage(url, course_id) - - title = self._search_regex( - r'

    (.+?)

    ', webpage, 'title', default=None) - - entries = [] - for mobj in re.finditer( - r'''(?x) - ]+ - href=(["\']) - (?P - https?://learning\.scte\.org/mod/ - (?Pscorm|subcourse)/view\.php?(?:(?!\1).)*? - \bid=\d+ - ) - ''', - webpage): - item_url = mobj.group('url') - if item_url == url: - continue - ie = (SCTEIE.ie_key() if mobj.group('kind') == 'scorm' - else SCTECourseIE.ie_key()) - entries.append(self.url_result(item_url, ie=ie)) - - return self.playlist_result(entries, course_id, title) diff --git a/youtube_dl/extractor/seeker.py b/youtube_dl/extractor/seeker.py deleted file mode 100644 index 7872dc80d..000000000 --- a/youtube_dl/extractor/seeker.py +++ /dev/null @@ -1,58 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - get_element_by_class, - strip_or_none, -) - - -class SeekerIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?seeker\.com/(?P.*)-(?P\d+)\.html' - _TESTS = [{ - 'url': 'http://www.seeker.com/should-trump-be-required-to-release-his-tax-returns-1833805621.html', - 'md5': '897d44bbe0d8986a2ead96de565a92db', - 'info_dict': { - 'id': 'Elrn3gnY', - 'ext': 'mp4', - 'title': 'Should Trump Be Required To Release His Tax Returns?', - 'description': 'md5:41efa8cfa8d627841045eec7b018eb45', - 'timestamp': 1490090165, - 'upload_date': '20170321', - } - }, { - 'url': 'http://www.seeker.com/changes-expected-at-zoos-following-recent-gorilla-lion-shootings-1834116536.html', - 'playlist': [ - { - 'md5': '0497b9f20495174be73ae136949707d2', - 'info_dict': { - 'id': 'FihYQ8AE', - 'ext': 'mp4', - 'title': 'The Pros & Cons Of Zoos', - 'description': 'md5:d88f99a8ea8e7d25e6ff77f271b1271c', - 'timestamp': 1490039133, - 'upload_date': '20170320', - }, - } - ], - 'info_dict': { - 'id': '1834116536', - 'title': 'After Gorilla Killing, Changes Ahead for Zoos', - 'description': 'The largest association of zoos and others are hoping to learn from recent incidents that led to the 
shooting deaths of a gorilla and two lions.', - }, - }] - - def _real_extract(self, url): - display_id, article_id = re.match(self._VALID_URL, url).groups() - webpage = self._download_webpage(url, display_id) - entries = [] - for jwp_id in re.findall(r'data-video-id="([a-zA-Z0-9]{8})"', webpage): - entries.append(self.url_result( - 'jwplatform:' + jwp_id, 'JWPlatform', jwp_id)) - return self.playlist_result( - entries, article_id, - self._og_search_title(webpage), - strip_or_none(get_element_by_class('subtitle__text', webpage)) or self._og_search_description(webpage)) diff --git a/youtube_dl/extractor/senateisvp.py b/youtube_dl/extractor/senateisvp.py deleted file mode 100644 index b8ac58713..000000000 --- a/youtube_dl/extractor/senateisvp.py +++ /dev/null @@ -1,153 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - unsmuggle_url, -) -from ..compat import ( - compat_parse_qs, - compat_urlparse, -) - - -class SenateISVPIE(InfoExtractor): - _COMM_MAP = [ - ['ag', '76440', 'http://ag-f.akamaihd.net'], - ['aging', '76442', 'http://aging-f.akamaihd.net'], - ['approps', '76441', 'http://approps-f.akamaihd.net'], - ['armed', '76445', 'http://armed-f.akamaihd.net'], - ['banking', '76446', 'http://banking-f.akamaihd.net'], - ['budget', '76447', 'http://budget-f.akamaihd.net'], - ['cecc', '76486', 'http://srs-f.akamaihd.net'], - ['commerce', '80177', 'http://commerce1-f.akamaihd.net'], - ['csce', '75229', 'http://srs-f.akamaihd.net'], - ['dpc', '76590', 'http://dpc-f.akamaihd.net'], - ['energy', '76448', 'http://energy-f.akamaihd.net'], - ['epw', '76478', 'http://epw-f.akamaihd.net'], - ['ethics', '76449', 'http://ethics-f.akamaihd.net'], - ['finance', '76450', 'http://finance-f.akamaihd.net'], - ['foreign', '76451', 'http://foreign-f.akamaihd.net'], - ['govtaff', '76453', 'http://govtaff-f.akamaihd.net'], - ['help', '76452', 'http://help-f.akamaihd.net'], - ['indian', 
'76455', 'http://indian-f.akamaihd.net'], - ['intel', '76456', 'http://intel-f.akamaihd.net'], - ['intlnarc', '76457', 'http://intlnarc-f.akamaihd.net'], - ['jccic', '85180', 'http://jccic-f.akamaihd.net'], - ['jec', '76458', 'http://jec-f.akamaihd.net'], - ['judiciary', '76459', 'http://judiciary-f.akamaihd.net'], - ['rpc', '76591', 'http://rpc-f.akamaihd.net'], - ['rules', '76460', 'http://rules-f.akamaihd.net'], - ['saa', '76489', 'http://srs-f.akamaihd.net'], - ['smbiz', '76461', 'http://smbiz-f.akamaihd.net'], - ['srs', '75229', 'http://srs-f.akamaihd.net'], - ['uscc', '76487', 'http://srs-f.akamaihd.net'], - ['vetaff', '76462', 'http://vetaff-f.akamaihd.net'], - ['arch', '', 'http://ussenate-f.akamaihd.net/'] - ] - IE_NAME = 'senate.gov' - _VALID_URL = r'https?://(?:www\.)?senate\.gov/isvp/?\?(?P.+)' - _TESTS = [{ - 'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png', - 'info_dict': { - 'id': 'judiciary031715', - 'ext': 'mp4', - 'title': 'Integrated Senate Video Player', - 'thumbnail': r're:^https?://.*\.(?:jpg|png)$', - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, { - 'url': 'http://www.senate.gov/isvp/?type=live&comm=commerce&filename=commerce011514.mp4&auto_play=false', - 'info_dict': { - 'id': 'commerce011514', - 'ext': 'mp4', - 'title': 'Integrated Senate Video Player' - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, { - 'url': 'http://www.senate.gov/isvp/?type=arch&comm=intel&filename=intel090613&hc_location=ufi', - # checksum differs each time - 'info_dict': { - 'id': 'intel090613', - 'ext': 'mp4', - 'title': 'Integrated Senate Video Player' - } - }, { - # From http://www.c-span.org/video/?96791-1 - 'url': 'http://www.senate.gov/isvp?type=live&comm=banking&filename=banking012715', - 'only_matching': True, - }] - - @staticmethod - 
def _search_iframe_url(webpage): - mobj = re.search( - r"]+src=['\"](?Phttps?://www\.senate\.gov/isvp/?\?[^'\"]+)['\"]", - webpage) - if mobj: - return mobj.group('url') - - def _get_info_for_comm(self, committee): - for entry in self._COMM_MAP: - if entry[0] == committee: - return entry[1:] - - def _real_extract(self, url): - url, smuggled_data = unsmuggle_url(url, {}) - - qs = compat_parse_qs(re.match(self._VALID_URL, url).group('qs')) - if not qs.get('filename') or not qs.get('type') or not qs.get('comm'): - raise ExtractorError('Invalid URL', expected=True) - - video_id = re.sub(r'.mp4$', '', qs['filename'][0]) - - webpage = self._download_webpage(url, video_id) - - if smuggled_data.get('force_title'): - title = smuggled_data['force_title'] - else: - title = self._html_search_regex(r'([^<]+)', webpage, video_id) - poster = qs.get('poster') - thumbnail = poster[0] if poster else None - - video_type = qs['type'][0] - committee = video_type if video_type == 'arch' else qs['comm'][0] - stream_num, domain = self._get_info_for_comm(committee) - - formats = [] - if video_type == 'arch': - filename = video_id if '.' in video_id else video_id + '.mp4' - formats = [{ - # All parameters in the query string are necessary to prevent a 403 error - 'url': compat_urlparse.urljoin(domain, filename) + '?v=3.1.0&fp=&r=&g=', - }] - else: - hdcore_sign = 'hdcore=3.1.0' - url_params = (domain, video_id, stream_num) - f4m_url = '%s/z/%s_1@%s/manifest.f4m?' 
% url_params + hdcore_sign - m3u8_url = '%s/i/%s_1@%s/master.m3u8' % url_params - for entry in self._extract_f4m_formats(f4m_url, video_id, f4m_id='f4m'): - # URLs without the extra param induce an 404 error - entry.update({'extra_param_to_segment_url': hdcore_sign}) - formats.append(entry) - for entry in self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', m3u8_id='m3u8'): - mobj = re.search(r'(?P(?:-p|-b)).m3u8', entry['url']) - if mobj: - entry['format_id'] += mobj.group('tag') - formats.append(entry) - - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': title, - 'formats': formats, - 'thumbnail': thumbnail, - } diff --git a/youtube_dl/extractor/sendtonews.py b/youtube_dl/extractor/sendtonews.py deleted file mode 100644 index 9d9652949..000000000 --- a/youtube_dl/extractor/sendtonews.py +++ /dev/null @@ -1,105 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - float_or_none, - parse_iso8601, - update_url_query, - int_or_none, - determine_protocol, - unescapeHTML, -) - - -class SendtoNewsIE(InfoExtractor): - _VALID_URL = r'https?://embed\.sendtonews\.com/player2/embedplayer\.php\?.*\bSC=(?P[0-9A-Za-z-]+)' - - _TEST = { - # From http://cleveland.cbslocal.com/2016/05/16/indians-score-season-high-15-runs-in-blowout-win-over-reds-rapid-reaction/ - 'url': 'http://embed.sendtonews.com/player2/embedplayer.php?SC=GxfCe0Zo7D-175909-5588&type=single&autoplay=on&sound=YES', - 'info_dict': { - 'id': 'GxfCe0Zo7D-175909-5588' - }, - 'playlist_count': 8, - # test the first video only to prevent lengthy tests - 'playlist': [{ - 'info_dict': { - 'id': '240385', - 'ext': 'mp4', - 'title': 'Indians introduce Encarnacion', - 'description': 'Indians president of baseball operations Chris Antonetti and Edwin Encarnacion discuss the slugger\'s three-year contract with Cleveland', - 'duration': 137.898, - 'thumbnail': r're:https?://.*\.jpg$', - 'upload_date': '20170105', - 
'timestamp': 1483649762, - }, - }], - 'params': { - # m3u8 download - 'skip_download': True, - }, - } - - _URL_TEMPLATE = '//embed.sendtonews.com/player2/embedplayer.php?SC=%s' - - @classmethod - def _extract_url(cls, webpage): - mobj = re.search(r'''(?x)]+src=([\'"]) - (?:https?:)?//embed\.sendtonews\.com/player/responsiveembed\.php\? - .*\bSC=(?P[0-9a-zA-Z-]+).* - \1>''', webpage) - if mobj: - sc = mobj.group('SC') - return cls._URL_TEMPLATE % sc - - def _real_extract(self, url): - playlist_id = self._match_id(url) - - data_url = update_url_query( - url.replace('embedplayer.php', 'data_read.php'), - {'cmd': 'loadInitial'}) - playlist_data = self._download_json(data_url, playlist_id) - - entries = [] - for video in playlist_data['playlistData'][0]: - info_dict = self._parse_jwplayer_data( - video['jwconfiguration'], - require_title=False, m3u8_id='hls', rtmp_params={'no_resume': True}) - - for f in info_dict['formats']: - if f.get('tbr'): - continue - tbr = int_or_none(self._search_regex( - r'/(\d+)k/', f['url'], 'bitrate', default=None)) - if not tbr: - continue - f.update({ - 'format_id': '%s-%d' % (determine_protocol(f), tbr), - 'tbr': tbr, - }) - self._sort_formats(info_dict['formats'], ('tbr', 'height', 'width', 'format_id')) - - thumbnails = [] - if video.get('thumbnailUrl'): - thumbnails.append({ - 'id': 'normal', - 'url': video['thumbnailUrl'], - }) - if video.get('smThumbnailUrl'): - thumbnails.append({ - 'id': 'small', - 'url': video['smThumbnailUrl'], - }) - info_dict.update({ - 'title': video['S_headLine'].strip(), - 'description': unescapeHTML(video.get('S_fullStory')), - 'thumbnails': thumbnails, - 'duration': float_or_none(video.get('SM_length')), - 'timestamp': parse_iso8601(video.get('S_sysDate'), delimiter=' '), - }) - entries.append(info_dict) - - return self.playlist_result(entries, playlist_id) diff --git a/youtube_dl/extractor/servus.py b/youtube_dl/extractor/servus.py deleted file mode 100644 index 1610ddc2c..000000000 --- 
a/youtube_dl/extractor/servus.py +++ /dev/null @@ -1,148 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - determine_ext, - float_or_none, - int_or_none, - unified_timestamp, - urlencode_postdata, - url_or_none, -) - - -class ServusIE(InfoExtractor): - _VALID_URL = r'''(?x) - https?:// - (?:www\.)? - (?: - servus\.com/(?:(?:at|de)/p/[^/]+|tv/videos)| - (?:servustv|pm-wissen)\.com/videos - ) - /(?P[aA]{2}-\w+|\d+-\d+) - ''' - _TESTS = [{ - # new URL schema - 'url': 'https://www.servustv.com/videos/aa-1t6vbu5pw1w12/', - 'md5': '60474d4c21f3eb148838f215c37f02b9', - 'info_dict': { - 'id': 'AA-1T6VBU5PW1W12', - 'ext': 'mp4', - 'title': 'Die Grünen aus Sicht des Volkes', - 'alt_title': 'Talk im Hangar-7 Voxpops Gruene', - 'description': 'md5:1247204d85783afe3682644398ff2ec4', - 'thumbnail': r're:^https?://.*\.jpg', - 'duration': 62.442, - 'timestamp': 1605193976, - 'upload_date': '20201112', - 'series': 'Talk im Hangar-7', - 'season': 'Season 9', - 'season_number': 9, - 'episode': 'Episode 31 - September 14', - 'episode_number': 31, - } - }, { - # old URL schema - 'url': 'https://www.servus.com/de/p/Die-Gr%C3%BCnen-aus-Sicht-des-Volkes/AA-1T6VBU5PW1W12/', - 'only_matching': True, - }, { - 'url': 'https://www.servus.com/at/p/Wie-das-Leben-beginnt/1309984137314-381415152/', - 'only_matching': True, - }, { - 'url': 'https://www.servus.com/tv/videos/aa-1t6vbu5pw1w12/', - 'only_matching': True, - }, { - 'url': 'https://www.servus.com/tv/videos/1380889096408-1235196658/', - 'only_matching': True, - }, { - 'url': 'https://www.pm-wissen.com/videos/aa-24mus4g2w2112/', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url).upper() - - token = self._download_json( - 'https://auth.redbullmediahouse.com/token', video_id, - 'Downloading token', data=urlencode_postdata({ - 'grant_type': 'client_credentials', - }), headers={ - 'Authorization': 'Basic 
SVgtMjJYNEhBNFdEM1cxMTpEdDRVSkFLd2ZOMG5IMjB1NGFBWTBmUFpDNlpoQ1EzNA==', - }) - access_token = token['access_token'] - token_type = token.get('token_type', 'Bearer') - - video = self._download_json( - 'https://sparkle-api.liiift.io/api/v1/stv/channels/international/assets/%s' % video_id, - video_id, 'Downloading video JSON', headers={ - 'Authorization': '%s %s' % (token_type, access_token), - }) - - formats = [] - thumbnail = None - for resource in video['resources']: - if not isinstance(resource, dict): - continue - format_url = url_or_none(resource.get('url')) - if not format_url: - continue - extension = resource.get('extension') - type_ = resource.get('type') - if extension == 'jpg' or type_ == 'reference_keyframe': - thumbnail = format_url - continue - ext = determine_ext(format_url) - if type_ == 'dash' or ext == 'mpd': - formats.extend(self._extract_mpd_formats( - format_url, video_id, mpd_id='dash', fatal=False)) - elif type_ == 'hls' or ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( - format_url, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls', fatal=False)) - elif extension == 'mp4' or ext == 'mp4': - formats.append({ - 'url': format_url, - 'format_id': type_, - 'width': int_or_none(resource.get('width')), - 'height': int_or_none(resource.get('height')), - }) - self._sort_formats(formats) - - attrs = {} - for attribute in video['attributes']: - if not isinstance(attribute, dict): - continue - key = attribute.get('fieldKey') - value = attribute.get('fieldValue') - if not key or not value: - continue - attrs[key] = value - - title = attrs.get('title_stv') or video_id - alt_title = attrs.get('title') - description = attrs.get('long_description') or attrs.get('short_description') - series = attrs.get('label') - season = attrs.get('season') - episode = attrs.get('chapter') - duration = float_or_none(attrs.get('duration'), scale=1000) - season_number = int_or_none(self._search_regex( - r'Season (\d+)', season or '', 'season number', 
default=None)) - episode_number = int_or_none(self._search_regex( - r'Episode (\d+)', episode or '', 'episode number', default=None)) - - return { - 'id': video_id, - 'title': title, - 'alt_title': alt_title, - 'description': description, - 'thumbnail': thumbnail, - 'duration': duration, - 'timestamp': unified_timestamp(video.get('lastPublished')), - 'series': series, - 'season': season, - 'season_number': season_number, - 'episode': episode, - 'episode_number': episode_number, - 'formats': formats, - } diff --git a/youtube_dl/extractor/sevenplus.py b/youtube_dl/extractor/sevenplus.py deleted file mode 100644 index 240afc18f..000000000 --- a/youtube_dl/extractor/sevenplus.py +++ /dev/null @@ -1,94 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .brightcove import BrightcoveNewIE -from ..compat import ( - compat_HTTPError, - compat_str, -) -from ..utils import ( - ExtractorError, - try_get, - update_url_query, -) - - -class SevenPlusIE(BrightcoveNewIE): - IE_NAME = '7plus' - _VALID_URL = r'https?://(?:www\.)?7plus\.com\.au/(?P[^?]+\?.*?\bepisode-id=(?P[^&#]+))' - _TESTS = [{ - 'url': 'https://7plus.com.au/MTYS?episode-id=MTYS7-003', - 'info_dict': { - 'id': 'MTYS7-003', - 'ext': 'mp4', - 'title': 'S7 E3 - Wind Surf', - 'description': 'md5:29c6a69f21accda7601278f81b46483d', - 'uploader_id': '5303576322001', - 'upload_date': '20171201', - 'timestamp': 1512106377, - 'series': 'Mighty Ships', - 'season_number': 7, - 'episode_number': 3, - 'episode': 'Wind Surf', - }, - 'params': { - 'format': 'bestvideo', - 'skip_download': True, - } - }, { - 'url': 'https://7plus.com.au/UUUU?episode-id=AUMS43-001', - 'only_matching': True, - }] - - def _real_extract(self, url): - path, episode_id = re.match(self._VALID_URL, url).groups() - - try: - media = self._download_json( - 'https://videoservice.swm.digital/playback', episode_id, query={ - 'appId': '7plus', - 'deviceType': 'web', - 'platformType': 'web', - 'accountId': 5303576322001, - 
'referenceId': 'ref:' + episode_id, - 'deliveryId': 'csai', - 'videoType': 'vod', - })['media'] - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: - raise ExtractorError(self._parse_json( - e.cause.read().decode(), episode_id)[0]['error_code'], expected=True) - raise - - for source in media.get('sources', {}): - src = source.get('src') - if not src: - continue - source['src'] = update_url_query(src, {'rule': ''}) - - info = self._parse_brightcove_metadata(media, episode_id) - - content = self._download_json( - 'https://component-cdn.swm.digital/content/' + path, - episode_id, headers={ - 'market-id': 4, - }, fatal=False) or {} - for item in content.get('items', {}): - if item.get('componentData', {}).get('componentType') == 'infoPanel': - for src_key, dst_key in [('title', 'title'), ('shortSynopsis', 'description')]: - value = item.get(src_key) - if value: - info[dst_key] = value - info['series'] = try_get( - item, lambda x: x['seriesLogo']['name'], compat_str) - mobj = re.search(r'^S(\d+)\s+E(\d+)\s+-\s+(.+)$', info['title']) - if mobj: - info.update({ - 'season_number': int(mobj.group(1)), - 'episode_number': int(mobj.group(2)), - 'episode': mobj.group(3), - }) - - return info diff --git a/youtube_dl/extractor/sexu.py b/youtube_dl/extractor/sexu.py deleted file mode 100644 index 3df51520b..000000000 --- a/youtube_dl/extractor/sexu.py +++ /dev/null @@ -1,63 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor - - -class SexuIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?sexu\.com/(?P\d+)' - _TEST = { - 'url': 'http://sexu.com/961791/', - 'md5': 'ff615aca9691053c94f8f10d96cd7884', - 'info_dict': { - 'id': '961791', - 'ext': 'mp4', - 'title': 'md5:4d05a19a5fc049a63dbbaf05fb71d91b', - 'description': 'md5:2b75327061310a3afb3fbd7d09e2e403', - 'categories': list, # NSFW - 'thumbnail': r're:https?://.*\.jpg$', - 'age_limit': 18, - } - } - - def _real_extract(self, url): - video_id = 
self._match_id(url) - webpage = self._download_webpage(url, video_id) - - jwvideo = self._parse_json( - self._search_regex(r'\.setup\(\s*({.+?})\s*\);', webpage, 'jwvideo'), - video_id) - - sources = jwvideo['sources'] - - formats = [{ - 'url': source['file'].replace('\\', ''), - 'format_id': source.get('label'), - 'height': int(self._search_regex( - r'^(\d+)[pP]', source.get('label', ''), 'height', - default=None)), - } for source in sources if source.get('file')] - self._sort_formats(formats) - - title = self._html_search_regex( - r'([^<]+)\s*-\s*Sexu\.Com', webpage, 'title') - - description = self._html_search_meta( - 'description', webpage, 'description') - - thumbnail = jwvideo.get('image') - - categories_str = self._html_search_meta( - 'keywords', webpage, 'categories') - categories = ( - None if categories_str is None - else categories_str.split(',')) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'categories': categories, - 'formats': formats, - 'age_limit': 18, - } diff --git a/youtube_dl/extractor/seznamzpravy.py b/youtube_dl/extractor/seznamzpravy.py deleted file mode 100644 index 7a1c7e38b..000000000 --- a/youtube_dl/extractor/seznamzpravy.py +++ /dev/null @@ -1,169 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..compat import ( - compat_parse_qs, - compat_str, - compat_urllib_parse_urlparse, -) -from ..utils import ( - urljoin, - int_or_none, - parse_codecs, - try_get, -) - - -def _raw_id(src_url): - return compat_urllib_parse_urlparse(src_url).path.split('/')[-1] - - -class SeznamZpravyIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?seznamzpravy\.cz/iframe/player\?.*\bsrc=' - _TESTS = [{ - 'url': 
'https://www.seznamzpravy.cz/iframe/player?duration=241&serviceSlug=zpravy&src=https%3A%2F%2Fv39-a.sdn.szn.cz%2Fv_39%2Fvmd%2F5999c902ea707c67d8e267a9%3Ffl%3Dmdk%2C432f65a0%7C&itemType=video&autoPlay=false&title=Sv%C4%9Bt%20bez%20obalu%3A%20%C4%8Ce%C5%A1t%C3%AD%20voj%C3%A1ci%20na%20mis%C3%ADch%20(kr%C3%A1tk%C3%A1%20verze)&series=Sv%C4%9Bt%20bez%20obalu&serviceName=Seznam%20Zpr%C3%A1vy&poster=%2F%2Fd39-a.sdn.szn.cz%2Fd_39%2Fc_img_F_I%2FR5puJ.jpeg%3Ffl%3Dcro%2C0%2C0%2C1920%2C1080%7Cres%2C1200%2C%2C1%7Cjpg%2C80%2C%2C1&width=1920&height=1080&cutFrom=0&cutTo=0&splVersion=VOD&contentId=170889&contextId=35990&showAdvert=true&collocation=&autoplayPossible=true&embed=&isVideoTooShortForPreroll=false&isVideoTooLongForPostroll=true&videoCommentOpKey=&videoCommentId=&version=4.0.76&dotService=zpravy&gemiusPrismIdentifier=bVc1ZIb_Qax4W2v5xOPGpMeCP31kFfrTzj0SqPTLh_b.Z7&zoneIdPreroll=seznam.pack.videospot&skipOffsetPreroll=5§ionPrefixPreroll=%2Fzpravy', - 'info_dict': { - 'id': '170889', - 'ext': 'mp4', - 'title': 'Svět bez obalu: Čeští vojáci na misích (krátká verze)', - 'thumbnail': r're:^https?://.*\.jpe?g', - 'duration': 241, - 'series': 'Svět bez obalu', - }, - 'params': { - 'skip_download': True, - }, - }, { - # with Location key - 'url': 
'https://www.seznamzpravy.cz/iframe/player?duration=null&serviceSlug=zpravy&src=https%3A%2F%2Flive-a.sdn.szn.cz%2Fv_39%2F59e468fe454f8472a96af9fa%3Ffl%3Dmdk%2C5c1e2840%7C&itemType=livevod&autoPlay=false&title=P%C5%99edseda%20KDU-%C4%8CSL%20Pavel%20B%C4%9Blobr%C3%A1dek%20ve%20volebn%C3%AD%20V%C3%BDzv%C4%9B%20Seznamu&series=V%C3%BDzva&serviceName=Seznam%20Zpr%C3%A1vy&poster=%2F%2Fd39-a.sdn.szn.cz%2Fd_39%2Fc_img_G_J%2FjTBCs.jpeg%3Ffl%3Dcro%2C0%2C0%2C1280%2C720%7Cres%2C1200%2C%2C1%7Cjpg%2C80%2C%2C1&width=16&height=9&cutFrom=0&cutTo=0&splVersion=VOD&contentId=185688&contextId=38489&showAdvert=true&collocation=&hideFullScreen=false&hideSubtitles=false&embed=&isVideoTooShortForPreroll=false&isVideoTooShortForPreroll2=false&isVideoTooLongForPostroll=false&fakePostrollZoneID=seznam.clanky.zpravy.preroll&fakePrerollZoneID=seznam.clanky.zpravy.preroll&videoCommentId=&trim=default_16x9&noPrerollVideoLength=30&noPreroll2VideoLength=undefined&noMidrollVideoLength=0&noPostrollVideoLength=999999&autoplayPossible=true&version=5.0.41&dotService=zpravy&gemiusPrismIdentifier=zD3g7byfW5ekpXmxTVLaq5Srjw5i4hsYo0HY1aBwIe..27&zoneIdPreroll=seznam.pack.videospot&skipOffsetPreroll=5§ionPrefixPreroll=%2Fzpravy%2Fvyzva&zoneIdPostroll=seznam.pack.videospot&skipOffsetPostroll=5§ionPrefixPostroll=%2Fzpravy%2Fvyzva®ression=false', - 'info_dict': { - 'id': '185688', - 'ext': 'mp4', - 'title': 'Předseda KDU-ČSL Pavel Bělobrádek ve volební Výzvě Seznamu', - 'thumbnail': r're:^https?://.*\.jpe?g', - 'series': 'Výzva', - }, - 'params': { - 'skip_download': True, - }, - }] - - @staticmethod - def _extract_urls(webpage): - return [ - mobj.group('url') for mobj in re.finditer( - r']+\bsrc=(["\'])(?P(?:https?:)?//(?:www\.)?seznamzpravy\.cz/iframe/player\?.*?)\1', - webpage)] - - def _extract_sdn_formats(self, sdn_url, video_id): - sdn_data = self._download_json(sdn_url, video_id) - - if sdn_data.get('Location'): - sdn_url = sdn_data['Location'] - sdn_data = self._download_json(sdn_url, video_id) - - 
formats = [] - mp4_formats = try_get(sdn_data, lambda x: x['data']['mp4'], dict) or {} - for format_id, format_data in mp4_formats.items(): - relative_url = format_data.get('url') - if not relative_url: - continue - - try: - width, height = format_data.get('resolution') - except (TypeError, ValueError): - width, height = None, None - - f = { - 'url': urljoin(sdn_url, relative_url), - 'format_id': 'http-%s' % format_id, - 'tbr': int_or_none(format_data.get('bandwidth'), scale=1000), - 'width': int_or_none(width), - 'height': int_or_none(height), - } - f.update(parse_codecs(format_data.get('codec'))) - formats.append(f) - - pls = sdn_data.get('pls', {}) - - def get_url(format_id): - return try_get(pls, lambda x: x[format_id]['url'], compat_str) - - dash_rel_url = get_url('dash') - if dash_rel_url: - formats.extend(self._extract_mpd_formats( - urljoin(sdn_url, dash_rel_url), video_id, mpd_id='dash', - fatal=False)) - - hls_rel_url = get_url('hls') - if hls_rel_url: - formats.extend(self._extract_m3u8_formats( - urljoin(sdn_url, hls_rel_url), video_id, ext='mp4', - m3u8_id='hls', fatal=False)) - - self._sort_formats(formats) - return formats - - def _real_extract(self, url): - params = compat_parse_qs(compat_urllib_parse_urlparse(url).query) - - src = params['src'][0] - title = params['title'][0] - video_id = params.get('contentId', [_raw_id(src)])[0] - formats = self._extract_sdn_formats(src + 'spl2,2,VOD', video_id) - - duration = int_or_none(params.get('duration', [None])[0]) - series = params.get('series', [None])[0] - thumbnail = params.get('poster', [None])[0] - - return { - 'id': video_id, - 'title': title, - 'thumbnail': thumbnail, - 'duration': duration, - 'series': series, - 'formats': formats, - } - - -class SeznamZpravyArticleIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?:seznam\.cz/zpravy|seznamzpravy\.cz)/clanek/(?:[^/?#&]+)-(?P\d+)' - _API_URL = 'https://apizpravy.seznam.cz/' - - _TESTS = [{ - # two videos on one page, with SDN URL - 'url': 
'https://www.seznamzpravy.cz/clanek/jejich-svet-na-nas-utoci-je-lepsi-branit-se-na-jejich-pisecku-rika-reziser-a-major-v-zaloze-marhoul-35990', - 'info_dict': { - 'id': '35990', - 'title': 'md5:6011c877a36905f28f271fcd8dcdb0f2', - 'description': 'md5:933f7b06fa337a814ba199d3596d27ba', - }, - 'playlist_count': 2, - }, { - # video with live stream URL - 'url': 'https://www.seznam.cz/zpravy/clanek/znovu-do-vlady-s-ano-pavel-belobradek-ve-volebnim-specialu-seznamu-38489', - 'info_dict': { - 'id': '38489', - 'title': 'md5:8fa1afdc36fd378cf0eba2b74c5aca60', - 'description': 'md5:428e7926a1a81986ec7eb23078004fb4', - }, - 'playlist_count': 1, - }] - - def _real_extract(self, url): - article_id = self._match_id(url) - - webpage = self._download_webpage(url, article_id) - - info = self._search_json_ld(webpage, article_id, default={}) - - title = info.get('title') or self._og_search_title(webpage, fatal=False) - description = info.get('description') or self._og_search_description(webpage) - - return self.playlist_result([ - self.url_result(entry_url, ie=SeznamZpravyIE.ie_key()) - for entry_url in SeznamZpravyIE._extract_urls(webpage)], - article_id, title, description) diff --git a/youtube_dl/extractor/shahid.py b/youtube_dl/extractor/shahid.py deleted file mode 100644 index 88b938e05..000000000 --- a/youtube_dl/extractor/shahid.py +++ /dev/null @@ -1,225 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import json -import math -import re - -from .aws import AWSIE -from ..compat import compat_HTTPError -from ..utils import ( - clean_html, - ExtractorError, - InAdvancePagedList, - int_or_none, - parse_iso8601, - str_or_none, - urlencode_postdata, -) - - -class ShahidBaseIE(AWSIE): - _AWS_PROXY_HOST = 'api2.shahid.net' - _AWS_API_KEY = '2RRtuMHx95aNI1Kvtn2rChEuwsCogUd4samGPjLh' - _VALID_URL_BASE = r'https?://shahid\.mbc\.net/[a-z]{2}/' - - def _handle_error(self, e): - fail_data = self._parse_json( - e.cause.read().decode('utf-8'), None, fatal=False) - if 
fail_data: - faults = fail_data.get('faults', []) - faults_message = ', '.join([clean_html(fault['userMessage']) for fault in faults if fault.get('userMessage')]) - if faults_message: - raise ExtractorError(faults_message, expected=True) - - def _call_api(self, path, video_id, request=None): - query = {} - if request: - query['request'] = json.dumps(request) - try: - return self._aws_execute_api({ - 'uri': '/proxy/v2/' + path, - 'access_key': 'AKIAI6X4TYCIXM2B7MUQ', - 'secret_key': '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', - }, video_id, query) - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError): - self._handle_error(e) - raise - - -class ShahidIE(ShahidBaseIE): - _NETRC_MACHINE = 'shahid' - _VALID_URL = ShahidBaseIE._VALID_URL_BASE + r'(?:serie|show|movie)s/[^/]+/(?Pepisode|clip|movie)-(?P\d+)' - _TESTS = [{ - 'url': 'https://shahid.mbc.net/ar/shows/%D9%85%D8%AA%D8%AD%D9%81-%D8%A7%D9%84%D8%AF%D8%AD%D9%8A%D8%AD-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D9%83%D9%84%D9%8A%D8%A8-1/clip-816924', - 'info_dict': { - 'id': '816924', - 'ext': 'mp4', - 'title': 'متحف الدحيح الموسم 1 كليب 1', - 'timestamp': 1602806400, - 'upload_date': '20201016', - 'description': 'برومو', - 'duration': 22, - 'categories': ['كوميديا'], - }, - 'params': { - # m3u8 download - 'skip_download': True, - } - }, { - 'url': 'https://shahid.mbc.net/ar/movies/%D8%A7%D9%84%D9%82%D9%86%D8%A7%D8%B5%D8%A9/movie-151746', - 'only_matching': True - }, { - # shahid plus subscriber only - 'url': 'https://shahid.mbc.net/ar/series/%D9%85%D8%B1%D8%A7%D9%8A%D8%A7-2011-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/episode-90511', - 'only_matching': True - }, { - 'url': 'https://shahid.mbc.net/en/shows/Ramez-Fi-Al-Shallal-season-1-episode-1/episode-359319', - 'only_matching': True - }] - - def _real_initialize(self): - email, password = self._get_login_info() - if email is None: - return - - try: - user_data = self._download_json( - 
'https://shahid.mbc.net/wd/service/users/login', - None, 'Logging in', data=json.dumps({ - 'email': email, - 'password': password, - 'basic': 'false', - }).encode('utf-8'), headers={ - 'Content-Type': 'application/json; charset=UTF-8', - })['user'] - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError): - self._handle_error(e) - raise - - self._download_webpage( - 'https://shahid.mbc.net/populateContext', - None, 'Populate Context', data=urlencode_postdata({ - 'firstName': user_data['firstName'], - 'lastName': user_data['lastName'], - 'userName': user_data['email'], - 'csg_user_name': user_data['email'], - 'subscriberId': user_data['id'], - 'sessionId': user_data['sessionId'], - })) - - def _real_extract(self, url): - page_type, video_id = re.match(self._VALID_URL, url).groups() - if page_type == 'clip': - page_type = 'episode' - - playout = self._call_api( - 'playout/new/url/' + video_id, video_id)['playout'] - - if playout.get('drm'): - raise ExtractorError('This video is DRM protected.', expected=True) - - formats = self._extract_m3u8_formats(re.sub( - # https://docs.aws.amazon.com/mediapackage/latest/ug/manifest-filtering.html - r'aws\.manifestfilter=[\w:;,-]+&?', - '', playout['url']), video_id, 'mp4') - self._sort_formats(formats) - - # video = self._call_api( - # 'product/id', video_id, { - # 'id': video_id, - # 'productType': 'ASSET', - # 'productSubType': page_type.upper() - # })['productModel'] - - response = self._download_json( - 'http://api.shahid.net/api/v1_1/%s/%s' % (page_type, video_id), - video_id, 'Downloading video JSON', query={ - 'apiKey': 'sh@hid0nlin3', - 'hash': 'b2wMCTHpSmyxGqQjJFOycRmLSex+BpTK/ooxy6vHaqs=', - }) - data = response.get('data', {}) - error = data.get('error') - if error: - raise ExtractorError( - '%s returned error: %s' % (self.IE_NAME, '\n'.join(error.values())), - expected=True) - - video = data[page_type] - title = video['title'] - categories = [ - category['name'] - for category in video.get('genres', 
[]) if 'name' in category] - - return { - 'id': video_id, - 'title': title, - 'description': video.get('description'), - 'thumbnail': video.get('thumbnailUrl'), - 'duration': int_or_none(video.get('duration')), - 'timestamp': parse_iso8601(video.get('referenceDate')), - 'categories': categories, - 'series': video.get('showTitle') or video.get('showName'), - 'season': video.get('seasonTitle'), - 'season_number': int_or_none(video.get('seasonNumber')), - 'season_id': str_or_none(video.get('seasonId')), - 'episode_number': int_or_none(video.get('number')), - 'episode_id': video_id, - 'formats': formats, - } - - -class ShahidShowIE(ShahidBaseIE): - _VALID_URL = ShahidBaseIE._VALID_URL_BASE + r'(?:show|serie)s/[^/]+/(?:show|series)-(?P\d+)' - _TESTS = [{ - 'url': 'https://shahid.mbc.net/ar/shows/%D8%B1%D8%A7%D9%85%D8%B2-%D9%82%D8%B1%D8%B4-%D8%A7%D9%84%D8%A8%D8%AD%D8%B1/show-79187', - 'info_dict': { - 'id': '79187', - 'title': 'رامز قرش البحر', - 'description': 'md5:c88fa7e0f02b0abd39d417aee0d046ff', - }, - 'playlist_mincount': 32, - }, { - 'url': 'https://shahid.mbc.net/ar/series/How-to-live-Longer-(The-Big-Think)/series-291861', - 'only_matching': True - }] - _PAGE_SIZE = 30 - - def _real_extract(self, url): - show_id = self._match_id(url) - - product = self._call_api( - 'playableAsset', show_id, {'showId': show_id})['productModel'] - playlist = product['playlist'] - playlist_id = playlist['id'] - show = product.get('show', {}) - - def page_func(page_num): - playlist = self._call_api( - 'product/playlist', show_id, { - 'playListId': playlist_id, - 'pageNumber': page_num, - 'pageSize': 30, - 'sorts': [{ - 'order': 'DESC', - 'type': 'SORTDATE' - }], - }) - for product in playlist.get('productList', {}).get('products', []): - product_url = product.get('productUrl', []).get('url') - if not product_url: - continue - yield self.url_result( - product_url, 'Shahid', - str_or_none(product.get('id')), - product.get('title')) - - entries = InAdvancePagedList( - page_func, - 
math.ceil(playlist['count'] / self._PAGE_SIZE), - self._PAGE_SIZE) - - return self.playlist_result( - entries, show_id, show.get('title'), show.get('description')) diff --git a/youtube_dl/extractor/shared.py b/youtube_dl/extractor/shared.py deleted file mode 100644 index 93ab2a167..000000000 --- a/youtube_dl/extractor/shared.py +++ /dev/null @@ -1,141 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..compat import ( - compat_b64decode, - compat_urllib_parse_unquote_plus, -) -from ..utils import ( - determine_ext, - ExtractorError, - int_or_none, - js_to_json, - KNOWN_EXTENSIONS, - parse_filesize, - rot47, - url_or_none, - urlencode_postdata, -) - - -class SharedBaseIE(InfoExtractor): - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage, urlh = self._download_webpage_handle(url, video_id) - - if self._FILE_NOT_FOUND in webpage: - raise ExtractorError( - 'Video %s does not exist' % video_id, expected=True) - - video_url = self._extract_video_url(webpage, video_id, url) - - title = self._extract_title(webpage) - filesize = int_or_none(self._extract_filesize(webpage)) - - return { - 'id': video_id, - 'url': video_url, - 'ext': 'mp4', - 'filesize': filesize, - 'title': title, - } - - def _extract_title(self, webpage): - return compat_b64decode(self._html_search_meta( - 'full:title', webpage, 'title')).decode('utf-8') - - def _extract_filesize(self, webpage): - return self._html_search_meta( - 'full:size', webpage, 'file size', fatal=False) - - -class SharedIE(SharedBaseIE): - IE_DESC = 'shared.sx' - _VALID_URL = r'https?://shared\.sx/(?P[\da-z]{10})' - _FILE_NOT_FOUND = '>File does not exist<' - - _TEST = { - 'url': 'http://shared.sx/0060718775', - 'md5': '106fefed92a8a2adb8c98e6a0652f49b', - 'info_dict': { - 'id': '0060718775', - 'ext': 'mp4', - 'title': 'Bmp4', - 'filesize': 1720110, - }, - } - - def _extract_video_url(self, webpage, video_id, url): - download_form = self._hidden_inputs(webpage) - - 
video_page = self._download_webpage( - url, video_id, 'Downloading video page', - data=urlencode_postdata(download_form), - headers={ - 'Content-Type': 'application/x-www-form-urlencoded', - 'Referer': url, - }) - - video_url = self._html_search_regex( - r'data-url=(["\'])(?P(?:(?!\1).)+)\1', - video_page, 'video URL', group='url') - - return video_url - - -class VivoIE(SharedBaseIE): - IE_DESC = 'vivo.sx' - _VALID_URL = r'https?://vivo\.s[xt]/(?P[\da-z]{10})' - _FILE_NOT_FOUND = '>The file you have requested does not exists or has been removed' - - _TESTS = [{ - 'url': 'http://vivo.sx/d7ddda0e78', - 'md5': '15b3af41be0b4fe01f4df075c2678b2c', - 'info_dict': { - 'id': 'd7ddda0e78', - 'ext': 'mp4', - 'title': 'Chicken', - 'filesize': 515659, - }, - }, { - 'url': 'http://vivo.st/d7ddda0e78', - 'only_matching': True, - }] - - def _extract_title(self, webpage): - title = self._html_search_regex( - r'data-name\s*=\s*(["\'])(?P(?:(?!\1).)+)\1', webpage, - 'title', default=None, group='title') - if title: - ext = determine_ext(title) - if ext.lower() in KNOWN_EXTENSIONS: - title = title.rpartition('.' 
+ ext)[0] - return title - return self._og_search_title(webpage) - - def _extract_filesize(self, webpage): - return parse_filesize(self._search_regex( - r'data-type=["\']video["\'][^>]*>Watch.*?<strong>\s*\((.+?)\)', - webpage, 'filesize', fatal=False)) - - def _extract_video_url(self, webpage, video_id, url): - def decode_url_old(encoded_url): - return compat_b64decode(encoded_url).decode('utf-8') - - stream_url = self._search_regex( - r'data-stream\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, - 'stream url', default=None, group='url') - if stream_url: - stream_url = url_or_none(decode_url_old(stream_url)) - if stream_url: - return stream_url - - def decode_url(encoded_url): - return rot47(compat_urllib_parse_unquote_plus(encoded_url)) - - return decode_url(self._parse_json( - self._search_regex( - r'(?s)InitializeStream\s*\(\s*({.+?})\s*\)\s*;', webpage, - 'stream'), - video_id, transform_source=js_to_json)['source']) diff --git a/youtube_dl/extractor/showroomlive.py b/youtube_dl/extractor/showroomlive.py deleted file mode 100644 index efd9d561f..000000000 --- a/youtube_dl/extractor/showroomlive.py +++ /dev/null @@ -1,84 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..compat import compat_str -from ..utils import ( - ExtractorError, - int_or_none, - urljoin, -) - - -class ShowRoomLiveIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?showroom-live\.com/(?!onlive|timetable|event|campaign|news|ranking|room)(?P<id>[^/?#&]+)' - _TEST = { - 'url': 'https://www.showroom-live.com/48_Nana_Okada', - 'only_matching': True, - } - - def _real_extract(self, url): - broadcaster_id = self._match_id(url) - - webpage = self._download_webpage(url, broadcaster_id) - - room_id = self._search_regex( - (r'SrGlobal\.roomId\s*=\s*(\d+)', - r'(?:profile|room)\?room_id\=(\d+)'), webpage, 'room_id') - - room = self._download_json( - urljoin(url, '/api/room/profile?room_id=%s' % room_id), - broadcaster_id) - - is_live = 
room.get('is_onlive') - if is_live is not True: - raise ExtractorError('%s is offline' % broadcaster_id, expected=True) - - uploader = room.get('performer_name') or broadcaster_id - title = room.get('room_name') or room.get('main_name') or uploader - - streaming_url_list = self._download_json( - urljoin(url, '/api/live/streaming_url?room_id=%s' % room_id), - broadcaster_id)['streaming_url_list'] - - formats = [] - for stream in streaming_url_list: - stream_url = stream.get('url') - if not stream_url: - continue - stream_type = stream.get('type') - if stream_type == 'hls': - m3u8_formats = self._extract_m3u8_formats( - stream_url, broadcaster_id, ext='mp4', m3u8_id='hls', - live=True) - for f in m3u8_formats: - f['quality'] = int_or_none(stream.get('quality', 100)) - formats.extend(m3u8_formats) - elif stream_type == 'rtmp': - stream_name = stream.get('stream_name') - if not stream_name: - continue - formats.append({ - 'url': stream_url, - 'play_path': stream_name, - 'page_url': url, - 'player_url': 'https://www.showroom-live.com/assets/swf/v3/ShowRoomLive.swf', - 'rtmp_live': True, - 'ext': 'flv', - 'format_id': 'rtmp', - 'format_note': stream.get('label'), - 'quality': int_or_none(stream.get('quality', 100)), - }) - self._sort_formats(formats) - - return { - 'id': compat_str(room.get('live_id') or broadcaster_id), - 'title': self._live_title(title), - 'description': room.get('description'), - 'timestamp': int_or_none(room.get('current_live_started_at')), - 'uploader': uploader, - 'uploader_id': broadcaster_id, - 'view_count': int_or_none(room.get('view_num')), - 'formats': formats, - 'is_live': True, - } diff --git a/youtube_dl/extractor/simplecast.py b/youtube_dl/extractor/simplecast.py deleted file mode 100644 index 2d0b3c06d..000000000 --- a/youtube_dl/extractor/simplecast.py +++ /dev/null @@ -1,160 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - clean_podcast_url, - 
int_or_none, - parse_iso8601, - strip_or_none, - try_get, - urlencode_postdata, -) - - -class SimplecastBaseIE(InfoExtractor): - _UUID_REGEX = r'[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12}' - _API_BASE = 'https://api.simplecast.com/' - - def _call_api(self, path_tmpl, video_id): - return self._download_json( - self._API_BASE + path_tmpl % video_id, video_id) - - def _call_search_api(self, resource, resource_id, resource_url): - return self._download_json( - 'https://api.simplecast.com/%ss/search' % resource, resource_id, - data=urlencode_postdata({'url': resource_url})) - - def _parse_episode(self, episode): - episode_id = episode['id'] - title = episode['title'].strip() - audio_file = episode.get('audio_file') or {} - audio_file_url = audio_file.get('url') or episode.get('audio_file_url') or episode['enclosure_url'] - - season = episode.get('season') or {} - season_href = season.get('href') - season_id = None - if season_href: - season_id = self._search_regex( - r'https?://api.simplecast.com/seasons/(%s)' % self._UUID_REGEX, - season_href, 'season id', default=None) - - webpage_url = episode.get('episode_url') - channel_url = None - if webpage_url: - channel_url = self._search_regex( - r'(https?://[^/]+\.simplecast\.com)', - webpage_url, 'channel url', default=None) - - return { - 'id': episode_id, - 'display_id': episode.get('slug'), - 'title': title, - 'url': clean_podcast_url(audio_file_url), - 'webpage_url': webpage_url, - 'channel_url': channel_url, - 'series': try_get(episode, lambda x: x['podcast']['title']), - 'season_number': int_or_none(season.get('number')), - 'season_id': season_id, - 'thumbnail': episode.get('image_url'), - 'episode_id': episode_id, - 'episode_number': int_or_none(episode.get('number')), - 'description': strip_or_none(episode.get('description')), - 'timestamp': parse_iso8601(episode.get('published_at')), - 'duration': int_or_none(episode.get('duration')), - 'filesize': int_or_none(audio_file.get('size') or 
episode.get('audio_file_size')), - } - - -class SimplecastIE(SimplecastBaseIE): - IE_NAME = 'simplecast' - _VALID_URL = r'https?://(?:api\.simplecast\.com/episodes|player\.simplecast\.com)/(?P<id>%s)' % SimplecastBaseIE._UUID_REGEX - _COMMON_TEST_INFO = { - 'display_id': 'errant-signal-chris-franklin-new-wave-video-essays', - 'id': 'b6dc49a2-9404-4853-9aa9-9cfc097be876', - 'ext': 'mp3', - 'title': 'Errant Signal - Chris Franklin & New Wave Video Essays', - 'episode_number': 1, - 'episode_id': 'b6dc49a2-9404-4853-9aa9-9cfc097be876', - 'description': 'md5:34752789d3d2702e2d2c975fbd14f357', - 'season_number': 1, - 'season_id': 'e23df0da-bae4-4531-8bbf-71364a88dc13', - 'series': 'The RE:BIND.io Podcast', - 'duration': 5343, - 'timestamp': 1580979475, - 'upload_date': '20200206', - 'webpage_url': r're:^https?://the-re-bind-io-podcast\.simplecast\.com/episodes/errant-signal-chris-franklin-new-wave-video-essays', - 'channel_url': r're:^https?://the-re-bind-io-podcast\.simplecast\.com$', - } - _TESTS = [{ - 'url': 'https://api.simplecast.com/episodes/b6dc49a2-9404-4853-9aa9-9cfc097be876', - 'md5': '8c93be7be54251bf29ee97464eabe61c', - 'info_dict': _COMMON_TEST_INFO, - }, { - 'url': 'https://player.simplecast.com/b6dc49a2-9404-4853-9aa9-9cfc097be876', - 'only_matching': True, - }] - - @staticmethod - def _extract_urls(webpage): - return re.findall( - r'''(?x)<iframe[^>]+src=["\'] - ( - https?://(?:embed\.simplecast\.com/[0-9a-f]{8}| - player\.simplecast\.com/%s - ))''' % SimplecastBaseIE._UUID_REGEX, webpage) - - def _real_extract(self, url): - episode_id = self._match_id(url) - episode = self._call_api('episodes/%s', episode_id) - return self._parse_episode(episode) - - -class SimplecastEpisodeIE(SimplecastBaseIE): - IE_NAME = 'simplecast:episode' - _VALID_URL = r'https?://(?!api\.)[^/]+\.simplecast\.com/episodes/(?P<id>[^/?&#]+)' - _TEST = { - 'url': 'https://the-re-bind-io-podcast.simplecast.com/episodes/errant-signal-chris-franklin-new-wave-video-essays', - 'md5': 
'8c93be7be54251bf29ee97464eabe61c', - 'info_dict': SimplecastIE._COMMON_TEST_INFO, - } - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - episode = self._call_search_api( - 'episode', mobj.group(1), mobj.group(0)) - return self._parse_episode(episode) - - -class SimplecastPodcastIE(SimplecastBaseIE): - IE_NAME = 'simplecast:podcast' - _VALID_URL = r'https?://(?!(?:api|cdn|embed|feeds|player)\.)(?P<id>[^/]+)\.simplecast\.com(?!/episodes/[^/?&#]+)' - _TESTS = [{ - 'url': 'https://the-re-bind-io-podcast.simplecast.com', - 'playlist_mincount': 33, - 'info_dict': { - 'id': '07d28d26-7522-42eb-8c53-2bdcfc81c43c', - 'title': 'The RE:BIND.io Podcast', - }, - }, { - 'url': 'https://the-re-bind-io-podcast.simplecast.com/episodes', - 'only_matching': True, - }] - - def _real_extract(self, url): - subdomain = self._match_id(url) - site = self._call_search_api('site', subdomain, url) - podcast = site['podcast'] - podcast_id = podcast['id'] - podcast_title = podcast.get('title') - - def entries(): - episodes = self._call_api('podcasts/%s/episodes', podcast_id) - for episode in (episodes.get('collection') or []): - info = self._parse_episode(episode) - info['series'] = podcast_title - yield info - - return self.playlist_result(entries(), podcast_id, podcast_title) diff --git a/youtube_dl/extractor/sina.py b/youtube_dl/extractor/sina.py deleted file mode 100644 index 07b766b4a..000000000 --- a/youtube_dl/extractor/sina.py +++ /dev/null @@ -1,115 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - HEADRequest, - ExtractorError, - int_or_none, - update_url_query, - qualities, - get_element_by_attribute, - clean_html, -) - - -class SinaIE(InfoExtractor): - _VALID_URL = r'''(?x)https?://(?:.*?\.)?video\.sina\.com\.cn/ - (?: - (?:view/|.*\#)(?P<video_id>\d+)| - .+?/(?P<pseudo_id>[^/?#]+)(?:\.s?html)| - # This is used by external sites like Weibo - 
api/sinawebApi/outplay.php/(?P<token>.+?)\.swf - ) - ''' - - _TESTS = [ - { - 'url': 'http://video.sina.com.cn/news/spj/topvideoes20160504/?opsubject_id=top1#250576622', - 'md5': 'd38433e2fc886007729735650ae4b3e9', - 'info_dict': { - 'id': '250576622', - 'ext': 'mp4', - 'title': '现场:克鲁兹宣布退选 特朗普将稳获提名', - } - }, - { - 'url': 'http://video.sina.com.cn/v/b/101314253-1290078633.html', - 'info_dict': { - 'id': '101314253', - 'ext': 'flv', - 'title': '军方提高对朝情报监视级别', - }, - 'skip': 'the page does not exist or has been deleted', - }, - { - 'url': 'http://video.sina.com.cn/view/250587748.html', - 'md5': '3d1807a25c775092aab3bc157fff49b4', - 'info_dict': { - 'id': '250587748', - 'ext': 'mp4', - 'title': '瞬间泪目:8年前汶川地震珍贵视频首曝光', - }, - }, - ] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - - video_id = mobj.group('video_id') - if not video_id: - if mobj.group('token') is not None: - # The video id is in the redirected url - self.to_screen('Getting video id') - request = HEADRequest(url) - _, urlh = self._download_webpage_handle(request, 'NA', False) - return self._real_extract(urlh.geturl()) - else: - pseudo_id = mobj.group('pseudo_id') - webpage = self._download_webpage(url, pseudo_id) - error = get_element_by_attribute('class', 'errtitle', webpage) - if error: - raise ExtractorError('%s said: %s' % ( - self.IE_NAME, clean_html(error)), expected=True) - video_id = self._search_regex( - r"video_id\s*:\s*'(\d+)'", webpage, 'video id') - - video_data = self._download_json( - 'http://s.video.sina.com.cn/video/h5play', - video_id, query={'video_id': video_id}) - if video_data['code'] != 1: - raise ExtractorError('%s said: %s' % ( - self.IE_NAME, video_data['message']), expected=True) - else: - video_data = video_data['data'] - title = video_data['title'] - description = video_data.get('description') - if description: - description = description.strip() - - preference = qualities(['cif', 'sd', 'hd', 'fhd', 'ffd']) - formats = [] - for quality_id, quality 
in video_data.get('videos', {}).get('mp4', {}).items(): - file_api = quality.get('file_api') - file_id = quality.get('file_id') - if not file_api or not file_id: - continue - formats.append({ - 'format_id': quality_id, - 'url': update_url_query(file_api, {'vid': file_id}), - 'preference': preference(quality_id), - 'ext': 'mp4', - }) - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnail': video_data.get('image'), - 'duration': int_or_none(video_data.get('length')), - 'timestamp': int_or_none(video_data.get('create_time')), - 'formats': formats, - } diff --git a/youtube_dl/extractor/sixplay.py b/youtube_dl/extractor/sixplay.py deleted file mode 100644 index 7ec66ecf3..000000000 --- a/youtube_dl/extractor/sixplay.py +++ /dev/null @@ -1,129 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..compat import ( - compat_parse_qs, - compat_str, - compat_urllib_parse_urlparse, -) -from ..utils import ( - determine_ext, - int_or_none, - try_get, - qualities, -) - - -class SixPlayIE(InfoExtractor): - IE_NAME = '6play' - _VALID_URL = r'(?:6play:|https?://(?:www\.)?(?P<domain>6play\.fr|rtlplay\.be|play\.rtl\.hr|rtlmost\.hu)/.+?-c_)(?P<id>[0-9]+)' - _TESTS = [{ - 'url': 'https://www.6play.fr/minute-par-minute-p_9533/le-but-qui-a-marque-lhistoire-du-football-francais-c_12041051', - 'md5': '31fcd112637baa0c2ab92c4fcd8baf27', - 'info_dict': { - 'id': '12041051', - 'ext': 'mp4', - 'title': 'Le but qui a marqué l\'histoire du football français !', - 'description': 'md5:b59e7e841d646ef1eb42a7868eb6a851', - }, - }, { - 'url': 'https://www.rtlplay.be/rtl-info-13h-p_8551/les-titres-du-rtlinfo-13h-c_12045869', - 'only_matching': True, - }, { - 'url': 'https://play.rtl.hr/pj-masks-p_9455/epizoda-34-sezona-1-catboyevo-cudo-na-dva-kotaca-c_11984989', - 'only_matching': True, - }, { - 'url': 
'https://www.rtlmost.hu/megtorve-p_14167/megtorve-6-resz-c_12397787', - 'only_matching': True, - }] - - def _real_extract(self, url): - domain, video_id = re.search(self._VALID_URL, url).groups() - service, consumer_name = { - '6play.fr': ('6play', 'm6web'), - 'rtlplay.be': ('rtlbe_rtl_play', 'rtlbe'), - 'play.rtl.hr': ('rtlhr_rtl_play', 'rtlhr'), - 'rtlmost.hu': ('rtlhu_rtl_most', 'rtlhu'), - }.get(domain, ('6play', 'm6web')) - - data = self._download_json( - 'https://pc.middleware.6play.fr/6play/v2/platforms/m6group_web/services/%s/videos/clip_%s' % (service, video_id), - video_id, headers={ - 'x-customer-name': consumer_name - }, query={ - 'csa': 5, - 'with': 'clips', - }) - - clip_data = data['clips'][0] - title = clip_data['title'] - - urls = [] - quality_key = qualities(['lq', 'sd', 'hq', 'hd']) - formats = [] - subtitles = {} - assets = clip_data.get('assets') or [] - for asset in assets: - asset_url = asset.get('full_physical_path') - protocol = asset.get('protocol') - if not asset_url or ((protocol == 'primetime' or asset.get('type') == 'usp_hlsfp_h264') and not ('_drmnp.ism/' in asset_url or '_unpnp.ism/' in asset_url)) or asset_url in urls: - continue - urls.append(asset_url) - container = asset.get('video_container') - ext = determine_ext(asset_url) - if protocol == 'http_subtitle' or ext == 'vtt': - subtitles.setdefault('fr', []).append({'url': asset_url}) - continue - if container == 'm3u8' or ext == 'm3u8': - if protocol == 'usp': - if compat_parse_qs(compat_urllib_parse_urlparse(asset_url).query).get('token', [None])[0]: - urlh = self._request_webpage( - asset_url, video_id, fatal=False, - headers=self.geo_verification_headers()) - if not urlh: - continue - asset_url = urlh.geturl() - asset_url = asset_url.replace('_drmnp.ism/', '_unpnp.ism/') - for i in range(3, 0, -1): - asset_url = asset_url = asset_url.replace('_sd1/', '_sd%d/' % i) - m3u8_formats = self._extract_m3u8_formats( - asset_url, video_id, 'mp4', 'm3u8_native', - m3u8_id='hls', 
fatal=False) - formats.extend(m3u8_formats) - formats.extend(self._extract_mpd_formats( - asset_url.replace('.m3u8', '.mpd'), - video_id, mpd_id='dash', fatal=False)) - if m3u8_formats: - break - else: - formats.extend(self._extract_m3u8_formats( - asset_url, video_id, 'mp4', 'm3u8_native', - m3u8_id='hls', fatal=False)) - elif container == 'mp4' or ext == 'mp4': - quality = asset.get('video_quality') - formats.append({ - 'url': asset_url, - 'format_id': quality, - 'quality': quality_key(quality), - 'ext': ext, - }) - self._sort_formats(formats) - - def get(getter): - for src in (data, clip_data): - v = try_get(src, getter, compat_str) - if v: - return v - - return { - 'id': video_id, - 'title': title, - 'description': get(lambda x: x['description']), - 'duration': int_or_none(clip_data.get('duration')), - 'series': get(lambda x: x['program']['title']), - 'formats': formats, - 'subtitles': subtitles, - } diff --git a/youtube_dl/extractor/sky.py b/youtube_dl/extractor/sky.py deleted file mode 100644 index ff2c977a0..000000000 --- a/youtube_dl/extractor/sky.py +++ /dev/null @@ -1,131 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - extract_attributes, - smuggle_url, - strip_or_none, - urljoin, -) - - -class SkyBaseIE(InfoExtractor): - BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s' - _SDC_EL_REGEX = r'(?s)(<div[^>]+data-(?:component-name|fn)="sdc-(?:articl|sit)e-video"[^>]*>)' - - def _process_ooyala_element(self, webpage, sdc_el, url): - sdc = extract_attributes(sdc_el) - provider = sdc.get('data-provider') - if provider == 'ooyala': - video_id = sdc['data-sdc-video-id'] - video_url = 'ooyala:%s' % video_id - ie_key = 'Ooyala' - ooyala_el = self._search_regex( - r'(<div[^>]+class="[^"]*\bsdc-article-video__media-ooyala\b[^"]*"[^>]+data-video-id="%s"[^>]*>)' % video_id, - webpage, 'video data', fatal=False) - if ooyala_el: - 
ooyala_attrs = extract_attributes(ooyala_el) or {} - if ooyala_attrs.get('data-token-required') == 'true': - token_fetch_url = (self._parse_json(ooyala_attrs.get( - 'data-token-fetch-options', '{}'), - video_id, fatal=False) or {}).get('url') - if token_fetch_url: - embed_token = self._download_json(urljoin( - url, token_fetch_url), video_id, fatal=False) - if embed_token: - video_url = smuggle_url( - video_url, {'embed_token': embed_token}) - elif provider == 'brightcove': - video_id = sdc['data-video-id'] - account_id = sdc.get('data-account-id') or '6058004172001' - player_id = sdc.get('data-player-id') or 'RC9PQUaJ6' - video_url = self.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, video_id) - ie_key = 'BrightcoveNew' - - return { - '_type': 'url_transparent', - 'id': video_id, - 'url': video_url, - 'ie_key': ie_key, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - info = self._process_ooyala_element(webpage, self._search_regex( - self._SDC_EL_REGEX, webpage, 'sdc element'), url) - info.update({ - 'title': self._og_search_title(webpage), - 'description': strip_or_none(self._og_search_description(webpage)), - }) - return info - - -class SkySportsIE(SkyBaseIE): - IE_NAME = 'sky:sports' - _VALID_URL = r'https?://(?:www\.)?skysports\.com/watch/video/([^/]+/)*(?P<id>[0-9]+)' - _TESTS = [{ - 'url': 'http://www.skysports.com/watch/video/10328419/bale-its-our-time-to-shine', - 'md5': '77d59166cddc8d3cb7b13e35eaf0f5ec', - 'info_dict': { - 'id': 'o3eWJnNDE6l7kfNO8BOoBlRxXRQ4ANNQ', - 'ext': 'mp4', - 'title': 'Bale: It\'s our time to shine', - 'description': 'md5:e88bda94ae15f7720c5cb467e777bb6d', - }, - 'add_ie': ['Ooyala'], - }, { - 'url': 'https://www.skysports.com/watch/video/sports/f1/12160544/abu-dhabi-gp-the-notebook', - 'only_matching': True, - }, { - 'url': 'https://www.skysports.com/watch/video/tv-shows/12118508/rainford-brent-how-ace-programme-helps', - 'only_matching': True, - }] - - 
-class SkyNewsIE(SkyBaseIE): - IE_NAME = 'sky:news' - _VALID_URL = r'https?://news\.sky\.com/video/[0-9a-z-]+-(?P<id>[0-9]+)' - _TEST = { - 'url': 'https://news.sky.com/video/russian-plane-inspected-after-deadly-fire-11712962', - 'md5': '411e8893fd216c75eaf7e4c65d364115', - 'info_dict': { - 'id': 'ref:1ua21xaDE6lCtZDmbYfl8kwsKLooJbNM', - 'ext': 'mp4', - 'title': 'Russian plane inspected after deadly fire', - 'description': 'The Russian Investigative Committee has released video of the wreckage of a passenger plane which caught fire near Moscow.', - 'uploader_id': '6058004172001', - 'timestamp': 1567112345, - 'upload_date': '20190829', - }, - 'add_ie': ['BrightcoveNew'], - } - - -class SkySportsNewsIE(SkyBaseIE): - IE_NAME = 'sky:sports:news' - _VALID_URL = r'https?://(?:www\.)?skysports\.com/([^/]+/)*news/\d+/(?P<id>\d+)' - _TEST = { - 'url': 'http://www.skysports.com/golf/news/12176/10871916/dustin-johnson-ready-to-conquer-players-championship-at-tpc-sawgrass', - 'info_dict': { - 'id': '10871916', - 'title': 'Dustin Johnson ready to conquer Players Championship at TPC Sawgrass', - 'description': 'Dustin Johnson is confident he can continue his dominant form in 2017 by adding the Players Championship to his list of victories.', - }, - 'playlist_count': 2, - } - - def _real_extract(self, url): - article_id = self._match_id(url) - webpage = self._download_webpage(url, article_id) - - entries = [] - for sdc_el in re.findall(self._SDC_EL_REGEX, webpage): - entries.append(self._process_ooyala_element(webpage, sdc_el, url)) - - return self.playlist_result( - entries, article_id, self._og_search_title(webpage), - self._html_search_meta(['og:description', 'description'], webpage)) diff --git a/youtube_dl/extractor/skyit.py b/youtube_dl/extractor/skyit.py deleted file mode 100644 index 14a4d8d4c..000000000 --- a/youtube_dl/extractor/skyit.py +++ /dev/null @@ -1,239 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from 
..compat import ( - compat_str, - compat_parse_qs, - compat_urllib_parse_urlparse, -) -from ..utils import ( - dict_get, - int_or_none, - parse_duration, - unified_timestamp, -) - - -class SkyItPlayerIE(InfoExtractor): - IE_NAME = 'player.sky.it' - _VALID_URL = r'https?://player\.sky\.it/player/(?:external|social)\.html\?.*?\bid=(?P<id>\d+)' - _GEO_BYPASS = False - _DOMAIN = 'sky' - _PLAYER_TMPL = 'https://player.sky.it/player/external.html?id=%s&domain=%s' - # http://static.sky.it/static/skyplayer/conf.json - _TOKEN_MAP = { - 'cielo': 'Hh9O7M8ks5yi6nSROL7bKYz933rdf3GhwZlTLMgvy4Q', - 'hotclub': 'kW020K2jq2lk2eKRJD2vWEg832ncx2EivZlTLQput2C', - 'mtv8': 'A5Nn9GGb326CI7vP5e27d7E4PIaQjota', - 'salesforce': 'C6D585FD1615272C98DE38235F38BD86', - 'sitocommerciale': 'VJwfFuSGnLKnd9Phe9y96WkXgYDCguPMJ2dLhGMb2RE', - 'sky': 'F96WlOd8yoFmLQgiqv6fNQRvHZcsWk5jDaYnDvhbiJk', - 'skyacademy': 'A6LAn7EkO2Q26FRy0IAMBekX6jzDXYL3', - 'skyarte': 'LWk29hfiU39NNdq87ePeRach3nzTSV20o0lTv2001Cd', - 'theupfront': 'PRSGmDMsg6QMGc04Obpoy7Vsbn7i2Whp', - } - - def _player_url_result(self, video_id): - return self.url_result( - self._PLAYER_TMPL % (video_id, self._DOMAIN), - SkyItPlayerIE.ie_key(), video_id) - - def _parse_video(self, video, video_id): - title = video['title'] - is_live = video.get('type') == 'live' - hls_url = video.get(('streaming' if is_live else 'hls') + '_url') - if not hls_url and video.get('geoblock' if is_live else 'geob'): - self.raise_geo_restricted(countries=['IT']) - - if is_live: - formats = self._extract_m3u8_formats(hls_url, video_id, 'mp4') - else: - formats = self._extract_akamai_formats( - hls_url, video_id, {'http': 'videoplatform.sky.it'}) - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': self._live_title(title) if is_live else title, - 'formats': formats, - 'thumbnail': dict_get(video, ('video_still', 'video_still_medium', 'thumb')), - 'description': video.get('short_desc') or None, - 'timestamp': 
unified_timestamp(video.get('create_date')), - 'duration': int_or_none(video.get('duration_sec')) or parse_duration(video.get('duration')), - 'is_live': is_live, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - domain = compat_parse_qs(compat_urllib_parse_urlparse( - url).query).get('domain', [None])[0] - token = dict_get(self._TOKEN_MAP, (domain, 'sky')) - video = self._download_json( - 'https://apid.sky.it/vdp/v1/getVideoData', - video_id, query={ - 'caller': 'sky', - 'id': video_id, - 'token': token - }, headers=self.geo_verification_headers()) - return self._parse_video(video, video_id) - - -class SkyItVideoIE(SkyItPlayerIE): - IE_NAME = 'video.sky.it' - _VALID_URL = r'https?://(?:masterchef|video|xfactor)\.sky\.it(?:/[^/]+)*/video/[0-9a-z-]+-(?P<id>\d+)' - _TESTS = [{ - 'url': 'https://video.sky.it/news/mondo/video/uomo-ucciso-da-uno-squalo-in-australia-631227', - 'md5': 'fe5c91e59a84a3437eaa0bca6e134ccd', - 'info_dict': { - 'id': '631227', - 'ext': 'mp4', - 'title': 'Uomo ucciso da uno squalo in Australia', - 'timestamp': 1606036192, - 'upload_date': '20201122', - } - }, { - 'url': 'https://xfactor.sky.it/video/x-factor-2020-replay-audizioni-1-615820', - 'only_matching': True, - }, { - 'url': 'https://masterchef.sky.it/video/masterchef-9-cosa-e-successo-nella-prima-puntata-562831', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - return self._player_url_result(video_id) - - -class SkyItVideoLiveIE(SkyItPlayerIE): - IE_NAME = 'video.sky.it:live' - _VALID_URL = r'https?://video\.sky\.it/diretta/(?P<id>[^/?&#]+)' - _TEST = { - 'url': 'https://video.sky.it/diretta/tg24', - 'info_dict': { - 'id': '1', - 'ext': 'mp4', - 'title': r're:Diretta TG24 \d{4}-\d{2}-\d{2} \d{2}:\d{2}', - 'description': 'Guarda la diretta streaming di SkyTg24, segui con Sky tutti gli appuntamenti e gli speciali di Tg24.', - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - } - - def 
_real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - asset_id = compat_str(self._parse_json(self._search_regex( - r'<script[^>]+id="__NEXT_DATA__"[^>]*>({.+?})</script>', - webpage, 'next data'), display_id)['props']['initialState']['livePage']['content']['asset_id']) - livestream = self._download_json( - 'https://apid.sky.it/vdp/v1/getLivestream', - asset_id, query={'id': asset_id}) - return self._parse_video(livestream, asset_id) - - -class SkyItIE(SkyItPlayerIE): - IE_NAME = 'sky.it' - _VALID_URL = r'https?://(?:sport|tg24)\.sky\.it(?:/[^/]+)*/\d{4}/\d{2}/\d{2}/(?P<id>[^/?&#]+)' - _TESTS = [{ - 'url': 'https://sport.sky.it/calcio/serie-a/2020/11/21/juventus-cagliari-risultato-gol', - 'info_dict': { - 'id': '631201', - 'ext': 'mp4', - 'title': 'Un rosso alla violenza: in campo per i diritti delle donne', - 'upload_date': '20201121', - 'timestamp': 1605995753, - }, - 'expected_warnings': ['Unable to download f4m manifest'], - }, { - 'url': 'https://tg24.sky.it/mondo/2020/11/22/australia-squalo-uccide-uomo', - 'md5': 'fe5c91e59a84a3437eaa0bca6e134ccd', - 'info_dict': { - 'id': '631227', - 'ext': 'mp4', - 'title': 'Uomo ucciso da uno squalo in Australia', - 'timestamp': 1606036192, - 'upload_date': '20201122', - }, - }] - _VIDEO_ID_REGEX = r'data-videoid="(\d+)"' - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - video_id = self._search_regex( - self._VIDEO_ID_REGEX, webpage, 'video id') - return self._player_url_result(video_id) - - -class SkyItAcademyIE(SkyItIE): - IE_NAME = 'skyacademy.it' - _VALID_URL = r'https?://(?:www\.)?skyacademy\.it(?:/[^/]+)*/\d{4}/\d{2}/\d{2}/(?P<id>[^/?&#]+)' - _TESTS = [{ - 'url': 'https://www.skyacademy.it/eventi-speciali/2019/07/05/a-lezione-di-cinema-con-sky-academy-/', - 'md5': 'ced5c26638b7863190cbc44dd6f6ba08', - 'info_dict': { - 'id': '523458', - 'ext': 'mp4', - 'title': 'Sky Academy "The Best 
CineCamp 2019"', - 'timestamp': 1562843784, - 'upload_date': '20190711', - } - }] - _DOMAIN = 'skyacademy' - _VIDEO_ID_REGEX = r'id="news-videoId_(\d+)"' - - -class SkyItArteIE(SkyItIE): - IE_NAME = 'arte.sky.it' - _VALID_URL = r'https?://arte\.sky\.it/video/(?P<id>[^/?&#]+)' - _TESTS = [{ - 'url': 'https://arte.sky.it/video/serie-musei-venezia-collezionismo-12-novembre/', - 'md5': '515aee97b87d7a018b6c80727d3e7e17', - 'info_dict': { - 'id': '627926', - 'ext': 'mp4', - 'title': "Musei Galleria Franchetti alla Ca' d'Oro Palazzo Grimani", - 'upload_date': '20201106', - 'timestamp': 1604664493, - } - }] - _DOMAIN = 'skyarte' - _VIDEO_ID_REGEX = r'(?s)<iframe[^>]+src="(?:https:)?//player\.sky\.it/player/external\.html\?[^"]*\bid=(\d+)' - - -class CieloTVItIE(SkyItIE): - IE_NAME = 'cielotv.it' - _VALID_URL = r'https?://(?:www\.)?cielotv\.it/video/(?P<id>[^.]+)\.html' - _TESTS = [{ - 'url': 'https://www.cielotv.it/video/Il-lunedi-e-sempre-un-dramma.html', - 'md5': 'c4deed77552ba901c2a0d9258320304b', - 'info_dict': { - 'id': '499240', - 'ext': 'mp4', - 'title': 'Il lunedì è sempre un dramma', - 'upload_date': '20190329', - 'timestamp': 1553862178, - } - }] - _DOMAIN = 'cielo' - _VIDEO_ID_REGEX = r'videoId\s*=\s*"(\d+)"' - - -class TV8ItIE(SkyItVideoIE): - IE_NAME = 'tv8.it' - _VALID_URL = r'https?://tv8\.it/showvideo/(?P<id>\d+)' - _TESTS = [{ - 'url': 'https://tv8.it/showvideo/630529/ogni-mattina-ucciso-asino-di-andrea-lo-cicero/18-11-2020/', - 'md5': '9ab906a3f75ea342ed928442f9dabd21', - 'info_dict': { - 'id': '630529', - 'ext': 'mp4', - 'title': 'Ogni mattina - Ucciso asino di Andrea Lo Cicero', - 'timestamp': 1605721374, - 'upload_date': '20201118', - } - }] - _DOMAIN = 'mtv8' diff --git a/youtube_dl/extractor/skylinewebcams.py b/youtube_dl/extractor/skylinewebcams.py deleted file mode 100644 index b7f8ac736..000000000 --- a/youtube_dl/extractor/skylinewebcams.py +++ /dev/null @@ -1,42 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common 
import InfoExtractor - - -class SkylineWebcamsIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?skylinewebcams\.com/[^/]+/webcam/(?:[^/]+/)+(?P<id>[^/]+)\.html' - _TEST = { - 'url': 'https://www.skylinewebcams.com/it/webcam/italia/lazio/roma/scalinata-piazza-di-spagna-barcaccia.html', - 'info_dict': { - 'id': 'scalinata-piazza-di-spagna-barcaccia', - 'ext': 'mp4', - 'title': 're:^Live Webcam Scalinata di Piazza di Spagna - La Barcaccia [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', - 'description': 'Roma, veduta sulla Scalinata di Piazza di Spagna e sulla Barcaccia', - 'is_live': True, - }, - 'params': { - 'skip_download': True, - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - stream_url = self._search_regex( - r'(?:url|source)\s*:\s*(["\'])(?P<url>(?:https?:)?//.+?\.m3u8.*?)\1', webpage, - 'stream url', group='url') - - title = self._og_search_title(webpage) - description = self._og_search_description(webpage) - - return { - 'id': video_id, - 'url': stream_url, - 'ext': 'mp4', - 'title': self._live_title(title), - 'description': description, - 'is_live': True, - } diff --git a/youtube_dl/extractor/skynewsarabia.py b/youtube_dl/extractor/skynewsarabia.py deleted file mode 100644 index fffc9aa22..000000000 --- a/youtube_dl/extractor/skynewsarabia.py +++ /dev/null @@ -1,117 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..compat import compat_str -from ..utils import ( - parse_iso8601, - parse_duration, -) - - -class SkyNewsArabiaBaseIE(InfoExtractor): - _IMAGE_BASE_URL = 'http://www.skynewsarabia.com/web/images' - - def _call_api(self, path, value): - return self._download_json('http://api.skynewsarabia.com/web/rest/v2/%s/%s.json' % (path, value), value) - - def _get_limelight_media_id(self, url): - return self._search_regex(r'/media/[^/]+/([a-z0-9]{32})', url, 'limelight media id') - - def _get_image_url(self, 
image_path_template, width='1600', height='1200'): - return self._IMAGE_BASE_URL + image_path_template.format(width=width, height=height) - - def _extract_video_info(self, video_data): - video_id = compat_str(video_data['id']) - topic = video_data.get('topicTitle') - return { - '_type': 'url_transparent', - 'url': 'limelight:media:%s' % self._get_limelight_media_id(video_data['videoUrl'][0]['url']), - 'id': video_id, - 'title': video_data['headline'], - 'description': video_data.get('summary'), - 'thumbnail': self._get_image_url(video_data['mediaAsset']['imageUrl']), - 'timestamp': parse_iso8601(video_data.get('date')), - 'duration': parse_duration(video_data.get('runTime')), - 'tags': video_data.get('tags', []), - 'categories': [topic] if topic else [], - 'webpage_url': 'http://www.skynewsarabia.com/web/video/%s' % video_id, - 'ie_key': 'LimelightMedia', - } - - -class SkyNewsArabiaIE(SkyNewsArabiaBaseIE): - IE_NAME = 'skynewsarabia:video' - _VALID_URL = r'https?://(?:www\.)?skynewsarabia\.com/web/video/(?P<id>[0-9]+)' - _TEST = { - 'url': 'http://www.skynewsarabia.com/web/video/794902/%D9%86%D8%B5%D9%81-%D9%85%D9%84%D9%8A%D9%88%D9%86-%D9%85%D8%B5%D8%A8%D8%A7%D8%AD-%D8%B4%D8%AC%D8%B1%D8%A9-%D9%83%D8%B1%D9%8A%D8%B3%D9%85%D8%A7%D8%B3', - 'info_dict': { - 'id': '794902', - 'ext': 'flv', - 'title': 'نصف مليون مصباح على شجرة كريسماس', - 'description': 'md5:22f1b27f0850eeb10c7e59b1f16eb7c6', - 'upload_date': '20151128', - 'timestamp': 1448697198, - 'duration': 2119, - }, - 'params': { - # rtmp download - 'skip_download': True, - }, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - video_data = self._call_api('video', video_id) - return self._extract_video_info(video_data) - - -class SkyNewsArabiaArticleIE(SkyNewsArabiaBaseIE): - IE_NAME = 'skynewsarabia:article' - _VALID_URL = r'https?://(?:www\.)?skynewsarabia\.com/web/article/(?P<id>[0-9]+)' - _TESTS = [{ - 'url': 
'http://www.skynewsarabia.com/web/article/794549/%D8%A7%D9%94%D8%AD%D8%AF%D8%A7%D8%AB-%D8%A7%D9%84%D8%B4%D8%B1%D9%82-%D8%A7%D9%84%D8%A7%D9%94%D9%88%D8%B3%D8%B7-%D8%AE%D8%B1%D9%8A%D8%B7%D8%A9-%D8%A7%D9%84%D8%A7%D9%94%D9%84%D8%B9%D8%A7%D8%A8-%D8%A7%D9%84%D8%B0%D9%83%D9%8A%D8%A9', - 'info_dict': { - 'id': '794549', - 'ext': 'flv', - 'title': 'بالفيديو.. ألعاب ذكية تحاكي واقع المنطقة', - 'description': 'md5:0c373d29919a851e080ee4edd0c5d97f', - 'upload_date': '20151126', - 'timestamp': 1448559336, - 'duration': 281.6, - }, - 'params': { - # rtmp download - 'skip_download': True, - }, - }, { - 'url': 'http://www.skynewsarabia.com/web/article/794844/%D8%A7%D8%B3%D8%AA%D9%87%D8%AF%D8%A7%D9%81-%D9%82%D9%88%D8%A7%D8%B1%D8%A8-%D8%A7%D9%94%D8%B3%D9%84%D8%AD%D8%A9-%D9%84%D9%85%D9%8A%D9%84%D9%8A%D8%B4%D9%8A%D8%A7%D8%AA-%D8%A7%D9%84%D8%AD%D9%88%D8%AB%D9%8A-%D9%88%D8%B5%D8%A7%D9%84%D8%AD', - 'info_dict': { - 'id': '794844', - 'title': 'إحباط تهريب أسلحة لميليشيات الحوثي وصالح بجنوب اليمن', - 'description': 'md5:5c927b8b2e805796e7f693538d96fc7e', - }, - 'playlist_mincount': 2, - }] - - def _real_extract(self, url): - article_id = self._match_id(url) - article_data = self._call_api('article', article_id) - media_asset = article_data['mediaAsset'] - if media_asset['type'] == 'VIDEO': - topic = article_data.get('topicTitle') - return { - '_type': 'url_transparent', - 'url': 'limelight:media:%s' % self._get_limelight_media_id(media_asset['videoUrl'][0]['url']), - 'id': article_id, - 'title': article_data['headline'], - 'description': article_data.get('summary'), - 'thumbnail': self._get_image_url(media_asset['imageUrl']), - 'timestamp': parse_iso8601(article_data.get('date')), - 'tags': article_data.get('tags', []), - 'categories': [topic] if topic else [], - 'webpage_url': url, - 'ie_key': 'LimelightMedia', - } - entries = [self._extract_video_info(item) for item in article_data.get('inlineItems', []) if item['type'] == 'VIDEO'] - return self.playlist_result(entries, article_id, 
article_data['headline'], article_data.get('summary')) diff --git a/youtube_dl/extractor/slideshare.py b/youtube_dl/extractor/slideshare.py deleted file mode 100644 index e89ebebe7..000000000 --- a/youtube_dl/extractor/slideshare.py +++ /dev/null @@ -1,56 +0,0 @@ -from __future__ import unicode_literals - -import re -import json - -from .common import InfoExtractor -from ..compat import ( - compat_urlparse, -) -from ..utils import ( - ExtractorError, - get_element_by_id, -) - - -class SlideshareIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?slideshare\.net/[^/]+?/(?P<title>.+?)($|\?)' - - _TEST = { - 'url': 'http://www.slideshare.net/Dataversity/keynote-presentation-managing-scale-and-complexity', - 'info_dict': { - 'id': '25665706', - 'ext': 'mp4', - 'title': 'Managing Scale and Complexity', - 'description': 'This was a keynote presentation at the NoSQL Now! 2013 Conference & Expo (http://www.nosqlnow.com). This presentation was given by Adrian Cockcroft from Netflix.', - }, - } - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - page_title = mobj.group('title') - webpage = self._download_webpage(url, page_title) - slideshare_obj = self._search_regex( - r'\$\.extend\(.*?slideshare_object,\s*(\{.*?\})\);', - webpage, 'slideshare object') - info = json.loads(slideshare_obj) - if info['slideshow']['type'] != 'video': - raise ExtractorError('Webpage type is "%s": only video extraction is supported for Slideshare' % info['slideshow']['type'], expected=True) - - doc = info['doc'] - bucket = info['jsplayer']['video_bucket'] - ext = info['jsplayer']['video_extension'] - video_url = compat_urlparse.urljoin(bucket, doc + '-SD.' 
+ ext) - description = get_element_by_id('slideshow-description-paragraph', webpage) or self._html_search_regex( - r'(?s)<p[^>]+itemprop="description"[^>]*>(.+?)</p>', webpage, - 'description', fatal=False) - - return { - '_type': 'video', - 'id': info['slideshow']['id'], - 'title': info['slideshow']['title'], - 'ext': ext, - 'url': video_url, - 'thumbnail': info['slideshow']['pin_image_url'], - 'description': description.strip() if description else None, - } diff --git a/youtube_dl/extractor/slideslive.py b/youtube_dl/extractor/slideslive.py deleted file mode 100644 index 9409a0100..000000000 --- a/youtube_dl/extractor/slideslive.py +++ /dev/null @@ -1,109 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - bool_or_none, - smuggle_url, - try_get, - url_or_none, -) - - -class SlidesLiveIE(InfoExtractor): - _VALID_URL = r'https?://slideslive\.com/(?P<id>[0-9]+)' - _TESTS = [{ - # video_service_name = YOUTUBE - 'url': 'https://slideslive.com/38902413/gcc-ia16-backend', - 'md5': 'b29fcd6c6952d0c79c5079b0e7a07e6f', - 'info_dict': { - 'id': 'LMtgR8ba0b0', - 'ext': 'mp4', - 'title': 'GCC IA16 backend', - 'description': 'Watch full version of this video at https://slideslive.com/38902413.', - 'uploader': 'SlidesLive Videos - A', - 'uploader_id': 'UC62SdArr41t_-_fX40QCLRw', - 'timestamp': 1597615266, - 'upload_date': '20170925', - } - }, { - # video_service_name = yoda - 'url': 'https://slideslive.com/38935785', - 'md5': '575cd7a6c0acc6e28422fe76dd4bcb1a', - 'info_dict': { - 'id': 'RMraDYN5ozA_', - 'ext': 'mp4', - 'title': 'Offline Reinforcement Learning: From Algorithms to Practical Challenges', - }, - 'params': { - 'format': 'bestvideo', - }, - }, { - # video_service_name = youtube - 'url': 'https://slideslive.com/38903721/magic-a-scientific-resurrection-of-an-esoteric-legend', - 'only_matching': True, - }, { - # video_service_name = url - 'url': 
'https://slideslive.com/38922070/learning-transferable-skills-1', - 'only_matching': True, - }, { - # video_service_name = vimeo - 'url': 'https://slideslive.com/38921896/retrospectives-a-venue-for-selfreflection-in-ml-research-3', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - video_data = self._download_json( - 'https://ben.slideslive.com/player/' + video_id, video_id) - service_name = video_data['video_service_name'].lower() - assert service_name in ('url', 'yoda', 'vimeo', 'youtube') - service_id = video_data['video_service_id'] - subtitles = {} - for sub in try_get(video_data, lambda x: x['subtitles'], list) or []: - if not isinstance(sub, dict): - continue - webvtt_url = url_or_none(sub.get('webvtt_url')) - if not webvtt_url: - continue - lang = sub.get('language') or 'en' - subtitles.setdefault(lang, []).append({ - 'url': webvtt_url, - }) - info = { - 'id': video_id, - 'thumbnail': video_data.get('thumbnail'), - 'is_live': bool_or_none(video_data.get('is_live')), - 'subtitles': subtitles, - } - if service_name in ('url', 'yoda'): - info['title'] = video_data['title'] - if service_name == 'url': - info['url'] = service_id - else: - formats = [] - _MANIFEST_PATTERN = 'https://01.cdn.yoda.slideslive.com/%s/master.%s' - # use `m3u8` entry_protocol until EXT-X-MAP is properly supported by `m3u8_native` entry_protocol - formats.extend(self._extract_m3u8_formats( - _MANIFEST_PATTERN % (service_id, 'm3u8'), - service_id, 'mp4', m3u8_id='hls', fatal=False)) - formats.extend(self._extract_mpd_formats( - _MANIFEST_PATTERN % (service_id, 'mpd'), service_id, - mpd_id='dash', fatal=False)) - self._sort_formats(formats) - info.update({ - 'id': service_id, - 'formats': formats, - }) - else: - info.update({ - '_type': 'url_transparent', - 'url': service_id, - 'ie_key': service_name.capitalize(), - 'title': video_data.get('title'), - }) - if service_name == 'vimeo': - info['url'] = smuggle_url( - 
'https://player.vimeo.com/video/' + service_id, - {'http_headers': {'Referer': url}}) - return info diff --git a/youtube_dl/extractor/slutload.py b/youtube_dl/extractor/slutload.py deleted file mode 100644 index 661f9e59d..000000000 --- a/youtube_dl/extractor/slutload.py +++ /dev/null @@ -1,65 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor - - -class SlutloadIE(InfoExtractor): - _VALID_URL = r'https?://(?:\w+\.)?slutload\.com/(?:video/[^/]+|embed_player|watch)/(?P<id>[^/]+)' - _TESTS = [{ - 'url': 'http://www.slutload.com/video/virginie-baisee-en-cam/TD73btpBqSxc/', - 'md5': '868309628ba00fd488cf516a113fd717', - 'info_dict': { - 'id': 'TD73btpBqSxc', - 'ext': 'mp4', - 'title': 'virginie baisee en cam', - 'age_limit': 18, - 'thumbnail': r're:https?://.*?\.jpg' - }, - }, { - # mobile site - 'url': 'http://mobile.slutload.com/video/masturbation-solo/fviFLmc6kzJ/', - 'only_matching': True, - }, { - 'url': 'http://www.slutload.com/embed_player/TD73btpBqSxc/', - 'only_matching': True, - }, { - 'url': 'http://www.slutload.com/watch/TD73btpBqSxc/Virginie-Baisee-En-Cam.html', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - embed_page = self._download_webpage( - 'http://www.slutload.com/embed_player/%s' % video_id, video_id, - 'Downloading embed page', fatal=False) - - if embed_page: - def extract(what): - return self._html_search_regex( - r'data-video-%s=(["\'])(?P<url>(?:(?!\1).)+)\1' % what, - embed_page, 'video %s' % what, default=None, group='url') - - video_url = extract('url') - if video_url: - title = self._html_search_regex( - r'<title>([^<]+)', embed_page, 'title', default=video_id) - return { - 'id': video_id, - 'url': video_url, - 'title': title, - 'thumbnail': extract('preview'), - 'age_limit': 18 - } - - webpage = self._download_webpage( - 'http://www.slutload.com/video/_/%s/' % video_id, video_id) - title = self._html_search_regex( - r'<h1><strong>([^<]+)</strong>', 
webpage, 'title').strip() - info = self._parse_html5_media_entries(url, webpage, video_id)[0] - info.update({ - 'id': video_id, - 'title': title, - 'age_limit': 18, - }) - return info diff --git a/youtube_dl/extractor/snotr.py b/youtube_dl/extractor/snotr.py deleted file mode 100644 index f77354748..000000000 --- a/youtube_dl/extractor/snotr.py +++ /dev/null @@ -1,73 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - parse_duration, - parse_filesize, - str_to_int, -) - - -class SnotrIE(InfoExtractor): - _VALID_URL = r'http?://(?:www\.)?snotr\.com/video/(?P<id>\d+)/([\w]+)' - _TESTS = [{ - 'url': 'http://www.snotr.com/video/13708/Drone_flying_through_fireworks', - 'info_dict': { - 'id': '13708', - 'ext': 'mp4', - 'title': 'Drone flying through fireworks!', - 'duration': 248, - 'filesize_approx': 40700000, - 'description': 'A drone flying through Fourth of July Fireworks', - 'thumbnail': r're:^https?://.*\.jpg$', - }, - 'expected_warnings': ['description'], - }, { - 'url': 'http://www.snotr.com/video/530/David_Letteman_-_George_W_Bush_Top_10', - 'info_dict': { - 'id': '530', - 'ext': 'mp4', - 'title': 'David Letteman - George W. Bush Top 10', - 'duration': 126, - 'filesize_approx': 8500000, - 'description': 'The top 10 George W. 
Bush moments, brought to you by David Letterman!', - 'thumbnail': r're:^https?://.*\.jpg$', - } - }] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - - webpage = self._download_webpage(url, video_id) - title = self._og_search_title(webpage) - - description = self._og_search_description(webpage) - info_dict = self._parse_html5_media_entries( - url, webpage, video_id, m3u8_entry_protocol='m3u8_native')[0] - - view_count = str_to_int(self._html_search_regex( - r'<p[^>]*>\s*<strong[^>]*>Views:</strong>\s*<span[^>]*>([\d,\.]+)', - webpage, 'view count', fatal=False)) - - duration = parse_duration(self._html_search_regex( - r'<p[^>]*>\s*<strong[^>]*>Length:</strong>\s*<span[^>]*>([\d:]+)', - webpage, 'duration', fatal=False)) - - filesize_approx = parse_filesize(self._html_search_regex( - r'<p[^>]*>\s*<strong[^>]*>Filesize:</strong>\s*<span[^>]*>([^<]+)', - webpage, 'filesize', fatal=False)) - - info_dict.update({ - 'id': video_id, - 'description': description, - 'title': title, - 'view_count': view_count, - 'duration': duration, - 'filesize_approx': filesize_approx, - }) - - return info_dict diff --git a/youtube_dl/extractor/sohu.py b/youtube_dl/extractor/sohu.py deleted file mode 100644 index a62ed84f1..000000000 --- a/youtube_dl/extractor/sohu.py +++ /dev/null @@ -1,202 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..compat import ( - compat_str, - compat_urllib_parse_urlencode, -) -from ..utils import ( - ExtractorError, - int_or_none, - try_get, -) - - -class SohuIE(InfoExtractor): - _VALID_URL = r'https?://(?P<mytv>my\.)?tv\.sohu\.com/.+?/(?(mytv)|n)(?P<id>\d+)\.shtml.*?' 
- - # Sohu videos give different MD5 sums on Travis CI and my machine - _TESTS = [{ - 'note': 'This video is available only in Mainland China', - 'url': 'http://tv.sohu.com/20130724/n382479172.shtml#super', - 'info_dict': { - 'id': '382479172', - 'ext': 'mp4', - 'title': 'MV:Far East Movement《The Illest》', - }, - 'skip': 'On available in China', - }, { - 'url': 'http://tv.sohu.com/20150305/n409385080.shtml', - 'info_dict': { - 'id': '409385080', - 'ext': 'mp4', - 'title': '《2015湖南卫视羊年元宵晚会》唐嫣《花好月圆》', - } - }, { - 'url': 'http://my.tv.sohu.com/us/232799889/78693464.shtml', - 'info_dict': { - 'id': '78693464', - 'ext': 'mp4', - 'title': '【爱范品】第31期:MWC见不到的奇葩手机', - } - }, { - 'note': 'Multipart video', - 'url': 'http://my.tv.sohu.com/pl/8384802/78910339.shtml', - 'info_dict': { - 'id': '78910339', - 'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆', - }, - 'playlist': [{ - 'info_dict': { - 'id': '78910339_part1', - 'ext': 'mp4', - 'duration': 294, - 'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆', - } - }, { - 'info_dict': { - 'id': '78910339_part2', - 'ext': 'mp4', - 'duration': 300, - 'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆', - } - }, { - 'info_dict': { - 'id': '78910339_part3', - 'ext': 'mp4', - 'duration': 150, - 'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆', - } - }] - }, { - 'note': 'Video with title containing dash', - 'url': 'http://my.tv.sohu.com/us/249884221/78932792.shtml', - 'info_dict': { - 'id': '78932792', - 'ext': 'mp4', - 'title': 'youtube-dl testing video', - }, - 'params': { - 'skip_download': True - } - }] - - def _real_extract(self, url): - - def _fetch_data(vid_id, mytv=False): - if mytv: - base_data_url = 'http://my.tv.sohu.com/play/videonew.do?vid=' - else: - base_data_url = 'http://hot.vrs.sohu.com/vrs_flash.action?vid=' - - return self._download_json( - base_data_url + vid_id, video_id, - 'Downloading JSON data for %s' % vid_id, - headers=self.geo_verification_headers()) - - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - mytv = mobj.group('mytv') is not None - - 
webpage = self._download_webpage(url, video_id) - - title = re.sub(r' - 搜狐视频$', '', self._og_search_title(webpage)) - - vid = self._html_search_regex( - r'var vid ?= ?["\'](\d+)["\']', - webpage, 'video path') - vid_data = _fetch_data(vid, mytv) - if vid_data['play'] != 1: - if vid_data.get('status') == 12: - raise ExtractorError( - '%s said: There\'s something wrong in the video.' % self.IE_NAME, - expected=True) - else: - self.raise_geo_restricted( - '%s said: The video is only licensed to users in Mainland China.' % self.IE_NAME) - - formats_json = {} - for format_id in ('nor', 'high', 'super', 'ori', 'h2644k', 'h2654k'): - vid_id = vid_data['data'].get('%sVid' % format_id) - if not vid_id: - continue - vid_id = compat_str(vid_id) - formats_json[format_id] = vid_data if vid == vid_id else _fetch_data(vid_id, mytv) - - part_count = vid_data['data']['totalBlocks'] - - playlist = [] - for i in range(part_count): - formats = [] - for format_id, format_data in formats_json.items(): - allot = format_data['allot'] - - data = format_data['data'] - clips_url = data['clipsURL'] - su = data['su'] - - video_url = 'newflv.sohu.ccgslb.net' - cdnId = None - retries = 0 - - while 'newflv.sohu.ccgslb.net' in video_url: - params = { - 'prot': 9, - 'file': clips_url[i], - 'new': su[i], - 'prod': 'flash', - 'rb': 1, - } - - if cdnId is not None: - params['idc'] = cdnId - - download_note = 'Downloading %s video URL part %d of %d' % ( - format_id, i + 1, part_count) - - if retries > 0: - download_note += ' (retry #%d)' % retries - part_info = self._parse_json(self._download_webpage( - 'http://%s/?%s' % (allot, compat_urllib_parse_urlencode(params)), - video_id, download_note), video_id) - - video_url = part_info['url'] - cdnId = part_info.get('nid') - - retries += 1 - if retries > 5: - raise ExtractorError('Failed to get video URL') - - formats.append({ - 'url': video_url, - 'format_id': format_id, - 'filesize': int_or_none( - try_get(data, lambda x: x['clipsBytes'][i])), - 'width': 
int_or_none(data.get('width')), - 'height': int_or_none(data.get('height')), - 'fps': int_or_none(data.get('fps')), - }) - self._sort_formats(formats) - - playlist.append({ - 'id': '%s_part%d' % (video_id, i + 1), - 'title': title, - 'duration': vid_data['data']['clipsDuration'][i], - 'formats': formats, - }) - - if len(playlist) == 1: - info = playlist[0] - info['id'] = video_id - else: - info = { - '_type': 'multi_video', - 'entries': playlist, - 'id': video_id, - 'title': title, - } - - return info diff --git a/youtube_dl/extractor/sonyliv.py b/youtube_dl/extractor/sonyliv.py deleted file mode 100644 index fedfceb62..000000000 --- a/youtube_dl/extractor/sonyliv.py +++ /dev/null @@ -1,112 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import time -import uuid - -from .common import InfoExtractor -from ..compat import compat_HTTPError -from ..utils import ( - ExtractorError, - int_or_none, -) - - -class SonyLIVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?sonyliv\.com/(?:s(?:how|port)s/[^/]+|movies|clip|trailer|music-videos)/[^/?#&]+-(?P<id>\d+)' - _TESTS = [{ - 'url': 'https://www.sonyliv.com/shows/bachelors-delight-1700000113/achaari-cheese-toast-1000022678?watch=true', - 'info_dict': { - 'title': 'Bachelors Delight - Achaari Cheese Toast', - 'id': '1000022678', - 'ext': 'mp4', - 'upload_date': '20200411', - 'description': 'md5:3957fa31d9309bf336ceb3f37ad5b7cb', - 'timestamp': 1586632091, - 'duration': 185, - 'season_number': 1, - 'episode': 'Achaari Cheese Toast', - 'episode_number': 1, - 'release_year': 2016, - }, - 'params': { - 'skip_download': True, - }, - }, { - 'url': 'https://www.sonyliv.com/movies/tahalka-1000050121?watch=true', - 'only_matching': True, - }, { - 'url': 'https://www.sonyliv.com/clip/jigarbaaz-1000098925', - 'only_matching': True, - }, { - 'url': 'https://www.sonyliv.com/trailer/sandwiched-forever-1000100286?watch=true', - 'only_matching': True, - }, { - 'url': 
'https://www.sonyliv.com/sports/india-tour-of-australia-2020-21-1700000286/cricket-hls-day-3-1st-test-aus-vs-ind-19-dec-2020-1000100959?watch=true', - 'only_matching': True, - }, { - 'url': 'https://www.sonyliv.com/music-videos/yeh-un-dinon-ki-baat-hai-1000018779', - 'only_matching': True, - }] - _GEO_COUNTRIES = ['IN'] - _TOKEN = None - - def _call_api(self, version, path, video_id): - headers = {} - if self._TOKEN: - headers['security_token'] = self._TOKEN - try: - return self._download_json( - 'https://apiv2.sonyliv.com/AGL/%s/A/ENG/WEB/%s' % (version, path), - video_id, headers=headers)['resultObj'] - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: - message = self._parse_json( - e.cause.read().decode(), video_id)['message'] - if message == 'Geoblocked Country': - self.raise_geo_restricted(countries=self._GEO_COUNTRIES) - raise ExtractorError(message) - raise - - def _real_initialize(self): - self._TOKEN = self._call_api('1.4', 'ALL/GETTOKEN', None) - - def _real_extract(self, url): - video_id = self._match_id(url) - content = self._call_api( - '1.5', 'IN/CONTENT/VIDEOURL/VOD/' + video_id, video_id) - if content.get('isEncrypted'): - raise ExtractorError('This video is DRM protected.', expected=True) - dash_url = content['videoURL'] - headers = { - 'x-playback-session-id': '%s-%d' % (uuid.uuid4().hex, time.time() * 1000) - } - formats = self._extract_mpd_formats( - dash_url, video_id, mpd_id='dash', headers=headers, fatal=False) - formats.extend(self._extract_m3u8_formats( - dash_url.replace('.mpd', '.m3u8').replace('/DASH/', '/HLS/'), - video_id, 'mp4', m3u8_id='hls', headers=headers, fatal=False)) - for f in formats: - f.setdefault('http_headers', {}).update(headers) - self._sort_formats(formats) - - metadata = self._call_api( - '1.6', 'IN/DETAIL/' + video_id, video_id)['containers'][0]['metadata'] - title = metadata['title'] - episode = metadata.get('episodeTitle') - if episode and title != episode: - title += 
' - ' + episode - - return { - 'id': video_id, - 'title': title, - 'formats': formats, - 'thumbnail': content.get('posterURL'), - 'description': metadata.get('longDescription') or metadata.get('shortDescription'), - 'timestamp': int_or_none(metadata.get('creationDate'), 1000), - 'duration': int_or_none(metadata.get('duration')), - 'season_number': int_or_none(metadata.get('season')), - 'episode': episode, - 'episode_number': int_or_none(metadata.get('episodeNumber')), - 'release_year': int_or_none(metadata.get('year')), - } diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py deleted file mode 100644 index abb85e1e5..000000000 --- a/youtube_dl/extractor/soundcloud.py +++ /dev/null @@ -1,815 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import itertools -import re - -from .common import ( - InfoExtractor, - SearchInfoExtractor -) -from ..compat import ( - compat_HTTPError, - compat_kwargs, - compat_str, - compat_urlparse, -) -from ..utils import ( - error_to_compat_str, - ExtractorError, - float_or_none, - HEADRequest, - int_or_none, - KNOWN_EXTENSIONS, - mimetype2ext, - str_or_none, - try_get, - unified_timestamp, - update_url_query, - url_or_none, - urlhandle_detect_ext, -) - - -class SoundcloudEmbedIE(InfoExtractor): - _VALID_URL = r'https?://(?:w|player|p)\.soundcloud\.com/player/?.*?\burl=(?P<id>.+)' - _TEST = { - # from https://www.soundi.fi/uutiset/ennakkokuuntelussa-timo-kaukolammen-station-to-station-to-station-julkaisua-juhlitaan-tanaan-g-livelabissa/ - 'url': 'https://w.soundcloud.com/player/?visual=true&url=https%3A%2F%2Fapi.soundcloud.com%2Fplaylists%2F922213810&show_artwork=true&maxwidth=640&maxheight=960&dnt=1&secret_token=s-ziYey', - 'only_matching': True, - } - - @staticmethod - def _extract_urls(webpage): - return [m.group('url') for m in re.finditer( - r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:w\.)?soundcloud\.com/player.+?)\1', - webpage)] - - def _real_extract(self, url): - query = 
compat_urlparse.parse_qs( - compat_urlparse.urlparse(url).query) - api_url = query['url'][0] - secret_token = query.get('secret_token') - if secret_token: - api_url = update_url_query(api_url, {'secret_token': secret_token[0]}) - return self.url_result(api_url) - - -class SoundcloudIE(InfoExtractor): - """Information extractor for soundcloud.com - To access the media, the uid of the song and a stream token - must be extracted from the page source and the script must make - a request to media.soundcloud.com/crossdomain.xml. Then - the media can be grabbed by requesting from an url composed - of the stream token and uid - """ - - _VALID_URL = r'''(?x)^(?:https?://)? - (?:(?:(?:www\.|m\.)?soundcloud\.com/ - (?!stations/track) - (?P<uploader>[\w\d-]+)/ - (?!(?:tracks|albums|sets(?:/.+?)?|reposts|likes|spotlight)/?(?:$|[?#])) - (?P<title>[\w\d-]+)/? - (?P<token>[^?]+?)?(?:[?].*)?$) - |(?:api(?:-v2)?\.soundcloud\.com/tracks/(?P<track_id>\d+) - (?:/?\?secret_token=(?P<secret_token>[^&]+))?) - ) - ''' - IE_NAME = 'soundcloud' - _TESTS = [ - { - 'url': 'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy', - 'md5': 'ebef0a451b909710ed1d7787dddbf0d7', - 'info_dict': { - 'id': '62986583', - 'ext': 'mp3', - 'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1', - 'description': 'No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o\'d', - 'uploader': 'E.T. 
ExTerrestrial Music', - 'uploader_id': '1571244', - 'timestamp': 1349920598, - 'upload_date': '20121011', - 'duration': 143.216, - 'license': 'all-rights-reserved', - 'view_count': int, - 'like_count': int, - 'comment_count': int, - 'repost_count': int, - } - }, - # geo-restricted - { - 'url': 'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep', - 'info_dict': { - 'id': '47127627', - 'ext': 'mp3', - 'title': 'Goldrushed', - 'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com', - 'uploader': 'The Royal Concept', - 'uploader_id': '9615865', - 'timestamp': 1337635207, - 'upload_date': '20120521', - 'duration': 227.155, - 'license': 'all-rights-reserved', - 'view_count': int, - 'like_count': int, - 'comment_count': int, - 'repost_count': int, - }, - }, - # private link - { - 'url': 'https://soundcloud.com/jaimemf/youtube-dl-test-video-a-y-baw/s-8Pjrp', - 'md5': 'aa0dd32bfea9b0c5ef4f02aacd080604', - 'info_dict': { - 'id': '123998367', - 'ext': 'mp3', - 'title': 'Youtube - Dl Test Video \'\' Ä↭', - 'description': 'test chars: \"\'/\\ä↭', - 'uploader': 'jaimeMF', - 'uploader_id': '69767071', - 'timestamp': 1386604920, - 'upload_date': '20131209', - 'duration': 9.927, - 'license': 'all-rights-reserved', - 'view_count': int, - 'like_count': int, - 'comment_count': int, - 'repost_count': int, - }, - }, - # private link (alt format) - { - 'url': 'https://api.soundcloud.com/tracks/123998367?secret_token=s-8Pjrp', - 'md5': 'aa0dd32bfea9b0c5ef4f02aacd080604', - 'info_dict': { - 'id': '123998367', - 'ext': 'mp3', - 'title': 'Youtube - Dl Test Video \'\' Ä↭', - 'description': 'test chars: \"\'/\\ä↭', - 'uploader': 'jaimeMF', - 'uploader_id': '69767071', - 'timestamp': 1386604920, - 'upload_date': '20131209', - 'duration': 9.927, - 'license': 'all-rights-reserved', - 'view_count': int, - 'like_count': int, - 'comment_count': int, - 'repost_count': int, - }, - }, - # downloadable 
song - { - 'url': 'https://soundcloud.com/oddsamples/bus-brakes', - 'md5': '7624f2351f8a3b2e7cd51522496e7631', - 'info_dict': { - 'id': '128590877', - 'ext': 'mp3', - 'title': 'Bus Brakes', - 'description': 'md5:0053ca6396e8d2fd7b7e1595ef12ab66', - 'uploader': 'oddsamples', - 'uploader_id': '73680509', - 'timestamp': 1389232924, - 'upload_date': '20140109', - 'duration': 17.346, - 'license': 'cc-by-sa', - 'view_count': int, - 'like_count': int, - 'comment_count': int, - 'repost_count': int, - }, - }, - # private link, downloadable format - { - 'url': 'https://soundcloud.com/oriuplift/uponly-238-no-talking-wav/s-AyZUd', - 'md5': '64a60b16e617d41d0bef032b7f55441e', - 'info_dict': { - 'id': '340344461', - 'ext': 'wav', - 'title': 'Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav]', - 'description': 'md5:fa20ee0fca76a3d6df8c7e57f3715366', - 'uploader': 'Ori Uplift Music', - 'uploader_id': '12563093', - 'timestamp': 1504206263, - 'upload_date': '20170831', - 'duration': 7449.096, - 'license': 'all-rights-reserved', - 'view_count': int, - 'like_count': int, - 'comment_count': int, - 'repost_count': int, - }, - }, - # no album art, use avatar pic for thumbnail - { - 'url': 'https://soundcloud.com/garyvee/sideways-prod-mad-real', - 'md5': '59c7872bc44e5d99b7211891664760c2', - 'info_dict': { - 'id': '309699954', - 'ext': 'mp3', - 'title': 'Sideways (Prod. 
Mad Real)', - 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', - 'uploader': 'garyvee', - 'uploader_id': '2366352', - 'timestamp': 1488152409, - 'upload_date': '20170226', - 'duration': 207.012, - 'thumbnail': r're:https?://.*\.jpg', - 'license': 'all-rights-reserved', - 'view_count': int, - 'like_count': int, - 'comment_count': int, - 'repost_count': int, - }, - 'params': { - 'skip_download': True, - }, - }, - { - 'url': 'https://soundcloud.com/giovannisarani/mezzo-valzer', - 'md5': 'e22aecd2bc88e0e4e432d7dcc0a1abf7', - 'info_dict': { - 'id': '583011102', - 'ext': 'mp3', - 'title': 'Mezzo Valzer', - 'description': 'md5:4138d582f81866a530317bae316e8b61', - 'uploader': 'Micronie', - 'uploader_id': '3352531', - 'timestamp': 1551394171, - 'upload_date': '20190228', - 'duration': 180.157, - 'thumbnail': r're:https?://.*\.jpg', - 'license': 'all-rights-reserved', - 'view_count': int, - 'like_count': int, - 'comment_count': int, - 'repost_count': int, - }, - }, - { - # with AAC HQ format available via OAuth token - 'url': 'https://soundcloud.com/wandw/the-chainsmokers-ft-daya-dont-let-me-down-ww-remix-1', - 'only_matching': True, - }, - ] - - _API_V2_BASE = 'https://api-v2.soundcloud.com/' - _BASE_URL = 'https://soundcloud.com/' - _IMAGE_REPL_RE = r'-([0-9a-z]+)\.jpg' - - _ARTWORK_MAP = { - 'mini': 16, - 'tiny': 20, - 'small': 32, - 'badge': 47, - 't67x67': 67, - 'large': 100, - 't300x300': 300, - 'crop': 400, - 't500x500': 500, - 'original': 0, - } - - def _store_client_id(self, client_id): - self._downloader.cache.store('soundcloud', 'client_id', client_id) - - def _update_client_id(self): - webpage = self._download_webpage('https://soundcloud.com/', None) - for src in reversed(re.findall(r'<script[^>]+src="([^"]+)"', webpage)): - script = self._download_webpage(src, None, fatal=False) - if script: - client_id = self._search_regex( - r'client_id\s*:\s*"([0-9a-zA-Z]{32})"', - script, 'client id', default=None) - if client_id: - self._CLIENT_ID = client_id - 
self._store_client_id(client_id) - return - raise ExtractorError('Unable to extract client id') - - def _download_json(self, *args, **kwargs): - non_fatal = kwargs.get('fatal') is False - if non_fatal: - del kwargs['fatal'] - query = kwargs.get('query', {}).copy() - for _ in range(2): - query['client_id'] = self._CLIENT_ID - kwargs['query'] = query - try: - return super(SoundcloudIE, self)._download_json(*args, **compat_kwargs(kwargs)) - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: - self._store_client_id(None) - self._update_client_id() - continue - elif non_fatal: - self._downloader.report_warning(error_to_compat_str(e)) - return False - raise - - def _real_initialize(self): - self._CLIENT_ID = self._downloader.cache.load('soundcloud', 'client_id') or 'YUKXoArFcqrlQn9tfNHvvyfnDISj04zk' - - @classmethod - def _resolv_url(cls, url): - return SoundcloudIE._API_V2_BASE + 'resolve?url=' + url - - def _extract_info_dict(self, info, full_title=None, secret_token=None): - track_id = compat_str(info['id']) - title = info['title'] - - format_urls = set() - formats = [] - query = {'client_id': self._CLIENT_ID} - if secret_token: - query['secret_token'] = secret_token - - if info.get('downloadable') and info.get('has_downloads_left'): - download_url = update_url_query( - self._API_V2_BASE + 'tracks/' + track_id + '/download', query) - redirect_url = (self._download_json(download_url, track_id, fatal=False) or {}).get('redirectUri') - if redirect_url: - urlh = self._request_webpage( - HEADRequest(redirect_url), track_id, fatal=False) - if urlh: - format_url = urlh.geturl() - format_urls.add(format_url) - formats.append({ - 'format_id': 'download', - 'ext': urlhandle_detect_ext(urlh) or 'mp3', - 'filesize': int_or_none(urlh.headers.get('Content-Length')), - 'url': format_url, - 'preference': 10, - }) - - def invalid_url(url): - return not url or url in format_urls - - def add_format(f, protocol, is_preview=False): - mobj = 
re.search(r'\.(?P<abr>\d+)\.(?P<ext>[0-9a-z]{3,4})(?=[/?])', stream_url) - if mobj: - for k, v in mobj.groupdict().items(): - if not f.get(k): - f[k] = v - format_id_list = [] - if protocol: - format_id_list.append(protocol) - ext = f.get('ext') - if ext == 'aac': - f['abr'] = '256' - for k in ('ext', 'abr'): - v = f.get(k) - if v: - format_id_list.append(v) - preview = is_preview or re.search(r'/(?:preview|playlist)/0/30/', f['url']) - if preview: - format_id_list.append('preview') - abr = f.get('abr') - if abr: - f['abr'] = int(abr) - if protocol == 'hls': - protocol = 'm3u8' if ext == 'aac' else 'm3u8_native' - else: - protocol = 'http' - f.update({ - 'format_id': '_'.join(format_id_list), - 'protocol': protocol, - 'preference': -10 if preview else None, - }) - formats.append(f) - - # New API - transcodings = try_get( - info, lambda x: x['media']['transcodings'], list) or [] - for t in transcodings: - if not isinstance(t, dict): - continue - format_url = url_or_none(t.get('url')) - if not format_url: - continue - stream = self._download_json( - format_url, track_id, query=query, fatal=False) - if not isinstance(stream, dict): - continue - stream_url = url_or_none(stream.get('url')) - if invalid_url(stream_url): - continue - format_urls.add(stream_url) - stream_format = t.get('format') or {} - protocol = stream_format.get('protocol') - if protocol != 'hls' and '/hls' in format_url: - protocol = 'hls' - ext = None - preset = str_or_none(t.get('preset')) - if preset: - ext = preset.split('_')[0] - if ext not in KNOWN_EXTENSIONS: - ext = mimetype2ext(stream_format.get('mime_type')) - add_format({ - 'url': stream_url, - 'ext': ext, - }, 'http' if protocol == 'progressive' else protocol, - t.get('snipped') or '/preview/' in format_url) - - for f in formats: - f['vcodec'] = 'none' - - if not formats and info.get('policy') == 'BLOCK': - self.raise_geo_restricted() - self._sort_formats(formats) - - user = info.get('user') or {} - - thumbnails = [] - artwork_url = 
info.get('artwork_url') - thumbnail = artwork_url or user.get('avatar_url') - if isinstance(thumbnail, compat_str): - if re.search(self._IMAGE_REPL_RE, thumbnail): - for image_id, size in self._ARTWORK_MAP.items(): - i = { - 'id': image_id, - 'url': re.sub(self._IMAGE_REPL_RE, '-%s.jpg' % image_id, thumbnail), - } - if image_id == 'tiny' and not artwork_url: - size = 18 - elif image_id == 'original': - i['preference'] = 10 - if size: - i.update({ - 'width': size, - 'height': size, - }) - thumbnails.append(i) - else: - thumbnails = [{'url': thumbnail}] - - def extract_count(key): - return int_or_none(info.get('%s_count' % key)) - - return { - 'id': track_id, - 'uploader': user.get('username'), - 'uploader_id': str_or_none(user.get('id')) or user.get('permalink'), - 'uploader_url': user.get('permalink_url'), - 'timestamp': unified_timestamp(info.get('created_at')), - 'title': title, - 'description': info.get('description'), - 'thumbnails': thumbnails, - 'duration': float_or_none(info.get('duration'), 1000), - 'webpage_url': info.get('permalink_url'), - 'license': info.get('license'), - 'view_count': extract_count('playback'), - 'like_count': extract_count('favoritings') or extract_count('likes'), - 'comment_count': extract_count('comment'), - 'repost_count': extract_count('reposts'), - 'genre': info.get('genre'), - 'formats': formats - } - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - - track_id = mobj.group('track_id') - - query = {} - if track_id: - info_json_url = self._API_V2_BASE + 'tracks/' + track_id - full_title = track_id - token = mobj.group('secret_token') - if token: - query['secret_token'] = token - else: - full_title = resolve_title = '%s/%s' % mobj.group('uploader', 'title') - token = mobj.group('token') - if token: - resolve_title += '/%s' % token - info_json_url = self._resolv_url(self._BASE_URL + resolve_title) - - info = self._download_json( - info_json_url, full_title, 'Downloading info JSON', query=query) - - return 
self._extract_info_dict(info, full_title, token) - - -class SoundcloudPlaylistBaseIE(SoundcloudIE): - def _extract_set(self, playlist, token=None): - playlist_id = compat_str(playlist['id']) - tracks = playlist.get('tracks') or [] - if not all([t.get('permalink_url') for t in tracks]) and token: - tracks = self._download_json( - self._API_V2_BASE + 'tracks', playlist_id, - 'Downloading tracks', query={ - 'ids': ','.join([compat_str(t['id']) for t in tracks]), - 'playlistId': playlist_id, - 'playlistSecretToken': token, - }) - entries = [] - for track in tracks: - track_id = str_or_none(track.get('id')) - url = track.get('permalink_url') - if not url: - if not track_id: - continue - url = self._API_V2_BASE + 'tracks/' + track_id - if token: - url += '?secret_token=' + token - entries.append(self.url_result( - url, SoundcloudIE.ie_key(), track_id)) - return self.playlist_result( - entries, playlist_id, - playlist.get('title'), - playlist.get('description')) - - -class SoundcloudSetIE(SoundcloudPlaylistBaseIE): - _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/(?P<uploader>[\w\d-]+)/sets/(?P<slug_title>[\w\d-]+)(?:/(?P<token>[^?/]+))?' 
- IE_NAME = 'soundcloud:set' - _TESTS = [{ - 'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep', - 'info_dict': { - 'id': '2284613', - 'title': 'The Royal Concept EP', - 'description': 'md5:71d07087c7a449e8941a70a29e34671e', - }, - 'playlist_mincount': 5, - }, { - 'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep/token', - 'only_matching': True, - }] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - - full_title = '%s/sets/%s' % mobj.group('uploader', 'slug_title') - token = mobj.group('token') - if token: - full_title += '/' + token - - info = self._download_json(self._resolv_url( - self._BASE_URL + full_title), full_title) - - if 'errors' in info: - msgs = (compat_str(err['error_message']) for err in info['errors']) - raise ExtractorError('unable to download video webpage: %s' % ','.join(msgs)) - - return self._extract_set(info, token) - - -class SoundcloudPagedPlaylistBaseIE(SoundcloudIE): - def _extract_playlist(self, base_url, playlist_id, playlist_title): - # Per the SoundCloud documentation, the maximum limit for a linked partitioning query is 200. 
- # https://developers.soundcloud.com/blog/offset-pagination-deprecated - COMMON_QUERY = { - 'limit': 200, - 'linked_partitioning': '1', - } - - query = COMMON_QUERY.copy() - query['offset'] = 0 - - next_href = base_url - - entries = [] - for i in itertools.count(): - response = self._download_json( - next_href, playlist_id, - 'Downloading track page %s' % (i + 1), query=query) - - collection = response['collection'] - - if not isinstance(collection, list): - collection = [] - - # Empty collection may be returned, in this case we proceed - # straight to next_href - - def resolve_entry(candidates): - for cand in candidates: - if not isinstance(cand, dict): - continue - permalink_url = url_or_none(cand.get('permalink_url')) - if not permalink_url: - continue - return self.url_result( - permalink_url, - SoundcloudIE.ie_key() if SoundcloudIE.suitable(permalink_url) else None, - str_or_none(cand.get('id')), cand.get('title')) - - for e in collection: - entry = resolve_entry((e, e.get('track'), e.get('playlist'))) - if entry: - entries.append(entry) - - next_href = response.get('next_href') - if not next_href: - break - - next_href = response['next_href'] - parsed_next_href = compat_urlparse.urlparse(next_href) - query = compat_urlparse.parse_qs(parsed_next_href.query) - query.update(COMMON_QUERY) - - return { - '_type': 'playlist', - 'id': playlist_id, - 'title': playlist_title, - 'entries': entries, - } - - -class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE): - _VALID_URL = r'''(?x) - https?:// - (?:(?:www|m)\.)?soundcloud\.com/ - (?P<user>[^/]+) - (?:/ - (?P<rsrc>tracks|albums|sets|reposts|likes|spotlight) - )? 
- /?(?:[?#].*)?$ - ''' - IE_NAME = 'soundcloud:user' - _TESTS = [{ - 'url': 'https://soundcloud.com/soft-cell-official', - 'info_dict': { - 'id': '207965082', - 'title': 'Soft Cell (All)', - }, - 'playlist_mincount': 28, - }, { - 'url': 'https://soundcloud.com/soft-cell-official/tracks', - 'info_dict': { - 'id': '207965082', - 'title': 'Soft Cell (Tracks)', - }, - 'playlist_mincount': 27, - }, { - 'url': 'https://soundcloud.com/soft-cell-official/albums', - 'info_dict': { - 'id': '207965082', - 'title': 'Soft Cell (Albums)', - }, - 'playlist_mincount': 1, - }, { - 'url': 'https://soundcloud.com/jcv246/sets', - 'info_dict': { - 'id': '12982173', - 'title': 'Jordi / cv (Sets)', - }, - 'playlist_mincount': 2, - }, { - 'url': 'https://soundcloud.com/jcv246/reposts', - 'info_dict': { - 'id': '12982173', - 'title': 'Jordi / cv (Reposts)', - }, - 'playlist_mincount': 6, - }, { - 'url': 'https://soundcloud.com/clalberg/likes', - 'info_dict': { - 'id': '11817582', - 'title': 'clalberg (Likes)', - }, - 'playlist_mincount': 5, - }, { - 'url': 'https://soundcloud.com/grynpyret/spotlight', - 'info_dict': { - 'id': '7098329', - 'title': 'Grynpyret (Spotlight)', - }, - 'playlist_mincount': 1, - }] - - _BASE_URL_MAP = { - 'all': 'stream/users/%s', - 'tracks': 'users/%s/tracks', - 'albums': 'users/%s/albums', - 'sets': 'users/%s/playlists', - 'reposts': 'stream/users/%s/reposts', - 'likes': 'users/%s/likes', - 'spotlight': 'users/%s/spotlight', - } - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - uploader = mobj.group('user') - - user = self._download_json( - self._resolv_url(self._BASE_URL + uploader), - uploader, 'Downloading user info') - - resource = mobj.group('rsrc') or 'all' - - return self._extract_playlist( - self._API_V2_BASE + self._BASE_URL_MAP[resource] % user['id'], - str_or_none(user.get('id')), - '%s (%s)' % (user['username'], resource.capitalize())) - - -class SoundcloudTrackStationIE(SoundcloudPagedPlaylistBaseIE): - _VALID_URL = 
r'https?://(?:(?:www|m)\.)?soundcloud\.com/stations/track/[^/]+/(?P<id>[^/?#&]+)' - IE_NAME = 'soundcloud:trackstation' - _TESTS = [{ - 'url': 'https://soundcloud.com/stations/track/officialsundial/your-text', - 'info_dict': { - 'id': '286017854', - 'title': 'Track station: your text', - }, - 'playlist_mincount': 47, - }] - - def _real_extract(self, url): - track_name = self._match_id(url) - - track = self._download_json(self._resolv_url(url), track_name) - track_id = self._search_regex( - r'soundcloud:track-stations:(\d+)', track['id'], 'track id') - - return self._extract_playlist( - self._API_V2_BASE + 'stations/%s/tracks' % track['id'], - track_id, 'Track station: %s' % track['title']) - - -class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE): - _VALID_URL = r'https?://api(?:-v2)?\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$' - IE_NAME = 'soundcloud:playlist' - _TESTS = [{ - 'url': 'https://api.soundcloud.com/playlists/4110309', - 'info_dict': { - 'id': '4110309', - 'title': 'TILT Brass - Bowery Poetry Club, August \'03 [Non-Site SCR 02]', - 'description': 're:.*?TILT Brass - Bowery Poetry Club', - }, - 'playlist_count': 6, - }] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - playlist_id = mobj.group('id') - - query = {} - token = mobj.group('token') - if token: - query['secret_token'] = token - - data = self._download_json( - self._API_V2_BASE + 'playlists/' + playlist_id, - playlist_id, 'Downloading playlist', query=query) - - return self._extract_set(data, token) - - -class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE): - IE_NAME = 'soundcloud:search' - IE_DESC = 'Soundcloud search' - _MAX_RESULTS = float('inf') - _TESTS = [{ - 'url': 'scsearch15:post-avant jazzcore', - 'info_dict': { - 'title': 'post-avant jazzcore', - }, - 'playlist_count': 15, - }] - - _SEARCH_KEY = 'scsearch' - _MAX_RESULTS_PER_PAGE = 200 - _DEFAULT_RESULTS_PER_PAGE = 50 - - def _get_collection(self, endpoint, 
collection_id, **query): - limit = min( - query.get('limit', self._DEFAULT_RESULTS_PER_PAGE), - self._MAX_RESULTS_PER_PAGE) - query.update({ - 'limit': limit, - 'linked_partitioning': 1, - 'offset': 0, - }) - next_url = update_url_query(self._API_V2_BASE + endpoint, query) - - collected_results = 0 - - for i in itertools.count(1): - response = self._download_json( - next_url, collection_id, 'Downloading page {0}'.format(i), - 'Unable to download API page') - - collection = response.get('collection', []) - if not collection: - break - - collection = list(filter(bool, collection)) - collected_results += len(collection) - - for item in collection: - yield self.url_result(item['uri'], SoundcloudIE.ie_key()) - - if not collection or collected_results >= limit: - break - - next_url = response.get('next_href') - if not next_url: - break - - def _get_n_results(self, query, n): - tracks = self._get_collection('search/tracks', query, limit=n, q=query) - return self.playlist_result(tracks, playlist_title=query) diff --git a/youtube_dl/extractor/soundgasm.py b/youtube_dl/extractor/soundgasm.py deleted file mode 100644 index 3d78a9d76..000000000 --- a/youtube_dl/extractor/soundgasm.py +++ /dev/null @@ -1,77 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor - - -class SoundgasmIE(InfoExtractor): - IE_NAME = 'soundgasm' - _VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<user>[0-9a-zA-Z_-]+)/(?P<display_id>[0-9a-zA-Z_-]+)' - _TEST = { - 'url': 'http://soundgasm.net/u/ytdl/Piano-sample', - 'md5': '010082a2c802c5275bb00030743e75ad', - 'info_dict': { - 'id': '88abd86ea000cafe98f96321b23cc1206cbcbcc9', - 'ext': 'm4a', - 'title': 'Piano sample', - 'description': 'Royalty Free Sample Music', - 'uploader': 'ytdl', - } - } - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - display_id = mobj.group('display_id') - - webpage = self._download_webpage(url, display_id) - - audio_url = 
self._html_search_regex( - r'(?s)m4a\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, - 'audio URL', group='url') - - title = self._search_regex( - r'<div[^>]+\bclass=["\']jp-title[^>]+>([^<]+)', - webpage, 'title', default=display_id) - - description = self._html_search_regex( - (r'(?s)<div[^>]+\bclass=["\']jp-description[^>]+>(.+?)</div>', - r'(?s)<li>Description:\s(.*?)<\/li>'), - webpage, 'description', fatal=False) - - audio_id = self._search_regex( - r'/([^/]+)\.m4a', audio_url, 'audio id', default=display_id) - - return { - 'id': audio_id, - 'display_id': display_id, - 'url': audio_url, - 'vcodec': 'none', - 'title': title, - 'description': description, - 'uploader': mobj.group('user'), - } - - -class SoundgasmProfileIE(InfoExtractor): - IE_NAME = 'soundgasm:profile' - _VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<id>[^/]+)/?(?:\#.*)?$' - _TEST = { - 'url': 'http://soundgasm.net/u/ytdl', - 'info_dict': { - 'id': 'ytdl', - }, - 'playlist_count': 1, - } - - def _real_extract(self, url): - profile_id = self._match_id(url) - - webpage = self._download_webpage(url, profile_id) - - entries = [ - self.url_result(audio_url, 'Soundgasm') - for audio_url in re.findall(r'href="([^"]+/u/%s/[^"]+)' % profile_id, webpage)] - - return self.playlist_result(entries, profile_id) diff --git a/youtube_dl/extractor/southpark.py b/youtube_dl/extractor/southpark.py deleted file mode 100644 index 0774da06e..000000000 --- a/youtube_dl/extractor/southpark.py +++ /dev/null @@ -1,127 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .mtv import MTVServicesInfoExtractor - - -class SouthParkIE(MTVServicesInfoExtractor): - IE_NAME = 'southpark.cc.com' - _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark(?:\.cc|studios)\.com/(?:clips|(?:full-)?episodes|collections)/(?P<id>.+?)(\?|#|$))' - - _FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed' - - _TESTS = [{ - 'url': 'http://southpark.cc.com/clips/104437/bat-daded#tab=featured', - 
'info_dict': { - 'id': 'a7bff6c2-ed00-11e0-aca6-0026b9414f30', - 'ext': 'mp4', - 'title': 'South Park|Bat Daded', - 'description': 'Randy disqualifies South Park by getting into a fight with Bat Dad.', - 'timestamp': 1112760000, - 'upload_date': '20050406', - }, - }, { - 'url': 'http://southpark.cc.com/collections/7758/fan-favorites/1', - 'only_matching': True, - }, { - 'url': 'https://www.southparkstudios.com/episodes/h4o269/south-park-stunning-and-brave-season-19-ep-1', - 'only_matching': True, - }] - - def _get_feed_query(self, uri): - return { - 'accountOverride': 'intl.mtvi.com', - 'arcEp': 'shared.southpark.global', - 'ep': '90877963', - 'imageEp': 'shared.southpark.global', - 'mgid': uri, - } - - -class SouthParkEsIE(SouthParkIE): - IE_NAME = 'southpark.cc.com:español' - _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.cc\.com/episodios-en-espanol/(?P<id>.+?)(\?|#|$))' - _LANG = 'es' - - _TESTS = [{ - 'url': 'http://southpark.cc.com/episodios-en-espanol/s01e01-cartman-consigue-una-sonda-anal#source=351c1323-0b96-402d-a8b9-40d01b2e9bde&position=1&sort=!airdate', - 'info_dict': { - 'title': 'Cartman Consigue Una Sonda Anal', - 'description': 'Cartman Consigue Una Sonda Anal', - }, - 'playlist_count': 4, - 'skip': 'Geo-restricted', - }] - - -class SouthParkDeIE(SouthParkIE): - IE_NAME = 'southpark.de' - _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.de/(?:clips|alle-episoden|collections)/(?P<id>.+?)(\?|#|$))' - _FEED_URL = 'http://www.southpark.de/feeds/video-player/mrss/' - - _TESTS = [{ - 'url': 'http://www.southpark.de/clips/uygssh/the-government-wont-respect-my-privacy#tab=featured', - 'info_dict': { - 'id': '85487c96-b3b9-4e39-9127-ad88583d9bf2', - 'ext': 'mp4', - 'title': 'South Park|The Government Won\'t Respect My Privacy', - 'description': 'Cartman explains the benefits of "Shitter" to Stan, Kyle and Craig.', - 'timestamp': 1380160800, - 'upload_date': '20130926', - }, - }, { - # non-ASCII characters in initial URL - 'url': 
'http://www.southpark.de/alle-episoden/s18e09-hashtag-aufwärmen', - 'info_dict': { - 'title': 'Hashtag „Aufwärmen“', - 'description': 'Kyle will mit seinem kleinen Bruder Ike Videospiele spielen. Als der nicht mehr mit ihm spielen will, hat Kyle Angst, dass er die Kids von heute nicht mehr versteht.', - }, - 'playlist_count': 3, - }, { - # non-ASCII characters in redirect URL - 'url': 'http://www.southpark.de/alle-episoden/s18e09', - 'info_dict': { - 'title': 'Hashtag „Aufwärmen“', - 'description': 'Kyle will mit seinem kleinen Bruder Ike Videospiele spielen. Als der nicht mehr mit ihm spielen will, hat Kyle Angst, dass er die Kids von heute nicht mehr versteht.', - }, - 'playlist_count': 3, - }, { - 'url': 'http://www.southpark.de/collections/2476/superhero-showdown/1', - 'only_matching': True, - }] - - -class SouthParkNlIE(SouthParkIE): - IE_NAME = 'southpark.nl' - _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.nl/(?:clips|(?:full-)?episodes|collections)/(?P<id>.+?)(\?|#|$))' - _FEED_URL = 'http://www.southpark.nl/feeds/video-player/mrss/' - - _TESTS = [{ - 'url': 'http://www.southpark.nl/full-episodes/s18e06-freemium-isnt-free', - 'info_dict': { - 'title': 'Freemium Isn\'t Free', - 'description': 'Stan is addicted to the new Terrance and Phillip mobile game.', - }, - 'playlist_mincount': 3, - }] - - -class SouthParkDkIE(SouthParkIE): - IE_NAME = 'southparkstudios.dk' - _VALID_URL = r'https?://(?:www\.)?(?P<url>southparkstudios\.(?:dk|nu)/(?:clips|full-episodes|collections)/(?P<id>.+?)(\?|#|$))' - _FEED_URL = 'http://www.southparkstudios.dk/feeds/video-player/mrss/' - - _TESTS = [{ - 'url': 'http://www.southparkstudios.dk/full-episodes/s18e07-grounded-vindaloop', - 'info_dict': { - 'title': 'Grounded Vindaloop', - 'description': 'Butters is convinced he\'s living in a virtual reality.', - }, - 'playlist_mincount': 3, - }, { - 'url': 'http://www.southparkstudios.dk/collections/2476/superhero-showdown/1', - 'only_matching': True, - }, { - 'url': 
'http://www.southparkstudios.nu/collections/2476/superhero-showdown/1', - 'only_matching': True, - }] diff --git a/youtube_dl/extractor/spankbang.py b/youtube_dl/extractor/spankbang.py deleted file mode 100644 index 37cb8c839..000000000 --- a/youtube_dl/extractor/spankbang.py +++ /dev/null @@ -1,198 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - determine_ext, - ExtractorError, - merge_dicts, - parse_duration, - parse_resolution, - str_to_int, - url_or_none, - urlencode_postdata, - urljoin, -) - - -class SpankBangIE(InfoExtractor): - _VALID_URL = r'''(?x) - https?:// - (?:[^/]+\.)?spankbang\.com/ - (?: - (?P<id>[\da-z]+)/(?:video|play|embed)\b| - [\da-z]+-(?P<id_2>[\da-z]+)/playlist/[^/?#&]+ - ) - ''' - _TESTS = [{ - 'url': 'http://spankbang.com/3vvn/video/fantasy+solo', - 'md5': '1cc433e1d6aa14bc376535b8679302f7', - 'info_dict': { - 'id': '3vvn', - 'ext': 'mp4', - 'title': 'fantasy solo', - 'description': 'dillion harper masturbates on a bed', - 'thumbnail': r're:^https?://.*\.jpg$', - 'uploader': 'silly2587', - 'timestamp': 1422571989, - 'upload_date': '20150129', - 'age_limit': 18, - } - }, { - # 480p only - 'url': 'http://spankbang.com/1vt0/video/solvane+gangbang', - 'only_matching': True, - }, { - # no uploader - 'url': 'http://spankbang.com/lklg/video/sex+with+anyone+wedding+edition+2', - 'only_matching': True, - }, { - # mobile page - 'url': 'http://m.spankbang.com/1o2de/video/can+t+remember+her+name', - 'only_matching': True, - }, { - # 4k - 'url': 'https://spankbang.com/1vwqx/video/jade+kush+solo+4k', - 'only_matching': True, - }, { - 'url': 'https://m.spankbang.com/3vvn/play/fantasy+solo/480p/', - 'only_matching': True, - }, { - 'url': 'https://m.spankbang.com/3vvn/play', - 'only_matching': True, - }, { - 'url': 'https://spankbang.com/2y3td/embed/', - 'only_matching': True, - }, { - 'url': 'https://spankbang.com/2v7ik-7ecbgu/playlist/latina+booty', - 'only_matching': True, - }] - 
- def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') or mobj.group('id_2') - webpage = self._download_webpage( - url.replace('/%s/embed' % video_id, '/%s/video' % video_id), - video_id, headers={'Cookie': 'country=US'}) - - if re.search(r'<[^>]+\b(?:id|class)=["\']video_removed', webpage): - raise ExtractorError( - 'Video %s is not available' % video_id, expected=True) - - formats = [] - - def extract_format(format_id, format_url): - f_url = url_or_none(format_url) - if not f_url: - return - f = parse_resolution(format_id) - ext = determine_ext(f_url) - if format_id.startswith('m3u8') or ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( - f_url, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls', fatal=False)) - elif format_id.startswith('mpd') or ext == 'mpd': - formats.extend(self._extract_mpd_formats( - f_url, video_id, mpd_id='dash', fatal=False)) - elif ext == 'mp4' or f.get('width') or f.get('height'): - f.update({ - 'url': f_url, - 'format_id': format_id, - }) - formats.append(f) - - STREAM_URL_PREFIX = 'stream_url_' - - for mobj in re.finditer( - r'%s(?P<id>[^\s=]+)\s*=\s*(["\'])(?P<url>(?:(?!\2).)+)\2' - % STREAM_URL_PREFIX, webpage): - extract_format(mobj.group('id', 'url')) - - if not formats: - stream_key = self._search_regex( - r'data-streamkey\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1', - webpage, 'stream key', group='value') - - stream = self._download_json( - 'https://spankbang.com/api/videos/stream', video_id, - 'Downloading stream JSON', data=urlencode_postdata({ - 'id': stream_key, - 'data': 0, - }), headers={ - 'Referer': url, - 'X-Requested-With': 'XMLHttpRequest', - }) - - for format_id, format_url in stream.items(): - if format_url and isinstance(format_url, list): - format_url = format_url[0] - extract_format(format_id, format_url) - - self._sort_formats(formats, field_preference=('preference', 'height', 'width', 'fps', 'tbr', 'format_id')) - - info = 
self._search_json_ld(webpage, video_id, default={}) - - title = self._html_search_regex( - r'(?s)<h1[^>]*>(.+?)</h1>', webpage, 'title', default=None) - description = self._search_regex( - r'<div[^>]+\bclass=["\']bottom[^>]+>\s*<p>[^<]*</p>\s*<p>([^<]+)', - webpage, 'description', default=None) - thumbnail = self._og_search_thumbnail(webpage, default=None) - uploader = self._html_search_regex( - (r'(?s)<li[^>]+class=["\']profile[^>]+>(.+?)</a>', - r'class="user"[^>]*><img[^>]+>([^<]+)'), - webpage, 'uploader', default=None) - duration = parse_duration(self._search_regex( - r'<div[^>]+\bclass=["\']right_side[^>]+>\s*<span>([^<]+)', - webpage, 'duration', default=None)) - view_count = str_to_int(self._search_regex( - r'([\d,.]+)\s+plays', webpage, 'view count', default=None)) - - age_limit = self._rta_search(webpage) - - return merge_dicts({ - 'id': video_id, - 'title': title or video_id, - 'description': description, - 'thumbnail': thumbnail, - 'uploader': uploader, - 'duration': duration, - 'view_count': view_count, - 'formats': formats, - 'age_limit': age_limit, - }, info - ) - - -class SpankBangPlaylistIE(InfoExtractor): - _VALID_URL = r'https?://(?:[^/]+\.)?spankbang\.com/(?P<id>[\da-z]+)/playlist/(?P<display_id>[^/]+)' - _TEST = { - 'url': 'https://spankbang.com/ug0k/playlist/big+ass+titties', - 'info_dict': { - 'id': 'ug0k', - 'title': 'Big Ass Titties', - }, - 'playlist_mincount': 40, - } - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - playlist_id = mobj.group('id') - display_id = mobj.group('display_id') - - webpage = self._download_webpage( - url, playlist_id, headers={'Cookie': 'country=US; mobile=on'}) - - entries = [self.url_result( - urljoin(url, mobj.group('path')), - ie=SpankBangIE.ie_key(), video_id=mobj.group('id')) - for mobj in re.finditer( - r'<a[^>]+\bhref=(["\'])(?P<path>/?[\da-z]+-(?P<id>[\da-z]+)/playlist/%s(?:(?!\1).)*)\1' - % re.escape(display_id), webpage)] - - title = self._html_search_regex( - 
r'<h1>([^<]+)\s+playlist\s*<', webpage, 'playlist title', - fatal=False) - - return self.playlist_result(entries, playlist_id, title) diff --git a/youtube_dl/extractor/spankwire.py b/youtube_dl/extractor/spankwire.py deleted file mode 100644 index 35ab9ec37..000000000 --- a/youtube_dl/extractor/spankwire.py +++ /dev/null @@ -1,182 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - float_or_none, - int_or_none, - merge_dicts, - str_or_none, - str_to_int, - url_or_none, -) - - -class SpankwireIE(InfoExtractor): - _VALID_URL = r'''(?x) - https?:// - (?:www\.)?spankwire\.com/ - (?: - [^/]+/video| - EmbedPlayer\.aspx/?\?.*?\bArticleId= - ) - (?P<id>\d+) - ''' - _TESTS = [{ - # download URL pattern: */<height>P_<tbr>K_<video_id>.mp4 - 'url': 'http://www.spankwire.com/Buckcherry-s-X-Rated-Music-Video-Crazy-Bitch/video103545/', - 'md5': '5aa0e4feef20aad82cbcae3aed7ab7cd', - 'info_dict': { - 'id': '103545', - 'ext': 'mp4', - 'title': 'Buckcherry`s X Rated Music Video Crazy Bitch', - 'description': 'Crazy Bitch X rated music video.', - 'duration': 222, - 'uploader': 'oreusz', - 'uploader_id': '124697', - 'timestamp': 1178587885, - 'upload_date': '20070508', - 'average_rating': float, - 'view_count': int, - 'comment_count': int, - 'age_limit': 18, - 'categories': list, - 'tags': list, - }, - }, { - # download URL pattern: */mp4_<format_id>_<video_id>.mp4 - 'url': 'http://www.spankwire.com/Titcums-Compiloation-I/video1921551/', - 'md5': '09b3c20833308b736ae8902db2f8d7e6', - 'info_dict': { - 'id': '1921551', - 'ext': 'mp4', - 'title': 'Titcums Compiloation I', - 'description': 'cum on tits', - 'uploader': 'dannyh78999', - 'uploader_id': '3056053', - 'upload_date': '20150822', - 'age_limit': 18, - }, - 'params': { - 'proxy': '127.0.0.1:8118' - }, - 'skip': 'removed', - }, { - 'url': 'https://www.spankwire.com/EmbedPlayer.aspx/?ArticleId=156156&autostart=true', - 'only_matching': True, - }] - - @staticmethod 
- def _extract_urls(webpage): - return re.findall( - r'<iframe[^>]+\bsrc=["\']((?:https?:)?//(?:www\.)?spankwire\.com/EmbedPlayer\.aspx/?\?.*?\bArticleId=\d+)', - webpage) - - def _real_extract(self, url): - video_id = self._match_id(url) - - video = self._download_json( - 'https://www.spankwire.com/api/video/%s.json' % video_id, video_id) - - title = video['title'] - - formats = [] - videos = video.get('videos') - if isinstance(videos, dict): - for format_id, format_url in videos.items(): - video_url = url_or_none(format_url) - if not format_url: - continue - height = int_or_none(self._search_regex( - r'(\d+)[pP]', format_id, 'height', default=None)) - m = re.search( - r'/(?P<height>\d+)[pP]_(?P<tbr>\d+)[kK]', video_url) - if m: - tbr = int(m.group('tbr')) - height = height or int(m.group('height')) - else: - tbr = None - formats.append({ - 'url': video_url, - 'format_id': '%dp' % height if height else format_id, - 'height': height, - 'tbr': tbr, - }) - m3u8_url = url_or_none(video.get('HLS')) - if m3u8_url: - formats.extend(self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls', fatal=False)) - self._sort_formats(formats, ('height', 'tbr', 'width', 'format_id')) - - view_count = str_to_int(video.get('viewed')) - - thumbnails = [] - for preference, t in enumerate(('', '2x'), start=0): - thumbnail_url = url_or_none(video.get('poster%s' % t)) - if not thumbnail_url: - continue - thumbnails.append({ - 'url': thumbnail_url, - 'preference': preference, - }) - - def extract_names(key): - entries_list = video.get(key) - if not isinstance(entries_list, list): - return - entries = [] - for entry in entries_list: - name = str_or_none(entry.get('name')) - if name: - entries.append(name) - return entries - - categories = extract_names('categories') - tags = extract_names('tags') - - uploader = None - info = {} - - webpage = self._download_webpage( - 'https://www.spankwire.com/_/video%s/' % video_id, video_id, - fatal=False) - if 
webpage: - info = self._search_json_ld(webpage, video_id, default={}) - thumbnail_url = None - if 'thumbnail' in info: - thumbnail_url = url_or_none(info['thumbnail']) - del info['thumbnail'] - if not thumbnail_url: - thumbnail_url = self._og_search_thumbnail(webpage) - if thumbnail_url: - thumbnails.append({ - 'url': thumbnail_url, - 'preference': 10, - }) - uploader = self._html_search_regex( - r'(?s)by\s*<a[^>]+\bclass=["\']uploaded__by[^>]*>(.+?)</a>', - webpage, 'uploader', fatal=False) - if not view_count: - view_count = str_to_int(self._search_regex( - r'data-views=["\']([\d,.]+)', webpage, 'view count', - fatal=False)) - - return merge_dicts({ - 'id': video_id, - 'title': title, - 'description': video.get('description'), - 'duration': int_or_none(video.get('duration')), - 'thumbnails': thumbnails, - 'uploader': uploader, - 'uploader_id': str_or_none(video.get('userId')), - 'timestamp': int_or_none(video.get('time_approved_on')), - 'average_rating': float_or_none(video.get('rating')), - 'view_count': view_count, - 'comment_count': int_or_none(video.get('comments')), - 'age_limit': 18, - 'categories': categories, - 'tags': tags, - 'formats': formats, - }, info) diff --git a/youtube_dl/extractor/spiegel.py b/youtube_dl/extractor/spiegel.py deleted file mode 100644 index 2da32b9b2..000000000 --- a/youtube_dl/extractor/spiegel.py +++ /dev/null @@ -1,54 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from .jwplatform import JWPlatformIE - - -class SpiegelIE(InfoExtractor): - _UUID_RE = r'[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}' - _VALID_URL = r'https?://(?:www\.)?(?:spiegel|manager-magazin)\.de(?:/[^/]+)+/[^/]*-(?P<id>[0-9]+|%s)(?:-embed|-iframe)?(?:\.html)?(?:#.*)?$' % _UUID_RE - _TESTS = [{ - 'url': 'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html', - 'md5': '50c7948883ec85a3e431a0a44b7ad1d6', - 'info_dict': { - 'id': 'II0BUyxY', - 'display_id': 
'1259285', - 'ext': 'mp4', - 'title': 'Vulkan Tungurahua in Ecuador ist wieder aktiv - DER SPIEGEL - Wissenschaft', - 'description': 'md5:8029d8310232196eb235d27575a8b9f4', - 'duration': 48.0, - 'upload_date': '20130311', - 'timestamp': 1362997920, - }, - }, { - 'url': 'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html', - 'only_matching': True, - }, { - 'url': 'https://www.spiegel.de/video/eifel-zoo-aufregung-um-ausgebrochene-raubtiere-video-99018031.html', - 'only_matching': True, - }, { - 'url': 'https://www.spiegel.de/panorama/urteile-im-goldmuenzenprozess-haftstrafen-fuer-clanmitglieder-a-aae8df48-43c1-4c61-867d-23f0a2d254b7', - 'only_matching': True, - }, { - 'url': 'http://www.spiegel.de/video/spiegel-tv-magazin-ueber-guellekrise-in-schleswig-holstein-video-99012776.html', - 'only_matching': True, - }, { - 'url': 'http://www.spiegel.de/sport/sonst/badminton-wm-die-randsportart-soll-populaerer-werden-a-987092.html', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - media_id = self._html_search_regex( - r'("|["\'])mediaId\1\s*:\s*("|["\'])(?P<id>(?:(?!\2).)+)\2', - webpage, 'media id', group='id') - return { - '_type': 'url_transparent', - 'id': video_id, - 'display_id': video_id, - 'url': 'jwplatform:%s' % media_id, - 'title': self._og_search_title(webpage, default=None), - 'ie_key': JWPlatformIE.ie_key(), - } diff --git a/youtube_dl/extractor/spike.py b/youtube_dl/extractor/spike.py deleted file mode 100644 index 5805f3d44..000000000 --- a/youtube_dl/extractor/spike.py +++ /dev/null @@ -1,48 +0,0 @@ -from __future__ import unicode_literals - -from .mtv import MTVServicesInfoExtractor - - -class BellatorIE(MTVServicesInfoExtractor): - _VALID_URL = r'https?://(?:www\.)?bellator\.com/[^/]+/[\da-z]{6}(?:[/?#&]|$)' - _TESTS = [{ - 'url': 'http://www.bellator.com/fight/atwr7k/bellator-158-michael-page-vs-evangelista-cyborg', - 
'info_dict': { - 'title': 'Michael Page vs. Evangelista Cyborg', - 'description': 'md5:0d917fc00ffd72dd92814963fc6cbb05', - }, - 'playlist_count': 3, - }, { - 'url': 'http://www.bellator.com/video-clips/bw6k7n/bellator-158-foundations-michael-venom-page', - 'only_matching': True, - }] - - _FEED_URL = 'http://www.bellator.com/feeds/mrss/' - _GEO_COUNTRIES = ['US'] - - -class ParamountNetworkIE(MTVServicesInfoExtractor): - _VALID_URL = r'https?://(?:www\.)?paramountnetwork\.com/[^/]+/[\da-z]{6}(?:[/?#&]|$)' - _TESTS = [{ - 'url': 'http://www.paramountnetwork.com/episodes/j830qm/lip-sync-battle-joel-mchale-vs-jim-rash-season-2-ep-13', - 'info_dict': { - 'id': '37ace3a8-1df6-48be-85b8-38df8229e241', - 'ext': 'mp4', - 'title': 'Lip Sync Battle|April 28, 2016|2|209|Joel McHale Vs. Jim Rash|Act 1', - 'description': 'md5:a739ca8f978a7802f67f8016d27ce114', - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }] - - _FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed' - _GEO_COUNTRIES = ['US'] - - def _get_feed_query(self, uri): - return { - 'arcEp': 'paramountnetwork.com', - 'imageEp': 'paramountnetwork.com', - 'mgid': uri, - } diff --git a/youtube_dl/extractor/sport5.py b/youtube_dl/extractor/sport5.py deleted file mode 100644 index a417b5a4e..000000000 --- a/youtube_dl/extractor/sport5.py +++ /dev/null @@ -1,92 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ExtractorError - - -class Sport5IE(InfoExtractor): - _VALID_URL = r'https?://(?:www|vod)?\.sport5\.co\.il/.*\b(?:Vi|docID)=(?P<id>\d+)' - _TESTS = [ - { - 'url': 'http://vod.sport5.co.il/?Vc=147&Vi=176331&Page=1', - 'info_dict': { - 'id': 's5-Y59xx1-GUh2', - 'ext': 'mp4', - 'title': 'ולנסיה-קורדובה 0:3', - 'description': 'אלקאסר, גאייה ופגולי סידרו לקבוצה של נונו ניצחון על קורדובה ואת המקום הראשון בליגה', - 'duration': 228, - 'categories': list, - }, - 'skip': 'Blocked outside of Israel', - }, 
{ - 'url': 'http://www.sport5.co.il/articles.aspx?FolderID=3075&docID=176372&lang=HE', - 'info_dict': { - 'id': 's5-SiXxx1-hKh2', - 'ext': 'mp4', - 'title': 'GOALS_CELTIC_270914.mp4', - 'description': '', - 'duration': 87, - 'categories': list, - }, - 'skip': 'Blocked outside of Israel', - } - ] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - media_id = mobj.group('id') - - webpage = self._download_webpage(url, media_id) - - video_id = self._html_search_regex(r'clipId=([\w-]+)', webpage, 'video id') - - metadata = self._download_xml( - 'http://sport5-metadata-rr-d.nsacdn.com/vod/vod/%s/HDS/metadata.xml' % video_id, - video_id) - - error = metadata.find('./Error') - if error is not None: - raise ExtractorError( - '%s returned error: %s - %s' % ( - self.IE_NAME, - error.find('./Name').text, - error.find('./Description').text), - expected=True) - - title = metadata.find('./Title').text - description = metadata.find('./Description').text - duration = int(metadata.find('./Duration').text) - - posters_el = metadata.find('./PosterLinks') - thumbnails = [{ - 'url': thumbnail.text, - 'width': int(thumbnail.get('width')), - 'height': int(thumbnail.get('height')), - } for thumbnail in posters_el.findall('./PosterIMG')] if posters_el is not None else [] - - categories_el = metadata.find('./Categories') - categories = [ - cat.get('name') for cat in categories_el.findall('./Category') - ] if categories_el is not None else [] - - formats = [{ - 'url': fmt.text, - 'ext': 'mp4', - 'vbr': int(fmt.get('bitrate')), - 'width': int(fmt.get('width')), - 'height': int(fmt.get('height')), - } for fmt in metadata.findall('./PlaybackLinks/FileURL')] - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnails': thumbnails, - 'duration': duration, - 'categories': categories, - 'formats': formats, - } diff --git a/youtube_dl/extractor/sportbox.py b/youtube_dl/extractor/sportbox.py deleted file mode 100644 
index b9017fd2a..000000000 --- a/youtube_dl/extractor/sportbox.py +++ /dev/null @@ -1,99 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - determine_ext, - int_or_none, - js_to_json, - merge_dicts, -) - - -class SportBoxIE(InfoExtractor): - _VALID_URL = r'https?://(?:news\.sportbox|matchtv)\.ru/vdl/player(?:/[^/]+/|\?.*?\bn?id=)(?P<id>\d+)' - _TESTS = [{ - 'url': 'http://news.sportbox.ru/vdl/player/ci/211355', - 'info_dict': { - 'id': '109158', - 'ext': 'mp4', - 'title': 'В Новороссийске прошел детский турнир «Поле славы боевой»', - 'description': 'В Новороссийске прошел детский турнир «Поле славы боевой»', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 292, - 'view_count': int, - 'timestamp': 1426237001, - 'upload_date': '20150313', - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, { - 'url': 'http://news.sportbox.ru/vdl/player?nid=370908&only_player=1&autostart=false&playeri=2&height=340&width=580', - 'only_matching': True, - }, { - 'url': 'https://news.sportbox.ru/vdl/player/media/193095', - 'only_matching': True, - }, { - 'url': 'https://news.sportbox.ru/vdl/player/media/109158', - 'only_matching': True, - }, { - 'url': 'https://matchtv.ru/vdl/player/media/109158', - 'only_matching': True, - }] - - @staticmethod - def _extract_urls(webpage): - return re.findall( - r'<iframe[^>]+src="(https?://(?:news\.sportbox|matchtv)\.ru/vdl/player[^"]+)"', - webpage) - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - sources = self._parse_json( - self._search_regex( - r'(?s)playerOptions\.sources(?:WithRes)?\s*=\s*(\[.+?\])\s*;\s*\n', - webpage, 'sources'), - video_id, transform_source=js_to_json) - - formats = [] - for source in sources: - src = source.get('src') - if not src: - continue - if determine_ext(src) == 'm3u8': - formats.extend(self._extract_m3u8_formats( - src, video_id, 
'mp4', entry_protocol='m3u8_native', - m3u8_id='hls', fatal=False)) - else: - formats.append({ - 'url': src, - }) - self._sort_formats(formats) - - player = self._parse_json( - self._search_regex( - r'(?s)playerOptions\s*=\s*({.+?})\s*;\s*\n', webpage, - 'player options', default='{}'), - video_id, transform_source=js_to_json) - media_id = player['mediaId'] - - info = self._search_json_ld(webpage, media_id, default={}) - - view_count = int_or_none(self._search_regex( - r'Просмотров\s*:\s*(\d+)', webpage, 'view count', default=None)) - - return merge_dicts(info, { - 'id': media_id, - 'title': self._og_search_title(webpage, default=None) or media_id, - 'thumbnail': player.get('poster'), - 'duration': int_or_none(player.get('duration')), - 'view_count': view_count, - 'formats': formats, - }) diff --git a/youtube_dl/extractor/sportdeutschland.py b/youtube_dl/extractor/sportdeutschland.py deleted file mode 100644 index 3e497a939..000000000 --- a/youtube_dl/extractor/sportdeutschland.py +++ /dev/null @@ -1,105 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..compat import ( - compat_parse_qs, - compat_urllib_parse_urlparse, -) -from ..utils import ( - clean_html, - float_or_none, - int_or_none, - parse_iso8601, - strip_or_none, - try_get, -) - - -class SportDeutschlandIE(InfoExtractor): - _VALID_URL = r'https?://sportdeutschland\.tv/(?P<id>(?:[^/]+/)?[^?#/&]+)' - _TESTS = [{ - 'url': 'https://sportdeutschland.tv/badminton/re-live-deutsche-meisterschaften-2020-halbfinals?playlistId=0', - 'info_dict': { - 'id': '5318cac0275701382770543d7edaf0a0', - 'ext': 'mp4', - 'title': 'Re-live: Deutsche Meisterschaften 2020 - Halbfinals - Teil 1', - 'duration': 16106.36, - }, - 'params': { - 'noplaylist': True, - # m3u8 download - 'skip_download': True, - }, - }, { - 'url': 'https://sportdeutschland.tv/badminton/re-live-deutsche-meisterschaften-2020-halbfinals?playlistId=0', - 'info_dict': { - 'id': 
'c6e2fdd01f63013854c47054d2ab776f', - 'title': 'Re-live: Deutsche Meisterschaften 2020 - Halbfinals', - 'description': 'md5:5263ff4c31c04bb780c9f91130b48530', - 'duration': 31397, - }, - 'playlist_count': 2, - }, { - 'url': 'https://sportdeutschland.tv/freeride-world-tour-2021-fieberbrunn-oesterreich', - 'only_matching': True, - }] - - def _real_extract(self, url): - display_id = self._match_id(url) - data = self._download_json( - 'https://backend.sportdeutschland.tv/api/permalinks/' + display_id, - display_id, query={'access_token': 'true'}) - asset = data['asset'] - title = (asset.get('title') or asset['label']).strip() - asset_id = asset.get('id') or asset.get('uuid') - info = { - 'id': asset_id, - 'title': title, - 'description': clean_html(asset.get('body') or asset.get('description')) or asset.get('teaser'), - 'duration': int_or_none(asset.get('seconds')), - } - videos = asset.get('videos') or [] - if len(videos) > 1: - playlist_id = compat_parse_qs(compat_urllib_parse_urlparse(url).query).get('playlistId', [None])[0] - if playlist_id: - if self._downloader.params.get('noplaylist'): - videos = [videos[int(playlist_id)]] - self.to_screen('Downloading just a single video because of --no-playlist') - else: - self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % asset_id) - - def entries(): - for i, video in enumerate(videos, 1): - video_id = video.get('uuid') - video_url = video.get('url') - if not (video_id and video_url): - continue - formats = self._extract_m3u8_formats( - video_url.replace('.smil', '.m3u8'), video_id, 'mp4', fatal=False) - if not formats: - continue - yield { - 'id': video_id, - 'formats': formats, - 'title': title + ' - ' + (video.get('label') or 'Teil %d' % i), - 'duration': float_or_none(video.get('duration')), - } - info.update({ - '_type': 'multi_video', - 'entries': entries(), - }) - else: - formats = self._extract_m3u8_formats( - videos[0]['url'].replace('.smil', '.m3u8'), asset_id, 'mp4') - 
section_title = strip_or_none(try_get(data, lambda x: x['section']['title'])) - info.update({ - 'formats': formats, - 'display_id': asset.get('permalink'), - 'thumbnail': try_get(asset, lambda x: x['images'][0]), - 'categories': [section_title] if section_title else None, - 'view_count': int_or_none(asset.get('views')), - 'is_live': asset.get('is_live') is True, - 'timestamp': parse_iso8601(asset.get('date') or asset.get('published_at')), - }) - return info diff --git a/youtube_dl/extractor/spotify.py b/youtube_dl/extractor/spotify.py deleted file mode 100644 index 826f98cff..000000000 --- a/youtube_dl/extractor/spotify.py +++ /dev/null @@ -1,156 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import json -import re - -from .common import InfoExtractor -from ..utils import ( - clean_podcast_url, - float_or_none, - int_or_none, - strip_or_none, - try_get, - unified_strdate, -) - - -class SpotifyBaseIE(InfoExtractor): - _ACCESS_TOKEN = None - _OPERATION_HASHES = { - 'Episode': '8276d4423d709ae9b68ec1b74cc047ba0f7479059a37820be730f125189ac2bf', - 'MinimalShow': '13ee079672fad3f858ea45a55eb109553b4fb0969ed793185b2e34cbb6ee7cc0', - 'ShowEpisodes': 'e0e5ce27bd7748d2c59b4d44ba245a8992a05be75d6fabc3b20753fc8857444d', - } - _VALID_URL_TEMPL = r'https?://open\.spotify\.com/%s/(?P<id>[^/?&#]+)' - - def _real_initialize(self): - self._ACCESS_TOKEN = self._download_json( - 'https://open.spotify.com/get_access_token', None)['accessToken'] - - def _call_api(self, operation, video_id, variables): - return self._download_json( - 'https://api-partner.spotify.com/pathfinder/v1/query', video_id, query={ - 'operationName': 'query' + operation, - 'variables': json.dumps(variables), - 'extensions': json.dumps({ - 'persistedQuery': { - 'sha256Hash': self._OPERATION_HASHES[operation], - }, - }) - }, headers={'authorization': 'Bearer ' + self._ACCESS_TOKEN})['data'] - - def _extract_episode(self, episode, series): - episode_id = episode['id'] - title = 
episode['name'].strip() - - formats = [] - audio_preview = episode.get('audioPreview') or {} - audio_preview_url = audio_preview.get('url') - if audio_preview_url: - f = { - 'url': audio_preview_url.replace('://p.scdn.co/mp3-preview/', '://anon-podcast.scdn.co/'), - 'vcodec': 'none', - } - audio_preview_format = audio_preview.get('format') - if audio_preview_format: - f['format_id'] = audio_preview_format - mobj = re.match(r'([0-9A-Z]{3})_(?:[A-Z]+_)?(\d+)', audio_preview_format) - if mobj: - f.update({ - 'abr': int(mobj.group(2)), - 'ext': mobj.group(1).lower(), - }) - formats.append(f) - - for item in (try_get(episode, lambda x: x['audio']['items']) or []): - item_url = item.get('url') - if not (item_url and item.get('externallyHosted')): - continue - formats.append({ - 'url': clean_podcast_url(item_url), - 'vcodec': 'none', - }) - - thumbnails = [] - for source in (try_get(episode, lambda x: x['coverArt']['sources']) or []): - source_url = source.get('url') - if not source_url: - continue - thumbnails.append({ - 'url': source_url, - 'width': int_or_none(source.get('width')), - 'height': int_or_none(source.get('height')), - }) - - return { - 'id': episode_id, - 'title': title, - 'formats': formats, - 'thumbnails': thumbnails, - 'description': strip_or_none(episode.get('description')), - 'duration': float_or_none(try_get( - episode, lambda x: x['duration']['totalMilliseconds']), 1000), - 'release_date': unified_strdate(try_get( - episode, lambda x: x['releaseDate']['isoString'])), - 'series': series, - } - - -class SpotifyIE(SpotifyBaseIE): - IE_NAME = 'spotify' - _VALID_URL = SpotifyBaseIE._VALID_URL_TEMPL % 'episode' - _TEST = { - 'url': 'https://open.spotify.com/episode/4Z7GAJ50bgctf6uclHlWKo', - 'md5': '74010a1e3fa4d9e1ab3aa7ad14e42d3b', - 'info_dict': { - 'id': '4Z7GAJ50bgctf6uclHlWKo', - 'ext': 'mp3', - 'title': 'From the archive: Why time management is ruining our lives', - 'description': 'md5:b120d9c4ff4135b42aa9b6d9cde86935', - 'duration': 2083.605, - 
'release_date': '20201217', - 'series': "The Guardian's Audio Long Reads", - } - } - - def _real_extract(self, url): - episode_id = self._match_id(url) - episode = self._call_api('Episode', episode_id, { - 'uri': 'spotify:episode:' + episode_id - })['episode'] - return self._extract_episode( - episode, try_get(episode, lambda x: x['podcast']['name'])) - - -class SpotifyShowIE(SpotifyBaseIE): - IE_NAME = 'spotify:show' - _VALID_URL = SpotifyBaseIE._VALID_URL_TEMPL % 'show' - _TEST = { - 'url': 'https://open.spotify.com/show/4PM9Ke6l66IRNpottHKV9M', - 'info_dict': { - 'id': '4PM9Ke6l66IRNpottHKV9M', - 'title': 'The Story from the Guardian', - 'description': 'The Story podcast is dedicated to our finest audio documentaries, investigations and long form stories', - }, - 'playlist_mincount': 36, - } - - def _real_extract(self, url): - show_id = self._match_id(url) - podcast = self._call_api('ShowEpisodes', show_id, { - 'limit': 1000000000, - 'offset': 0, - 'uri': 'spotify:show:' + show_id, - })['podcast'] - podcast_name = podcast.get('name') - - entries = [] - for item in (try_get(podcast, lambda x: x['episodes']['items']) or []): - episode = item.get('episode') - if not episode: - continue - entries.append(self._extract_episode(episode, podcast_name)) - - return self.playlist_result( - entries, show_id, podcast_name, podcast.get('description')) diff --git a/youtube_dl/extractor/spreaker.py b/youtube_dl/extractor/spreaker.py deleted file mode 100644 index 6c7e40ae4..000000000 --- a/youtube_dl/extractor/spreaker.py +++ /dev/null @@ -1,176 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import itertools - -from .common import InfoExtractor -from ..compat import compat_str -from ..utils import ( - float_or_none, - int_or_none, - str_or_none, - try_get, - unified_timestamp, - url_or_none, -) - - -def _extract_episode(data, episode_id=None): - title = data['title'] - download_url = data['download_url'] - - series = try_get(data, lambda x: 
x['show']['title'], compat_str) - uploader = try_get(data, lambda x: x['author']['fullname'], compat_str) - - thumbnails = [] - for image in ('image_original', 'image_medium', 'image'): - image_url = url_or_none(data.get('%s_url' % image)) - if image_url: - thumbnails.append({'url': image_url}) - - def stats(key): - return int_or_none(try_get( - data, - (lambda x: x['%ss_count' % key], - lambda x: x['stats']['%ss' % key]))) - - def duration(key): - return float_or_none(data.get(key), scale=1000) - - return { - 'id': compat_str(episode_id or data['episode_id']), - 'url': download_url, - 'display_id': data.get('permalink'), - 'title': title, - 'description': data.get('description'), - 'timestamp': unified_timestamp(data.get('published_at')), - 'uploader': uploader, - 'uploader_id': str_or_none(data.get('author_id')), - 'creator': uploader, - 'duration': duration('duration') or duration('length'), - 'view_count': stats('play'), - 'like_count': stats('like'), - 'comment_count': stats('message'), - 'format': 'MPEG Layer 3', - 'format_id': 'mp3', - 'container': 'mp3', - 'ext': 'mp3', - 'thumbnails': thumbnails, - 'series': series, - 'extractor_key': SpreakerIE.ie_key(), - } - - -class SpreakerIE(InfoExtractor): - _VALID_URL = r'''(?x) - https?:// - api\.spreaker\.com/ - (?: - (?:download/)?episode| - v2/episodes - )/ - (?P<id>\d+) - ''' - _TESTS = [{ - 'url': 'https://api.spreaker.com/episode/12534508', - 'info_dict': { - 'id': '12534508', - 'display_id': 'swm-ep15-how-to-market-your-music-part-2', - 'ext': 'mp3', - 'title': 'EP:15 | Music Marketing (Likes) - Part 2', - 'description': 'md5:0588c43e27be46423e183076fa071177', - 'timestamp': 1502250336, - 'upload_date': '20170809', - 'uploader': 'SWM', - 'uploader_id': '9780658', - 'duration': 1063.42, - 'view_count': int, - 'like_count': int, - 'comment_count': int, - 'series': 'Success With Music (SWM)', - }, - }, { - 'url': 'https://api.spreaker.com/download/episode/12534508/swm_ep15_how_to_market_your_music_part_2.mp3', 
- 'only_matching': True, - }, { - 'url': 'https://api.spreaker.com/v2/episodes/12534508?export=episode_segments', - 'only_matching': True, - }] - - def _real_extract(self, url): - episode_id = self._match_id(url) - data = self._download_json( - 'https://api.spreaker.com/v2/episodes/%s' % episode_id, - episode_id)['response']['episode'] - return _extract_episode(data, episode_id) - - -class SpreakerPageIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?spreaker\.com/user/[^/]+/(?P<id>[^/?#&]+)' - _TESTS = [{ - 'url': 'https://www.spreaker.com/user/9780658/swm-ep15-how-to-market-your-music-part-2', - 'only_matching': True, - }] - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - episode_id = self._search_regex( - (r'data-episode_id=["\'](?P<id>\d+)', - r'episode_id\s*:\s*(?P<id>\d+)'), webpage, 'episode id') - return self.url_result( - 'https://api.spreaker.com/episode/%s' % episode_id, - ie=SpreakerIE.ie_key(), video_id=episode_id) - - -class SpreakerShowIE(InfoExtractor): - _VALID_URL = r'https?://api\.spreaker\.com/show/(?P<id>\d+)' - _TESTS = [{ - 'url': 'https://api.spreaker.com/show/4652058', - 'info_dict': { - 'id': '4652058', - }, - 'playlist_mincount': 118, - }] - - def _entries(self, show_id): - for page_num in itertools.count(1): - episodes = self._download_json( - 'https://api.spreaker.com/show/%s/episodes' % show_id, - show_id, note='Downloading JSON page %d' % page_num, query={ - 'page': page_num, - 'max_per_page': 100, - }) - pager = try_get(episodes, lambda x: x['response']['pager'], dict) - if not pager: - break - results = pager.get('results') - if not results or not isinstance(results, list): - break - for result in results: - if not isinstance(result, dict): - continue - yield _extract_episode(result) - if page_num == pager.get('last_page'): - break - - def _real_extract(self, url): - show_id = self._match_id(url) - return self.playlist_result(self._entries(show_id), 
playlist_id=show_id) - - -class SpreakerShowPageIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?spreaker\.com/show/(?P<id>[^/?#&]+)' - _TESTS = [{ - 'url': 'https://www.spreaker.com/show/success-with-music', - 'only_matching': True, - }] - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - show_id = self._search_regex( - r'show_id\s*:\s*(?P<id>\d+)', webpage, 'show id') - return self.url_result( - 'https://api.spreaker.com/show/%s' % show_id, - ie=SpreakerShowIE.ie_key(), video_id=show_id) diff --git a/youtube_dl/extractor/springboardplatform.py b/youtube_dl/extractor/springboardplatform.py deleted file mode 100644 index 07d99b579..000000000 --- a/youtube_dl/extractor/springboardplatform.py +++ /dev/null @@ -1,125 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - int_or_none, - xpath_attr, - xpath_text, - xpath_element, - unescapeHTML, - unified_timestamp, -) - - -class SpringboardPlatformIE(InfoExtractor): - _VALID_URL = r'''(?x) - https?:// - cms\.springboardplatform\.com/ - (?: - (?:previews|embed_iframe)/(?P<index>\d+)/video/(?P<id>\d+)| - xml_feeds_advanced/index/(?P<index_2>\d+)/rss3/(?P<id_2>\d+) - ) - ''' - _TESTS = [{ - 'url': 'http://cms.springboardplatform.com/previews/159/video/981017/0/0/1', - 'md5': '5c3cb7b5c55740d482561099e920f192', - 'info_dict': { - 'id': '981017', - 'ext': 'mp4', - 'title': 'Redman "BUD like YOU" "Usher Good Kisser" REMIX', - 'description': 'Redman "BUD like YOU" "Usher Good Kisser" REMIX', - 'thumbnail': r're:^https?://.*\.jpg$', - 'timestamp': 1409132328, - 'upload_date': '20140827', - 'duration': 193, - }, - }, { - 'url': 'http://cms.springboardplatform.com/embed_iframe/159/video/981017/rab007/rapbasement.com/1/1', - 'only_matching': True, - }, { - 'url': 'http://cms.springboardplatform.com/embed_iframe/20/video/1731611/ki055/kidzworld.com/10', 
- 'only_matching': True, - }, { - 'url': 'http://cms.springboardplatform.com/xml_feeds_advanced/index/159/rss3/981017/0/0/1/', - 'only_matching': True, - }] - - @staticmethod - def _extract_urls(webpage): - return [ - mobj.group('url') - for mobj in re.finditer( - r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//cms\.springboardplatform\.com/embed_iframe/\d+/video/\d+.*?)\1', - webpage)] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') or mobj.group('id_2') - index = mobj.group('index') or mobj.group('index_2') - - video = self._download_xml( - 'http://cms.springboardplatform.com/xml_feeds_advanced/index/%s/rss3/%s' - % (index, video_id), video_id) - - item = xpath_element(video, './/item', 'item', fatal=True) - - content = xpath_element( - item, './{http://search.yahoo.com/mrss/}content', 'content', - fatal=True) - title = unescapeHTML(xpath_text(item, './title', 'title', fatal=True)) - - video_url = content.attrib['url'] - - if 'error_video.mp4' in video_url: - raise ExtractorError( - 'Video %s no longer exists' % video_id, expected=True) - - duration = int_or_none(content.get('duration')) - tbr = int_or_none(content.get('bitrate')) - filesize = int_or_none(content.get('fileSize')) - width = int_or_none(content.get('width')) - height = int_or_none(content.get('height')) - - description = unescapeHTML(xpath_text( - item, './description', 'description')) - thumbnail = xpath_attr( - item, './{http://search.yahoo.com/mrss/}thumbnail', 'url', - 'thumbnail') - - timestamp = unified_timestamp(xpath_text( - item, './{http://cms.springboardplatform.com/namespaces.html}created', - 'timestamp')) - - formats = [{ - 'url': video_url, - 'format_id': 'http', - 'tbr': tbr, - 'filesize': filesize, - 'width': width, - 'height': height, - }] - - m3u8_format = formats[0].copy() - m3u8_format.update({ - 'url': re.sub(r'(https?://)cdn\.', r'\1hls.', video_url) + '.m3u8', - 'ext': 'mp4', - 'format_id': 'hls', - 'protocol': 
'm3u8_native', - }) - formats.append(m3u8_format) - - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'timestamp': timestamp, - 'duration': duration, - 'formats': formats, - } diff --git a/youtube_dl/extractor/sprout.py b/youtube_dl/extractor/sprout.py deleted file mode 100644 index e243732f2..000000000 --- a/youtube_dl/extractor/sprout.py +++ /dev/null @@ -1,64 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .adobepass import AdobePassIE -from ..utils import ( - int_or_none, - smuggle_url, - update_url_query, -) - - -class SproutIE(AdobePassIE): - _VALID_URL = r'https?://(?:www\.)?(?:sproutonline|universalkids)\.com/(?:watch|(?:[^/]+/)*videos)/(?P<id>[^/?#]+)' - _TESTS = [{ - 'url': 'https://www.universalkids.com/shows/remy-and-boo/season/1/videos/robot-bike-race', - 'info_dict': { - 'id': 'bm0foJFaTKqb', - 'ext': 'mp4', - 'title': 'Robot Bike Race', - 'description': 'md5:436b1d97117cc437f54c383f4debc66d', - 'timestamp': 1606148940, - 'upload_date': '20201123', - 'uploader': 'NBCU-MPAT', - }, - 'params': { - 'skip_download': True, - }, - }, { - 'url': 'http://www.sproutonline.com/watch/cowboy-adventure', - 'only_matching': True, - }, { - 'url': 'https://www.universalkids.com/watch/robot-bike-race', - 'only_matching': True, - }] - _GEO_COUNTRIES = ['US'] - - def _real_extract(self, url): - display_id = self._match_id(url) - mpx_metadata = self._download_json( - # http://nbcuunikidsprod.apps.nbcuni.com/networks/universalkids/content/videos/ - 'https://www.universalkids.com/_api/videos/' + display_id, - display_id)['mpxMetadata'] - media_pid = mpx_metadata['mediaPid'] - theplatform_url = 'https://link.theplatform.com/s/HNK2IC/' + media_pid - query = { - 'mbr': 'true', - 'manifest': 'm3u', - } - if mpx_metadata.get('entitlement') == 'auth': - query['auth'] = self._extract_mvpd_auth(url, media_pid, 'sprout', 'sprout') - theplatform_url = smuggle_url( - 
update_url_query(theplatform_url, query), { - 'force_smil_url': True, - 'geo_countries': self._GEO_COUNTRIES, - }) - return { - '_type': 'url_transparent', - 'id': media_pid, - 'url': theplatform_url, - 'series': mpx_metadata.get('seriesName'), - 'season_number': int_or_none(mpx_metadata.get('seasonNumber')), - 'episode_number': int_or_none(mpx_metadata.get('episodeNumber')), - 'ie_key': 'ThePlatform', - } diff --git a/youtube_dl/extractor/srgssr.py b/youtube_dl/extractor/srgssr.py deleted file mode 100644 index ac018e740..000000000 --- a/youtube_dl/extractor/srgssr.py +++ /dev/null @@ -1,252 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - float_or_none, - int_or_none, - parse_iso8601, - qualities, - try_get, -) - - -class SRGSSRIE(InfoExtractor): - _VALID_URL = r'''(?x) - (?: - https?://tp\.srgssr\.ch/p(?:/[^/]+)+\?urn=urn| - srgssr - ): - (?P<bu> - srf|rts|rsi|rtr|swi - ):(?:[^:]+:)? - (?P<type> - video|audio - ): - (?P<id> - [0-9a-f\-]{36}|\d+ - ) - ''' - _GEO_BYPASS = False - _GEO_COUNTRIES = ['CH'] - - _ERRORS = { - 'AGERATING12': 'To protect children under the age of 12, this video is only available between 8 p.m. and 6 a.m.', - 'AGERATING18': 'To protect children under the age of 18, this video is only available between 11 p.m. and 5 a.m.', - # 'ENDDATE': 'For legal reasons, this video was only available for a specified period of time.', - 'GEOBLOCK': 'For legal reasons, this video is only available in Switzerland.', - 'LEGAL': 'The video cannot be transmitted for legal reasons.', - 'STARTDATE': 'This video is not yet available. 
Please try again later.', - } - _DEFAULT_LANGUAGE_CODES = { - 'srf': 'de', - 'rts': 'fr', - 'rsi': 'it', - 'rtr': 'rm', - 'swi': 'en', - } - - def _get_tokenized_src(self, url, video_id, format_id): - token = self._download_json( - 'http://tp.srgssr.ch/akahd/token?acl=*', - video_id, 'Downloading %s token' % format_id, fatal=False) or {} - auth_params = try_get(token, lambda x: x['token']['authparams']) - if auth_params: - url += ('?' if '?' not in url else '&') + auth_params - return url - - def _get_media_data(self, bu, media_type, media_id): - query = {'onlyChapters': True} if media_type == 'video' else {} - full_media_data = self._download_json( - 'https://il.srgssr.ch/integrationlayer/2.0/%s/mediaComposition/%s/%s.json' - % (bu, media_type, media_id), - media_id, query=query)['chapterList'] - try: - media_data = next( - x for x in full_media_data if x.get('id') == media_id) - except StopIteration: - raise ExtractorError('No media information found') - - block_reason = media_data.get('blockReason') - if block_reason and block_reason in self._ERRORS: - message = self._ERRORS[block_reason] - if block_reason == 'GEOBLOCK': - self.raise_geo_restricted( - msg=message, countries=self._GEO_COUNTRIES) - raise ExtractorError( - '%s said: %s' % (self.IE_NAME, message), expected=True) - - return media_data - - def _real_extract(self, url): - bu, media_type, media_id = re.match(self._VALID_URL, url).groups() - media_data = self._get_media_data(bu, media_type, media_id) - title = media_data['title'] - - formats = [] - q = qualities(['SD', 'HD']) - for source in (media_data.get('resourceList') or []): - format_url = source.get('url') - if not format_url: - continue - protocol = source.get('protocol') - quality = source.get('quality') - format_id = [] - for e in (protocol, source.get('encoding'), quality): - if e: - format_id.append(e) - format_id = '-'.join(format_id) - - if protocol in ('HDS', 'HLS'): - if source.get('tokenType') == 'AKAMAI': - format_url = 
self._get_tokenized_src( - format_url, media_id, format_id) - formats.extend(self._extract_akamai_formats( - format_url, media_id)) - elif protocol == 'HLS': - formats.extend(self._extract_m3u8_formats( - format_url, media_id, 'mp4', 'm3u8_native', - m3u8_id=format_id, fatal=False)) - elif protocol in ('HTTP', 'HTTPS'): - formats.append({ - 'format_id': format_id, - 'url': format_url, - 'quality': q(quality), - }) - - # This is needed because for audio medias the podcast url is usually - # always included, even if is only an audio segment and not the - # whole episode. - if int_or_none(media_data.get('position')) == 0: - for p in ('S', 'H'): - podcast_url = media_data.get('podcast%sdUrl' % p) - if not podcast_url: - continue - quality = p + 'D' - formats.append({ - 'format_id': 'PODCAST-' + quality, - 'url': podcast_url, - 'quality': q(quality), - }) - self._sort_formats(formats) - - subtitles = {} - if media_type == 'video': - for sub in (media_data.get('subtitleList') or []): - sub_url = sub.get('url') - if not sub_url: - continue - lang = sub.get('locale') or self._DEFAULT_LANGUAGE_CODES[bu] - subtitles.setdefault(lang, []).append({ - 'url': sub_url, - }) - - return { - 'id': media_id, - 'title': title, - 'description': media_data.get('description'), - 'timestamp': parse_iso8601(media_data.get('date')), - 'thumbnail': media_data.get('imageUrl'), - 'duration': float_or_none(media_data.get('duration'), 1000), - 'subtitles': subtitles, - 'formats': formats, - } - - -class SRGSSRPlayIE(InfoExtractor): - IE_DESC = 'srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites' - _VALID_URL = r'''(?x) - https?:// - (?:(?:www|play)\.)? 
- (?P<bu>srf|rts|rsi|rtr|swissinfo)\.ch/play/(?:tv|radio)/ - (?: - [^/]+/(?P<type>video|audio)/[^?]+| - popup(?P<type_2>video|audio)player - ) - \?.*?\b(?:id=|urn=urn:[^:]+:video:)(?P<id>[0-9a-f\-]{36}|\d+) - ''' - - _TESTS = [{ - 'url': 'http://www.srf.ch/play/tv/10vor10/video/snowden-beantragt-asyl-in-russland?id=28e1a57d-5b76-4399-8ab3-9097f071e6c5', - 'md5': '6db2226ba97f62ad42ce09783680046c', - 'info_dict': { - 'id': '28e1a57d-5b76-4399-8ab3-9097f071e6c5', - 'ext': 'mp4', - 'upload_date': '20130701', - 'title': 'Snowden beantragt Asyl in Russland', - 'timestamp': 1372708215, - 'duration': 113.827, - 'thumbnail': r're:^https?://.*1383719781\.png$', - }, - 'expected_warnings': ['Unable to download f4m manifest'], - }, { - 'url': 'http://www.rtr.ch/play/radio/actualitad/audio/saira-tujetsch-tuttina-cuntinuar-cun-sedrun-muster-turissem?id=63cb0778-27f8-49af-9284-8c7a8c6d15fc', - 'info_dict': { - 'id': '63cb0778-27f8-49af-9284-8c7a8c6d15fc', - 'ext': 'mp3', - 'upload_date': '20151013', - 'title': 'Saira: Tujetsch - tuttina cuntinuar cun Sedrun Mustér Turissem', - 'timestamp': 1444709160, - 'duration': 336.816, - }, - 'params': { - # rtmp download - 'skip_download': True, - }, - }, { - 'url': 'http://www.rts.ch/play/tv/-/video/le-19h30?id=6348260', - 'md5': '67a2a9ae4e8e62a68d0e9820cc9782df', - 'info_dict': { - 'id': '6348260', - 'display_id': '6348260', - 'ext': 'mp4', - 'duration': 1796.76, - 'title': 'Le 19h30', - 'upload_date': '20141201', - 'timestamp': 1417458600, - 'thumbnail': r're:^https?://.*\.image', - }, - 'params': { - # m3u8 download - 'skip_download': True, - } - }, { - 'url': 'http://play.swissinfo.ch/play/tv/business/video/why-people-were-against-tax-reforms?id=42960270', - 'info_dict': { - 'id': '42960270', - 'ext': 'mp4', - 'title': 'Why people were against tax reforms', - 'description': 'md5:7ac442c558e9630e947427469c4b824d', - 'duration': 94.0, - 'upload_date': '20170215', - 'timestamp': 1487173560, - 'thumbnail': 
r're:https?://www\.swissinfo\.ch/srgscalableimage/42961964', - 'subtitles': 'count:9', - }, - 'params': { - 'skip_download': True, - } - }, { - 'url': 'https://www.srf.ch/play/tv/popupvideoplayer?id=c4dba0ca-e75b-43b2-a34f-f708a4932e01', - 'only_matching': True, - }, { - 'url': 'https://www.srf.ch/play/tv/10vor10/video/snowden-beantragt-asyl-in-russland?urn=urn:srf:video:28e1a57d-5b76-4399-8ab3-9097f071e6c5', - 'only_matching': True, - }, { - 'url': 'https://www.rts.ch/play/tv/19h30/video/le-19h30?urn=urn:rts:video:6348260', - 'only_matching': True, - }, { - # audio segment, has podcastSdUrl of the full episode - 'url': 'https://www.srf.ch/play/radio/popupaudioplayer?id=50b20dc8-f05b-4972-bf03-e438ff2833eb', - 'only_matching': True, - }] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - bu = mobj.group('bu') - media_type = mobj.group('type') or mobj.group('type_2') - media_id = mobj.group('id') - return self.url_result('srgssr:%s:%s:%s' % (bu[:3], media_type, media_id), 'SRGSSR') diff --git a/youtube_dl/extractor/srmediathek.py b/youtube_dl/extractor/srmediathek.py deleted file mode 100644 index 359dadaa3..000000000 --- a/youtube_dl/extractor/srmediathek.py +++ /dev/null @@ -1,59 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .ard import ARDMediathekBaseIE -from ..utils import ( - ExtractorError, - get_element_by_attribute, -) - - -class SRMediathekIE(ARDMediathekBaseIE): - IE_NAME = 'sr:mediathek' - IE_DESC = 'Saarländischer Rundfunk' - _VALID_URL = r'https?://sr-mediathek(?:\.sr-online)?\.de/index\.php\?.*?&id=(?P<id>[0-9]+)' - - _TESTS = [{ - 'url': 'http://sr-mediathek.sr-online.de/index.php?seite=7&id=28455', - 'info_dict': { - 'id': '28455', - 'ext': 'mp4', - 'title': 'sportarena (26.10.2014)', - 'description': 'Ringen: KSV Köllerbach gegen Aachen-Walheim; Frauen-Fußball: 1. 
FC Saarbrücken gegen Sindelfingen; Motorsport: Rallye in Losheim; dazu: Interview mit Timo Bernhard; Turnen: TG Saar; Reitsport: Deutscher Voltigier-Pokal; Badminton: Interview mit Michael Fuchs ', - 'thumbnail': r're:^https?://.*\.jpg$', - }, - 'skip': 'no longer available', - }, { - 'url': 'http://sr-mediathek.sr-online.de/index.php?seite=7&id=37682', - 'info_dict': { - 'id': '37682', - 'ext': 'mp4', - 'title': 'Love, Cakes and Rock\'n\'Roll', - 'description': 'md5:18bf9763631c7d326c22603681e1123d', - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, { - 'url': 'http://sr-mediathek.de/index.php?seite=7&id=7480', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - if '>Der gewünschte Beitrag ist leider nicht mehr verfügbar.<' in webpage: - raise ExtractorError('Video %s is no longer available' % video_id, expected=True) - - media_collection_url = self._search_regex( - r'data-mediacollection-ardplayer="([^"]+)"', webpage, 'media collection url') - info = self._extract_media_info(media_collection_url, webpage, video_id) - info.update({ - 'id': video_id, - 'title': get_element_by_attribute('class', 'ardplayer-title', webpage), - 'description': self._og_search_description(webpage), - 'thumbnail': self._og_search_thumbnail(webpage), - }) - return info diff --git a/youtube_dl/extractor/stanfordoc.py b/youtube_dl/extractor/stanfordoc.py deleted file mode 100644 index ae3dd1380..000000000 --- a/youtube_dl/extractor/stanfordoc.py +++ /dev/null @@ -1,91 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - orderedSet, - unescapeHTML, -) - - -class StanfordOpenClassroomIE(InfoExtractor): - IE_NAME = 'stanfordoc' - IE_DESC = 'Stanford Open ClassRoom' - _VALID_URL = 
r'https?://openclassroom\.stanford\.edu(?P<path>/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P<course>[^&]+)(&video=(?P<video>[^&]+))?(&.*)?)?))$' - _TEST = { - 'url': 'http://openclassroom.stanford.edu/MainFolder/VideoPage.php?course=PracticalUnix&video=intro-environment&speed=100', - 'md5': '544a9468546059d4e80d76265b0443b8', - 'info_dict': { - 'id': 'PracticalUnix_intro-environment', - 'ext': 'mp4', - 'title': 'Intro Environment', - } - } - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - - if mobj.group('course') and mobj.group('video'): # A specific video - course = mobj.group('course') - video = mobj.group('video') - info = { - 'id': course + '_' + video, - 'uploader': None, - 'upload_date': None, - } - - baseUrl = 'http://openclassroom.stanford.edu/MainFolder/courses/' + course + '/videos/' - xmlUrl = baseUrl + video + '.xml' - mdoc = self._download_xml(xmlUrl, info['id']) - try: - info['title'] = mdoc.findall('./title')[0].text - info['url'] = baseUrl + mdoc.findall('./videoFile')[0].text - except IndexError: - raise ExtractorError('Invalid metadata XML file') - return info - elif mobj.group('course'): # A course page - course = mobj.group('course') - info = { - 'id': course, - '_type': 'playlist', - 'uploader': None, - 'upload_date': None, - } - - coursepage = self._download_webpage( - url, info['id'], - note='Downloading course info page', - errnote='Unable to download course info page') - - info['title'] = self._html_search_regex( - r'<h1>([^<]+)</h1>', coursepage, 'title', default=info['id']) - - info['description'] = self._html_search_regex( - r'(?s)<description>([^<]+)</description>', - coursepage, 'description', fatal=False) - - links = orderedSet(re.findall(r'<a href="(VideoPage\.php\?[^"]+)">', coursepage)) - info['entries'] = [self.url_result( - 'http://openclassroom.stanford.edu/MainFolder/%s' % unescapeHTML(l) - ) for l in links] - return info - else: # Root page - info = { - 'id': 'Stanford 
OpenClassroom', - '_type': 'playlist', - 'uploader': None, - 'upload_date': None, - } - info['title'] = info['id'] - - rootURL = 'http://openclassroom.stanford.edu/MainFolder/HomePage.php' - rootpage = self._download_webpage(rootURL, info['id'], - errnote='Unable to download course info page') - - links = orderedSet(re.findall(r'<a href="(CoursePage\.php\?[^"]+)">', rootpage)) - info['entries'] = [self.url_result( - 'http://openclassroom.stanford.edu/MainFolder/%s' % unescapeHTML(l) - ) for l in links] - return info diff --git a/youtube_dl/extractor/steam.py b/youtube_dl/extractor/steam.py deleted file mode 100644 index a6a191ceb..000000000 --- a/youtube_dl/extractor/steam.py +++ /dev/null @@ -1,149 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - extract_attributes, - ExtractorError, - get_element_by_class, - js_to_json, -) - - -class SteamIE(InfoExtractor): - _VALID_URL = r"""(?x) - https?://store\.steampowered\.com/ - (agecheck/)? - (?P<urltype>video|app)/ #If the page is only for videos or for a game - (?P<gameID>\d+)/? - (?P<videoID>\d*)(?P<extra>\??) 
# For urltype == video we sometimes get the videoID - | - https?://(?:www\.)?steamcommunity\.com/sharedfiles/filedetails/\?id=(?P<fileID>[0-9]+) - """ - _VIDEO_PAGE_TEMPLATE = 'http://store.steampowered.com/video/%s/' - _AGECHECK_TEMPLATE = 'http://store.steampowered.com/agecheck/video/%s/?snr=1_agecheck_agecheck__age-gate&ageDay=1&ageMonth=January&ageYear=1970' - _TESTS = [{ - 'url': 'http://store.steampowered.com/video/105600/', - 'playlist': [ - { - 'md5': '6a294ee0c4b1f47f5bb76a65e31e3592', - 'info_dict': { - 'id': '2040428', - 'ext': 'mp4', - 'title': 'Terraria 1.3 Trailer', - 'playlist_index': 1, - } - }, - { - 'md5': '911672b20064ca3263fa89650ba5a7aa', - 'info_dict': { - 'id': '2029566', - 'ext': 'mp4', - 'title': 'Terraria 1.2 Trailer', - 'playlist_index': 2, - } - } - ], - 'info_dict': { - 'id': '105600', - 'title': 'Terraria', - }, - 'params': { - 'playlistend': 2, - } - }, { - 'url': 'http://steamcommunity.com/sharedfiles/filedetails/?id=242472205', - 'info_dict': { - 'id': 'X8kpJBlzD2E', - 'ext': 'mp4', - 'upload_date': '20140617', - 'title': 'FRONTIERS - Trapping', - 'description': 'md5:bf6f7f773def614054089e5769c12a6e', - 'uploader': 'AAD Productions', - 'uploader_id': 'AtomicAgeDogGames', - } - }] - - def _real_extract(self, url): - m = re.match(self._VALID_URL, url) - fileID = m.group('fileID') - if fileID: - videourl = url - playlist_id = fileID - else: - gameID = m.group('gameID') - playlist_id = gameID - videourl = self._VIDEO_PAGE_TEMPLATE % playlist_id - - self._set_cookie('steampowered.com', 'mature_content', '1') - - webpage = self._download_webpage(videourl, playlist_id) - - if re.search('<h2>Please enter your birth date to continue:</h2>', webpage) is not None: - videourl = self._AGECHECK_TEMPLATE % playlist_id - self.report_age_confirmation() - webpage = self._download_webpage(videourl, playlist_id) - - flash_vars = self._parse_json(self._search_regex( - r'(?s)rgMovieFlashvars\s*=\s*({.+?});', webpage, - 'flash vars'), playlist_id, 
js_to_json) - - playlist_title = None - entries = [] - if fileID: - playlist_title = get_element_by_class('workshopItemTitle', webpage) - for movie in flash_vars.values(): - if not movie: - continue - youtube_id = movie.get('YOUTUBE_VIDEO_ID') - if not youtube_id: - continue - entries.append({ - '_type': 'url', - 'url': youtube_id, - 'ie_key': 'Youtube', - }) - else: - playlist_title = get_element_by_class('apphub_AppName', webpage) - for movie_id, movie in flash_vars.items(): - if not movie: - continue - video_id = self._search_regex(r'movie_(\d+)', movie_id, 'video id', fatal=False) - title = movie.get('MOVIE_NAME') - if not title or not video_id: - continue - entry = { - 'id': video_id, - 'title': title.replace('+', ' '), - } - formats = [] - flv_url = movie.get('FILENAME') - if flv_url: - formats.append({ - 'format_id': 'flv', - 'url': flv_url, - }) - highlight_element = self._search_regex( - r'(<div[^>]+id="highlight_movie_%s"[^>]+>)' % video_id, - webpage, 'highlight element', fatal=False) - if highlight_element: - highlight_attribs = extract_attributes(highlight_element) - if highlight_attribs: - entry['thumbnail'] = highlight_attribs.get('data-poster') - for quality in ('', '-hd'): - for ext in ('webm', 'mp4'): - video_url = highlight_attribs.get('data-%s%s-source' % (ext, quality)) - if video_url: - formats.append({ - 'format_id': ext + quality, - 'url': video_url, - }) - if not formats: - continue - entry['formats'] = formats - entries.append(entry) - if not entries: - raise ExtractorError('Could not find any videos') - - return self.playlist_result(entries, playlist_id, playlist_title) diff --git a/youtube_dl/extractor/stitcher.py b/youtube_dl/extractor/stitcher.py deleted file mode 100644 index 822782507..000000000 --- a/youtube_dl/extractor/stitcher.py +++ /dev/null @@ -1,144 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..compat import compat_str -from ..utils import ( - clean_html, - clean_podcast_url, - 
ExtractorError, - int_or_none, - str_or_none, - try_get, - url_or_none, -) - - -class StitcherBaseIE(InfoExtractor): - _VALID_URL_BASE = r'https?://(?:www\.)?stitcher\.com/(?:podcast|show)/' - - def _call_api(self, path, video_id, query): - resp = self._download_json( - 'https://api.prod.stitcher.com/' + path, - video_id, query=query) - error_massage = try_get(resp, lambda x: x['errors'][0]['message']) - if error_massage: - raise ExtractorError(error_massage, expected=True) - return resp['data'] - - def _extract_description(self, data): - return clean_html(data.get('html_description') or data.get('description')) - - def _extract_audio_url(self, episode): - return url_or_none(episode.get('audio_url') or episode.get('guid')) - - def _extract_show_info(self, show): - return { - 'thumbnail': show.get('image_base_url'), - 'series': show.get('title'), - } - - def _extract_episode(self, episode, audio_url, show_info): - info = { - 'id': compat_str(episode['id']), - 'display_id': episode.get('slug'), - 'title': episode['title'].strip(), - 'description': self._extract_description(episode), - 'duration': int_or_none(episode.get('duration')), - 'url': clean_podcast_url(audio_url), - 'vcodec': 'none', - 'timestamp': int_or_none(episode.get('date_published')), - 'season_number': int_or_none(episode.get('season')), - 'season_id': str_or_none(episode.get('season_id')), - } - info.update(show_info) - return info - - -class StitcherIE(StitcherBaseIE): - _VALID_URL = StitcherBaseIE._VALID_URL_BASE + r'(?:[^/]+/)+e(?:pisode)?/(?:[^/#?&]+-)?(?P<id>\d+)' - _TESTS = [{ - 'url': 'http://www.stitcher.com/podcast/the-talking-machines/e/40789481?autoplay=true', - 'md5': 'e9635098e0da10b21a0e2b85585530f6', - 'info_dict': { - 'id': '40789481', - 'ext': 'mp3', - 'title': 'Machine Learning Mastery and Cancer Clusters', - 'description': 'md5:547adb4081864be114ae3831b4c2b42f', - 'duration': 1604, - 'thumbnail': r're:^https?://.*\.jpg', - 'upload_date': '20151008', - 'timestamp': 1444285800, - 
'series': 'Talking Machines', - }, - }, { - 'url': 'http://www.stitcher.com/podcast/panoply/vulture-tv/e/the-rare-hourlong-comedy-plus-40846275?autoplay=true', - 'info_dict': { - 'id': '40846275', - 'display_id': 'the-rare-hourlong-comedy-plus', - 'ext': 'mp3', - 'title': "The CW's 'Crazy Ex-Girlfriend'", - 'description': 'md5:04f1e2f98eb3f5cbb094cea0f9e19b17', - 'duration': 2235, - 'thumbnail': r're:^https?://.*\.jpg', - }, - 'params': { - 'skip_download': True, - }, - 'skip': 'Page Not Found', - }, { - # escaped title - 'url': 'http://www.stitcher.com/podcast/marketplace-on-stitcher/e/40910226?autoplay=true', - 'only_matching': True, - }, { - 'url': 'http://www.stitcher.com/podcast/panoply/getting-in/e/episode-2a-how-many-extracurriculars-should-i-have-40876278?autoplay=true', - 'only_matching': True, - }, { - 'url': 'https://www.stitcher.com/show/threedom/episode/circles-on-a-stick-200212584', - 'only_matching': True, - }] - - def _real_extract(self, url): - audio_id = self._match_id(url) - data = self._call_api( - 'shows/episodes', audio_id, {'episode_ids': audio_id}) - episode = data['episodes'][0] - audio_url = self._extract_audio_url(episode) - if not audio_url: - self.raise_login_required() - show = try_get(data, lambda x: x['shows'][0], dict) or {} - return self._extract_episode( - episode, audio_url, self._extract_show_info(show)) - - -class StitcherShowIE(StitcherBaseIE): - _VALID_URL = StitcherBaseIE._VALID_URL_BASE + r'(?P<id>[^/#?&]+)/?(?:[?#&]|$)' - _TESTS = [{ - 'url': 'http://www.stitcher.com/podcast/the-talking-machines', - 'info_dict': { - 'id': 'the-talking-machines', - 'title': 'Talking Machines', - 'description': 'md5:831f0995e40f26c10231af39cf1ebf0b', - }, - 'playlist_mincount': 106, - }, { - 'url': 'https://www.stitcher.com/show/the-talking-machines', - 'only_matching': True, - }] - - def _real_extract(self, url): - show_slug = self._match_id(url) - data = self._call_api( - 'search/show/%s/allEpisodes' % show_slug, show_slug, {'count': 
10000}) - show = try_get(data, lambda x: x['shows'][0], dict) or {} - show_info = self._extract_show_info(show) - - entries = [] - for episode in (data.get('episodes') or []): - audio_url = self._extract_audio_url(episode) - if not audio_url: - continue - entries.append(self._extract_episode(episode, audio_url, show_info)) - - return self.playlist_result( - entries, show_slug, show.get('title'), - self._extract_description(show)) diff --git a/youtube_dl/extractor/storyfire.py b/youtube_dl/extractor/storyfire.py deleted file mode 100644 index 9c698626f..000000000 --- a/youtube_dl/extractor/storyfire.py +++ /dev/null @@ -1,151 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import functools - -from .common import InfoExtractor -from ..utils import ( - # HEADRequest, - int_or_none, - OnDemandPagedList, - smuggle_url, -) - - -class StoryFireBaseIE(InfoExtractor): - _VALID_URL_BASE = r'https?://(?:www\.)?storyfire\.com/' - - def _call_api(self, path, video_id, resource, query=None): - return self._download_json( - 'https://storyfire.com/app/%s/%s' % (path, video_id), video_id, - 'Downloading %s JSON metadata' % resource, query=query) - - def _parse_video(self, video): - title = video['title'] - vimeo_id = self._search_regex( - r'https?://player\.vimeo\.com/external/(\d+)', - video['vimeoVideoURL'], 'vimeo id') - - # video_url = self._request_webpage( - # HEADRequest(video['vimeoVideoURL']), video_id).geturl() - # formats = [] - # for v_url, suffix in [(video_url, '_sep'), (video_url.replace('/sep/video/', '/video/'), '')]: - # formats.extend(self._extract_m3u8_formats( - # v_url, video_id, 'mp4', 'm3u8_native', - # m3u8_id='hls' + suffix, fatal=False)) - # formats.extend(self._extract_mpd_formats( - # v_url.replace('.m3u8', '.mpd'), video_id, - # mpd_id='dash' + suffix, fatal=False)) - # self._sort_formats(formats) - - uploader_id = video.get('hostID') - - return { - '_type': 'url_transparent', - 'id': vimeo_id, - 'title': title, - 'description': 
video.get('description'), - 'url': smuggle_url( - 'https://player.vimeo.com/video/' + vimeo_id, { - 'http_headers': { - 'Referer': 'https://storyfire.com/', - } - }), - # 'formats': formats, - 'thumbnail': video.get('storyImage'), - 'view_count': int_or_none(video.get('views')), - 'like_count': int_or_none(video.get('likesCount')), - 'comment_count': int_or_none(video.get('commentsCount')), - 'duration': int_or_none(video.get('videoDuration')), - 'timestamp': int_or_none(video.get('publishDate')), - 'uploader': video.get('username'), - 'uploader_id': uploader_id, - 'uploader_url': 'https://storyfire.com/user/%s/video' % uploader_id if uploader_id else None, - 'episode_number': int_or_none(video.get('episodeNumber') or video.get('episode_number')), - } - - -class StoryFireIE(StoryFireBaseIE): - _VALID_URL = StoryFireBaseIE._VALID_URL_BASE + r'video-details/(?P<id>[0-9a-f]{24})' - _TEST = { - 'url': 'https://storyfire.com/video-details/5df1d132b6378700117f9181', - 'md5': 'caec54b9e4621186d6079c7ec100c1eb', - 'info_dict': { - 'id': '378954662', - 'ext': 'mp4', - 'title': 'Buzzfeed Teaches You About Memes', - 'uploader_id': 'ntZAJFECERSgqHSxzonV5K2E89s1', - 'timestamp': 1576129028, - 'description': 'md5:0b4e28021548e144bed69bb7539e62ea', - 'uploader': 'whang!', - 'upload_date': '20191212', - 'duration': 418, - 'view_count': int, - 'like_count': int, - 'comment_count': int, - }, - 'params': { - 'skip_download': True, - }, - 'expected_warnings': ['Unable to download JSON metadata'] - } - - def _real_extract(self, url): - video_id = self._match_id(url) - video = self._call_api( - 'generic/video-detail', video_id, 'video')['video'] - return self._parse_video(video) - - -class StoryFireUserIE(StoryFireBaseIE): - _VALID_URL = StoryFireBaseIE._VALID_URL_BASE + r'user/(?P<id>[^/]+)/video' - _TEST = { - 'url': 'https://storyfire.com/user/UQ986nFxmAWIgnkZQ0ftVhq4nOk2/video', - 'info_dict': { - 'id': 'UQ986nFxmAWIgnkZQ0ftVhq4nOk2', - }, - 'playlist_mincount': 151, - } - 
_PAGE_SIZE = 20 - - def _fetch_page(self, user_id, page): - videos = self._call_api( - 'publicVideos', user_id, 'page %d' % (page + 1), { - 'skip': page * self._PAGE_SIZE, - })['videos'] - for video in videos: - yield self._parse_video(video) - - def _real_extract(self, url): - user_id = self._match_id(url) - entries = OnDemandPagedList(functools.partial( - self._fetch_page, user_id), self._PAGE_SIZE) - return self.playlist_result(entries, user_id) - - -class StoryFireSeriesIE(StoryFireBaseIE): - _VALID_URL = StoryFireBaseIE._VALID_URL_BASE + r'write/series/stories/(?P<id>[^/?&#]+)' - _TESTS = [{ - 'url': 'https://storyfire.com/write/series/stories/-Lq6MsuIHLODO6d2dDkr/', - 'info_dict': { - 'id': '-Lq6MsuIHLODO6d2dDkr', - }, - 'playlist_mincount': 13, - }, { - 'url': 'https://storyfire.com/write/series/stories/the_mortal_one/', - 'info_dict': { - 'id': 'the_mortal_one', - }, - 'playlist_count': 0, - }] - - def _extract_videos(self, stories): - for story in stories.values(): - if story.get('hasVideo'): - yield self._parse_video(story) - - def _real_extract(self, url): - series_id = self._match_id(url) - stories = self._call_api( - 'seriesStories', series_id, 'series stories') - return self.playlist_result(self._extract_videos(stories), series_id) diff --git a/youtube_dl/extractor/streamable.py b/youtube_dl/extractor/streamable.py deleted file mode 100644 index 34725274e..000000000 --- a/youtube_dl/extractor/streamable.py +++ /dev/null @@ -1,112 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - float_or_none, - int_or_none, -) - - -class StreamableIE(InfoExtractor): - _VALID_URL = r'https?://streamable\.com/(?:[es]/)?(?P<id>\w+)' - _TESTS = [ - { - 'url': 'https://streamable.com/dnd1', - 'md5': '3e3bc5ca088b48c2d436529b64397fef', - 'info_dict': { - 'id': 'dnd1', - 'ext': 'mp4', - 'title': 'Mikel Oiarzabal scores to make it 0-3 for La Real against Espanyol', - 
'thumbnail': r're:https?://.*\.jpg$', - 'uploader': 'teabaker', - 'timestamp': 1454964157.35115, - 'upload_date': '20160208', - 'duration': 61.516, - 'view_count': int, - } - }, - # older video without bitrate, width/height, etc. info - { - 'url': 'https://streamable.com/moo', - 'md5': '2cf6923639b87fba3279ad0df3a64e73', - 'info_dict': { - 'id': 'moo', - 'ext': 'mp4', - 'title': '"Please don\'t eat me!"', - 'thumbnail': r're:https?://.*\.jpg$', - 'timestamp': 1426115495, - 'upload_date': '20150311', - 'duration': 12, - 'view_count': int, - } - }, - { - 'url': 'https://streamable.com/e/dnd1', - 'only_matching': True, - }, - { - 'url': 'https://streamable.com/s/okkqk/drxjds', - 'only_matching': True, - } - ] - - @staticmethod - def _extract_url(webpage): - mobj = re.search( - r'<iframe[^>]+src=(?P<q1>[\'"])(?P<src>(?:https?:)?//streamable\.com/(?:(?!\1).+))(?P=q1)', - webpage) - if mobj: - return mobj.group('src') - - def _real_extract(self, url): - video_id = self._match_id(url) - - # Note: Using the ajax API, as the public Streamable API doesn't seem - # to return video info like the title properly sometimes, and doesn't - # include info like the video duration - video = self._download_json( - 'https://ajax.streamable.com/videos/%s' % video_id, video_id) - - # Format IDs: - # 0 The video is being uploaded - # 1 The video is being processed - # 2 The video has at least one file ready - # 3 The video is unavailable due to an error - status = video.get('status') - if status != 2: - raise ExtractorError( - 'This video is currently unavailable. 
It may still be uploading or processing.', - expected=True) - - title = video.get('reddit_title') or video['title'] - - formats = [] - for key, info in video['files'].items(): - if not info.get('url'): - continue - formats.append({ - 'format_id': key, - 'url': self._proto_relative_url(info['url']), - 'width': int_or_none(info.get('width')), - 'height': int_or_none(info.get('height')), - 'filesize': int_or_none(info.get('size')), - 'fps': int_or_none(info.get('framerate')), - 'vbr': float_or_none(info.get('bitrate'), 1000) - }) - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': title, - 'description': video.get('description'), - 'thumbnail': self._proto_relative_url(video.get('thumbnail_url')), - 'uploader': video.get('owner', {}).get('user_name'), - 'timestamp': float_or_none(video.get('date_added')), - 'duration': float_or_none(video.get('duration')), - 'view_count': int_or_none(video.get('plays')), - 'formats': formats - } diff --git a/youtube_dl/extractor/streamcloud.py b/youtube_dl/extractor/streamcloud.py deleted file mode 100644 index b97bb4374..000000000 --- a/youtube_dl/extractor/streamcloud.py +++ /dev/null @@ -1,78 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - urlencode_postdata, -) - - -class StreamcloudIE(InfoExtractor): - IE_NAME = 'streamcloud.eu' - _VALID_URL = r'https?://streamcloud\.eu/(?P<id>[a-zA-Z0-9_-]+)(?:/(?P<fname>[^#?]*)\.html)?' 
- - _TESTS = [{ - 'url': 'http://streamcloud.eu/skp9j99s4bpz/youtube-dl_test_video_____________-BaW_jenozKc.mp4.html', - 'md5': '6bea4c7fa5daaacc2a946b7146286686', - 'info_dict': { - 'id': 'skp9j99s4bpz', - 'ext': 'mp4', - 'title': 'youtube-dl test video \'/\\ ä ↭', - }, - 'skip': 'Only available from the EU' - }, { - 'url': 'http://streamcloud.eu/ua8cmfh1nbe6/NSHIP-148--KUC-NG--H264-.mp4.html', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - url = 'http://streamcloud.eu/%s' % video_id - - orig_webpage = self._download_webpage(url, video_id) - - if '>File Not Found<' in orig_webpage: - raise ExtractorError( - 'Video %s does not exist' % video_id, expected=True) - - fields = re.findall(r'''(?x)<input\s+ - type="(?:hidden|submit)"\s+ - name="([^"]+)"\s+ - (?:id="[^"]+"\s+)? - value="([^"]*)" - ''', orig_webpage) - - self._sleep(6, video_id) - - webpage = self._download_webpage( - url, video_id, data=urlencode_postdata(fields), headers={ - b'Content-Type': b'application/x-www-form-urlencoded', - }) - - try: - title = self._html_search_regex( - r'<h1[^>]*>([^<]+)<', webpage, 'title') - video_url = self._search_regex( - r'file:\s*"([^"]+)"', webpage, 'video URL') - except ExtractorError: - message = self._html_search_regex( - r'(?s)<div[^>]+class=(["\']).*?msgboxinfo.*?\1[^>]*>(?P<message>.+?)</div>', - webpage, 'message', default=None, group='message') - if message: - raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True) - raise - thumbnail = self._search_regex( - r'image:\s*"([^"]+)"', webpage, 'thumbnail URL', fatal=False) - - return { - 'id': video_id, - 'title': title, - 'url': video_url, - 'thumbnail': thumbnail, - 'http_headers': { - 'Referer': url, - }, - } diff --git a/youtube_dl/extractor/streamcz.py b/youtube_dl/extractor/streamcz.py deleted file mode 100644 index 97b2eb7f8..000000000 --- a/youtube_dl/extractor/streamcz.py +++ /dev/null @@ -1,126 +0,0 @@ -# coding: utf-8 -from 
__future__ import unicode_literals - -import json -import re - -from .common import InfoExtractor -from ..utils import ( - float_or_none, - int_or_none, - merge_dicts, - parse_codecs, - urljoin, -) - - -class StreamCZIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?:stream|televizeseznam)\.cz/[^?#]+/(?P<display_id>[^?#]+)-(?P<id>[0-9]+)' - _TESTS = [{ - 'url': 'https://www.televizeseznam.cz/video/lajna/buh-57953890', - 'md5': '40c41ade1464a390a0b447e333df4239', - 'info_dict': { - 'id': '57953890', - 'ext': 'mp4', - 'title': 'Bůh', - 'display_id': 'buh', - 'description': 'md5:8f5f09b9b7bc67df910486cdd88f7165', - 'duration': 1369.6, - 'view_count': int, - } - }, { - 'url': 'https://www.stream.cz/kdo-to-mluvi/kdo-to-mluvi-velke-odhaleni-prinasi-novy-porad-uz-od-25-srpna-64087937', - 'md5': '41fd358000086a1ccdb068c77809b158', - 'info_dict': { - 'id': '64087937', - 'ext': 'mp4', - 'title': 'Kdo to mluví? Velké odhalení přináší nový pořad už od 25. srpna', - 'display_id': 'kdo-to-mluvi-velke-odhaleni-prinasi-novy-porad-uz-od-25-srpna', - 'description': 'md5:97a811000a6460266029d6c1c2ebcd59', - 'duration': 50.2, - 'view_count': int, - } - }, { - 'url': 'https://www.stream.cz/tajemno/znicehonic-jim-skrz-strechu-prolitnul-zahadny-predmet-badatele-vse-objasnili-64147267', - 'md5': '3ee4d0be040e8f4a543e67e509d55e3f', - 'info_dict': { - 'id': '64147267', - 'ext': 'mp4', - 'title': 'Zničehonic jim skrz střechu prolítnul záhadný předmět. 
Badatelé vše objasnili', - 'display_id': 'znicehonic-jim-skrz-strechu-prolitnul-zahadny-predmet-badatele-vse-objasnili', - 'description': 'md5:4b8ada6718d34bb011c4e04ca4bc19bf', - 'duration': 442.84, - 'view_count': int, - } - }] - - def _extract_formats(self, spl_url, video): - for ext, pref, streams in ( - ('ts', -1, video.get('http_stream', {}).get('qualities', {})), - ('mp4', 1, video.get('mp4'))): - for format_id, stream in streams.items(): - if not stream.get('url'): - continue - yield merge_dicts({ - 'format_id': '-'.join((format_id, ext)), - 'ext': ext, - 'source_preference': pref, - 'url': urljoin(spl_url, stream['url']), - 'tbr': float_or_none(stream.get('bandwidth'), scale=1000), - 'duration': float_or_none(stream.get('duration'), scale=1000), - 'width': stream.get('resolution', 2 * [0])[0] or None, - 'height': stream.get('resolution', 2 * [0])[1] or int_or_none(format_id.replace('p', '')), - }, parse_codecs(stream.get('codec'))) - - def _real_extract(self, url): - display_id, video_id = re.match(self._VALID_URL, url).groups() - - data = self._download_json( - 'https://www.televizeseznam.cz/api/graphql', video_id, 'Downloading GraphQL result', - data=json.dumps({ - 'variables': {'urlName': video_id}, - 'query': ''' - query LoadEpisode($urlName : String){ episode(urlName: $urlName){ ...VideoDetailFragmentOnEpisode } } - fragment VideoDetailFragmentOnEpisode on Episode { - id - spl - urlName - name - perex - duration - views - }''' - }).encode('utf-8'), - headers={'Content-Type': 'application/json;charset=UTF-8'} - )['data']['episode'] - - spl_url = data['spl'] + 'spl2,3' - metadata = self._download_json(spl_url, video_id, 'Downloading playlist') - if 'Location' in metadata and 'data' not in metadata: - spl_url = metadata['Location'] - metadata = self._download_json(spl_url, video_id, 'Downloading redirected playlist') - video = metadata['data'] - - subtitles = {} - for subs in video.get('subtitles', {}).values(): - if not subs.get('language'): - continue 
- for ext, sub_url in subs.get('urls').items(): - subtitles.setdefault(subs['language'], []).append({ - 'ext': ext, - 'url': urljoin(spl_url, sub_url) - }) - - formats = list(self._extract_formats(spl_url, video)) - self._sort_formats(formats) - - return { - 'id': video_id, - 'display_id': display_id, - 'title': data.get('name'), - 'description': data.get('perex'), - 'duration': float_or_none(data.get('duration')), - 'view_count': int_or_none(data.get('views')), - 'formats': formats, - 'subtitles': subtitles, - } diff --git a/youtube_dl/extractor/streamsb.py b/youtube_dl/extractor/streamsb.py deleted file mode 100644 index bffcb3de1..000000000 --- a/youtube_dl/extractor/streamsb.py +++ /dev/null @@ -1,61 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import binascii -import random -import re -import string - -from .common import InfoExtractor -from ..utils import urljoin, url_basename - - -def to_ascii_hex(str1): - return binascii.hexlify(str1.encode('utf-8')).decode('ascii') - - -def generate_random_string(length): - return ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(length)) - - -class StreamsbIE(InfoExtractor): - _DOMAINS = ('viewsb.com', ) - _VALID_URL = r'https://(?P<domain>%s)/(?P<id>.+)' % '|'.join(_DOMAINS) - _TEST = { - 'url': 'https://viewsb.com/dxfvlu4qanjx', - 'md5': '488d111a63415369bf90ea83adc8a325', - 'info_dict': { - 'id': 'dxfvlu4qanjx', - 'ext': 'mp4', - 'title': 'Sintel' - } - } - - def _real_extract(self, url): - domain, video_id = re.match(self._VALID_URL, url).group('domain', 'id') - webpage = self._download_webpage(url, video_id) - - iframe_rel_url = self._search_regex(r'''(?i)<iframe\b[^>]+\bsrc\s*=\s*('|")(?P<path>/.*\.html)\1''', webpage, 'iframe', group='path') - iframe_url = urljoin('https://' + domain, iframe_rel_url) - - iframe_data = self._download_webpage(iframe_url, video_id) - app_version = self._search_regex(r'''<script\b[^>]+\bsrc\s*=\s*["|'].*/app\.min\.(\d+)\.js''', 
iframe_data, 'app version', fatal=False) or '50' - - video_code = url_basename(iframe_url).rsplit('.')[0] - - length = 12 - req = '||'.join((generate_random_string(length), video_code, generate_random_string(length), 'streamsb')) - ereq = 'https://{0}/sources{1}/{2}'.format(domain, app_version, to_ascii_hex(req)) - - video_data = self._download_webpage(ereq, video_id, headers={ - 'Referer': iframe_url, - 'watchsb': 'sbstream', - }) - player_data = self._parse_json(video_data, video_id) - title = player_data['stream_data']['title'] - formats = self._extract_m3u8_formats(player_data['stream_data']['file'], video_id, ext='mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False) - return { - 'id': video_id, - 'formats': formats, - 'title': title, - } diff --git a/youtube_dl/extractor/streetvoice.py b/youtube_dl/extractor/streetvoice.py deleted file mode 100644 index f21681ae7..000000000 --- a/youtube_dl/extractor/streetvoice.py +++ /dev/null @@ -1,100 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - int_or_none, - parse_iso8601, - str_or_none, - strip_or_none, - try_get, - urljoin, -) - - -class StreetVoiceIE(InfoExtractor): - _VALID_URL = r'https?://(?:.+?\.)?streetvoice\.com/[^/]+/songs/(?P<id>[0-9]+)' - _TESTS = [{ - 'url': 'https://streetvoice.com/skippylu/songs/123688/', - 'md5': '0eb535970629a5195685355f3ed60bfd', - 'info_dict': { - 'id': '123688', - 'ext': 'mp3', - 'title': '流浪', - 'description': 'md5:8eb0bfcc9dcd8aa82bd6efca66e3fea6', - 'thumbnail': r're:^https?://.*\.jpg', - 'duration': 270, - 'upload_date': '20100923', - 'uploader': 'Crispy脆樂團', - 'uploader_id': '627810', - 'uploader_url': 're:^https?://streetvoice.com/skippylu/', - 'timestamp': 1285261661, - 'view_count': int, - 'like_count': int, - 'comment_count': int, - 'repost_count': int, - 'track': '流浪', - 'track_id': '123688', - 'album': '2010', - } - }, { - 'url': 'http://tw.streetvoice.com/skippylu/songs/94440/', - 
'only_matching': True, - }] - - def _real_extract(self, url): - song_id = self._match_id(url) - base_url = 'https://streetvoice.com/api/v4/song/%s/' % song_id - song = self._download_json(base_url, song_id, query={ - 'fields': 'album,comments_count,created_at,id,image,length,likes_count,name,nickname,plays_count,profile,share_count,synopsis,user,username', - }) - title = song['name'] - - formats = [] - for suffix, format_id in [('hls/file', 'hls'), ('file', 'http'), ('file/original', 'original')]: - f_url = (self._download_json( - base_url + suffix + '/', song_id, - 'Downloading %s format URL' % format_id, - data=b'', fatal=False) or {}).get('file') - if not f_url: - continue - f = { - 'ext': 'mp3', - 'format_id': format_id, - 'url': f_url, - 'vcodec': 'none', - } - if format_id == 'hls': - f['protocol'] = 'm3u8_native' - abr = self._search_regex(r'\.mp3\.(\d+)k', f_url, 'bitrate', default=None) - if abr: - abr = int(abr) - f.update({ - 'abr': abr, - 'tbr': abr, - }) - formats.append(f) - - user = song.get('user') or {} - username = user.get('username') - get_count = lambda x: int_or_none(song.get(x + '_count')) - - return { - 'id': song_id, - 'formats': formats, - 'title': title, - 'description': strip_or_none(song.get('synopsis')), - 'thumbnail': song.get('image'), - 'duration': int_or_none(song.get('length')), - 'timestamp': parse_iso8601(song.get('created_at')), - 'uploader': try_get(user, lambda x: x['profile']['nickname']), - 'uploader_id': str_or_none(user.get('id')), - 'uploader_url': urljoin(url, '/%s/' % username) if username else None, - 'view_count': get_count('plays'), - 'like_count': get_count('likes'), - 'comment_count': get_count('comments'), - 'repost_count': get_count('share'), - 'track': title, - 'track_id': song_id, - 'album': try_get(song, lambda x: x['album']['name']), - } diff --git a/youtube_dl/extractor/stretchinternet.py b/youtube_dl/extractor/stretchinternet.py deleted file mode 100644 index ec08eae55..000000000 --- 
a/youtube_dl/extractor/stretchinternet.py +++ /dev/null @@ -1,37 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor - - -class StretchInternetIE(InfoExtractor): - _VALID_URL = r'https?://portal\.stretchinternet\.com/[^/]+/(?:portal|full)\.htm\?.*?\beventId=(?P<id>\d+)' - _TEST = { - 'url': 'https://portal.stretchinternet.com/umary/portal.htm?eventId=573272&streamType=video', - 'info_dict': { - 'id': '573272', - 'ext': 'mp4', - 'title': 'UNIVERSITY OF MARY WRESTLING VS UPPER IOWA', - # 'timestamp': 1575668361, - # 'upload_date': '20191206', - 'uploader_id': '99997', - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - media_url = self._download_json( - 'https://core.stretchlive.com/trinity/event/tcg/' + video_id, - video_id)[0]['media'][0]['url'] - event = self._download_json( - 'https://neo-client.stretchinternet.com/portal-ws/getEvent.json', - video_id, query={'eventID': video_id, 'token': 'asdf'})['event'] - - return { - 'id': video_id, - 'title': event['title'], - # TODO: parse US timezone abbreviations - # 'timestamp': event.get('dateTimeString'), - 'url': 'https://' + media_url, - 'uploader_id': event.get('ownerID'), - } diff --git a/youtube_dl/extractor/stv.py b/youtube_dl/extractor/stv.py deleted file mode 100644 index 539220a94..000000000 --- a/youtube_dl/extractor/stv.py +++ /dev/null @@ -1,95 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - compat_str, - float_or_none, - int_or_none, - smuggle_url, - str_or_none, - try_get, -) - - -class STVPlayerIE(InfoExtractor): - IE_NAME = 'stv:player' - _VALID_URL = r'https?://player\.stv\.tv/(?P<type>episode|video)/(?P<id>[a-z0-9]{4})' - _TESTS = [{ - # shortform - 'url': 'https://player.stv.tv/video/4gwd/emmerdale/60-seconds-on-set-with-laura-norton/', - 'md5': '5adf9439c31d554f8be0707c7abe7e0a', - 'info_dict': { - 'id': '5333973339001', - 'ext': 'mp4', - 
'upload_date': '20170301', - 'title': '60 seconds on set with Laura Norton', - 'description': "How many questions can Laura - a.k.a Kerry Wyatt - answer in 60 seconds? Let\'s find out!", - 'timestamp': 1488388054, - 'uploader_id': '1486976045', - }, - 'skip': 'this resource is unavailable outside of the UK', - }, { - # episodes - 'url': 'https://player.stv.tv/episode/4125/jennifer-saunders-memory-lane', - 'only_matching': True, - }] - BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1486976045/default_default/index.html?videoId=%s' - _PTYPE_MAP = { - 'episode': 'episodes', - 'video': 'shortform', - } - - def _real_extract(self, url): - ptype, video_id = re.match(self._VALID_URL, url).groups() - - webpage = self._download_webpage(url, video_id, fatal=False) or '' - props = (self._parse_json(self._search_regex( - r'<script[^>]+id="__NEXT_DATA__"[^>]*>({.+?})</script>', - webpage, 'next data', default='{}'), video_id, - fatal=False) or {}).get('props') or {} - player_api_cache = try_get( - props, lambda x: x['initialReduxState']['playerApiCache']) or {} - - api_path, resp = None, {} - for k, v in player_api_cache.items(): - if k.startswith('/episodes/') or k.startswith('/shortform/'): - api_path, resp = k, v - break - else: - episode_id = str_or_none(try_get( - props, lambda x: x['pageProps']['episodeId'])) - api_path = '/%s/%s' % (self._PTYPE_MAP[ptype], episode_id or video_id) - - result = resp.get('results') - if not result: - resp = self._download_json( - 'https://player.api.stv.tv/v1' + api_path, video_id) - result = resp['results'] - - video = result['video'] - video_id = compat_str(video['id']) - - subtitles = {} - _subtitles = result.get('_subtitles') or {} - for ext, sub_url in _subtitles.items(): - subtitles.setdefault('en', []).append({ - 'ext': 'vtt' if ext == 'webvtt' else ext, - 'url': sub_url, - }) - - programme = result.get('programme') or {} - - return { - '_type': 'url_transparent', - 'id': video_id, - 'url': 
smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % video_id, {'geo_countries': ['GB']}), - 'description': result.get('summary'), - 'duration': float_or_none(video.get('length'), 1000), - 'subtitles': subtitles, - 'view_count': int_or_none(result.get('views')), - 'series': programme.get('name') or programme.get('shortName'), - 'ie_key': 'BrightcoveNew', - } diff --git a/youtube_dl/extractor/sunporno.py b/youtube_dl/extractor/sunporno.py deleted file mode 100644 index 68051169b..000000000 --- a/youtube_dl/extractor/sunporno.py +++ /dev/null @@ -1,79 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - parse_duration, - int_or_none, - qualities, - determine_ext, -) - - -class SunPornoIE(InfoExtractor): - _VALID_URL = r'https?://(?:(?:www\.)?sunporno\.com/videos|embeds\.sunporno\.com/embed)/(?P<id>\d+)' - _TESTS = [{ - 'url': 'http://www.sunporno.com/videos/807778/', - 'md5': '507887e29033502f29dba69affeebfc9', - 'info_dict': { - 'id': '807778', - 'ext': 'mp4', - 'title': 'md5:0a400058e8105d39e35c35e7c5184164', - 'description': 'md5:a31241990e1bd3a64e72ae99afb325fb', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 302, - 'age_limit': 18, - } - }, { - 'url': 'http://embeds.sunporno.com/embed/807778', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage( - 'http://www.sunporno.com/videos/%s' % video_id, video_id) - - title = self._html_search_regex( - r'<title>([^<]+)', webpage, 'title') - description = self._html_search_meta( - 'description', webpage, 'description') - thumbnail = self._html_search_regex( - r'poster="([^"]+)"', webpage, 'thumbnail', fatal=False) - - duration = parse_duration(self._search_regex( - (r'itemprop="duration"[^>]*>\s*(\d+:\d+)\s*<', - r'>Duration:\s*]+>\s*(\d+:\d+)\s*<'), - webpage, 'duration', fatal=False)) - - view_count = int_or_none(self._html_search_regex( - r'class="views">(?: