Your IP : 3.142.135.127


Current Path : /var/www/www-root/data/webdav/www.catalog.monolith-realty.ru/bitrix/modules/clouds/lib/
Upload File :
Current File : /var/www/www-root/data/webdav/www.catalog.monolith-realty.ru/bitrix/modules/clouds/lib/filehash.php

<?php
namespace Bitrix\Clouds;

use Bitrix\Main\Localization\Loc;
use Bitrix\Main\ORM\Data\DataManager;
use Bitrix\Main\ORM\Fields\DatetimeField;
use Bitrix\Main\ORM\Fields\IntegerField;
use Bitrix\Main\ORM\Fields\StringField;
use Bitrix\Main\ORM\Fields\Validators\LengthValidator;
use Bitrix\Main\ORM\Fields\ExpressionField;

Loc::loadMessages(__FILE__);

/**
 * <ul>
 * <li> ID integer mandatory
 * <li> BUCKET_ID integer mandatory
 * <li> FILE_PATH string(760) mandatory
 * <li> FILE_SIZE int optional
 * <li> FILE_MTIME datetime optional
 * <li> FILE_HASH string(50) optional
 * </ul>
 *
 * @package Bitrix\Clouds
 *
 * DO NOT WRITE ANYTHING BELOW THIS
 *
 * <<< ORMENTITYANNOTATION
 * @method static EO_FileHash_Query query()
 * @method static EO_FileHash_Result getByPrimary($primary, array $parameters = array())
 * @method static EO_FileHash_Result getById($id)
 * @method static EO_FileHash_Result getList(array $parameters = array())
 * @method static EO_FileHash_Entity getEntity()
 * @method static \Bitrix\Clouds\EO_FileHash createObject($setDefaultValues = true)
 * @method static \Bitrix\Clouds\EO_FileHash_Collection createCollection()
 * @method static \Bitrix\Clouds\EO_FileHash wakeUpObject($row)
 * @method static \Bitrix\Clouds\EO_FileHash_Collection wakeUpCollection($rows)
 */

class FileHashTable extends DataManager
{
	/**
	 * Returns the name of the database table that stores this entity's rows.
	 *
	 * The raw SQL statements built elsewhere in this class obtain the table
	 * name through this method (via static::getTableName()).
	 *
	 * @return string
	 */
	public static function getTableName()
	{
		return 'b_clouds_file_hash';
	}

	/**
	 * Describes the entity fields.
	 *
	 * ID is the auto-generated primary key, BUCKET_ID and FILE_PATH are required,
	 * FILE_SIZE, FILE_MTIME and FILE_HASH are optional. Length limits of the two
	 * string fields are supplied by validateFilePath() and validateFileHash().
	 *
	 * @return array
	 */
	public static function getMap()
	{
		$id = new IntegerField('ID', [
			'primary' => true,
			'autocomplete' => true,
			'title' => Loc::getMessage('FILE_HASH_ENTITY_ID_FIELD'),
		]);

		$bucketId = new IntegerField('BUCKET_ID', [
			'required' => true,
			'title' => Loc::getMessage('FILE_HASH_ENTITY_BUCKET_ID_FIELD'),
		]);

		$filePath = new StringField('FILE_PATH', [
			'required' => true,
			'validation' => [__CLASS__, 'validateFilePath'],
			'title' => Loc::getMessage('FILE_HASH_ENTITY_FILE_PATH_FIELD'),
		]);

		$fileSize = new IntegerField('FILE_SIZE', [
			'title' => Loc::getMessage('FILE_HASH_ENTITY_FILE_SIZE_FIELD'),
		]);

		$fileMtime = new DatetimeField('FILE_MTIME', [
			'title' => Loc::getMessage('FILE_HASH_ENTITY_FILE_MTIME_FIELD'),
		]);

		$fileHash = new StringField('FILE_HASH', [
			'validation' => [__CLASS__, 'validateFileHash'],
			'title' => Loc::getMessage('FILE_HASH_ENTITY_FILE_HASH_FIELD'),
		]);

		return [$id, $bucketId, $filePath, $fileSize, $fileMtime, $fileHash];
	}

	/**
	 * Validator list for the FILE_PATH field: limits the value to 760 characters.
	 *
	 * @return array
	 */
	public static function validateFilePath()
	{
		$maxLength = 760;

		return [new LengthValidator(null, $maxLength)];
	}

	/**
	 * Validator list for the FILE_HASH field: limits the value to 50 characters.
	 *
	 * @return array
	 */
	public static function validateFileHash()
	{
		$maxLength = 50;

		return [new LengthValidator(null, $maxLength)];
	}

	/**
	 * Stores file hashes to the database with a single multi-row INSERT.
	 *
	 * The parallel arrays $files['file'], $files['file_size'], $files['file_mtime']
	 * and $files['file_hash'] are read by the same index.
	 *
	 * @param integer $bucketId Clouds storage bucket identifier.
	 * @param array $files File list as it returned by \CCloudStorageBucket::ListFiles.
	 *
	 * @return null|\Bitrix\Main\DB\Result Null when the list contains no files (nothing is executed).
	 * @see \CCloudStorageBucket::ListFiles
	 */
	public static function addList($bucketId, array $files)
	{
		if (empty($files['file']))
		{
			// An INSERT with an empty VALUES list is not valid SQL, so there is nothing to run.
			return null;
		}

		$bucketId = intval($bucketId);
		$connection = \Bitrix\Main\Application::getConnection();
		$helper = $connection->getSqlHelper();
		$values = [];
		foreach ($files['file'] as $i => $file)
		{
			$fileSize = $files['file_size'][$i];
			$fileMtime = \CCloudUtil::gmtTimeToDateTime($files['file_mtime'][$i]);
			$fileHash = $files['file_hash'][$i];
			$values[] = '('
				. $bucketId
				. ",'" . $helper->forSql($file) . "'"
				. ',' . intval($fileSize)
				// 'H' is the 24-hour clock; 'h' (12-hour) would store 15:00 as 03:00.
				. ",'" . $fileMtime->format('Y-m-d H:i:s') . "'"
				. ",'" . $helper->forSql($fileHash) . "'"
				. ')'
			;
		}
		$sql = '
			INSERT INTO ' . static::getTableName() . '
			(BUCKET_ID, FILE_PATH, FILE_SIZE, FILE_MTIME, FILE_HASH)
			VALUES
			' . implode(",\n", $values) . '
		';
		return $connection->query($sql);
	}

	/**
	 * Sync file hashes to the database for one page of a bucket listing.
	 *
	 * Stored rows whose FILE_PATH lies after $path . $prevLastKey and up to
	 * $path . $files['last_key'] are compared with the listing: rows that match
	 * by path, size and hash are kept, all other rows in that range are deleted.
	 * Listed files without a matching stored row are then inserted in batches.
	 *
	 * @param integer $bucketId Clouds storage bucket identifier.
	 * @param string $path File list relative path.
	 * @param array $files File list as it returned by CCloudStorageBucket::ListFiles.
	 * @param string $prevLastKey Last key returned by previous call to CCloudStorageBucket::ListFiles.
	 *
	 * @return null|\Bitrix\Main\DB\Result Result of the last INSERT, or null when nothing was inserted.
	 * @see \CCloudStorageBucket::ListFiles
	 */
	public static function syncList($bucketId, $path, array $files, $prevLastKey)
	{
		$result = null;
		$bucketId = intval($bucketId);
		$connection = \Bitrix\Main\Application::getConnection();
		$helper = $connection->getSqlHelper();

		// Full stored path => position inside the parallel $files arrays.
		$index = [];
		foreach ($files['file'] as $i => $filePath)
		{
			$index[$path . $filePath] = $i;
		}

		// Restrict the stored rows to the key range covered by this page.
		$filter = [
			'=BUCKET_ID' => $bucketId,
		];
		if ($prevLastKey)
		{
			$filter['>FILE_PATH'] = $path . $prevLastKey;
		}
		if (!empty($files['last_key']))
		{
			$filter['<=FILE_PATH'] = $path . $files['last_key'];
		}
		$fileList = static::getList([
			'select' => ['ID', 'FILE_PATH', 'FILE_SIZE', 'FILE_HASH'],
			'filter' => $filter,
		]);
		while ($fileInfo = $fileList->fetch())
		{
			$storedPath = $fileInfo['FILE_PATH'];
			$pos = array_key_exists($storedPath, $index) ? $index[$storedPath] : null;
			if (
				$pos !== null
				&& (int)$files['file_size'][$pos] === (int)$fileInfo['FILE_SIZE']
				// Strict string comparison: with a loose "==" two different hashes that
				// both look numeric (e.g. "0e123" and "0e456") would compare as equal.
				&& (string)$files['file_hash'][$pos] === (string)$fileInfo['FILE_HASH']
			)
			{
				// Already stored and unchanged: no need to insert it again.
				unset($files['file'][$pos]);
			}
			else
			{
				// Missing from the listing or changed: drop the stale row.
				static::delete($fileInfo['ID']);
			}
		}

		$values = [];
		foreach ($files['file'] as $i => $file)
		{
			$fileSize = $files['file_size'][$i];
			$fileMtime = \CCloudUtil::gmtTimeToDateTime($files['file_mtime'][$i]);
			$fileHash = $files['file_hash'][$i];
			$values[] = '('
				. $bucketId
				. ",'" . $helper->forSql($path . $file) . "'"
				. ',' . intval($fileSize)
				// 'H' is the 24-hour clock; 'h' (12-hour) would store 15:00 as 03:00.
				. ",'" . $fileMtime->format('Y-m-d H:i:s') . "'"
				. ",'" . $helper->forSql($fileHash) . "'"
				. ')'
			;
		}

		// Insert in batches so a single statement never carries more than $insertSize rows.
		$insertSize = 1000;
		while ($values)
		{
			$sql = '
				INSERT INTO ' . static::getTableName() . '
				(BUCKET_ID, FILE_PATH, FILE_SIZE, FILE_MTIME, FILE_HASH)
				VALUES
				' . implode(",\n", array_splice($values, 0, $insertSize)) . '
			';
			$result = $connection->query($sql);
		}

		return $result;
	}

	/**
	 * Sync file hashes to the database. Removes all keys beyond the $prevLastKey.
	 *
	 * Only rows of the given bucket whose FILE_PATH starts with $path and sorts
	 * after $path . $prevLastKey are deleted.
	 *
	 * @param integer $bucketId Clouds storage bucket identifier.
	 * @param string $path File list relative path.
	 * @param string $prevLastKey Last key returned by last call to CCloudStorageBucket::ListFiles.
	 *
	 * @return null|\Bitrix\Main\DB\Result
	 * @see \Bitrix\Clouds\FileHashTable::syncList
	 */
	public static function syncEnd($bucketId, $path, $prevLastKey)
	{
		$bucketId = intval($bucketId);
		$connection = \Bitrix\Main\Application::getConnection();
		$sqlHelper = $connection->getSqlHelper();

		// "%" and "_" are LIKE wildcards. Without escaping, a path such as "/my_dir/"
		// would also match "/myXdir/..." and delete rows that belong to another prefix.
		// An explicit ESCAPE character is used so the pattern does not depend on the
		// server's backslash handling.
		$likePrefix = strtr($path, ['!' => '!!', '%' => '!%', '_' => '!_']);

		$delete = '
			DELETE from ' . static::getTableName() . '
			WHERE BUCKET_ID = ' . $bucketId . '
			AND FILE_PATH like \'' . $sqlHelper->forSql($likePrefix) . '%\' ESCAPE \'!\'
			AND FILE_PATH > \'' . $sqlHelper->forSql($path . $prevLastKey) . '\'
		';
		$result = $connection->query($delete);
		return $result;
	}

	/**
	 * Stores the hash record of a single file.
	 *
	 * Reads the 'size', 'mtime' and 'hash' keys of $fileInfo; the modification
	 * time is converted with \CCloudUtil::gmtTimeToDateTime before it is stored.
	 *
	 * @param integer $bucketId Clouds storage bucket identifier.
	 * @param string $path Path to the file.
	 * @param array $fileInfo File info as it returned by CCloudStorageBucket::GetFileInfo.
	 *
	 * @return null|\Bitrix\Main\DB\Result NOTE(review): the value is whatever the inherited add() returns - confirm its actual type.
	 * @see \CCloudStorageBucket::GetFileInfo
	 */
	public static function addFile($bucketId, $path, array $fileInfo)
	{
		$row = [
			'BUCKET_ID' => $bucketId,
			'FILE_PATH' => $path,
			'FILE_SIZE' => $fileInfo['size'],
			'FILE_MTIME' => \CCloudUtil::gmtTimeToDateTime($fileInfo['mtime']),
			'FILE_HASH' => $fileInfo['hash'],
		];

		return static::add($row);
	}

	/**
	 * Finds the greatest FILE_PATH stored for the bucket.
	 *
	 * @param integer $bucketId Clouds storage bucket identifier.
	 *
	 * @return string The greatest stored path, or an empty string when there is none.
	 */
	public static function getLastKey($bucketId)
	{
		$bucketId = intval($bucketId);
		$connection = \Bitrix\Main\Application::getConnection();

		$row = $connection
			->query('SELECT max(FILE_PATH) LAST_KEY from ' . static::getTableName() . ' WHERE BUCKET_ID=' . $bucketId)
			->fetch();

		// No row, or an empty/NULL maximum, both mean "nothing stored yet".
		if (!$row || !$row['LAST_KEY'])
		{
			return '';
		}

		return $row['LAST_KEY'];
	}

	/**
	 * Removes every stored file hash that belongs to the bucket.
	 *
	 * @param integer $bucketId Clouds storage bucket identifier.
	 *
	 * @return \Bitrix\Main\DB\Result
	 */
	public static function deleteAll($bucketId)
	{
		$bucketId = intval($bucketId);
		$sql = 'DELETE from ' . static::getTableName() . ' WHERE BUCKET_ID=' . $bucketId;

		return \Bitrix\Main\Application::getConnection()->query($sql);
	}

	/**
	 * Removes the stored file hash of one exact file path within the bucket.
	 *
	 * @param integer $bucketId Clouds storage bucket identifier.
	 * @param string $filePath File path.
	 *
	 * @return \Bitrix\Main\DB\Result
	 */
	public static function deleteByFilePath($bucketId, $filePath)
	{
		$bucketId = intval($bucketId);
		$connection = \Bitrix\Main\Application::getConnection();
		$escapedPath = $connection->getSqlHelper()->forSql($filePath);

		$sql = '
			DELETE from ' . static::getTableName() . '
			WHERE BUCKET_ID = ' . $bucketId . "
			AND FILE_PATH = '" . $escapedPath . "'
		";

		return $connection->query($sql);
	}

	/**
	 * Returns file listing with "folders", sizes and modification times.
	 *
	 * Rows whose FILE_PATH starts with $path are grouped by the first path segment
	 * that follows $path. A group is reported as 'D' when something follows that
	 * segment after a '/', otherwise as 'F'. Every group carries the summed
	 * size, the latest modification time and the number of rows.
	 *
	 * @param integer $bucketId Clouds storage bucket identifier.
	 * @param string $path Directory path.
	 * @param array $order How to sort.
	 * @param array $filter Additional filter.
	 *
	 * @return \Bitrix\Main\DB\Result
	 */
	public static function dirList($bucketId, $path, $order, $filter)
	{
		$connection = \Bitrix\Main\Application::getConnection();
		$sqlHelper = $connection->getSqlHelper();

		// Part of FILE_PATH that follows the $path prefix. substring() counts
		// characters, so the prefix must be measured with char_length(): length()
		// returns bytes and gives a wrong offset for multibyte (e.g. Cyrillic) paths.
		$relativePath = 'substring(%s, char_length(\'' . $sqlHelper->forSql($path) . '\')+1)';

		$query = \Bitrix\Clouds\FileHashTable::query();
		$query->setSelect([
			new ExpressionField(
				'FILE_TYPE',
				'if(locate(\'/\', ' . $relativePath . ') > 0, \'D\', \'F\')',
				['FILE_PATH']
			),
			new ExpressionField(
				'NAME',
				'substring_index(' . $relativePath . ', \'/\', 1)',
				['FILE_PATH']
			),
			new ExpressionField(
				'SUM_FILE_SIZE',
				'SUM(%s)',
				['FILE_SIZE']
			),
			new ExpressionField(
				'MAX_FILE_MTIME',
				'MAX(%s)',
				['FILE_MTIME']
			),
			new ExpressionField(
				'FILE_COUNT',
				'COUNT(1)'
			),
		]);

		$filter['=BUCKET_ID'] = $bucketId;
		// NOTE(review): "%" and "_" inside $path act as LIKE wildcards here; left
		// unescaped because the ORM's LIKE escape handling is not visible in this file.
		$filter['%=FILE_PATH'] = $path . '%';
		$query->setFilter($filter);
		$query->setGroup(['FILE_TYPE', 'NAME']);
		$query->setOrder($order);

		$sql = $query->getQuery();

		return $connection->query($sql);
	}

	/**
	 * Lists groups of duplicate files of the bucket.
	 *
	 * Rows of \Bitrix\Main\File\Internal\FileHashTable are grouped by hash and
	 * size; only groups with more than one distinct (SUBDIR, FILE_NAME) pair are
	 * returned, together with the smallest and greatest FILE_ID of the group.
	 *
	 * @param integer $bucketId Clouds storage bucket identifier.
	 * @param array $filter Additional filter to pass to FileHashTable.
	 * @param array $order Sort order.
	 * @param integer $limit Records count; values of 0 or less mean no limit.
	 * @return \Bitrix\Main\DB\Result
	 * @see \Bitrix\Main\File\Internal\FileHashTable
	 */
	public static function duplicateList($bucketId, $filter, $order, $limit = 0)
	{
		$fileCount = new ExpressionField(
			'FILE_COUNT',
			'COUNT(distinct %s, %s)',
			['FILE.SUBDIR', 'FILE.FILE_NAME']
		);
		$fileIdMin = new ExpressionField('FILE_ID_MIN', 'MIN(%s)', ['FILE_ID']);
		$fileIdMax = new ExpressionField('FILE_ID_MAX', 'MAX(%s)', ['FILE_ID']);

		// Mandatory conditions are written over whatever the caller supplied.
		$filter['=FILE.HANDLER_ID'] = $bucketId;
		$filter['>FILE_COUNT'] = 1;

		$query = \Bitrix\Main\File\Internal\FileHashTable::query();
		$query->setSelect(['FILE_HASH', 'FILE_SIZE', $fileCount, $fileIdMin, $fileIdMax]);
		$query->setFilter($filter);
		$query->setGroup(['FILE_HASH', 'FILE_SIZE']);
		$query->setOrder($order);
		if ($limit > 0)
		{
			$query->setLimit($limit);
		}

		return \Bitrix\Main\Application::getConnection()->query($query->getQuery());
	}

	/**
	 * Collects identifiers of the bucket files that share the given hash and size.
	 *
	 * @param integer $bucketId Clouds storage bucket identifier.
	 * @param string|array $fileHash Value matched against FILE_HASH.
	 * @param integer|array $fileSize Value matched against FILE_SIZE.
	 * @return array FILE_ID values in ascending order.
	 * @see \Bitrix\Main\File\Internal\FileHashTable
	 */
	public static function getFileDuplicates($bucketId, $fileHash, $fileSize)
	{
		$parameters = [
			'select' => [
				'FILE_ID',
			],
			'filter' => [
				'=FILE.HANDLER_ID' => $bucketId,
				'=FILE_HASH' => $fileHash,
				'=FILE_SIZE' => $fileSize,
			],
			'order' => [
				'FILE_ID' => 'ASC',
			],
		];
		$rows = \Bitrix\Main\File\Internal\FileHashTable::getList($parameters);

		$fileIds = [];
		while ($row = $rows->fetch())
		{
			$fileIds[] = $row['FILE_ID'];
		}

		return $fileIds;
	}

	/**
	 * Calculates summary statistics of duplicate files in the bucket.
	 *
	 * For every (FILE_SIZE, FILE_HASH) group with more than one distinct
	 * (SUBDIR, FILE_NAME) pair the inner query counts the extra copies and the
	 * space they occupy; the outer query sums these values over all groups.
	 *
	 * @param integer $bucketId Clouds storage bucket identifier.
	 * @return array Row with the DUP_COUNT and DUP_SIZE columns.
	 */
	public static function getDuplicatesStat($bucketId)
	{
		$bucketId = intval($bucketId);
		$statSql = "
			select sum(DUP_COUNT) DUP_COUNT, sum(DUP_SIZE) DUP_SIZE
			from (
				select
					b_file_hash.FILE_SIZE
					,b_file_hash.FILE_HASH
					,count(distinct b_file.SUBDIR, b_file.FILE_NAME)-1 DUP_COUNT
					,(count(distinct b_file.SUBDIR, b_file.FILE_NAME)-1) * b_file_hash.FILE_SIZE DUP_SIZE
				from
					b_file_hash
					inner join b_file on
						b_file.ID = b_file_hash.FILE_ID
				where
					b_file.HANDLER_ID = " . $bucketId . "
				group by
					b_file_hash.FILE_SIZE, b_file_hash.FILE_HASH
				having
					count(distinct b_file.SUBDIR, b_file.FILE_NAME) > 1
			) t
		";

		$statResult = \Bitrix\Main\Application::getConnection()->query($statSql);

		return $statResult->fetch();
	}

	/**
	 * Copies one page of file hash information from the clouds module table
	 * (b_clouds_file_hash) into the main module table (b_file_hash).
	 *
	 * Files are taken in ascending b_file.ID order starting after $lastKey; files
	 * that are registered in b_file_duplicate as duplicates are skipped. The page
	 * is first measured and then, when it is not empty, inserted with INSERT IGNORE.
	 *
	 * @param integer $lastKey Where to start.
	 * @param integer $pageSize Batch size.
	 * @return array Row with FILE_ID_MIN, FILE_ID_MAX and FILE_ID_CNT of the page.
	 */
	public static function copyToFileHash($lastKey, $pageSize)
	{
		$lastKey = (int)$lastKey;
		$pageSize = (int)$pageSize;
		$connection = \Bitrix\Main\Application::getConnection();

		// The same SELECT feeds both the statistics query and the INSERT below.
		$pageSql = "
			SELECT
				b_file.ID as FILE_ID
				,b_clouds_file_hash.FILE_SIZE as FILE_SIZE
				,b_clouds_file_hash.FILE_HASH as FILE_HASH
			FROM
				b_file
				INNER JOIN b_clouds_file_hash ON
					b_clouds_file_hash.BUCKET_ID = b_file.HANDLER_ID
					AND b_clouds_file_hash.FILE_PATH = concat('/', b_file.SUBDIR, '/', b_file.FILE_NAME)
				LEFT JOIN b_file_duplicate ON
					b_file_duplicate.DUPLICATE_ID = b_file.ID
			WHERE
				b_file.ID > " . $lastKey . "
				AND b_file_duplicate.DUPLICATE_ID is null
			ORDER BY b_file.ID
			LIMIT " . $pageSize . "
		";

		$pageStat = $connection->query('
			SELECT
				min(FILE_ID) as FILE_ID_MIN
				,max(FILE_ID) as FILE_ID_MAX
				,count(FILE_ID) FILE_ID_CNT
			FROM (' . $pageSql . ') t
		')->fetch();

		$hasRows = $pageStat['FILE_ID_CNT'] > 0;
		if ($hasRows)
		{
			$connection->queryExecute('INSERT IGNORE INTO b_file_hash '.$pageSql);
		}

		return $pageStat;
	}

	/**
	 * Filters the candidates for duplicate removal and picks the file that will
	 * serve as the original.
	 *
	 * Candidates already registered as a DUPLICATE_ID are removed from $fileIds.
	 * Candidates already registered as an ORIGINAL_ID are removed as well, and the
	 * smallest of them becomes the result. When no candidate is an original yet,
	 * the first remaining candidate is taken out of $fileIds and returned.
	 *
	 * @param integer $bucketId Clouds storage bucket identifier (not used by this method).
	 * @param array &$fileIds Array of file identifiers candidates for duplicate delete; modified in place.
	 * @return false|integer False when no candidate is left.
	 */
	public static function prepareDuplicates($bucketId, &$fileIds)
	{
		if (!$fileIds)
		{
			return false;
		}

		// Step 1: exclude every id that is already somebody's duplicate.
		$knownDuplicates = \Bitrix\Main\File\Internal\FileDuplicateTable::getList([
			'select' => ['DUPLICATE_ID'],
			'filter' => [
				'=DUPLICATE_ID' => $fileIds,
			],
		]);
		while ($row = $knownDuplicates->fetch())
		{
			$position = array_search($row['DUPLICATE_ID'], $fileIds);
			if ($position !== false)
			{
				unset($fileIds[$position]);
			}
		}

		if (!$fileIds)
		{
			return false;
		}

		// Step 2: search through the remaining ids for already existing originals.
		$originalId = false;
		$knownOriginals = \Bitrix\Main\File\Internal\FileDuplicateTable::getList([
			'select' => ['ORIGINAL_ID'],
			'filter' => [
				'=ORIGINAL_ID' => $fileIds,
			],
			'order' => ['ORIGINAL_ID' => 'ASC'],
		]);
		while ($row = $knownOriginals->fetch())
		{
			// The first (smallest) original is kept for the future deduplication.
			if ($originalId === false)
			{
				$originalId = $row['ORIGINAL_ID'];
			}

			// Every original is taken out of the candidate list.
			$position = array_search($row['ORIGINAL_ID'], $fileIds);
			if ($position !== false)
			{
				unset($fileIds[$position]);
			}
		}

		// No original exists yet: promote the first candidate.
		if ($originalId === false)
		{
			$originalId = array_shift($fileIds);
		}

		return $originalId;
	}
}