package io.cdap.plugin.gcp.gcs.source;

import com.google.cloud.hadoop.gcsio.GoogleCloudStorageFileSystem;
import com.google.cloud.storage.Storage;
import com.google.cloud.storage.StorageException;
import com.google.common.base.Strings;
import com.google.gson.Gson;
import com.google.gson.reflect.TypeToken;
import io.cdap.cdap.api.annotation.Description;
import io.cdap.cdap.api.annotation.Macro;
import io.cdap.cdap.api.annotation.Metadata;
import io.cdap.cdap.api.annotation.MetadataProperty;
import io.cdap.cdap.api.annotation.Name;
import io.cdap.cdap.api.annotation.Plugin;
import io.cdap.cdap.etl.api.FailureCollector;
import io.cdap.cdap.etl.api.PipelineConfigurer;
import io.cdap.cdap.etl.api.batch.BatchContext;
import io.cdap.cdap.etl.api.batch.BatchSourceContext;
import io.cdap.plugin.common.Asset;
import io.cdap.plugin.common.ConfigUtil;
import io.cdap.plugin.common.LineageRecorder;
import io.cdap.plugin.common.ReferenceNames;
import io.cdap.plugin.format.input.PathTrackingInputFormat;
import io.cdap.plugin.format.plugin.AbstractFileSource;
import io.cdap.plugin.format.plugin.AbstractFileSourceConfig;
import io.cdap.plugin.format.plugin.FileSourceProperties;
import io.cdap.plugin.gcp.common.GCPConnectorConfig;
import io.cdap.plugin.gcp.common.GCPUtils;
import io.cdap.plugin.gcp.crypto.EncryptedFileSystem;
import io.cdap.plugin.gcp.gcs.GCSPath;
import java.lang.reflect.Type;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.regex.Pattern;
import javax.annotation.Nullable;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

/**
 * Batch source plugin that reads objects from a path in a Google Cloud Storage bucket.
 *
 * <p>Before a run it looks up the location of the source bucket and records it, together with
 * the configured path, in an {@link Asset} that is later handed to the {@link LineageRecorder}.
 */
@Name(GCSSource.NAME)
@Description("Reads objects from a path in a Google Cloud Storage bucket.")
@Metadata(properties = {@MetadataProperty(key = "connector", value = "GCS")})
@Plugin(type = "batchsource")
public class GCSSource extends AbstractFileSource<GCSSourceConfig> {
    /** Plugin name under which this source is registered. */
    public static final String NAME = "GCSFile";

    private final GCSSourceConfig config;

    /** Lineage asset describing the source; populated by {@link #prepareRun}. */
    private Asset asset;

    /**
     * Configuration of the GCS file source: the path to read, optional file-system properties,
     * split sizing, decryption settings and the connection to use.
     */
    public static class GCSSourceConfig extends AbstractFileSourceConfig implements FileSourceProperties {
        public static final String NAME_PATH = "path";
        public static final String NAME_FORMAT = "format";
        private static final String NAME_FILE_SYSTEM_PROPERTIES = "fileSystemProperties";
        private static final String NAME_FILE_REGEX = "fileRegex";
        private static final String NAME_DELIMITER = "delimiter";
        private static final String DEFAULT_ENCRYPTED_METADATA_SUFFIX = ".metadata";
        private static final Gson GSON = new Gson();
        private static final Type MAP_STRING_STRING_TYPE = new TypeToken<Map<String, String>>() { }.getType();

        @Macro
        @Description("The path to read from. For example, gs://<bucket>/path/to/directory/")
        private String path;

        @Macro
        @Description("Map of properties to set on the InputFormat.")
        @Nullable
        private String fileSystemProperties;

        @Macro
        @Description("Minimum size of each partition used to read data. ")
        @Nullable
        private Long minSplitSize;

        @Macro
        @Description("Whether the data file is encrypted. If it is set to 'true', a associated metadata file needs to be provided for each data file. Please refer to the Documentation for the details of the metadata file content.")
        @Nullable
        private Boolean encrypted;

        @Macro
        @Description("The file name suffix for the metadata file of the encrypted data file. The default is '.metadata'.")
        @Nullable
        private String encryptedMetadataSuffix;

        @Macro
        @Description("A list of columns with the corresponding data types for whom the automatic data type detection gets skipped.")
        @Nullable
        private String override;

        @Macro
        @Description("The maximum number of rows that will get investigated for automatic data type detection.")
        @Nullable
        private Long sampleSize;

        @Name(ConfigUtil.NAME_USE_CONNECTION)
        @Description("Whether to use an existing connection.")
        @Nullable
        private Boolean useConnection;

        @Name(ConfigUtil.NAME_CONNECTION)
        @Description("The existing connection to use.")
        @Nullable
        @Macro
        private GCPConnectorConfig connection;

        /**
         * No-op override of the parameterless validation.
         *
         * <p>NOTE(review): presumably all checks are expected to go through
         * {@link #validate(FailureCollector)} instead - confirm against the parent class.
         */
        @Override
        public void validate() {
            // Deliberately empty.
        }

        /**
         * Validates this configuration and reports every problem to the given collector.
         *
         * <p>After the parent and connection checks, each of the path, the file-system properties
         * and the file regex is verified, unless the property still contains a macro.
         *
         * @param failureCollector collector that receives one failure per invalid property
         */
        @Override
        public void validate(FailureCollector failureCollector) {
            super.validate(failureCollector);
            ConfigUtil.validateConnection(this, this.useConnection, this.connection, failureCollector);

            if (!containsMacro(NAME_PATH)) {
                try {
                    GCSPath.from(this.path);
                } catch (IllegalArgumentException e) {
                    failureCollector.addFailure(e.getMessage(), null)
                        .withConfigProperty(NAME_PATH)
                        .withStacktrace(e.getStackTrace());
                }
            }

            if (!containsMacro(NAME_FILE_SYSTEM_PROPERTIES)) {
                try {
                    getFileSystemProperties();
                } catch (Exception e) {
                    // Any parsing failure is reported as the same user-facing message.
                    failureCollector.addFailure("File system properties must be a valid json.", null)
                        .withConfigProperty(NAME_FILE_SYSTEM_PROPERTIES)
                        .withStacktrace(e.getStackTrace());
                }
            }

            if (!containsMacro(NAME_FILE_REGEX)) {
                try {
                    getFilePattern();
                } catch (IllegalArgumentException e) {
                    failureCollector.addFailure(e.getMessage(), null)
                        .withConfigProperty(NAME_FILE_REGEX)
                        .withStacktrace(e.getStackTrace());
                }
            }
        }

        /**
         * @return the configured path to read from
         */
        @Override
        public String getPath() {
            return this.path;
        }

        /**
         * Builds the pattern of file names to exclude from reading.
         *
         * @return a pattern matching any name that ends with the encrypted-metadata suffix when
         *     encryption is enabled, otherwise {@code null}
         */
        @Nullable
        public Pattern getExclusionPattern() {
            if (!isEncrypted()) {
                return null;
            }
            return Pattern.compile(".*" + Pattern.quote(getEncryptedMetadataSuffix()) + "$");
        }

        /**
         * @return the configured minimum split size, or {@code null} when not set
         */
        @Nullable
        public Long getMinSplitSize() {
            return this.minSplitSize;
        }

        /**
         * @return always {@code false}
         */
        @Override
        public boolean shouldAllowEmptyInput() {
            return false;
        }

        /**
         * @return the value of the inherited {@code shouldCopyHeader()}
         */
        public boolean isCopyHeader() {
            return shouldCopyHeader();
        }

        /**
         * @return {@code true} only when the {@code encrypted} property is set and true
         */
        public boolean isEncrypted() {
            return this.encrypted != null && this.encrypted;
        }

        /**
         * @return the configured metadata-file suffix, or {@code ".metadata"} when it is null or empty
         */
        public String getEncryptedMetadataSuffix() {
            if (Strings.isNullOrEmpty(this.encryptedMetadataSuffix)) {
                return DEFAULT_ENCRYPTED_METADATA_SUFFIX;
            }
            return this.encryptedMetadataSuffix;
        }

        /**
         * Parses the {@code fileSystemProperties} JSON string into a map.
         *
         * <p>A missing or empty property, as well as JSON that parses to {@code null}, yields an
         * empty map so that callers never receive {@code null}.
         *
         * @return the parsed properties, never {@code null}
         */
        Map<String, String> getFileSystemProperties() {
            if (Strings.isNullOrEmpty(this.fileSystemProperties)) {
                return Collections.emptyMap();
            }
            Map<String, String> parsed = GSON.fromJson(this.fileSystemProperties, MAP_STRING_STRING_TYPE);
            // Gson returns null for input such as the JSON literal "null".
            return parsed == null ? Collections.<String, String>emptyMap() : parsed;
        }
    }

    /**
     * Creates the source for the given configuration.
     *
     * @param gCSSourceConfig plugin configuration; also passed to the parent class
     */
    public GCSSource(GCSSourceConfig gCSSourceConfig) {
        super(gCSSourceConfig);
        this.config = gCSSourceConfig;
    }

    /**
     * Delegates pipeline configuration to the parent class.
     *
     * @param pipelineConfigurer configurer supplied by the framework
     */
    @Override
    public void configurePipeline(PipelineConfigurer pipelineConfigurer) {
        super.configurePipeline(pipelineConfigurer);
    }

    /**
     * Resolves the location of the source bucket, builds the lineage {@link Asset} and then
     * delegates to the parent's {@code prepareRun}.
     *
     * <p>The parent is invoked last so that the asset is already set when it runs.
     *
     * @param batchSourceContext context of the current run
     * @throws IllegalArgumentException if no connection is configured or the bucket does not exist
     * @throws RuntimeException if the storage service reports an error while reading the bucket
     * @throws Exception any other failure raised while loading credentials or by the parent class
     */
    @Override
    public void prepareRun(BatchSourceContext batchSourceContext) throws Exception {
        String path = this.config.getPath();
        String bucketName = GCSPath.from(path).getBucket();

        GCPConnectorConfig connection = this.config.connection;
        if (connection == null) {
            throw new IllegalArgumentException(
                "GCS connection configuration is missing; cannot determine the project or credentials.");
        }

        Storage storage = GCPUtils.getStorage(
            connection.getProject(),
            connection.getServiceAccount() == null
                ? null
                : GCPUtils.loadServiceAccountCredentials(
                    connection.getServiceAccount(), connection.isServiceAccountFilePath()));

        String location;
        try {
            // Storage.get returns null rather than throwing when the bucket does not exist.
            location = Optional.ofNullable(storage.get(bucketName))
                .orElseThrow(() -> new IllegalArgumentException(String.format(
                    "Bucket %s does not exist. Ensure you entered the correct bucket path.", bucketName)))
                .getLocation();
        } catch (StorageException e) {
            throw new RuntimeException(String.format("Unable to access bucket %s. ", bucketName) + "Ensure you entered the correct bucket path and have permissions for it.", e);
        }

        String referenceName = Strings.isNullOrEmpty(this.config.getReferenceName())
            ? ReferenceNames.normalizeFqn(path)
            : this.config.getReferenceName();
        this.asset = Asset.builder(referenceName).setFqn(path).setLocation(location).build();

        // Kept outside the try block so that failures of the parent are not misreported
        // as a bucket access problem.
        super.prepareRun(batchSourceContext);
    }

    /**
     * Assembles the file-system properties used for reading.
     *
     * <p>Starts from the user-supplied properties combined with the connection settings, then adds
     * header copying, a non-UTF-8 source encoding, the minimum split size and the decryption
     * settings, each only when the corresponding option is configured.
     *
     * @param batchSourceContext context of the current run (not used here)
     * @return the properties map
     */
    @Override
    protected Map<String, String> getFileSystemProperties(BatchSourceContext batchSourceContext) {
        Map<String, String> properties = GCPUtils.getFileSystemProperties(
            this.config.connection, this.config.getPath(), new HashMap<>(this.config.getFileSystemProperties()));

        if (this.config.isCopyHeader()) {
            properties.put(PathTrackingInputFormat.COPY_HEADER, Boolean.TRUE.toString());
        }

        String encoding = this.config.getFileEncoding();
        if (encoding != null && !encoding.equalsIgnoreCase("UTF-8")) {
            properties.put(PathTrackingInputFormat.SOURCE_FILE_ENCODING, encoding);
        }

        Long minSplitSize = this.config.getMinSplitSize();
        if (minSplitSize != null) {
            properties.put(FileInputFormat.SPLIT_MINSIZE, String.valueOf(minSplitSize));
        }

        if (this.config.isEncrypted()) {
            TinkDecryptor.configure(this.config.getEncryptedMetadataSuffix(), properties);
            EncryptedFileSystem.configure(GoogleCloudStorageFileSystem.SCHEME, TinkDecryptor.class, properties);
            GCSRegexPathFilter.configure(this.config, properties);
        }
        return properties;
    }

    /**
     * Creates the lineage recorder for the asset built in {@link #prepareRun}.
     *
     * @param batchSourceContext context of the current run
     * @return a recorder bound to the context and the source asset
     */
    @Override
    protected LineageRecorder getLineageRecorder(BatchSourceContext batchSourceContext) {
        return new LineageRecorder((BatchContext) batchSourceContext, this.asset);
    }

    /**
     * Records a read operation, mentioning decryption when the source is encrypted.
     *
     * @param lineageRecorder recorder to write the operation to
     * @param list field names passed through to the recorder
     */
    @Override
    protected void recordLineage(LineageRecorder lineageRecorder, List<String> list) {
        String action = this.config.isEncrypted() ? " and decrypt " : " ";
        lineageRecorder.recordRead("Read", String.format("Read%sfrom Google Cloud Storage.", action), list);
    }

    /**
     * Decides whether the schema should be fetched.
     *
     * @return {@code false} if any of the project, path, format, delimiter, file-system properties
     *     or service-account properties contains a macro, otherwise {@code true}
     */
    @Override
    protected boolean shouldGetSchema() {
        return !(this.config.containsMacro("project")
            || this.config.containsMacro(GCSSourceConfig.NAME_PATH)
            || this.config.containsMacro(GCSSourceConfig.NAME_FORMAT)
            || this.config.containsMacro(GCSSourceConfig.NAME_DELIMITER)
            || this.config.containsMacro(GCSSourceConfig.NAME_FILE_SYSTEM_PROPERTIES)
            || this.config.containsMacro("serviceFilePath")
            || this.config.containsMacro("serviceAccountJSON"));
    }
}
