Skip to content

Commit

Permalink
Merge pull request #15255 from cdapio/CDAP-20674-exclude-scala-from-t…
Browse files Browse the repository at this point in the history
…will-jar

CDAP-20674 exclude scala jars from twill jar in Dataproc
  • Loading branch information
albertshau authored Jul 26, 2023
2 parents 6d5e189 + eb4500f commit fb8cde7
Show file tree
Hide file tree
Showing 2 changed files with 73 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,15 @@ public static synchronized LocalFile getTwillJar(LocationFactory locationFactory
return getLocalFile(location, true);
}

// scala gets bundled in the twill jar because it's a Kafka dependency,
// but Kafka is not used in Dataproc jobs at all. Exclude it to make sure it doesn't
// clash with the scala on the cluster.
// For example, Dataproc 1.5 uses scala-library 2.12.10, which is incompatible with 2.12.15
ApplicationBundler bundler = new ApplicationBundler(new ClassAcceptor() {
@Override
public boolean accept(String className, URL classUrl, URL classPathUrl) {
return !className.startsWith("org.apache.hadoop") && !classPathUrl.toString()
.contains("spark-assembly");
.contains("spark-assembly") && !classPathUrl.toString().contains("scala-library");
}
});
bundler.createBundle(location, ImmutableList.of(ApplicationMasterMain.class,
Expand Down Expand Up @@ -102,10 +106,10 @@ public boolean accept(String className, URL classUrl, URL classPathUrl) {
}, DataprocJobMain.class.getName());

// Add the logback-console.xml from resources
URL logbackURL = classLoader.getResource("logback-console.xml");
if (logbackURL != null) {
URL logbackUrl = classLoader.getResource("logback-console.xml");
if (logbackUrl != null) {
jarOut.putNextEntry(new JarEntry("logback-console.xml"));
Resources.copy(logbackURL, jarOut);
Resources.copy(logbackUrl, jarOut);
}
}
return getLocalFile(location, false);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
/*
* Copyright © 2023 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/

package io.cdap.cdap.runtime.spi;

import io.cdap.cdap.runtime.spi.runtimejob.DataprocJarUtil;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.util.HashSet;
import java.util.Set;
import java.util.jar.JarEntry;
import java.util.jar.JarInputStream;
import java.util.stream.Collectors;
import org.apache.twill.api.LocalFile;
import org.apache.twill.filesystem.LocalLocationFactory;
import org.apache.twill.filesystem.LocationFactory;
import org.junit.Assert;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;

/**
 * Tests for {@link DataprocJarUtil}.
 */
public class DataprocJarUtilTest {

  @ClassRule
  public static final TemporaryFolder TMP_FOLDER = new TemporaryFolder();

  /**
   * Builds the twill jar in a temporary folder and verifies that no jar entry name mentions
   * scala, with the single exception of scala-parser-combinators.
   *
   * @throws IOException if the temporary folder cannot be created or the jar cannot be read
   */
  @Test
  public void testScalaLibraryNotPackaged() throws IOException {
    LocationFactory locationFactory = new LocalLocationFactory(TMP_FOLDER.newFolder());
    LocalFile localFile = DataprocJarUtil.getTwillJar(locationFactory);
    File f = new File(localFile.getURI());

    Set<String> scalaLibraries = new HashSet<>();
    try (JarInputStream jarInput = new JarInputStream(Files.newInputStream(f.toPath()))) {
      JarEntry entry;
      while ((entry = jarInput.getNextJarEntry()) != null) {
        // CDAP-20674: when this test was added, scala-library and scala-parser-combinators were
        // both getting pulled into the twill.jar (with just scala-library causing problems).
        // It is possible for this test to start failing in the future if twill changes.
        // If that happens, it may be ok to add more scala libraries as expected libraries,
        // but it should be a conscious, tested decision.
        String entryName = entry.getName();
        if (entryName.contains("scala") && !entryName.contains("scala-parser-combinators")) {
          scalaLibraries.add(entryName);
        }
      }
    }

    // Sort and comma-separate the offending entries so the failure message is readable and
    // stable across runs (HashSet iteration order is unspecified).
    String unexpected = scalaLibraries.stream().sorted().collect(Collectors.joining(", "));
    Assert.assertTrue(String.format("Unexpected scala libraries in twill jar: %s", unexpected),
        scalaLibraries.isEmpty());
  }
}

0 comments on commit fb8cde7

Please sign in to comment.